From 7c0f40abbf7aae01e44ec3898a97a26100864a5d Mon Sep 17 00:00:00 2001
From: Ian Moffett <ian@mirocom.org>
Date: Sat, 21 Mar 2026 17:15:24 -0400
Subject: [PATCH] frontend: Add parsing of shell blocks

Signed-off-by: Ian Moffett <ian@mirocom.org>
---
 example/ref.quip         |  1 +
 frontend/lexer.c         | 72 ++++++++++++++++++++++++++++++++++++----
 frontend/parser.c        |  5 ++-
 include/frontend/token.h |  3 +-
 4 files changed, 70 insertions(+), 11 deletions(-)

diff --git a/example/ref.quip b/example/ref.quip
index a37e400..b39dc77 100644
--- a/example/ref.quip
+++ b/example/ref.quip
@@ -1,6 +1,7 @@
 .cc clang
 .ld   lld-link
 
+# Shell block
 CFLAGS ::
      -Iinclude/
      -mgeneral-regs-only
diff --git a/frontend/lexer.c b/frontend/lexer.c
index 82d5016..4dbd0f6 100644
--- a/frontend/lexer.c
+++ b/frontend/lexer.c
@@ -21,6 +21,8 @@
 #include "common/ptrbox.h"
 #include "common/trace.h"
 
+#define SHELL_BLOCK_TERMINATE '~'
+
 /*
  * Test if a given character counts as a whitespace character
  *
@@ -247,6 +249,69 @@ lexer_scan_directive(struct quip_state *state, struct token *tokres)
     return -1;
 }
 
+/*
+ * Scan a shell block up to SHELL_BLOCK_TERMINATE, folding each newline
+ * into a space and dropping the leading whitespace of every line
+ *
+ * @state:   Quip state
+ * @tokres:  Token result is written here
+ *
+ * Returns zero on success, -1 on end of file or if the buffer cannot grow
+ */
+static int
+lexer_scan_shellblock(struct quip_state *state, struct token *tokres)
+{
+    char c, *buf, *tmp;
+    size_t bufsz = 0, bufcap = 8;
+    bool is_leading = true;
+
+    if ((buf = malloc(bufcap)) == NULL) {
+        return -1;
+    }
+
+    for (;;) {
+        if ((c = lexer_consume(state, false)) == '\0') {
+            trace_error(state, "unexpected end of file within shellblock\n");
+            free(buf);
+            return -1;
+        }
+
+        /* Grow before any write below; on failure the old buffer is freed */
+        if (bufsz >= bufcap) {
+            bufcap += 8;
+            if ((tmp = realloc(buf, bufcap)) == NULL) {
+                free(buf);
+                return -1;
+            }
+            buf = tmp;
+        }
+
+        /* A newline becomes a space and re-arms leading whitespace skipping */
+        if (c == '\n') {
+            buf[bufsz++] = ' ';
+            is_leading = true;
+            continue;
+        }
+
+        if (c == SHELL_BLOCK_TERMINATE) {
+            buf[bufsz] = '\0';
+            break;
+        }
+
+        if (is_leading && lexer_is_ws(c)) {
+            continue;
+        }
+
+        is_leading = false;
+        buf[bufsz++] = c;
+    }
+
+    tokres->type = TT_SHELLBLOCK;
+    tokres->s = ptrbox_strdup(&state->ptrbox, buf);
+    free(buf);
+    return 0;
+}
+
 int
 lexer_scan(struct quip_state *state, struct token *tokres)
 {
@@ -274,17 +339,12 @@ lexer_scan(struct quip_state *state, struct token *tokres)
     case ':':
         tokres->c = c;
         if ((c = lexer_consume(state, false)) == ':') {
-            tokres->type = TT_COLONDUB;
-            return 0;
+            return lexer_scan_shellblock(state, tokres);
         }
 
         lexer_putback(state, c);
         tokres->type = TT_COLON;
         return 0;
-    case '~':
-        tokres->type = TT_TILDE;
-        tokres->c = c;
-        return 0;
     default:
         if (lexer_scan_name(state, c, tokres) == 0) {
             return 0;
diff --git a/frontend/parser.c b/frontend/parser.c
index a7d1f7e..833f495 100644
--- a/frontend/parser.c
+++ b/frontend/parser.c
@@ -37,11 +37,10 @@ static const char *toktab[] = {
     [TT_NONE]       = symtok("none"),
     [TT_NAME]       = symtok("name"),
     [TT_NEWLINE]    = symtok("newline"),
+    [TT_SHELLBLOCK] = symtok("shellblock"),
     [TT_CC]         = qtok(".cc"),
     [TT_LD]         = qtok(".ld"),
-    [TT_COLON]      = qtok(":"),
-    [TT_TILDE]      = qtok("~"),
-    [TT_COLONDUB]   = qtok("::")
+    [TT_COLON]      = qtok(":")
 };
 
 int
diff --git a/include/frontend/token.h b/include/frontend/token.h
index 9d15b7c..4aa507c 100644
--- a/include/frontend/token.h
+++ b/include/frontend/token.h
@@ -18,11 +18,10 @@ typedef enum {
     TT_NONE,        /* [none] */
     TT_NAME,        /* [name] */
     TT_NEWLINE,     /* [newline] */
+    TT_SHELLBLOCK,  /* [shellblock] */
     TT_CC,          /* '.cc' */
     TT_LD,          /* '.ld' */
     TT_COLON,       /* ':' */
-    TT_TILDE,       /* '~' */
-    TT_COLONDUB,    /* '::' */
 } tt_t;
 
 /*