Merge branch 'main' into simplified-quotes

author: Bruce Hill <bruce@bruce-hill.com> 2025-09-06 14:47:45 -0400
committer: Bruce Hill <bruce@bruce-hill.com> 2025-09-06 14:47:45 -0400
commit: a8316252db95e3d77f9f0e9beb89cfcb4573d5b1 (patch)
tree: e5905bce9611e35ccb2f84481232fca0e657ff42 /src/parse
parent: a0ac652cd1eebdc42425b34f1685f8cb20cb4eea (diff)
parent: 73246764f88f6f652316ee0c138a990d836698a7 (diff)
15 files changed, 156 insertions, 169 deletions
diff --git a/src/parse/binops.c b/src/parse/binops.c
index 7ccf1379..4676b249 100644
--- a/src/parse/binops.c
+++ b/src/parse/binops.c
@@ -9,33 +9,6 @@
 #include "suffixes.h"
 #include "utils.h"
 
-int op_tightness[] = {
-    [Power] = 9,
-    [Multiply] = 8,
-    [Divide] = 8,
-    [Mod] = 8,
-    [Mod1] = 8,
-    [Plus] = 7,
-    [Minus] = 7,
-    [Concat] = 6,
-    [LeftShift] = 5,
-    [RightShift] = 5,
-    [UnsignedLeftShift] = 5,
-    [UnsignedRightShift] = 5,
-    [Min] = 4,
-    [Max] = 4,
-    [Equals] = 3,
-    [NotEquals] = 3,
-    [LessThan] = 2,
-    [LessThanOrEquals] = 2,
-    [GreaterThan] = 2,
-    [GreaterThanOrEquals] = 2,
-    [Compare] = 2,
-    [And] = 1,
-    [Or] = 1,
-    [Xor] = 1,
-};
-
 ast_e match_binary_operator(const char **pos) {
     switch (**pos) {
     case '+': {
@@ -94,7 +67,7 @@ ast_t *parse_infix_expr(parse_ctx_t *ctx, const char *pos, int min_tightness) {
     for (ast_e op; (op = match_binary_operator(&pos)) != Unknown && op_tightness[op] >= min_tightness; spaces(&pos)) {
         ast_t *key = NULL;
         if (op == Min || op == Max) {
-            key = NewAST(ctx->file, pos, pos, Var, .name = "$");
+            key = NewAST(ctx->file, pos, pos, Var, .name = (op == Min ? "_min_" : "_max_"));
             for (bool progress = true; progress;) {
                 ast_t *new_term;
                 progress =
@@ -108,7 +81,7 @@ ast_t *parse_infix_expr(parse_ctx_t *ctx, const char *pos, int min_tightness) {
             else if (key) pos = key->end;
         }
 
-        whitespace(&pos);
+        whitespace(ctx, &pos);
         if (get_line_number(ctx->file, pos) != starting_line && get_indent(ctx, pos) < starting_indent)
             parser_err(ctx, pos, eol(pos), "I expected this line to be at least as indented than the line above it");
 
diff --git a/src/parse/containers.c b/src/parse/containers.c
index 821cbdd4..73d30ecd 100644
--- a/src/parse/containers.c
+++ b/src/parse/containers.c
@@ -16,7 +16,7 @@ ast_t *parse_list(parse_ctx_t *ctx, const char *pos) {
     const char *start = pos;
     if (!match(&pos, "[")) return NULL;
 
-    whitespace(&pos);
+    whitespace(ctx, &pos);
 
     ast_list_t *items = NULL;
     for (;;) {
@@ -29,9 +29,9 @@ ast_t *parse_list(parse_ctx_t *ctx, const char *pos) {
             suffixed = parse_comprehension_suffix(ctx, item);
         }
         items = new (ast_list_t, .ast = item, .next = items);
-        if (!match_separator(&pos)) break;
+        if (!match_separator(ctx, &pos)) break;
     }
-    whitespace(&pos);
+    whitespace(ctx, &pos);
     expect_closing(ctx, &pos, "]", "I wasn't able to parse the rest of this list");
 
     REVERSE_LIST(items);
@@ -42,14 +42,14 @@ ast_t *parse_table(parse_ctx_t *ctx, const char *pos) {
     const char *start = pos;
     if (!match(&pos, "{")) return NULL;
 
-    whitespace(&pos);
+    whitespace(ctx, &pos);
 
     ast_list_t *entries = NULL;
     for (;;) {
         const char *entry_start = pos;
         ast_t *key = optional(ctx, &pos, parse_extended_expr);
         if (!key) break;
-        whitespace(&pos);
+        whitespace(ctx, &pos);
         if (!match(&pos, "=")) return NULL;
         ast_t *value = expect(ctx, pos - 1, &pos, parse_expr, "I couldn't parse the value for this table entry");
         ast_t *entry = NewAST(ctx->file, entry_start, pos, TableEntry, .key = key, .value = value);
@@ -60,37 +60,37 @@ ast_t *parse_table(parse_ctx_t *ctx, const char *pos) {
             suffixed = parse_comprehension_suffix(ctx, entry);
         }
         entries = new (ast_list_t, .ast = entry, .next = entries);
-        if (!match_separator(&pos)) break;
+        if (!match_separator(ctx, &pos)) break;
     }
 
     REVERSE_LIST(entries);
 
-    whitespace(&pos);
+    whitespace(ctx, &pos);
 
     ast_t *fallback = NULL, *default_value = NULL;
     if (match(&pos, ";")) {
         for (;;) {
-            whitespace(&pos);
+            whitespace(ctx, &pos);
             const char *attr_start = pos;
             if (match_word(&pos, "fallback")) {
-                whitespace(&pos);
+                whitespace(ctx, &pos);
                 if (!match(&pos, "=")) parser_err(ctx, attr_start, pos, "I expected an '=' after 'fallback'");
                 if (fallback) parser_err(ctx, attr_start, pos, "This table already has a fallback");
                 fallback = expect(ctx, attr_start, &pos, parse_expr, "I expected a fallback table");
             } else if (match_word(&pos, "default")) {
-                whitespace(&pos);
+                whitespace(ctx, &pos);
                 if (!match(&pos, "=")) parser_err(ctx, attr_start, pos, "I expected an '=' after 'default'");
                 if (default_value) parser_err(ctx, attr_start, pos, "This table already has a default");
                 default_value = expect(ctx, attr_start, &pos, parse_expr, "I expected a default value");
             } else {
                 break;
             }
-            whitespace(&pos);
+            whitespace(ctx, &pos);
             if (!match(&pos, ",")) break;
         }
     }
 
-    whitespace(&pos);
+    whitespace(ctx, &pos);
     expect_closing(ctx, &pos, "}", "I wasn't able to parse the rest of this table");
 
     return NewAST(ctx->file, start, pos, Table, .default_value = default_value, .entries = entries,
@@ -102,13 +102,13 @@ ast_t *parse_set(parse_ctx_t *ctx, const char *pos) {
     if (match(&pos, "||")) return NewAST(ctx->file, start, pos, Set);
 
     if (!match(&pos, "|")) return NULL;
-    whitespace(&pos);
+    whitespace(ctx, &pos);
 
     ast_list_t *items = NULL;
     for (;;) {
         ast_t *item = optional(ctx, &pos, parse_extended_expr);
         if (!item) break;
-        whitespace(&pos);
+        whitespace(ctx, &pos);
         ast_t *suffixed = parse_comprehension_suffix(ctx, item);
         while (suffixed) {
             item = suffixed;
@@ -116,12 +116,12 @@ ast_t *parse_set(parse_ctx_t *ctx, const char *pos) {
             suffixed = parse_comprehension_suffix(ctx, item);
         }
         items = new (ast_list_t, .ast = item, .next = items);
-        if (!match_separator(&pos)) break;
+        if (!match_separator(ctx, &pos)) break;
     }
 
     REVERSE_LIST(items);
 
-    whitespace(&pos);
+    whitespace(ctx, &pos);
     expect_closing(ctx, &pos, "|", "I wasn't able to parse the rest of this set");
 
     return NewAST(ctx->file, start, pos, Set, .items = items);
diff --git a/src/parse/context.c b/src/parse/context.c
new file mode 100644
index 00000000..cd8d16bc
--- /dev/null
+++ b/src/parse/context.c
@@ -0,0 +1,8 @@
+// A context parameter that gets passed around during parsing.
+
+#include "../stdlib/memory.h"
+#include "../stdlib/pointers.h"
+#include "../stdlib/tables.h"
+#include "../stdlib/types.h"
+
+const TypeInfo_t *parse_comments_info = Table$info(Pointer$info("@", &Memory$info), Pointer$info("@", &Memory$info));
diff --git a/src/parse/context.h b/src/parse/context.h
index 6008060e..f1e3be2f 100644
--- a/src/parse/context.h
+++ b/src/parse/context.h
@@ -4,10 +4,15 @@
 #include <setjmp.h>
 #include <stdint.h>
 
+#include "../stdlib/datatypes.h"
 #include "../stdlib/files.h"
+#include "../stdlib/types.h"
+
+extern const TypeInfo_t *parse_comments_info;
 
 typedef struct {
     file_t *file;
     jmp_buf *on_err;
     int64_t next_lambda_id;
+    Table_t comments; // Map of <start pos> -> <end pos>
 } parse_ctx_t;
diff --git a/src/parse/controlflow.c b/src/parse/controlflow.c
index 6f6292af..1087e20e 100644
--- a/src/parse/controlflow.c
+++ b/src/parse/controlflow.c
@@ -36,7 +36,7 @@ ast_t *parse_block(parse_ctx_t *ctx, const char *pos) {
     if (indent(ctx, &pos)) {
     indented:;
         int64_t block_indent = get_indent(ctx, pos);
-        whitespace(&pos);
+        whitespace(ctx, &pos);
         while (*pos) {
             ast_t *stmt = optional(ctx, &pos, parse_statement);
             if (!stmt) {
@@ -55,7 +55,7 @@ ast_t *parse_block(parse_ctx_t *ctx, const char *pos) {
                 break;
             }
             statements = new (ast_list_t, .ast = stmt, .next = statements);
-            whitespace(&pos);
+            whitespace(ctx, &pos);
 
             // Guard against having two valid statements on the same line, separated by spaces (but no newlines):
             if (!memchr(stmt->end, '\n', (size_t)(pos - stmt->end))) {
@@ -131,18 +131,8 @@ ast_t *parse_while(parse_ctx_t *ctx, const char *pos) {
     // while condition ["do"] [<indent>] body
     const char *start = pos;
     if (!match_word(&pos, "while")) return NULL;
-
-    const char *tmp = pos;
-    // Shorthand form: `while when ...`
-    if (match_word(&tmp, "when")) {
-        ast_t *when = expect(ctx, start, &pos, parse_when, "I expected a 'when' block after this");
-        if (!when->__data.When.else_body) when->__data.When.else_body = NewAST(ctx->file, pos, pos, Stop);
-        return NewAST(ctx->file, start, pos, While, .body = when);
-    }
-
-    (void)match_word(&pos, "do"); // Optional 'do'
-
     ast_t *condition = expect(ctx, start, &pos, parse_expr, "I don't see a viable condition for this 'while'");
+    (void)match_word(&pos, "do"); // Optional 'do'
     ast_t *body = expect(ctx, start, &pos, parse_block, "I expected a body for this 'while'");
     return NewAST(ctx->file, start, pos, While, .condition = condition, .body = body);
 }
@@ -174,7 +164,7 @@ ast_t *parse_if(parse_ctx_t *ctx, const char *pos) {
     ast_t *body = expect(ctx, start, &pos, parse_block, "I expected a body for this 'if' statement");
 
     const char *tmp = pos;
-    whitespace(&tmp);
+    whitespace(ctx, &tmp);
     ast_t *else_body = NULL;
     const char *else_start = pos;
     if (get_indent(ctx, tmp) == starting_indent && match_word(&tmp, "else")) {
@@ -198,7 +188,7 @@ ast_t *parse_when(parse_ctx_t *ctx, const char *pos) {
 
     when_clause_t *clauses = NULL;
     const char *tmp = pos;
-    whitespace(&tmp);
+    whitespace(ctx, &tmp);
     while (get_indent(ctx, tmp) == starting_indent && match_word(&tmp, "is")) {
         pos = tmp;
         spaces(&pos);
@@ -217,7 +207,7 @@ ast_t *parse_when(parse_ctx_t *ctx, const char *pos) {
         }
         clauses = new_clauses;
         tmp = pos;
-        whitespace(&tmp);
+        whitespace(ctx, &tmp);
     }
     REVERSE_LIST(clauses);
 
@@ -255,7 +245,7 @@ ast_t *parse_for(parse_ctx_t *ctx, const char *pos) {
     ast_t *body = expect(ctx, start, &pos, parse_block, "I expected a body for this 'for'");
 
     const char *else_start = pos;
-    whitespace(&else_start);
+    whitespace(ctx, &else_start);
     ast_t *empty = NULL;
     if (match_word(&else_start, "else") && get_indent(ctx, else_start) == starting_indent) {
         pos = else_start;
diff --git a/src/parse/expressions.c b/src/parse/expressions.c
index d643d4e7..df0a10a7 100644
--- a/src/parse/expressions.c
+++ b/src/parse/expressions.c
@@ -10,10 +10,10 @@
 #include "context.h"
 #include "controlflow.h"
 #include "errors.h"
+#include "expressions.h"
 #include "files.h"
 #include "functions.h"
 #include "numbers.h"
-#include "expressions.h"
 #include "suffixes.h"
 #include "text.h"
 #include "types.h"
@@ -23,7 +23,7 @@ ast_t *parse_parens(parse_ctx_t *ctx, const char *pos) {
     const char *start = pos;
     spaces(&pos);
     if (!match(&pos, "(")) return NULL;
-    whitespace(&pos);
+    whitespace(ctx, &pos);
     ast_t *expr = optional(ctx, &pos, parse_extended_expr);
     if (!expr) return NULL;
 
@@ -34,7 +34,7 @@ ast_t *parse_parens(parse_ctx_t *ctx, const char *pos) {
         comprehension = parse_comprehension_suffix(ctx, expr);
     }
 
-    whitespace(&pos);
+    whitespace(ctx, &pos);
     expect_closing(ctx, &pos, ")", "I wasn't able to parse the rest of this expression");
 
     // Update the span to include the parens:
@@ -45,11 +45,13 @@ ast_t *parse_reduction(parse_ctx_t *ctx, const char *pos) {
     const char *start = pos;
     if (!match(&pos, "(")) return NULL;
 
-    whitespace(&pos);
+    whitespace(ctx, &pos);
     ast_e op = match_binary_operator(&pos);
     if (op == Unknown) return NULL;
 
-    ast_t *key = NewAST(ctx->file, pos, pos, Var, .name = "$");
+    const char *op_str = binop_info[op].operator;
+    assert(op_str);
+    ast_t *key = NewAST(ctx->file, pos, pos, Var, .name = op_str);
     for (bool progress = true; progress;) {
         ast_t *new_term;
         progress =
@@ -61,7 +63,7 @@ ast_t *parse_reduction(parse_ctx_t *ctx, const char *pos) {
     if (key && key->tag == Var) key = NULL;
     else if (key) pos = key->end;
 
-    whitespace(&pos);
+    whitespace(ctx, &pos);
     if (!match(&pos, ":")) return NULL;
 
     ast_t *iter = optional(ctx, &pos, parse_extended_expr);
@@ -73,7 +75,7 @@ ast_t *parse_reduction(parse_ctx_t *ctx, const char *pos) {
         suffixed = parse_comprehension_suffix(ctx, iter);
     }
 
-    whitespace(&pos);
+    whitespace(ctx, &pos);
     expect_closing(ctx, &pos, ")", "I wasn't able to parse the rest of this reduction");
 
     return NewAST(ctx->file, start, pos, Reduction, .iter = iter, .op = op, .key = key);
@@ -164,14 +166,14 @@ ast_t *parse_deserialize(parse_ctx_t *ctx, const char *pos) {
 
     spaces(&pos);
     expect_str(ctx, start, &pos, "(", "I expected arguments for this `deserialize` call");
-    whitespace(&pos);
+    whitespace(ctx, &pos);
     ast_t *value = expect(ctx, start, &pos, parse_extended_expr, "I expected an expression here");
-    whitespace(&pos);
+    whitespace(ctx, &pos);
     expect_str(ctx, start, &pos, "->",
                "I expected a `-> Type` for this `deserialize` call so I know what it deserializes to");
-    whitespace(&pos);
+    whitespace(ctx, &pos);
     type_ast_t *type = expect(ctx, start, &pos, parse_type, "I couldn't parse the type for this deserialization");
-    whitespace(&pos);
+    whitespace(ctx, &pos);
     expect_closing(ctx, &pos, ")", "I expected a closing ')' for this `deserialize` call");
     return NewAST(ctx->file, start, pos, Deserialize, .value = value, .type = type);
 }
@@ -238,10 +240,10 @@ ast_t *parse_expr_str(const char *str) {
     };
 
     const char *pos = file->text;
-    whitespace(&pos);
+    whitespace(&ctx, &pos);
     ast_t *ast = parse_extended_expr(&ctx, pos);
     pos = ast->end;
-    whitespace(&pos);
+    whitespace(&ctx, &pos);
     if (pos < file->text + file->len && *pos != '\0')
         parser_err(&ctx, pos, pos + strlen(pos), "I couldn't parse this part of the string");
     return ast;
diff --git a/src/parse/files.c b/src/parse/files.c
index 8078d544..5ff41c68 100644
--- a/src/parse/files.c
+++ b/src/parse/files.c
@@ -11,9 +11,9 @@
 #include "../stdlib/util.h"
 #include "context.h"
 #include "errors.h"
+#include "expressions.h"
 #include "files.h"
 #include "functions.h"
-#include "expressions.h"
 #include "statements.h"
 #include "text.h"
 #include "typedefs.h"
@@ -33,11 +33,11 @@ static ast_t *parse_top_declaration(parse_ctx_t *ctx, const char *pos) {
 
 ast_t *parse_file_body(parse_ctx_t *ctx, const char *pos) {
     const char *start = pos;
-    whitespace(&pos);
+    whitespace(ctx, &pos);
     ast_list_t *statements = NULL;
     for (;;) {
         const char *next = pos;
-        whitespace(&next);
+        whitespace(ctx, &next);
         if (get_indent(ctx, next) != 0) break;
         ast_t *stmt;
         if ((stmt = optional(ctx, &pos, parse_struct_def)) || (stmt = optional(ctx, &pos, parse_func_def))
@@ -47,12 +47,12 @@ ast_t *parse_file_body(parse_ctx_t *ctx, const char *pos) {
             || (stmt = optional(ctx, &pos, parse_inline_c)) || (stmt = optional(ctx, &pos, parse_top_declaration))) {
             statements = new (ast_list_t, .ast = stmt, .next = statements);
             pos = stmt->end;
-            whitespace(&pos); // TODO: check for newline
+            whitespace(ctx, &pos); // TODO: check for newline
         } else {
             break;
         }
     }
-    whitespace(&pos);
+    whitespace(ctx, &pos);
     if (pos < ctx->file->text + ctx->file->len && *pos != '\0') {
         parser_err(ctx, pos, eol(pos), "I expect all top-level statements to be declarations of some kind");
     }
@@ -90,10 +90,10 @@ ast_t *parse_file(const char *path, jmp_buf *on_err) {
     if (match(&pos, "#!")) // shebang
         some_not(&pos, "\r\n");
 
-    whitespace(&pos);
+    whitespace(&ctx, &pos);
     ast = parse_file_body(&ctx, pos);
     pos = ast->end;
-    whitespace(&pos);
+    whitespace(&ctx, &pos);
     if (pos < file->text + file->len && *pos != '\0') {
         parser_err(&ctx, pos, pos + strlen(pos), "I couldn't parse this part of the file");
     }
@@ -171,10 +171,10 @@ ast_t *parse_file_str(const char *str) {
     };
 
     const char *pos = file->text;
-    whitespace(&pos);
+    whitespace(&ctx, &pos);
     ast_t *ast = parse_file_body(&ctx, pos);
     pos = ast->end;
-    whitespace(&pos);
+    whitespace(&ctx, &pos);
     if (pos < file->text + file->len && *pos != '\0')
         parser_err(&ctx, pos, pos + strlen(pos), "I couldn't parse this part of the string");
     return ast;
diff --git a/src/parse/functions.c b/src/parse/functions.c
index 37505ac5..ceb0a8bc 100644
--- a/src/parse/functions.c
+++ b/src/parse/functions.c
@@ -26,37 +26,42 @@ arg_ast_t *parse_args(parse_ctx_t *ctx, const char **pos) {
         type_ast_t *type = NULL;
 
         typedef struct name_list_s {
+            const char *start, *end;
             const char *name, *alias;
             struct name_list_s *next;
         } name_list_t;
 
         name_list_t *names = NULL;
         for (;;) {
-            whitespace(pos);
+            whitespace(ctx, pos);
             const char *name = get_id(pos);
             if (!name) break;
-            whitespace(pos);
+            const char *name_start = *pos;
+            whitespace(ctx, pos);
 
             const char *alias = NULL;
             if (match(pos, "|")) {
-                whitespace(pos);
+                whitespace(ctx, pos);
                 alias = get_id(pos);
                 if (!alias) parser_err(ctx, *pos, *pos, "I expected an argument alias after `|`");
             }
 
             if (match(pos, ":")) {
                 type = expect(ctx, *pos - 1, pos, parse_type, "I expected a type here");
-                names = new (name_list_t, .name = name, .alias = alias, .next = names);
-                whitespace(pos);
+                whitespace(ctx, pos);
                 if (match(pos, "="))
                     default_val = expect(ctx, *pos - 1, pos, parse_term, "I expected a value after this '='");
+                names =
+                    new (name_list_t, .start = name_start, .end = *pos, .name = name, .alias = alias, .next = names);
                 break;
             } else if (strncmp(*pos, "==", 2) != 0 && match(pos, "=")) {
                 default_val = expect(ctx, *pos - 1, pos, parse_term, "I expected a value after this '='");
-                names = new (name_list_t, .name = name, .alias = alias, .next = names);
+                names =
+                    new (name_list_t, .start = name_start, .end = *pos, .name = name, .alias = alias, .next = names);
                 break;
             } else if (name) {
-                names = new (name_list_t, .name = name, .alias = alias, .next = names);
+                names =
+                    new (name_list_t, .start = name_start, .end = *pos, .name = name, .alias = alias, .next = names);
                 spaces(pos);
                 if (!match(pos, ",")) break;
             } else {
@@ -71,10 +76,10 @@ arg_ast_t *parse_args(parse_ctx_t *ctx, const char **pos) {
 
         REVERSE_LIST(names);
         for (; names; names = names->next)
-            args = new (arg_ast_t, .name = names->name, .alias = names->alias, .type = type, .value = default_val,
-                        .next = args);
+            args = new (arg_ast_t, .start = names->start, .end = names->end, .name = names->name, .alias = names->alias,
+                        .type = type, .value = default_val, .next = args);
 
-        if (!match_separator(pos)) break;
+        if (!match_separator(ctx, pos)) break;
     }
 
     REVERSE_LIST(args);
@@ -95,19 +100,19 @@ ast_t *parse_func_def(parse_ctx_t *ctx, const char *pos) {
     arg_ast_t *args = parse_args(ctx, &pos);
     spaces(&pos);
     type_ast_t *ret_type = match(&pos, "->") ? optional(ctx, &pos, parse_type) : NULL;
-    whitespace(&pos);
+    whitespace(ctx, &pos);
     bool is_inline = false;
     ast_t *cache_ast = NULL;
-    for (bool specials = match(&pos, ";"); specials; specials = match_separator(&pos)) {
+    for (bool specials = match(&pos, ";"); specials; specials = match_separator(ctx, &pos)) {
         const char *flag_start = pos;
         if (match_word(&pos, "inline")) {
             is_inline = true;
         } else if (match_word(&pos, "cached")) {
             if (!cache_ast) cache_ast = NewAST(ctx->file, pos, pos, Int, .str = "-1");
         } else if (match_word(&pos, "cache_size")) {
-            whitespace(&pos);
+            whitespace(ctx, &pos);
             if (!match(&pos, "=")) parser_err(ctx, flag_start, pos, "I expected a value for 'cache_size'");
-            whitespace(&pos);
+            whitespace(ctx, &pos);
             cache_ast = expect(ctx, start, &pos, parse_expr, "I expected a maximum size for the cache");
         }
     }
@@ -129,19 +134,19 @@ ast_t *parse_convert_def(parse_ctx_t *ctx, const char *pos) {
     arg_ast_t *args = parse_args(ctx, &pos);
     spaces(&pos);
     type_ast_t *ret_type = match(&pos, "->") ? optional(ctx, &pos, parse_type) : NULL;
-    whitespace(&pos);
+    whitespace(ctx, &pos);
     bool is_inline = false;
     ast_t *cache_ast = NULL;
-    for (bool specials = match(&pos, ";"); specials; specials = match_separator(&pos)) {
+    for (bool specials = match(&pos, ";"); specials; specials = match_separator(ctx, &pos)) {
         const char *flag_start = pos;
         if (match_word(&pos, "inline")) {
             is_inline = true;
         } else if (match_word(&pos, "cached")) {
             if (!cache_ast) cache_ast = NewAST(ctx->file, pos, pos, Int, .str = "-1");
         } else if (match_word(&pos, "cache_size")) {
-            whitespace(&pos);
+            whitespace(ctx, &pos);
             if (!match(&pos, "=")) parser_err(ctx, flag_start, pos, "I expected a value for 'cache_size'");
-            whitespace(&pos);
+            whitespace(ctx, &pos);
             cache_ast = expect(ctx, start, &pos, parse_expr, "I expected a maximum size for the cache");
         }
     }
diff --git a/src/parse/statements.c b/src/parse/statements.c
index a30231f0..9606acdc 100644
--- a/src/parse/statements.c
+++ b/src/parse/statements.c
@@ -8,8 +8,8 @@
 #include "../stdlib/util.h"
 #include "context.h"
 #include "errors.h"
-#include "files.h"
 #include "expressions.h"
+#include "files.h"
 #include "statements.h"
 #include "suffixes.h"
 #include "types.h"
@@ -46,7 +46,7 @@ ast_t *parse_assignment(parse_ctx_t *ctx, const char *pos) {
         targets = new (ast_list_t, .ast = lhs, .next = targets);
         spaces(&pos);
         if (!match(&pos, ",")) break;
-        whitespace(&pos);
+        whitespace(ctx, &pos);
     }
 
     if (!targets) return NULL;
@@ -62,7 +62,7 @@ ast_t *parse_assignment(parse_ctx_t *ctx, const char *pos) {
         values = new (ast_list_t, .ast = rhs, .next = values);
         spaces(&pos);
         if (!match(&pos, ",")) break;
-        whitespace(&pos);
+        whitespace(ctx, &pos);
     }
 
     REVERSE_LIST(targets);
@@ -101,7 +101,7 @@ ast_t *parse_doctest(parse_ctx_t *ctx, const char *pos) {
     if (!match(&pos, ">>")) return NULL;
     spaces(&pos);
     ast_t *expr = expect(ctx, start, &pos, parse_statement, "I couldn't parse the expression for this doctest");
-    whitespace(&pos);
+    whitespace(ctx, &pos);
     ast_t *expected = NULL;
     if (match(&pos, "=")) {
         spaces(&pos);
@@ -120,7 +120,7 @@ ast_t *parse_assert(parse_ctx_t *ctx, const char *pos) {
     spaces(&pos);
     ast_t *message = NULL;
     if (match(&pos, ",")) {
-        whitespace(&pos);
+        whitespace(ctx, &pos);
         message = expect(ctx, start, &pos, parse_extended_expr, "I couldn't parse the error message for this assert");
     } else {
         pos = expr->end;
diff --git a/src/parse/suffixes.c b/src/parse/suffixes.c
index 7e748caf..cb54b2f6 100644
--- a/src/parse/suffixes.c
+++ b/src/parse/suffixes.c
@@ -14,10 +14,10 @@
 ast_t *parse_field_suffix(parse_ctx_t *ctx, ast_t *lhs) {
     if (!lhs) return NULL;
     const char *pos = lhs->end;
-    whitespace(&pos);
+    whitespace(ctx, &pos);
     if (!match(&pos, ".")) return NULL;
     if (*pos == '.') return NULL;
-    whitespace(&pos);
+    whitespace(ctx, &pos);
     bool dollar = match(&pos, "$");
     const char *field = get_id(&pos);
     if (!field) return NULL;
@@ -44,9 +44,9 @@ ast_t *parse_index_suffix(parse_ctx_t *ctx, ast_t *lhs) {
     const char *start = lhs->start;
     const char *pos = lhs->end;
     if (!match(&pos, "[")) return NULL;
-    whitespace(&pos);
+    whitespace(ctx, &pos);
     ast_t *index = optional(ctx, &pos, parse_extended_expr);
-    whitespace(&pos);
+    whitespace(ctx, &pos);
     bool unchecked = match(&pos, ";") && (spaces(&pos), match_word(&pos, "unchecked") != 0);
     expect_closing(ctx, &pos, "]", "I wasn't able to parse the rest of this index");
     return NewAST(ctx->file, start, pos, Index, .indexed = lhs, .index = index, .unchecked = unchecked);
@@ -57,7 +57,7 @@ ast_t *parse_comprehension_suffix(parse_ctx_t *ctx, ast_t *expr) {
     if (!expr) return NULL;
     const char *start = expr->start;
     const char *pos = expr->end;
-    whitespace(&pos);
+    whitespace(ctx, &pos);
     if (!match_word(&pos, "for")) return NULL;
 
     ast_list_t *vars = NULL;
@@ -73,7 +73,7 @@ ast_t *parse_comprehension_suffix(parse_ctx_t *ctx, ast_t *expr) {
     expect_str(ctx, start, &pos, "in", "I expected an 'in' for this 'for'");
     ast_t *iter = expect(ctx, start, &pos, parse_expr, "I expected an iterable value for this 'for'");
     const char *next_pos = pos;
-    whitespace(&next_pos);
+    whitespace(ctx, &next_pos);
     ast_t *filter = NULL;
     if (match_word(&next_pos, "if")) {
         pos = next_pos;
@@ -115,13 +115,13 @@ ast_t *parse_method_call_suffix(parse_ctx_t *ctx, ast_t *self) {
     if (!fn) return NULL;
     spaces(&pos);
     if (!match(&pos, "(")) return NULL;
-    whitespace(&pos);
+    whitespace(ctx, &pos);
 
     arg_ast_t *args = NULL;
     for (;;) {
         const char *arg_start = pos;
         const char *name = get_id(&pos);
-        whitespace(&pos);
+        whitespace(ctx, &pos);
         if (!name || !match(&pos, "=")) {
             name = NULL;
             pos = arg_start;
@@ -132,12 +132,12 @@ ast_t *parse_method_call_suffix(parse_ctx_t *ctx, ast_t *self) {
             if (name) parser_err(ctx, arg_start, pos, "I expected an argument here");
             break;
         }
-        args = new (arg_ast_t, .name = name, .value = arg, .next = args);
-        if (!match_separator(&pos)) break;
+        args = new (arg_ast_t, .start = arg_start, .end = arg->end, .name = name, .value = arg, .next = args);
+        if (!match_separator(ctx, &pos)) break;
     }
     REVERSE_LIST(args);
 
-    whitespace(&pos);
+    whitespace(ctx, &pos);
 
     if (!match(&pos, ")")) parser_err(ctx, start, pos, "This parenthesis is unclosed");
 
@@ -152,13 +152,13 @@ ast_t *parse_fncall_suffix(parse_ctx_t *ctx, ast_t *fn) {
 
     if (!match(&pos, "(")) return NULL;
 
-    whitespace(&pos);
+    whitespace(ctx, &pos);
 
     arg_ast_t *args = NULL;
     for (;;) {
         const char *arg_start = pos;
         const char *name = get_id(&pos);
-        whitespace(&pos);
+        whitespace(ctx, &pos);
         if (!name || !match(&pos, "=")) {
             name = NULL;
             pos = arg_start;
@@ -170,10 +170,10 @@ ast_t *parse_fncall_suffix(parse_ctx_t *ctx, ast_t *fn) {
             break;
         }
         args = new (arg_ast_t, .name = name, .value = arg, .next = args);
-        if (!match_separator(&pos)) break;
+        if (!match_separator(ctx, &pos)) break;
     }
 
-    whitespace(&pos);
+    whitespace(ctx, &pos);
 
     if (!match(&pos, ")")) parser_err(ctx, start, pos, "This parenthesis is unclosed");
 
diff --git a/src/parse/text.c b/src/parse/text.c
index 8897fd34..30ff8656 100644
--- a/src/parse/text.c
+++ b/src/parse/text.c
@@ -148,21 +148,19 @@ ast_t *parse_inline_c(parse_ctx_t *ctx, const char *pos) {
 
     spaces(&pos);
     type_ast_t *type = NULL;
-    ast_list_t *chunks;
     if (match(&pos, ":")) {
         type = expect(ctx, start, &pos, parse_type, "I couldn't parse the type for this C_code code");
         spaces(&pos);
-        chunks = _parse_text_helper(ctx, &pos);
-        if (type) {
-            chunks = new (ast_list_t, .ast = NewAST(ctx->file, pos, pos, TextLiteral, Text("({")), .next = chunks);
-            REVERSE_LIST(chunks);
-            chunks = new (ast_list_t, .ast = NewAST(ctx->file, pos, pos, TextLiteral, Text("; })")), .next = chunks);
-            REVERSE_LIST(chunks);
-        }
-    } else {
-        chunks = _parse_text_helper(ctx, &pos);
     }
 
+    static const char *quote_chars = "\"'`|/;([{<";
+    if (!strchr(quote_chars, *pos))
+        parser_err(ctx, pos, pos + 1,
+                   "This is not a valid string quotation character. Valid characters are: \"'`|/;([{<");
+
+    char quote = *(pos++);
+    char unquote = closing[(int)quote] ? closing[(int)quote] : quote;
+    ast_list_t *chunks = _parse_text_helper(ctx, &pos, quote, unquote, '@', false);
     return NewAST(ctx->file, start, pos, InlineCCode, .chunks = chunks, .type_ast = type);
 }
 
diff --git a/src/parse/typedefs.c b/src/parse/typedefs.c
index 73fe9d7c..6e5e40d0 100644
--- a/src/parse/typedefs.c
+++ b/src/parse/typedefs.c
@@ -16,12 +16,12 @@
 
 ast_t *parse_namespace(parse_ctx_t *ctx, const char *pos) {
     const char *start = pos;
-    whitespace(&pos);
+    whitespace(ctx, &pos);
     int64_t indent = get_indent(ctx, pos);
     ast_list_t *statements = NULL;
     for (;;) {
         const char *next = pos;
-        whitespace(&next);
+        whitespace(ctx, &next);
         if (get_indent(ctx, next) != indent) break;
         ast_t *stmt;
         if ((stmt = optional(ctx, &pos, parse_struct_def)) || (stmt = optional(ctx, &pos, parse_func_def))
@@ -31,7 +31,7 @@ ast_t *parse_namespace(parse_ctx_t *ctx, const char *pos) {
             || (stmt = optional(ctx, &pos, parse_inline_c)) || (stmt = optional(ctx, &pos, parse_declaration))) {
             statements = new (ast_list_t, .ast = stmt, .next = statements);
             pos = stmt->end;
-            whitespace(&pos); // TODO: check for newline
+            whitespace(ctx, &pos); // TODO: check for newline
             // if (!(space_types & WHITESPACE_NEWLINES)) {
             //     pos = stmt->end;
             //     break;
@@ -62,10 +62,10 @@ ast_t *parse_struct_def(parse_ctx_t *ctx, const char *pos) {
 
     arg_ast_t *fields = parse_args(ctx, &pos);
 
-    whitespace(&pos);
+    whitespace(ctx, &pos);
     bool secret = false, external = false, opaque = false;
     if (match(&pos, ";")) { // Extra flags
-        whitespace(&pos);
+        whitespace(ctx, &pos);
         for (;;) {
             if (match_word(&pos, "secret")) {
                 secret = true;
@@ -79,7 +79,7 @@ ast_t *parse_struct_def(parse_ctx_t *ctx, const char *pos) {
                 break;
             }
 
-            if (!match_separator(&pos)) break;
+            if (!match_separator(ctx, &pos)) break;
         }
     }
 
@@ -87,7 +87,7 @@ ast_t *parse_struct_def(parse_ctx_t *ctx, const char *pos) {
 
     ast_t *namespace = NULL;
     const char *ns_pos = pos;
-    whitespace(&ns_pos);
+    whitespace(ctx, &ns_pos);
     int64_t ns_indent = get_indent(ctx, ns_pos);
     if (ns_indent > starting_indent) {
         pos = ns_pos;
@@ -110,9 +110,10 @@ ast_t *parse_enum_def(parse_ctx_t *ctx, const char *pos) {
     if (!match(&pos, "(")) return NULL;
 
     tag_ast_t *tags = NULL;
-    whitespace(&pos);
+    whitespace(ctx, &pos);
     for (;;) {
         spaces(&pos);
+        const char *tag_start = pos;
         const char *tag_name = get_id(&pos);
         if (!tag_name) break;
 
@@ -120,25 +121,26 @@ ast_t *parse_enum_def(parse_ctx_t *ctx, const char *pos) {
         arg_ast_t *fields;
         bool secret = false;
         if (match(&pos, "(")) {
-            whitespace(&pos);
+            whitespace(ctx, &pos);
             fields = parse_args(ctx, &pos);
-            whitespace(&pos);
+            whitespace(ctx, &pos);
             if (match(&pos, ";")) { // Extra flags
-                whitespace(&pos);
+                whitespace(ctx, &pos);
                 secret = match_word(&pos, "secret");
-                whitespace(&pos);
+                whitespace(ctx, &pos);
             }
             expect_closing(ctx, &pos, ")", "I wasn't able to parse the rest of this tagged union member");
         } else {
             fields = NULL;
         }
 
-        tags = new (tag_ast_t, .name = tag_name, .fields = fields, .secret = secret, .next = tags);
+        tags = new (tag_ast_t, .start = tag_start, .end = pos, .name = tag_name, .fields = fields, .secret = secret,
+                    .next = tags);
 
-        if (!match_separator(&pos)) break;
+        if (!match_separator(ctx, &pos)) break;
     }
 
-    whitespace(&pos);
+    whitespace(ctx, &pos);
     expect_closing(ctx, &pos, ")", "I wasn't able to parse the rest of this enum definition");
 
     REVERSE_LIST(tags);
@@ -147,7 +149,7 @@ ast_t *parse_enum_def(parse_ctx_t *ctx, const char *pos) {
 
     ast_t *namespace = NULL;
     const char *ns_pos = pos;
-    whitespace(&ns_pos);
+    whitespace(ctx, &ns_pos);
     int64_t ns_indent = get_indent(ctx, ns_pos);
     if (ns_indent > starting_indent) {
         pos = ns_pos;
@@ -170,7 +172,7 @@ ast_t *parse_lang_def(parse_ctx_t *ctx, const char *pos) {
 
     ast_t *namespace = NULL;
     const char *ns_pos = pos;
-    whitespace(&ns_pos);
+    whitespace(ctx, &ns_pos);
     int64_t ns_indent = get_indent(ctx, ns_pos);
     if (ns_indent > starting_indent) {
         pos = ns_pos;
@@ -192,7 +194,7 @@ ast_t *parse_extend(parse_ctx_t *ctx, const char *pos) {
 
     ast_t *body = NULL;
     const char *ns_pos = pos;
-    whitespace(&ns_pos);
+    whitespace(ctx, &ns_pos);
     int64_t ns_indent = get_indent(ctx, ns_pos);
     if (ns_indent > starting_indent) {
         pos = ns_pos;
diff --git a/src/parse/types.c b/src/parse/types.c
index 54bc0c03..ffb7d869 100644
--- a/src/parse/types.c
+++ b/src/parse/types.c
@@ -9,19 +9,19 @@
 #include "../stdlib/print.h"
 #include "context.h"
 #include "errors.h"
-#include "functions.h"
 #include "expressions.h"
+#include "functions.h"
 #include "types.h"
 #include "utils.h"
 
 type_ast_t *parse_table_type(parse_ctx_t *ctx, const char *pos) {
     const char *start = pos;
     if (!match(&pos, "{")) return NULL;
-    whitespace(&pos);
+    whitespace(ctx, &pos);
     type_ast_t *key_type = parse_type(ctx, pos);
     if (!key_type) return NULL;
     pos = key_type->end;
-    whitespace(&pos);
+    whitespace(ctx, &pos);
     type_ast_t *value_type = NULL;
     if (match(&pos, "=")) {
         value_type = expect(ctx, start, &pos, parse_type, "I couldn't parse the rest of this table type");
@@ -35,7 +35,7 @@ type_ast_t *parse_table_type(parse_ctx_t *ctx, const char *pos) {
         default_value =
             expect(ctx, start, &pos, parse_extended_expr, "I couldn't parse the default value for this table");
     }
-    whitespace(&pos);
+    whitespace(ctx, &pos);
     expect_closing(ctx, &pos, "}", "I wasn't able to parse the rest of this table type");
     return NewTypeAST(ctx->file, start, pos, TableTypeAST, .key = key_type, .value = value_type,
                       .default_value = default_value);
@@ -44,11 +44,11 @@ type_ast_t *parse_table_type(parse_ctx_t *ctx, const char *pos) {
 type_ast_t *parse_set_type(parse_ctx_t *ctx, const char *pos) {
     const char *start = pos;
     if (!match(&pos, "|")) return NULL;
-    whitespace(&pos);
+    whitespace(ctx, &pos);
     type_ast_t *item_type = parse_type(ctx, pos);
     if (!item_type) return NULL;
     pos = item_type->end;
-    whitespace(&pos);
+    whitespace(ctx, &pos);
     expect_closing(ctx, &pos, "|", "I wasn't able to parse the rest of this set type");
     return NewTypeAST(ctx->file, start, pos, SetTypeAST, .item = item_type);
 }
@@ -113,10 +113,10 @@ type_ast_t *parse_non_optional_type(parse_ctx_t *ctx, const char *pos) {
                     || (type = parse_table_type(ctx, pos)) || (type = parse_set_type(ctx, pos))
                     || (type = parse_type_name(ctx, pos)) || (type = parse_func_type(ctx, pos)));
     if (!success && match(&pos, "(")) {
-        whitespace(&pos);
+        whitespace(ctx, &pos);
         type = optional(ctx, &pos, parse_type);
         if (!type) return NULL;
-        whitespace(&pos);
+        whitespace(ctx, &pos);
         expect_closing(ctx, &pos, ")", "I wasn't able to parse the rest of this type");
         type->start = start;
         type->end = pos;
@@ -144,11 +144,11 @@ type_ast_t *parse_type_str(const char *str) {
     };
 
     const char *pos = file->text;
-    whitespace(&pos);
+    whitespace(&ctx, &pos);
     type_ast_t *ast = parse_type(&ctx, pos);
     if (!ast) return ast;
     pos = ast->end;
-    whitespace(&pos);
+    whitespace(&ctx, &pos);
     if (strlen(pos) > 0) {
         parser_err(&ctx, pos, pos + strlen(pos), "I couldn't parse this part of the type");
     }
diff --git a/src/parse/utils.c b/src/parse/utils.c
index 7e827ac6..0644bfa0 100644
--- a/src/parse/utils.c
+++ b/src/parse/utils.c
@@ -6,6 +6,7 @@
 #include <unictype.h>
 #include <uniname.h>
 
+#include "../stdlib/tables.h"
 #include "../stdlib/util.h"
 #include "errors.h"
 #include "utils.h"
@@ -43,8 +44,8 @@ size_t some_not(const char **pos, const char *forbid) {
 
 size_t spaces(const char **pos) { return some_of(pos, " \t"); }
 
-void whitespace(const char **pos) {
-    while (some_of(pos, " \t\r\n") || comment(pos))
+void whitespace(parse_ctx_t *ctx, const char **pos) {
+    while (some_of(pos, " \t\r\n") || comment(ctx, pos))
         continue;
 }
 
@@ -95,9 +96,12 @@ const char *get_id(const char **inout) {
 
 PUREFUNC const char *eol(const char *str) { return str + strcspn(str, "\r\n"); }
 
-bool comment(const char **pos) {
+bool comment(parse_ctx_t *ctx, const char **pos) {
     if ((*pos)[0] == '#') {
+        const char *start = *pos;
         *pos += strcspn(*pos, "\r\n");
+        const char *end = *pos;
+        Table$set(&ctx->comments, &start, &end, parse_comments_info);
         return true;
     } else {
         return false;
@@ -129,7 +133,7 @@ PUREFUNC int64_t get_indent(parse_ctx_t *ctx, const char *pos) {
 bool indent(parse_ctx_t *ctx, const char **out) {
     const char *pos = *out;
     int64_t starting_indent = get_indent(ctx, pos);
-    whitespace(&pos);
+    whitespace(ctx, &pos);
     const char *next_line = get_line(ctx->file, get_line_number(ctx->file, pos));
     if (next_line <= *out) return false;
 
@@ -239,12 +243,12 @@ const char *unescape(parse_ctx_t *ctx, const char **out) {
 #pragma GCC diagnostic pop
 #endif
 
-bool match_separator(const char **pos) { // Either comma or newline
+bool match_separator(parse_ctx_t *ctx, const char **pos) { // Either comma or newline
     const char *p = *pos;
     int separators = 0;
     for (;;) {
         if (some_of(&p, "\r\n,")) ++separators;
-        else if (!comment(&p) && !some_of(&p, " \t")) break;
+        else if (!comment(ctx, &p) && !some_of(&p, " \t")) break;
     }
     if (separators > 0) {
         *pos = p;
diff --git a/src/parse/utils.h b/src/parse/utils.h
index ba54120a..b8fb0756 100644
--- a/src/parse/utils.h
+++ b/src/parse/utils.h
@@ -12,16 +12,16 @@ CONSTFUNC bool is_keyword(const char *word);
 size_t some_of(const char **pos, const char *allow);
 size_t some_not(const char **pos, const char *forbid);
 size_t spaces(const char **pos);
-void whitespace(const char **pos);
+void whitespace(parse_ctx_t *ctx, const char **pos);
 size_t match(const char **pos, const char *target);
 size_t match_word(const char **pos, const char *word);
 const char *get_word(const char **pos);
 const char *get_id(const char **pos);
-bool comment(const char **pos);
+bool comment(parse_ctx_t *ctx, const char **pos);
 bool indent(parse_ctx_t *ctx, const char **pos);
 const char *eol(const char *str);
 PUREFUNC int64_t get_indent(parse_ctx_t *ctx, const char *pos);
 const char *unescape(parse_ctx_t *ctx, const char **out);
 bool is_xid_continue_next(const char *pos);
 bool newline_with_indentation(const char **out, int64_t target);
-bool match_separator(const char **pos);
+bool match_separator(parse_ctx_t *ctx, const char **pos);
author	Bruce Hill <bruce@bruce-hill.com>	2025-09-06 14:47:45 -0400
committer	Bruce Hill <bruce@bruce-hill.com>	2025-09-06 14:47:45 -0400
commit	a8316252db95e3d77f9f0e9beb89cfcb4573d5b1 (patch)
tree	e5905bce9611e35ccb2f84481232fca0e657ff42 /src/parse
parent	a0ac652cd1eebdc42425b34f1685f8cb20cb4eea (diff)
parent	73246764f88f6f652316ee0c138a990d836698a7 (diff)