about | summary | refs | log | tree | commit | diff
path: root/src/parse
diff options
context:
space:
mode:
author Bruce Hill <bruce@bruce-hill.com> 2025-09-06 14:47:45 -0400
committer Bruce Hill <bruce@bruce-hill.com> 2025-09-06 14:47:45 -0400
commit a8316252db95e3d77f9f0e9beb89cfcb4573d5b1 (patch)
tree e5905bce9611e35ccb2f84481232fca0e657ff42 /src/parse
parent a0ac652cd1eebdc42425b34f1685f8cb20cb4eea (diff)
parent 73246764f88f6f652316ee0c138a990d836698a7 (diff)
Merge branch 'main' into simplified-quotes
Diffstat (limited to 'src/parse')
-rw-r--r-- src/parse/binops.c 31
-rw-r--r-- src/parse/containers.c 32
-rw-r--r-- src/parse/context.c 8
-rw-r--r-- src/parse/context.h 5
-rw-r--r-- src/parse/controlflow.c 24
-rw-r--r-- src/parse/expressions.c 28
-rw-r--r-- src/parse/files.c 18
-rw-r--r-- src/parse/functions.c 41
-rw-r--r-- src/parse/statements.c 10
-rw-r--r-- src/parse/suffixes.c 30
-rw-r--r-- src/parse/text.c 18
-rw-r--r-- src/parse/typedefs.c 38
-rw-r--r-- src/parse/types.c 20
-rw-r--r-- src/parse/utils.c 16
-rw-r--r-- src/parse/utils.h 6
15 files changed, 156 insertions, 169 deletions
diff --git a/src/parse/binops.c b/src/parse/binops.c
index 7ccf1379..4676b249 100644
--- a/src/parse/binops.c
+++ b/src/parse/binops.c
@@ -9,33 +9,6 @@
#include "suffixes.h"
#include "utils.h"
-int op_tightness[] = {
- [Power] = 9,
- [Multiply] = 8,
- [Divide] = 8,
- [Mod] = 8,
- [Mod1] = 8,
- [Plus] = 7,
- [Minus] = 7,
- [Concat] = 6,
- [LeftShift] = 5,
- [RightShift] = 5,
- [UnsignedLeftShift] = 5,
- [UnsignedRightShift] = 5,
- [Min] = 4,
- [Max] = 4,
- [Equals] = 3,
- [NotEquals] = 3,
- [LessThan] = 2,
- [LessThanOrEquals] = 2,
- [GreaterThan] = 2,
- [GreaterThanOrEquals] = 2,
- [Compare] = 2,
- [And] = 1,
- [Or] = 1,
- [Xor] = 1,
-};
-
ast_e match_binary_operator(const char **pos) {
switch (**pos) {
case '+': {
@@ -94,7 +67,7 @@ ast_t *parse_infix_expr(parse_ctx_t *ctx, const char *pos, int min_tightness) {
for (ast_e op; (op = match_binary_operator(&pos)) != Unknown && op_tightness[op] >= min_tightness; spaces(&pos)) {
ast_t *key = NULL;
if (op == Min || op == Max) {
- key = NewAST(ctx->file, pos, pos, Var, .name = "$");
+ key = NewAST(ctx->file, pos, pos, Var, .name = (op == Min ? "_min_" : "_max_"));
for (bool progress = true; progress;) {
ast_t *new_term;
progress =
@@ -108,7 +81,7 @@ ast_t *parse_infix_expr(parse_ctx_t *ctx, const char *pos, int min_tightness) {
else if (key) pos = key->end;
}
- whitespace(&pos);
+ whitespace(ctx, &pos);
if (get_line_number(ctx->file, pos) != starting_line && get_indent(ctx, pos) < starting_indent)
parser_err(ctx, pos, eol(pos), "I expected this line to be at least as indented than the line above it");
diff --git a/src/parse/containers.c b/src/parse/containers.c
index 821cbdd4..73d30ecd 100644
--- a/src/parse/containers.c
+++ b/src/parse/containers.c
@@ -16,7 +16,7 @@ ast_t *parse_list(parse_ctx_t *ctx, const char *pos) {
const char *start = pos;
if (!match(&pos, "[")) return NULL;
- whitespace(&pos);
+ whitespace(ctx, &pos);
ast_list_t *items = NULL;
for (;;) {
@@ -29,9 +29,9 @@ ast_t *parse_list(parse_ctx_t *ctx, const char *pos) {
suffixed = parse_comprehension_suffix(ctx, item);
}
items = new (ast_list_t, .ast = item, .next = items);
- if (!match_separator(&pos)) break;
+ if (!match_separator(ctx, &pos)) break;
}
- whitespace(&pos);
+ whitespace(ctx, &pos);
expect_closing(ctx, &pos, "]", "I wasn't able to parse the rest of this list");
REVERSE_LIST(items);
@@ -42,14 +42,14 @@ ast_t *parse_table(parse_ctx_t *ctx, const char *pos) {
const char *start = pos;
if (!match(&pos, "{")) return NULL;
- whitespace(&pos);
+ whitespace(ctx, &pos);
ast_list_t *entries = NULL;
for (;;) {
const char *entry_start = pos;
ast_t *key = optional(ctx, &pos, parse_extended_expr);
if (!key) break;
- whitespace(&pos);
+ whitespace(ctx, &pos);
if (!match(&pos, "=")) return NULL;
ast_t *value = expect(ctx, pos - 1, &pos, parse_expr, "I couldn't parse the value for this table entry");
ast_t *entry = NewAST(ctx->file, entry_start, pos, TableEntry, .key = key, .value = value);
@@ -60,37 +60,37 @@ ast_t *parse_table(parse_ctx_t *ctx, const char *pos) {
suffixed = parse_comprehension_suffix(ctx, entry);
}
entries = new (ast_list_t, .ast = entry, .next = entries);
- if (!match_separator(&pos)) break;
+ if (!match_separator(ctx, &pos)) break;
}
REVERSE_LIST(entries);
- whitespace(&pos);
+ whitespace(ctx, &pos);
ast_t *fallback = NULL, *default_value = NULL;
if (match(&pos, ";")) {
for (;;) {
- whitespace(&pos);
+ whitespace(ctx, &pos);
const char *attr_start = pos;
if (match_word(&pos, "fallback")) {
- whitespace(&pos);
+ whitespace(ctx, &pos);
if (!match(&pos, "=")) parser_err(ctx, attr_start, pos, "I expected an '=' after 'fallback'");
if (fallback) parser_err(ctx, attr_start, pos, "This table already has a fallback");
fallback = expect(ctx, attr_start, &pos, parse_expr, "I expected a fallback table");
} else if (match_word(&pos, "default")) {
- whitespace(&pos);
+ whitespace(ctx, &pos);
if (!match(&pos, "=")) parser_err(ctx, attr_start, pos, "I expected an '=' after 'default'");
if (default_value) parser_err(ctx, attr_start, pos, "This table already has a default");
default_value = expect(ctx, attr_start, &pos, parse_expr, "I expected a default value");
} else {
break;
}
- whitespace(&pos);
+ whitespace(ctx, &pos);
if (!match(&pos, ",")) break;
}
}
- whitespace(&pos);
+ whitespace(ctx, &pos);
expect_closing(ctx, &pos, "}", "I wasn't able to parse the rest of this table");
return NewAST(ctx->file, start, pos, Table, .default_value = default_value, .entries = entries,
@@ -102,13 +102,13 @@ ast_t *parse_set(parse_ctx_t *ctx, const char *pos) {
if (match(&pos, "||")) return NewAST(ctx->file, start, pos, Set);
if (!match(&pos, "|")) return NULL;
- whitespace(&pos);
+ whitespace(ctx, &pos);
ast_list_t *items = NULL;
for (;;) {
ast_t *item = optional(ctx, &pos, parse_extended_expr);
if (!item) break;
- whitespace(&pos);
+ whitespace(ctx, &pos);
ast_t *suffixed = parse_comprehension_suffix(ctx, item);
while (suffixed) {
item = suffixed;
@@ -116,12 +116,12 @@ ast_t *parse_set(parse_ctx_t *ctx, const char *pos) {
suffixed = parse_comprehension_suffix(ctx, item);
}
items = new (ast_list_t, .ast = item, .next = items);
- if (!match_separator(&pos)) break;
+ if (!match_separator(ctx, &pos)) break;
}
REVERSE_LIST(items);
- whitespace(&pos);
+ whitespace(ctx, &pos);
expect_closing(ctx, &pos, "|", "I wasn't able to parse the rest of this set");
return NewAST(ctx->file, start, pos, Set, .items = items);
diff --git a/src/parse/context.c b/src/parse/context.c
new file mode 100644
index 00000000..cd8d16bc
--- /dev/null
+++ b/src/parse/context.c
@@ -0,0 +1,8 @@
+// A context parameter that gets passed around during parsing.
+
+#include "../stdlib/memory.h"
+#include "../stdlib/pointers.h"
+#include "../stdlib/tables.h"
+#include "../stdlib/types.h"
+
+const TypeInfo_t *parse_comments_info = Table$info(Pointer$info("@", &Memory$info), Pointer$info("@", &Memory$info));
diff --git a/src/parse/context.h b/src/parse/context.h
index 6008060e..f1e3be2f 100644
--- a/src/parse/context.h
+++ b/src/parse/context.h
@@ -4,10 +4,15 @@
#include <setjmp.h>
#include <stdint.h>
+#include "../stdlib/datatypes.h"
#include "../stdlib/files.h"
+#include "../stdlib/types.h"
+
+extern const TypeInfo_t *parse_comments_info;
typedef struct {
file_t *file;
jmp_buf *on_err;
int64_t next_lambda_id;
+ Table_t comments; // Map of <start pos> -> <end pos>
} parse_ctx_t;
diff --git a/src/parse/controlflow.c b/src/parse/controlflow.c
index 6f6292af..1087e20e 100644
--- a/src/parse/controlflow.c
+++ b/src/parse/controlflow.c
@@ -36,7 +36,7 @@ ast_t *parse_block(parse_ctx_t *ctx, const char *pos) {
if (indent(ctx, &pos)) {
indented:;
int64_t block_indent = get_indent(ctx, pos);
- whitespace(&pos);
+ whitespace(ctx, &pos);
while (*pos) {
ast_t *stmt = optional(ctx, &pos, parse_statement);
if (!stmt) {
@@ -55,7 +55,7 @@ ast_t *parse_block(parse_ctx_t *ctx, const char *pos) {
break;
}
statements = new (ast_list_t, .ast = stmt, .next = statements);
- whitespace(&pos);
+ whitespace(ctx, &pos);
// Guard against having two valid statements on the same line, separated by spaces (but no newlines):
if (!memchr(stmt->end, '\n', (size_t)(pos - stmt->end))) {
@@ -131,18 +131,8 @@ ast_t *parse_while(parse_ctx_t *ctx, const char *pos) {
// while condition ["do"] [<indent>] body
const char *start = pos;
if (!match_word(&pos, "while")) return NULL;
-
- const char *tmp = pos;
- // Shorthand form: `while when ...`
- if (match_word(&tmp, "when")) {
- ast_t *when = expect(ctx, start, &pos, parse_when, "I expected a 'when' block after this");
- if (!when->__data.When.else_body) when->__data.When.else_body = NewAST(ctx->file, pos, pos, Stop);
- return NewAST(ctx->file, start, pos, While, .body = when);
- }
-
- (void)match_word(&pos, "do"); // Optional 'do'
-
ast_t *condition = expect(ctx, start, &pos, parse_expr, "I don't see a viable condition for this 'while'");
+ (void)match_word(&pos, "do"); // Optional 'do'
ast_t *body = expect(ctx, start, &pos, parse_block, "I expected a body for this 'while'");
return NewAST(ctx->file, start, pos, While, .condition = condition, .body = body);
}
@@ -174,7 +164,7 @@ ast_t *parse_if(parse_ctx_t *ctx, const char *pos) {
ast_t *body = expect(ctx, start, &pos, parse_block, "I expected a body for this 'if' statement");
const char *tmp = pos;
- whitespace(&tmp);
+ whitespace(ctx, &tmp);
ast_t *else_body = NULL;
const char *else_start = pos;
if (get_indent(ctx, tmp) == starting_indent && match_word(&tmp, "else")) {
@@ -198,7 +188,7 @@ ast_t *parse_when(parse_ctx_t *ctx, const char *pos) {
when_clause_t *clauses = NULL;
const char *tmp = pos;
- whitespace(&tmp);
+ whitespace(ctx, &tmp);
while (get_indent(ctx, tmp) == starting_indent && match_word(&tmp, "is")) {
pos = tmp;
spaces(&pos);
@@ -217,7 +207,7 @@ ast_t *parse_when(parse_ctx_t *ctx, const char *pos) {
}
clauses = new_clauses;
tmp = pos;
- whitespace(&tmp);
+ whitespace(ctx, &tmp);
}
REVERSE_LIST(clauses);
@@ -255,7 +245,7 @@ ast_t *parse_for(parse_ctx_t *ctx, const char *pos) {
ast_t *body = expect(ctx, start, &pos, parse_block, "I expected a body for this 'for'");
const char *else_start = pos;
- whitespace(&else_start);
+ whitespace(ctx, &else_start);
ast_t *empty = NULL;
if (match_word(&else_start, "else") && get_indent(ctx, else_start) == starting_indent) {
pos = else_start;
diff --git a/src/parse/expressions.c b/src/parse/expressions.c
index d643d4e7..df0a10a7 100644
--- a/src/parse/expressions.c
+++ b/src/parse/expressions.c
@@ -10,10 +10,10 @@
#include "context.h"
#include "controlflow.h"
#include "errors.h"
+#include "expressions.h"
#include "files.h"
#include "functions.h"
#include "numbers.h"
-#include "expressions.h"
#include "suffixes.h"
#include "text.h"
#include "types.h"
@@ -23,7 +23,7 @@ ast_t *parse_parens(parse_ctx_t *ctx, const char *pos) {
const char *start = pos;
spaces(&pos);
if (!match(&pos, "(")) return NULL;
- whitespace(&pos);
+ whitespace(ctx, &pos);
ast_t *expr = optional(ctx, &pos, parse_extended_expr);
if (!expr) return NULL;
@@ -34,7 +34,7 @@ ast_t *parse_parens(parse_ctx_t *ctx, const char *pos) {
comprehension = parse_comprehension_suffix(ctx, expr);
}
- whitespace(&pos);
+ whitespace(ctx, &pos);
expect_closing(ctx, &pos, ")", "I wasn't able to parse the rest of this expression");
// Update the span to include the parens:
@@ -45,11 +45,13 @@ ast_t *parse_reduction(parse_ctx_t *ctx, const char *pos) {
const char *start = pos;
if (!match(&pos, "(")) return NULL;
- whitespace(&pos);
+ whitespace(ctx, &pos);
ast_e op = match_binary_operator(&pos);
if (op == Unknown) return NULL;
- ast_t *key = NewAST(ctx->file, pos, pos, Var, .name = "$");
+ const char *op_str = binop_info[op].operator;
+ assert(op_str);
+ ast_t *key = NewAST(ctx->file, pos, pos, Var, .name = op_str);
for (bool progress = true; progress;) {
ast_t *new_term;
progress =
@@ -61,7 +63,7 @@ ast_t *parse_reduction(parse_ctx_t *ctx, const char *pos) {
if (key && key->tag == Var) key = NULL;
else if (key) pos = key->end;
- whitespace(&pos);
+ whitespace(ctx, &pos);
if (!match(&pos, ":")) return NULL;
ast_t *iter = optional(ctx, &pos, parse_extended_expr);
@@ -73,7 +75,7 @@ ast_t *parse_reduction(parse_ctx_t *ctx, const char *pos) {
suffixed = parse_comprehension_suffix(ctx, iter);
}
- whitespace(&pos);
+ whitespace(ctx, &pos);
expect_closing(ctx, &pos, ")", "I wasn't able to parse the rest of this reduction");
return NewAST(ctx->file, start, pos, Reduction, .iter = iter, .op = op, .key = key);
@@ -164,14 +166,14 @@ ast_t *parse_deserialize(parse_ctx_t *ctx, const char *pos) {
spaces(&pos);
expect_str(ctx, start, &pos, "(", "I expected arguments for this `deserialize` call");
- whitespace(&pos);
+ whitespace(ctx, &pos);
ast_t *value = expect(ctx, start, &pos, parse_extended_expr, "I expected an expression here");
- whitespace(&pos);
+ whitespace(ctx, &pos);
expect_str(ctx, start, &pos, "->",
"I expected a `-> Type` for this `deserialize` call so I know what it deserializes to");
- whitespace(&pos);
+ whitespace(ctx, &pos);
type_ast_t *type = expect(ctx, start, &pos, parse_type, "I couldn't parse the type for this deserialization");
- whitespace(&pos);
+ whitespace(ctx, &pos);
expect_closing(ctx, &pos, ")", "I expected a closing ')' for this `deserialize` call");
return NewAST(ctx->file, start, pos, Deserialize, .value = value, .type = type);
}
@@ -238,10 +240,10 @@ ast_t *parse_expr_str(const char *str) {
};
const char *pos = file->text;
- whitespace(&pos);
+ whitespace(&ctx, &pos);
ast_t *ast = parse_extended_expr(&ctx, pos);
pos = ast->end;
- whitespace(&pos);
+ whitespace(&ctx, &pos);
if (pos < file->text + file->len && *pos != '\0')
parser_err(&ctx, pos, pos + strlen(pos), "I couldn't parse this part of the string");
return ast;
diff --git a/src/parse/files.c b/src/parse/files.c
index 8078d544..5ff41c68 100644
--- a/src/parse/files.c
+++ b/src/parse/files.c
@@ -11,9 +11,9 @@
#include "../stdlib/util.h"
#include "context.h"
#include "errors.h"
+#include "expressions.h"
#include "files.h"
#include "functions.h"
-#include "expressions.h"
#include "statements.h"
#include "text.h"
#include "typedefs.h"
@@ -33,11 +33,11 @@ static ast_t *parse_top_declaration(parse_ctx_t *ctx, const char *pos) {
ast_t *parse_file_body(parse_ctx_t *ctx, const char *pos) {
const char *start = pos;
- whitespace(&pos);
+ whitespace(ctx, &pos);
ast_list_t *statements = NULL;
for (;;) {
const char *next = pos;
- whitespace(&next);
+ whitespace(ctx, &next);
if (get_indent(ctx, next) != 0) break;
ast_t *stmt;
if ((stmt = optional(ctx, &pos, parse_struct_def)) || (stmt = optional(ctx, &pos, parse_func_def))
@@ -47,12 +47,12 @@ ast_t *parse_file_body(parse_ctx_t *ctx, const char *pos) {
|| (stmt = optional(ctx, &pos, parse_inline_c)) || (stmt = optional(ctx, &pos, parse_top_declaration))) {
statements = new (ast_list_t, .ast = stmt, .next = statements);
pos = stmt->end;
- whitespace(&pos); // TODO: check for newline
+ whitespace(ctx, &pos); // TODO: check for newline
} else {
break;
}
}
- whitespace(&pos);
+ whitespace(ctx, &pos);
if (pos < ctx->file->text + ctx->file->len && *pos != '\0') {
parser_err(ctx, pos, eol(pos), "I expect all top-level statements to be declarations of some kind");
}
@@ -90,10 +90,10 @@ ast_t *parse_file(const char *path, jmp_buf *on_err) {
if (match(&pos, "#!")) // shebang
some_not(&pos, "\r\n");
- whitespace(&pos);
+ whitespace(&ctx, &pos);
ast = parse_file_body(&ctx, pos);
pos = ast->end;
- whitespace(&pos);
+ whitespace(&ctx, &pos);
if (pos < file->text + file->len && *pos != '\0') {
parser_err(&ctx, pos, pos + strlen(pos), "I couldn't parse this part of the file");
}
@@ -171,10 +171,10 @@ ast_t *parse_file_str(const char *str) {
};
const char *pos = file->text;
- whitespace(&pos);
+ whitespace(&ctx, &pos);
ast_t *ast = parse_file_body(&ctx, pos);
pos = ast->end;
- whitespace(&pos);
+ whitespace(&ctx, &pos);
if (pos < file->text + file->len && *pos != '\0')
parser_err(&ctx, pos, pos + strlen(pos), "I couldn't parse this part of the string");
return ast;
diff --git a/src/parse/functions.c b/src/parse/functions.c
index 37505ac5..ceb0a8bc 100644
--- a/src/parse/functions.c
+++ b/src/parse/functions.c
@@ -26,37 +26,42 @@ arg_ast_t *parse_args(parse_ctx_t *ctx, const char **pos) {
type_ast_t *type = NULL;
typedef struct name_list_s {
+ const char *start, *end;
const char *name, *alias;
struct name_list_s *next;
} name_list_t;
name_list_t *names = NULL;
for (;;) {
- whitespace(pos);
+ whitespace(ctx, pos);
const char *name = get_id(pos);
if (!name) break;
- whitespace(pos);
+ const char *name_start = *pos;
+ whitespace(ctx, pos);
const char *alias = NULL;
if (match(pos, "|")) {
- whitespace(pos);
+ whitespace(ctx, pos);
alias = get_id(pos);
if (!alias) parser_err(ctx, *pos, *pos, "I expected an argument alias after `|`");
}
if (match(pos, ":")) {
type = expect(ctx, *pos - 1, pos, parse_type, "I expected a type here");
- names = new (name_list_t, .name = name, .alias = alias, .next = names);
- whitespace(pos);
+ whitespace(ctx, pos);
if (match(pos, "="))
default_val = expect(ctx, *pos - 1, pos, parse_term, "I expected a value after this '='");
+ names =
+ new (name_list_t, .start = name_start, .end = *pos, .name = name, .alias = alias, .next = names);
break;
} else if (strncmp(*pos, "==", 2) != 0 && match(pos, "=")) {
default_val = expect(ctx, *pos - 1, pos, parse_term, "I expected a value after this '='");
- names = new (name_list_t, .name = name, .alias = alias, .next = names);
+ names =
+ new (name_list_t, .start = name_start, .end = *pos, .name = name, .alias = alias, .next = names);
break;
} else if (name) {
- names = new (name_list_t, .name = name, .alias = alias, .next = names);
+ names =
+ new (name_list_t, .start = name_start, .end = *pos, .name = name, .alias = alias, .next = names);
spaces(pos);
if (!match(pos, ",")) break;
} else {
@@ -71,10 +76,10 @@ arg_ast_t *parse_args(parse_ctx_t *ctx, const char **pos) {
REVERSE_LIST(names);
for (; names; names = names->next)
- args = new (arg_ast_t, .name = names->name, .alias = names->alias, .type = type, .value = default_val,
- .next = args);
+ args = new (arg_ast_t, .start = names->start, .end = names->end, .name = names->name, .alias = names->alias,
+ .type = type, .value = default_val, .next = args);
- if (!match_separator(pos)) break;
+ if (!match_separator(ctx, pos)) break;
}
REVERSE_LIST(args);
@@ -95,19 +100,19 @@ ast_t *parse_func_def(parse_ctx_t *ctx, const char *pos) {
arg_ast_t *args = parse_args(ctx, &pos);
spaces(&pos);
type_ast_t *ret_type = match(&pos, "->") ? optional(ctx, &pos, parse_type) : NULL;
- whitespace(&pos);
+ whitespace(ctx, &pos);
bool is_inline = false;
ast_t *cache_ast = NULL;
- for (bool specials = match(&pos, ";"); specials; specials = match_separator(&pos)) {
+ for (bool specials = match(&pos, ";"); specials; specials = match_separator(ctx, &pos)) {
const char *flag_start = pos;
if (match_word(&pos, "inline")) {
is_inline = true;
} else if (match_word(&pos, "cached")) {
if (!cache_ast) cache_ast = NewAST(ctx->file, pos, pos, Int, .str = "-1");
} else if (match_word(&pos, "cache_size")) {
- whitespace(&pos);
+ whitespace(ctx, &pos);
if (!match(&pos, "=")) parser_err(ctx, flag_start, pos, "I expected a value for 'cache_size'");
- whitespace(&pos);
+ whitespace(ctx, &pos);
cache_ast = expect(ctx, start, &pos, parse_expr, "I expected a maximum size for the cache");
}
}
@@ -129,19 +134,19 @@ ast_t *parse_convert_def(parse_ctx_t *ctx, const char *pos) {
arg_ast_t *args = parse_args(ctx, &pos);
spaces(&pos);
type_ast_t *ret_type = match(&pos, "->") ? optional(ctx, &pos, parse_type) : NULL;
- whitespace(&pos);
+ whitespace(ctx, &pos);
bool is_inline = false;
ast_t *cache_ast = NULL;
- for (bool specials = match(&pos, ";"); specials; specials = match_separator(&pos)) {
+ for (bool specials = match(&pos, ";"); specials; specials = match_separator(ctx, &pos)) {
const char *flag_start = pos;
if (match_word(&pos, "inline")) {
is_inline = true;
} else if (match_word(&pos, "cached")) {
if (!cache_ast) cache_ast = NewAST(ctx->file, pos, pos, Int, .str = "-1");
} else if (match_word(&pos, "cache_size")) {
- whitespace(&pos);
+ whitespace(ctx, &pos);
if (!match(&pos, "=")) parser_err(ctx, flag_start, pos, "I expected a value for 'cache_size'");
- whitespace(&pos);
+ whitespace(ctx, &pos);
cache_ast = expect(ctx, start, &pos, parse_expr, "I expected a maximum size for the cache");
}
}
diff --git a/src/parse/statements.c b/src/parse/statements.c
index a30231f0..9606acdc 100644
--- a/src/parse/statements.c
+++ b/src/parse/statements.c
@@ -8,8 +8,8 @@
#include "../stdlib/util.h"
#include "context.h"
#include "errors.h"
-#include "files.h"
#include "expressions.h"
+#include "files.h"
#include "statements.h"
#include "suffixes.h"
#include "types.h"
@@ -46,7 +46,7 @@ ast_t *parse_assignment(parse_ctx_t *ctx, const char *pos) {
targets = new (ast_list_t, .ast = lhs, .next = targets);
spaces(&pos);
if (!match(&pos, ",")) break;
- whitespace(&pos);
+ whitespace(ctx, &pos);
}
if (!targets) return NULL;
@@ -62,7 +62,7 @@ ast_t *parse_assignment(parse_ctx_t *ctx, const char *pos) {
values = new (ast_list_t, .ast = rhs, .next = values);
spaces(&pos);
if (!match(&pos, ",")) break;
- whitespace(&pos);
+ whitespace(ctx, &pos);
}
REVERSE_LIST(targets);
@@ -101,7 +101,7 @@ ast_t *parse_doctest(parse_ctx_t *ctx, const char *pos) {
if (!match(&pos, ">>")) return NULL;
spaces(&pos);
ast_t *expr = expect(ctx, start, &pos, parse_statement, "I couldn't parse the expression for this doctest");
- whitespace(&pos);
+ whitespace(ctx, &pos);
ast_t *expected = NULL;
if (match(&pos, "=")) {
spaces(&pos);
@@ -120,7 +120,7 @@ ast_t *parse_assert(parse_ctx_t *ctx, const char *pos) {
spaces(&pos);
ast_t *message = NULL;
if (match(&pos, ",")) {
- whitespace(&pos);
+ whitespace(ctx, &pos);
message = expect(ctx, start, &pos, parse_extended_expr, "I couldn't parse the error message for this assert");
} else {
pos = expr->end;
diff --git a/src/parse/suffixes.c b/src/parse/suffixes.c
index 7e748caf..cb54b2f6 100644
--- a/src/parse/suffixes.c
+++ b/src/parse/suffixes.c
@@ -14,10 +14,10 @@
ast_t *parse_field_suffix(parse_ctx_t *ctx, ast_t *lhs) {
if (!lhs) return NULL;
const char *pos = lhs->end;
- whitespace(&pos);
+ whitespace(ctx, &pos);
if (!match(&pos, ".")) return NULL;
if (*pos == '.') return NULL;
- whitespace(&pos);
+ whitespace(ctx, &pos);
bool dollar = match(&pos, "$");
const char *field = get_id(&pos);
if (!field) return NULL;
@@ -44,9 +44,9 @@ ast_t *parse_index_suffix(parse_ctx_t *ctx, ast_t *lhs) {
const char *start = lhs->start;
const char *pos = lhs->end;
if (!match(&pos, "[")) return NULL;
- whitespace(&pos);
+ whitespace(ctx, &pos);
ast_t *index = optional(ctx, &pos, parse_extended_expr);
- whitespace(&pos);
+ whitespace(ctx, &pos);
bool unchecked = match(&pos, ";") && (spaces(&pos), match_word(&pos, "unchecked") != 0);
expect_closing(ctx, &pos, "]", "I wasn't able to parse the rest of this index");
return NewAST(ctx->file, start, pos, Index, .indexed = lhs, .index = index, .unchecked = unchecked);
@@ -57,7 +57,7 @@ ast_t *parse_comprehension_suffix(parse_ctx_t *ctx, ast_t *expr) {
if (!expr) return NULL;
const char *start = expr->start;
const char *pos = expr->end;
- whitespace(&pos);
+ whitespace(ctx, &pos);
if (!match_word(&pos, "for")) return NULL;
ast_list_t *vars = NULL;
@@ -73,7 +73,7 @@ ast_t *parse_comprehension_suffix(parse_ctx_t *ctx, ast_t *expr) {
expect_str(ctx, start, &pos, "in", "I expected an 'in' for this 'for'");
ast_t *iter = expect(ctx, start, &pos, parse_expr, "I expected an iterable value for this 'for'");
const char *next_pos = pos;
- whitespace(&next_pos);
+ whitespace(ctx, &next_pos);
ast_t *filter = NULL;
if (match_word(&next_pos, "if")) {
pos = next_pos;
@@ -115,13 +115,13 @@ ast_t *parse_method_call_suffix(parse_ctx_t *ctx, ast_t *self) {
if (!fn) return NULL;
spaces(&pos);
if (!match(&pos, "(")) return NULL;
- whitespace(&pos);
+ whitespace(ctx, &pos);
arg_ast_t *args = NULL;
for (;;) {
const char *arg_start = pos;
const char *name = get_id(&pos);
- whitespace(&pos);
+ whitespace(ctx, &pos);
if (!name || !match(&pos, "=")) {
name = NULL;
pos = arg_start;
@@ -132,12 +132,12 @@ ast_t *parse_method_call_suffix(parse_ctx_t *ctx, ast_t *self) {
if (name) parser_err(ctx, arg_start, pos, "I expected an argument here");
break;
}
- args = new (arg_ast_t, .name = name, .value = arg, .next = args);
- if (!match_separator(&pos)) break;
+ args = new (arg_ast_t, .start = arg_start, .end = arg->end, .name = name, .value = arg, .next = args);
+ if (!match_separator(ctx, &pos)) break;
}
REVERSE_LIST(args);
- whitespace(&pos);
+ whitespace(ctx, &pos);
if (!match(&pos, ")")) parser_err(ctx, start, pos, "This parenthesis is unclosed");
@@ -152,13 +152,13 @@ ast_t *parse_fncall_suffix(parse_ctx_t *ctx, ast_t *fn) {
if (!match(&pos, "(")) return NULL;
- whitespace(&pos);
+ whitespace(ctx, &pos);
arg_ast_t *args = NULL;
for (;;) {
const char *arg_start = pos;
const char *name = get_id(&pos);
- whitespace(&pos);
+ whitespace(ctx, &pos);
if (!name || !match(&pos, "=")) {
name = NULL;
pos = arg_start;
@@ -170,10 +170,10 @@ ast_t *parse_fncall_suffix(parse_ctx_t *ctx, ast_t *fn) {
break;
}
args = new (arg_ast_t, .name = name, .value = arg, .next = args);
- if (!match_separator(&pos)) break;
+ if (!match_separator(ctx, &pos)) break;
}
- whitespace(&pos);
+ whitespace(ctx, &pos);
if (!match(&pos, ")")) parser_err(ctx, start, pos, "This parenthesis is unclosed");
diff --git a/src/parse/text.c b/src/parse/text.c
index 8897fd34..30ff8656 100644
--- a/src/parse/text.c
+++ b/src/parse/text.c
@@ -148,21 +148,19 @@ ast_t *parse_inline_c(parse_ctx_t *ctx, const char *pos) {
spaces(&pos);
type_ast_t *type = NULL;
- ast_list_t *chunks;
if (match(&pos, ":")) {
type = expect(ctx, start, &pos, parse_type, "I couldn't parse the type for this C_code code");
spaces(&pos);
- chunks = _parse_text_helper(ctx, &pos);
- if (type) {
- chunks = new (ast_list_t, .ast = NewAST(ctx->file, pos, pos, TextLiteral, Text("({")), .next = chunks);
- REVERSE_LIST(chunks);
- chunks = new (ast_list_t, .ast = NewAST(ctx->file, pos, pos, TextLiteral, Text("; })")), .next = chunks);
- REVERSE_LIST(chunks);
- }
- } else {
- chunks = _parse_text_helper(ctx, &pos);
}
+ static const char *quote_chars = "\"'`|/;([{<";
+ if (!strchr(quote_chars, *pos))
+ parser_err(ctx, pos, pos + 1,
+ "This is not a valid string quotation character. Valid characters are: \"'`|/;([{<");
+
+ char quote = *(pos++);
+ char unquote = closing[(int)quote] ? closing[(int)quote] : quote;
+ ast_list_t *chunks = _parse_text_helper(ctx, &pos, quote, unquote, '@', false);
return NewAST(ctx->file, start, pos, InlineCCode, .chunks = chunks, .type_ast = type);
}
diff --git a/src/parse/typedefs.c b/src/parse/typedefs.c
index 73fe9d7c..6e5e40d0 100644
--- a/src/parse/typedefs.c
+++ b/src/parse/typedefs.c
@@ -16,12 +16,12 @@
ast_t *parse_namespace(parse_ctx_t *ctx, const char *pos) {
const char *start = pos;
- whitespace(&pos);
+ whitespace(ctx, &pos);
int64_t indent = get_indent(ctx, pos);
ast_list_t *statements = NULL;
for (;;) {
const char *next = pos;
- whitespace(&next);
+ whitespace(ctx, &next);
if (get_indent(ctx, next) != indent) break;
ast_t *stmt;
if ((stmt = optional(ctx, &pos, parse_struct_def)) || (stmt = optional(ctx, &pos, parse_func_def))
@@ -31,7 +31,7 @@ ast_t *parse_namespace(parse_ctx_t *ctx, const char *pos) {
|| (stmt = optional(ctx, &pos, parse_inline_c)) || (stmt = optional(ctx, &pos, parse_declaration))) {
statements = new (ast_list_t, .ast = stmt, .next = statements);
pos = stmt->end;
- whitespace(&pos); // TODO: check for newline
+ whitespace(ctx, &pos); // TODO: check for newline
// if (!(space_types & WHITESPACE_NEWLINES)) {
// pos = stmt->end;
// break;
@@ -62,10 +62,10 @@ ast_t *parse_struct_def(parse_ctx_t *ctx, const char *pos) {
arg_ast_t *fields = parse_args(ctx, &pos);
- whitespace(&pos);
+ whitespace(ctx, &pos);
bool secret = false, external = false, opaque = false;
if (match(&pos, ";")) { // Extra flags
- whitespace(&pos);
+ whitespace(ctx, &pos);
for (;;) {
if (match_word(&pos, "secret")) {
secret = true;
@@ -79,7 +79,7 @@ ast_t *parse_struct_def(parse_ctx_t *ctx, const char *pos) {
break;
}
- if (!match_separator(&pos)) break;
+ if (!match_separator(ctx, &pos)) break;
}
}
@@ -87,7 +87,7 @@ ast_t *parse_struct_def(parse_ctx_t *ctx, const char *pos) {
ast_t *namespace = NULL;
const char *ns_pos = pos;
- whitespace(&ns_pos);
+ whitespace(ctx, &ns_pos);
int64_t ns_indent = get_indent(ctx, ns_pos);
if (ns_indent > starting_indent) {
pos = ns_pos;
@@ -110,9 +110,10 @@ ast_t *parse_enum_def(parse_ctx_t *ctx, const char *pos) {
if (!match(&pos, "(")) return NULL;
tag_ast_t *tags = NULL;
- whitespace(&pos);
+ whitespace(ctx, &pos);
for (;;) {
spaces(&pos);
+ const char *tag_start = pos;
const char *tag_name = get_id(&pos);
if (!tag_name) break;
@@ -120,25 +121,26 @@ ast_t *parse_enum_def(parse_ctx_t *ctx, const char *pos) {
arg_ast_t *fields;
bool secret = false;
if (match(&pos, "(")) {
- whitespace(&pos);
+ whitespace(ctx, &pos);
fields = parse_args(ctx, &pos);
- whitespace(&pos);
+ whitespace(ctx, &pos);
if (match(&pos, ";")) { // Extra flags
- whitespace(&pos);
+ whitespace(ctx, &pos);
secret = match_word(&pos, "secret");
- whitespace(&pos);
+ whitespace(ctx, &pos);
}
expect_closing(ctx, &pos, ")", "I wasn't able to parse the rest of this tagged union member");
} else {
fields = NULL;
}
- tags = new (tag_ast_t, .name = tag_name, .fields = fields, .secret = secret, .next = tags);
+ tags = new (tag_ast_t, .start = tag_start, .end = pos, .name = tag_name, .fields = fields, .secret = secret,
+ .next = tags);
- if (!match_separator(&pos)) break;
+ if (!match_separator(ctx, &pos)) break;
}
- whitespace(&pos);
+ whitespace(ctx, &pos);
expect_closing(ctx, &pos, ")", "I wasn't able to parse the rest of this enum definition");
REVERSE_LIST(tags);
@@ -147,7 +149,7 @@ ast_t *parse_enum_def(parse_ctx_t *ctx, const char *pos) {
ast_t *namespace = NULL;
const char *ns_pos = pos;
- whitespace(&ns_pos);
+ whitespace(ctx, &ns_pos);
int64_t ns_indent = get_indent(ctx, ns_pos);
if (ns_indent > starting_indent) {
pos = ns_pos;
@@ -170,7 +172,7 @@ ast_t *parse_lang_def(parse_ctx_t *ctx, const char *pos) {
ast_t *namespace = NULL;
const char *ns_pos = pos;
- whitespace(&ns_pos);
+ whitespace(ctx, &ns_pos);
int64_t ns_indent = get_indent(ctx, ns_pos);
if (ns_indent > starting_indent) {
pos = ns_pos;
@@ -192,7 +194,7 @@ ast_t *parse_extend(parse_ctx_t *ctx, const char *pos) {
ast_t *body = NULL;
const char *ns_pos = pos;
- whitespace(&ns_pos);
+ whitespace(ctx, &ns_pos);
int64_t ns_indent = get_indent(ctx, ns_pos);
if (ns_indent > starting_indent) {
pos = ns_pos;
diff --git a/src/parse/types.c b/src/parse/types.c
index 54bc0c03..ffb7d869 100644
--- a/src/parse/types.c
+++ b/src/parse/types.c
@@ -9,19 +9,19 @@
#include "../stdlib/print.h"
#include "context.h"
#include "errors.h"
-#include "functions.h"
#include "expressions.h"
+#include "functions.h"
#include "types.h"
#include "utils.h"
type_ast_t *parse_table_type(parse_ctx_t *ctx, const char *pos) {
const char *start = pos;
if (!match(&pos, "{")) return NULL;
- whitespace(&pos);
+ whitespace(ctx, &pos);
type_ast_t *key_type = parse_type(ctx, pos);
if (!key_type) return NULL;
pos = key_type->end;
- whitespace(&pos);
+ whitespace(ctx, &pos);
type_ast_t *value_type = NULL;
if (match(&pos, "=")) {
value_type = expect(ctx, start, &pos, parse_type, "I couldn't parse the rest of this table type");
@@ -35,7 +35,7 @@ type_ast_t *parse_table_type(parse_ctx_t *ctx, const char *pos) {
default_value =
expect(ctx, start, &pos, parse_extended_expr, "I couldn't parse the default value for this table");
}
- whitespace(&pos);
+ whitespace(ctx, &pos);
expect_closing(ctx, &pos, "}", "I wasn't able to parse the rest of this table type");
return NewTypeAST(ctx->file, start, pos, TableTypeAST, .key = key_type, .value = value_type,
.default_value = default_value);
@@ -44,11 +44,11 @@ type_ast_t *parse_table_type(parse_ctx_t *ctx, const char *pos) {
type_ast_t *parse_set_type(parse_ctx_t *ctx, const char *pos) {
const char *start = pos;
if (!match(&pos, "|")) return NULL;
- whitespace(&pos);
+ whitespace(ctx, &pos);
type_ast_t *item_type = parse_type(ctx, pos);
if (!item_type) return NULL;
pos = item_type->end;
- whitespace(&pos);
+ whitespace(ctx, &pos);
expect_closing(ctx, &pos, "|", "I wasn't able to parse the rest of this set type");
return NewTypeAST(ctx->file, start, pos, SetTypeAST, .item = item_type);
}
@@ -113,10 +113,10 @@ type_ast_t *parse_non_optional_type(parse_ctx_t *ctx, const char *pos) {
|| (type = parse_table_type(ctx, pos)) || (type = parse_set_type(ctx, pos))
|| (type = parse_type_name(ctx, pos)) || (type = parse_func_type(ctx, pos)));
if (!success && match(&pos, "(")) {
- whitespace(&pos);
+ whitespace(ctx, &pos);
type = optional(ctx, &pos, parse_type);
if (!type) return NULL;
- whitespace(&pos);
+ whitespace(ctx, &pos);
expect_closing(ctx, &pos, ")", "I wasn't able to parse the rest of this type");
type->start = start;
type->end = pos;
@@ -144,11 +144,11 @@ type_ast_t *parse_type_str(const char *str) {
};
const char *pos = file->text;
- whitespace(&pos);
+ whitespace(&ctx, &pos);
type_ast_t *ast = parse_type(&ctx, pos);
if (!ast) return ast;
pos = ast->end;
- whitespace(&pos);
+ whitespace(&ctx, &pos);
if (strlen(pos) > 0) {
parser_err(&ctx, pos, pos + strlen(pos), "I couldn't parse this part of the type");
}
diff --git a/src/parse/utils.c b/src/parse/utils.c
index 7e827ac6..0644bfa0 100644
--- a/src/parse/utils.c
+++ b/src/parse/utils.c
@@ -6,6 +6,7 @@
#include <unictype.h>
#include <uniname.h>
+#include "../stdlib/tables.h"
#include "../stdlib/util.h"
#include "errors.h"
#include "utils.h"
@@ -43,8 +44,8 @@ size_t some_not(const char **pos, const char *forbid) {
size_t spaces(const char **pos) { return some_of(pos, " \t"); }
-void whitespace(const char **pos) {
- while (some_of(pos, " \t\r\n") || comment(pos))
+void whitespace(parse_ctx_t *ctx, const char **pos) {
+ while (some_of(pos, " \t\r\n") || comment(ctx, pos))
continue;
}
@@ -95,9 +96,12 @@ const char *get_id(const char **inout) {
PUREFUNC const char *eol(const char *str) { return str + strcspn(str, "\r\n"); }
-bool comment(const char **pos) {
+bool comment(parse_ctx_t *ctx, const char **pos) {
if ((*pos)[0] == '#') {
+ const char *start = *pos;
*pos += strcspn(*pos, "\r\n");
+ const char *end = *pos;
+ Table$set(&ctx->comments, &start, &end, parse_comments_info);
return true;
} else {
return false;
@@ -129,7 +133,7 @@ PUREFUNC int64_t get_indent(parse_ctx_t *ctx, const char *pos) {
bool indent(parse_ctx_t *ctx, const char **out) {
const char *pos = *out;
int64_t starting_indent = get_indent(ctx, pos);
- whitespace(&pos);
+ whitespace(ctx, &pos);
const char *next_line = get_line(ctx->file, get_line_number(ctx->file, pos));
if (next_line <= *out) return false;
@@ -239,12 +243,12 @@ const char *unescape(parse_ctx_t *ctx, const char **out) {
#pragma GCC diagnostic pop
#endif
-bool match_separator(const char **pos) { // Either comma or newline
+bool match_separator(parse_ctx_t *ctx, const char **pos) { // Either comma or newline
const char *p = *pos;
int separators = 0;
for (;;) {
if (some_of(&p, "\r\n,")) ++separators;
- else if (!comment(&p) && !some_of(&p, " \t")) break;
+ else if (!comment(ctx, &p) && !some_of(&p, " \t")) break;
}
if (separators > 0) {
*pos = p;
diff --git a/src/parse/utils.h b/src/parse/utils.h
index ba54120a..b8fb0756 100644
--- a/src/parse/utils.h
+++ b/src/parse/utils.h
@@ -12,16 +12,16 @@ CONSTFUNC bool is_keyword(const char *word);
size_t some_of(const char **pos, const char *allow);
size_t some_not(const char **pos, const char *forbid);
size_t spaces(const char **pos);
-void whitespace(const char **pos);
+void whitespace(parse_ctx_t *ctx, const char **pos);
size_t match(const char **pos, const char *target);
size_t match_word(const char **pos, const char *word);
const char *get_word(const char **pos);
const char *get_id(const char **pos);
-bool comment(const char **pos);
+bool comment(parse_ctx_t *ctx, const char **pos);
bool indent(parse_ctx_t *ctx, const char **pos);
const char *eol(const char *str);
PUREFUNC int64_t get_indent(parse_ctx_t *ctx, const char *pos);
const char *unescape(parse_ctx_t *ctx, const char **out);
bool is_xid_continue_next(const char *pos);
bool newline_with_indentation(const char **out, int64_t target);
-bool match_separator(const char **pos);
+bool match_separator(parse_ctx_t *ctx, const char **pos);