aboutsummaryrefslogtreecommitdiff
path: root/parse.c
diff options
context:
space:
mode:
Diffstat (limited to 'parse.c')
-rw-r--r--parse.c1759
1 files changed, 1759 insertions, 0 deletions
diff --git a/parse.c b/parse.c
new file mode 100644
index 00000000..845b3ca0
--- /dev/null
+++ b/parse.c
@@ -0,0 +1,1759 @@
+// Parse SSS code using recursive descent
+#include <ctype.h>
+#include <gc.h>
+#include <libgen.h>
+#include <linux/limits.h>
+#include <setjmp.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <string.h>
+#include <unistr.h>
+#include <unictype.h>
+#include <signal.h>
+
+#include "ast.h"
+#include "util.h"
+
+typedef struct { // Parser state handed to every parsing function in this file
+ sss_file_t *file; // File being parsed; used to build AST spans and to look up line/column/indent info
+ jmp_buf *on_err; // If non-NULL, vparser_err() longjmp()s here after printing instead of aborting
+} parse_ctx_t;
+
+typedef ast_t* (parser_t)(parse_ctx_t*,const char*); // Function type shared by the AST parsers: (context, start position) -> node, or NULL if nothing matched
+
+extern void builtin_fail(const char *fmt, ...); // Declared here, defined elsewhere; not called in the visible part of this file
+
+#define PARSER(name) ast_t *name(parse_ctx_t *ctx, const char *pos) // Spells out the standard parser signature; the body can rely on `ctx` and `pos`
+
+#define STUB_PARSER(name) PARSER(name) { (void)ctx; (void)pos; return NULL; } // Defines a placeholder parser that never matches
+
+int op_tightness[] = { // Precedence level for each binary operator, indexed by binop_e. NOTE(review): lower numbers presumably bind tighter (power=1 ... and/or/xor=9) -- confirm where this table is consumed
+ [BINOP_POWER]=1,
+ [BINOP_MULT]=2, [BINOP_DIVIDE]=2, [BINOP_MOD]=2, [BINOP_MOD1]=2,
+ [BINOP_PLUS]=3, [BINOP_MINUS]=3,
+ [BINOP_CONCAT]=4,
+ [BINOP_LSHIFT]=5, [BINOP_RSHIFT]=5,
+ [BINOP_MIN]=6, [BINOP_MAX]=6,
+ [BINOP_EQ]=7, [BINOP_NE]=7,
+ [BINOP_LT]=8, [BINOP_LE]=8, [BINOP_GT]=8, [BINOP_GE]=8,
+ [BINOP_AND]=9, [BINOP_OR]=9, [BINOP_XOR]=9,
+};
+
+static const char *keywords[] = { // Reserved words: get_id() returns NULL for any word found in this list
+ "yes", "xor", "while", "use", "then", "struct", "stop", "skip", "return",
+ "or", "not", "no", "mod1", "mod", "in", "if", "func", "for", "extern",
+ "enum", "else", "do", "and", "_mix_", "_min_", "_max_",
+ NULL, // Sentinel: get_id() stops scanning when it reaches this entry
+};
+
+enum {NORMAL_FUNCTION=0, EXTERN_FUNCTION=1}; // Readable values for the `is_extern` argument of parse_fncall_suffix()
+
+static inline size_t some_of(const char **pos, const char *allow); // Text-level primitives: these work on a cursor passed by address and advance it past what they consume
+static inline size_t some_not(const char **pos, const char *forbid);
+static inline size_t spaces(const char **pos);
+static inline size_t whitespace(const char **pos);
+static inline size_t match(const char **pos, const char *target);
+static inline void expect_str(parse_ctx_t *ctx, const char *start, const char **pos, const char *target, const char *fmt, ...);
+static inline void expect_closing(parse_ctx_t *ctx, const char **pos, const char *target, const char *fmt, ...);
+static inline size_t match_word(const char **pos, const char *word);
+static inline const char* get_word(const char **pos);
+static inline const char* get_id(const char **pos);
+static inline bool comment(const char **pos);
+static inline bool indent(parse_ctx_t *ctx, const char **pos);
+static inline binop_e match_binary_operator(const char **pos);
+static ast_t *parse_fncall_suffix(parse_ctx_t *ctx, ast_t *fn, bool is_extern); // Suffix parsers: take an already-parsed node and try to extend it, starting at that node's end
+static ast_t *parse_field_suffix(parse_ctx_t *ctx, ast_t *lhs);
+static ast_t *parse_index_suffix(parse_ctx_t *ctx, ast_t *lhs);
+static arg_list_t *parse_args(parse_ctx_t *ctx, const char **pos, bool allow_unnamed);
+static PARSER(parse_for); // Parsers with the standard PARSER signature (see the PARSER macro)
+static PARSER(parse_while);
+static PARSER(parse_if);
+static PARSER(parse_expr);
+static PARSER(parse_extended_expr);
+static PARSER(parse_term_no_suffix);
+static PARSER(parse_term);
+static PARSER(parse_inline_block);
+static PARSER(parse_statement);
+static PARSER(parse_block);
+static PARSER(parse_opt_indented_block);
+static PARSER(parse_var);
+static PARSER(parse_type_def);
+static PARSER(parse_func_def);
+static PARSER(parse_extern);
+static PARSER(parse_declaration);
+static PARSER(parse_doctest);
+static PARSER(parse_use);
+static PARSER(parse_linker);
+static PARSER(parse_namespace);
+
+static type_ast_t *parse_type(parse_ctx_t *ctx, const char *pos); // Type parsers return type_ast_t nodes instead of ast_t nodes
+static type_ast_t *parse_enum_type(parse_ctx_t *ctx, const char *pos);
+
+//
+// Report a parse error and never return. Prints "file:line.col: message" and
+// the offending source span (start..end) to stderr, then longjmp()s to
+// ctx->on_err if one is installed; otherwise raises SIGABRT and exits.
+//
+__attribute__((noreturn))
+static void vparser_err(parse_ctx_t *ctx, const char *start, const char *end, const char *fmt, va_list args) {
+    // Only colorize for a terminal, and only if the user hasn't set NO_COLOR
+    const bool use_color = isatty(STDERR_FILENO) && !getenv("NO_COLOR");
+
+    // Headline: location followed by the formatted message
+    if (use_color)
+        fputs("\x1b[31;1;7m", stderr);
+    fprintf(stderr, "%s:%ld.%ld: ", ctx->file->relative_filename,
+            sss_get_line_number(ctx->file, start),
+            sss_get_line_column(ctx->file, start));
+    vfprintf(stderr, fmt, args);
+    if (use_color)
+        fputs(" \x1b[m", stderr);
+    fputs("\n\n", stderr);
+
+    // Source excerpt with the erroneous span highlighted
+    fprint_span(stderr, ctx->file, start, end, "\x1b[31;1;7m", 2, use_color);
+    fputs("\n", stderr);
+
+    // Hand control back to whoever installed a recovery point, if anyone did
+    if (ctx->on_err)
+        longjmp(*ctx->on_err, 1);
+
+    raise(SIGABRT);
+    exit(1);
+}
+
+//
+// Variadic front-end for vparser_err(): packs the printf-style arguments into
+// a va_list and forwards everything. Never returns.
+//
+__attribute__((noreturn))
+static void parser_err(parse_ctx_t *ctx, const char *start, const char *end, const char *fmt, ...) {
+    va_list ap;
+    va_start(ap, fmt);
+    vparser_err(ctx, start, end, fmt, ap);
+    va_end(ap); // Not reached (vparser_err never returns); kept to pair with va_start
+}
+
+//
+// Convert an escape sequence like \n to a heap-allocated string.
+//
+// `*out` must point at a backslash. On return, `*out` has been advanced past
+// the escape sequence. Recognized forms:
+//   \a \b \e \f \n \r \t \v   the corresponding control character
+//   \_                        a space
+//   \xHH                      the byte with exactly two hex digits HH
+//   \OOO                      the byte with exactly three octal digits OOO
+// Anything else yields the character after the backslash unchanged (so a
+// malformed "\xZZ" is just "x"). A backslash that is the last character of the
+// input yields a literal backslash and does not step past the terminator.
+//
+const char *unescape(const char **out) {
+    static const char *unescapes[256] = {['a']="\a",['b']="\b",['e']="\x1b",['f']="\f",['n']="\n",['r']="\r",['t']="\t",['v']="\v",['_']=" "};
+    const char *escape = *out;
+    assert(*escape == '\\');
+
+    // Plain `char` may be signed, so a UTF-8 byte would be a negative table index
+    const unsigned char code = (unsigned char)escape[1];
+
+    if (code == '\0') {
+        *out = escape + 1;
+        return heap_strn(escape, 1);
+    }
+
+    if (unescapes[code]) {
+        *out = escape + 2;
+        return heap_str(unescapes[code]);
+    }
+
+    if (code == 'x' && isxdigit((unsigned char)escape[2]) && isxdigit((unsigned char)escape[3])) {
+        // Convert exactly the two digits we are going to skip over; calling
+        // strtol() on the source text would keep reading any hex digits that follow
+        const char digits[] = {escape[2], escape[3], '\0'};
+        char c = (char)strtol(digits, NULL, 16);
+        *out = escape + 4;
+        return heap_strn(&c, 1);
+    }
+
+    if ('0' <= escape[1] && escape[1] <= '7' && '0' <= escape[2] && escape[2] <= '7' && '0' <= escape[3] && escape[3] <= '7') {
+        // Same reasoning as for hex: exactly three octal digits
+        const char digits[] = {escape[1], escape[2], escape[3], '\0'};
+        char c = (char)strtol(digits, NULL, 8);
+        *out = escape + 4;
+        return heap_strn(&c, 1);
+    }
+
+    // Unknown escape: the escaped character stands for itself
+    *out = escape + 2;
+    return heap_strn(escape+1, 1);
+}
+
+///////////////////////////////////////////////////////////////////////////////////////////////////////////
+///////////////////////////// Text-based parsing primitives ///////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Advance *pos over the longest run of characters drawn from `allow`;
+// returns how many characters were skipped (possibly 0).
+size_t some_of(const char **pos, const char *allow) {
+    const char *const begin = *pos;
+    *pos = begin + strspn(begin, allow);
+    return (size_t)(*pos - begin);
+}
+
+// Advance *pos up to (not including) the first character found in `forbid`,
+// or to the end of the string; returns how many characters were skipped.
+size_t some_not(const char **pos, const char *forbid) {
+    const char *const begin = *pos;
+    *pos = begin + strcspn(begin, forbid);
+    return (size_t)(*pos - begin);
+}
+
+// Skip horizontal whitespace (spaces and tabs) only; returns the count skipped.
+size_t spaces(const char **pos) {
+    static const char horizontal[] = " \t";
+    return some_of(pos, horizontal);
+}
+// Skip any mix of spaces, tabs, line breaks and `//` comments;
+// returns the total number of bytes skipped.
+size_t whitespace(const char **pos) {
+    const char *const begin = *pos;
+    for (;;) {
+        if (some_of(pos, " \t\r\n") > 0)
+            continue;
+        if (comment(pos))
+            continue;
+        break; // Neither blank characters nor a comment: done
+    }
+    return (size_t)(*pos - begin);
+}
+
+// If the text at *pos begins with `target`, advance past it and return its
+// length; otherwise leave *pos alone and return 0.
+size_t match(const char **pos, const char *target) {
+    const size_t target_len = strlen(target);
+    if (strncmp(*pos, target, target_len) == 0) {
+        *pos += target_len;
+        return target_len;
+    }
+    return 0;
+}
+
+// True if the UTF-8 character at `pos` has the Unicode XID_Continue property,
+// i.e. it could continue an identifier.
+static inline bool is_xid_continue_next(const char *pos) {
+    ucs4_t codepoint = 0;
+    const uint8_t *utf8 = (const uint8_t*)pos;
+    u8_next(&codepoint, utf8);
+    return uc_is_property_xid_continue(codepoint);
+}
+
+//
+// Expect a string (after optional spaces/tabs) and emit a parser error if it's
+// not there. When `target` ends in a word character (letter, digit or '_'), it
+// must also not be followed by an identifier character, so expecting "in"
+// does not accept "index". On success *pos is just past `target`; on failure
+// the error is reported via vparser_err() with the span start..*pos.
+//
+static void expect_str(
+    parse_ctx_t *ctx, const char *start, const char **pos, const char *target, const char *fmt, ...) {
+    spaces(pos);
+    if (match(pos, target)) {
+        // <ctype.h> classifiers are only defined for unsigned char values (or EOF)
+        const unsigned char lastchar = (unsigned char)target[strlen(target)-1];
+        const bool ends_like_word = isalnum(lastchar) || lastchar == '_';
+        if (!ends_like_word || !is_xid_continue_next(*pos))
+            return;
+    }
+
+    if (isatty(STDERR_FILENO) && !getenv("NO_COLOR"))
+        fputs("\x1b[31;1;7m", stderr);
+    va_list args;
+    va_start(args, fmt);
+    vparser_err(ctx, start, *pos, fmt, args);
+    va_end(args); // Not reached: vparser_err never returns
+}
+
+//
+// Helper for matching closing parens with good error messages.
+// Skips spaces/tabs and consumes `closing` if it is there. Otherwise reports a
+// parse error whose highlighted span runs from the original position to the
+// nearest of: the end of the current line, the next occurrence of `closing`,
+// or the end of the input.
+//
+static void expect_closing(
+    parse_ctx_t *ctx, const char **pos, const char *closing, const char *fmt, ...) {
+    const char *start = *pos;
+    spaces(pos);
+    if (match(pos, closing))
+        return;
+
+    // strchr()/strstr() return NULL when nothing is found -- the usual case
+    // for a missing closer -- so fall back to the end of the input before
+    // comparing; ordering a pointer against NULL would pick the wrong end
+    const char *end_of_input = *pos + strlen(*pos);
+    const char *eol = strchr(*pos, '\n');
+    if (!eol) eol = end_of_input;
+    const char *next = strstr(*pos, closing);
+    if (!next) next = end_of_input;
+
+    const char *end = eol < next ? eol : next;
+
+    if (isatty(STDERR_FILENO) && !getenv("NO_COLOR"))
+        fputs("\x1b[31;1;7m", stderr);
+    va_list args;
+    va_start(args, fmt);
+    vparser_err(ctx, start, end, fmt, args);
+    va_end(args); // Not reached: vparser_err never returns
+}
+
+#define expect(ctx, start, pos, parser, ...) ({ /* Mandatory sub-parse: skip spaces at *pos and run `parser`; on failure report __VA_ARGS__ (printf-style) over start..*pos, else move *pos to the result's end and yield the result */ \
+ const char **_pos = pos; \
+ spaces(_pos); \
+ auto _result = parser(ctx, *_pos); \
+ if (!_result) { \
+ if (isatty(STDERR_FILENO) && !getenv("NO_COLOR")) \
+ fputs("\x1b[31;1;7m", stderr); \
+ parser_err(ctx, start, *_pos, __VA_ARGS__); /* never returns; note `start` is only evaluated here, after spaces were skipped */ \
+ } \
+ *_pos = _result->end; \
+ _result; })
+
+#define optional(ctx, pos, parser) ({ /* Optional sub-parse: skip spaces at *pos and run `parser`; yields the result, or NULL if nothing matched */ \
+ const char **_pos = pos; \
+ spaces(_pos); /* the skipped spaces stay consumed even when the parser fails */ \
+ auto _result = parser(ctx, *_pos); \
+ if (_result) *_pos = _result->end; \
+ _result; })
+
+// Match `word` as a whole word: leading spaces/tabs are skipped, and the word
+// must not be immediately followed by an identifier character. On success
+// *out is advanced past the word and strlen(word) is returned; on failure
+// *out is untouched and 0 is returned.
+size_t match_word(const char **out, const char *word) {
+    const char *cursor = *out;
+    spaces(&cursor);
+    if (match(&cursor, word) && !is_xid_continue_next(cursor)) {
+        *out = cursor;
+        return strlen(word);
+    }
+    return 0;
+}
+
+// Skip over a balanced delimiter group that starts at *out with `open`.
+// The closing delimiter is the mirror of `open` for ( { < [ and `open` itself
+// for anything else (in which case the group ends at the next occurrence).
+// Returns true and moves *out just past the closing delimiter on success;
+// returns false and leaves *out alone if *out doesn't start with `open` or
+// the group is never closed.
+bool match_group(const char **out, char open) {
+    static const char mirror_delim[256] = {['(']=')', ['{']='}', ['<']='>', ['[']=']'};
+    const char *pos = *out;
+    // A NUL "delimiter" would match the terminator and scan past the end of the string
+    if (open == '\0' || *pos != open) return false;
+    // Index with an unsigned byte: plain `char` may be negative
+    const char mirrored = mirror_delim[(unsigned char)open];
+    const char close = mirrored ? mirrored : open;
+    int depth = 1;
+    for (++pos; *pos && depth > 0; ++pos) {
+        // `close` is checked first so that a self-closing delimiter never nests
+        if (*pos == close) --depth;
+        else if (*pos == open) ++depth;
+    }
+    if (depth != 0)
+        return false;
+    *out = pos;
+    return true;
+}
+
+// Read an identifier-shaped word (XID_Start or '_' followed by XID_Continue
+// characters) after optional spaces/tabs. Returns a heap copy of the word and
+// advances *inout past it, or returns NULL (leaving *inout alone) if the text
+// doesn't start with a word.
+const char *get_word(const char **inout) {
+    const char *word = *inout;
+    spaces(&word);
+
+    // The first character decides whether this is a word at all
+    ucs4_t point = 0;
+    const uint8_t *cursor = u8_next(&point, (const uint8_t*)word);
+    if (point != '_' && !uc_is_property_xid_start(point))
+        return NULL;
+
+    // Extend over continuation characters; u8_next() yields NULL at end of string
+    for (;;) {
+        const uint8_t *after = u8_next(&point, cursor);
+        if (!after || !uc_is_property_xid_continue(point))
+            break;
+        cursor = after;
+    }
+
+    const char *word_end = (const char*)cursor;
+    *inout = word_end;
+    return heap_strn(word, (size_t)(word_end - word));
+}
+
+// Like get_word(), but refuses reserved words: returns NULL (without moving
+// *inout) if the next word is missing or appears in `keywords`.
+const char *get_id(const char **inout) {
+    const char *cursor = *inout;
+    const char *word = get_word(&cursor);
+    if (word == NULL)
+        return NULL;
+
+    for (const char **kw = keywords; *kw != NULL; ++kw) {
+        if (strcmp(word, *kw) == 0)
+            return NULL;
+    }
+
+    *inout = cursor;
+    return word;
+}
+
+// Skip a `//` line comment, stopping before the line break.
+// Returns true if a comment was found at *pos.
+bool comment(const char **pos) {
+    if (match(pos, "//")) {
+        some_not(pos, "\r\n");
+        return true;
+    }
+    return false;
+}
+
+// Succeeds if, after skipping whitespace and comments, the text continues on
+// a later line that is indented more deeply than the line *out is on. On
+// success *out is moved to that continuation point; otherwise it is untouched.
+bool indent(parse_ctx_t *ctx, const char **out) {
+    const char *const origin = *out;
+    const int64_t base_indent = sss_get_indent(ctx->file, origin);
+
+    const char *cursor = origin;
+    whitespace(&cursor);
+
+    const bool on_new_line =
+        sss_get_line_number(ctx->file, cursor) != sss_get_line_number(ctx->file, origin);
+    if (!on_new_line || sss_get_indent(ctx->file, cursor) <= base_indent)
+        return false;
+
+    *out = cursor;
+    return true;
+}
+
+// Consume leading spaces (width 1) and tabs (width 4) until at least `target`
+// columns of indentation have been consumed. Returns false without moving
+// *out if a different character is hit first. A tab may overshoot `target`.
+bool match_indentation(const char **out, int64_t target) {
+    const char *cursor = *out;
+    int64_t width = 0;
+    while (width < target) {
+        if (*cursor == ' ')
+            width += 1;
+        else if (*cursor == '\t')
+            width += 4;
+        else
+            return false;
+        ++cursor;
+    }
+    *out = cursor;
+    return true;
+}
+
+///////////////////////////////////////////////////////////////////////////////////////////////////////////
+//////////////////////////////////// AST-based parsers /////////////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+// '(' <extended expr> ')' -- yields the inner expression's node, re-spanned so
+// that it covers the parentheses as well.
+PARSER(parse_parens) {
+    const char *start = pos;
+    spaces(&pos);
+    if (!match(&pos, "("))
+        return NULL;
+
+    whitespace(&pos);
+    ast_t *inner = optional(ctx, &pos, parse_extended_expr);
+    if (inner == NULL)
+        return NULL;
+
+    expect_closing(ctx, &pos, ")", "I wasn't able to parse the rest of this expression");
+
+    // Copy the inner node, widening its span to include the parens
+    return new(ast_t, .file=(ctx)->file, .start=start, .end=pos,
+               .tag=inner->tag, .__data=inner->__data);
+}
+
+//
+// Consume the longest run of characters from `allowed` at *pos and convert it
+// to an integer in `base`, ignoring '_' digit separators. An empty run yields 0.
+//
+static int64_t parse_int_digits(const char **pos, const char *allowed, int base) {
+    const size_t span = strspn(*pos, allowed);
+    char *buf = GC_MALLOC_ATOMIC(span+1);
+    char *dest = buf;
+    for (const char *src = *pos; src < *pos + span; ++src) {
+        if (*src != '_') *(dest++) = *src;
+    }
+    *dest = '\0';
+    *pos += span;
+    // strtoll rather than strtol: `long` is only 32 bits on some platforms
+    return (int64_t)strtoll(buf, NULL, base);
+}
+
+//
+// Integer literal: [-] (0x<hex> | 0b<binary> | 0o<octal> | <decimal>), with
+// '_' allowed as a digit separator, optionally followed by:
+//   %                       -> produces a Num equal to value/100
+//   [_]i64 | i32 | i16 | i8 -> selects the precision (default 64)
+// Returns NULL if there is no integer here or if the literal continues with
+// 'e' or 'f' (i.e. it is really a floating point literal).
+//
+PARSER(parse_int) {
+    const char *start = pos;
+    bool negative = match(&pos, "-");
+    if (!isdigit((unsigned char)*pos)) return NULL;
+
+    int64_t i;
+    if (match(&pos, "0x")) // Hex
+        i = parse_int_digits(&pos, "0123456789abcdefABCDEF_", 16);
+    else if (match(&pos, "0b")) // Binary
+        i = parse_int_digits(&pos, "01_", 2);
+    else if (match(&pos, "0o")) // Octal
+        i = parse_int_digits(&pos, "01234567_", 8);
+    else // Decimal
+        i = parse_int_digits(&pos, "0123456789_", 10);
+
+    if (match(&pos, "e") || match(&pos, "f")) // floating point literal
+        return NULL;
+
+    if (negative) i *= -1;
+
+    if (match(&pos, "%")) {
+        double d = (double)i / 100.;
+        return NewAST(ctx->file, start, pos, Num, .n=d, .precision=64);
+    }
+
+    match(&pos, "_"); // optional separator before the precision suffix
+    int64_t precision = 64;
+    if (match(&pos, "i64")) precision = 64;
+    else if (match(&pos, "i32")) precision = 32;
+    else if (match(&pos, "i16")) precision = 16;
+    else if (match(&pos, "i8")) precision = 8;
+
+    return NewAST(ctx->file, start, pos, Int, .i=i, .precision=precision);
+}
+
+// Table type: '{' <key type> '=>' <value type> '}'
+// Returns NULL (no error) unless both the key type and the '=>' are present.
+type_ast_t *parse_table_type(parse_ctx_t *ctx, const char *pos) {
+    const char *start = pos;
+    if (!match(&pos, "{"))
+        return NULL;
+
+    whitespace(&pos);
+    type_ast_t *key = optional(ctx, &pos, parse_type);
+    if (key == NULL)
+        return NULL;
+
+    whitespace(&pos);
+    if (!match(&pos, "=>"))
+        return NULL;
+
+    // From here on we are committed: failures are reported as errors
+    type_ast_t *value = expect(ctx, start, &pos, parse_type, "I couldn't parse the rest of this table type");
+    whitespace(&pos);
+    expect_closing(ctx, &pos, "}", "I wasn't able to parse the rest of this table type");
+    return NewTypeAST(ctx->file, start, pos, TypeTable, .key=key, .value=value);
+}
+
+// Anonymous struct type: 'struct' '(' <named fields> ')'
+type_ast_t *parse_struct_type(parse_ctx_t *ctx, const char *pos) {
+    const char *start = pos;
+    if (!match(&pos, "struct"))
+        return NULL;
+    spaces(&pos);
+    if (!match(&pos, "("))
+        return NULL;
+
+    // Struct fields must all be named (allow_unnamed=false)
+    arg_list_t *fields = parse_args(ctx, &pos, false);
+    whitespace(&pos);
+    expect_closing(ctx, &pos, ")", "I wasn't able to parse the rest of this struct type");
+    return NewTypeAST(ctx->file, start, pos, TypeStruct, .fields=fields);
+}
+
+// Function type: 'func' '(' <args> ')' '->' [<return type>]
+// Returns NULL if the 'func' keyword, the '(' or the '->' is missing.
+type_ast_t *parse_func_type(parse_ctx_t *ctx, const char *pos) {
+    const char *start = pos;
+    if (!match_word(&pos, "func"))
+        return NULL;
+    spaces(&pos);
+    if (!match(&pos, "("))
+        return NULL;
+
+    // Argument names are optional in a function type (allow_unnamed=true)
+    arg_list_t *params = parse_args(ctx, &pos, true);
+    expect_closing(ctx, &pos, ")", "I wasn't able to parse the rest of this function type");
+
+    spaces(&pos);
+    if (!match(&pos, "->"))
+        return NULL;
+
+    type_ast_t *return_type = optional(ctx, &pos, parse_type);
+    return NewTypeAST(ctx->file, start, pos, TypeFunction, .args=params, .ret=return_type);
+}
+
+// Array type: '[' <item type> ']'
+type_ast_t *parse_array_type(parse_ctx_t *ctx, const char *pos) {
+    const char *start = pos;
+    if (!match(&pos, "["))
+        return NULL;
+
+    type_ast_t *item_type = expect(ctx, start, &pos, parse_type,
+                                   "I couldn't parse an array item type after this point");
+    expect_closing(ctx, &pos, "]", "I wasn't able to parse the rest of this array type");
+    return NewTypeAST(ctx->file, start, pos, TypeArray, .item=item_type);
+}
+
+// Pointer type: ('@' | '?' | '&') ['(readonly)'] <pointed-to type>
+//   '@' -> is_optional=false, is_stack=false
+//   '?' -> is_optional=true
+//   '&' -> is_stack=true
+type_ast_t *parse_pointer_type(parse_ctx_t *ctx, const char *pos) {
+    const char *start = pos;
+    bool is_optional = false, is_stack = false;
+    switch (*pos) {
+    case '@': break;
+    case '?': is_optional = true; break;
+    case '&': is_stack = true; break;
+    default: return NULL;
+    }
+    ++pos; // consume the sigil
+
+    spaces(&pos);
+    bool is_readonly = match(&pos, "(readonly)");
+    spaces(&pos);
+    type_ast_t *pointed = expect(ctx, start, &pos, parse_type,
+                                 "I couldn't parse a pointer type after this point");
+    return NewTypeAST(ctx->file, start, pos, TypePointer, .pointed=pointed, .is_optional=is_optional, .is_stack=is_stack, .is_readonly=is_readonly);
+}
+
+// Named type: an identifier, optionally extended with ".member" segments
+// (e.g. Foo.Bar), producing a TypeVar whose name is the dotted path.
+type_ast_t *parse_type_name(parse_ctx_t *ctx, const char *pos) {
+    const char *start = pos;
+    const char *name = get_id(&pos);
+    if (name == NULL)
+        return NULL;
+
+    // Only commit to each ".member" once both the dot and the identifier are there
+    while (true) {
+        const char *lookahead = pos;
+        spaces(&lookahead);
+        if (!match(&lookahead, "."))
+            break;
+        const char *member = get_id(&lookahead);
+        if (member == NULL)
+            break;
+        name = heap_strf("%s.%s", name, member);
+        pos = lookahead;
+    }
+    return NewTypeAST(ctx->file, start, pos, TypeVar, .var.name=name);
+}
+
+// Parse any type expression. Alternatives are tried in a fixed order (enum,
+// pointer, array, table, struct, named type, function type); if none match,
+// a parenthesized type is accepted and re-spanned to include its parens.
+type_ast_t *parse_type(parse_ctx_t *ctx, const char *pos) {
+    const char *start = pos;
+
+    type_ast_t *type = parse_enum_type(ctx, pos);
+    if (!type) type = parse_pointer_type(ctx, pos);
+    if (!type) type = parse_array_type(ctx, pos);
+    if (!type) type = parse_table_type(ctx, pos);
+    if (!type) type = parse_struct_type(ctx, pos);
+    if (!type) type = parse_type_name(ctx, pos);
+    if (!type) type = parse_func_type(ctx, pos);
+    if (type)
+        return type;
+
+    // Last resort: '(' <type> ')'
+    if (!match(&pos, "("))
+        return NULL;
+    whitespace(&pos);
+    type = optional(ctx, &pos, parse_type);
+    if (!type)
+        return NULL;
+    whitespace(&pos);
+    expect_closing(ctx, &pos, ")", "I wasn't able to parse the rest of this type");
+
+    // Widen the inner type's span so it covers the parentheses too
+    type->start = start;
+    type->end = pos;
+    return type;
+}
+
+//
+// Floating point literal: [-] digits ['.' digits] ['e' exponent], with '_'
+// allowed as a digit separator, optionally followed by [_]f64 / f32 and/or '%'
+// (which divides the value by 100).
+// Returns NULL when the text is not a float: no digits at all (a lone '.'),
+// a ".." range operator after the integer part, or a plain integer that is
+// not followed by '.', 'e', 'f' or '%'.
+//
+PARSER(parse_num) {
+    const char *start = pos;
+    bool negative = match(&pos, "-");
+    if (!isdigit((unsigned char)*pos) && *pos != '.') return NULL;
+
+    const size_t int_len = strspn(pos, "0123456789_");
+    size_t frac_digits = 0;
+    size_t len = int_len;
+    if (strncmp(pos+len, "..", 2) == 0) {
+        return NULL;
+    } else if (pos[len] == '.') {
+        frac_digits = strspn(pos + len + 1, "0123456789");
+        len += 1 + frac_digits;
+    } else if (pos[len] != 'e' && pos[len] != 'f' && pos[len] != '%') {
+        return NULL;
+    }
+
+    // A '.' with no digits on either side is not a number (strtod() would
+    // quietly turn it into 0)
+    if (int_len == 0 && frac_digits == 0)
+        return NULL;
+
+    if (pos[len] == 'e')
+        len += 1 + strspn(pos + len + 1, "-0123456789_");
+
+    // Copy the literal without its '_' separators so strtod() can read it
+    char *buf = GC_MALLOC_ATOMIC(len+1);
+    memset(buf, 0, len+1);
+    char *dest = buf;
+    for (const char *src = pos; src < pos+len; ++src) {
+        if (*src != '_') *(dest++) = *src;
+    }
+    double d = strtod(buf, NULL);
+    pos += len;
+
+    if (negative) d *= -1;
+
+    int64_t precision = 64;
+    match(&pos, "_"); // optional separator before the precision suffix
+    if (match(&pos, "f64")) precision = 64;
+    else if (match(&pos, "f32")) precision = 32;
+
+    if (match(&pos, "%")) {
+        d /= 100.;
+    }
+
+    return NewAST(ctx->file, start, pos, Num, .n=d, .precision=precision);
+}
+
+// Array literal: '[' [':' <item type>] [<item> {',' <item>}] ']'
+// An array with neither items nor a type annotation is a parse error.
+PARSER(parse_array) {
+    const char *start = pos;
+    if (!match(&pos, "["))
+        return NULL;
+
+    whitespace(&pos);
+
+    // Optional explicit item type, e.g. [:Int]
+    type_ast_t *item_type = NULL;
+    if (match(&pos, ":")) {
+        whitespace(&pos);
+        item_type = expect(ctx, pos-1, &pos, parse_type, "I couldn't parse a type for this array");
+    }
+
+    // Items are prepended as they are parsed, then the list is reversed below
+    ast_list_t *items = NULL;
+    while (true) {
+        whitespace(&pos);
+        ast_t *item = optional(ctx, &pos, parse_extended_expr);
+        if (item == NULL)
+            break;
+        items = new(ast_list_t, .ast=item, .next=items);
+        whitespace(&pos);
+        if (!match(&pos, ","))
+            break;
+    }
+    whitespace(&pos);
+    expect_closing(ctx, &pos, "]", "I wasn't able to parse the rest of this array");
+
+    if (items == NULL && item_type == NULL)
+        parser_err(ctx, start, pos, "Empty arrays must specify what type they would contain (e.g. [:Int])");
+
+    REVERSE_LIST(items);
+    return NewAST(ctx->file, start, pos, Array, .type=item_type, .items=items);
+}
+
+PARSER(parse_table) { // Table literal: '{' [':' key_type '=>' value_type] [entries] [';' attributes] '}' where each entry is `key => value`
+ const char *start = pos;
+ if (!match(&pos, "{")) return NULL;
+
+ whitespace(&pos);
+
+ ast_list_t *entries = NULL;
+ type_ast_t *key_type = NULL, *value_type = NULL;
+ if (match(&pos, ":")) { // Optional explicit type annotation, e.g. {:K=>V ...}
+ whitespace(&pos);
+ key_type = expect(ctx, pos-1, &pos, parse_type, "I couldn't parse a key type for this table");
+ whitespace(&pos);
+ if (!match(&pos, "=>"))
+ parser_err(ctx, pos, pos, "I expected an '=>' for this table type");
+ value_type = expect(ctx, pos-1, &pos, parse_type, "I couldn't parse a value type for this table");
+ }
+
+ for (;;) { // Comma-separated entries, collected in reverse order
+ whitespace(&pos);
+ const char *entry_start = pos;
+ ast_t *key = optional(ctx, &pos, parse_extended_expr);
+ if (!key) break;
+ whitespace(&pos);
+ if (!match(&pos, "=>")) return NULL; // An expression without '=>' means this isn't a table; give up without reporting an error
+ ast_t *value = expect(ctx, pos-1, &pos, parse_expr, "I couldn't parse the value for this table entry");
+
+ ast_t *entry = NewAST(ctx->file, entry_start, pos, TableEntry, .key=key, .value=value);
+ for (bool progress = true; progress; ) { // Keep wrapping the entry in index/field/call suffixes for as long as one matches
+ ast_t *new_entry;
+ progress = (false
+ || (new_entry=parse_index_suffix(ctx, entry))
+ || (new_entry=parse_field_suffix(ctx, entry))
+ || (new_entry=parse_fncall_suffix(ctx, entry, NORMAL_FUNCTION))
+ );
+ if (progress) entry = new_entry;
+ }
+ pos = entry->end;
+
+ entries = new(ast_list_t, .ast=entry, .next=entries);
+ whitespace(&pos);
+ if (!match(&pos, ",")) break;
+ }
+
+ REVERSE_LIST(entries); // Entries were prepended above; restore source order
+
+ if (!key_type && !value_type && !entries) // Neither a type annotation nor any entry: not a table literal
+ return NULL;
+
+ whitespace(&pos);
+
+ ast_t *fallback = NULL, *default_val = NULL;
+ if (match(&pos, ";")) { // Optional ';'-separated attributes: `fallback = expr` and `default = expr`, each allowed once
+ for (;;) {
+ whitespace(&pos);
+ const char *attr_start = pos;
+ if (match(&pos, "fallback")) {
+ whitespace(&pos);
+ if (!match(&pos, "=")) parser_err(ctx, attr_start, pos, "I expected an '=' after 'fallback'");
+ if (fallback)
+ parser_err(ctx, attr_start, pos, "This table already has a fallback");
+ fallback = expect(ctx, attr_start, &pos, parse_expr, "I expected a fallback table");
+ } else if (match(&pos, "default")) {
+ whitespace(&pos);
+ if (!match(&pos, "=")) parser_err(ctx, attr_start, pos, "I expected an '=' after 'default'");
+ if (default_val)
+ parser_err(ctx, attr_start, pos, "This table already has a default value");
+ default_val = expect(ctx, attr_start, &pos, parse_expr, "I expected a default value for this table");
+ } else {
+ break;
+ }
+ whitespace(&pos);
+ if (!match(&pos, ";")) break;
+ }
+ }
+
+ whitespace(&pos);
+ expect_closing(ctx, &pos, "}", "I wasn't able to parse the rest of this table");
+
+ return NewAST(ctx->file, start, pos, Table, .key_type=key_type, .value_type=value_type, .entries=entries, .fallback=fallback, .default_value=default_val);
+}
+
+// Field access suffix: <lhs> '.' ['$'] <identifier>
+// Whitespace (including line breaks and comments) is allowed around the dot.
+// A ".." is never treated as a field access. Returns NULL if no suffix follows.
+ast_t *parse_field_suffix(parse_ctx_t *ctx, ast_t *lhs) {
+    if (lhs == NULL)
+        return NULL;
+
+    const char *pos = lhs->end;
+    whitespace(&pos);
+    if (!match(&pos, ".") || *pos == '.')
+        return NULL;
+
+    whitespace(&pos);
+    const bool has_dollar = match(&pos, "$");
+    const char *field = get_id(&pos);
+    if (field == NULL)
+        return NULL;
+    if (has_dollar)
+        field = heap_strf("$%s", field); // keep the '$' as part of the field name
+    return NewAST(ctx->file, lhs->start, pos, FieldAccess, .fielded=lhs, .field=field);
+}
+
+PARSER(parse_reduction) { // Reduction: '(' <binary operator> [key suffixes, for min/max only] ')' <iterable> ['else' <fallback>]
+ const char *start = pos;
+ if (!match(&pos, "(")) return NULL;
+
+ spaces(&pos);
+ const char *combo_start = pos;
+ binop_e op = match_binary_operator(&pos);
+ if (op == BINOP_UNKNOWN) return NULL;
+
+ ast_t *combination;
+ ast_t *lhs = NewAST(ctx->file, pos, pos, Var, .var.name="lhs.0"); // Zero-width placeholder variables standing in for the two operands of the combining expression
+ ast_t *rhs = NewAST(ctx->file, pos, pos, Var, .var.name="rhs.0");
+ if (op == BINOP_MIN || op == BINOP_MAX) {
+ for (bool progress = true; progress; ) { // Parse optional index/field/call suffixes, rooted at the rhs placeholder, to form the comparison key
+ ast_t *new_term;
+ progress = (false
+ || (new_term=parse_index_suffix(ctx, rhs))
+ || (new_term=parse_field_suffix(ctx, rhs))
+ || (new_term=parse_fncall_suffix(ctx, rhs, NORMAL_FUNCTION))
+ );
+ if (progress) rhs = new_term;
+ }
+ if (rhs->tag == Var) rhs = NULL; // Still the bare placeholder: no key suffix was given
+ else pos = rhs->end;
+ combination = op == BINOP_MIN ?
+ NewAST(ctx->file, combo_start, pos, Min, .lhs=lhs, .rhs=lhs, .key=rhs) // NOTE(review): .rhs is set to `lhs` here (the `rhs` variable now holds the key expression or NULL) -- confirm this is intended
+ : NewAST(ctx->file, combo_start, pos, Max, .lhs=lhs, .rhs=lhs, .key=rhs);
+ } else {
+ combination = NewAST(ctx->file, combo_start, pos, BinaryOp, .op=op, .lhs=lhs, .rhs=rhs);
+ }
+
+ spaces(&pos);
+ if (!match(&pos, ")")) return NULL;
+
+ ast_t *iter = optional(ctx, &pos, parse_extended_expr);
+ if (!iter) return NULL;
+
+ ast_t *fallback = NULL;
+ if (match_word(&pos, "else")) // Optional fallback expression after 'else'
+ fallback = expect(ctx, pos-4, &pos, parse_expr, "I couldn't parse the expression after this 'else'");
+
+ return NewAST(ctx->file, start, pos, Reduction, .iter=iter, .combination=combination, .fallback=fallback);
+}
+
+// Index suffix: <lhs> '[' (.field | [<expr>]) [';' 'unchecked'] ']'
+// The '[' must directly follow the indexed expression. `array[.field]`
+// produces an index that is a FieldAccess with no `fielded` expression.
+ast_t *parse_index_suffix(parse_ctx_t *ctx, ast_t *lhs) {
+    if (lhs == NULL)
+        return NULL;
+
+    const char *start = lhs->start;
+    const char *pos = lhs->end;
+    if (!match(&pos, "["))
+        return NULL;
+    whitespace(&pos);
+
+    ast_t *index = NULL;
+    if (match(&pos, ".")) {
+        // array[.field]
+        const char *dot = pos-1;
+        const char *field = get_id(&pos);
+        if (field != NULL)
+            index = NewAST(ctx->file, dot, pos, FieldAccess, .field=field);
+        else
+            pos = dot; // not a field shorthand: un-consume the '.'
+    }
+
+    // obj[expr]
+    if (index == NULL)
+        index = optional(ctx, &pos, parse_extended_expr);
+
+    whitespace(&pos);
+    bool unchecked = false;
+    if (match(&pos, ";")) {
+        spaces(&pos);
+        unchecked = match_word(&pos, "unchecked") != 0;
+    }
+    expect_closing(ctx, &pos, "]", "I wasn't able to parse the rest of this index");
+    return NewAST(ctx->file, start, pos, Index, .indexed=lhs, .index=index, .unchecked=unchecked);
+}
+
+// if <condition> [then] <body> [else <body>]
+// The condition may be a declaration or an expression. An 'else' only belongs
+// to this 'if' when it sits at the same indentation as the 'if' itself.
+PARSER(parse_if) {
+    const char *start = pos;
+    const int64_t if_indent = sss_get_indent(ctx->file, pos);
+
+    if (!match_word(&pos, "if"))
+        return NULL;
+
+    // Prefer a declaration as the condition, falling back to a plain expression
+    ast_t *condition = optional(ctx, &pos, parse_declaration);
+    if (condition == NULL)
+        condition = expect(ctx, start, &pos, parse_expr,
+                           "I expected to find an expression for this 'if'");
+
+    match_word(&pos, "then"); // optional
+
+    ast_t *body = expect(ctx, start, &pos, parse_opt_indented_block, "I expected a body for this 'if' statement");
+
+    // Look ahead for a matching 'else' without committing to the whitespace
+    ast_t *else_body = NULL;
+    const char *lookahead = pos;
+    whitespace(&lookahead);
+    if (sss_get_indent(ctx->file, lookahead) == if_indent && match_word(&lookahead, "else")) {
+        pos = lookahead;
+        else_body = expect(ctx, start, &pos, parse_opt_indented_block, "I expected a body for this 'else'");
+    }
+    return NewAST(ctx->file, start, pos, If, .condition=condition, .body=body, .else_body=else_body);
+}
+
+// for [k,] v in iter [do] [<indent>] body
+// With a single variable it is stored as the loop's value and the index is
+// NULL; with two, the first is the index and the second the value.
+PARSER(parse_for) {
+    const char *start = pos;
+    if (!match_word(&pos, "for")) return NULL;
+    ast_t *index = expect(ctx, start, &pos, parse_var, "I expected an iteration variable for this 'for'");
+    spaces(&pos);
+    ast_t *value = NULL;
+    if (match(&pos, ",")) {
+        value = expect(ctx, pos-1, &pos, parse_var, "I expected a variable after this comma");
+    }
+    expect_str(ctx, start, &pos, "in", "I expected an 'in' for this 'for'");
+    ast_t *iter = expect(ctx, start, &pos, parse_expr, "I expected an iterable value for this 'for'");
+    // match_word() rather than match(): it skips the spaces before 'do' and
+    // refuses to eat the first two letters of an identifier such as `double`
+    match_word(&pos, "do"); // optional
+    ast_t *body = expect(ctx, start, &pos, parse_opt_indented_block, "I expected a body for this 'for'");
+    return NewAST(ctx->file, start, pos, For, .index=value ? index : NULL, .value=value ? value : index, .iter=iter, .body=body);
+}
+
+// while condition [do] [<indent>] body
+PARSER(parse_while) {
+    const char *start = pos;
+    if (!match_word(&pos, "while")) return NULL;
+    ast_t *condition = expect(ctx, start, &pos, parse_expr, "I don't see a viable condition for this 'while'");
+    // match_word() rather than match(): it skips the spaces before 'do' and
+    // refuses to eat the first two letters of an identifier such as `double`
+    match_word(&pos, "do"); // optional
+    ast_t *body = expect(ctx, start, &pos, parse_opt_indented_block, "I expected a body for this 'while'");
+    return NewAST(ctx->file, start, pos, While, .condition=condition, .body=body);
+}
+
+// '@' <expr> -- unary heap-allocation operator
+PARSER(parse_heap_alloc) {
+    const char *start = pos;
+    if (*pos != '@')
+        return NULL;
+    ++pos;
+    spaces(&pos);
+    ast_t *operand = expect(ctx, start, &pos, parse_expr, "I expected an expression for this '@'");
+    return NewAST(ctx->file, start, pos, UnaryOp, .op=UNOP_HEAP_ALLOCATE, .value=operand);
+}
+
+// '&' <expr> -- unary stack-reference operator
+PARSER(parse_stack_reference) {
+    const char *start = pos;
+    if (*pos != '&')
+        return NULL;
+    ++pos;
+    spaces(&pos);
+    ast_t *operand = expect(ctx, start, &pos, parse_expr, "I expected an expression for this '&'");
+    return NewAST(ctx->file, start, pos, UnaryOp, .op=UNOP_STACK_REFERENCE, .value=operand);
+}
+
+// 'not' <expr> -- logical negation; 'not' must be a whole word
+PARSER(parse_not) {
+    const char *start = pos;
+    if (match_word(&pos, "not") == 0)
+        return NULL;
+    spaces(&pos);
+    ast_t *operand = expect(ctx, start, &pos, parse_expr, "I expected an expression for this 'not'");
+    return NewAST(ctx->file, start, pos, UnaryOp, .op=UNOP_NOT, .value=operand);
+}
+
+// '-' <term> -- arithmetic negation; applies to a single term, not a whole expression
+PARSER(parse_negative) {
+    const char *start = pos;
+    if (*pos != '-')
+        return NULL;
+    ++pos;
+    spaces(&pos);
+    ast_t *operand = expect(ctx, start, &pos, parse_term, "I expected an expression for this '-'");
+    return NewAST(ctx->file, start, pos, UnaryOp, .op=UNOP_NEGATIVE, .value=operand);
+}
+
+// Boolean literal: the whole word `yes` (true) or `no` (false)
+PARSER(parse_bool) {
+    const char *start = pos;
+    bool value;
+    if (match_word(&pos, "yes"))
+        value = true;
+    else if (match_word(&pos, "no"))
+        value = false;
+    else
+        return NULL;
+    return NewAST(ctx->file, start, pos, Bool, .b=value);
+}
+
+// Character literal: either a backtick followed by the literal byte (`x), or a
+// backslash escape sequence (see unescape()). Returns NULL if neither form is
+// present, including when the introducer is the last character of the input.
+PARSER(parse_char) {
+    const char *start = pos;
+    if (*pos != '`' && *pos != '\\')
+        return NULL;
+    // Both forms need at least one more character; without this check the
+    // cursor would be moved past the string's terminator
+    if (pos[1] == '\0')
+        return NULL;
+
+    char c;
+    if (*pos == '`') {
+        c = pos[1];
+        pos += 2;
+    } else {
+        c = unescape(&pos)[0];
+    }
+    return NewAST(ctx->file, start, pos, Char, .c=c);
+}
+
+// String interpolation: <marker> [':'] (<parenthesized expr> | <term>)
+// The marker is a single character (typically '$', but might be other stuff
+// like '@' in different contexts) and is skipped unconditionally. A ':' right
+// after the marker sets `labelled`. Failure to parse a value is a parse error.
+PARSER(parse_interpolation) {
+    const char *start = pos;
+    ++pos; // step over the marker character
+    const bool labelled = match(&pos, ":");
+
+    ast_t *value = optional(ctx, &pos, parse_parens);
+    if (value == NULL)
+        value = optional(ctx, &pos, parse_term);
+    if (value == NULL) {
+        // Stretch the error span over the whole (...) group when there is one
+        match_group(&pos, '(');
+        parser_err(ctx, start, pos, "This interpolation didn't parse");
+    }
+    return NewAST(ctx->file, start, pos, Interp, .value=value, .labelled=labelled);
+}
+
+//
+// String literal, producing a StringJoin of literal chunks and interpolations.
+//
+// Forms:
+//   '...' or "..."     quoted string
+//   $<delim>...        custom delimiter; ( [ < { close with their mirror
+//                      image (and nest), anything else closes with itself
+// The opening delimiter also selects the interpolation character (default
+// '$') and the escape character (default '\\') via the tables below.
+//
+// If the opening delimiter is followed directly by a line break, the string
+// is multiline: its content is the following lines, which must be indented
+// beyond the line the string started on, up to a line at the starting
+// indentation that begins with the closing delimiter (or the end of the
+// input). Otherwise the string is inline and must be closed on the same line
+// (except for the ' ', ':' and '>' delimiters, which run to the end of line).
+//
+PARSER(parse_string) {
+    static const char closing[128] = {['(']=')', ['[']=']', ['<']='>', ['{']='}'};
+    // NOTE(review): this table is `bool`, so the '\x1B' entries are stored as
+    // `true` and the escape character chosen for these delimiters is actually
+    // '\x01', not ESC. Left unchanged, because unescape() asserts that it is
+    // handed a backslash and switching the table to `char` would make a
+    // literal ESC byte in such a string reach that assertion -- confirm the
+    // intent before changing it.
+    static const bool escapes[128] = {['\'']='\x1B', ['(']='\x1B', ['>']='\x1B', ['/']='\x1B'};
+    static const char interps[128] = {['>']='@', ['/']='@', ['\'']='\x1A', ['(']='\x1A'};
+
+    const char *string_start = pos;
+    char open, close;
+    // The tables above only cover ASCII; any other opening byte (e.g. part of
+    // a UTF-8 sequence, which may be negative as a plain `char`) gets the defaults
+    bool open_in_tables;
+    if (match(&pos, "$")) {
+        open = *pos;
+        if (open == '\0') // '$' at the very end of the input
+            return NULL;
+        open_in_tables = (unsigned char)open < 128;
+        close = (open_in_tables && closing[(int)open]) ? closing[(int)open] : open;
+        ++pos;
+    } else {
+        if (*pos != '\'' && *pos != '"')
+            return NULL;
+        open = *pos;
+        open_in_tables = true;
+        close = *pos;
+        ++pos;
+    }
+
+    char interp_char = (open_in_tables && interps[(int)open]) ? interps[(int)open] : '$';
+    char escape_char = (open_in_tables && escapes[(int)open]) ? escapes[(int)open] : '\\';
+
+    if (open == ':' || open == '>')
+        spaces(&pos);
+
+    ast_list_t *chunks = NULL; // built in reverse order, reversed at the end
+    if (*pos == '\r' || *pos == '\n') { // Multiline string
+        char special[] = {'\n','\r',interp_char,escape_char,'\0'};
+        int64_t starting_indent = sss_get_indent(ctx->file, pos);
+        // indentation-delimited string
+        match(&pos, "\r");
+        match(&pos, "\n");
+        int64_t first_line = sss_get_line_number(ctx->file, pos);
+        int64_t indented = sss_get_indent(ctx->file, pos);
+        pos = sss_get_line(ctx->file, first_line);
+        while (pos < ctx->file->text + ctx->file->len) {
+            const char *eol = strchrnul(pos, '\n');
+            if (eol == pos + strspn(pos, " \t\r")) { // Empty line
+                ast_t *ast = NewAST(ctx->file, pos, eol, StringLiteral, .str="\n");
+                chunks = new(ast_list_t, .ast=ast, .next=chunks);
+                pos = eol + 1;
+                continue;
+            }
+            if (!match_indentation(&pos, starting_indent))
+                parser_err(ctx, pos, strchrnul(pos, '\n'), "This isn't a valid indentation level for this unterminated string");
+
+            // A closing delimiter at the starting indentation ends the string
+            if (*pos == close) {
+                ++pos;
+                goto finished;
+            }
+
+            if (!match_indentation(&pos, (indented - starting_indent)))
+                parser_err(ctx, pos, strchrnul(pos, '\n'), "I was expecting this to have %lu extra indentation beyond %lu",
+                           (indented - starting_indent), starting_indent);
+
+            while (pos < eol+1) {
+                // End of input on a line with no trailing newline: nothing
+                // below can make progress, so stop instead of looping forever
+                if (*pos == '\0')
+                    break;
+
+                size_t len = strcspn(pos, special);
+                if (pos[len] == '\r') ++len;
+                if (pos[len] == '\n') ++len;
+
+                if (len > 0) {
+                    ast_t *chunk = NewAST(ctx->file, pos, pos+len-1, StringLiteral, .str=heap_strn(pos, len));
+                    chunks = new(ast_list_t, .ast=chunk, .next=chunks);
+                }
+
+                pos += len;
+
+                if (*pos == escape_char) {
+                    const char *start = pos;
+                    // unescape() already leaves `pos` just past the escape
+                    // sequence, so it must not be advanced again here
+                    const char* unescaped = unescape(&pos);
+                    ast_t *chunk = NewAST(ctx->file, start, pos, StringLiteral, .str=unescaped);
+                    chunks = new(ast_list_t, .ast=chunk, .next=chunks);
+                } else if (*pos == interp_char) {
+                    ast_t *chunk = parse_interpolation(ctx, pos);
+                    chunks = new(ast_list_t, .ast=chunk, .next=chunks);
+                    pos = chunk->end;
+                }
+            }
+        }
+      finished:;
+        // Strip trailing newline (only if the last chunk really ends with one):
+        if (chunks) {
+            ast_t *last_chunk = chunks->ast;
+            if (last_chunk->tag == StringLiteral) {
+                auto str = Match(last_chunk, StringLiteral);
+                size_t last_len = strlen(str->str);
+                if (last_len > 0 && str->str[last_len-1] == '\n') {
+                    const char* trimmed = heap_strn(str->str, last_len-1);
+                    chunks->ast = NewAST(ctx->file, last_chunk->start, last_chunk->end-1, StringLiteral, .str=trimmed);
+                }
+            }
+        }
+    } else { // Inline string
+        char special[] = {'\n','\r',open,close,interp_char,escape_char,'\0'};
+        int depth = 1; // nesting depth of open/close delimiters
+        while (depth > 0 && *pos) {
+            size_t len = strcspn(pos, special);
+            if (len > 0) {
+                ast_t *chunk = NewAST(ctx->file, pos, pos+len-1, StringLiteral, .str=heap_strn(pos, len));
+                chunks = new(ast_list_t, .ast=chunk, .next=chunks);
+                pos += len;
+            }
+
+            if (*pos == interp_char) {
+                ast_t *chunk = parse_interpolation(ctx, pos);
+                chunks = new(ast_list_t, .ast=chunk, .next=chunks);
+                pos = chunk->end;
+            } else if (*pos == escape_char) {
+                const char *start = pos;
+                const char* unescaped = unescape(&pos);
+                ast_t *chunk = NewAST(ctx->file, start, pos, StringLiteral, .str=unescaped);
+                chunks = new(ast_list_t, .ast=chunk, .next=chunks);
+            } else if (*pos == '\r' || *pos == '\n') {
+                // These delimiters mark strings that simply run to the end of the line
+                if (open == ' ' || open == ':' || open == '>') goto string_finished;
+                parser_err(ctx, string_start, pos, "This line ended without closing the string");
+            } else if (*pos == close) { // if open == close, then don't do nesting (i.e. check 'close' first)
+                --depth;
+                if (depth > 0) {
+                    // A nested closer is part of the string's content
+                    ast_t *chunk = NewAST(ctx->file, pos, pos+1, StringLiteral, .str=heap_strn(pos, 1));
+                    chunks = new(ast_list_t, .ast=chunk, .next=chunks);
+                }
+                ++pos;
+            } else if (*pos == open) {
+                ++depth;
+                ast_t *chunk = NewAST(ctx->file, pos, pos+1, StringLiteral, .str=heap_strn(pos, 1));
+                chunks = new(ast_list_t, .ast=chunk, .next=chunks);
+                ++pos;
+            } else {
+                ast_t *chunk = NewAST(ctx->file, pos, pos+1, StringLiteral, .str=heap_strn(pos, 1));
+                ++pos;
+                chunks = new(ast_list_t, .ast=chunk, .next=chunks);
+            }
+        }
+    }
+  string_finished:;
+    REVERSE_LIST(chunks);
+    return NewAST(ctx->file, string_start, pos, StringJoin, .children=chunks);
+}
+
+// Parse a `skip` statement. After the keyword, the target is "for", "while",
+// or whatever identifier get_id() finds (possibly NULL when there is none).
+// Returns NULL when the word `skip` is not matched at `pos`.
+PARSER(parse_skip) {
+    const char *cursor = pos;
+    if (!match_word(&cursor, "skip"))
+        return NULL;
+
+    spaces(&cursor);
+    // The loop keywords are tried before falling back to an identifier.
+    const char *loop_target;
+    if (match_word(&cursor, "for"))
+        loop_target = "for";
+    else if (match_word(&cursor, "while"))
+        loop_target = "while";
+    else
+        loop_target = get_id(&cursor);
+
+    return NewAST(ctx->file, pos, cursor, Skip, .target=loop_target);
+}
+
+// Parse a `stop` statement. After the keyword, the target is "for", "while",
+// or whatever identifier get_id() finds (possibly NULL when there is none).
+// Returns NULL when the word `stop` is not matched at `pos`.
+PARSER(parse_stop) {
+    const char *cursor = pos;
+    if (!match_word(&cursor, "stop"))
+        return NULL;
+
+    spaces(&cursor);
+    // The loop keywords are tried before falling back to an identifier.
+    const char *loop_target;
+    if (match_word(&cursor, "for"))
+        loop_target = "for";
+    else if (match_word(&cursor, "while"))
+        loop_target = "while";
+    else
+        loop_target = get_id(&cursor);
+
+    return NewAST(ctx->file, pos, cursor, Stop, .target=loop_target);
+}
+
+// Parse `return`, optionally followed by a value expression (the node's
+// `.value` is whatever the optional parse_expr attempt yields).
+PARSER(parse_return) {
+    const char *keyword_start = pos;
+    if (!match_word(&pos, "return"))
+        return NULL;
+
+    spaces(&pos);
+    ast_t *returned = optional(ctx, &pos, parse_expr);
+    return NewAST(ctx->file, keyword_start, pos, Return, .value=returned);
+}
+
+// Parse an anonymous function: `func`, then '(' directly (only spaces may
+// intervene), an argument list, ')', and an optional body block.
+// Returns NULL unless both `func` and the opening '(' are matched.
+PARSER(parse_lambda) {
+    const char *fn_start = pos;
+    if (match_word(&pos, "func")) {
+        spaces(&pos);
+        if (match(&pos, "(")) {
+            arg_list_t *params = parse_args(ctx, &pos, false);
+            spaces(&pos);
+            expect_closing(ctx, &pos, ")", "I was expecting a ')' to finish this anonymous function's arguments");
+            ast_t *fn_body = optional(ctx, &pos, parse_opt_indented_block);
+            return NewAST(ctx->file, fn_start, pos, Lambda, .args=params, .body=fn_body);
+        }
+    }
+    return NULL;
+}
+
+// Parse a nil literal written as '!' immediately followed by a type.
+// Returns NULL if there is no '!' or no type could be parsed after it.
+PARSER(parse_nil) {
+    const char *bang = pos;
+    if (match(&pos, "!")) {
+        type_ast_t *nil_type = parse_type(ctx, pos);
+        if (nil_type)
+            return NewAST(ctx->file, bang, nil_type->end, Nil, .type=nil_type);
+    }
+    return NULL;
+}
+
+// Parse a variable reference: a Var node named by the identifier that
+// get_id() finds at `pos`, or NULL when there is no identifier.
+PARSER(parse_var) {
+    const char *id_start = pos;
+    const char *id = get_id(&pos);
+    if (id == NULL)
+        return NULL;
+    return NewAST(ctx->file, id_start, pos, Var, .var.name=id);
+}
+
+// Parse a single term without any trailing index/field/call suffixes.
+// The alternatives are attempted in a fixed order and the first one that
+// yields a node wins; NULL is returned when none of them match.
+PARSER(parse_term_no_suffix) {
+    spaces(&pos);
+
+    ast_t *term;
+    if ((term = parse_nil(ctx, pos))) return term;
+    if ((term = parse_num(ctx, pos))) return term;
+    if ((term = parse_int(ctx, pos))) return term;
+    if ((term = parse_negative(ctx, pos))) return term;
+    if ((term = parse_heap_alloc(ctx, pos))) return term;
+    if ((term = parse_stack_reference(ctx, pos))) return term;
+    if ((term = parse_bool(ctx, pos))) return term;
+    if ((term = parse_char(ctx, pos))) return term;
+    if ((term = parse_string(ctx, pos))) return term;
+    if ((term = parse_lambda(ctx, pos))) return term;
+    if ((term = parse_parens(ctx, pos))) return term;
+    if ((term = parse_table(ctx, pos))) return term;
+    if ((term = parse_var(ctx, pos))) return term;
+    if ((term = parse_array(ctx, pos))) return term;
+    if ((term = parse_reduction(ctx, pos))) return term;
+    if ((term = parse_skip(ctx, pos))) return term;
+    if ((term = parse_stop(ctx, pos))) return term;
+    if ((term = parse_return(ctx, pos))) return term;
+    if ((term = parse_not(ctx, pos))) return term;
+    return parse_extern(ctx, pos);
+}
+
+// Parse a term followed by any number of suffixes. Each round tries an index
+// suffix, then a field suffix, then a (non-extern) call suffix on the term
+// built so far, and stops as soon as none of them applies.
+PARSER(parse_term) {
+    ast_t *term = parse_term_no_suffix(ctx, pos);
+    if (!term) return NULL;
+
+    for (;;) {
+        ast_t *extended = parse_index_suffix(ctx, term);
+        if (!extended) extended = parse_field_suffix(ctx, term);
+        if (!extended) extended = parse_fncall_suffix(ctx, term, NORMAL_FUNCTION);
+        if (!extended) return term;
+        term = extended;
+    }
+}
+
+// Parse a parenthesized call suffix `(args...)` that begins exactly where `fn`
+// ends, returning a FunctionCall node. Returns NULL if `fn` is NULL or there
+// is no '(' at `fn->end`.
+//
+// Arguments are comma-separated expressions; an argument written as
+// `name = expr` becomes a KeywordArg node. When `is_extern` is set, a ':'
+// after the ')' must be followed by the call's return type; without it the
+// return type is a type variable named "Void".
+ast_t *parse_fncall_suffix(parse_ctx_t *ctx, ast_t *fn, bool is_extern) {
+    if (!fn) return NULL;
+
+    const char *start = fn->start;
+    const char *pos = fn->end;
+
+    if (!match(&pos, "(")) return NULL;
+
+    whitespace(&pos);
+
+    ast_list_t *args = NULL;
+    for (;;) {
+        const char *arg_start = pos;
+        const char *name = get_id(&pos);
+        whitespace(&pos);
+        if (name) {
+            // Only a lone '=' introduces a keyword argument. An argument like
+            // `x == y` is an ordinary comparison, so the first '=' of "=="
+            // must not be consumed here (parse_args makes the same check).
+            if (strncmp(pos, "==", 2) != 0 && match(&pos, "=")) {
+                whitespace(&pos);
+                ast_t *arg = parse_expr(ctx, pos);
+                if (!arg) parser_err(ctx, arg_start, pos, "I couldn't parse this keyword argument value");
+                ast_t *kwarg = NewAST(ctx->file, arg_start, arg->end, KeywordArg,
+                                      .name=name, .arg=arg);
+                args = new(ast_list_t, .ast=kwarg, .next=args);
+                pos = kwarg->end;
+                goto got_arg;
+            }
+            // Not a keyword argument: rewind and parse it as an expression.
+            pos = arg_start;
+        }
+
+        ast_t *arg = optional(ctx, &pos, parse_expr);
+        if (!arg) break;
+        args = new(ast_list_t, .ast=arg, .next=args);
+
+      got_arg:;
+
+        whitespace(&pos);
+        if (!match(&pos, ","))
+            break;
+        whitespace(&pos);
+    }
+
+    whitespace(&pos);
+
+    if (!match(&pos, ")"))
+        parser_err(ctx, start, pos, "This parenthesis is unclosed");
+
+    type_ast_t *extern_return_type = NULL;
+    if (is_extern) {
+        if (match(&pos, ":"))
+            extern_return_type = expect(ctx, start, &pos, parse_type, "I couldn't parse the return type of this external function call");
+        else
+            extern_return_type = NewTypeAST(ctx->file, pos, pos, TypeVar, .var.name="Void");
+    }
+    // Arguments were prepended while parsing, so restore source order.
+    REVERSE_LIST(args);
+    return NewAST(ctx->file, start, pos, FunctionCall, .fn=fn, .args=args, .extern_return_type=extern_return_type);
+}
+
+// Try to match a binary operator at `*pos`, advancing `*pos` past whatever was
+// consumed, and return its binop_e value. Returns BINOP_UNKNOWN when nothing
+// matches; note that `*pos` may already have been advanced in that case (for
+// instance after a '-' that is judged not to be a binary minus).
+binop_e match_binary_operator(const char **pos)
+{
+    const char first = **pos;
+
+    if (first == '+') {
+        ++*pos;
+        if (match(pos, "+")) return BINOP_CONCAT;
+        return BINOP_PLUS;
+    }
+    if (first == '-') {
+        ++*pos;
+        const char *after = *pos;
+        // `after[-2]` is the character just before the '-'. A '-' preceded by
+        // a space but not followed by one looks like `fn -5`.
+        if (after[0] != ' ' && after[-2] == ' ')
+            return BINOP_UNKNOWN;
+        return BINOP_MINUS;
+    }
+    if (first == '*') { ++*pos; return BINOP_MULT; }
+    if (first == '/') { ++*pos; return BINOP_DIVIDE; }
+    if (first == '^') { ++*pos; return BINOP_POWER; }
+    if (first == '<') {
+        ++*pos;
+        if (match(pos, "=")) return BINOP_LE;
+        if (match(pos, "<")) return BINOP_LSHIFT;
+        return BINOP_LT;
+    }
+    if (first == '>') {
+        ++*pos;
+        if (match(pos, "=")) return BINOP_GE;
+        if (match(pos, ">")) return BINOP_RSHIFT;
+        return BINOP_GT;
+    }
+
+    // Multi-character and keyword operators, tried in a fixed order.
+    if (match(pos, "!=")) return BINOP_NE;
+    if (match(pos, "==") && **pos != '=') return BINOP_EQ;
+    if (match_word(pos, "and")) return BINOP_AND;
+    if (match_word(pos, "or")) return BINOP_OR;
+    if (match_word(pos, "xor")) return BINOP_XOR;
+    if (match_word(pos, "mod1")) return BINOP_MOD1;
+    if (match_word(pos, "mod")) return BINOP_MOD;
+    if (match_word(pos, "_min_")) return BINOP_MIN;
+    if (match_word(pos, "_max_")) return BINOP_MAX;
+    return BINOP_UNKNOWN;
+}
+
+// Parse `term [op rest]`, where `rest` is parsed recursively with the
+// operator's own op_tightness value as the new minimum. If no operator
+// follows, the operator's tightness is below `min_tightness`, or no
+// right-hand side parses, the left-hand term is returned by itself.
+//
+// `_min_` and `_max_` may carry index/field/call suffixes directly after the
+// operator; the suffixed node is stored as the `.key` of the Min/Max node.
+static ast_t *parse_infix_expr(parse_ctx_t *ctx, const char *pos, int min_tightness) {
+    ast_t *left = optional(ctx, &pos, parse_term);
+    if (!left) return NULL;
+
+    spaces(&pos);
+    binop_e op = match_binary_operator(&pos);
+    if (op == BINOP_UNKNOWN) return left;
+    const int tightness = op_tightness[op];
+    if (tightness < min_tightness) return left;
+
+    ast_t *key = NULL;
+    if (op == BINOP_MIN || op == BINOP_MAX) {
+        // Start from a zero-width placeholder Var and let the suffix parsers
+        // extend it; if nothing extends it, there is no key.
+        ast_t *probe = NewAST(ctx->file, pos, pos, Var, .var.name=op == BINOP_MIN ? "_min_" : "_max_");
+        for (;;) {
+            ast_t *extended = parse_index_suffix(ctx, probe);
+            if (!extended) extended = parse_field_suffix(ctx, probe);
+            if (!extended) extended = parse_fncall_suffix(ctx, probe, NORMAL_FUNCTION);
+            if (!extended) break;
+            probe = extended;
+        }
+        if (probe->tag != Var) {
+            key = probe;
+            pos = key->end;
+        }
+    }
+
+    spaces(&pos);
+    ast_t *right = parse_infix_expr(ctx, pos, tightness);
+    if (!right) return left;
+
+    if (op == BINOP_MIN)
+        return NewAST(ctx->file, left->start, right->end, Min, .lhs=left, .rhs=right, .key=key);
+    if (op == BINOP_MAX)
+        return NewAST(ctx->file, left->start, right->end, Max, .lhs=left, .rhs=right, .key=key);
+    return NewAST(ctx->file, left->start, right->end, BinaryOp, .lhs=left, .rhs=right, .op=op);
+}
+
+// Parse an infix expression with a minimum tightness of 0.
+ast_t *parse_expr(parse_ctx_t *ctx, const char *pos) {
+    const int no_minimum = 0;
+    return parse_infix_expr(ctx, pos, no_minimum);
+}
+
+// Parse a declaration `name := value`. The value is tried first as a `use`
+// and then as an extended expression; it is an error if neither parses.
+// Returns NULL if `pos` does not start with a variable followed by ":=".
+PARSER(parse_declaration) {
+    const char *decl_start = pos;
+    ast_t *declared = parse_var(ctx, pos);
+    if (!declared) return NULL;
+
+    pos = declared->end;
+    spaces(&pos);
+    if (!match(&pos, ":=")) return NULL;
+    spaces(&pos);
+
+    ast_t *value = optional(ctx, &pos, parse_use);
+    if (value == NULL) {
+        value = optional(ctx, &pos, parse_extended_expr);
+        if (value == NULL)
+            parser_err(ctx, pos, strchrnul(pos, '\n'), "This declaration value didn't parse");
+    }
+    return NewAST(ctx->file, decl_start, pos, Declare, .var=declared, .value=value);
+}
+
+// Parse an update assignment such as `x += y`: an expression, one of the
+// update operators listed below, and a required right-hand expression.
+// Returns NULL if no expression or no update operator is found.
+PARSER(parse_update) {
+    // Operator spellings, in the order in which they are attempted.
+    static const struct {
+        const char *token;
+        binop_e op;
+    } update_ops[] = {
+        {"+=", BINOP_PLUS},
+        {"++=", BINOP_CONCAT},
+        {"-=", BINOP_MINUS},
+        {"*=", BINOP_MULT},
+        {"/=", BINOP_DIVIDE},
+        {"and=", BINOP_AND},
+        {"or=", BINOP_OR},
+        {"xor=", BINOP_XOR},
+    };
+    const size_t num_ops = sizeof(update_ops) / sizeof(update_ops[0]);
+
+    const char *start = pos;
+    ast_t *target = optional(ctx, &pos, parse_expr);
+    if (!target) return NULL;
+    spaces(&pos);
+
+    size_t which = 0;
+    while (which < num_ops && !match(&pos, update_ops[which].token))
+        ++which;
+    if (which == num_ops) return NULL;
+
+    ast_t *operand = expect(ctx, start, &pos, parse_extended_expr, "I expected an expression here");
+    return NewAST(ctx->file, start, pos, UpdateAssign, .lhs=target, .rhs=operand, .op=update_ops[which].op);
+}
+
+// Parse a (possibly multiple) assignment: `a, b = x, y`. Targets are terms,
+// values are extended expressions, both comma-separated. Returns NULL if
+// there are no targets, no '=' follows them, or the '=' is really "==".
+PARSER(parse_assignment) {
+    const char *assign_start = pos;
+
+    ast_list_t *targets = NULL;
+    ast_t *target;
+    while ((target = optional(ctx, &pos, parse_term)) != NULL) {
+        targets = new(ast_list_t, .ast=target, .next=targets);
+        spaces(&pos);
+        if (!match(&pos, ",")) break;
+        whitespace(&pos);
+    }
+    if (targets == NULL) return NULL;
+
+    spaces(&pos);
+    // Exactly one '=' is required; a second one means this is a comparison.
+    if (!match(&pos, "=") || match(&pos, "="))
+        return NULL;
+
+    ast_list_t *values = NULL;
+    ast_t *value;
+    while ((value = optional(ctx, &pos, parse_extended_expr)) != NULL) {
+        values = new(ast_list_t, .ast=value, .next=values);
+        spaces(&pos);
+        if (!match(&pos, ",")) break;
+        whitespace(&pos);
+    }
+
+    // Both lists were built by prepending, so put them back in source order.
+    REVERSE_LIST(targets);
+    REVERSE_LIST(values);
+
+    return NewAST(ctx->file, assign_start, pos, Assign, .targets=targets, .values=values);
+}
+
+// Parse one statement. Declarations, doctests, function definitions and `use`
+// are tried first and returned as-is. Otherwise an update, an assignment, or
+// an extended expression is parsed, and if the result is a bare Var, a call
+// suffix directly after it is applied.
+PARSER(parse_statement) {
+    ast_t *stmt = parse_declaration(ctx, pos);
+    if (!stmt) stmt = parse_doctest(ctx, pos);
+    if (!stmt) stmt = parse_func_def(ctx, pos);
+    if (!stmt) stmt = parse_use(ctx,pos);
+    if (stmt) return stmt;
+
+    stmt = parse_update(ctx, pos);
+    if (!stmt) stmt = parse_assignment(ctx, pos);
+    if (!stmt) stmt = parse_extended_expr(ctx, pos);
+
+    while (stmt != NULL && stmt->tag == Var) {
+        ast_t *call = parse_fncall_suffix(ctx, stmt, NORMAL_FUNCTION);
+        if (!call) break;
+        stmt = call;
+    }
+    return stmt;
+
+}
+
+// Parse a `for`, `while`, or `if` construct (tried in that order), falling
+// back to an ordinary expression when none of them matches.
+PARSER(parse_extended_expr) {
+    ast_t *expr = optional(ctx, &pos, parse_for);
+    if (!expr) expr = optional(ctx, &pos, parse_while);
+    if (!expr) expr = optional(ctx, &pos, parse_if);
+    if (expr) return expr;
+
+    return parse_expr(ctx, pos);
+}
+
+// Parse a block of statements that share the indentation found at `pos`.
+// Parsing stops at the end of input, at a line that does not parse (an error
+// unless only spaces remain on it), or when the text after a statement has
+// a different indentation, in which case the block ends at that statement.
+PARSER(parse_block) {
+    const int64_t block_indent = sss_get_indent(ctx->file, pos);
+    const char *block_start = pos;
+    whitespace(&pos);
+
+    ast_list_t *stmts = NULL;
+    while (*pos) {
+        ast_t *stmt = optional(ctx, &pos, parse_statement);
+        if (stmt == NULL) {
+            spaces(&pos);
+            bool at_line_end = (*pos == '\0' || *pos == '\r' || *pos == '\n');
+            if (!at_line_end)
+                parser_err(ctx, pos, strchrnul(pos, '\n'), "I couldn't parse this line");
+            break;
+        }
+
+        stmts = new(ast_list_t, .ast=stmt, .next=stmts);
+        whitespace(&pos);
+        if (sss_get_indent(ctx->file, pos) == block_indent)
+            continue;
+
+        // The following text belongs to a different indentation level, so
+        // rewind to the end of the last statement of this block.
+        pos = stmt->end;
+        break;
+    }
+    REVERSE_LIST(stmts);
+    return NewAST(ctx->file, block_start, pos, Block, .statements=stmts);
+}
+
+// Parse an indented block when indent() succeeds at `pos`, and an inline
+// block otherwise.
+PARSER(parse_opt_indented_block) {
+    if (indent(ctx, &pos))
+        return parse_block(ctx, pos);
+    return parse_inline_block(ctx, pos);
+}
+
+// Parse a namespace: a run of type definitions, linker directives and
+// statements that all sit at the indentation of the first one. The result is
+// a Block node (possibly with no statements).
+PARSER(parse_namespace) {
+    const char *ns_start = pos;
+    whitespace(&pos);
+    const int64_t ns_indent = sss_get_indent(ctx->file, pos);
+
+    ast_list_t *members = NULL;
+    for (;;) {
+        // Look ahead past whitespace to see whether the next item is still
+        // at this namespace's indentation.
+        const char *lookahead = pos;
+        whitespace(&lookahead);
+        if (sss_get_indent(ctx->file, lookahead) != ns_indent) break;
+
+        ast_t *member = optional(ctx, &pos, parse_type_def);
+        if (!member) member = optional(ctx, &pos, parse_linker);
+        if (!member) member = optional(ctx, &pos, parse_statement);
+        if (!member) {
+            // NOTE(review): the indentation at `lookahead` was just checked to
+            // equal `ns_indent`, so this `>` comparison looks like it can
+            // never be true here -- confirm whether this error is reachable.
+            if (sss_get_indent(ctx->file, lookahead) > ns_indent && lookahead < strchrnul(lookahead, '\n'))
+                parser_err(ctx, lookahead, strchrnul(lookahead, '\n'), "I couldn't parse this namespace statement");
+            break;
+        }
+
+        members = new(ast_list_t, .ast=member, .next=members);
+        pos = member->end;
+        whitespace(&pos);
+    }
+    REVERSE_LIST(members);
+    return NewAST(ctx->file, ns_start, pos, Block, .statements=members);
+}
+
+// Parse a type definition: `type Foo := Type`, optionally followed by a
+// namespace body that is indented further than the `type` line. Without such
+// a body, the TypeDef gets an empty Block as its namespace.
+PARSER(parse_type_def) {
+    const char *def_start = pos;
+    if (!match_word(&pos, "type")) return NULL;
+
+    const int64_t def_indent = sss_get_indent(ctx->file, pos);
+
+    const char *type_name = get_id(&pos);
+    if (!type_name) return NULL;
+    spaces(&pos);
+
+    if (!match(&pos, ":=")) return NULL;
+    type_ast_t *definition = expect(ctx, def_start, &pos, parse_type, "I expected a type after this ':='");
+
+    // Peek past whitespace: only more deeply indented text is a namespace.
+    const char *body_pos = pos;
+    whitespace(&body_pos);
+    ast_t *members = NULL;
+    if (sss_get_indent(ctx->file, body_pos) > def_indent) {
+        pos = body_pos;
+        members = optional(ctx, &pos, parse_namespace);
+    }
+    if (members == NULL)
+        members = NewAST(ctx->file, pos, pos, Block, .statements=NULL);
+
+    return NewAST(ctx->file, def_start, pos, TypeDef, .var.name=type_name, .type=definition, .namespace=members);
+}
+
+// Parse a tagged union type: `enum(a | b(x:Int,y:Int) = 5 | ...)`.
+// Each tag may have a parenthesized field list and an explicit integer value
+// after '='. A tag without an explicit value gets the previous tag's value
+// plus one (the first tag starts at 0). Reusing a value is an error.
+type_ast_t *parse_enum_type(parse_ctx_t *ctx, const char *pos) {
+    const char *enum_start = pos;
+
+    if (!match_word(&pos, "enum")) return NULL;
+    spaces(&pos);
+    if (!match(&pos, "(")) return NULL;
+
+    tag_t *tag_list = NULL;
+
+    whitespace(&pos);
+    // `value` is only incremented when another '|'-separated tag follows.
+    for (int64_t value = 0; ; ++value) {
+        const char *tag_start = pos;
+
+        spaces(&pos);
+        const char *tag_name = get_id(&pos);
+        if (!tag_name) break;
+
+        spaces(&pos);
+        arg_list_t *fields = NULL;
+        if (match(&pos, "(")) {
+            whitespace(&pos);
+            fields = parse_args(ctx, &pos, false);
+            whitespace(&pos);
+            expect_closing(ctx, &pos, ")", "I wasn't able to parse the rest of this tagged union member");
+        }
+
+        spaces(&pos);
+        if (match(&pos, "=")) {
+            ast_t *literal = expect(ctx, tag_start, &pos, parse_int, "I expected an integer literal after this '='");
+            value = Match(literal, Int)->i;
+        }
+
+        // Reject a value already used by an earlier tag.
+        tag_t *earlier = tag_list;
+        while (earlier) {
+            if (earlier->value == value)
+                parser_err(ctx, tag_start, pos, "This tag value (%ld) is a duplicate of an earlier tag value", value);
+            earlier = earlier->next;
+        }
+
+        type_ast_t *tag_type = NewTypeAST(ctx->file, tag_start, pos, TypeStruct, .fields=fields);
+        tag_list = new(tag_t, .name=tag_name, .value=value, .type=tag_type, .next=tag_list);
+
+        // Only commit to the lookahead position if a '|' separator is found.
+        const char *lookahead = pos;
+        whitespace(&lookahead);
+        if (!match(&lookahead, "|"))
+            break;
+        whitespace(&lookahead);
+        pos = lookahead;
+    }
+
+    whitespace(&pos);
+    expect_closing(ctx, &pos, ")", "I wasn't able to parse the rest of this enum definition");
+
+    REVERSE_LIST(tag_list);
+
+    return NewTypeAST(ctx->file, enum_start, pos, TypeTaggedUnion, .tags=tag_list);
+}
+
+// Parse a comma-separated list of argument declarations, advancing `*pos`.
+//
+// Names are collected in batches that share one type or default value:
+// `x, y: Type` declares two arguments with the same type, and `x = <term>`
+// gives a default value. When `allow_unnamed` is true, a bare type is accepted
+// and recorded with a NULL var. It is an error for a batch of names to end
+// without a ':' type or '=' default.
+//
+// Returns the arguments as a linked list in source order (NULL if none).
+arg_list_t *parse_args(parse_ctx_t *ctx, const char **pos, bool allow_unnamed)
+{
+    arg_list_t *args = NULL;
+    for (;;) {
+        const char *batch_start = *pos;
+        ast_t *default_val = NULL;
+        type_ast_t *type = NULL;
+
+        typedef struct var_list_s {
+            var_t *var;
+            struct var_list_s *next;
+        } var_list_t;
+
+        var_list_t *vars = NULL;
+        for (;;) {
+            whitespace(pos);
+            const char *name_start = *pos;
+            const char *name = get_id(pos);
+            if (!name) break;
+            whitespace(pos);
+            // A lone '=' introduces a default value; "==" is left alone.
+            if (strncmp(*pos, "==", 2) != 0 && match(pos, "=")) {
+                default_val = expect(ctx, *pos-1, pos, parse_term, "I expected a value after this '='");
+                vars = new(var_list_t, .var=new(var_t, .name=name), .next=vars);
+                break;
+            } else if (match(pos, ":")) {
+                type = expect(ctx, *pos-1, pos, parse_type, "I expected a type here");
+                vars = new(var_list_t, .var=new(var_t, .name=name), .next=vars);
+                break;
+            } else if (allow_unnamed) {
+                // Re-read the identifier as a type for an unnamed argument.
+                *pos = name_start;
+                type = optional(ctx, pos, parse_type);
+                if (type)
+                    vars = new(var_list_t, .var=NULL, .next=vars);
+                break;
+            } else {
+                // A bare name: keep collecting names for this batch for as
+                // long as they are separated by commas.
+                vars = new(var_list_t, .var=new(var_t, .name=name), .next=vars);
+                spaces(pos);
+                if (!match(pos, ",")) break;
+            }
+        }
+        if (!vars) break;
+        if (!default_val && !type)
+            parser_err(ctx, batch_start, *pos, "I expected a ':' and type, or '=' and a default value after this parameter (%s)",
+                       vars->var->name);
+
+        REVERSE_LIST(vars);
+        // Prepend each argument to the list. Linking through `.next` is
+        // essential: without it every argument but the last would be lost.
+        for (; vars; vars = vars->next)
+            args = new(arg_list_t, .var=vars->var, .type=type, .default_val=default_val, .next=args);
+        whitespace(pos);
+        match(pos, ",");
+    }
+
+    REVERSE_LIST(args);
+    return args;
+}
+
+// Parse a named function definition:
+//     func name(args [; flags]) [-> Type | : Type] body
+// Flags follow the arguments, each introduced by ';' or ',': `inline`,
+// `cached` (cache size INT64_MAX unless a cache was already given), and
+// `cache_size = <expr>`. Returns NULL if `func`, a name, or '(' is missing;
+// a missing ')' or body is an error.
+PARSER(parse_func_def) {
+    const char *def_start = pos;
+    if (!match_word(&pos, "func")) return NULL;
+
+    ast_t *fn_name = optional(ctx, &pos, parse_var);
+    if (!fn_name) return NULL;
+
+    spaces(&pos);
+    if (!match(&pos, "(")) return NULL;
+
+    arg_list_t *params = parse_args(ctx, &pos, false);
+    whitespace(&pos);
+
+    bool inline_flag = false;
+    ast_t *cache = NULL;
+    for (;;) {
+        whitespace(&pos);
+        if (!match(&pos, ";") && !match(&pos, ","))
+            break;
+
+        const char *flag_start = pos;
+        if (match_word(&pos, "inline")) {
+            inline_flag = true;
+        } else if (match_word(&pos, "cached")) {
+            if (cache == NULL)
+                cache = NewAST(ctx->file, pos, pos, Int, .i=INT64_MAX, .precision=64);
+        } else if (match_word(&pos, "cache_size")) {
+            whitespace(&pos);
+            if (!match(&pos, "="))
+                parser_err(ctx, flag_start, pos, "I expected a value for 'cache_size'");
+            whitespace(&pos);
+            cache = expect(ctx, def_start, &pos, parse_expr, "I expected a maximum size for the cache");
+        }
+    }
+    expect_closing(ctx, &pos, ")", "I wasn't able to parse the rest of this function definition");
+
+    spaces(&pos);
+    type_ast_t *return_type = NULL;
+    if (match(&pos, "->") || match(&pos, ":"))
+        return_type = optional(ctx, &pos, parse_type);
+
+    ast_t *fn_body = expect(ctx, def_start, &pos, parse_opt_indented_block,
+                            "This function needs a body block");
+    return NewAST(ctx->file, def_start, pos, FunctionDef,
+                  .name=fn_name, .args=params, .ret_type=return_type, .body=fn_body, .cache=cache,
+                  .is_inline=inline_flag);
+}
+
+// Parse an `extern` term, which takes one of two forms:
+//   - `extern name(args...)[:Type]` -- a call to an external function,
+//     handled by parse_fncall_suffix() in EXTERN_FUNCTION mode;
+//   - `extern [&]name : Type` -- an Extern node, with `.address` set when
+//     the '&' is present.
+// Returns NULL if `pos` is not at the word `extern`. A missing name or a
+// missing/unparseable type is an error.
+PARSER(parse_extern) {
+    const char *start = pos;
+    if (!match_word(&pos, "extern")) return NULL;
+    spaces(&pos);
+    bool address = (match(&pos, "&") != 0);
+    const char* name = get_id(&pos);
+    // Without this check a NULL name would be stored in the resulting node.
+    if (!name)
+        parser_err(ctx, start, pos, "I expected a name for this extern");
+    spaces(&pos);
+    // extern function call:
+    if (match(&pos, "(")) {
+        // The Var node ends at the '(' (pos-1) so that the call-suffix parser
+        // can match the parenthesis itself.
+        return parse_fncall_suffix(ctx, NewAST(ctx->file, start, pos-1, Var, .var.name=name), EXTERN_FUNCTION);
+    }
+    if (!match(&pos, ":"))
+        parser_err(ctx, start, pos, "I couldn't get a type for this extern");
+    type_ast_t *type = expect(ctx, start, &pos, parse_type, "I couldn't parse the type for this extern");
+    return NewAST(ctx->file, start, pos, Extern, .name=name, .type=type, .address=address);
+}
+
+// Parse a doctest: `>>> statement`, optionally followed by `=== output`,
+// where the expected output is the rest of that line. An empty expected
+// output after `===` is an error.
+PARSER(parse_doctest) {
+    const char *test_start = pos;
+    if (!match(&pos, ">>>")) return NULL;
+    spaces(&pos);
+
+    ast_t *tested = expect(ctx, test_start, &pos, parse_statement, "I couldn't parse the expression for this doctest");
+    whitespace(&pos);
+
+    const char *expected = NULL;
+    if (match(&pos, "===")) {
+        spaces(&pos);
+        const char *out_begin = pos;
+        const char *out_end = strchrnul(pos, '\n');
+        if (out_end <= out_begin)
+            parser_err(ctx, out_begin, out_end, "You're missing expected output here");
+        expected = heap_strn(out_begin, (size_t)(out_end - out_begin));
+        pos = out_end;
+    }
+    return NewAST(ctx->file, test_start, pos, DocTest, .expr=tested, .output=expected);
+}
+
+// Parse `use path`: the path runs up to the next space, tab, line break or
+// ';', has ".sss" appended, and is resolved with resolve_path() against the
+// current file's name. A missing path or an unresolvable file is an error.
+// Any ';' characters directly after the path are consumed.
+PARSER(parse_use) {
+    const char *use_start = pos;
+    if (!match_word(&pos, "use")) return NULL;
+    spaces(&pos);
+
+    const size_t name_len = strcspn(pos, " \t\r\n;");
+    if (name_len < 1)
+        parser_err(ctx, use_start, pos, "There is no filename here to use");
+
+    char *filename = heap_strf("%.*s.sss", (int)name_len, pos);
+    pos += name_len;
+
+    char *resolved = resolve_path(filename, ctx->file->filename);
+    if (resolved == NULL)
+        parser_err(ctx, use_start, pos, "No such file exists: \"%s\"", filename);
+
+    while (match(&pos, ";")) {
+        // skip any trailing semicolons
+    }
+    return NewAST(ctx->file, use_start, pos, Use, .path=resolved);
+}
+
+// Parse a linker directive: `!link` followed by the directive text, which
+// runs to the end of the line (exclusive of "\r"/"\n").
+// Returns NULL if `pos` is not at `!link`.
+PARSER(parse_linker) {
+    const char *start = pos;
+    if (!match_word(&pos, "!link")) return NULL;
+    spaces(&pos);
+    size_t len = strcspn(pos, "\r\n");
+    const char *directive = heap_strn(pos, len);
+    // Consume the directive text so it is part of this node's span; otherwise
+    // the caller would resume parsing in the middle of the directive.
+    pos += len;
+    return NewAST(ctx->file, start, pos, LinkerDirective, .directive=directive);
+}
+
+// Parse a block written on one line: statements separated by ';'. Parsing
+// stops at the first position where no statement parses or no ';' follows.
+PARSER(parse_inline_block) {
+    spaces(&pos);
+    const char *block_start = pos;
+
+    ast_list_t *stmts = NULL;
+    while (*pos) {
+        spaces(&pos);
+        ast_t *stmt = optional(ctx, &pos, parse_statement);
+        if (stmt == NULL)
+            break;
+        stmts = new(ast_list_t, .ast=stmt, .next=stmts);
+        spaces(&pos);
+        if (match(&pos, ";"))
+            continue;
+        break;
+    }
+    REVERSE_LIST(stmts);
+    return NewAST(ctx->file, block_start, pos, Block, .statements=stmts);
+}
+
+// Parse a whole file as a namespace. A leading "#!" line is skipped first.
+// Anything left over after the namespace (other than whitespace) is reported
+// through parser_err(); `on_err` is stored in the parse context.
+ast_t *parse_file(sss_file_t *file, jmp_buf *on_err) {
+    parse_ctx_t ctx = {
+        .file=file,
+        .on_err=on_err,
+    };
+
+    const char *cursor = file->text;
+    if (match(&cursor, "#!")) // shebang
+        some_not(&cursor, "\r\n");
+
+    whitespace(&cursor);
+    ast_t *root = parse_namespace(&ctx, cursor);
+    cursor = root->end;
+    whitespace(&cursor);
+    if (*cursor != '\0')
+        parser_err(&ctx, cursor, cursor + strlen(cursor), "I couldn't parse this part of the file");
+    return root;
+}
+
+// Parse a type from a standalone string (wrapped in a spoofed file named
+// "<type>"). Returns NULL if no type parses; trailing non-whitespace text is
+// reported through parser_err(). The context has no `on_err` jump buffer.
+type_ast_t *parse_type_str(const char *str) {
+    sss_file_t *spoofed = sss_spoof_file("<type>", str);
+    parse_ctx_t ctx = {
+        .file=spoofed,
+        .on_err=NULL,
+    };
+
+    const char *cursor = spoofed->text;
+    whitespace(&cursor);
+    type_ast_t *parsed = parse_type(&ctx, cursor);
+    if (parsed == NULL)
+        return parsed;
+
+    cursor = parsed->end;
+    whitespace(&cursor);
+    if (*cursor != '\0')
+        parser_err(&ctx, cursor, cursor + strlen(cursor), "I couldn't parse this part of the type");
+    return parsed;
+}
+
+// Parse an extended expression from a standalone string (wrapped in a spoofed
+// file named "<expression>"). Returns NULL if nothing parses; trailing
+// non-whitespace text is reported through parser_err(). The context has no
+// `on_err` jump buffer.
+ast_t *parse_expression_str(const char *str) {
+    sss_file_t *spoofed = sss_spoof_file("<expression>", str);
+    parse_ctx_t ctx = {
+        .file=spoofed,
+        .on_err=NULL,
+    };
+
+    const char *cursor = spoofed->text;
+    whitespace(&cursor);
+    ast_t *parsed = parse_extended_expr(&ctx, cursor);
+    if (parsed == NULL)
+        return parsed;
+
+    cursor = parsed->end;
+    whitespace(&cursor);
+    if (*cursor != '\0')
+        parser_err(&ctx, cursor, cursor + strlen(cursor), "I couldn't parse this part of the expression");
+    return parsed;
+}
+
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0