aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Makefile43
-rw-r--r--ast.c179
-rw-r--r--ast.h290
-rw-r--r--compile.c185
-rw-r--r--compile.h11
-rw-r--r--files.c317
-rw-r--r--files.h43
-rw-r--r--foo.c12
-rw-r--r--nextlang.c23
-rw-r--r--parse.c1759
-rw-r--r--parse.h9
-rw-r--r--util.c84
-rw-r--r--util.h29
13 files changed, 2984 insertions, 0 deletions
diff --git a/Makefile b/Makefile
new file mode 100644
index 00000000..fb2b6414
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,43 @@
+CC=gcc
+PREFIX=/usr/local
+VERSION=0.12.1
+# Options passed on every compile: C11, POSIX/XOPEN feature-test macros,
+# position-independent code, trapping signed overflow (-ftrapv), hidden symbol
+# visibility by default, and link-time optimization.
+CCONFIG=-std=c11 -Werror -D_XOPEN_SOURCE=700 -D_POSIX_C_SOURCE=200809L -fPIC -ftrapv -fvisibility=hidden -flto -fno-fat-lto-objects -Wl,-flto
+# Ask the linker to record an rpath of $ORIGIN ($$ escapes the dollar sign for make).
+LDFLAGS=-Wl,-rpath '-Wl,$$ORIGIN'
+# MAKEFLAGS := --jobs=$(shell nproc) --output-sync=target
+# Warnings that are currently enabled; the commented-out continuation lines
+# below list further warning flags that are not switched on.
+CWARN=-Wall -Wextra -Wno-format
+ # -Wpedantic -Wsign-conversion -Wtype-limits -Wunused-result -Wnull-dereference \
+ # -Waggregate-return -Walloc-zero -Walloca -Warith-conversion -Wcast-align -Wcast-align=strict \
+ # -Wdangling-else -Wdate-time -Wdisabled-optimization -Wdouble-promotion -Wduplicated-branches \
+ # -Wduplicated-cond -Wexpansion-to-defined -Wfloat-conversion -Wfloat-equal -Wformat-nonliteral \
+ # -Wformat-security -Wformat-signedness -Wframe-address -Winline -Winvalid-pch -Wjump-misses-init \
+ # -Wlogical-op -Wlong-long -Wmissing-format-attribute -Wmissing-include-dirs -Wmissing-noreturn \
+ # -Wnull-dereference -Woverlength-strings -Wpacked -Wpacked-not-aligned -Wpointer-arith \
+ # -Wredundant-decls -Wshadow -Wshadow=compatible-local -Wshadow=global -Wshadow=local \
+ # -Wsign-conversion -Wstack-protector -Wsuggest-attribute=const -Wswitch-default -Wswitch-enum \
+ # -Wsync-nand -Wtrampolines -Wundef -Wunsuffixed-float-constants -Wunused -Wunused-but-set-variable \
+ # -Wunused-const-variable -Wunused-local-typedefs -Wunused-macros -Wvariadic-macros -Wvector-operation-performance \
+ # -Wvla -Wwrite-strings
+# Choose a platform feature-test macro from `uname -s` (`!=` assigns the
+# output of the shell command).
+OSFLAGS != case $$(uname -s) in *BSD|Darwin) echo '-D_BSD_SOURCE';; Linux) echo '-D_GNU_SOURCE';; *) echo '-D_DEFAULT_SOURCE';; esac
+# EXTRA, G (debug info) and O (optimization level) are separate variables so
+# that each can be overridden individually; all of them are folded into CFLAGS.
+EXTRA=
+G=-ggdb
+O=-Og
+CFLAGS=$(CCONFIG) $(EXTRA) $(CWARN) $(G) $(O) $(OSFLAGS)
+LDLIBS=-lgc -lgccjit -lcord -lm -lunistring
+
+all: nextlang
+
+# No recipe here: the executable is produced by make's built-in "link a
+# program from its .c file" rule, with the listed objects as extra inputs.
+nextlang: nextlang.c parse.o files.o util.o ast.o compile.o
+
+SipHash/halfsiphash.c:
+	git submodule update --init --recursive
+
+# ctags writes its index to a file called `tags`, so this target must be
+# phony (see .PHONY below) or it would be considered up to date after the
+# first run and never be regenerated.
+tags:
+	ctags **/*.[ch]
+
+clean:
+	rm -f nextlang *.o
+
+# Build a man page from its Markdown source.
+%.1: %.1.md
+	pandoc --lua-filter=.pandoc/bold-code.lua -s $< -t man -o $@
+
+.PHONY: all clean install uninstall test tags
diff --git a/ast.c b/ast.c
new file mode 100644
index 00000000..b62a058c
--- /dev/null
+++ b/ast.c
@@ -0,0 +1,179 @@
+// Some basic operations defined on AST nodes, mainly converting to
+// strings for debugging.
+#include <gc/cord.h>
+#include <stdarg.h>
+#include <printf.h>
+
+#include "ast.h"
+
+// Printable operator names, indexed by operator enum value. Both unop_e and
+// binop_e values are used as indexes into this single table, so every
+// operator that can reach a "%s" conversion needs an entry here: a missing
+// entry is a NULL pointer.
+static const char *OP_NAMES[] = {
+    [BINOP_UNKNOWN]="unknown",
+    [UNOP_NOT]="not", [UNOP_NEGATIVE]="negative",
+    [UNOP_HEAP_ALLOCATE]="heap_allocate", [UNOP_STACK_REFERENCE]="stack_reference",
+    [BINOP_POWER]="^", [BINOP_MULT]="*", [BINOP_DIVIDE]="/",
+    [BINOP_MOD]="mod", [BINOP_MOD1]="mod1", [BINOP_PLUS]="+", [BINOP_MINUS]="-",
+    [BINOP_CONCAT]="++", [BINOP_LSHIFT]="<<", [BINOP_RSHIFT]=">>", [BINOP_MIN]="min",
+    [BINOP_MAX]="max", [BINOP_EQ]="==", [BINOP_NE]="!=", [BINOP_LT]="<",
+    [BINOP_LE]="<=", [BINOP_GT]=">", [BINOP_GE]=">=", [BINOP_AND]="and", [BINOP_OR]="or", [BINOP_XOR]="xor",
+};
+
+static CORD ast_to_cord(ast_t *ast);
+static CORD ast_list_to_cord(ast_list_t *asts);
+static CORD type_ast_to_cord(type_ast_t *t);
+static CORD arg_list_to_cord(arg_list_t *args);
+static CORD tags_to_cord(tag_t *tags);
+
+// Type-generic conversion of a value to a CORD: AST-related pointer types are
+// routed to their matching *_to_cord() function, a `const char *` is used
+// as-is, and integers, doubles and booleans are formatted with
+// CORD_asprintf() (booleans as "yes"/"no").
+// NOTE(review): no use of this macro appears in the rest of ast.c.
+#define TO_CORD(x) _Generic(x, \
+ ast_t*: ast_to_cord(x), \
+ ast_list_t*: ast_list_to_cord(x), \
+ type_ast_t*: type_ast_to_cord(x), \
+ arg_list_t*: arg_list_to_cord(x), \
+ tag_t*: tags_to_cord(x), \
+ const char *: x, \
+ int64_t: CORD_asprintf("%ld", x), \
+ unsigned short int: CORD_asprintf("%d", x), \
+ double: CORD_asprintf("%g", x), \
+ bool: CORD_asprintf("%s", x ? "yes" : "no"), \
+ unsigned char: CORD_asprintf("%s", x ? "yes" : "no"))
+
+// Render a linked list of AST nodes as "[a, b, c]". A NULL list is rendered
+// as a magenta "NULL" marker rather than "[]".
+CORD ast_list_to_cord(ast_list_t *asts)
+{
+    if (asts == NULL)
+        return "\x1b[35mNULL\x1b[m";
+
+    CORD out = "[";
+    ast_list_t *node = asts;
+    while (node != NULL) {
+        out = CORD_cat(out, ast_to_cord(node->ast));
+        node = node->next;
+        // Separator only between elements, never after the last one
+        if (node != NULL)
+            out = CORD_cat(out, ", ");
+    }
+    return CORD_cat(out, "]");
+}
+
+// Render an argument list as "Args(name:type=default, ...)". The name, type
+// and default value of each argument are only printed when present.
+CORD arg_list_to_cord(arg_list_t *args) {
+    CORD c = "Args(";
+    for (; args; args = args->next) {
+        if (args->var && args->var->name)
+            c = CORD_cat(c, args->var->name);
+        // The *_to_cord() helpers return CORDs, which are not necessarily
+        // flat C strings, so they must be formatted with %r rather than %s.
+        if (args->type)
+            CORD_sprintf(&c, "%r:%r", c, type_ast_to_cord(args->type));
+        if (args->default_val)
+            CORD_sprintf(&c, "%r=%r", c, ast_to_cord(args->default_val));
+        if (args->next) c = CORD_cat(c, ", ");
+    }
+    c = CORD_cat(c, ")");
+    return c;
+}
+
+// Render a tagged-union tag list as "Tags(name:type=value, ...)".
+CORD tags_to_cord(tag_t *tags) {
+    CORD c = "Tags(";
+    for (; tags; tags = tags->next) {
+        if (tags->name)
+            c = CORD_cat(c, tags->name);
+        // type_ast_to_cord() returns a CORD, so it needs %r (not %s)
+        CORD_sprintf(&c, "%r:%r=%ld", c, type_ast_to_cord(tags->type), tags->value);
+        if (tags->next) c = CORD_cat(c, ", ");
+    }
+    c = CORD_cat(c, ")");
+    return c;
+}
+
+// Render an AST node (recursively) as a colorized debug string of the form
+// "Tag(field, field, ...)". A NULL node is rendered as a magenta "NULL".
+//
+// Formatting conventions used below:
+//   %r  - a CORD (anything returned by a *_to_cord() function)
+//   %s  - a plain, non-NULL C string
+CORD ast_to_cord(ast_t *ast)
+{
+    if (!ast) return "\x1b[35mNULL\x1b[m";
+
+    switch (ast->tag) {
+// Each T(...) line expands to one `case`: it binds `data` to the union member
+// for that tag and returns the tag name (bold blue) followed by the formatted
+// fields.
+#define T(type, ...) case type: { auto data = ast->__data.type; (void)data; return CORD_asprintf("\x1b[34;1m" #type "\x1b[m" __VA_ARGS__); }
+    T(Unknown, "Unknown")
+    T(Nil, "(%r)", type_ast_to_cord(data.type))
+    T(Bool, "(\x1b[35m%s\x1b[m)", data.b ? "yes" : "no")
+    T(Var, "(\x1b[36;1m%s\x1b[m)", data.var.name)
+    // `precision` is an enum, so it is printed as an int
+    T(Int, "(\x1b[35m%ld\x1b[m, precision=%d)", data.i, (int)data.precision)
+    // `n` is a double and needs a floating-point conversion
+    T(Num, "(\x1b[35m%g\x1b[m, precision=%d)", data.n, (int)data.precision)
+    T(Char, "(\x1b[35m'%c'\x1b[m)", data.c)
+    T(StringLiteral, "\x1b[35m\"%s\"\x1b[m", data.str)
+    T(StringJoin, "(%r)", ast_list_to_cord(data.children))
+    T(Interp, "(%r)", ast_to_cord(data.value))
+    T(Declare, "(var=%r, value=%r)", ast_to_cord(data.var), ast_to_cord(data.value))
+    T(Assign, "(targets=%r, values=%r)", ast_list_to_cord(data.targets), ast_list_to_cord(data.values))
+    T(BinaryOp, "(%r, %s, %r)", ast_to_cord(data.lhs), OP_NAMES[data.op], ast_to_cord(data.rhs))
+    T(UpdateAssign, "(%r, %s, %r)", ast_to_cord(data.lhs), OP_NAMES[data.op], ast_to_cord(data.rhs))
+    // Not every unary operator necessarily has a name in OP_NAMES; never
+    // hand a NULL pointer to %s.
+    T(UnaryOp, "(%s, %r)", OP_NAMES[data.op] ? OP_NAMES[data.op] : "unnamed unop", ast_to_cord(data.value))
+    T(Min, "(%r, %r, key=%r)", ast_to_cord(data.lhs), ast_to_cord(data.rhs), ast_to_cord(data.key))
+    T(Max, "(%r, %r, key=%r)", ast_to_cord(data.lhs), ast_to_cord(data.rhs), ast_to_cord(data.key))
+    T(Array, "(%r, type=%r)", ast_list_to_cord(data.items), type_ast_to_cord(data.type))
+    T(Table, "(key_type=%r, value_type=%r, fallback=%r, default_value=%r, entries=%r)",
+      type_ast_to_cord(data.key_type), type_ast_to_cord(data.value_type),
+      ast_to_cord(data.fallback), ast_to_cord(data.default_value),
+      ast_list_to_cord(data.entries))
+    T(TableEntry, "(%r => %r)", ast_to_cord(data.key), ast_to_cord(data.value))
+    T(FunctionDef, "(name=%r, args=%r, ret=%r, body=%r)", ast_to_cord(data.name),
+      arg_list_to_cord(data.args), type_ast_to_cord(data.ret_type), ast_to_cord(data.body))
+    T(Lambda, "(args=%r, body=%r)", arg_list_to_cord(data.args), ast_to_cord(data.body))
+    T(FunctionCall, "(fn=%r, args=%r)", ast_to_cord(data.fn), ast_list_to_cord(data.args))
+    // The format has two conversions: the keyword name, then its value
+    T(KeywordArg, "(%s=%r)", data.name ? data.name : "", ast_to_cord(data.arg))
+    T(Block, "(%r)", ast_list_to_cord(data.statements))
+    T(For, "(index=%r, value=%r, iter=%r, body=%r)", ast_to_cord(data.index), ast_to_cord(data.value),
+      ast_to_cord(data.iter), ast_to_cord(data.body))
+    T(While, "(condition=%r, body=%r)", ast_to_cord(data.condition), ast_to_cord(data.body))
+    T(If, "(condition=%r, body=%r, else=%r)", ast_to_cord(data.condition), ast_to_cord(data.body), ast_to_cord(data.else_body))
+    T(Reduction, "(iter=%r, combination=%r, fallback=%r)", ast_to_cord(data.iter), ast_to_cord(data.combination), ast_to_cord(data.fallback))
+    // NOTE(review): assumes a skip/stop without a label has a NULL target;
+    // the guard is harmless if that never happens.
+    T(Skip, "(%s)", data.target ? data.target : "")
+    T(Stop, "(%s)", data.target ? data.target : "")
+    T(Pass, "")
+    T(Return, "(%r)", ast_to_cord(data.value))
+    T(Extern, "(name=%s, type=%r)", data.name, type_ast_to_cord(data.type))
+    T(TypeDef, "(%s, type=%r, namespace=%r)", data.var.name, type_ast_to_cord(data.type), ast_to_cord(data.namespace))
+    T(Index, "(indexed=%r, index=%r)", ast_to_cord(data.indexed), ast_to_cord(data.index))
+    T(FieldAccess, "(fielded=%r, field=%s)", ast_to_cord(data.fielded), data.field)
+    T(DocTest, "(expr=%r, output=%s)", ast_to_cord(data.expr), data.output ? data.output : "")
+    T(Use, "(%s)", data.path)
+    T(LinkerDirective, "(%s)", data.directive)
+#undef T
+    }
+    // Only reached for a tag value outside ast_e
+    return NULL;
+}
+
+// Render a type AST (recursively) as a colorized debug string of the form
+// "TypeTag(fields...)". A NULL type is rendered as a magenta "NULL".
+CORD type_ast_to_cord(type_ast_t *t)
+{
+ if (!t) return "\x1b[35mNULL\x1b[m";
+
+ switch (t->tag) {
+// Each T(...) line expands to one `case` that binds `data` to the union
+// member for that tag and returns the tag name (bold green) followed by the
+// formatted fields. %r is the conversion for CORD arguments.
+#define T(type, ...) case type: { auto data = t->__data.type; (void)data; return CORD_asprintf("\x1b[32;1m" #type "\x1b[m" __VA_ARGS__); }
+ T(TypeUnknown, "")
+ T(TypeVar, "(\x1b[36;1m%s\x1b[m)", data.var.name)
+ T(TypePointer, "(%r, is_optional=%d, is_stack=%d, is_readonly=%d)",
+ type_ast_to_cord(data.pointed), data.is_optional,
+ data.is_stack, data.is_readonly)
+ T(TypeStruct, "(%r)", arg_list_to_cord(data.fields))
+ T(TypeTaggedUnion, "(%r)", tags_to_cord(data.tags))
+ T(TypeArray, "(%r)", type_ast_to_cord(data.item))
+ T(TypeTable, "(%r => %r)", type_ast_to_cord(data.key), type_ast_to_cord(data.value))
+ T(TypeFunction, "(args=%r, ret=%r)", arg_list_to_cord(data.args), type_ast_to_cord(data.ret))
+#undef T
+ }
+ // Only reached for a tag value outside type_ast_e
+ return NULL;
+}
+
+// Debug representation of an AST node as a flat C string.
+const char *ast_to_str(ast_t *ast) {
+    return CORD_to_char_star(ast_to_cord(ast));
+}
+
+// Debug representation of a type AST as a flat C string.
+const char *type_ast_to_str(type_ast_t *t) {
+    return CORD_to_char_star(type_ast_to_cord(t));
+}
+
+// Custom printf conversion handler for `ast_t *` arguments.
+//
+// With the '#' flag (info->alt) the node's original source text
+// (ast->start .. ast->end) is printed; otherwise its debug representation
+// from ast_to_str(). A NULL node prints "(null)".
+//
+// Returns the number of characters written (or a negative value on error),
+// as printf handlers are required to do.
+int printf_ast(FILE *stream, const struct printf_info *info, const void *const args[])
+{
+    ast_t *ast = *(ast_t *const *)(args[0]);
+    if (!ast) {
+        // fputs() only reports success/failure, not a character count, so
+        // go through fprintf() to return a proper count.
+        return fprintf(stream, "%s", "(null)");
+    }
+
+    if (info->alt)
+        return fprintf(stream, "%.*s", (int)(ast->end - ast->start), ast->start);
+    return fprintf(stream, "%s", ast_to_str(ast));
+}
+
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/ast.h b/ast.h
new file mode 100644
index 00000000..0d90d4b5
--- /dev/null
+++ b/ast.h
@@ -0,0 +1,290 @@
+#pragma once
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <printf.h>
+
+#include "files.h"
+#include "util.h"
+
+// Constructors for AST nodes. Each macro builds a node through the `new()`
+// macro (declared outside this header), sets `.tag`, and initializes the
+// union member named after the tag from the trailing designated initializers.
+
+// AST node with an explicit source span.
+#define NewAST(_file, _start, _end, ast_tag, ...) (new(ast_t, .file=_file, .start=_start, .end=_end,\
+ .tag=ast_tag, .__data.ast_tag={__VA_ARGS__}))
+// Type AST node with an explicit source span.
+#define NewTypeAST(_file, _start, _end, ast_tag, ...) (new(type_ast_t, .file=_file, .start=_start, .end=_end,\
+ .tag=ast_tag, .__data.ast_tag={__VA_ARGS__}))
+// AST node with no source location (file/start/end are left unset).
+#define FakeAST(ast_tag, ...) (new(ast_t, .tag=ast_tag, .__data.ast_tag={__VA_ARGS__}))
+// AST node that copies its source span from an existing node `ast`.
+#define WrapAST(ast, ast_tag, ...) (new(ast_t, .file=(ast)->file, .start=(ast)->start, .end=(ast)->end, .tag=ast_tag, .__data.ast_tag={__VA_ARGS__}))
+// StringLiteral node holding heap_str(_str), with the span of `ast`.
+#define StringAST(ast, _str) WrapAST(ast, StringLiteral, .str=heap_str(_str))
+
+struct binding_s;
+typedef struct type_ast_s type_ast_t;
+typedef struct ast_s ast_t;
+
+// A named variable reference, with a slot for the binding it refers to.
+typedef struct {
+ const char *name;
+ struct binding_s *binding;
+} var_t;
+
+// Singly linked list of AST nodes.
+typedef struct ast_list_s {
+ ast_t *ast;
+ struct ast_list_s *next;
+} ast_list_t;
+
+// Singly linked list of arguments/fields; each entry may carry a variable,
+// a type annotation and a default value (any of which may be NULL).
+typedef struct arg_list_s {
+ var_t *var;
+ type_ast_t *type;
+ ast_t *default_val;
+ struct arg_list_s *next;
+} arg_list_t;
+
+// Reverse a singly linked list in place. `list` must be an assignable
+// expression naming the head pointer of a list whose nodes have a `next`
+// member; after the macro runs it points at the former last node. The
+// argument is evaluated more than once, so it must not have side effects.
+#define REVERSE_LIST(list) do { \
+ __typeof(list) _prev = NULL; \
+ __typeof(list) _next = NULL; \
+ auto _current = list; \
+ while (_current != NULL) { \
+ _next = _current->next; \
+ _current->next = _prev; \
+ _prev = _current; \
+ _current = _next; \
+ } \
+ list = _prev; \
+} while(0)
+
+// Unary operators.
+// Values of unop_e and binop_e are both used as indexes into the single
+// OP_NAMES table in ast.c, so apart from the shared 0 ("unknown") the two
+// value ranges must not overlap; presumably that is why binary operators
+// start at 100.
+typedef enum {
+ UNOP_UNKNOWN,
+ UNOP_NOT=1, UNOP_NEGATIVE,
+ UNOP_HEAP_ALLOCATE,
+ UNOP_STACK_REFERENCE,
+} unop_e;
+
+// Binary operators (also used as the operator of UpdateAssign nodes).
+typedef enum {
+ BINOP_UNKNOWN,
+ BINOP_POWER=100, BINOP_MULT, BINOP_DIVIDE, BINOP_MOD, BINOP_MOD1, BINOP_PLUS,
+ BINOP_MINUS, BINOP_CONCAT, BINOP_LSHIFT, BINOP_RSHIFT, BINOP_MIN,
+ BINOP_MAX, BINOP_EQ, BINOP_NE, BINOP_LT, BINOP_LE, BINOP_GT, BINOP_GE,
+ BINOP_AND, BINOP_OR, BINOP_XOR,
+} binop_e;
+
+// Discriminator for type_ast_t. Each enumerator has a same-named member in
+// the `__data` union of struct type_ast_s.
+typedef enum {
+ TypeUnknown,
+ TypeVar,
+ TypePointer,
+ TypeStruct,
+ TypeTaggedUnion,
+ TypeArray,
+ TypeTable,
+ TypeFunction,
+} type_ast_e;
+
+// One entry in a tagged union's list of tags: a name, a type, an integer
+// value, and a link to the next tag.
+typedef struct tag_s {
+ const char *name;
+ struct type_ast_s *type;
+ int64_t value;
+ struct tag_s *next;
+} tag_t;
+
+// A parsed type expression. `tag` selects which member of `__data` is valid;
+// `file`, `start` and `end` identify the source text the type came from.
+struct type_ast_s {
+ type_ast_e tag;
+ sss_file_t *file;
+ const char *start, *end;
+ union {
+ struct {} TypeUnknown;
+ struct {
+ var_t var;
+ } TypeVar;
+ struct {
+ type_ast_t *pointed;
+ bool is_optional:1, is_stack:1, is_readonly:1;
+ } TypePointer;
+ struct {
+ arg_list_t *fields;
+ } TypeStruct;
+ struct {
+ tag_t *tags;
+ } TypeTaggedUnion;
+ struct {
+ type_ast_t *item;
+ } TypeArray;
+ struct {
+ type_ast_t *key, *value;
+ } TypeTable;
+ struct {
+ arg_list_t *args;
+ type_ast_t *ret;
+ } TypeFunction;
+ } __data;
+};
+
+// Discriminator for ast_t. Each enumerator has a same-named member in the
+// `__data` union of struct ast_s.
+typedef enum {
+ Unknown = 0,
+ Nil, Bool, Var,
+ Int, Num, Char,
+ StringLiteral, StringJoin, Interp,
+ Declare, Assign,
+ BinaryOp, UnaryOp, UpdateAssign,
+ Min, Max,
+ Array, Table, TableEntry,
+ FunctionDef, Lambda,
+ FunctionCall, KeywordArg,
+ Block,
+ For, While, If,
+ Reduction,
+ Skip, Stop, Pass,
+ Return,
+ Extern,
+ TypeDef,
+ Index, FieldAccess,
+ DocTest,
+ Use,
+ LinkerDirective,
+} ast_e;
+
+// A parsed syntax tree node. `tag` selects which member of `__data` is valid;
+// `file`, `start` and `end` identify the source text the node came from
+// (printf_ast() prints the bytes between `start` and `end`).
+struct ast_s {
+ ast_e tag;
+ sss_file_t *file;
+ const char *start, *end;
+ union {
+ struct {} Unknown;
+ struct {
+ type_ast_t *type;
+ } Nil;
+ struct {
+ bool b;
+ } Bool;
+ struct {
+ var_t var;
+ } Var;
+ struct {
+ int64_t i;
+ // NOTE: these enumerators are 0..3, not bit counts
+ enum { INT_64BIT, INT_32BIT, INT_16BIT, INT_8BIT } precision;
+ } Int;
+ struct {
+ double n;
+ // NOTE: these enumerators are 0..1, not bit counts
+ enum { NUM_64BIT, NUM_32BIT } precision;
+ } Num;
+ struct {
+ char c;
+ } Char;
+ struct {
+ const char *str;
+ } StringLiteral;
+ struct {
+ ast_list_t *children;
+ } StringJoin;
+ struct {
+ ast_t *value;
+ bool labelled:1, colorize:1, quote_string:1;
+ } Interp;
+ struct {
+ ast_t *var;
+ ast_t *value;
+ } Declare;
+ struct {
+ ast_list_t *targets, *values;
+ } Assign;
+ // BinaryOp and UpdateAssign share one layout
+ struct {
+ ast_t *lhs;
+ binop_e op;
+ ast_t *rhs;
+ } BinaryOp, UpdateAssign;
+ struct {
+ unop_e op;
+ ast_t *value;
+ } UnaryOp;
+ struct {
+ ast_t *lhs, *rhs, *key;
+ } Min, Max;
+ struct {
+ type_ast_t *type;
+ ast_list_t *items;
+ } Array;
+ struct {
+ type_ast_t *key_type, *value_type;
+ ast_t *fallback, *default_value;
+ ast_list_t *entries;
+ } Table;
+ struct {
+ ast_t *key, *value;
+ } TableEntry;
+ struct {
+ ast_t *name;
+ arg_list_t *args;
+ type_ast_t *ret_type;
+ ast_t *body;
+ ast_t *cache;
+ bool is_inline;
+ } FunctionDef;
+ struct {
+ arg_list_t *args;
+ ast_t *body;
+ } Lambda;
+ struct {
+ ast_t *fn;
+ ast_list_t *args;
+ type_ast_t *extern_return_type;
+ } FunctionCall;
+ struct {
+ const char *name;
+ ast_t *arg;
+ } KeywordArg;
+ struct {
+ ast_list_t *statements;
+ } Block;
+ struct {
+ ast_t *index, *value, *iter, *body;
+ } For;
+ struct {
+ ast_t *condition, *body;
+ } While;
+ struct {
+ ast_t *condition, *body, *else_body;
+ } If;
+ struct {
+ ast_t *iter, *combination, *fallback;
+ } Reduction;
+ struct {
+ const char *target;
+ } Skip, Stop;
+ struct {} Pass;
+ struct {
+ ast_t *value;
+ } Return;
+ struct {
+ const char *name;
+ type_ast_t *type;
+ bool address;
+ } Extern;
+ struct {
+ var_t var;
+ type_ast_t *type;
+ ast_t *namespace;
+ } TypeDef;
+ struct {
+ ast_t *indexed, *index;
+ bool unchecked;
+ } Index;
+ struct {
+ ast_t *fielded;
+ const char *field;
+ } FieldAccess;
+ struct {
+ ast_t *expr;
+ const char *output;
+ bool skip_source:1;
+ } DocTest;
+ struct {
+ const char *path;
+ sss_file_t *file;
+ bool main_program;
+ } Use;
+ struct {
+ const char *directive;
+ } LinkerDirective;
+ } __data;
+};
+
+const char *ast_to_str(ast_t *ast);
+const char *type_ast_to_str(type_ast_t *ast);
+int printf_ast(FILE *stream, const struct printf_info *info, const void *const args[]);
+ast_list_t *get_ast_children(ast_t *ast);
+
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/compile.c b/compile.c
new file mode 100644
index 00000000..e4ae0a48
--- /dev/null
+++ b/compile.c
@@ -0,0 +1,185 @@
+
+#include <ctype.h>
+#include <gc/cord.h>
+#include <gc.h>
+#include <stdio.h>
+
+#include "ast.h"
+#include "util.h"
+
+// Translate a type AST into the text of the corresponding type in the
+// generated code. Only plain named types (TypeVar) are supported so far;
+// anything else aborts the program with "Not implemented".
+static CORD compile_type(type_ast_t *t)
+{
+    if (t->tag == TypeVar)
+        return Match(t, TypeVar)->var.name;
+    errx(1, "Not implemented");
+}
+
+// Map an Int literal's precision enumerator (INT_64BIT, INT_32BIT, ...) to
+// the number of bits it stands for. The enumerators themselves are 0..3.
+static int int_precision_bits(int precision)
+{
+    switch (precision) {
+    case INT_32BIT: return 32;
+    case INT_16BIT: return 16;
+    case INT_8BIT: return 8;
+    case INT_64BIT: default: return 64;
+    }
+}
+
+// Translate an AST node (recursively) into the text of the generated code.
+//
+// Returns a CORD holding the generated text. AST kinds that are not handled
+// yet yield NULL (the empty CORD); an `Unknown` node or an unsupported
+// operator aborts the program via errx().
+CORD compile(ast_t *ast)
+{
+    switch (ast->tag) {
+    case Nil: return "NULL";
+    case Bool: return Match(ast, Bool)->b ? "true" : "false";
+    case Var: return Match(ast, Var)->var.name;
+    case Int: {
+        auto int_ = Match(ast, Int);
+        // `precision` is an enumerator, not a bit count, so it has to be
+        // translated before it is spliced into the type name.
+        return CORD_asprintf("((Int%d_t)%ld)", int_precision_bits(int_->precision), int_->i);
+    }
+    case Num: {
+        auto num = Match(ast, Num);
+        // NOTE(review): "%g" keeps only 6 significant digits and prints
+        // integral values without a decimal point (so the 32-bit form can
+        // come out as e.g. "1f"); left as-is here.
+        return CORD_asprintf(num->precision == NUM_64BIT ? "%g" : "%gf", num->n);
+    }
+    // Go through unsigned char so that a negative `char` is not sign-extended
+    // into more than two hex digits.
+    case Char: return CORD_asprintf("'\\x%02X'", (unsigned)(unsigned char)Match(ast, Char)->c);
+    case UnaryOp: {
+        auto unop = Match(ast, UnaryOp);
+        CORD expr = compile(unop->value);
+        switch (unop->op) {
+        // Parenthesized so that nested operators cannot fuse into a
+        // different token (e.g. "-" followed by "-x" becoming "--x").
+        case UNOP_NOT: return CORD_asprintf("(!%r)", expr);
+        case UNOP_NEGATIVE: return CORD_asprintf("(-%r)", expr);
+        case UNOP_HEAP_ALLOCATE: return CORD_asprintf("__heap(%r)", expr);
+        case UNOP_STACK_REFERENCE: return CORD_asprintf("__stack(%r)", expr);
+        default: break;
+        }
+        errx(1, "Invalid unop");
+    }
+    case BinaryOp: {
+        auto binop = Match(ast, BinaryOp);
+        CORD lhs = compile(binop->lhs);
+        CORD rhs = compile(binop->rhs);
+        switch (binop->op) {
+        case BINOP_MULT: return CORD_asprintf("(%r * %r)", lhs, rhs);
+        case BINOP_DIVIDE: return CORD_asprintf("(%r / %r)", lhs, rhs);
+        case BINOP_MOD: return CORD_asprintf("(%r %% %r)", lhs, rhs);
+        case BINOP_PLUS: return CORD_asprintf("(%r + %r)", lhs, rhs);
+        case BINOP_MINUS: return CORD_asprintf("(%r - %r)", lhs, rhs);
+        case BINOP_LSHIFT: return CORD_asprintf("(%r << %r)", lhs, rhs);
+        case BINOP_RSHIFT: return CORD_asprintf("(%r >> %r)", lhs, rhs);
+        case BINOP_EQ: return CORD_asprintf("(%r == %r)", lhs, rhs);
+        case BINOP_NE: return CORD_asprintf("(%r != %r)", lhs, rhs);
+        case BINOP_LT: return CORD_asprintf("(%r < %r)", lhs, rhs);
+        case BINOP_LE: return CORD_asprintf("(%r <= %r)", lhs, rhs);
+        case BINOP_GT: return CORD_asprintf("(%r > %r)", lhs, rhs);
+        case BINOP_GE: return CORD_asprintf("(%r >= %r)", lhs, rhs);
+        case BINOP_AND: return CORD_asprintf("(%r && %r)", lhs, rhs);
+        case BINOP_OR: return CORD_asprintf("(%r || %r)", lhs, rhs);
+        default: break;
+        }
+        errx(1, "unimplemented binop");
+    }
+    case UpdateAssign: {
+        auto update = Match(ast, UpdateAssign);
+        CORD lhs = compile(update->lhs);
+        CORD rhs = compile(update->rhs);
+        switch (update->op) {
+        case BINOP_MULT: return CORD_asprintf("%r *= %r", lhs, rhs);
+        case BINOP_DIVIDE: return CORD_asprintf("%r /= %r", lhs, rhs);
+        case BINOP_MOD: return CORD_asprintf("%r = %r %% %r", lhs, lhs, rhs);
+        case BINOP_PLUS: return CORD_asprintf("%r += %r", lhs, rhs);
+        case BINOP_MINUS: return CORD_asprintf("%r -= %r", lhs, rhs);
+        case BINOP_LSHIFT: return CORD_asprintf("%r <<= %r", lhs, rhs);
+        case BINOP_RSHIFT: return CORD_asprintf("%r >>= %r", lhs, rhs);
+        case BINOP_EQ: return CORD_asprintf("%r = (%r == %r)", lhs, lhs, rhs);
+        case BINOP_NE: return CORD_asprintf("%r = (%r != %r)", lhs, lhs, rhs);
+        case BINOP_LT: return CORD_asprintf("%r = (%r < %r)", lhs, lhs, rhs);
+        case BINOP_LE: return CORD_asprintf("%r = (%r <= %r)", lhs, lhs, rhs);
+        case BINOP_GT: return CORD_asprintf("%r = (%r > %r)", lhs, lhs, rhs);
+        case BINOP_GE: return CORD_asprintf("%r = (%r >= %r)", lhs, lhs, rhs);
+        case BINOP_AND: return CORD_asprintf("%r = (%r && %r)", lhs, lhs, rhs);
+        case BINOP_OR: return CORD_asprintf("%r = (%r || %r)", lhs, lhs, rhs);
+        default: break;
+        }
+        errx(1, "unimplemented binop");
+    }
+    case StringLiteral: {
+        // Emit a double-quoted C string literal with everything that is not
+        // plainly printable escaped.
+        const char *str = Match(ast, StringLiteral)->str;
+        CORD c = "\"";
+        for (; *str; ++str) {
+            switch (*str) {
+            case '\\': c = CORD_cat(c, "\\\\"); break;
+            case '"': c = CORD_cat(c, "\\\""); break;
+            case '\a': c = CORD_cat(c, "\\a"); break;
+            case '\b': c = CORD_cat(c, "\\b"); break;
+            case '\n': c = CORD_cat(c, "\\n"); break;
+            case '\r': c = CORD_cat(c, "\\r"); break;
+            case '\t': c = CORD_cat(c, "\\t"); break;
+            case '\v': c = CORD_cat(c, "\\v"); break;
+            default: {
+                // <ctype.h> functions require an unsigned char value
+                unsigned char byte = (unsigned char)*str;
+                if (isprint(byte))
+                    c = CORD_cat_char(c, *str);
+                else
+                    // A three-digit octal escape is used instead of \xNN:
+                    // a hex escape has no length limit and would absorb a
+                    // following character that happens to be a hex digit.
+                    CORD_sprintf(&c, "%r\\%03o", c, (unsigned)byte);
+                break;
+            }
+            }
+        }
+        return CORD_cat_char(c, '"');
+    }
+    case StringJoin: {
+        // Fold the chunks into nested CORD_cat(...) calls
+        CORD c = NULL;
+        for (ast_list_t *chunk = Match(ast, StringJoin)->children; chunk; chunk = chunk->next) {
+            if (c) CORD_sprintf(&c, "CORD_cat(%r, %r)", c, compile(chunk->ast));
+            else c = compile(chunk->ast);
+        }
+        return c;
+    }
+    case Interp: {
+        return CORD_asprintf("__cord(%r)", compile(Match(ast, Interp)->value));
+    }
+    case Block: {
+        CORD c = NULL;
+        for (ast_list_t *stmt = Match(ast, Block)->statements; stmt; stmt = stmt->next) {
+            c = CORD_cat(c, compile(stmt->ast));
+            c = CORD_cat(c, ";\n");
+        }
+        return c;
+    }
+    case Declare: {
+        auto decl = Match(ast, Declare);
+        // Both sides are AST nodes and must be compiled before they can be
+        // formatted with %r.
+        return CORD_asprintf("auto %r = %r", compile(decl->var), compile(decl->value));
+    }
+    case Assign: {
+        auto assign = Match(ast, Assign);
+        CORD c = NULL;
+        for (ast_list_t *target = assign->targets, *value = assign->values; target && value; target = target->next, value = value->next) {
+            // Append to what has been generated so far instead of replacing it
+            CORD_sprintf(&c, "%r%r = %r", c, compile(target->ast), compile(value->ast));
+            if (target->next) c = CORD_cat(c, ", ");
+        }
+        return c;
+    }
+    // Min, Max,
+    // Array, Table, TableEntry,
+    case FunctionDef: {
+        auto fndef = Match(ast, FunctionDef);
+        // A missing return type is emitted as `void`
+        CORD c = CORD_asprintf("%r %r(", fndef->ret_type ? compile_type(fndef->ret_type) : "void", compile(fndef->name));
+        for (arg_list_t *arg = fndef->args; arg; arg = arg->next) {
+            CORD_sprintf(&c, "%r%r %s", c, compile_type(arg->type), arg->var->name);
+            if (arg->next) c = CORD_cat(c, ", ");
+        }
+        c = CORD_cat(c, ") {\n");
+        c = CORD_cat(c, compile(fndef->body));
+        c = CORD_cat(c, "}");
+        return c;
+    }
+    case FunctionCall: {
+        auto call = Match(ast, FunctionCall);
+        CORD c = CORD_cat_char(compile(call->fn), '(');
+        for (ast_list_t *arg = call->args; arg; arg = arg->next) {
+            c = CORD_cat(c, compile(arg->ast));
+            if (arg->next) c = CORD_cat(c, ", ");
+        }
+        return CORD_cat_char(c, ')');
+    }
+    // Lambda,
+    // FunctionCall, KeywordArg,
+    // Block,
+    // For, While, If,
+    // Reduction,
+    // Skip, Stop, Pass,
+    // Return,
+    // Extern,
+    // TypeDef,
+    // Index, FieldAccess,
+    // DocTest,
+    // Use,
+    // LinkerDirective,
+    case Unknown: errx(1, "Unknown AST");
+    default: break;
+    }
+    return NULL;
+}
+
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/compile.h b/compile.h
new file mode 100644
index 00000000..790b33a4
--- /dev/null
+++ b/compile.h
@@ -0,0 +1,11 @@
+#pragma once
+
+#include <gc/cord.h>
+#include <gc.h>
+#include <stdio.h>
+
+// ast.h provides ast_t, which the prototype below needs; including it here
+// keeps this header usable on its own.
+#include "ast.h"
+#include "util.h"
+
+// Translate an AST into a CORD holding the generated code.
+CORD compile(ast_t *ast);
+
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/files.c b/files.c
new file mode 100644
index 00000000..51a8740c
--- /dev/null
+++ b/files.c
@@ -0,0 +1,317 @@
+//
+// files.c - Implementation of some file loading functionality.
+//
+
+#include <err.h>
+#include <fcntl.h>
+#include <gc.h>
+#include <libgen.h>
+#include <limits.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/param.h>
+
+#include "files.h"
+#include "util.h"
+
+static const int tabstop = 4;
+
+//
+// Resolve `path` to a canonical absolute path (via realpath()).
+//
+//   "~" or "~/..."  is resolved against $HOME
+//   "." or "./..."  is resolved against the directory of `relative_to`
+//   "/..."          is already absolute
+//   anything else   is looked up in each directory of the colon-separated
+//                   $SSSPATH, in order
+//
+// `relative_to` is the file the path was found in; NULL or "/dev/stdin"
+// means the current directory. Returns a heap_str() copy of the resolved
+// path, or NULL if `path` is NULL/empty or nothing could be resolved.
+//
+public char *resolve_path(const char *path, const char *relative_to)
+{
+    if (!relative_to || streq(relative_to, "/dev/stdin")) relative_to = ".";
+    if (!path || strlen(path) == 0) return NULL;
+
+    char buf[PATH_MAX] = {0};
+    const char *home = getenv("HOME");
+    if (streq(path, "~") || strncmp(path, "~/", 2) == 0) {
+        if (!home) return NULL; // Nothing to expand "~" to
+        char *resolved = realpath(heap_strf("%s%s", home, path+1), buf);
+        if (resolved) return heap_str(resolved);
+    } else if (streq(path, ".") || strncmp(path, "./", 2) == 0) {
+        // dirname() may modify its argument, so it gets a private copy
+        char *relative_dir = dirname(heap_str(relative_to));
+        char *resolved = realpath(heap_strf("%s/%s", relative_dir, path), buf);
+        if (resolved) return heap_str(resolved);
+    } else if (path[0] == '/') {
+        // Absolute path:
+        char *resolved = realpath(path, buf);
+        if (resolved) return heap_str(resolved);
+    } else {
+        // Relative path: try each search directory in turn
+        const char *search_path = getenv("SSSPATH");
+        // strsep() modifies the string it walks, so work on a copy; with no
+        // $SSSPATH the loop below simply does not run.
+        char *blpath = search_path ? heap_str(search_path) : NULL;
+        char *relative_dir = dirname(heap_str(relative_to));
+        for (char *dir; (dir = strsep(&blpath, ":")); ) {
+            char *candidate;
+            if (dir[0] == '/') {
+                candidate = heap_strf("%s/%s", dir, path);
+            } else if (dir[0] == '~' && (dir[1] == '\0' || dir[1] == '/')) {
+                if (!home) continue;
+                // Skip the leading '~' that $HOME replaces
+                candidate = heap_strf("%s%s/%s", home, dir+1, path);
+            } else if (streq(dir, ".") || streq(dir, "..") || strncmp(dir, "./", 2) == 0 || strncmp(dir, "../", 3) == 0) {
+                // Explicitly relative search directories are relative to the
+                // directory of the file the path was found in.
+                candidate = heap_strf("%s/%s/%s", relative_dir, dir, path);
+            } else {
+                candidate = heap_strf("%s/%s", dir, path);
+            }
+            char *resolved = realpath(candidate, buf);
+            if (resolved) return heap_str(resolved);
+        }
+    }
+    return NULL;
+}
+
+//
+// Read all of `file` into memory and build the per-line index for it.
+//
+// `file` is closed before returning. Returns NULL if `file` is NULL or the
+// in-memory buffer cannot be created. For every line the byte offset of its
+// start, its indentation (a space counts 1, a tab counts `tabstop`) and
+// whether it contains only whitespace are recorded.
+//
+static sss_file_t *_load_file(const char* filename, FILE *file)
+{
+    if (!file) return NULL;
+
+    size_t file_size = 0, line_cap = 0;
+    char *file_buf = NULL, *line_buf = NULL;
+    FILE *mem = open_memstream(&file_buf, &file_size);
+    if (!mem) {
+        fclose(file);
+        return NULL;
+    }
+
+    sss_file_t *ret = new(sss_file_t, .filename=filename);
+    int64_t line_len = 0;
+    while ((line_len = getline(&line_buf, &line_cap, file)) >= 0) {
+        // file_size is current here because the stream is flushed after
+        // every line that is written to it.
+        sss_line_t line_info = {.offset=(int64_t)file_size, .indent=0, .is_empty=false};
+        char *p;
+        for (p = line_buf; *p == ' ' || *p == '\t'; ++p)
+            line_info.indent += *p == ' ' ? 1 : tabstop;
+        // A line is empty when nothing but the line terminator (or the end
+        // of the input) follows its leading whitespace.
+        line_info.is_empty = *p == '\r' || *p == '\n' || *p == '\0';
+        if (ret->line_capacity <= ret->num_lines) {
+            ret->lines = GC_REALLOC(ret->lines, sizeof(sss_line_t)*(ret->line_capacity += 32));
+        }
+        ret->lines[ret->num_lines++] = line_info;
+        fwrite(line_buf, sizeof(char), (size_t)line_len, mem);
+        fflush(mem);
+    }
+    free(line_buf); // getline() allocates this buffer with malloc()
+    fclose(file);
+
+    // Closing the stream finalizes file_buf and file_size (also when nothing
+    // was ever written), so do that before copying out of the buffer.
+    fclose(mem);
+    char *copy = GC_MALLOC_ATOMIC(file_size+1);
+    if (file_size > 0)
+        memcpy(copy, file_buf, file_size);
+    copy[file_size] = '\0';
+    free(file_buf);
+    ret->text = copy;
+    ret->len = (int64_t)file_size;
+
+    ret->relative_filename = filename;
+    if (filename && filename[0] != '<' && !streq(filename, "/dev/stdin")) {
+        // Convert to relative path (if applicable). Either lookup can fail
+        // (e.g. for an empty filename), in which case the name is kept as-is.
+        const char *resolved = resolve_path(filename, ".");
+        char buf[PATH_MAX];
+        char *cwd = getcwd(buf, sizeof(buf));
+        if (resolved && cwd) {
+            size_t cwd_len = strlen(cwd);
+            if (strncmp(cwd, resolved, cwd_len) == 0 && resolved[cwd_len] == '/')
+                ret->relative_filename = &resolved[cwd_len+1];
+        }
+    }
+    return ret;
+}
+
+//
+// Read an entire file into memory.
+//
+// An empty filename means "read from standard input"; any other name is
+// opened for reading. A file that cannot be opened yields NULL.
+public sss_file_t *sss_load_file(const char* filename)
+{
+    FILE *input;
+    if (filename[0] == '\0')
+        input = stdin;
+    else
+        input = fopen(filename, "r");
+    return _load_file(filename, input);
+}
+
+//
+// Create a virtual file from a string.
+//
+// The text is exposed as a read-only memory stream (sized to include the
+// terminating NUL byte) and then loaded like any other file.
+public sss_file_t *sss_spoof_file(const char* filename, const char *text)
+{
+    size_t size_with_nul = strlen(text) + 1;
+    FILE *stream = fmemopen((char*)text, size_with_nul, "r");
+    return _load_file(filename, stream);
+}
+
+//
+// Given a pointer, determine which line number it points to (1-indexed)
+//
+// Returns 0 when `p` lies before the start of the file's text.
+public int64_t sss_get_line_number(sss_file_t *f, const char *p)
+{
+    if (p < f->text) return 0;
+
+    // Binary search over the line start offsets
+    const int64_t target = (int64_t)(p - f->text);
+    int64_t first = 0, last = (int64_t)f->num_lines - 1;
+    while (first <= last) {
+        const int64_t probe = (first + last) / 2;
+        const int64_t line_start = f->lines[probe].offset;
+        if (line_start < target)
+            first = probe + 1;
+        else if (line_start > target)
+            last = probe - 1;
+        else
+            return probe + 1; // p is exactly at the start of this line
+    }
+    // No exact hit: `first` is the 1-indexed number of the last line that
+    // starts before p
+    return first;
+}
+
+//
+// Given a pointer, determine which line column it points to.
+//
+// Columns are 1-indexed. Returns 0 when `p` is not on any line (it lies
+// before the text, or the file has no lines).
+public int64_t sss_get_line_column(sss_file_t *f, const char *p)
+{
+    int64_t line_no = sss_get_line_number(f, p);
+    if (line_no < 1) return 0; // No line to index: avoid reading lines[-1]
+    sss_line_t *line = &f->lines[line_no-1];
+    return 1 + (int64_t)(p - (f->text + line->offset));
+}
+
+//
+// Given a pointer, get the indentation of the line it's on.
+//
+// Returns 0 when `p` is not on any line (it lies before the text, or the
+// file has no lines).
+public int64_t sss_get_indent(sss_file_t *f, const char *p)
+{
+    int64_t line_no = sss_get_line_number(f, p);
+    if (line_no < 1) return 0; // No line to index: avoid reading lines[-1]
+    sss_line_t *line = &f->lines[line_no-1];
+    return line->indent;
+}
+
+//
+// Return a pointer to the line with the specified line number (1-indexed)
+//
+// Returns NULL for a line number outside 1..num_lines (including zero and
+// negative numbers).
+public const char *sss_get_line(sss_file_t *f, int64_t line_number)
+{
+    if (line_number < 1 || line_number > (int64_t)f->num_lines) return NULL;
+    sss_line_t *line = &f->lines[line_number-1];
+    return f->text + line->offset;
+}
+
+//
+// Return a value like /foo:line:col
+//
+// The result is built with heap_strf() from the file's name and the
+// line/column of `p`.
+public const char *sss_get_file_pos(sss_file_t *f, const char *p)
+{
+    int64_t line = sss_get_line_number(f, p);
+    int64_t column = sss_get_line_column(f, p);
+    return heap_strf("%s:%ld:%ld", f->filename, line, column);
+}
+
+//
+// Write the on-screen representation of source character `c` to `out`,
+// using `print_char` as the character that is actually printed.
+//
+// A tab in the source (`c == '\t'`) is expanded to as many copies of
+// `print_char` as are needed to reach the next multiple of `tabstop`; any
+// other character prints exactly one `print_char`. A `print_char` that is
+// itself a tab is printed as a space. `*column` is advanced by one for each
+// character position.
+//
+// Returns the number of characters successfully written.
+//
+static int fputc_column(FILE *out, char c, char print_char, int *column)
+{
+    int printed = 0;
+    if (print_char == '\t') print_char = ' ';
+    int width = (c == '\t') ? tabstop - (*column % tabstop) : 1;
+    for (; width > 0; --width) {
+        // fputc() returns the character it wrote (or EOF), not a count
+        if (fputc(print_char, out) != EOF)
+            ++printed;
+        ++*column;
+    }
+    return printed;
+}
+
+//
+// Print a span from a file
+//
+//
+// Print the lines of `file` that contain the span [start, end), with the
+// span highlighted.
+//
+//   hl_color       escape sequence used for the highlight (ignored when
+//                  `use_color` is false)
+//   context_lines  0 prints just the span's text; otherwise the span's lines
+//                  plus (context_lines - 1) lines on either side are printed
+//   use_color      when false, no escape sequences are emitted and the span
+//                  is instead marked on a separate line below the one it
+//                  starts on
+//
+// The file's relative filename is printed first. When more than about a
+// dozen lines would be printed, the middle ones are replaced by a
+// "lines omitted" note. Returns the number of characters written (0 if
+// `file` is NULL).
+//
+public int fprint_span(FILE *out, sss_file_t *file, const char *start, const char *end, const char *hl_color, int64_t context_lines, bool use_color)
+{
+    if (!file) return 0;
+
+    // Handle spans that come from multiple files:
+    if (start < file->text || start > file->text + file->len)
+        start = end;
+    if (end < file->text || end > file->text + file->len)
+        end = start;
+    // Just in case neither end of the span came from this file:
+    if (end < file->text || end > file->text + file->len)
+        start = end = file->text;
+
+    const char *lineno_fmt, *normal_color, *empty_marker;
+    bool print_carets = false;
+    int printed = 0;
+    if (use_color) {
+        lineno_fmt = "\x1b[0;2m%*lu\x1b(0\x78\x1b(B\x1b[m ";
+        normal_color = "\x1b[m";
+        empty_marker = "\x1b(0\x61\x1b(B";
+        printed += fprintf(out, "\x1b[33;4;1m%s\x1b[m\n", file->relative_filename);
+    } else {
+        lineno_fmt = "%*lu| ";
+        hl_color = "";
+        normal_color = "";
+        empty_marker = " ";
+        print_carets = true;
+        printed += fprintf(out, "%s\n", file->relative_filename);
+    }
+
+    if (context_lines == 0) {
+        // Count the filename header printed above as well
+        printed += fprintf(out, "%s%.*s%s", hl_color, (int)(end - start), start, normal_color);
+        return printed;
+    }
+
+    int64_t start_line = sss_get_line_number(file, start),
+            end_line = sss_get_line_number(file, end);
+
+    int64_t first_line = start_line - (context_lines - 1),
+            last_line = end_line + (context_lines - 1);
+
+    if (first_line < 1) first_line = 1;
+    if (last_line > file->num_lines) last_line = file->num_lines;
+
+    // Width of the line number column
+    int digits = 1;
+    for (int64_t i = last_line; i > 0; i /= 10) ++digits;
+
+    for (int64_t line_no = first_line; line_no <= last_line; ++line_no) {
+        if (line_no > first_line + 5 && line_no < last_line - 5) {
+            if (use_color)
+                printed += fprintf(out, "\x1b[0;2;3;4m     ... %ld lines omitted ...     \x1b[m\n", (last_line - first_line) - 11);
+            else
+                printed += fprintf(out, "     ... %ld lines omitted ...\n", (last_line - first_line) - 11);
+            line_no = last_line - 6;
+            continue;
+        }
+
+        printed += fprintf(out, lineno_fmt, digits, (unsigned long)line_no);
+        const char *line = sss_get_line(file, line_no);
+        if (!line) break;
+
+        int column = 0;
+        const char *p = line;
+        // Before match
+        for (; *p && *p != '\r' && *p != '\n' && p < start; ++p)
+            printed += fputc_column(out, *p, *p, &column);
+
+        // Zero-width matches
+        if (p == start && start == end) {
+            printed += fprintf(out, "%s%s%s", hl_color, empty_marker, normal_color);
+            column += 1;
+        }
+
+        // Inside match. (fprintf() is used for plain strings throughout
+        // because, unlike fputs(), it returns the number of characters.)
+        if (start <= p && p < end) {
+            printed += fprintf(out, "%s", hl_color);
+            for (; *p && *p != '\r' && *p != '\n' && p < end; ++p)
+                printed += fputc_column(out, *p, *p, &column);
+            printed += fprintf(out, "%s", normal_color);
+        }
+
+        // After match
+        for (; *p && *p != '\r' && *p != '\n'; ++p)
+            printed += fputc_column(out, *p, *p, &column);
+
+        printed += fprintf(out, "\n");
+
+        // Without color, mark the span on a line of its own underneath the
+        // line on which the span starts.
+        const char *eol = strchrnul(line, '\n');
+        if (print_carets && start >= line && start < eol) {
+            // Blank gutter as wide as the line number column
+            printed += fprintf(out, "%*s", digits, "");
+            printed += fprintf(out, ": ");
+            int marker_column = 0;
+            for (const char *sp = line; *sp && *sp != '\n'; ++sp) {
+                char print_char;
+                if (sp < start)
+                    print_char = ' ';
+                else if (sp == start && sp == end)
+                    print_char = '^';
+                else if (sp >= start && sp < end)
+                    print_char = '-';
+                else
+                    print_char = ' ';
+                printed += fputc_column(out, *sp, print_char, &marker_column);
+            }
+            printed += fprintf(out, "\n");
+        }
+    }
+    fflush(out);
+    return printed;
+}
+
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/files.h b/files.h
new file mode 100644
index 00000000..0ff91568
--- /dev/null
+++ b/files.h
@@ -0,0 +1,43 @@
+//
+// files.h - Definitions of an API for loading files.
+//
+#pragma once
+
+#include <stdalign.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <unistd.h>
+
+// One entry of sss_file_t.lines.
+// NOTE(review): field meanings are inferred from their names -- confirm against files.c.
+typedef struct {
+ int64_t offset; // presumably where this line starts within the file's text
+ int64_t indent:63; // 63-bit bitfield (presumably the line's indentation)
+ bool is_empty:1; // 1-bit flag (presumably set for blank lines)
+} sss_line_t;
+
+// A source file held in memory, plus bookkeeping for its lines.
+typedef struct {
+ const char *filename, *relative_filename; // relative_filename is the name printed in parse error messages
+ const char *text; // start of the file's contents
+ int64_t len; // the parser treats text + len as the end of the contents
+ int64_t num_lines, line_capacity; // presumably used/allocated entry counts for `lines` -- TODO confirm
+ sss_line_t *lines;
+} sss_file_t;
+
+// NOTE(review): presumably resolves `path` relative to `relative_to`; confirm against files.c.
+char *resolve_path(const char *path, const char *relative_to);
+// Load a file by name. Unlike sss_spoof_file() this is not marked returns_nonnull,
+// so callers should presumably check the result for NULL.
+__attribute__((nonnull))
+sss_file_t *sss_load_file(const char *filename);
+// Build a file object from in-memory text; never returns NULL (returns_nonnull).
+__attribute__((nonnull, returns_nonnull))
+sss_file_t *sss_spoof_file(const char *filename, const char *text);
+// Line number of the position `p` within file `f` (printed in "file:line.column" error messages).
+__attribute__((pure, nonnull))
+int64_t sss_get_line_number(sss_file_t *f, const char *p);
+// Column of the position `p` within file `f` (printed in "file:line.column" error messages).
+__attribute__((pure, nonnull))
+int64_t sss_get_line_column(sss_file_t *f, const char *p);
+// Indentation associated with the position `p`; the parser compares these values to detect indented blocks.
+__attribute__((pure, nonnull))
+int64_t sss_get_indent(sss_file_t *f, const char *p);
+// Pointer to the text of the given line; callers check the result for NULL.
+__attribute__((pure, nonnull))
+const char *sss_get_line(sss_file_t *f, int64_t line_number);
+// NOTE(review): presumably a printable description of the position `p` -- confirm against files.c.
+__attribute__((pure, nonnull))
+const char *sss_get_file_pos(sss_file_t *f, const char *p);
+// Print the source lines around [start, end) to `out`, highlighting the span with `hl_color`.
+// Flushes `out` and returns the accumulated count of what was printed.
+int fprint_span(FILE *out, sss_file_t *file, const char *start, const char *end, const char *hl_color, int64_t context_lines, bool use_color);
+
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/foo.c b/foo.c
new file mode 100644
index 00000000..0d78c720
--- /dev/null
+++ b/foo.c
@@ -0,0 +1,12 @@
+#include <stdio.h>
+
+// Type-dispatching print: ints go through printf, `char *` values print the
+// caller's local `s`, and any other type prints "???".
+#define say(val) _Generic(val, int: printf("%d\n", val), char *: puts(s), default: puts("???"))
+// Wrap a list of statements in a braced block.
+#define all(...) { __VA_ARGS__; }
+
+// Scratch program exercising _Generic dispatch and a variadic statement macro.
+int main(void) {
+    const char *s = "Hi";
+    int x = 23;
+    say(x); // int branch
+    say(s); // `const char *` is not `char *`, so this takes the default branch
+    all(say("one"); say(2))
+    return 0;
+}
diff --git a/nextlang.c b/nextlang.c
new file mode 100644
index 00000000..617adc98
--- /dev/null
+++ b/nextlang.c
@@ -0,0 +1,23 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <gc.h>
+#include <gc/cord.h>
+
+#include "ast.h"
+#include "parse.h"
+#include "compile.h"
+
+//
+// Entry point: parse the file named by argv[1], print its AST as text, then
+// print the compiled output. Returns 1 on a usage error or if the file could
+// not be loaded, 0 otherwise.
+//
+int main(int argc, char *argv[])
+{
+    if (argc < 2) {
+        fprintf(stderr, "Usage: %s <file>\n", argc > 0 ? argv[0] : "nextlang");
+        return 1;
+    }
+
+    // sss_load_file() is not declared returns_nonnull (unlike sss_spoof_file()),
+    // so don't hand a possibly-NULL file to the parser.
+    sss_file_t *f = sss_load_file(argv[1]);
+    if (!f) {
+        fprintf(stderr, "Could not load file: %s\n", argv[1]);
+        return 1;
+    }
+
+    // A NULL on_err means parse errors terminate the process rather than longjmp
+    ast_t *ast = parse_file(f, NULL);
+    const char *s = ast_to_str(ast);
+    puts(s);
+    CORD c = compile(ast);
+    CORD_put(c, stdout);
+    return 0;
+}
+
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/parse.c b/parse.c
new file mode 100644
index 00000000..845b3ca0
--- /dev/null
+++ b/parse.c
@@ -0,0 +1,1759 @@
+// Parse SSS code using recursive descent
+#include <ctype.h>
+#include <errno.h>
+#include <gc.h>
+#include <libgen.h>
+#include <linux/limits.h>
+#include <setjmp.h>
+#include <signal.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <string.h>
+#include <unictype.h>
+#include <unistr.h>
+
+#include "ast.h"
+#include "util.h"
+
+// State threaded through every parser function.
+typedef struct {
+ sss_file_t *file; // the file being parsed; used for line/column lookups and stamped onto AST nodes
+ jmp_buf *on_err; // if non-NULL, parse errors longjmp() here instead of terminating the process
+} parse_ctx_t;
+
+// Signature shared by the AST parsers: take a context and a position, return an AST node or NULL.
+typedef ast_t* (parser_t)(parse_ctx_t*,const char*);
+
+extern void builtin_fail(const char *fmt, ...);
+
+// Declares or defines a parser function with the standard (ctx, pos) parameters.
+#define PARSER(name) ast_t *name(parse_ctx_t *ctx, const char *pos)
+
+// Defines a placeholder parser that ignores its arguments and always returns NULL.
+#define STUB_PARSER(name) PARSER(name) { (void)ctx; (void)pos; return NULL; }
+
+// Precedence level for each binary operator, indexed by binop_e.
+// NOTE(review): presumably lower numbers bind tighter (POWER=1 ... AND/OR/XOR=9);
+// confirm against the binary-operator parsing loop.
+int op_tightness[] = {
+ [BINOP_POWER]=1,
+ [BINOP_MULT]=2, [BINOP_DIVIDE]=2, [BINOP_MOD]=2, [BINOP_MOD1]=2,
+ [BINOP_PLUS]=3, [BINOP_MINUS]=3,
+ [BINOP_CONCAT]=4,
+ [BINOP_LSHIFT]=5, [BINOP_RSHIFT]=5,
+ [BINOP_MIN]=6, [BINOP_MAX]=6,
+ [BINOP_EQ]=7, [BINOP_NE]=7,
+ [BINOP_LT]=8, [BINOP_LE]=8, [BINOP_GT]=8, [BINOP_GE]=8,
+ [BINOP_AND]=9, [BINOP_OR]=9, [BINOP_XOR]=9,
+};
+
+// Reserved words, terminated by a NULL sentinel. get_id() refuses to return
+// any word that appears in this list.
+static const char *keywords[] = {
+ "yes", "xor", "while", "use", "then", "struct", "stop", "skip", "return",
+ "or", "not", "no", "mod1", "mod", "in", "if", "func", "for", "extern",
+ "enum", "else", "do", "and", "_mix_", "_min_", "_max_",
+ NULL,
+};
+
+// Values for the `is_extern` argument of parse_fncall_suffix()
+enum {NORMAL_FUNCTION=0, EXTERN_FUNCTION=1};
+
+// Text-level primitives. They take a pointer to the current position so that
+// they can advance it past whatever they consume.
+static inline size_t some_of(const char **pos, const char *allow);
+static inline size_t some_not(const char **pos, const char *forbid);
+static inline size_t spaces(const char **pos);
+static inline size_t whitespace(const char **pos);
+static inline size_t match(const char **pos, const char *target);
+static inline void expect_str(parse_ctx_t *ctx, const char *start, const char **pos, const char *target, const char *fmt, ...);
+static inline void expect_closing(parse_ctx_t *ctx, const char **pos, const char *target, const char *fmt, ...);
+static inline size_t match_word(const char **pos, const char *word);
+static inline const char* get_word(const char **pos);
+static inline const char* get_id(const char **pos);
+static inline bool comment(const char **pos);
+static inline bool indent(parse_ctx_t *ctx, const char **pos);
+static inline binop_e match_binary_operator(const char **pos);
+// Suffix parsers: given an already-parsed node, try to parse a suffix that follows it.
+static ast_t *parse_fncall_suffix(parse_ctx_t *ctx, ast_t *fn, bool is_extern);
+static ast_t *parse_field_suffix(parse_ctx_t *ctx, ast_t *lhs);
+static ast_t *parse_index_suffix(parse_ctx_t *ctx, ast_t *lhs);
+static arg_list_t *parse_args(parse_ctx_t *ctx, const char **pos, bool allow_unnamed);
+// AST parsers (see the PARSER macro for the shared signature)
+static PARSER(parse_for);
+static PARSER(parse_while);
+static PARSER(parse_if);
+static PARSER(parse_expr);
+static PARSER(parse_extended_expr);
+static PARSER(parse_term_no_suffix);
+static PARSER(parse_term);
+static PARSER(parse_inline_block);
+static PARSER(parse_statement);
+static PARSER(parse_block);
+static PARSER(parse_opt_indented_block);
+static PARSER(parse_var);
+static PARSER(parse_type_def);
+static PARSER(parse_func_def);
+static PARSER(parse_extern);
+static PARSER(parse_declaration);
+static PARSER(parse_doctest);
+static PARSER(parse_use);
+static PARSER(parse_linker);
+static PARSER(parse_namespace);
+
+// Type parsers return type_ast_t nodes rather than ast_t nodes
+static type_ast_t *parse_type(parse_ctx_t *ctx, const char *pos);
+static type_ast_t *parse_enum_type(parse_ctx_t *ctx, const char *pos);
+
+//
+// Report a parse error on stderr (location, formatted message, and the
+// offending source span), then either longjmp to ctx->on_err or terminate.
+//
+__attribute__((noreturn))
+static void vparser_err(parse_ctx_t *ctx, const char *start, const char *end, const char *fmt, va_list args) {
+    // Color is only used when stderr is a terminal and NO_COLOR is unset
+    const bool colorize = isatty(STDERR_FILENO) && !getenv("NO_COLOR");
+
+    // Headline: "<file>:<line>.<column>: <message>"
+    if (colorize)
+        fputs("\x1b[31;1;7m", stderr);
+    fprintf(stderr, "%s:%ld.%ld: ", ctx->file->relative_filename, sss_get_line_number(ctx->file, start),
+            sss_get_line_column(ctx->file, start));
+    vfprintf(stderr, fmt, args);
+    if (colorize)
+        fputs(" \x1b[m", stderr);
+    fputs("\n\n", stderr);
+
+    // Source excerpt with two lines of context around the span
+    fprint_span(stderr, ctx->file, start, end, "\x1b[31;1;7m", 2, colorize);
+    fputs("\n", stderr);
+
+    // Hand control back to whoever installed a recovery point, if anyone did
+    if (ctx->on_err != NULL)
+        longjmp(*ctx->on_err, 1);
+
+    raise(SIGABRT);
+    exit(1);
+}
+
+//
+// Variadic front end for vparser_err(): packages the format arguments into a
+// va_list and forwards everything. Never returns.
+//
+__attribute__((noreturn))
+static void parser_err(parse_ctx_t *ctx, const char *start, const char *end, const char *fmt, ...) {
+    va_list ap;
+    va_start(ap, fmt);
+    vparser_err(ctx, start, end, fmt, ap);
+    va_end(ap); // not reached: vparser_err() is noreturn
+}
+
+//
+// Convert an escape sequence like \n to a heap-allocated string.
+// *out must point at the backslash; on return it points just past the escape.
+// Supported forms:
+//   \a \b \e \f \n \r \t \v \_   named escapes (\_ is a space)
+//   \xHH                         exactly two hex digits
+//   \OOO                         exactly three octal digits
+//   \<other>                     that character, literally
+//
+const char *unescape(const char **out) {
+    static const char *unescapes[256] = {['a']="\a",['b']="\b",['e']="\x1b",['f']="\f",['n']="\n",['r']="\r",['t']="\t",['v']="\v",['_']=" "};
+    const char *escape = *out;
+    assert(*escape == '\\');
+    // Index the table with an unsigned value: a plain `char` may be negative for bytes >= 0x80
+    unsigned char code = (unsigned char)escape[1];
+    if (unescapes[code]) {
+        *out = escape + 2;
+        return heap_str(unescapes[code]);
+    } else if (code == 'x' && isxdigit((unsigned char)escape[2]) && isxdigit((unsigned char)escape[3])) {
+        // Convert exactly two digits, so that a following hex digit (as in "\x41F") is not absorbed
+        const char hex[] = {escape[2], escape[3], '\0'};
+        char c = (char)strtol(hex, NULL, 16);
+        *out = escape + 4;
+        return heap_strn(&c, 1);
+    } else if ('0' <= escape[1] && escape[1] <= '7' && '0' <= escape[2] && escape[2] <= '7' && '0' <= escape[3] && escape[3] <= '7') {
+        // Likewise, exactly three octal digits
+        const char oct[] = {escape[1], escape[2], escape[3], '\0'};
+        char c = (char)strtol(oct, NULL, 8);
+        *out = escape + 4;
+        return heap_strn(&c, 1);
+    } else if (code == '\0') {
+        // Backslash at the very end of the input: don't step past the terminator
+        *out = escape + 1;
+        return heap_strn(escape+1, 1);
+    } else {
+        *out = escape + 2;
+        return heap_strn(escape+1, 1);
+    }
+}
+
+///////////////////////////////////////////////////////////////////////////////////////////////////////////
+///////////////////////////// Text-based parsing primitives ///////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// Advance *pos past the longest run of characters drawn from `allow`
+// and return how many characters were skipped.
+//
+size_t some_of(const char **pos, const char *allow) {
+    const char *begin = *pos;
+    *pos = begin + strspn(begin, allow);
+    return (size_t)(*pos - begin);
+}
+
+//
+// Advance *pos up to (not past) the first character found in `forbid`, or to
+// the end of the string, and return how many characters were skipped.
+//
+size_t some_not(const char **pos, const char *forbid) {
+    const char *begin = *pos;
+    *pos = begin + strcspn(begin, forbid);
+    return (size_t)(*pos - begin);
+}
+
+// Skip spaces and tabs (but not newlines); returns the number of characters skipped.
+size_t spaces(const char **pos) {
+    return some_of(pos, " \t");
+}
+//
+// Skip any mix of spaces, tabs, line breaks, and comments.
+// Returns the total number of characters skipped.
+//
+size_t whitespace(const char **pos) {
+    const char *origin = *pos;
+    for (;;) {
+        if (some_of(pos, " \t\r\n")) continue;
+        if (comment(pos)) continue;
+        break; // neither made progress
+    }
+    return (size_t)(*pos - origin);
+}
+
+//
+// If the text at *pos begins with `target`, advance past it and return its
+// length; otherwise leave *pos alone and return 0.
+//
+size_t match(const char **pos, const char *target) {
+    const size_t n = strlen(target);
+    if (strncmp(*pos, target, n) == 0) {
+        *pos += n;
+        return n;
+    }
+    return 0;
+}
+
+//
+// Decode the UTF-8 character at `pos` and report whether it has the Unicode
+// XID_Continue property (i.e. whether it could continue an identifier).
+//
+static inline bool is_xid_continue_next(const char *pos) {
+    ucs4_t codepoint = 0;
+    u8_next(&codepoint, (const uint8_t*)pos);
+    return uc_is_property_xid_continue(codepoint);
+}
+
+//
+// Require `target` at *pos (after optional spaces/tabs), advancing past it.
+// When `target` ends in a letter, digit, or underscore, it must not be followed
+// by another identifier character. On failure, a parse error spanning
+// start..*pos is reported with the given message.
+//
+static void expect_str(
+    parse_ctx_t *ctx, const char *start, const char **pos, const char *target, const char *fmt, ...) {
+    spaces(pos);
+    if (match(pos, target)) {
+        char tail = target[strlen(target)-1];
+        bool wordlike = isalpha(tail) || isdigit(tail) || tail == '_';
+        // Punctuation always counts; a word only counts if it ends at a word boundary
+        if (!wordlike || !is_xid_continue_next(*pos))
+            return;
+    }
+
+    if (isatty(STDERR_FILENO) && !getenv("NO_COLOR"))
+        fputs("\x1b[31;1;7m", stderr);
+    va_list ap;
+    va_start(ap, fmt);
+    vparser_err(ctx, start, *pos, fmt, ap);
+    va_end(ap);
+}
+
+//
+// Helper for matching closing parens with good error messages.
+// Skips spaces/tabs and consumes `closing` if it is there. Otherwise reports a
+// parse error whose span runs from the original *pos to whichever comes first:
+// the next occurrence of `closing` or the end of the current line.
+//
+static void expect_closing(
+    parse_ctx_t *ctx, const char **pos, const char *closing, const char *fmt, ...) {
+    const char *start = *pos;
+    spaces(pos);
+    if (match(pos, closing))
+        return;
+
+    // strchr()/strstr() return NULL when nothing is found, and that NULL must
+    // not become the span end. Default to the end of the line (or of the text)
+    // and only shorten the span if `closing` shows up before that.
+    const char *end = *pos + strcspn(*pos, "\n");
+    const char *next = strstr(*pos, closing);
+    if (next && next < end)
+        end = next;
+
+    if (isatty(STDERR_FILENO) && !getenv("NO_COLOR"))
+        fputs("\x1b[31;1;7m", stderr);
+    va_list args;
+    va_start(args, fmt);
+    vparser_err(ctx, start, end, fmt, args);
+    va_end(args);
+}
+
+// Run `parser` at *pos (after skipping spaces/tabs) and require it to succeed.
+// On success, *pos is advanced to the result's `end` and the result is the
+// value of the expression. On failure, a parse error spanning start..*pos is
+// reported with the trailing printf-style arguments.
+// Uses a GNU statement expression; `auto` here relies on a definition outside this chunk.
+#define expect(ctx, start, pos, parser, ...) ({ \
+ const char **_pos = pos; \
+ spaces(_pos); \
+ auto _result = parser(ctx, *_pos); \
+ if (!_result) { \
+ if (isatty(STDERR_FILENO) && !getenv("NO_COLOR")) \
+ fputs("\x1b[31;1;7m", stderr); \
+ parser_err(ctx, start, *_pos, __VA_ARGS__); \
+ } \
+ *_pos = _result->end; \
+ _result; })
+
+// Run `parser` at *pos (after skipping spaces/tabs). If it succeeds, advance
+// *pos to the result's `end`. Evaluates to the parser's result, which may be NULL.
+// Note that the skipped spaces/tabs stay skipped even when the parser fails.
+#define optional(ctx, pos, parser) ({ \
+ const char **_pos = pos; \
+ spaces(_pos); \
+ auto _result = parser(ctx, *_pos); \
+ if (_result) *_pos = _result->end; \
+ _result; })
+
+//
+// Match `word` as a whole word: after optional spaces/tabs, the text must
+// start with `word` and must not continue with another identifier character.
+// On success, *out is advanced past the word and strlen(word) is returned;
+// otherwise *out is untouched and 0 is returned.
+//
+size_t match_word(const char **out, const char *word) {
+    const char *cursor = *out;
+    spaces(&cursor);
+    if (match(&cursor, word) && !is_xid_continue_next(cursor)) {
+        *out = cursor;
+        return strlen(word);
+    }
+    return 0;
+}
+
+//
+// Skip over a balanced group that starts with `open` at *out.
+// The closing delimiter is the mirror of `open` for ( { < [, and `open` itself
+// for anything else. Nested open/close pairs are tracked by depth.
+// On success, *out is advanced just past the closing delimiter and true is
+// returned; otherwise *out is left untouched and false is returned.
+//
+bool match_group(const char **out, char open) {
+    static const char mirror_delim[256] = {['(']=')', ['{']='}', ['<']='>', ['[']=']'};
+    const char *pos = *out;
+    // A NUL "delimiter" would match the terminator and walk off the end of the text
+    if (open == '\0' || *pos != open) return false;
+    // Index with an unsigned value: a plain `char` may be negative for bytes >= 0x80
+    char mirrored = mirror_delim[(unsigned char)open];
+    char close = mirrored ? mirrored : open;
+    int depth = 1;
+    for (++pos; *pos && depth > 0; ++pos) {
+        if (*pos == close) --depth;
+        else if (*pos == open) ++depth;
+    }
+    if (depth != 0)
+        return false;
+    *out = pos;
+    return true;
+}
+
+//
+// Read an identifier-like word at *inout (after optional spaces/tabs): one
+// XID_Start character or underscore, followed by any number of XID_Continue
+// characters. Returns a heap copy of the word and advances *inout past it, or
+// returns NULL (leaving *inout alone) if no word starts here.
+//
+const char *get_word(const char **inout) {
+    const char *begin = *inout;
+    spaces(&begin);
+
+    ucs4_t cp;
+    const uint8_t *cursor = u8_next(&cp, (const uint8_t*)begin);
+    if (cp != '_' && !uc_is_property_xid_start(cp))
+        return NULL;
+
+    // Keep extending the word while the next character can continue an identifier
+    for (;;) {
+        const uint8_t *after = u8_next(&cp, cursor);
+        if (!after || !uc_is_property_xid_continue(cp))
+            break;
+        cursor = after;
+    }
+
+    const char *end = (const char*)cursor;
+    *inout = end;
+    return heap_strn(begin, (size_t)(end - begin));
+}
+
+//
+// Like get_word(), but rejects reserved keywords. On success, returns the
+// identifier and advances *inout; otherwise returns NULL and leaves *inout alone.
+//
+const char *get_id(const char **inout) {
+    const char *after = *inout;
+    const char *word = get_word(&after);
+    if (word == NULL)
+        return NULL;
+
+    for (const char **kw = keywords; *kw; ++kw) {
+        if (strcmp(word, *kw) == 0)
+            return NULL;
+    }
+
+    *inout = after;
+    return word;
+}
+
+//
+// If a "//" comment starts at *pos, skip to the end of the line (stopping
+// before the line break) and return true; otherwise return false.
+//
+bool comment(const char **pos) {
+    if (match(pos, "//")) {
+        some_not(pos, "\r\n");
+        return true;
+    }
+    return false;
+}
+
+//
+// Try to step into an indented block: skip whitespace and comments, and
+// succeed only if that lands on a later line whose indentation is greater
+// than the indentation at the starting position. On success, *out is moved to
+// the new position; otherwise it is left alone.
+//
+bool indent(parse_ctx_t *ctx, const char **out) {
+    const char *here = *out;
+    const char *next = here;
+    int64_t base_indent = sss_get_indent(ctx->file, here);
+    whitespace(&next);
+
+    bool same_line = sss_get_line_number(ctx->file, next) == sss_get_line_number(ctx->file, here);
+    if (same_line || sss_get_indent(ctx->file, next) <= base_indent)
+        return false;
+
+    *out = next;
+    return true;
+}
+
+//
+// Consume leading spaces (width 1) and tabs (width 4) from *out until at least
+// `target` columns of indentation have been consumed. Returns false, leaving
+// *out alone, if anything else is encountered first.
+//
+bool match_indentation(const char **out, int64_t target) {
+    const char *cursor = *out;
+    int64_t width = 0;
+    while (width < target) {
+        char c = *cursor++;
+        if (c == ' ')
+            width += 1;
+        else if (c == '\t')
+            width += 4;
+        else
+            return false;
+    }
+    *out = cursor;
+    return true;
+}
+
+///////////////////////////////////////////////////////////////////////////////////////////////////////////
+//////////////////////////////////// AST-based parsers /////////////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+//
+// Parse a parenthesized expression: '(' <extended expr> ')'
+// Returns NULL if there is no '(' or no expression inside it.
+//
+PARSER(parse_parens) {
+    const char *start = pos;
+    spaces(&pos);
+    if (match(&pos, "(") == 0)
+        return NULL;
+    whitespace(&pos);
+
+    ast_t *inner = optional(ctx, &pos, parse_extended_expr);
+    if (inner == NULL)
+        return NULL;
+    expect_closing(ctx, &pos, ")", "I wasn't able to parse the rest of this expression");
+
+    // Same node contents as the inner expression, but with a span that covers the parens
+    return new(ast_t, .file=(ctx)->file, .start=start, .end=pos,
+               .tag=inner->tag, .__data=inner->__data);
+}
+
+//
+// Helper for parse_int(): consume a run of characters from `allowed` (the
+// digits of the given base plus '_' separators) at *pos and return their
+// value. Reports a parse error spanning the whole literal if the value is out
+// of range. `literal_start` is only used for that error span.
+//
+static int64_t parse_int_digits(parse_ctx_t *ctx, const char *literal_start, const char **pos, const char *allowed, int base) {
+    const char *digits = *pos;
+    size_t span = strspn(digits, allowed);
+
+    // Copy the digits into a scratch buffer with the '_' separators removed
+    char *buf = GC_MALLOC_ATOMIC(span+1);
+    char *dest = buf;
+    for (const char *src = digits; src < digits + span; ++src) {
+        if (*src != '_') *(dest++) = *src;
+    }
+    *dest = '\0';
+
+    // strtoll() is at least 64 bits wide everywhere and reports overflow via errno
+    errno = 0;
+    long long value = strtoll(buf, NULL, base);
+    if (errno == ERANGE)
+        parser_err(ctx, literal_start, digits + span, "This integer literal is too big to fit in 64 bits");
+
+    *pos = digits + span;
+    return (int64_t)value;
+}
+
+//
+// Parse an integer literal: optional '-', then hex (0x), binary (0b), octal
+// (0o), or decimal digits with optional '_' separators.
+// A trailing '%' turns the value into a Num (divided by 100); an optional
+// "_"-separated i64/i32/i16/i8 suffix selects the precision (default 64).
+// Returns NULL if this isn't an integer, including when the digits are
+// followed by 'e' or 'f' (which marks a floating point literal).
+//
+PARSER(parse_int) {
+    const char *start = pos;
+    bool negative = match(&pos, "-");
+    if (!isdigit((unsigned char)*pos)) return NULL;
+
+    int64_t i;
+    if (match(&pos, "0x")) // Hex
+        i = parse_int_digits(ctx, start, &pos, "0123456789abcdefABCDEF_", 16);
+    else if (match(&pos, "0b")) // Binary
+        i = parse_int_digits(ctx, start, &pos, "01_", 2);
+    else if (match(&pos, "0o")) // Octal
+        i = parse_int_digits(ctx, start, &pos, "01234567_", 8);
+    else // Decimal
+        i = parse_int_digits(ctx, start, &pos, "0123456789_", 10);
+
+    if (match(&pos, "e") || match(&pos, "f")) // floating point literal
+        return NULL;
+
+    if (negative) i *= -1;
+
+    if (match(&pos, "%")) {
+        double d = (double)i / 100.;
+        return NewAST(ctx->file, start, pos, Num, .n=d, .precision=64);
+    }
+
+    match(&pos, "_");
+    int64_t precision = 64;
+    if (match(&pos, "i64")) precision = 64;
+    else if (match(&pos, "i32")) precision = 32;
+    else if (match(&pos, "i16")) precision = 16;
+    else if (match(&pos, "i8")) precision = 8;
+
+    return NewAST(ctx->file, start, pos, Int, .i=i, .precision=precision);
+}
+
+//
+// Parse a table type: '{' <key type> '=>' <value type> '}'
+// Returns NULL if the text doesn't get as far as the '=>'.
+//
+type_ast_t *parse_table_type(parse_ctx_t *ctx, const char *pos) {
+    const char *start = pos;
+    if (match(&pos, "{") == 0)
+        return NULL;
+    whitespace(&pos);
+
+    type_ast_t *key = parse_type(ctx, pos);
+    if (key == NULL)
+        return NULL;
+    pos = key->end;
+    whitespace(&pos);
+    if (match(&pos, "=>") == 0)
+        return NULL;
+
+    // From here on this is definitely a table type, so failures are errors
+    type_ast_t *val = expect(ctx, start, &pos, parse_type, "I couldn't parse the rest of this table type");
+    whitespace(&pos);
+    expect_closing(ctx, &pos, "}", "I wasn't able to parse the rest of this table type");
+    return NewTypeAST(ctx->file, start, pos, TypeTable, .key=key, .value=val);
+}
+
+//
+// Parse a struct type: "struct" '(' <fields> ')'
+// Returns NULL if the text doesn't start with "struct" followed by '('.
+//
+type_ast_t *parse_struct_type(parse_ctx_t *ctx, const char *pos) {
+    const char *start = pos;
+    if (match(&pos, "struct") == 0)
+        return NULL;
+    spaces(&pos);
+    if (match(&pos, "(") == 0)
+        return NULL;
+
+    arg_list_t *fields = parse_args(ctx, &pos, false);
+    whitespace(&pos);
+    expect_closing(ctx, &pos, ")", "I wasn't able to parse the rest of this struct type");
+    return NewTypeAST(ctx->file, start, pos, TypeStruct, .fields=fields);
+}
+
+//
+// Parse a function type: "func" '(' <args> ')' '->' [<return type>]
+// Returns NULL if the "func", the '(' or the '->' is missing.
+//
+type_ast_t *parse_func_type(parse_ctx_t *ctx, const char *pos) {
+    const char *start = pos;
+    if (match_word(&pos, "func") == 0)
+        return NULL;
+    spaces(&pos);
+    if (match(&pos, "(") == 0)
+        return NULL;
+
+    arg_list_t *params = parse_args(ctx, &pos, true);
+    expect_closing(ctx, &pos, ")", "I wasn't able to parse the rest of this function type");
+    spaces(&pos);
+    if (match(&pos, "->") == 0)
+        return NULL;
+
+    // The return type itself may be absent, in which case .ret is NULL
+    type_ast_t *return_type = optional(ctx, &pos, parse_type);
+    return NewTypeAST(ctx->file, start, pos, TypeFunction, .args=params, .ret=return_type);
+}
+
+//
+// Parse an array type: '[' <item type> ']'
+// Returns NULL if there is no '['; anything malformed after that is an error.
+//
+type_ast_t *parse_array_type(parse_ctx_t *ctx, const char *pos) {
+    const char *start = pos;
+    if (match(&pos, "[") == 0)
+        return NULL;
+
+    type_ast_t *item = expect(ctx, start, &pos, parse_type,
+                              "I couldn't parse an array item type after this point");
+    expect_closing(ctx, &pos, "]", "I wasn't able to parse the rest of this array type");
+    return NewTypeAST(ctx->file, start, pos, TypeArray, .item=item);
+}
+
+//
+// Parse a pointer type: a sigil, an optional "(readonly)" marker, then the
+// pointed-to type. Sigils:
+//   '@'  pointer (not optional, not stack)
+//   '?'  optional pointer
+//   '&'  stack pointer
+// Returns NULL if none of the sigils is present.
+//
+type_ast_t *parse_pointer_type(parse_ctx_t *ctx, const char *pos) {
+    const char *start = pos;
+    bool is_optional = false, is_stack = false;
+    if (match(&pos, "@")) {
+        // plain pointer: both flags stay false
+    } else if (match(&pos, "?")) {
+        is_optional = true;
+    } else if (match(&pos, "&")) {
+        is_stack = true;
+    } else {
+        return NULL;
+    }
+
+    spaces(&pos);
+    bool is_readonly = match(&pos, "(readonly)");
+    spaces(&pos);
+    type_ast_t *pointed = expect(ctx, start, &pos, parse_type,
+                                 "I couldn't parse a pointer type after this point");
+    return NewTypeAST(ctx->file, start, pos, TypePointer, .pointed=pointed, .is_optional=is_optional, .is_stack=is_stack, .is_readonly=is_readonly);
+}
+
+//
+// Parse a (possibly dotted) type name such as `Foo` or `Foo.Bar.Baz`.
+// Returns NULL if there is no identifier at `pos`.
+//
+type_ast_t *parse_type_name(parse_ctx_t *ctx, const char *pos) {
+    const char *start = pos;
+    const char *name = get_id(&pos);
+    if (name == NULL)
+        return NULL;
+
+    // Absorb ".member" components for as long as they keep coming
+    while (true) {
+        const char *lookahead = pos;
+        spaces(&lookahead);
+        if (match(&lookahead, ".") == 0)
+            break;
+        const char *member = get_id(&lookahead);
+        if (member == NULL)
+            break;
+        name = heap_strf("%s.%s", name, member);
+        pos = lookahead;
+    }
+    return NewTypeAST(ctx->file, start, pos, TypeVar, .var.name=name);
+}
+
+//
+// Parse any type. Each kind of type is tried in a fixed order and the first
+// one that parses wins; failing all of those, a parenthesized type is tried.
+// Returns NULL if nothing matches.
+//
+type_ast_t *parse_type(parse_ctx_t *ctx, const char *pos) {
+    type_ast_t *(*const alternatives[])(parse_ctx_t*, const char*) = {
+        parse_enum_type, parse_pointer_type, parse_array_type, parse_table_type,
+        parse_struct_type, parse_type_name, parse_func_type,
+    };
+    const char *start = pos;
+    for (size_t i = 0; i < sizeof(alternatives)/sizeof(alternatives[0]); i++) {
+        type_ast_t *found = alternatives[i](ctx, pos);
+        if (found)
+            return found;
+    }
+
+    // Last resort: '(' <type> ')'
+    if (match(&pos, "(") == 0)
+        return NULL;
+    whitespace(&pos);
+    type_ast_t *type = optional(ctx, &pos, parse_type);
+    if (type == NULL)
+        return NULL;
+    whitespace(&pos);
+    expect_closing(ctx, &pos, ")", "I wasn't able to parse the rest of this type");
+
+    // Widen the inner type's span so that it covers the parens
+    type->start = start;
+    type->end = pos;
+    return type;
+}
+
+//
+// Parse a floating point literal: optional '-', digits (with optional '_'
+// separators), then a fraction, an exponent, an 'f', or a '%'.
+// An optional "_"-separated f64/f32 suffix selects the precision (default 64)
+// and a trailing '%' divides the value by 100.
+// Returns NULL for things that aren't floats: plain integers, a lone '.', and
+// digits followed by ".." (which is left alone for the caller to handle).
+//
+PARSER(parse_num) {
+    const char *start = pos;
+    bool negative = match(&pos, "-");
+    if (!isdigit((unsigned char)*pos) && *pos != '.') return NULL;
+
+    size_t len = strspn(pos, "0123456789_");
+    if (strncmp(pos+len, "..", 2) == 0) {
+        return NULL;
+    } else if (pos[len] == '.') {
+        size_t fraction_len = strspn(pos + len + 1, "0123456789");
+        // A '.' with no digits on either side is not a number (strtod would read it as 0)
+        if (len == 0 && fraction_len == 0)
+            return NULL;
+        len += 1 + fraction_len;
+    } else if (pos[len] != 'e' && pos[len] != 'f' && pos[len] != '%') {
+        return NULL;
+    }
+    if (pos[len] == 'e')
+        len += 1 + strspn(pos + len + 1, "-0123456789_");
+
+    // Copy the literal into a scratch buffer with the '_' separators removed
+    char *buf = GC_MALLOC_ATOMIC(len+1);
+    memset(buf, 0, len+1);
+    char *dest = buf;
+    for (const char *src = pos; src < pos+len; ++src) {
+        if (*src != '_') *(dest++) = *src;
+    }
+    double d = strtod(buf, NULL);
+    pos += len;
+
+    if (negative) d *= -1;
+
+    int64_t precision = 64;
+    match(&pos, "_");
+    if (match(&pos, "f64")) precision = 64;
+    else if (match(&pos, "f32")) precision = 32;
+
+    if (match(&pos, "%")) {
+        d /= 100.;
+    }
+
+    return NewAST(ctx->file, start, pos, Num, .n=d, .precision=precision);
+}
+
+//
+// Parse an array literal: '[' [':' <item type>] <item>, <item>, ... ']'
+// An array with no items must declare its item type (e.g. [:Int]).
+// Returns NULL if there is no '['.
+//
+PARSER(parse_array) {
+    const char *start = pos;
+    if (match(&pos, "[") == 0)
+        return NULL;
+    whitespace(&pos);
+
+    // Optional explicit item type
+    type_ast_t *item_type = NULL;
+    if (match(&pos, ":")) {
+        whitespace(&pos);
+        item_type = expect(ctx, pos-1, &pos, parse_type, "I couldn't parse a type for this array");
+    }
+
+    // Comma-separated items, collected in reverse order
+    ast_list_t *items = NULL;
+    bool more = true;
+    while (more) {
+        whitespace(&pos);
+        ast_t *item = optional(ctx, &pos, parse_extended_expr);
+        if (item == NULL)
+            break;
+        items = new(ast_list_t, .ast=item, .next=items);
+        whitespace(&pos);
+        more = match(&pos, ",");
+    }
+    whitespace(&pos);
+    expect_closing(ctx, &pos, "]", "I wasn't able to parse the rest of this array");
+
+    if (item_type == NULL && items == NULL)
+        parser_err(ctx, start, pos, "Empty arrays must specify what type they would contain (e.g. [:Int])");
+
+    REVERSE_LIST(items);
+    return NewAST(ctx->file, start, pos, Array, .type=item_type, .items=items);
+}
+
+//
+// Parse a table literal:
+//   '{' [':' <key type> '=>' <value type>] <key> '=>' <value>, ...
+//       [';' fallback = <expr>] [';' default = <expr>] '}'
+// Returns NULL if there is no '{', if a key is not followed by '=>', or if
+// the braces contain neither a type annotation nor any entries.
+//
+PARSER(parse_table) {
+ const char *start = pos;
+ if (!match(&pos, "{")) return NULL;
+
+ whitespace(&pos);
+
+ ast_list_t *entries = NULL;
+ type_ast_t *key_type = NULL, *value_type = NULL;
+ // Optional explicit type annotation: ':' <key type> '=>' <value type>
+ if (match(&pos, ":")) {
+ whitespace(&pos);
+ key_type = expect(ctx, pos-1, &pos, parse_type, "I couldn't parse a key type for this table");
+ whitespace(&pos);
+ if (!match(&pos, "=>"))
+ parser_err(ctx, pos, pos, "I expected an '=>' for this table type");
+ value_type = expect(ctx, pos-1, &pos, parse_type, "I couldn't parse a value type for this table");
+ }
+
+ // Comma-separated entries, collected in reverse order
+ for (;;) {
+ whitespace(&pos);
+ const char *entry_start = pos;
+ ast_t *key = optional(ctx, &pos, parse_extended_expr);
+ if (!key) break;
+ whitespace(&pos);
+ // No '=>' after the key: give up on parsing this as a table, without reporting an error
+ if (!match(&pos, "=>")) return NULL;
+ ast_t *value = expect(ctx, pos-1, &pos, parse_expr, "I couldn't parse the value for this table entry");
+
+ ast_t *entry = NewAST(ctx->file, entry_start, pos, TableEntry, .key=key, .value=value);
+ // Keep wrapping the entry in index/field/call suffixes for as long as one of them parses
+ for (bool progress = true; progress; ) {
+ ast_t *new_entry;
+ progress = (false
+ || (new_entry=parse_index_suffix(ctx, entry))
+ || (new_entry=parse_field_suffix(ctx, entry))
+ || (new_entry=parse_fncall_suffix(ctx, entry, NORMAL_FUNCTION))
+ );
+ if (progress) entry = new_entry;
+ }
+ pos = entry->end;
+
+ entries = new(ast_list_t, .ast=entry, .next=entries);
+ whitespace(&pos);
+ if (!match(&pos, ",")) break;
+ }
+
+ REVERSE_LIST(entries);
+
+ // Braces with no type annotation and no entries are not treated as a table
+ if (!key_type && !value_type && !entries)
+ return NULL;
+
+ whitespace(&pos);
+
+ // Optional ';'-separated attributes; each of them may appear at most once
+ ast_t *fallback = NULL, *default_val = NULL;
+ if (match(&pos, ";")) {
+ for (;;) {
+ whitespace(&pos);
+ const char *attr_start = pos;
+ if (match(&pos, "fallback")) {
+ whitespace(&pos);
+ if (!match(&pos, "=")) parser_err(ctx, attr_start, pos, "I expected an '=' after 'fallback'");
+ if (fallback)
+ parser_err(ctx, attr_start, pos, "This table already has a fallback");
+ fallback = expect(ctx, attr_start, &pos, parse_expr, "I expected a fallback table");
+ } else if (match(&pos, "default")) {
+ whitespace(&pos);
+ if (!match(&pos, "=")) parser_err(ctx, attr_start, pos, "I expected an '=' after 'default'");
+ if (default_val)
+ parser_err(ctx, attr_start, pos, "This table already has a default value");
+ default_val = expect(ctx, attr_start, &pos, parse_expr, "I expected a default value for this table");
+ } else {
+ break;
+ }
+ whitespace(&pos);
+ if (!match(&pos, ";")) break;
+ }
+ }
+
+ whitespace(&pos);
+ expect_closing(ctx, &pos, "}", "I wasn't able to parse the rest of this table");
+
+ return NewAST(ctx->file, start, pos, Table, .key_type=key_type, .value_type=value_type, .entries=entries, .fallback=fallback, .default_value=default_val);
+}
+
+//
+// Parse a field access suffix after `lhs`: '.' [$]<field name>
+// Returns NULL if `lhs` is NULL, if there is no single '.' next (a ".." is
+// rejected), or if no identifier follows it.
+//
+ast_t *parse_field_suffix(parse_ctx_t *ctx, ast_t *lhs) {
+    if (lhs == NULL)
+        return NULL;
+
+    const char *pos = lhs->end;
+    whitespace(&pos);
+    if (match(&pos, ".") == 0 || *pos == '.')
+        return NULL;
+    whitespace(&pos);
+
+    bool has_dollar = match(&pos, "$");
+    const char *name = get_id(&pos);
+    if (name == NULL)
+        return NULL;
+    if (has_dollar)
+        name = heap_strf("$%s", name);
+    return NewAST(ctx->file, lhs->start, pos, FieldAccess, .fielded=lhs, .field=name);
+}
+
+//
+// Parse a reduction: '(' <binary operator> [<key suffixes>] ')' <iterable> ["else" <fallback>]
+// The operator is applied to placeholder variables named "lhs.0" and "rhs.0".
+// Returns NULL if this doesn't look like a reduction.
+//
+PARSER(parse_reduction) {
+ const char *start = pos;
+ if (!match(&pos, "(")) return NULL;
+
+ spaces(&pos);
+ const char *combo_start = pos;
+ binop_e op = match_binary_operator(&pos);
+ if (op == BINOP_UNKNOWN) return NULL;
+
+ ast_t *combination;
+ // Zero-width placeholder variables positioned just after the operator
+ ast_t *lhs = NewAST(ctx->file, pos, pos, Var, .var.name="lhs.0");
+ ast_t *rhs = NewAST(ctx->file, pos, pos, Var, .var.name="rhs.0");
+ if (op == BINOP_MIN || op == BINOP_MAX) {
+ // Min/max may be followed by index/field/call suffixes, which are parsed
+ // as if applied to the `rhs` placeholder and become the comparison key
+ for (bool progress = true; progress; ) {
+ ast_t *new_term;
+ progress = (false
+ || (new_term=parse_index_suffix(ctx, rhs))
+ || (new_term=parse_field_suffix(ctx, rhs))
+ || (new_term=parse_fncall_suffix(ctx, rhs, NORMAL_FUNCTION))
+ );
+ if (progress) rhs = new_term;
+ }
+ // Still a bare Var means no suffix was parsed, so there is no key
+ if (rhs->tag == Var) rhs = NULL;
+ else pos = rhs->end;
+ // NOTE(review): both .lhs and .rhs are given `lhs` here, since `rhs` now
+ // holds the key (or NULL). Confirm that .rhs=lhs is intended.
+ combination = op == BINOP_MIN ?
+ NewAST(ctx->file, combo_start, pos, Min, .lhs=lhs, .rhs=lhs, .key=rhs)
+ : NewAST(ctx->file, combo_start, pos, Max, .lhs=lhs, .rhs=lhs, .key=rhs);
+ } else {
+ combination = NewAST(ctx->file, combo_start, pos, BinaryOp, .op=op, .lhs=lhs, .rhs=rhs);
+ }
+
+ spaces(&pos);
+ if (!match(&pos, ")")) return NULL;
+
+ ast_t *iter = optional(ctx, &pos, parse_extended_expr);
+ if (!iter) return NULL;
+
+ ast_t *fallback = NULL;
+ if (match_word(&pos, "else"))
+ fallback = expect(ctx, pos-4, &pos, parse_expr, "I couldn't parse the expression after this 'else'");
+
+ return NewAST(ctx->file, start, pos, Reduction, .iter=iter, .combination=combination, .fallback=fallback);
+}
+
+//
+// Parse an index suffix directly after `lhs`:
+//   '[' <expr> [';' "unchecked"] ']'    or    '[' '.' <field> ... ']'
+// Returns NULL if `lhs` is NULL or if there is no '[' immediately after it.
+//
+ast_t *parse_index_suffix(parse_ctx_t *ctx, ast_t *lhs) {
+    if (lhs == NULL)
+        return NULL;
+
+    const char *start = lhs->start;
+    const char *pos = lhs->end;
+    if (match(&pos, "[") == 0)
+        return NULL;
+    whitespace(&pos);
+
+    // array[.field]
+    ast_t *index = NULL;
+    const char *dot = pos;
+    if (match(&pos, ".")) {
+        const char *field = get_id(&pos);
+        if (field)
+            index = NewAST(ctx->file, dot, pos, FieldAccess, .field=field);
+        else
+            pos = dot; // not a field after all, so put the '.' back
+    }
+
+    // obj[expr]
+    if (index == NULL)
+        index = optional(ctx, &pos, parse_extended_expr);
+    whitespace(&pos);
+
+    bool unchecked = false;
+    if (match(&pos, ";")) {
+        spaces(&pos);
+        unchecked = (match_word(&pos, "unchecked") != 0);
+    }
+    expect_closing(ctx, &pos, "]", "I wasn't able to parse the rest of this index");
+    return NewAST(ctx->file, start, pos, Index, .indexed=lhs, .index=index, .unchecked=unchecked);
+}
+
+//
+// Parse a conditional: if <condition> [then] <body> [else <body>]
+// The condition may be a declaration or an expression. An 'else' is only
+// attached when it sits at the same indentation as the 'if'.
+//
+PARSER(parse_if) {
+    const char *start = pos;
+    const int64_t if_indent = sss_get_indent(ctx->file, pos);
+
+    if (match_word(&pos, "if") == 0)
+        return NULL;
+
+    ast_t *condition = optional(ctx, &pos, parse_declaration);
+    if (condition == NULL)
+        condition = expect(ctx, start, &pos, parse_expr,
+                           "I expected to find an expression for this 'if'");
+
+    match_word(&pos, "then"); // optional
+
+    ast_t *body = expect(ctx, start, &pos, parse_opt_indented_block, "I expected a body for this 'if' statement");
+
+    // Look ahead for an 'else', only committing to the new position if one is found
+    ast_t *else_body = NULL;
+    const char *lookahead = pos;
+    whitespace(&lookahead);
+    if (sss_get_indent(ctx->file, lookahead) == if_indent && match_word(&lookahead, "else")) {
+        pos = lookahead;
+        else_body = expect(ctx, start, &pos, parse_opt_indented_block, "I expected a body for this 'else'");
+    }
+    return NewAST(ctx->file, start, pos, If, .condition=condition, .body=body, .else_body=else_body);
+}
+
+//
+// Parse a loop: for [index,] value in iter [do] [<indent>] body
+// With a single variable, it is the value; with two, the first is the index.
+// Returns NULL if the text doesn't start with the word "for".
+//
+PARSER(parse_for) {
+    const char *start = pos;
+    if (!match_word(&pos, "for")) return NULL;
+    ast_t *index = expect(ctx, start, &pos, parse_var, "I expected an iteration variable for this 'for'");
+    spaces(&pos);
+    ast_t *value = NULL;
+    if (match(&pos, ",")) {
+        value = expect(ctx, pos-1, &pos, parse_var, "I expected a variable after this comma");
+    }
+    expect_str(ctx, start, &pos, "in", "I expected an 'in' for this 'for'");
+    ast_t *iter = expect(ctx, start, &pos, parse_expr, "I expected an iterable value for this 'for'");
+    // Optional 'do'. match_word() skips the spaces in front of the keyword and
+    // insists on a word boundary after it, just like the optional 'then' in parse_if.
+    match_word(&pos, "do");
+    ast_t *body = expect(ctx, start, &pos, parse_opt_indented_block, "I expected a body for this 'for'");
+    return NewAST(ctx->file, start, pos, For, .index=value ? index : NULL, .value=value ? value : index, .iter=iter, .body=body);
+}
+
+//
+// Parse a loop: while condition [do] [<indent>] body
+// Returns NULL if the text doesn't start with the word "while".
+//
+PARSER(parse_while) {
+    const char *start = pos;
+    if (!match_word(&pos, "while")) return NULL;
+    ast_t *condition = expect(ctx, start, &pos, parse_expr, "I don't see a viable condition for this 'while'");
+    // Optional 'do'. match_word() skips the spaces in front of the keyword and
+    // insists on a word boundary after it, just like the optional 'then' in parse_if.
+    match_word(&pos, "do");
+    ast_t *body = expect(ctx, start, &pos, parse_opt_indented_block, "I expected a body for this 'while'");
+    return NewAST(ctx->file, start, pos, While, .condition=condition, .body=body);
+}
+
+//
+// Parse a heap allocation: '@' <expr>
+//
+PARSER(parse_heap_alloc) {
+    const char *start = pos;
+    if (match(&pos, "@") == 0)
+        return NULL;
+    spaces(&pos);
+    ast_t *operand = expect(ctx, start, &pos, parse_expr, "I expected an expression for this '@'");
+    return NewAST(ctx->file, start, pos, UnaryOp, .op=UNOP_HEAP_ALLOCATE, .value=operand);
+}
+
+//
+// Parse a stack reference: '&' <expr>
+//
+PARSER(parse_stack_reference) {
+    const char *start = pos;
+    if (match(&pos, "&") == 0)
+        return NULL;
+    spaces(&pos);
+    ast_t *operand = expect(ctx, start, &pos, parse_expr, "I expected an expression for this '&'");
+    return NewAST(ctx->file, start, pos, UnaryOp, .op=UNOP_STACK_REFERENCE, .value=operand);
+}
+
+//
+// Parse a logical negation: "not" <expr>
+//
+PARSER(parse_not) {
+    const char *start = pos;
+    if (match_word(&pos, "not") == 0)
+        return NULL;
+    spaces(&pos);
+    ast_t *operand = expect(ctx, start, &pos, parse_expr, "I expected an expression for this 'not'");
+    return NewAST(ctx->file, start, pos, UnaryOp, .op=UNOP_NOT, .value=operand);
+}
+
+//
+// Parse an arithmetic negation: '-' <term>
+// Unlike the other unary operators here, the operand is a term rather than a
+// full expression.
+//
+PARSER(parse_negative) {
+    const char *start = pos;
+    if (match(&pos, "-") == 0)
+        return NULL;
+    spaces(&pos);
+    ast_t *operand = expect(ctx, start, &pos, parse_term, "I expected an expression for this '-'");
+    return NewAST(ctx->file, start, pos, UnaryOp, .op=UNOP_NEGATIVE, .value=operand);
+}
+
+//
+// Parse a boolean literal: the word "yes" or the word "no"
+//
+PARSER(parse_bool) {
+    const char *start = pos;
+    bool is_yes = match_word(&pos, "yes") != 0;
+    if (!is_yes && match_word(&pos, "no") == 0)
+        return NULL;
+    return NewAST(ctx->file, start, pos, Bool, .b=is_yes);
+}
+
+//
+// Parse a character literal: either a backtick followed by one (single-byte)
+// character, or a backslash escape sequence as understood by unescape().
+// Returns NULL if neither form is present, or if a backtick is the last thing
+// in the input.
+//
+PARSER(parse_char) {
+    const char *start = pos;
+    if (*pos == '`') {
+        // A backtick at the very end of the input has no character to quote;
+        // consuming one anyway would move `pos` past the terminating NUL.
+        if (pos[1] == '\0')
+            return NULL;
+        char c = pos[1];
+        pos += 2;
+        return NewAST(ctx->file, start, pos, Char, .c=c);
+    } else if (*pos == '\\') {
+        char c = unescape(&pos)[0];
+        return NewAST(ctx->file, start, pos, Char, .c=c);
+    } else {
+        return NULL;
+    }
+}
+
+//
+// Parse a string interpolation. `pos` points at the interpolation marker
+// (typically '$', though other characters such as '@' are used in some
+// contexts), which is skipped without being inspected. An optional ':' marks
+// the interpolation as labelled; the value is a parenthesized expression or a term.
+// Reports a parse error if no value can be parsed.
+//
+PARSER(parse_interpolation) {
+    const char *start = pos;
+    pos += 1; // skip the marker character
+    bool labelled = match(&pos, ":");
+
+    ast_t *value = optional(ctx, &pos, parse_parens);
+    if (value == NULL)
+        value = optional(ctx, &pos, parse_term);
+    if (value == NULL) {
+        // Extend the error span over a balanced (...) group, if there is one
+        match_group(&pos, '(');
+        parser_err(ctx, start, pos, "This interpolation didn't parse");
+    }
+    return NewAST(ctx->file, start, pos, Interp, .value=value, .labelled=labelled);
+}
+
+// Parse a string literal and return a StringJoin whose children are the
+// literal chunks, unescaped sequences and interpolations, in source order.
+//
+// A string opens either with a plain `'` or `"` quote, or with `$` followed by
+// a delimiter byte (bracket-like delimiters close with their partner, any
+// other delimiter closes with itself). The delimiter also selects the
+// interpolation and escape characters via the tables below. If the opener is
+// immediately followed by a line break, the string is indentation-delimited
+// and spans several lines; otherwise it is parsed as an inline string.
+// Returns NULL if `pos` is not at a string opener.
+PARSER(parse_string) {
+    // Per-delimiter lookup tables, indexed by the (ASCII) opening delimiter.
+    static const char closing[128] = {['(']=')', ['[']=']', ['<']='>', ['{']='}'};
+    // These entries are characters, so the table must be `char`: declared as
+    // `bool`, every nonzero entry was stored as 1 and the escape character
+    // silently became '\x01' instead of '\x1B'.
+    static const char escapes[128] = {['\'']='\x1B', ['(']='\x1B', ['>']='\x1B', ['/']='\x1B'};
+    static const char interps[128] = {['>']='@', ['/']='@', ['\'']='\x1A', ['(']='\x1A'};
+
+    const char *string_start = pos;
+    char open, close;
+    // Index into the tables above. Bytes outside 0..127 (or a negative `char`)
+    // use slot 0, which is empty in every table, instead of indexing out of bounds.
+    int slot;
+    if (match(&pos, "$")) {
+        open = *pos;
+        // A '$' at the very end of the text has no delimiter to read.
+        if (open == '\0')
+            return NULL;
+        slot = ((unsigned char)open < 128) ? (unsigned char)open : 0;
+        close = closing[slot] ? closing[slot] : open;
+        ++pos;
+    } else {
+        if (*pos != '\'' && *pos != '"')
+            return NULL;
+        open = *pos;
+        slot = (unsigned char)open;
+        close = *pos;
+        ++pos;
+    }
+
+    char interp_char = interps[slot] ? interps[slot] : '$';
+    char escape_char = escapes[slot] ? escapes[slot] : '\\';
+
+    if (open == ':' || open == '>')
+        spaces(&pos);
+
+    // Chunks are pushed onto the front of the list and reversed at the end.
+    ast_list_t *chunks = NULL;
+    if (*pos == '\r' || *pos == '\n') { // Multiline string
+        char special[] = {'\n','\r',interp_char,escape_char,'\0'};
+        int64_t starting_indent = sss_get_indent(ctx->file, pos);
+        // indentation-delimited string
+        match(&pos, "\r");
+        match(&pos, "\n");
+        int64_t first_line = sss_get_line_number(ctx->file, pos);
+        int64_t indented = sss_get_indent(ctx->file, pos);
+        pos = sss_get_line(ctx->file, first_line);
+        while (pos < ctx->file->text + ctx->file->len) {
+            const char *eol = strchrnul(pos, '\n');
+            if (eol == pos + strspn(pos, " \t\r")) { // Empty line
+                ast_t *ast = NewAST(ctx->file, pos, eol, StringLiteral, .str="\n");
+                chunks = new(ast_list_t, .ast=ast, .next=chunks);
+                pos = eol + 1;
+                continue;
+            }
+            if (!match_indentation(&pos, starting_indent))
+                parser_err(ctx, pos, strchrnul(pos, '\n'), "This isn't a valid indentation level for this unterminated string");
+
+            // The closing delimiter sits at the string's starting indentation.
+            if (*pos == close) {
+                ++pos;
+                goto finished;
+            }
+
+            if (!match_indentation(&pos, (indented - starting_indent)))
+                parser_err(ctx, pos, strchrnul(pos, '\n'), "I was expecting this to have %ld extra indentation beyond %ld",
+                           (long)(indented - starting_indent), (long)starting_indent);
+
+            while (pos < eol+1) {
+                size_t len = strcspn(pos, special);
+                if (pos[len] == '\r') ++len;
+                if (pos[len] == '\n') ++len;
+
+                if (len > 0) {
+                    ast_t *chunk = NewAST(ctx->file, pos, pos+len-1, StringLiteral, .str=heap_strn(pos, len));
+                    chunks = new(ast_list_t, .ast=chunk, .next=chunks);
+                }
+
+                pos += len;
+
+                if (*pos == escape_char) {
+                    const char *start = pos;
+                    const char* unescaped = unescape(&pos);
+                    ast_t *chunk = NewAST(ctx->file, start, pos, StringLiteral, .str=unescaped);
+                    chunks = new(ast_list_t, .ast=chunk, .next=chunks);
+                    // NOTE(review): the inline branch below does not step past the
+                    // position unescape() leaves behind; confirm which is intended.
+                    ++pos;
+                } else if (*pos == interp_char) {
+                    ast_t *chunk = parse_interpolation(ctx, pos);
+                    chunks = new(ast_list_t, .ast=chunk, .next=chunks);
+                    pos = chunk->end;
+                } else if (*pos == '\0') {
+                    // End of text on a line with no trailing newline: nothing
+                    // more can be consumed, so stop instead of looping forever.
+                    break;
+                }
+            }
+        }
+      finished:;
+        // Strip trailing newline:
+        if (chunks) {
+            ast_t *last_chunk = chunks->ast;
+            if (last_chunk->tag == StringLiteral) {
+                auto str = Match(last_chunk, StringLiteral);
+                size_t last_len = strlen(str->str);
+                // An empty final chunk has nothing to trim (`last_len - 1` would wrap around).
+                if (last_len > 0) {
+                    const char* trimmed = heap_strn(str->str, last_len-1);
+                    chunks->ast = NewAST(ctx->file, last_chunk->start, last_chunk->end-1, StringLiteral, .str=trimmed);
+                }
+            }
+        }
+    } else { // Inline string
+        char special[] = {'\n','\r',open,close,interp_char,escape_char,'\0'};
+        // Nesting depth of open/close delimiters; the string ends when it reaches 0.
+        int depth = 1;
+        while (depth > 0 && *pos) {
+            size_t len = strcspn(pos, special);
+            if (len > 0) {
+                ast_t *chunk = NewAST(ctx->file, pos, pos+len-1, StringLiteral, .str=heap_strn(pos, len));
+                chunks = new(ast_list_t, .ast=chunk, .next=chunks);
+                pos += len;
+            }
+
+            if (*pos == interp_char) {
+                ast_t *chunk = parse_interpolation(ctx, pos);
+                chunks = new(ast_list_t, .ast=chunk, .next=chunks);
+                pos = chunk->end;
+            } else if (*pos == escape_char) {
+                const char *start = pos;
+                const char* unescaped = unescape(&pos);
+                ast_t *chunk = NewAST(ctx->file, start, pos, StringLiteral, .str=unescaped);
+                chunks = new(ast_list_t, .ast=chunk, .next=chunks);
+            } else if (*pos == '\r' || *pos == '\n') {
+                // These delimiters make a string that runs to the end of the line.
+                if (open == ' ' || open == ':' || open == '>') goto string_finished;
+                parser_err(ctx, string_start, pos, "This line ended without closing the string");
+            } else if (*pos == close) { // if open == close, then don't do nesting (i.e. check 'close' first)
+                --depth;
+                if (depth > 0) {
+                    ast_t *chunk = NewAST(ctx->file, pos, pos+1, StringLiteral, .str=heap_strn(pos, 1));
+                    chunks = new(ast_list_t, .ast=chunk, .next=chunks);
+                }
+                ++pos;
+            } else if (*pos == open) {
+                ++depth;
+                ast_t *chunk = NewAST(ctx->file, pos, pos+1, StringLiteral, .str=heap_strn(pos, 1));
+                chunks = new(ast_list_t, .ast=chunk, .next=chunks);
+                ++pos;
+            } else {
+                ast_t *chunk = NewAST(ctx->file, pos, pos+1, StringLiteral, .str=heap_strn(pos, 1));
+                ++pos;
+                chunks = new(ast_list_t, .ast=chunk, .next=chunks);
+            }
+        }
+    }
+  string_finished:;
+    REVERSE_LIST(chunks);
+    return NewAST(ctx->file, string_start, pos, StringJoin, .children=chunks);
+}
+
+// Parse a `skip` statement. The optional target is the keyword `for` or
+// `while`, or else whatever identifier get_id() finds (NULL if there is none).
+// Returns NULL when `pos` is not at the word `skip`.
+PARSER(parse_skip) {
+    const char *start = pos;
+    if (!match_word(&pos, "skip"))
+        return NULL;
+    spaces(&pos);
+
+    const char *target = match_word(&pos, "for") ? "for"
+        : match_word(&pos, "while") ? "while"
+        : get_id(&pos);
+    return NewAST(ctx->file, start, pos, Skip, .target=target);
+}
+
+// Parse a `stop` statement. The optional target is the keyword `for` or
+// `while`, or else whatever identifier get_id() finds (NULL if there is none).
+// Returns NULL when `pos` is not at the word `stop`.
+PARSER(parse_stop) {
+    const char *start = pos;
+    if (!match_word(&pos, "stop"))
+        return NULL;
+    spaces(&pos);
+
+    const char *target = match_word(&pos, "for") ? "for"
+        : match_word(&pos, "while") ? "while"
+        : get_id(&pos);
+    return NewAST(ctx->file, start, pos, Stop, .target=target);
+}
+
+// Parse a `return` statement with an optional value expression.
+// Returns NULL when `pos` is not at the word `return`; the Return node's
+// value is NULL when no expression follows.
+PARSER(parse_return) {
+    const char *start = pos;
+    if (!match_word(&pos, "return"))
+        return NULL;
+    spaces(&pos);
+    ast_t *returned = optional(ctx, &pos, parse_expr);
+    return NewAST(ctx->file, start, pos, Return, .value=returned);
+}
+
+// Parse an anonymous function: `func`, a parenthesized argument list, and an
+// optional body (indented block or inline block).
+// Returns NULL unless `func` followed by '(' is found; a missing ')' is a
+// parse error.
+PARSER(parse_lambda) {
+    const char *start = pos;
+    if (!match_word(&pos, "func"))
+        return NULL;
+
+    spaces(&pos);
+    if (!match(&pos, "("))
+        return NULL;
+
+    arg_list_t *params = parse_args(ctx, &pos, false);
+    spaces(&pos);
+    expect_closing(ctx, &pos, ")", "I was expecting a ')' to finish this anonymous function's arguments");
+
+    ast_t *fn_body = optional(ctx, &pos, parse_opt_indented_block);
+    return NewAST(ctx->file, start, pos, Lambda, .args=params, .body=fn_body);
+}
+
+// Parse a typed nil value: '!' immediately followed by a type.
+// Returns NULL if there is no '!' or no type after it. The node ends where
+// the parsed type ends.
+PARSER(parse_nil) {
+    const char *start = pos;
+    if (!match(&pos, "!"))
+        return NULL;
+
+    type_ast_t *nil_type = parse_type(ctx, pos);
+    if (nil_type == NULL)
+        return NULL;
+    return NewAST(ctx->file, start, nil_type->end, Nil, .type=nil_type);
+}
+
+// Parse a variable reference: a single identifier as recognized by get_id().
+// Returns NULL when no identifier is found at `pos`.
+PARSER(parse_var) {
+    const char *start = pos;
+    const char *ident = get_id(&pos);
+    if (ident == NULL)
+        return NULL;
+    return NewAST(ctx->file, start, pos, Var, .var.name=ident);
+}
+
+// Parse a single term without any trailing index/field/call suffix.
+// Leading spaces are skipped, then each term parser is tried in a fixed order
+// and the first one that succeeds wins. Returns NULL if none of them match.
+PARSER(parse_term_no_suffix) {
+    spaces(&pos);
+
+    ast_t *term;
+    if ((term = parse_nil(ctx, pos))) return term;
+    if ((term = parse_num(ctx, pos))) return term;
+    if ((term = parse_int(ctx, pos))) return term;
+    if ((term = parse_negative(ctx, pos))) return term;
+    if ((term = parse_heap_alloc(ctx, pos))) return term;
+    if ((term = parse_stack_reference(ctx, pos))) return term;
+    if ((term = parse_bool(ctx, pos))) return term;
+    if ((term = parse_char(ctx, pos))) return term;
+    if ((term = parse_string(ctx, pos))) return term;
+    if ((term = parse_lambda(ctx, pos))) return term;
+    if ((term = parse_parens(ctx, pos))) return term;
+    if ((term = parse_table(ctx, pos))) return term;
+    if ((term = parse_var(ctx, pos))) return term;
+    if ((term = parse_array(ctx, pos))) return term;
+    if ((term = parse_reduction(ctx, pos))) return term;
+    if ((term = parse_skip(ctx, pos))) return term;
+    if ((term = parse_stop(ctx, pos))) return term;
+    if ((term = parse_return(ctx, pos))) return term;
+    if ((term = parse_not(ctx, pos))) return term;
+    // Last candidate: its result (possibly NULL) is the final answer.
+    return parse_extern(ctx, pos);
+}
+
+// Parse a term followed by any number of suffixes (index, field access,
+// function call). Each suffix wraps the term parsed so far; the loop stops as
+// soon as no suffix parser makes progress. Returns NULL if there is no term.
+PARSER(parse_term) {
+    ast_t *term = parse_term_no_suffix(ctx, pos);
+    if (term == NULL)
+        return NULL;
+
+    for (;;) {
+        ast_t *extended = parse_index_suffix(ctx, term);
+        if (!extended) extended = parse_field_suffix(ctx, term);
+        if (!extended) extended = parse_fncall_suffix(ctx, term, NORMAL_FUNCTION);
+        if (!extended) break;
+        term = extended;
+    }
+    return term;
+}
+
+// Parse a function-call suffix `( args... )` that begins exactly where `fn`
+// ends, and return a FunctionCall node wrapping `fn`.
+//
+// Arguments are comma-separated and may be positional expressions or keyword
+// arguments of the form `name = value`. When `is_extern` is set, an optional
+// `:Type` after the closing parenthesis gives the return type, defaulting to
+// a type variable named "Void".
+//
+// Returns NULL if `fn` is NULL or no '(' follows it; an unclosed parenthesis
+// or an unparsable keyword value is reported through parser_err().
+ast_t *parse_fncall_suffix(parse_ctx_t *ctx, ast_t *fn, bool is_extern) {
+    if (!fn) return NULL;
+
+    const char *start = fn->start;
+    const char *pos = fn->end;
+
+    if (!match(&pos, "(")) return NULL;
+
+    whitespace(&pos);
+
+    // Built in reverse order, then reversed once the argument list is complete.
+    ast_list_t *args = NULL;
+    for (;;) {
+        const char *arg_start = pos;
+        const char *name = get_id(&pos);
+        whitespace(&pos);
+
+        ast_t *arg;
+        // `name = value` is a keyword argument, but `name == value` is a
+        // comparison and must be parsed as an ordinary expression (the same
+        // distinction parse_args and parse_assignment make).
+        if (name && strncmp(pos, "==", 2) != 0 && match(&pos, "=")) {
+            whitespace(&pos);
+            ast_t *value = parse_expr(ctx, pos);
+            if (!value) parser_err(ctx, arg_start, pos, "I couldn't parse this keyword argument value");
+            arg = NewAST(ctx->file, arg_start, value->end, KeywordArg,
+                         .name=name, .arg=value);
+            pos = arg->end;
+        } else {
+            // Not a keyword argument: rewind so the identifier is parsed as
+            // part of the expression.
+            if (name) pos = arg_start;
+            arg = optional(ctx, &pos, parse_expr);
+            if (!arg) break;
+        }
+        args = new(ast_list_t, .ast=arg, .next=args);
+
+        whitespace(&pos);
+        if (!match(&pos, ","))
+            break;
+        whitespace(&pos);
+    }
+
+    whitespace(&pos);
+
+    if (!match(&pos, ")"))
+        parser_err(ctx, start, pos, "This parenthesis is unclosed");
+
+    type_ast_t *extern_return_type = NULL;
+    if (is_extern) {
+        if (match(&pos, ":"))
+            extern_return_type = expect(ctx, start, &pos, parse_type, "I couldn't parse the return type of this external function call");
+        else
+            extern_return_type = NewTypeAST(ctx->file, pos, pos, TypeVar, .var.name="Void");
+    }
+    REVERSE_LIST(args);
+    return NewAST(ctx->file, start, pos, FunctionCall, .fn=fn, .args=args, .extern_return_type=extern_return_type);
+}
+
+// Try to consume a binary operator at `*pos` and return which one it is.
+// Returns BINOP_UNKNOWN when no operator is recognized; note that `*pos` may
+// already have been advanced in that case (e.g. after a '-' that looks like a
+// negative literal, or after "==" that turned out to be followed by '=').
+binop_e match_binary_operator(const char **pos)
+{
+    const char first = **pos;
+
+    if (first == '+') {
+        *pos += 1;
+        if (match(pos, "+"))
+            return BINOP_CONCAT;
+        return BINOP_PLUS;
+    }
+
+    if (first == '-') {
+        *pos += 1;
+        const char *after = *pos;
+        // A space before the '-' but none after it looks like `fn -5`, which is
+        // a negative argument rather than a subtraction.
+        if (after[0] == ' ' || after[-2] != ' ')
+            return BINOP_MINUS;
+        return BINOP_UNKNOWN;
+    }
+
+    if (first == '*') { *pos += 1; return BINOP_MULT; }
+    if (first == '/') { *pos += 1; return BINOP_DIVIDE; }
+    if (first == '^') { *pos += 1; return BINOP_POWER; }
+
+    if (first == '<') {
+        *pos += 1;
+        if (match(pos, "=")) return BINOP_LE;
+        if (match(pos, "<")) return BINOP_LSHIFT;
+        return BINOP_LT;
+    }
+
+    if (first == '>') {
+        *pos += 1;
+        if (match(pos, "=")) return BINOP_GE;
+        if (match(pos, ">")) return BINOP_RSHIFT;
+        return BINOP_GT;
+    }
+
+    // Multi-character and word operators.
+    if (match(pos, "!=")) return BINOP_NE;
+    if (match(pos, "==") && **pos != '=') return BINOP_EQ;
+    if (match_word(pos, "and")) return BINOP_AND;
+    if (match_word(pos, "or")) return BINOP_OR;
+    if (match_word(pos, "xor")) return BINOP_XOR;
+    if (match_word(pos, "mod1")) return BINOP_MOD1;
+    if (match_word(pos, "mod")) return BINOP_MOD;
+    if (match_word(pos, "_min_")) return BINOP_MIN;
+    if (match_word(pos, "_max_")) return BINOP_MAX;
+    return BINOP_UNKNOWN;
+}
+
+// Parse an infix expression: a term followed by any number of
+// `operator operand` pairs whose operator tightness is at least
+// `min_tightness`.
+//
+// The right-hand operand of each operator is parsed recursively with that
+// operator's own tightness as the new minimum, so tighter (or equally tight)
+// operators to the right are grouped into the operand first. Looser operators
+// are then picked up by the loop here; previously only a single operator was
+// handled per call, so an input like `a * b + c` stopped after `a * b`.
+//
+// `_min_` / `_max_` may carry a key written as suffixes directly after the
+// operator (index, field or call); without suffixes the key is NULL.
+//
+// Returns NULL if no term is found at `pos`. If an operator is matched but no
+// operand follows it, the expression parsed so far is returned.
+static ast_t *parse_infix_expr(parse_ctx_t *ctx, const char *pos, int min_tightness) {
+    ast_t *lhs = optional(ctx, &pos, parse_term);
+    if (!lhs) return NULL;
+
+    for (;;) {
+        spaces(&pos);
+        binop_e op = match_binary_operator(&pos);
+        if (op == BINOP_UNKNOWN || op_tightness[op] < min_tightness)
+            break;
+
+        ast_t *key = NULL;
+        if (op == BINOP_MIN || op == BINOP_MAX) {
+            // Start from a placeholder Var and let the suffix parsers extend it.
+            key = NewAST(ctx->file, pos, pos, Var, .var.name=op == BINOP_MIN ? "_min_" : "_max_");
+            for (bool progress = true; progress; ) {
+                ast_t *new_term;
+                progress = (false
+                    || (new_term=parse_index_suffix(ctx, key))
+                    || (new_term=parse_field_suffix(ctx, key))
+                    || (new_term=parse_fncall_suffix(ctx, key, NORMAL_FUNCTION))
+                    );
+                if (progress) key = new_term;
+            }
+            // Still the bare placeholder means no key was written.
+            if (key->tag == Var) key = NULL;
+            else pos = key->end;
+        }
+
+        spaces(&pos);
+        ast_t *rhs = parse_infix_expr(ctx, pos, op_tightness[op]);
+        if (!rhs) break;
+        pos = rhs->end;
+
+        switch (op) {
+        case BINOP_MIN:
+            lhs = NewAST(ctx->file, lhs->start, rhs->end, Min, .lhs=lhs, .rhs=rhs, .key=key);
+            break;
+        case BINOP_MAX:
+            lhs = NewAST(ctx->file, lhs->start, rhs->end, Max, .lhs=lhs, .rhs=rhs, .key=key);
+            break;
+        default:
+            lhs = NewAST(ctx->file, lhs->start, rhs->end, BinaryOp, .lhs=lhs, .rhs=rhs, .op=op);
+            break;
+        }
+    }
+    return lhs;
+}
+
+// Parse an infix expression at `pos`, accepting operators of any tightness.
+// Returns whatever parse_infix_expr() returns (NULL if no term is found).
+ast_t *parse_expr(parse_ctx_t *ctx, const char *pos) {
+    const int loosest_tightness = 0;
+    return parse_infix_expr(ctx, pos, loosest_tightness);
+}
+
+// Parse a declaration `name := value`, where the value is either a `use`
+// statement or an extended expression.
+// Returns NULL when `pos` is not at `name :=`; a missing value after the
+// ':=' is a parse error.
+PARSER(parse_declaration) {
+    const char *start = pos;
+
+    ast_t *declared = parse_var(ctx, pos);
+    if (declared == NULL)
+        return NULL;
+    pos = declared->end;
+
+    spaces(&pos);
+    if (!match(&pos, ":="))
+        return NULL;
+    spaces(&pos);
+
+    ast_t *value = optional(ctx, &pos, parse_use);
+    if (value == NULL)
+        value = optional(ctx, &pos, parse_extended_expr);
+    if (value == NULL)
+        parser_err(ctx, pos, strchrnul(pos, '\n'), "This declaration value didn't parse");
+
+    return NewAST(ctx->file, start, pos, Declare, .var=declared, .value=value);
+}
+
+// Parse an update-assignment such as `x += y`: an expression, one of the
+// operators `+=  ++=  -=  *=  /=  and=  or=  xor=`, and a right-hand
+// extended expression (required once the operator has been matched).
+// Returns NULL if there is no left-hand expression or no update operator.
+PARSER(parse_update) {
+    const char *start = pos;
+    ast_t *target = optional(ctx, &pos, parse_expr);
+    if (target == NULL)
+        return NULL;
+    spaces(&pos);
+
+    // BINOP_UNKNOWN doubles as the "no operator matched" marker.
+    binop_e op =
+        match(&pos, "+=") ? BINOP_PLUS
+        : match(&pos, "++=") ? BINOP_CONCAT
+        : match(&pos, "-=") ? BINOP_MINUS
+        : match(&pos, "*=") ? BINOP_MULT
+        : match(&pos, "/=") ? BINOP_DIVIDE
+        : match(&pos, "and=") ? BINOP_AND
+        : match(&pos, "or=") ? BINOP_OR
+        : match(&pos, "xor=") ? BINOP_XOR
+        : BINOP_UNKNOWN;
+    if (op == BINOP_UNKNOWN)
+        return NULL;
+
+    ast_t *operand = expect(ctx, start, &pos, parse_extended_expr, "I expected an expression here");
+    return NewAST(ctx->file, start, pos, UpdateAssign, .lhs=target, .rhs=operand, .op=op);
+}
+
+// Parse an assignment `a, b, ... = x, y, ...`: one or more comma-separated
+// target terms, a single '=', then comma-separated value expressions.
+// Returns NULL if there are no targets, no '=', or the '=' turns out to be
+// the start of an `==` comparison.
+PARSER(parse_assignment) {
+    const char *start = pos;
+
+    // Left-hand side (collected in reverse, fixed up below).
+    ast_list_t *targets = NULL;
+    ast_t *target;
+    while ((target = optional(ctx, &pos, parse_term)) != NULL) {
+        targets = new(ast_list_t, .ast=target, .next=targets);
+        spaces(&pos);
+        if (!match(&pos, ","))
+            break;
+        whitespace(&pos);
+    }
+    if (targets == NULL)
+        return NULL;
+
+    spaces(&pos);
+    if (!match(&pos, "="))
+        return NULL;
+    if (match(&pos, "="))
+        return NULL; // == comparison
+
+    // Right-hand side (also collected in reverse).
+    ast_list_t *values = NULL;
+    ast_t *value;
+    while ((value = optional(ctx, &pos, parse_extended_expr)) != NULL) {
+        values = new(ast_list_t, .ast=value, .next=values);
+        spaces(&pos);
+        if (!match(&pos, ","))
+            break;
+        whitespace(&pos);
+    }
+
+    REVERSE_LIST(targets);
+    REVERSE_LIST(values);
+
+    return NewAST(ctx->file, start, pos, Assign, .targets=targets, .values=values);
+}
+
+// Parse one statement. Declarations, doctests, function definitions and
+// `use` are tried first and returned as-is. Otherwise an update, an
+// assignment, or an extended expression is tried, in that order; if the
+// result is a bare variable, a function-call suffix following it is attached.
+// Returns NULL if nothing parses.
+PARSER(parse_statement) {
+    ast_t *stmt;
+    if ((stmt = parse_declaration(ctx, pos))) return stmt;
+    if ((stmt = parse_doctest(ctx, pos))) return stmt;
+    if ((stmt = parse_func_def(ctx, pos))) return stmt;
+    if ((stmt = parse_use(ctx,pos))) return stmt;
+
+    stmt = parse_update(ctx, pos);
+    if (!stmt) stmt = parse_assignment(ctx, pos);
+    if (!stmt) stmt = parse_extended_expr(ctx, pos);
+
+    while (stmt && stmt->tag == Var) {
+        ast_t *call = parse_fncall_suffix(ctx, stmt, NORMAL_FUNCTION);
+        if (!call)
+            break;
+        stmt = call;
+    }
+    return stmt;
+}
+
+// Parse an "extended" expression: a `for`, `while` or `if` construct when one
+// is present, otherwise an ordinary infix expression.
+PARSER(parse_extended_expr) {
+    ast_t *expr = optional(ctx, &pos, parse_for);
+    if (!expr) expr = optional(ctx, &pos, parse_while);
+    if (!expr) expr = optional(ctx, &pos, parse_if);
+    if (expr)
+        return expr;
+
+    return parse_expr(ctx, pos);
+}
+
+// Parse a block of statements that all sit at the indentation level found at
+// `pos`. Parsing stops at the end of the text, at the first line whose
+// indentation differs (the position is rewound to the end of the last
+// statement), or when no statement parses. Leftover text on a line that
+// yields no statement is a parse error.
+PARSER(parse_block) {
+    const int64_t block_indent = sss_get_indent(ctx->file, pos);
+    const char *start = pos;
+    whitespace(&pos);
+
+    ast_list_t *statements = NULL;
+    for (; *pos; ) {
+        ast_t *stmt = optional(ctx, &pos, parse_statement);
+        if (stmt == NULL) {
+            spaces(&pos);
+            bool at_line_end = (*pos == '\0' || *pos == '\r' || *pos == '\n');
+            if (!at_line_end)
+                parser_err(ctx, pos, strchrnul(pos, '\n'), "I couldn't parse this line");
+            break;
+        }
+
+        statements = new(ast_list_t, .ast=stmt, .next=statements);
+        whitespace(&pos);
+        if (sss_get_indent(ctx->file, pos) == block_indent)
+            continue;
+
+        pos = stmt->end; // backtrack
+        break;
+    }
+    REVERSE_LIST(statements);
+    return NewAST(ctx->file, start, pos, Block, .statements=statements);
+}
+
+// Parse a block body: an indented block when indent() succeeds at `pos`,
+// otherwise an inline block on the current line.
+PARSER(parse_opt_indented_block) {
+    if (indent(ctx, &pos))
+        return parse_block(ctx, pos);
+    return parse_inline_block(ctx, pos);
+}
+
+// Parse a namespace body: a run of type definitions, linker directives and
+// statements that all share the indentation of the first one. The result is
+// a Block node; parsing stops at the first line with a different indentation
+// or at the first line that none of the three parsers accepts.
+PARSER(parse_namespace) {
+    const char *start = pos;
+    whitespace(&pos);
+    const int64_t ns_indent = sss_get_indent(ctx->file, pos);
+
+    ast_list_t *statements = NULL;
+    for (;;) {
+        // Peek past whitespace to check the indentation of the next item.
+        const char *next = pos;
+        whitespace(&next);
+        if (sss_get_indent(ctx->file, next) != ns_indent)
+            break;
+
+        ast_t *stmt = optional(ctx, &pos, parse_type_def);
+        if (!stmt) stmt = optional(ctx, &pos, parse_linker);
+        if (!stmt) stmt = optional(ctx, &pos, parse_statement);
+
+        if (!stmt) {
+            if (sss_get_indent(ctx->file, next) > ns_indent && next < strchrnul(next, '\n'))
+                parser_err(ctx, next, strchrnul(next, '\n'), "I couldn't parse this namespace statement");
+            break;
+        }
+
+        statements = new(ast_list_t, .ast=stmt, .next=statements);
+        pos = stmt->end;
+        whitespace(&pos);
+    }
+    REVERSE_LIST(statements);
+    return NewAST(ctx->file, start, pos, Block, .statements=statements);
+}
+
+// Parse a type definition:
+//     type Name := SomeType
+//         ...optional namespace body, indented deeper than the `type` line...
+// Returns NULL unless `type Name :=` is found; a missing type after the ':='
+// is a parse error. When no deeper-indented body follows, the namespace is an
+// empty Block.
+PARSER(parse_type_def) {
+    const char *start = pos;
+    if (!match_word(&pos, "type"))
+        return NULL;
+
+    const int64_t starting_indent = sss_get_indent(ctx->file, pos);
+
+    const char *name = get_id(&pos);
+    if (name == NULL)
+        return NULL;
+    spaces(&pos);
+
+    if (!match(&pos, ":="))
+        return NULL;
+    type_ast_t *type_ast = expect(ctx, start, &pos, parse_type, "I expected a type after this ':='");
+
+    // Only commit to a namespace body if the next content is indented deeper.
+    ast_t *members = NULL;
+    const char *body_pos = pos;
+    whitespace(&body_pos);
+    if (sss_get_indent(ctx->file, body_pos) > starting_indent) {
+        pos = body_pos;
+        members = optional(ctx, &pos, parse_namespace);
+    }
+    if (members == NULL)
+        members = NewAST(ctx->file, pos, pos, Block, .statements=NULL);
+
+    return NewAST(ctx->file, start, pos, TypeDef, .var.name=name, .type=type_ast, .namespace=members);
+}
+
+// Parse a tagged-union type: `enum( a | b(x:Int, y:Int) = 5 | ... )`.
+// Each member is a tag name with optional parenthesized fields and an
+// optional explicit integer value; members without an explicit value take the
+// previous value plus one (starting from 0). Two members with the same value
+// are a parse error. Returns NULL unless `enum` followed by '(' is found.
+type_ast_t *parse_enum_type(parse_ctx_t *ctx, const char *pos) {
+    const char *start = pos;
+
+    if (!match_word(&pos, "enum"))
+        return NULL;
+    spaces(&pos);
+    if (!match(&pos, "("))
+        return NULL;
+
+    tag_t *tags = NULL;
+    int64_t next_value = 0;
+
+    whitespace(&pos);
+    for (;;) {
+        const char *tag_start = pos;
+
+        spaces(&pos);
+        const char *tag_name = get_id(&pos);
+        if (tag_name == NULL)
+            break;
+
+        // Optional field list.
+        spaces(&pos);
+        arg_list_t *fields = NULL;
+        if (match(&pos, "(")) {
+            whitespace(&pos);
+            fields = parse_args(ctx, &pos, false);
+            whitespace(&pos);
+            expect_closing(ctx, &pos, ")", "I wasn't able to parse the rest of this tagged union member");
+        }
+
+        // Optional explicit value.
+        spaces(&pos);
+        if (match(&pos, "=")) {
+            ast_t *val = expect(ctx, tag_start, &pos, parse_int, "I expected an integer literal after this '='");
+            next_value = Match(val, Int)->i;
+        }
+
+        // Check for duplicate values:
+        tag_t *earlier = tags;
+        while (earlier) {
+            if (earlier->value == next_value)
+                parser_err(ctx, tag_start, pos, "This tag value (%ld) is a duplicate of an earlier tag value", next_value);
+            earlier = earlier->next;
+        }
+
+        type_ast_t *type = NewTypeAST(ctx->file, tag_start, pos, TypeStruct, .fields=fields);
+        tags = new(tag_t, .name=tag_name, .value=next_value, .type=type, .next=tags);
+
+        // Only advance past whitespace if another member actually follows.
+        const char *after = pos;
+        whitespace(&after);
+        if (!match(&after, "|"))
+            break;
+        whitespace(&after);
+        pos = after;
+        ++next_value;
+    }
+
+    whitespace(&pos);
+    expect_closing(ctx, &pos, ")", "I wasn't able to parse the rest of this enum definition");
+
+    REVERSE_LIST(tags);
+
+    return NewTypeAST(ctx->file, start, pos, TypeTaggedUnion, .tags=tags);
+}
+
+// Parse a comma-separated parameter list and return it in source order,
+// advancing `*pos` past what was consumed.
+//
+// Parameters come in batches: several names may share one trailing `:Type` or
+// `=default` (e.g. `x, y: Int`). When `allow_unnamed` is set, an entry that
+// is not followed by ':' or '=' is re-parsed as a bare type and recorded
+// with a NULL var. A batch of names with neither a type nor a default value
+// is a parse error. Returns NULL for an empty list.
+arg_list_t *parse_args(parse_ctx_t *ctx, const char **pos, bool allow_unnamed)
+{
+    arg_list_t *args = NULL;
+    for (;;) {
+        const char *batch_start = *pos;
+        ast_t *default_val = NULL;
+        type_ast_t *type = NULL;
+
+        typedef struct var_list_s {
+            var_t *var;
+            struct var_list_s *next;
+        } var_list_t;
+
+        // Names in the current batch, collected in reverse.
+        var_list_t *vars = NULL;
+        for (;;) {
+            whitespace(pos);
+            const char *name_start = *pos;
+            const char *name = get_id(pos);
+            if (!name) break;
+            whitespace(pos);
+            // A single '=' introduces a default value; '==' is a comparison.
+            if (strncmp(*pos, "==", 2) != 0 && match(pos, "=")) {
+                default_val = expect(ctx, *pos-1, pos, parse_term, "I expected a value after this '='");
+                vars = new(var_list_t, .var=new(var_t, .name=name), .next=vars);
+                break;
+            } else if (match(pos, ":")) {
+                type = expect(ctx, *pos-1, pos, parse_type, "I expected a type here");
+                vars = new(var_list_t, .var=new(var_t, .name=name), .next=vars);
+                break;
+            } else if (allow_unnamed) {
+                // No ':' or '=' follows: treat the text as an unnamed type.
+                *pos = name_start;
+                type = optional(ctx, pos, parse_type);
+                if (type)
+                    vars = new(var_list_t, .var=NULL, .next=vars);
+                break;
+            } else if (name) {
+                vars = new(var_list_t, .var=new(var_t, .name=name), .next=vars);
+                spaces(pos);
+                if (!match(pos, ",")) break;
+            } else {
+                break;
+            }
+        }
+        if (!vars) break;
+        if (!default_val && !type)
+            parser_err(ctx, batch_start, *pos, "I expected a ':' and type, or '=' and a default value after this parameter (%s)",
+                       vars->var->name);
+
+        REVERSE_LIST(vars);
+        // Every name in the batch shares the batch's type/default. Each node
+        // must be linked onto the list built so far; without `.next=args`
+        // every earlier parameter was dropped.
+        for (; vars; vars = vars->next)
+            args = new(arg_list_t, .var=vars->var, .type=type, .default_val=default_val, .next=args);
+        whitespace(pos);
+        match(pos, ",");
+    }
+
+    REVERSE_LIST(args);
+    return args;
+}
+
+// Parse a named function definition:
+//     func name(args; flags) -> RetType  body
+// After the arguments, each ';' or ',' may introduce a flag: `inline`,
+// `cached`, or `cache_size = expr`. The return type (after "->" or ":") is
+// optional; the body is required. Returns NULL unless `func name (` is found.
+PARSER(parse_func_def) {
+    const char *start = pos;
+    if (!match_word(&pos, "func"))
+        return NULL;
+
+    ast_t *name = optional(ctx, &pos, parse_var);
+    if (name == NULL)
+        return NULL;
+
+    spaces(&pos);
+    if (!match(&pos, "("))
+        return NULL;
+
+    arg_list_t *args = parse_args(ctx, &pos, false);
+    whitespace(&pos);
+
+    bool is_inline = false;
+    ast_t *cache_ast = NULL;
+    for (;;) {
+        whitespace(&pos);
+        if (!match(&pos, ";") && !match(&pos, ","))
+            break;
+
+        const char *flag_start = pos;
+        if (match_word(&pos, "inline")) {
+            is_inline = true;
+            continue;
+        }
+        if (match_word(&pos, "cached")) {
+            // `cached` alone uses INT64_MAX as the cache size, unless a
+            // cache size has already been given.
+            if (cache_ast == NULL)
+                cache_ast = NewAST(ctx->file, pos, pos, Int, .i=INT64_MAX, .precision=64);
+            continue;
+        }
+        if (!match_word(&pos, "cache_size"))
+            continue;
+
+        whitespace(&pos);
+        if (!match(&pos, "="))
+            parser_err(ctx, flag_start, pos, "I expected a value for 'cache_size'");
+        whitespace(&pos);
+        cache_ast = expect(ctx, start, &pos, parse_expr, "I expected a maximum size for the cache");
+    }
+    expect_closing(ctx, &pos, ")", "I wasn't able to parse the rest of this function definition");
+
+    spaces(&pos);
+    type_ast_t *ret_type = NULL;
+    if (match(&pos, "->") || match(&pos, ":"))
+        ret_type = optional(ctx, &pos, parse_type);
+
+    ast_t *body = expect(ctx, start, &pos, parse_opt_indented_block,
+                         "This function needs a body block");
+    return NewAST(ctx->file, start, pos, FunctionDef,
+                  .name=name, .args=args, .ret_type=ret_type, .body=body, .cache=cache_ast,
+                  .is_inline=is_inline);
+}
+
+// Parse an `extern` reference in one of two forms:
+//   - `extern name(args...)[:RetType]` -- a call to an external function,
+//     handled by parse_fncall_suffix() in EXTERN_FUNCTION mode;
+//   - `extern [&]name : Type`          -- an external symbol of the given
+//     type ('&' sets the node's `address` flag).
+// Returns NULL when `pos` is not at the word `extern`. A missing name or a
+// missing type is a parse error.
+PARSER(parse_extern) {
+    const char *start = pos;
+    if (!match_word(&pos, "extern")) return NULL;
+    spaces(&pos);
+    bool address = (match(&pos, "&") != 0);
+    const char* name = get_id(&pos);
+    // Without this check a NULL name would be stored in the resulting node.
+    if (!name) {
+        parser_err(ctx, start, pos, "I expected a name for this extern");
+        return NULL;
+    }
+    spaces(&pos);
+    // extern function call:
+    if (match(&pos, "(")) {
+        // The temporary Var must end right before the '(' so that the call
+        // suffix parser finds the parenthesis at its end position.
+        return parse_fncall_suffix(ctx, NewAST(ctx->file, start, pos-1, Var, .var.name=name), EXTERN_FUNCTION);
+    }
+    if (!match(&pos, ":"))
+        parser_err(ctx, start, pos, "I couldn't get a type for this extern");
+    type_ast_t *type = expect(ctx, start, &pos, parse_type, "I couldn't parse the type for this extern");
+    return NewAST(ctx->file, start, pos, Extern, .name=name, .type=type, .address=address);
+}
+
+// Parse a doctest: `>>> statement`, optionally followed by `=== expected`,
+// where the expected output is the rest of that line.
+// Returns NULL when `pos` is not at ">>>". A statement is required, and an
+// `===` with nothing after it is a parse error.
+PARSER(parse_doctest) {
+    const char *start = pos;
+    if (!match(&pos, ">>>"))
+        return NULL;
+    spaces(&pos);
+
+    ast_t *tested = expect(ctx, start, &pos, parse_statement, "I couldn't parse the expression for this doctest");
+    whitespace(&pos);
+
+    const char *expected = NULL;
+    if (match(&pos, "===")) {
+        spaces(&pos);
+        const char *output_start = pos;
+        const char *output_end = strchrnul(pos, '\n');
+        if (output_end <= output_start)
+            parser_err(ctx, output_start, output_end, "You're missing expected output here");
+        expected = heap_strn(output_start, (size_t)(output_end - output_start));
+        pos = output_end;
+    }
+    return NewAST(ctx->file, start, pos, DocTest, .expr=tested, .output=expected);
+}
+
+// Parse a `use path` statement. The path runs up to the next space, tab,
+// line break or ';', gets ".sss" appended, and is resolved with
+// resolve_path() against the current file's name. Any ';' characters that
+// follow are consumed. Returns NULL when `pos` is not at the word `use`; an
+// empty path or an unresolvable file is a parse error.
+PARSER(parse_use) {
+    const char *start = pos;
+    if (!match_word(&pos, "use"))
+        return NULL;
+    spaces(&pos);
+
+    size_t path_len = strcspn(pos, " \t\r\n;");
+    if (path_len == 0)
+        parser_err(ctx, start, pos, "There is no filename here to use");
+
+    char *path = heap_strf("%.*s.sss", (int)path_len, pos);
+    pos += path_len;
+
+    char *resolved_path = resolve_path(path, ctx->file->filename);
+    if (resolved_path == NULL)
+        parser_err(ctx, start, pos, "No such file exists: \"%s\"", path);
+
+    while (match(&pos, ";")) {
+        // swallow any number of trailing semicolons
+    }
+    return NewAST(ctx->file, start, pos, Use, .path=resolved_path);
+}
+
+// Parse a linker directive: `!link` followed by the directive text, which
+// runs to the end of the line. Returns NULL when `pos` is not at `!link`.
+PARSER(parse_linker) {
+    const char *start = pos;
+    if (!match_word(&pos, "!link")) return NULL;
+    spaces(&pos);
+    size_t len = strcspn(pos, "\r\n");
+    const char *directive = heap_strn(pos, len);
+    // Consume the directive text so the node's end lies after it; otherwise
+    // the caller would resume at the directive and try to parse it as code.
+    pos += len;
+    return NewAST(ctx->file, start, pos, LinkerDirective, .directive=directive);
+}
+
+// Parse a single-line block: statements separated by ';'. Parsing stops at
+// the end of the text, at the first position where no statement parses, or
+// after a statement that is not followed by ';'.
+PARSER(parse_inline_block) {
+    spaces(&pos);
+    const char *start = pos;
+
+    ast_list_t *statements = NULL;
+    for (; *pos != '\0'; ) {
+        spaces(&pos);
+        ast_t *stmt = optional(ctx, &pos, parse_statement);
+        if (stmt == NULL)
+            break;
+        statements = new(ast_list_t, .ast=stmt, .next=statements);
+
+        spaces(&pos);
+        if (!match(&pos, ";"))
+            break;
+    }
+    REVERSE_LIST(statements);
+    return NewAST(ctx->file, start, pos, Block, .statements=statements);
+}
+
+// Parse an entire source file and return its top-level namespace as an AST.
+// A leading "#!" line is skipped. `on_err` is stored in the parse context
+// for error handling. Any text left over after the namespace (other than
+// whitespace) is reported as a parse error.
+ast_t *parse_file(sss_file_t *file, jmp_buf *on_err) {
+    parse_ctx_t ctx = {
+        .file=file,
+        .on_err=on_err,
+    };
+
+    const char *pos = file->text;
+    if (match(&pos, "#!")) {
+        // shebang: ignore the rest of that line
+        some_not(&pos, "\r\n");
+    }
+    whitespace(&pos);
+
+    ast_t *ast = parse_namespace(&ctx, pos);
+    pos = ast->end;
+    whitespace(&pos);
+
+    if (*pos != '\0')
+        parser_err(&ctx, pos, pos + strlen(pos), "I couldn't parse this part of the file");
+    return ast;
+}
+
+// Parse a type from a standalone string by wrapping it in a spoofed file
+// named "<type>". Returns NULL if no type is found at the start of the
+// string; trailing non-whitespace text after the type is a parse error.
+// The parse context is created without an error jump target.
+type_ast_t *parse_type_str(const char *str) {
+    parse_ctx_t ctx = {
+        .file=sss_spoof_file("<type>", str),
+        .on_err=NULL,
+    };
+
+    const char *pos = ctx.file->text;
+    whitespace(&pos);
+
+    type_ast_t *ast = parse_type(&ctx, pos);
+    if (ast == NULL)
+        return ast;
+
+    pos = ast->end;
+    whitespace(&pos);
+    if (*pos != '\0')
+        parser_err(&ctx, pos, pos + strlen(pos), "I couldn't parse this part of the type");
+    return ast;
+}
+
+// Parse an extended expression from a standalone string by wrapping it in a
+// spoofed file named "<expression>". Returns NULL if no expression is found
+// at the start of the string; trailing non-whitespace text after the
+// expression is a parse error. The parse context is created without an error
+// jump target.
+ast_t *parse_expression_str(const char *str) {
+    parse_ctx_t ctx = {
+        .file=sss_spoof_file("<expression>", str),
+        .on_err=NULL,
+    };
+
+    const char *pos = ctx.file->text;
+    whitespace(&pos);
+
+    ast_t *ast = parse_extended_expr(&ctx, pos);
+    if (ast == NULL)
+        return ast;
+
+    pos = ast->end;
+    whitespace(&pos);
+    if (*pos != '\0')
+        parser_err(&ctx, pos, pos + strlen(pos), "I couldn't parse this part of the expression");
+    return ast;
+}
+
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/parse.h b/parse.h
new file mode 100644
index 00000000..aaaa671a
--- /dev/null
+++ b/parse.h
@@ -0,0 +1,9 @@
+#pragma once
+
+#include <setjmp.h>
+
+#include "ast.h"
+
+type_ast_t *parse_type_str(const char *str); // Parse a type from a standalone string; NULL if no type is found at its start.
+ast_t *parse_expression_str(const char *str); // Parse an extended expression from a standalone string; NULL if none is found at its start.
+ast_t *parse_file(sss_file_t *file, jmp_buf *on_err); // Parse a whole source file into a Block AST; `on_err` is stored in the parse context for error handling.
diff --git a/util.c b/util.c
new file mode 100644
index 00000000..4ee5ef7a
--- /dev/null
+++ b/util.c
@@ -0,0 +1,84 @@
+#include <ctype.h>
+#include <gc.h>
+#include <gc/cord.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "util.h"
+
+// Copy exactly `len` bytes from `str` into a freshly GC-allocated buffer and
+// NUL-terminate it. Returns NULL for a NULL `str`, and the literal "" (not a
+// fresh allocation) when `len` is zero.
+public char *heap_strn(const char *str, size_t len)
+{
+    if (str == NULL)
+        return NULL;
+    if (len == 0)
+        return "";
+
+    char *copy = memcpy(GC_MALLOC_ATOMIC(len + 1), str, len);
+    copy[len] = '\0';
+    return copy;
+}
+
+// Duplicate a NUL-terminated string via heap_strn(). Returns NULL for NULL.
+public char *heap_str(const char *str)
+{
+    return str ? heap_strn(str, strlen(str)) : NULL;
+}
+
+// printf-style formatting into a string copied with heap_strn().
+// The text is first formatted into a temporary malloc'd buffer by
+// vasprintf(), copied, and the temporary is freed.
+// Returns NULL if formatting fails.
+public char *heap_strf(const char *fmt, ...)
+{
+    va_list args;
+    va_start(args, fmt);
+    char *tmp = NULL;
+    int len = vasprintf(&tmp, fmt, args);
+    // Every va_start needs a matching va_end, including on the failure path.
+    va_end(args);
+    if (len < 0) return NULL;
+    char *ret = heap_strn(tmp, (size_t)len);
+    free(tmp);
+    return ret;
+}
+
+// Name mangling algorithm to produce valid identifiers:
+// Escape individual chars as "_x%02X"
+// Things being escaped:
+// - Leading digit
+// - Non alphanumeric/non-underscore characters
+// - "_" when followed by "x" and two uppercase hex digits
+
+// Decide whether the byte at `p` (inside the string starting at `name`) must
+// be written as a hex escape rather than copied verbatim.
+static int mangle_needs_escape(const char *name, const char *p)
+{
+    // <ctype.h> functions require a value representable as unsigned char.
+    unsigned char c = (unsigned char)*p;
+    return (!isalnum(c) && c != '_') // Non-identifier character
+        || (p == name && isdigit(c)) // Leading digit
+        || (p[0] == '_' && p[1] == 'x' && strspn(p+2, "ABCDEF0123456789") >= 2); // Looks like hex escape
+}
+
+// Return a GC-allocated mangled copy of `name` (see the rules above).
+public char *mangle(const char *name)
+{
+    // First pass: compute the output length.
+    size_t len = 0;
+    for (const char *p = name; *p; p++)
+        len += mangle_needs_escape(name, p) ? strlen("_x00") : 1;
+
+    // Second pass: write the output.
+    char *mangled = GC_MALLOC_ATOMIC(len + 1);
+    char *dest = mangled;
+    for (const char *src = name; *src; src++) {
+        if (mangle_needs_escape(name, src)) {
+            // Format the byte as unsigned: a negative `char` would be
+            // sign-extended and print as eight hex digits, overrunning the
+            // four bytes reserved for this escape.
+            dest += sprintf(dest, "_x%02X", (unsigned)(unsigned char)*src); // Hex escape
+        } else {
+            *(dest++) = *src;
+        }
+    }
+    mangled[len] = '\0';
+    return mangled;
+}
+
+// printf-style formatting that returns the result as a new CORD, by
+// forwarding the variadic arguments to CORD_vsprintf().
+CORD CORD_asprintf(const char *fmt, ...)
+{
+    CORD formatted = NULL;
+    va_list ap;
+    va_start(ap, fmt);
+    CORD_vsprintf(&formatted, fmt, ap);
+    va_end(ap);
+    return formatted;
+}
+
+
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/util.h b/util.h
new file mode 100644
index 00000000..416c0b3e
--- /dev/null
+++ b/util.h
@@ -0,0 +1,29 @@
+#pragma once
+
+#include <assert.h>
+#include <gc.h>
+#include <gc/cord.h>
+#include <stdio.h>
+#include <string.h>
+#include <err.h>
+
+#define streq(a, b) (((a) == NULL && (b) == NULL) || (((a) == NULL) == ((b) == NULL) && strcmp(a, b) == 0)) // NULL-safe string equality: true if both are NULL, or both are non-NULL and strcmp() says equal. Arguments are evaluated more than once.
+#define new(t, ...) ((t*)memcpy(GC_MALLOC(sizeof(t)), &(t){__VA_ARGS__}, sizeof(t))) // GC_MALLOC a `t` and initialize it by copying a compound literal built from the given designated initializers.
+#define grow(arr, new_size) ((typeof (arr))GC_REALLOC(arr, (sizeof(arr[0]))*(new_size))) // GC_REALLOC `arr` to hold `new_size` elements; yields the resulting pointer (it does not assign it back to `arr`).
+#define Match(x, _tag) ((x)->tag == _tag ? &(x)->__data._tag : (errx(1, __FILE__ ":%d This was supposed to be a " # _tag "\n", __LINE__), &(x)->__data._tag)) // Pointer to the `_tag` payload of tagged value `x`; exits via errx(1, ...) if x's tag is not `_tag`.
+#define Tagged(t, _tag, ...) new(t, .tag=_tag, .__data._tag={__VA_ARGS__}) // Allocate a tagged `t` with tag `_tag` and the given payload initializers.
+
+#ifndef auto
+#define auto __auto_type // `auto x = expr;` uses the __auto_type extension to infer the variable's type
+#endif
+
+#ifndef public
+#define public __attribute__ ((visibility ("default"))) // marks a definition with default symbol visibility
+#endif
+
+char *heap_strn(const char *str, size_t len); // Copy `len` bytes of `str` into a NUL-terminated GC-allocated string (NULL for NULL input, "" for len 0).
+char *heap_str(const char *str); // Copy a NUL-terminated string via heap_strn(); NULL for NULL input.
+char *heap_strf(const char *fmt, ...); // printf-style formatting into a string produced by heap_strn(); NULL if formatting fails.
+CORD CORD_asprintf(const char *fmt, ...); // printf-style formatting into a new CORD.
+
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0