diff options
| author | Bruce Hill <bruce@bruce-hill.com> | 2024-02-04 15:23:59 -0500 |
|---|---|---|
| committer | Bruce Hill <bruce@bruce-hill.com> | 2024-02-04 15:23:59 -0500 |
| commit | 98f0c51119f9d42d733f44cb516b1c2bcd9061af (patch) | |
| tree | 39ab4fa635f858b76b9a8bbf84701c2788d5f498 | |
Initial commit
| -rw-r--r-- | Makefile | 43 | ||||
| -rw-r--r-- | ast.c | 179 | ||||
| -rw-r--r-- | ast.h | 290 | ||||
| -rw-r--r-- | compile.c | 185 | ||||
| -rw-r--r-- | compile.h | 11 | ||||
| -rw-r--r-- | files.c | 317 | ||||
| -rw-r--r-- | files.h | 43 | ||||
| -rw-r--r-- | foo.c | 12 | ||||
| -rw-r--r-- | nextlang.c | 23 | ||||
| -rw-r--r-- | parse.c | 1759 | ||||
| -rw-r--r-- | parse.h | 9 | ||||
| -rw-r--r-- | util.c | 84 | ||||
| -rw-r--r-- | util.h | 29 |
13 files changed, 2984 insertions, 0 deletions
diff --git a/Makefile b/Makefile new file mode 100644 index 00000000..fb2b6414 --- /dev/null +++ b/Makefile @@ -0,0 +1,43 @@ +CC=gcc +PREFIX=/usr/local +VERSION=0.12.1 +CCONFIG=-std=c11 -Werror -D_XOPEN_SOURCE=700 -D_POSIX_C_SOURCE=200809L -fPIC -ftrapv -fvisibility=hidden -flto -fno-fat-lto-objects -Wl,-flto +LDFLAGS=-Wl,-rpath '-Wl,$$ORIGIN' +# MAKEFLAGS := --jobs=$(shell nproc) --output-sync=target +CWARN=-Wall -Wextra -Wno-format + # -Wpedantic -Wsign-conversion -Wtype-limits -Wunused-result -Wnull-dereference \ + # -Waggregate-return -Walloc-zero -Walloca -Warith-conversion -Wcast-align -Wcast-align=strict \ + # -Wdangling-else -Wdate-time -Wdisabled-optimization -Wdouble-promotion -Wduplicated-branches \ + # -Wduplicated-cond -Wexpansion-to-defined -Wfloat-conversion -Wfloat-equal -Wformat-nonliteral \ + # -Wformat-security -Wformat-signedness -Wframe-address -Winline -Winvalid-pch -Wjump-misses-init \ + # -Wlogical-op -Wlong-long -Wmissing-format-attribute -Wmissing-include-dirs -Wmissing-noreturn \ + # -Wnull-dereference -Woverlength-strings -Wpacked -Wpacked-not-aligned -Wpointer-arith \ + # -Wredundant-decls -Wshadow -Wshadow=compatible-local -Wshadow=global -Wshadow=local \ + # -Wsign-conversion -Wstack-protector -Wsuggest-attribute=const -Wswitch-default -Wswitch-enum \ + # -Wsync-nand -Wtrampolines -Wundef -Wunsuffixed-float-constants -Wunused -Wunused-but-set-variable \ + # -Wunused-const-variable -Wunused-local-typedefs -Wunused-macros -Wvariadic-macros -Wvector-operation-performance \ + # -Wvla -Wwrite-strings +OSFLAGS != case $$(uname -s) in *BSD|Darwin) echo '-D_BSD_SOURCE';; Linux) echo '-D_GNU_SOURCE';; *) echo '-D_DEFAULT_SOURCE';; esac +EXTRA= +G=-ggdb +O=-Og +CFLAGS=$(CCONFIG) $(EXTRA) $(CWARN) $(G) $(O) $(OSFLAGS) +LDLIBS=-lgc -lgccjit -lcord -lm -lunistring + +all: nextlang + +nextlang: nextlang.c parse.o files.o util.o ast.o compile.o + +SipHash/halfsiphash.c: + git submodule update --init --recursive + +tags: + ctags **/*.[ch] + +clean: + 
rm -f nextlang *.o + +%.1: %.1.md + pandoc --lua-filter=.pandoc/bold-code.lua -s $< -t man -o $@ + +.PHONY: all clean install uninstall test @@ -0,0 +1,179 @@ +// Some basic operations defined on AST nodes, mainly converting to +// strings for debugging. +#include <gc/cord.h> +#include <stdarg.h> +#include <printf.h> + +#include "ast.h" + +static const char *OP_NAMES[] = { + [BINOP_UNKNOWN]="unknown", + [UNOP_NOT]="not", [UNOP_NEGATIVE]="negative", + [BINOP_POWER]="^", [BINOP_MULT]="*", [BINOP_DIVIDE]="/", + [BINOP_MOD]="mod", [BINOP_MOD1]="mod1", [BINOP_PLUS]="+", [BINOP_MINUS]="minus", + [BINOP_CONCAT]="++", [BINOP_LSHIFT]="<<", [BINOP_RSHIFT]=">>", [BINOP_MIN]="min", + [BINOP_MAX]="max", [BINOP_EQ]="==", [BINOP_NE]="!=", [BINOP_LT]="<", + [BINOP_LE]="<=", [BINOP_GT]=">", [BINOP_GE]=">=", [BINOP_AND]="and", [BINOP_OR]="or", [BINOP_XOR]="xor", +}; + +static CORD ast_to_cord(ast_t *ast); +static CORD ast_list_to_cord(ast_list_t *asts); +static CORD type_ast_to_cord(type_ast_t *t); +static CORD arg_list_to_cord(arg_list_t *args); +static CORD tags_to_cord(tag_t *tags); + +#define TO_CORD(x) _Generic(x, \ + ast_t*: ast_to_cord(x), \ + ast_list_t*: ast_list_to_cord(x), \ + type_ast_t*: type_ast_to_cord(x), \ + arg_list_t*: arg_list_to_cord(x), \ + tag_t*: tags_to_cord(x), \ + const char *: x, \ + int64_t: CORD_asprintf("%ld", x), \ + unsigned short int: CORD_asprintf("%d", x), \ + double: CORD_asprintf("%g", x), \ + bool: CORD_asprintf("%s", x ? "yes" : "no"), \ + unsigned char: CORD_asprintf("%s", x ? 
"yes" : "no")) + +CORD ast_list_to_cord(ast_list_t *asts) +{ + if (!asts) + return "\x1b[35mNULL\x1b[m"; + + CORD c = "["; + for (; asts; asts = asts->next) { + c = CORD_cat(c, ast_to_cord(asts->ast)); + if (asts->next) c = CORD_cat(c, ", "); + } + c = CORD_cat(c, "]"); + return c; +} + +CORD arg_list_to_cord(arg_list_t *args) { + CORD c = "Args("; + for (; args; args = args->next) { + if (args->var && args->var->name) + c = CORD_cat(c, args->var->name); + if (args->type) + CORD_sprintf(&c, "%r:%s", c, type_ast_to_cord(args->type)); + if (args->default_val) + CORD_sprintf(&c, "%r=%s", c, ast_to_cord(args->default_val)); + if (args->next) c = CORD_cat(c, ", "); + } + c = CORD_cat(c, ")"); + return c; +} + +CORD tags_to_cord(tag_t *tags) { + CORD c = "Tags("; + for (; tags; tags = tags->next) { + if (tags->name) + c = CORD_cat(c, tags->name); + CORD_sprintf(&c, "%r:%s=%ld", c, type_ast_to_cord(tags->type), tags->value); + if (tags->next) c = CORD_cat(c, ", "); + } + c = CORD_cat(c, ")"); + return c; +} + +CORD ast_to_cord(ast_t *ast) +{ + if (!ast) return "\x1b[35mNULL\x1b[m"; + + switch (ast->tag) { +#define T(type, ...) case type: { auto data = ast->__data.type; (void)data; return CORD_asprintf("\x1b[34;1m" #type "\x1b[m" __VA_ARGS__); } + T(Unknown, "Unknown") + T(Nil, "(%r)", type_ast_to_cord(data.type)) + T(Bool, "(\x1b[35m%s\x1b[m)", data.b ? 
"yes" : "no") + T(Var, "(\x1b[36;1m%s\x1b[m)", data.var.name) + T(Int, "(\x1b[35m%ld\x1b[m, precision=%ld)", data.i, data.precision) + T(Num, "(\x1b[35m%ld\x1b[m, precision=%ld)", data.n, data.precision) + T(Char, "(\x1b[35m'%c'\x1b[m)", data.c) + T(StringLiteral, "\x1b[35m\"%s\"\x1b[m", data.str) + T(StringJoin, "(%r)", ast_list_to_cord(data.children)) + T(Interp, "(%r)", ast_to_cord(data.value)) + T(Declare, "(var=%s, value=%s)", ast_to_cord(data.var), ast_to_cord(data.value)) + T(Assign, "(targets=%r, values=%r)", ast_list_to_cord(data.targets), ast_list_to_cord(data.values)) + T(BinaryOp, "(%r, %s, %r)", ast_to_cord(data.lhs), OP_NAMES[data.op], ast_to_cord(data.rhs)) + T(UpdateAssign, "(%r, %s, %r)", ast_to_cord(data.lhs), OP_NAMES[data.op], ast_to_cord(data.rhs)) + T(UnaryOp, "(%s, %r)", OP_NAMES[data.op], ast_to_cord(data.value)) + T(Min, "(%r, %r, key=%r)", ast_to_cord(data.lhs), ast_to_cord(data.rhs), ast_to_cord(data.key)) + T(Max, "(%r, %r, key=%r)", ast_to_cord(data.lhs), ast_to_cord(data.rhs), ast_to_cord(data.key)) + T(Array, "(%r, type=%r)", ast_list_to_cord(data.items), type_ast_to_cord(data.type)) + T(Table, "(key_type=%r, value_type=%r, fallback=%r, default_value=%r, entries=%r)", + type_ast_to_cord(data.key_type), type_ast_to_cord(data.value_type), + ast_to_cord(data.fallback), ast_to_cord(data.default_value), + ast_list_to_cord(data.entries)) + T(TableEntry, "(%r => %r)", ast_to_cord(data.key), ast_to_cord(data.value)) + T(FunctionDef, "(name=%r, args=%r, ret=%r, body=%r)", ast_to_cord(data.name), + arg_list_to_cord(data.args), type_ast_to_cord(data.ret_type), ast_to_cord(data.body)) + T(Lambda, "(args=%r, body=%r)", arg_list_to_cord(data.args), ast_to_cord(data.body)) + T(FunctionCall, "(fn=%r, args=%r)", ast_to_cord(data.fn), ast_list_to_cord(data.args)) + T(KeywordArg, "(%s=%r)", ast_to_cord(data.arg)) + T(Block, "(%r)", ast_list_to_cord(data.statements)) + T(For, "(index=%r, value=%r, iter=%r, body=%r)", ast_to_cord(data.index), 
ast_to_cord(data.value), + ast_to_cord(data.iter), ast_to_cord(data.body)) + T(While, "(condition=%r, body=%r)", ast_to_cord(data.condition), ast_to_cord(data.body)) + T(If, "(condition=%r, body=%r, else=%r)", ast_to_cord(data.condition), ast_to_cord(data.body), ast_to_cord(data.else_body)) + T(Reduction, "(iter=%r, combination=%r, fallback=%r)", ast_to_cord(data.iter), ast_to_cord(data.combination), ast_to_cord(data.fallback)) + T(Skip, "(%s)", data.target) + T(Stop, "(%s)", data.target) + T(Pass, "") + T(Return, "(%r)", ast_to_cord(data.value)) + T(Extern, "(name=%s, type=%r)", data.name, type_ast_to_cord(data.type)) + T(TypeDef, "(%s, type=%r, namespace=%r)", data.var.name, type_ast_to_cord(data.type), ast_to_cord(data.namespace)) + T(Index, "(indexed=%r, index=%r)", ast_to_cord(data.indexed), ast_to_cord(data.index)) + T(FieldAccess, "(fielded=%r, field=%s)", ast_to_cord(data.fielded), data.field) + T(DocTest, "(expr=%r, output=%s)", ast_to_cord(data.expr), data.output) + T(Use, "(%s)", data.path) + T(LinkerDirective, "(%s)", data.directive) +#undef T + } + return NULL; +} + +CORD type_ast_to_cord(type_ast_t *t) +{ + if (!t) return "\x1b[35mNULL\x1b[m"; + + switch (t->tag) { +#define T(type, ...) 
case type: { auto data = t->__data.type; (void)data; return CORD_asprintf("\x1b[32;1m" #type "\x1b[m" __VA_ARGS__); } + T(TypeUnknown, "") + T(TypeVar, "(\x1b[36;1m%s\x1b[m)", data.var.name) + T(TypePointer, "(%r, is_optional=%d, is_stack=%d, is_readonly=%d)", + type_ast_to_cord(data.pointed), data.is_optional, + data.is_stack, data.is_readonly) + T(TypeStruct, "(%r)", arg_list_to_cord(data.fields)) + T(TypeTaggedUnion, "(%r)", tags_to_cord(data.tags)) + T(TypeArray, "(%r)", type_ast_to_cord(data.item)) + T(TypeTable, "(%r => %r)", type_ast_to_cord(data.key), type_ast_to_cord(data.value)) + T(TypeFunction, "(args=%r, ret=%r)", arg_list_to_cord(data.args), type_ast_to_cord(data.ret)) +#undef T + } + return NULL; +} + +const char *ast_to_str(ast_t *ast) { + CORD c = ast_to_cord(ast); + return CORD_to_char_star(c); +} + +const char *type_ast_to_str(type_ast_t *t) { + CORD c = type_ast_to_cord(t); + return CORD_to_char_star(c); +} + +int printf_ast(FILE *stream, const struct printf_info *info, const void *const args[]) +{ + ast_t *ast = *(ast_t**)(args[0]); + if (ast) { + if (info->alt) + return fprintf(stream, "%.*s", (int)(ast->end - ast->start), ast->start); + else + return fprintf(stream, "%s", ast_to_str(ast)); + } else { + return fputs("(null)", stream); + } +} + +// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 @@ -0,0 +1,290 @@ +#pragma once +#include <stdbool.h> +#include <stdint.h> +#include <stdlib.h> +#include <printf.h> + +#include "files.h" +#include "util.h" + +#define NewAST(_file, _start, _end, ast_tag, ...) (new(ast_t, .file=_file, .start=_start, .end=_end,\ + .tag=ast_tag, .__data.ast_tag={__VA_ARGS__})) +#define NewTypeAST(_file, _start, _end, ast_tag, ...) (new(type_ast_t, .file=_file, .start=_start, .end=_end,\ + .tag=ast_tag, .__data.ast_tag={__VA_ARGS__})) +#define FakeAST(ast_tag, ...) (new(ast_t, .tag=ast_tag, .__data.ast_tag={__VA_ARGS__})) +#define WrapAST(ast, ast_tag, ...) 
(new(ast_t, .file=(ast)->file, .start=(ast)->start, .end=(ast)->end, .tag=ast_tag, .__data.ast_tag={__VA_ARGS__})) +#define StringAST(ast, _str) WrapAST(ast, StringLiteral, .str=heap_str(_str)) + +struct binding_s; +typedef struct type_ast_s type_ast_t; +typedef struct ast_s ast_t; + +typedef struct { + const char *name; + struct binding_s *binding; +} var_t; + +typedef struct ast_list_s { + ast_t *ast; + struct ast_list_s *next; +} ast_list_t; + +typedef struct arg_list_s { + var_t *var; + type_ast_t *type; + ast_t *default_val; + struct arg_list_s *next; +} arg_list_t; + +#define REVERSE_LIST(list) do { \ + __typeof(list) _prev = NULL; \ + __typeof(list) _next = NULL; \ + auto _current = list; \ + while (_current != NULL) { \ + _next = _current->next; \ + _current->next = _prev; \ + _prev = _current; \ + _current = _next; \ + } \ + list = _prev; \ +} while(0) + +typedef enum { + UNOP_UNKNOWN, + UNOP_NOT=1, UNOP_NEGATIVE, + UNOP_HEAP_ALLOCATE, + UNOP_STACK_REFERENCE, +} unop_e; + +typedef enum { + BINOP_UNKNOWN, + BINOP_POWER=100, BINOP_MULT, BINOP_DIVIDE, BINOP_MOD, BINOP_MOD1, BINOP_PLUS, + BINOP_MINUS, BINOP_CONCAT, BINOP_LSHIFT, BINOP_RSHIFT, BINOP_MIN, + BINOP_MAX, BINOP_EQ, BINOP_NE, BINOP_LT, BINOP_LE, BINOP_GT, BINOP_GE, + BINOP_AND, BINOP_OR, BINOP_XOR, +} binop_e; + +typedef enum { + TypeUnknown, + TypeVar, + TypePointer, + TypeStruct, + TypeTaggedUnion, + TypeArray, + TypeTable, + TypeFunction, +} type_ast_e; + +typedef struct tag_s { + const char *name; + struct type_ast_s *type; + int64_t value; + struct tag_s *next; +} tag_t; + +struct type_ast_s { + type_ast_e tag; + sss_file_t *file; + const char *start, *end; + union { + struct {} TypeUnknown; + struct { + var_t var; + } TypeVar; + struct { + type_ast_t *pointed; + bool is_optional:1, is_stack:1, is_readonly:1; + } TypePointer; + struct { + arg_list_t *fields; + } TypeStruct; + struct { + tag_t *tags; + } TypeTaggedUnion; + struct { + type_ast_t *item; + } TypeArray; + struct { + type_ast_t *key, 
*value; + } TypeTable; + struct { + arg_list_t *args; + type_ast_t *ret; + } TypeFunction; + } __data; +}; + +typedef enum { + Unknown = 0, + Nil, Bool, Var, + Int, Num, Char, + StringLiteral, StringJoin, Interp, + Declare, Assign, + BinaryOp, UnaryOp, UpdateAssign, + Min, Max, + Array, Table, TableEntry, + FunctionDef, Lambda, + FunctionCall, KeywordArg, + Block, + For, While, If, + Reduction, + Skip, Stop, Pass, + Return, + Extern, + TypeDef, + Index, FieldAccess, + DocTest, + Use, + LinkerDirective, +} ast_e; + +struct ast_s { + ast_e tag; + sss_file_t *file; + const char *start, *end; + union { + struct {} Unknown; + struct { + type_ast_t *type; + } Nil; + struct { + bool b; + } Bool; + struct { + var_t var; + } Var; + struct { + int64_t i; + enum { INT_64BIT, INT_32BIT, INT_16BIT, INT_8BIT } precision; + } Int; + struct { + double n; + enum { NUM_64BIT, NUM_32BIT } precision; + } Num; + struct { + char c; + } Char; + struct { + const char *str; + } StringLiteral; + struct { + ast_list_t *children; + } StringJoin; + struct { + ast_t *value; + bool labelled:1, colorize:1, quote_string:1; + } Interp; + struct { + ast_t *var; + ast_t *value; + } Declare; + struct { + ast_list_t *targets, *values; + } Assign; + struct { + ast_t *lhs; + binop_e op; + ast_t *rhs; + } BinaryOp, UpdateAssign; + struct { + unop_e op; + ast_t *value; + } UnaryOp; + struct { + ast_t *lhs, *rhs, *key; + } Min, Max; + struct { + type_ast_t *type; + ast_list_t *items; + } Array; + struct { + type_ast_t *key_type, *value_type; + ast_t *fallback, *default_value; + ast_list_t *entries; + } Table; + struct { + ast_t *key, *value; + } TableEntry; + struct { + ast_t *name; + arg_list_t *args; + type_ast_t *ret_type; + ast_t *body; + ast_t *cache; + bool is_inline; + } FunctionDef; + struct { + arg_list_t *args; + ast_t *body; + } Lambda; + struct { + ast_t *fn; + ast_list_t *args; + type_ast_t *extern_return_type; + } FunctionCall; + struct { + const char *name; + ast_t *arg; + } KeywordArg; + 
struct { + ast_list_t *statements; + } Block; + struct { + ast_t *index, *value, *iter, *body; + } For; + struct { + ast_t *condition, *body; + } While; + struct { + ast_t *condition, *body, *else_body; + } If; + struct { + ast_t *iter, *combination, *fallback; + } Reduction; + struct { + const char *target; + } Skip, Stop; + struct {} Pass; + struct { + ast_t *value; + } Return; + struct { + const char *name; + type_ast_t *type; + bool address; + } Extern; + struct { + var_t var; + type_ast_t *type; + ast_t *namespace; + } TypeDef; + struct { + ast_t *indexed, *index; + bool unchecked; + } Index; + struct { + ast_t *fielded; + const char *field; + } FieldAccess; + struct { + ast_t *expr; + const char *output; + bool skip_source:1; + } DocTest; + struct { + const char *path; + sss_file_t *file; + bool main_program; + } Use; + struct { + const char *directive; + } LinkerDirective; + } __data; +}; + +const char *ast_to_str(ast_t *ast); +const char *type_ast_to_str(type_ast_t *ast); +int printf_ast(FILE *stream, const struct printf_info *info, const void *const args[]); +ast_list_t *get_ast_children(ast_t *ast); + +// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 diff --git a/compile.c b/compile.c new file mode 100644 index 00000000..e4ae0a48 --- /dev/null +++ b/compile.c @@ -0,0 +1,185 @@ + +#include <ctype.h> +#include <gc/cord.h> +#include <gc.h> +#include <stdio.h> + +#include "ast.h" +#include "util.h" + +static CORD compile_type(type_ast_t *t) +{ + switch (t->tag) { + case TypeVar: return Match(t, TypeVar)->var.name; + default: errx(1, "Not implemented"); + } +} + +CORD compile(ast_t *ast) +{ + switch (ast->tag) { + case Nil: return "NULL"; + case Bool: return Match(ast, Bool)->b ? "true" : "false"; + case Var: return Match(ast, Var)->var.name; + case Int: return CORD_asprintf("((Int%ld_t)%ld)", Match(ast, Int)->precision, Match(ast, Int)->i); + case Num: return CORD_asprintf(Match(ast, Num)->precision == 64 ? 
"%g" : "%gf", Match(ast, Num)->n); + case Char: return CORD_asprintf("'\\x%02X'", (int)Match(ast, Char)->c); + case UnaryOp: { + auto unop = Match(ast, UnaryOp); + CORD expr = compile(unop->value); + switch (unop->op) { + case UNOP_NOT: return CORD_cat("!", expr); + case UNOP_NEGATIVE: return CORD_cat("-", expr); + case UNOP_HEAP_ALLOCATE: return CORD_asprintf("__heap(%r)", expr); + case UNOP_STACK_REFERENCE: return CORD_asprintf("__stack(%r)", expr); + default: break; + } + errx(1, "Invalid unop"); + } + case BinaryOp: { + auto binop = Match(ast, BinaryOp); + CORD lhs = compile(binop->lhs); + CORD rhs = compile(binop->rhs); + switch (binop->op) { + case BINOP_MULT: return CORD_asprintf("(%r * %r)", lhs, rhs); + case BINOP_DIVIDE: return CORD_asprintf("(%r / %r)", lhs, rhs); + case BINOP_MOD: return CORD_asprintf("(%r %% %r)", lhs, rhs); + case BINOP_PLUS: return CORD_asprintf("(%r + %r)", lhs, rhs); + case BINOP_MINUS: return CORD_asprintf("(%r - %r)", lhs, rhs); + case BINOP_LSHIFT: return CORD_asprintf("(%r << %r)", lhs, rhs); + case BINOP_RSHIFT: return CORD_asprintf("(%r >> %r)", lhs, rhs); + case BINOP_EQ: return CORD_asprintf("(%r == %r)", lhs, rhs); + case BINOP_NE: return CORD_asprintf("(%r != %r)", lhs, rhs); + case BINOP_LT: return CORD_asprintf("(%r < %r)", lhs, rhs); + case BINOP_LE: return CORD_asprintf("(%r <= %r)", lhs, rhs); + case BINOP_GT: return CORD_asprintf("(%r > %r)", lhs, rhs); + case BINOP_GE: return CORD_asprintf("(%r >= %r)", lhs, rhs); + case BINOP_AND: return CORD_asprintf("(%r && %r)", lhs, rhs); + case BINOP_OR: return CORD_asprintf("(%r || %r)", lhs, rhs); + default: break; + } + errx(1, "unimplemented binop"); + } + case UpdateAssign: { + auto update = Match(ast, UpdateAssign); + CORD lhs = compile(update->lhs); + CORD rhs = compile(update->rhs); + switch (update->op) { + case BINOP_MULT: return CORD_asprintf("%r *= %r", lhs, rhs); + case BINOP_DIVIDE: return CORD_asprintf("%r /= %r", lhs, rhs); + case BINOP_MOD: return 
CORD_asprintf("%r = %r %% %r", lhs, lhs, rhs); + case BINOP_PLUS: return CORD_asprintf("%r += %r", lhs, rhs); + case BINOP_MINUS: return CORD_asprintf("%r -= %r", lhs, rhs); + case BINOP_LSHIFT: return CORD_asprintf("%r <<= %r", lhs, rhs); + case BINOP_RSHIFT: return CORD_asprintf("%r >>= %r", lhs, rhs); + case BINOP_EQ: return CORD_asprintf("%r = (%r == %r)", lhs, lhs, rhs); + case BINOP_NE: return CORD_asprintf("%r = (%r != %r)", lhs, lhs, rhs); + case BINOP_LT: return CORD_asprintf("%r = (%r < %r)", lhs, lhs, rhs); + case BINOP_LE: return CORD_asprintf("%r = (%r <= %r)", lhs, lhs, rhs); + case BINOP_GT: return CORD_asprintf("%r = (%r > %r)", lhs, lhs, rhs); + case BINOP_GE: return CORD_asprintf("%r = (%r >= %r)", lhs, lhs, rhs); + case BINOP_AND: return CORD_asprintf("%r = (%r && %r)", lhs, lhs, rhs); + case BINOP_OR: return CORD_asprintf("%r = (%r || %r)", lhs, lhs, rhs); + default: break; + } + errx(1, "unimplemented binop"); + } + case StringLiteral: { + const char *str = Match(ast, StringLiteral)->str; + CORD c = "\""; + for (; *str; ++str) { + switch (*str) { + case '\\': c = CORD_cat(c, "\\\\"); break; + case '"': c = CORD_cat(c, "\\\""); break; + case '\a': c = CORD_cat(c, "\\a"); break; + case '\b': c = CORD_cat(c, "\\b"); break; + case '\n': c = CORD_cat(c, "\\n"); break; + case '\r': c = CORD_cat(c, "\\r"); break; + case '\t': c = CORD_cat(c, "\\t"); break; + case '\v': c = CORD_cat(c, "\\v"); break; + default: { + if (isprint(*str)) + c = CORD_cat_char(c, *str); + else + CORD_sprintf(&c, "%r\\x%02X", *str); + break; + } + } + } + return CORD_cat_char(c, '"'); + } + case StringJoin: { + CORD c = NULL; + for (ast_list_t *chunk = Match(ast, StringJoin)->children; chunk; chunk = chunk->next) { + if (c) CORD_sprintf(&c, "CORD_cat(%r, %r)", c, compile(chunk->ast)); + else c = compile(chunk->ast); + } + return c; + } + case Interp: { + return CORD_asprintf("__cord(%r)", compile(Match(ast, Interp)->value)); + } + case Block: { + CORD c = NULL; + for 
(ast_list_t *stmt = Match(ast, Block)->statements; stmt; stmt = stmt->next) { + c = CORD_cat(c, compile(stmt->ast)); + c = CORD_cat(c, ";\n"); + } + return c; + } + case Declare: { + auto decl = Match(ast, Declare); + return CORD_asprintf("auto %r = %r", decl->var, decl->value); + } + case Assign: { + auto assign = Match(ast, Assign); + CORD c = NULL; + for (ast_list_t *target = assign->targets, *value = assign->values; target && value; target = target->next, value = value->next) { + CORD_sprintf(&c, "%r = %r", compile(target->ast), compile(value->ast)); + if (target->next) c = CORD_cat(c, ", "); + } + return c; + } + // Min, Max, + // Array, Table, TableEntry, + case FunctionDef: { + auto fndef = Match(ast, FunctionDef); + CORD c = CORD_asprintf("%r %r(", fndef->ret_type ? compile_type(fndef->ret_type) : "void", compile(fndef->name)); + for (arg_list_t *arg = fndef->args; arg; arg = arg->next) { + CORD_sprintf(&c, "%r%r %s", c, compile_type(arg->type), arg->var->name); + if (arg->next) c = CORD_cat(c, ", "); + } + c = CORD_cat(c, ") {\n"); + c = CORD_cat(c, compile(fndef->body)); + c = CORD_cat(c, "}"); + return c; + } + case FunctionCall: { + auto call = Match(ast, FunctionCall); + CORD c = CORD_cat_char(compile(call->fn), '('); + for (ast_list_t *arg = call->args; arg; arg = arg->next) { + c = CORD_cat(c, compile(arg->ast)); + if (arg->next) c = CORD_cat(c, ", "); + } + return CORD_cat_char(c, ')'); + } + // Lambda, + // FunctionCall, KeywordArg, + // Block, + // For, While, If, + // Reduction, + // Skip, Stop, Pass, + // Return, + // Extern, + // TypeDef, + // Index, FieldAccess, + // DocTest, + // Use, + // LinkerDirective, + case Unknown: errx(1, "Unknown AST"); + default: break; + } + return NULL; +} + +// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 diff --git a/compile.h b/compile.h new file mode 100644 index 00000000..790b33a4 --- /dev/null +++ b/compile.h @@ -0,0 +1,11 @@ +#pragma once + +#include <gc/cord.h> +#include <gc.h> +#include <stdio.h> + +#include 
"util.h" + +CORD compile(ast_t *ast); + +// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 diff --git a/files.c b/files.c new file mode 100644 index 00000000..51a8740c --- /dev/null +++ b/files.c @@ -0,0 +1,317 @@ +// +// files.c - Implementation of some file loading functionality. +// + +#include <err.h> +#include <fcntl.h> +#include <gc.h> +#include <libgen.h> +#include <limits.h> +#include <stdarg.h> +#include <stdlib.h> +#include <string.h> +#include <sys/param.h> + +#include "files.h" +#include "util.h" + +static const int tabstop = 4; + +public char *resolve_path(const char *path, const char *relative_to) +{ + if (!relative_to || streq(relative_to, "/dev/stdin")) relative_to = "."; + if (!path || strlen(path) == 0) return NULL; + + // Resolve the path to an absolute path, assuming it's relative to the file + // it was found in: + char buf[PATH_MAX] = {0}; + if (streq(path, "~") || strncmp(path, "~/", 2) == 0) { + char *resolved = realpath(heap_strf("%s%s", getenv("HOME"), path+1), buf); + if (resolved) return heap_str(resolved); + } else if (streq(path, ".") || strncmp(path, "./", 2) == 0) { + char *relative_dir = dirname(heap_str(relative_to)); + char *resolved = realpath(heap_strf("%s/%s", relative_dir, path), buf); + if (resolved) return heap_str(resolved); + } else if (path[0] == '/') { + // Absolute path: + char *resolved = realpath(path, buf); + if (resolved) return heap_str(resolved); + } else { + // Relative path: + char *blpath = heap_str(getenv("SSSPATH")); + char *relative_dir = dirname(heap_str(relative_to)); + for (char *dir; (dir = strsep(&blpath, ":")); ) { + if (dir[0] == '/') { + char *resolved = realpath(heap_strf("%s/%s", dir, path), buf); + if (resolved) return heap_str(resolved); + } else if (dir[0] == '~' && (dir[1] == '\0' || dir[1] == '/')) { + char *resolved = realpath(heap_strf("%s%s/%s", getenv("HOME"), dir, path), buf); + if (resolved) return heap_str(resolved); + } else if (streq(dir, ".") || strncmp(dir, "./", 2) == 0) { + char 
*resolved = realpath(heap_strf("%s/%s", relative_dir, path), buf); + if (resolved) return heap_str(resolved); + } else if (streq(dir, ".") || streq(dir, "..") || strncmp(dir, "./", 2) == 0 || strncmp(dir, "../", 3) == 0) { + char *resolved = realpath(heap_strf("%s/%s/%s", relative_dir, dir, path), buf); + if (resolved) return heap_str(resolved); + } else { + char *resolved = realpath(heap_strf("%s/%s", dir, path), buf); + if (resolved) return heap_str(resolved); + } + } + } + return NULL; +} + +static sss_file_t *_load_file(const char* filename, FILE *file) +{ + if (!file) return NULL; + + sss_file_t *ret = new(sss_file_t, .filename=filename); + + size_t file_size = 0, line_cap = 0; + char *file_buf = NULL, *line_buf = NULL; + FILE *mem = open_memstream(&file_buf, &file_size); + int64_t line_len = 0; + while ((line_len = getline(&line_buf, &line_cap, file)) >= 0) { + sss_line_t line_info = {.offset=file_size, .indent=0, .is_empty=false}; + char *p; + for (p = line_buf; *p == ' ' || *p == '\t'; ++p) + line_info.indent += *p == ' ' ? 
1 : 4; + line_info.is_empty = *p != '\r' && *p != '\n'; + if (ret->line_capacity <= ret->num_lines) { + ret->lines = GC_REALLOC(ret->lines, sizeof(sss_line_t)*(ret->line_capacity += 32)); + } + ret->lines[ret->num_lines++] = line_info; + fwrite(line_buf, sizeof(char), line_len, mem); + fflush(mem); + } + fclose(file); + + char *copy = GC_MALLOC_ATOMIC(file_size+1); + memcpy(copy, file_buf, file_size); + copy[file_size] = '\0'; + ret->text = copy; + fclose(mem); + + free(file_buf); + ret->relative_filename = filename; + if (filename && filename[0] != '<' && !streq(filename, "/dev/stdin")) { + filename = resolve_path(filename, "."); + // Convert to relative path (if applicable) + char buf[PATH_MAX]; + char *cwd = getcwd(buf, sizeof(buf)); + int64_t cwd_len = strlen(cwd); + if (strncmp(cwd, filename, cwd_len) == 0 && filename[cwd_len] == '/') + ret->relative_filename = &filename[cwd_len+1]; + } + return ret; +} + +// +// Read an entire file into memory. +// +public sss_file_t *sss_load_file(const char* filename) +{ + FILE *file = filename[0] ? fopen(filename, "r") : stdin; + return _load_file(filename, file); +} + +// +// Create a virtual file from a string. 
+// +public sss_file_t *sss_spoof_file(const char* filename, const char *text) +{ + FILE *file = fmemopen((char*)text, strlen(text)+1, "r"); + return _load_file(filename, file); +} + +// +// Given a pointer, determine which line number it points to (1-indexed) +// +public int64_t sss_get_line_number(sss_file_t *f, const char *p) +{ + // Binary search: + int64_t lo = 0, hi = (int64_t)f->num_lines-1; + if (p < f->text) return 0; + int64_t offset = (int64_t)(p - f->text); + while (lo <= hi) { + int64_t mid = (lo + hi) / 2; + sss_line_t *line = &f->lines[mid]; + if (line->offset == offset) + return mid + 1; + else if (line->offset < offset) + lo = mid + 1; + else if (line->offset > offset) + hi = mid - 1; + } + return lo; // Return the line number whose line starts closest before p +} + +// +// Given a pointer, determine which line column it points to. +// +public int64_t sss_get_line_column(sss_file_t *f, const char *p) +{ + int64_t line_no = sss_get_line_number(f, p); + sss_line_t *line = &f->lines[line_no-1]; + return 1 + (int64_t)(p - (f->text + line->offset)); +} + +// +// Given a pointer, get the indentation of the line it's on. 
+// +public int64_t sss_get_indent(sss_file_t *f, const char *p) +{ + int64_t line_no = sss_get_line_number(f, p); + sss_line_t *line = &f->lines[line_no-1]; + return line->indent; +} + +// +// Return a pointer to the line with the specified line number (1-indexed) +// +public const char *sss_get_line(sss_file_t *f, int64_t line_number) +{ + if (line_number == 0 || line_number > (int64_t)f->num_lines) return NULL; + sss_line_t *line = &f->lines[line_number-1]; + return f->text + line->offset; +} + +// +// Return a value like /foo:line:col +// +public const char *sss_get_file_pos(sss_file_t *f, const char *p) +{ + return heap_strf("%s:%ld:%ld", f->filename, sss_get_line_number(f, p), sss_get_line_column(f, p)); +} + +static int fputc_column(FILE *out, char c, char print_char, int *column) +{ + int printed = 0; + if (print_char == '\t') print_char = ' '; + if (c == '\t') { + for (int to_fill = tabstop - (*column % tabstop); to_fill > 0; --to_fill) { + printed += fputc(print_char, out); + ++*column; + } + } else { + printed += fputc(print_char, out); + ++*column; + } + return printed; +} + +// +// Print a span from a file +// +public int fprint_span(FILE *out, sss_file_t *file, const char *start, const char *end, const char *hl_color, int64_t context_lines, bool use_color) +{ + if (!file) return 0; + + // Handle spans that come from multiple files: + if (start < file->text || start > file->text + file->len) + start = end; + if (end < file->text || end > file->text + file->len) + end = start; + // Just in case neither end of the span came from this file: + if (end < file->text || end > file->text + file->len) + start = end = file->text; + + const char *lineno_fmt, *normal_color, *empty_marker; + bool print_carets = false; + int printed = 0; + if (use_color) { + lineno_fmt = "\x1b[0;2m%*lu\x1b(0\x78\x1b(B\x1b[m "; + normal_color = "\x1b[m"; + empty_marker = "\x1b(0\x61\x1b(B"; + printed += fprintf(out, "\x1b[33;4;1m%s\x1b[m\n", file->relative_filename); + } else { + 
lineno_fmt = "%*lu| "; + hl_color = ""; + normal_color = ""; + empty_marker = " "; + print_carets = true; + printed += fprintf(out, "%s\n", file->relative_filename); + } + + if (context_lines == 0) + return fprintf(out, "%s%.*s%s", hl_color, (int)(end - start), start, normal_color); + + int64_t start_line = sss_get_line_number(file, start), + end_line = sss_get_line_number(file, end); + + int64_t first_line = start_line - (context_lines - 1), + last_line = end_line + (context_lines - 1); + + if (first_line < 1) first_line = 1; + if (last_line > file->num_lines) last_line = file->num_lines; + + int digits = 1; + for (int64_t i = last_line; i > 0; i /= 10) ++digits; + + for (int64_t line_no = first_line; line_no <= last_line; ++line_no) { + if (line_no > first_line + 5 && line_no < last_line - 5) { + if (use_color) + printed += fprintf(out, "\x1b[0;2;3;4m ... %ld lines omitted ... \x1b[m\n", (last_line - first_line) - 11); + else + printed += fprintf(out, " ... %ld lines omitted ...\n", (last_line - first_line) - 11); + line_no = last_line - 6; + continue; + } + + printed += fprintf(out, lineno_fmt, digits, line_no); + const char *line = sss_get_line(file, line_no); + if (!line) break; + + int column = 0; + const char *p = line; + // Before match + for (; *p && *p != '\r' && *p != '\n' && p < start; ++p) + printed += fputc_column(out, *p, *p, &column); + + // Zero-width matches + if (p == start && start == end) { + printed += fprintf(out, "%s%s%s", hl_color, empty_marker, normal_color); + column += 1; + } + + // Inside match + if (start <= p && p < end) { + printed += fputs(hl_color, out); + for (; *p && *p != '\r' && *p != '\n' && p < end; ++p) + printed += fputc_column(out, *p, *p, &column); + printed += fputs(normal_color, out); + } + + // After match + for (; *p && *p != '\r' && *p != '\n'; ++p) + printed += fputc_column(out, *p, *p, &column); + + printed += fprintf(out, "\n"); + + const char *eol = strchrnul(line, '\n'); + if (print_carets && start >= line && 
start < eol && line <= start) { + for (int num = 0; num < digits; num++) + printed += fputc(' ', out); + printed += fputs(": ", out); + int column = 0; + for (const char *sp = line; *sp && *sp != '\n'; ++sp) { + char print_char; + if (sp < start) + print_char = ' '; + else if (sp == start && sp == end) + print_char = '^'; + else if (sp >= start && sp < end) + print_char = '-'; + else + print_char = ' '; + printed += fputc_column(out, *sp, print_char, &column); + } + printed += fputs("\n", out); + } + } + fflush(out); + return printed; +} + +// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 diff --git a/files.h b/files.h new file mode 100644 index 00000000..0ff91568 --- /dev/null +++ b/files.h @@ -0,0 +1,43 @@ +// +// files.h - Definitions of an API for loading files. +// +#pragma once + +#include <stdalign.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <unistd.h> + +typedef struct { + int64_t offset; + int64_t indent:63; + bool is_empty:1; +} sss_line_t; + +typedef struct { + const char *filename, *relative_filename; + const char *text; + int64_t len; + int64_t num_lines, line_capacity; + sss_line_t *lines; +} sss_file_t; + +char *resolve_path(const char *path, const char *relative_to); +__attribute__((nonnull)) +sss_file_t *sss_load_file(const char *filename); +__attribute__((nonnull, returns_nonnull)) +sss_file_t *sss_spoof_file(const char *filename, const char *text); +__attribute__((pure, nonnull)) +int64_t sss_get_line_number(sss_file_t *f, const char *p); +__attribute__((pure, nonnull)) +int64_t sss_get_line_column(sss_file_t *f, const char *p); +__attribute__((pure, nonnull)) +int64_t sss_get_indent(sss_file_t *f, const char *p); +__attribute__((pure, nonnull)) +const char *sss_get_line(sss_file_t *f, int64_t line_number); +__attribute__((pure, nonnull)) +const char *sss_get_file_pos(sss_file_t *f, const char *p); +int fprint_span(FILE *out, sss_file_t *file, const char *start, const char *end, const char *hl_color, int64_t 
context_lines, bool use_color); + +// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 @@ -0,0 +1,12 @@ +#include <stdio.h> + +int main(void) { + int x = 23; + const char *s = "Hi"; +#define say(x) _Generic(x, int: printf("%d\n", x), char *: puts(s), default: puts("???")) + say(x); + say(s); +#define all(...) { __VA_ARGS__; } + all(say("one"); say(2)) + return 0; +} diff --git a/nextlang.c b/nextlang.c new file mode 100644 index 00000000..617adc98 --- /dev/null +++ b/nextlang.c @@ -0,0 +1,23 @@ +#include <stdio.h> +#include <stdlib.h> +#include <gc.h> +#include <gc/cord.h> + +#include "ast.h" +#include "parse.h" +#include "compile.h" + +int main(int argc, char *argv[]) +{ + if (argc < 2) return 1; + + sss_file_t *f = sss_load_file(argv[1]); + ast_t *ast = parse_file(f, NULL); + const char *s = ast_to_str(ast); + puts(s); + CORD c = compile(ast); + CORD_put(c, stdout); + return 0; +} + +// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 diff --git a/parse.c b/parse.c new file mode 100644 index 00000000..845b3ca0 --- /dev/null +++ b/parse.c @@ -0,0 +1,1759 @@ +// Parse SSS code using recursive descent +#include <ctype.h> +#include <gc.h> +#include <libgen.h> +#include <linux/limits.h> +#include <setjmp.h> +#include <stdarg.h> +#include <stdbool.h> +#include <string.h> +#include <unistr.h> +#include <unictype.h> +#include <signal.h> + +#include "ast.h" +#include "util.h" + +typedef struct { + sss_file_t *file; + jmp_buf *on_err; +} parse_ctx_t; + +typedef ast_t* (parser_t)(parse_ctx_t*,const char*); + +extern void builtin_fail(const char *fmt, ...); + +#define PARSER(name) ast_t *name(parse_ctx_t *ctx, const char *pos) + +#define STUB_PARSER(name) PARSER(name) { (void)ctx; (void)pos; return NULL; } + +int op_tightness[] = { + [BINOP_POWER]=1, + [BINOP_MULT]=2, [BINOP_DIVIDE]=2, [BINOP_MOD]=2, [BINOP_MOD1]=2, + [BINOP_PLUS]=3, [BINOP_MINUS]=3, + [BINOP_CONCAT]=4, + [BINOP_LSHIFT]=5, [BINOP_RSHIFT]=5, + [BINOP_MIN]=6, [BINOP_MAX]=6, + [BINOP_EQ]=7, [BINOP_NE]=7, + [BINOP_LT]=8, 
// Words that may not be used as identifiers. get_id() compares each word it
// reads against this list and returns NULL on a match. The trailing NULL marks
// the end of the list, so it is walked without a separate length.
static const char *keywords[] = {
    "yes", "xor", "while", "use", "then", "struct", "stop", "skip", "return",
    "or", "not", "no", "mod1", "mod", "in", "if", "func", "for", "extern",
    "enum", "else", "do", "and", "_mix_", "_min_", "_max_",
    NULL,
};
PARSER(parse_type_def); +static PARSER(parse_func_def); +static PARSER(parse_extern); +static PARSER(parse_declaration); +static PARSER(parse_doctest); +static PARSER(parse_use); +static PARSER(parse_linker); +static PARSER(parse_namespace); + +static type_ast_t *parse_type(parse_ctx_t *ctx, const char *pos); +static type_ast_t *parse_enum_type(parse_ctx_t *ctx, const char *pos); + +// +// Print a parse error and exit (or use the on_err longjmp) +// +__attribute__((noreturn)) +static void vparser_err(parse_ctx_t *ctx, const char *start, const char *end, const char *fmt, va_list args) { + if (isatty(STDERR_FILENO) && !getenv("NO_COLOR")) + fputs("\x1b[31;1;7m", stderr); + fprintf(stderr, "%s:%ld.%ld: ", ctx->file->relative_filename, sss_get_line_number(ctx->file, start), + sss_get_line_column(ctx->file, start)); + vfprintf(stderr, fmt, args); + if (isatty(STDERR_FILENO) && !getenv("NO_COLOR")) + fputs(" \x1b[m", stderr); + fputs("\n\n", stderr); + + fprint_span(stderr, ctx->file, start, end, "\x1b[31;1;7m", 2, isatty(STDERR_FILENO) && !getenv("NO_COLOR")); + fputs("\n", stderr); + + if (ctx->on_err) + longjmp(*ctx->on_err, 1); + raise(SIGABRT); + exit(1); +} + +// +// Wrapper for vparser_err +// +__attribute__((noreturn)) +static void parser_err(parse_ctx_t *ctx, const char *start, const char *end, const char *fmt, ...) 
//
// Convert an escape sequence like \n to a string
//
// `*out` must point at the backslash. On return `*out` has been advanced past
// the escape sequence and the result is a newly allocated string holding the
// character it stands for. Recognized forms:
//   \a \b \e \f \n \r \t \v \_   named escapes (\_ is a space)
//   \xHH                         exactly two hex digits
//   \OOO                         exactly three octal digits
// Any other character after the backslash stands for itself.
//
const char *unescape(const char **out) {
    static const char *unescapes[256] = {['a']="\a",['b']="\b",['e']="\033",['f']="\f",['n']="\n",['r']="\r",['t']="\t",['v']="\v",['_']=" "};
    const char *escape = *out;
    assert(*escape == '\\');

    // Index with an unsigned value: a plain char may be negative for bytes >= 0x80
    unsigned char code = (unsigned char)escape[1];

    if (code == '\0') {
        // Backslash at the very end of the input: don't step past the terminator
        *out = escape + 1;
        return heap_strn(escape, 1);
    }

    if (unescapes[code]) {
        *out = escape + 2;
        return heap_str(unescapes[code]);
    }

    if (code == 'x' && isxdigit((unsigned char)escape[2]) && isxdigit((unsigned char)escape[3])) {
        // Copy exactly two digits so that following hex-looking text is not swallowed
        char digits[3] = {escape[2], escape[3], '\0'};
        char c = (char)strtol(digits, NULL, 16);
        *out = escape + 4;
        return heap_strn(&c, 1);
    }

    if ('0' <= escape[1] && escape[1] <= '7' && '0' <= escape[2] && escape[2] <= '7' && '0' <= escape[3] && escape[3] <= '7') {
        // Same for octal: exactly three digits
        char digits[4] = {escape[1], escape[2], escape[3], '\0'};
        char c = (char)strtol(digits, NULL, 8);
        *out = escape + 4;
        return heap_strn(&c, 1);
    }

    *out = escape + 2;
    return heap_strn(escape+1, 1);
}
return 0; + *pos += len; + return len; +} + +static inline bool is_xid_continue_next(const char *pos) { + ucs4_t point = 0; + u8_next(&point, (const uint8_t*)pos); + return uc_is_property_xid_continue(point); +} + +// +// Expect a string (potentially after whitespace) and emit a parser error if it's not there +// +static void expect_str( + parse_ctx_t *ctx, const char *start, const char **pos, const char *target, const char *fmt, ...) { + spaces(pos); + if (match(pos, target)) { + char lastchar = target[strlen(target)-1]; + if (!(isalpha(lastchar) || isdigit(lastchar) || lastchar == '_')) + return; + if (!is_xid_continue_next(*pos)) + return; + } + + if (isatty(STDERR_FILENO) && !getenv("NO_COLOR")) + fputs("\x1b[31;1;7m", stderr); + va_list args; + va_start(args, fmt); + vparser_err(ctx, start, *pos, fmt, args); + va_end(args); +} + +// +// Helper for matching closing parens with good error messages +// +static void expect_closing( + parse_ctx_t *ctx, const char **pos, const char *closing, const char *fmt, ...) { + const char *start = *pos; + spaces(pos); + if (match(pos, closing)) + return; + + const char *eol = strchr(*pos, '\n'); + const char *next = strstr(*pos, closing); + + const char *end = eol < next ? eol : next; + + if (isatty(STDERR_FILENO) && !getenv("NO_COLOR")) + fputs("\x1b[31;1;7m", stderr); + va_list args; + va_start(args, fmt); + vparser_err(ctx, start, end, fmt, args); + va_end(args); +} + +#define expect(ctx, start, pos, parser, ...) 
//
// Match a balanced group that starts with the delimiter `open` at *out.
// For ( { < [ the closing delimiter is the mirrored character; for any other
// character the group is closed by the same character it was opened with.
// On success *out is moved just past the closing delimiter and true is
// returned; otherwise *out is left untouched and false is returned.
//
bool match_group(const char **out, char open) {
    static const char mirror_delim[256] = {['(']=')', ['{']='}', ['<']='>', ['[']=']'};
    const char *pos = *out;
    // A NUL "delimiter" would match the terminator and walk off the end of the string
    if (open == '\0' || *pos != open) return false;

    // Index with an unsigned value: a plain char may be negative for bytes >= 0x80
    char mirrored = mirror_delim[(unsigned char)open];
    char close = mirrored ? mirrored : open;

    int depth = 1;
    for (++pos; *pos && depth > 0; ++pos) {
        if (*pos == close) --depth;
        else if (*pos == open) ++depth;
    }
    if (depth != 0)
        return false;

    *out = pos;
    return true;
}
true; +} + +bool indent(parse_ctx_t *ctx, const char **out) { + const char *pos = *out; + int64_t starting_indent = sss_get_indent(ctx->file, pos); + whitespace(&pos); + if (sss_get_line_number(ctx->file, pos) == sss_get_line_number(ctx->file, *out)) + return false; + + if (sss_get_indent(ctx->file, pos) > starting_indent) { + *out = pos; + return true; + } + + return false; +} + +bool match_indentation(const char **out, int64_t target) { + const char *pos = *out; + for (int64_t indentation = 0; indentation < target; ) { + switch (*pos) { + case ' ': indentation += 1; ++pos; break; + case '\t': indentation += 4; ++pos; break; + default: return false; + } + } + *out = pos; + return true; +} + +/////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////// AST-based parsers ///////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////////////////////////////////// + +PARSER(parse_parens) { + const char *start = pos; + spaces(&pos); + if (!match(&pos, "(")) return NULL; + whitespace(&pos); + ast_t *expr = optional(ctx, &pos, parse_extended_expr); + if (!expr) return NULL; + expect_closing(ctx, &pos, ")", "I wasn't able to parse the rest of this expression"); + + // Update the span to include the parens: + return new(ast_t, .file=(ctx)->file, .start=start, .end=pos, + .tag=expr->tag, .__data=expr->__data); +} + +PARSER(parse_int) { + const char *start = pos; + bool negative = match(&pos, "-"); + if (!isdigit(*pos)) return false; + int64_t i = 0; + if (match(&pos, "0x")) { // Hex + size_t span = strspn(pos, "0123456789abcdefABCDEF_"); + char *buf = GC_MALLOC_ATOMIC(span+1); + memset(buf, 0, span+1); + for (char *src = (char*)pos, *dest = buf; src < pos+span; ++src) { + if (*src != '_') *(dest++) = *src; + } + i = strtol(buf, NULL, 16); + pos += span; + } else if (match(&pos, "0b")) { // Binary + size_t span = strspn(pos, 
"01_"); + char *buf = GC_MALLOC_ATOMIC(span+1); + memset(buf, 0, span+1); + for (char *src = (char*)pos, *dest = buf; src < pos+span; ++src) { + if (*src != '_') *(dest++) = *src; + } + i = strtol(buf, NULL, 2); + pos += span; + } else if (match(&pos, "0o")) { // Octal + size_t span = strspn(pos, "01234567_"); + char *buf = GC_MALLOC_ATOMIC(span+1); + memset(buf, 0, span+1); + for (char *src = (char*)pos, *dest = buf; src < pos+span; ++src) { + if (*src != '_') *(dest++) = *src; + } + i = strtol(buf, NULL, 8); + pos += span; + } else { // Decimal + size_t span = strspn(pos, "0123456789_"); + char *buf = GC_MALLOC_ATOMIC(span+1); + memset(buf, 0, span+1); + for (char *src = (char*)pos, *dest = buf; src < pos+span; ++src) { + if (*src != '_') *(dest++) = *src; + } + i = strtol(buf, NULL, 10); + pos += span; + } + + if (match(&pos, "e") || match(&pos, "f")) // floating point literal + return NULL; + + if (negative) i *= -1; + + if (match(&pos, "%")) { + double d = (double)i / 100.; + return NewAST(ctx->file, start, pos, Num, .n=d, .precision=64); + } + + match(&pos, "_"); + int64_t precision = 64; + if (match(&pos, "i64")) precision = 64; + else if (match(&pos, "i32")) precision = 32; + else if (match(&pos, "i16")) precision = 16; + else if (match(&pos, "i8")) precision = 8; + + // else if (match(&pos, ".") || match(&pos, "e")) return NULL; // looks like a float + + return NewAST(ctx->file, start, pos, Int, .i=i, .precision=precision); +} + +type_ast_t *parse_table_type(parse_ctx_t *ctx, const char *pos) { + const char *start = pos; + if (!match(&pos, "{")) return NULL; + whitespace(&pos); + type_ast_t *key_type = parse_type(ctx, pos); + if (!key_type) return NULL; + pos = key_type->end; + whitespace(&pos); + if (!match(&pos, "=>")) return NULL; + type_ast_t *value_type = expect(ctx, start, &pos, parse_type, "I couldn't parse the rest of this table type"); + whitespace(&pos); + expect_closing(ctx, &pos, "}", "I wasn't able to parse the rest of this table type"); + 
return NewTypeAST(ctx->file, start, pos, TypeTable, .key=key_type, .value=value_type); +} + +type_ast_t *parse_struct_type(parse_ctx_t *ctx, const char *pos) { + const char *start = pos; + if (!match(&pos, "struct")) return NULL; + spaces(&pos); + if (!match(&pos, "(")) return NULL; + arg_list_t *args = parse_args(ctx, &pos, false); + whitespace(&pos); + expect_closing(ctx, &pos, ")", "I wasn't able to parse the rest of this struct type"); + return NewTypeAST(ctx->file, start, pos, TypeStruct, .fields=args); +} + +type_ast_t *parse_func_type(parse_ctx_t *ctx, const char *pos) { + const char *start = pos; + if (!match_word(&pos, "func")) return NULL; + spaces(&pos); + if (!match(&pos, "(")) return NULL; + arg_list_t *args = parse_args(ctx, &pos, true); + expect_closing(ctx, &pos, ")", "I wasn't able to parse the rest of this function type"); + spaces(&pos); + if (!match(&pos, "->")) return NULL; + type_ast_t *ret = optional(ctx, &pos, parse_type); + return NewTypeAST(ctx->file, start, pos, TypeFunction, .args=args, .ret=ret); +} + +type_ast_t *parse_array_type(parse_ctx_t *ctx, const char *pos) { + const char *start = pos; + if (!match(&pos, "[")) return NULL; + type_ast_t *type = expect(ctx, start, &pos, parse_type, + "I couldn't parse an array item type after this point"); + expect_closing(ctx, &pos, "]", "I wasn't able to parse the rest of this array type"); + return NewTypeAST(ctx->file, start, pos, TypeArray, .item=type); +} + +type_ast_t *parse_pointer_type(parse_ctx_t *ctx, const char *pos) { + const char *start = pos; + bool optional = false, is_stack = false; + if (match(&pos, "@")) + optional = false; + else if (match(&pos, "?")) + optional = true; + else if (match(&pos, "&")) + is_stack = true; + else + return NULL; + + spaces(&pos); + bool is_readonly = match(&pos, "(readonly)"); + spaces(&pos); + type_ast_t *type = expect(ctx, start, &pos, parse_type, + "I couldn't parse a pointer type after this point"); + return NewTypeAST(ctx->file, start, pos, 
TypePointer, .pointed=type, .is_optional=optional, .is_stack=is_stack, .is_readonly=is_readonly); +} + +type_ast_t *parse_type_name(parse_ctx_t *ctx, const char *pos) { + const char *start = pos; + const char *id = get_id(&pos); + if (!id) return NULL; + for (;;) { + const char *next = pos; + spaces(&next); + if (!match(&next, ".")) break; + const char *next_id = get_id(&next); + if (!next_id) break; + id = heap_strf("%s.%s", id, next_id); + pos = next; + } + return NewTypeAST(ctx->file, start, pos, TypeVar, .var.name=id); +} + +type_ast_t *parse_type(parse_ctx_t *ctx, const char *pos) { + const char *start = pos; + type_ast_t *type = NULL; + bool success = (false + || (type=parse_enum_type(ctx, pos)) + || (type=parse_pointer_type(ctx, pos)) + || (type=parse_array_type(ctx, pos)) + || (type=parse_table_type(ctx, pos)) + || (type=parse_struct_type(ctx, pos)) + || (type=parse_type_name(ctx, pos)) + || (type=parse_func_type(ctx, pos)) + ); + if (!success && match(&pos, "(")) { + whitespace(&pos); + type = optional(ctx, &pos, parse_type); + if (!type) return NULL; + whitespace(&pos); + expect_closing(ctx, &pos, ")", "I wasn't able to parse the rest of this type"); + type->start = start; + type->end = pos; + } + + if (!type) return NULL; + + pos = type->end; + return type; +} + +PARSER(parse_num) { + const char *start = pos; + bool negative = match(&pos, "-"); + if (!isdigit(*pos) && *pos != '.') return NULL; + + size_t len = strspn(pos, "0123456789_"); + if (strncmp(pos+len, "..", 2) == 0) + return NULL; + else if (pos[len] == '.') + len += 1 + strspn(pos + len + 1, "0123456789"); + else if (pos[len] != 'e' && pos[len] != 'f' && pos[len] != '%') + return NULL; + if (pos[len] == 'e') + len += 1 + strspn(pos + len + 1, "-0123456789_"); + char *buf = GC_MALLOC_ATOMIC(len+1); + memset(buf, 0, len+1); + for (char *src = (char*)pos, *dest = buf; src < pos+len; ++src) { + if (*src != '_') *(dest++) = *src; + } + double d = strtod(buf, NULL); + pos += len; + + if (negative) d 
*= -1; + + int64_t precision = 64; + match(&pos, "_"); + if (match(&pos, "f64")) precision = 64; + else if (match(&pos, "f32")) precision = 32; + + if (match(&pos, "%")) { + d /= 100.; + } + + return NewAST(ctx->file, start, pos, Num, .n=d, .precision=precision); +} + +PARSER(parse_array) { + const char *start = pos; + if (!match(&pos, "[")) return NULL; + + whitespace(&pos); + + ast_list_t *items = NULL; + type_ast_t *item_type = NULL; + if (match(&pos, ":")) { + whitespace(&pos); + item_type = expect(ctx, pos-1, &pos, parse_type, "I couldn't parse a type for this array"); + } + + for (;;) { + whitespace(&pos); + ast_t *item = optional(ctx, &pos, parse_extended_expr); + if (!item) break; + items = new(ast_list_t, .ast=item, .next=items); + whitespace(&pos); + if (!match(&pos, ",")) break; + } + whitespace(&pos); + expect_closing(ctx, &pos, "]", "I wasn't able to parse the rest of this array"); + + if (!item_type && !items) + parser_err(ctx, start, pos, "Empty arrays must specify what type they would contain (e.g. 
[:Int])"); + + REVERSE_LIST(items); + return NewAST(ctx->file, start, pos, Array, .type=item_type, .items=items); +} + +PARSER(parse_table) { + const char *start = pos; + if (!match(&pos, "{")) return NULL; + + whitespace(&pos); + + ast_list_t *entries = NULL; + type_ast_t *key_type = NULL, *value_type = NULL; + if (match(&pos, ":")) { + whitespace(&pos); + key_type = expect(ctx, pos-1, &pos, parse_type, "I couldn't parse a key type for this table"); + whitespace(&pos); + if (!match(&pos, "=>")) + parser_err(ctx, pos, pos, "I expected an '=>' for this table type"); + value_type = expect(ctx, pos-1, &pos, parse_type, "I couldn't parse a value type for this table"); + } + + for (;;) { + whitespace(&pos); + const char *entry_start = pos; + ast_t *key = optional(ctx, &pos, parse_extended_expr); + if (!key) break; + whitespace(&pos); + if (!match(&pos, "=>")) return NULL; + ast_t *value = expect(ctx, pos-1, &pos, parse_expr, "I couldn't parse the value for this table entry"); + + ast_t *entry = NewAST(ctx->file, entry_start, pos, TableEntry, .key=key, .value=value); + for (bool progress = true; progress; ) { + ast_t *new_entry; + progress = (false + || (new_entry=parse_index_suffix(ctx, entry)) + || (new_entry=parse_field_suffix(ctx, entry)) + || (new_entry=parse_fncall_suffix(ctx, entry, NORMAL_FUNCTION)) + ); + if (progress) entry = new_entry; + } + pos = entry->end; + + entries = new(ast_list_t, .ast=entry, .next=entries); + whitespace(&pos); + if (!match(&pos, ",")) break; + } + + REVERSE_LIST(entries); + + if (!key_type && !value_type && !entries) + return NULL; + + whitespace(&pos); + + ast_t *fallback = NULL, *default_val = NULL; + if (match(&pos, ";")) { + for (;;) { + whitespace(&pos); + const char *attr_start = pos; + if (match(&pos, "fallback")) { + whitespace(&pos); + if (!match(&pos, "=")) parser_err(ctx, attr_start, pos, "I expected an '=' after 'fallback'"); + if (fallback) + parser_err(ctx, attr_start, pos, "This table already has a fallback"); + 
fallback = expect(ctx, attr_start, &pos, parse_expr, "I expected a fallback table"); + } else if (match(&pos, "default")) { + whitespace(&pos); + if (!match(&pos, "=")) parser_err(ctx, attr_start, pos, "I expected an '=' after 'default'"); + if (default_val) + parser_err(ctx, attr_start, pos, "This table already has a default value"); + default_val = expect(ctx, attr_start, &pos, parse_expr, "I expected a default value for this table"); + } else { + break; + } + whitespace(&pos); + if (!match(&pos, ";")) break; + } + } + + whitespace(&pos); + expect_closing(ctx, &pos, "}", "I wasn't able to parse the rest of this table"); + + return NewAST(ctx->file, start, pos, Table, .key_type=key_type, .value_type=value_type, .entries=entries, .fallback=fallback, .default_value=default_val); +} + +ast_t *parse_field_suffix(parse_ctx_t *ctx, ast_t *lhs) { + if (!lhs) return NULL; + const char *pos = lhs->end; + whitespace(&pos); + if (!match(&pos, ".")) return NULL; + if (*pos == '.') return NULL; + whitespace(&pos); + bool dollar = match(&pos, "$"); + const char* field = get_id(&pos); + if (!field) return NULL; + if (dollar) field = heap_strf("$%s", field); + return NewAST(ctx->file, lhs->start, pos, FieldAccess, .fielded=lhs, .field=field); +} + +PARSER(parse_reduction) { + const char *start = pos; + if (!match(&pos, "(")) return NULL; + + spaces(&pos); + const char *combo_start = pos; + binop_e op = match_binary_operator(&pos); + if (op == BINOP_UNKNOWN) return NULL; + + ast_t *combination; + ast_t *lhs = NewAST(ctx->file, pos, pos, Var, .var.name="lhs.0"); + ast_t *rhs = NewAST(ctx->file, pos, pos, Var, .var.name="rhs.0"); + if (op == BINOP_MIN || op == BINOP_MAX) { + for (bool progress = true; progress; ) { + ast_t *new_term; + progress = (false + || (new_term=parse_index_suffix(ctx, rhs)) + || (new_term=parse_field_suffix(ctx, rhs)) + || (new_term=parse_fncall_suffix(ctx, rhs, NORMAL_FUNCTION)) + ); + if (progress) rhs = new_term; + } + if (rhs->tag == Var) rhs = NULL; + 
else pos = rhs->end; + combination = op == BINOP_MIN ? + NewAST(ctx->file, combo_start, pos, Min, .lhs=lhs, .rhs=lhs, .key=rhs) + : NewAST(ctx->file, combo_start, pos, Max, .lhs=lhs, .rhs=lhs, .key=rhs); + } else { + combination = NewAST(ctx->file, combo_start, pos, BinaryOp, .op=op, .lhs=lhs, .rhs=rhs); + } + + spaces(&pos); + if (!match(&pos, ")")) return NULL; + + ast_t *iter = optional(ctx, &pos, parse_extended_expr); + if (!iter) return NULL; + + ast_t *fallback = NULL; + if (match_word(&pos, "else")) + fallback = expect(ctx, pos-4, &pos, parse_expr, "I couldn't parse the expression after this 'else'"); + + return NewAST(ctx->file, start, pos, Reduction, .iter=iter, .combination=combination, .fallback=fallback); +} + +ast_t *parse_index_suffix(parse_ctx_t *ctx, ast_t *lhs) { + if (!lhs) return NULL; + const char *start = lhs->start; + const char *pos = lhs->end; + if (!match(&pos, "[")) return NULL; + whitespace(&pos); + ast_t *index = NULL; + if (match(&pos, ".")) { + // array[.field] + const char *field_start = pos-1; + const char *field = get_id(&pos); + if (field) + index = NewAST(ctx->file, field_start, pos, FieldAccess, .field=field); + else + --pos; + } + + if (!index) { + // obj[expr] + index = optional(ctx, &pos, parse_extended_expr); + } + whitespace(&pos); + bool unchecked = match(&pos, ";") && (spaces(&pos), match_word(&pos, "unchecked") != 0); + expect_closing(ctx, &pos, "]", "I wasn't able to parse the rest of this index"); + return NewAST(ctx->file, start, pos, Index, .indexed=lhs, .index=index, .unchecked=unchecked); +} + +PARSER(parse_if) { + // if <condition> [then] <body> [else <body>] + const char *start = pos; + int64_t starting_indent = sss_get_indent(ctx->file, pos); + + if (!match_word(&pos, "if")) + return NULL; + + ast_t *condition = optional(ctx, &pos, parse_declaration); + if (!condition) condition = expect(ctx, start, &pos, parse_expr, + "I expected to find an expression for this 'if'"); + + match_word(&pos, "then"); // optional + 
+ ast_t *body = expect(ctx, start, &pos, parse_opt_indented_block, "I expected a body for this 'if' statement"); + + const char *tmp = pos; + whitespace(&tmp); + ast_t *else_body = NULL; + if (sss_get_indent(ctx->file, tmp) == starting_indent && match_word(&tmp, "else")) { + pos = tmp; + else_body = expect(ctx, start, &pos, parse_opt_indented_block, "I expected a body for this 'else'"); + } + return NewAST(ctx->file, start, pos, If, .condition=condition, .body=body, .else_body=else_body); +} + +PARSER(parse_for) { + // for [k,] v in iter [<indent>] body + const char *start = pos; + if (!match_word(&pos, "for")) return NULL; + ast_t *index = expect(ctx, start, &pos, parse_var, "I expected an iteration variable for this 'for'"); + spaces(&pos); + ast_t *value = NULL; + if (match(&pos, ",")) { + value = expect(ctx, pos-1, &pos, parse_var, "I expected a variable after this comma"); + } + expect_str(ctx, start, &pos, "in", "I expected an 'in' for this 'for'"); + ast_t *iter = expect(ctx, start, &pos, parse_expr, "I expected an iterable value for this 'for'"); + match(&pos, "do"); // optional + ast_t *body = expect(ctx, start, &pos, parse_opt_indented_block, "I expected a body for this 'for'"); + return NewAST(ctx->file, start, pos, For, .index=value ? index : NULL, .value=value ? 
value : index, .iter=iter, .body=body); +} + +PARSER(parse_while) { + // while condition [do] [<indent>] body + const char *start = pos; + if (!match_word(&pos, "while")) return NULL; + ast_t *condition = expect(ctx, start, &pos, parse_expr, "I don't see a viable condition for this 'while'"); + match(&pos, "do"); // optional + ast_t *body = expect(ctx, start, &pos, parse_opt_indented_block, "I expected a body for this 'while'"); + const char *tmp = pos; + whitespace(&tmp); + return NewAST(ctx->file, start, pos, While, .condition=condition, .body=body); +} + +PARSER(parse_heap_alloc) { + const char *start = pos; + if (!match(&pos, "@")) return NULL; + spaces(&pos); + ast_t *val = expect(ctx, start, &pos, parse_expr, "I expected an expression for this '@'"); + return NewAST(ctx->file, start, pos, UnaryOp, .op=UNOP_HEAP_ALLOCATE, .value=val); +} + +PARSER(parse_stack_reference) { + const char *start = pos; + if (!match(&pos, "&")) return NULL; + spaces(&pos); + ast_t *val = expect(ctx, start, &pos, parse_expr, "I expected an expression for this '&'"); + return NewAST(ctx->file, start, pos, UnaryOp, .op=UNOP_STACK_REFERENCE, .value=val); +} + +PARSER(parse_not) { + const char *start = pos; + if (!match_word(&pos, "not")) return NULL; + spaces(&pos); + ast_t *val = expect(ctx, start, &pos, parse_expr, "I expected an expression for this 'not'"); + return NewAST(ctx->file, start, pos, UnaryOp, .op=UNOP_NOT, .value=val); +} + +PARSER(parse_negative) { + const char *start = pos; + if (!match(&pos, "-")) return NULL; + spaces(&pos); + ast_t *val = expect(ctx, start, &pos, parse_term, "I expected an expression for this '-'"); + return NewAST(ctx->file, start, pos, UnaryOp, .op=UNOP_NEGATIVE, .value=val); +} + +PARSER(parse_bool) { + const char *start = pos; + if (match_word(&pos, "yes")) + return NewAST(ctx->file, start, pos, Bool, .b=true); + else if (match_word(&pos, "no")) + return NewAST(ctx->file, start, pos, Bool, .b=false); + else + return NULL; +} + 
+PARSER(parse_char) { + const char *start = pos; + if (*pos == '`') { + ++pos; + char c = *pos; + ++pos; + return NewAST(ctx->file, start, pos, Char, .c=c); + } else if (*pos == '\\') { + char c = unescape(&pos)[0]; + return NewAST(ctx->file, start, pos, Char, .c=c); + } else { + return NULL; + } +} + +PARSER(parse_interpolation) { + const char *start = pos; + ++pos; // ignore the initial character, typically a '$', but might be other stuff like '@' in different contexts + bool labelled = match(&pos, ":"); + ast_t *value = optional(ctx, &pos, parse_parens); + if (!value) value = optional(ctx, &pos, parse_term); + if (!value) { + match_group(&pos, '('); + parser_err(ctx, start, pos, "This interpolation didn't parse"); + } + return NewAST(ctx->file, start, pos, Interp, .value=value, .labelled=labelled); +} + +PARSER(parse_string) { + static const char closing[128] = {['(']=')', ['[']=']', ['<']='>', ['{']='}'}; + static const bool escapes[128] = {['\'']='\x1B', ['(']='\x1B', ['>']='\x1B', ['/']='\x1B'}; + static const char interps[128] = {['>']='@', ['/']='@', ['\'']='\x1A', ['(']='\x1A'}; + + const char *string_start = pos; + char open, close; + if (match(&pos, "$")) { + open = *pos; + close = closing[(int)open] ? closing[(int)open] : open; + ++pos; + } else { + if (*pos != '\'' && *pos != '"') + return NULL; + open = *pos; + close = *pos; + ++pos; + } + + char interp_char = interps[(int)open] ? interps[(int)open] : '$'; + char escape_char = escapes[(int)open] ? 
escapes[(int)open] : '\\';

    if (open == ':' || open == '>')
        spaces(&pos);

    // Chunks are pushed onto the front of this list, so it is in reverse
    // source order while the string is being scanned.
    ast_list_t *chunks = NULL;
    if (*pos == '\r' || *pos == '\n') { // Multiline string
        char special[] = {'\n','\r',interp_char,escape_char,'\0'};
        int64_t starting_indent = sss_get_indent(ctx->file, pos);
        // indentation-delimited string
        match(&pos, "\r");
        match(&pos, "\n");
        // The indentation of the first content line sets the indentation
        // expected of every following line.
        int64_t first_line = sss_get_line_number(ctx->file, pos);
        int64_t indented = sss_get_indent(ctx->file, pos);
        pos = sss_get_line(ctx->file, first_line);
        while (pos < ctx->file->text + ctx->file->len) {
            const char *eol = strchrnul(pos, '\n');
            if (eol == pos + strspn(pos, " \t\r")) { // Empty line
                ast_t *ast = NewAST(ctx->file, pos, eol, StringLiteral, .str="\n");
                chunks = new(ast_list_t, .ast=ast, .next=chunks);
                pos = eol + 1;
                continue;
            }
            if (!match_indentation(&pos, starting_indent))
                parser_err(ctx, pos, strchrnul(pos, '\n'), "This isn't a valid indentation level for this unterminated string");

            // A closing delimiter at the string's own indentation ends it
            if (*pos == close) {
                ++pos;
                goto finished;
            }

            // NOTE(review): the arguments below are int64_t but are formatted
            // with %lu -- confirm parser_err's formatting expectations.
            if (!match_indentation(&pos, (indented - starting_indent)))
                parser_err(ctx, pos, strchrnul(pos, '\n'), "I was expecting this to have %lu extra indentation beyond %lu",
                           (indented - starting_indent), starting_indent);

            // Split the rest of the line into literal text, escapes, and
            // interpolations. The line terminator is kept in the literal text.
            // NOTE(review): if `eol` is the end of the text (last line has no
            // newline), this loop looks like it can stop making progress at
            // the terminating NUL -- verify.
            while (pos < eol+1) {
                size_t len = strcspn(pos, special);
                if (pos[len] == '\r') ++len;
                if (pos[len] == '\n') ++len;

                if (len > 0) {
                    ast_t *chunk = NewAST(ctx->file, pos, pos+len-1, StringLiteral, .str=heap_strn(pos, len));
                    chunks = new(ast_list_t, .ast=chunk, .next=chunks);
                }

                pos += len;

                if (*pos == escape_char) {
                    const char *start = pos;
                    const char* unescaped = unescape(&pos);
                    ast_t *chunk = NewAST(ctx->file, start, pos, StringLiteral, .str=unescaped);
                    chunks = new(ast_list_t, .ast=chunk, .next=chunks);
                    ++pos;
                } else if (*pos == interp_char) {
                    ast_t *chunk = parse_interpolation(ctx, pos);
                    chunks = new(ast_list_t, .ast=chunk, .next=chunks);
                    pos = chunk->end;
                }
            }
        }
finished:; + // Strip trailing newline: + if (chunks) { + ast_t *last_chunk = chunks->ast; + if (last_chunk->tag == StringLiteral) { + auto str = Match(last_chunk, StringLiteral); + const char* trimmed = heap_strn(str->str, strlen(str->str)-1); + chunks->ast = NewAST(ctx->file, last_chunk->start, last_chunk->end-1, StringLiteral, .str=trimmed); + } + } + } else { // Inline string + char special[] = {'\n','\r',open,close,interp_char,escape_char,'\0'}; + int depth = 1; + while (depth > 0 && *pos) { + size_t len = strcspn(pos, special); + if (len > 0) { + ast_t *chunk = NewAST(ctx->file, pos, pos+len-1, StringLiteral, .str=heap_strn(pos, len)); + chunks = new(ast_list_t, .ast=chunk, .next=chunks); + pos += len; + } + + if (*pos == interp_char) { + ast_t *chunk = parse_interpolation(ctx, pos); + chunks = new(ast_list_t, .ast=chunk, .next=chunks); + pos = chunk->end; + } else if (*pos == escape_char) { + const char *start = pos; + const char* unescaped = unescape(&pos); + ast_t *chunk = NewAST(ctx->file, start, pos, StringLiteral, .str=unescaped); + chunks = new(ast_list_t, .ast=chunk, .next=chunks); + } else if (*pos == '\r' || *pos == '\n') { + if (open == ' ' || open == ':' || open == '>') goto string_finished; + parser_err(ctx, string_start, pos, "This line ended without closing the string"); + } else if (*pos == close) { // if open == close, then don't do nesting (i.e. 
check 'close' first) + --depth; + if (depth > 0) { + ast_t *chunk = NewAST(ctx->file, pos, pos+1, StringLiteral, .str=heap_strn(pos, 1)); + chunks = new(ast_list_t, .ast=chunk, .next=chunks); + } + ++pos; + } else if (*pos == open) { + ++depth; + ast_t *chunk = NewAST(ctx->file, pos, pos+1, StringLiteral, .str=heap_strn(pos, 1)); + chunks = new(ast_list_t, .ast=chunk, .next=chunks); + ++pos; + } else { + ast_t *chunk = NewAST(ctx->file, pos, pos+1, StringLiteral, .str=heap_strn(pos, 1)); + ++pos; + chunks = new(ast_list_t, .ast=chunk, .next=chunks); + } + } + } + string_finished:; + REVERSE_LIST(chunks); + return NewAST(ctx->file, string_start, pos, StringJoin, .children=chunks); +} + +PARSER(parse_skip) { + const char *start = pos; + if (!match_word(&pos, "skip")) return NULL; + spaces(&pos); + const char* target; + if (match_word(&pos, "for")) target = "for"; + else if (match_word(&pos, "while")) target = "while"; + else target = get_id(&pos); + ast_t *skip = NewAST(ctx->file, start, pos, Skip, .target=target); + return skip; +} + +PARSER(parse_stop) { + const char *start = pos; + if (!match_word(&pos, "stop")) return NULL; + spaces(&pos); + const char* target; + if (match_word(&pos, "for")) target = "for"; + else if (match_word(&pos, "while")) target = "while"; + else target = get_id(&pos); + ast_t *stop = NewAST(ctx->file, start, pos, Stop, .target=target); + return stop; +} + +PARSER(parse_return) { + const char *start = pos; + if (!match_word(&pos, "return")) return NULL; + spaces(&pos); + ast_t *value = optional(ctx, &pos, parse_expr); + ast_t *ret = NewAST(ctx->file, start, pos, Return, .value=value); + return ret; +} + +PARSER(parse_lambda) { + const char *start = pos; + if (!match_word(&pos, "func")) + return NULL; + spaces(&pos); + if (!match(&pos, "(")) + return NULL; + arg_list_t *args = parse_args(ctx, &pos, false); + spaces(&pos); + expect_closing(ctx, &pos, ")", "I was expecting a ')' to finish this anonymous function's arguments"); + ast_t *body = 
optional(ctx, &pos, parse_opt_indented_block); + return NewAST(ctx->file, start, pos, Lambda, .args=args, .body=body); +} + +PARSER(parse_nil) { + const char *start = pos; + if (!match(&pos, "!")) return NULL; + type_ast_t *type = parse_type(ctx, pos); + if (!type) return NULL; + return NewAST(ctx->file, start, type->end, Nil, .type=type); +} + +PARSER(parse_var) { + const char *start = pos; + const char* name = get_id(&pos); + if (!name) return NULL; + return NewAST(ctx->file, start, pos, Var, .var.name=name); +} + +PARSER(parse_term_no_suffix) { + spaces(&pos); + ast_t *term = NULL; + (void)( + false + || (term=parse_nil(ctx, pos)) + || (term=parse_num(ctx, pos)) + || (term=parse_int(ctx, pos)) + || (term=parse_negative(ctx, pos)) + || (term=parse_heap_alloc(ctx, pos)) + || (term=parse_stack_reference(ctx, pos)) + || (term=parse_bool(ctx, pos)) + || (term=parse_char(ctx, pos)) + || (term=parse_string(ctx, pos)) + || (term=parse_lambda(ctx, pos)) + || (term=parse_parens(ctx, pos)) + || (term=parse_table(ctx, pos)) + || (term=parse_var(ctx, pos)) + || (term=parse_array(ctx, pos)) + || (term=parse_reduction(ctx, pos)) + || (term=parse_skip(ctx, pos)) + || (term=parse_stop(ctx, pos)) + || (term=parse_return(ctx, pos)) + || (term=parse_not(ctx, pos)) + || (term=parse_extern(ctx, pos)) + ); + return term; +} + +PARSER(parse_term) { + ast_t *term = parse_term_no_suffix(ctx, pos); + if (!term) return NULL; + + for (bool progress = true; progress; ) { + ast_t *new_term; + progress = (false + || (new_term=parse_index_suffix(ctx, term)) + || (new_term=parse_field_suffix(ctx, term)) + || (new_term=parse_fncall_suffix(ctx, term, NORMAL_FUNCTION)) + ); + if (progress) term = new_term; + } + return term; +} + +ast_t *parse_fncall_suffix(parse_ctx_t *ctx, ast_t *fn, bool is_extern) { + if (!fn) return NULL; + + const char *start = fn->start; + const char *pos = fn->end; + + if (!match(&pos, "(")) return NULL; + + whitespace(&pos); + + ast_list_t *args = NULL; + for (;;) { + 
const char *arg_start = pos; + const char *name = get_id(&pos); + whitespace(&pos); + if (name) { + if (match(&pos, "=")) { + whitespace(&pos); + ast_t *arg = parse_expr(ctx, pos); + if (!arg) parser_err(ctx, arg_start, pos, "I couldn't parse this keyword argument value"); + ast_t *kwarg = NewAST(ctx->file, arg_start, arg->end, KeywordArg, + .name=name, .arg=arg); + args = new(ast_list_t, .ast=kwarg, .next=args); + pos = kwarg->end; + goto got_arg; + } + pos = arg_start; + } + + ast_t *arg = optional(ctx, &pos, parse_expr); + if (!arg) break; + args = new(ast_list_t, .ast=arg, .next=args); + + got_arg:; + + whitespace(&pos); + if (!match(&pos, ",")) + break; + whitespace(&pos); + } + + whitespace(&pos); + + if (!match(&pos, ")")) + parser_err(ctx, start, pos, "This parenthesis is unclosed"); + + type_ast_t *extern_return_type = NULL; + if (is_extern) { + if (match(&pos, ":")) + extern_return_type = expect(ctx, start, &pos, parse_type, "I couldn't parse the return type of this external function call"); + else + extern_return_type = NewTypeAST(ctx->file, pos, pos, TypeVar, .var.name="Void"); + } + REVERSE_LIST(args); + return NewAST(ctx->file, start, pos, FunctionCall, .fn=fn, .args=args, .extern_return_type=extern_return_type); +} + +binop_e match_binary_operator(const char **pos) +{ + switch (**pos) { + case '+': { + *pos += 1; + return match(pos, "+") ? BINOP_CONCAT : BINOP_PLUS; + } + case '-': { + *pos += 1; + if ((*pos)[0] != ' ' && (*pos)[-2] == ' ') // looks like `fn -5` + return BINOP_UNKNOWN; + return BINOP_MINUS; + } + case '*': *pos += 1; return BINOP_MULT; + case '/': *pos += 1; return BINOP_DIVIDE; + case '^': *pos += 1; return BINOP_POWER; + case '<': *pos += 1; return match(pos, "=") ? BINOP_LE : (match(pos, "<") ? BINOP_LSHIFT : BINOP_LT); + case '>': *pos += 1; return match(pos, "=") ? BINOP_GE : (match(pos, ">") ? 
BINOP_RSHIFT : BINOP_GT); + default: { + if (match(pos, "!=")) return BINOP_NE; + else if (match(pos, "==") && **pos != '=') return BINOP_EQ; + else if (match_word(pos, "and")) return BINOP_AND; + else if (match_word(pos, "or")) return BINOP_OR; + else if (match_word(pos, "xor")) return BINOP_XOR; + else if (match_word(pos, "mod1")) return BINOP_MOD1; + else if (match_word(pos, "mod")) return BINOP_MOD; + else if (match_word(pos, "_min_")) return BINOP_MIN; + else if (match_word(pos, "_max_")) return BINOP_MAX; + else return BINOP_UNKNOWN; + } + } +} + +static ast_t *parse_infix_expr(parse_ctx_t *ctx, const char *pos, int min_tightness) { + ast_t *lhs = optional(ctx, &pos, parse_term); + if (!lhs) return NULL; + + spaces(&pos); + binop_e op = match_binary_operator(&pos); + if (op == BINOP_UNKNOWN || op_tightness[op] < min_tightness) + return lhs; + + ast_t *key = NULL; + if (op == BINOP_MIN || op == BINOP_MAX) { + key = NewAST(ctx->file, pos, pos, Var, .var.name=op == BINOP_MIN ? "_min_" : "_max_"); + for (bool progress = true; progress; ) { + ast_t *new_term; + progress = (false + || (new_term=parse_index_suffix(ctx, key)) + || (new_term=parse_field_suffix(ctx, key)) + || (new_term=parse_fncall_suffix(ctx, key, NORMAL_FUNCTION)) + ); + if (progress) key = new_term; + } + if (key->tag == Var) key = NULL; + else pos = key->end; + + } + spaces(&pos); + ast_t *rhs = parse_infix_expr(ctx, pos, op_tightness[op]); + if (!rhs) return lhs; + pos = rhs->end; + + switch (op) { + case BINOP_MIN: + return NewAST(ctx->file, lhs->start, rhs->end, Min, .lhs=lhs, .rhs=rhs, .key=key); + case BINOP_MAX: + return NewAST(ctx->file, lhs->start, rhs->end, Max, .lhs=lhs, .rhs=rhs, .key=key); + default: + return NewAST(ctx->file, lhs->start, rhs->end, BinaryOp, .lhs=lhs, .rhs=rhs, .op=op); + } +} + +ast_t *parse_expr(parse_ctx_t *ctx, const char *pos) { + return parse_infix_expr(ctx, pos, 0); +} + +PARSER(parse_declaration) { + const char *start = pos; + ast_t *var = parse_var(ctx, pos); 
+ if (!var) return NULL; + pos = var->end; + spaces(&pos); + if (!match(&pos, ":=")) return NULL; + spaces(&pos); + ast_t *val = optional(ctx, &pos, parse_use); + if (!val) val = optional(ctx, &pos, parse_extended_expr); + if (!val) parser_err(ctx, pos, strchrnul(pos, '\n'), "This declaration value didn't parse"); + return NewAST(ctx->file, start, pos, Declare, .var=var, .value=val); +} + +PARSER(parse_update) { + const char *start = pos; + ast_t *lhs = optional(ctx, &pos, parse_expr); + if (!lhs) return NULL; + spaces(&pos); + binop_e op; + if (match(&pos, "+=")) op = BINOP_PLUS; + else if (match(&pos, "++=")) op = BINOP_CONCAT; + else if (match(&pos, "-=")) op = BINOP_MINUS; + else if (match(&pos, "*=")) op = BINOP_MULT; + else if (match(&pos, "/=")) op = BINOP_DIVIDE; + else if (match(&pos, "and=")) op = BINOP_AND; + else if (match(&pos, "or=")) op = BINOP_OR; + else if (match(&pos, "xor=")) op = BINOP_XOR; + else return NULL; + ast_t *rhs = expect(ctx, start, &pos, parse_extended_expr, "I expected an expression here"); + return NewAST(ctx->file, start, pos, UpdateAssign, .lhs=lhs, .rhs=rhs, .op=op); +} + +PARSER(parse_assignment) { + const char *start = pos; + ast_list_t *targets = NULL; + for (;;) { + ast_t *lhs = optional(ctx, &pos, parse_term); + if (!lhs) break; + targets = new(ast_list_t, .ast=lhs, .next=targets); + spaces(&pos); + if (!match(&pos, ",")) break; + whitespace(&pos); + } + + if (!targets) return NULL; + + spaces(&pos); + if (!match(&pos, "=")) return NULL; + if (match(&pos, "=")) return NULL; // == comparison + + ast_list_t *values = NULL; + for (;;) { + ast_t *rhs = optional(ctx, &pos, parse_extended_expr); + if (!rhs) break; + values = new(ast_list_t, .ast=rhs, .next=values); + spaces(&pos); + if (!match(&pos, ",")) break; + whitespace(&pos); + } + + REVERSE_LIST(targets); + REVERSE_LIST(values); + + return NewAST(ctx->file, start, pos, Assign, .targets=targets, .values=values); +} + +PARSER(parse_statement) { + ast_t *stmt = NULL; + if 
((stmt=parse_declaration(ctx, pos)) + || (stmt=parse_doctest(ctx, pos)) + || (stmt=parse_func_def(ctx, pos)) + || (stmt=parse_use(ctx,pos))) + return stmt; + + if (!(false + || (stmt=parse_update(ctx, pos)) + || (stmt=parse_assignment(ctx, pos)) + )) + stmt = parse_extended_expr(ctx, pos); + + for (bool progress = (stmt != NULL); progress; ) { + ast_t *new_stmt; + progress = false; + if (stmt->tag == Var) + progress = (new_stmt=parse_fncall_suffix(ctx, stmt, NORMAL_FUNCTION)); + + if (progress) stmt = new_stmt; + } + return stmt; + +} + +PARSER(parse_extended_expr) { + ast_t *expr = NULL; + + if (false + || (expr=optional(ctx, &pos, parse_for)) + || (expr=optional(ctx, &pos, parse_while)) + || (expr=optional(ctx, &pos, parse_if)) + ) + return expr; + + return parse_expr(ctx, pos); +} + +PARSER(parse_block) { + int64_t block_indent = sss_get_indent(ctx->file, pos); + const char *start = pos; + whitespace(&pos); + ast_list_t *statements = NULL; + while (*pos) { + ast_t *stmt = optional(ctx, &pos, parse_statement); + if (!stmt) { + spaces(&pos); + if (*pos && *pos != '\r' && *pos != '\n') + parser_err(ctx, pos, strchrnul(pos, '\n'), "I couldn't parse this line"); + break; + } + statements = new(ast_list_t, .ast=stmt, .next=statements); + whitespace(&pos); + if (sss_get_indent(ctx->file, pos) != block_indent) { + pos = stmt->end; // backtrack + break; + } + } + REVERSE_LIST(statements); + return NewAST(ctx->file, start, pos, Block, .statements=statements); +} + +PARSER(parse_opt_indented_block) { + return indent(ctx, &pos) ? 
parse_block(ctx, pos) : parse_inline_block(ctx, pos); +} + +PARSER(parse_namespace) { + const char *start = pos; + whitespace(&pos); + int64_t indent = sss_get_indent(ctx->file, pos); + ast_list_t *statements = NULL; + for (;;) { + const char *next = pos; + whitespace(&next); + if (sss_get_indent(ctx->file, next) != indent) break; + ast_t *stmt; + if ((stmt=optional(ctx, &pos, parse_type_def)) + ||(stmt=optional(ctx, &pos, parse_linker)) + ||(stmt=optional(ctx, &pos, parse_statement))) + { + statements = new(ast_list_t, .ast=stmt, .next=statements); + pos = stmt->end; + whitespace(&pos); + } else { + if (sss_get_indent(ctx->file, next) > indent && next < strchrnul(next, '\n')) + parser_err(ctx, next, strchrnul(next, '\n'), "I couldn't parse this namespace statement"); + break; + } + } + REVERSE_LIST(statements); + return NewAST(ctx->file, start, pos, Block, .statements=statements); +} + +PARSER(parse_type_def) { + // type Foo := Type... \n body... + const char *start = pos; + if (!match_word(&pos, "type")) return NULL; + + int64_t starting_indent = sss_get_indent(ctx->file, pos); + + const char *name = get_id(&pos); + if (!name) return NULL; + spaces(&pos); + + if (!match(&pos, ":=")) return NULL; + type_ast_t *type_ast = expect(ctx, start, &pos, parse_type, "I expected a type after this ':='"); + + const char *ns_pos = pos; + whitespace(&ns_pos); + int64_t ns_indent = sss_get_indent(ctx->file, ns_pos); + ast_t *namespace = NULL; + if (ns_indent > starting_indent) { + pos = ns_pos; + namespace = optional(ctx, &pos, parse_namespace); + } + if (!namespace) + namespace = NewAST(ctx->file, pos, pos, Block, .statements=NULL); + return NewAST(ctx->file, start, pos, TypeDef, .var.name=name, .type=type_ast, .namespace=namespace); +} + +type_ast_t *parse_enum_type(parse_ctx_t *ctx, const char *pos) { + // tagged union: enum Foo := a|b(x:Int,y:Int)=5|... 
const char *start = pos;

    if (!match_word(&pos, "enum")) return NULL;
    spaces(&pos);
    if (!match(&pos, "(")) return NULL;

    // Tags are pushed front-first and reversed once the list is complete
    tag_t *tags = NULL;
    // Value given to the next tag that has no explicit `=N`
    int64_t next_value = 0;

    whitespace(&pos);
    for (;;) {
        const char *tag_start = pos;

        spaces(&pos);
        const char *tag_name = get_id(&pos);
        if (!tag_name) break;

        // Optional parenthesized field list for this tag
        spaces(&pos);
        arg_list_t *fields;
        if (match(&pos, "(")) {
            whitespace(&pos);
            fields = parse_args(ctx, &pos, false);
            whitespace(&pos);
            expect_closing(ctx, &pos, ")", "I wasn't able to parse the rest of this tagged union member");
        } else {
            fields = NULL;
        }

        // Optional explicit value; later tags continue counting from it
        spaces(&pos);
        if (match(&pos, "=")) {
            ast_t *val = expect(ctx, tag_start, &pos, parse_int, "I expected an integer literal after this '='");
            next_value = Match(val, Int)->i;
        }

        // Check for duplicate values:
        for (tag_t *t = tags; t; t = t->next) {
            if (t->value == next_value)
                parser_err(ctx, tag_start, pos, "This tag value (%ld) is a duplicate of an earlier tag value", next_value);
        }

        type_ast_t *type = NewTypeAST(ctx->file, tag_start, pos, TypeStruct, .fields=fields);
        tags = new(tag_t, .name=tag_name, .value=next_value, .type=type, .next=tags);

        // Only commit to skipping whitespace if another `|` tag follows
        const char *next_pos = pos;
        whitespace(&next_pos);
        if (!match(&next_pos, "|"))
            break;
        whitespace(&next_pos);
        pos = next_pos;
        ++next_value;
    }

    whitespace(&pos);
    expect_closing(ctx, &pos, ")", "I wasn't able to parse the rest of this enum definition");

    REVERSE_LIST(tags);

    return NewTypeAST(ctx->file, start, pos, TypeTaggedUnion, .tags=tags);
}

// Parse a comma-separated argument list, advancing *pos past it. Arguments
// are read in batches of names that share one type or default value (the
// batch ends at the ':' type or '=' default). When `allow_unnamed` is set, a
// bare type is accepted as an argument with no name.
arg_list_t *parse_args(parse_ctx_t *ctx, const char **pos, bool allow_unnamed)
{
    arg_list_t *args = NULL;
    for (;;) {
        const char *batch_start = *pos;
        ast_t *default_val = NULL;
        type_ast_t *type = NULL;

        // Names collected for the current batch
        typedef struct var_list_s {
            var_t *var;
            struct var_list_s *next;
        } var_list_t;

        var_list_t *vars = NULL;
        for (;;) {
            whitespace(pos);
            const char *name_start = *pos;
            const char *name = get_id(pos);
            if
(!name) break; + whitespace(pos); + if (strncmp(*pos, "==", 2) != 0 && match(pos, "=")) { + default_val = expect(ctx, *pos-1, pos, parse_term, "I expected a value after this '='"); + vars = new(var_list_t, .var=new(var_t, .name=name), .next=vars); + break; + } else if (match(pos, ":")) { + type = expect(ctx, *pos-1, pos, parse_type, "I expected a type here"); + vars = new(var_list_t, .var=new(var_t, .name=name), .next=vars); + break; + } else if (allow_unnamed) { + *pos = name_start; + type = optional(ctx, pos, parse_type); + if (type) + vars = new(var_list_t, .var=NULL, .next=vars); + break; + } else if (name) { + vars = new(var_list_t, .var=new(var_t, .name=name), .next=vars); + spaces(pos); + if (!match(pos, ",")) break; + } else { + break; + } + } + if (!vars) break; + if (!default_val && !type) + parser_err(ctx, batch_start, *pos, "I expected a ':' and type, or '=' and a default value after this parameter (%s)", + vars->var->name); + + REVERSE_LIST(vars); + for (; vars; vars = vars->next) + args = new(arg_list_t, .var=vars->var, .type=type, .default_val=default_val); + whitespace(pos); + match(pos, ","); + } + + REVERSE_LIST(args); + return args; +} + +PARSER(parse_func_def) { + const char *start = pos; + if (!match_word(&pos, "func")) return NULL; + + ast_t *name = optional(ctx, &pos, parse_var); + if (!name) return NULL; + + spaces(&pos); + + if (!match(&pos, "(")) return NULL; + + arg_list_t *args = parse_args(ctx, &pos, false); + whitespace(&pos); + bool is_inline = false; + ast_t *cache_ast = NULL; + for (; whitespace(&pos), (match(&pos, ";") || match(&pos, ",")); ) { + const char *flag_start = pos; + if (match_word(&pos, "inline")) { + is_inline = true; + } else if (match_word(&pos, "cached")) { + if (!cache_ast) cache_ast = NewAST(ctx->file, pos, pos, Int, .i=INT64_MAX, .precision=64); + } else if (match_word(&pos, "cache_size")) { + if (whitespace(&pos), !match(&pos, "=")) + parser_err(ctx, flag_start, pos, "I expected a value for 'cache_size'"); + 
whitespace(&pos); + cache_ast = expect(ctx, start, &pos, parse_expr, "I expected a maximum size for the cache"); + } + } + expect_closing(ctx, &pos, ")", "I wasn't able to parse the rest of this function definition"); + + type_ast_t *ret_type = NULL; + spaces(&pos); + if (match(&pos, "->") || match(&pos, ":")) + ret_type = optional(ctx, &pos, parse_type); + + ast_t *body = expect(ctx, start, &pos, parse_opt_indented_block, + "This function needs a body block"); + return NewAST(ctx->file, start, pos, FunctionDef, + .name=name, .args=args, .ret_type=ret_type, .body=body, .cache=cache_ast, + .is_inline=is_inline); +} + +PARSER(parse_extern) { + const char *start = pos; + if (!match_word(&pos, "extern")) return NULL; + spaces(&pos); + bool address = (match(&pos, "&") != 0); + const char* name = get_id(&pos); + spaces(&pos); + // extern function call: + if (match(&pos, "(")) { + return parse_fncall_suffix(ctx, NewAST(ctx->file, start, pos-1, Var, .var.name=name), EXTERN_FUNCTION); + } + if (!match(&pos, ":")) + parser_err(ctx, start, pos, "I couldn't get a type for this extern"); + type_ast_t *type = expect(ctx, start, &pos, parse_type, "I couldn't parse the type for this extern"); + return NewAST(ctx->file, start, pos, Extern, .name=name, .type=type, .address=address); +} + +PARSER(parse_doctest) { + const char *start = pos; + if (!match(&pos, ">>>")) return NULL; + spaces(&pos); + ast_t *expr = expect(ctx, start, &pos, parse_statement, "I couldn't parse the expression for this doctest"); + whitespace(&pos); + const char* output = NULL; + if (match(&pos, "===")) { + spaces(&pos); + const char *output_start = pos, + *output_end = strchrnul(pos, '\n'); + if (output_end <= output_start) + parser_err(ctx, output_start, output_end, "You're missing expected output here"); + output = heap_strn(output_start, (size_t)(output_end - output_start)); + pos = output_end; + } + return NewAST(ctx->file, start, pos, DocTest, .expr=expr, .output=output); +} + +PARSER(parse_use) { + 
const char *start = pos; + if (!match_word(&pos, "use")) return NULL; + spaces(&pos); + size_t path_len = strcspn(pos, " \t\r\n;"); + if (path_len < 1) + parser_err(ctx, start, pos, "There is no filename here to use"); + char *path = heap_strf("%.*s.sss", (int)path_len, pos); + pos += path_len; + char *resolved_path = resolve_path(path, ctx->file->filename); + if (!resolved_path) + parser_err(ctx, start, pos, "No such file exists: \"%s\"", path); + while (match(&pos, ";")) continue; + return NewAST(ctx->file, start, pos, Use, .path=resolved_path); +} + +PARSER(parse_linker) { + const char *start = pos; + if (!match_word(&pos, "!link")) return NULL; + spaces(&pos); + size_t len = strcspn(pos, "\r\n"); + const char *directive = heap_strn(pos, len); + return NewAST(ctx->file, start, pos, LinkerDirective, .directive=directive); +} + +PARSER(parse_inline_block) { + spaces(&pos); + const char *start = pos; + ast_list_t *statements = NULL; + while (*pos) { + spaces(&pos); + ast_t *stmt = optional(ctx, &pos, parse_statement); + if (!stmt) break; + statements = new(ast_list_t, .ast=stmt, .next=statements); + spaces(&pos); + if (!match(&pos, ";")) break; + } + REVERSE_LIST(statements); + return NewAST(ctx->file, start, pos, Block, .statements=statements); +} + +ast_t *parse_file(sss_file_t *file, jmp_buf *on_err) { + parse_ctx_t ctx = { + .file=file, + .on_err=on_err, + }; + + const char *pos = file->text; + if (match(&pos, "#!")) // shebang + some_not(&pos, "\r\n"); + + whitespace(&pos); + ast_t *ast = parse_namespace(&ctx, pos); + pos = ast->end; + whitespace(&pos); + if (strlen(pos) > 0) { + parser_err(&ctx, pos, pos + strlen(pos), "I couldn't parse this part of the file"); + } + return ast; +} + +type_ast_t *parse_type_str(const char *str) { + sss_file_t *file = sss_spoof_file("<type>", str); + parse_ctx_t ctx = { + .file=file, + .on_err=NULL, + }; + + const char *pos = file->text; + whitespace(&pos); + type_ast_t *ast = parse_type(&ctx, pos); + if (!ast) return ast; + 
pos = ast->end; + whitespace(&pos); + if (strlen(pos) > 0) { + parser_err(&ctx, pos, pos + strlen(pos), "I couldn't parse this part of the type"); + } + return ast; +} + +ast_t *parse_expression_str(const char *str) { + sss_file_t *file = sss_spoof_file("<expression>", str); + parse_ctx_t ctx = { + .file=file, + .on_err=NULL, + }; + + const char *pos = file->text; + whitespace(&pos); + ast_t *ast = parse_extended_expr(&ctx, pos); + if (!ast) return ast; + pos = ast->end; + whitespace(&pos); + if (strlen(pos) > 0) { + parser_err(&ctx, pos, pos + strlen(pos), "I couldn't parse this part of the expression"); + } + return ast; +} + +// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 diff --git a/parse.h b/parse.h new file mode 100644 index 00000000..aaaa671a --- /dev/null +++ b/parse.h @@ -0,0 +1,9 @@ +#pragma once + +#include <setjmp.h> + +#include "ast.h" + +type_ast_t *parse_type_str(const char *str); +ast_t *parse_expression_str(const char *str); +ast_t *parse_file(sss_file_t *file, jmp_buf *on_err); @@ -0,0 +1,84 @@ +#include <ctype.h> +#include <gc.h> +#include <gc/cord.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "util.h" + +public char *heap_strn(const char *str, size_t len) +{ + if (!str) return NULL; + if (len == 0) return ""; + char *heaped = GC_MALLOC_ATOMIC(len + 1); + memcpy(heaped, str, len); + heaped[len] = '\0'; + return heaped; +} + +public char *heap_str(const char *str) +{ + if (!str) return NULL; + return heap_strn(str, strlen(str)); +} + +public char *heap_strf(const char *fmt, ...) 
+{ + va_list args; + va_start(args, fmt); + char *tmp = NULL; + int len = vasprintf(&tmp, fmt, args); + if (len < 0) return NULL; + va_end(args); + char *ret = heap_strn(tmp, (size_t)len); + free(tmp); + return ret; +} + +// Name mangling algorithm to produce valid identifiers: +// Escape individual chars as "_x%02X" +// Things being escaped: +// - Leading digit +// - Non alphanumeric/non-underscore characters +// - "_" when followed by "x" and two uppercase hex digits +public char *mangle(const char *name) +{ + size_t len = 0; + for (const char *p = name; *p; p++) { + if ((!isalnum(*p) && *p != '_') // Non-identifier character + || (p == name && isdigit(*p)) // Leading digit + || (p[0] == '_' && p[1] == 'x' && strspn(p+2, "ABCDEF0123456789") >= 2)) { // Looks like hex escape + len += strlen("_x00"); // Hex escape + } else { + len += 1; + } + } + char *mangled = GC_MALLOC_ATOMIC(len + 1); + char *dest = mangled; + for (const char *src = name; *src; src++) { + if ((!isalnum(*src) && *src != '_') // Non-identifier character + || (src == name && isdigit(*src)) // Leading digit + || (src[0] == '_' && src[1] == 'x' && strspn(src+2, "ABCDEF0123456789") >= 2)) { // Looks like hex escape + dest += sprintf(dest, "_x%02X", *src); // Hex escape + } else { + *(dest++) = *src; + } + } + mangled[len] = '\0'; + return mangled; +} + +CORD CORD_asprintf(const char *fmt, ...) +{ + va_list args; + va_start(args, fmt); + CORD c = NULL; + CORD_vsprintf(&c, fmt, args); + va_end(args); + return c; +} + + +// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 @@ -0,0 +1,29 @@ +#pragma once + +#include <assert.h> +#include <gc.h> +#include <gc/cord.h> +#include <stdio.h> +#include <string.h> +#include <err.h> + +#define streq(a, b) (((a) == NULL && (b) == NULL) || (((a) == NULL) == ((b) == NULL) && strcmp(a, b) == 0)) +#define new(t, ...) 
((t*)memcpy(GC_MALLOC(sizeof(t)), &(t){__VA_ARGS__}, sizeof(t))) +#define grow(arr, new_size) ((typeof (arr))GC_REALLOC(arr, (sizeof(arr[0]))*(new_size))) +#define Match(x, _tag) ((x)->tag == _tag ? &(x)->__data._tag : (errx(1, __FILE__ ":%d This was supposed to be a " # _tag "\n", __LINE__), &(x)->__data._tag)) +#define Tagged(t, _tag, ...) new(t, .tag=_tag, .__data._tag={__VA_ARGS__}) + +#ifndef auto +#define auto __auto_type +#endif + +#ifndef public +#define public __attribute__ ((visibility ("default"))) +#endif + +char *heap_strn(const char *str, size_t len); +char *heap_str(const char *str); +char *heap_strf(const char *fmt, ...); +CORD CORD_asprintf(const char *fmt, ...); + +// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 |
