From 98f0c51119f9d42d733f44cb516b1c2bcd9061af Mon Sep 17 00:00:00 2001 From: Bruce Hill Date: Sun, 4 Feb 2024 15:23:59 -0500 Subject: Initial commit --- Makefile | 43 ++ ast.c | 179 +++++++ ast.h | 290 ++++++++++ compile.c | 185 +++++++ compile.h | 11 + files.c | 317 +++++++++++ files.h | 43 ++ foo.c | 12 + nextlang.c | 23 + parse.c | 1759 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ parse.h | 9 + util.c | 84 +++ util.h | 29 + 13 files changed, 2984 insertions(+) create mode 100644 Makefile create mode 100644 ast.c create mode 100644 ast.h create mode 100644 compile.c create mode 100644 compile.h create mode 100644 files.c create mode 100644 files.h create mode 100644 foo.c create mode 100644 nextlang.c create mode 100644 parse.c create mode 100644 parse.h create mode 100644 util.c create mode 100644 util.h diff --git a/Makefile b/Makefile new file mode 100644 index 00000000..fb2b6414 --- /dev/null +++ b/Makefile @@ -0,0 +1,43 @@ +CC=gcc +PREFIX=/usr/local +VERSION=0.12.1 +CCONFIG=-std=c11 -Werror -D_XOPEN_SOURCE=700 -D_POSIX_C_SOURCE=200809L -fPIC -ftrapv -fvisibility=hidden -flto -fno-fat-lto-objects -Wl,-flto +LDFLAGS=-Wl,-rpath '-Wl,$$ORIGIN' +# MAKEFLAGS := --jobs=$(shell nproc) --output-sync=target +CWARN=-Wall -Wextra -Wno-format + # -Wpedantic -Wsign-conversion -Wtype-limits -Wunused-result -Wnull-dereference \ + # -Waggregate-return -Walloc-zero -Walloca -Warith-conversion -Wcast-align -Wcast-align=strict \ + # -Wdangling-else -Wdate-time -Wdisabled-optimization -Wdouble-promotion -Wduplicated-branches \ + # -Wduplicated-cond -Wexpansion-to-defined -Wfloat-conversion -Wfloat-equal -Wformat-nonliteral \ + # -Wformat-security -Wformat-signedness -Wframe-address -Winline -Winvalid-pch -Wjump-misses-init \ + # -Wlogical-op -Wlong-long -Wmissing-format-attribute -Wmissing-include-dirs -Wmissing-noreturn \ + # -Wnull-dereference -Woverlength-strings -Wpacked -Wpacked-not-aligned -Wpointer-arith \ + # -Wredundant-decls -Wshadow 
-Wshadow=compatible-local -Wshadow=global -Wshadow=local \ + # -Wsign-conversion -Wstack-protector -Wsuggest-attribute=const -Wswitch-default -Wswitch-enum \ + # -Wsync-nand -Wtrampolines -Wundef -Wunsuffixed-float-constants -Wunused -Wunused-but-set-variable \ + # -Wunused-const-variable -Wunused-local-typedefs -Wunused-macros -Wvariadic-macros -Wvector-operation-performance \ + # -Wvla -Wwrite-strings +OSFLAGS != case $$(uname -s) in *BSD|Darwin) echo '-D_BSD_SOURCE';; Linux) echo '-D_GNU_SOURCE';; *) echo '-D_DEFAULT_SOURCE';; esac +EXTRA= +G=-ggdb +O=-Og +CFLAGS=$(CCONFIG) $(EXTRA) $(CWARN) $(G) $(O) $(OSFLAGS) +LDLIBS=-lgc -lgccjit -lcord -lm -lunistring + +all: nextlang + +nextlang: nextlang.c parse.o files.o util.o ast.o compile.o + +SipHash/halfsiphash.c: + git submodule update --init --recursive + +tags: + ctags **/*.[ch] + +clean: + rm -f nextlang *.o + +%.1: %.1.md + pandoc --lua-filter=.pandoc/bold-code.lua -s $< -t man -o $@ + +.PHONY: all clean install uninstall test diff --git a/ast.c b/ast.c new file mode 100644 index 00000000..b62a058c --- /dev/null +++ b/ast.c @@ -0,0 +1,179 @@ +// Some basic operations defined on AST nodes, mainly converting to +// strings for debugging. 
+#include +#include +#include + +#include "ast.h" + +static const char *OP_NAMES[] = { + [BINOP_UNKNOWN]="unknown", + [UNOP_NOT]="not", [UNOP_NEGATIVE]="negative", + [BINOP_POWER]="^", [BINOP_MULT]="*", [BINOP_DIVIDE]="/", + [BINOP_MOD]="mod", [BINOP_MOD1]="mod1", [BINOP_PLUS]="+", [BINOP_MINUS]="minus", + [BINOP_CONCAT]="++", [BINOP_LSHIFT]="<<", [BINOP_RSHIFT]=">>", [BINOP_MIN]="min", + [BINOP_MAX]="max", [BINOP_EQ]="==", [BINOP_NE]="!=", [BINOP_LT]="<", + [BINOP_LE]="<=", [BINOP_GT]=">", [BINOP_GE]=">=", [BINOP_AND]="and", [BINOP_OR]="or", [BINOP_XOR]="xor", +}; + +static CORD ast_to_cord(ast_t *ast); +static CORD ast_list_to_cord(ast_list_t *asts); +static CORD type_ast_to_cord(type_ast_t *t); +static CORD arg_list_to_cord(arg_list_t *args); +static CORD tags_to_cord(tag_t *tags); + +#define TO_CORD(x) _Generic(x, \ + ast_t*: ast_to_cord(x), \ + ast_list_t*: ast_list_to_cord(x), \ + type_ast_t*: type_ast_to_cord(x), \ + arg_list_t*: arg_list_to_cord(x), \ + tag_t*: tags_to_cord(x), \ + const char *: x, \ + int64_t: CORD_asprintf("%ld", x), \ + unsigned short int: CORD_asprintf("%d", x), \ + double: CORD_asprintf("%g", x), \ + bool: CORD_asprintf("%s", x ? "yes" : "no"), \ + unsigned char: CORD_asprintf("%s", x ? 
"yes" : "no")) + +CORD ast_list_to_cord(ast_list_t *asts) +{ + if (!asts) + return "\x1b[35mNULL\x1b[m"; + + CORD c = "["; + for (; asts; asts = asts->next) { + c = CORD_cat(c, ast_to_cord(asts->ast)); + if (asts->next) c = CORD_cat(c, ", "); + } + c = CORD_cat(c, "]"); + return c; +} + +CORD arg_list_to_cord(arg_list_t *args) { + CORD c = "Args("; + for (; args; args = args->next) { + if (args->var && args->var->name) + c = CORD_cat(c, args->var->name); + if (args->type) + CORD_sprintf(&c, "%r:%s", c, type_ast_to_cord(args->type)); + if (args->default_val) + CORD_sprintf(&c, "%r=%s", c, ast_to_cord(args->default_val)); + if (args->next) c = CORD_cat(c, ", "); + } + c = CORD_cat(c, ")"); + return c; +} + +CORD tags_to_cord(tag_t *tags) { + CORD c = "Tags("; + for (; tags; tags = tags->next) { + if (tags->name) + c = CORD_cat(c, tags->name); + CORD_sprintf(&c, "%r:%s=%ld", c, type_ast_to_cord(tags->type), tags->value); + if (tags->next) c = CORD_cat(c, ", "); + } + c = CORD_cat(c, ")"); + return c; +} + +CORD ast_to_cord(ast_t *ast) +{ + if (!ast) return "\x1b[35mNULL\x1b[m"; + + switch (ast->tag) { +#define T(type, ...) case type: { auto data = ast->__data.type; (void)data; return CORD_asprintf("\x1b[34;1m" #type "\x1b[m" __VA_ARGS__); } + T(Unknown, "Unknown") + T(Nil, "(%r)", type_ast_to_cord(data.type)) + T(Bool, "(\x1b[35m%s\x1b[m)", data.b ? 
"yes" : "no") + T(Var, "(\x1b[36;1m%s\x1b[m)", data.var.name) + T(Int, "(\x1b[35m%ld\x1b[m, precision=%ld)", data.i, data.precision) + T(Num, "(\x1b[35m%ld\x1b[m, precision=%ld)", data.n, data.precision) + T(Char, "(\x1b[35m'%c'\x1b[m)", data.c) + T(StringLiteral, "\x1b[35m\"%s\"\x1b[m", data.str) + T(StringJoin, "(%r)", ast_list_to_cord(data.children)) + T(Interp, "(%r)", ast_to_cord(data.value)) + T(Declare, "(var=%s, value=%s)", ast_to_cord(data.var), ast_to_cord(data.value)) + T(Assign, "(targets=%r, values=%r)", ast_list_to_cord(data.targets), ast_list_to_cord(data.values)) + T(BinaryOp, "(%r, %s, %r)", ast_to_cord(data.lhs), OP_NAMES[data.op], ast_to_cord(data.rhs)) + T(UpdateAssign, "(%r, %s, %r)", ast_to_cord(data.lhs), OP_NAMES[data.op], ast_to_cord(data.rhs)) + T(UnaryOp, "(%s, %r)", OP_NAMES[data.op], ast_to_cord(data.value)) + T(Min, "(%r, %r, key=%r)", ast_to_cord(data.lhs), ast_to_cord(data.rhs), ast_to_cord(data.key)) + T(Max, "(%r, %r, key=%r)", ast_to_cord(data.lhs), ast_to_cord(data.rhs), ast_to_cord(data.key)) + T(Array, "(%r, type=%r)", ast_list_to_cord(data.items), type_ast_to_cord(data.type)) + T(Table, "(key_type=%r, value_type=%r, fallback=%r, default_value=%r, entries=%r)", + type_ast_to_cord(data.key_type), type_ast_to_cord(data.value_type), + ast_to_cord(data.fallback), ast_to_cord(data.default_value), + ast_list_to_cord(data.entries)) + T(TableEntry, "(%r => %r)", ast_to_cord(data.key), ast_to_cord(data.value)) + T(FunctionDef, "(name=%r, args=%r, ret=%r, body=%r)", ast_to_cord(data.name), + arg_list_to_cord(data.args), type_ast_to_cord(data.ret_type), ast_to_cord(data.body)) + T(Lambda, "(args=%r, body=%r)", arg_list_to_cord(data.args), ast_to_cord(data.body)) + T(FunctionCall, "(fn=%r, args=%r)", ast_to_cord(data.fn), ast_list_to_cord(data.args)) + T(KeywordArg, "(%s=%r)", ast_to_cord(data.arg)) + T(Block, "(%r)", ast_list_to_cord(data.statements)) + T(For, "(index=%r, value=%r, iter=%r, body=%r)", ast_to_cord(data.index), 
ast_to_cord(data.value), + ast_to_cord(data.iter), ast_to_cord(data.body)) + T(While, "(condition=%r, body=%r)", ast_to_cord(data.condition), ast_to_cord(data.body)) + T(If, "(condition=%r, body=%r, else=%r)", ast_to_cord(data.condition), ast_to_cord(data.body), ast_to_cord(data.else_body)) + T(Reduction, "(iter=%r, combination=%r, fallback=%r)", ast_to_cord(data.iter), ast_to_cord(data.combination), ast_to_cord(data.fallback)) + T(Skip, "(%s)", data.target) + T(Stop, "(%s)", data.target) + T(Pass, "") + T(Return, "(%r)", ast_to_cord(data.value)) + T(Extern, "(name=%s, type=%r)", data.name, type_ast_to_cord(data.type)) + T(TypeDef, "(%s, type=%r, namespace=%r)", data.var.name, type_ast_to_cord(data.type), ast_to_cord(data.namespace)) + T(Index, "(indexed=%r, index=%r)", ast_to_cord(data.indexed), ast_to_cord(data.index)) + T(FieldAccess, "(fielded=%r, field=%s)", ast_to_cord(data.fielded), data.field) + T(DocTest, "(expr=%r, output=%s)", ast_to_cord(data.expr), data.output) + T(Use, "(%s)", data.path) + T(LinkerDirective, "(%s)", data.directive) +#undef T + } + return NULL; +} + +CORD type_ast_to_cord(type_ast_t *t) +{ + if (!t) return "\x1b[35mNULL\x1b[m"; + + switch (t->tag) { +#define T(type, ...) 
case type: { auto data = t->__data.type; (void)data; return CORD_asprintf("\x1b[32;1m" #type "\x1b[m" __VA_ARGS__); } + T(TypeUnknown, "") + T(TypeVar, "(\x1b[36;1m%s\x1b[m)", data.var.name) + T(TypePointer, "(%r, is_optional=%d, is_stack=%d, is_readonly=%d)", + type_ast_to_cord(data.pointed), data.is_optional, + data.is_stack, data.is_readonly) + T(TypeStruct, "(%r)", arg_list_to_cord(data.fields)) + T(TypeTaggedUnion, "(%r)", tags_to_cord(data.tags)) + T(TypeArray, "(%r)", type_ast_to_cord(data.item)) + T(TypeTable, "(%r => %r)", type_ast_to_cord(data.key), type_ast_to_cord(data.value)) + T(TypeFunction, "(args=%r, ret=%r)", arg_list_to_cord(data.args), type_ast_to_cord(data.ret)) +#undef T + } + return NULL; +} + +const char *ast_to_str(ast_t *ast) { + CORD c = ast_to_cord(ast); + return CORD_to_char_star(c); +} + +const char *type_ast_to_str(type_ast_t *t) { + CORD c = type_ast_to_cord(t); + return CORD_to_char_star(c); +} + +int printf_ast(FILE *stream, const struct printf_info *info, const void *const args[]) +{ + ast_t *ast = *(ast_t**)(args[0]); + if (ast) { + if (info->alt) + return fprintf(stream, "%.*s", (int)(ast->end - ast->start), ast->start); + else + return fprintf(stream, "%s", ast_to_str(ast)); + } else { + return fputs("(null)", stream); + } +} + +// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 diff --git a/ast.h b/ast.h new file mode 100644 index 00000000..0d90d4b5 --- /dev/null +++ b/ast.h @@ -0,0 +1,290 @@ +#pragma once +#include +#include +#include +#include + +#include "files.h" +#include "util.h" + +#define NewAST(_file, _start, _end, ast_tag, ...) (new(ast_t, .file=_file, .start=_start, .end=_end,\ + .tag=ast_tag, .__data.ast_tag={__VA_ARGS__})) +#define NewTypeAST(_file, _start, _end, ast_tag, ...) (new(type_ast_t, .file=_file, .start=_start, .end=_end,\ + .tag=ast_tag, .__data.ast_tag={__VA_ARGS__})) +#define FakeAST(ast_tag, ...) (new(ast_t, .tag=ast_tag, .__data.ast_tag={__VA_ARGS__})) +#define WrapAST(ast, ast_tag, ...) 
(new(ast_t, .file=(ast)->file, .start=(ast)->start, .end=(ast)->end, .tag=ast_tag, .__data.ast_tag={__VA_ARGS__})) +#define StringAST(ast, _str) WrapAST(ast, StringLiteral, .str=heap_str(_str)) + +struct binding_s; +typedef struct type_ast_s type_ast_t; +typedef struct ast_s ast_t; + +typedef struct { + const char *name; + struct binding_s *binding; +} var_t; + +typedef struct ast_list_s { + ast_t *ast; + struct ast_list_s *next; +} ast_list_t; + +typedef struct arg_list_s { + var_t *var; + type_ast_t *type; + ast_t *default_val; + struct arg_list_s *next; +} arg_list_t; + +#define REVERSE_LIST(list) do { \ + __typeof(list) _prev = NULL; \ + __typeof(list) _next = NULL; \ + auto _current = list; \ + while (_current != NULL) { \ + _next = _current->next; \ + _current->next = _prev; \ + _prev = _current; \ + _current = _next; \ + } \ + list = _prev; \ +} while(0) + +typedef enum { + UNOP_UNKNOWN, + UNOP_NOT=1, UNOP_NEGATIVE, + UNOP_HEAP_ALLOCATE, + UNOP_STACK_REFERENCE, +} unop_e; + +typedef enum { + BINOP_UNKNOWN, + BINOP_POWER=100, BINOP_MULT, BINOP_DIVIDE, BINOP_MOD, BINOP_MOD1, BINOP_PLUS, + BINOP_MINUS, BINOP_CONCAT, BINOP_LSHIFT, BINOP_RSHIFT, BINOP_MIN, + BINOP_MAX, BINOP_EQ, BINOP_NE, BINOP_LT, BINOP_LE, BINOP_GT, BINOP_GE, + BINOP_AND, BINOP_OR, BINOP_XOR, +} binop_e; + +typedef enum { + TypeUnknown, + TypeVar, + TypePointer, + TypeStruct, + TypeTaggedUnion, + TypeArray, + TypeTable, + TypeFunction, +} type_ast_e; + +typedef struct tag_s { + const char *name; + struct type_ast_s *type; + int64_t value; + struct tag_s *next; +} tag_t; + +struct type_ast_s { + type_ast_e tag; + sss_file_t *file; + const char *start, *end; + union { + struct {} TypeUnknown; + struct { + var_t var; + } TypeVar; + struct { + type_ast_t *pointed; + bool is_optional:1, is_stack:1, is_readonly:1; + } TypePointer; + struct { + arg_list_t *fields; + } TypeStruct; + struct { + tag_t *tags; + } TypeTaggedUnion; + struct { + type_ast_t *item; + } TypeArray; + struct { + type_ast_t *key, 
*value; + } TypeTable; + struct { + arg_list_t *args; + type_ast_t *ret; + } TypeFunction; + } __data; +}; + +typedef enum { + Unknown = 0, + Nil, Bool, Var, + Int, Num, Char, + StringLiteral, StringJoin, Interp, + Declare, Assign, + BinaryOp, UnaryOp, UpdateAssign, + Min, Max, + Array, Table, TableEntry, + FunctionDef, Lambda, + FunctionCall, KeywordArg, + Block, + For, While, If, + Reduction, + Skip, Stop, Pass, + Return, + Extern, + TypeDef, + Index, FieldAccess, + DocTest, + Use, + LinkerDirective, +} ast_e; + +struct ast_s { + ast_e tag; + sss_file_t *file; + const char *start, *end; + union { + struct {} Unknown; + struct { + type_ast_t *type; + } Nil; + struct { + bool b; + } Bool; + struct { + var_t var; + } Var; + struct { + int64_t i; + enum { INT_64BIT, INT_32BIT, INT_16BIT, INT_8BIT } precision; + } Int; + struct { + double n; + enum { NUM_64BIT, NUM_32BIT } precision; + } Num; + struct { + char c; + } Char; + struct { + const char *str; + } StringLiteral; + struct { + ast_list_t *children; + } StringJoin; + struct { + ast_t *value; + bool labelled:1, colorize:1, quote_string:1; + } Interp; + struct { + ast_t *var; + ast_t *value; + } Declare; + struct { + ast_list_t *targets, *values; + } Assign; + struct { + ast_t *lhs; + binop_e op; + ast_t *rhs; + } BinaryOp, UpdateAssign; + struct { + unop_e op; + ast_t *value; + } UnaryOp; + struct { + ast_t *lhs, *rhs, *key; + } Min, Max; + struct { + type_ast_t *type; + ast_list_t *items; + } Array; + struct { + type_ast_t *key_type, *value_type; + ast_t *fallback, *default_value; + ast_list_t *entries; + } Table; + struct { + ast_t *key, *value; + } TableEntry; + struct { + ast_t *name; + arg_list_t *args; + type_ast_t *ret_type; + ast_t *body; + ast_t *cache; + bool is_inline; + } FunctionDef; + struct { + arg_list_t *args; + ast_t *body; + } Lambda; + struct { + ast_t *fn; + ast_list_t *args; + type_ast_t *extern_return_type; + } FunctionCall; + struct { + const char *name; + ast_t *arg; + } KeywordArg; + 
struct { + ast_list_t *statements; + } Block; + struct { + ast_t *index, *value, *iter, *body; + } For; + struct { + ast_t *condition, *body; + } While; + struct { + ast_t *condition, *body, *else_body; + } If; + struct { + ast_t *iter, *combination, *fallback; + } Reduction; + struct { + const char *target; + } Skip, Stop; + struct {} Pass; + struct { + ast_t *value; + } Return; + struct { + const char *name; + type_ast_t *type; + bool address; + } Extern; + struct { + var_t var; + type_ast_t *type; + ast_t *namespace; + } TypeDef; + struct { + ast_t *indexed, *index; + bool unchecked; + } Index; + struct { + ast_t *fielded; + const char *field; + } FieldAccess; + struct { + ast_t *expr; + const char *output; + bool skip_source:1; + } DocTest; + struct { + const char *path; + sss_file_t *file; + bool main_program; + } Use; + struct { + const char *directive; + } LinkerDirective; + } __data; +}; + +const char *ast_to_str(ast_t *ast); +const char *type_ast_to_str(type_ast_t *ast); +int printf_ast(FILE *stream, const struct printf_info *info, const void *const args[]); +ast_list_t *get_ast_children(ast_t *ast); + +// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 diff --git a/compile.c b/compile.c new file mode 100644 index 00000000..e4ae0a48 --- /dev/null +++ b/compile.c @@ -0,0 +1,185 @@ + +#include +#include +#include +#include + +#include "ast.h" +#include "util.h" + +static CORD compile_type(type_ast_t *t) +{ + switch (t->tag) { + case TypeVar: return Match(t, TypeVar)->var.name; + default: errx(1, "Not implemented"); + } +} + +CORD compile(ast_t *ast) +{ + switch (ast->tag) { + case Nil: return "NULL"; + case Bool: return Match(ast, Bool)->b ? "true" : "false"; + case Var: return Match(ast, Var)->var.name; + case Int: return CORD_asprintf("((Int%ld_t)%ld)", Match(ast, Int)->precision, Match(ast, Int)->i); + case Num: return CORD_asprintf(Match(ast, Num)->precision == 64 ? 
"%g" : "%gf", Match(ast, Num)->n); + case Char: return CORD_asprintf("'\\x%02X'", (int)Match(ast, Char)->c); + case UnaryOp: { + auto unop = Match(ast, UnaryOp); + CORD expr = compile(unop->value); + switch (unop->op) { + case UNOP_NOT: return CORD_cat("!", expr); + case UNOP_NEGATIVE: return CORD_cat("-", expr); + case UNOP_HEAP_ALLOCATE: return CORD_asprintf("__heap(%r)", expr); + case UNOP_STACK_REFERENCE: return CORD_asprintf("__stack(%r)", expr); + default: break; + } + errx(1, "Invalid unop"); + } + case BinaryOp: { + auto binop = Match(ast, BinaryOp); + CORD lhs = compile(binop->lhs); + CORD rhs = compile(binop->rhs); + switch (binop->op) { + case BINOP_MULT: return CORD_asprintf("(%r * %r)", lhs, rhs); + case BINOP_DIVIDE: return CORD_asprintf("(%r / %r)", lhs, rhs); + case BINOP_MOD: return CORD_asprintf("(%r %% %r)", lhs, rhs); + case BINOP_PLUS: return CORD_asprintf("(%r + %r)", lhs, rhs); + case BINOP_MINUS: return CORD_asprintf("(%r - %r)", lhs, rhs); + case BINOP_LSHIFT: return CORD_asprintf("(%r << %r)", lhs, rhs); + case BINOP_RSHIFT: return CORD_asprintf("(%r >> %r)", lhs, rhs); + case BINOP_EQ: return CORD_asprintf("(%r == %r)", lhs, rhs); + case BINOP_NE: return CORD_asprintf("(%r != %r)", lhs, rhs); + case BINOP_LT: return CORD_asprintf("(%r < %r)", lhs, rhs); + case BINOP_LE: return CORD_asprintf("(%r <= %r)", lhs, rhs); + case BINOP_GT: return CORD_asprintf("(%r > %r)", lhs, rhs); + case BINOP_GE: return CORD_asprintf("(%r >= %r)", lhs, rhs); + case BINOP_AND: return CORD_asprintf("(%r && %r)", lhs, rhs); + case BINOP_OR: return CORD_asprintf("(%r || %r)", lhs, rhs); + default: break; + } + errx(1, "unimplemented binop"); + } + case UpdateAssign: { + auto update = Match(ast, UpdateAssign); + CORD lhs = compile(update->lhs); + CORD rhs = compile(update->rhs); + switch (update->op) { + case BINOP_MULT: return CORD_asprintf("%r *= %r", lhs, rhs); + case BINOP_DIVIDE: return CORD_asprintf("%r /= %r", lhs, rhs); + case BINOP_MOD: return 
CORD_asprintf("%r = %r %% %r", lhs, lhs, rhs); + case BINOP_PLUS: return CORD_asprintf("%r += %r", lhs, rhs); + case BINOP_MINUS: return CORD_asprintf("%r -= %r", lhs, rhs); + case BINOP_LSHIFT: return CORD_asprintf("%r <<= %r", lhs, rhs); + case BINOP_RSHIFT: return CORD_asprintf("%r >>= %r", lhs, rhs); + case BINOP_EQ: return CORD_asprintf("%r = (%r == %r)", lhs, lhs, rhs); + case BINOP_NE: return CORD_asprintf("%r = (%r != %r)", lhs, lhs, rhs); + case BINOP_LT: return CORD_asprintf("%r = (%r < %r)", lhs, lhs, rhs); + case BINOP_LE: return CORD_asprintf("%r = (%r <= %r)", lhs, lhs, rhs); + case BINOP_GT: return CORD_asprintf("%r = (%r > %r)", lhs, lhs, rhs); + case BINOP_GE: return CORD_asprintf("%r = (%r >= %r)", lhs, lhs, rhs); + case BINOP_AND: return CORD_asprintf("%r = (%r && %r)", lhs, lhs, rhs); + case BINOP_OR: return CORD_asprintf("%r = (%r || %r)", lhs, lhs, rhs); + default: break; + } + errx(1, "unimplemented binop"); + } + case StringLiteral: { + const char *str = Match(ast, StringLiteral)->str; + CORD c = "\""; + for (; *str; ++str) { + switch (*str) { + case '\\': c = CORD_cat(c, "\\\\"); break; + case '"': c = CORD_cat(c, "\\\""); break; + case '\a': c = CORD_cat(c, "\\a"); break; + case '\b': c = CORD_cat(c, "\\b"); break; + case '\n': c = CORD_cat(c, "\\n"); break; + case '\r': c = CORD_cat(c, "\\r"); break; + case '\t': c = CORD_cat(c, "\\t"); break; + case '\v': c = CORD_cat(c, "\\v"); break; + default: { + if (isprint(*str)) + c = CORD_cat_char(c, *str); + else + CORD_sprintf(&c, "%r\\x%02X", *str); + break; + } + } + } + return CORD_cat_char(c, '"'); + } + case StringJoin: { + CORD c = NULL; + for (ast_list_t *chunk = Match(ast, StringJoin)->children; chunk; chunk = chunk->next) { + if (c) CORD_sprintf(&c, "CORD_cat(%r, %r)", c, compile(chunk->ast)); + else c = compile(chunk->ast); + } + return c; + } + case Interp: { + return CORD_asprintf("__cord(%r)", compile(Match(ast, Interp)->value)); + } + case Block: { + CORD c = NULL; + for 
(ast_list_t *stmt = Match(ast, Block)->statements; stmt; stmt = stmt->next) { + c = CORD_cat(c, compile(stmt->ast)); + c = CORD_cat(c, ";\n"); + } + return c; + } + case Declare: { + auto decl = Match(ast, Declare); + return CORD_asprintf("auto %r = %r", decl->var, decl->value); + } + case Assign: { + auto assign = Match(ast, Assign); + CORD c = NULL; + for (ast_list_t *target = assign->targets, *value = assign->values; target && value; target = target->next, value = value->next) { + CORD_sprintf(&c, "%r = %r", compile(target->ast), compile(value->ast)); + if (target->next) c = CORD_cat(c, ", "); + } + return c; + } + // Min, Max, + // Array, Table, TableEntry, + case FunctionDef: { + auto fndef = Match(ast, FunctionDef); + CORD c = CORD_asprintf("%r %r(", fndef->ret_type ? compile_type(fndef->ret_type) : "void", compile(fndef->name)); + for (arg_list_t *arg = fndef->args; arg; arg = arg->next) { + CORD_sprintf(&c, "%r%r %s", c, compile_type(arg->type), arg->var->name); + if (arg->next) c = CORD_cat(c, ", "); + } + c = CORD_cat(c, ") {\n"); + c = CORD_cat(c, compile(fndef->body)); + c = CORD_cat(c, "}"); + return c; + } + case FunctionCall: { + auto call = Match(ast, FunctionCall); + CORD c = CORD_cat_char(compile(call->fn), '('); + for (ast_list_t *arg = call->args; arg; arg = arg->next) { + c = CORD_cat(c, compile(arg->ast)); + if (arg->next) c = CORD_cat(c, ", "); + } + return CORD_cat_char(c, ')'); + } + // Lambda, + // FunctionCall, KeywordArg, + // Block, + // For, While, If, + // Reduction, + // Skip, Stop, Pass, + // Return, + // Extern, + // TypeDef, + // Index, FieldAccess, + // DocTest, + // Use, + // LinkerDirective, + case Unknown: errx(1, "Unknown AST"); + default: break; + } + return NULL; +} + +// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 diff --git a/compile.h b/compile.h new file mode 100644 index 00000000..790b33a4 --- /dev/null +++ b/compile.h @@ -0,0 +1,11 @@ +#pragma once + +#include +#include +#include + +#include "util.h" + +CORD 
compile(ast_t *ast); + +// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 diff --git a/files.c b/files.c new file mode 100644 index 00000000..51a8740c --- /dev/null +++ b/files.c @@ -0,0 +1,317 @@ +// +// files.c - Implementation of some file loading functionality. +// + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "files.h" +#include "util.h" + +static const int tabstop = 4; + +public char *resolve_path(const char *path, const char *relative_to) +{ + if (!relative_to || streq(relative_to, "/dev/stdin")) relative_to = "."; + if (!path || strlen(path) == 0) return NULL; + + // Resolve the path to an absolute path, assuming it's relative to the file + // it was found in: + char buf[PATH_MAX] = {0}; + if (streq(path, "~") || strncmp(path, "~/", 2) == 0) { + char *resolved = realpath(heap_strf("%s%s", getenv("HOME"), path+1), buf); + if (resolved) return heap_str(resolved); + } else if (streq(path, ".") || strncmp(path, "./", 2) == 0) { + char *relative_dir = dirname(heap_str(relative_to)); + char *resolved = realpath(heap_strf("%s/%s", relative_dir, path), buf); + if (resolved) return heap_str(resolved); + } else if (path[0] == '/') { + // Absolute path: + char *resolved = realpath(path, buf); + if (resolved) return heap_str(resolved); + } else { + // Relative path: + char *blpath = heap_str(getenv("SSSPATH")); + char *relative_dir = dirname(heap_str(relative_to)); + for (char *dir; (dir = strsep(&blpath, ":")); ) { + if (dir[0] == '/') { + char *resolved = realpath(heap_strf("%s/%s", dir, path), buf); + if (resolved) return heap_str(resolved); + } else if (dir[0] == '~' && (dir[1] == '\0' || dir[1] == '/')) { + char *resolved = realpath(heap_strf("%s%s/%s", getenv("HOME"), dir, path), buf); + if (resolved) return heap_str(resolved); + } else if (streq(dir, ".") || strncmp(dir, "./", 2) == 0) { + char *resolved = realpath(heap_strf("%s/%s", relative_dir, path), buf); + if (resolved) return heap_str(resolved); + } 
else if (streq(dir, ".") || streq(dir, "..") || strncmp(dir, "./", 2) == 0 || strncmp(dir, "../", 3) == 0) { + char *resolved = realpath(heap_strf("%s/%s/%s", relative_dir, dir, path), buf); + if (resolved) return heap_str(resolved); + } else { + char *resolved = realpath(heap_strf("%s/%s", dir, path), buf); + if (resolved) return heap_str(resolved); + } + } + } + return NULL; +} + +static sss_file_t *_load_file(const char* filename, FILE *file) +{ + if (!file) return NULL; + + sss_file_t *ret = new(sss_file_t, .filename=filename); + + size_t file_size = 0, line_cap = 0; + char *file_buf = NULL, *line_buf = NULL; + FILE *mem = open_memstream(&file_buf, &file_size); + int64_t line_len = 0; + while ((line_len = getline(&line_buf, &line_cap, file)) >= 0) { + sss_line_t line_info = {.offset=file_size, .indent=0, .is_empty=false}; + char *p; + for (p = line_buf; *p == ' ' || *p == '\t'; ++p) + line_info.indent += *p == ' ' ? 1 : 4; + line_info.is_empty = *p != '\r' && *p != '\n'; + if (ret->line_capacity <= ret->num_lines) { + ret->lines = GC_REALLOC(ret->lines, sizeof(sss_line_t)*(ret->line_capacity += 32)); + } + ret->lines[ret->num_lines++] = line_info; + fwrite(line_buf, sizeof(char), line_len, mem); + fflush(mem); + } + fclose(file); + + char *copy = GC_MALLOC_ATOMIC(file_size+1); + memcpy(copy, file_buf, file_size); + copy[file_size] = '\0'; + ret->text = copy; + fclose(mem); + + free(file_buf); + ret->relative_filename = filename; + if (filename && filename[0] != '<' && !streq(filename, "/dev/stdin")) { + filename = resolve_path(filename, "."); + // Convert to relative path (if applicable) + char buf[PATH_MAX]; + char *cwd = getcwd(buf, sizeof(buf)); + int64_t cwd_len = strlen(cwd); + if (strncmp(cwd, filename, cwd_len) == 0 && filename[cwd_len] == '/') + ret->relative_filename = &filename[cwd_len+1]; + } + return ret; +} + +// +// Read an entire file into memory. +// +public sss_file_t *sss_load_file(const char* filename) +{ + FILE *file = filename[0] ? 
fopen(filename, "r") : stdin; + return _load_file(filename, file); +} + +// +// Create a virtual file from a string. +// +public sss_file_t *sss_spoof_file(const char* filename, const char *text) +{ + FILE *file = fmemopen((char*)text, strlen(text)+1, "r"); + return _load_file(filename, file); +} + +// +// Given a pointer, determine which line number it points to (1-indexed) +// +public int64_t sss_get_line_number(sss_file_t *f, const char *p) +{ + // Binary search: + int64_t lo = 0, hi = (int64_t)f->num_lines-1; + if (p < f->text) return 0; + int64_t offset = (int64_t)(p - f->text); + while (lo <= hi) { + int64_t mid = (lo + hi) / 2; + sss_line_t *line = &f->lines[mid]; + if (line->offset == offset) + return mid + 1; + else if (line->offset < offset) + lo = mid + 1; + else if (line->offset > offset) + hi = mid - 1; + } + return lo; // Return the line number whose line starts closest before p +} + +// +// Given a pointer, determine which line column it points to. +// +public int64_t sss_get_line_column(sss_file_t *f, const char *p) +{ + int64_t line_no = sss_get_line_number(f, p); + sss_line_t *line = &f->lines[line_no-1]; + return 1 + (int64_t)(p - (f->text + line->offset)); +} + +// +// Given a pointer, get the indentation of the line it's on. 
+//
+public int64_t sss_get_indent(sss_file_t *f, const char *p)
+{
+    int64_t line_no = sss_get_line_number(f, p); // used below as a 1-based index into f->lines
+    sss_line_t *line = &f->lines[line_no-1];
+    return line->indent;
+}
+
+//
+// Return a pointer to the start of the line with the specified line number (1-indexed), or NULL if it is out of range
+//
+public const char *sss_get_line(sss_file_t *f, int64_t line_number)
+{
+    if (line_number < 1 || line_number > (int64_t)f->num_lines) return NULL; // "< 1" also rejects negative numbers
+    sss_line_t *line = &f->lines[line_number-1];
+    return f->text + line->offset;
+}
+
+//
+// Return a value like /foo:line:col (the int64_t positions are cast to long so they match %ld everywhere)
+//
+public const char *sss_get_file_pos(sss_file_t *f, const char *p)
+{
+    return heap_strf("%s:%ld:%ld", f->filename, (long)sss_get_line_number(f, p), (long)sss_get_line_column(f, p));
+}
+
+static int fputc_column(FILE *out, char c, char print_char, int *column)
+{ // Write print_char in place of source character c, advancing *column; returns the number of characters written
+    int printed = 0;
+    if (print_char == '\t') print_char = ' '; // tabs are always rendered as spaces
+    if (c == '\t') { // a tab in the source pads out to the next tab stop
+        for (int to_fill = tabstop - (*column % tabstop); to_fill > 0; --to_fill) {
+            printed += (fputc(print_char, out) != EOF); // fputc returns the character written, not a count
+            ++*column;
+        }
+    } else {
+        printed += (fputc(print_char, out) != EOF);
+        ++*column;
+    }
+    return printed;
+}
+
+//
+// Print the span [start,end) of a file with surrounding lines (context_lines == 0 prints only the span text); returns the count of characters written
+//
+public int fprint_span(FILE *out, sss_file_t *file, const char *start, const char *end, const char *hl_color, int64_t context_lines, bool use_color)
+{
+    if (!file) return 0;
+
+    // Handle spans that come from multiple files:
+    if (start < file->text || start > file->text + file->len)
+        start = end;
+    if (end < file->text || end > file->text + file->len)
+        end = start;
+    // Just in case neither end of the span came from this file:
+    if (end < file->text || end > file->text + file->len)
+        start = end = file->text;
+
+    const char *lineno_fmt, *normal_color, *empty_marker;
+    bool print_carets = false;
+    int printed = 0;
+    if (use_color) {
+        lineno_fmt = "\x1b[0;2m%*lu\x1b(0\x78\x1b(B\x1b[m ";
+        normal_color = "\x1b[m";
+        empty_marker = "\x1b(0\x61\x1b(B";
+        printed += fprintf(out, "\x1b[33;4;1m%s\x1b[m\n", file->relative_filename);
+    } else {
+        lineno_fmt = "%*lu| ";
+        hl_color = ""; // without color the span is marked by a caret line instead
+        normal_color = "";
+        empty_marker = " ";
+        print_carets = true;
+        printed += fprintf(out, "%s\n", file->relative_filename);
+    }
+
+    if (context_lines == 0) // the filename header above was already counted in printed, so keep it in the total
+        return printed + fprintf(out, "%s%.*s%s", hl_color, (int)(end - start), start, normal_color);
+
+    int64_t start_line = sss_get_line_number(file, start),
+            end_line = sss_get_line_number(file, end);
+
+    int64_t first_line = start_line - (context_lines - 1),
+            last_line = end_line + (context_lines - 1);
+
+    if (first_line < 1) first_line = 1;
+    if (last_line > file->num_lines) last_line = file->num_lines;
+
+    int digits = 1; // ends up one more than the number of decimal digits in last_line
+    for (int64_t i = last_line; i > 0; i /= 10) ++digits;
+
+    for (int64_t line_no = first_line; line_no <= last_line; ++line_no) {
+        if (line_no > first_line + 5 && line_no < last_line - 5) { // long span: show 6 lines at each end and elide the middle
+            if (use_color)
+                printed += fprintf(out, "\x1b[0;2;3;4m ... %ld lines omitted ... \x1b[m\n", (long)((last_line - first_line) - 11));
+            else
+                printed += fprintf(out, " ... %ld lines omitted ...\n", (long)((last_line - first_line) - 11));
+            line_no = last_line - 6; // the loop increment then resumes at last_line - 5
+            continue;
+        }
+
+        printed += fprintf(out, lineno_fmt, digits, (unsigned long)line_no); // cast matches the %*lu in lineno_fmt
+        const char *line = sss_get_line(file, line_no);
+        if (!line) break;
+
+        int column = 0;
+        const char *p = line;
+        // Before match
+        for (; *p && *p != '\r' && *p != '\n' && p < start; ++p)
+            printed += fputc_column(out, *p, *p, &column);
+
+        // Zero-width matches
+        if (p == start && start == end) {
+            printed += fprintf(out, "%s%s%s", hl_color, empty_marker, normal_color);
+            column += 1;
+        }
+
+        // Inside match
+        if (start <= p && p < end) {
+            printed += fprintf(out, "%s", hl_color); // fprintf reports a character count; fputs only reports success
+            for (; *p && *p != '\r' && *p != '\n' && p < end; ++p)
+                printed += fputc_column(out, *p, *p, &column);
+            printed += fprintf(out, "%s", normal_color);
+        }
+
+        // After match
+        for (; *p && *p != '\r' && *p != '\n'; ++p)
+            printed += fputc_column(out, *p, *p, &column);
+
+        printed += fprintf(out, "\n");
+
+        const char *eol = strchrnul(line, '\n');
+        if (print_carets && start >= line && start < eol) { // caret line goes only under the line where the span starts
+            for (int num = 0; num < digits; num++)
+                printed += (fputc(' ', out) != EOF);
+            printed += fprintf(out, ": ");
+            int caret_column = 0; // separate from column above, which tracked the source line
+            for (const char *sp = line; *sp && *sp != '\n'; ++sp) {
+                char print_char;
+                if (sp < start)
+                    print_char = ' ';
+                else if (sp == start && sp == end)
+                    print_char = '^';
+                else if (sp >= start && sp < end)
+                    print_char = '-';
+                else
+                    print_char = ' ';
+                printed += fputc_column(out, *sp, print_char, &caret_column);
+            }
+            printed += fprintf(out, "\n");
+        }
+    }
+    fflush(out);
+    return printed;
+}
+
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/files.h b/files.h
new file mode 100644
index 00000000..0ff91568
--- /dev/null
+++ b/files.h
@@ -0,0 +1,43 @@
+//
+// files.h - Definitions of an API for loading files.
+//
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+
+typedef struct {
+    int64_t offset; // added to sss_file_t.text to reach the start of the line
+    int64_t indent:63; // the value sss_get_indent() returns for positions on this line
+    bool is_empty:1;
+} sss_line_t;
+
+typedef struct {
+    const char *filename, *relative_filename;
+    const char *text;
+    int64_t len;
+    int64_t num_lines, line_capacity;
+    sss_line_t *lines; // indexed by (line number - 1)
+} sss_file_t;
+
+char *resolve_path(const char *path, const char *relative_to);
+__attribute__((nonnull))
+sss_file_t *sss_load_file(const char *filename); // NOTE(review): not returns_nonnull, so presumably NULL on failure - confirm
+__attribute__((nonnull, returns_nonnull))
+sss_file_t *sss_spoof_file(const char *filename, const char *text);
+__attribute__((pure, nonnull))
+int64_t sss_get_line_number(sss_file_t *f, const char *p);
+__attribute__((pure, nonnull))
+int64_t sss_get_line_column(sss_file_t *f, const char *p);
+__attribute__((pure, nonnull))
+int64_t sss_get_indent(sss_file_t *f, const char *p);
+__attribute__((pure, nonnull))
+const char *sss_get_line(sss_file_t *f, int64_t line_number);
+__attribute__((pure, nonnull))
+const char *sss_get_file_pos(sss_file_t *f, const char *p);
+int fprint_span(FILE *out, sss_file_t *file, const char *start, const char *end, const char *hl_color, int64_t context_lines, bool use_color);
+
+// vim: ts=4 sw=0 et
cino=L2,l1,(0,W4,m1,\:0
diff --git a/foo.c b/foo.c
new file mode 100644
index 00000000..0d78c720
--- /dev/null
+++ b/foo.c
@@ -0,0 +1,12 @@
+#include <stdio.h>
+
+int main(void) {
+    int x = 23;
+    const char *s = "Hi";
+#define say(x) _Generic(x, int: printf("%d\n", x), char *: puts(s), default: puts("???"))
+    say(x);
+    say(s);
+#define all(...) { __VA_ARGS__; }
+    all(say("one"); say(2))
+    return 0;
+}
diff --git a/nextlang.c b/nextlang.c
new file mode 100644
index 00000000..617adc98
--- /dev/null
+++ b/nextlang.c
@@ -0,0 +1,23 @@
+#include
+#include
+#include
+#include
+
+#include "ast.h"
+#include "parse.h"
+#include "compile.h"
+
+int main(int argc, char *argv[])
+{
+    if (argc < 2) return 1;
+    sss_file_t *f = sss_load_file(argv[1]);
+    if (!f) { fprintf(stderr, "Could not load file: %s\n", argv[1]); return 1; } // sss_load_file() is not declared returns_nonnull
+    ast_t *ast = parse_file(f, NULL);
+    const char *s = ast_to_str(ast);
+    puts(s);
+    CORD c = compile(ast);
+    CORD_put(c, stdout);
+    return 0;
+}
+
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/parse.c b/parse.c
new file mode 100644
index 00000000..845b3ca0
--- /dev/null
+++ b/parse.c
@@ -0,0 +1,1759 @@
+// Parse SSS code using recursive descent
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "ast.h"
+#include "util.h"
+
+typedef struct {
+    sss_file_t *file;
+    jmp_buf *on_err; // if set, parse errors longjmp here instead of terminating the process
+} parse_ctx_t;
+
+typedef ast_t* (parser_t)(parse_ctx_t*,const char*);
+
+extern void builtin_fail(const char *fmt, ...);
+
+#define PARSER(name) ast_t *name(parse_ctx_t *ctx, const char *pos)
+
+#define STUB_PARSER(name) PARSER(name) { (void)ctx; (void)pos; return NULL; }
+
+int op_tightness[] = { // NOTE(review): presumably lower values bind more tightly (POWER=1) - confirm against the expression parser
+    [BINOP_POWER]=1,
+    [BINOP_MULT]=2, [BINOP_DIVIDE]=2, [BINOP_MOD]=2, [BINOP_MOD1]=2,
+    [BINOP_PLUS]=3, [BINOP_MINUS]=3,
+    [BINOP_CONCAT]=4,
+    [BINOP_LSHIFT]=5, [BINOP_RSHIFT]=5,
+    [BINOP_MIN]=6, [BINOP_MAX]=6,
+    [BINOP_EQ]=7, [BINOP_NE]=7,
+    [BINOP_LT]=8, [BINOP_LE]=8, [BINOP_GT]=8, [BINOP_GE]=8,
+    [BINOP_AND]=9, [BINOP_OR]=9, [BINOP_XOR]=9,
+};
+
+static const char *keywords[] = { // reserved words: get_id() refuses to return any of these; NULL-terminated
+    "yes", "xor", "while", "use", "then", "struct", "stop", "skip", "return",
+    "or", "not", "no", "mod1", "mod", "in", "if", "func", "for", "extern",
+    "enum", "else", "do", "and", "_mix_", "_min_", "_max_",
+    NULL,
+};
+
+enum {NORMAL_FUNCTION=0, EXTERN_FUNCTION=1};
+
+static inline size_t some_of(const char **pos, const char *allow);
+static inline size_t some_not(const char **pos, const char *forbid);
+static inline size_t spaces(const char **pos);
+static inline size_t whitespace(const char **pos);
+static inline size_t match(const char **pos, const char *target);
+static inline void expect_str(parse_ctx_t *ctx, const char *start, const char **pos, const char *target, const char *fmt, ...);
+static inline void expect_closing(parse_ctx_t *ctx, const char **pos, const char *target, const char *fmt, ...);
+static inline size_t match_word(const char **pos, const char *word);
+static inline const char* get_word(const char **pos);
+static inline const char* get_id(const char **pos);
+static inline bool comment(const char **pos);
+static inline bool indent(parse_ctx_t *ctx, const char **pos);
+static inline binop_e match_binary_operator(const char **pos);
+static ast_t *parse_fncall_suffix(parse_ctx_t *ctx, ast_t *fn, bool is_extern);
+static ast_t *parse_field_suffix(parse_ctx_t *ctx, ast_t *lhs);
+static ast_t *parse_index_suffix(parse_ctx_t *ctx, ast_t *lhs);
+static arg_list_t *parse_args(parse_ctx_t *ctx, const char **pos, bool allow_unnamed);
+static PARSER(parse_for);
+static PARSER(parse_while);
+static PARSER(parse_if);
+static PARSER(parse_expr);
+static PARSER(parse_extended_expr);
+static PARSER(parse_term_no_suffix);
+static PARSER(parse_term);
+static PARSER(parse_inline_block);
+static PARSER(parse_statement);
+static PARSER(parse_block);
+static PARSER(parse_opt_indented_block);
+static PARSER(parse_var);
+static PARSER(parse_type_def);
+static PARSER(parse_func_def);
+static PARSER(parse_extern);
+static PARSER(parse_declaration);
+static PARSER(parse_doctest);
+static PARSER(parse_use);
+static PARSER(parse_linker);
+static PARSER(parse_namespace);
+
+static type_ast_t *parse_type(parse_ctx_t *ctx, const char *pos);
+static type_ast_t *parse_enum_type(parse_ctx_t *ctx, const char *pos);
+
+//
+// Print a parse error for the span [start,end) to stderr and exit (or use the on_err longjmp); never returns
+//
+__attribute__((noreturn))
+static void vparser_err(parse_ctx_t *ctx, const char *start, const char *end, const char *fmt, va_list args) {
+    if (isatty(STDERR_FILENO) && !getenv("NO_COLOR")) // color only on a terminal, and only if NO_COLOR is unset
+        fputs("\x1b[31;1;7m", stderr);
+    fprintf(stderr, "%s:%ld.%ld: ", ctx->file->relative_filename, (long)sss_get_line_number(ctx->file, start),
+            (long)sss_get_line_column(ctx->file, start)); // casts make the int64_t positions match %ld
+    vfprintf(stderr, fmt, args);
+    if (isatty(STDERR_FILENO) && !getenv("NO_COLOR"))
+        fputs(" \x1b[m", stderr);
+    fputs("\n\n", stderr);
+
+    fprint_span(stderr, ctx->file, start, end, "\x1b[31;1;7m", 2, isatty(STDERR_FILENO) && !getenv("NO_COLOR"));
+    fputs("\n", stderr);
+
+    if (ctx->on_err)
+        longjmp(*ctx->on_err, 1);
+    raise(SIGABRT);
+    exit(1);
+}
+
+//
+// Wrapper for vparser_err that takes printf-style variadic arguments
+//
+__attribute__((noreturn))
+static void parser_err(parse_ctx_t *ctx, const char *start, const char *end, const char *fmt, ...)
{
+    va_list args;
+    va_start(args, fmt);
+    vparser_err(ctx, start, end, fmt, args); // does not return
+    va_end(args);
+}
+
+//
+// Convert the escape sequence at *out (like \n, \x41 or \101; *out must point at the backslash) to a string and advance *out past it
+//
+const char *unescape(const char **out) {
+    const char **endpos = out;
+    const char *escape = *out;
+    static const char *unescapes[256] = {['a']="\a",['b']="\b",['e']="\e",['f']="\f",['n']="\n",['r']="\r",['t']="\t",['v']="\v",['_']=" "};
+    assert(*escape == '\\');
+    if (unescapes[(unsigned char)escape[1]]) { // index as unsigned char: plain char may be negative for bytes >= 0x80
+        *endpos = escape + 2;
+        return heap_str(unescapes[(unsigned char)escape[1]]);
+    } else if (escape[1] == 'x' && isxdigit((unsigned char)escape[2]) && isxdigit((unsigned char)escape[3])) {
+        char hex[] = {escape[2], escape[3], '\0'}; // exactly two digits: strtol on the raw input would keep consuming hex digits
+        char c = (char)strtol(hex, NULL, 16);
+        *endpos = escape + 4;
+        return heap_strn(&c, 1);
+    } else if ('0' <= escape[1] && escape[1] <= '7' && '0' <= escape[2] && escape[2] <= '7' && '0' <= escape[3] && escape[3] <= '7') {
+        char oct[] = {escape[1], escape[2], escape[3], '\0'}; // exactly three digits, for the same reason
+        char c = (char)strtol(oct, NULL, 8);
+        *endpos = escape + 4;
+        return heap_strn(&c, 1);
+    } else { // unknown escape: the escaped character stands for itself
+        *endpos = escape + (escape[1] ? 2 : 1); // a lone trailing backslash must not step past the terminating NUL
+        return heap_strn(escape+1, 1);
+    }
+}
+
+///////////////////////////////////////////////////////////////////////////////////////////////////////////
+///////////////////////////// Text-based parsing primitives ///////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////////////////////////////////////
+size_t some_of(const char **pos, const char *allow) { // skip a run of characters from allow; returns its length
+    size_t len = strspn(*pos, allow);
+    *pos += len;
+    return len;
+}
+
+size_t some_not(const char **pos, const char *forbid) { // skip a run of characters not in forbid; returns its length
+    size_t len = strcspn(*pos, forbid);
+    *pos += len;
+    return len;
+}
+
+size_t spaces(const char **pos) { return some_of(pos, " \t"); }
+size_t whitespace(const char **pos) { // skip spaces, tabs, line breaks and comments; returns the total length skipped
+    const char *p0 = *pos;
+    while (some_of(pos, " \t\r\n") || comment(pos))
+        continue;
+    return (size_t)(*pos - p0);
+}
+
+size_t match(const char **pos, const char *target) { // if the text at *pos starts with target, skip it and return its length, else 0
+    size_t len = strlen(target);
+    if (strncmp(*pos, target, len) != 0)
+        return 0;
+    *pos += len;
+    return len;
+}
+
+static inline bool is_xid_continue_next(const char *pos) { // true if the code point decoded at pos has the XID_Continue property
+    ucs4_t point = 0;
+    u8_next(&point, (const uint8_t*)pos);
+    return uc_is_property_xid_continue(point);
+}
+
+//
+// Expect a string (potentially after whitespace) and emit a parser error if it's not there
+//
+static void expect_str(
+    parse_ctx_t *ctx, const char *start, const char **pos, const char *target, const char *fmt, ...) {
+    spaces(pos);
+    if (match(pos, target)) {
+        char lastchar = target[strlen(target)-1]; // target must be non-empty
+        if (!(isalpha((unsigned char)lastchar) || isdigit((unsigned char)lastchar) || lastchar == '_')) // <ctype.h> needs unsigned char values
+            return;
+        if (!is_xid_continue_next(*pos)) // a word-like target must not run straight into more identifier characters
+            return;
+    }
+
+    if (isatty(STDERR_FILENO) && !getenv("NO_COLOR"))
+        fputs("\x1b[31;1;7m", stderr);
+    va_list args;
+    va_start(args, fmt);
+    vparser_err(ctx, start, *pos, fmt, args); // does not return
+    va_end(args);
+}
+
+//
+// Helper for matching closing parens with good error messages (the error span runs to the next closing or the end of the line, whichever is first)
+//
+static void expect_closing(
+    parse_ctx_t *ctx, const char **pos, const char *closing, const char *fmt, ...) {
+    const char *start = *pos;
+    spaces(pos);
+    if (match(pos, closing))
+        return;
+
+    const char *eol = *pos + strcspn(*pos, "\n"); // end of this line, or end of the text if there is no newline
+    const char *next = strstr(*pos, closing); // NULL if the closing delimiter never appears
+
+    const char *end = (next && next < eol) ? next : eol; // never NULL, unlike comparing raw strchr/strstr results
+
+    if (isatty(STDERR_FILENO) && !getenv("NO_COLOR"))
+        fputs("\x1b[31;1;7m", stderr);
+    va_list args;
+    va_start(args, fmt);
+    vparser_err(ctx, start, end, fmt, args); // does not return
+    va_end(args);
+}
+
+#define expect(ctx, start, pos, parser, ...)
({ \ + const char **_pos = pos; \ + spaces(_pos); \ + auto _result = parser(ctx, *_pos); \ + if (!_result) { \ + if (isatty(STDERR_FILENO) && !getenv("NO_COLOR")) \ + fputs("\x1b[31;1;7m", stderr); \ + parser_err(ctx, start, *_pos, __VA_ARGS__); \ + } \ + *_pos = _result->end; \ + _result; }) + +#define optional(ctx, pos, parser) ({ \ + const char **_pos = pos; \ + spaces(_pos); \ + auto _result = parser(ctx, *_pos); \ + if (_result) *_pos = _result->end; \ + _result; }) + +size_t match_word(const char **out, const char *word) { + const char *pos = *out; + spaces(&pos); + if (!match(&pos, word) || is_xid_continue_next(pos)) + return 0; + + *out = pos; + return strlen(word); +} + +bool match_group(const char **out, char open) { + static char mirror_delim[256] = {['(']=')', ['{']='}', ['<']='>', ['[']=']'}; + const char *pos = *out; + if (*pos != open) return 0; + char close = mirror_delim[(int)open] ? mirror_delim[(int)open] : open; + int depth = 1; + for (++pos; *pos && depth > 0; ++pos) { + if (*pos == close) --depth; + else if (*pos == open) ++depth; + } + if (depth == 0) { + *out = pos; + return true; + } else return false; +} + +const char *get_word(const char **inout) { + const char *word = *inout; + spaces(&word); + const uint8_t *pos = (const uint8_t*)word; + ucs4_t point; + pos = u8_next(&point, pos); + if (!uc_is_property_xid_start(point) && point != '_') + return NULL; + + for (const uint8_t *next; (next = u8_next(&point, pos)); pos = next) { + if (!uc_is_property_xid_continue(point)) + break; + } + *inout = (const char*)pos; + return heap_strn(word, (size_t)((const char*)pos - word)); +} + +const char *get_id(const char **inout) { + const char *pos = *inout; + const char *word = get_word(&pos); + if (!word) return word; + for (int i = 0; keywords[i]; i++) + if (strcmp(word, keywords[i]) == 0) + return NULL; + *inout = pos; + return word; +} + +bool comment(const char **pos) { + if (!match(pos, "//")) + return false; + some_not(pos, "\r\n"); + return 
true; +} + +bool indent(parse_ctx_t *ctx, const char **out) { + const char *pos = *out; + int64_t starting_indent = sss_get_indent(ctx->file, pos); + whitespace(&pos); + if (sss_get_line_number(ctx->file, pos) == sss_get_line_number(ctx->file, *out)) + return false; + + if (sss_get_indent(ctx->file, pos) > starting_indent) { + *out = pos; + return true; + } + + return false; +} + +bool match_indentation(const char **out, int64_t target) { + const char *pos = *out; + for (int64_t indentation = 0; indentation < target; ) { + switch (*pos) { + case ' ': indentation += 1; ++pos; break; + case '\t': indentation += 4; ++pos; break; + default: return false; + } + } + *out = pos; + return true; +} + +/////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////// AST-based parsers ///////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////////////////////////////////// + +PARSER(parse_parens) { + const char *start = pos; + spaces(&pos); + if (!match(&pos, "(")) return NULL; + whitespace(&pos); + ast_t *expr = optional(ctx, &pos, parse_extended_expr); + if (!expr) return NULL; + expect_closing(ctx, &pos, ")", "I wasn't able to parse the rest of this expression"); + + // Update the span to include the parens: + return new(ast_t, .file=(ctx)->file, .start=start, .end=pos, + .tag=expr->tag, .__data=expr->__data); +} + +PARSER(parse_int) { + const char *start = pos; + bool negative = match(&pos, "-"); + if (!isdigit(*pos)) return false; + int64_t i = 0; + if (match(&pos, "0x")) { // Hex + size_t span = strspn(pos, "0123456789abcdefABCDEF_"); + char *buf = GC_MALLOC_ATOMIC(span+1); + memset(buf, 0, span+1); + for (char *src = (char*)pos, *dest = buf; src < pos+span; ++src) { + if (*src != '_') *(dest++) = *src; + } + i = strtol(buf, NULL, 16); + pos += span; + } else if (match(&pos, "0b")) { // Binary + size_t span = strspn(pos, 
"01_"); + char *buf = GC_MALLOC_ATOMIC(span+1); + memset(buf, 0, span+1); + for (char *src = (char*)pos, *dest = buf; src < pos+span; ++src) { + if (*src != '_') *(dest++) = *src; + } + i = strtol(buf, NULL, 2); + pos += span; + } else if (match(&pos, "0o")) { // Octal + size_t span = strspn(pos, "01234567_"); + char *buf = GC_MALLOC_ATOMIC(span+1); + memset(buf, 0, span+1); + for (char *src = (char*)pos, *dest = buf; src < pos+span; ++src) { + if (*src != '_') *(dest++) = *src; + } + i = strtol(buf, NULL, 8); + pos += span; + } else { // Decimal + size_t span = strspn(pos, "0123456789_"); + char *buf = GC_MALLOC_ATOMIC(span+1); + memset(buf, 0, span+1); + for (char *src = (char*)pos, *dest = buf; src < pos+span; ++src) { + if (*src != '_') *(dest++) = *src; + } + i = strtol(buf, NULL, 10); + pos += span; + } + + if (match(&pos, "e") || match(&pos, "f")) // floating point literal + return NULL; + + if (negative) i *= -1; + + if (match(&pos, "%")) { + double d = (double)i / 100.; + return NewAST(ctx->file, start, pos, Num, .n=d, .precision=64); + } + + match(&pos, "_"); + int64_t precision = 64; + if (match(&pos, "i64")) precision = 64; + else if (match(&pos, "i32")) precision = 32; + else if (match(&pos, "i16")) precision = 16; + else if (match(&pos, "i8")) precision = 8; + + // else if (match(&pos, ".") || match(&pos, "e")) return NULL; // looks like a float + + return NewAST(ctx->file, start, pos, Int, .i=i, .precision=precision); +} + +type_ast_t *parse_table_type(parse_ctx_t *ctx, const char *pos) { + const char *start = pos; + if (!match(&pos, "{")) return NULL; + whitespace(&pos); + type_ast_t *key_type = parse_type(ctx, pos); + if (!key_type) return NULL; + pos = key_type->end; + whitespace(&pos); + if (!match(&pos, "=>")) return NULL; + type_ast_t *value_type = expect(ctx, start, &pos, parse_type, "I couldn't parse the rest of this table type"); + whitespace(&pos); + expect_closing(ctx, &pos, "}", "I wasn't able to parse the rest of this table type"); + 
return NewTypeAST(ctx->file, start, pos, TypeTable, .key=key_type, .value=value_type); +} + +type_ast_t *parse_struct_type(parse_ctx_t *ctx, const char *pos) { + const char *start = pos; + if (!match(&pos, "struct")) return NULL; + spaces(&pos); + if (!match(&pos, "(")) return NULL; + arg_list_t *args = parse_args(ctx, &pos, false); + whitespace(&pos); + expect_closing(ctx, &pos, ")", "I wasn't able to parse the rest of this struct type"); + return NewTypeAST(ctx->file, start, pos, TypeStruct, .fields=args); +} + +type_ast_t *parse_func_type(parse_ctx_t *ctx, const char *pos) { + const char *start = pos; + if (!match_word(&pos, "func")) return NULL; + spaces(&pos); + if (!match(&pos, "(")) return NULL; + arg_list_t *args = parse_args(ctx, &pos, true); + expect_closing(ctx, &pos, ")", "I wasn't able to parse the rest of this function type"); + spaces(&pos); + if (!match(&pos, "->")) return NULL; + type_ast_t *ret = optional(ctx, &pos, parse_type); + return NewTypeAST(ctx->file, start, pos, TypeFunction, .args=args, .ret=ret); +} + +type_ast_t *parse_array_type(parse_ctx_t *ctx, const char *pos) { + const char *start = pos; + if (!match(&pos, "[")) return NULL; + type_ast_t *type = expect(ctx, start, &pos, parse_type, + "I couldn't parse an array item type after this point"); + expect_closing(ctx, &pos, "]", "I wasn't able to parse the rest of this array type"); + return NewTypeAST(ctx->file, start, pos, TypeArray, .item=type); +} + +type_ast_t *parse_pointer_type(parse_ctx_t *ctx, const char *pos) { + const char *start = pos; + bool optional = false, is_stack = false; + if (match(&pos, "@")) + optional = false; + else if (match(&pos, "?")) + optional = true; + else if (match(&pos, "&")) + is_stack = true; + else + return NULL; + + spaces(&pos); + bool is_readonly = match(&pos, "(readonly)"); + spaces(&pos); + type_ast_t *type = expect(ctx, start, &pos, parse_type, + "I couldn't parse a pointer type after this point"); + return NewTypeAST(ctx->file, start, pos, 
TypePointer, .pointed=type, .is_optional=optional, .is_stack=is_stack, .is_readonly=is_readonly); +} + +type_ast_t *parse_type_name(parse_ctx_t *ctx, const char *pos) { + const char *start = pos; + const char *id = get_id(&pos); + if (!id) return NULL; + for (;;) { + const char *next = pos; + spaces(&next); + if (!match(&next, ".")) break; + const char *next_id = get_id(&next); + if (!next_id) break; + id = heap_strf("%s.%s", id, next_id); + pos = next; + } + return NewTypeAST(ctx->file, start, pos, TypeVar, .var.name=id); +} + +type_ast_t *parse_type(parse_ctx_t *ctx, const char *pos) { + const char *start = pos; + type_ast_t *type = NULL; + bool success = (false + || (type=parse_enum_type(ctx, pos)) + || (type=parse_pointer_type(ctx, pos)) + || (type=parse_array_type(ctx, pos)) + || (type=parse_table_type(ctx, pos)) + || (type=parse_struct_type(ctx, pos)) + || (type=parse_type_name(ctx, pos)) + || (type=parse_func_type(ctx, pos)) + ); + if (!success && match(&pos, "(")) { + whitespace(&pos); + type = optional(ctx, &pos, parse_type); + if (!type) return NULL; + whitespace(&pos); + expect_closing(ctx, &pos, ")", "I wasn't able to parse the rest of this type"); + type->start = start; + type->end = pos; + } + + if (!type) return NULL; + + pos = type->end; + return type; +} + +PARSER(parse_num) { + const char *start = pos; + bool negative = match(&pos, "-"); + if (!isdigit(*pos) && *pos != '.') return NULL; + + size_t len = strspn(pos, "0123456789_"); + if (strncmp(pos+len, "..", 2) == 0) + return NULL; + else if (pos[len] == '.') + len += 1 + strspn(pos + len + 1, "0123456789"); + else if (pos[len] != 'e' && pos[len] != 'f' && pos[len] != '%') + return NULL; + if (pos[len] == 'e') + len += 1 + strspn(pos + len + 1, "-0123456789_"); + char *buf = GC_MALLOC_ATOMIC(len+1); + memset(buf, 0, len+1); + for (char *src = (char*)pos, *dest = buf; src < pos+len; ++src) { + if (*src != '_') *(dest++) = *src; + } + double d = strtod(buf, NULL); + pos += len; + + if (negative) d 
*= -1; + + int64_t precision = 64; + match(&pos, "_"); + if (match(&pos, "f64")) precision = 64; + else if (match(&pos, "f32")) precision = 32; + + if (match(&pos, "%")) { + d /= 100.; + } + + return NewAST(ctx->file, start, pos, Num, .n=d, .precision=precision); +} + +PARSER(parse_array) { + const char *start = pos; + if (!match(&pos, "[")) return NULL; + + whitespace(&pos); + + ast_list_t *items = NULL; + type_ast_t *item_type = NULL; + if (match(&pos, ":")) { + whitespace(&pos); + item_type = expect(ctx, pos-1, &pos, parse_type, "I couldn't parse a type for this array"); + } + + for (;;) { + whitespace(&pos); + ast_t *item = optional(ctx, &pos, parse_extended_expr); + if (!item) break; + items = new(ast_list_t, .ast=item, .next=items); + whitespace(&pos); + if (!match(&pos, ",")) break; + } + whitespace(&pos); + expect_closing(ctx, &pos, "]", "I wasn't able to parse the rest of this array"); + + if (!item_type && !items) + parser_err(ctx, start, pos, "Empty arrays must specify what type they would contain (e.g. 
[:Int])"); + + REVERSE_LIST(items); + return NewAST(ctx->file, start, pos, Array, .type=item_type, .items=items); +} + +PARSER(parse_table) { + const char *start = pos; + if (!match(&pos, "{")) return NULL; + + whitespace(&pos); + + ast_list_t *entries = NULL; + type_ast_t *key_type = NULL, *value_type = NULL; + if (match(&pos, ":")) { + whitespace(&pos); + key_type = expect(ctx, pos-1, &pos, parse_type, "I couldn't parse a key type for this table"); + whitespace(&pos); + if (!match(&pos, "=>")) + parser_err(ctx, pos, pos, "I expected an '=>' for this table type"); + value_type = expect(ctx, pos-1, &pos, parse_type, "I couldn't parse a value type for this table"); + } + + for (;;) { + whitespace(&pos); + const char *entry_start = pos; + ast_t *key = optional(ctx, &pos, parse_extended_expr); + if (!key) break; + whitespace(&pos); + if (!match(&pos, "=>")) return NULL; + ast_t *value = expect(ctx, pos-1, &pos, parse_expr, "I couldn't parse the value for this table entry"); + + ast_t *entry = NewAST(ctx->file, entry_start, pos, TableEntry, .key=key, .value=value); + for (bool progress = true; progress; ) { + ast_t *new_entry; + progress = (false + || (new_entry=parse_index_suffix(ctx, entry)) + || (new_entry=parse_field_suffix(ctx, entry)) + || (new_entry=parse_fncall_suffix(ctx, entry, NORMAL_FUNCTION)) + ); + if (progress) entry = new_entry; + } + pos = entry->end; + + entries = new(ast_list_t, .ast=entry, .next=entries); + whitespace(&pos); + if (!match(&pos, ",")) break; + } + + REVERSE_LIST(entries); + + if (!key_type && !value_type && !entries) + return NULL; + + whitespace(&pos); + + ast_t *fallback = NULL, *default_val = NULL; + if (match(&pos, ";")) { + for (;;) { + whitespace(&pos); + const char *attr_start = pos; + if (match(&pos, "fallback")) { + whitespace(&pos); + if (!match(&pos, "=")) parser_err(ctx, attr_start, pos, "I expected an '=' after 'fallback'"); + if (fallback) + parser_err(ctx, attr_start, pos, "This table already has a fallback"); + 
fallback = expect(ctx, attr_start, &pos, parse_expr, "I expected a fallback table"); + } else if (match(&pos, "default")) { + whitespace(&pos); + if (!match(&pos, "=")) parser_err(ctx, attr_start, pos, "I expected an '=' after 'default'"); + if (default_val) + parser_err(ctx, attr_start, pos, "This table already has a default value"); + default_val = expect(ctx, attr_start, &pos, parse_expr, "I expected a default value for this table"); + } else { + break; + } + whitespace(&pos); + if (!match(&pos, ";")) break; + } + } + + whitespace(&pos); + expect_closing(ctx, &pos, "}", "I wasn't able to parse the rest of this table"); + + return NewAST(ctx->file, start, pos, Table, .key_type=key_type, .value_type=value_type, .entries=entries, .fallback=fallback, .default_value=default_val); +} + +ast_t *parse_field_suffix(parse_ctx_t *ctx, ast_t *lhs) { + if (!lhs) return NULL; + const char *pos = lhs->end; + whitespace(&pos); + if (!match(&pos, ".")) return NULL; + if (*pos == '.') return NULL; + whitespace(&pos); + bool dollar = match(&pos, "$"); + const char* field = get_id(&pos); + if (!field) return NULL; + if (dollar) field = heap_strf("$%s", field); + return NewAST(ctx->file, lhs->start, pos, FieldAccess, .fielded=lhs, .field=field); +} + +PARSER(parse_reduction) { + const char *start = pos; + if (!match(&pos, "(")) return NULL; + + spaces(&pos); + const char *combo_start = pos; + binop_e op = match_binary_operator(&pos); + if (op == BINOP_UNKNOWN) return NULL; + + ast_t *combination; + ast_t *lhs = NewAST(ctx->file, pos, pos, Var, .var.name="lhs.0"); + ast_t *rhs = NewAST(ctx->file, pos, pos, Var, .var.name="rhs.0"); + if (op == BINOP_MIN || op == BINOP_MAX) { + for (bool progress = true; progress; ) { + ast_t *new_term; + progress = (false + || (new_term=parse_index_suffix(ctx, rhs)) + || (new_term=parse_field_suffix(ctx, rhs)) + || (new_term=parse_fncall_suffix(ctx, rhs, NORMAL_FUNCTION)) + ); + if (progress) rhs = new_term; + } + if (rhs->tag == Var) rhs = NULL; + 
else pos = rhs->end; + combination = op == BINOP_MIN ? + NewAST(ctx->file, combo_start, pos, Min, .lhs=lhs, .rhs=lhs, .key=rhs) + : NewAST(ctx->file, combo_start, pos, Max, .lhs=lhs, .rhs=lhs, .key=rhs); + } else { + combination = NewAST(ctx->file, combo_start, pos, BinaryOp, .op=op, .lhs=lhs, .rhs=rhs); + } + + spaces(&pos); + if (!match(&pos, ")")) return NULL; + + ast_t *iter = optional(ctx, &pos, parse_extended_expr); + if (!iter) return NULL; + + ast_t *fallback = NULL; + if (match_word(&pos, "else")) + fallback = expect(ctx, pos-4, &pos, parse_expr, "I couldn't parse the expression after this 'else'"); + + return NewAST(ctx->file, start, pos, Reduction, .iter=iter, .combination=combination, .fallback=fallback); +} + +ast_t *parse_index_suffix(parse_ctx_t *ctx, ast_t *lhs) { + if (!lhs) return NULL; + const char *start = lhs->start; + const char *pos = lhs->end; + if (!match(&pos, "[")) return NULL; + whitespace(&pos); + ast_t *index = NULL; + if (match(&pos, ".")) { + // array[.field] + const char *field_start = pos-1; + const char *field = get_id(&pos); + if (field) + index = NewAST(ctx->file, field_start, pos, FieldAccess, .field=field); + else + --pos; + } + + if (!index) { + // obj[expr] + index = optional(ctx, &pos, parse_extended_expr); + } + whitespace(&pos); + bool unchecked = match(&pos, ";") && (spaces(&pos), match_word(&pos, "unchecked") != 0); + expect_closing(ctx, &pos, "]", "I wasn't able to parse the rest of this index"); + return NewAST(ctx->file, start, pos, Index, .indexed=lhs, .index=index, .unchecked=unchecked); +} + +PARSER(parse_if) { + // if [then] [else ] + const char *start = pos; + int64_t starting_indent = sss_get_indent(ctx->file, pos); + + if (!match_word(&pos, "if")) + return NULL; + + ast_t *condition = optional(ctx, &pos, parse_declaration); + if (!condition) condition = expect(ctx, start, &pos, parse_expr, + "I expected to find an expression for this 'if'"); + + match_word(&pos, "then"); // optional + + ast_t *body = 
expect(ctx, start, &pos, parse_opt_indented_block, "I expected a body for this 'if' statement"); + + const char *tmp = pos; + whitespace(&tmp); + ast_t *else_body = NULL; + if (sss_get_indent(ctx->file, tmp) == starting_indent && match_word(&tmp, "else")) { + pos = tmp; + else_body = expect(ctx, start, &pos, parse_opt_indented_block, "I expected a body for this 'else'"); + } + return NewAST(ctx->file, start, pos, If, .condition=condition, .body=body, .else_body=else_body); +} + +PARSER(parse_for) { + // for [k,] v in iter [] body + const char *start = pos; + if (!match_word(&pos, "for")) return NULL; + ast_t *index = expect(ctx, start, &pos, parse_var, "I expected an iteration variable for this 'for'"); + spaces(&pos); + ast_t *value = NULL; + if (match(&pos, ",")) { + value = expect(ctx, pos-1, &pos, parse_var, "I expected a variable after this comma"); + } + expect_str(ctx, start, &pos, "in", "I expected an 'in' for this 'for'"); + ast_t *iter = expect(ctx, start, &pos, parse_expr, "I expected an iterable value for this 'for'"); + match(&pos, "do"); // optional + ast_t *body = expect(ctx, start, &pos, parse_opt_indented_block, "I expected a body for this 'for'"); + return NewAST(ctx->file, start, pos, For, .index=value ? index : NULL, .value=value ? 
value : index, .iter=iter, .body=body); +} + +PARSER(parse_while) { + // while condition [do] [] body + const char *start = pos; + if (!match_word(&pos, "while")) return NULL; + ast_t *condition = expect(ctx, start, &pos, parse_expr, "I don't see a viable condition for this 'while'"); + match(&pos, "do"); // optional + ast_t *body = expect(ctx, start, &pos, parse_opt_indented_block, "I expected a body for this 'while'"); + const char *tmp = pos; + whitespace(&tmp); + return NewAST(ctx->file, start, pos, While, .condition=condition, .body=body); +} + +PARSER(parse_heap_alloc) { + const char *start = pos; + if (!match(&pos, "@")) return NULL; + spaces(&pos); + ast_t *val = expect(ctx, start, &pos, parse_expr, "I expected an expression for this '@'"); + return NewAST(ctx->file, start, pos, UnaryOp, .op=UNOP_HEAP_ALLOCATE, .value=val); +} + +PARSER(parse_stack_reference) { + const char *start = pos; + if (!match(&pos, "&")) return NULL; + spaces(&pos); + ast_t *val = expect(ctx, start, &pos, parse_expr, "I expected an expression for this '&'"); + return NewAST(ctx->file, start, pos, UnaryOp, .op=UNOP_STACK_REFERENCE, .value=val); +} + +PARSER(parse_not) { + const char *start = pos; + if (!match_word(&pos, "not")) return NULL; + spaces(&pos); + ast_t *val = expect(ctx, start, &pos, parse_expr, "I expected an expression for this 'not'"); + return NewAST(ctx->file, start, pos, UnaryOp, .op=UNOP_NOT, .value=val); +} + +PARSER(parse_negative) { + const char *start = pos; + if (!match(&pos, "-")) return NULL; + spaces(&pos); + ast_t *val = expect(ctx, start, &pos, parse_term, "I expected an expression for this '-'"); + return NewAST(ctx->file, start, pos, UnaryOp, .op=UNOP_NEGATIVE, .value=val); +} + +PARSER(parse_bool) { + const char *start = pos; + if (match_word(&pos, "yes")) + return NewAST(ctx->file, start, pos, Bool, .b=true); + else if (match_word(&pos, "no")) + return NewAST(ctx->file, start, pos, Bool, .b=false); + else + return NULL; +} + +PARSER(parse_char) { + 
const char *start = pos; + if (*pos == '`' && pos[1]) { // a backtick must be followed by the character it quotes, not the end of input + ++pos; + char c = *pos; + ++pos; + return NewAST(ctx->file, start, pos, Char, .c=c); + } else if (*pos == '\\') { + char c = unescape(&pos)[0]; + return NewAST(ctx->file, start, pos, Char, .c=c); + } else { + return NULL; + } +} + +PARSER(parse_interpolation) { + const char *start = pos; + ++pos; // ignore the initial character, typically a '$', but might be other stuff like '@' in different contexts + bool labelled = match(&pos, ":"); + ast_t *value = optional(ctx, &pos, parse_parens); + if (!value) value = optional(ctx, &pos, parse_term); + if (!value) { + match_group(&pos, '('); + parser_err(ctx, start, pos, "This interpolation didn't parse"); + } + return NewAST(ctx->file, start, pos, Interp, .value=value, .labelled=labelled); +} + +PARSER(parse_string) { + static const char closing[256] = {['(']=')', ['[']=']', ['<']='>', ['{']='}'}; // one slot per byte value: the tables are looked up by (unsigned char) so non-ASCII openers stay in bounds + static const char escapes[256] = {['\'']='\x1B', ['(']='\x1B', ['>']='\x1B', ['/']='\x1B'}; // char, not bool: a bool element would collapse '\x1B' to 1 + static const char interps[256] = {['>']='@', ['/']='@', ['\'']='\x1A', ['(']='\x1A'}; + + const char *string_start = pos; + char open, close; + if (match(&pos, "$")) { + open = *pos; + close = closing[(unsigned char)open] ? closing[(unsigned char)open] : open; + ++pos; + } else { + if (*pos != '\'' && *pos != '"') + return NULL; + open = *pos; + close = *pos; + ++pos; + } + + char interp_char = interps[(unsigned char)open] ? interps[(unsigned char)open] : '$'; + char escape_char = escapes[(unsigned char)open] ? 
escapes[(int)open] : '\\'; + + if (open == ':' || open == '>') + spaces(&pos); + + ast_list_t *chunks = NULL; + if (*pos == '\r' || *pos == '\n') { // Multiline string + char special[] = {'\n','\r',interp_char,escape_char,'\0'}; + int64_t starting_indent = sss_get_indent(ctx->file, pos); + // indentation-delimited string + match(&pos, "\r"); + match(&pos, "\n"); + int64_t first_line = sss_get_line_number(ctx->file, pos); + int64_t indented = sss_get_indent(ctx->file, pos); + pos = sss_get_line(ctx->file, first_line); + while (pos < ctx->file->text + ctx->file->len) { + const char *eol = strchrnul(pos, '\n'); + if (eol == pos + strspn(pos, " \t\r")) { // Empty line + ast_t *ast = NewAST(ctx->file, pos, eol, StringLiteral, .str="\n"); + chunks = new(ast_list_t, .ast=ast, .next=chunks); + pos = eol + 1; + continue; + } + if (!match_indentation(&pos, starting_indent)) + parser_err(ctx, pos, strchrnul(pos, '\n'), "This isn't a valid indentation level for this unterminated string"); + + if (*pos == close) { + ++pos; + goto finished; + } + + if (!match_indentation(&pos, (indented - starting_indent))) + parser_err(ctx, pos, strchrnul(pos, '\n'), "I was expecting this to have %lu extra indentation beyond %lu", + (indented - starting_indent), starting_indent); + + while (pos < eol+1) { + size_t len = strcspn(pos, special); + if (pos[len] == '\r') ++len; + if (pos[len] == '\n') ++len; + + if (len > 0) { + ast_t *chunk = NewAST(ctx->file, pos, pos+len-1, StringLiteral, .str=heap_strn(pos, len)); + chunks = new(ast_list_t, .ast=chunk, .next=chunks); + } + + pos += len; + + if (*pos == escape_char) { + const char *start = pos; + const char* unescaped = unescape(&pos); + ast_t *chunk = NewAST(ctx->file, start, pos, StringLiteral, .str=unescaped); + chunks = new(ast_list_t, .ast=chunk, .next=chunks); + ++pos; + } else if (*pos == interp_char) { + ast_t *chunk = parse_interpolation(ctx, pos); + chunks = new(ast_list_t, .ast=chunk, .next=chunks); + pos = chunk->end; + } + } + } + 
finished:; + // Strip trailing newline: + if (chunks) { + ast_t *last_chunk = chunks->ast; + if (last_chunk->tag == StringLiteral) { + auto str = Match(last_chunk, StringLiteral); + const char* trimmed = heap_strn(str->str, strlen(str->str)-1); + chunks->ast = NewAST(ctx->file, last_chunk->start, last_chunk->end-1, StringLiteral, .str=trimmed); + } + } + } else { // Inline string + char special[] = {'\n','\r',open,close,interp_char,escape_char,'\0'}; + int depth = 1; + while (depth > 0 && *pos) { + size_t len = strcspn(pos, special); + if (len > 0) { + ast_t *chunk = NewAST(ctx->file, pos, pos+len-1, StringLiteral, .str=heap_strn(pos, len)); + chunks = new(ast_list_t, .ast=chunk, .next=chunks); + pos += len; + } + + if (*pos == interp_char) { + ast_t *chunk = parse_interpolation(ctx, pos); + chunks = new(ast_list_t, .ast=chunk, .next=chunks); + pos = chunk->end; + } else if (*pos == escape_char) { + const char *start = pos; + const char* unescaped = unescape(&pos); + ast_t *chunk = NewAST(ctx->file, start, pos, StringLiteral, .str=unescaped); + chunks = new(ast_list_t, .ast=chunk, .next=chunks); + } else if (*pos == '\r' || *pos == '\n') { + if (open == ' ' || open == ':' || open == '>') goto string_finished; + parser_err(ctx, string_start, pos, "This line ended without closing the string"); + } else if (*pos == close) { // if open == close, then don't do nesting (i.e. 
check 'close' first) + --depth; + if (depth > 0) { + ast_t *chunk = NewAST(ctx->file, pos, pos+1, StringLiteral, .str=heap_strn(pos, 1)); + chunks = new(ast_list_t, .ast=chunk, .next=chunks); + } + ++pos; + } else if (*pos == open) { + ++depth; + ast_t *chunk = NewAST(ctx->file, pos, pos+1, StringLiteral, .str=heap_strn(pos, 1)); + chunks = new(ast_list_t, .ast=chunk, .next=chunks); + ++pos; + } else { + ast_t *chunk = NewAST(ctx->file, pos, pos+1, StringLiteral, .str=heap_strn(pos, 1)); + ++pos; + chunks = new(ast_list_t, .ast=chunk, .next=chunks); + } + } + } + string_finished:; + REVERSE_LIST(chunks); + return NewAST(ctx->file, string_start, pos, StringJoin, .children=chunks); +} + +PARSER(parse_skip) { + const char *start = pos; + if (!match_word(&pos, "skip")) return NULL; + spaces(&pos); + const char* target; + if (match_word(&pos, "for")) target = "for"; + else if (match_word(&pos, "while")) target = "while"; + else target = get_id(&pos); + ast_t *skip = NewAST(ctx->file, start, pos, Skip, .target=target); + return skip; +} + +PARSER(parse_stop) { + const char *start = pos; + if (!match_word(&pos, "stop")) return NULL; + spaces(&pos); + const char* target; + if (match_word(&pos, "for")) target = "for"; + else if (match_word(&pos, "while")) target = "while"; + else target = get_id(&pos); + ast_t *stop = NewAST(ctx->file, start, pos, Stop, .target=target); + return stop; +} + +PARSER(parse_return) { + const char *start = pos; + if (!match_word(&pos, "return")) return NULL; + spaces(&pos); + ast_t *value = optional(ctx, &pos, parse_expr); + ast_t *ret = NewAST(ctx->file, start, pos, Return, .value=value); + return ret; +} + +PARSER(parse_lambda) { + const char *start = pos; + if (!match_word(&pos, "func")) + return NULL; + spaces(&pos); + if (!match(&pos, "(")) + return NULL; + arg_list_t *args = parse_args(ctx, &pos, false); + spaces(&pos); + expect_closing(ctx, &pos, ")", "I was expecting a ')' to finish this anonymous function's arguments"); + ast_t *body = 
optional(ctx, &pos, parse_opt_indented_block); + return NewAST(ctx->file, start, pos, Lambda, .args=args, .body=body); +} + +PARSER(parse_nil) { + const char *start = pos; + if (!match(&pos, "!")) return NULL; + type_ast_t *type = parse_type(ctx, pos); + if (!type) return NULL; + return NewAST(ctx->file, start, type->end, Nil, .type=type); +} + +PARSER(parse_var) { + const char *start = pos; + const char* name = get_id(&pos); + if (!name) return NULL; + return NewAST(ctx->file, start, pos, Var, .var.name=name); +} + +PARSER(parse_term_no_suffix) { + spaces(&pos); + ast_t *term = NULL; + (void)( + false + || (term=parse_nil(ctx, pos)) + || (term=parse_num(ctx, pos)) + || (term=parse_int(ctx, pos)) + || (term=parse_negative(ctx, pos)) + || (term=parse_heap_alloc(ctx, pos)) + || (term=parse_stack_reference(ctx, pos)) + || (term=parse_bool(ctx, pos)) + || (term=parse_char(ctx, pos)) + || (term=parse_string(ctx, pos)) + || (term=parse_lambda(ctx, pos)) + || (term=parse_parens(ctx, pos)) + || (term=parse_table(ctx, pos)) + || (term=parse_var(ctx, pos)) + || (term=parse_array(ctx, pos)) + || (term=parse_reduction(ctx, pos)) + || (term=parse_skip(ctx, pos)) + || (term=parse_stop(ctx, pos)) + || (term=parse_return(ctx, pos)) + || (term=parse_not(ctx, pos)) + || (term=parse_extern(ctx, pos)) + ); + return term; +} + +PARSER(parse_term) { + ast_t *term = parse_term_no_suffix(ctx, pos); + if (!term) return NULL; + + for (bool progress = true; progress; ) { + ast_t *new_term; + progress = (false + || (new_term=parse_index_suffix(ctx, term)) + || (new_term=parse_field_suffix(ctx, term)) + || (new_term=parse_fncall_suffix(ctx, term, NORMAL_FUNCTION)) + ); + if (progress) term = new_term; + } + return term; +} + +ast_t *parse_fncall_suffix(parse_ctx_t *ctx, ast_t *fn, bool is_extern) { + if (!fn) return NULL; + + const char *start = fn->start; + const char *pos = fn->end; + + if (!match(&pos, "(")) return NULL; + + whitespace(&pos); + + ast_list_t *args = NULL; + for (;;) { + 
const char *arg_start = pos; + const char *name = get_id(&pos); + whitespace(&pos); + if (name) { + if (match(&pos, "=")) { + whitespace(&pos); + ast_t *arg = parse_expr(ctx, pos); + if (!arg) parser_err(ctx, arg_start, pos, "I couldn't parse this keyword argument value"); + ast_t *kwarg = NewAST(ctx->file, arg_start, arg->end, KeywordArg, + .name=name, .arg=arg); + args = new(ast_list_t, .ast=kwarg, .next=args); + pos = kwarg->end; + goto got_arg; + } + pos = arg_start; + } + + ast_t *arg = optional(ctx, &pos, parse_expr); + if (!arg) break; + args = new(ast_list_t, .ast=arg, .next=args); + + got_arg:; + + whitespace(&pos); + if (!match(&pos, ",")) + break; + whitespace(&pos); + } + + whitespace(&pos); + + if (!match(&pos, ")")) + parser_err(ctx, start, pos, "This parenthesis is unclosed"); + + type_ast_t *extern_return_type = NULL; + if (is_extern) { + if (match(&pos, ":")) + extern_return_type = expect(ctx, start, &pos, parse_type, "I couldn't parse the return type of this external function call"); + else + extern_return_type = NewTypeAST(ctx->file, pos, pos, TypeVar, .var.name="Void"); + } + REVERSE_LIST(args); + return NewAST(ctx->file, start, pos, FunctionCall, .fn=fn, .args=args, .extern_return_type=extern_return_type); +} + +binop_e match_binary_operator(const char **pos) +{ + switch (**pos) { + case '+': { + *pos += 1; + return match(pos, "+") ? BINOP_CONCAT : BINOP_PLUS; + } + case '-': { + *pos += 1; + if ((*pos)[0] != ' ' && (*pos)[-2] == ' ') // looks like `fn -5` + return BINOP_UNKNOWN; + return BINOP_MINUS; + } + case '*': *pos += 1; return BINOP_MULT; + case '/': *pos += 1; return BINOP_DIVIDE; + case '^': *pos += 1; return BINOP_POWER; + case '<': *pos += 1; return match(pos, "=") ? BINOP_LE : (match(pos, "<") ? BINOP_LSHIFT : BINOP_LT); + case '>': *pos += 1; return match(pos, "=") ? BINOP_GE : (match(pos, ">") ? 
BINOP_RSHIFT : BINOP_GT); + default: { + if (match(pos, "!=")) return BINOP_NE; + else if (match(pos, "==") && **pos != '=') return BINOP_EQ; + else if (match_word(pos, "and")) return BINOP_AND; + else if (match_word(pos, "or")) return BINOP_OR; + else if (match_word(pos, "xor")) return BINOP_XOR; + else if (match_word(pos, "mod1")) return BINOP_MOD1; + else if (match_word(pos, "mod")) return BINOP_MOD; + else if (match_word(pos, "_min_")) return BINOP_MIN; + else if (match_word(pos, "_max_")) return BINOP_MAX; + else return BINOP_UNKNOWN; + } + } +} + +static ast_t *parse_infix_expr(parse_ctx_t *ctx, const char *pos, int min_tightness) { + ast_t *lhs = optional(ctx, &pos, parse_term); + if (!lhs) return NULL; + + spaces(&pos); + binop_e op = match_binary_operator(&pos); + if (op == BINOP_UNKNOWN || op_tightness[op] < min_tightness) + return lhs; + + ast_t *key = NULL; + if (op == BINOP_MIN || op == BINOP_MAX) { + key = NewAST(ctx->file, pos, pos, Var, .var.name=op == BINOP_MIN ? "_min_" : "_max_"); + for (bool progress = true; progress; ) { + ast_t *new_term; + progress = (false + || (new_term=parse_index_suffix(ctx, key)) + || (new_term=parse_field_suffix(ctx, key)) + || (new_term=parse_fncall_suffix(ctx, key, NORMAL_FUNCTION)) + ); + if (progress) key = new_term; + } + if (key->tag == Var) key = NULL; + else pos = key->end; + + } + spaces(&pos); + ast_t *rhs = parse_infix_expr(ctx, pos, op_tightness[op]); + if (!rhs) return lhs; + pos = rhs->end; + + switch (op) { + case BINOP_MIN: + return NewAST(ctx->file, lhs->start, rhs->end, Min, .lhs=lhs, .rhs=rhs, .key=key); + case BINOP_MAX: + return NewAST(ctx->file, lhs->start, rhs->end, Max, .lhs=lhs, .rhs=rhs, .key=key); + default: + return NewAST(ctx->file, lhs->start, rhs->end, BinaryOp, .lhs=lhs, .rhs=rhs, .op=op); + } +} + +ast_t *parse_expr(parse_ctx_t *ctx, const char *pos) { + return parse_infix_expr(ctx, pos, 0); +} + +PARSER(parse_declaration) { + const char *start = pos; + ast_t *var = parse_var(ctx, pos); 
+ if (!var) return NULL; + pos = var->end; + spaces(&pos); + if (!match(&pos, ":=")) return NULL; + spaces(&pos); + ast_t *val = optional(ctx, &pos, parse_use); + if (!val) val = optional(ctx, &pos, parse_extended_expr); + if (!val) parser_err(ctx, pos, strchrnul(pos, '\n'), "This declaration value didn't parse"); + return NewAST(ctx->file, start, pos, Declare, .var=var, .value=val); +} + +PARSER(parse_update) { + const char *start = pos; + ast_t *lhs = optional(ctx, &pos, parse_expr); + if (!lhs) return NULL; + spaces(&pos); + binop_e op; + if (match(&pos, "+=")) op = BINOP_PLUS; + else if (match(&pos, "++=")) op = BINOP_CONCAT; + else if (match(&pos, "-=")) op = BINOP_MINUS; + else if (match(&pos, "*=")) op = BINOP_MULT; + else if (match(&pos, "/=")) op = BINOP_DIVIDE; + else if (match(&pos, "and=")) op = BINOP_AND; + else if (match(&pos, "or=")) op = BINOP_OR; + else if (match(&pos, "xor=")) op = BINOP_XOR; + else return NULL; + ast_t *rhs = expect(ctx, start, &pos, parse_extended_expr, "I expected an expression here"); + return NewAST(ctx->file, start, pos, UpdateAssign, .lhs=lhs, .rhs=rhs, .op=op); +} + +PARSER(parse_assignment) { + const char *start = pos; + ast_list_t *targets = NULL; + for (;;) { + ast_t *lhs = optional(ctx, &pos, parse_term); + if (!lhs) break; + targets = new(ast_list_t, .ast=lhs, .next=targets); + spaces(&pos); + if (!match(&pos, ",")) break; + whitespace(&pos); + } + + if (!targets) return NULL; + + spaces(&pos); + if (!match(&pos, "=")) return NULL; + if (match(&pos, "=")) return NULL; // == comparison + + ast_list_t *values = NULL; + for (;;) { + ast_t *rhs = optional(ctx, &pos, parse_extended_expr); + if (!rhs) break; + values = new(ast_list_t, .ast=rhs, .next=values); + spaces(&pos); + if (!match(&pos, ",")) break; + whitespace(&pos); + } + + REVERSE_LIST(targets); + REVERSE_LIST(values); + + return NewAST(ctx->file, start, pos, Assign, .targets=targets, .values=values); +} + +PARSER(parse_statement) { + ast_t *stmt = NULL; + if 
((stmt=parse_declaration(ctx, pos)) + || (stmt=parse_doctest(ctx, pos)) + || (stmt=parse_func_def(ctx, pos)) + || (stmt=parse_use(ctx,pos))) + return stmt; + + if (!(false + || (stmt=parse_update(ctx, pos)) + || (stmt=parse_assignment(ctx, pos)) + )) + stmt = parse_extended_expr(ctx, pos); + + for (bool progress = (stmt != NULL); progress; ) { + ast_t *new_stmt; + progress = false; + if (stmt->tag == Var) + progress = (new_stmt=parse_fncall_suffix(ctx, stmt, NORMAL_FUNCTION)); + + if (progress) stmt = new_stmt; + } + return stmt; + +} + +PARSER(parse_extended_expr) { + ast_t *expr = NULL; + + if (false + || (expr=optional(ctx, &pos, parse_for)) + || (expr=optional(ctx, &pos, parse_while)) + || (expr=optional(ctx, &pos, parse_if)) + ) + return expr; + + return parse_expr(ctx, pos); +} + +PARSER(parse_block) { + int64_t block_indent = sss_get_indent(ctx->file, pos); + const char *start = pos; + whitespace(&pos); + ast_list_t *statements = NULL; + while (*pos) { + ast_t *stmt = optional(ctx, &pos, parse_statement); + if (!stmt) { + spaces(&pos); + if (*pos && *pos != '\r' && *pos != '\n') + parser_err(ctx, pos, strchrnul(pos, '\n'), "I couldn't parse this line"); + break; + } + statements = new(ast_list_t, .ast=stmt, .next=statements); + whitespace(&pos); + if (sss_get_indent(ctx->file, pos) != block_indent) { + pos = stmt->end; // backtrack + break; + } + } + REVERSE_LIST(statements); + return NewAST(ctx->file, start, pos, Block, .statements=statements); +} + +PARSER(parse_opt_indented_block) { + return indent(ctx, &pos) ? 
parse_block(ctx, pos) : parse_inline_block(ctx, pos); +} + +PARSER(parse_namespace) { + const char *start = pos; + whitespace(&pos); + int64_t indent = sss_get_indent(ctx->file, pos); + ast_list_t *statements = NULL; + for (;;) { + const char *next = pos; + whitespace(&next); + if (sss_get_indent(ctx->file, next) != indent) break; + ast_t *stmt; + if ((stmt=optional(ctx, &pos, parse_type_def)) + ||(stmt=optional(ctx, &pos, parse_linker)) + ||(stmt=optional(ctx, &pos, parse_statement))) + { + statements = new(ast_list_t, .ast=stmt, .next=statements); + pos = stmt->end; + whitespace(&pos); + } else { + if (sss_get_indent(ctx->file, next) > indent && next < strchrnul(next, '\n')) + parser_err(ctx, next, strchrnul(next, '\n'), "I couldn't parse this namespace statement"); + break; + } + } + REVERSE_LIST(statements); + return NewAST(ctx->file, start, pos, Block, .statements=statements); +} + +PARSER(parse_type_def) { + // type Foo := Type... \n body... + const char *start = pos; + if (!match_word(&pos, "type")) return NULL; + + int64_t starting_indent = sss_get_indent(ctx->file, pos); + + const char *name = get_id(&pos); + if (!name) return NULL; + spaces(&pos); + + if (!match(&pos, ":=")) return NULL; + type_ast_t *type_ast = expect(ctx, start, &pos, parse_type, "I expected a type after this ':='"); + + const char *ns_pos = pos; + whitespace(&ns_pos); + int64_t ns_indent = sss_get_indent(ctx->file, ns_pos); + ast_t *namespace = NULL; + if (ns_indent > starting_indent) { + pos = ns_pos; + namespace = optional(ctx, &pos, parse_namespace); + } + if (!namespace) + namespace = NewAST(ctx->file, pos, pos, Block, .statements=NULL); + return NewAST(ctx->file, start, pos, TypeDef, .var.name=name, .type=type_ast, .namespace=namespace); +} + +type_ast_t *parse_enum_type(parse_ctx_t *ctx, const char *pos) { + // tagged union: enum Foo := a|b(x:Int,y:Int)=5|... 
+ const char *start = pos; + + if (!match_word(&pos, "enum")) return NULL; + spaces(&pos); + if (!match(&pos, "(")) return NULL; + + tag_t *tags = NULL; + int64_t next_value = 0; + + whitespace(&pos); + for (;;) { + const char *tag_start = pos; + + spaces(&pos); + const char *tag_name = get_id(&pos); + if (!tag_name) break; + + spaces(&pos); + arg_list_t *fields; + if (match(&pos, "(")) { + whitespace(&pos); + fields = parse_args(ctx, &pos, false); + whitespace(&pos); + expect_closing(ctx, &pos, ")", "I wasn't able to parse the rest of this tagged union member"); + } else { + fields = NULL; + } + + spaces(&pos); + if (match(&pos, "=")) { + ast_t *val = expect(ctx, tag_start, &pos, parse_int, "I expected an integer literal after this '='"); + next_value = Match(val, Int)->i; + } + + // Check for duplicate values: + for (tag_t *t = tags; t; t = t->next) { + if (t->value == next_value) + parser_err(ctx, tag_start, pos, "This tag value (%ld) is a duplicate of an earlier tag value", next_value); + } + + type_ast_t *type = NewTypeAST(ctx->file, tag_start, pos, TypeStruct, .fields=fields); + tags = new(tag_t, .name=tag_name, .value=next_value, .type=type, .next=tags); + + const char *next_pos = pos; + whitespace(&next_pos); + if (!match(&next_pos, "|")) + break; + whitespace(&next_pos); + pos = next_pos; + ++next_value; + } + + whitespace(&pos); + expect_closing(ctx, &pos, ")", "I wasn't able to parse the rest of this enum definition"); + + REVERSE_LIST(tags); + + return NewTypeAST(ctx->file, start, pos, TypeTaggedUnion, .tags=tags); +} + +arg_list_t *parse_args(parse_ctx_t *ctx, const char **pos, bool allow_unnamed) +{ + arg_list_t *args = NULL; + for (;;) { + const char *batch_start = *pos; + ast_t *default_val = NULL; + type_ast_t *type = NULL; + + typedef struct var_list_s { + var_t *var; + struct var_list_s *next; + } var_list_t; + + var_list_t *vars = NULL; + for (;;) { + whitespace(pos); + const char *name_start = *pos; + const char *name = get_id(pos); + if 
(!name) break; + whitespace(pos); + if (strncmp(*pos, "==", 2) != 0 && match(pos, "=")) { + default_val = expect(ctx, *pos-1, pos, parse_term, "I expected a value after this '='"); + vars = new(var_list_t, .var=new(var_t, .name=name), .next=vars); + break; + } else if (match(pos, ":")) { + type = expect(ctx, *pos-1, pos, parse_type, "I expected a type here"); + vars = new(var_list_t, .var=new(var_t, .name=name), .next=vars); + break; + } else if (allow_unnamed) { + *pos = name_start; + type = optional(ctx, pos, parse_type); + if (type) + vars = new(var_list_t, .var=NULL, .next=vars); + break; + } else if (name) { + vars = new(var_list_t, .var=new(var_t, .name=name), .next=vars); + spaces(pos); + if (!match(pos, ",")) break; + } else { + break; + } + } + if (!vars) break; + if (!default_val && !type) + parser_err(ctx, batch_start, *pos, "I expected a ':' and type, or '=' and a default value after this parameter (%s)", + vars->var->name); + + REVERSE_LIST(vars); + for (; vars; vars = vars->next) + args = new(arg_list_t, .var=vars->var, .type=type, .default_val=default_val, .next=args); // chain onto the parameters collected so far; the list is reversed once after the loop + whitespace(pos); + match(pos, ","); + } + + REVERSE_LIST(args); + return args; +} + +PARSER(parse_func_def) { + const char *start = pos; + if (!match_word(&pos, "func")) return NULL; + + ast_t *name = optional(ctx, &pos, parse_var); + if (!name) return NULL; + + spaces(&pos); + + if (!match(&pos, "(")) return NULL; + + arg_list_t *args = parse_args(ctx, &pos, false); + whitespace(&pos); + bool is_inline = false; + ast_t *cache_ast = NULL; + for (; whitespace(&pos), (match(&pos, ";") || match(&pos, ",")); ) { + const char *flag_start = pos; + if (match_word(&pos, "inline")) { + is_inline = true; + } else if (match_word(&pos, "cached")) { + if (!cache_ast) cache_ast = NewAST(ctx->file, pos, pos, Int, .i=INT64_MAX, .precision=64); + } else if (match_word(&pos, "cache_size")) { + if (whitespace(&pos), !match(&pos, "=")) + parser_err(ctx, flag_start, pos, "I expected a value for 'cache_size'"); + 
whitespace(&pos); + cache_ast = expect(ctx, start, &pos, parse_expr, "I expected a maximum size for the cache"); + } + } + expect_closing(ctx, &pos, ")", "I wasn't able to parse the rest of this function definition"); + + type_ast_t *ret_type = NULL; + spaces(&pos); + if (match(&pos, "->") || match(&pos, ":")) + ret_type = optional(ctx, &pos, parse_type); + + ast_t *body = expect(ctx, start, &pos, parse_opt_indented_block, + "This function needs a body block"); + return NewAST(ctx->file, start, pos, FunctionDef, + .name=name, .args=args, .ret_type=ret_type, .body=body, .cache=cache_ast, + .is_inline=is_inline); +} + +PARSER(parse_extern) { + const char *start = pos; + if (!match_word(&pos, "extern")) return NULL; + spaces(&pos); + bool address = (match(&pos, "&") != 0); + const char* name = get_id(&pos); + spaces(&pos); + // extern function call: + if (match(&pos, "(")) { + return parse_fncall_suffix(ctx, NewAST(ctx->file, start, pos-1, Var, .var.name=name), EXTERN_FUNCTION); + } + if (!match(&pos, ":")) + parser_err(ctx, start, pos, "I couldn't get a type for this extern"); + type_ast_t *type = expect(ctx, start, &pos, parse_type, "I couldn't parse the type for this extern"); + return NewAST(ctx->file, start, pos, Extern, .name=name, .type=type, .address=address); +} + +PARSER(parse_doctest) { + const char *start = pos; + if (!match(&pos, ">>>")) return NULL; + spaces(&pos); + ast_t *expr = expect(ctx, start, &pos, parse_statement, "I couldn't parse the expression for this doctest"); + whitespace(&pos); + const char* output = NULL; + if (match(&pos, "===")) { + spaces(&pos); + const char *output_start = pos, + *output_end = strchrnul(pos, '\n'); + if (output_end <= output_start) + parser_err(ctx, output_start, output_end, "You're missing expected output here"); + output = heap_strn(output_start, (size_t)(output_end - output_start)); + pos = output_end; + } + return NewAST(ctx->file, start, pos, DocTest, .expr=expr, .output=output); +} + +PARSER(parse_use) { + 
const char *start = pos; + if (!match_word(&pos, "use")) return NULL; + spaces(&pos); + size_t path_len = strcspn(pos, " \t\r\n;"); + if (path_len < 1) + parser_err(ctx, start, pos, "There is no filename here to use"); + char *path = heap_strf("%.*s.sss", (int)path_len, pos); + pos += path_len; + char *resolved_path = resolve_path(path, ctx->file->filename); + if (!resolved_path) + parser_err(ctx, start, pos, "No such file exists: \"%s\"", path); + while (match(&pos, ";")) continue; + return NewAST(ctx->file, start, pos, Use, .path=resolved_path); +} + +PARSER(parse_linker) { + const char *start = pos; + if (!match_word(&pos, "!link")) return NULL; + spaces(&pos); + size_t len = strcspn(pos, "\r\n"); + const char *directive = heap_strn(pos, len); + return NewAST(ctx->file, start, pos + len, LinkerDirective, .directive=directive); // the node ends after the directive text, so the caller resumes on the next line instead of re-parsing it +} + +PARSER(parse_inline_block) { + spaces(&pos); + const char *start = pos; + ast_list_t *statements = NULL; + while (*pos) { + spaces(&pos); + ast_t *stmt = optional(ctx, &pos, parse_statement); + if (!stmt) break; + statements = new(ast_list_t, .ast=stmt, .next=statements); + spaces(&pos); + if (!match(&pos, ";")) break; + } + REVERSE_LIST(statements); + return NewAST(ctx->file, start, pos, Block, .statements=statements); +} + +ast_t *parse_file(sss_file_t *file, jmp_buf *on_err) { + parse_ctx_t ctx = { + .file=file, + .on_err=on_err, + }; + + const char *pos = file->text; + if (match(&pos, "#!")) // shebang + some_not(&pos, "\r\n"); + + whitespace(&pos); + ast_t *ast = parse_namespace(&ctx, pos); + pos = ast->end; + whitespace(&pos); + if (strlen(pos) > 0) { + parser_err(&ctx, pos, pos + strlen(pos), "I couldn't parse this part of the file"); + } + return ast; +} + +type_ast_t *parse_type_str(const char *str) { + sss_file_t *file = sss_spoof_file("", str); + parse_ctx_t ctx = { + .file=file, + .on_err=NULL, + }; + + const char *pos = file->text; + whitespace(&pos); + type_ast_t *ast = parse_type(&ctx, pos); + if (!ast) return ast; + pos = 
ast->end; + whitespace(&pos); + if (strlen(pos) > 0) { + parser_err(&ctx, pos, pos + strlen(pos), "I couldn't parse this part of the type"); + } + return ast; +} + +ast_t *parse_expression_str(const char *str) { + sss_file_t *file = sss_spoof_file("", str); + parse_ctx_t ctx = { + .file=file, + .on_err=NULL, + }; + + const char *pos = file->text; + whitespace(&pos); + ast_t *ast = parse_extended_expr(&ctx, pos); + if (!ast) return ast; + pos = ast->end; + whitespace(&pos); + if (strlen(pos) > 0) { + parser_err(&ctx, pos, pos + strlen(pos), "I couldn't parse this part of the expression"); + } + return ast; +} + +// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 diff --git a/parse.h b/parse.h new file mode 100644 index 00000000..aaaa671a --- /dev/null +++ b/parse.h @@ -0,0 +1,9 @@ +#pragma once + +#include + +#include "ast.h" + +type_ast_t *parse_type_str(const char *str); +ast_t *parse_expression_str(const char *str); +ast_t *parse_file(sss_file_t *file, jmp_buf *on_err); diff --git a/util.c b/util.c new file mode 100644 index 00000000..4ee5ef7a --- /dev/null +++ b/util.c @@ -0,0 +1,84 @@ +#include +#include +#include +#include +#include +#include +#include + +#include "util.h" + +public char *heap_strn(const char *str, size_t len) +{ + if (!str) return NULL; + if (len == 0) return ""; + char *heaped = GC_MALLOC_ATOMIC(len + 1); + memcpy(heaped, str, len); + heaped[len] = '\0'; + return heaped; +} + +public char *heap_str(const char *str) +{ + if (!str) return NULL; + return heap_strn(str, strlen(str)); +} + +public char *heap_strf(const char *fmt, ...) 
+{ + va_list args; + va_start(args, fmt); + char *tmp = NULL; + int len = vasprintf(&tmp, fmt, args); + va_end(args); // release the va_list before any return, including the failure path + if (len < 0) return NULL; + char *ret = heap_strn(tmp, (size_t)len); + free(tmp); + return ret; +} + +// Name mangling algorithm to produce valid identifiers: +// Escape individual chars as "_x%02X" +// Things being escaped: +// - Leading digit +// - Non alphanumeric/non-underscore characters +// - "_" when followed by "x" and two uppercase hex digits +public char *mangle(const char *name) +{ + size_t len = 0; + for (const char *p = name; *p; p++) { + if ((!isalnum((unsigned char)*p) && *p != '_') // Non-identifier character (cast: <ctype.h> is undefined for negative char values) + || (p == name && isdigit((unsigned char)*p)) // Leading digit + || (p[0] == '_' && p[1] == 'x' && strspn(p+2, "ABCDEF0123456789") >= 2)) { // Looks like hex escape + len += strlen("_x00"); // Hex escape + } else { + len += 1; + } + } + char *mangled = GC_MALLOC_ATOMIC(len + 1); + char *dest = mangled; + for (const char *src = name; *src; src++) { + if ((!isalnum((unsigned char)*src) && *src != '_') // Non-identifier character + || (src == name && isdigit((unsigned char)*src)) // Leading digit + || (src[0] == '_' && src[1] == 'x' && strspn(src+2, "ABCDEF0123456789") >= 2)) { // Looks like hex escape + dest += sprintf(dest, "_x%02X", (unsigned)(unsigned char)*src); // Hex escape of the byte value: exactly two digits, matching the 4 bytes counted above + } else { + *(dest++) = *src; + } + } + mangled[len] = '\0'; + return mangled; +} + +CORD CORD_asprintf(const char *fmt, ...) +{ + va_list args; + va_start(args, fmt); + CORD c = NULL; + CORD_vsprintf(&c, fmt, args); + va_end(args); + return c; +} + + +// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 diff --git a/util.h b/util.h new file mode 100644 index 00000000..416c0b3e --- /dev/null +++ b/util.h @@ -0,0 +1,29 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +#define streq(a, b) (((a) == NULL && (b) == NULL) || (((a) == NULL) == ((b) == NULL) && strcmp(a, b) == 0)) +#define new(t, ...) 
((t*)memcpy(GC_MALLOC(sizeof(t)), &(t){__VA_ARGS__}, sizeof(t))) +#define grow(arr, new_size) ((typeof (arr))GC_REALLOC(arr, (sizeof(arr[0]))*(new_size))) +#define Match(x, _tag) ((x)->tag == _tag ? &(x)->__data._tag : (errx(1, __FILE__ ":%d This was supposed to be a " # _tag "\n", __LINE__), &(x)->__data._tag)) +#define Tagged(t, _tag, ...) new(t, .tag=_tag, .__data._tag={__VA_ARGS__}) + +#ifndef auto +#define auto __auto_type +#endif + +#ifndef public +#define public __attribute__ ((visibility ("default"))) +#endif + +char *heap_strn(const char *str, size_t len); +char *heap_str(const char *str); +char *heap_strf(const char *fmt, ...); +CORD CORD_asprintf(const char *fmt, ...); + +// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 -- cgit v1.2.3