diff options
| author | Bruce Hill <bruce@bruce-hill.com> | 2025-03-21 21:43:51 -0400 |
|---|---|---|
| committer | Bruce Hill <bruce@bruce-hill.com> | 2025-03-21 21:43:51 -0400 |
| commit | dcf266228512e100e779fe72f2d4e9ebb605ffe6 (patch) | |
| tree | 6fb3cac1ee3647f08bca219949ed8dc61b5861f2 /src | |
| parent | 9acea1807f5945c55445a16259386e4979203a1e (diff) | |
Move files into src/ and build into build/
Diffstat (limited to 'src')
| -rw-r--r-- | src/ast.c | 318 | ||||
| -rw-r--r-- | src/ast.h | 353 | ||||
| -rw-r--r-- | src/compile.c | 4597 | ||||
| -rw-r--r-- | src/compile.h | 24 | ||||
| -rw-r--r-- | src/cordhelpers.c | 71 | ||||
| -rw-r--r-- | src/cordhelpers.h | 14 | ||||
| -rw-r--r-- | src/enums.c | 129 | ||||
| -rw-r--r-- | src/enums.h | 14 | ||||
| -rw-r--r-- | src/environment.c | 864 | ||||
| -rw-r--r-- | src/environment.h | 81 | ||||
| -rw-r--r-- | src/parse.c | 2622 | ||||
| -rw-r--r-- | src/parse.h | 13 | ||||
| -rw-r--r-- | src/repl.c | 552 | ||||
| -rw-r--r-- | src/repl.h | 5 | ||||
| -rw-r--r-- | src/structs.c | 77 | ||||
| -rw-r--r-- | src/structs.h | 13 | ||||
| -rw-r--r-- | src/tomo.c | 726 | ||||
| -rw-r--r-- | src/typecheck.c | 1578 | ||||
| -rw-r--r-- | src/typecheck.h | 33 | ||||
| -rw-r--r-- | src/types.c | 739 | ||||
| -rw-r--r-- | src/types.h | 162 |
21 files changed, 12985 insertions, 0 deletions
diff --git a/src/ast.c b/src/ast.c new file mode 100644 index 00000000..982ef7dc --- /dev/null +++ b/src/ast.c @@ -0,0 +1,318 @@ +// Some basic operations defined on AST nodes, mainly converting to +// strings for debugging. +#include <gc/cord.h> +#include <printf.h> +#include <stdarg.h> + +#include "ast.h" +#include "stdlib/datatypes.h" +#include "stdlib/integers.h" +#include "stdlib/tables.h" +#include "stdlib/text.h" +#include "cordhelpers.h" + +static const char *OP_NAMES[] = { + [BINOP_UNKNOWN]="unknown", + [BINOP_POWER]="^", [BINOP_MULT]="*", [BINOP_DIVIDE]="/", + [BINOP_MOD]="mod", [BINOP_MOD1]="mod1", [BINOP_PLUS]="+", [BINOP_MINUS]="minus", + [BINOP_CONCAT]="++", [BINOP_LSHIFT]="<<", [BINOP_ULSHIFT]="<<<", + [BINOP_RSHIFT]=">>", [BINOP_URSHIFT]=">>>", [BINOP_MIN]="min", + [BINOP_MAX]="max", [BINOP_EQ]="==", [BINOP_NE]="!=", [BINOP_LT]="<", + [BINOP_LE]="<=", [BINOP_GT]=">", [BINOP_GE]=">=", [BINOP_CMP]="<>", + [BINOP_AND]="and", [BINOP_OR]="or", [BINOP_XOR]="xor", +}; + +const char *binop_method_names[BINOP_XOR+1] = { + [BINOP_POWER]="power", [BINOP_MULT]="times", [BINOP_DIVIDE]="divided_by", + [BINOP_MOD]="modulo", [BINOP_MOD1]="modulo1", [BINOP_PLUS]="plus", [BINOP_MINUS]="minus", + [BINOP_CONCAT]="concatenated_with", [BINOP_LSHIFT]="left_shifted", [BINOP_RSHIFT]="right_shifted", + [BINOP_ULSHIFT]="unsigned_left_shifted", [BINOP_URSHIFT]="unsigned_right_shifted", + [BINOP_AND]="bit_and", [BINOP_OR]="bit_or", [BINOP_XOR]="bit_xor", +}; + +static CORD ast_list_to_xml(ast_list_t *asts); +static CORD arg_list_to_xml(arg_ast_t *args); +static CORD when_clauses_to_xml(when_clause_t *clauses); +static CORD tags_to_xml(tag_ast_t *tags); +static CORD xml_escape(CORD text); +static CORD optional_tagged(const char *tag, ast_t *ast); +static CORD optional_tagged_type(const char *tag, type_ast_t *ast); + +CORD xml_escape(CORD text) +{ + text = CORD_replace(text, "&", "&"); + text = CORD_replace(text, "<", "<"); + text = CORD_replace(text, ">", ">"); + return text; +} + +CORD ast_list_to_xml(ast_list_t *asts) +{ + CORD c = CORD_EMPTY; + for (; asts; asts = asts->next) { + c = CORD_cat(c, ast_to_xml(asts->ast)); + } + return c; +} + +CORD arg_list_to_xml(arg_ast_t *args) { + CORD c = "<args>"; + for (; args; args = args->next) { + CORD arg_cord = args->name ? CORD_all("<arg name=\"", args->name, "\">") : "<arg>"; + if (args->type) + arg_cord = CORD_all(arg_cord, "<type>", type_ast_to_xml(args->type), "</type>"); + if (args->value) + arg_cord = CORD_all(arg_cord, "<value>", ast_to_xml(args->value), "</value>"); + c = CORD_all(c, arg_cord, "</arg>"); + } + return CORD_cat(c, "</args>"); +} + +CORD when_clauses_to_xml(when_clause_t *clauses) { + CORD c = CORD_EMPTY; + for (; clauses; clauses = clauses->next) { + c = CORD_all(c, "<case>", ast_to_xml(clauses->pattern), ast_to_xml(clauses->body), "</case>"); + } + return c; +} + +CORD tags_to_xml(tag_ast_t *tags) { + CORD c = CORD_EMPTY; + for (; tags; tags = tags->next) { + c = CORD_all(c, "<tag name=\"", tags->name, "\">", arg_list_to_xml(tags->fields), "</tag>"); + } + return c; +} + +CORD optional_tagged(const char *tag, ast_t *ast) +{ + return ast ? CORD_all("<", tag, ">", ast_to_xml(ast), "</", tag, ">") : CORD_EMPTY; +} + +CORD optional_tagged_type(const char *tag, type_ast_t *ast) +{ + return ast ? CORD_all("<", tag, ">", type_ast_to_xml(ast), "</", tag, ">") : CORD_EMPTY; +} + +CORD ast_to_xml(ast_t *ast) +{ + if (!ast) return CORD_EMPTY; + + switch (ast->tag) { +#define T(type, ...) case type: { auto data = ast->__data.type; (void)data; return CORD_asprintf(__VA_ARGS__); } + T(Unknown, "<Unknown>") + T(None, "<None>%r</None>", type_ast_to_xml(data.type)) + T(Bool, "<Bool value=\"%s\" />", data.b ? "yes" : "no") + T(Var, "<Var>%s</Var>", data.name) + T(Int, "<Int>%s</Int>", data.str) + T(Num, "<Num>%g</Num>", data.n) + T(TextLiteral, "%r", xml_escape(data.cord)) + T(TextJoin, "<Text%r>%r</Text>", data.lang ? CORD_all(" lang=\"", data.lang, "\"") : CORD_EMPTY, ast_list_to_xml(data.children)) + T(Path, "<Path>%s</Path>", data.path) + T(Declare, "<Declare var=\"%r\">%r</Declare>", ast_to_xml(data.var), ast_to_xml(data.value)) + T(Assign, "<Assign><targets>%r</targets><values>%r</values></Assign>", ast_list_to_xml(data.targets), ast_list_to_xml(data.values)) + T(BinaryOp, "<BinaryOp op=\"%r\">%r %r</BinaryOp>", xml_escape(OP_NAMES[data.op]), ast_to_xml(data.lhs), ast_to_xml(data.rhs)) + T(UpdateAssign, "<UpdateAssign op=\"%r\">%r %r</UpdateAssign>", xml_escape(OP_NAMES[data.op]), ast_to_xml(data.lhs), ast_to_xml(data.rhs)) + T(Negative, "<Negative>%r</Negative>", ast_to_xml(data.value)) + T(Not, "<Not>%r</Not>", ast_to_xml(data.value)) + T(HeapAllocate, "<HeapAllocate>%r</HeapAllocate>", ast_to_xml(data.value)) + T(StackReference, "<StackReference>%r</StackReference>", ast_to_xml(data.value)) + T(Mutexed, "<Mutexed>%r</Mutexed>", ast_to_xml(data.value)) + T(Holding, "<Holding>%r%r</Holding>", ast_to_xml(data.mutexed), ast_to_xml(data.body)) + T(Min, "<Min>%r%r%r</Min>", ast_to_xml(data.lhs), ast_to_xml(data.rhs), optional_tagged("key", data.key)) + T(Max, "<Max>%r%r%r</Max>", ast_to_xml(data.lhs), ast_to_xml(data.rhs), optional_tagged("key", data.key)) + T(Array, "<Array>%r%r</Array>", optional_tagged_type("item-type", data.item_type), ast_list_to_xml(data.items)) + T(Set, "<Set>%r%r</Set>", + optional_tagged_type("item-type", data.item_type), + ast_list_to_xml(data.items)) + T(Table, "<Table>%r%r%r%r</Table>", + optional_tagged_type("key-type", data.key_type), optional_tagged_type("value-type", data.value_type), + optional_tagged("default-value", data.default_value), + ast_list_to_xml(data.entries), optional_tagged("fallback", data.fallback)) + T(TableEntry, "<TableEntry>%r%r</TableEntry>", ast_to_xml(data.key), ast_to_xml(data.value)) + T(Comprehension, "<Comprehension>%r%r%r%r%r</Comprehension>", optional_tagged("expr", data.expr), + ast_list_to_xml(data.vars), optional_tagged("iter", data.iter), + optional_tagged("filter", data.filter)) + T(FunctionDef, "<FunctionDef name=\"%r\">%r%r<body>%r</body></FunctionDef>", ast_to_xml(data.name), + arg_list_to_xml(data.args), optional_tagged_type("return-type", data.ret_type), ast_to_xml(data.body)) + T(ConvertDef, "<ConvertDef>%r%r<body>%r</body></ConvertDef>", + arg_list_to_xml(data.args), optional_tagged_type("return-type", data.ret_type), ast_to_xml(data.body)) + T(Lambda, "<Lambda>%r%r<body>%r</body></Lambda>)", arg_list_to_xml(data.args), + optional_tagged_type("return-type", data.ret_type), ast_to_xml(data.body)) + T(FunctionCall, "<FunctionCall><function>%r</function>%r</FunctionCall>", ast_to_xml(data.fn), arg_list_to_xml(data.args)) + T(MethodCall, "<MethodCall><self>%r</self><method>%s</method>%r</MethodCall>", ast_to_xml(data.self), data.name, arg_list_to_xml(data.args)) + T(Block, "<Block>%r</Block>", ast_list_to_xml(data.statements)) + T(For, "<For>%r%r%r%r%r</For>", ast_list_to_xml(data.vars), optional_tagged("iterable", data.iter), + optional_tagged("body", data.body), optional_tagged("empty", data.empty)) + T(While, "<While>%r%r</While>", optional_tagged("condition", data.condition), optional_tagged("body", data.body)) + T(Repeat, "<Repeat>%r</Repeat>", optional_tagged("body", data.body)) + T(If, "<If>%r%r%r</If>", optional_tagged("condition", data.condition), optional_tagged("body", data.body), optional_tagged("else", data.else_body)) + T(When, "<When><subject>%r</subject>%r%r</When>", ast_to_xml(data.subject), when_clauses_to_xml(data.clauses), optional_tagged("else", data.else_body)) + T(Reduction, "<Reduction op=%r%r>%r</Reduction>", xml_escape(OP_NAMES[data.op]), optional_tagged("key", data.key), + optional_tagged("iterable", data.iter)) + T(Skip, "<Skip>%r</Skip>", data.target) + T(Stop, "<Stop>%r</Stop>", data.target) + T(PrintStatement, "<PrintStatement>%r</PrintStatement>", ast_list_to_xml(data.to_print)) + T(Pass, "<Pass/>") + T(Defer, "<Defer>%r<Defer/>", ast_to_xml(data.body)) + T(Return, "<Return>%r</Return>", ast_to_xml(data.value)) + T(Extern, "<Extern name=\"%s\">%r</Extern>", data.name, type_ast_to_xml(data.type)) + T(StructDef, "<StructDef name=\"%s\">%r<namespace>%r</namespace></StructDef>", data.name, arg_list_to_xml(data.fields), ast_to_xml(data.namespace)) + T(EnumDef, "<EnumDef name=\"%s\"><tags>%r</tags><namespace>%r</namespace></EnumDef>", data.name, tags_to_xml(data.tags), ast_to_xml(data.namespace)) + T(LangDef, "<LangDef name=\"%s\">%r</LangDef>", data.name, ast_to_xml(data.namespace)) + T(Index, "<Index>%r%r</Index>", optional_tagged("indexed", data.indexed), optional_tagged("index", data.index)) + T(FieldAccess, "<FieldAccess field=\"%s\">%r</FieldAccess>", data.field, ast_to_xml(data.fielded)) + T(Optional, "<Optional>%r</Optional>", ast_to_xml(data.value)) + T(NonOptional, "<NonOptional>%r</NonOptional>", ast_to_xml(data.value)) + T(Moment, "<Moment/>") + T(DocTest, "<DocTest>%r<output>%r</output></DocTest>", optional_tagged("expression", data.expr), xml_escape(data.output)) + T(Use, "<Use>%r%r</Use>", optional_tagged("var", data.var), xml_escape(data.path)) + T(InlineCCode, "<InlineCode>%r</InlineCode>", xml_escape(data.code)) + T(Deserialize, "<Deserialize><type>%r</type>%r</Deserialize>", type_ast_to_xml(data.type), ast_to_xml(data.value)) + default: return "???"; +#undef T + } +} + +CORD type_ast_to_xml(type_ast_t *t) +{ + if (!t) return "NULL"; + + switch (t->tag) { +#define T(type, ...) case type: { auto data = t->__data.type; (void)data; return CORD_asprintf(__VA_ARGS__); } + T(UnknownTypeAST, "<UnknownType/>") + T(VarTypeAST, "%s", data.name) + T(PointerTypeAST, "<PointerType is_stack=\"%s\">%r</PointerType>", + data.is_stack ? "yes" : "no", type_ast_to_xml(data.pointed)) + T(ArrayTypeAST, "<ArrayType>%r</ArrayType>", type_ast_to_xml(data.item)) + T(SetTypeAST, "<TableType>%r</TableType>", type_ast_to_xml(data.item)) + T(TableTypeAST, "<TableType>%r %r</TableType>", type_ast_to_xml(data.key), type_ast_to_xml(data.value)) + T(FunctionTypeAST, "<FunctionType>%r %r</FunctionType>", arg_list_to_xml(data.args), type_ast_to_xml(data.ret)) + T(OptionalTypeAST, "<OptionalType>%r</OptionalType>", data.type) + T(MutexedTypeAST, "<MutexedType>%r</MutexedType>", data.type) +#undef T + default: return CORD_EMPTY; + } +} + +int printf_ast(FILE *stream, const struct printf_info *info, const void *const args[]) +{ + ast_t *ast = *(ast_t**)(args[0]); + if (ast) { + if (info->alt) + return fprintf(stream, "%.*s", (int)(ast->end - ast->start), ast->start); + else + return CORD_put(ast_to_xml(ast), stream); + } else { + return fputs("(null)", stream); + } +} + +PUREFUNC bool is_idempotent(ast_t *ast) +{ + switch (ast->tag) { + case Int: case Bool: case Num: case Var: case None: case TextLiteral: return true; + case Index: { + auto index = Match(ast, Index); + return is_idempotent(index->indexed) && index->index != NULL && is_idempotent(index->index); + } + case FieldAccess: { + auto access = Match(ast, FieldAccess); + return is_idempotent(access->fielded); + } + default: return false; + } +} + +void _visit_topologically(ast_t *ast, Table_t definitions, Table_t *visited, Closure_t fn) +{ + void (*visit)(void*, ast_t*) = (void*)fn.fn; + if (ast->tag == StructDef) { + auto def = Match(ast, StructDef); + if (Table$str_get(*visited, def->name)) + return; + + Table$str_set(visited, def->name, (void*)_visit_topologically); + for (arg_ast_t *field = def->fields; field; field = field->next) { + if (field->type && field->type->tag == VarTypeAST) { + const char *field_type_name = Match(field->type, VarTypeAST)->name; + ast_t *dependency = Table$str_get(definitions, field_type_name); + if (dependency) { + _visit_topologically(dependency, definitions, visited, fn); + } + } + + } + visit(fn.userdata, ast); + } else if (ast->tag == EnumDef) { + auto def = Match(ast, EnumDef); + if (Table$str_get(*visited, def->name)) + return; + + Table$str_set(visited, def->name, (void*)_visit_topologically); + for (tag_ast_t *tag = def->tags; tag; tag = tag->next) { + for (arg_ast_t *field = tag->fields; field; field = field->next) { + if (field->type && field->type->tag == VarTypeAST) { + const char *field_type_name = Match(field->type, VarTypeAST)->name; + ast_t *dependency = Table$str_get(definitions, field_type_name); + if (dependency) { + _visit_topologically(dependency, definitions, visited, fn); + } + } + } + } + visit(fn.userdata, ast); + } else if (ast->tag == LangDef) { + auto def = Match(ast, LangDef); + if (Table$str_get(*visited, def->name)) + return; + visit(fn.userdata, ast); + } else { + visit(fn.userdata, ast); + } +} + +void visit_topologically(ast_list_t *asts, Closure_t fn) +{ + // Visit each top-level statement in topological order: + // - 'use' statements first + // - then typedefs + // - visiting typedefs' dependencies first + // - then function/variable declarations + + Table_t definitions = {}; + for (ast_list_t *stmt = asts; stmt; stmt = stmt->next) { + if (stmt->ast->tag == StructDef) { + auto def = Match(stmt->ast, StructDef); + Table$str_set(&definitions, def->name, stmt->ast); + } else if (stmt->ast->tag == EnumDef) { + auto def = Match(stmt->ast, EnumDef); + Table$str_set(&definitions, def->name, stmt->ast); + } else if (stmt->ast->tag == LangDef) { + auto def = Match(stmt->ast, LangDef); + Table$str_set(&definitions, def->name, stmt->ast); + } + } + + void (*visit)(void*, ast_t*) = (void*)fn.fn; + Table_t visited = {}; + // First: 'use' statements in order: + for (ast_list_t *stmt = asts; stmt; stmt = stmt->next) { + if (stmt->ast->tag == Use || (stmt->ast->tag == Declare && Match(stmt->ast, Declare)->value->tag == Use)) + visit(fn.userdata, stmt->ast); + } + // Then typedefs in topological order: + for (ast_list_t *stmt = asts; stmt; stmt = stmt->next) { + if (stmt->ast->tag == StructDef || stmt->ast->tag == EnumDef || stmt->ast->tag == LangDef) + _visit_topologically(stmt->ast, definitions, &visited, fn); + } + // Then everything else in order: + for (ast_list_t *stmt = asts; stmt; stmt = stmt->next) { + if (!(stmt->ast->tag == StructDef || stmt->ast->tag == EnumDef || stmt->ast->tag == LangDef + || stmt->ast->tag == Use || (stmt->ast->tag == Declare && Match(stmt->ast, Declare)->value->tag == Use))) { + visit(fn.userdata, stmt->ast); + } + } +} + +// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 diff --git a/src/ast.h b/src/ast.h new file mode 100644 index 00000000..c9db9061 --- /dev/null +++ b/src/ast.h @@ -0,0 +1,353 @@ +#pragma once + +// Logic defining ASTs (abstract syntax trees) to represent code + +#include <err.h> +#include <gc/cord.h> +#include <printf.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdlib.h> + +#include "stdlib/datatypes.h" +#include "stdlib/files.h" +#include "stdlib/util.h" + +#define NewAST(_file, _start, _end, ast_tag, ...) (new(ast_t, .file=_file, .start=_start, .end=_end,\ + .tag=ast_tag, .__data.ast_tag={__VA_ARGS__})) +#define NewTypeAST(_file, _start, _end, ast_tag, ...) (new(type_ast_t, .file=_file, .start=_start, .end=_end,\ + .tag=ast_tag, .__data.ast_tag={__VA_ARGS__})) +#define FakeAST(ast_tag, ...) (new(ast_t, .tag=ast_tag, .__data.ast_tag={__VA_ARGS__})) +#define WrapAST(ast, ast_tag, ...) (new(ast_t, .file=(ast)->file, .start=(ast)->start, .end=(ast)->end, .tag=ast_tag, .__data.ast_tag={__VA_ARGS__})) +#define TextAST(ast, _str) WrapAST(ast, TextLiteral, .str=GC_strdup(_str)) +#define Match(x, _tag) ((x)->tag == _tag ? &(x)->__data._tag : (errx(1, __FILE__ ":%d This was supposed to be a " # _tag "\n", __LINE__), &(x)->__data._tag)) + +#define REVERSE_LIST(list) do { \ + __typeof(list) _prev = NULL; \ + __typeof(list) _next = NULL; \ + auto _current = list; \ + while (_current != NULL) { \ + _next = _current->next; \ + _current->next = _prev; \ + _prev = _current; \ + _current = _next; \ + } \ + list = _prev; \ +} while(0) + +struct binding_s; +typedef struct type_ast_s type_ast_t; +typedef struct ast_s ast_t; + +typedef struct ast_list_s { + ast_t *ast; + struct ast_list_s *next; +} ast_list_t; + +typedef struct arg_ast_s { + const char *name; + type_ast_t *type; + ast_t *value; + struct arg_ast_s *next; +} arg_ast_t; + +typedef struct when_clause_s { + ast_t *pattern, *body; + struct when_clause_s *next; +} when_clause_t; + +typedef enum { + BINOP_UNKNOWN, + BINOP_POWER=100, BINOP_MULT, BINOP_DIVIDE, BINOP_MOD, BINOP_MOD1, BINOP_PLUS, + BINOP_MINUS, BINOP_CONCAT, BINOP_LSHIFT, BINOP_ULSHIFT, BINOP_RSHIFT, BINOP_URSHIFT, BINOP_MIN, + BINOP_MAX, BINOP_EQ, BINOP_NE, BINOP_LT, BINOP_LE, BINOP_GT, BINOP_GE, + BINOP_CMP, + BINOP_AND, BINOP_OR, BINOP_XOR, +} binop_e; + +extern const char *binop_method_names[BINOP_XOR+1]; + +typedef enum { + UnknownTypeAST, + VarTypeAST, + PointerTypeAST, + ArrayTypeAST, + SetTypeAST, + TableTypeAST, + FunctionTypeAST, + OptionalTypeAST, + MutexedTypeAST, +} type_ast_e; + +typedef struct tag_ast_s { + const char *name; + arg_ast_t *fields; + bool secret:1; + struct tag_ast_s *next; +} tag_ast_t; + +struct type_ast_s { + type_ast_e tag; + file_t *file; + const char *start, *end; + union { + struct {} UnknownTypeAST; + struct { + const char *name; + } VarTypeAST; + struct { + type_ast_t *pointed; + bool is_stack:1; + } PointerTypeAST; + struct { + type_ast_t *item; + } ArrayTypeAST; + struct { + type_ast_t *key, *value; + ast_t *default_value; + } TableTypeAST; + struct { + type_ast_t *item; + } SetTypeAST; + struct { + arg_ast_t *args; + type_ast_t *ret; + } FunctionTypeAST; + struct { + type_ast_t *type; + } OptionalTypeAST, MutexedTypeAST; + } __data; +}; + +typedef enum { + Unknown = 0, + None, Bool, Var, + Int, Num, + TextLiteral, TextJoin, PrintStatement, + Path, + Declare, Assign, + BinaryOp, UpdateAssign, + Not, Negative, HeapAllocate, StackReference, Mutexed, Holding, + Min, Max, + Array, Set, Table, TableEntry, Comprehension, + FunctionDef, Lambda, ConvertDef, + FunctionCall, MethodCall, + Block, + For, While, If, When, Repeat, + Reduction, + Skip, Stop, Pass, + Defer, + Return, + Extern, + StructDef, EnumDef, LangDef, + Index, FieldAccess, Optional, NonOptional, + Moment, + DocTest, + Use, + InlineCCode, + Deserialize, +} ast_e; + +struct ast_s { + ast_e tag; + file_t *file; + const char *start, *end; + union { + struct {} Unknown; + struct { + type_ast_t *type; + } None; + struct { + bool b; + } Bool; + struct { + const char *name; + } Var; + struct { + const char *str; + } Int; + struct { + double n; + } Num; + struct { + CORD cord; + } TextLiteral; + struct { + const char *lang; + ast_list_t *children; + } TextJoin; + struct { + const char *path; + } Path; + struct { + ast_list_t *to_print; + } PrintStatement; + struct { + ast_t *var; + ast_t *value; + } Declare; + struct { + ast_list_t *targets, *values; + } Assign; + struct { + ast_t *lhs; + binop_e op; + ast_t *rhs; + } BinaryOp, UpdateAssign; + struct { + ast_t *value; + } Not, Negative, HeapAllocate, StackReference, Mutexed; + struct { + ast_t *mutexed, *body; + } Holding; + struct { + ast_t *lhs, *rhs, *key; + } Min, Max; + struct { + type_ast_t *item_type; + ast_list_t *items; + } Array; + struct { + type_ast_t *item_type; + ast_list_t *items; + } Set; + struct { + type_ast_t *key_type, *value_type; + ast_t *default_value; + ast_t *fallback; + ast_list_t *entries; + } Table; + struct { + ast_t *key, *value; + } TableEntry; + struct { + ast_list_t *vars; + ast_t *expr, *iter, *filter; + } Comprehension; + struct { + ast_t *name; + arg_ast_t *args; + type_ast_t *ret_type; + ast_t *body; + ast_t *cache; + bool is_inline; + } FunctionDef; + struct { + arg_ast_t *args; + type_ast_t *ret_type; + ast_t *body; + ast_t *cache; + bool is_inline; + } ConvertDef; + struct { + arg_ast_t *args; + type_ast_t *ret_type; + ast_t *body; + int64_t id; + } Lambda; + struct { + ast_t *fn; + arg_ast_t *args; + } FunctionCall; + struct { + const char *name; + ast_t *self; + arg_ast_t *args; + } MethodCall; + struct { + ast_list_t *statements; + } Block; + struct { + ast_list_t *vars; + ast_t *iter, *body, *empty; + } For; + struct { + ast_t *condition, *body; + } While; + struct { + ast_t *body; + } Repeat; + struct { + ast_t *condition, *body, *else_body; + } If; + struct { + ast_t *subject; + when_clause_t *clauses; + ast_t *else_body; + } When; + struct { + ast_t *iter, *key; + binop_e op; + } Reduction; + struct { + const char *target; + } Skip, Stop; + struct {} Pass; + struct { + ast_t *body; + } Defer; + struct { + ast_t *value; + } Return; + struct { + const char *name; + type_ast_t *type; + } Extern; + struct { + const char *name; + arg_ast_t *fields; + ast_t *namespace; + bool secret:1, external:1, opaque:1; + } StructDef; + struct { + const char *name; + tag_ast_t *tags; + ast_t *namespace; + } EnumDef; + struct { + const char *name; + ast_t *namespace; + } LangDef; + struct { + ast_t *indexed, *index; + bool unchecked; + } Index; + struct { + ast_t *fielded; + const char *field; + } FieldAccess; + struct { + ast_t *value; + } Optional, NonOptional; + struct { + Moment_t moment; + } Moment; + struct { + ast_t *expr; + const char *output; + bool skip_source:1; + } DocTest; + struct { + ast_t *var; + const char *path; + enum { USE_LOCAL, USE_MODULE, USE_SHARED_OBJECT, USE_HEADER, USE_C_CODE, USE_ASM } what; + } Use; + struct { + CORD code; + struct type_s *type; + type_ast_t *type_ast; + } InlineCCode; + struct { + ast_t *value; + type_ast_t *type; + } Deserialize; + } __data; +}; + +CORD ast_to_xml(ast_t *ast); +CORD type_ast_to_xml(type_ast_t *ast); +int printf_ast(FILE *stream, const struct printf_info *info, const void *const args[]); +PUREFUNC bool is_idempotent(ast_t *ast); +void visit_topologically(ast_list_t *ast, Closure_t fn); + +// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 diff --git a/src/compile.c b/src/compile.c new file mode 100644 index 00000000..73d5ebfe --- /dev/null +++ b/src/compile.c @@ -0,0 +1,4597 @@ +// Compilation logic +#include <ctype.h> +#include <glob.h> +#include <gc.h> +#include <gc/cord.h> +#include <gmp.h> +#include <stdio.h> +#include <uninorm.h> + +#include "ast.h" +#include "compile.h" +#include "cordhelpers.h" +#include "enums.h" +#include "environment.h" +#include "stdlib/integers.h" +#include "stdlib/nums.h" +#include "stdlib/paths.h" +#include "stdlib/patterns.h" +#include "stdlib/text.h" +#include "stdlib/util.h" +#include "structs.h" +#include "typecheck.h" + +typedef ast_t* (*comprehension_body_t)(ast_t*, ast_t*); + +static CORD compile_to_pointer_depth(env_t *env, ast_t *ast, int64_t target_depth, bool needs_incref); +static CORD compile_math_method(env_t *env, binop_e op, ast_t *lhs, ast_t *rhs, type_t *required_type); +static CORD compile_string(env_t *env, ast_t *ast, CORD color); +static CORD compile_arguments(env_t *env, ast_t *call_ast, arg_t *spec_args, arg_ast_t *call_args); +static CORD compile_maybe_incref(env_t *env, ast_t *ast, type_t *t); +static CORD compile_int_to_type(env_t *env, ast_t *ast, type_t *target); +static CORD compile_unsigned_type(type_t *t); +static CORD promote_to_optional(type_t *t, CORD code); +static CORD compile_none(type_t *t); +static CORD compile_to_type(env_t *env, ast_t *ast, type_t *t); +static CORD check_none(type_t *t, CORD value); +static CORD optional_into_nonnone(type_t *t, CORD value); +static CORD compile_string_literal(CORD literal); + +CORD promote_to_optional(type_t *t, CORD code) +{ + if (t == THREAD_TYPE || t == PATH_TYPE || t == PATH_TYPE_TYPE || t->tag == MomentType) { + return code; + } else if (t->tag == IntType) { + switch (Match(t, IntType)->bits) { + case TYPE_IBITS8: return CORD_all("((OptionalInt8_t){", code, "})"); + case TYPE_IBITS16: return CORD_all("((OptionalInt16_t){", code, "})"); + case TYPE_IBITS32: return CORD_all("((OptionalInt32_t){", code, "})"); + case TYPE_IBITS64: return CORD_all("((OptionalInt64_t){", code, "})"); + default: errx(1, "Unsupported in type: %T", t); + } + } else if (t->tag == ByteType) { + return CORD_all("((OptionalByte_t){", code, "})"); + } else if (t->tag == StructType) { + return CORD_all("({ ", compile_type(Type(OptionalType, .type=t)), " nonnull = {.value=", code, "}; nonnull.is_none = false; nonnull; })"); + } else { + return code; + } +} + +static CORD with_source_info(ast_t *ast, CORD code) +{ + if (code == CORD_EMPTY || !ast || !ast->file) + return code; + int64_t line = get_line_number(ast->file, ast->start); + return CORD_asprintf("\n#line %ld\n%r", line, code); +} + +static bool promote(env_t *env, ast_t *ast, CORD *code, type_t *actual, type_t *needed) +{ + if (type_eq(actual, needed)) + return true; + + if (!can_promote(actual, needed)) + return false; + + if (needed->tag == ClosureType && actual->tag == FunctionType) { + *code = CORD_all("((Closure_t){", *code, ", NULL})"); + return true; + } + + // Optional promotion: + if (needed->tag == OptionalType && type_eq(actual, Match(needed, OptionalType)->type)) { + *code = promote_to_optional(actual, *code); + return true; + } + + // Optional -> Bool promotion + if (actual->tag == OptionalType && needed->tag == BoolType) { + *code = CORD_all("(!", check_none(actual, *code), ")"); + return true; + } + + // Lang to Text: + if (actual->tag == TextType && needed->tag == TextType && streq(Match(needed, TextType)->lang, "Text")) + return true; + + // Automatic optional checking for nums: + if (needed->tag == NumType && actual->tag == OptionalType && Match(actual, OptionalType)->type->tag == NumType) { + *code = CORD_all("({ ", compile_declaration(actual, "opt"), " = ", *code, "; ", + "if unlikely (", check_none(actual, "opt"), ")\n", + CORD_asprintf("fail_source(%r, %ld, %ld, \"This was expected to be a value, but it's none\");\n", + CORD_quoted(ast->file->filename), + (long)(ast->start - ast->file->text), + (long)(ast->end - ast->file->text)), + optional_into_nonnone(actual, "opt"), "; })"); + return true; + } + + // Numeric promotions/demotions + if ((is_numeric_type(actual) || actual->tag == BoolType) && (is_numeric_type(needed) || needed->tag == BoolType)) { + arg_ast_t *args = new(arg_ast_t, .value=FakeAST(InlineCCode, .code=*code, .type=actual)); + binding_t *constructor = get_constructor(env, needed, args); + if (constructor) { + auto fn = Match(constructor->type, FunctionType); + if (fn->args->next == NULL) { + *code = CORD_all(constructor->code, "(", compile_arguments(env, ast, fn->args, args), ")"); + return true; + } + } + } + + if (needed->tag == EnumType) { + const char *tag = enum_single_value_tag(needed, actual); + binding_t *b = get_binding(Match(needed, EnumType)->env, tag); + assert(b && b->type->tag == FunctionType); + // Single-value enum constructor: + if (!promote(env, ast, code, actual, Match(b->type, FunctionType)->args->type)) + return false; + *code = CORD_all(b->code, "(", *code, ")"); + return true; + } + + // Text to C String + if (actual->tag == TextType && type_eq(actual, TEXT_TYPE) && needed->tag == CStringType) { + *code = CORD_all("Text$as_c_string(", *code, ")"); + return true; + } + + // Automatic dereferencing: + if (actual->tag == PointerType + && can_promote(Match(actual, PointerType)->pointed, needed)) { + *code = CORD_all("*(", *code, ")"); + return promote(env, ast, code, Match(actual, PointerType)->pointed, needed); + } + + // Stack ref promotion: + if (actual->tag == PointerType && needed->tag == PointerType) + return true; + + // Cross-promotion between tables with default values and without + if (needed->tag == TableType && actual->tag == TableType) + return true; + + if (needed->tag == ClosureType && actual->tag == ClosureType) + return true; + + if (needed->tag == FunctionType && actual->tag == FunctionType) { + *code = CORD_all("(", compile_type(needed), ")", *code); + return true; + } + + // Set -> Array promotion: + if (needed->tag == ArrayType && actual->tag == SetType + && type_eq(Match(needed, ArrayType)->item_type, Match(actual, SetType)->item_type)) { + *code = CORD_all("(", *code, ").entries"); + return true; + } + + return false; +} + +CORD compile_maybe_incref(env_t *env, ast_t *ast, type_t *t) +{ + if (is_idempotent(ast) && can_be_mutated(env, ast)) { + if (t->tag == ArrayType) + return CORD_all("ARRAY_COPY(", compile_to_type(env, ast, t), ")"); + else if (t->tag == TableType || t->tag == SetType) + return CORD_all("TABLE_COPY(", compile_to_type(env, ast, t), ")"); + } + return compile_to_type(env, ast, t); +} + +static void add_closed_vars(Table_t *closed_vars, env_t *enclosing_scope, env_t *env, ast_t *ast) +{ + if (ast == NULL) + return; + + switch (ast->tag) { + case Var: { + binding_t *b = get_binding(enclosing_scope, Match(ast, Var)->name); + if (b) { + binding_t *shadow = get_binding(env, Match(ast, Var)->name); + if (!shadow || shadow == b) + Table$str_set(closed_vars, Match(ast, Var)->name, b); + } + break; + } + case TextJoin: { + for (ast_list_t *child = Match(ast, TextJoin)->children; child; child = child->next) + add_closed_vars(closed_vars, enclosing_scope, env, child->ast); + break; + } + case PrintStatement: { + for (ast_list_t *child = Match(ast, PrintStatement)->to_print; child; child = child->next) + add_closed_vars(closed_vars, enclosing_scope, env, child->ast); + break; + } + case Declare: { + add_closed_vars(closed_vars, enclosing_scope, env, Match(ast, Declare)->value); + bind_statement(env, ast); + break; + } + case Assign: { + for (ast_list_t *target = Match(ast, Assign)->targets; target; target = target->next) + add_closed_vars(closed_vars, enclosing_scope, env, target->ast); + for (ast_list_t *value = Match(ast, Assign)->values; value; value = value->next) + add_closed_vars(closed_vars, enclosing_scope, env, value->ast); + break; + } + case BinaryOp: { + add_closed_vars(closed_vars, enclosing_scope, env, Match(ast, BinaryOp)->lhs); + add_closed_vars(closed_vars, enclosing_scope, env, Match(ast, BinaryOp)->rhs); + break; + } + case UpdateAssign: { + add_closed_vars(closed_vars, enclosing_scope, env, Match(ast, UpdateAssign)->lhs); + add_closed_vars(closed_vars, enclosing_scope, env, Match(ast, UpdateAssign)->rhs); + break; + } + case Not: case Negative: case HeapAllocate: case StackReference: case Mutexed: { + // UNSAFE: + ast_t *value = ast->__data.Not.value; + // END UNSAFE + add_closed_vars(closed_vars, enclosing_scope, env, value); + break; + } + case Holding: { + add_closed_vars(closed_vars, enclosing_scope, env, Match(ast, Holding)->mutexed); + add_closed_vars(closed_vars, enclosing_scope, env, Match(ast, Holding)->body); + break; + } + case Min: { + add_closed_vars(closed_vars, enclosing_scope, env, Match(ast, Min)->lhs); + add_closed_vars(closed_vars, enclosing_scope, env, Match(ast, Min)->rhs); + add_closed_vars(closed_vars, enclosing_scope, env, Match(ast, Min)->key); + break; + } + case Max: { + add_closed_vars(closed_vars, enclosing_scope, env, Match(ast, Max)->lhs); + add_closed_vars(closed_vars, enclosing_scope, env, Match(ast, Max)->rhs); + add_closed_vars(closed_vars, enclosing_scope, env, Match(ast, Max)->key); + break; + } + case Array: { + for (ast_list_t *item = Match(ast, Array)->items; item; item = item->next) + add_closed_vars(closed_vars, enclosing_scope, env, item->ast); + break; + } + case Set: { + for (ast_list_t *item = Match(ast, Set)->items; item; item = item->next) + add_closed_vars(closed_vars, enclosing_scope, env, item->ast); + break; + } + case Table: { + add_closed_vars(closed_vars, enclosing_scope, env, Match(ast, Table)->default_value); + add_closed_vars(closed_vars, enclosing_scope, env, Match(ast, Table)->fallback); + for (ast_list_t *entry = Match(ast, Table)->entries; entry; entry = entry->next) + add_closed_vars(closed_vars, enclosing_scope, env, entry->ast); + break; + } + case TableEntry: { + add_closed_vars(closed_vars, enclosing_scope, env, Match(ast, TableEntry)->key); + add_closed_vars(closed_vars, enclosing_scope, env, Match(ast, TableEntry)->value); + break; + } + case Comprehension: { + auto comp = Match(ast, Comprehension); + if (comp->expr->tag == Comprehension) { // Nested comprehension + ast_t *body = comp->filter ? WrapAST(ast, If, .condition=comp->filter, .body=comp->expr) : comp->expr; + ast_t *loop = WrapAST(ast, For, .vars=comp->vars, .iter=comp->iter, .body=body); + return add_closed_vars(closed_vars, enclosing_scope, env, loop); + } + + // Array/Set/Table comprehension: + ast_t *body = comp->expr; + if (comp->filter) + body = WrapAST(comp->expr, If, .condition=comp->filter, .body=body); + ast_t *loop = WrapAST(ast, For, .vars=comp->vars, .iter=comp->iter, .body=body); + add_closed_vars(closed_vars, enclosing_scope, env, loop); + break; + } + case Lambda: { + auto lambda = Match(ast, Lambda); + env_t *lambda_scope = fresh_scope(env); + for (arg_ast_t *arg = lambda->args; arg; arg = arg->next) + set_binding(lambda_scope, arg->name, get_arg_ast_type(env, arg), CORD_all("_$", arg->name)); + add_closed_vars(closed_vars, enclosing_scope, lambda_scope, lambda->body); + break; + } + case FunctionCall: { + add_closed_vars(closed_vars, enclosing_scope, env, Match(ast, FunctionCall)->fn); + for (arg_ast_t *arg = Match(ast, FunctionCall)->args; arg; arg = arg->next) + add_closed_vars(closed_vars, enclosing_scope, env, arg->value); + break; + } + case MethodCall: { + add_closed_vars(closed_vars, enclosing_scope, env, Match(ast, MethodCall)->self); + for (arg_ast_t *arg = Match(ast, MethodCall)->args; arg; arg = arg->next) + add_closed_vars(closed_vars, enclosing_scope, env, arg->value); + break; + } + case Block: { + env = fresh_scope(env); + for (ast_list_t *statement = Match(ast, Block)->statements; statement; statement = statement->next) + add_closed_vars(closed_vars, enclosing_scope, env, statement->ast); + break; + } + case For: { + add_closed_vars(closed_vars, enclosing_scope, env, Match(ast, For)->iter); + env_t *body_scope = for_scope(env, ast); + add_closed_vars(closed_vars, enclosing_scope, body_scope, Match(ast, For)->body); + add_closed_vars(closed_vars, enclosing_scope, env, Match(ast, For)->empty); + break; + } + case While: { + auto while_ = Match(ast, While); + add_closed_vars(closed_vars, enclosing_scope, env, while_->condition); + env_t *scope = fresh_scope(env); + add_closed_vars(closed_vars, enclosing_scope, scope, while_->body); + break; + } + case If: { + auto if_ = Match(ast, If); + ast_t *condition = if_->condition; + if (condition->tag == Declare) { + env_t *truthy_scope = fresh_scope(env); + bind_statement(truthy_scope, condition); + add_closed_vars(closed_vars, enclosing_scope, env, Match(condition, Declare)->value); + ast_t *var = Match(condition, Declare)->var; + type_t *cond_t = get_type(truthy_scope, var); + if (cond_t->tag == OptionalType) { + set_binding(truthy_scope, Match(var, Var)->name, + Match(cond_t, OptionalType)->type, CORD_EMPTY); + } + add_closed_vars(closed_vars, enclosing_scope, truthy_scope, if_->body); + add_closed_vars(closed_vars, enclosing_scope, env, if_->else_body); + } else { + add_closed_vars(closed_vars, enclosing_scope, env, condition); + env_t *truthy_scope = env; + type_t *cond_t = get_type(env, condition); + if (condition->tag == Var && cond_t->tag == OptionalType) { + truthy_scope = fresh_scope(env); + set_binding(truthy_scope, Match(condition, Var)->name, + Match(cond_t, OptionalType)->type, CORD_EMPTY); + } + add_closed_vars(closed_vars, enclosing_scope, truthy_scope, if_->body); + add_closed_vars(closed_vars, enclosing_scope, env, if_->else_body); + } + break; + } + case When: { + auto when = Match(ast, When); + add_closed_vars(closed_vars, enclosing_scope, env, when->subject); + type_t *subject_t = get_type(env, when->subject); + + if (subject_t->tag != EnumType) { + for (when_clause_t *clause = when->clauses; clause; clause = clause->next) { + add_closed_vars(closed_vars, enclosing_scope, env, clause->pattern); + add_closed_vars(closed_vars, enclosing_scope, env, clause->body); + } + + if (when->else_body) + add_closed_vars(closed_vars, enclosing_scope, env, when->else_body); + return; + } + + auto enum_t = Match(subject_t, EnumType); + for (when_clause_t *clause = when->clauses; clause; clause = clause->next) { + const char *clause_tag_name; + if (clause->pattern->tag == Var) + clause_tag_name = Match(clause->pattern, Var)->name; + else if (clause->pattern->tag == FunctionCall && Match(clause->pattern, FunctionCall)->fn->tag == Var) + clause_tag_name = Match(Match(clause->pattern, FunctionCall)->fn, Var)->name; + else + code_err(clause->pattern, "This is not a valid pattern for a %T enum", subject_t); + + type_t *tag_type = NULL; + for (tag_t *tag = enum_t->tags; tag; tag = tag->next) { + if (streq(tag->name, clause_tag_name)) { + tag_type = tag->type; + break; + } + } + assert(tag_type); + env_t *scope = when_clause_scope(env, subject_t, clause); + add_closed_vars(closed_vars, enclosing_scope, scope, clause->body); + } + if (when->else_body) + add_closed_vars(closed_vars, enclosing_scope, env, when->else_body); + break; + } + case Repeat: { + add_closed_vars(closed_vars, enclosing_scope, env, Match(ast, Repeat)->body); + break; + } + case Reduction: { + auto reduction = Match(ast, Reduction); + static int64_t next_id = 1; + ast_t *item = FakeAST(Var, heap_strf("$it%ld", next_id++)); + ast_t *loop = FakeAST(For, .vars=new(ast_list_t, .ast=item), .iter=reduction->iter, .body=FakeAST(Pass)); + env_t *scope = for_scope(env, loop); + add_closed_vars(closed_vars, enclosing_scope, scope, reduction->key ? reduction->key : item); + break; + } + case Defer: { + add_closed_vars(closed_vars, enclosing_scope, env, Match(ast, Defer)->body); + break; + } + case Return: { + ast_t *ret = Match(ast, Return)->value; + if (ret) add_closed_vars(closed_vars, enclosing_scope, env, ret); + break; + } + case Index: { + add_closed_vars(closed_vars, enclosing_scope, env, Match(ast, Index)->indexed); + add_closed_vars(closed_vars, enclosing_scope, env, Match(ast, Index)->index); + break; + } + case FieldAccess: { + add_closed_vars(closed_vars, enclosing_scope, env, Match(ast, FieldAccess)->fielded); + break; + } + case Optional: { + add_closed_vars(closed_vars, enclosing_scope, env, Match(ast, Optional)->value); + break; + } + case NonOptional: { + add_closed_vars(closed_vars, enclosing_scope, env, Match(ast, NonOptional)->value); + break; + } + case DocTest: { + add_closed_vars(closed_vars, enclosing_scope, env, Match(ast, DocTest)->expr); + break; + } + case Deserialize: { + add_closed_vars(closed_vars, enclosing_scope, env, Match(ast, Deserialize)->value); + break; + } + case Use: case FunctionDef: case ConvertDef: case StructDef: case EnumDef: case LangDef: { + errx(1, "Definitions should not be reachable in a closure."); + } + default: + break; + } +} + +static Table_t get_closed_vars(env_t *env, arg_ast_t *args, ast_t *block) +{ + env_t *body_scope = fresh_scope(env); + for (arg_ast_t *arg = args; arg; arg = arg->next) { + type_t *arg_type = get_arg_ast_type(env, arg); + set_binding(body_scope, arg->name, arg_type, CORD_cat("_$", arg->name)); + } + + Table_t closed_vars = {}; + add_closed_vars(&closed_vars, env, body_scope, block); + return closed_vars; +} + +CORD compile_declaration(type_t *t, CORD name) +{ + if (t->tag == FunctionType) { + auto fn = Match(t, FunctionType); + CORD code = CORD_all(compile_type(fn->ret), " (*", name, ")("); + for (arg_t *arg = fn->args; arg; arg = arg->next) { + code = CORD_all(code, compile_type(arg->type)); + if (arg->next) code = CORD_cat(code, ", "); + } + if (!fn->args) code = CORD_all(code, "void"); + return CORD_all(code, ")"); + } else if (t->tag != ModuleType) { + return CORD_all(compile_type(t), " ", name); + } else { + return CORD_EMPTY; + } +} + +PUREFUNC CORD compile_unsigned_type(type_t *t) +{ + if (t->tag != IntType) + errx(1, "Not an int type, so unsigned doesn't make sense!"); + switch (Match(t, IntType)->bits) { + case TYPE_IBITS8: return "uint8_t"; + case TYPE_IBITS16: return "uint16_t"; + case TYPE_IBITS32: return "uint32_t"; + case TYPE_IBITS64: return "uint64_t"; + default: errx(1, "Invalid integer bit size"); + } +} + +CORD compile_type(type_t *t) +{ + if (t == THREAD_TYPE) return "Thread_t"; + else if (t == RNG_TYPE) return "RNG_t"; + else if (t == MATCH_TYPE) return "Match_t"; + else if (t == PATH_TYPE) return "Path_t"; + else if (t == PATH_TYPE_TYPE) return "PathType_t"; + + switch (t->tag) { + case ReturnType: errx(1, "Shouldn't be compiling ReturnType to a type"); + case AbortType: return "void"; + case VoidType: return "void"; + case MemoryType: return "void"; + case MutexedType: return "MutexedData_t"; + case BoolType: return "Bool_t"; + case ByteType: return "Byte_t"; + case CStringType: return "const char*"; + case MomentType: return "Moment_t"; + case BigIntType: return "Int_t"; + case IntType: return CORD_asprintf("Int%ld_t", Match(t, IntType)->bits); + case NumType: return Match(t, NumType)->bits == TYPE_NBITS64 ? "Num_t" : CORD_asprintf("Num%ld_t", Match(t, NumType)->bits); + case TextType: { + auto text = Match(t, TextType); + if (!text->lang || streq(text->lang, "Text")) + return "Text_t"; + else if (streq(text->lang, "Pattern")) + return "Pattern_t"; + else + return CORD_all(namespace_prefix(text->env, text->env->namespace->parent), text->lang, "$$type"); + } + case ArrayType: return "Array_t"; + case SetType: return "Table_t"; + case TableType: return "Table_t"; + case FunctionType: { + auto fn = Match(t, FunctionType); + CORD code = CORD_all(compile_type(fn->ret), " (*)("); + for (arg_t *arg = fn->args; arg; arg = arg->next) { + code = CORD_all(code, compile_type(arg->type)); + if (arg->next) code = CORD_cat(code, ", "); + } + if (!fn->args) + code = CORD_all(code, "void"); + return CORD_all(code, ")"); + } + case ClosureType: return "Closure_t"; + case PointerType: return CORD_cat(compile_type(Match(t, PointerType)->pointed), "*"); + case StructType: { + auto s = Match(t, StructType); + if (s->external) return s->name; + return CORD_all("struct ", namespace_prefix(s->env, s->env->namespace->parent), s->name, "$$struct"); + } + case EnumType: { + auto e = Match(t, EnumType); + return CORD_all(namespace_prefix(e->env, e->env->namespace->parent), e->name, "$$type"); + } + case OptionalType: { + type_t *nonnull = Match(t, OptionalType)->type; + switch (nonnull->tag) { + case CStringType: case FunctionType: case ClosureType: case MutexedType: + case PointerType: case EnumType: + return compile_type(nonnull); + case TextType: + return Match(nonnull, TextType)->lang ? compile_type(nonnull) : "OptionalText_t"; + case IntType: case BigIntType: case NumType: case BoolType: case ByteType: + case ArrayType: case TableType: case SetType: case MomentType: + return CORD_all("Optional", compile_type(nonnull)); + case StructType: { + if (nonnull == THREAD_TYPE) + return "Thread_t"; + if (nonnull == MATCH_TYPE) + return "OptionalMatch_t"; + if (nonnull == PATH_TYPE) + return "OptionalPath_t"; + if (nonnull == PATH_TYPE_TYPE) + return "OptionalPathType_t"; + auto s = Match(nonnull, StructType); + return CORD_all(namespace_prefix(s->env, s->env->namespace->parent), "$Optional", s->name, "$$type"); + } + default: + compiler_err(NULL, NULL, NULL, "Optional types are not supported for: %T", t); + } + } + case TypeInfoType: return "TypeInfo_t"; + default: compiler_err(NULL, NULL, NULL, "Compiling type is not implemented for type with tag %d", t->tag); + } +} + +static CORD compile_lvalue(env_t *env, ast_t *ast) +{ + if (!can_be_mutated(env, ast)) { + if (ast->tag == Index) { + ast_t *subject = Match(ast, Index)->indexed; + code_err(subject, "This is an immutable value, you can't mutate its contents"); + } else if (ast->tag == FieldAccess) { + ast_t *subject = Match(ast, FieldAccess)->fielded; + type_t *t = get_type(env, subject); + code_err(subject, "This is an immutable %T value, you can't assign to its fields", t); + } else { + code_err(ast, "This is a value of type %T and can't be used as an assignment target", get_type(env, ast)); + } + } + + if (ast->tag == Index) { + auto index = Match(ast, Index); + type_t *container_t = get_type(env, index->indexed); + if (container_t->tag == OptionalType) + code_err(index->indexed, "This value might be null, so it can't be safely used as an assignment target"); + + if (!index->index && container_t->tag == PointerType) + return compile(env, ast); + + container_t = value_type(container_t); + if (container_t->tag == ArrayType) { + CORD target_code = compile_to_pointer_depth(env, index->indexed, 1, false); + type_t *item_type = Match(container_t, ArrayType)->item_type; + if (index->unchecked) { + return CORD_all("Array_lvalue_unchecked(", compile_type(item_type), ", ", target_code, ", ", + compile_int_to_type(env, index->index, Type(IntType, .bits=TYPE_IBITS64)), + ", sizeof(", compile_type(item_type), "))"); + } else { + return CORD_all("Array_lvalue(", compile_type(item_type), ", ", target_code, ", ", + compile_int_to_type(env, index->index, Type(IntType, .bits=TYPE_IBITS64)), + ", ", heap_strf("%ld", ast->start - ast->file->text), + ", ", heap_strf("%ld", ast->end - ast->file->text), ")"); + } + } else if (container_t->tag == TableType) { + auto table_type = Match(container_t, TableType); + if (table_type->default_value) { + type_t *value_type = get_type(env, table_type->default_value); + return CORD_all("*Table$get_or_setdefault(", + compile_to_pointer_depth(env, index->indexed, 1, false), ", ", + compile_type(table_type->key_type), ", ", + compile_type(value_type), ", ", + compile_to_type(env, index->index, table_type->key_type), ", ", + compile(env, table_type->default_value), ", ", + compile_type_info(container_t), ")"); + } + if (index->unchecked) + code_err(ast, "Table indexes cannot be unchecked"); + return CORD_all("*(", compile_type(Type(PointerType, table_type->value_type)), ")Table$reserve(", + compile_to_pointer_depth(env, index->indexed, 1, false), ", ", + compile_to_type(env, index->index, Type(PointerType, table_type->key_type, .is_stack=true)), ", NULL,", + compile_type_info(container_t), ")"); + } else { + code_err(ast, "I don't know how to assign to this target"); + } + } else if (ast->tag == Var || ast->tag == FieldAccess || ast->tag == InlineCCode) { + return compile(env, ast); + } else { + code_err(ast, "I don't know how to assign to this"); + } +} + +static CORD compile_assignment(env_t *env, ast_t *target, CORD value) +{ + return CORD_all(compile_lvalue(env, target), " = ", value); +} + +static CORD compile_inline_block(env_t *env, ast_t *ast) +{ + if (ast->tag != Block) + return compile_statement(env, ast); + + CORD code = CORD_EMPTY; + ast_list_t *stmts = Match(ast, Block)->statements; + deferral_t *prev_deferred = env->deferred; + env = fresh_scope(env); + for (ast_list_t *stmt = stmts; stmt; stmt = stmt->next) + prebind_statement(env, stmt->ast); + for (ast_list_t *stmt = stmts; stmt; stmt = stmt->next) { + code = CORD_all(code, compile_statement(env, stmt->ast), "\n"); + bind_statement(env, stmt->ast); + } + for (deferral_t *deferred = env->deferred; deferred && deferred != prev_deferred; deferred = deferred->next) { + code = CORD_all(code, compile_statement(deferred->defer_env, deferred->block)); + } + return code; +} + +CORD optional_into_nonnone(type_t *t, CORD value) +{ + if (t->tag == OptionalType) t = Match(t, OptionalType)->type; + switch (t->tag) { + case IntType: + return CORD_all(value, ".i"); + case StructType: + if (t == THREAD_TYPE || t == MATCH_TYPE || t == PATH_TYPE || t == PATH_TYPE_TYPE) + return value; + return CORD_all(value, ".value"); + default: + return value; + } +} + +CORD check_none(type_t *t, CORD value) +{ + t = Match(t, OptionalType)->type; + if (t->tag == PointerType || t->tag == FunctionType || t->tag == CStringType + || t == THREAD_TYPE) + return CORD_all("(", value, " == NULL)"); + else if (t == MATCH_TYPE) + return CORD_all("((", value, ").index.small == 0)"); + else if (t == PATH_TYPE) + return CORD_all("((", value, ").type.$tag == PATH_NONE)"); + else if (t == PATH_TYPE_TYPE) + return CORD_all("((", value, ").$tag == PATH_NONE)"); + else if (t->tag == BigIntType) + return CORD_all("((", value, ").small == 0)"); + else if (t->tag == ClosureType) + return CORD_all("((", value, ").fn == NULL)"); + else if (t->tag == NumType) + return CORD_all("isnan(", value, ")"); + else if (t->tag == ArrayType) + return CORD_all("((", value, ").length < 0)"); + else if (t->tag == TableType || t->tag == SetType) + return CORD_all("((", value, ").entries.length < 0)"); + else if (t->tag == BoolType) + return CORD_all("((", value, ") == NONE_BOOL)"); + else if (t->tag == TextType) + return CORD_all("((", value, ").length < 0)"); + else if (t->tag == IntType || t->tag == ByteType || t->tag == StructType) + return CORD_all("(", value, ").is_none"); + else if (t->tag == EnumType) + return CORD_all("((", value, ").$tag == 0)"); + else if (t->tag == MomentType) + return CORD_all("((", value, ").tv_usec < 0)"); + else if (t->tag == MutexedType) + return CORD_all("(", value, " == NULL)"); + errx(1, "Optional check not implemented for: %T", t); +} + +static CORD compile_condition(env_t *env, ast_t *ast) +{ + type_t *t = get_type(env, ast); + if (t->tag == BoolType) { + return compile(env, ast); + } else if (t->tag == TextType) { + return CORD_all("(", compile(env, ast), ").length"); + } else if (t->tag == ArrayType) { + return CORD_all("(", compile(env, ast), ").length"); + } else if (t->tag == TableType || t->tag == SetType) { + return CORD_all("(", compile(env, ast), ").entries.length"); + } else if (t->tag == OptionalType) { + return CORD_all("!", check_none(t, compile(env, ast))); + } else if (t->tag == PointerType) { + code_err(ast, "This pointer will always be non-none, so it should not be used in a conditional."); + } else { + code_err(ast, "%T values cannot be used for conditionals", t); + } +} + +static CORD _compile_statement(env_t *env, ast_t *ast) +{ + switch (ast->tag) { + case When: { + // Typecheck to verify exhaustiveness: + type_t *result_t = get_type(env, ast); + (void)result_t; + + auto when = Match(ast, When); + type_t *subject_t = get_type(env, when->subject); + + if (subject_t->tag != EnumType) { + CORD prefix = CORD_EMPTY, suffix = CORD_EMPTY; + ast_t *subject = when->subject; + if (!is_idempotent(when->subject)) { + prefix = CORD_all("{\n", compile_declaration(subject_t, "_when_subject"), " = ", compile(env, subject), ";\n"); + suffix = "}\n"; + subject = WrapAST(subject, InlineCCode, .type=subject_t, .code="_when_subject"); + } + + CORD code = CORD_EMPTY; + for (when_clause_t *clause = when->clauses; clause; clause = clause->next) { + ast_t *comparison = WrapAST(clause->pattern, BinaryOp, .lhs=subject, .op=BINOP_EQ, .rhs=clause->pattern); + (void)get_type(env, comparison); + if (code != CORD_EMPTY) + code = CORD_all(code, "else "); + code = CORD_all(code, "if (", compile(env, comparison), ")", compile_statement(env, clause->body)); + } + if (when->else_body) + code = CORD_all(code, "else ", compile_statement(env, when->else_body)); + code = CORD_all(prefix, code, suffix); + return code; + } + + auto enum_t = Match(subject_t, EnumType); + CORD code = CORD_all("WHEN(", compile(env, when->subject), ", _when_subject, {\n"); + for (when_clause_t *clause = when->clauses; clause; clause = clause->next) { + if (clause->pattern->tag == Var) { + const char *clause_tag_name = Match(clause->pattern, Var)->name; + code = CORD_all(code, "case ", namespace_prefix(enum_t->env, enum_t->env->namespace), "tag$", clause_tag_name, ": {\n", + compile_statement(env, clause->body), + "}\n"); + continue; + } + + if (clause->pattern->tag != FunctionCall || Match(clause->pattern, FunctionCall)->fn->tag != Var) + code_err(clause->pattern, "This is not a valid pattern for a %T enum type", subject_t); + + const char *clause_tag_name = Match(Match(clause->pattern, FunctionCall)->fn, Var)->name; + code = CORD_all(code, "case ", namespace_prefix(enum_t->env, enum_t->env->namespace), "tag$", clause_tag_name, ": {\n"); + type_t *tag_type = NULL; + for (tag_t *tag = enum_t->tags; tag; tag = tag->next) { + if (streq(tag->name, clause_tag_name)) { + tag_type = tag->type; + break; + } + } + assert(tag_type); + env_t *scope = env; + + auto tag_struct = Match(tag_type, StructType); + arg_ast_t *args = Match(clause->pattern, FunctionCall)->args; + if (args && !args->next && tag_struct->fields && tag_struct->fields->next) { + if (args->value->tag != Var) + code_err(args->value, "This is not a valid variable to bind to"); + const char *var_name = Match(args->value, Var)->name; + if (!streq(var_name, "_")) { + code = CORD_all(code, compile_declaration(tag_type, compile(env, args->value)), " = _when_subject.", clause_tag_name, ";\n"); + scope = fresh_scope(scope); + set_binding(scope, Match(args->value, Var)->name, tag_type, CORD_EMPTY); + } + } else if (args) { + scope = fresh_scope(scope); + arg_t *field = tag_struct->fields; + for (arg_ast_t *arg = args; arg || field; arg = arg->next) { + if (!arg) + code_err(ast, "The field %T.%s.%s wasn't accounted for", subject_t, clause_tag_name, field->name); + if (!field) + code_err(arg->value, "This is one more field than %T has", subject_t); + if (arg->name) + code_err(arg->value, "Named arguments are not currently supported"); + + const char *var_name = Match(arg->value, Var)->name; + if (!streq(var_name, "_")) { + code = CORD_all(code, compile_declaration(field->type, compile(env, arg->value)), " = _when_subject.", clause_tag_name, ".", field->name, ";\n"); + set_binding(scope, Match(arg->value, Var)->name, field->type, CORD_EMPTY); + } + field = field->next; + } + } + if (clause->body->tag == Block) { + ast_list_t *statements = Match(clause->body, Block)->statements; + if (!statements || (statements->ast->tag == Pass && !statements->next)) + code = CORD_all(code, "break;\n}\n"); + else + code = CORD_all(code, compile_inline_block(scope, clause->body), "\nbreak;\n}\n"); + } else { + code = CORD_all(code, compile_statement(scope, clause->body), "\nbreak;\n}\n"); + } + } + if (when->else_body) { + if (when->else_body->tag == Block) { + ast_list_t *statements = Match(when->else_body, Block)->statements; + if (!statements || (statements->ast->tag == Pass && !statements->next)) + code = CORD_all(code, "default: break;"); + else + code = CORD_all(code, "default: {\n", compile_inline_block(env, when->else_body), "\nbreak;\n}\n"); + } else { + code = CORD_all(code, "default: {\n", compile_statement(env, when->else_body), "\nbreak;\n}\n"); + } + } else { + code = CORD_all(code, "default: errx(1, \"Invalid tag!\");\n"); + } + code = CORD_all(code, "\n})\n"); + return code; + } + case DocTest: { + auto test = Match(ast, DocTest); + type_t *expr_t = get_type(env, test->expr); + if (!expr_t) + code_err(test->expr, "I couldn't figure out the type of this expression"); + + CORD output = CORD_EMPTY; + if (test->output) { + const uint8_t *raw = (const uint8_t*)CORD_to_const_char_star(test->output); + uint8_t buf[128] = {0}; + size_t norm_len = sizeof(buf); + uint8_t *norm = u8_normalize(UNINORM_NFC, (uint8_t*)raw, strlen((char*)raw)+1, buf, &norm_len); + assert(norm[norm_len-1] == 0); + output = CORD_from_char_star((char*)norm); + if (norm && norm != buf) free(norm); + } + + CORD setup = CORD_EMPTY; + CORD test_code; + if (test->expr->tag == Declare) { + auto decl = Match(test->expr, Declare); + const char *varname = Match(decl->var, Var)->name; + if (streq(varname, "_")) + return compile_statement(env, WrapAST(ast, DocTest, .expr=decl->value, .output=output, .skip_source=test->skip_source)); + CORD var = CORD_all("_$", Match(decl->var, Var)->name); + type_t *t = get_type(env, decl->value); + CORD val_code = compile_maybe_incref(env, decl->value, t); + if (t->tag == FunctionType) { + assert(promote(env, decl->value, &val_code, t, Type(ClosureType, t))); + t = Type(ClosureType, t); + } + setup = CORD_all(compile_declaration(t, var), ";\n"); + test_code = CORD_all("(", var, " = ", val_code, ")"); + expr_t = t; + } else if (test->expr->tag == Assign) { + auto assign = Match(test->expr, Assign); + if (!assign->targets->next && assign->targets->ast->tag == Var && is_idempotent(assign->targets->ast)) { + // Common case: assigning to one variable: + type_t *lhs_t = get_type(env, assign->targets->ast); + if (assign->targets->ast->tag == Index && lhs_t->tag == OptionalType + && value_type(get_type(env, Match(assign->targets->ast, Index)->indexed))->tag == TableType) + lhs_t = Match(lhs_t, OptionalType)->type; + if (has_stack_memory(lhs_t)) + code_err(test->expr, "Stack references cannot be assigned to variables because the variable's scope may outlive the scope of the stack memory."); + env_t *val_scope = with_enum_scope(env, lhs_t); + CORD value = compile_to_type(val_scope, assign->values->ast, lhs_t); + test_code = CORD_all("(", compile_assignment(env, assign->targets->ast, value), ")"); + expr_t = lhs_t; + } else { + // Multi-assign or assignment to potentially non-idempotent targets + if (output && assign->targets->next) + code_err(ast, "Sorry, but doctesting with '=' is not supported for multi-assignments"); + + test_code = "({ // Assignment\n"; + + int64_t i = 1; + for (ast_list_t *target = assign->targets, *value = assign->values; target && value; target = target->next, value = value->next) { + type_t *lhs_t = get_type(env, target->ast); + if (target->ast->tag == Index && lhs_t->tag == OptionalType + && value_type(get_type(env, Match(target->ast, Index)->indexed))->tag == TableType) + lhs_t = Match(lhs_t, OptionalType)->type; + if (has_stack_memory(lhs_t)) + code_err(ast, "Stack references cannot be assigned to variables because the variable's scope may outlive the scope of the stack memory."); + if (target == assign->targets) + expr_t = lhs_t; + env_t *val_scope = with_enum_scope(env, lhs_t); + CORD val_code = compile_to_type(val_scope, value->ast, lhs_t); + CORD_appendf(&test_code, "%r $%ld = %r;\n", compile_type(lhs_t), i++, val_code); + } + i = 1; + for (ast_list_t *target = assign->targets; target; target = target->next) + test_code = CORD_all(test_code, compile_assignment(env, target->ast, CORD_asprintf("$%ld", i++)), ";\n"); + + test_code = CORD_all(test_code, "$1; })"); + } + } else if (test->expr->tag == UpdateAssign) { + type_t *lhs_t = get_type(env, Match(test->expr, UpdateAssign)->lhs); + auto update = Match(test->expr, UpdateAssign); + + if (update->lhs->tag == Index) { + type_t *indexed = value_type(get_type(env, Match(update->lhs, Index)->indexed)); + if (indexed->tag == TableType && Match(indexed, TableType)->default_value == NULL) + code_err(update->lhs, "Update assignments are not currently supported for tables"); + } + + ast_t *update_var = WrapAST(ast, UpdateAssign, + .lhs=WrapAST(update->lhs, InlineCCode, .code="(*expr)", .type=lhs_t), + .op=update->op, .rhs=update->rhs); + test_code = CORD_all("({", + compile_declaration(Type(PointerType, lhs_t), "expr"), " = &(", compile_lvalue(env, update->lhs), "); ", + compile_statement(env, update_var), "; *expr; })"); + expr_t = lhs_t; + } else if (expr_t->tag == VoidType || expr_t->tag == AbortType || expr_t->tag == ReturnType) { + test_code = CORD_all("({", compile_statement(env, test->expr), " NULL;})"); + } else { + test_code = compile(env, test->expr); + } + if (test->output) { + return CORD_asprintf( + "%rtest(%r, %r, %r, %ld, %ld);", + setup, test_code, + compile_type_info(expr_t), + compile_string_literal(output), + (int64_t)(test->expr->start - test->expr->file->text), + (int64_t)(test->expr->end - test->expr->file->text)); + } else { + return CORD_asprintf( + "%rinspect(%r, %r, %ld, %ld);", + setup, test_code, + compile_type_info(expr_t), + (int64_t)(test->expr->start - test->expr->file->text), + (int64_t)(test->expr->end - test->expr->file->text)); + } + } + case Declare: { + auto decl = Match(ast, Declare); + const char *name = Match(decl->var, Var)->name; + if (streq(name, "_")) { // Explicit discard + return CORD_all("(void)", compile(env, decl->value), ";"); + } else { + type_t *t = get_type(env, decl->value); + if (t->tag == AbortType || t->tag == VoidType || t->tag == ReturnType) + code_err(ast, "You can't declare a variable with a %T value", t); + + CORD val_code = compile_maybe_incref(env, decl->value, t); + if (t->tag == FunctionType) { + assert(promote(env, decl->value, &val_code, t, Type(ClosureType, t))); + t = Type(ClosureType, t); + } + return CORD_all(compile_declaration(t, CORD_cat("_$", name)), " = ", val_code, ";"); + } + } + case Assign: { + auto assign = Match(ast, Assign); + // Single assignment, no temp vars needed: + if (assign->targets && !assign->targets->next) { + type_t *lhs_t = get_type(env, assign->targets->ast); + if (assign->targets->ast->tag == Index && lhs_t->tag == OptionalType + && value_type(get_type(env, Match(assign->targets->ast, Index)->indexed))->tag == TableType) + lhs_t = Match(lhs_t, OptionalType)->type; + if (has_stack_memory(lhs_t)) + code_err(ast, "Stack references cannot be assigned to variables because the variable's scope may outlive the scope of the stack memory."); + env_t *val_env = with_enum_scope(env, lhs_t); + CORD val = compile_to_type(val_env, assign->values->ast, lhs_t); + return CORD_all(compile_assignment(env, assign->targets->ast, val), ";\n"); + } + + CORD code = "{ // Assignment\n"; + int64_t i = 1; + for (ast_list_t *value = assign->values, *target = assign->targets; value && target; value = value->next, target = target->next) { + type_t *lhs_t = get_type(env, target->ast); + if (target->ast->tag == Index && lhs_t->tag == OptionalType + && value_type(get_type(env, Match(target->ast, Index)->indexed))->tag == TableType) + lhs_t = Match(lhs_t, OptionalType)->type; + if (has_stack_memory(lhs_t)) + code_err(ast, "Stack references cannot be assigned to variables because the variable's scope may outlive the scope of the stack memory."); + env_t *val_env = with_enum_scope(env, lhs_t); + CORD val = compile_to_type(val_env, value->ast, lhs_t); + CORD_appendf(&code, "%r $%ld = %r;\n", compile_type(lhs_t), i++, val); + } + i = 1; + for (ast_list_t *target = assign->targets; target; target = target->next) { + code = CORD_all(code, compile_assignment(env, target->ast, CORD_asprintf("$%ld", i++)), ";\n"); + } + return CORD_cat(code, "\n}"); + } + case UpdateAssign: { + auto update = Match(ast, UpdateAssign); + + if (update->lhs->tag == Index) { + type_t *indexed = value_type(get_type(env, Match(update->lhs, Index)->indexed)); + if (indexed->tag == TableType && Match(indexed, TableType)->default_value == NULL) + code_err(update->lhs, "Update assignments are not currently supported for tables"); + } + + if (!is_idempotent(update->lhs)) { + type_t *lhs_t = get_type(env, update->lhs); + return CORD_all("{ ", compile_declaration(Type(PointerType, lhs_t), "update_lhs"), " = &", + compile_lvalue(env, update->lhs), ";\n", + "*update_lhs = ", compile(env, WrapAST(ast, BinaryOp, + .lhs=WrapAST(update->lhs, InlineCCode, .code="(*update_lhs)", .type=lhs_t), + .op=update->op, .rhs=update->rhs)), "; }"); + } + + type_t *lhs_t = get_type(env, update->lhs); + CORD lhs = compile_lvalue(env, update->lhs); + + if (update->lhs->tag == Index && value_type(get_type(env, Match(update->lhs, Index)->indexed))->tag == TableType) { + ast_t *lhs_placeholder = WrapAST(update->lhs, InlineCCode, .code="(*lhs)", .type=lhs_t); + CORD method_call = compile_math_method(env, update->op, lhs_placeholder, update->rhs, lhs_t); + if (method_call) + return CORD_all("{ ", compile_declaration(Type(PointerType, .pointed=lhs_t), "lhs"), " = &", lhs, "; *lhs = ", method_call, "; }"); + } else { + CORD method_call = compile_math_method(env, update->op, update->lhs, update->rhs, lhs_t); + if (method_call) + return CORD_all(lhs, " = ", method_call, ";"); + } + + CORD rhs = compile(env, update->rhs); + + type_t *rhs_t = get_type(env, update->rhs); + if (update->rhs->tag == Int && is_numeric_type(non_optional(lhs_t))) { + rhs = compile_int_to_type(env, update->rhs, lhs_t); + } else if (!promote(env, update->rhs, &rhs, rhs_t, lhs_t)) { + code_err(ast, "I can't do operations between %T and %T", lhs_t, rhs_t); + } + + bool lhs_is_optional_num = (lhs_t->tag == OptionalType && Match(lhs_t, OptionalType)->type && Match(lhs_t, OptionalType)->type->tag == NumType); + switch (update->op) { + case BINOP_MULT: + if (lhs_t->tag != IntType && lhs_t->tag != NumType && lhs_t->tag != ByteType && !lhs_is_optional_num) + code_err(ast, "I can't do a multiply assignment with this operator between %T and %T", lhs_t, rhs_t); + if (lhs_t->tag == NumType) { // 0*INF -> NaN, needs checking + return CORD_asprintf("%r *= %r;\n" + "if (isnan(%r))\n" + "fail_source(%r, %ld, %ld, \"This update assignment created a NaN value (probably multiplying zero with infinity), but the type is not optional!\");\n", + lhs, rhs, lhs, + CORD_quoted(ast->file->filename), + (long)(ast->start - ast->file->text), + (long)(ast->end - ast->file->text)); + } + return CORD_all(lhs, " *= ", rhs, ";"); + case BINOP_DIVIDE: + if (lhs_t->tag != IntType && lhs_t->tag != NumType && lhs_t->tag != ByteType && !lhs_is_optional_num) + code_err(ast, "I can't do a divide assignment with this operator between %T and %T", lhs_t, rhs_t); + if (lhs_t->tag == NumType) { // 0/0 or INF/INF -> NaN, needs checking + return CORD_asprintf("%r /= %r;\n" + "if (isnan(%r))\n" + "fail_source(%r, %ld, %ld, \"This update assignment created a NaN value (probably 0/0 or INF/INF), but the type is not optional!\");\n", + lhs, rhs, lhs, + CORD_quoted(ast->file->filename), + (long)(ast->start - ast->file->text), + (long)(ast->end - ast->file->text)); + } + return CORD_all(lhs, " /= ", rhs, ";"); + case BINOP_MOD: + if (lhs_t->tag != IntType && lhs_t->tag != NumType && lhs_t->tag != ByteType && !lhs_is_optional_num) + code_err(ast, "I can't do a mod assignment with this operator between %T and %T", lhs_t, rhs_t); + return CORD_all(lhs, " = ", lhs, " % ", rhs); + case BINOP_MOD1: + if (lhs_t->tag != IntType && lhs_t->tag != NumType && lhs_t->tag != ByteType && !lhs_is_optional_num) + code_err(ast, "I can't do a mod assignment with this operator between %T and %T", lhs_t, rhs_t); + return CORD_all(lhs, " = (((", lhs, ") - 1) % ", rhs, ") + 1;"); + case BINOP_PLUS: + if (lhs_t->tag != IntType && lhs_t->tag != NumType && lhs_t->tag != ByteType && !lhs_is_optional_num) + code_err(ast, "I can't do an addition assignment with this operator between %T and %T", lhs_t, rhs_t); + return CORD_all(lhs, " += ", rhs, ";"); + case BINOP_MINUS: + if (lhs_t->tag != IntType && lhs_t->tag != NumType && lhs_t->tag != ByteType && !lhs_is_optional_num) + code_err(ast, "I can't do a subtraction assignment with this operator between %T and %T", lhs_t, rhs_t); + return CORD_all(lhs, " -= ", rhs, ";"); + case BINOP_POWER: { + if (lhs_t->tag != NumType && !lhs_is_optional_num) + code_err(ast, "'^=' is only supported for Num types"); + if (lhs_t->tag == NumType && Match(lhs_t, NumType)->bits == TYPE_NBITS32) + return CORD_all(lhs, " = powf(", lhs, ", ", rhs, ");"); + else + return CORD_all(lhs, " = pow(", lhs, ", ", rhs, ");"); + } + case BINOP_LSHIFT: + if (lhs_t->tag != IntType && lhs_t->tag != ByteType) + code_err(ast, "I can't do a shift assignment with this operator between %T and %T", lhs_t, rhs_t); + return CORD_all(lhs, " <<= ", rhs, ";"); + case BINOP_RSHIFT: + if (lhs_t->tag != IntType && lhs_t->tag != ByteType) + code_err(ast, "I can't do a shift assignment with this operator between %T and %T", lhs_t, rhs_t); + return CORD_all(lhs, " >>= ", rhs, ";"); + case BINOP_ULSHIFT: + if (lhs_t->tag != IntType && lhs_t->tag != ByteType) + code_err(ast, "I can't do a shift assignment with this operator between %T and %T", lhs_t, rhs_t); + return CORD_all("{ ", compile_unsigned_type(lhs_t), " *dest = (void*)&(", lhs, "); *dest <<= ", rhs, "; }"); + case BINOP_URSHIFT: + if (lhs_t->tag != IntType && lhs_t->tag != ByteType) + code_err(ast, "I can't do a shift assignment with this operator between %T and %T", lhs_t, rhs_t); + return CORD_all("{ ", compile_unsigned_type(lhs_t), " *dest = (void*)&(", lhs, "); *dest >>= ", rhs, "; }"); + case BINOP_AND: { + if (lhs_t->tag == BoolType) + return CORD_all("if (", lhs, ") ", lhs, " = ", rhs, ";"); + else if (lhs_t->tag == IntType || lhs_t->tag == ByteType) + return CORD_all(lhs, " &= ", rhs, ";"); + else if (lhs_t->tag == OptionalType) + return CORD_all("if (!(", check_none(lhs_t, lhs), ")) ", lhs, " = ", promote_to_optional(rhs_t, rhs), ";"); + else + code_err(ast, "'or=' is not implemented for %T types", lhs_t); + } + case BINOP_OR: { + if (lhs_t->tag == BoolType) + return CORD_all("if (!(", lhs, ")) ", lhs, " = ", rhs, ";"); + else if (lhs_t->tag == IntType || lhs_t->tag == ByteType) + return CORD_all(lhs, " |= ", rhs, ";"); + else if (lhs_t->tag == OptionalType) + return CORD_all("if (", check_none(lhs_t, lhs), ") ", lhs, " = ", promote_to_optional(rhs_t, rhs), ";"); + else + code_err(ast, "'or=' is not implemented for %T types", lhs_t); + } + case BINOP_XOR: + if (lhs_t->tag != IntType && lhs_t->tag != BoolType && lhs_t->tag != ByteType) + code_err(ast, "I can't do an xor assignment with this operator between %T and %T", lhs_t, rhs_t); + return CORD_all(lhs, " ^= ", rhs, ";"); + case BINOP_CONCAT: { + if (lhs_t->tag == TextType) { + return CORD_all(lhs, " = Texts(", lhs, ", ", rhs, ");"); + } else if (lhs_t->tag == ArrayType) { + CORD padded_item_size = CORD_all("sizeof(", compile_type(Match(lhs_t, ArrayType)->item_type), ")"); + // arr ++= [...] + if (update->lhs->tag == Var) + return CORD_all("Array$insert_all(&", lhs, ", ", rhs, ", I(0), ", padded_item_size, ");"); + else + return CORD_all(lhs, " = Array$concat(", lhs, ", ", rhs, ", ", padded_item_size, ");"); + } else { + code_err(ast, "'++=' is not implemented for %T types", lhs_t); + } + } + default: code_err(ast, "Update assignments are not implemented for this operation"); + } + } + case StructDef: case EnumDef: case LangDef: case FunctionDef: case ConvertDef: { + return CORD_EMPTY; + } + case Skip: { + const char *target = Match(ast, Skip)->target; + for (loop_ctx_t *ctx = env->loop_ctx; ctx; ctx = ctx->next) { + bool matched = !target || CORD_cmp(target, ctx->loop_name) == 0; + for (ast_list_t *var = ctx->loop_vars; var && !matched; var = var ? var->next : NULL) + matched = (CORD_cmp(target, Match(var->ast, Var)->name) == 0); + + if (matched) { + if (!ctx->skip_label) { + static int64_t skip_label_count = 1; + CORD_sprintf(&ctx->skip_label, "skip_%ld", skip_label_count); + ++skip_label_count; + } + CORD code = CORD_EMPTY; + for (deferral_t *deferred = env->deferred; deferred && deferred != ctx->deferred; deferred = deferred->next) + code = CORD_all(code, compile_statement(deferred->defer_env, deferred->block)); + if (code) + return CORD_all("{\n", code, "goto ", ctx->skip_label, ";\n}\n"); + else + return CORD_all("goto ", ctx->skip_label, ";"); + } + } + if (env->loop_ctx) + code_err(ast, "This 'skip' is not inside any loop"); + else if (target) + code_err(ast, "No loop target named '%s' was found", target); + else + return "continue;"; + } + case Stop: { + const char *target = Match(ast, Stop)->target; + for (loop_ctx_t *ctx = env->loop_ctx; ctx; ctx = ctx->next) { + bool matched = !target || CORD_cmp(target, ctx->loop_name) == 0; + for (ast_list_t *var = ctx->loop_vars; var && !matched; var = var ? var->next : var) + matched = (CORD_cmp(target, Match(var->ast, Var)->name) == 0); + + if (matched) { + if (!ctx->stop_label) { + static int64_t stop_label_count = 1; + CORD_sprintf(&ctx->stop_label, "stop_%ld", stop_label_count); + ++stop_label_count; + } + CORD code = CORD_EMPTY; + for (deferral_t *deferred = env->deferred; deferred && deferred != ctx->deferred; deferred = deferred->next) + code = CORD_all(code, compile_statement(deferred->defer_env, deferred->block)); + if (code) + return CORD_all("{\n", code, "goto ", ctx->stop_label, ";\n}\n"); + else + return CORD_all("goto ", ctx->stop_label, ";"); + } + } + if (env->loop_ctx) + code_err(ast, "This 'stop' is not inside any loop"); + else if (target) + code_err(ast, "No loop target named '%s' was found", target); + else + return "break;"; + } + case Pass: return ";"; + case Defer: { + ast_t *body = Match(ast, Defer)->body; + Table_t closed_vars = get_closed_vars(env, NULL, body); + + static int defer_id = 0; + env_t *defer_env = fresh_scope(env); + CORD code = CORD_EMPTY; + for (int64_t i = 1; i <= Table$length(closed_vars); i++) { + struct { const char *name; binding_t *b; } *entry = Table$entry(closed_vars, i); + if (entry->b->type->tag == ModuleType) + continue; + if (CORD_ncmp(entry->b->code, 0, "userdata->", 0, strlen("userdata->")) == 0) { + Table$str_set(defer_env->locals, entry->name, entry->b); + } else { + CORD defer_name = CORD_asprintf("defer$%d$%s", ++defer_id, entry->name); + code = CORD_all( + code, compile_declaration(entry->b->type, defer_name), " = ", entry->b->code, ";\n"); + set_binding(defer_env, entry->name, entry->b->type, defer_name); + } + } + env->deferred = new(deferral_t, .defer_env=defer_env, .block=body, .next=env->deferred); + return code; + } + case PrintStatement: { + ast_list_t *to_print = Match(ast, PrintStatement)->to_print; + if (!to_print) + return CORD_EMPTY; + + CORD code = "say("; + if (to_print->next) code = CORD_all(code, "Texts("); + for (ast_list_t *chunk = to_print; chunk; chunk = chunk->next) { + if (chunk->ast->tag == TextLiteral) { + code = CORD_cat(code, compile(env, chunk->ast)); + } else { + code = CORD_cat(code, compile_string(env, chunk->ast, "USE_COLOR")); + } + if (chunk->next) code = CORD_cat(code, ", "); + } + if (to_print->next) code = CORD_all(code, ")"); + return CORD_cat(code, ", yes);"); + } + case Return: { + if (!env->fn_ret) code_err(ast, "This return statement is not inside any function"); + auto ret = Match(ast, Return)->value; + + CORD code = CORD_EMPTY; + for (deferral_t *deferred = env->deferred; deferred; deferred = deferred->next) { + code = CORD_all(code, compile_statement(deferred->defer_env, deferred->block)); + } + + if (ret) { + if (env->fn_ret->tag == VoidType || env->fn_ret->tag == AbortType) + code_err(ast, "This function is not supposed to return any values, according to its type signature"); + + env = with_enum_scope(env, env->fn_ret); + CORD value = compile_to_type(env, ret, env->fn_ret); + if (env->deferred) { + code = CORD_all(compile_declaration(env->fn_ret, "ret"), " = ", value, ";\n", code); + value = "ret"; + } + + return CORD_all(code, "return ", value, ";"); + } else { + if (env->fn_ret->tag != VoidType) + code_err(ast, "This function expects you to return a %T value", env->fn_ret); + return CORD_all(code, "return;"); + } + } + case While: { + auto while_ = Match(ast, While); + env_t *scope = fresh_scope(env); + loop_ctx_t loop_ctx = (loop_ctx_t){ + .loop_name="while", + .deferred=scope->deferred, + .next=env->loop_ctx, + }; + scope->loop_ctx = &loop_ctx; + CORD body = compile_statement(scope, while_->body); + if (loop_ctx.skip_label) + body = CORD_all(body, "\n", loop_ctx.skip_label, ": continue;"); + CORD loop = CORD_all("while (", while_->condition ? compile(scope, while_->condition) : "yes", ") {\n\t", body, "\n}"); + if (loop_ctx.stop_label) + loop = CORD_all(loop, "\n", loop_ctx.stop_label, ":;"); + return loop; + } + case Repeat: { + ast_t *body = Match(ast, Repeat)->body; + env_t *scope = fresh_scope(env); + loop_ctx_t loop_ctx = (loop_ctx_t){ + .loop_name="repeat", + .deferred=scope->deferred, + .next=env->loop_ctx, + }; + scope->loop_ctx = &loop_ctx; + CORD body_code = compile_statement(scope, body); + if (loop_ctx.skip_label) + body_code = CORD_all(body_code, "\n", loop_ctx.skip_label, ": continue;"); + CORD loop = CORD_all("for (;;) {\n\t", body_code, "\n}"); + if (loop_ctx.stop_label) + loop = CORD_all(loop, "\n", loop_ctx.stop_label, ":;"); + return loop; + } + case Holding: { + ast_t *held = Match(ast, Holding)->mutexed; + type_t *held_type = get_type(env, held); + if (held_type->tag != MutexedType) + code_err(held, "This is a %t, not a mutexed value", held_type); + CORD code = CORD_all( + "{ // Holding\n", + "MutexedData_t mutexed = ", compile(env, held), ";\n", + "pthread_mutex_lock(&mutexed->mutex);\n"); + + env_t *body_scope = fresh_scope(env); + body_scope->deferred = new(deferral_t, .defer_env=env, + .block=FakeAST(InlineCCode, .code="pthread_mutex_unlock(&mutexed->mutex);"), + .next=body_scope->deferred); + if (held->tag == Var) { + CORD held_var = CORD_all(Match(held, Var)->name, "$held"); + set_binding(body_scope, Match(held, Var)->name, + Type(PointerType, .pointed=Match(held_type, MutexedType)->type, .is_stack=true), held_var); + code = CORD_all(code, compile_declaration(Type(PointerType, .pointed=Match(held_type, MutexedType)->type), held_var), + " = (", compile_type(Type(PointerType, .pointed=Match(held_type, MutexedType)->type)), ")mutexed->data;\n"); + } + return CORD_all(code, compile_statement(body_scope, Match(ast, Holding)->body), + "pthread_mutex_unlock(&mutexed->mutex);\n}"); + } + case For: { + auto for_ = Match(ast, For); + + // If we're iterating over a comprehension, that's actually just doing + // one loop, we don't need to compile the comprehension as an array + // comprehension. This is a common case for reducers like `(+: i*2 for i in 5)` + // or `(and) x:is_good() for x in xs` + if (for_->iter->tag == Comprehension) { + auto comp = Match(for_->iter, Comprehension); + ast_t *body = for_->body; + if (for_->vars) { + if (for_->vars->next) + code_err(for_->vars->next->ast, "This is too many variables for iteration"); + + body = WrapAST( + ast, Block, + .statements=new(ast_list_t, .ast=WrapAST(ast, Declare, .var=for_->vars->ast, .value=comp->expr), + .next=body->tag == Block ? Match(body, Block)->statements : new(ast_list_t, .ast=body))); + } + + if (comp->filter) + body = WrapAST(for_->body, If, .condition=comp->filter, .body=body); + ast_t *loop = WrapAST(ast, For, .vars=comp->vars, .iter=comp->iter, .body=body); + return compile_statement(env, loop); + } + + env_t *body_scope = for_scope(env, ast); + loop_ctx_t loop_ctx = (loop_ctx_t){ + .loop_name="for", + .loop_vars=for_->vars, + .deferred=body_scope->deferred, + .next=body_scope->loop_ctx, + }; + body_scope->loop_ctx = &loop_ctx; + // Naked means no enclosing braces: + CORD naked_body = compile_inline_block(body_scope, for_->body); + if (loop_ctx.skip_label) + naked_body = CORD_all(naked_body, "\n", loop_ctx.skip_label, ": continue;"); + CORD stop = loop_ctx.stop_label ? CORD_all("\n", loop_ctx.stop_label, ":;") : CORD_EMPTY; + + // Special case for improving performance for numeric iteration: + if (for_->iter->tag == MethodCall && streq(Match(for_->iter, MethodCall)->name, "to") && + get_type(env, Match(for_->iter, MethodCall)->self)->tag == BigIntType) { + // TODO: support other integer types + arg_ast_t *args = Match(for_->iter, MethodCall)->args; + if (!args) code_err(for_->iter, "to() needs at least one argument"); + + CORD last = CORD_EMPTY, step = CORD_EMPTY, optional_step = CORD_EMPTY; + if (!args->name || streq(args->name, "last")) { + last = compile_to_type(env, args->value, INT_TYPE); + if (args->next) { + if (args->next->name && !streq(args->next->name, "step")) + code_err(args->next->value, "Invalid argument name: %s", args->next->name); + if (get_type(env, args->next->value)->tag == OptionalType) + optional_step = compile_to_type(env, args->next->value, Type(OptionalType, .type=INT_TYPE)); + else + step = compile_to_type(env, args->next->value, INT_TYPE); + } + } else if (streq(args->name, "step")) { + if (get_type(env, args->value)->tag == OptionalType) + optional_step = compile_to_type(env, args->value, Type(OptionalType, .type=INT_TYPE)); + else + step = compile_to_type(env, args->value, INT_TYPE); + if (args->next) { + if (args->next->name && !streq(args->next->name, "last")) + code_err(args->next->value, "Invalid argument name: %s", args->next->name); + last = compile_to_type(env, args->next->value, INT_TYPE); + } + } + + if (!last) + code_err(for_->iter, "No `last` argument was given"); + + if (step && optional_step) + step = CORD_all("({ OptionalInt_t maybe_step = ", optional_step, "; maybe_step->small == 0 ? (Int$compare_value(last, first) >= 0 ? I_small(1) : I_small(-1)) : (Int_t)maybe_step; })"); + else if (!step) + step = "Int$compare_value(last, first) >= 0 ? I_small(1) : I_small(-1)"; + + CORD value = for_->vars ? compile(body_scope, for_->vars->ast) : "i"; + return CORD_all( + "for (Int_t first = ", compile(env, Match(for_->iter, MethodCall)->self), ", ", + value, " = first, " + "last = ", last, ", " + "step = ", step, "; " + "Int$compare_value(", value, ", last) != Int$compare_value(step, I_small(0)); ", value, " = Int$plus(", value, ", step)) {\n" + "\t", naked_body, + "}", + stop); + } else if (for_->iter->tag == MethodCall && streq(Match(for_->iter, MethodCall)->name, "onward") && + get_type(env, Match(for_->iter, MethodCall)->self)->tag == BigIntType) { + // Special case for Int:onward() + arg_ast_t *args = Match(for_->iter, MethodCall)->args; + arg_t *arg_spec = new(arg_t, .name="step", .type=INT_TYPE, .default_val=FakeAST(Int, .str="1"), .next=NULL); + CORD step = compile_arguments(env, for_->iter, arg_spec, args); + CORD value = for_->vars ? compile(body_scope, for_->vars->ast) : "i"; + return CORD_all( + "for (Int_t ", value, " = ", compile(env, Match(for_->iter, MethodCall)->self), ", ", + "step = ", step, "; ; ", value, " = Int$plus(", value, ", step)) {\n" + "\t", naked_body, + "}", + stop); + } + + type_t *iter_t = value_type(get_type(env, for_->iter)); + type_t *iter_value_t = value_type(iter_t); + + switch (iter_value_t->tag) { + case ArrayType: { + type_t *item_t = Match(iter_value_t, ArrayType)->item_type; + CORD index = CORD_EMPTY; + CORD value = CORD_EMPTY; + if (for_->vars) { + if (for_->vars->next) { + if (for_->vars->next->next) + code_err(for_->vars->next->next->ast, "This is too many variables for this loop"); + + index = compile(body_scope, for_->vars->ast); + value = compile(body_scope, for_->vars->next->ast); + } else { + value = compile(body_scope, for_->vars->ast); + } + } + + CORD loop = CORD_EMPTY; + loop = CORD_all(loop, "for (int64_t i = 1; i <= iterating.length; ++i)"); + + if (index != CORD_EMPTY) + naked_body = CORD_all("Int_t ", index, " = I(i);\n", naked_body); + + if (value != CORD_EMPTY) { + loop = CORD_all(loop, "{\n", + compile_declaration(item_t, value), + " = *(", compile_type(item_t), "*)(iterating.data + (i-1)*iterating.stride);\n", + naked_body, "\n}"); + } else { + loop = CORD_all(loop, "{\n", naked_body, "\n}"); + } + + if (for_->empty) + loop = CORD_all("if (iterating.length > 0) {\n", loop, "\n} else ", compile_statement(env, for_->empty)); + + if (iter_t->tag == PointerType) { + loop = CORD_all("{\n" + "Array_t *ptr = ", compile_to_pointer_depth(env, for_->iter, 1, false), ";\n" + "\nARRAY_INCREF(*ptr);\n" + "Array_t iterating = *ptr;\n", + loop, + stop, + "\nARRAY_DECREF(*ptr);\n" + "}\n"); + + } else { + loop = CORD_all("{\n" + "Array_t iterating = ", compile_to_pointer_depth(env, for_->iter, 0, false), ";\n", + loop, + stop, + "}\n"); + } + return loop; + } + case SetType: case TableType: { + CORD loop = "for (int64_t i = 0; i < iterating.length; ++i) {\n"; + if (for_->vars) { + if (iter_value_t->tag == SetType) { + if (for_->vars->next) + code_err(for_->vars->next->ast, "This is too many variables for this loop"); + CORD item = compile(body_scope, for_->vars->ast); + type_t *item_type = Match(iter_value_t, SetType)->item_type; + loop = CORD_all(loop, compile_declaration(item_type, item), " = *(", compile_type(item_type), "*)(", + "iterating.data + i*iterating.stride);\n"); + } else { + CORD key = compile(body_scope, for_->vars->ast); + type_t *key_t = Match(iter_value_t, TableType)->key_type; + loop = CORD_all(loop, compile_declaration(key_t, key), " = *(", compile_type(key_t), "*)(", + "iterating.data + i*iterating.stride);\n"); + + if (for_->vars->next) { + if (for_->vars->next->next) + code_err(for_->vars->next->next->ast, "This is too many variables for this loop"); + + type_t *value_t = Match(iter_value_t, TableType)->value_type; + CORD value = compile(body_scope, for_->vars->next->ast); + CORD value_offset = CORD_all("offsetof(struct { ", compile_declaration(key_t, "k"), "; ", compile_declaration(value_t, "v"), "; }, v)"); + loop = CORD_all(loop, compile_declaration(value_t, value), " = *(", compile_type(value_t), "*)(", + "iterating.data + i*iterating.stride + ", value_offset, ");\n"); + } + } + } + + loop = CORD_all(loop, naked_body, "\n}"); + + if (for_->empty) { + loop = CORD_all("if (iterating.length > 0) {\n", loop, "\n} else ", compile_statement(env, for_->empty)); + } + + if (iter_t->tag == PointerType) { + loop = CORD_all( + "{\n", + "Table_t *t = ", compile_to_pointer_depth(env, for_->iter, 1, false), ";\n" + "ARRAY_INCREF(t->entries);\n" + "Array_t iterating = t->entries;\n", + loop, + "ARRAY_DECREF(t->entries);\n" + "}\n"); + } else { + loop = CORD_all( + "{\n", + "Array_t iterating = (", compile_to_pointer_depth(env, for_->iter, 0, false), ").entries;\n", + loop, + "}\n"); + } + return loop; + } + case BigIntType: { + CORD n; + if (for_->iter->tag == Int) { + const char *str = Match(for_->iter, Int)->str; + Int_t int_val = Int$from_str(str); + if (int_val.small == 0) + code_err(for_->iter, "Failed to parse this integer"); + mpz_t i; + mpz_init_set_int(i, int_val); + if (mpz_cmpabs_ui(i, BIGGEST_SMALL_INT) <= 0) + n = mpz_get_str(NULL, 10, i); + else + goto big_n; + + + if (for_->empty && mpz_cmp_si(i, 0) <= 0) { + return compile_statement(env, for_->empty); + } else { + return CORD_all( + "for (int64_t i = 1; i <= ", n, "; ++i) {\n", + for_->vars ? CORD_all("\tInt_t ", compile(body_scope, for_->vars->ast), " = I_small(i);\n") : CORD_EMPTY, + "\t", naked_body, + "}\n", + stop, "\n"); + } + } + + big_n: + n = compile_to_pointer_depth(env, for_->iter, 0, false); + CORD i = for_->vars ? compile(body_scope, for_->vars->ast) : "i"; + CORD n_var = for_->vars ? CORD_all("max", i) : "n"; + if (for_->empty) { + return CORD_all( + "{\n" + "Int_t ", n_var, " = ", n, ";\n" + "if (Int$compare_value(", n_var, ", I(0)) > 0) {\n" + "for (Int_t ", i, " = I(1); Int$compare_value(", i, ", ", n_var, ") <= 0; ", i, " = Int$plus(", i, ", I(1))) {\n", + "\t", naked_body, + "}\n" + "} else ", compile_statement(env, for_->empty), + stop, "\n" + "}\n"); + } else { + return CORD_all( + "for (Int_t ", i, " = I(1), ", n_var, " = ", n, "; Int$compare_value(", i, ", ", n_var, ") <= 0; ", i, " = Int$plus(", i, ", I(1))) {\n", + "\t", naked_body, + "}\n", + stop, "\n"); + } + } + case FunctionType: case ClosureType: { + // Iterator function: + CORD code = "{\n"; + + CORD next_fn; + if (is_idempotent(for_->iter)) { + next_fn = compile_to_pointer_depth(env, for_->iter, 0, false); + } else { + code = CORD_all(code, compile_declaration(iter_value_t, "next"), " = ", compile_to_pointer_depth(env, for_->iter, 0, false), ";\n"); + next_fn = "next"; + } + + auto fn = iter_value_t->tag == ClosureType ? Match(Match(iter_value_t, ClosureType)->fn, FunctionType) : Match(iter_value_t, FunctionType); + + CORD get_next; + if (iter_value_t->tag == ClosureType) { + type_t *fn_t = Match(iter_value_t, ClosureType)->fn; + arg_t *closure_fn_args = NULL; + for (arg_t *arg = Match(fn_t, FunctionType)->args; arg; arg = arg->next) + closure_fn_args = new(arg_t, .name=arg->name, .type=arg->type, .default_val=arg->default_val, .next=closure_fn_args); + closure_fn_args = new(arg_t, .name="userdata", .type=Type(PointerType, .pointed=Type(MemoryType)), .next=closure_fn_args); + REVERSE_LIST(closure_fn_args); + CORD fn_type_code = compile_type(Type(FunctionType, .args=closure_fn_args, .ret=Match(fn_t, FunctionType)->ret)); + get_next = CORD_all("((", fn_type_code, ")", next_fn, ".fn)(", next_fn, ".userdata)"); + } else { + get_next = CORD_all(next_fn, "()"); + } + + if (fn->ret->tag == OptionalType) { + // Use an optional variable `cur` for each iteration step, which will be checked for null + code = CORD_all(code, compile_declaration(fn->ret, "cur"), ";\n"); + get_next = CORD_all("(cur=", get_next, ", !", check_none(fn->ret, "cur"), ")"); + if (for_->vars) { + naked_body = CORD_all( + compile_declaration(Match(fn->ret, OptionalType)->type, CORD_all("_$", Match(for_->vars->ast, Var)->name)), + " = ", optional_into_nonnone(fn->ret, "cur"), ";\n", + naked_body); + } + if (for_->empty) { + code = CORD_all(code, "if (", get_next, ") {\n" + "\tdo{\n\t\t", naked_body, "\t} while(", get_next, ");\n" + "} else {\n\t", compile_statement(env, for_->empty), "}", stop, "\n}\n"); + } else { + code = CORD_all(code, "while(", get_next, ") {\n\t", naked_body, "}\n", stop, "\n}\n"); + } + } else { + if (for_->vars) { + naked_body = CORD_all( + compile_declaration(fn->ret, CORD_all("_$", Match(for_->vars->ast, Var)->name)), + " = ", get_next, ";\n", naked_body); + } else { + naked_body = CORD_all(get_next, ";\n", naked_body); + } + if (for_->empty) + code_err(for_->empty, "This iteration loop will always have values, so this block will never run"); + code = CORD_all(code, "for (;;) {\n\t", naked_body, "}\n", stop, "\n}\n"); + } + + return code; + } + default: code_err(for_->iter, "Iteration is not implemented for type: %T", iter_t); + } + } + case If: { + auto if_ = Match(ast, If); + ast_t *condition = if_->condition; + if (condition->tag == Declare) { + env_t *truthy_scope = fresh_scope(env); + CORD code = CORD_all("IF_DECLARE(", compile_statement(truthy_scope, condition), ", "); + bind_statement(truthy_scope, condition); + ast_t *var = Match(condition, Declare)->var; + code = CORD_all(code, compile_condition(truthy_scope, var), ", "); + type_t *cond_t = get_type(truthy_scope, var); + if (cond_t->tag == OptionalType) { + set_binding(truthy_scope, Match(var, Var)->name, + Match(cond_t, OptionalType)->type, + optional_into_nonnone(cond_t, compile(truthy_scope, var))); + } + code = CORD_all(code, compile_statement(truthy_scope, if_->body), ")"); + if (if_->else_body) + code = CORD_all(code, "\nelse ", compile_statement(env, if_->else_body)); + return code; + } else { + CORD code = CORD_all("if (", compile_condition(env, condition), ")"); + env_t *truthy_scope = env; + type_t *cond_t = get_type(env, condition); + if (condition->tag == Var && cond_t->tag == OptionalType) { + truthy_scope = fresh_scope(env); + set_binding(truthy_scope, Match(condition, Var)->name, + Match(cond_t, OptionalType)->type, + optional_into_nonnone(cond_t, compile(truthy_scope, condition))); + } + code = CORD_all(code, compile_statement(truthy_scope, if_->body)); + if (if_->else_body) + code = CORD_all(code, "\nelse ", compile_statement(env, if_->else_body)); + return code; + } + } + case Block: { + return CORD_all("{\n", compile_inline_block(env, ast), "}\n"); + } + case Comprehension: { + if (!env->comprehension_action) + code_err(ast, "I don't know what to do with this comprehension!"); + auto comp = Match(ast, Comprehension); + if (comp->expr->tag == Comprehension) { // Nested comprehension + ast_t *body = comp->filter ? WrapAST(ast, If, .condition=comp->filter, .body=comp->expr) : comp->expr; + ast_t *loop = WrapAST(ast, For, .vars=comp->vars, .iter=comp->iter, .body=body); + return compile_statement(env, loop); + } + + // Array/Set/Table comprehension: + comprehension_body_t get_body = (void*)env->comprehension_action->fn; + ast_t *body = get_body(comp->expr, env->comprehension_action->userdata); + if (comp->filter) + body = WrapAST(comp->expr, If, .condition=comp->filter, .body=body); + ast_t *loop = WrapAST(ast, For, .vars=comp->vars, .iter=comp->iter, .body=body); + return compile_statement(env, loop); + } + case Extern: return CORD_EMPTY; + case InlineCCode: { + auto inline_code = Match(ast, InlineCCode); + if (inline_code->type) + return CORD_all("({ ", inline_code->code, "; })"); + else + return inline_code->code; + } + case Use: { + auto use = Match(ast, Use); + if (use->what == USE_LOCAL) { + CORD name = file_base_id(Match(ast, Use)->path); + return with_source_info(ast, CORD_all("_$", name, "$$initialize();\n")); + } else if (use->what == USE_MODULE) { + glob_t tm_files; + if (glob(heap_strf("~/.local/share/tomo/installed/%s/[!._0-9]*.tm", use->path), GLOB_TILDE, NULL, &tm_files) != 0) + code_err(ast, "Could not find library"); + + CORD initialization = CORD_EMPTY; + const char *lib_id = Text$as_c_string( + Text$replace(Text$from_str(use->path), Pattern("{1+ !alphanumeric}"), Text("_"), Pattern(""), false)); + for (size_t i = 0; i < tm_files.gl_pathc; i++) { + const char *filename = tm_files.gl_pathv[i]; + initialization = CORD_all( + initialization, + with_source_info(ast, CORD_all("_$", lib_id, "$", file_base_id(filename), "$$initialize();\n"))); + } + globfree(&tm_files); + return initialization; + } else { + return CORD_EMPTY; + } + } + default: + if (!is_discardable(env, ast)) + code_err(ast, "The %T result of this statement cannot be discarded", get_type(env, ast)); + return CORD_asprintf("(void)%r;", compile(env, ast)); + } +} + +CORD compile_statement(env_t *env, ast_t *ast) { + CORD stmt = _compile_statement(env, ast); + return with_source_info(ast, stmt); +} + +CORD expr_as_text(CORD expr, type_t *t, CORD color) +{ + switch (t->tag) { + case MemoryType: return CORD_asprintf("Memory$as_text(stack(%r), %r, &Memory$info)", expr, color); + case BoolType: + // NOTE: this cannot use stack(), since bools may actually be bit fields: + return CORD_asprintf("Bool$as_text((Bool_t[1]){%r}, %r, &Bool$info)", expr, color); + case CStringType: return CORD_asprintf("CString$as_text(stack(%r), %r, &CString$info)", expr, color); + case MomentType: return CORD_asprintf("Moment$as_text(stack(%r), %r, &Moment$info)", expr, color); + case BigIntType: case IntType: case ByteType: case NumType: { + CORD name = type_to_cord(t); + return CORD_asprintf("%r$as_text(stack(%r), %r, &%r$info)", name, expr, color, name); + } + case TextType: return CORD_asprintf("Text$as_text(stack(%r), %r, %r)", expr, color, compile_type_info(t)); + case ArrayType: return CORD_asprintf("Array$as_text(stack(%r), %r, %r)", expr, color, compile_type_info(t)); + case SetType: return CORD_asprintf("Table$as_text(stack(%r), %r, %r)", expr, color, compile_type_info(t)); + case TableType: return CORD_asprintf("Table$as_text(stack(%r), %r, %r)", expr, color, compile_type_info(t)); + case FunctionType: case ClosureType: return CORD_asprintf("Func$as_text(stack(%r), %r, %r)", expr, color, compile_type_info(t)); + case PointerType: return CORD_asprintf("Pointer$as_text(stack(%r), %r, %r)", expr, color, compile_type_info(t)); + case OptionalType: return CORD_asprintf("Optional$as_text(stack(%r), %r, %r)", expr, color, compile_type_info(t)); + case StructType: case EnumType: case MutexedType: + return CORD_asprintf("generic_as_text(stack(%r), %r, %r)", expr, color, compile_type_info(t)); + default: compiler_err(NULL, NULL, NULL, "Stringifying is not supported for %T", t); + } +} + +CORD compile_string(env_t *env, ast_t *ast, CORD color) +{ + type_t *t = get_type(env, ast); + CORD expr = compile(env, ast); + return expr_as_text(expr, t, color); +} + +CORD compile_to_pointer_depth(env_t *env, ast_t *ast, int64_t target_depth, bool needs_incref) +{ + CORD val = compile(env, ast); + type_t *t = get_type(env, ast); + int64_t depth = 0; + for (type_t *tt = t; tt->tag == PointerType; tt = Match(tt, PointerType)->pointed) + ++depth; + + // Passing a literal value won't trigger an incref, because it's ephemeral, + // e.g. [10, 20]:reversed() + if (t->tag != PointerType && needs_incref && !can_be_mutated(env, ast)) + needs_incref = false; + + while (depth != target_depth) { + if (depth < target_depth) { + if (ast->tag == Var && target_depth == 1) + val = CORD_all("(&", val, ")"); + else + code_err(ast, "This should be a pointer, not %T", get_type(env, ast)); + t = Type(PointerType, .pointed=t, .is_stack=true); + ++depth; + } else { + auto ptr = Match(t, PointerType); + val = CORD_all("*(", val, ")"); + t = ptr->pointed; + --depth; + } + } + + while (t->tag == PointerType) { + auto ptr = Match(t, PointerType); + t = ptr->pointed; + } + + if (needs_incref && t->tag == ArrayType) + val = CORD_all("ARRAY_COPY(", val, ")"); + else if (needs_incref && (t->tag == TableType || t->tag == SetType)) + val = CORD_all("TABLE_COPY(", val, ")"); + + return val; +} + +CORD compile_to_type(env_t *env, ast_t *ast, type_t *t) +{ + if (ast->tag == Int && is_numeric_type(t)) { + return compile_int_to_type(env, ast, t); + } else if (ast->tag == Num && t->tag == NumType) { + double n = Match(ast, Num)->n; + switch (Match(t, NumType)->bits) { + case TYPE_NBITS64: return CORD_asprintf("N64(%.20g)", n); + case TYPE_NBITS32: return CORD_asprintf("N32(%.10g)", n); + default: code_err(ast, "This is not a valid number bit width"); + } + } else if (ast->tag == None && Match(ast, None)->type == NULL) { + return compile_none(t); + } + + type_t *actual = get_type(env, ast); + + // Promote values to views-of-values if needed: + if (t->tag == PointerType && Match(t, PointerType)->is_stack && actual->tag != PointerType) + return CORD_all("stack(", compile_to_type(env, ast, Match(t, PointerType)->pointed), ")"); + + CORD code = compile(env, ast); + if (!promote(env, ast, &code, actual, t)) + code_err(ast, "I expected a %T here, but this is a %T", t, actual); + return code; +} + +CORD compile_int_to_type(env_t *env, ast_t *ast, type_t *target) +{ + if (ast->tag != Int) { + CORD code = compile(env, ast); + type_t *actual_type = get_type(env, ast); + if (!promote(env, ast, &code, actual_type, target)) + code_err(ast, "I couldn't promote this %T to a %T", actual_type, target); + return code; + } + + if (target->tag == BigIntType) + return compile(env, ast); + + if (target->tag == OptionalType && Match(target, OptionalType)->type) + return compile_int_to_type(env, ast, Match(target, OptionalType)->type); + + const char *literal = Match(ast, Int)->str; + OptionalInt_t int_val = Int$from_str(literal); + if (int_val.small == 0) + code_err(ast, "Failed to parse this integer"); + + mpz_t i; + mpz_init_set_int(i, int_val); + + char *c_literal; + if (strncmp(literal, "0x", 2) == 0 || strncmp(literal, "0X", 2) == 0 || strncmp(literal, "0b", 2) == 0) { + gmp_asprintf(&c_literal, "0x%ZX", i); + } else if (strncmp(literal, "0o", 2) == 0) { + gmp_asprintf(&c_literal, "%#Zo", i); + } else { + gmp_asprintf(&c_literal, "%#Zd", i); + } + + if (target->tag == ByteType) { + if (mpz_cmp_si(i, UINT8_MAX) <= 0 && mpz_cmp_si(i, 0) >= 0) + return CORD_asprintf("(Byte_t)(%s)", c_literal); + code_err(ast, "This integer cannot fit in a byte"); + } else if (target->tag == NumType) { + if (Match(target, NumType)->bits == TYPE_NBITS64) { + return CORD_asprintf("N64(%s)", c_literal); + } else { + return CORD_asprintf("N32(%s)", c_literal); + } + } else if (target->tag == IntType) { + int64_t target_bits = (int64_t)Match(target, IntType)->bits; + switch (target_bits) { + case TYPE_IBITS64: + if (mpz_cmp_si(i, INT64_MAX) <= 0 && mpz_cmp_si(i, INT64_MIN) >= 0) + return CORD_asprintf("I64(%s)", c_literal); + break; + case TYPE_IBITS32: + if (mpz_cmp_si(i, INT32_MAX) <= 0 && mpz_cmp_si(i, INT32_MIN) >= 0) + return CORD_asprintf("I32(%s)", c_literal); + break; + case TYPE_IBITS16: + if (mpz_cmp_si(i, INT16_MAX) <= 0 && mpz_cmp_si(i, INT16_MIN) >= 0) + return CORD_asprintf("I16(%s)", c_literal); + break; + case TYPE_IBITS8: + if (mpz_cmp_si(i, INT8_MAX) <= 0 && mpz_cmp_si(i, INT8_MIN) >= 0) + return CORD_asprintf("I8(%s)", c_literal); + break; + default: break; + } + code_err(ast, "This integer cannot fit in a %d-bit value", target_bits); + } else { + code_err(ast, "I don't know how to compile this to a %T", target); + } +} + +CORD compile_arguments(env_t *env, ast_t *call_ast, arg_t *spec_args, arg_ast_t *call_args) +{ + Table_t used_args = {}; + CORD code = CORD_EMPTY; + env_t *default_scope = namespace_scope(env); + for (arg_t *spec_arg = spec_args; spec_arg; spec_arg = spec_arg->next) { + int64_t i = 1; + // Find keyword: + if (spec_arg->name) { + for (arg_ast_t *call_arg = call_args; call_arg; call_arg = call_arg->next) { + if (call_arg->name && streq(call_arg->name, spec_arg->name)) { + CORD value; + if (spec_arg->type->tag == IntType && call_arg->value->tag == Int) { + value = compile_int_to_type(env, call_arg->value, spec_arg->type); + } else if (spec_arg->type->tag == NumType && call_arg->value->tag == Int) { + OptionalInt_t int_val = Int$from_str(Match(call_arg->value, Int)->str); + if (int_val.small == 0) + code_err(call_arg->value, "Failed to parse this integer"); + if (Match(spec_arg->type, NumType)->bits == TYPE_NBITS64) + value = CORD_asprintf("N64(%.20g)", Num$from_int(int_val, false)); + else + value = CORD_asprintf("N32(%.10g)", (double)Num32$from_int(int_val, false)); + } else { + env_t *arg_env = with_enum_scope(env, spec_arg->type); + value = compile_maybe_incref(arg_env, call_arg->value, spec_arg->type); + } + Table$str_set(&used_args, call_arg->name, call_arg); + if (code) code = CORD_cat(code, ", "); + code = CORD_cat(code, value); + goto found_it; + } + } + } + // Find positional: + for (arg_ast_t *call_arg = call_args; call_arg; call_arg = call_arg->next) { + if (call_arg->name) continue; + const char *pseudoname = heap_strf("%ld", i++); + if (!Table$str_get(used_args, pseudoname)) { + CORD value; + if (spec_arg->type->tag == IntType && call_arg->value->tag == Int) { + value = compile_int_to_type(env, call_arg->value, spec_arg->type); + } else if (spec_arg->type->tag == NumType && call_arg->value->tag == Int) { + OptionalInt_t int_val = Int$from_str(Match(call_arg->value, Int)->str); + if (int_val.small == 0) + code_err(call_arg->value, "Failed to parse this integer"); + if (Match(spec_arg->type, NumType)->bits == TYPE_NBITS64) + value = CORD_asprintf("N64(%.20g)", Num$from_int(int_val, false)); + else + value = CORD_asprintf("N32(%.10g)", (double)Num32$from_int(int_val, false)); + } else { + env_t *arg_env = with_enum_scope(env, spec_arg->type); + value = compile_maybe_incref(arg_env, call_arg->value, spec_arg->type); + } + + Table$str_set(&used_args, pseudoname, call_arg); + if (code) code = CORD_cat(code, ", "); + code = CORD_cat(code, value); + goto found_it; + } + } + + if (spec_arg->default_val) { + if (code) code = CORD_cat(code, ", "); + code = CORD_cat(code, compile_maybe_incref(default_scope, spec_arg->default_val, get_type(env, spec_arg->default_val))); + goto found_it; + } + + assert(spec_arg->name); + code_err(call_ast, "The required argument '%s' was not provided", spec_arg->name); + found_it: continue; + } + + int64_t i = 1; + for (arg_ast_t *call_arg = call_args; call_arg; call_arg = call_arg->next) { + if (call_arg->name) { + if (!Table$str_get(used_args, call_arg->name)) + code_err(call_arg->value, "There is no argument with the name '%s'", call_arg->name); + } else { + const char *pseudoname = heap_strf("%ld", i++); + if (!Table$str_get(used_args, pseudoname)) + code_err(call_arg->value, "This is one argument too many!"); + } + } + return code; +} + +CORD compile_math_method(env_t *env, binop_e op, ast_t *lhs, ast_t *rhs, type_t *required_type) +{ + // Math methods are things like plus(), minus(), etc. If we don't find a + // matching method, return CORD_EMPTY. + const char *method_name = binop_method_names[op]; + if (!method_name) + return CORD_EMPTY; + + type_t *lhs_t = get_type(env, lhs); + type_t *rhs_t = get_type(env, rhs); +#define binding_works(b, lhs_t, rhs_t, ret_t) \ + (b && b->type->tag == FunctionType && ({ auto fn = Match(b->type, FunctionType); \ + (type_eq(fn->ret, ret_t) \ + && (fn->args && type_eq(fn->args->type, lhs_t)) \ + && (fn->args->next && can_promote(rhs_t, fn->args->next->type)) \ + && (!required_type || type_eq(required_type, fn->ret))); })) + arg_ast_t *args = new(arg_ast_t, .value=lhs, .next=new(arg_ast_t, .value=rhs)); + switch (op) { + case BINOP_MULT: { + if (type_eq(lhs_t, rhs_t)) { + binding_t *b = get_namespace_binding(env, lhs, binop_method_names[op]); + if (binding_works(b, lhs_t, rhs_t, lhs_t)) + return CORD_all(b->code, "(", compile_arguments(env, lhs, Match(b->type, FunctionType)->args, args), ")"); + } else if (lhs_t->tag == NumType || lhs_t->tag == IntType || lhs_t->tag == BigIntType) { + binding_t *b = get_namespace_binding(env, rhs, "scaled_by"); + if (binding_works(b, rhs_t, lhs_t, rhs_t)) { + REVERSE_LIST(args); + return CORD_all(b->code, "(", compile_arguments(env, lhs, Match(b->type, FunctionType)->args, args), ")"); + } + } else if (rhs_t->tag == NumType || rhs_t->tag == IntType|| rhs_t->tag == BigIntType) { + binding_t *b = get_namespace_binding(env, lhs, "scaled_by"); + if (binding_works(b, lhs_t, rhs_t, lhs_t)) + return CORD_all(b->code, "(", compile_arguments(env, lhs, Match(b->type, FunctionType)->args, args), ")"); + } + break; + } + case BINOP_OR: case BINOP_CONCAT: { + if (lhs_t->tag == SetType) { + return CORD_all("Table$with(", compile(env, lhs), ", ", compile(env, rhs), ", ", compile_type_info(lhs_t), ")"); + } + goto fallthrough; + } + case BINOP_AND: { + if (lhs_t->tag == SetType) { + return CORD_all("Table$overlap(", compile(env, lhs), ", ", compile(env, rhs), ", ", compile_type_info(lhs_t), ")"); + } + goto fallthrough; + } + case BINOP_MINUS: { + if (lhs_t->tag == SetType) { + return CORD_all("Table$without(", compile(env, lhs), ", ", compile(env, rhs), ", ", compile_type_info(lhs_t), ")"); + } + goto fallthrough; + } + case BINOP_PLUS: case BINOP_XOR: { + fallthrough: + if (type_eq(lhs_t, rhs_t)) { + binding_t *b = get_namespace_binding(env, lhs, binop_method_names[op]); + if (binding_works(b, lhs_t, rhs_t, lhs_t)) + return CORD_all(b->code, "(", compile(env, lhs), ", ", compile(env, rhs), ")"); + } + break; + } + case BINOP_DIVIDE: case BINOP_MOD: case BINOP_MOD1: { + if (is_numeric_type(rhs_t)) { + binding_t *b = get_namespace_binding(env, lhs, binop_method_names[op]); + if (binding_works(b, lhs_t, rhs_t, lhs_t)) + return CORD_all(b->code, "(", compile_arguments(env, lhs, Match(b->type, FunctionType)->args, args), ")"); + } + break; + } + case BINOP_LSHIFT: case BINOP_RSHIFT: case BINOP_ULSHIFT: case BINOP_URSHIFT: { + if (rhs_t->tag == IntType || rhs_t->tag == BigIntType) { + binding_t *b = get_namespace_binding(env, lhs, binop_method_names[op]); + if (binding_works(b, lhs_t, rhs_t, lhs_t)) + return CORD_all(b->code, "(", compile_arguments(env, lhs, Match(b->type, FunctionType)->args, args), ")"); + } + break; + } + case BINOP_POWER: { + if (rhs_t->tag == NumType || rhs_t->tag == IntType || rhs_t->tag == BigIntType) { + binding_t *b = get_namespace_binding(env, lhs, binop_method_names[op]); + if (binding_works(b, lhs_t, rhs_t, lhs_t)) + return CORD_all(b->code, "(", compile_arguments(env, lhs, Match(b->type, FunctionType)->args, args), ")"); + } + break; + } + default: break; + } + return CORD_EMPTY; +} + +CORD compile_string_literal(CORD literal) +{ + CORD code = "\""; + CORD_pos i; +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wsign-conversion" + CORD_FOR(i, literal) { +#pragma GCC diagnostic pop + char c = CORD_pos_fetch(i); + switch (c) { + case '\\': code = CORD_cat(code, "\\\\"); break; + case '"': code = CORD_cat(code, "\\\""); break; + case '\a': code = CORD_cat(code, "\\a"); break; + case '\b': code = CORD_cat(code, "\\b"); break; + case '\n': code = CORD_cat(code, "\\n"); break; + case '\r': code = CORD_cat(code, "\\r"); break; + case '\t': code = CORD_cat(code, "\\t"); break; + case '\v': code = CORD_cat(code, "\\v"); break; + default: { + if (isprint(c)) + code = CORD_cat_char(code, c); + else + CORD_sprintf(&code, "%r\\x%02X\"\"", code, (uint8_t)c); + break; + } + } + } + return CORD_cat(code, "\""); +} + +static bool string_literal_is_all_ascii(CORD literal) +{ + CORD_pos i; +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wsign-conversion" + CORD_FOR(i, literal) { +#pragma GCC diagnostic pop + if (!isascii(CORD_pos_fetch(i))) + return false; + } + return true; +} + +CORD compile_none(type_t *t) +{ + if (t->tag == OptionalType) + t = Match(t, OptionalType)->type; + + if (t == THREAD_TYPE) return "NULL"; + else if (t == PATH_TYPE) return "NONE_PATH"; + else if (t == PATH_TYPE_TYPE) return "((OptionalPathType_t){})"; + + switch (t->tag) { + case BigIntType: return "NONE_INT"; + case IntType: { + switch (Match(t, IntType)->bits) { + case TYPE_IBITS8: return "NONE_INT8"; + case TYPE_IBITS16: return "NONE_INT16"; + case TYPE_IBITS32: return "NONE_INT32"; + case TYPE_IBITS64: return "NONE_INT64"; + default: errx(1, "Invalid integer bit size"); + } + break; + } + case BoolType: return "NONE_BOOL"; + case ByteType: return "NONE_BYTE"; + case ArrayType: return "NONE_ARRAY"; + case TableType: return "NONE_TABLE"; + case SetType: return "NONE_TABLE"; + case TextType: return "NONE_TEXT"; + case CStringType: return "NULL"; + case MomentType: return "NONE_MOMENT"; + case PointerType: return CORD_all("((", compile_type(t), ")NULL)"); + case ClosureType: return "NONE_CLOSURE"; + case NumType: return "nan(\"null\")"; + case StructType: return CORD_all("((", compile_type(Type(OptionalType, .type=t)), "){.is_none=true})"); + case EnumType: { + env_t *enum_env = Match(t, EnumType)->env; + return CORD_all("((", compile_type(t), "){", namespace_prefix(enum_env, enum_env->namespace), "null})"); + } + case MutexedType: return "NONE_MUTEXED_DATA"; + default: compiler_err(NULL, NULL, NULL, "none isn't implemented for this type: %T", t); + } +} + +static ast_t *add_to_table_comprehension(ast_t *entry, ast_t *subject) +{ + auto e = Match(entry, TableEntry); + return WrapAST(entry, MethodCall, .name="set", .self=subject, + .args=new(arg_ast_t, .value=e->key, .next=new(arg_ast_t, .value=e->value))); +} + +static ast_t *add_to_array_comprehension(ast_t *item, ast_t *subject) +{ + return WrapAST(item, MethodCall, .name="insert", .self=subject, .args=new(arg_ast_t, .value=item)); +} + +static ast_t *add_to_set_comprehension(ast_t *item, ast_t *subject) +{ + return WrapAST(item, MethodCall, .name="add", .self=subject, .args=new(arg_ast_t, .value=item)); +} + +CORD compile(env_t *env, ast_t *ast) +{ + switch (ast->tag) { + case None: { + if (!Match(ast, None)->type) + code_err(ast, "This 'none' needs to specify what type it is using `none:Type` syntax"); + type_t *t = parse_type_ast(env, Match(ast, None)->type); + return compile_none(t); + } + case Bool: return Match(ast, Bool)->b ? "yes" : "no"; + case Moment: { + auto moment = Match(ast, Moment)->moment; + return CORD_asprintf("((Moment_t){.tv_sec=%ld, .tv_usec=%ld})", moment.tv_sec, moment.tv_usec); + } + case Var: { + binding_t *b = get_binding(env, Match(ast, Var)->name); + if (b) + return b->code ? b->code : CORD_cat("_$", Match(ast, Var)->name); + return CORD_cat("_$", Match(ast, Var)->name); + // code_err(ast, "I don't know of any variable by this name"); + } + case Int: { + const char *str = Match(ast, Int)->str; + OptionalInt_t int_val = Int$from_str(str); + if (int_val.small == 0) + code_err(ast, "Failed to parse this integer"); + mpz_t i; + mpz_init_set_int(i, int_val); + if (mpz_cmpabs_ui(i, BIGGEST_SMALL_INT) <= 0) { + return CORD_asprintf("I_small(%s)", str); + } else if (mpz_cmp_si(i, INT64_MAX) <= 0 && mpz_cmp_si(i, INT64_MIN) >= 0) { + return CORD_asprintf("Int$from_int64(%s)", str); + } else { + return CORD_asprintf("Int$from_str(\"%s\")", str); + } + } + case Num: { + return CORD_asprintf("N64(%.20g)", Match(ast, Num)->n); + } + case Not: { + ast_t *value = Match(ast, Not)->value; + type_t *t = get_type(env, value); + + binding_t *b = get_namespace_binding(env, value, "negated"); + if (b && b->type->tag == FunctionType) { + auto fn = Match(b->type, FunctionType); + if (fn->args && can_promote(t, get_arg_type(env, fn->args))) + return CORD_all(b->code, "(", compile_arguments(env, ast, fn->args, new(arg_ast_t, .value=value)), ")"); + } + + if (t->tag == BoolType) + return CORD_all("!(", compile(env, value), ")"); + else if (t->tag == IntType || t->tag == ByteType) + return CORD_all("~(", compile(env, value), ")"); + else if (t->tag == ArrayType) + return CORD_all("((", compile(env, value), ").length == 0)"); + else if (t->tag == SetType || t->tag == TableType) + return CORD_all("((", compile(env, value), ").entries.length == 0)"); + else if (t->tag == TextType) + return CORD_all("(", compile(env, value), " == CORD_EMPTY)"); + else if (t->tag == OptionalType) + return check_none(t, compile(env, value)); + + code_err(ast, "I don't know how to negate values of type %T", t); + } + case Negative: { + ast_t *value = Match(ast, Negative)->value; + type_t *t = get_type(env, value); + binding_t *b = get_namespace_binding(env, value, "negative"); + if (b && b->type->tag == FunctionType) { + auto fn = Match(b->type, FunctionType); + if (fn->args && can_promote(t, get_arg_type(env, fn->args))) + return CORD_all(b->code, "(", compile_arguments(env, ast, fn->args, new(arg_ast_t, .value=value)), ")"); + } + + if (t->tag == IntType || t->tag == NumType) + return CORD_all("-(", compile(env, value), ")"); + + code_err(ast, "I don't know how to get the negative value of type %T", t); + + } + // TODO: for constructors, do new(T, ...) instead of heap((T){...}) + case HeapAllocate: return CORD_asprintf("heap(%r)", compile(env, Match(ast, HeapAllocate)->value)); + case StackReference: { + ast_t *subject = Match(ast, StackReference)->value; + if (can_be_mutated(env, subject)) + return CORD_all("(&", compile_lvalue(env, subject), ")"); + else + return CORD_all("stack(", compile(env, subject), ")"); + } + case Mutexed: { + ast_t *mutexed = Match(ast, Mutexed)->value; + return CORD_all("new(struct MutexedData_s, .mutex=PTHREAD_MUTEX_INITIALIZER, .data=", compile(env, WrapAST(mutexed, HeapAllocate, mutexed)), ")"); + } + case Holding: { + ast_t *held = Match(ast, Holding)->mutexed; + type_t *held_type = get_type(env, held); + if (held_type->tag != MutexedType) + code_err(held, "This is a %t, not a mutexed value", held_type); + CORD code = CORD_all( + "({ // Holding\n", + "MutexedData_t mutexed = ", compile(env, held), ";\n", + "pthread_mutex_lock(&mutexed->mutex);\n"); + + env_t *body_scope = fresh_scope(env); + body_scope->deferred = new(deferral_t, .defer_env=env, + .block=FakeAST(InlineCCode, .code="pthread_mutex_unlock(&mutexed->mutex);"), + .next=body_scope->deferred); + if (held->tag == Var) { + CORD held_var = CORD_all(Match(held, Var)->name, "$held"); + set_binding(body_scope, Match(held, Var)->name, + Type(PointerType, .pointed=Match(held_type, MutexedType)->type, .is_stack=true), held_var); + code = CORD_all(code, compile_declaration(Type(PointerType, .pointed=Match(held_type, MutexedType)->type), held_var), + " = (", compile_type(Type(PointerType, .pointed=Match(held_type, MutexedType)->type)), ")mutexed->data;\n"); + } + type_t *body_type = get_type(env, ast); + return CORD_all(code, compile_declaration(body_type, "result"), " = ", compile(body_scope, Match(ast, Holding)->body), ";\n" + "pthread_mutex_unlock(&mutexed->mutex);\n" + "result;\n})"); + } + case Optional: { + ast_t *value = Match(ast, Optional)->value; + CORD value_code = compile(env, value); + return promote_to_optional(get_type(env, value), value_code); + } + case NonOptional: { + ast_t *value = Match(ast, NonOptional)->value; + type_t *t = get_type(env, value); + CORD value_code = compile(env, value); + return CORD_all("({ ", compile_declaration(t, "opt"), " = ", value_code, "; ", + "if unlikely (", check_none(t, "opt"), ")\n", + CORD_asprintf("fail_source(%r, %ld, %ld, \"This was expected to be a value, but it's none\");\n", + CORD_quoted(ast->file->filename), + (long)(value->start - value->file->text), + (long)(value->end - value->file->text)), + optional_into_nonnone(t, "opt"), "; })"); + } + case BinaryOp: { + auto binop = Match(ast, BinaryOp); + CORD method_call = compile_math_method(env, binop->op, binop->lhs, binop->rhs, NULL); + if (method_call != CORD_EMPTY) + return method_call; + + type_t *lhs_t = get_type(env, binop->lhs); + type_t *rhs_t = get_type(env, binop->rhs); + + if (binop->op == BINOP_OR && lhs_t->tag == OptionalType) { + if (rhs_t->tag == AbortType || rhs_t->tag == ReturnType) { + return CORD_all("({ ", compile_declaration(lhs_t, "lhs"), " = ", compile(env, binop->lhs), "; ", + "if (", check_none(lhs_t, "lhs"), ") ", compile_statement(env, binop->rhs), " ", + optional_into_nonnone(lhs_t, "lhs"), "; })"); + } else if (rhs_t->tag == OptionalType && type_eq(lhs_t, rhs_t)) { + return CORD_all("({ ", compile_declaration(lhs_t, "lhs"), " = ", compile(env, binop->lhs), "; ", + check_none(lhs_t, "lhs"), " ? ", compile(env, binop->rhs), " : lhs; })"); + } else if (rhs_t->tag != OptionalType && type_eq(Match(lhs_t, OptionalType)->type, rhs_t)) { + return CORD_all("({ ", compile_declaration(lhs_t, "lhs"), " = ", compile(env, binop->lhs), "; ", + check_none(lhs_t, "lhs"), " ? ", compile(env, binop->rhs), " : ", + optional_into_nonnone(lhs_t, "lhs"), "; })"); + } else if (rhs_t->tag == BoolType) { + return CORD_all("((!", check_none(lhs_t, compile(env, binop->lhs)), ") || ", compile(env, binop->rhs), ")"); + } else { + code_err(ast, "I don't know how to do an 'or' operation between %T and %T", lhs_t, rhs_t); + } + } else if (binop->op == BINOP_AND && lhs_t->tag == OptionalType) { + if (rhs_t->tag == AbortType || rhs_t->tag == ReturnType) { + return CORD_all("({ ", compile_declaration(lhs_t, "lhs"), " = ", compile(env, binop->lhs), "; ", + "if (!", check_none(lhs_t, "lhs"), ") ", compile_statement(env, binop->rhs), " ", + optional_into_nonnone(lhs_t, "lhs"), "; })"); + } else if (rhs_t->tag == OptionalType && type_eq(lhs_t, rhs_t)) { + return CORD_all("({ ", compile_declaration(lhs_t, "lhs"), " = ", compile(env, binop->lhs), "; ", + check_none(lhs_t, "lhs"), " ? lhs : ", compile(env, binop->rhs), "; })"); + } else if (rhs_t->tag == BoolType) { + return CORD_all("((!", check_none(lhs_t, compile(env, binop->lhs)), ") && ", compile(env, binop->rhs), ")"); + } else { + code_err(ast, "I don't know how to do an 'or' operation between %T and %T", lhs_t, rhs_t); + } + } + + type_t *non_optional_lhs = lhs_t; + if (lhs_t->tag == OptionalType) non_optional_lhs = Match(lhs_t, OptionalType)->type; + type_t *non_optional_rhs = rhs_t; + if (rhs_t->tag == OptionalType) non_optional_rhs = Match(rhs_t, OptionalType)->type; + + if (!non_optional_lhs && !non_optional_rhs) + code_err(ast, "Both of these values do not specify a type"); + else if (!non_optional_lhs) + non_optional_lhs = non_optional_rhs; + else if (!non_optional_rhs) + non_optional_rhs = non_optional_lhs; + + bool lhs_is_optional_num = (lhs_t->tag == OptionalType && non_optional_lhs->tag == NumType); + if (lhs_is_optional_num) + lhs_t = Match(lhs_t, OptionalType)->type; + bool rhs_is_optional_num = (rhs_t->tag == OptionalType && non_optional_rhs->tag == NumType); + if (rhs_is_optional_num) + rhs_t = Match(rhs_t, OptionalType)->type; + + CORD lhs, rhs; + if (lhs_t->tag == BigIntType && rhs_t->tag != BigIntType && is_numeric_type(rhs_t) && binop->lhs->tag == Int) { + lhs = compile_int_to_type(env, binop->lhs, rhs_t); + lhs_t = rhs_t; + rhs = compile(env, binop->rhs); + } else if (rhs_t->tag == BigIntType && lhs_t->tag != BigIntType && is_numeric_type(lhs_t) && binop->rhs->tag == Int) { + lhs = compile(env, binop->lhs); + rhs = compile_int_to_type(env, binop->rhs, lhs_t); + rhs_t = lhs_t; + } else { + lhs = compile(env, binop->lhs); + rhs = compile(env, binop->rhs); + } + + type_t *operand_t; + if (promote(env, binop->rhs, &rhs, rhs_t, lhs_t)) + operand_t = lhs_t; + else if (promote(env, binop->lhs, &lhs, lhs_t, rhs_t)) + operand_t = rhs_t; + else + code_err(ast, "I can't do operations between %T and %T", lhs_t, rhs_t); + + switch (binop->op) { + case BINOP_POWER: { + if (operand_t->tag != NumType) + code_err(ast, "Exponentiation is only supported for Num types, not %T", operand_t); + if (operand_t->tag == NumType && Match(operand_t, NumType)->bits == TYPE_NBITS32) + return CORD_all("powf(", lhs, ", ", rhs, ")"); + else + return CORD_all("pow(", lhs, ", ", rhs, ")"); + } + case BINOP_MULT: { + if (operand_t->tag != IntType && operand_t->tag != NumType && operand_t->tag != ByteType) + code_err(ast, "Math operations are only supported for values of the same numeric type, not %T and %T", lhs_t, rhs_t); + return CORD_all("(", lhs, " * ", rhs, ")"); + } + case BINOP_DIVIDE: { + if (operand_t->tag != IntType && operand_t->tag != NumType && operand_t->tag != ByteType) + code_err(ast, "Math operations are only supported for values of the same numeric type, not %T and %T", lhs_t, rhs_t); + return CORD_all("(", lhs, " / ", rhs, ")"); + } + case BINOP_MOD: { + if (operand_t->tag != IntType && operand_t->tag != NumType && operand_t->tag != ByteType) + code_err(ast, "Math operations are only supported for values of the same numeric type, not %T and %T", lhs_t, rhs_t); + return CORD_all("(", lhs, " % ", rhs, ")"); + } + case BINOP_MOD1: { + if (operand_t->tag != IntType && operand_t->tag != NumType && operand_t->tag != ByteType) + code_err(ast, "Math operations are only supported for values of the same numeric type, not %T and %T", lhs_t, rhs_t); + return CORD_all("((((", lhs, ")-1) % (", rhs, ")) + 1)"); + } + case BINOP_PLUS: { + if (operand_t->tag != IntType && operand_t->tag != NumType && operand_t->tag != ByteType) + code_err(ast, "Math operations are only supported for values of the same numeric type, not %T and %T", lhs_t, rhs_t); + return CORD_all("(", lhs, " + ", rhs, ")"); + } + case BINOP_MINUS: { + if (operand_t->tag != IntType && operand_t->tag != NumType && operand_t->tag != ByteType) + code_err(ast, "Math operations are only supported for values of the same numeric type, not %T and %T", lhs_t, rhs_t); + return CORD_all("(", lhs, " - ", rhs, ")"); + } + case BINOP_LSHIFT: { + if (operand_t->tag != IntType && operand_t->tag != NumType && operand_t->tag != ByteType) + code_err(ast, "Math operations are only supported for values of the same numeric type, not %T and %T", lhs_t, rhs_t); + return CORD_all("(", lhs, " << ", rhs, ")"); + } + case BINOP_RSHIFT: { + if (operand_t->tag != IntType && operand_t->tag != NumType && operand_t->tag != ByteType) + code_err(ast, "Math operations are only supported for values of the same numeric type, not %T and %T", lhs_t, rhs_t); + return CORD_all("(", lhs, " >> ", rhs, ")"); + } + case BINOP_ULSHIFT: { + if (operand_t->tag != IntType && operand_t->tag != NumType && operand_t->tag != ByteType) + code_err(ast, "Math operations are only supported for values of the same numeric type, not %T and %T", lhs_t, rhs_t); + return CORD_all("(", compile_type(operand_t), ")((", compile_unsigned_type(lhs_t), ")", lhs, " << ", rhs, ")"); + } + case BINOP_URSHIFT: { + if (operand_t->tag != IntType && operand_t->tag != NumType && operand_t->tag != ByteType) + code_err(ast, "Math operations are only supported for values of the same numeric type, not %T and %T", lhs_t, rhs_t); + return CORD_all("(", compile_type(operand_t), ")((", compile_unsigned_type(lhs_t), ")", lhs, " >> ", rhs, ")"); + } + case BINOP_EQ: { + switch (operand_t->tag) { + case BigIntType: + return CORD_all("Int$equal_value(", lhs, ", ", rhs, ")"); + case BoolType: case ByteType: case IntType: case NumType: case PointerType: case FunctionType: + if (lhs_is_optional_num || rhs_is_optional_num) + return CORD_asprintf("generic_equal(stack(%r), stack(%r), %r)", lhs, rhs, compile_type_info(Type(OptionalType, operand_t))); + return CORD_all("(", lhs, " == ", rhs, ")"); + default: + return CORD_asprintf("generic_equal(stack(%r), stack(%r), %r)", lhs, rhs, compile_type_info(operand_t)); + } + } + case BINOP_NE: { + switch (operand_t->tag) { + case BigIntType: + return CORD_all("!Int$equal_value(", lhs, ", ", rhs, ")"); + case BoolType: case ByteType: case IntType: case NumType: case PointerType: case FunctionType: + if (lhs_is_optional_num || rhs_is_optional_num) + return CORD_asprintf("!generic_equal(stack(%r), stack(%r), %r)", lhs, rhs, compile_type_info(Type(OptionalType, operand_t))); + return CORD_all("(", lhs, " != ", rhs, ")"); + default: + return CORD_asprintf("!generic_equal(stack(%r), stack(%r), %r)", lhs, rhs, compile_type_info(operand_t)); + } + } + case BINOP_LT: { + switch (operand_t->tag) { + case BigIntType: + return CORD_all("(Int$compare_value(", lhs, ", ", rhs, ") < 0)"); + case BoolType: case ByteType: case IntType: case NumType: case PointerType: case FunctionType: + if (lhs_is_optional_num || rhs_is_optional_num) + return CORD_asprintf("(generic_compare(stack(%r), stack(%r), %r) < 0)", lhs, rhs, compile_type_info(Type(OptionalType, operand_t))); + return CORD_all("(", lhs, " < ", rhs, ")"); + default: + return CORD_asprintf("(generic_compare(stack(%r), stack(%r), %r) < 0)", lhs, rhs, compile_type_info(operand_t)); + } + } + case BINOP_LE: { + switch (operand_t->tag) { + case BigIntType: + return CORD_all("(Int$compare_value(", lhs, ", ", rhs, ") <= 0)"); + case BoolType: case ByteType: case IntType: case NumType: case PointerType: case FunctionType: + if (lhs_is_optional_num || rhs_is_optional_num) + return CORD_asprintf("(generic_compare(stack(%r), stack(%r), %r) <= 0)", lhs, rhs, compile_type_info(Type(OptionalType, operand_t))); + return CORD_all("(", lhs, " <= ", rhs, ")"); + default: + return CORD_asprintf("(generic_compare(stack(%r), stack(%r), %r) <= 0)", lhs, rhs, compile_type_info(operand_t)); + } + } + case BINOP_GT: { + switch (operand_t->tag) { + case BigIntType: + return CORD_all("(Int$compare_value(", lhs, ", ", rhs, ") > 0)"); + case BoolType: case ByteType: case IntType: case NumType: case PointerType: case FunctionType: + if (lhs_is_optional_num || rhs_is_optional_num) + return CORD_asprintf("(generic_compare(stack(%r), stack(%r), %r) > 0)", lhs, rhs, compile_type_info(Type(OptionalType, operand_t))); + return CORD_all("(", lhs, " > ", rhs, ")"); + default: + return CORD_asprintf("(generic_compare(stack(%r), stack(%r), %r) > 0)", lhs, rhs, compile_type_info(operand_t)); + } + } + case BINOP_GE: { + switch (operand_t->tag) { + case BigIntType: + return CORD_all("(Int$compare_value(", lhs, ", ", rhs, ") >= 0)"); + case BoolType: case ByteType: case IntType: case NumType: case PointerType: case FunctionType: + if (lhs_is_optional_num || rhs_is_optional_num) + return CORD_asprintf("(generic_compare(stack(%r), stack(%r), %r) >= 0)", lhs, rhs, compile_type_info(Type(OptionalType, operand_t))); + return CORD_all("(", lhs, " >= ", rhs, ")"); + default: + return CORD_asprintf("(generic_compare(stack(%r), stack(%r), %r) >= 0)", lhs, rhs, compile_type_info(operand_t)); + } + } + case BINOP_AND: { + if (operand_t->tag == BoolType) + return CORD_all("(", lhs, " && ", rhs, ")"); + else if (operand_t->tag == IntType || operand_t->tag == ByteType) + return CORD_all("(", lhs, " & ", rhs, ")"); + else + code_err(ast, "The 'and' operator isn't supported between %T and %T values", lhs_t, rhs_t); + } + case BINOP_CMP: { + if (lhs_is_optional_num || rhs_is_optional_num) + operand_t = Type(OptionalType, operand_t); + return CORD_all("generic_compare(stack(", lhs, "), stack(", rhs, "), ", compile_type_info(operand_t), ")"); + } + case BINOP_OR: { + if (operand_t->tag == BoolType) + return CORD_all("(", lhs, " || ", rhs, ")"); + else if (operand_t->tag == IntType || operand_t->tag == ByteType) + return CORD_all("(", lhs, " | ", rhs, ")"); + else + code_err(ast, "The 'or' operator isn't supported between %T and %T values", lhs_t, rhs_t); + } + case BINOP_XOR: { + // TODO: support optional values in `xor` expressions + if (operand_t->tag == BoolType || operand_t->tag == IntType || operand_t->tag == ByteType) + return CORD_all("(", lhs, " ^ ", rhs, ")"); + else + code_err(ast, "The 'xor' operator isn't supported between %T and %T values", lhs_t, rhs_t); + } + case BINOP_CONCAT: { + if (operand_t == PATH_TYPE) + return CORD_all("Path$concat(", lhs, ", ", rhs, ")"); + switch (operand_t->tag) { + case TextType: { + return CORD_all("Text$concat(", lhs, ", ", rhs, ")"); + } + case ArrayType: { + return CORD_all("Array$concat(", lhs, ", ", rhs, ", sizeof(", compile_type(Match(operand_t, ArrayType)->item_type), "))"); + } + default: + code_err(ast, "Concatenation isn't supported between %T and %T values", lhs_t, rhs_t); + } + } + default: break; + } + code_err(ast, "unimplemented binop"); + } + case TextLiteral: { + CORD literal = Match(ast, TextLiteral)->cord; + if (literal == CORD_EMPTY) + return "EMPTY_TEXT"; + + if (string_literal_is_all_ascii(literal)) + return CORD_all("Text(", compile_string_literal(literal), ")"); + else + return CORD_all("Text$from_str(", compile_string_literal(literal), ")"); + } + case TextJoin: { + const char *lang = Match(ast, TextJoin)->lang; + + type_t *text_t = lang ? Table$str_get(*env->types, lang) : TEXT_TYPE; + if (!text_t || text_t->tag != TextType) + code_err(ast, "%s is not a valid text language name", lang); + + CORD lang_constructor; + if (!lang || streq(lang, "Text")) + lang_constructor = "Text"; + else if (streq(lang, "Pattern")) + lang_constructor = lang; + else + lang_constructor = CORD_all(namespace_prefix(Match(text_t, TextType)->env, Match(text_t, TextType)->env->namespace->parent), lang); + + ast_list_t *chunks = Match(ast, TextJoin)->children; + if (!chunks) { + return CORD_all(lang_constructor, "(\"\")"); + } else if (!chunks->next && chunks->ast->tag == TextLiteral) { + CORD literal = Match(chunks->ast, TextLiteral)->cord; + if (string_literal_is_all_ascii(literal)) + return CORD_all(lang_constructor, "(", compile_string_literal(literal), ")"); + return CORD_all("((", compile_type(text_t), ")", compile(env, chunks->ast), ")"); + } else { + CORD code = CORD_EMPTY; + for (ast_list_t *chunk = chunks; chunk; chunk = chunk->next) { + CORD chunk_code; + type_t *chunk_t = get_type(env, chunk->ast); + if (chunk->ast->tag == TextLiteral || type_eq(chunk_t, text_t)) { + chunk_code = compile(env, chunk->ast); + } else { + binding_t *constructor = get_constructor(env, text_t, new(arg_ast_t, .value=chunk->ast)); + if (constructor) { + arg_t *arg_spec = Match(constructor->type, FunctionType)->args; + arg_ast_t *args = new(arg_ast_t, .value=chunk->ast); + chunk_code = CORD_all(constructor->code, "(", compile_arguments(env, ast, arg_spec, args), ")"); + } else if (type_eq(text_t, TEXT_TYPE)) { + if (chunk_t->tag == TextType) + chunk_code = compile(env, chunk->ast); + else + chunk_code = compile_string(env, chunk->ast, "no"); + } else { + code_err(chunk->ast, "I don't know how to convert %T to %T", chunk_t, text_t); + } + } + code = CORD_cat(code, chunk_code); + if (chunk->next) code = CORD_cat(code, ", "); + } + if (chunks->next) + return CORD_all(lang_constructor, "s(", code, ")"); + else + return code; + } + } + case Path: { + return CORD_all("Path(", compile_string_literal(Match(ast, Path)->path), ")"); + } + case Block: { + ast_list_t *stmts = Match(ast, Block)->statements; + if (stmts && !stmts->next) + return compile(env, stmts->ast); + + CORD code = "({\n"; + deferral_t *prev_deferred = env->deferred; + env = fresh_scope(env); + for (ast_list_t *stmt = stmts; stmt; stmt = stmt->next) + prebind_statement(env, stmt->ast); + for (ast_list_t *stmt = stmts; stmt; stmt = stmt->next) { + if (stmt->next) { + code = CORD_all(code, compile_statement(env, stmt->ast), "\n"); + } else { + // TODO: put defer after evaluating block expression + for (deferral_t *deferred = env->deferred; deferred && deferred != prev_deferred; deferred = deferred->next) { + code = CORD_all(code, compile_statement(deferred->defer_env, deferred->block)); + } + code = CORD_all(code, compile(env, stmt->ast), ";\n"); + } + bind_statement(env, stmt->ast); + } + + return CORD_cat(code, "})"); + } + case Min: case Max: { + type_t *t = get_type(env, ast); + ast_t *key = ast->tag == Min ? Match(ast, Min)->key : Match(ast, Max)->key; + ast_t *lhs = ast->tag == Min ? Match(ast, Min)->lhs : Match(ast, Max)->lhs; + ast_t *rhs = ast->tag == Min ? Match(ast, Min)->rhs : Match(ast, Max)->rhs; + const char *key_name = "$"; + if (key == NULL) key = FakeAST(Var, key_name); + + env_t *expr_env = fresh_scope(env); + set_binding(expr_env, key_name, t, "ternary$lhs"); + CORD lhs_key = compile(expr_env, key); + + set_binding(expr_env, key_name, t, "ternary$rhs"); + CORD rhs_key = compile(expr_env, key); + + type_t *key_t = get_type(expr_env, key); + CORD comparison; + if (key_t->tag == BigIntType) + comparison = CORD_all("(Int$compare_value(", lhs_key, ", ", rhs_key, ")", (ast->tag == Min ? "<=" : ">="), "0)"); + else if (key_t->tag == IntType || key_t->tag == NumType || key_t->tag == BoolType || key_t->tag == PointerType || key_t->tag == ByteType) + comparison = CORD_all("((", lhs_key, ")", (ast->tag == Min ? "<=" : ">="), "(", rhs_key, "))"); + else + comparison = CORD_all("generic_compare(stack(", lhs_key, "), stack(", rhs_key, "), ", compile_type_info(key_t), ")", + (ast->tag == Min ? "<=" : ">="), "0"); + + return CORD_all( + "({\n", + compile_type(t), " ternary$lhs = ", compile(env, lhs), ", ternary$rhs = ", compile(env, rhs), ";\n", + comparison, " ? ternary$lhs : ternary$rhs;\n" + "})"); + } + case Array: { + type_t *array_type = get_type(env, ast); + type_t *item_type = Match(array_type, ArrayType)->item_type; + // if (type_size(Match(array_type, ArrayType)->item_type) > ARRAY_MAX_STRIDE) + // code_err(ast, "This array holds items that take up %ld bytes, but the maximum supported size is %ld bytes. Consider using an array of pointers instead.", + // type_size(item_type), ARRAY_MAX_STRIDE); + + auto array = Match(ast, Array); + if (!array->items) + return "(Array_t){.length=0}"; + + int64_t n = 0; + for (ast_list_t *item = array->items; item; item = item->next) { + ++n; + if (item->ast->tag == Comprehension) + goto array_comprehension; + } + + { + env_t *scope = item_type->tag == EnumType ? with_enum_scope(env, item_type) : env; + CORD code = CORD_all("TypedArrayN(", compile_type(item_type), CORD_asprintf(", %ld", n)); + for (ast_list_t *item = array->items; item; item = item->next) { + code = CORD_all(code, ", ", compile_to_type(scope, item->ast, item_type)); + } + return CORD_cat(code, ")"); + } + + array_comprehension: + { + env_t *scope = item_type->tag == EnumType ? with_enum_scope(env, item_type) : fresh_scope(env); + static int64_t comp_num = 1; + const char *comprehension_name = heap_strf("arr$%ld", comp_num++); + ast_t *comprehension_var = FakeAST(InlineCCode, .code=CORD_all("&", comprehension_name), + .type=Type(PointerType, .pointed=array_type, .is_stack=true)); + Closure_t comp_action = {.fn=add_to_array_comprehension, .userdata=comprehension_var}; + scope->comprehension_action = &comp_action; + CORD code = CORD_all("({ Array_t ", comprehension_name, " = {};"); + // set_binding(scope, comprehension_name, array_type, comprehension_name); + for (ast_list_t *item = array->items; item; item = item->next) { + if (item->ast->tag == Comprehension) + code = CORD_all(code, "\n", compile_statement(scope, item->ast)); + else + code = CORD_all(code, compile_statement(env, add_to_array_comprehension(item->ast, comprehension_var))); + } + code = CORD_all(code, " ", comprehension_name, "; })"); + return code; + } + } + case Table: { + auto table = Match(ast, Table); + if (!table->entries) { + CORD code = "((Table_t){"; + if (table->fallback) + code = CORD_all(code, ".fallback=heap(", compile(env, table->fallback),")"); + return CORD_cat(code, "})"); + } + + type_t *table_type = get_type(env, ast); + type_t *key_t = Match(table_type, TableType)->key_type; + type_t *value_t = Match(table_type, TableType)->value_type; + + if (value_t->tag == OptionalType) + code_err(ast, "Tables whose values are optional (%T) are not currently supported.", value_t); + + for (ast_list_t *entry = table->entries; entry; entry = entry->next) { + if (entry->ast->tag == Comprehension) + goto table_comprehension; + } + + { // No comprehension: + env_t *key_scope = key_t->tag == EnumType ? with_enum_scope(env, key_t) : env; + env_t *value_scope = value_t->tag == EnumType ? with_enum_scope(env, value_t) : env; + CORD code = CORD_all("Table(", + compile_type(key_t), ", ", + compile_type(value_t), ", ", + compile_type_info(key_t), ", ", + compile_type_info(value_t)); + if (table->fallback) + code = CORD_all(code, ", /*fallback:*/ heap(", compile(env, table->fallback), ")"); + else + code = CORD_all(code, ", /*fallback:*/ NULL"); + + size_t n = 0; + for (ast_list_t *entry = table->entries; entry; entry = entry->next) + ++n; + CORD_appendf(&code, ", %zu", n); + + for (ast_list_t *entry = table->entries; entry; entry = entry->next) { + auto e = Match(entry->ast, TableEntry); + code = CORD_all(code, ",\n\t{", compile_to_type(key_scope, e->key, key_t), ", ", + compile_to_type(value_scope, e->value, value_t), "}"); + } + return CORD_cat(code, ")"); + } + + table_comprehension: + { + static int64_t comp_num = 1; + env_t *scope = fresh_scope(env); + const char *comprehension_name = heap_strf("table$%ld", comp_num++); + ast_t *comprehension_var = FakeAST(InlineCCode, .code=CORD_all("&", comprehension_name), + .type=Type(PointerType, .pointed=table_type, .is_stack=true)); + + CORD code = CORD_all("({ Table_t ", comprehension_name, " = {"); + if (table->fallback) + code = CORD_all(code, ".fallback=heap(", compile(env, table->fallback), "), "); + + code = CORD_cat(code, "};"); + + Closure_t comp_action = {.fn=add_to_table_comprehension, .userdata=comprehension_var}; + scope->comprehension_action = &comp_action; + for (ast_list_t *entry = table->entries; entry; entry = entry->next) { + if (entry->ast->tag == Comprehension) + code = CORD_all(code, "\n", compile_statement(scope, entry->ast)); + else + code = CORD_all(code, compile_statement(env, add_to_table_comprehension(entry->ast, comprehension_var))); + } + code = CORD_all(code, " ", comprehension_name, "; })"); + return code; + } + + } + case Set: { + auto set = Match(ast, Set); + if (!set->items) + return "((Table_t){})"; + + type_t *set_type = get_type(env, ast); + type_t *item_type = Match(set_type, SetType)->item_type; + + size_t n = 0; + for (ast_list_t *item = set->items; item; item = item->next) { + ++n; + if (item->ast->tag == Comprehension) + goto set_comprehension; + } + + { // No comprehension: + CORD code = CORD_all("Set(", + compile_type(item_type), ", ", + compile_type_info(item_type)); + CORD_appendf(&code, ", %zu", n); + env_t *scope = item_type->tag == EnumType ? with_enum_scope(env, item_type) : env; + for (ast_list_t *item = set->items; item; item = item->next) { + code = CORD_all(code, ", ", compile_to_type(scope, item->ast, item_type)); + } + return CORD_cat(code, ")"); + } + + set_comprehension: + { + static int64_t comp_num = 1; + env_t *scope = item_type->tag == EnumType ? with_enum_scope(env, item_type) : fresh_scope(env); + const char *comprehension_name = heap_strf("set$%ld", comp_num++); + ast_t *comprehension_var = FakeAST(InlineCCode, .code=CORD_all("&", comprehension_name), + .type=Type(PointerType, .pointed=set_type, .is_stack=true)); + CORD code = CORD_all("({ Table_t ", comprehension_name, " = {};"); + Closure_t comp_action = {.fn=add_to_set_comprehension, .userdata=comprehension_var}; + scope->comprehension_action = &comp_action; + for (ast_list_t *item = set->items; item; item = item->next) { + if (item->ast->tag == Comprehension) + code = CORD_all(code, "\n", compile_statement(scope, item->ast)); + else + code = CORD_all(code, compile_statement(env, add_to_set_comprehension(item->ast, comprehension_var))); + } + code = CORD_all(code, " ", comprehension_name, "; })"); + return code; + } + + } + case Comprehension: { + ast_t *base = Match(ast, Comprehension)->expr; + while (base->tag == Comprehension) + base = Match(ast, Comprehension)->expr; + if (base->tag == TableEntry) + return compile(env, WrapAST(ast, Table, .entries=new(ast_list_t, .ast=ast))); + else + return compile(env, WrapAST(ast, Array, .items=new(ast_list_t, .ast=ast))); + } + case Lambda: { + auto lambda = Match(ast, Lambda); + CORD name = CORD_asprintf("%rlambda$%ld", namespace_prefix(env, env->namespace), lambda->id); + + env->code->function_naming = CORD_all( + env->code->function_naming, + CORD_asprintf("register_function(%r, Text(\"%s.tm\"), %ld, Text(%r));\n", + name, file_base_name(ast->file->filename), get_line_number(ast->file, ast->start), + CORD_quoted(type_to_cord(get_type(env, ast))))); + + env_t *body_scope = fresh_scope(env); + body_scope->deferred = NULL; + for (arg_ast_t *arg = lambda->args; arg; arg = arg->next) { + type_t *arg_type = get_arg_ast_type(env, arg); + set_binding(body_scope, arg->name, arg_type, CORD_all("_$", arg->name)); + } + + type_t *ret_t = get_type(body_scope, lambda->body); + if (ret_t->tag == ReturnType) + ret_t = Match(ret_t, ReturnType)->ret; + + if (lambda->ret_type) { + type_t *declared = parse_type_ast(env, lambda->ret_type); + if (can_promote(ret_t, declared)) + ret_t = declared; + else + code_err(ast, "This function was declared to return a value of type %T, but actually returns a value of type %T", + declared, ret_t); + } + + body_scope->fn_ret = ret_t; + + Table_t closed_vars = get_closed_vars(env, lambda->args, ast); + if (Table$length(closed_vars) > 0) { // Create a typedef for the lambda's closure userdata + CORD def = "typedef struct {"; + for (int64_t i = 1; i <= Table$length(closed_vars); i++) { + struct { const char *name; binding_t *b; } *entry = Table$entry(closed_vars, i); + if (has_stack_memory(entry->b->type)) + code_err(ast, "This function is holding onto a reference to %T stack memory in the variable `%s`, but the function may outlive the stack memory", + entry->b->type, entry->name); + if (entry->b->type->tag == ModuleType) + continue; + set_binding(body_scope, entry->name, entry->b->type, CORD_cat("userdata->", entry->name)); + def = CORD_all(def, compile_declaration(entry->b->type, entry->name), "; "); + } + def = CORD_all(def, "} ", name, "$userdata_t;"); + env->code->local_typedefs = CORD_all(env->code->local_typedefs, def); + } + + CORD code = CORD_all("static ", compile_type(ret_t), " ", name, "("); + for (arg_ast_t *arg = lambda->args; arg; arg = arg->next) { + type_t *arg_type = get_arg_ast_type(env, arg); + code = CORD_all(code, compile_type(arg_type), " _$", arg->name, ", "); + } + + CORD userdata; + if (Table$length(closed_vars) == 0) { + code = CORD_cat(code, "void *)"); + userdata = "NULL"; + } else { + userdata = CORD_all("new(", name, "$userdata_t"); + for (int64_t i = 1; i <= Table$length(closed_vars); i++) { + struct { const char *name; binding_t *b; } *entry = Table$entry(closed_vars, i); + if (entry->b->type->tag == ModuleType) + continue; + binding_t *b = get_binding(env, entry->name); + assert(b); + CORD binding_code = b->code; + if (entry->b->type->tag == ArrayType) + userdata = CORD_all(userdata, ", ARRAY_COPY(", binding_code, ")"); + else if (entry->b->type->tag == TableType || entry->b->type->tag == SetType) + userdata = CORD_all(userdata, ", TABLE_COPY(", binding_code, ")"); + else + userdata = CORD_all(userdata, ", ", binding_code); + } + userdata = CORD_all(userdata, ")"); + code = CORD_all(code, name, "$userdata_t *userdata)"); + } + + CORD body = CORD_EMPTY; + for (ast_list_t *stmt = Match(lambda->body, Block)->statements; stmt; stmt = stmt->next) { + if (stmt->next || ret_t->tag == VoidType || ret_t->tag == AbortType || get_type(body_scope, stmt->ast)->tag == ReturnType) + body = CORD_all(body, compile_statement(body_scope, stmt->ast), "\n"); + else + body = CORD_all(body, compile_statement(body_scope, FakeAST(Return, stmt->ast)), "\n"); + bind_statement(body_scope, stmt->ast); + } + if ((ret_t->tag == VoidType || ret_t->tag == AbortType) && body_scope->deferred) + body = CORD_all(body, compile_statement(body_scope, FakeAST(Return)), "\n"); + + env->code->lambdas = CORD_all(env->code->lambdas, code, " {\n", body, "\n}\n"); + return CORD_all("((Closure_t){", name, ", ", userdata, "})"); + } + case MethodCall: { + auto call = Match(ast, MethodCall); + type_t *self_t = get_type(env, call->self); + + if (streq(call->name, "serialized")) { + if (call->args) + code_err(ast, ":serialized() doesn't take any arguments"); + return CORD_all("generic_serialize((", compile_declaration(self_t, "[1]"), "){", + compile(env, call->self), "}, ", compile_type_info(self_t), ")"); + } + + int64_t pointer_depth = 0; + type_t *self_value_t = self_t; + for (; self_value_t->tag == PointerType; self_value_t = Match(self_value_t, PointerType)->pointed) + pointer_depth += 1; + + CORD self = compile(env, call->self); + +#define EXPECT_POINTER(article, name) do { \ + if (pointer_depth < 1) code_err(call->self, "I expected "article" "name" pointer here, not "article" "name" value"); \ + else if (pointer_depth > 1) code_err(call->self, "I expected "article" "name" pointer here, not a nested "name" pointer"); \ +} while (0) + switch (self_value_t->tag) { + case ArrayType: { + type_t *item_t = Match(self_value_t, ArrayType)->item_type; + CORD padded_item_size = CORD_all("sizeof(", compile_type(item_t), ")"); + + ast_t *default_rng = FakeAST(InlineCCode, .code="default_rng", .type=RNG_TYPE); + if (streq(call->name, "insert")) { + EXPECT_POINTER("an", "array"); + arg_t *arg_spec = new(arg_t, .name="item", .type=item_t, + .next=new(arg_t, .name="at", .type=INT_TYPE, .default_val=FakeAST(Int, .str="0"))); + return CORD_all("Array$insert_value(", self, ", ", compile_arguments(env, ast, arg_spec, call->args), ", ", + padded_item_size, ")"); + } else if (streq(call->name, "insert_all")) { + EXPECT_POINTER("an", "array"); + arg_t *arg_spec = new(arg_t, .name="items", .type=self_value_t, + .next=new(arg_t, .name="at", .type=INT_TYPE, .default_val=FakeAST(Int, .str="0"))); + return CORD_all("Array$insert_all(", self, ", ", compile_arguments(env, ast, arg_spec, call->args), ", ", + padded_item_size, ")"); + } else if (streq(call->name, "remove_at")) { + EXPECT_POINTER("an", "array"); + arg_t *arg_spec = new(arg_t, .name="index", .type=INT_TYPE, .default_val=FakeAST(Int, .str="-1"), + .next=new(arg_t, .name="count", .type=INT_TYPE, .default_val=FakeAST(Int, .str="1"))); + return CORD_all("Array$remove_at(", self, ", ", compile_arguments(env, ast, arg_spec, call->args), ", ", + padded_item_size, ")"); + } else if (streq(call->name, "remove_item")) { + EXPECT_POINTER("an", "array"); + arg_t *arg_spec = new(arg_t, .name="item", .type=item_t, + .next=new(arg_t, .name="max_count", .type=INT_TYPE, .default_val=FakeAST(Int, .str="-1"))); + return CORD_all("Array$remove_item_value(", self, ", ", compile_arguments(env, ast, arg_spec, call->args), ", ", + compile_type_info(self_value_t), ")"); + } else if (streq(call->name, "random")) { + self = compile_to_pointer_depth(env, call->self, 0, false); + arg_t *arg_spec = new(arg_t, .name="rng", .type=RNG_TYPE, .default_val=default_rng); + return CORD_all("Array$random_value(", self, ", ", compile_arguments(env, ast, arg_spec, call->args), ", ", compile_type(item_t), ")"); + } else if (streq(call->name, "has")) { + self = compile_to_pointer_depth(env, call->self, 0, false); + arg_t *arg_spec = new(arg_t, .name="item", .type=item_t); + return CORD_all("Array$has_value(", self, ", ", compile_arguments(env, ast, arg_spec, call->args), ", ", + compile_type_info(self_value_t), ")"); + } else if (streq(call->name, "sample")) { + self = compile_to_pointer_depth(env, call->self, 0, false); + arg_t *arg_spec = new(arg_t, .name="count", .type=INT_TYPE, + .next=new(arg_t, .name="weights", .type=Type(ArrayType, .item_type=Type(NumType)), + .default_val=FakeAST(None, .type=new(type_ast_t, .tag=ArrayTypeAST, + .__data.ArrayTypeAST.item=new(type_ast_t, .tag=VarTypeAST, .__data.VarTypeAST.name="Num"))), + .next=new(arg_t, .name="rng", .type=RNG_TYPE, .default_val=default_rng))); + return CORD_all("Array$sample(", self, ", ", compile_arguments(env, ast, arg_spec, call->args), ", ", + padded_item_size, ")"); + } else if (streq(call->name, "shuffle")) { + EXPECT_POINTER("an", "array"); + arg_t *arg_spec = new(arg_t, .name="rng", .type=RNG_TYPE, .default_val=default_rng); + return CORD_all("Array$shuffle(", self, ", ", compile_arguments(env, ast, arg_spec, call->args), ", ", padded_item_size, ")"); + } else if (streq(call->name, "shuffled")) { + self = compile_to_pointer_depth(env, call->self, 0, false); + arg_t *arg_spec = new(arg_t, .name="rng", .type=RNG_TYPE, .default_val=default_rng); + return CORD_all("Array$shuffled(", self, ", ", compile_arguments(env, ast, arg_spec, call->args), ", ", padded_item_size, ")"); + } else if (streq(call->name, "slice")) { + self = compile_to_pointer_depth(env, call->self, 0, true); + arg_t *arg_spec = new(arg_t, .name="first", .type=INT_TYPE, .next=new(arg_t, .name="last", .type=INT_TYPE)); + return CORD_all("Array$slice(", self, ", ", compile_arguments(env, ast, arg_spec, call->args), ")"); + } else if (streq(call->name, "sort") || streq(call->name, "sorted")) { + if (streq(call->name, "sort")) + EXPECT_POINTER("an", "array"); + else + self = compile_to_pointer_depth(env, call->self, 0, false); + CORD comparison; + if (call->args) { + type_t *item_ptr = Type(PointerType, .pointed=item_t, .is_stack=true); + type_t *fn_t = Type(FunctionType, .args=new(arg_t, .name="x", .type=item_ptr, .next=new(arg_t, .name="y", .type=item_ptr)), + .ret=Type(IntType, .bits=TYPE_IBITS32)); + arg_t *arg_spec = new(arg_t, .name="by", .type=Type(ClosureType, .fn=fn_t)); + comparison = compile_arguments(env, ast, arg_spec, call->args); + } else { + comparison = CORD_all("((Closure_t){.fn=generic_compare, .userdata=(void*)", compile_type_info(item_t), "})"); + } + return CORD_all("Array$", call->name, "(", self, ", ", comparison, ", ", padded_item_size, ")"); + } else if (streq(call->name, "heapify")) { + EXPECT_POINTER("an", "array"); + CORD comparison; + if (call->args) { + type_t *item_ptr = Type(PointerType, .pointed=item_t, .is_stack=true); + type_t *fn_t = Type(FunctionType, .args=new(arg_t, .name="x", .type=item_ptr, .next=new(arg_t, .name="y", .type=item_ptr)), + .ret=Type(IntType, .bits=TYPE_IBITS32)); + arg_t *arg_spec = new(arg_t, .name="by", .type=Type(ClosureType, .fn=fn_t)); + comparison = compile_arguments(env, ast, arg_spec, call->args); + } else { + comparison = CORD_all("((Closure_t){.fn=generic_compare, .userdata=(void*)", compile_type_info(item_t), "})"); + } + return CORD_all("Array$heapify(", self, ", ", comparison, ", ", padded_item_size, ")"); + } else if (streq(call->name, "heap_push")) { + EXPECT_POINTER("an", "array"); + type_t *item_ptr = Type(PointerType, .pointed=item_t, .is_stack=true); + type_t *fn_t = Type(FunctionType, .args=new(arg_t, .name="x", .type=item_ptr, .next=new(arg_t, .name="y", .type=item_ptr)), + .ret=Type(IntType, .bits=TYPE_IBITS32)); + ast_t *default_cmp = FakeAST(InlineCCode, + .code=CORD_all("((Closure_t){.fn=generic_compare, .userdata=(void*)", + compile_type_info(item_t), "})"), + .type=Type(ClosureType, .fn=fn_t)); + arg_t *arg_spec = new(arg_t, .name="item", .type=item_t, + .next=new(arg_t, .name="by", .type=Type(ClosureType, .fn=fn_t), .default_val=default_cmp)); + CORD arg_code = compile_arguments(env, ast, arg_spec, call->args); + return CORD_all("Array$heap_push_value(", self, ", ", arg_code, ", ", padded_item_size, ")"); + } else if (streq(call->name, "heap_pop")) { + EXPECT_POINTER("an", "array"); + type_t *item_ptr = Type(PointerType, .pointed=item_t, .is_stack=true); + type_t *fn_t = Type(FunctionType, .args=new(arg_t, .name="x", .type=item_ptr, .next=new(arg_t, .name="y", .type=item_ptr)), + .ret=Type(IntType, .bits=TYPE_IBITS32)); + ast_t *default_cmp = FakeAST(InlineCCode, + .code=CORD_all("((Closure_t){.fn=generic_compare, .userdata=(void*)", + compile_type_info(item_t), "})"), + .type=Type(ClosureType, .fn=fn_t)); + arg_t *arg_spec = new(arg_t, .name="by", .type=Type(ClosureType, .fn=fn_t), .default_val=default_cmp); + CORD arg_code = compile_arguments(env, ast, arg_spec, call->args); + return CORD_all("Array$heap_pop_value(", self, ", ", arg_code, ", ", compile_type(item_t), ", _, ", + promote_to_optional(item_t, "_"), ", ", compile_none(item_t), ")"); + } else if (streq(call->name, "binary_search")) { + self = compile_to_pointer_depth(env, call->self, 0, call->args != NULL); + type_t *item_ptr = Type(PointerType, .pointed=item_t, .is_stack=true); + type_t *fn_t = Type(FunctionType, .args=new(arg_t, .name="x", .type=item_ptr, .next=new(arg_t, .name="y", .type=item_ptr)), + .ret=Type(IntType, .bits=TYPE_IBITS32)); + ast_t *default_cmp = FakeAST(InlineCCode, + .code=CORD_all("((Closure_t){.fn=generic_compare, .userdata=(void*)", + compile_type_info(item_t), "})"), + .type=Type(ClosureType, .fn=fn_t)); + arg_t *arg_spec = new(arg_t, .name="target", .type=item_t, + .next=new(arg_t, .name="by", .type=Type(ClosureType, .fn=fn_t), .default_val=default_cmp)); + CORD arg_code = compile_arguments(env, ast, arg_spec, call->args); + return CORD_all("Array$binary_search_value(", self, ", ", arg_code, ")"); + } else if (streq(call->name, "clear")) { + EXPECT_POINTER("an", "array"); + (void)compile_arguments(env, ast, NULL, call->args); + return CORD_all("Array$clear(", self, ")"); + } else if (streq(call->name, "find")) { + self = compile_to_pointer_depth(env, call->self, 0, false); + arg_t *arg_spec = new(arg_t, .name="item", .type=item_t); + return CORD_all("Array$find_value(", self, ", ", compile_arguments(env, ast, arg_spec, call->args), + ", ", compile_type_info(self_value_t), ")"); + } else if (streq(call->name, "first")) { + self = compile_to_pointer_depth(env, call->self, 0, call->args != NULL); + type_t *item_ptr = Type(PointerType, .pointed=item_t, .is_stack=true); + type_t *predicate_type = Type( + ClosureType, .fn=Type(FunctionType, .args=new(arg_t, .name="item", .type=item_ptr), .ret=Type(BoolType))); + arg_t *arg_spec = new(arg_t, .name="predicate", .type=predicate_type); + return CORD_all("Array$first(", self, ", ", compile_arguments(env, ast, arg_spec, call->args), ")"); + } else if (streq(call->name, "from")) { + self = compile_to_pointer_depth(env, call->self, 0, true); + arg_t *arg_spec = new(arg_t, .name="first", .type=INT_TYPE); + return CORD_all("Array$from(", self, ", ", compile_arguments(env, ast, arg_spec, call->args), ")"); + } else if (streq(call->name, "to")) { + self = compile_to_pointer_depth(env, call->self, 0, true); + arg_t *arg_spec = new(arg_t, .name="last", .type=INT_TYPE); + return CORD_all("Array$to(", self, ", ", compile_arguments(env, ast, arg_spec, call->args), ")"); + } else if (streq(call->name, "by")) { + self = compile_to_pointer_depth(env, call->self, 0, true); + arg_t *arg_spec = new(arg_t, .name="stride", .type=INT_TYPE); + return CORD_all("Array$by(", self, ", ", compile_arguments(env, ast, arg_spec, call->args), ", ", padded_item_size, ")"); + } else if (streq(call->name, "reversed")) { + self = compile_to_pointer_depth(env, call->self, 0, true); + (void)compile_arguments(env, ast, NULL, call->args); + return CORD_all("Array$reversed(", self, ", ", padded_item_size, ")"); + } else if (streq(call->name, "unique")) { + self = compile_to_pointer_depth(env, call->self, 0, false); + (void)compile_arguments(env, ast, NULL, call->args); + return CORD_all("Table$from_entries(", self, ", Set$info(", compile_type_info(item_t), "))"); + } else if (streq(call->name, "pop")) { + EXPECT_POINTER("an", "array"); + arg_t *arg_spec = new(arg_t, .name="index", .type=INT_TYPE, .default_val=FakeAST(Int, "-1")); + CORD index = compile_arguments(env, ast, arg_spec, call->args); + return CORD_all("Array$pop(", self, ", ", index, ", ", compile_type(item_t), ", _, ", + promote_to_optional(item_t, "_"), ", ", compile_none(item_t), ")"); + } else if (streq(call->name, "counts")) { + self = compile_to_pointer_depth(env, call->self, 0, false); + (void)compile_arguments(env, ast, NULL, call->args); + return CORD_all("Array$counts(", self, ", ", compile_type_info(self_value_t), ")"); + } else code_err(ast, "There is no '%s' method for arrays", call->name); + } + case SetType: { + auto set = Match(self_value_t, SetType); + if (streq(call->name, "has")) { + self = compile_to_pointer_depth(env, call->self, 0, false); + arg_t *arg_spec = new(arg_t, .name="key", .type=set->item_type); + return CORD_all("Table$has_value(", self, ", ", compile_arguments(env, ast, arg_spec, call->args), ", ", + compile_type_info(self_value_t), ")"); + } else if (streq(call->name, "add")) { + EXPECT_POINTER("a", "set"); + arg_t *arg_spec = new(arg_t, .name="item", .type=set->item_type); + return CORD_all("Table$set_value(", self, ", ", compile_arguments(env, ast, arg_spec, call->args), ", NULL, ", + compile_type_info(self_value_t), ")"); + } else if (streq(call->name, "add_all")) { + EXPECT_POINTER("a", "set"); + arg_t *arg_spec = new(arg_t, .name="items", .type=Type(ArrayType, .item_type=Match(self_value_t, SetType)->item_type)); + return CORD_all("({ Table_t *set = ", self, "; ", + "Array_t to_add = ", compile_arguments(env, ast, arg_spec, call->args), "; ", + "for (int64_t i = 0; i < to_add.length; i++)\n" + "Table$set(set, to_add.data + i*to_add.stride, NULL, ", compile_type_info(self_value_t), ");\n", + "(void)0; })"); + } else if (streq(call->name, "remove")) { + EXPECT_POINTER("a", "set"); + arg_t *arg_spec = new(arg_t, .name="item", .type=set->item_type); + return CORD_all("Table$remove_value(", self, ", ", compile_arguments(env, ast, arg_spec, call->args), ", ", + compile_type_info(self_value_t), ")"); + } else if (streq(call->name, "remove_all")) { + EXPECT_POINTER("a", "set"); + arg_t *arg_spec = new(arg_t, .name="items", .type=Type(ArrayType, .item_type=Match(self_value_t, SetType)->item_type)); + return CORD_all("({ Table_t *set = ", self, "; ", + "Array_t to_add = ", compile_arguments(env, ast, arg_spec, call->args), "; ", + "for (int64_t i = 0; i < to_add.length; i++)\n" + "Table$remove(set, to_add.data + i*to_add.stride, ", compile_type_info(self_value_t), ");\n", + "(void)0; })"); + } else if (streq(call->name, "clear")) { + EXPECT_POINTER("a", "set"); + (void)compile_arguments(env, ast, NULL, call->args); + return CORD_all("Table$clear(", self, ")"); + } else if (streq(call->name, "with")) { + self = compile_to_pointer_depth(env, call->self, 0, false); + arg_t *arg_spec = new(arg_t, .name="other", .type=self_value_t); + return CORD_all("Table$with(", self, ", ", compile_arguments(env, ast, arg_spec, call->args), + ", ", compile_type_info(self_value_t), ")"); + } else if (streq(call->name, "overlap")) { + self = compile_to_pointer_depth(env, call->self, 0, false); + arg_t *arg_spec = new(arg_t, .name="other", .type=self_value_t); + return CORD_all("Table$overlap(", self, ", ", compile_arguments(env, ast, arg_spec, call->args), + ", ", compile_type_info(self_value_t), ")"); + } else if (streq(call->name, "without")) { + self = compile_to_pointer_depth(env, call->self, 0, false); + arg_t *arg_spec = new(arg_t, .name="other", .type=self_value_t); + return CORD_all("Table$without(", self, ", ", compile_arguments(env, ast, arg_spec, call->args), + ", ", compile_type_info(self_value_t), ")"); + } else if (streq(call->name, "is_subset_of")) { + self = compile_to_pointer_depth(env, call->self, 0, false); + arg_t *arg_spec = new(arg_t, .name="other", .type=self_value_t, + .next=new(arg_t, .name="strict", .type=Type(BoolType), .default_val=FakeAST(Bool, false))); + return CORD_all("Table$is_subset_of(", self, ", ", compile_arguments(env, ast, arg_spec, call->args), + ", ", compile_type_info(self_value_t), ")"); + } else if (streq(call->name, "is_superset_of")) { + self = compile_to_pointer_depth(env, call->self, 0, false); + arg_t *arg_spec = new(arg_t, .name="other", .type=self_value_t, + .next=new(arg_t, .name="strict", .type=Type(BoolType), .default_val=FakeAST(Bool, false))); + return CORD_all("Table$is_superset_of(", self, ", ", compile_arguments(env, ast, arg_spec, call->args), + ", ", compile_type_info(self_value_t), ")"); + } else code_err(ast, "There is no '%s' method for tables", call->name); + } + case TableType: { + auto table = Match(self_value_t, TableType); + if (streq(call->name, "get")) { + self = compile_to_pointer_depth(env, call->self, 0, false); + arg_t *arg_spec = new(arg_t, .name="key", .type=table->key_type); + return CORD_all( + "Table$get_optional(", self, ", ", compile_type(table->key_type), ", ", + compile_type(table->value_type), ", ", compile_arguments(env, ast, arg_spec, call->args), ", ", + "_, ", optional_into_nonnone(table->value_type, "(*_)"), ", ", compile_none(table->value_type), ", ", + compile_type_info(self_value_t), ")"); + } else if (streq(call->name, "has")) { + self = compile_to_pointer_depth(env, call->self, 0, false); + arg_t *arg_spec = new(arg_t, .name="key", .type=table->key_type); + return CORD_all("Table$has_value(", self, ", ", compile_arguments(env, ast, arg_spec, call->args), ", ", + compile_type_info(self_value_t), ")"); + } else if (streq(call->name, "set")) { + EXPECT_POINTER("a", "table"); + arg_t *arg_spec = new(arg_t, .name="key", .type=table->key_type, + .next=new(arg_t, .name="value", .type=table->value_type)); + return CORD_all("Table$set_value(", self, ", ", compile_arguments(env, ast, arg_spec, call->args), ", ", + compile_type_info(self_value_t), ")"); + } else if (streq(call->name, "bump")) { + EXPECT_POINTER("a", "table"); + if (!(table->value_type->tag == IntType || table->value_type->tag == NumType)) + code_err(ast, "bump() is only supported for tables with numeric value types, not %T", self_value_t); + ast_t *one = table->value_type->tag == IntType + ? FakeAST(Int, .str="1") + : FakeAST(Num, .n=1); + arg_t *arg_spec = new(arg_t, .name="key", .type=table->key_type, + .next=new(arg_t, .name="amount", .type=table->value_type, .default_val=one)); + return CORD_all("Table$bump(", self, ", ", compile_arguments(env, ast, arg_spec, call->args), ", ", + compile_type_info(self_value_t), ")"); + } else if (streq(call->name, "remove")) { + EXPECT_POINTER("a", "table"); + arg_t *arg_spec = new(arg_t, .name="key", .type=table->key_type); + return CORD_all("Table$remove_value(", self, ", ", compile_arguments(env, ast, arg_spec, call->args), ", ", + compile_type_info(self_value_t), ")"); + } else if (streq(call->name, "clear")) { + EXPECT_POINTER("a", "table"); + (void)compile_arguments(env, ast, NULL, call->args); + return CORD_all("Table$clear(", self, ")"); + } else if (streq(call->name, "sorted")) { + self = compile_to_pointer_depth(env, call->self, 0, false); + (void)compile_arguments(env, ast, NULL, call->args); + return CORD_all("Table$sorted(", self, ", ", compile_type_info(self_value_t), ")"); + } else code_err(ast, "There is no '%s' method for tables", call->name); + } + default: { + auto methodcall = Match(ast, MethodCall); + type_t *fn_t = get_method_type(env, methodcall->self, methodcall->name); + arg_ast_t *args = new(arg_ast_t, .value=methodcall->self, .next=methodcall->args); + binding_t *b = get_namespace_binding(env, methodcall->self, methodcall->name); + if (!b) code_err(ast, "No such method"); + return CORD_all(b->code, "(", compile_arguments(env, ast, Match(fn_t, FunctionType)->args, args), ")"); + } + } +#undef EXPECT_POINTER + } + case FunctionCall: { + auto call = Match(ast, FunctionCall); + type_t *fn_t = get_type(env, call->fn); + if (fn_t->tag == FunctionType) { + CORD fn = compile(env, call->fn); + return CORD_all(fn, "(", compile_arguments(env, ast, Match(fn_t, FunctionType)->args, call->args), ")"); + } else if (fn_t->tag == TypeInfoType) { + type_t *t = Match(fn_t, TypeInfoType)->type; + + // Literal constructors for numeric types like `Byte(123)` should not go through any conversion, just a cast: + if (is_numeric_type(t) && call->args && !call->args->next && call->args->value->tag == Int) + return compile_to_type(env, call->args->value, t); + else if (t->tag == NumType && call->args && !call->args->next && call->args->value->tag == Num) + return compile_to_type(env, call->args->value, t); + + binding_t *constructor = get_constructor(env, t, call->args); + if (constructor) { + arg_t *arg_spec = Match(constructor->type, FunctionType)->args; + return CORD_all(constructor->code, "(", compile_arguments(env, ast, arg_spec, call->args), ")"); + } + + type_t *actual = call->args ? get_type(env, call->args->value) : NULL; + if (t->tag == TextType) { + if (!call->args) code_err(ast, "This constructor needs a value"); + if (!type_eq(t, TEXT_TYPE)) + code_err(call->fn, "I don't have a constructor defined for these arguments"); + // Text constructor: + if (!call->args || call->args->next) + code_err(call->fn, "This constructor takes exactly 1 argument"); + if (type_eq(actual, t)) + return compile(env, call->args->value); + return expr_as_text(compile(env, call->args->value), actual, "no"); + } else if (t->tag == CStringType) { + // C String constructor: + if (!call->args || call->args->next) + code_err(call->fn, "This constructor takes exactly 1 argument"); + if (call->args->value->tag == TextLiteral) + return compile_string_literal(Match(call->args->value, TextLiteral)->cord); + else if (call->args->value->tag == TextJoin && Match(call->args->value, TextJoin)->children == NULL) + return "\"\""; + else if (call->args->value->tag == TextJoin && Match(call->args->value, TextJoin)->children->next == NULL) + return compile_string_literal(Match(Match(call->args->value, TextJoin)->children->ast, TextLiteral)->cord); + return CORD_all("Text$as_c_string(", expr_as_text(compile(env, call->args->value), actual, "no"), ")"); + } else if (t->tag == StructType) { + auto struct_ = Match(t, StructType); + if (!struct_->opaque && is_valid_call(env, struct_->fields, call->args, true)) { + return CORD_all("((", compile_type(t), "){", + compile_arguments(env, ast, struct_->fields, call->args), "})"); + } + } + code_err(ast, "I could not find a constructor matching these arguments for %T", t); + } else if (fn_t->tag == ClosureType) { + fn_t = Match(fn_t, ClosureType)->fn; + arg_t *type_args = Match(fn_t, FunctionType)->args; + + arg_t *closure_fn_args = NULL; + for (arg_t *arg = Match(fn_t, FunctionType)->args; arg; arg = arg->next) + closure_fn_args = new(arg_t, .name=arg->name, .type=arg->type, .default_val=arg->default_val, .next=closure_fn_args); + closure_fn_args = new(arg_t, .name="userdata", .type=Type(PointerType, .pointed=Type(MemoryType)), .next=closure_fn_args); + REVERSE_LIST(closure_fn_args); + CORD fn_type_code = compile_type(Type(FunctionType, .args=closure_fn_args, .ret=Match(fn_t, FunctionType)->ret)); + + CORD closure = compile(env, call->fn); + CORD arg_code = compile_arguments(env, ast, type_args, call->args); + if (arg_code) arg_code = CORD_cat(arg_code, ", "); + if (call->fn->tag == Var) { + return CORD_all("((", fn_type_code, ")", closure, ".fn)(", arg_code, closure, ".userdata)"); + } else { + return CORD_all("({ Closure_t closure = ", closure, "; ((", fn_type_code, ")closure.fn)(", + arg_code, "closure.userdata); })"); + } + } else { + code_err(call->fn, "This is not a function, it's a %T", fn_t); + } + } + case Deserialize: { + ast_t *value = Match(ast, Deserialize)->value; + type_t *value_type = get_type(env, value); + if (!type_eq(value_type, Type(ArrayType, Type(ByteType)))) + code_err(value, "This value should be an array of bytes, not a %T", value_type); + type_t *t = parse_type_ast(env, Match(ast, Deserialize)->type); + return CORD_all("({ ", compile_declaration(t, "deserialized"), ";\n" + "generic_deserialize(", compile(env, value), ", &deserialized, ", compile_type_info(t), ");\n" + "deserialized; })"); + } + case When: { + auto original = Match(ast, When); + ast_t *when_var = WrapAST(ast, Var, .name="when"); + when_clause_t *new_clauses = NULL; + type_t *subject_t = get_type(env, original->subject); + for (when_clause_t *clause = original->clauses; clause; clause = clause->next) { + type_t *clause_type = get_clause_type(env, subject_t, clause); + if (clause_type->tag == AbortType || clause_type->tag == ReturnType) { + new_clauses = new(when_clause_t, .pattern=clause->pattern, .body=clause->body, .next=new_clauses); + } else { + ast_t *assign = WrapAST(clause->body, Assign, + .targets=new(ast_list_t, .ast=when_var), + .values=new(ast_list_t, .ast=clause->body)); + new_clauses = new(when_clause_t, .pattern=clause->pattern, .body=assign, .next=new_clauses); + } + } + REVERSE_LIST(new_clauses); + ast_t *else_body = original->else_body; + if (else_body) { + type_t *clause_type = get_type(env, else_body); + if (clause_type->tag != AbortType && clause_type->tag != ReturnType) { + else_body = WrapAST(else_body, Assign, + .targets=new(ast_list_t, .ast=when_var), + .values=new(ast_list_t, .ast=else_body)); + } + } + + type_t *t = get_type(env, ast); + env_t *when_env = fresh_scope(env); + set_binding(when_env, "when", t, "when"); + return CORD_all( + "({ ", compile_declaration(t, "when"), ";\n", + compile_statement(when_env, WrapAST(ast, When, .subject=original->subject, .clauses=new_clauses, .else_body=else_body)), + "when; })"); + } + case If: { + auto if_ = Match(ast, If); + ast_t *condition = if_->condition; + CORD decl_code = CORD_EMPTY; + env_t *truthy_scope = env, *falsey_scope = env; + + CORD condition_code; + if (condition->tag == Declare) { + type_t *condition_type = get_type(env, Match(condition, Declare)->value); + if (condition_type->tag != OptionalType) + code_err(condition, "This `if var := ...:` declaration should be an optional type, not %T", condition_type); + + decl_code = compile_statement(env, condition); + ast_t *var = Match(condition, Declare)->var; + truthy_scope = fresh_scope(env); + bind_statement(truthy_scope, condition); + condition_code = compile_condition(truthy_scope, var); + set_binding(truthy_scope, Match(var, Var)->name, + Match(condition_type, OptionalType)->type, + optional_into_nonnone(condition_type, compile(truthy_scope, var))); + } else if (condition->tag == Var) { + type_t *condition_type = get_type(env, condition); + condition_code = compile_condition(env, condition); + if (condition_type->tag == OptionalType) { + truthy_scope = fresh_scope(env); + set_binding(truthy_scope, Match(condition, Var)->name, + Match(condition_type, OptionalType)->type, + optional_into_nonnone(condition_type, compile(truthy_scope, condition))); + } + } else { + condition_code = compile_condition(env, condition); + } + + type_t *true_type = get_type(truthy_scope, if_->body); + type_t *false_type = get_type(falsey_scope, if_->else_body); + if (true_type->tag == AbortType || true_type->tag == ReturnType) + return CORD_all("({ ", decl_code, "if (", condition_code, ") ", compile_statement(truthy_scope, if_->body), + "\n", compile(falsey_scope, if_->else_body), "; })"); + else if (false_type->tag == AbortType || false_type->tag == ReturnType) + return CORD_all("({ ", decl_code, "if (!(", condition_code, ")) ", compile_statement(falsey_scope, if_->else_body), + "\n", compile(truthy_scope, if_->body), "; })"); + else if (decl_code != CORD_EMPTY) + return CORD_all("({ ", decl_code, "(", condition_code, ") ? ", compile(truthy_scope, if_->body), " : ", + compile(falsey_scope, if_->else_body), ";})"); + else + return CORD_all("((", condition_code, ") ? ", + compile(truthy_scope, if_->body), " : ", compile(falsey_scope, if_->else_body), ")"); + } + case Reduction: { + auto reduction = Match(ast, Reduction); + binop_e op = reduction->op; + + type_t *iter_t = get_type(env, reduction->iter); + type_t *item_t = get_iterated_type(iter_t); + if (!item_t) code_err(reduction->iter, "I couldn't figure out how to iterate over this type: %T", iter_t); + + static int64_t next_id = 1; + ast_t *item = FakeAST(Var, heap_strf("$it%ld", next_id++)); + ast_t *body = FakeAST(InlineCCode, .code="{}"); // placeholder + ast_t *loop = FakeAST(For, .vars=new(ast_list_t, .ast=item), .iter=reduction->iter, .body=body); + env_t *body_scope = for_scope(env, loop); + if (op == BINOP_EQ || op == BINOP_NE || op == BINOP_LT || op == BINOP_LE || op == BINOP_GT || op == BINOP_GE) { + // Chained comparisons like ==, <, etc. + CORD code = CORD_all( + "({ // Reduction:\n", + compile_declaration(item_t, "prev"), ";\n" + "OptionalBool_t result = NONE_BOOL;\n" + ); + + ast_t *comparison = WrapAST(ast, BinaryOp, .op=op, .lhs=FakeAST(InlineCCode, .code="prev", .type=item_t), .rhs=item); + body->__data.InlineCCode.code = CORD_all( + "if (result == NONE_BOOL) {\n" + " prev = ", compile(body_scope, item), ";\n" + " result = yes;\n" + "} else {\n" + " if (", compile(body_scope, comparison), ") {\n", + " prev = ", compile(body_scope, item), ";\n", + " } else {\n" + " result = no;\n", + " break;\n", + " }\n", + "}\n"); + code = CORD_all(code, compile_statement(env, loop), "\nresult;})"); + return code; + } else if (op == BINOP_MIN || op == BINOP_MAX) { + // Min/max: + const char *superlative = op == BINOP_MIN ? "min" : "max"; + CORD code = CORD_all( + "({ // Reduction:\n", + compile_declaration(item_t, superlative), ";\n" + "Bool_t has_value = no;\n" + ); + + CORD item_code = compile(body_scope, item); + binop_e cmp_op = op == BINOP_MIN ? BINOP_LT : BINOP_GT; + if (reduction->key) { + env_t *key_scope = fresh_scope(env); + set_binding(key_scope, "$", item_t, item_code); + type_t *key_type = get_type(key_scope, reduction->key); + const char *superlative_key = op == BINOP_MIN ? "min_key" : "max_key"; + code = CORD_all(code, compile_declaration(key_type, superlative_key), ";\n"); + + ast_t *comparison = WrapAST(ast, BinaryOp, .op=cmp_op, + .lhs=FakeAST(InlineCCode, .code="key", .type=key_type), + .rhs=FakeAST(InlineCCode, .code=superlative_key, .type=key_type)); + body->__data.InlineCCode.code = CORD_all( + compile_declaration(key_type, "key"), " = ", compile(key_scope, reduction->key), ";\n", + "if (!has_value || ", compile(body_scope, comparison), ") {\n" + " ", superlative, " = ", compile(body_scope, item), ";\n" + " ", superlative_key, " = key;\n" + " has_value = yes;\n" + "}\n"); + } else { + ast_t *comparison = WrapAST(ast, BinaryOp, .op=cmp_op, .lhs=item, .rhs=FakeAST(InlineCCode, .code=superlative, .type=item_t)); + body->__data.InlineCCode.code = CORD_all( + "if (!has_value || ", compile(body_scope, comparison), ") {\n" + " ", superlative, " = ", compile(body_scope, item), ";\n" + " has_value = yes;\n" + "}\n"); + } + + + code = CORD_all(code, compile_statement(env, loop), "\nhas_value ? ", promote_to_optional(item_t, superlative), + " : ", compile_none(item_t), ";})"); + return code; + } else { + // Accumulator-style reductions like +, ++, *, etc. + CORD code = CORD_all( + "({ // Reduction:\n", + compile_declaration(item_t, "reduction"), ";\n" + "Bool_t has_value = no;\n" + ); + + // For the special case of (or)/(and), we need to early out if we can: + CORD early_out = CORD_EMPTY; + if (op == BINOP_CMP) { + if (item_t->tag != IntType || Match(item_t, IntType)->bits != TYPE_IBITS32) + code_err(ast, "<> reductions are only supported for Int32 values"); + } else if (op == BINOP_AND) { + if (item_t->tag == BoolType) + early_out = "if (!reduction) break;"; + else if (item_t->tag == OptionalType) + early_out = CORD_all("if (", check_none(item_t, "reduction"), ") break;"); + } else if (op == BINOP_OR) { + if (item_t->tag == BoolType) + early_out = "if (reduction) break;"; + else if (item_t->tag == OptionalType) + early_out = CORD_all("if (!", check_none(item_t, "reduction"), ") break;"); + } + + ast_t *combination = WrapAST(ast, BinaryOp, .op=op, .lhs=FakeAST(InlineCCode, .code="reduction", .type=item_t), .rhs=item); + body->__data.InlineCCode.code = CORD_all( + "if (!has_value) {\n" + " reduction = ", compile(body_scope, item), ";\n" + " has_value = yes;\n" + "} else {\n" + " reduction = ", compile(body_scope, combination), ";\n", + early_out, + "}\n"); + + code = CORD_all(code, compile_statement(env, loop), "\nhas_value ? ", promote_to_optional(item_t, "reduction"), + " : ", compile_none(item_t), ";})"); + return code; + } + } + case FieldAccess: { + auto f = Match(ast, FieldAccess); + type_t *fielded_t = get_type(env, f->fielded); + type_t *value_t = value_type(fielded_t); + switch (value_t->tag) { + case TypeInfoType: { + auto info = Match(value_t, TypeInfoType); + if (f->field[0] == '_') { + for (Table_t *locals = env->locals; locals; locals = locals->fallback) { + if (locals == info->env->locals) + goto is_inside_type; + } + code_err(ast, "Fields that start with underscores are not accessible on types outside of the type definition.", f->field); + is_inside_type:; + } + binding_t *b = get_binding(info->env, f->field); + if (!b) code_err(ast, "I couldn't find the field '%s' on this type", f->field); + if (!b->code) code_err(ast, "I couldn't figure out how to compile this field"); + return b->code; + } + case TextType: { + const char *lang = Match(value_t, TextType)->lang; + if (lang && streq(f->field, "text")) { + CORD text = compile_to_pointer_depth(env, f->fielded, 0, false); + return CORD_all("((Text_t)", text, ")"); + } else if (streq(f->field, "length")) { + return CORD_all("Int$from_int64((", compile_to_pointer_depth(env, f->fielded, 0, false), ").length)"); + } + code_err(ast, "There is no '%s' field on %T values", f->field, value_t); + } + case StructType: { + for (arg_t *field = Match(value_t, StructType)->fields; field; field = field->next) { + if (streq(field->name, f->field)) { + if (fielded_t->tag == PointerType) { + CORD fielded = compile_to_pointer_depth(env, f->fielded, 1, false); + return CORD_asprintf("(%r)->%s", fielded, f->field); + } else { + CORD fielded = compile(env, f->fielded); + return CORD_asprintf("(%r).%s", fielded, f->field); + } + } + } + code_err(ast, "The field '%s' is not a valid field name of %T", f->field, value_t); + } + case EnumType: { + auto e = Match(value_t, EnumType); + for (tag_t *tag = e->tags; tag; tag = tag->next) { + if (streq(f->field, tag->name)) { + CORD prefix = namespace_prefix(e->env, e->env->namespace); + if (fielded_t->tag == PointerType) { + CORD fielded = compile_to_pointer_depth(env, f->fielded, 1, false); + return CORD_all("((", fielded, ")->$tag == ", prefix, "tag$", tag->name, ")"); + } else { + CORD fielded = compile(env, f->fielded); + return CORD_all("((", fielded, ").$tag == ", prefix, "tag$", tag->name, ")"); + } + } + } + code_err(ast, "The field '%s' is not a valid tag name of %T", f->field, value_t); + } + case ArrayType: { + if (streq(f->field, "length")) + return CORD_all("Int$from_int64((", compile_to_pointer_depth(env, f->fielded, 0, false), ").length)"); + code_err(ast, "There is no %s field on arrays", f->field); + } + case SetType: { + if (streq(f->field, "items")) + return CORD_all("ARRAY_COPY((", compile_to_pointer_depth(env, f->fielded, 0, false), ").entries)"); + else if (streq(f->field, "length")) + return CORD_all("Int$from_int64((", compile_to_pointer_depth(env, f->fielded, 0, false), ").entries.length)"); + code_err(ast, "There is no '%s' field on sets", f->field); + } + case TableType: { + if (streq(f->field, "length")) { + return CORD_all("Int$from_int64((", compile_to_pointer_depth(env, f->fielded, 0, false), ").entries.length)"); + } else if (streq(f->field, "keys")) { + return CORD_all("ARRAY_COPY((", compile_to_pointer_depth(env, f->fielded, 0, false), ").entries)"); + } else if (streq(f->field, "values")) { + auto table = Match(value_t, TableType); + CORD offset = CORD_all("offsetof(struct { ", compile_declaration(table->key_type, "k"), "; ", compile_declaration(table->value_type, "v"), "; }, v)"); + return CORD_all("({ Array_t *entries = &(", compile_to_pointer_depth(env, f->fielded, 0, false), ").entries;\n" + "ARRAY_INCREF(*entries);\n" + "Array_t values = *entries;\n" + "values.data += ", offset, ";\n" + "values; })"); + } else if (streq(f->field, "fallback")) { + return CORD_all("({ Table_t *_fallback = (", compile_to_pointer_depth(env, f->fielded, 0, false), ").fallback; _fallback ? *_fallback : NONE_TABLE; })"); + } + code_err(ast, "There is no '%s' field on tables", f->field); + } + case ModuleType: { + const char *name = Match(value_t, ModuleType)->name; + env_t *module_env = Table$str_get(*env->imports, name); + return compile(module_env, WrapAST(ast, Var, f->field)); + } + case MomentType: { + if (streq(f->field, "seconds")) { + return CORD_all("I64((", compile_to_pointer_depth(env, f->fielded, 0, false), ").tv_sec)"); + } else if (streq(f->field, "microseconds")) { + return CORD_all("I64((", compile_to_pointer_depth(env, f->fielded, 0, false), ").tv_usec)"); + } + code_err(ast, "There is no '%s' field on Moments", f->field); + } + default: + code_err(ast, "Field accesses are not supported on %T values", fielded_t); + } + } + case Index: { + auto indexing = Match(ast, Index); + type_t *indexed_type = get_type(env, indexing->indexed); + if (!indexing->index) { + if (indexed_type->tag != PointerType) + code_err(ast, "Only pointers can use the '[]' operator to dereference the entire value."); + auto ptr = Match(indexed_type, PointerType); + if (ptr->pointed->tag == ArrayType) { + return CORD_all("*({ Array_t *arr = ", compile(env, indexing->indexed), "; ARRAY_INCREF(*arr); arr; })"); + } else if (ptr->pointed->tag == TableType || ptr->pointed->tag == SetType) { + return CORD_all("*({ Table_t *t = ", compile(env, indexing->indexed), "; TABLE_INCREF(*t); t; })"); + } else { + return CORD_all("*(", compile(env, indexing->indexed), ")"); + } + } + + type_t *container_t = value_type(indexed_type); + type_t *index_t = get_type(env, indexing->index); + if (container_t->tag == ArrayType) { + if (index_t->tag != IntType && index_t->tag != BigIntType && index_t->tag != ByteType) + code_err(indexing->index, "Arrays can only be indexed by integers, not %T", index_t); + type_t *item_type = Match(container_t, ArrayType)->item_type; + CORD arr = compile_to_pointer_depth(env, indexing->indexed, 0, false); + file_t *f = indexing->index->file; + CORD index_code = indexing->index->tag == Int + ? compile_int_to_type(env, indexing->index, Type(IntType, .bits=TYPE_IBITS64)) + : (index_t->tag == BigIntType ? CORD_all("Int64$from_int(", compile(env, indexing->index), ", no)") + : CORD_all("(Int64_t)(", compile(env, indexing->index), ")")); + if (indexing->unchecked) + return CORD_all("Array_get_unchecked(", compile_type(item_type), ", ", arr, ", ", index_code, ")"); + else + return CORD_all("Array_get(", compile_type(item_type), ", ", arr, ", ", index_code, ", ", + CORD_asprintf("%ld", (int64_t)(indexing->index->start - f->text)), ", ", + CORD_asprintf("%ld", (int64_t)(indexing->index->end - f->text)), + ")"); + } else if (container_t->tag == TableType) { + auto table_type = Match(container_t, TableType); + if (indexing->unchecked) + code_err(ast, "Table indexes cannot be unchecked"); + if (table_type->default_value) { + type_t *value_type = get_type(env, table_type->default_value); + return CORD_all("*Table$get_or_setdefault(", + compile_to_pointer_depth(env, indexing->indexed, 1, false), ", ", + compile_type(table_type->key_type), ", ", + compile_type(value_type), ", ", + compile(env, indexing->index), ", ", + compile(env, table_type->default_value), ", ", + compile_type_info(container_t), ")"); + } else if (table_type->value_type) { + return CORD_all("Table$get_optional(", + compile_to_pointer_depth(env, indexing->indexed, 0, false), ", ", + compile_type(table_type->key_type), ", ", + compile_type(table_type->value_type), ", ", + compile(env, indexing->index), ", " + "_, ", promote_to_optional(table_type->value_type, "(*_)"), ", ", + compile_none(table_type->value_type), ", ", + compile_type_info(container_t), ")"); + } else { + code_err(indexing->index, "This table doesn't have a value type or a default value"); + } + } else if (container_t->tag == TextType) { + return CORD_all("Text$cluster(", compile_to_pointer_depth(env, indexing->indexed, 0, false), ", ", compile_to_type(env, indexing->index, Type(BigIntType)), ")"); + } else { + code_err(ast, "Indexing is not supported for type: %T", container_t); + } + } + case InlineCCode: { + type_t *t = get_type(env, ast); + if (t->tag == VoidType) + return CORD_all("{\n", Match(ast, InlineCCode)->code, "\n}"); + else + return CORD_all("({ ", Match(ast, InlineCCode)->code, "; })"); + } + case Use: code_err(ast, "Compiling 'use' as expression!"); + case Defer: code_err(ast, "Compiling 'defer' as expression!"); + case Extern: code_err(ast, "Externs are not supported as expressions"); + case TableEntry: code_err(ast, "Table entries should not be compiled directly"); + case Declare: case Assign: case UpdateAssign: case For: case While: case Repeat: case StructDef: case LangDef: + case EnumDef: case FunctionDef: case ConvertDef: case Skip: case Stop: case Pass: case Return: case DocTest: case PrintStatement: + code_err(ast, "This is not a valid expression"); + default: case Unknown: code_err(ast, "Unknown AST"); + } +} + +CORD compile_type_info(type_t *t) +{ + if (t == THREAD_TYPE) return "&Thread$info"; + else if (t == RNG_TYPE) return "&RNG$info"; + else if (t == MATCH_TYPE) return "&Match$info"; + else if (t == PATH_TYPE) return "&Path$info"; + else if (t == PATH_TYPE_TYPE) return "&PathType$info"; + + switch (t->tag) { + case BoolType: case ByteType: case IntType: case BigIntType: case NumType: case CStringType: case MomentType: + return CORD_all("&", type_to_cord(t), "$info"); + case TextType: { + auto text = Match(t, TextType); + if (!text->lang || streq(text->lang, "Text")) + return "&Text$info"; + else if (streq(text->lang, "Pattern")) + return "&Pattern$info"; + return CORD_all("(&", namespace_prefix(text->env, text->env->namespace->parent), text->lang, "$$info)"); + } + case StructType: { + auto s = Match(t, StructType); + return CORD_all("(&", namespace_prefix(s->env, s->env->namespace->parent), s->name, "$$info)"); + } + case EnumType: { + auto e = Match(t, EnumType); + return CORD_all("(&", namespace_prefix(e->env, e->env->namespace->parent), e->name, "$$info)"); + } + case ArrayType: { + type_t *item_t = Match(t, ArrayType)->item_type; + return CORD_all("Array$info(", compile_type_info(item_t), ")"); + } + case SetType: { + type_t *item_type = Match(t, SetType)->item_type; + return CORD_all("Set$info(", compile_type_info(item_type), ")"); + } + case TableType: { + auto table = Match(t, TableType); + type_t *key_type = table->key_type; + type_t *value_type = table->value_type; + if (!value_type) { + if (!table->env) + compiler_err(NULL, NULL, NULL, "I got a table with a default value, but no environment to get its type!"); + value_type = get_type(table->env, table->default_value); + } + return CORD_all("Table$info(", compile_type_info(key_type), ", ", compile_type_info(value_type), ")"); + } + case PointerType: { + auto ptr = Match(t, PointerType); + CORD sigil = ptr->is_stack ? "&" : "@"; + return CORD_asprintf("Pointer$info(%r, %r)", + CORD_quoted(sigil), + compile_type_info(ptr->pointed)); + } + case FunctionType: { + return CORD_asprintf("Function$info(%r)", CORD_quoted(type_to_cord(t))); + } + case ClosureType: { + return CORD_asprintf("Closure$info(%r)", CORD_quoted(type_to_cord(t))); + } + case OptionalType: { + type_t *non_optional = Match(t, OptionalType)->type; + return CORD_asprintf("Optional$info(sizeof(%r), __alignof__(%r), %r)", compile_type(non_optional), compile_type(non_optional), compile_type_info(non_optional)); + } + case MutexedType: { + type_t *mutexed = Match(t, MutexedType)->type; + return CORD_all("MutexedData$info(", compile_type_info(mutexed), ")"); + } + case TypeInfoType: return CORD_all("Type$info(", CORD_quoted(type_to_cord(Match(t, TypeInfoType)->type)), ")"); + case MemoryType: return "&Memory$info"; + case VoidType: return "&Void$info"; + default: + compiler_err(NULL, 0, 0, "I couldn't convert to a type info: %T", t); + } +} + +static CORD get_flag_options(type_t *t, CORD separator) +{ + if (t->tag == BoolType) { + return "yes|no"; + } else if (t->tag == EnumType) { + CORD options = CORD_EMPTY; + for (tag_t *tag = Match(t, EnumType)->tags; tag; tag = tag->next) { + options = CORD_all(options, tag->name); + if (tag->next) options = CORD_all(options, separator); + } + return options; + } else if (t->tag == IntType || t->tag == NumType || t->tag == BigIntType) { + return "N"; + } else { + return "..."; + } +} + +CORD compile_cli_arg_call(env_t *env, CORD fn_name, type_t *fn_type) +{ + auto fn_info = Match(fn_type, FunctionType); + + env_t *main_env = fresh_scope(env); + + CORD code = CORD_EMPTY; + binding_t *usage_binding = get_binding(env, "_USAGE"); + CORD usage_code = usage_binding ? usage_binding->code : "usage"; + binding_t *help_binding = get_binding(env, "_HELP"); + CORD help_code = help_binding ? help_binding->code : usage_code; + if (!usage_binding) { + bool explicit_help_flag = false; + for (arg_t *arg = fn_info->args; arg; arg = arg->next) { + if (streq(arg->name, "help")) { + explicit_help_flag = true; + break; + } + } + + CORD usage = explicit_help_flag ? CORD_EMPTY : " [--help]"; + for (arg_t *arg = fn_info->args; arg; arg = arg->next) { + usage = CORD_cat(usage, " "); + type_t *t = get_arg_type(main_env, arg); + CORD flag = CORD_replace(arg->name, "_", "-"); + if (arg->default_val || arg->type->tag == OptionalType) { + if (strlen(arg->name) == 1) { + if (t->tag == BoolType || (t->tag == OptionalType && Match(t, OptionalType)->type->tag == BoolType)) + usage = CORD_all(usage, "[-", flag, "]"); + else + usage = CORD_all(usage, "[-", flag, " ", get_flag_options(t, "|"), "]"); + } else { + if (t->tag == BoolType || (t->tag == OptionalType && Match(t, OptionalType)->type->tag == BoolType)) + usage = CORD_all(usage, "[--", flag, "]"); + else if (t->tag == ArrayType) + usage = CORD_all(usage, "[--", flag, " ", get_flag_options(t, "|"), "]"); + else + usage = CORD_all(usage, "[--", flag, "=", get_flag_options(t, "|"), "]"); + } + } else { + if (t->tag == BoolType) + usage = CORD_all(usage, "<--", flag, "|--no-", flag, ">"); + else if (t->tag == EnumType) + usage = CORD_all(usage, get_flag_options(t, "|")); + else if (t->tag == ArrayType) + usage = CORD_all(usage, "[", flag, "...]"); + else + usage = CORD_all(usage, "<", flag, ">"); + } + } + code = CORD_all(code, "Text_t usage = Texts(Text(\"Usage: \"), Text$from_str(argv[0])", + usage == CORD_EMPTY ? CORD_EMPTY : CORD_all(", Text(", CORD_quoted(usage), ")"), ");\n"); + } + + + int num_args = 0; + for (arg_t *arg = fn_info->args; arg; arg = arg->next) { + type_t *opt_type = arg->type->tag == OptionalType ? arg->type : Type(OptionalType, .type=arg->type); + code = CORD_all(code, compile_declaration(opt_type, CORD_all("_$", arg->name))); + if (arg->default_val) { + CORD default_val = compile(env, arg->default_val); + if (arg->type->tag != OptionalType) + default_val = promote_to_optional(arg->type, default_val); + code = CORD_all(code, " = ", default_val); + } else { + code = CORD_all(code, " = ", compile_none(arg->type)); + } + code = CORD_all(code, ";\n"); + num_args += 1; + } + + code = CORD_all(code, "tomo_parse_args(argc, argv, ", usage_code, ", ", help_code); + for (arg_t *arg = fn_info->args; arg; arg = arg->next) { + code = CORD_all(code, ",\n{", CORD_quoted(CORD_replace(arg->name, "_", "-")), ", ", + (arg->default_val || arg->type->tag == OptionalType) ? "false" : "true", ", ", + compile_type_info(arg->type), + ", &", CORD_all("_$", arg->name), "}"); + } + code = CORD_all(code, ");\n"); + + code = CORD_all(code, fn_name, "("); + for (arg_t *arg = fn_info->args; arg; arg = arg->next) { + CORD arg_code = CORD_all("_$", arg->name); + if (arg->type->tag != OptionalType) + arg_code = optional_into_nonnone(arg->type, arg_code); + + code = CORD_all(code, arg_code); + if (arg->next) code = CORD_all(code, ", "); + } + code = CORD_all(code, ");\n"); + return code; +} + +CORD compile_function(env_t *env, CORD name_code, ast_t *ast, CORD *staticdefs) +{ + bool is_private = false; + const char *function_name; + arg_ast_t *args; + type_t *ret_t; + ast_t *body; + ast_t *cache; + bool is_inline; + if (ast->tag == FunctionDef) { + auto fndef = Match(ast, FunctionDef); + function_name = Match(fndef->name, Var)->name; + is_private = function_name[0] == '_'; + args = fndef->args; + ret_t = fndef->ret_type ? parse_type_ast(env, fndef->ret_type) : Type(VoidType); + body = fndef->body; + cache = fndef->cache; + is_inline = fndef->is_inline; + } else { + auto convertdef = Match(ast, ConvertDef); + args = convertdef->args; + ret_t = convertdef->ret_type ? parse_type_ast(env, convertdef->ret_type) : Type(VoidType); + function_name = get_type_name(ret_t); + if (!function_name) + code_err(ast, "Conversions are only supported for text, struct, and enum types, not %T", ret_t); + body = convertdef->body; + cache = convertdef->cache; + is_inline = convertdef->is_inline; + } + + CORD arg_signature = "("; + Table_t used_names = {}; + for (arg_ast_t *arg = args; arg; arg = arg->next) { + type_t *arg_type = get_arg_ast_type(env, arg); + arg_signature = CORD_cat(arg_signature, compile_declaration(arg_type, CORD_cat("_$", arg->name))); + if (arg->next) arg_signature = CORD_cat(arg_signature, ", "); + if (Table$str_get(used_names, arg->name)) + code_err(ast, "The argument name '%s' is used more than once", arg->name); + Table$str_set(&used_names, arg->name, arg->name); + } + arg_signature = CORD_cat(arg_signature, ")"); + + CORD ret_type_code = compile_type(ret_t); + if (ret_t->tag == AbortType) + ret_type_code = CORD_all("_Noreturn ", ret_type_code); + + if (is_private) + *staticdefs = CORD_all(*staticdefs, "static ", ret_type_code, " ", name_code, arg_signature, ";\n"); + + CORD code; + if (cache) { + code = CORD_all("static ", ret_type_code, " ", name_code, "$uncached", arg_signature); + } else { + code = CORD_all(ret_type_code, " ", name_code, arg_signature); + if (is_inline) + code = CORD_cat("INLINE ", code); + if (!is_private) + code = CORD_cat("public ", code); + } + + env_t *body_scope = fresh_scope(env); + while (body_scope->namespace && body_scope->namespace->parent) { + body_scope->locals->fallback = body_scope->locals->fallback->fallback; + body_scope->namespace = body_scope->namespace->parent; + } + + body_scope->deferred = NULL; + body_scope->namespace = NULL; + for (arg_ast_t *arg = args; arg; arg = arg->next) { + type_t *arg_type = get_arg_ast_type(env, arg); + set_binding(body_scope, arg->name, arg_type, CORD_cat("_$", arg->name)); + } + + body_scope->fn_ret = ret_t; + + type_t *body_type = get_type(body_scope, body); + if (ret_t->tag == AbortType) { + if (body_type->tag != AbortType) + code_err(ast, "This function can reach the end without aborting!"); + } else if (ret_t->tag == VoidType) { + if (body_type->tag == AbortType) + code_err(ast, "This function will always abort before it reaches the end, but it's declared as having a Void return. It should be declared as an Abort return instead."); + } else { + if (body_type->tag != ReturnType && body_type->tag != AbortType) + code_err(ast, "This function can reach the end without returning a %T value!", ret_t); + } + + CORD body_code = CORD_all("{\n", compile_inline_block(body_scope, body), "}\n"); + CORD definition = with_source_info(ast, CORD_all(code, " ", body_code, "\n")); + + if (cache && args == NULL) { // no-args cache just uses a static var + CORD wrapper = CORD_all( + is_private ? CORD_EMPTY : "public ", ret_type_code, " ", name_code, "(void) {\n" + "static ", compile_declaration(ret_t, "cached_result"), ";\n", + "static bool initialized = false;\n", + "if (!initialized) {\n" + "\tcached_result = ", name_code, "$uncached();\n", + "\tinitialized = true;\n", + "}\n", + "return cached_result;\n" + "}\n"); + definition = CORD_cat(definition, wrapper); + } else if (cache && cache->tag == Int) { + assert(args); + OptionalInt64_t cache_size = Int64$parse(Text$from_str(Match(cache, Int)->str)); + CORD pop_code = CORD_EMPTY; + if (cache->tag == Int && !cache_size.is_none && cache_size.i > 0) { + pop_code = CORD_all("if (cache.entries.length > ", CORD_asprintf("%ld", cache_size.i), + ") Table$remove(&cache, cache.entries.data + cache.entries.stride*RNG$int64(default_rng, 0, cache.entries.length-1), table_type);\n"); + } + + if (!args->next) { + // Single-argument functions have simplified caching logic + type_t *arg_type = get_arg_ast_type(env, args); + CORD wrapper = CORD_all( + is_private ? CORD_EMPTY : "public ", ret_type_code, " ", name_code, arg_signature, "{\n" + "static Table_t cache = {};\n", + "const TypeInfo_t *table_type = Table$info(", compile_type_info(arg_type), ", ", compile_type_info(ret_t), ");\n", + compile_declaration(Type(PointerType, .pointed=ret_t), "cached"), " = Table$get_raw(cache, &_$", args->name, ", table_type);\n" + "if (cached) return *cached;\n", + compile_declaration(ret_t, "ret"), " = ", name_code, "$uncached(_$", args->name, ");\n", + pop_code, + "Table$set(&cache, &_$", args->name, ", &ret, table_type);\n" + "return ret;\n" + "}\n"); + definition = CORD_cat(definition, wrapper); + } else { + // Multi-argument functions use a custom struct type (only defined internally) as a cache key: + arg_t *fields = NULL; + for (arg_ast_t *arg = args; arg; arg = arg->next) + fields = new(arg_t, .name=arg->name, .type=get_arg_ast_type(env, arg), .next=fields); + REVERSE_LIST(fields); + type_t *t = Type(StructType, .name=heap_strf("func$%ld$args", get_line_number(ast->file, ast->start)), .fields=fields, .env=env); + + int64_t num_fields = used_names.entries.length; + const char *metamethods = is_packed_data(t) ? "PackedData$metamethods" : "Struct$metamethods"; + CORD args_typeinfo = CORD_asprintf("((TypeInfo_t[1]){{.size=sizeof(args), .align=__alignof__(args), .metamethods=%s, " + ".tag=StructInfo, .StructInfo.name=\"FunctionArguments\", " + ".StructInfo.num_fields=%ld, .StructInfo.fields=(NamedType_t[%ld]){", + metamethods, num_fields, num_fields); + CORD args_type = "struct { "; + for (arg_t *f = fields; f; f = f->next) { + args_typeinfo = CORD_all(args_typeinfo, "{\"", f->name, "\", ", compile_type_info(f->type), "}"); + args_type = CORD_all(args_type, compile_declaration(f->type, f->name), "; "); + if (f->next) args_typeinfo = CORD_all(args_typeinfo, ", "); + } + args_type = CORD_all(args_type, "}"); + args_typeinfo = CORD_all(args_typeinfo, "}}})"); + + CORD all_args = CORD_EMPTY; + for (arg_ast_t *arg = args; arg; arg = arg->next) + all_args = CORD_all(all_args, "_$", arg->name, arg->next ? ", " : CORD_EMPTY); + + CORD wrapper = CORD_all( + is_private ? CORD_EMPTY : "public ", ret_type_code, " ", name_code, arg_signature, "{\n" + "static Table_t cache = {};\n", + args_type, " args = {", all_args, "};\n" + "const TypeInfo_t *table_type = Table$info(", args_typeinfo, ", ", compile_type_info(ret_t), ");\n", + compile_declaration(Type(PointerType, .pointed=ret_t), "cached"), " = Table$get_raw(cache, &args, table_type);\n" + "if (cached) return *cached;\n", + compile_declaration(ret_t, "ret"), " = ", name_code, "$uncached(", all_args, ");\n", + pop_code, + "Table$set(&cache, &args, &ret, table_type);\n" + "return ret;\n" + "}\n"); + definition = CORD_cat(definition, wrapper); + } + } + + CORD qualified_name = function_name; + if (env->namespace && env->namespace->parent && env->namespace->name) + qualified_name = CORD_all(env->namespace->name, ".", qualified_name); + CORD text = CORD_all("func ", qualified_name, "("); + for (arg_ast_t *arg = args; arg; arg = arg->next) { + text = CORD_cat(text, type_to_cord(get_arg_ast_type(env, arg))); + if (arg->next) text = CORD_cat(text, ", "); + } + if (ret_t && ret_t->tag != VoidType) + text = CORD_all(text, "->", type_to_cord(ret_t)); + text = CORD_all(text, ")"); + + if (!is_inline) { + env->code->function_naming = CORD_all( + env->code->function_naming, + CORD_asprintf("register_function(%r, Text(\"%s.tm\"), %ld, Text(%r));\n", + name_code, file_base_name(ast->file->filename), get_line_number(ast->file, ast->start), CORD_quoted(text))); + } + return definition; +} + +CORD compile_top_level_code(env_t *env, ast_t *ast) +{ + if (!ast) return CORD_EMPTY; + + switch (ast->tag) { + case Use: { + auto use = Match(ast, Use); + if (use->what == USE_C_CODE) { + Path_t path = Path$relative_to(Path$from_str(use->path), Path(".build")); + return CORD_all("#include \"", Path$as_c_string(path), "\"\n"); + } + return CORD_EMPTY; + } + case Declare: { + auto decl = Match(ast, Declare); + const char *decl_name = Match(decl->var, Var)->name; + CORD full_name = CORD_all(namespace_prefix(env, env->namespace), decl_name); + type_t *t = get_type(env, decl->value); + if (t->tag == AbortType || t->tag == VoidType || t->tag == ReturnType) + code_err(ast, "You can't declare a variable with a %T value", t); + + CORD val_code = compile_maybe_incref(env, decl->value, t); + if (t->tag == FunctionType) { + assert(promote(env, decl->value, &val_code, t, Type(ClosureType, t))); + t = Type(ClosureType, t); + } + + bool is_private = decl_name[0] == '_'; + if (is_constant(env, decl->value)) { + set_binding(env, decl_name, t, full_name); + return CORD_all( + is_private ? "static " : CORD_EMPTY, + compile_declaration(t, full_name), " = ", val_code, ";\n"); + } else { + CORD checked_access = CORD_all("check_initialized(", full_name, ", \"", decl_name, "\")"); + set_binding(env, decl_name, t, checked_access); + + return CORD_all( + "static bool ", full_name, "$initialized = false;\n", + is_private ? "static " : CORD_EMPTY, + compile_declaration(t, full_name), ";\n"); + } + } + case FunctionDef: { + CORD name_code = CORD_all(namespace_prefix(env, env->namespace), Match(Match(ast, FunctionDef)->name, Var)->name); + return compile_function(env, name_code, ast, &env->code->staticdefs); + } + case ConvertDef: { + type_t *type = get_function_def_type(env, ast); + const char *name = get_type_name(Match(type, FunctionType)->ret); + if (!name) + code_err(ast, "Conversions are only supported for text, struct, and enum types, not %T", Match(type, FunctionType)->ret); + CORD name_code = CORD_asprintf("%r%s$%ld", namespace_prefix(env, env->namespace), name, get_line_number(ast->file, ast->start)); + return compile_function(env, name_code, ast, &env->code->staticdefs); + } + case StructDef: { + auto def = Match(ast, StructDef); + type_t *t = Table$str_get(*env->types, def->name); + assert(t && t->tag == StructType); + CORD code = compile_struct_typeinfo(env, t, def->name, def->fields, def->secret, def->opaque); + env_t *ns_env = namespace_env(env, def->name); + return CORD_all(code, def->namespace ? compile_top_level_code(ns_env, def->namespace) : CORD_EMPTY); + } + case EnumDef: { + auto def = Match(ast, EnumDef); + CORD code = compile_enum_typeinfo(env, ast); + code = CORD_all(code, compile_enum_constructors(env, ast)); + env_t *ns_env = namespace_env(env, def->name); + return CORD_all(code, def->namespace ? compile_top_level_code(ns_env, def->namespace) : CORD_EMPTY); + } + case LangDef: { + auto def = Match(ast, LangDef); + CORD code = CORD_asprintf("public const TypeInfo_t %r%s$$info = {%zu, %zu, .metamethods=Text$metamethods, .tag=TextInfo, .TextInfo={%r}};\n", + namespace_prefix(env, env->namespace), def->name, sizeof(Text_t), __alignof__(Text_t), + CORD_quoted(def->name)); + env_t *ns_env = namespace_env(env, def->name); + return CORD_all(code, def->namespace ? compile_top_level_code(ns_env, def->namespace) : CORD_EMPTY); + } + case Extern: return CORD_EMPTY; + case Block: { + CORD code = CORD_EMPTY; + for (ast_list_t *stmt = Match(ast, Block)->statements; stmt; stmt = stmt->next) { + code = CORD_all(code, compile_top_level_code(env, stmt->ast)); + } + return code; + } + default: return CORD_EMPTY; + } +} + +static void initialize_vars_and_statics(env_t *env, ast_t *ast) +{ + if (!ast) return; + + for (ast_list_t *stmt = Match(ast, Block)->statements; stmt; stmt = stmt->next) { + if (stmt->ast->tag == InlineCCode) { + CORD code = compile_statement(env, stmt->ast); + env->code->staticdefs = CORD_all(env->code->staticdefs, code, "\n"); + } else if (stmt->ast->tag == Declare) { + auto decl = Match(stmt->ast, Declare); + const char *decl_name = Match(decl->var, Var)->name; + CORD full_name = CORD_all(namespace_prefix(env, env->namespace), decl_name); + type_t *t = get_type(env, decl->value); + if (t->tag == AbortType || t->tag == VoidType || t->tag == ReturnType) + code_err(stmt->ast, "You can't declare a variable with a %T value", t); + + CORD val_code = compile_maybe_incref(env, decl->value, t); + if (t->tag == FunctionType) { + assert(promote(env, decl->value, &val_code, t, Type(ClosureType, t))); + t = Type(ClosureType, t); + } + + if (!is_constant(env, decl->value)) { + env->code->variable_initializers = CORD_all( + env->code->variable_initializers, + with_source_info( + stmt->ast, + CORD_all( + full_name, " = ", val_code, ",\n", + full_name, "$initialized = true;\n"))); + } + } else if (stmt->ast->tag == StructDef) { + initialize_vars_and_statics(namespace_env(env, Match(stmt->ast, StructDef)->name), + Match(stmt->ast, StructDef)->namespace); + } else if (stmt->ast->tag == EnumDef) { + initialize_vars_and_statics(namespace_env(env, Match(stmt->ast, EnumDef)->name), + Match(stmt->ast, EnumDef)->namespace); + } else if (stmt->ast->tag == LangDef) { + initialize_vars_and_statics(namespace_env(env, Match(stmt->ast, LangDef)->name), + Match(stmt->ast, LangDef)->namespace); + } else if (stmt->ast->tag == Use) { + continue; + } else { + CORD code = compile_statement(env, stmt->ast); + if (code) code_err(stmt->ast, "I did not expect this to generate code"); + assert(!code); + } + } +} + +CORD compile_file(env_t *env, ast_t *ast) +{ + CORD top_level_code = compile_top_level_code(env, ast); + CORD use_imports = CORD_EMPTY; + + // First prepare variable initializers to prevent unitialized access: + for (ast_list_t *stmt = Match(ast, Block)->statements; stmt; stmt = stmt->next) { + if (stmt->ast->tag == Use) + use_imports = CORD_all(use_imports, compile_statement(env, stmt->ast)); + } + + initialize_vars_and_statics(env, ast); + + const char *name = file_base_name(ast->file->filename); + return CORD_all( + "#line 1 ", CORD_quoted(ast->file->filename), "\n", + "#define __SOURCE_FILE__ ", CORD_quoted(ast->file->filename), "\n", + "#include <tomo/tomo.h>\n" + "#include \"", name, ".tm.h\"\n\n", + env->code->local_typedefs, "\n", + env->code->lambdas, "\n", + env->code->staticdefs, "\n", + top_level_code, + "public void _$", env->namespace->name, "$$initialize(void) {\n", + "static bool initialized = false;\n", + "if (initialized) return;\n", + "initialized = true;\n", + use_imports, + env->code->variable_initializers, + env->code->function_naming, + "}\n"); +} + +CORD compile_statement_type_header(env_t *env, ast_t *ast) +{ + switch (ast->tag) { + case Use: { + auto use = Match(ast, Use); + switch (use->what) { + case USE_MODULE: { + return CORD_all("#include <", use->path, "/", use->path, ".h>\n"); + } + case USE_LOCAL: { + Path_t path = Path$relative_to(Path$from_str(use->path), Path(".build")); + Path_t build_dir = Path$with_component(Path$parent(path), Text(".build")); + path = Path$with_component(build_dir, Texts(Path$base_name(path), Text(".h"))); + return CORD_all("#include \"", Path$as_c_string(path), "\"\n"); + } + case USE_HEADER: + if (use->path[0] == '<') + return CORD_all("#include ", use->path, "\n"); + else + return CORD_all("#include \"", use->path, "\"\n"); + default: + return CORD_EMPTY; + } + } + case StructDef: { + return compile_struct_header(env, ast); + } + case EnumDef: { + return compile_enum_header(env, ast); + } + case LangDef: { + auto def = Match(ast, LangDef); + CORD full_name = CORD_cat(namespace_prefix(env, env->namespace), def->name); + return CORD_all( + // Constructor macro: + "#define ", namespace_prefix(env, env->namespace), def->name, + "(text) ((", namespace_prefix(env, env->namespace), def->name, "$$type){.length=sizeof(text)-1, .tag=TEXT_ASCII, .ascii=\"\" text})\n" + "#define ", namespace_prefix(env, env->namespace), def->name, + "s(...) ((", namespace_prefix(env, env->namespace), def->name, "$$type)Texts(__VA_ARGS__))\n" + "extern const TypeInfo_t ", full_name, ";\n" + ); + } + default: + return CORD_EMPTY; + } +} + +CORD compile_statement_namespace_header(env_t *env, ast_t *ast) +{ + const char *ns_name = NULL; + ast_t *block = NULL; + switch (ast->tag) { + case LangDef: { + auto def = Match(ast, LangDef); + ns_name = def->name; + block = def->namespace; + break; + } + case StructDef: { + auto def = Match(ast, StructDef); + ns_name = def->name; + block = def->namespace; + break; + } + case EnumDef: { + auto def = Match(ast, EnumDef); + ns_name = def->name; + block = def->namespace; + break; + } + case Extern: { + auto ext = Match(ast, Extern); + type_t *t = parse_type_ast(env, ext->type); + CORD decl; + if (t->tag == ClosureType) { + t = Match(t, ClosureType)->fn; + auto fn = Match(t, FunctionType); + decl = CORD_all(compile_type(fn->ret), " ", ext->name, "("); + for (arg_t *arg = fn->args; arg; arg = arg->next) { + decl = CORD_all(decl, compile_type(arg->type)); + if (arg->next) decl = CORD_cat(decl, ", "); + } + decl = CORD_cat(decl, ")"); + } else { + decl = compile_declaration(t, ext->name); + } + return CORD_all("extern ", decl, ";\n"); + } + case Declare: { + auto decl = Match(ast, Declare); + const char *decl_name = Match(decl->var, Var)->name; + bool is_private = (decl_name[0] == '_'); + if (is_private) + return CORD_EMPTY; + + type_t *t = get_type(env, decl->value); + if (t->tag == FunctionType) + t = Type(ClosureType, t); + assert(t->tag != ModuleType); + if (t->tag == AbortType || t->tag == VoidType || t->tag == ReturnType) + code_err(ast, "You can't declare a variable with a %T value", t); + + return CORD_all( + compile_statement_type_header(env, decl->value), + "extern ", compile_declaration(t, CORD_cat(namespace_prefix(env, env->namespace), decl_name)), ";\n"); + } + case FunctionDef: { + auto fndef = Match(ast, FunctionDef); + const char *decl_name = Match(fndef->name, Var)->name; + bool is_private = decl_name[0] == '_'; + if (is_private) return CORD_EMPTY; + CORD arg_signature = "("; + for (arg_ast_t *arg = fndef->args; arg; arg = arg->next) { + type_t *arg_type = get_arg_ast_type(env, arg); + arg_signature = CORD_cat(arg_signature, compile_declaration(arg_type, CORD_cat("_$", arg->name))); + if (arg->next) arg_signature = CORD_cat(arg_signature, ", "); + } + arg_signature = CORD_cat(arg_signature, ")"); + + type_t *ret_t = fndef->ret_type ? parse_type_ast(env, fndef->ret_type) : Type(VoidType); + CORD ret_type_code = compile_type(ret_t); + if (ret_t->tag == AbortType) + ret_type_code = CORD_all("_Noreturn ", ret_type_code); + CORD name = CORD_all(namespace_prefix(env, env->namespace), decl_name); + if (env->namespace && env->namespace->parent && env->namespace->name && streq(decl_name, env->namespace->name)) + name = CORD_asprintf("%r%ld", namespace_prefix(env, env->namespace), get_line_number(ast->file, ast->start)); + return CORD_all(ret_type_code, " ", name, arg_signature, ";\n"); + } + case ConvertDef: { + auto def = Match(ast, ConvertDef); + + CORD arg_signature = "("; + for (arg_ast_t *arg = def->args; arg; arg = arg->next) { + type_t *arg_type = get_arg_ast_type(env, arg); + arg_signature = CORD_cat(arg_signature, compile_declaration(arg_type, CORD_cat("_$", arg->name))); + if (arg->next) arg_signature = CORD_cat(arg_signature, ", "); + } + arg_signature = CORD_cat(arg_signature, ")"); + + type_t *ret_t = def->ret_type ? parse_type_ast(env, def->ret_type) : Type(VoidType); + CORD ret_type_code = compile_type(ret_t); + CORD name = get_type_name(ret_t); + if (!name) + code_err(ast, "Conversions are only supported for text, struct, and enum types, not %T", ret_t); + name = CORD_all(namespace_prefix(env, env->namespace), name); + CORD name_code = CORD_asprintf("%r$%ld", name, get_line_number(ast->file, ast->start)); + return CORD_all(ret_type_code, " ", name_code, arg_signature, ";\n"); + } + default: return CORD_EMPTY; + } + env_t *ns_env = namespace_env(env, ns_name); + CORD header = CORD_EMPTY; + for (ast_list_t *stmt = block ? Match(block, Block)->statements : NULL; stmt; stmt = stmt->next) { + header = CORD_all(header, compile_statement_namespace_header(ns_env, stmt->ast)); + } + return header; +} + +typedef struct { + env_t *env; + CORD *header; +} compile_typedef_info_t; + +static void _make_typedefs(compile_typedef_info_t *info, ast_t *ast) +{ + if (ast->tag == StructDef) { + auto def = Match(ast, StructDef); + if (def->external) return; + CORD full_name = CORD_cat(namespace_prefix(info->env, info->env->namespace), def->name); + *info->header = CORD_all(*info->header, "typedef struct ", full_name, "$$struct ", full_name, "$$type;\n"); + } else if (ast->tag == EnumDef) { + auto def = Match(ast, EnumDef); + CORD full_name = CORD_cat(namespace_prefix(info->env, info->env->namespace), def->name); + *info->header = CORD_all(*info->header, "typedef struct ", full_name, "$$struct ", full_name, "$$type;\n"); + + for (tag_ast_t *tag = def->tags; tag; tag = tag->next) { + if (!tag->fields) continue; + *info->header = CORD_all(*info->header, "typedef struct ", full_name, "$", tag->name, "$$struct ", full_name, "$", tag->name, "$$type;\n"); + } + } else if (ast->tag == LangDef) { + auto def = Match(ast, LangDef); + *info->header = CORD_all(*info->header, "typedef Text_t ", namespace_prefix(info->env, info->env->namespace), def->name, "$$type;\n"); + } +} + +static void _define_types_and_funcs(compile_typedef_info_t *info, ast_t *ast) +{ + *info->header = CORD_all(*info->header, + compile_statement_type_header(info->env, ast), + compile_statement_namespace_header(info->env, ast)); +} + +CORD compile_file_header(env_t *env, ast_t *ast) +{ + CORD header = CORD_all( + "#pragma once\n" + "#line 1 ", CORD_quoted(ast->file->filename), "\n", + "#include <tomo/tomo.h>\n"); + + compile_typedef_info_t info = {.env=env, .header=&header}; + visit_topologically(Match(ast, Block)->statements, (Closure_t){.fn=(void*)_make_typedefs, &info}); + visit_topologically(Match(ast, Block)->statements, (Closure_t){.fn=(void*)_define_types_and_funcs, &info}); + + header = CORD_all(header, "void _$", env->namespace->name, "$$initialize(void);\n"); + return header; +} + +// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 diff --git a/src/compile.h b/src/compile.h new file mode 100644 index 00000000..4256ab5d --- /dev/null +++ b/src/compile.h @@ -0,0 +1,24 @@ +#pragma once + +// Compilation functions + +#include <gc/cord.h> +#include <gc.h> +#include <stdio.h> + +#include "environment.h" + +CORD expr_as_text(CORD expr, type_t *t, CORD color); +CORD compile_file(env_t *env, ast_t *ast); +CORD compile_file_header(env_t *env, ast_t *ast); +CORD compile_declaration(type_t *t, const char *name); +CORD compile_type(type_t *t); +CORD compile(env_t *env, ast_t *ast); +CORD compile_namespace_header(env_t *env, const char *ns_name, ast_t *block); +CORD compile_statement(env_t *env, ast_t *ast); +CORD compile_statement_type_header(env_t *env, ast_t *ast); +CORD compile_statement_namespace_header(env_t *env, ast_t *ast); +CORD compile_type_info(type_t *t); +CORD compile_cli_arg_call(env_t *env, CORD fn_name, type_t *fn_type); + +// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 diff --git a/src/cordhelpers.c b/src/cordhelpers.c new file mode 100644 index 00000000..57189a8f --- /dev/null +++ b/src/cordhelpers.c @@ -0,0 +1,71 @@ +// Some helper functions for the GC Cord library + +#include <gc/cord.h> +#include <stdarg.h> + +#include "stdlib/util.h" + +__attribute__((format(printf, 1, 2))) +public CORD CORD_asprintf(CORD fmt, ...) +{ + va_list args; + va_start(args, fmt); + CORD c = NULL; + CORD_vsprintf(&c, fmt, args); + va_end(args); + return c; +} + +public CORD CORD_quoted(CORD str) +{ + CORD quoted = "\""; + CORD_pos i; +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wsign-conversion" + CORD_FOR(i, str) { +#pragma GCC diagnostic pop + char c = CORD_pos_fetch(i); + switch (c) { + case '\a': quoted = CORD_cat(quoted, "\\a"); break; + case '\b': quoted = CORD_cat(quoted, "\\b"); break; + case '\x1b': quoted = CORD_cat(quoted, "\\e"); break; + case '\f': quoted = CORD_cat(quoted, "\\f"); break; + case '\n': quoted = CORD_cat(quoted, "\\n"); break; + case '\r': quoted = CORD_cat(quoted, "\\r"); break; + case '\t': quoted = CORD_cat(quoted, "\\t"); break; + case '\v': quoted = CORD_cat(quoted, "\\v"); break; + case '"': quoted = CORD_cat(quoted, "\\\""); break; + case '\\': quoted = CORD_cat(quoted, "\\\\"); break; + case '\x00' ... '\x06': case '\x0E' ... '\x1A': + case '\x1C' ... '\x1F': case '\x7F' ... '\x7F': + CORD_sprintf("ed, "%r\\x%02X", quoted, c); + break; + default: quoted = CORD_cat_char(quoted, c); break; + } + } + quoted = CORD_cat_char(quoted, '"'); + return quoted; +} + +public CORD CORD_replace(CORD c, CORD to_replace, CORD replacement) +{ + size_t len = CORD_len(c); + size_t replaced_len = CORD_len(to_replace); + size_t pos = 0; + CORD ret = CORD_EMPTY; + while (pos < len) { + size_t found = CORD_str(c, pos, to_replace); + if (found == CORD_NOT_FOUND) { + if (pos < len) + ret = CORD_cat(ret, CORD_substr(c, pos, len)); + return ret; + } + if (found > pos) + ret = CORD_cat(ret, CORD_substr(c, pos, found-pos)); + ret = CORD_cat(ret, replacement); + pos = found + replaced_len; + } + return ret; +} + +// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 diff --git a/src/cordhelpers.h b/src/cordhelpers.h new file mode 100644 index 00000000..9a72e93c --- /dev/null +++ b/src/cordhelpers.h @@ -0,0 +1,14 @@ +#pragma once +// Some helper functions for the GC Cord library + +#include <gc/cord.h> + +#define CORD_appendf(cord, fmt, ...) CORD_sprintf(cord, "%r" fmt, *(cord) __VA_OPT__(,) __VA_ARGS__) +#define CORD_all(...) CORD_catn(sizeof((CORD[]){__VA_ARGS__})/sizeof(CORD), __VA_ARGS__) + +__attribute__((format(printf, 1, 2))) +CORD CORD_asprintf(CORD fmt, ...); +CORD CORD_quoted(CORD str); +CORD CORD_replace(CORD c, CORD to_replace, CORD replacement); + +// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 diff --git a/src/enums.c b/src/enums.c new file mode 100644 index 00000000..06af0df3 --- /dev/null +++ b/src/enums.c @@ -0,0 +1,129 @@ +// Logic for compiling tagged unions (enums) +#include <ctype.h> +#include <gc/cord.h> +#include <gc.h> +#include <stdio.h> + +#include "ast.h" +#include "stdlib/text.h" +#include "compile.h" +#include "cordhelpers.h" +#include "structs.h" +#include "environment.h" +#include "typecheck.h" +#include "stdlib/util.h" + +CORD compile_enum_typeinfo(env_t *env, ast_t *ast) +{ + auto def = Match(ast, EnumDef); + CORD full_name = CORD_cat(namespace_prefix(env, env->namespace), def->name); + + // Compile member types and constructors: + CORD member_typeinfos = CORD_EMPTY; + for (tag_ast_t *tag = def->tags; tag; tag = tag->next) { + if (!tag->fields) continue; + + const char *tag_name = heap_strf("%s$%s", def->name, tag->name); + type_t *tag_type = Table$str_get(*env->types, tag_name); + assert(tag_type && tag_type->tag == StructType); + member_typeinfos = CORD_all( + member_typeinfos, compile_struct_typeinfo(env, tag_type, tag_name, tag->fields, tag->secret, false)); + } + + int num_tags = 0; + for (tag_ast_t *t = def->tags; t; t = t->next) + num_tags += 1; + + type_t *t = Table$str_get(*env->types, def->name); + const char *metamethods = is_packed_data(t) ? "PackedDataEnum$metamethods" : "Enum$metamethods"; + CORD typeinfo = CORD_asprintf("public const TypeInfo_t %r$$info = {%zu, %zu, .metamethods=%s, {.tag=EnumInfo, .EnumInfo={.name=\"%s\", " + ".num_tags=%d, .tags=(NamedType_t[]){", + full_name, type_size(t), type_align(t), metamethods, def->name, num_tags); + + for (tag_ast_t *tag = def->tags; tag; tag = tag->next) { + const char *tag_type_name = heap_strf("%s$%s", def->name, tag->name); + type_t *tag_type = Table$str_get(*env->types, tag_type_name); + if (tag_type && Match(tag_type, StructType)->fields) + typeinfo = CORD_all(typeinfo, "{\"", tag->name, "\", ", compile_type_info(tag_type), "}, "); + else + typeinfo = CORD_all(typeinfo, "{\"", tag->name, "\"}, "); + } + typeinfo = CORD_all(typeinfo, "}}}};\n"); + return CORD_all(member_typeinfos, typeinfo); +} + +CORD compile_enum_constructors(env_t *env, ast_t *ast) +{ + auto def = Match(ast, EnumDef); + CORD full_name = CORD_cat(namespace_prefix(env, env->namespace), def->name); + + CORD constructors = CORD_EMPTY; + for (tag_ast_t *tag = def->tags; tag; tag = tag->next) { + if (!tag->fields) continue; + + CORD arg_sig = CORD_EMPTY; + for (arg_ast_t *field = tag->fields; field; field = field->next) { + type_t *field_t = get_arg_ast_type(env, field); + arg_sig = CORD_all(arg_sig, compile_declaration(field_t, CORD_all("$", field->name))); + if (field->next) arg_sig = CORD_cat(arg_sig, ", "); + } + if (arg_sig == CORD_EMPTY) arg_sig = "void"; + CORD constructor_impl = CORD_all("public inline ", full_name, "$$type ", full_name, "$tagged$", tag->name, "(", arg_sig, ") { return (", + full_name, "$$type){.$tag=", full_name, "$tag$", tag->name, ", .", tag->name, "={"); + for (arg_ast_t *field = tag->fields; field; field = field->next) { + constructor_impl = CORD_all(constructor_impl, "$", field->name); + if (field->next) constructor_impl = CORD_cat(constructor_impl, ", "); + } + constructor_impl = CORD_cat(constructor_impl, "}}; }\n"); + constructors = CORD_cat(constructors, constructor_impl); + } + return constructors; +} + +CORD compile_enum_header(env_t *env, ast_t *ast) +{ + auto def = Match(ast, EnumDef); + CORD full_name = CORD_all(namespace_prefix(env, env->namespace), def->name); + CORD all_defs = CORD_EMPTY; + CORD enum_def = CORD_all("struct ", full_name, "$$struct {\n" + "\tenum { ", full_name, "$null=0, "); + + bool has_any_tags_with_fields = false; + for (tag_ast_t *tag = def->tags; tag; tag = tag->next) { + enum_def = CORD_all(enum_def, full_name, "$tag$", tag->name); + if (tag->next) enum_def = CORD_all(enum_def, ", "); + has_any_tags_with_fields = has_any_tags_with_fields || (tag->fields != NULL); + } + enum_def = CORD_all(enum_def, "} $tag;\n"); + + if (has_any_tags_with_fields) { + enum_def = CORD_all(enum_def, "union {\n"); + for (tag_ast_t *tag = def->tags; tag; tag = tag->next) { + if (!tag->fields) continue; + CORD field_def = compile_struct_header(env, WrapAST(ast, StructDef, .name=CORD_to_const_char_star(CORD_all(def->name, "$", tag->name)), .fields=tag->fields)); + all_defs = CORD_all(all_defs, field_def); + enum_def = CORD_all(enum_def, full_name, "$", tag->name, "$$type ", tag->name, ";\n"); + } + enum_def = CORD_all(enum_def, "};\n"); + } + enum_def = CORD_all(enum_def, "};\n"); + all_defs = CORD_all(all_defs, enum_def); + + all_defs = CORD_all(all_defs, "extern const TypeInfo_t ", full_name, "$$info;\n"); + for (tag_ast_t *tag = def->tags; tag; tag = tag->next) { + if (!tag->fields) continue; + + CORD arg_sig = CORD_EMPTY; + for (arg_ast_t *field = tag->fields; field; field = field->next) { + type_t *field_t = get_arg_ast_type(env, field); + arg_sig = CORD_all(arg_sig, compile_declaration(field_t, CORD_all("$", field->name))); + if (field->next) arg_sig = CORD_all(arg_sig, ", "); + } + if (arg_sig == CORD_EMPTY) arg_sig = "void"; + CORD constructor_def = CORD_all(full_name, "$$type ", full_name, "$tagged$", tag->name, "(", arg_sig, ");\n"); + all_defs = CORD_all(all_defs, constructor_def); + } + return all_defs; +} + +// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 diff --git a/src/enums.h b/src/enums.h new file mode 100644 index 00000000..5f693e5a --- /dev/null +++ b/src/enums.h @@ -0,0 +1,14 @@ +#pragma once + +// Compilation of tagged unions (enums) + +#include <gc/cord.h> + +#include "ast.h" +#include "environment.h" + +CORD compile_enum_typeinfo(env_t *env, ast_t *ast); +CORD compile_enum_constructors(env_t *env, ast_t *ast); +CORD compile_enum_header(env_t *env, ast_t *ast); + +// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 diff --git a/src/environment.c b/src/environment.c new file mode 100644 index 00000000..85677524 --- /dev/null +++ b/src/environment.c @@ -0,0 +1,864 @@ +// Logic for the environmental context information during compilation +// (variable bindings, code sections, etc.) +#include <stdlib.h> +#include <signal.h> + +#include "cordhelpers.h" +#include "environment.h" +#include "parse.h" +#include "stdlib/datatypes.h" +#include "stdlib/tables.h" +#include "stdlib/text.h" +#include "stdlib/util.h" +#include "typecheck.h" + +type_t *TEXT_TYPE = NULL; +type_t *MATCH_TYPE = NULL; +type_t *RNG_TYPE = NULL; +public type_t *PATH_TYPE = NULL; +public type_t *THREAD_TYPE = NULL; +public type_t *PATH_TYPE_TYPE = NULL; + +static type_t *declare_type(env_t *env, const char *def_str) +{ + ast_t *ast = parse(def_str); + if (!ast) errx(1, "Couldn't not parse struct def: %s", def_str); + if (ast->tag != Block) errx(1, "Couldn't not parse struct def: %s", def_str); + ast_list_t *statements = Match(ast, Block)->statements; + if (statements == NULL || statements->next) errx(1, "Couldn't not parse struct def: %s", def_str); + switch (statements->ast->tag) { + case StructDef: { + auto def = Match(statements->ast, StructDef); + prebind_statement(env, statements->ast); + bind_statement(env, statements->ast); + return Table$str_get(*env->types, def->name); + } + case EnumDef: { + auto def = Match(statements->ast, EnumDef); + prebind_statement(env, statements->ast); + bind_statement(env, statements->ast); + return Table$str_get(*env->types, def->name); + } + default: errx(1, "Not a type definition: %s", def_str); + } +} + +static type_t *bind_type(env_t *env, const char *name, type_t *type) +{ + if (Table$str_get(*env->types, name)) + errx(1, "Duplicate binding for type: %s", name); + Table$str_set(env->types, name, type); + return type; +} + +env_t *global_env(void) +{ + static env_t *_global_env = NULL; + if (_global_env != NULL) return _global_env; + + env_t *env = new(env_t); + env->code = new(compilation_unit_t); + env->types = new(Table_t); + env->globals = new(Table_t); + env->locals = env->globals; + env->imports = new(Table_t); + + TEXT_TYPE = bind_type(env, "Text", Type(TextType, .lang="Text", .env=namespace_env(env, "Text"))); + (void)bind_type(env, "Int", Type(BigIntType)); + (void)bind_type(env, "Int32", Type(IntType, .bits=TYPE_IBITS32)); + (void)bind_type(env, "Memory", Type(MemoryType)); + PATH_TYPE_TYPE = declare_type(env, "enum PathType(Relative, Absolute, Home)"); + MATCH_TYPE = declare_type(env, "struct Match(text:Text, index:Int, captures:[Text])"); + PATH_TYPE = declare_type(env, "struct Path(type:PathType, components:[Text])"); + THREAD_TYPE = declare_type(env, "struct Thread(; opaque)"); + RNG_TYPE = declare_type(env, "struct RNG(state:@Memory)"); + + typedef struct { + const char *name, *code, *type_str; + } ns_entry_t; + + struct { + const char *name; + type_t *type; + CORD typename; + CORD typeinfo; + Array_t namespace; + } global_types[] = { + {"Void", Type(VoidType), "Void_t", "Void$info", {}}, + {"Abort", Type(AbortType), "void", "Abort$info", {}}, + {"Memory", Type(MemoryType), "Memory_t", "Memory$info", {}}, + {"Bool", Type(BoolType), "Bool_t", "Bool$info", TypedArray(ns_entry_t, + {"parse", "Bool$parse", "func(text:Text -> Bool?)"}, + )}, + {"Byte", Type(ByteType), "Byte_t", "Byte$info", TypedArray(ns_entry_t, + {"max", "Byte$max", "Byte"}, + {"hex", "Byte$hex", "func(byte:Byte, uppercase=yes, prefix=no -> Text)"}, + {"min", "Byte$min", "Byte"}, + )}, + {"Int", Type(BigIntType), "Int_t", "Int$info", TypedArray(ns_entry_t, + {"abs", "Int$abs", "func(x:Int -> Int)"}, + {"bit_and", "Int$bit_and", "func(x,y:Int -> Int)"}, + {"bit_or", "Int$bit_or", "func(x,y:Int -> Int)"}, + {"bit_xor", "Int$bit_xor", "func(x,y:Int -> Int)"}, + {"choose", "Int$choose", "func(x,y:Int -> Int)"}, + {"clamped", "Int$clamped", "func(x,low,high:Int -> Int)"}, + {"divided_by", "Int$divided_by", "func(x,y:Int -> Int)"}, + {"factorial", "Int$factorial", "func(x:Int -> Int)"}, + {"format", "Int$format", "func(i:Int, digits=0 -> Text)"}, + {"gcd", "Int$gcd", "func(x,y:Int -> Int)"}, + {"hex", "Int$hex", "func(i:Int, digits=0, uppercase=yes, prefix=yes -> Text)"}, + {"is_prime", "Int$is_prime", "func(x:Int,reps=50 -> Bool)"}, + {"left_shifted", "Int$left_shifted", "func(x,y:Int -> Int)"}, + {"minus", "Int$minus", "func(x,y:Int -> Int)"}, + {"modulo", "Int$modulo", "func(x,y:Int -> Int)"}, + {"modulo1", "Int$modulo1", "func(x,y:Int -> Int)"}, + {"negated", "Int$negated", "func(x:Int -> Int)"}, + {"negative", "Int$negative", "func(x:Int -> Int)"}, + {"next_prime", "Int$next_prime", "func(x:Int -> Int)"}, + {"octal", "Int$octal", "func(i:Int, digits=0, prefix=yes -> Text)"}, + {"onward", "Int$onward", "func(first:Int,step=1 -> func(->Int?))"}, + {"parse", "Int$parse", "func(text:Text -> Int?)"}, + {"plus", "Int$plus", "func(x,y:Int -> Int)"}, + {"power", "Int$power", "func(base:Int,exponent:Int -> Int)"}, +#if __GNU_MP_VERSION >= 6 +#if __GNU_MP_VERSION_MINOR >= 3 + {"prev_prime", "Int$prev_prime", "func(x:Int -> Int)"}, +#endif +#endif + {"right_shifted", "Int$right_shifted", "func(x,y:Int -> Int)"}, + {"sqrt", "Int$sqrt", "func(x:Int -> Int?)"}, + {"times", "Int$times", "func(x,y:Int -> Int)"}, + {"to", "Int$to", "func(first:Int,last:Int,step=none:Int -> func(->Int?))"}, + )}, + {"Int64", Type(IntType, .bits=TYPE_IBITS64), "Int64_t", "Int64$info", TypedArray(ns_entry_t, + {"abs", "labs", "func(i:Int64 -> Int64)"}, + {"bits", "Int64$bits", "func(x:Int64 -> [Bool])"}, + {"clamped", "Int64$clamped", "func(x,low,high:Int64 -> Int64)"}, + {"divided_by", "Int64$divided_by", "func(x,y:Int64 -> Int64)"}, + {"format", "Int64$format", "func(i:Int64, digits=0 -> Text)"}, + {"gcd", "Int64$gcd", "func(x,y:Int64 -> Int64)"}, + {"parse", "Int64$parse", "func(text:Text -> Int64?)"}, + {"hex", "Int64$hex", "func(i:Int64, digits=0, uppercase=yes, prefix=yes -> Text)"}, + {"max", "Int64$max", "Int64"}, + {"min", "Int64$min", "Int64"}, + {"modulo", "Int64$modulo", "func(x,y:Int64 -> Int64)"}, + {"modulo1", "Int64$modulo1", "func(x,y:Int64 -> Int64)"}, + {"octal", "Int64$octal", "func(i:Int64, digits=0, prefix=yes -> Text)"}, + {"onward", "Int64$onward", "func(first:Int64,step=Int64(1) -> func(->Int64?))"}, + {"to", "Int64$to", "func(first:Int64,last:Int64,step=none:Int64 -> func(->Int64?))"}, + {"unsigned_left_shifted", "Int64$unsigned_left_shifted", "func(x:Int64,y:Int64 -> Int64)"}, + {"unsigned_right_shifted", "Int64$unsigned_right_shifted", "func(x:Int64,y:Int64 -> Int64)"}, + {"wrapping_minus", "Int64$wrapping_minus", "func(x:Int64,y:Int64 -> Int64)"}, + {"wrapping_plus", "Int64$wrapping_plus", "func(x:Int64,y:Int64 -> Int64)"}, + )}, + {"Int32", Type(IntType, .bits=TYPE_IBITS32), "Int32_t", "Int32$info", TypedArray(ns_entry_t, + {"abs", "abs", "func(i:Int32 -> Int32)"}, + {"bits", "Int32$bits", "func(x:Int32 -> [Bool])"}, + {"clamped", "Int32$clamped", "func(x,low,high:Int32 -> Int32)"}, + {"divided_by", "Int32$divided_by", "func(x,y:Int32 -> Int32)"}, + {"format", "Int32$format", "func(i:Int32, digits=0 -> Text)"}, + {"gcd", "Int32$gcd", "func(x,y:Int32 -> Int32)"}, + {"parse", "Int32$parse", "func(text:Text -> Int32?)"}, + {"hex", "Int32$hex", "func(i:Int32, digits=0, uppercase=yes, prefix=yes -> Text)"}, + {"max", "Int32$max", "Int32"}, + {"min", "Int32$min", "Int32"}, + {"modulo", "Int32$modulo", "func(x,y:Int32 -> Int32)"}, + {"modulo1", "Int32$modulo1", "func(x,y:Int32 -> Int32)"}, + {"octal", "Int32$octal", "func(i:Int32, digits=0, prefix=yes -> Text)"}, + {"onward", "Int32$onward", "func(first:Int32,step=Int32(1) -> func(->Int32?))"}, + {"to", "Int32$to", "func(first:Int32,last:Int32,step=none:Int32 -> func(->Int32?))"}, + {"unsigned_left_shifted", "Int32$unsigned_left_shifted", "func(x:Int32,y:Int32 -> Int32)"}, + {"unsigned_right_shifted", "Int32$unsigned_right_shifted", "func(x:Int32,y:Int32 -> Int32)"}, + {"wrapping_minus", "Int32$wrapping_minus", "func(x:Int32,y:Int32 -> Int32)"}, + {"wrapping_plus", "Int32$wrapping_plus", "func(x:Int32,y:Int32 -> Int32)"}, + )}, + {"Int16", Type(IntType, .bits=TYPE_IBITS16), "Int16_t", "Int16$info", TypedArray(ns_entry_t, + {"abs", "abs", "func(i:Int16 -> Int16)"}, + {"bits", "Int16$bits", "func(x:Int16 -> [Bool])"}, + {"clamped", "Int16$clamped", "func(x,low,high:Int16 -> Int16)"}, + {"divided_by", "Int16$divided_by", "func(x,y:Int16 -> Int16)"}, + {"format", "Int16$format", "func(i:Int16, digits=0 -> Text)"}, + {"gcd", "Int16$gcd", "func(x,y:Int16 -> Int16)"}, + {"parse", "Int16$parse", "func(text:Text -> Int16?)"}, + {"hex", "Int16$hex", "func(i:Int16, digits=0, uppercase=yes, prefix=yes -> Text)"}, + {"max", "Int16$max", "Int16"}, + {"min", "Int16$min", "Int16"}, + {"modulo", "Int16$modulo", "func(x,y:Int16 -> Int16)"}, + {"modulo1", "Int16$modulo1", "func(x,y:Int16 -> Int16)"}, + {"octal", "Int16$octal", "func(i:Int16, digits=0, prefix=yes -> Text)"}, + {"onward", "Int16$onward", "func(first:Int16,step=Int16(1) -> func(->Int16?))"}, + {"to", "Int16$to", "func(first:Int16,last:Int16,step=none:Int16 -> func(->Int16?))"}, + {"unsigned_left_shifted", "Int16$unsigned_left_shifted", "func(x:Int16,y:Int16 -> Int16)"}, + {"unsigned_right_shifted", "Int16$unsigned_right_shifted", "func(x:Int16,y:Int16 -> Int16)"}, + {"wrapping_minus", "Int16$wrapping_minus", "func(x:Int16,y:Int16 -> Int16)"}, + {"wrapping_plus", "Int16$wrapping_plus", "func(x:Int16,y:Int16 -> Int16)"}, + )}, + {"Int8", Type(IntType, .bits=TYPE_IBITS8), "Int8_t", "Int8$info", TypedArray(ns_entry_t, + {"abs", "abs", "func(i:Int8 -> Int8)"}, + {"bits", "Int8$bits", "func(x:Int8 -> [Bool])"}, + {"clamped", "Int8$clamped", "func(x,low,high:Int8 -> Int8)"}, + {"divided_by", "Int8$divided_by", "func(x,y:Int8 -> Int8)"}, + {"format", "Int8$format", "func(i:Int8, digits=0 -> Text)"}, + {"gcd", "Int8$gcd", "func(x,y:Int8 -> Int8)"}, + {"parse", "Int8$parse", "func(text:Text -> Int8?)"}, + {"hex", "Int8$hex", "func(i:Int8, digits=0, uppercase=yes, prefix=yes -> Text)"}, + {"max", "Int8$max", "Int8"}, + {"min", "Int8$min", "Int8"}, + {"modulo", "Int8$modulo", "func(x,y:Int8 -> Int8)"}, + {"modulo1", "Int8$modulo1", "func(x,y:Int8 -> Int8)"}, + {"octal", "Int8$octal", "func(i:Int8, digits=0, prefix=yes -> Text)"}, + {"onward", "Int8$onward", "func(first:Int8,step=Int8(1) -> func(->Int8?))"}, + {"to", "Int8$to", "func(first:Int8,last:Int8,step=none:Int8 -> func(->Int8?))"}, + {"unsigned_left_shifted", "Int8$unsigned_left_shifted", "func(x:Int8,y:Int8 -> Int8)"}, + {"unsigned_right_shifted", "Int8$unsigned_right_shifted", "func(x:Int8,y:Int8 -> Int8)"}, + {"wrapping_minus", "Int8$wrapping_minus", "func(x:Int8,y:Int8 -> Int8)"}, + {"wrapping_plus", "Int8$wrapping_plus", "func(x:Int8,y:Int8 -> Int8)"}, + )}, +#define C(name) {#name, "M_"#name, "Num"} +#define F(name) {#name, #name, "func(n:Num -> Num)"} +#define F_opt(name) {#name, #name, "func(n:Num -> Num?)"} +#define F2(name) {#name, #name, "func(x,y:Num -> Num)"} + {"Num", Type(NumType, .bits=TYPE_NBITS64), "Num_t", "Num$info", TypedArray(ns_entry_t, + {"near", "Num$near", "func(x,y:Num, ratio=1e-9, min_epsilon=1e-9 -> Bool)"}, + {"clamped", "Num$clamped", "func(x,low,high:Num -> Num)"}, + {"format", "Num$format", "func(n:Num, precision=16 -> Text)"}, + {"scientific", "Num$scientific", "func(n:Num,precision=0 -> Text)"}, + {"isinf", "Num$isinf", "func(n:Num -> Bool)"}, + {"isfinite", "Num$isfinite", "func(n:Num -> Bool)"}, + {"modulo", "Num$mod", "func(x,y:Num -> Num)"}, + {"modulo1", "Num$mod1", "func(x,y:Num -> Num)"}, + C(2_SQRTPI), C(E), C(PI_2), C(2_PI), C(1_PI), C(LN10), C(LN2), C(LOG2E), + C(PI), C(PI_4), C(SQRT2), C(SQRT1_2), + {"INF", "(Num_t)(INFINITY)", "Num"}, + {"TAU", "(Num_t)(2.*M_PI)", "Num"}, + {"mix", "Num$mix", "func(amount,x,y:Num -> Num)"}, + {"parse", "Num$parse", "func(text:Text -> Num?)"}, + {"abs", "fabs", "func(n:Num -> Num)"}, + F_opt(acos), F_opt(acosh), F_opt(asin), F(asinh), F(atan), F_opt(atanh), + F(cbrt), F(ceil), F_opt(cos), F(cosh), F(erf), F(erfc), + F(exp), F(exp2), F(expm1), F(floor), F(j0), F(j1), F_opt(log), F_opt(log10), F_opt(log1p), + F_opt(log2), F(logb), F(rint), F(round), F(significand), F_opt(sin), F(sinh), F_opt(sqrt), + F_opt(tan), F(tanh), F_opt(tgamma), F(trunc), F_opt(y0), F_opt(y1), + F2(atan2), F2(copysign), F2(fdim), F2(hypot), F2(nextafter), + )}, +#undef F2 +#undef F_opt +#undef F +#undef C +#define C(name) {#name, "(Num32_t)(M_"#name")", "Num32"} +#define F(name) {#name, #name"f", "func(n:Num32 -> Num32)"} +#define F_opt(name) {#name, #name"f", "func(n:Num32 -> Num32?)"} +#define F2(name) {#name, #name"f", "func(x,y:Num32 -> Num32)"} + {"Num32", Type(NumType, .bits=TYPE_NBITS32), "Num32_t", "Num32$info", TypedArray(ns_entry_t, + {"near", "Num32$near", "func(x,y:Num32, ratio=Num32(1e-9), min_epsilon=Num32(1e-9) -> Bool)"}, + {"clamped", "Num32$clamped", "func(x,low,high:Num32 -> Num32)"}, + {"format", "Num32$format", "func(n:Num32, precision=8 -> Text)"}, + {"scientific", "Num32$scientific", "func(n:Num32, precision=0 -> Text)"}, + {"isinf", "Num32$isinf", "func(n:Num32 -> Bool)"}, + {"isfinite", "Num32$isfinite", "func(n:Num32 -> Bool)"}, + C(2_SQRTPI), C(E), C(PI_2), C(2_PI), C(1_PI), C(LN10), C(LN2), C(LOG2E), + C(PI), C(PI_4), C(SQRT2), C(SQRT1_2), + {"INF", "(Num32_t)(INFINITY)", "Num32"}, + {"TAU", "(Num32_t)(2.f*M_PI)", "Num32"}, + {"mix", "Num32$mix", "func(amount,x,y:Num32 -> Num32)"}, + {"parse", "Num32$parse", "func(text:Text -> Num32?)"}, + {"abs", "fabsf", "func(n:Num32 -> Num32)"}, + {"modulo", "Num32$mod", "func(x,y:Num32 -> Num32)"}, + {"modulo1", "Num32$mod1", "func(x,y:Num32 -> Num32)"}, + F_opt(acos), F_opt(acosh), F_opt(asin), F(asinh), F(atan), F_opt(atanh), + F(cbrt), F(ceil), F_opt(cos), F(cosh), F(erf), F(erfc), + F(exp), F(exp2), F(expm1), F(floor), F(j0), F(j1), F_opt(log), F_opt(log10), F_opt(log1p), + F_opt(log2), F(logb), F(rint), F(round), F(significand), F_opt(sin), F(sinh), F_opt(sqrt), + F_opt(tan), F(tanh), F_opt(tgamma), F(trunc), F_opt(y0), F_opt(y1), + F2(atan2), F2(copysign), F2(fdim), F2(hypot), F2(nextafter), + )}, + {"CString", Type(CStringType), "char*", "CString$info", TypedArray(ns_entry_t, + {"as_text", "CString$as_text_simple", "func(str:CString -> Text)"}, + )}, +#undef F2 +#undef F_opt +#undef F +#undef C + {"Match", MATCH_TYPE, "Match_t", "Match", TypedArray(ns_entry_t, + // No methods + )}, + {"Pattern", Type(TextType, .lang="Pattern", .env=namespace_env(env, "Pattern")), "Pattern_t", "Pattern$info", TypedArray(ns_entry_t, + {"escape_int", "Int$value_as_text", "func(i:Int -> Pattern)"}, + {"escape_text", "Pattern$escape_text", "func(text:Text -> Pattern)"}, + )}, + {"Moment", Type(MomentType), "Moment_t", "Moment", TypedArray(ns_entry_t, + // Used as a default for functions below: + {"now", "Moment$now", "func(->Moment)"}, + + {"after", "Moment$after", "func(moment:Moment,seconds,minutes,hours=0.0,days,weeks,months,years=0,timezone=none:Text -> Moment)"}, + {"date", "Moment$date", "func(moment:Moment,timezone=none:Text -> Text)"}, + {"day_of_month", "Moment$day_of_month", "func(moment:Moment,timezone=none:Text -> Int)"}, + {"day_of_week", "Moment$day_of_week", "func(moment:Moment,timezone=none:Text -> Int)"}, + {"day_of_year", "Moment$day_of_year", "func(moment:Moment,timezone=none:Text -> Int)"}, + {"format", "Moment$format", "func(moment:Moment,format=\"%Y-%m-%dT%H:%M:%S%z\",timezone=none:Text -> Text)"}, + {"from_unix_timestamp", "Moment$from_unix_timestamp", "func(timestamp:Int64 -> Moment)"}, + {"get_local_timezone", "Moment$get_local_timezone", "func(->Text)"}, + {"hour", "Moment$hour", "func(moment:Moment,timezone=none:Text -> Int)"}, + {"hours_till", "Moment$hours_till", "func(now,then:Moment -> Num)"}, + {"minute", "Moment$minute", "func(moment:Moment,timezone=none:Text -> Int)"}, + {"minutes_till", "Moment$minutes_till", "func(now,then:Moment -> Num)"}, + {"month", "Moment$month", "func(moment:Moment,timezone=none:Text -> Int)"}, + {"microsecond", "Moment$microsecond", "func(moment:Moment,timezone=none:Text -> Int)"}, + {"new", "Moment$new", "func(year,month,day:Int,hour,minute=0,second=0.0,timezone=none:Text -> Moment)"}, + {"parse", "Moment$parse", "func(text:Text, format=\"%Y-%m-%dT%H:%M:%S%z\" -> Moment?)"}, + {"relative", "Moment$relative", "func(moment:Moment,relative_to=Moment.now(),timezone=none:Text -> Text)"}, + {"second", "Moment$second", "func(moment:Moment,timezone=none:Text -> Int)"}, + {"seconds_till", "Moment$seconds_till", "func(now:Moment,then:Moment -> Num)"}, + {"set_local_timezone", "Moment$set_local_timezone", "func(timezone=none:Text)"}, + {"time", "Moment$time", "func(moment:Moment,seconds=no,am_pm=yes,timezone=none:Text -> Text)"}, + {"unix_timestamp", "Moment$unix_timestamp", "func(moment:Moment -> Int64)"}, + {"year", "Moment$year", "func(moment:Moment,timezone=none:Text -> Int)"}, + )}, + {"PathType", PATH_TYPE_TYPE, "PathType_t", "PathType$info", TypedArray(ns_entry_t, + {"Relative", "((PathType_t){.$tag=PATH_RELATIVE})", "PathType"}, + {"Absolute", "((PathType_t){.$tag=PATH_ABSOLUTE})", "PathType"}, + {"Home", "((PathType_t){.$tag=PATH_HOME})", "PathType"}, + )}, + {"Path", PATH_TYPE, "Path_t", "Path$info", TypedArray(ns_entry_t, + {"accessed", "Path$accessed", "func(path:Path, follow_symlinks=yes -> Moment?)"}, + {"append", "Path$append", "func(path:Path, text:Text, permissions=Int32(0o644))"}, + {"append_bytes", "Path$append_bytes", "func(path:Path, bytes:[Byte], permissions=Int32(0o644))"}, + {"base_name", "Path$base_name", "func(path:Path -> Text)"}, + {"by_line", "Path$by_line", "func(path:Path -> func(->Text?)?)"}, + {"can_execute", "Path$can_execute", "func(path:Path -> Bool)"}, + {"can_read", "Path$can_read", "func(path:Path -> Bool)"}, + {"can_write", "Path$can_write", "func(path:Path -> Bool)"}, + {"changed", "Path$changed", "func(path:Path, follow_symlinks=yes -> Moment?)"}, + {"child", "Path$with_component", "func(path:Path, child:Text -> Path)"}, + {"children", "Path$children", "func(path:Path, include_hidden=no -> [Path])"}, + {"concatenated_with", "Path$concat", "func(a,b:Path -> Path)"}, + {"create_directory", "Path$create_directory", "func(path:Path, permissions=Int32(0o755))"}, + {"exists", "Path$exists", "func(path:Path -> Bool)"}, + {"expand_home", "Path$expand_home", "func(path:Path -> Path)"}, + {"extension", "Path$extension", "func(path:Path, full=yes -> Text)"}, + {"files", "Path$children", "func(path:Path, include_hidden=no -> [Path])"}, + {"from_components", "Path$from_components", "func(components:[Text] -> Path)"}, + {"glob", "Path$glob", "func(path:Path -> [Path])"}, + {"group", "Path$group", "func(path:Path, follow_symlinks=yes -> Text?)"}, + {"is_directory", "Path$is_directory", "func(path:Path, follow_symlinks=yes -> Bool)"}, + {"is_file", "Path$is_file", "func(path:Path, follow_symlinks=yes -> Bool)"}, + {"is_pipe", "Path$is_pipe", "func(path:Path, follow_symlinks=yes -> Bool)"}, + {"is_socket", "Path$is_socket", "func(path:Path, follow_symlinks=yes -> Bool)"}, + {"is_symlink", "Path$is_symlink", "func(path:Path -> Bool)"}, + {"modified", "Path$modified", "func(path:Path, follow_symlinks=yes -> Moment?)"}, + {"owner", "Path$owner", "func(path:Path, follow_symlinks=yes -> Text?)"}, + {"parent", "Path$parent", "func(path:Path -> Path)"}, + {"read", "Path$read", "func(path:Path -> Text?)"}, + {"read_bytes", "Path$read_bytes", "func(path:Path, limit=none:Int -> [Byte]?)"}, + {"relative_to", "Path$relative_to", "func(path:Path, relative_to:Path -> Path)"}, + {"remove", "Path$remove", "func(path:Path, ignore_missing=no)"}, + {"resolved", "Path$resolved", "func(path:Path, relative_to=(./) -> Path)"}, + {"set_owner", "Path$set_owner", "func(path:Path, owner=none:Text, group=none:Text, follow_symlinks=yes)"}, + {"subdirectories", "Path$children", "func(path:Path, include_hidden=no -> [Path])"}, + {"unique_directory", "Path$unique_directory", "func(path:Path -> Path)"}, + {"write", "Path$write", "func(path:Path, text:Text, permissions=Int32(0o644))"}, + {"write_bytes", "Path$write_bytes", "func(path:Path, bytes:[Byte], permissions=Int32(0o644))"}, + {"write_unique", "Path$write_unique", "func(path:Path, text:Text -> Path)"}, + {"write_unique_bytes", "Path$write_unique_bytes", "func(path:Path, bytes:[Byte] -> Path)"}, + )}, + // RNG must come after Path so we can read bytes from /dev/urandom + {"RNG", RNG_TYPE, "RNG_t", "RNG", TypedArray(ns_entry_t, + {"bool", "RNG$bool", "func(rng:RNG, p=0.5 -> Bool)"}, + {"byte", "RNG$byte", "func(rng:RNG -> Byte)"}, + {"bytes", "RNG$bytes", "func(rng:RNG, count:Int -> [Byte])"}, + {"copy", "RNG$copy", "func(rng:RNG -> RNG)"}, + {"int", "RNG$int", "func(rng:RNG, min,max:Int -> Int)"}, + {"int16", "RNG$int16", "func(rng:RNG, min=Int16.min, max=Int16.max -> Int16)"}, + {"int32", "RNG$int32", "func(rng:RNG, min=Int32.min, max=Int32.max -> Int32)"}, + {"int64", "RNG$int64", "func(rng:RNG, min=Int64.min, max=Int64.max -> Int64)"}, + {"int8", "RNG$int8", "func(rng:RNG, min=Int8.min, max=Int8.max -> Int8)"}, + {"new", "RNG$new", "func(seed=(/dev/urandom):read_bytes(40)! -> RNG)"}, + {"num", "RNG$num", "func(rng:RNG, min=0.0, max=1.0 -> Num)"}, + {"num32", "RNG$num32", "func(rng:RNG, min=Num32(0.0), max=Num32(1.0) -> Num32)"}, + {"set_seed", "RNG$set_seed", "func(rng:RNG, seed:[Byte])"}, + )}, + {"Text", TEXT_TYPE, "Text_t", "Text$info", TypedArray(ns_entry_t, + {"as_c_string", "Text$as_c_string", "func(text:Text -> CString)"}, + {"at", "Text$cluster", "func(text:Text, index:Int -> Text)"}, + {"by_line", "Text$by_line", "func(text:Text -> func(->Text?))"}, + {"by_match", "Text$by_match", "func(text:Text, pattern:Pattern -> func(->Match?))"}, + {"by_split", "Text$by_split", "func(text:Text, pattern=$Pattern'' -> func(->Text?))"}, + {"bytes", "Text$utf8_bytes", "func(text:Text -> [Byte])"}, + {"caseless_equals", "Text$equal_ignoring_case", "func(a,b:Text, language='C' -> Bool)"}, + {"codepoint_names", "Text$codepoint_names", "func(text:Text -> [Text])"}, + {"ends_with", "Text$ends_with", "func(text,suffix:Text -> Bool)"}, + {"each", "Text$each", "func(text:Text, pattern:Pattern, fn:func(match:Match), recursive=yes)"}, + {"find", "Text$find", "func(text:Text, pattern:Pattern, start=1 -> Match?)"}, + {"find_all", "Text$find_all", "func(text:Text, pattern:Pattern -> [Match])"}, + {"from", "Text$from", "func(text:Text, first:Int -> Text)"}, + {"from_bytes", "Text$from_bytes", "func(bytes:[Byte] -> Text?)"}, + {"from_c_string", "Text$from_str", "func(str:CString -> Text?)"}, + {"from_codepoint_names", "Text$from_codepoint_names", "func(codepoint_names:[Text] -> Text?)"}, + {"from_codepoints", "Text$from_codepoints", "func(codepoints:[Int32] -> Text)"}, + {"from_text", "Path$from_text", "func(text:Text -> Path)"}, + {"has", "Text$has", "func(text:Text, pattern:Pattern -> Bool)"}, + {"join", "Text$join", "func(glue:Text, pieces:[Text] -> Text)"}, + {"left_pad", "Text$left_pad", "func(text:Text, count:Int, pad=' ', language='C' -> Text)"}, + {"lines", "Text$lines", "func(text:Text -> [Text])"}, + {"lower", "Text$lower", "func(text:Text, language='C' -> Text)"}, + {"map", "Text$map", "func(text:Text, pattern:Pattern, fn:func(match:Match -> Text), recursive=yes -> Text)"}, + {"matches", "Text$matches", "func(text:Text, pattern:Pattern -> [Text]?)"}, + {"middle_pad", "Text$middle_pad", "func(text:Text, count:Int, pad=' ', language='C' -> Text)"}, + {"quoted", "Text$quoted", "func(text:Text, color=no -> Text)"}, + {"repeat", "Text$repeat", "func(text:Text, count:Int -> Text)"}, + {"replace", "Text$replace", "func(text:Text, pattern:Pattern, replacement:Text, backref=$/\\/, recursive=yes -> Text)"}, + {"replace_all", "Text$replace_all", "func(text:Text, replacements:{Pattern,Text}, backref=$/\\/, recursive=yes -> Text)"}, + {"reversed", "Text$reversed", "func(text:Text -> Text)"}, + {"right_pad", "Text$right_pad", "func(text:Text, count:Int, pad=' ', language='C' -> Text)"}, + {"slice", "Text$slice", "func(text:Text, from=1, to=-1 -> Text)"}, + {"split", "Text$split", "func(text:Text, pattern=$Pattern'' -> [Text])"}, + {"starts_with", "Text$starts_with", "func(text,prefix:Text -> Bool)"}, + {"title", "Text$title", "func(text:Text, language='C' -> Text)"}, + {"to", "Text$to", "func(text:Text, last:Int -> Text)"}, + {"trim", "Text$trim", "func(text:Text, pattern=$/{whitespace}/, trim_left=yes, trim_right=yes -> Text)"}, + {"upper", "Text$upper", "func(text:Text, language='C' -> Text)"}, + {"utf32_codepoints", "Text$utf32_codepoints", "func(text:Text -> [Int32])"}, + {"width", "Text$width", "func(text:Text, language='C' -> Int)"}, + )}, + {"Thread", THREAD_TYPE, "Thread_t", "Thread", TypedArray(ns_entry_t, + {"new", "Thread$new", "func(fn:func() -> Thread)"}, + {"cancel", "Thread$cancel", "func(thread:Thread)"}, + {"join", "Thread$join", "func(thread:Thread)"}, + {"detach", "Thread$detach", "func(thread:Thread)"}, + )}, + }; + + for (size_t i = 0; i < sizeof(global_types)/sizeof(global_types[0]); i++) { + env_t *ns_env = NULL; + switch (global_types[i].type->tag) { + case TextType: + ns_env = Match(global_types[i].type, TextType)->env; + break; + case StructType: + ns_env = Match(global_types[i].type, StructType)->env; + break; + case EnumType: + ns_env = Match(global_types[i].type, EnumType)->env; + break; + default: break; + } + if (ns_env == NULL) ns_env = namespace_env(env, global_types[i].name); + binding_t *binding = new(binding_t, .type=Type(TypeInfoType, .name=global_types[i].name, .type=global_types[i].type, .env=ns_env), + .code=global_types[i].typeinfo); + Table$str_set(env->globals, global_types[i].name, binding); + Table$str_set(env->types, global_types[i].name, global_types[i].type); + } + + for (size_t i = 0; i < sizeof(global_types)/sizeof(global_types[0]); i++) { + binding_t *type_binding = Table$str_get(*env->globals, global_types[i].name); + assert(type_binding); + env_t *ns_env = Match(type_binding->type, TypeInfoType)->env; + for (int64_t j = 0; j < global_types[i].namespace.length; j++) { + ns_entry_t *entry = global_types[i].namespace.data + j*global_types[i].namespace.stride; + type_t *type = parse_type_string(ns_env, entry->type_str); + if (!type) compiler_err(NULL, NULL, NULL, "Couldn't parse type string: %s", entry->type_str); + if (type->tag == ClosureType) type = Match(type, ClosureType)->fn; + set_binding(ns_env, entry->name, type, entry->code); + } + } + + + // Conversion constructors: +#define ADD_CONSTRUCTORS(type_name, ...) do {\ + env_t *ns_env = namespace_env(env, type_name); \ + struct { const char *c_name, *type_str; } constructor_infos[] = {__VA_ARGS__}; \ + for (size_t i = 0; i < sizeof(constructor_infos)/sizeof(constructor_infos[0]); i++) { \ + type_t *t = parse_type_string(ns_env, constructor_infos[i].type_str); \ + Array$insert(&ns_env->namespace->constructors, \ + ((binding_t[1]){{.code=constructor_infos[i].c_name, \ + .type=Match(t, ClosureType)->fn}}), I(0), sizeof(binding_t)); \ + } \ +} while (0) + + ADD_CONSTRUCTORS("Bool", + {"Bool$from_byte", "func(b:Byte -> Bool)"}, + {"Bool$from_int8", "func(i:Int8 -> Bool)"}, + {"Bool$from_int16", "func(i:Int16 -> Bool)"}, + {"Bool$from_int32", "func(i:Int32 -> Bool)"}, + {"Bool$from_int64", "func(i:Int64 -> Bool)"}, + {"Bool$from_int", "func(i:Int -> Bool)"}); + ADD_CONSTRUCTORS("Byte", + {"Byte$from_bool", "func(b:Bool -> Byte)"}, + {"Byte$from_int8", "func(i:Int8 -> Byte)"}, + {"Byte$from_int16", "func(i:Int16, truncate=no -> Byte)"}, + {"Byte$from_int32", "func(i:Int32, truncate=no -> Byte)"}, + {"Byte$from_int64", "func(i:Int64, truncate=no -> Byte)"}, + {"Byte$from_int", "func(i:Int, truncate=no -> Byte)"}); + ADD_CONSTRUCTORS("Int", + {"Int$from_bool", "func(b:Bool -> Int)"}, + {"Int$from_byte", "func(b:Byte -> Int)"}, + {"Int$from_int8", "func(i:Int8 -> Int)"}, + {"Int$from_int16", "func(i:Int16 -> Int)"}, + {"Int$from_int32", "func(i:Int32 -> Int)"}, + {"Int$from_int64", "func(i:Int64 -> Int)"}, + {"Int$from_num", "func(n:Num, truncate=no -> Int)"}, + {"Int$from_num32", "func(n:Num32, truncate=no -> Int)"}); + ADD_CONSTRUCTORS("Int64", + {"Int64$from_bool", "func(b:Bool -> Int64)"}, + {"Int64$from_byte", "func(b:Byte -> Int64)"}, + {"Int64$from_int8", "func(i:Int8 -> Int64)"}, + {"Int64$from_int16", "func(i:Int16 -> Int64)"}, + {"Int64$from_int32", "func(i:Int32 -> Int64)"}, + {"Int64$from_int", "func(i:Int, truncate=no -> Int64)"}, + {"Int64$from_num", "func(n:Num, truncate=no -> Int64)"}, + {"Int64$from_num32", "func(n:Num32, truncate=no -> Int64)"}); + ADD_CONSTRUCTORS("Int32", + {"Int32$from_bool", "func(b:Bool -> Int32)"}, + {"Int32$from_byte", "func(b:Byte -> Int32)"}, + {"Int32$from_int8", "func(i:Int8 -> Int32)"}, + {"Int32$from_int16", "func(i:Int16 -> Int32)"}, + {"Int32$from_int64", "func(i:Int64, truncate=no -> Int32)"}, + {"Int32$from_int", "func(i:Int, truncate=no -> Int32)"}, + {"Int32$from_num", "func(n:Num, truncate=no -> Int32)"}, + {"Int32$from_num32", "func(n:Num32, truncate=no -> Int32)"}); + ADD_CONSTRUCTORS("Int16", + {"Int16$from_bool", "func(b:Bool -> Int16)"}, + {"Int16$from_byte", "func(b:Byte -> Int16)"}, + {"Int16$from_int8", "func(i:Int8 -> Int16)"}, + {"Int16$from_int32", "func(i:Int32, truncate=no -> Int16)"}, + {"Int16$from_int64", "func(i:Int64, truncate=no -> Int16)"}, + {"Int16$from_int", "func(i:Int, truncate=no -> Int16)"}, + {"Int16$from_num", "func(n:Num, truncate=no -> Int16)"}, + {"Int16$from_num32", "func(n:Num32, truncate=no -> Int16)"}); + ADD_CONSTRUCTORS("Int8", + {"Int8$from_bool", "func(b:Bool -> Int8)"}, + {"Int8$from_byte", "func(b:Byte -> Int8)"}, + {"Int8$from_int16", "func(i:Int16, truncate=no -> Int8)"}, + {"Int8$from_int32", "func(i:Int32, truncate=no -> Int8)"}, + {"Int8$from_int64", "func(i:Int64, truncate=no -> Int8)"}, + {"Int8$from_int", "func(i:Int, truncate=no -> Int8)"}, + {"Int8$from_num", "func(n:Num, truncate=no -> Int8)"}, + {"Int8$from_num32", "func(n:Num32, truncate=no -> Int8)"}); + ADD_CONSTRUCTORS("Num", + {"Num$from_bool", "func(b:Bool -> Num)"}, + {"Num$from_byte", "func(b:Byte -> Num)"}, + {"Num$from_int8", "func(i:Int8 -> Num)"}, + {"Num$from_int16", "func(i:Int16 -> Num)"}, + {"Num$from_int32", "func(i:Int32 -> Num)"}, + {"Num$from_int64", "func(i:Int64, truncate=no -> Num)"}, + {"Num$from_int", "func(i:Int, truncate=no -> Num)"}, + {"Num$from_num32", "func(n:Num32 -> Num)"}); + ADD_CONSTRUCTORS("Num32", + {"Num32$from_bool", "func(b:Bool -> Num32)"}, + {"Num32$from_byte", "func(b:Byte -> Num32)"}, + {"Num32$from_int8", "func(i:Int8 -> Num32)"}, + {"Num32$from_int16", "func(i:Int16 -> Num32)"}, + {"Num32$from_int32", "func(i:Int32, truncate=no -> Num32)"}, + {"Num32$from_int64", "func(i:Int64, truncate=no -> Num32)"}, + {"Num32$from_int", "func(i:Int, truncate=no -> Num32)"}, + {"Num32$from_num", "func(n:Num -> Num32)"}); + ADD_CONSTRUCTORS("Pattern", + {"Pattern$escape_text", "func(text:Text -> Pattern)"}, + {"Int$value_as_text", "func(i:Int -> Pattern)"}); + ADD_CONSTRUCTORS("Path", + {"Path$escape_text", "func(text:Text -> Path)"}, + {"Path$escape_path", "func(path:Path -> Path)"}, + {"Int$value_as_text", "func(i:Int -> Path)"}); + ADD_CONSTRUCTORS("CString", {"Text$as_c_string", "func(text:Text -> CString)"}); + ADD_CONSTRUCTORS("Moment", + {"Moment$now", "func(-> Moment)"}, + {"Moment$new", "func(year,month,day:Int,hour,minute=0,second=0.0,timezone=none:Text -> Moment)"}, + {"Moment$from_unix_timestamp", "func(timestamp:Int64 -> Moment)"}); + ADD_CONSTRUCTORS("RNG", {"RNG$new", "func(-> RNG)"}); + ADD_CONSTRUCTORS("Thread", {"Thread$new", "func(fn:func() -> Thread)"}); +#undef ADD_CONSTRUCTORS + + set_binding(namespace_env(env, "Path"), "from_text", + Type(FunctionType, .args=new(arg_t, .name="text", .type=TEXT_TYPE), + .ret=PATH_TYPE), + "Path$from_text"); + + set_binding(namespace_env(env, "Pattern"), "from_text", + Type(FunctionType, .args=new(arg_t, .name="text", .type=TEXT_TYPE), + .ret=Type(TextType, .lang="Pattern", .env=namespace_env(env, "Pattern"))), + "(Pattern_t)"); + + struct { + const char *name, *code, *type_str; + } global_vars[] = { + {"USE_COLOR", "USE_COLOR", "Bool"}, + {"random", "default_rng", "RNG"}, + {"say", "say", "func(text:Text, newline=yes)"}, + {"print", "say", "func(text:Text, newline=yes)"}, + {"ask", "ask", "func(prompt:Text, bold=yes, force_tty=yes -> Text?)"}, + {"exit", "tomo_exit", "func(message=none:Text, code=Int32(1) -> Abort)"}, + {"fail", "fail_text", "func(message:Text -> Abort)"}, + {"sleep", "sleep_num", "func(seconds:Num)"}, + {"now", "Moment$now", "func(->Moment)"}, + }; + + for (size_t i = 0; i < sizeof(global_vars)/sizeof(global_vars[0]); i++) { + type_t *type = parse_type_string(env, global_vars[i].type_str); + if (!type) compiler_err(NULL, NULL, NULL, "Couldn't parse type string for %s: %s", global_vars[i].name, global_vars[i].type_str); + if (type->tag == ClosureType) type = Match(type, ClosureType)->fn; + Table$str_set(env->globals, global_vars[i].name, new(binding_t, .type=type, .code=global_vars[i].code)); + } + + _global_env = env; + return env; +} + +CORD namespace_prefix(env_t *env, namespace_t *ns) +{ + CORD prefix = CORD_EMPTY; + for (; ns; ns = ns->parent) + prefix = CORD_all(ns->name, "$", prefix); + if (env->locals != env->globals) { + if (env->libname) + prefix = CORD_all("_$", env->libname, "$", prefix); + else + prefix = CORD_all("_$", prefix); + } + return prefix; +} + +env_t *load_module_env(env_t *env, ast_t *ast) +{ + const char *name = ast->file->filename; + env_t *cached = Table$str_get(*env->imports, name); + if (cached) return cached; + env_t *module_env = fresh_scope(env); + module_env->code = new(compilation_unit_t); + module_env->namespace = new(namespace_t, .name=file_base_id(name)); + module_env->namespace_bindings = module_env->locals; + Table$str_set(module_env->imports, name, module_env); + + ast_list_t *statements = Match(ast, Block)->statements; + visit_topologically(statements, (Closure_t){.fn=(void*)prebind_statement, .userdata=module_env}); + visit_topologically(statements, (Closure_t){.fn=(void*)bind_statement, .userdata=module_env}); + + return module_env; +} + +env_t *namespace_scope(env_t *env) +{ + env_t *scope = new(env_t); + *scope = *env; + scope->locals = new(Table_t, .fallback=env->namespace_bindings ? env->namespace_bindings : env->globals); + return scope; +} + +env_t *fresh_scope(env_t *env) +{ + env_t *scope = new(env_t); + *scope = *env; + scope->locals = new(Table_t, .fallback=env->locals); + return scope; +} + +env_t *with_enum_scope(env_t *env, type_t *t) +{ + while (t->tag == OptionalType) + t = Match(t, OptionalType)->type; + + if (t->tag != EnumType) return env; + env = fresh_scope(env); + env_t *ns_env = Match(t, EnumType)->env; + for (tag_t *tag = Match(t, EnumType)->tags; tag; tag = tag->next) { + if (get_binding(env, tag->name)) + continue; + binding_t *b = get_binding(ns_env, tag->name); + assert(b); + Table$str_set(env->locals, tag->name, b); + } + return env; +} + + +env_t *for_scope(env_t *env, ast_t *ast) +{ + auto for_ = Match(ast, For); + type_t *iter_t = value_type(get_type(env, for_->iter)); + env_t *scope = fresh_scope(env); + + switch (iter_t->tag) { + case ArrayType: { + type_t *item_t = Match(iter_t, ArrayType)->item_type; + const char *vars[2] = {}; + int64_t num_vars = 0; + for (ast_list_t *var = for_->vars; var; var = var->next) { + if (num_vars >= 2) + code_err(var->ast, "This is too many variables for this loop"); + vars[num_vars++] = Match(var->ast, Var)->name; + } + if (num_vars == 1) { + set_binding(scope, vars[0], item_t, CORD_cat("_$", vars[0])); + } else if (num_vars == 2) { + set_binding(scope, vars[0], INT_TYPE, CORD_cat("_$", vars[0])); + set_binding(scope, vars[1], item_t, CORD_cat("_$", vars[1])); + } + return scope; + } + case SetType: { + if (for_->vars) { + if (for_->vars->next) + code_err(for_->vars->next->ast, "This is too many variables for this loop"); + type_t *item_type = Match(iter_t, SetType)->item_type; + const char *name = Match(for_->vars->ast, Var)->name; + set_binding(scope, name, item_type, CORD_cat("_$", name)); + } + return scope; + } + case TableType: { + const char *vars[2] = {}; + int64_t num_vars = 0; + for (ast_list_t *var = for_->vars; var; var = var->next) { + if (num_vars >= 2) + code_err(var->ast, "This is too many variables for this loop"); + vars[num_vars++] = Match(var->ast, Var)->name; + } + + type_t *key_t = Match(iter_t, TableType)->key_type; + if (num_vars == 1) { + set_binding(scope, vars[0], key_t, CORD_cat("_$", vars[0])); + } else if (num_vars == 2) { + set_binding(scope, vars[0], key_t, CORD_cat("_$", vars[0])); + type_t *value_t = Match(iter_t, TableType)->value_type; + set_binding(scope, vars[1], value_t, CORD_cat("_$", vars[1])); + } + return scope; + } + case BigIntType: { + if (for_->vars) { + if (for_->vars->next) + code_err(for_->vars->next->ast, "This is too many variables for this loop"); + const char *var = Match(for_->vars->ast, Var)->name; + set_binding(scope, var, INT_TYPE, CORD_cat("_$", var)); + } + return scope; + } + case FunctionType: case ClosureType: { + auto fn = iter_t->tag == ClosureType ? Match(Match(iter_t, ClosureType)->fn, FunctionType) : Match(iter_t, FunctionType); + // if (fn->ret->tag != OptionalType) + // code_err(for_->iter, "Iterator functions must return an optional type, not %T", fn->ret); + + if (for_->vars) { + if (for_->vars->next) + code_err(for_->vars->next->ast, "This is too many variables for this loop"); + const char *var = Match(for_->vars->ast, Var)->name; + type_t *non_opt_type = fn->ret->tag == OptionalType ? Match(fn->ret, OptionalType)->type : fn->ret; + set_binding(scope, var, non_opt_type, CORD_cat("_$", var)); + } + return scope; + } + default: code_err(for_->iter, "Iteration is not implemented for type: %T", iter_t); + } +} + +env_t *get_namespace_by_type(env_t *env, type_t *t) +{ + t = value_type(t); + switch (t->tag) { + case ArrayType: return NULL; + case TableType: return NULL; + case CStringType: case MomentType: + case BoolType: case IntType: case BigIntType: case NumType: case ByteType: { + binding_t *b = get_binding(env, CORD_to_const_char_star(type_to_cord(t))); + assert(b); + return Match(b->type, TypeInfoType)->env; + } + case TextType: return Match(t, TextType)->env; + case StructType: { + auto struct_ = Match(t, StructType); + return struct_->env; + } + case EnumType: { + auto enum_ = Match(t, EnumType); + return enum_->env; + } + case TypeInfoType: { + auto info = Match(t, TypeInfoType); + return info->env; + } + default: break; + } + return NULL; +} + +env_t *namespace_env(env_t *env, const char *namespace_name) +{ + binding_t *b = get_binding(env, namespace_name); + if (b) + return Match(b->type, TypeInfoType)->env; + + env_t *ns_env = new(env_t); + *ns_env = *env; + ns_env->locals = new(Table_t, .fallback=env->locals); + ns_env->namespace = new(namespace_t, .name=namespace_name, .parent=env->namespace); + ns_env->namespace_bindings = ns_env->locals; + return ns_env; +} + +PUREFUNC binding_t *get_binding(env_t *env, const char *name) +{ + return Table$str_get(*env->locals, name); +} + +binding_t *get_namespace_binding(env_t *env, ast_t *self, const char *name) +{ + type_t *self_type = get_type(env, self); + if (!self_type) + code_err(self, "I couldn't get this type"); + env_t *ns_env = get_namespace_by_type(env, self_type); + return ns_env ? get_binding(ns_env, name) : NULL; +} + +PUREFUNC binding_t *get_constructor(env_t *env, type_t *t, arg_ast_t *args) +{ + env_t *type_env = get_namespace_by_type(env, t); + if (!type_env) return NULL; + Array_t constructors = type_env->namespace->constructors; + // Prioritize exact matches: + for (int64_t i = constructors.length-1; i >= 0; i--) { + binding_t *b = constructors.data + i*constructors.stride; + auto fn = Match(b->type, FunctionType); + if (type_eq(fn->ret, t) && is_valid_call(env, fn->args, args, false)) + return b; + } + // Fall back to promotion: + for (int64_t i = constructors.length-1; i >= 0; i--) { + binding_t *b = constructors.data + i*constructors.stride; + auto fn = Match(b->type, FunctionType); + if (type_eq(fn->ret, t) && is_valid_call(env, fn->args, args, true)) + return b; + } + return NULL; +} + +void set_binding(env_t *env, const char *name, type_t *type, CORD code) +{ + assert(name); + Table$str_set(env->locals, name, new(binding_t, .type=type, .code=code)); +} + +__attribute__((format(printf, 4, 5))) +_Noreturn void compiler_err(file_t *f, const char *start, const char *end, const char *fmt, ...) +{ + if (isatty(STDERR_FILENO) && !getenv("NO_COLOR")) + fputs("\x1b[31;7;1m", stderr); + if (f && start && end) + fprintf(stderr, "%s:%ld.%ld: ", f->relative_filename, get_line_number(f, start), + get_line_column(f, start)); + va_list args; + va_start(args, fmt); + vfprintf(stderr, fmt, args); + va_end(args); + if (isatty(STDERR_FILENO) && !getenv("NO_COLOR")) + fputs(" \x1b[m", stderr); + fputs("\n\n", stderr); + if (f && start && end) + highlight_error(f, start, end, "\x1b[31;1m", 2, isatty(STDERR_FILENO) && !getenv("NO_COLOR")); + + if (getenv("TOMO_STACKTRACE")) + print_stack_trace(stderr, 1, 3); + + raise(SIGABRT); + exit(1); +} + +// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 diff --git a/src/environment.h b/src/environment.h new file mode 100644 index 00000000..fbd75c8e --- /dev/null +++ b/src/environment.h @@ -0,0 +1,81 @@ +#pragma once + +// Compilation environments + +#include <gc/cord.h> + +#include "types.h" +#include "stdlib/tables.h" + +typedef struct { + CORD local_typedefs; + CORD staticdefs; + CORD lambdas; + CORD variable_initializers; + CORD function_naming; +} compilation_unit_t; + +typedef struct deferral_s { + struct deferral_s *next; + struct env_s *defer_env; + ast_t *block; +} deferral_t; + +typedef struct loop_ctx_s { + struct loop_ctx_s *next; + const char *loop_name; + ast_list_t *loop_vars; + deferral_t *deferred; + CORD skip_label, stop_label; +} loop_ctx_t; + +typedef struct namespace_s { + const char *name; + Array_t constructors; + struct namespace_s *parent; +} namespace_t; + +typedef struct env_s { + Table_t *types, *globals, *namespace_bindings, *locals; + // Lookup table for env_t* where the key is: + // - Resolved path for local imports (so that `use ./foo.tm` is the same as `use ./baz/../foo.tm`) + // - Raw 'use' string for module imports + Table_t *imports; + compilation_unit_t *code; + type_t *fn_ret; + loop_ctx_t *loop_ctx; + deferral_t *deferred; + CORD libname; // Currently compiling library name (if any) + namespace_t *namespace; + Closure_t *comprehension_action; +} env_t; + +typedef struct { + type_t *type; + CORD code; +} binding_t; + +env_t *global_env(void); +env_t *load_module_env(env_t *env, ast_t *ast); +CORD namespace_prefix(env_t *env, namespace_t *ns); +env_t *get_namespace_by_type(env_t *env, type_t *t); +env_t *namespace_scope(env_t *env); +env_t *fresh_scope(env_t *env); +env_t *for_scope(env_t *env, ast_t *ast); +env_t *with_enum_scope(env_t *env, type_t *t); +env_t *namespace_env(env_t *env, const char *namespace_name); +__attribute__((format(printf, 4, 5))) +_Noreturn void compiler_err(file_t *f, const char *start, const char *end, const char *fmt, ...); +binding_t *get_binding(env_t *env, const char *name); +binding_t *get_constructor(env_t *env, type_t *t, arg_ast_t *args); +void set_binding(env_t *env, const char *name, type_t *type, CORD code); +binding_t *get_namespace_binding(env_t *env, ast_t *self, const char *name); +#define code_err(ast, ...) compiler_err((ast)->file, (ast)->start, (ast)->end, __VA_ARGS__) +extern type_t *TEXT_TYPE; +extern type_t *MATCH_TYPE; +extern type_t *RNG_TYPE; +extern type_t *PATH_TYPE; +extern type_t *PATH_TYPE_TYPE; +extern type_t *THREAD_TYPE; + +// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 diff --git a/src/parse.c b/src/parse.c new file mode 100644 index 00000000..325131e5 --- /dev/null +++ b/src/parse.c @@ -0,0 +1,2622 @@ +// Recursive descent parser for parsing code +#include <ctype.h> +#include <gc.h> +#include <math.h> +#include <setjmp.h> +#include <stdarg.h> +#include <stdbool.h> +#include <string.h> +#include <unistr.h> +#include <unictype.h> +#include <uniname.h> +#include <signal.h> + +#include "ast.h" +#include "cordhelpers.h" +#include "stdlib/integers.h" +#include "stdlib/patterns.h" +#include "stdlib/tables.h" +#include "stdlib/text.h" +#include "stdlib/util.h" + +// The cache of {filename -> parsed AST} will hold at most this many entries: +#ifndef PARSE_CACHE_SIZE +#define PARSE_CACHE_SIZE 100 +#endif + +static const double RADIANS_PER_DEGREE = 0.0174532925199432957692369076848861271344287188854172545609719144; +static const char closing[128] = {['(']=')', ['[']=']', ['<']='>', ['{']='}'}; + +typedef struct { + file_t *file; + jmp_buf *on_err; + int64_t next_lambda_id; +} parse_ctx_t; + +#define SPACES_PER_INDENT 4 + +#define PARSER(name) ast_t *name(parse_ctx_t *ctx, const char *pos) + +int op_tightness[] = { + [BINOP_POWER]=9, + [BINOP_MULT]=8, [BINOP_DIVIDE]=8, [BINOP_MOD]=8, [BINOP_MOD1]=8, + [BINOP_PLUS]=7, [BINOP_MINUS]=7, + [BINOP_CONCAT]=6, + [BINOP_LSHIFT]=5, [BINOP_RSHIFT]=5, + [BINOP_MIN]=4, [BINOP_MAX]=4, + [BINOP_EQ]=3, [BINOP_NE]=3, + [BINOP_LT]=2, [BINOP_LE]=2, [BINOP_GT]=2, [BINOP_GE]=2, + [BINOP_CMP]=2, + [BINOP_AND]=1, [BINOP_OR]=1, [BINOP_XOR]=1, +}; + +static const char *keywords[] = { + "yes", "xor", "while", "when", "use", "unless", "struct", "stop", "skip", "return", + "or", "not", "none", "no", "mutexed", "mod1", "mod", "pass", "lang", "inline", "in", "if", "holding", + "func", "for", "extern", "enum", "do_end", "else", "do", "deserialize", "defer", "do_begin", "and", + "_min_", "_max_", NULL, +}; + +enum {NORMAL_FUNCTION=0, EXTERN_FUNCTION=1}; + +static INLINE size_t some_of(const char **pos, const char *allow); +static INLINE size_t some_not(const char **pos, const char *forbid); +static INLINE size_t spaces(const char **pos); +static INLINE void whitespace(const char **pos); +static INLINE size_t match(const char **pos, const char *target); +static void expect_str(parse_ctx_t *ctx, const char *start, const char **pos, const char *target, const char *fmt, ...); +static void expect_closing(parse_ctx_t *ctx, const char **pos, const char *target, const char *fmt, ...); +static INLINE size_t match_word(const char **pos, const char *word); +static INLINE const char* get_word(const char **pos); +static INLINE const char* get_id(const char **pos); +static INLINE bool comment(const char **pos); +static INLINE bool indent(parse_ctx_t *ctx, const char **pos); +static INLINE binop_e match_binary_operator(const char **pos); +static ast_t *parse_comprehension_suffix(parse_ctx_t *ctx, ast_t *expr); +static ast_t *parse_field_suffix(parse_ctx_t *ctx, ast_t *lhs); +static ast_t *parse_fncall_suffix(parse_ctx_t *ctx, ast_t *fn); +static ast_t *parse_index_suffix(parse_ctx_t *ctx, ast_t *lhs); +static ast_t *parse_method_call_suffix(parse_ctx_t *ctx, ast_t *self); +static ast_t *parse_non_optional_suffix(parse_ctx_t *ctx, ast_t *lhs); +static ast_t *parse_optional_conditional_suffix(parse_ctx_t *ctx, ast_t *stmt); +static ast_t *parse_optional_suffix(parse_ctx_t *ctx, ast_t *lhs); +static arg_ast_t *parse_args(parse_ctx_t *ctx, const char **pos); +static type_ast_t *parse_array_type(parse_ctx_t *ctx, const char *pos); +static type_ast_t *parse_func_type(parse_ctx_t *ctx, const char *pos); +static type_ast_t *parse_non_optional_type(parse_ctx_t *ctx, const char *pos); +static type_ast_t *parse_pointer_type(parse_ctx_t *ctx, const char *pos); +static type_ast_t *parse_mutexed_type(parse_ctx_t *ctx, const char *pos); +static type_ast_t *parse_set_type(parse_ctx_t *ctx, const char *pos); +static type_ast_t *parse_table_type(parse_ctx_t *ctx, const char *pos); +static type_ast_t *parse_type(parse_ctx_t *ctx, const char *pos); +static type_ast_t *parse_type_name(parse_ctx_t *ctx, const char *pos); +static PARSER(parse_array); +static PARSER(parse_assignment); +static PARSER(parse_block); +static PARSER(parse_bool); +static PARSER(parse_convert_def); +static PARSER(parse_declaration); +static PARSER(parse_defer); +static PARSER(parse_do); +static PARSER(parse_doctest); +static PARSER(parse_enum_def); +static PARSER(parse_expr); +static PARSER(parse_extended_expr); +static PARSER(parse_extern); +static PARSER(parse_file_body); +static PARSER(parse_for); +static PARSER(parse_func_def); +static PARSER(parse_heap_alloc); +static PARSER(parse_holding); +static PARSER(parse_if); +static PARSER(parse_inline_c); +static PARSER(parse_int); +static PARSER(parse_moment); +static PARSER(parse_lambda); +static PARSER(parse_lang_def); +static PARSER(parse_mutexed); +static PARSER(parse_namespace); +static PARSER(parse_negative); +static PARSER(parse_not); +static PARSER(parse_none); +static PARSER(parse_num); +static PARSER(parse_parens); +static PARSER(parse_pass); +static PARSER(parse_path); +static PARSER(parse_reduction); +static PARSER(parse_repeat); +static PARSER(parse_return); +static PARSER(parse_say); +static PARSER(parse_set); +static PARSER(parse_skip); +static PARSER(parse_stack_reference); +static PARSER(parse_statement); +static PARSER(parse_stop); +static PARSER(parse_struct_def); +static PARSER(parse_table); +static PARSER(parse_term); +static PARSER(parse_term_no_suffix); +static PARSER(parse_text); +static PARSER(parse_update); +static PARSER(parse_use); +static PARSER(parse_var); +static PARSER(parse_when); +static PARSER(parse_while); +static PARSER(parse_deserialize); + +// +// Print a parse error and exit (or use the on_err longjmp) +// +__attribute__((noreturn, format(printf, 4, 0))) +static _Noreturn void vparser_err(parse_ctx_t *ctx, const char *start, const char *end, const char *fmt, va_list args) { + if (isatty(STDERR_FILENO) && !getenv("NO_COLOR")) + fputs("\x1b[31;1;7m", stderr); + fprintf(stderr, "%s:%ld.%ld: ", ctx->file->relative_filename, get_line_number(ctx->file, start), + get_line_column(ctx->file, start)); + vfprintf(stderr, fmt, args); + if (isatty(STDERR_FILENO) && !getenv("NO_COLOR")) + fputs(" \x1b[m", stderr); + fputs("\n\n", stderr); + + highlight_error(ctx->file, start, end, "\x1b[31;1;7m", 2, isatty(STDERR_FILENO) && !getenv("NO_COLOR")); + fputs("\n", stderr); + + if (getenv("TOMO_STACKTRACE")) + print_stack_trace(stderr, 1, 3); + + if (ctx->on_err) + longjmp(*ctx->on_err, 1); + + raise(SIGABRT); + exit(1); +} + +// +// Wrapper for vparser_err +// +__attribute__((noreturn, format(printf, 4, 5))) +static _Noreturn void parser_err(parse_ctx_t *ctx, const char *start, const char *end, const char *fmt, ...) { + va_list args; + va_start(args, fmt); + vparser_err(ctx, start, end, fmt, args); + va_end(args); +} + +// +// Convert an escape sequence like \n to a string +// +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wstack-protector" +static const char *unescape(parse_ctx_t *ctx, const char **out) { + const char **endpos = out; + const char *escape = *out; + static const char *unescapes[256] = {['a']="\a",['b']="\b",['e']="\x1b",['f']="\f",['n']="\n",['r']="\r",['t']="\t",['v']="\v",['_']=" "}; + assert(*escape == '\\'); + if (unescapes[(int)escape[1]]) { + *endpos = escape + 2; + return GC_strdup(unescapes[(int)escape[1]]); + } else if (escape[1] == '[') { + // ANSI Control Sequence Indicator: \033 [ ... m + size_t len = strcspn(&escape[2], "\r\n]"); + if (escape[2+len] != ']') + parser_err(ctx, escape, escape + 2 + len, "Missing closing ']'"); + *endpos = escape + 3 + len; + return heap_strf("\033[%.*sm", len, &escape[2]); + } else if (escape[1] == '{') { + // Unicode codepoints by name + size_t len = strcspn(&escape[2], "\r\n}"); + if (escape[2+len] != '}') + parser_err(ctx, escape, escape + 2 + len, "Missing closing '}'"); + char name[len+1]; + memcpy(name, &escape[2], len); + name[len] = '\0'; + uint32_t codepoint = unicode_name_character(name); + if (codepoint == UNINAME_INVALID) + parser_err(ctx, escape, escape + 3 + len, + "Invalid unicode codepoint name: \"%s\"", name); + *endpos = escape + 3 + len; + char *str = GC_MALLOC_ATOMIC(16); + size_t u8_len = 16; + (void)u32_to_u8(&codepoint, 1, (uint8_t*)str, &u8_len); + str[u8_len] = '\0'; + return str; + } else if (escape[1] == 'U' && escape[2]) { + // Unicode codepoints by hex + char *endptr = NULL; + long codepoint = strtol(escape+2, &endptr, 16); + uint32_t ustr[2] = {codepoint, 0}; + size_t bufsize = 8; + uint8_t buf[bufsize]; + (void)u32_to_u8(ustr, bufsize, buf, &bufsize); + *endpos = endptr; + return GC_strndup((char*)buf, bufsize); + } else if (escape[1] == 'x' && escape[2] && escape[3]) { + // ASCII 2-digit hex + char *endptr = NULL; + char c = (char)strtol(escape+2, &endptr, 16); + *endpos = escape + 4; + return GC_strndup(&c, 1); + } else if ('0' <= escape[1] && escape[1] <= '7' && '0' <= escape[2] && escape[2] <= '7' && '0' <= escape[3] && escape[3] <= '7') { + char *endptr = NULL; + char c = (char)strtol(escape+1, &endptr, 8); + *endpos = escape + 4; + return GC_strndup(&c, 1); + } else { + *endpos = escape + 2; + return GC_strndup(escape+1, 1); + } +} +#pragma GCC diagnostic pop + +// Indent is in number of spaces (assuming that \t is 4 spaces) +PUREFUNC static INLINE int64_t get_indent(parse_ctx_t *ctx, const char *pos) +{ + int64_t line_num = get_line_number(ctx->file, pos); + const char *line = get_line(ctx->file, line_num); + if (*line == ' ') { + int64_t spaces = (int64_t)strspn(line, " "); + if (line[spaces] == '\t') + parser_err(ctx, line + spaces, line + spaces + 1, "This is a tab following spaces, and you can't mix tabs and spaces"); + return spaces; + } else if (*line == '\t') { + int64_t indent = (int64_t)strspn(line, "\t"); + if (line[indent] == ' ') + parser_err(ctx, line + indent, line + indent + 1, "This is a space following tabs, and you can't mix tabs and spaces"); + return indent * SPACES_PER_INDENT; + } else { + return 0; + } +} + +/////////////////////////////////////////////////////////////////////////////////////////////////////////// +///////////////////////////// Text-based parsing primitives /////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////////////////////////////////// +size_t some_of(const char **pos, const char *allow) { + size_t len = strspn(*pos, allow); + *pos += len; + return len; +} + +size_t some_not(const char **pos, const char *forbid) { + size_t len = strcspn(*pos, forbid); + *pos += len; + return len; +} + +size_t spaces(const char **pos) { + return some_of(pos, " \t"); +} + +void whitespace(const char **pos) { + while (some_of(pos, " \t\r\n") || comment(pos)) + continue; +} + +size_t match(const char **pos, const char *target) { + size_t len = strlen(target); + if (strncmp(*pos, target, len) != 0) + return 0; + *pos += len; + return len; +} + +static INLINE bool is_xid_continue_next(const char *pos) { + ucs4_t point = 0; + u8_next(&point, (const uint8_t*)pos); + return uc_is_property_xid_continue(point); +} + +// +// Expect a string (potentially after whitespace) and emit a parser error if it's not there +// +__attribute__((format(printf, 5, 6))) +static void expect_str( + parse_ctx_t *ctx, const char *start, const char **pos, const char *target, const char *fmt, ...) { + spaces(pos); + if (match(pos, target)) { + char lastchar = target[strlen(target)-1]; + if (!(isalpha(lastchar) || isdigit(lastchar) || lastchar == '_')) + return; + if (!is_xid_continue_next(*pos)) + return; + } + + if (isatty(STDERR_FILENO) && !getenv("NO_COLOR")) + fputs("\x1b[31;1;7m", stderr); + va_list args; + va_start(args, fmt); + vparser_err(ctx, start, *pos, fmt, args); + va_end(args); +} + +// +// Helper for matching closing parens with good error messages +// +__attribute__((format(printf, 4, 5))) +static void expect_closing( + parse_ctx_t *ctx, const char **pos, const char *close_str, const char *fmt, ...) { + const char *start = *pos; + spaces(pos); + if (match(pos, close_str)) + return; + + const char *eol = strchr(*pos, '\n'); + const char *next = strstr(*pos, close_str); + + const char *end = eol < next ? eol : next; + + if (isatty(STDERR_FILENO) && !getenv("NO_COLOR")) + fputs("\x1b[31;1;7m", stderr); + va_list args; + va_start(args, fmt); + vparser_err(ctx, start, end, fmt, args); + va_end(args); +} + +#define expect(ctx, start, pos, parser, ...) ({ \ + const char **_pos = pos; \ + spaces(_pos); \ + auto _result = parser(ctx, *_pos); \ + if (!_result) { \ + if (isatty(STDERR_FILENO) && !getenv("NO_COLOR")) \ + fputs("\x1b[31;1;7m", stderr); \ + parser_err(ctx, start, *_pos, __VA_ARGS__); \ + } \ + *_pos = _result->end; \ + _result; }) + +#define optional(ctx, pos, parser) ({ \ + const char **_pos = pos; \ + spaces(_pos); \ + auto _result = parser(ctx, *_pos); \ + if (_result) *_pos = _result->end; \ + _result; }) + +size_t match_word(const char **out, const char *word) { + const char *pos = *out; + spaces(&pos); + if (!match(&pos, word) || is_xid_continue_next(pos)) + return 0; + + *out = pos; + return strlen(word); +} + +const char *get_word(const char **inout) { + const char *word = *inout; + spaces(&word); + const uint8_t *pos = (const uint8_t*)word; + ucs4_t point; + pos = u8_next(&point, pos); + if (!uc_is_property_xid_start(point) && point != '_') + return NULL; + + for (const uint8_t *next; (next = u8_next(&point, pos)); pos = next) { + if (!uc_is_property_xid_continue(point)) + break; + } + *inout = (const char*)pos; + return GC_strndup(word, (size_t)((const char*)pos - word)); +} + +const char *get_id(const char **inout) { + const char *pos = *inout; + const char *word = get_word(&pos); + if (!word) return word; + for (int i = 0; keywords[i]; i++) + if (strcmp(word, keywords[i]) == 0) + return NULL; + *inout = pos; + return word; +} + +bool comment(const char **pos) { + if ((*pos)[0] == '#') { + *pos += strcspn(*pos, "\r\n"); + return true; + } else { + return false; + } +} + +bool indent(parse_ctx_t *ctx, const char **out) { + const char *pos = *out; + int64_t starting_indent = get_indent(ctx, pos); + whitespace(&pos); + const char *next_line = get_line(ctx->file, get_line_number(ctx->file, pos)); + if (next_line <= *out) + return false; + + if (get_indent(ctx, next_line) != starting_indent + SPACES_PER_INDENT) + return false; + + *out = next_line + strspn(next_line, " \t"); + return true; +} + +bool newline_with_indentation(const char **out, int64_t target) { + const char *pos = *out; + if (*pos == '\r') ++pos; + if (*pos != '\n') return false; + ++pos; + if (*pos == '\r' || *pos == '\n' || *pos == '\0') { + // Empty line + *out = pos; + return true; + } + + if (*pos == ' ') { + if ((int64_t)strspn(pos, " ") >= target) { + *out = pos + target; + return true; + } + } else if ((int64_t)strspn(pos, "\t") * SPACES_PER_INDENT >= target) { + *out = pos + target / SPACES_PER_INDENT; + return true; + } + return false; +} + +/////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////// AST-based parsers ///////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////////////////////////////////// + +PARSER(parse_parens) { + const char *start = pos; + spaces(&pos); + if (!match(&pos, "(")) return NULL; + whitespace(&pos); + ast_t *expr = optional(ctx, &pos, parse_extended_expr); + if (!expr) return NULL; + whitespace(&pos); + expect_closing(ctx, &pos, ")", "I wasn't able to parse the rest of this expression"); + + // Update the span to include the parens: + return new(ast_t, .file=(ctx)->file, .start=start, .end=pos, + .tag=expr->tag, .__data=expr->__data); +} + +PARSER(parse_int) { + const char *start = pos; + (void)match(&pos, "-"); + if (!isdigit(*pos)) return NULL; + if (match(&pos, "0x")) { // Hex + pos += strspn(pos, "0123456789abcdefABCDEF_"); + } else if (match(&pos, "0b")) { // Binary + pos += strspn(pos, "01_"); + } else if (match(&pos, "0o")) { // Octal + pos += strspn(pos, "01234567_"); + } else { // Decimal + pos += strspn(pos, "0123456789_"); + } + char *str = GC_MALLOC_ATOMIC((size_t)(pos - start) + 1); + memset(str, 0, (size_t)(pos - start) + 1); + for (char *src = (char*)start, *dest = str; src < pos; ++src) { + if (*src != '_') *(dest++) = *src; + } + + if (match(&pos, "e") || match(&pos, "f")) // floating point literal + return NULL; + + if (match(&pos, "%")) { + double n = strtod(str, NULL) / 100.; + return NewAST(ctx->file, start, pos, Num, .n=n); + } else if (match(&pos, "deg")) { + double n = strtod(str, NULL) * RADIANS_PER_DEGREE; + return NewAST(ctx->file, start, pos, Num, .n=n); + } + + return NewAST(ctx->file, start, pos, Int, .str=str); +} + +PARSER(parse_moment) { + const char *start = pos; + bool negative = match(&pos, "-"); + if (!isdigit(*pos)) return NULL; + + struct tm info = {.tm_isdst=-1}; + char *after = strptime(pos, "%Y-%m-%d", &info); + if (!after) return NULL; + if (negative) info.tm_year = -(info.tm_year + 1900) - 1900; + pos = after; + if (match(&pos, "T") || spaces(&pos) >= 1) { + after = strptime(pos, "%H:%M", &info); + if (after) { + pos = after; + after = strptime(pos, ":%S", &info); + if (after) pos = after; + // TODO parse nanoseconds + } + } + + const char *before_spaces = pos; + spaces(&pos); + Moment_t moment; + if (match(&pos, "[")) { + size_t tz_len = strcspn(pos, "\r\n]"); + const char *tz = heap_strf("%.*s", tz_len, pos); + // TODO: check that tz is a valid timezone + pos += tz_len; + expect_closing(ctx, &pos, "]", "I wasn't able to parse the rest of this moment timezone"); + const char *old_tz = getenv("TZ"); + setenv("TZ", tz, 1); + tzset(); + moment = (Moment_t){.tv_sec=mktime(&info)}; + if (old_tz) setenv("TZ", old_tz, 1); + else unsetenv("TZ"); + } else if (*pos == 'Z' || *pos == '-' || *pos == '+') { + after = strptime(pos, "%z", &info); + if (after) { + pos = after; + long offset = info.tm_gmtoff; // Need to cache this because mktime() mutates it to local timezone >:( + time_t t = mktime(&info); + moment = (Moment_t){.tv_sec=t + offset - info.tm_gmtoff}; + } else { + moment = (Moment_t){.tv_sec=mktime(&info)}; + } + } else { + pos = before_spaces; + moment = (Moment_t){.tv_sec=mktime(&info)}; + } + + return NewAST(ctx->file, start, pos, Moment, .moment=moment); +} + +type_ast_t *parse_table_type(parse_ctx_t *ctx, const char *pos) { + const char *start = pos; + if (!match(&pos, "{")) return NULL; + whitespace(&pos); + type_ast_t *key_type = parse_type(ctx, pos); + if (!key_type) return NULL; + pos = key_type->end; + whitespace(&pos); + type_ast_t *value_type = NULL; + ast_t *default_value = NULL; + if (match(&pos, ",")) { + value_type = expect(ctx, start, &pos, parse_type, "I couldn't parse the rest of this table type"); + } else if (match(&pos, "=")) { + default_value = expect(ctx, start, &pos, parse_extended_expr, "I couldn't parse the rest of this table type"); + } else { + return NULL; + } + whitespace(&pos); + expect_closing(ctx, &pos, "}", "I wasn't able to parse the rest of this table type"); + return NewTypeAST(ctx->file, start, pos, TableTypeAST, .key=key_type, .value=value_type, .default_value=default_value); +} + +type_ast_t *parse_set_type(parse_ctx_t *ctx, const char *pos) { + const char *start = pos; + if (!match(&pos, "{")) return NULL; + whitespace(&pos); + type_ast_t *item_type = parse_type(ctx, pos); + if (!item_type) return NULL; + pos = item_type->end; + whitespace(&pos); + if (match(&pos, ",")) return NULL; + expect_closing(ctx, &pos, "}", "I wasn't able to parse the rest of this set type"); + return NewTypeAST(ctx->file, start, pos, SetTypeAST, .item=item_type); +} + +type_ast_t *parse_func_type(parse_ctx_t *ctx, const char *pos) { + const char *start = pos; + if (!match_word(&pos, "func")) return NULL; + spaces(&pos); + if (!match(&pos, "(")) return NULL; + arg_ast_t *args = parse_args(ctx, &pos); + spaces(&pos); + type_ast_t *ret = match(&pos, "->") ? optional(ctx, &pos, parse_type) : NULL; + expect_closing(ctx, &pos, ")", "I wasn't able to parse the rest of this function type"); + return NewTypeAST(ctx->file, start, pos, FunctionTypeAST, .args=args, .ret=ret); +} + +type_ast_t *parse_array_type(parse_ctx_t *ctx, const char *pos) { + const char *start = pos; + if (!match(&pos, "[")) return NULL; + type_ast_t *type = expect(ctx, start, &pos, parse_type, + "I couldn't parse an array item type after this point"); + expect_closing(ctx, &pos, "]", "I wasn't able to parse the rest of this array type"); + return NewTypeAST(ctx->file, start, pos, ArrayTypeAST, .item=type); +} + +type_ast_t *parse_pointer_type(parse_ctx_t *ctx, const char *pos) { + const char *start = pos; + bool is_stack; + if (match(&pos, "@")) + is_stack = false; + else if (match(&pos, "&")) + is_stack = true; + else + return NULL; + + spaces(&pos); + type_ast_t *type = expect(ctx, start, &pos, parse_non_optional_type, + "I couldn't parse a pointer type after this point"); + type_ast_t *ptr_type = NewTypeAST(ctx->file, start, pos, PointerTypeAST, .pointed=type, .is_stack=is_stack); + spaces(&pos); + while (match(&pos, "?")) + ptr_type = NewTypeAST(ctx->file, start, pos, OptionalTypeAST, .type=ptr_type); + return ptr_type; +} + +type_ast_t *parse_mutexed_type(parse_ctx_t *ctx, const char *pos) { + const char *start = pos; + if (!match_word(&pos, "mutexed")) return NULL; + spaces(&pos); + if (!match(&pos, "(")) return NULL; + spaces(&pos); + type_ast_t *mutexed = expect(ctx, start, &pos, parse_type, + "I couldn't parse a mutexed type after this point"); + spaces(&pos); + if (!match(&pos, ")")) return NULL; + return NewTypeAST(ctx->file, start, pos, MutexedTypeAST, .type=mutexed); +} + +type_ast_t *parse_type_name(parse_ctx_t *ctx, const char *pos) { + const char *start = pos; + const char *id = get_id(&pos); + if (!id) return NULL; + for (;;) { + const char *next = pos; + spaces(&next); + if (!match(&next, ".")) break; + const char *next_id = get_id(&next); + if (!next_id) break; + id = heap_strf("%s.%s", id, next_id); + pos = next; + } + return NewTypeAST(ctx->file, start, pos, VarTypeAST, .name=id); +} + +type_ast_t *parse_non_optional_type(parse_ctx_t *ctx, const char *pos) { + const char *start = pos; + type_ast_t *type = NULL; + bool success = (false + || (type=parse_pointer_type(ctx, pos)) + || (type=parse_array_type(ctx, pos)) + || (type=parse_table_type(ctx, pos)) + || (type=parse_set_type(ctx, pos)) + || (type=parse_type_name(ctx, pos)) + || (type=parse_func_type(ctx, pos)) + || (type=parse_mutexed_type(ctx, pos)) + ); + if (!success && match(&pos, "(")) { + whitespace(&pos); + type = optional(ctx, &pos, parse_type); + if (!type) return NULL; + whitespace(&pos); + expect_closing(ctx, &pos, ")", "I wasn't able to parse the rest of this type"); + type->start = start; + type->end = pos; + } + + return type; +} + +type_ast_t *parse_type(parse_ctx_t *ctx, const char *pos) { + const char *start = pos; + type_ast_t *type = parse_non_optional_type(ctx, pos); + if (!type) return NULL; + pos = type->end; + spaces(&pos); + while (match(&pos, "?")) + type = NewTypeAST(ctx->file, start, pos, OptionalTypeAST, .type=type); + return type; +} + +PARSER(parse_num) { + const char *start = pos; + bool negative = match(&pos, "-"); + if (!isdigit(*pos) && *pos != '.') return NULL; + else if (*pos == '.' && !isdigit(pos[1])) return NULL; + + size_t len = strspn(pos, "0123456789_"); + if (strncmp(pos+len, "..", 2) == 0) + return NULL; + else if (pos[len] == '.') + len += 1 + strspn(pos + len + 1, "0123456789"); + else if (pos[len] != 'e' && pos[len] != 'f' && pos[len] != '%') + return NULL; + if (pos[len] == 'e') { + len += 1; + if (pos[len] == '-') + len += 1; + len += strspn(pos + len, "0123456789_"); + } + char *buf = GC_MALLOC_ATOMIC(len+1); + memset(buf, 0, len+1); + for (char *src = (char*)pos, *dest = buf; src < pos+len; ++src) { + if (*src != '_') *(dest++) = *src; + } + double d = strtod(buf, NULL); + pos += len; + + if (negative) d *= -1; + + if (match(&pos, "%")) + d /= 100.; + else if (match(&pos, "deg")) + d *= RADIANS_PER_DEGREE; + + return NewAST(ctx->file, start, pos, Num, .n=d); +} + +static INLINE bool match_separator(const char **pos) { // Either comma or newline + const char *p = *pos; + int separators = 0; + for (;;) { + if (some_of(&p, "\r\n,")) + ++separators; + else if (!comment(&p) && !some_of(&p, " \t")) + break; + } + if (separators > 0) { + *pos = p; + return true; + } else { + return false; + } +} + +PARSER(parse_array) { + const char *start = pos; + if (!match(&pos, "[")) return NULL; + + whitespace(&pos); + + ast_list_t *items = NULL; + type_ast_t *item_type = NULL; + if (match(&pos, ":")) { + whitespace(&pos); + item_type = expect(ctx, pos-1, &pos, parse_type, "I couldn't parse a type for this array"); + whitespace(&pos); + match(&pos, ","); + whitespace(&pos); + } + + for (;;) { + ast_t *item = optional(ctx, &pos, parse_extended_expr); + if (!item) break; + ast_t *suffixed = parse_comprehension_suffix(ctx, item); + while (suffixed) { + item = suffixed; + pos = suffixed->end; + suffixed = parse_comprehension_suffix(ctx, item); + } + items = new(ast_list_t, .ast=item, .next=items); + if (!match_separator(&pos)) + break; + } + whitespace(&pos); + expect_closing(ctx, &pos, "]", "I wasn't able to parse the rest of this array"); + + if (!item_type && !items) + parser_err(ctx, start, pos, "Empty arrays must specify what type they would contain (e.g. [:Int])"); + + REVERSE_LIST(items); + return NewAST(ctx->file, start, pos, Array, .item_type=item_type, .items=items); +} + +PARSER(parse_table) { + const char *start = pos; + if (!match(&pos, "{")) return NULL; + + whitespace(&pos); + + ast_list_t *entries = NULL; + type_ast_t *key_type = NULL, *value_type = NULL; + ast_t *default_value = NULL; + if (match(&pos, ":")) { + whitespace(&pos); + key_type = expect(ctx, pos-1, &pos, parse_type, "I couldn't parse a key type for this table"); + whitespace(&pos); + if (match(&pos, ",")) { + value_type = expect(ctx, pos-1, &pos, parse_type, "I couldn't parse the value type for this table"); + } else if (match(&pos, "=")) { + default_value = expect(ctx, pos-1, &pos, parse_extended_expr, "I couldn't parse the default value for this table"); + } else { + return NULL; + } + whitespace(&pos); + match(&pos, ","); + } + + for (;;) { + const char *entry_start = pos; + ast_t *key = optional(ctx, &pos, parse_extended_expr); + if (!key) break; + whitespace(&pos); + if (!match(&pos, "=")) return NULL; + ast_t *value = expect(ctx, pos-1, &pos, parse_expr, "I couldn't parse the value for this table entry"); + ast_t *entry = NewAST(ctx->file, entry_start, pos, TableEntry, .key=key, .value=value); + ast_t *suffixed = parse_comprehension_suffix(ctx, entry); + while (suffixed) { + entry = suffixed; + pos = suffixed->end; + suffixed = parse_comprehension_suffix(ctx, entry); + } + entries = new(ast_list_t, .ast=entry, .next=entries); + if (!match_separator(&pos)) + break; + } + + REVERSE_LIST(entries); + + if (!key_type && !value_type && !entries) + return NULL; + + whitespace(&pos); + + ast_t *fallback = NULL; + if (match(&pos, ";")) { + for (;;) { + whitespace(&pos); + const char *attr_start = pos; + if (match_word(&pos, "fallback")) { + whitespace(&pos); + if (!match(&pos, "=")) parser_err(ctx, attr_start, pos, "I expected an '=' after 'fallback'"); + if (fallback) + parser_err(ctx, attr_start, pos, "This table already has a fallback"); + fallback = expect(ctx, attr_start, &pos, parse_expr, "I expected a fallback table"); + } else { + break; + } + whitespace(&pos); + if (!match(&pos, ";")) break; + } + } + + whitespace(&pos); + expect_closing(ctx, &pos, "}", "I wasn't able to parse the rest of this table"); + + return NewAST(ctx->file, start, pos, Table, .key_type=key_type, .value_type=value_type, + .default_value=default_value, .entries=entries, .fallback=fallback); +} + +PARSER(parse_set) { + const char *start = pos; + if (!match(&pos, "{")) return NULL; + + whitespace(&pos); + + ast_list_t *items = NULL; + type_ast_t *item_type = NULL; + if (match(&pos, ":")) { + whitespace(&pos); + item_type = expect(ctx, pos-1, &pos, parse_type, "I couldn't parse a key type for this set"); + whitespace(&pos); + if (match(&pos, ",")) + return NULL; + whitespace(&pos); + match(&pos, ","); + whitespace(&pos); + } + + for (;;) { + ast_t *item = optional(ctx, &pos, parse_extended_expr); + if (!item) break; + whitespace(&pos); + if (match(&pos, "=")) return NULL; + ast_t *suffixed = parse_comprehension_suffix(ctx, item); + while (suffixed) { + item = suffixed; + pos = suffixed->end; + suffixed = parse_comprehension_suffix(ctx, item); + } + items = new(ast_list_t, .ast=item, .next=items); + if (!match_separator(&pos)) + break; + } + + REVERSE_LIST(items); + + if (!item_type && !items) + return NULL; + + whitespace(&pos); + expect_closing(ctx, &pos, "}", "I wasn't able to parse the rest of this set"); + + return NewAST(ctx->file, start, pos, Set, .item_type=item_type, .items=items); +} + +ast_t *parse_field_suffix(parse_ctx_t *ctx, ast_t *lhs) { + if (!lhs) return NULL; + const char *pos = lhs->end; + whitespace(&pos); + if (!match(&pos, ".")) return NULL; + if (*pos == '.') return NULL; + whitespace(&pos); + bool dollar = match(&pos, "$"); + const char* field = get_id(&pos); + if (!field) return NULL; + if (dollar) field = heap_strf("$%s", field); + return NewAST(ctx->file, lhs->start, pos, FieldAccess, .fielded=lhs, .field=field); +} + +ast_t *parse_optional_suffix(parse_ctx_t *ctx, ast_t *lhs) { + if (!lhs) return NULL; + const char *pos = lhs->end; + if (match(&pos, "?")) + return NewAST(ctx->file, lhs->start, pos, Optional, .value=lhs); + else + return NULL; +} + +ast_t *parse_non_optional_suffix(parse_ctx_t *ctx, ast_t *lhs) { + if (!lhs) return NULL; + const char *pos = lhs->end; + if (match(&pos, "!")) + return NewAST(ctx->file, lhs->start, pos, NonOptional, .value=lhs); + else + return NULL; +} + +PARSER(parse_reduction) { + const char *start = pos; + if (!match(&pos, "(")) return NULL; + + whitespace(&pos); + binop_e op = match_binary_operator(&pos); + if (op == BINOP_UNKNOWN) return NULL; + + ast_t *key = NULL; + if (op == BINOP_MIN || op == BINOP_MAX) { + key = NewAST(ctx->file, pos, pos, Var, .name="$"); + for (bool progress = true; progress; ) { + ast_t *new_term; + progress = (false + || (new_term=parse_index_suffix(ctx, key)) + || (new_term=parse_field_suffix(ctx, key)) + || (new_term=parse_method_call_suffix(ctx, key)) + || (new_term=parse_fncall_suffix(ctx, key)) + || (new_term=parse_optional_suffix(ctx, key)) + || (new_term=parse_non_optional_suffix(ctx, key)) + ); + if (progress) key = new_term; + } + if (key->tag == Var) key = NULL; + else pos = key->end; + } + + whitespace(&pos); + if (!match(&pos, ":")) return NULL; + + ast_t *iter = optional(ctx, &pos, parse_extended_expr); + if (!iter) return NULL; + ast_t *suffixed = parse_comprehension_suffix(ctx, iter); + while (suffixed) { + iter = suffixed; + pos = suffixed->end; + suffixed = parse_comprehension_suffix(ctx, iter); + } + + whitespace(&pos); + expect_closing(ctx, &pos, ")", "I wasn't able to parse the rest of this reduction"); + + return NewAST(ctx->file, start, pos, Reduction, .iter=iter, .op=op, .key=key); +} + +ast_t *parse_index_suffix(parse_ctx_t *ctx, ast_t *lhs) { + if (!lhs) return NULL; + const char *start = lhs->start; + const char *pos = lhs->end; + spaces(&pos); + if (!match(&pos, "[")) return NULL; + whitespace(&pos); + ast_t *index = optional(ctx, &pos, parse_extended_expr); + whitespace(&pos); + bool unchecked = match(&pos, ";") && (spaces(&pos), match_word(&pos, "unchecked") != 0); + expect_closing(ctx, &pos, "]", "I wasn't able to parse the rest of this index"); + return NewAST(ctx->file, start, pos, Index, .indexed=lhs, .index=index, .unchecked=unchecked); +} + +ast_t *parse_comprehension_suffix(parse_ctx_t *ctx, ast_t *expr) { + // <expr> "for" [<index>,]<var> "in" <iter> ["if" <condition> | "unless" <condition>] + if (!expr) return NULL; + const char *start = expr->start; + const char *pos = expr->end; + whitespace(&pos); + if (!match_word(&pos, "for")) return NULL; + + ast_list_t *vars = NULL; + for (;;) { + ast_t *var = optional(ctx, &pos, parse_var); + if (var) + vars = new(ast_list_t, .ast=var, .next=vars); + + spaces(&pos); + if (!match(&pos, ",")) + break; + } + REVERSE_LIST(vars); + + expect_str(ctx, start, &pos, "in", "I expected an 'in' for this 'for'"); + ast_t *iter = expect(ctx, start, &pos, parse_expr, "I expected an iterable value for this 'for'"); + const char *next_pos = pos; + whitespace(&next_pos); + ast_t *filter = NULL; + if (match_word(&next_pos, "if")) { + pos = next_pos; + filter = expect(ctx, pos-2, &pos, parse_expr, "I expected a condition for this 'if'"); + } else if (match_word(&next_pos, "unless")) { + pos = next_pos; + filter = expect(ctx, pos-2, &pos, parse_expr, "I expected a condition for this 'unless'"); + filter = WrapAST(filter, Not, filter); + } + return NewAST(ctx->file, start, pos, Comprehension, .expr=expr, .vars=vars, .iter=iter, .filter=filter); +} + +ast_t *parse_optional_conditional_suffix(parse_ctx_t *ctx, ast_t *stmt) { + // <statement> "if" <condition> | <statement> "unless" <condition> + if (!stmt) return stmt; + const char *start = stmt->start; + const char *pos = stmt->end; + if (match_word(&pos, "if")) { + ast_t *condition = expect(ctx, pos-2, &pos, parse_expr, "I expected a condition for this 'if'"); + return NewAST(ctx->file, start, pos, If, .condition=condition, .body=stmt); + } else if (match_word(&pos, "unless")) { + ast_t *condition = expect(ctx, pos-2, &pos, parse_expr, "I expected a condition for this 'unless'"); + condition = WrapAST(condition, Not, condition); + return NewAST(ctx->file, start, pos, If, .condition=condition, .body=stmt); + } else { + return stmt; + } +} + +PARSER(parse_if) { + // "if" <condition> <body> ["else" <body>] | "unless" <condition> <body> ["else" <body>] + const char *start = pos; + int64_t starting_indent = get_indent(ctx, pos); + + bool unless; + if (match_word(&pos, "if")) + unless = false; + else if (match_word(&pos, "unless")) + unless = true; + else + return NULL; + + ast_t *condition = unless ? NULL : optional(ctx, &pos, parse_declaration); + if (!condition) + condition = expect(ctx, start, &pos, parse_expr, "I expected to find a condition for this 'if'"); + + if (unless) + condition = WrapAST(condition, Not, condition); + + ast_t *body = expect(ctx, start, &pos, parse_block, "I expected a body for this 'if' statement"); + + const char *tmp = pos; + whitespace(&tmp); + ast_t *else_body = NULL; + const char *else_start = pos; + if (get_indent(ctx, tmp) == starting_indent && match_word(&tmp, "else")) { + pos = tmp; + spaces(&pos); + else_body = optional(ctx, &pos, parse_if); + if (!else_body) + else_body = expect(ctx, else_start, &pos, parse_block, "I expected a body for this 'else'"); + } + return NewAST(ctx->file, start, pos, If, .condition=condition, .body=body, .else_body=else_body); +} + +PARSER(parse_when) { + // when <expr> (is var : Tag <body>)* [else <body>] + const char *start = pos; + int64_t starting_indent = get_indent(ctx, pos); + + if (!match_word(&pos, "when")) + return NULL; + + ast_t *subject = optional(ctx, &pos, parse_declaration); + if (!subject) subject = expect(ctx, start, &pos, parse_expr, + "I expected to find an expression for this 'when'"); + + when_clause_t *clauses = NULL; + const char *tmp = pos; + whitespace(&tmp); + while (get_indent(ctx, tmp) == starting_indent && match_word(&tmp, "is")) { + pos = tmp; + spaces(&pos); + ast_t *pattern = expect(ctx, start, &pos, parse_expr, "I expected a pattern to match here"); + spaces(&pos); + tmp = pos; + if (!match(&tmp, ":")) + parser_err(ctx, tmp, tmp, "I expected a colon ':' after this clause"); + ast_t *body = expect(ctx, start, &pos, parse_block, "I expected a body for this 'when' clause"); + clauses = new(when_clause_t, .pattern=pattern, .body=body, .next=clauses); + tmp = pos; + whitespace(&tmp); + } + REVERSE_LIST(clauses); + + ast_t *else_body = NULL; + const char *else_start = pos; + if (get_indent(ctx, tmp) == starting_indent && match_word(&tmp, "else")) { + pos = tmp; + else_body = expect(ctx, else_start, &pos, parse_block, "I expected a body for this 'else'"); + } + return NewAST(ctx->file, start, pos, When, .subject=subject, .clauses=clauses, .else_body=else_body); +} + +PARSER(parse_for) { + // for [k,] v in iter [<indent>] body + const char *start = pos; + if (!match_word(&pos, "for")) return NULL; + int64_t starting_indent = get_indent(ctx, pos); + spaces(&pos); + ast_list_t *vars = NULL; + for (;;) { + ast_t *var = optional(ctx, &pos, parse_var); + if (var) + vars = new(ast_list_t, .ast=var, .next=vars); + + spaces(&pos); + if (!match(&pos, ",")) + break; + } + + spaces(&pos); + expect_str(ctx, start, &pos, "in", "I expected an 'in' for this 'for'"); + + ast_t *iter = expect(ctx, start, &pos, parse_expr, "I expected an iterable value for this 'for'"); + ast_t *body = expect(ctx, start, &pos, parse_block, "I expected a body for this 'for'"); + + const char *else_start = pos; + whitespace(&else_start); + ast_t *empty = NULL; + if (match_word(&else_start, "else") && get_indent(ctx, else_start) == starting_indent) { + pos = else_start; + empty = expect(ctx, pos, &pos, parse_block, "I expected a body for this 'else'"); + } + REVERSE_LIST(vars); + return NewAST(ctx->file, start, pos, For, .vars=vars, .iter=iter, .body=body, .empty=empty); +} + +PARSER(parse_do) { + // [do_begin: [<indent>] block] [do_end: [<indent>] block] do: [<indent>] body + const char *start = pos; + int64_t starting_indent = get_indent(ctx, pos); + ast_t *begin = NULL, *end = NULL; + if (match_word(&pos, "do_begin")) + begin = optional(ctx, &pos, parse_block); + + const char *tmp = pos; + whitespace(&tmp); + if (get_indent(ctx, tmp) == starting_indent && match_word(&tmp, "do_end")) { + pos = tmp; + end = optional(ctx, &pos, parse_block); + } + + tmp = pos; + whitespace(&tmp); + if (get_indent(ctx, tmp) == starting_indent && match_word(&tmp, "do")) { + pos = tmp; + ast_t *body = expect(ctx, start, &pos, parse_block, "I expected a body for this 'do'"); + if (begin && end) { + return NewAST(ctx->file, start, pos, Block, + .statements=new(ast_list_t, .ast=begin, + .next=new(ast_list_t, .ast=WrapAST(end, Defer, .body=end), + .next=new(ast_list_t, .ast=body)))); + } else if (begin) { + return NewAST(ctx->file, start, pos, Block, + .statements=new(ast_list_t, .ast=begin, + .next=new(ast_list_t, .ast=body))); + } else if (end) { + return NewAST(ctx->file, start, pos, Block, + .statements=new(ast_list_t, .ast=WrapAST(end, Defer, .body=end), + .next=new(ast_list_t, .ast=body))); + } else { + return NewAST(ctx->file, start, pos, Block, .statements=Match(body, Block)->statements); + } + } else { + return NULL; + } +} + +PARSER(parse_while) { + // while condition: [<indent>] body + const char *start = pos; + if (!match_word(&pos, "while")) return NULL; + + const char *tmp = pos; + // Shorthand form: `while when ...` + if (match_word(&tmp, "when")) { + ast_t *when = expect(ctx, start, &pos, parse_when, "I expected a 'when' block after this"); + if (!when->__data.When.else_body) when->__data.When.else_body = NewAST(ctx->file, pos, pos, Stop); + return NewAST(ctx->file, start, pos, While, .body=when); + } + ast_t *condition = expect(ctx, start, &pos, parse_expr, "I don't see a viable condition for this 'while'"); + ast_t *body = expect(ctx, start, &pos, parse_block, "I expected a body for this 'while'"); + return NewAST(ctx->file, start, pos, While, .condition=condition, .body=body); +} + +PARSER(parse_repeat) { + // repeat: [<indent>] body + const char *start = pos; + if (!match_word(&pos, "repeat")) return NULL; + ast_t *body = expect(ctx, start, &pos, parse_block, "I expected a body for this 'repeat'"); + return NewAST(ctx->file, start, pos, Repeat, .body=body); +} + +PARSER(parse_heap_alloc) { + const char *start = pos; + if (!match(&pos, "@")) return NULL; + spaces(&pos); + ast_t *val = expect(ctx, start, &pos, parse_term_no_suffix, "I expected an expression for this '@'"); + + for (;;) { + ast_t *new_term; + if ((new_term=parse_index_suffix(ctx, val)) + || (new_term=parse_fncall_suffix(ctx, val)) + || (new_term=parse_field_suffix(ctx, val))) { + val = new_term; + } else break; + } + pos = val->end; + + ast_t *ast = NewAST(ctx->file, start, pos, HeapAllocate, .value=val); + for (;;) { + ast_t *next = parse_optional_suffix(ctx, ast); + if (!next) next = parse_non_optional_suffix(ctx, ast); + if (!next) break; + ast = next; + } + return ast; +} + +PARSER(parse_stack_reference) { + const char *start = pos; + if (!match(&pos, "&")) return NULL; + spaces(&pos); + ast_t *val = expect(ctx, start, &pos, parse_term_no_suffix, "I expected an expression for this '&'"); + + for (;;) { + ast_t *new_term; + if ((new_term=parse_index_suffix(ctx, val)) + || (new_term=parse_fncall_suffix(ctx, val)) + || (new_term=parse_field_suffix(ctx, val))) { + val = new_term; + } else break; + } + pos = val->end; + + ast_t *ast = NewAST(ctx->file, start, pos, StackReference, .value=val); + for (;;) { + ast_t *next = parse_optional_suffix(ctx, ast); + if (!next) next = parse_non_optional_suffix(ctx, ast); + if (!next) break; + ast = next; + } + return ast; +} + +PARSER(parse_mutexed) { + const char *start = pos; + if (!match_word(&pos, "mutexed")) return NULL; + spaces(&pos); + ast_t *val = expect(ctx, start, &pos, parse_term, "I expected an expression for this 'mutexed'"); + return NewAST(ctx->file, start, pos, Mutexed, .value=val); +} + +PARSER(parse_holding) { + const char *start = pos; + if (!match_word(&pos, "holding")) return NULL; + spaces(&pos); + ast_t *mutexed = expect(ctx, start, &pos, parse_expr, "I expected an expression for this 'holding'"); + ast_t *body = expect(ctx, start, &pos, parse_block, "I expected a block to be here"); + return NewAST(ctx->file, start, pos, Holding, .mutexed=mutexed, .body=body); +} + +PARSER(parse_not) { + const char *start = pos; + if (!match_word(&pos, "not")) return NULL; + spaces(&pos); + ast_t *val = expect(ctx, start, &pos, parse_term, "I expected an expression for this 'not'"); + return NewAST(ctx->file, start, pos, Not, .value=val); +} + +PARSER(parse_negative) { + const char *start = pos; + if (!match(&pos, "-")) return NULL; + spaces(&pos); + ast_t *val = expect(ctx, start, &pos, parse_term, "I expected an expression for this '-'"); + return NewAST(ctx->file, start, pos, Negative, .value=val); +} + +PARSER(parse_bool) { + const char *start = pos; + if (match_word(&pos, "yes")) + return NewAST(ctx->file, start, pos, Bool, .b=true); + else if (match_word(&pos, "no")) + return NewAST(ctx->file, start, pos, Bool, .b=false); + else + return NULL; +} + +PARSER(parse_text) { + // ('"' ... '"' / "'" ... "'" / "`" ... "`") + // "$" [name] [interp-char] quote-char ... close-quote + const char *start = pos; + const char *lang = NULL; + + // Escape sequence, e.g. \r\n + if (*pos == '\\') { + CORD cord = CORD_EMPTY; + do { + const char *c = unescape(ctx, &pos); + cord = CORD_cat(cord, c); + // cord = CORD_cat_char(cord, c); + } while (*pos == '\\'); + return NewAST(ctx->file, start, pos, TextLiteral, .cord=cord); + } + + char open_quote, close_quote, open_interp = '$'; + if (match(&pos, "\"")) { // Double quote + open_quote = '"', close_quote = '"', open_interp = '$'; + } else if (match(&pos, "`")) { // Backtick + open_quote = '`', close_quote = '`', open_interp = '$'; + } else if (match(&pos, "'")) { // Single quote + open_quote = '\'', close_quote = '\'', open_interp = '\x03'; + } else if (match(&pos, "$")) { // Customized strings + lang = get_id(&pos); + // $"..." or $@"...." + static const char *interp_chars = "~!@#$%^&*+=\\?"; + if (match(&pos, "$")) { // Disable interpolation with $ + open_interp = '\x03'; + } else if (strchr(interp_chars, *pos)) { + open_interp = *pos; + ++pos; + } else if (*pos == '(') { + open_interp = '@'; // For shell commands + } + static const char *quote_chars = "\"'`|/;([{<"; + if (!strchr(quote_chars, *pos)) + parser_err(ctx, pos, pos+1, "This is not a valid string quotation character. Valid characters are: \"'`|/;([{<"); + open_quote = *pos; + ++pos; + close_quote = closing[(int)open_quote] ? closing[(int)open_quote] : open_quote; + + if (!lang && (open_quote == '/' || open_quote == '|')) + lang = "Pattern"; + } else { + return NULL; + } + + int64_t starting_indent = get_indent(ctx, pos); + int64_t string_indent = starting_indent + SPACES_PER_INDENT; + + ast_list_t *chunks = NULL; + CORD chunk = CORD_EMPTY; + const char *chunk_start = pos; + int depth = 1; + bool leading_newline = false; + for (; pos < ctx->file->text + ctx->file->len && depth > 0; ) { + if (*pos == open_interp) { // Interpolation + const char *interp_start = pos; + if (chunk) { + ast_t *literal = NewAST(ctx->file, chunk_start, pos, TextLiteral, .cord=chunk); + chunks = new(ast_list_t, .ast=literal, .next=chunks); + chunk = NULL; + } + ++pos; + ast_t *interp; + if (*pos == ' ' || *pos == '\t') + parser_err(ctx, pos, pos+1, "Whitespace is not allowed before an interpolation here"); + interp = expect(ctx, interp_start, &pos, parse_term_no_suffix, "I expected an interpolation term here"); + chunks = new(ast_list_t, .ast=interp, .next=chunks); + chunk_start = pos; + } else if (!leading_newline && *pos == open_quote && closing[(int)open_quote]) { // Nested pair begin + if (get_indent(ctx, pos) == starting_indent) { + ++depth; + } + chunk = CORD_cat_char(chunk, *pos); + ++pos; + } else if (!leading_newline && *pos == close_quote) { // Nested pair end + if (get_indent(ctx, pos) == starting_indent) { + --depth; + if (depth == 0) + break; + } + chunk = CORD_cat_char(chunk, *pos); + ++pos; + } else if (newline_with_indentation(&pos, string_indent)) { // Newline + if (!leading_newline && !(chunk || chunks)) { + leading_newline = true; + } else { + chunk = CORD_cat_char(chunk, '\n'); + } + } else if (newline_with_indentation(&pos, starting_indent)) { // Line continuation (..) + if (*pos == close_quote) { + break; + } else if (some_of(&pos, ".") >= 2) { + // Multi-line split + continue; + } else { + parser_err(ctx, pos, strchrnul(pos, '\n'), "This multi-line string should be either indented or have '..' at the front"); + } + } else { // Plain character + chunk = CORD_cat_char(chunk, *pos); + ++pos; + } + } + + if (chunk) { + ast_t *literal = NewAST(ctx->file, chunk_start, pos, TextLiteral, .cord=chunk); + chunks = new(ast_list_t, .ast=literal, .next=chunks); + chunk = NULL; + } + + REVERSE_LIST(chunks); + expect_closing(ctx, &pos, (char[]){close_quote, 0}, "I was expecting a '%c' to finish this string", close_quote); + return NewAST(ctx->file, start, pos, TextJoin, .lang=lang, .children=chunks); +} + +PARSER(parse_path) { + // "(" ("~/" / "./" / "../" / "/") ... ")" + const char *start = pos; + + if (!match(&pos, "(")) + return NULL; + + if (!(*pos == '~' || *pos == '.' || *pos == '/')) + return NULL; + + const char *path_start = pos; + size_t len = 1; + int paren_depth = 1; + while (pos + len < ctx->file->text + ctx->file->len - 1) { + if (pos[len] == '\\') { + len += 2; + continue; + } else if (pos[len] == '(') { + paren_depth += 1; + } else if (pos[len] == ')') { + paren_depth -= 1; + if (paren_depth <= 0) break; + } else if (pos[len] == '\r' || pos[len] == '\n') { + parser_err(ctx, path_start, &pos[len-1], "This path was not closed"); + } + len += 1; + } + pos += len + 1; + char *path = heap_strf("%.*s", (int)len, path_start); + for (char *src = path, *dest = path; ; ) { + if (src[0] == '\\') { + *(dest++) = src[1]; + src += 2; + } else if (*src) { + *(dest++) = *(src++); + } else { + *(dest++) = '\0'; + break; + } + } + return NewAST(ctx->file, start, pos, Path, .path=path); +} + +PARSER(parse_pass) { + const char *start = pos; + return match_word(&pos, "pass") ? NewAST(ctx->file, start, pos, Pass) : NULL; +} + +PARSER(parse_defer) { + const char *start = pos; + if (!match_word(&pos, "defer")) return NULL; + ast_t *body = expect(ctx, start, &pos, parse_block, "I expected a block to be deferred here"); + return NewAST(ctx->file, start, pos, Defer, .body=body); +} + +PARSER(parse_skip) { + const char *start = pos; + if (!match_word(&pos, "skip")) return NULL; + const char *target; + if (match_word(&pos, "for")) target = "for"; + else if (match_word(&pos, "while")) target = "while"; + else target = get_id(&pos); + ast_t *skip = NewAST(ctx->file, start, pos, Skip, .target=target); + skip = parse_optional_conditional_suffix(ctx, skip); + return skip; +} + +PARSER(parse_stop) { + const char *start = pos; + if (!match_word(&pos, "stop")) return NULL; + const char *target; + if (match_word(&pos, "for")) target = "for"; + else if (match_word(&pos, "while")) target = "while"; + else target = get_id(&pos); + ast_t *stop = NewAST(ctx->file, start, pos, Stop, .target=target); + stop = parse_optional_conditional_suffix(ctx, stop); + return stop; +} + +PARSER(parse_return) { + const char *start = pos; + if (!match_word(&pos, "return")) return NULL; + ast_t *value = optional(ctx, &pos, parse_expr); + ast_t *ret = NewAST(ctx->file, start, pos, Return, .value=value); + ret = parse_optional_conditional_suffix(ctx, ret); + return ret; +} + +PARSER(parse_lambda) { + const char *start = pos; + if (!match_word(&pos, "func")) + return NULL; + spaces(&pos); + if (!match(&pos, "(")) + return NULL; + arg_ast_t *args = parse_args(ctx, &pos); + spaces(&pos); + type_ast_t *ret = match(&pos, "->") ? optional(ctx, &pos, parse_type) : NULL; + spaces(&pos); + expect_closing(ctx, &pos, ")", "I was expecting a ')' to finish this anonymous function's arguments"); + ast_t *body = optional(ctx, &pos, parse_block); + if (!body) body = NewAST(ctx->file, pos, pos, Block, .statements=NULL); + return NewAST(ctx->file, start, pos, Lambda, .id=ctx->next_lambda_id++, .args=args, .ret_type=ret, .body=body); +} + +PARSER(parse_none) { + const char *start = pos; + if (!match_word(&pos, "none")) + return NULL; + + const char *none_end = pos; + spaces(&pos); + if (!match(&pos, ":")) + return NewAST(ctx->file, start, none_end, None, .type=NULL); + + spaces(&pos); + type_ast_t *type = parse_type(ctx, pos); + if (!type) return NULL; + return NewAST(ctx->file, start, type->end, None, .type=type); +} + +PARSER(parse_deserialize) { + const char *start = pos; + if (!match_word(&pos, "deserialize")) + return NULL; + + spaces(&pos); + expect_str(ctx, start, &pos, "(", "I expected arguments for this `deserialize` call"); + whitespace(&pos); + ast_t *value = expect(ctx, start, &pos, parse_extended_expr, "I expected an expression here"); + whitespace(&pos); + expect_str(ctx, start, &pos, "->", "I expected a `-> Type` for this `deserialize` call so I know what it deserializes to"); + whitespace(&pos); + type_ast_t *type = expect(ctx, start, &pos, parse_type, "I couldn't parse the type for this deserialization"); + whitespace(&pos); + expect_closing(ctx, &pos, ")", "I expected a closing ')' for this `deserialize` call"); + return NewAST(ctx->file, start, pos, Deserialize, .value=value, .type=type); +} + +PARSER(parse_var) { + const char *start = pos; + const char* name = get_id(&pos); + if (!name) return NULL; + return NewAST(ctx->file, start, pos, Var, .name=name); +} + +PARSER(parse_term_no_suffix) { + spaces(&pos); + ast_t *term = NULL; + (void)( + false + || (term=parse_moment(ctx, pos)) // Must come before num/int + || (term=parse_none(ctx, pos)) + || (term=parse_num(ctx, pos)) // Must come before int + || (term=parse_int(ctx, pos)) + || (term=parse_negative(ctx, pos)) // Must come after num/int + || (term=parse_heap_alloc(ctx, pos)) + || (term=parse_stack_reference(ctx, pos)) + || (term=parse_bool(ctx, pos)) + || (term=parse_text(ctx, pos)) + || (term=parse_path(ctx, pos)) + || (term=parse_lambda(ctx, pos)) + || (term=parse_parens(ctx, pos)) + || (term=parse_table(ctx, pos)) + || (term=parse_set(ctx, pos)) + || (term=parse_deserialize(ctx, pos)) + || (term=parse_var(ctx, pos)) + || (term=parse_array(ctx, pos)) + || (term=parse_reduction(ctx, pos)) + || (term=parse_pass(ctx, pos)) + || (term=parse_defer(ctx, pos)) + || (term=parse_skip(ctx, pos)) + || (term=parse_stop(ctx, pos)) + || (term=parse_return(ctx, pos)) + || (term=parse_not(ctx, pos)) + || (term=parse_mutexed(ctx, pos)) + || (term=parse_extern(ctx, pos)) + || (term=parse_inline_c(ctx, pos)) + ); + return term; +} + +PARSER(parse_term) { + ast_t *term = parse_term_no_suffix(ctx, pos); + if (!term) return NULL; + + for (bool progress = true; progress; ) { + ast_t *new_term; + progress = (false + || (new_term=parse_index_suffix(ctx, term)) + || (new_term=parse_field_suffix(ctx, term)) + || (new_term=parse_method_call_suffix(ctx, term)) + || (new_term=parse_fncall_suffix(ctx, term)) + || (new_term=parse_optional_suffix(ctx, term)) + || (new_term=parse_non_optional_suffix(ctx, term)) + ); + if (progress) term = new_term; + } + return term; +} + +ast_t *parse_method_call_suffix(parse_ctx_t *ctx, ast_t *self) { + if (!self) return NULL; + + const char *start = self->start; + const char *pos = self->end; + + if (!match(&pos, ":")) return NULL; + if (*pos == ' ') return NULL; + const char *fn = get_id(&pos); + if (!fn) return NULL; + spaces(&pos); + if (!match(&pos, "(")) return NULL; + whitespace(&pos); + + arg_ast_t *args = NULL; + for (;;) { + const char *arg_start = pos; + const char *name = get_id(&pos); + whitespace(&pos); + if (!name || !match(&pos, "=")) { + name = NULL; + pos = arg_start; + } + + ast_t *arg = optional(ctx, &pos, parse_expr); + if (!arg) { + if (name) parser_err(ctx, arg_start, pos, "I expected an argument here"); + break; + } + args = new(arg_ast_t, .name=name, .value=arg, .next=args); + if (!match_separator(&pos)) + break; + } + REVERSE_LIST(args); + + whitespace(&pos); + + if (!match(&pos, ")")) + parser_err(ctx, start, pos, "This parenthesis is unclosed"); + + return NewAST(ctx->file, start, pos, MethodCall, .self=self, .name=fn, .args=args); +} + +ast_t *parse_fncall_suffix(parse_ctx_t *ctx, ast_t *fn) { + if (!fn) return NULL; + + const char *start = fn->start; + const char *pos = fn->end; + + if (!match(&pos, "(")) return NULL; + + whitespace(&pos); + + arg_ast_t *args = NULL; + for (;;) { + const char *arg_start = pos; + const char *name = get_id(&pos); + whitespace(&pos); + if (!name || !match(&pos, "=")) { + name = NULL; + pos = arg_start; + } + + ast_t *arg = optional(ctx, &pos, parse_expr); + if (!arg) { + if (name) + parser_err(ctx, arg_start, pos, "I expected an argument here"); + break; + } + args = new(arg_ast_t, .name=name, .value=arg, .next=args); + if (!match_separator(&pos)) + break; + } + + whitespace(&pos); + + if (!match(&pos, ")")) + parser_err(ctx, start, pos, "This parenthesis is unclosed"); + + REVERSE_LIST(args); + return NewAST(ctx->file, start, pos, FunctionCall, .fn=fn, .args=args); +} + +binop_e match_binary_operator(const char **pos) +{ + switch (**pos) { + case '+': { + *pos += 1; + return match(pos, "+") ? BINOP_CONCAT : BINOP_PLUS; + } + case '-': { + *pos += 1; + if ((*pos)[0] != ' ' && (*pos)[-2] == ' ') // looks like `fn -5` + return BINOP_UNKNOWN; + return BINOP_MINUS; + } + case '*': *pos += 1; return BINOP_MULT; + case '/': *pos += 1; return BINOP_DIVIDE; + case '^': *pos += 1; return BINOP_POWER; + case '<': { + *pos += 1; + if (match(pos, "=")) return BINOP_LE; // "<=" + else if (match(pos, ">")) return BINOP_CMP; // "<>" + else if (match(pos, "<")) { + if (match(pos, "<")) + return BINOP_ULSHIFT; // "<<<" + return BINOP_LSHIFT; // "<<" + } else return BINOP_LT; + } + case '>': { + *pos += 1; + if (match(pos, "=")) return BINOP_GE; // ">=" + if (match(pos, ">")) { + if (match(pos, ">")) + return BINOP_URSHIFT; // ">>>" + return BINOP_RSHIFT; // ">>" + } + return BINOP_GT; + } + default: { + if (match(pos, "!=")) return BINOP_NE; + else if (match(pos, "==") && **pos != '=') return BINOP_EQ; + else if (match_word(pos, "and")) return BINOP_AND; + else if (match_word(pos, "or")) return BINOP_OR; + else if (match_word(pos, "xor")) return BINOP_XOR; + else if (match_word(pos, "mod1")) return BINOP_MOD1; + else if (match_word(pos, "mod")) return BINOP_MOD; + else if (match_word(pos, "_min_")) return BINOP_MIN; + else if (match_word(pos, "_max_")) return BINOP_MAX; + else return BINOP_UNKNOWN; + } + } +} + +static ast_t *parse_infix_expr(parse_ctx_t *ctx, const char *pos, int min_tightness) { + ast_t *lhs = optional(ctx, &pos, parse_term); + if (!lhs) return NULL; + + int64_t starting_line = get_line_number(ctx->file, pos); + int64_t starting_indent = get_indent(ctx, pos); + spaces(&pos); + for (binop_e op; (op=match_binary_operator(&pos)) != BINOP_UNKNOWN && op_tightness[op] >= min_tightness; spaces(&pos)) { + ast_t *key = NULL; + if (op == BINOP_MIN || op == BINOP_MAX) { + key = NewAST(ctx->file, pos, pos, Var, .name="$"); + for (bool progress = true; progress; ) { + ast_t *new_term; + progress = (false + || (new_term=parse_index_suffix(ctx, key)) + || (new_term=parse_field_suffix(ctx, key)) + || (new_term=parse_method_call_suffix(ctx, key)) + || (new_term=parse_fncall_suffix(ctx, key)) + || (new_term=parse_optional_suffix(ctx, key)) + || (new_term=parse_non_optional_suffix(ctx, key)) + ); + if (progress) key = new_term; + } + if (key->tag == Var) key = NULL; + else pos = key->end; + } + + whitespace(&pos); + if (get_line_number(ctx->file, pos) != starting_line && get_indent(ctx, pos) < starting_indent) + parser_err(ctx, pos, strchrnul(pos, '\n'), "I expected this line to be at least as indented than the line above it"); + + ast_t *rhs = parse_infix_expr(ctx, pos, op_tightness[op] + 1); + if (!rhs) break; + pos = rhs->end; + + if (op == BINOP_MIN) { + return NewAST(ctx->file, lhs->start, rhs->end, Min, .lhs=lhs, .rhs=rhs, .key=key); + } else if (op == BINOP_MAX) { + return NewAST(ctx->file, lhs->start, rhs->end, Max, .lhs=lhs, .rhs=rhs, .key=key); + } else { + lhs = NewAST(ctx->file, lhs->start, rhs->end, BinaryOp, .lhs=lhs, .op=op, .rhs=rhs); + } + } + return lhs; +} + +PARSER(parse_expr) { + return parse_infix_expr(ctx, pos, 0); +} + +PARSER(parse_declaration) { + const char *start = pos; + ast_t *var = parse_var(ctx, pos); + if (!var) return NULL; + pos = var->end; + spaces(&pos); + if (!match(&pos, ":=")) return NULL; + spaces(&pos); + ast_t *val = optional(ctx, &pos, parse_extended_expr); + if (!val) { + if (optional(ctx, &pos, parse_use)) + parser_err(ctx, start, pos, "'use' statements are only allowed at the top level of a file"); + else + parser_err(ctx, pos, strchrnul(pos, '\n'), "This is not a valid expression"); + } + return NewAST(ctx->file, start, pos, Declare, .var=var, .value=val); +} + +PARSER(parse_update) { + const char *start = pos; + ast_t *lhs = optional(ctx, &pos, parse_expr); + if (!lhs) return NULL; + spaces(&pos); + binop_e op; + if (match(&pos, "+=")) op = BINOP_PLUS; + else if (match(&pos, "++=")) op = BINOP_CONCAT; + else if (match(&pos, "-=")) op = BINOP_MINUS; + else if (match(&pos, "*=")) op = BINOP_MULT; + else if (match(&pos, "/=")) op = BINOP_DIVIDE; + else if (match(&pos, "^=")) op = BINOP_POWER; + else if (match(&pos, "<<=")) op = BINOP_LSHIFT; + else if (match(&pos, "<<<=")) op = BINOP_ULSHIFT; + else if (match(&pos, ">>=")) op = BINOP_RSHIFT; + else if (match(&pos, ">>>=")) op = BINOP_URSHIFT; + else if (match(&pos, "and=")) op = BINOP_AND; + else if (match(&pos, "or=")) op = BINOP_OR; + else if (match(&pos, "xor=")) op = BINOP_XOR; + else return NULL; + ast_t *rhs = expect(ctx, start, &pos, parse_extended_expr, "I expected an expression here"); + return NewAST(ctx->file, start, pos, UpdateAssign, .lhs=lhs, .rhs=rhs, .op=op); +} + +PARSER(parse_assignment) { + const char *start = pos; + ast_list_t *targets = NULL; + for (;;) { + ast_t *lhs = optional(ctx, &pos, parse_term); + if (!lhs) break; + targets = new(ast_list_t, .ast=lhs, .next=targets); + spaces(&pos); + if (!match(&pos, ",")) break; + whitespace(&pos); + } + + if (!targets) return NULL; + + spaces(&pos); + if (!match(&pos, "=")) return NULL; + if (match(&pos, "=")) return NULL; // == comparison + + ast_list_t *values = NULL; + for (;;) { + ast_t *rhs = optional(ctx, &pos, parse_extended_expr); + if (!rhs) break; + values = new(ast_list_t, .ast=rhs, .next=values); + spaces(&pos); + if (!match(&pos, ",")) break; + whitespace(&pos); + } + + REVERSE_LIST(targets); + REVERSE_LIST(values); + + return NewAST(ctx->file, start, pos, Assign, .targets=targets, .values=values); +} + +PARSER(parse_statement) { + ast_t *stmt = NULL; + if ((stmt=parse_declaration(ctx, pos)) + || (stmt=parse_doctest(ctx, pos)) + || (stmt=parse_say(ctx, pos))) + return stmt; + + if (!(false + || (stmt=parse_update(ctx, pos)) + || (stmt=parse_assignment(ctx, pos)) + )) + stmt = parse_extended_expr(ctx, pos); + + for (bool progress = (stmt != NULL); progress; ) { + ast_t *new_stmt; + progress = false; + if (stmt->tag == Var) { + progress = (false + || (new_stmt=parse_method_call_suffix(ctx, stmt)) + || (new_stmt=parse_fncall_suffix(ctx, stmt)) + ); + } else if (stmt->tag == FunctionCall) { + new_stmt = parse_optional_conditional_suffix(ctx, stmt); + progress = (new_stmt != stmt); + } + + if (progress) stmt = new_stmt; + } + return stmt; + +} + +PARSER(parse_extended_expr) { + ast_t *expr = NULL; + + if (false + || (expr=optional(ctx, &pos, parse_for)) + || (expr=optional(ctx, &pos, parse_while)) + || (expr=optional(ctx, &pos, parse_if)) + || (expr=optional(ctx, &pos, parse_when)) + || (expr=optional(ctx, &pos, parse_repeat)) + || (expr=optional(ctx, &pos, parse_do)) + || (expr=optional(ctx, &pos, parse_holding)) + ) + return expr; + + return parse_expr(ctx, pos); +} + +PARSER(parse_block) { + const char *start = pos; + spaces(&pos); + if (!match(&pos, ":")) return NULL; + + ast_list_t *statements = NULL; + if (!indent(ctx, &pos)) { + // Inline block + spaces(&pos); + while (*pos) { + spaces(&pos); + ast_t *stmt = optional(ctx, &pos, parse_statement); + if (!stmt) break; + statements = new(ast_list_t, .ast=stmt, .next=statements); + spaces(&pos); + if (!match(&pos, ";")) break; + } + } else { + goto indented; + } + + if (indent(ctx, &pos)) { + indented:; + int64_t block_indent = get_indent(ctx, pos); + whitespace(&pos); + while (*pos) { + ast_t *stmt = optional(ctx, &pos, parse_statement); + if (!stmt) { + const char *line_start = pos; + if (match_word(&pos, "struct")) + parser_err(ctx, line_start, strchrnul(pos, '\n'), "Struct definitions are only allowed at the top level"); + else if (match_word(&pos, "enum")) + parser_err(ctx, line_start, strchrnul(pos, '\n'), "Enum definitions are only allowed at the top level"); + else if (match_word(&pos, "func")) + parser_err(ctx, line_start, strchrnul(pos, '\n'), "Function definitions are only allowed at the top level"); + else if (match_word(&pos, "use")) + parser_err(ctx, line_start, strchrnul(pos, '\n'), "'use' statements are only allowed at the top level"); + + spaces(&pos); + if (*pos && *pos != '\r' && *pos != '\n') + parser_err(ctx, pos, strchrnul(pos, '\n'), "I couldn't parse this line"); + break; + } + statements = new(ast_list_t, .ast=stmt, .next=statements); + whitespace(&pos); + + // Guard against having two valid statements on the same line, separated by spaces (but no newlines): + if (!memchr(stmt->end, '\n', (size_t)(pos - stmt->end))) { + if (*pos) + parser_err(ctx, pos, strchrnul(pos, '\n'), "I don't know how to parse the rest of this line"); + pos = stmt->end; + break; + } + + if (get_indent(ctx, pos) != block_indent) { + pos = stmt->end; // backtrack + break; + } + } + } + REVERSE_LIST(statements); + return NewAST(ctx->file, start, pos, Block, .statements=statements); +} + +PARSER(parse_namespace) { + const char *start = pos; + whitespace(&pos); + int64_t indent = get_indent(ctx, pos); + ast_list_t *statements = NULL; + for (;;) { + const char *next = pos; + whitespace(&next); + if (get_indent(ctx, next) != indent) break; + ast_t *stmt; + if ((stmt=optional(ctx, &pos, parse_struct_def)) + ||(stmt=optional(ctx, &pos, parse_enum_def)) + ||(stmt=optional(ctx, &pos, parse_lang_def)) + ||(stmt=optional(ctx, &pos, parse_func_def)) + ||(stmt=optional(ctx, &pos, parse_convert_def)) + ||(stmt=optional(ctx, &pos, parse_use)) + ||(stmt=optional(ctx, &pos, parse_extern)) + ||(stmt=optional(ctx, &pos, parse_inline_c)) + ||(stmt=optional(ctx, &pos, parse_declaration))) + { + statements = new(ast_list_t, .ast=stmt, .next=statements); + pos = stmt->end; + whitespace(&pos); // TODO: check for newline + // if (!(space_types & WHITESPACE_NEWLINES)) { + // pos = stmt->end; + // break; + // } + } else { + if (get_indent(ctx, next) > indent && next < strchrnul(next, '\n')) + parser_err(ctx, next, strchrnul(next, '\n'), "I couldn't parse this namespace declaration"); + break; + } + } + REVERSE_LIST(statements); + return NewAST(ctx->file, start, pos, Block, .statements=statements); +} + +PARSER(parse_file_body) { + const char *start = pos; + whitespace(&pos); + ast_list_t *statements = NULL; + for (;;) { + const char *next = pos; + whitespace(&next); + if (get_indent(ctx, next) != 0) break; + ast_t *stmt; + if ((stmt=optional(ctx, &pos, parse_struct_def)) + ||(stmt=optional(ctx, &pos, parse_enum_def)) + ||(stmt=optional(ctx, &pos, parse_lang_def)) + ||(stmt=optional(ctx, &pos, parse_func_def)) + ||(stmt=optional(ctx, &pos, parse_convert_def)) + ||(stmt=optional(ctx, &pos, parse_use)) + ||(stmt=optional(ctx, &pos, parse_extern)) + ||(stmt=optional(ctx, &pos, parse_inline_c)) + ||(stmt=optional(ctx, &pos, parse_declaration))) + { + statements = new(ast_list_t, .ast=stmt, .next=statements); + pos = stmt->end; + whitespace(&pos); // TODO: check for newline + } else { + break; + } + } + whitespace(&pos); + if (pos < ctx->file->text + ctx->file->len && *pos != '\0') { + parser_err(ctx, pos, strchrnul(pos, '\n'), "I expect all top-level statements to be declarations of some kind"); + } + REVERSE_LIST(statements); + return NewAST(ctx->file, start, pos, Block, .statements=statements); +} + +PARSER(parse_struct_def) { + // struct Foo(...) [: \n body] + const char *start = pos; + if (!match_word(&pos, "struct")) return NULL; + + int64_t starting_indent = get_indent(ctx, pos); + + spaces(&pos); + const char *name = get_id(&pos); + if (!name) parser_err(ctx, start, pos, "I expected a name for this struct"); + spaces(&pos); + + if (!match(&pos, "(")) + parser_err(ctx, pos, pos, "I expected a '(' and a list of fields here"); + + arg_ast_t *fields = parse_args(ctx, &pos); + + whitespace(&pos); + bool secret = false, external = false, opaque = false; + if (match(&pos, ";")) { // Extra flags + whitespace(&pos); + for (;;) { + if (match_word(&pos, "secret")) { + secret = true; + } else if (match_word(&pos, "extern")) { + external = true; + } else if (match_word(&pos, "opaque")) { + if (fields) + parser_err(ctx, pos-strlen("opaque"), pos, "A struct can't be opaque if it has fields defined"); + opaque = true; + } else { + break; + } + + if (!match_separator(&pos)) + break; + } + } + + + expect_closing(ctx, &pos, ")", "I wasn't able to parse the rest of this struct"); + + ast_t *namespace = NULL; + if (match(&pos, ":")) { + const char *ns_pos = pos; + whitespace(&ns_pos); + int64_t ns_indent = get_indent(ctx, ns_pos); + if (ns_indent > starting_indent) { + pos = ns_pos; + namespace = optional(ctx, &pos, parse_namespace); + } + } + if (!namespace) + namespace = NewAST(ctx->file, pos, pos, Block, .statements=NULL); + return NewAST(ctx->file, start, pos, StructDef, .name=name, .fields=fields, .namespace=namespace, + .secret=secret, .external=external, .opaque=opaque); +} + +PARSER(parse_enum_def) { + // tagged union: enum Foo(a, b(x:Int,y:Int)=5, ...) [: \n namespace] + const char *start = pos; + if (!match_word(&pos, "enum")) return NULL; + int64_t starting_indent = get_indent(ctx, pos); + spaces(&pos); + const char *name = get_id(&pos); + if (!name) + parser_err(ctx, start, pos, "I expected a name for this enum"); + spaces(&pos); + if (!match(&pos, "(")) return NULL; + + tag_ast_t *tags = NULL; + whitespace(&pos); + for (;;) { + spaces(&pos); + const char *tag_name = get_id(&pos); + if (!tag_name) break; + + spaces(&pos); + arg_ast_t *fields; + bool secret = false; + if (match(&pos, "(")) { + whitespace(&pos); + fields = parse_args(ctx, &pos); + whitespace(&pos); + if (match(&pos, ";")) { // Extra flags + whitespace(&pos); + secret = match_word(&pos, "secret"); + whitespace(&pos); + } + expect_closing(ctx, &pos, ")", "I wasn't able to parse the rest of this tagged union member"); + } else { + fields = NULL; + } + + tags = new(tag_ast_t, .name=tag_name, .fields=fields, .secret=secret, .next=tags); + + if (!match_separator(&pos)) + break; + } + + whitespace(&pos); + expect_closing(ctx, &pos, ")", "I wasn't able to parse the rest of this enum definition"); + + REVERSE_LIST(tags); + + ast_t *namespace = NULL; + if (match(&pos, ":")) { + const char *ns_pos = pos; + whitespace(&ns_pos); + int64_t ns_indent = get_indent(ctx, ns_pos); + if (ns_indent > starting_indent) { + pos = ns_pos; + namespace = optional(ctx, &pos, parse_namespace); + } + } + if (!namespace) + namespace = NewAST(ctx->file, pos, pos, Block, .statements=NULL); + + return NewAST(ctx->file, start, pos, EnumDef, .name=name, .tags=tags, .namespace=namespace); +} + +PARSER(parse_lang_def) { + const char *start = pos; + // lang Name: [namespace...] + if (!match_word(&pos, "lang")) return NULL; + int64_t starting_indent = get_indent(ctx, pos); + spaces(&pos); + const char *name = get_id(&pos); + if (!name) + parser_err(ctx, start, pos, "I expected a name for this lang"); + spaces(&pos); + + ast_t *namespace = NULL; + if (match(&pos, ":")) { + const char *ns_pos = pos; + whitespace(&ns_pos); + int64_t ns_indent = get_indent(ctx, ns_pos); + if (ns_indent > starting_indent) { + pos = ns_pos; + namespace = optional(ctx, &pos, parse_namespace); + } + } + if (!namespace) + namespace = NewAST(ctx->file, pos, pos, Block, .statements=NULL); + + return NewAST(ctx->file, start, pos, LangDef, .name=name, .namespace=namespace); +} + +arg_ast_t *parse_args(parse_ctx_t *ctx, const char **pos) +{ + arg_ast_t *args = NULL; + for (;;) { + const char *batch_start = *pos; + ast_t *default_val = NULL; + type_ast_t *type = NULL; + + typedef struct name_list_s { + const char *name; + struct name_list_s *next; + } name_list_t; + + name_list_t *names = NULL; + for (;;) { + whitespace(pos); + const char *name = get_id(pos); + if (!name) break; + whitespace(pos); + if (strncmp(*pos, "==", 2) != 0 && match(pos, "=")) { + default_val = expect(ctx, *pos-1, pos, parse_term, "I expected a value after this '='"); + names = new(name_list_t, .name=name, .next=names); + break; + } else if (match(pos, ":")) { + type = expect(ctx, *pos-1, pos, parse_type, "I expected a type here"); + names = new(name_list_t, .name=name, .next=names); + break; + } else if (name) { + names = new(name_list_t, .name=name, .next=names); + spaces(pos); + if (!match(pos, ",")) break; + } else { + break; + } + } + if (!names) break; + if (!default_val && !type) + parser_err(ctx, batch_start, *pos, "I expected a ':' and type, or '=' and a default value after this parameter (%s)", + names->name); + + REVERSE_LIST(names); + for (; names; names = names->next) + args = new(arg_ast_t, .name=names->name, .type=type, .value=default_val, .next=args); + + if (!match_separator(pos)) + break; + } + + REVERSE_LIST(args); + return args; +} + +PARSER(parse_func_def) { + const char *start = pos; + if (!match_word(&pos, "func")) return NULL; + + ast_t *name = optional(ctx, &pos, parse_var); + if (!name) return NULL; + + spaces(&pos); + + if (!match(&pos, "(")) return NULL; + + arg_ast_t *args = parse_args(ctx, &pos); + spaces(&pos); + type_ast_t *ret_type = match(&pos, "->") ? optional(ctx, &pos, parse_type) : NULL; + whitespace(&pos); + bool is_inline = false; + ast_t *cache_ast = NULL; + for (bool specials = match(&pos, ";"); specials; specials = match_separator(&pos)) { + const char *flag_start = pos; + if (match_word(&pos, "inline")) { + is_inline = true; + } else if (match_word(&pos, "cached")) { + if (!cache_ast) cache_ast = NewAST(ctx->file, pos, pos, Int, .str="-1"); + } else if (match_word(&pos, "cache_size")) { + whitespace(&pos); + if (!match(&pos, "=")) + parser_err(ctx, flag_start, pos, "I expected a value for 'cache_size'"); + whitespace(&pos); + cache_ast = expect(ctx, start, &pos, parse_expr, "I expected a maximum size for the cache"); + } + } + expect_closing(ctx, &pos, ")", "I wasn't able to parse the rest of this function definition"); + + ast_t *body = expect(ctx, start, &pos, parse_block, + "This function needs a body block"); + return NewAST(ctx->file, start, pos, FunctionDef, + .name=name, .args=args, .ret_type=ret_type, .body=body, .cache=cache_ast, + .is_inline=is_inline); +} + +PARSER(parse_convert_def) { + const char *start = pos; + if (!match_word(&pos, "convert")) return NULL; + + spaces(&pos); + + if (!match(&pos, "(")) return NULL; + + arg_ast_t *args = parse_args(ctx, &pos); + spaces(&pos); + type_ast_t *ret_type = match(&pos, "->") ? optional(ctx, &pos, parse_type) : NULL; + whitespace(&pos); + bool is_inline = false; + ast_t *cache_ast = NULL; + for (bool specials = match(&pos, ";"); specials; specials = match_separator(&pos)) { + const char *flag_start = pos; + if (match_word(&pos, "inline")) { + is_inline = true; + } else if (match_word(&pos, "cached")) { + if (!cache_ast) cache_ast = NewAST(ctx->file, pos, pos, Int, .str="-1"); + } else if (match_word(&pos, "cache_size")) { + whitespace(&pos); + if (!match(&pos, "=")) + parser_err(ctx, flag_start, pos, "I expected a value for 'cache_size'"); + whitespace(&pos); + cache_ast = expect(ctx, start, &pos, parse_expr, "I expected a maximum size for the cache"); + } + } + expect_closing(ctx, &pos, ")", "I wasn't able to parse the rest of this function definition"); + + ast_t *body = expect(ctx, start, &pos, parse_block, + "This function needs a body block"); + return NewAST(ctx->file, start, pos, ConvertDef, + .args=args, .ret_type=ret_type, .body=body, .cache=cache_ast, .is_inline=is_inline); +} + +PARSER(parse_extern) { + const char *start = pos; + if (!match_word(&pos, "extern")) return NULL; + spaces(&pos); + const char* name = get_id(&pos); + spaces(&pos); + if (!match(&pos, ":")) + parser_err(ctx, start, pos, "I couldn't get a type for this extern"); + type_ast_t *type = expect(ctx, start, &pos, parse_type, "I couldn't parse the type for this extern"); + return NewAST(ctx->file, start, pos, Extern, .name=name, .type=type); +} + +PARSER(parse_inline_c) { + const char *start = pos; + if (!match_word(&pos, "inline")) return NULL; + + spaces(&pos); + if (!match_word(&pos, "C")) return NULL; + + spaces(&pos); + type_ast_t *type = NULL; + if (match(&pos, ":")) + type = expect(ctx, start, &pos, parse_type, "I couldn't parse the type for this inline C code"); + + spaces(&pos); + if (!match(&pos, "{")) + parser_err(ctx, start, pos, "I expected a '{' here"); + + int depth = 1; + const char *c_code_start = pos; + for (; *pos && depth > 0; ++pos) { + if (*pos == '}') --depth; + else if (*pos == '{') ++depth; + } + + if (depth != 0) + parser_err(ctx, start, start+1, "I couldn't find the closing '}' for this inline C code"); + + CORD c_code = GC_strndup(c_code_start, (size_t)((pos-1) - c_code_start)); + return NewAST(ctx->file, start, pos, InlineCCode, .code=c_code, .type_ast=type); +} + +PARSER(parse_doctest) { + const char *start = pos; + if (!match(&pos, ">>")) return NULL; + spaces(&pos); + ast_t *expr = expect(ctx, start, &pos, parse_statement, "I couldn't parse the expression for this doctest"); + whitespace(&pos); + const char* output = NULL; + if (match(&pos, "=")) { + spaces(&pos); + const char *output_start = pos, + *output_end = pos + strcspn(pos, "\r\n"); + if (output_end <= output_start) + parser_err(ctx, output_start, output_end, "You're missing expected output here"); + int64_t trailing_spaces = 0; + while (output_end - trailing_spaces - 1 > output_start && (output_end[-trailing_spaces-1] == ' ' || output_end[-trailing_spaces-1] == '\t')) + ++trailing_spaces; + output = GC_strndup(output_start, (size_t)((output_end - output_start) - trailing_spaces)); + pos = output_end; + } else { + pos = expr->end; + } + return NewAST(ctx->file, start, pos, DocTest, .expr=expr, .output=output); +} + +PARSER(parse_say) { + const char *start = pos; + if (!match(&pos, "!!")) return NULL; + spaces(&pos); + + ast_list_t *chunks = NULL; + CORD chunk = CORD_EMPTY; + const char *chunk_start = pos; + const char open_interp = '$'; + while (pos < ctx->file->text + ctx->file->len) { + if (*pos == open_interp) { // Interpolation + const char *interp_start = pos; + if (chunk) { + ast_t *literal = NewAST(ctx->file, chunk_start, pos, TextLiteral, .cord=chunk); + chunks = new(ast_list_t, .ast=literal, .next=chunks); + chunk = NULL; + } + ++pos; + ast_t *interp; + if (*pos == ' ' || *pos == '\t') + parser_err(ctx, pos, pos+1, "Whitespace is not allowed before an interpolation here"); + interp = expect(ctx, interp_start, &pos, parse_term, "I expected an interpolation term here"); + chunks = new(ast_list_t, .ast=interp, .next=chunks); + chunk_start = pos; + } else if (*pos == '\r' || *pos == '\n') { // Newline + break; + } else { // Plain character + chunk = CORD_cat_char(chunk, *pos); + ++pos; + } + } + + if (chunk) { + ast_t *literal = NewAST(ctx->file, chunk_start, pos, TextLiteral, .cord=chunk); + chunks = new(ast_list_t, .ast=literal, .next=chunks); + chunk = NULL; + } + + REVERSE_LIST(chunks); + return NewAST(ctx->file, start, pos, PrintStatement, .to_print=chunks); +} + +PARSER(parse_use) { + const char *start = pos; + + ast_t *var = parse_var(ctx, pos); + if (var) { + pos = var->end; + spaces(&pos); + if (!match(&pos, ":=")) return NULL; + spaces(&pos); + } + + if (!match_word(&pos, "use")) return NULL; + spaces(&pos); + size_t name_len = strcspn(pos, " \t\r\n;"); + if (name_len < 1) + parser_err(ctx, start, pos, "There is no module name here to use"); + char *name = GC_strndup(pos, name_len); + pos += name_len; + while (match(&pos, ";")) continue; + int what; + if (name[0] == '<' || ends_with(name, ".h")) { + what = USE_HEADER; + } else if (ends_with(name, ".c")) { + what = USE_C_CODE; + } else if (ends_with(name, ".S") || ends_with(name, ".s")) { + what = USE_ASM; + } else if (starts_with(name, "./") || starts_with(name, "/") || starts_with(name, "../") || starts_with(name, "~/")) { + what = USE_LOCAL; + } else if (ends_with(name, ".so")) { + what = USE_SHARED_OBJECT; + } else { + what = USE_MODULE; + + // When `use`ing a URL, convert it to a hash: + Text_t text = Text$from_str(name); + Array_t m = Text$matches(text, Pattern("{url}")); + if (m.length >= 0) { + text = Text$trim(text, Pattern("http{0-1 s}://"), true, false); + FILE *shasum = popen(heap_strf("echo -n '%s' | sha256sum", Text$as_c_string(text)), "r"); + const size_t HASH_LEN = 32; + char *hash = GC_MALLOC_ATOMIC(HASH_LEN + 1); + size_t just_read = fread(hash, sizeof(char), HASH_LEN, shasum); + if (just_read < HASH_LEN) + errx(1, "Failed to get SHA sum for 'use': %s", name); + name = hash; + } + } + return NewAST(ctx->file, start, pos, Use, .var=var, .path=name, .what=what); +} + +ast_t *parse_file(const char *path, jmp_buf *on_err) { + if (path[0] != '<') { + const char *resolved = resolve_path(path, ".", "."); + if (!resolved) + errx(1, "Could not resolve path: %s", path); + path = resolved; + } + // NOTE: this cache leaks a bounded amount of memory. The cache will never + // hold more than PARSE_CACHE_SIZE entries (see below), but each entry's + // AST holds onto a reference to the file it came from, so they could + // potentially be somewhat large. + static Table_t cached = {}; + ast_t *ast = Table$str_get(cached, path); + if (ast) return ast; + + file_t *file; + if (path[0] == '<') { + const char *endbracket = strchr(path, '>'); + if (!endbracket) return NULL; + file = spoof_file(GC_strndup(path, (size_t)(endbracket + 1 - path)), endbracket + 1); + } else { + file = load_file(path); + if (!file) return NULL; + } + + parse_ctx_t ctx = { + .file=file, + .on_err=on_err, + }; + + const char *pos = file->text; + if (match(&pos, "#!")) // shebang + some_not(&pos, "\r\n"); + + whitespace(&pos); + ast = parse_file_body(&ctx, pos); + pos = ast->end; + whitespace(&pos); + if (pos < file->text + file->len && *pos != '\0') { + parser_err(&ctx, pos, pos + strlen(pos), "I couldn't parse this part of the file"); + } + + // If cache is getting too big, evict a random entry: + if (cached.entries.length > PARSE_CACHE_SIZE) { + uint32_t i = arc4random_uniform(cached.entries.length); + struct {const char *path; ast_t *ast; } *to_remove = Table$entry(cached, i+1); + Table$str_remove(&cached, to_remove->path); + } + + // Save the AST in the cache: + Table$str_set(&cached, path, ast); + return ast; +} + +type_ast_t *parse_type_str(const char *str) { + file_t *file = spoof_file("<type>", str); + parse_ctx_t ctx = { + .file=file, + .on_err=NULL, + }; + + const char *pos = file->text; + whitespace(&pos); + type_ast_t *ast = parse_type(&ctx, pos); + if (!ast) return ast; + pos = ast->end; + whitespace(&pos); + if (strlen(pos) > 0) { + parser_err(&ctx, pos, pos + strlen(pos), "I couldn't parse this part of the type"); + } + return ast; +} + +ast_t *parse(const char *str) { + file_t *file = spoof_file("<string>", str); + parse_ctx_t ctx = { + .file=file, + .on_err=NULL, + }; + + const char *pos = file->text; + whitespace(&pos); + ast_t *ast = parse_file_body(&ctx, pos); + pos = ast->end; + whitespace(&pos); + if (pos < file->text + file->len && *pos != '\0') + parser_err(&ctx, pos, pos + strlen(pos), "I couldn't parse this part of the file"); + return ast; +} + +// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 diff --git a/src/parse.h b/src/parse.h new file mode 100644 index 00000000..95b2ff95 --- /dev/null +++ b/src/parse.h @@ -0,0 +1,13 @@ +#pragma once + +// Parsing logic + +#include <setjmp.h> + +#include "ast.h" + +type_ast_t *parse_type_str(const char *str); +ast_t *parse_file(const char *path, jmp_buf *on_err); +ast_t *parse(const char *str); + +// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 diff --git a/src/repl.c b/src/repl.c new file mode 100644 index 00000000..39fe30fa --- /dev/null +++ b/src/repl.c @@ -0,0 +1,552 @@ +// A Read-Evaluate-Print-Loop +#include <dlfcn.h> +#include <setjmp.h> +#include <stdarg.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +#include "stdlib/tomo.h" +#include "stdlib/util.h" +#include "typecheck.h" +#include "parse.h" + +typedef union { + int8_t i8; + int16_t i16; + int32_t i32; + int64_t i64; + Int_t integer; + double n64; + float n32; +} number_t; + +static jmp_buf on_err; + +static void run(env_t *env, ast_t *ast); +static void eval(env_t *env, ast_t *ast, void *dest); + +typedef struct { + type_t *type; + void *value; +} repl_binding_t; + +static PUREFUNC repl_binding_t *get_repl_binding(env_t *env, const char *name) +{ + repl_binding_t *b = Table$str_get(*env->locals, name); + return b; +} + +void repl(void) +{ + env_t *env = global_env(); + void *dl = dlopen("libtomo.so", RTLD_LAZY); + if (!dl) errx(1, "I couldn't find libtomo.so in your library paths"); + + size_t buf_size = 0; + char *line = NULL; + ssize_t len = 0; + + if (setjmp(on_err)) + printf("\n"); + + printf("\x1b[33;1m>>\x1b[m "); + fflush(stdout); + + while ((len=getline(&line, &buf_size, stdin)) >= 0) { + if (len > 1) { + char *code = line; + if (starts_with(line, "if ") || starts_with(line, "for ") || starts_with(line, "while ") + || starts_with(line, "func ") || starts_with(line, "struct ") || starts_with(line, "lang ")) { + printf("\x1b[33;1m..\x1b[m "); + fflush(stdout); + code = GC_strdup(line); + while ((len=getline(&line, &buf_size, stdin)) >= 0) { + if (len == 1) break; + code = heap_strf("%s%s", code, line); + printf("\x1b[33;1m..\x1b[m "); + fflush(stdout); + } + } else { + code = heap_strf("func main(): >> %s", code); + } + ast_t *ast = parse_file(heap_strf("<code>%s", code), &on_err); + ast_t *doctest = Match(Match(Match(ast, Block)->statements->ast, FunctionDef)->body, Block)->statements->ast; + if (doctest->tag == DocTest) doctest->__data.DocTest.skip_source = 1; + run(env, doctest); + } + printf("\x1b[33;1m>>\x1b[m "); + fflush(stdout); + } + if (line) free(line); + printf("\n"); +} + +__attribute__((noreturn, format(printf, 2, 3))) +static void repl_err(ast_t *node, const char *fmt, ...) +{ + bool color = isatty(STDERR_FILENO) && !getenv("NO_COLOR"); + if (color) + fputs("\x1b[31;7;1m", stderr); + if (node) + fprintf(stderr, "%s:%ld.%ld: ", node->file->relative_filename, get_line_number(node->file, node->start), + get_line_column(node->file, node->start)); + va_list args; + va_start(args, fmt); + vfprintf(stderr, fmt, args); + va_end(args); + if (color) + fputs(" \x1b[m", stderr); + fputs("\n\n", stderr); + if (node) + highlight_error(node->file, node->start, node->end, "\x1b[31;1m", 2, color); + + longjmp(on_err, 1); +} + +const TypeInfo_t *type_to_type_info(type_t *t) +{ + switch (t->tag) { + case AbortType: return &Abort$info; + case ReturnType: errx(1, "Shouldn't be getting a typeinfo for ReturnType"); + case VoidType: return &Void$info; + case MemoryType: return &Memory$info; + case BoolType: return &Bool$info; + case BigIntType: return &Int$info; + case IntType: + switch (Match(t, IntType)->bits) { + case TYPE_IBITS64: return &Int64$info; + case TYPE_IBITS32: return &Int32$info; + case TYPE_IBITS16: return &Int16$info; + case TYPE_IBITS8: return &Int8$info; + default: errx(1, "Invalid bits"); + } + case NumType: + switch (Match(t, NumType)->bits) { + case TYPE_NBITS64: return &Num$info; + case TYPE_NBITS32: return &Num32$info; + default: errx(1, "Invalid bits"); + } + case TextType: return &Text$info; + case ArrayType: { + const TypeInfo_t *item_info = type_to_type_info(Match(t, ArrayType)->item_type); + TypeInfo_t array_info = *Array$info(item_info); + return memcpy(GC_MALLOC(sizeof(TypeInfo_t)), &array_info, sizeof(TypeInfo_t)); + } + case TableType: { + const TypeInfo_t *key_info = type_to_type_info(Match(t, TableType)->key_type); + const TypeInfo_t *value_info = type_to_type_info(Match(t, TableType)->value_type); + const TypeInfo_t table_info = *Table$info(key_info, value_info); + return memcpy(GC_MALLOC(sizeof(TypeInfo_t)), &table_info, sizeof(TypeInfo_t)); + } + case PointerType: { + auto ptr = Match(t, PointerType); + CORD sigil = ptr->is_stack ? "&" : "@"; + const TypeInfo_t *pointed_info = type_to_type_info(ptr->pointed); + const TypeInfo_t pointer_info = *Pointer$info(sigil, pointed_info); + return memcpy(GC_MALLOC(sizeof(TypeInfo_t)), &pointer_info, sizeof(TypeInfo_t)); + } + default: errx(1, "Unsupported type: %T", t); + } +} + +static PUREFUNC void *get_address(env_t *env, ast_t *ast) +{ + switch (ast->tag) { + case Var: { + repl_binding_t *b = get_repl_binding(env, Match(ast, Var)->name); + if (!b) repl_err(ast, "No such variable"); + return b->value; + } + default: errx(1, "Address not implemented for %W", ast); + } +} + +static Int_t ast_to_int(env_t *env, ast_t *ast) +{ + type_t *t = get_type(env, ast); + switch (t->tag) { + case BigIntType: { + number_t num; + eval(env, ast, &num); + return num.integer; + } + case IntType: { + number_t num; + eval(env, ast, &num); + switch (Match(t, IntType)->bits) { + case TYPE_IBITS64: return Int$from_int64((int64_t)num.i64); + case TYPE_IBITS32: return Int$from_int32(num.i32); + case TYPE_IBITS16: return Int$from_int16(num.i16); + case TYPE_IBITS8: return Int$from_int8(num.i8); + default: errx(1, "Invalid int bits"); + } + } + default: repl_err(NULL, "Cannot convert to integer"); + } +} + +static double ast_to_num(env_t *env, ast_t *ast) +{ + type_t *t = get_type(env, ast); + switch (t->tag) { + case BigIntType: case IntType: { + number_t num; + eval(env, ast, &num); + if (t->tag == BigIntType) + return Num$from_int(num.integer, false); + switch (Match(t, IntType)->bits) { + case TYPE_IBITS64: return Num$from_int64(num.i64, false); + case TYPE_IBITS32: return Num$from_int32(num.i32); + case TYPE_IBITS16: return Num$from_int16(num.i16); + case TYPE_IBITS8: return Num$from_int8(num.i8); + default: errx(1, "Invalid int bits"); + } + } + case NumType: { + number_t num; + eval(env, ast, &num); + return Match(t, NumType)->bits == TYPE_NBITS32 ? (double)num.n32 : (double)num.n64; + } + default: repl_err(NULL, "Cannot convert to number"); + } +} + +static Text_t obj_to_text(type_t *t, const void *obj, bool use_color) +{ + const TypeInfo_t *info = type_to_type_info(t); + return generic_as_text(obj, use_color, info); +} + +void run(env_t *env, ast_t *ast) +{ + switch (ast->tag) { + case Declare: { + auto decl = Match(ast, Declare); + const char *name = Match(decl->var, Var)->name; + type_t *type = get_type(env, decl->value); + repl_binding_t *binding = new(repl_binding_t, .type=type, .value=GC_MALLOC(type_size(type))); + eval(env, decl->value, binding->value); + Table$str_set(env->locals, name, binding); + break; + } + case Assign: { + auto assign = Match(ast, Assign); + int64_t n = 0; + for (ast_list_t *t = assign->targets; t; t = t->next) + ++n; + for (ast_list_t *val = assign->values, *target = assign->targets; val && target; val = val->next, target = target->next) { + type_t *t_target = get_type(env, target->ast); + type_t *t_val = get_type(env, val->ast); + if (!type_eq(t_val, t_target)) + repl_err(target->ast, "This value has type %T but I expected a %T", t_val, t_target); + + if (!can_be_mutated(env, target->ast)) { + if (target->ast->tag == Index || target->ast->tag == FieldAccess) { + ast_t *subject = target->ast->tag == Index ? Match(target->ast, Index)->indexed : Match(target->ast, FieldAccess)->fielded; + repl_err(subject, "This is an immutable value, you can't assign to it"); + } else { + repl_err(target->ast, "This is a value of type %T and can't be assigned to", get_type(env, target->ast)); + } + } + } + for (ast_list_t *val = assign->values, *target = assign->targets; val && target; val = val->next, target = target->next) { + switch (target->ast->tag) { + case Var: { + void *dest = get_address(env, target->ast); + eval(env, val->ast, dest); + break; + } + // case Index: { + // auto index = Match(target->ast, Index); + // type_t *obj_t = get_type(env, index->indexed); + // TypeInfo_t *table_info = type_to_type_info(t); + // } + default: errx(1, "Assignment not implemented: %W", target->ast); + } + } + break; + } + case DocTest: { + auto doctest = Match(ast, DocTest); + type_t *t = get_type(env, doctest->expr); + size_t size = t ? type_size(t) : 0; + if (size == 0) { + run(env, doctest->expr); + } else { + void *value = GC_MALLOC(size); + eval(env, doctest->expr, value); + Text_t text = obj_to_text(t, value, true); + printf("= %k \x1b[2m: %T\x1b[m\n", &text, t); + fflush(stdout); + } + break; + } + case Block: { + auto block = Match(ast, Block); + for (ast_list_t *stmt = block->statements; stmt; stmt = stmt->next) { + run(env, stmt->ast); + } + break; + } + // UpdateAssign, + // FunctionDef, + // For, While, If, When, + // Skip, Stop, Pass, + // Return, + // Extern, + // StructDef, EnumDef, LangDef, + // DocTest, + // Use, + // LinkerDirective, + // InlineCCode, + case If: { + auto if_ = Match(ast, If); + bool condition; + type_t *cond_t = get_type(env, if_->condition); + assert(cond_t->tag == BoolType); + eval(env, if_->condition, &condition); + if (condition) { + run(env, if_->body); + } else if (if_->else_body) { + run(env, if_->else_body); + } + break; + } + case While: { + auto while_ = Match(ast, While); + bool condition; + type_t *cond_t = get_type(env, while_->condition); + assert(cond_t->tag == BoolType); + for (;;) { + eval(env, while_->condition, &condition); + if (!condition) break; + run(env, while_->body); + } + break; + } + // case For: { + // auto for_ = Match(ast, For); + // } + default: { + eval(env, ast, NULL); + break; + } + } +} + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wstack-protector" +void eval(env_t *env, ast_t *ast, void *dest) +{ + type_t *t = get_type(env, ast); + size_t size = type_size(t); + switch (ast->tag) { + case None: + if (dest) *(void**)dest = 0; + break; + case Bool: + if (dest) *(bool*)dest = Match(ast, Bool)->b; + break; + case Var: { + if (!dest) return; + repl_binding_t *b = get_repl_binding(env, Match(ast, Var)->name); + if (!b) + repl_err(ast, "No such variable: %s", Match(ast, Var)->name); + memcpy(dest, b->value, size); + break; + } + case Int: { + if (!dest) return; + *(Int_t*)dest = Int$parse(Text$from_str(Match(ast, Int)->str)); break; + break; + } + case Num: { + if (!dest) return; + *(double*)dest = Match(ast, Num)->n; break; + break; + } + case TextLiteral: + if (dest) *(CORD*)dest = Match(ast, TextLiteral)->cord; + break; + case TextJoin: { + CORD ret = CORD_EMPTY; + for (ast_list_t *chunk = Match(ast, TextJoin)->children; chunk; chunk = chunk->next) { + type_t *chunk_t = get_type(env, chunk->ast); + if (chunk_t->tag == TextType) { + CORD c; + eval(env, chunk->ast, &c); + ret = CORD_cat(ret, c); + } else { + size_t chunk_size = type_size(chunk_t); + char buf[chunk_size]; + eval(env, chunk->ast, buf); + ret = CORD_cat(ret, Text$as_c_string(obj_to_text(chunk_t, buf, false))); + } + } + if (dest) *(CORD*)dest = ret; + break; + } + case BinaryOp: { + auto binop = Match(ast, BinaryOp); + if (t->tag == IntType || t->tag == BigIntType) { +#define CASE_OP(OP_NAME, method_name) case BINOP_##OP_NAME: {\ + Int_t lhs = ast_to_int(env, binop->lhs); \ + Int_t rhs = ast_to_int(env, binop->rhs); \ + Int_t result = Int$ ## method_name (lhs, rhs); \ + if (t->tag == BigIntType) {\ + *(Int_t*)dest = result; \ + return; \ + } \ + switch (Match(t, IntType)->bits) { \ + case 64: *(int64_t*)dest = Int64$from_int(result, false); return; \ + case 32: *(int32_t*)dest = Int32$from_int(result, false); return; \ + case 16: *(int16_t*)dest = Int16$from_int(result, false); return; \ + case 8: *(int8_t*)dest = Int8$from_int(result, false); return; \ + default: errx(1, "Invalid int bits"); \ + } \ + break; \ + } + switch (binop->op) { + CASE_OP(MULT, times) CASE_OP(DIVIDE, divided_by) CASE_OP(PLUS, plus) CASE_OP(MINUS, minus) + CASE_OP(RSHIFT, right_shifted) CASE_OP(LSHIFT, left_shifted) + CASE_OP(MOD, modulo) CASE_OP(MOD1, modulo1) + CASE_OP(AND, bit_and) CASE_OP(OR, bit_or) CASE_OP(XOR, bit_xor) + default: break; + } +#undef CASE_OP + } else if (t->tag == NumType) { +#define CASE_OP(OP_NAME, C_OP) case BINOP_##OP_NAME: {\ + double lhs = ast_to_num(env, binop->lhs); \ + double rhs = ast_to_num(env, binop->rhs); \ + if (Match(t, NumType)->bits == 64) \ + *(double*)dest = (double)(lhs C_OP rhs); \ + else \ + *(float*)dest = (float)(lhs C_OP rhs); \ + return; \ + } + switch (binop->op) { + CASE_OP(MULT, *) CASE_OP(DIVIDE, /) CASE_OP(PLUS, +) CASE_OP(MINUS, -) + default: break; + } +#undef CASE_OP + } + switch (binop->op) { + case BINOP_EQ: case BINOP_NE: case BINOP_LT: case BINOP_LE: case BINOP_GT: case BINOP_GE: { + type_t *t_lhs = get_type(env, binop->lhs); + if (!type_eq(t_lhs, get_type(env, binop->rhs))) + repl_err(ast, "Comparisons between different types aren't supported"); + const TypeInfo_t *info = type_to_type_info(t_lhs); + size_t value_size = type_size(t_lhs); + char lhs[value_size], rhs[value_size]; + eval(env, binop->lhs, lhs); + eval(env, binop->rhs, rhs); + int cmp = generic_compare(lhs, rhs, info); + switch (binop->op) { + case BINOP_EQ: *(bool*)dest = (cmp == 0); break; + case BINOP_NE: *(bool*)dest = (cmp != 0); break; + case BINOP_GT: *(bool*)dest = (cmp > 0); break; + case BINOP_GE: *(bool*)dest = (cmp >= 0); break; + case BINOP_LT: *(bool*)dest = (cmp < 0); break; + case BINOP_LE: *(bool*)dest = (cmp <= 0); break; + default: break; + } + break; + } + default: errx(1, "Binary op not implemented for %T: %W", t, ast); + } + break; + } + case Index: { + auto index = Match(ast, Index); + type_t *indexed_t = get_type(env, index->indexed); + // type_t *index_t = get_type(env, index->index); + switch (indexed_t->tag) { + case ArrayType: { + Array_t arr; + eval(env, index->indexed, &arr); + int64_t raw_index = Int64$from_int(ast_to_int(env, index->index), false); + int64_t index_int = raw_index; + if (index_int < 1) index_int = arr.length + index_int + 1; + if (index_int < 1 || index_int > arr.length) + repl_err(index->index, "%ld is an invalid index for an array with length %ld", + raw_index, arr.length); + size_t item_size = type_size(Match(indexed_t, ArrayType)->item_type); + memcpy(dest, arr.data + arr.stride*(index_int-1), item_size); + break; + } + case TableType: { + Table_t table; + eval(env, index->indexed, &table); + type_t *key_type = Match(indexed_t, TableType)->key_type; + size_t key_size = type_size(key_type); + char key_buf[key_size]; + eval(env, index->index, key_buf); + const TypeInfo_t *table_info = type_to_type_info(indexed_t); + memcpy(dest, Table$get(table, key_buf, table_info), key_size); + break; + } + case PointerType: { + auto ptr = Match(indexed_t, PointerType); + size_t pointed_size = type_size(ptr->pointed); + void *pointer; + eval(env, index->indexed, &pointer); + memcpy(dest, pointer, pointed_size); + break; + } + default: errx(1, "Indexing is not supported for %T", indexed_t); + } + break; + } + case Array: { + assert(t->tag == ArrayType); + Array_t arr = {}; + size_t item_size = type_size(Match(t, ArrayType)->item_type); + char item_buf[item_size]; + for (ast_list_t *item = Match(ast, Array)->items; item; item = item->next) { + eval(env, item->ast, item_buf); + Array$insert(&arr, item_buf, I(0), (int64_t)type_size(Match(t, ArrayType)->item_type)); + } + memcpy(dest, &arr, sizeof(Array_t)); + break; + } + case Table: { + assert(t->tag == TableType); + auto table_ast = Match(ast, Table); + Table_t table = {}; + size_t key_size = type_size(Match(t, TableType)->key_type); + size_t value_size = type_size(Match(t, TableType)->value_type); + char key_buf[key_size]; + char value_buf[value_size]; + const TypeInfo_t *table_info = type_to_type_info(t); + assert(table_info->tag == TableInfo); + for (ast_list_t *entry = table_ast->entries; entry; entry = entry->next) { + auto e = Match(entry->ast, TableEntry); + eval(env, e->key, key_buf); + eval(env, e->value, value_buf); + Table$set(&table, key_buf, value_buf, table_info); + } + if (table_ast->fallback) + eval(env, table_ast->fallback, &table.fallback); + memcpy(dest, &table, sizeof(Table_t)); + break; + } + case Block: { + auto block = Match(ast, Block); + for (ast_list_t *stmt = block->statements; stmt; stmt = stmt->next) { + if (stmt->next) + run(env, stmt->ast); + else + eval(env, stmt->ast, dest); + } + break; + } + default: + errx(1, "Eval not implemented for %W", ast); + } +} +#pragma GCC diagnostic pop + +// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 diff --git a/src/repl.h b/src/repl.h new file mode 100644 index 00000000..10ca0cc7 --- /dev/null +++ b/src/repl.h @@ -0,0 +1,5 @@ +#pragma once + +void repl(void); + +// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 diff --git a/src/structs.c b/src/structs.c new file mode 100644 index 00000000..c5f45c59 --- /dev/null +++ b/src/structs.c @@ -0,0 +1,77 @@ +// Logic for compiling new struct types defined in code +#include <ctype.h> +#include <gc/cord.h> +#include <gc.h> +#include <stdio.h> + +#include "ast.h" +#include "stdlib/text.h" +#include "compile.h" +#include "cordhelpers.h" +#include "environment.h" +#include "typecheck.h" +#include "stdlib/util.h" + +CORD compile_struct_typeinfo(env_t *env, type_t *t, const char *name, arg_ast_t *fields, bool is_secret, bool is_opaque) +{ + CORD typeinfo_name = CORD_all(namespace_prefix(env, env->namespace), name, "$$info"); + CORD type_code = Match(t, StructType)->external ? name : CORD_all("struct ", namespace_prefix(env, env->namespace), name, "$$struct"); + + int num_fields = 0; + for (arg_ast_t *f = fields; f; f = f->next) + num_fields += 1; + const char *short_name = name; + if (strchr(short_name, '$')) + short_name = strrchr(short_name, '$') + 1; + + const char *metamethods = is_packed_data(t) ? "PackedData$metamethods" : "Struct$metamethods"; + CORD typeinfo = CORD_asprintf("public const TypeInfo_t %r = {.size=sizeof(%r), .align=__alignof__(%r), .metamethods=%s, " + ".tag=StructInfo, .StructInfo.name=\"%s\"%s%s, " + ".StructInfo.num_fields=%ld", + typeinfo_name, type_code, type_code, metamethods, short_name, is_secret ? ", .StructInfo.is_secret=true" : "", + is_opaque ? ", .StructInfo.is_opaque=true" : "", + num_fields); + if (fields) { + typeinfo = CORD_asprintf("%r, .StructInfo.fields=(NamedType_t[%d]){", typeinfo, num_fields); + for (arg_ast_t *f = fields; f; f = f->next) { + type_t *field_type = get_arg_ast_type(env, f); + typeinfo = CORD_all(typeinfo, "{\"", f->name, "\", ", compile_type_info(field_type), "}"); + if (f->next) typeinfo = CORD_all(typeinfo, ", "); + } + typeinfo = CORD_all(typeinfo, "}"); + } + return CORD_all(typeinfo, "};\n"); +} + +CORD compile_struct_header(env_t *env, ast_t *ast) +{ + auto def = Match(ast, StructDef); + CORD typeinfo_name = CORD_all(namespace_prefix(env, env->namespace), def->name, "$$info"); + CORD type_code = def->external ? def->name : CORD_all("struct ", namespace_prefix(env, env->namespace), def->name, "$$struct"); + + CORD fields = CORD_EMPTY; + for (arg_ast_t *field = def->fields; field; field = field->next) { + type_t *field_t = get_arg_ast_type(env, field); + type_t *check_for_opaque = non_optional(field_t); + if (check_for_opaque->tag == StructType && Match(check_for_opaque, StructType)->opaque) { + if (field->type) + code_err(field->type, "This is an opaque type, so it can't be used as a struct field type"); + else if (field->value) + code_err(field->value, "This is an opaque type, so it can't be used as a struct field type"); + } + fields = CORD_all(fields, compile_declaration(field_t, field->name), field_t->tag == BoolType ? ":1" : CORD_EMPTY, ";\n"); + } + CORD struct_code = def->external ? CORD_EMPTY : CORD_all(type_code, " {\n", fields, "};\n"); + type_t *t = Table$str_get(*env->types, def->name); + + CORD unpadded_size = def->opaque ? CORD_all("sizeof(", type_code, ")") : CORD_asprintf("%zu", unpadded_struct_size(t)); + CORD typeinfo_code = CORD_all("extern const TypeInfo_t ", typeinfo_name, ";\n"); + CORD optional_code = CORD_EMPTY; + if (!def->opaque) { + optional_code = CORD_all("DEFINE_OPTIONAL_TYPE(", compile_type(t), ", ", unpadded_size, ",", + namespace_prefix(env, env->namespace), "$Optional", def->name, "$$type);\n"); + } + return CORD_all(struct_code, optional_code, typeinfo_code); +} + +// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 diff --git a/src/structs.h b/src/structs.h new file mode 100644 index 00000000..328972cd --- /dev/null +++ b/src/structs.h @@ -0,0 +1,13 @@ +#pragma once + +// Compilation of user-defined structs + +#include <gc/cord.h> + +#include "ast.h" +#include "environment.h" + +CORD compile_struct_typeinfo(env_t *env, type_t *t, const char *name, arg_ast_t *fields, bool is_secret, bool is_opaque); +CORD compile_struct_header(env_t *env, ast_t *ast); + +// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 diff --git a/src/tomo.c b/src/tomo.c new file mode 100644 index 00000000..e99f42d9 --- /dev/null +++ b/src/tomo.c @@ -0,0 +1,726 @@ +// The main program that runs compilation +#include <ctype.h> +#include <errno.h> +#include <gc.h> +#include <gc/cord.h> +#include <libgen.h> +#include <printf.h> +#include <spawn.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/stat.h> +#include <sys/wait.h> + +#include "ast.h" +#include "compile.h" +#include "cordhelpers.h" +#include "parse.h" +#include "repl.h" +#include "stdlib/arrays.h" +#include "stdlib/bools.h" +#include "stdlib/bytes.h" +#include "stdlib/datatypes.h" +#include "stdlib/integers.h" +#include "stdlib/optionals.h" +#include "stdlib/patterns.h" +#include "stdlib/paths.h" +#include "stdlib/text.h" +#include "typecheck.h" +#include "types.h" + +#define run_cmd(...) ({ const char *_cmd = heap_strf(__VA_ARGS__); if (verbose) printf("\033[34;1m%s\033[m\n", _cmd); popen(_cmd, "w"); }) +#define array_str(arr) Text$as_c_string(Text$join(Text(" "), arr)) + +static struct stat compiler_stat; + +static const char *paths_str(Array_t paths) { + Text_t result = EMPTY_TEXT; + for (int64_t i = 0; i < paths.length; i++) { + if (i > 0) result = Texts(result, Text(" ")); + result = Texts(result, Path$as_text((Path_t*)(paths.data + i*paths.stride), false, &Path$info)); + } + return Text$as_c_string(result); +} + +static OptionalArray_t files = NONE_ARRAY, + args = NONE_ARRAY, + uninstall = NONE_ARRAY, + libraries = NONE_ARRAY; +static OptionalBool_t verbose = false, + quiet = false, + stop_at_transpile = false, + stop_at_obj_compilation = false, + compile_exe = false, + should_install = false, + run_repl = false, + clean_build = false; + +static OptionalText_t + show_codegen = NONE_TEXT, + cflags = Text("-Werror -fdollars-in-identifiers -std=gnu11 -Wno-trigraphs -fsanitize=signed-integer-overflow -fno-sanitize-recover" + " -fno-signed-zeros -fno-finite-math-only -fno-signaling-nans -fno-trapping-math" + " -D_XOPEN_SOURCE=700 -D_POSIX_C_SOURCE=200809L -D_DEFAULT_SOURCE -fPIC -ggdb" + " -DGC_THREADS" + " -I$HOME/.local/share/tomo/installed"), + ldlibs = Text("-lgc -lgmp -lm -ltomo"), + ldflags = Text("-Wl,-rpath='$ORIGIN',-rpath=$HOME/.local/share/tomo/lib -L. -L$HOME/.local/share/tomo/lib"), + optimization = Text("2"), + cc = Text("gcc"); + +static void transpile_header(env_t *base_env, Path_t path); +static void transpile_code(env_t *base_env, Path_t path); +static void compile_object_file(Path_t path); +static Path_t compile_executable(env_t *base_env, Path_t path, Path_t exe_path, Array_t object_files, Array_t extra_ldlibs); +static void build_file_dependency_graph(Path_t path, Table_t *to_compile, Table_t *to_link); +static Text_t escape_lib_name(Text_t lib_name); +static void build_library(Text_t lib_dir_name); +static void compile_files(env_t *env, Array_t files, Array_t *object_files, Array_t *ldlibs); +static bool is_stale(Path_t path, Path_t relative_to); + +typedef struct { + bool h:1, c:1, o:1; +} staleness_t; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wstack-protector" +int main(int argc, char *argv[]) +{ + // Get the file modification time of the compiler, so we + // can recompile files after changing the compiler: + if (stat(argv[0], &compiler_stat) != 0) + err(1, "Could not find age of compiler"); + + if (register_printf_specifier('T', printf_type, printf_pointer_size)) + errx(1, "Couldn't set printf specifier"); + if (register_printf_specifier('W', printf_ast, printf_pointer_size)) + errx(1, "Couldn't set printf specifier"); + if (register_printf_specifier('k', printf_text, printf_text_size)) + errx(1, "Couldn't set printf specifier"); + + // Run a tool: + if ((streq(argv[1], "-r") || streq(argv[1], "--run")) && argc >= 3) { + if (strcspn(argv[2], "/;$") == strlen(argv[2])) { + const char *program = heap_strf("%s/.local/share/tomo/installed/%s/%s", getenv("HOME"), argv[2], argv[2]); + execv(program, &argv[2]); + } + errx(1, "This is not an installed tomo program: \033[31;1m%s\033[m", argv[2]); + } + + Text_t usage = Text("\x1b[33;4;1mUsage:\x1b[m\n" + "\x1b[1mRun a program:\x1b[m tomo file.tm [-- args...]\n" + "\x1b[1mTranspile files:\x1b[m tomo -t file.tm...\n" + "\x1b[1mCompile object files:\x1b[m tomo -c file.tm...\n" + "\x1b[1mCompile executables:\x1b[m tomo -e file.tm...\n" + "\x1b[1mBuild libraries:\x1b[m tomo -L lib...\n" + "\x1b[1mUninstall libraries:\x1b[m tomo -u lib...\n" + "\x1b[1mOther flags:\x1b[m\n" + " --verbose|-v: verbose output\n" + " --quiet|-q: quiet output\n" + " --install|-I: install the executable or library\n" + " --c-compiler <compiler>: the C compiler to use (default: cc)\n" + " --optimization|-O <level>: set optimization level\n" + " --run|-r: run a program from ~/.local/share/tomo/installed\n" + ); + Text_t help = Texts(Text("\x1b[1mtomo\x1b[m: a compiler for the Tomo programming language"), Text("\n\n"), usage); + tomo_parse_args( + argc, argv, usage, help, + {"files", true, Array$info(&Path$info), &files}, + {"args", true, Array$info(&Text$info), &args}, + {"verbose", false, &Bool$info, &verbose}, + {"v", false, &Bool$info, &verbose}, + {"quiet", false, &Bool$info, &quiet}, + {"q", false, &Bool$info, &quiet}, + {"transpile", false, &Bool$info, &stop_at_transpile}, + {"t", false, &Bool$info, &stop_at_transpile}, + {"compile-obj", false, &Bool$info, &stop_at_obj_compilation}, + {"c", false, &Bool$info, &stop_at_obj_compilation}, + {"compile-exe", false, &Bool$info, &compile_exe}, + {"e", false, &Bool$info, &compile_exe}, + {"uninstall", false, Array$info(&Text$info), &uninstall}, + {"u", false, Array$info(&Text$info), &uninstall}, + {"library", false, Array$info(&Path$info), &libraries}, + {"L", false, Array$info(&Path$info), &libraries}, + {"show-codegen", false, &Text$info, &show_codegen}, + {"C", false, &Text$info, &show_codegen}, + {"repl", false, &Bool$info, &run_repl}, + {"R", false, &Bool$info, &run_repl}, + {"install", false, &Bool$info, &should_install}, + {"I", false, &Bool$info, &should_install}, + {"c-compiler", false, &Text$info, &cc}, + {"optimization", false, &Text$info, &optimization}, + {"O", false, &Text$info, &optimization}, + {"force-rebuild", false, &Bool$info, &clean_build}, + {"f", false, &Bool$info, &clean_build}, + ); + + if (show_codegen.length > 0 && Text$equal_values(show_codegen, Text("pretty"))) + show_codegen = Text("sed '/^#line/d;/^$/d' | indent -o /dev/stdout | bat -l c -P"); + + for (int64_t i = 0; i < uninstall.length; i++) { + Text_t *u = (Text_t*)(uninstall.data + i*uninstall.stride); + system(heap_strf("rm -rvf ~/.local/share/tomo/installed/%k ~/.local/share/tomo/lib/lib%k.so", u, u)); + printf("Uninstalled %k\n", u); + } + + for (int64_t i = 0; i < libraries.length; i++) { + Path_t *lib = (Path_t*)(libraries.data + i*libraries.stride); + const char *lib_str = Path$as_c_string(*lib); + char *cwd = get_current_dir_name(); + if (chdir(lib_str) != 0) + errx(1, "Could not enter directory: %s", lib_str); + + char *libdir = get_current_dir_name(); + char *libdirname = basename(libdir); + build_library(Text$from_str(libdirname)); + free(libdir); + chdir(cwd); + free(cwd); + } + + // TODO: REPL + if (run_repl) { + repl(); + return 0; + } + + if (files.length <= 0 && (uninstall.length > 0 || libraries.length > 0)) { + return 0; + } + + // Convert `foo` to `foo/foo.tm` + for (int64_t i = 0; i < files.length; i++) { + Path_t *path = (Path_t*)(files.data + i*files.stride); + if (Path$is_directory(*path, true)) + *path = Path$with_component(*path, Texts(Path$base_name(*path), Text(".tm"))); + } + + if (files.length < 1) + errx(1, "No file specified!"); + else if (files.length != 1) + errx(1, "Too many files specified!"); + + quiet = !verbose; + + for (int64_t i = 0; i < files.length; i++) { + Path_t path = *(Path_t*)(files.data + i*files.stride); + Path_t exe_path = compile_exe ? Path$with_extension(path, Text(""), true) + : Path$write_unique_bytes(Path("/tmp/tomo-exe-XXXXXX"), (Array_t){}); + + pid_t child = fork(); + if (child == 0) { + env_t *env = global_env(); + Array_t object_files = {}, + extra_ldlibs = {}; + compile_files(env, files, &object_files, &extra_ldlibs); + compile_executable(env, path, exe_path, object_files, extra_ldlibs); + + if (compile_exe) + _exit(0); + + char *prog_args[1 + args.length + 1]; + prog_args[0] = (char*)Path$as_c_string(exe_path); + for (int64_t j = 0; j < args.length; j++) + prog_args[j + 1] = Text$as_c_string(*(Text_t*)(args.data + j*args.stride)); + prog_args[1 + args.length] = NULL; + execv(prog_args[0], prog_args); + err(1, "Could not execute program: %s", prog_args[0]); + } + + int status; + while (waitpid(child, &status, 0) < 0 && errno == EINTR) { + if (WIFEXITED(status) || WIFSIGNALED(status)) + break; + else if (WIFSTOPPED(status)) + kill(child, SIGCONT); + } + + if (!compile_exe) + Path$remove(exe_path, true); + + if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) { + _exit(WIFEXITED(status) ? WEXITSTATUS(status) : EXIT_FAILURE); + } + } + + if (compile_exe && should_install) { + for (int64_t i = 0; i < files.length; i++) { + Path_t path = *(Path_t*)(files.data + i*files.stride); + Path_t exe = Path$with_extension(path, Text(""), true); + system(heap_strf("cp -v '%s' ~/.local/bin/", Path$as_c_string(exe))); + } + } + return 0; +} +#pragma GCC diagnostic pop + +Text_t escape_lib_name(Text_t lib_name) +{ + return Text$replace(lib_name, Pattern("{1+ !alphanumeric}"), Text("_"), Pattern(""), false); +} + +static Path_t build_file(Path_t path, const char *extension) +{ + Path_t build_dir = Path$with_component(Path$parent(path), Text(".build")); + if (mkdir(Path$as_c_string(build_dir), 0777) != 0) { + if (errno != EEXIST) + err(1, "Could not make .build directory"); + } + return Path$with_component(build_dir, Texts(Path$base_name(path), Text$from_str(extension))); +} + +typedef struct { + env_t *env; + Table_t *used_imports; + FILE *output; +} libheader_info_t; + +static void _compile_statement_header_for_library(libheader_info_t *info, ast_t *ast) +{ + if (ast->tag == Declare && Match(ast, Declare)->value->tag == Use) + ast = Match(ast, Declare)->value; + + if (ast->tag == Use) { + auto use = Match(ast, Use); + if (use->what == USE_LOCAL) + return; + + Path_t path = Path$from_str(use->path); + if (!Table$get(*info->used_imports, &path, Table$info(&Path$info, &Path$info))) { + Table$set(info->used_imports, &path, ((Bool_t[1]){1}), Table$info(&Text$info, &Bool$info)); + CORD_put(compile_statement_type_header(info->env, ast), info->output); + CORD_put(compile_statement_namespace_header(info->env, ast), info->output); + } + } else { + CORD_put(compile_statement_type_header(info->env, ast), info->output); + CORD_put(compile_statement_namespace_header(info->env, ast), info->output); + } +} + +static void _make_typedefs_for_library(libheader_info_t *info, ast_t *ast) +{ + if (ast->tag == StructDef) { + auto def = Match(ast, StructDef); + CORD full_name = CORD_cat(namespace_prefix(info->env, info->env->namespace), def->name); + CORD_put(CORD_all("typedef struct ", full_name, "$$struct ", full_name, "$$type;\n"), info->output); + } else if (ast->tag == EnumDef) { + auto def = Match(ast, EnumDef); + CORD full_name = CORD_cat(namespace_prefix(info->env, info->env->namespace), def->name); + CORD_put(CORD_all("typedef struct ", full_name, "$$struct ", full_name, "$$type;\n"), info->output); + + for (tag_ast_t *tag = def->tags; tag; tag = tag->next) { + if (!tag->fields) continue; + CORD_put(CORD_all("typedef struct ", full_name, "$", tag->name, "$$struct ", full_name, "$", tag->name, "$$type;\n"), info->output); + } + } else if (ast->tag == LangDef) { + auto def = Match(ast, LangDef); + CORD_put(CORD_all("typedef Text_t ", namespace_prefix(info->env, info->env->namespace), def->name, "$$type;\n"), info->output); + } +} + +static void _compile_file_header_for_library(env_t *env, Path_t path, Table_t *visited_files, Table_t *used_imports, FILE *output) +{ + if (Table$get(*visited_files, &path, Table$info(&Path$info, &Bool$info))) + return; + + Table$set(visited_files, &path, ((Bool_t[1]){1}), Table$info(&Path$info, &Bool$info)); + + ast_t *file_ast = parse_file(Path$as_c_string(path), NULL); + if (!file_ast) errx(1, "Could not parse file %s", Path$as_c_string(path)); + env_t *module_env = load_module_env(env, file_ast); + + libheader_info_t info = { + .env=module_env, + .used_imports=used_imports, + .output=output, + }; + + // Visit files in topological order: + for (ast_list_t *stmt = Match(file_ast, Block)->statements; stmt; stmt = stmt->next) { + ast_t *ast = stmt->ast; + if (ast->tag == Declare) + ast = Match(ast, Declare)->value; + if (ast->tag != Use) continue; + + auto use = Match(ast, Use); + if (use->what == USE_LOCAL) { + Path_t resolved = Path$resolved(Path$from_str(use->path), Path("./")); + _compile_file_header_for_library(env, resolved, visited_files, used_imports, output); + } + } + + visit_topologically( + Match(file_ast, Block)->statements, (Closure_t){.fn=(void*)_make_typedefs_for_library, &info}); + visit_topologically( + Match(file_ast, Block)->statements, (Closure_t){.fn=(void*)_compile_statement_header_for_library, &info}); + + CORD_fprintf(output, "void %r$initialize(void);\n", namespace_prefix(module_env, module_env->namespace)); +} + +void build_library(Text_t lib_dir_name) +{ + Array_t tm_files = Path$glob(Path("./[!._0-9]*.tm")); + env_t *env = fresh_scope(global_env()); + Array_t object_files = {}, + extra_ldlibs = {}; + compile_files(env, tm_files, &object_files, &extra_ldlibs); + + // Library name replaces all stretchs of non-alphanumeric chars with an underscore + // So e.g. https://github.com/foo/baz --> https_github_com_foo_baz + env->libname = Text$as_c_string(escape_lib_name(lib_dir_name)); + + // Build a "whatever.h" header that loads all the headers: + FILE *header = fopen(heap_strf("%k.h", &lib_dir_name), "w"); + fputs("#pragma once\n", header); + fputs("#include <tomo/tomo.h>\n", header); + Table_t visited_files = {}; + Table_t used_imports = {}; + for (int64_t i = 0; i < tm_files.length; i++) { + Path_t f = *(Path_t*)(tm_files.data + i*tm_files.stride); + Path_t resolved = Path$resolved(f, Path(".")); + _compile_file_header_for_library(env, resolved, &visited_files, &used_imports, header); + } + if (fclose(header) == -1) + errx(1, "Failed to write header file: %k.h", &lib_dir_name); + + // Build up a list of symbol renamings: + unlink(".build/symbol_renames.txt"); + FILE *prog; + for (int64_t i = 0; i < tm_files.length; i++) { + Path_t f = *(Path_t*)(tm_files.data + i*tm_files.stride); + prog = run_cmd("nm -Ug -fjust-symbols '%s' | sed -n 's/_\\$\\(.*\\)/\\0 _$%s$\\1/p' >>.build/symbol_renames.txt", + Path$as_c_string(build_file(f, ".o")), CORD_to_const_char_star(env->libname)); + if (!prog) errx(1, "Could not find symbols!"); + int status = pclose(prog); + if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) + errx(WEXITSTATUS(status), "Failed to create symbol rename table with `nm` and `sed`"); + } + + prog = run_cmd("%k -O%k %k %k %k %s -Wl,-soname='lib%k.so' -shared %s -o 'lib%k.so'", + &cc, &optimization, &cflags, &ldflags, &ldlibs, array_str(extra_ldlibs), &lib_dir_name, + paths_str(object_files), &lib_dir_name); + if (!prog) + errx(1, "Failed to run C compiler: %k", &cc); + int status = pclose(prog); + if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) + exit(EXIT_FAILURE); + + if (!quiet) + printf("Compiled library:\tlib%k.so\n", &lib_dir_name); + + prog = run_cmd("objcopy --redefine-syms=.build/symbol_renames.txt 'lib%k.so'", &lib_dir_name); + status = pclose(prog); + if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) + errx(WEXITSTATUS(status), "Failed to run `objcopy` to add library prefix to symbols"); + + prog = run_cmd("patchelf --rename-dynamic-symbols .build/symbol_renames.txt 'lib%k.so'", &lib_dir_name); + status = pclose(prog); + if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) + errx(WEXITSTATUS(status), "Failed to run `patchelf` to rename dynamic symbols with library prefix"); + + if (verbose) + CORD_printf("Successfully renamed symbols with library prefix!\n"); + + // unlink(".build/symbol_renames.txt"); + + if (should_install) { + char *library_directory = get_current_dir_name(); + const char *dest = heap_strf("%s/.local/share/tomo/installed/%k", getenv("HOME"), &lib_dir_name); + if (!streq(library_directory, dest)) { + system(heap_strf("rm -rf '%s'", dest)); + system(heap_strf("mkdir -p '%s'", dest)); + system(heap_strf("cp -r * '%s/'", dest)); + } + system("mkdir -p ~/.local/share/tomo/lib/"); + system(heap_strf("ln -f -s ../installed/'%k'/lib'%k'.so ~/.local/share/tomo/lib/lib'%k'.so", + &lib_dir_name, &lib_dir_name, &lib_dir_name)); + printf("Installed \033[1m%k\033[m to ~/.local/share/tomo/installed\n", &lib_dir_name); + free(library_directory); + } +} + +void compile_files(env_t *env, Array_t to_compile, Array_t *object_files, Array_t *extra_ldlibs) +{ + Table_t to_link = {}; + Table_t dependency_files = {}; + for (int64_t i = 0; i < to_compile.length; i++) { + Path_t filename = *(Path_t*)(to_compile.data + i*to_compile.stride); + Text_t extension = Path$extension(filename, true); + if (!Text$equal_values(extension, Text("tm"))) + errx(1, "Not a valid .tm file: \x1b[31;1m%s\x1b[m", Path$as_c_string(filename)); + Path_t resolved = Path$resolved(filename, Path("./")); + if (!Path$is_file(resolved, true)) + errx(1, "Couldn't find file: %s", Path$as_c_string(resolved)); + build_file_dependency_graph(resolved, &dependency_files, &to_link); + } + + int status; + // (Re)compile header files, eagerly for explicitly passed in files, lazily + // for downstream dependencies: + for (int64_t i = 0; i < dependency_files.entries.length; i++) { + struct { + Path_t filename; + staleness_t staleness; + } *entry = (dependency_files.entries.data + i*dependency_files.entries.stride); + if (entry->staleness.h) { + transpile_header(env, entry->filename); + entry->staleness.o = true; + } + } + + env->imports = new(Table_t); + + struct child_s { + struct child_s *next; + pid_t pid; + } *child_processes = NULL; + + // (Re)transpile and compile object files, eagerly for files explicitly + // specified and lazily for downstream dependencies: + for (int64_t i = 0; i < dependency_files.entries.length; i++) { + struct { + Path_t filename; + staleness_t staleness; + } *entry = (dependency_files.entries.data + i*dependency_files.entries.stride); + if (!clean_build && !entry->staleness.c && !entry->staleness.h) + continue; + + pid_t pid = fork(); + if (pid == 0) { + transpile_code(env, entry->filename); + if (!stop_at_transpile) + compile_object_file(entry->filename); + _exit(EXIT_SUCCESS); + } + child_processes = new(struct child_s, .next=child_processes, .pid=pid); + } + + for (; child_processes; child_processes = child_processes->next) { + waitpid(child_processes->pid, &status, 0); + if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) + exit(EXIT_FAILURE); + else if (WIFSTOPPED(status)) + kill(child_processes->pid, SIGCONT); + } + + if (object_files) { + for (int64_t i = 0; i < dependency_files.entries.length; i++) { + Path_t path = *(Path_t*)(dependency_files.entries.data + i*dependency_files.entries.stride); + path = build_file(path, ".o"); + Array$insert(object_files, &path, I(0), sizeof(Path_t)); + } + } + if (extra_ldlibs) { + for (int64_t i = 0; i < to_link.entries.length; i++) { + Text_t lib = *(Text_t*)(to_link.entries.data + i*to_link.entries.stride); + Array$insert(extra_ldlibs, &lib, I(0), sizeof(Text_t)); + } + } +} + +void build_file_dependency_graph(Path_t path, Table_t *to_compile, Table_t *to_link) +{ + if (Table$get(*to_compile, &path, Table$info(&Path$info, &Byte$info))) + return; + + staleness_t staleness = { + .h=is_stale(build_file(path, ".h"), path), + .c=is_stale(build_file(path, ".c"), path), + }; + staleness.o = staleness.c || staleness.h + || is_stale(build_file(path, ".o"), build_file(path, ".c")) + || is_stale(build_file(path, ".o"), build_file(path, ".h")); + Table$set(to_compile, &path, &staleness, Table$info(&Path$info, &Byte$info)); + + assert(Text$equal_values(Path$extension(path, true), Text("tm"))); + + ast_t *ast = parse_file(Path$as_c_string(path), NULL); + if (!ast) + errx(1, "Could not parse file %s", Path$as_c_string(path)); + + for (ast_list_t *stmt = Match(ast, Block)->statements; stmt; stmt = stmt->next) { + ast_t *stmt_ast = stmt->ast; + if (stmt_ast->tag == Declare) + stmt_ast = Match(stmt_ast, Declare)->value; + + if (stmt_ast->tag != Use) continue; + auto use = Match(stmt_ast, Use); + + switch (use->what) { + case USE_LOCAL: { + Path_t dep_tm = Path$resolved(Path$from_str(use->path), Path$parent(path)); + if (is_stale(build_file(path, ".h"), dep_tm)) + staleness.h = true; + if (is_stale(build_file(path, ".c"), dep_tm)) + staleness.c = true; + if (staleness.c || staleness.h) + staleness.o = true; + Table$set(to_compile, &path, &staleness, Table$info(&Path$info, &Byte$info)); + build_file_dependency_graph(dep_tm, to_compile, to_link); + break; + } + case USE_MODULE: { + Text_t lib = Text$format("'%s/.local/share/tomo/installed/%s/lib%s.so'", getenv("HOME"), use->path, use->path); + Table$set(to_link, &lib, ((Bool_t[1]){1}), Table$info(&Text$info, &Bool$info)); + + Array_t children = Path$glob(Path$from_str(heap_strf("%s/.local/share/tomo/installed/%s/*.tm", getenv("HOME"), use->path))); + for (int64_t i = 0; i < children.length; i++) { + Path_t *child = (Path_t*)(children.data + i*children.stride); + Table_t discarded = {.fallback=to_compile}; + build_file_dependency_graph(*child, &discarded, to_link); + } + break; + } + case USE_SHARED_OBJECT: { + Text_t lib = Text$format("-l:%s", use->path); + Table$set(to_link, &lib, ((Bool_t[1]){1}), Table$info(&Text$info, &Bool$info)); + break; + } + case USE_ASM: { + Text_t linker_text = Path$as_text(&use->path, false, &Path$info); + Table$set(to_link, &linker_text, ((Bool_t[1]){1}), Table$info(&Text$info, &Bool$info)); + break; + } + default: case USE_HEADER: break; + } + } +} + +bool is_stale(Path_t path, Path_t relative_to) +{ + struct stat target_stat; + if (stat(Path$as_c_string(path), &target_stat) != 0) + return true; + + // Any file older than the compiler is stale: + if (target_stat.st_mtime < compiler_stat.st_mtime) + return true; + + struct stat relative_to_stat; + if (stat(Path$as_c_string(relative_to), &relative_to_stat) != 0) + errx(1, "File doesn't exist: %s", Path$as_c_string(relative_to)); + return target_stat.st_mtime < relative_to_stat.st_mtime; +} + +void transpile_header(env_t *base_env, Path_t path) +{ + Path_t h_filename = build_file(path, ".h"); + ast_t *ast = parse_file(Path$as_c_string(path), NULL); + if (!ast) + errx(1, "Could not parse file %s", Path$as_c_string(path)); + + env_t *module_env = load_module_env(base_env, ast); + + CORD h_code = compile_file_header(module_env, ast); + + FILE *header = fopen(Path$as_c_string(h_filename), "w"); + if (!header) + errx(1, "Failed to open header file: %s", Path$as_c_string(h_filename)); + CORD_put(h_code, header); + if (fclose(header) == -1) + errx(1, "Failed to write header file: %s", Path$as_c_string(h_filename)); + + if (!quiet) + printf("Transpiled header:\t%s\n", Path$as_c_string(h_filename)); + + if (show_codegen.length > 0) + system(heap_strf("<%s %k", Path$as_c_string(h_filename), &show_codegen)); +} + +void transpile_code(env_t *base_env, Path_t path) +{ + Path_t c_filename = build_file(path, ".c"); + ast_t *ast = parse_file(Path$as_c_string(path), NULL); + if (!ast) + errx(1, "Could not parse file %s", Path$as_c_string(path)); + + env_t *module_env = load_module_env(base_env, ast); + + CORD c_code = compile_file(module_env, ast); + + FILE *c_file = fopen(Path$as_c_string(c_filename), "w"); + if (!c_file) + errx(1, "Failed to write C file: %s", Path$as_c_string(c_filename)); + + CORD_put(c_code, c_file); + + binding_t *main_binding = get_binding(module_env, "main"); + if (main_binding && main_binding->type->tag == FunctionType) { + type_t *ret = Match(main_binding->type, FunctionType)->ret; + if (ret->tag != VoidType && ret->tag != AbortType) + compiler_err(ast->file, ast->start, ast->end, "The main() function in this file has a return type of %T, but it should not have any return value!", ret); + + CORD_put(CORD_all( + "int ", main_binding->code, "$parse_and_run(int argc, char *argv[]) {\n" + "#line 1\n" + "tomo_init();\n", + "_$", module_env->namespace->name, "$$initialize();\n" + "\n", + compile_cli_arg_call(module_env, main_binding->code, main_binding->type), + "return 0;\n" + "}\n"), c_file); + } + + if (fclose(c_file) == -1) + errx(1, "Failed to output C code to %s", Path$as_c_string(c_filename)); + + if (!quiet) + printf("Transpiled code:\t%s\n", Path$as_c_string(c_filename)); + + if (show_codegen.length > 0) + system(heap_strf("<%s %k", Path$as_c_string(c_filename), &show_codegen)); +} + +void compile_object_file(Path_t path) +{ + Path_t obj_file = build_file(path, ".o"); + Path_t c_file = build_file(path, ".c"); + + FILE *prog = run_cmd("%k %k -O%k -c %s -o %s", + &cc, &cflags, &optimization, Path$as_c_string(c_file), Path$as_c_string(obj_file)); + if (!prog) + errx(1, "Failed to run C compiler: %k", &cc); + int status = pclose(prog); + if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) + exit(EXIT_FAILURE); + + if (!quiet) + printf("Compiled object:\t%s\n", Path$as_c_string(obj_file)); +} + +Path_t compile_executable(env_t *base_env, Path_t path, Path_t exe_path, Array_t object_files, Array_t extra_ldlibs) +{ + ast_t *ast = parse_file(Path$as_c_string(path), NULL); + if (!ast) + errx(1, "Could not parse file %s", Path$as_c_string(path)); + env_t *env = load_module_env(base_env, ast); + binding_t *main_binding = get_binding(env, "main"); + if (!main_binding || main_binding->type->tag != FunctionType) + errx(1, "No main() function has been defined for %s, so it can't be run!", Path$as_c_string(path)); + + FILE *runner = run_cmd("%k %k -O%k %k %k %s %s -x c - -o %s", + &cc, &cflags, &optimization, &ldflags, &ldlibs, + array_str(extra_ldlibs), paths_str(object_files), Path$as_c_string(exe_path)); + CORD program = CORD_all( + "extern int ", main_binding->code, "$parse_and_run(int argc, char *argv[]);\n" + "int main(int argc, char *argv[]) {\n" + "\treturn ", main_binding->code, "$parse_and_run(argc, argv);\n" + "}\n" + ); + + if (show_codegen.length > 0) { + FILE *out = run_cmd("%k", &show_codegen); + CORD_put(program, out); + pclose(out); + } + + CORD_put(program, runner); + int status = pclose(runner); + if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) + exit(EXIT_FAILURE); + + if (!quiet) + printf("Compiled executable:\t%s\n", Path$as_c_string(exe_path)); + return exe_path; +} + +// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 diff --git a/src/typecheck.c b/src/typecheck.c new file mode 100644 index 00000000..0d0690bb --- /dev/null +++ b/src/typecheck.c @@ -0,0 +1,1578 @@ +// Logic for getting a type from an AST node +#include <ctype.h> +#include <gc.h> +#include <glob.h> +#include <signal.h> +#include <stdarg.h> +#include <stdlib.h> +#include <string.h> +#include <sys/stat.h> + +#include "ast.h" +#include "cordhelpers.h" +#include "environment.h" +#include "parse.h" +#include "stdlib/patterns.h" +#include "stdlib/tables.h" +#include "stdlib/text.h" +#include "stdlib/util.h" +#include "typecheck.h" +#include "types.h" + +type_t *parse_type_ast(env_t *env, type_ast_t *ast) +{ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wswitch-default" + switch (ast->tag) { + case VarTypeAST: { + const char *name = Match(ast, VarTypeAST)->name; + type_t *t = Table$str_get(*env->types, name); + if (t) return t; + while (strchr(name, '.')) { + char *module_name = GC_strndup(name, strcspn(name, ".")); + binding_t *b = get_binding(env, module_name); + if (!b || b->type->tag != ModuleType) + code_err(ast, "I don't know a module with the name '%s'", module_name); + + env_t *imported = Table$str_get(*env->imports, Match(b->type, ModuleType)->name); + assert(imported); + env = imported; + name = strchr(name, '.') + 1; + t = Table$str_get(*env->types, name); + if (t) return t; + } + code_err(ast, "I don't know a type with the name '%s'", name); + } + case PointerTypeAST: { + auto ptr = Match(ast, PointerTypeAST); + type_t *pointed_t = parse_type_ast(env, ptr->pointed); + if (pointed_t->tag == VoidType) + code_err(ast, "Void pointers are not supported. You probably meant 'Memory' instead of 'Void'"); + return Type(PointerType, .pointed=pointed_t, .is_stack=ptr->is_stack); + } + case ArrayTypeAST: { + type_ast_t *item_type = Match(ast, ArrayTypeAST)->item; + type_t *item_t = parse_type_ast(env, item_type); + if (!item_t) code_err(item_type, "I can't figure out what this type is."); + if (has_stack_memory(item_t)) + code_err(item_type, "Arrays can't have stack references because the array may outlive the stack frame."); + if (type_size(item_t) > ARRAY_MAX_STRIDE) + code_err(ast, "This array holds items that take up %ld bytes, but the maximum supported size is %ld bytes. Consider using an array of pointers instead.", + type_size(item_t), ARRAY_MAX_STRIDE); + return Type(ArrayType, .item_type=item_t); + } + case SetTypeAST: { + type_ast_t *item_type = Match(ast, SetTypeAST)->item; + type_t *item_t = parse_type_ast(env, item_type); + if (!item_t) code_err(item_type, "I can't figure out what this type is."); + if (has_stack_memory(item_t)) + code_err(item_type, "Sets can't have stack references because the array may outlive the stack frame."); + if (type_size(item_t) > ARRAY_MAX_STRIDE) + code_err(ast, "This set holds items that take up %ld bytes, but the maximum supported size is %ld bytes. Consider using an set of pointers instead.", + type_size(item_t), ARRAY_MAX_STRIDE); + return Type(SetType, .item_type=item_t); + } + case TableTypeAST: { + auto table_type = Match(ast, TableTypeAST); + type_ast_t *key_type_ast = table_type->key; + type_t *key_type = parse_type_ast(env, key_type_ast); + if (!key_type) code_err(key_type_ast, "I can't figure out what type this is."); + if (has_stack_memory(key_type)) + code_err(key_type_ast, "Tables can't have stack references because the array may outlive the stack frame."); + if (table_type->value) { + type_t *val_type = parse_type_ast(env, table_type->value); + if (!val_type) code_err(table_type->value, "I can't figure out what type this is."); + if (has_stack_memory(val_type)) + code_err(table_type->value, "Tables can't have stack references because the array may outlive the stack frame."); + else if (val_type->tag == OptionalType) + code_err(ast, "Tables with optional-typed values are not currently supported"); + return Type(TableType, .key_type=key_type, .value_type=val_type, .env=env); + } else if (table_type->default_value) { + type_t *t = Type(TableType, .key_type=key_type, + .value_type=get_type(env, table_type->default_value), .default_value=table_type->default_value, + .env=env); + if (has_stack_memory(t)) + code_err(ast, "Tables can't have stack references because the array may outlive the stack frame."); + return t; + } else { + code_err(ast, "No value type or default value!"); + } + } + case FunctionTypeAST: { + auto fn = Match(ast, FunctionTypeAST); + type_t *ret_t = fn->ret ? parse_type_ast(env, fn->ret) : Type(VoidType); + if (has_stack_memory(ret_t)) + code_err(fn->ret, "Functions are not allowed to return stack references, because the reference may no longer exist on the stack."); + arg_t *type_args = NULL; + for (arg_ast_t *arg = fn->args; arg; arg = arg->next) { + type_args = new(arg_t, .name=arg->name, .next=type_args); + if (arg->type) { + type_args->type = parse_type_ast(env, arg->type); + } else { + type_args->default_val = arg->value; + type_args->type = get_type(env, arg->value); + } + } + REVERSE_LIST(type_args); + return Type(ClosureType, Type(FunctionType, .args=type_args, .ret=ret_t)); + } + case OptionalTypeAST: { + auto opt = Match(ast, OptionalTypeAST); + type_t *t = parse_type_ast(env, opt->type); + if (t->tag == VoidType || t->tag == AbortType || t->tag == ReturnType) + code_err(ast, "Optional %T types are not supported.", t); + else if (t->tag == OptionalType) + code_err(ast, "Nested optional types are not currently supported"); + return Type(OptionalType, .type=t); + } + case MutexedTypeAST: { + type_ast_t *mutexed = Match(ast, MutexedTypeAST)->type; + type_t *t = parse_type_ast(env, mutexed); + if (t->tag == VoidType || t->tag == AbortType || t->tag == ReturnType) + code_err(ast, "Mutexed %T types are not supported.", t); + return Type(MutexedType, .type=t); + } + case UnknownTypeAST: code_err(ast, "I don't know how to get this type"); + } +#pragma GCC diagnostic pop + errx(1, "Unreachable"); +} + +static PUREFUNC bool risks_zero_or_inf(ast_t *ast) +{ + switch (ast->tag) { + case Int: { + const char *str = Match(ast, Int)->str; + OptionalInt_t int_val = Int$from_str(str); + return (int_val.small == 0x1); // zero + } + case Num: { + return Match(ast, Num)->n == 0.0; + } + case BinaryOp: { + auto binop = Match(ast, BinaryOp); + if (binop->op == BINOP_MULT || binop->op == BINOP_DIVIDE || binop->op == BINOP_MIN || binop->op == BINOP_MAX) + return risks_zero_or_inf(binop->lhs) || risks_zero_or_inf(binop->rhs); + else + return true; + } + default: return true; + } +} + +PUREFUNC type_t *get_math_type(env_t *env, ast_t *ast, type_t *lhs_t, type_t *rhs_t) +{ + (void)env; + switch (compare_precision(lhs_t, rhs_t)) { + case NUM_PRECISION_EQUAL: case NUM_PRECISION_MORE: return lhs_t; + case NUM_PRECISION_LESS: return rhs_t; + default: code_err(ast, "Math operations between %T and %T are not supported", lhs_t, rhs_t); + } +} + +static env_t *load_module(env_t *env, ast_t *module_ast) +{ + auto use = Match(module_ast, Use); + switch (use->what) { + case USE_LOCAL: { + const char *resolved_path = resolve_path(use->path, module_ast->file->filename, module_ast->file->filename); + env_t *module_env = Table$str_get(*env->imports, resolved_path); + if (module_env) + return module_env; + + if (!resolved_path) + code_err(module_ast, "No such file exists: \"%s\"", use->path); + + ast_t *ast = parse_file(resolved_path, NULL); + if (!ast) errx(1, "Could not compile file %s", resolved_path); + return load_module_env(env, ast); + } + case USE_MODULE: { + glob_t tm_files; + if (glob(heap_strf("~/.local/share/tomo/installed/%s/[!._0-9]*.tm", use->path), GLOB_TILDE, NULL, &tm_files) != 0) + code_err(module_ast, "Could not find library"); + + env_t *module_env = fresh_scope(env); + Table$str_set(env->imports, use->path, module_env); + char *libname_id = Text$as_c_string( + Text$replace(Text$from_str(use->path), Pattern("{1+ !alphanumeric}"), Text("_"), Pattern(""), false)); + module_env->libname = libname_id; + for (size_t i = 0; i < tm_files.gl_pathc; i++) { + const char *filename = tm_files.gl_pathv[i]; + ast_t *ast = parse_file(filename, NULL); + if (!ast) errx(1, "Could not compile file %s", filename); + env_t *module_file_env = fresh_scope(module_env); + char *file_prefix = file_base_id(filename); + module_file_env->namespace = new(namespace_t, .name=file_prefix); + env_t *subenv = load_module_env(module_file_env, ast); + for (int64_t j = 0; j < subenv->locals->entries.length; j++) { + struct { + const char *name; binding_t *binding; + } *entry = subenv->locals->entries.data + j*subenv->locals->entries.stride; + Table$str_set(module_env->locals, entry->name, entry->binding); + } + } + globfree(&tm_files); + return module_env; + } + default: return NULL; + } +} + +void prebind_statement(env_t *env, ast_t *statement) +{ + switch (statement->tag) { + case DocTest: { + prebind_statement(env, Match(statement, DocTest)->expr); + break; + } + case StructDef: { + auto def = Match(statement, StructDef); + if (get_binding(env, def->name)) + code_err(statement, "A %T called '%s' has already been defined", get_binding(env, def->name)->type, def->name); + + env_t *ns_env = namespace_env(env, def->name); + type_t *type = Type(StructType, .name=def->name, .opaque=true, .external=def->external, .env=ns_env); // placeholder + Table$str_set(env->types, def->name, type); + set_binding(env, def->name, Type(TypeInfoType, .name=def->name, .type=type, .env=ns_env), + CORD_all(namespace_prefix(env, env->namespace), def->name, "$$info")); + for (ast_list_t *stmt = def->namespace ? Match(def->namespace, Block)->statements : NULL; stmt; stmt = stmt->next) + prebind_statement(ns_env, stmt->ast); + break; + } + case EnumDef: { + auto def = Match(statement, EnumDef); + if (get_binding(env, def->name)) + code_err(statement, "A %T called '%s' has already been defined", get_binding(env, def->name)->type, def->name); + + env_t *ns_env = namespace_env(env, def->name); + type_t *type = Type(EnumType, .name=def->name, .opaque=true, .env=ns_env); // placeholder + Table$str_set(env->types, def->name, type); + set_binding(env, def->name, Type(TypeInfoType, .name=def->name, .type=type, .env=ns_env), + CORD_all(namespace_prefix(env, env->namespace), def->name, "$$info")); + for (ast_list_t *stmt = def->namespace ? Match(def->namespace, Block)->statements : NULL; stmt; stmt = stmt->next) + prebind_statement(ns_env, stmt->ast); + break; + } + case LangDef: { + auto def = Match(statement, LangDef); + if (get_binding(env, def->name)) + code_err(statement, "A %T called '%s' has already been defined", get_binding(env, def->name)->type, def->name); + + env_t *ns_env = namespace_env(env, def->name); + type_t *type = Type(TextType, .lang=def->name, .env=ns_env); + Table$str_set(env->types, def->name, type); + set_binding(env, def->name, Type(TypeInfoType, .name=def->name, .type=type, .env=ns_env), + CORD_all(namespace_prefix(env, env->namespace), def->name, "$$info")); + for (ast_list_t *stmt = def->namespace ? Match(def->namespace, Block)->statements : NULL; stmt; stmt = stmt->next) + prebind_statement(ns_env, stmt->ast); + break; + } + default: break; + } +} + +void bind_statement(env_t *env, ast_t *statement) +{ + switch (statement->tag) { + case DocTest: { + bind_statement(env, Match(statement, DocTest)->expr); + break; + } + case Declare: { + auto decl = Match(statement, Declare); + const char *name = Match(decl->var, Var)->name; + if (streq(name, "_")) // Explicit discard + return; + if (get_binding(env, name)) + code_err(decl->var, "A %T called '%s' has already been defined", get_binding(env, name)->type, name); + bind_statement(env, decl->value); + type_t *type = get_type(env, decl->value); + if (!type) + code_err(decl->value, "I couldn't figure out the type of this value"); + if (type->tag == FunctionType) + type = Type(ClosureType, type); + CORD prefix = namespace_prefix(env, env->namespace); + CORD code = CORD_cat(prefix ? prefix : "$", name); + set_binding(env, name, type, code); + break; + } + case FunctionDef: { + auto def = Match(statement, FunctionDef); + const char *name = Match(def->name, Var)->name; + type_t *type = get_function_def_type(env, statement); + // binding_t *clobber = get_binding(env, name); + // if (clobber) + // code_err(def->name, "A %T called '%s' has already been defined", clobber->type, name); + CORD code = CORD_all(namespace_prefix(env, env->namespace), name); + set_binding(env, name, type, code); + break; + } + case ConvertDef: { + type_t *type = get_function_def_type(env, statement); + type_t *ret_t = Match(type, FunctionType)->ret; + const char *name = get_type_name(ret_t); + if (!name) + code_err(statement, "Conversions are only supported for text, struct, and enum types, not %T", ret_t); + + CORD code = CORD_asprintf("%r%r$%ld", namespace_prefix(env, env->namespace), name, + get_line_number(statement->file, statement->start)); + binding_t binding = {.type=type, .code=code}; + env_t *type_ns = get_namespace_by_type(env, ret_t); + Array$insert(&type_ns->namespace->constructors, &binding, I(0), sizeof(binding)); + break; + } + case StructDef: { + auto def = Match(statement, StructDef); + env_t *ns_env = namespace_env(env, def->name); + type_t *type = Table$str_get(*env->types, def->name); + if (!type) code_err(statement, "Couldn't find type!"); + assert(type); + if (!def->opaque) { + arg_t *fields = NULL; + for (arg_ast_t *field_ast = def->fields; field_ast; field_ast = field_ast->next) { + type_t *field_t = get_arg_ast_type(env, field_ast); + type_t *non_opt_field_t = field_t->tag == OptionalType ? Match(field_t, OptionalType)->type : field_t; + if ((non_opt_field_t->tag == StructType && Match(non_opt_field_t, StructType)->opaque) + || (non_opt_field_t->tag == EnumType && Match(non_opt_field_t, EnumType)->opaque)) { + + file_t *file = NULL; + const char *start = NULL, *end = NULL; + if (field_ast->type) { + file = field_ast->type->file, start = field_ast->type->start, end = field_ast->type->end; + } else if (field_ast->value) { + file = field_ast->value->file, start = field_ast->value->start, end = field_ast->value->end; + } + if (non_opt_field_t == type) + compiler_err(file, start, end, "This is a recursive struct that would be infinitely large. Maybe you meant to use an optional '@%T?' pointer instead?", type); + else if (non_opt_field_t->tag == StructType && Match(non_opt_field_t, StructType)->external) + compiler_err(file, start, end, "This is an opaque externally defined struct.\n" + "I can't use it as a member without knowing what its fields are.\n" + "Either specify its fields and remove the `opaque` qualifier, or use something like a @%T pointer.", non_opt_field_t); + else + compiler_err(file, start, end, "I'm still in the process of defining the fields of %T, so I don't know how to use it as a member." + "\nTry using a @%T pointer for this field.", + field_t, field_t); + } + fields = new(arg_t, .name=field_ast->name, .type=field_t, .default_val=field_ast->value, .next=fields); + } + REVERSE_LIST(fields); + type->__data.StructType.fields = fields; // populate placeholder + type->__data.StructType.opaque = false; + } + + for (ast_list_t *stmt = def->namespace ? Match(def->namespace, Block)->statements : NULL; stmt; stmt = stmt->next) + bind_statement(ns_env, stmt->ast); + break; + } + case EnumDef: { + auto def = Match(statement, EnumDef); + env_t *ns_env = namespace_env(env, def->name); + type_t *type = Table$str_get(*env->types, def->name); + assert(type); + tag_t *tags = NULL; + int64_t next_tag = 1; + for (tag_ast_t *tag_ast = def->tags; tag_ast; tag_ast = tag_ast->next) { + arg_t *fields = NULL; + for (arg_ast_t *field_ast = tag_ast->fields; field_ast; field_ast = field_ast->next) { + type_t *field_t = get_arg_ast_type(env, field_ast); + type_t *non_opt_field_t = field_t->tag == OptionalType ? Match(field_t, OptionalType)->type : field_t; + if ((non_opt_field_t->tag == StructType && Match(non_opt_field_t, StructType)->opaque) + || (non_opt_field_t->tag == EnumType && Match(non_opt_field_t, EnumType)->opaque)) { + file_t *file = NULL; + const char *start = NULL, *end = NULL; + if (field_ast->type) { + file = field_ast->type->file, start = field_ast->type->start, end = field_ast->type->end; + } else if (field_ast->value) { + file = field_ast->value->file, start = field_ast->value->start, end = field_ast->value->end; + } + if (non_opt_field_t == type) + compiler_err(file, start, end, "This is a recursive enum that would be infinitely large. Maybe you meant to use an optional '@%T?' pointer instead?", type); + else if (non_opt_field_t->tag == StructType && Match(non_opt_field_t, StructType)->external) + compiler_err(file, start, end, "This is an opaque externally defined struct.\n" + "I can't use it as a member without knowing what its fields are.\n" + "Either specify its fields and remove the `opaque` qualifier, or use something like a @%T pointer.", non_opt_field_t); + else + compiler_err(file, start, end, "I'm still in the process of defining the fields of %T, so I don't know how to use it as a member." + "\nTry using a @%T pointer for this field.", + field_t, field_t); + } + fields = new(arg_t, .name=field_ast->name, .type=field_t, .default_val=field_ast->value, .next=fields); + } + REVERSE_LIST(fields); + env_t *member_ns = namespace_env(env, heap_strf("%s$%s", def->name, tag_ast->name)); + type_t *tag_type = Type(StructType, .name=heap_strf("%s$%s", def->name, tag_ast->name), .fields=fields, .env=member_ns); + tags = new(tag_t, .name=tag_ast->name, .tag_value=(next_tag++), .type=tag_type, .next=tags); + } + REVERSE_LIST(tags); + type->__data.EnumType.tags = tags; + type->__data.EnumType.opaque = false; + + for (tag_t *tag = tags; tag; tag = tag->next) { + if (Match(tag->type, StructType)->fields) { // Constructor: + type_t *constructor_t = Type(FunctionType, .args=Match(tag->type, StructType)->fields, .ret=type); + set_binding(ns_env, tag->name, constructor_t, CORD_all(namespace_prefix(env, env->namespace), def->name, "$tagged$", tag->name)); + } else { // Empty singleton value: + CORD code = CORD_all("((", namespace_prefix(env, env->namespace), def->name, "$$type){", namespace_prefix(env, env->namespace), def->name, "$tag$", tag->name, "})"); + set_binding(ns_env, tag->name, type, code); + } + Table$str_set(env->types, heap_strf("%s$%s", def->name, tag->name), tag->type); + } + + for (ast_list_t *stmt = def->namespace ? Match(def->namespace, Block)->statements : NULL; stmt; stmt = stmt->next) { + bind_statement(ns_env, stmt->ast); + } + break; + } + case LangDef: { + auto def = Match(statement, LangDef); + env_t *ns_env = namespace_env(env, def->name); + type_t *type = Type(TextType, .lang=def->name, .env=ns_env); + Table$str_set(env->types, def->name, type); + + set_binding(ns_env, "from_text", + Type(FunctionType, .args=new(arg_t, .name="text", .type=TEXT_TYPE), .ret=type), + CORD_all("(", namespace_prefix(env, env->namespace), def->name, "$$type)")); + + for (ast_list_t *stmt = def->namespace ? Match(def->namespace, Block)->statements : NULL; stmt; stmt = stmt->next) + bind_statement(ns_env, stmt->ast); + break; + } + case Use: { + env_t *module_env = load_module(env, statement); + if (!module_env) break; + for (Table_t *bindings = module_env->locals; bindings != module_env->globals; bindings = bindings->fallback) { + for (int64_t i = 1; i <= Table$length(*bindings); i++) { + struct {const char *name; binding_t *binding; } *entry = Table$entry(*bindings, i); + if (entry->name[0] == '_' || streq(entry->name, "main")) + continue; + binding_t *b = Table$str_get(*env->locals, entry->name); + if (!b) + Table$str_set(env->locals, entry->name, entry->binding); + else if (b != entry->binding) + code_err(statement, "This module imports a symbol called '%s', which would clobber another variable", entry->name); + } + } + for (int64_t i = 1; i <= Table$length(*module_env->types); i++) { + struct {const char *name; type_t *type; } *entry = Table$entry(*module_env->types, i); + if (entry->name[0] == '_') + continue; + if (Table$str_get(*env->types, entry->name)) + continue; + + Table$str_set(env->types, entry->name, entry->type); + } + + ast_t *var = Match(statement, Use)->var; + if (var) { + type_t *type = get_type(env, statement); + assert(type); + set_binding(env, Match(var, Var)->name, type, CORD_EMPTY); + } + break; + } + case Extern: { + auto ext = Match(statement, Extern); + type_t *t = parse_type_ast(env, ext->type); + if (t->tag == ClosureType) + t = Match(t, ClosureType)->fn; + set_binding(env, ext->name, t, ext->name); + break; + } + default: break; + } +} + +type_t *get_function_def_type(env_t *env, ast_t *ast) +{ + arg_ast_t *arg_asts = ast->tag == FunctionDef ? Match(ast, FunctionDef)->args : Match(ast, ConvertDef)->args; + type_ast_t *ret_type = ast->tag == FunctionDef ? Match(ast, FunctionDef)->ret_type : Match(ast, ConvertDef)->ret_type; + arg_t *args = NULL; + env_t *scope = fresh_scope(env); + for (arg_ast_t *arg = arg_asts; arg; arg = arg->next) { + type_t *t = arg->type ? parse_type_ast(env, arg->type) : get_type(env, arg->value); + args = new(arg_t, .name=arg->name, .type=t, .default_val=arg->value, .next=args); + set_binding(scope, arg->name, t, CORD_EMPTY); + } + REVERSE_LIST(args); + + type_t *ret = ret_type ? parse_type_ast(scope, ret_type) : Type(VoidType); + if (has_stack_memory(ret)) + code_err(ast, "Functions can't return stack references because the reference may outlive its stack frame."); + return Type(FunctionType, .args=args, .ret=ret); +} + +type_t *get_method_type(env_t *env, ast_t *self, const char *name) +{ + binding_t *b = get_namespace_binding(env, self, name); + if (!b || !b->type) + code_err(self, "No such method: %T:%s(...)", get_type(env, self), name); + return b->type; +} + +env_t *when_clause_scope(env_t *env, type_t *subject_t, when_clause_t *clause) +{ + if (clause->pattern->tag == Var || subject_t->tag != EnumType) + return env; + + if (clause->pattern->tag != FunctionCall || Match(clause->pattern, FunctionCall)->fn->tag != Var) + code_err(clause->pattern, "I only support variables and constructors for pattern matching %T types in a 'when' block", subject_t); + + auto fn = Match(clause->pattern, FunctionCall); + const char *tag_name = Match(fn->fn, Var)->name; + type_t *tag_type = NULL; + tag_t * const tags = Match(subject_t, EnumType)->tags; + for (tag_t *tag = tags; tag; tag = tag->next) { + if (streq(tag->name, tag_name)) { + tag_type = tag->type; + break; + } + } + + if (!tag_type) + code_err(clause->pattern, "There is no tag '%s' for the type %T", tag_name, subject_t); + + if (!fn->args) + return env; + + env_t *scope = fresh_scope(env); + auto tag_struct = Match(tag_type, StructType); + if (fn->args && !fn->args->next && tag_struct->fields && tag_struct->fields->next) { + if (fn->args->value->tag != Var) + code_err(fn->args->value, "I expected a variable here"); + set_binding(scope, Match(fn->args->value, Var)->name, tag_type, CORD_EMPTY); + return scope; + } + + arg_t *field = tag_struct->fields; + for (arg_ast_t *var = fn->args; var || field; var = var ? var->next : var) { + if (!var) + code_err(clause->pattern, "The field %T.%s.%s wasn't accounted for", subject_t, tag_name, field->name); + if (!field) + code_err(var->value, "This is one more field than %T has", subject_t); + if (var->value->tag != Var) + code_err(var->value, "I expected this to be a plain variable so I could bind it to a value"); + if (!streq(Match(var->value, Var)->name, "_")) + set_binding(scope, Match(var->value, Var)->name, field->type, CORD_EMPTY); + field = field->next; + } + return scope; +} + +type_t *get_clause_type(env_t *env, type_t *subject_t, when_clause_t *clause) +{ + env_t *scope = when_clause_scope(env, subject_t, clause); + return get_type(scope, clause->body); +} + +type_t *get_type(env_t *env, ast_t *ast) +{ + if (!ast) return NULL; +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wswitch-default" + switch (ast->tag) { + case None: { + if (!Match(ast, None)->type) + return Type(OptionalType, .type=NULL); + type_t *t = parse_type_ast(env, Match(ast, None)->type); + if (t->tag == OptionalType) + code_err(ast, "Nested optional types are not supported. This should be: `none:%T`", Match(t, OptionalType)->type); + return Type(OptionalType, .type=t); + } + case Bool: { + return Type(BoolType); + } + case Int: { + return Type(BigIntType); + } + case Num: { + return Type(NumType, .bits=TYPE_NBITS64); + } + case HeapAllocate: { + type_t *pointed = get_type(env, Match(ast, HeapAllocate)->value); + if (has_stack_memory(pointed)) + code_err(ast, "Stack references cannot be moved to the heap because they may outlive the stack frame they were created in."); + return Type(PointerType, .pointed=pointed); + } + case StackReference: { + // Supported: + // &variable + // &struct_variable.field.(...) + // &struct_ptr.field.(...) + // &[10, 20, 30]; &{key:value}; &{10, 20, 30} + // &Foo(...) + // &(expression) + // Not supported: + // &ptr[] + // &list[index] + // &table[key] + // &(expression).field + // &optional_struct_ptr.field + ast_t *value = Match(ast, StackReference)->value; + switch (value->tag) { + case FieldAccess: { + ast_t *base = value; + while (base->tag == FieldAccess) + base = Match(base, FieldAccess)->fielded; + + type_t *ref_type = get_type(env, value); + type_t *base_type = get_type(env, base); + if (base_type->tag == OptionalType) { + code_err(base, "This value might be null, so it can't be safely dereferenced"); + } else if (base_type->tag == PointerType) { + auto ptr = Match(base_type, PointerType); + return Type(PointerType, .pointed=ref_type, .is_stack=ptr->is_stack); + } else if (base->tag == Var) { + return Type(PointerType, .pointed=ref_type, .is_stack=true); + } + code_err(ast, "'&' stack references can only be used on the fields of pointers and local variables"); + } + case Index: + code_err(ast, "'&' stack references are not supported for array or table indexing"); + default: + return Type(PointerType, .pointed=get_type(env, value), .is_stack=true); + } + } + case Optional: { + ast_t *value = Match(ast, Optional)->value; + type_t *t = get_type(env, value); + if (t->tag == OptionalType) + code_err(ast, "This value is already optional, it can't be converted to optional"); + return Type(OptionalType, .type=t); + } + case NonOptional: { + ast_t *value = Match(ast, NonOptional)->value; + type_t *t = get_type(env, value); + if (t->tag != OptionalType) + code_err(value, "This value is not optional. Only optional values can use the '!' operator."); + return Match(t, OptionalType)->type; + } + case TextLiteral: return TEXT_TYPE; + case Path: return PATH_TYPE; + case TextJoin: { + const char *lang = Match(ast, TextJoin)->lang; + if (lang) { + binding_t *b = get_binding(env, lang); + if (!b || b->type->tag != TypeInfoType || Match(b->type, TypeInfoType)->type->tag != TextType) + code_err(ast, "There is no text language called '%s'", lang); + return Match(get_binding(env, lang)->type, TypeInfoType)->type; + } else { + return TEXT_TYPE; + } + } + case Var: { + auto var = Match(ast, Var); + binding_t *b = get_binding(env, var->name); + if (b) return b->type; + code_err(ast, "I don't know what \"%s\" refers to", var->name); + } + case Array: { + auto array = Match(ast, Array); + type_t *item_type = NULL; + if (array->item_type) { + item_type = parse_type_ast(env, array->item_type); + } else if (array->items) { + for (ast_list_t *item = array->items; item; item = item->next) { + ast_t *item_ast = item->ast; + env_t *scope = env; + while (item_ast->tag == Comprehension) { + auto comp = Match(item_ast, Comprehension); + scope = for_scope( + scope, FakeAST(For, .iter=comp->iter, .vars=comp->vars)); + item_ast = comp->expr; + } + type_t *t2 = get_type(scope, item_ast); + type_t *merged = item_type ? type_or_type(item_type, t2) : t2; + if (!merged) + code_err(item->ast, + "This array item has type %T, which is different from earlier array items which have type %T", + t2, item_type); + item_type = merged; + } + } else { + code_err(ast, "I can't figure out what type this array has because it has no members or explicit type"); + } + if (has_stack_memory(item_type)) + code_err(ast, "Arrays cannot hold stack references, because the array may outlive the stack frame the reference was created in."); + return Type(ArrayType, .item_type=item_type); + } + case Set: { + auto set = Match(ast, Set); + type_t *item_type = NULL; + if (set->item_type) { + item_type = parse_type_ast(env, set->item_type); + } else { + for (ast_list_t *item = set->items; item; item = item->next) { + ast_t *item_ast = item->ast; + env_t *scope = env; + while (item_ast->tag == Comprehension) { + auto comp = Match(item_ast, Comprehension); + scope = for_scope( + scope, FakeAST(For, .iter=comp->iter, .vars=comp->vars)); + item_ast = comp->expr; + } + + type_t *this_item_type = get_type(scope, item_ast); + type_t *item_merged = type_or_type(item_type, this_item_type); + if (!item_merged) + code_err(item_ast, + "This set item has type %T, which is different from earlier set items which have type %T", + this_item_type, item_type); + item_type = item_merged; + } + } + if (has_stack_memory(item_type)) + code_err(ast, "Sets cannot hold stack references because the set may outlive the reference's stack frame."); + return Type(SetType, .item_type=item_type); + } + case Table: { + auto table = Match(ast, Table); + type_t *key_type = NULL, *value_type = NULL; + if (table->key_type && table->value_type) { + key_type = parse_type_ast(env, table->key_type); + value_type = parse_type_ast(env, table->value_type); + } else if (table->key_type && table->default_value) { + key_type = parse_type_ast(env, table->key_type); + value_type = get_type(env, table->default_value); + } else { + for (ast_list_t *entry = table->entries; entry; entry = entry->next) { + ast_t *entry_ast = entry->ast; + env_t *scope = env; + while (entry_ast->tag == Comprehension) { + auto comp = Match(entry_ast, Comprehension); + scope = for_scope( + scope, FakeAST(For, .iter=comp->iter, .vars=comp->vars)); + entry_ast = comp->expr; + } + + auto e = Match(entry_ast, TableEntry); + type_t *key_t = get_type(scope, e->key); + type_t *value_t = get_type(scope, e->value); + + type_t *key_merged = key_type ? type_or_type(key_type, key_t) : key_t; + if (!key_merged) + code_err(entry->ast, + "This table entry has type %T, which is different from earlier table entries which have type %T", + key_t, key_type); + key_type = key_merged; + + type_t *val_merged = value_type ? type_or_type(value_type, value_t) : value_t; + if (!val_merged) + code_err(entry->ast, + "This table entry has type %T, which is different from earlier table entries which have type %T", + value_t, value_type); + value_type = val_merged; + } + } + if (has_stack_memory(key_type) || has_stack_memory(value_type)) + code_err(ast, "Tables cannot hold stack references because the table may outlive the reference's stack frame."); + return Type(TableType, .key_type=key_type, .value_type=value_type, .default_value=table->default_value, .env=env); + } + case TableEntry: { + code_err(ast, "Table entries should not be typechecked directly"); + } + case Comprehension: { + auto comp = Match(ast, Comprehension); + env_t *scope = for_scope(env, FakeAST(For, .iter=comp->iter, .vars=comp->vars)); + if (comp->expr->tag == Comprehension) { + return get_type(scope, comp->expr); + } else if (comp->expr->tag == TableEntry) { + auto e = Match(comp->expr, TableEntry); + return Type(TableType, .key_type=get_type(scope, e->key), .value_type=get_type(scope, e->value), .env=env); + } else { + return Type(ArrayType, .item_type=get_type(scope, comp->expr)); + } + } + case FieldAccess: { + auto access = Match(ast, FieldAccess); + type_t *fielded_t = get_type(env, access->fielded); + if (fielded_t->tag == ModuleType) { + const char *name = Match(fielded_t, ModuleType)->name; + env_t *module_env = Table$str_get(*env->imports, name); + if (!module_env) code_err(access->fielded, "I couldn't find the environment for the module %s", name); + return get_type(module_env, WrapAST(ast, Var, access->field)); + } else if (fielded_t->tag == TypeInfoType) { + auto info = Match(fielded_t, TypeInfoType); + assert(info->env); + binding_t *b = get_binding(info->env, access->field); + if (!b) code_err(ast, "I couldn't find the field '%s' on this type", access->field); + return b->type; + } + type_t *field_t = get_field_type(fielded_t, access->field); + if (!field_t) + code_err(ast, "%T objects don't have a field called '%s'", fielded_t, access->field); + return field_t; + } + case Index: { + auto indexing = Match(ast, Index); + type_t *indexed_t = get_type(env, indexing->indexed); + if (indexed_t->tag == OptionalType && !indexing->index) + code_err(ast, "You're attempting to dereference a value whose type indicates it could be null"); + + if (indexed_t->tag == PointerType && !indexing->index) + return Match(indexed_t, PointerType)->pointed; + + type_t *value_t = value_type(indexed_t); + if (value_t->tag == ArrayType) { + if (!indexing->index) return indexed_t; + type_t *index_t = get_type(env, indexing->index); + if (index_t->tag == IntType || index_t->tag == BigIntType || index_t->tag == ByteType) + return Match(value_t, ArrayType)->item_type; + code_err(indexing->index, "I only know how to index lists using integers, not %T", index_t); + } else if (value_t->tag == TableType) { + auto table_type = Match(value_t, TableType); + if (table_type->default_value) + return get_type(env, table_type->default_value); + else if (table_type->value_type) + return Type(OptionalType, table_type->value_type); + else + code_err(indexing->indexed, "This type doesn't have a value type or a default value"); + } else if (value_t->tag == TextType) { + return value_t; + } else { + code_err(ast, "I don't know how to index %T values", indexed_t); + } + } + case FunctionCall: { + auto call = Match(ast, FunctionCall); + type_t *fn_type_t = get_type(env, call->fn); + if (!fn_type_t) + code_err(call->fn, "I couldn't find this function"); + + if (fn_type_t->tag == TypeInfoType) { + type_t *t = Match(fn_type_t, TypeInfoType)->type; + + binding_t *constructor = get_constructor(env, t, call->args); + if (constructor) + return t; + else if (t->tag == StructType || t->tag == IntType || t->tag == BigIntType || t->tag == NumType + || t->tag == ByteType || t->tag == TextType || t->tag == CStringType || t->tag == MomentType) + return t; // Constructor + code_err(call->fn, "This is not a type that has a constructor"); + } + if (fn_type_t->tag == ClosureType) + fn_type_t = Match(fn_type_t, ClosureType)->fn; + if (fn_type_t->tag != FunctionType) + code_err(call->fn, "This isn't a function, it's a %T", fn_type_t); + auto fn_type = Match(fn_type_t, FunctionType); + return fn_type->ret; + } + case MethodCall: { + auto call = Match(ast, MethodCall); + + if (streq(call->name, "serialized")) // Data serialization + return Type(ArrayType, Type(ByteType)); + + type_t *self_value_t = value_type(get_type(env, call->self)); + switch (self_value_t->tag) { + case ArrayType: { + type_t *item_type = Match(self_value_t, ArrayType)->item_type; + if (streq(call->name, "binary_search")) return INT_TYPE; + else if (streq(call->name, "by")) return self_value_t; + else if (streq(call->name, "clear")) return Type(VoidType); + else if (streq(call->name, "counts")) return Type(TableType, .key_type=item_type, .value_type=INT_TYPE); + else if (streq(call->name, "find")) return Type(OptionalType, .type=INT_TYPE); + else if (streq(call->name, "first")) return Type(OptionalType, .type=INT_TYPE); + else if (streq(call->name, "from")) return self_value_t; + else if (streq(call->name, "has")) return Type(BoolType); + else if (streq(call->name, "heap_pop")) return Type(OptionalType, .type=item_type); + else if (streq(call->name, "heap_push")) return Type(VoidType); + else if (streq(call->name, "heapify")) return Type(VoidType); + else if (streq(call->name, "insert")) return Type(VoidType); + else if (streq(call->name, "insert_all")) return Type(VoidType); + else if (streq(call->name, "pop")) return Type(OptionalType, .type=item_type); + else if (streq(call->name, "random")) return item_type; + else if (streq(call->name, "remove_at")) return Type(VoidType); + else if (streq(call->name, "remove_item")) return Type(VoidType); + else if (streq(call->name, "reversed")) return self_value_t; + else if (streq(call->name, "sample")) return self_value_t; + else if (streq(call->name, "shuffle")) return Type(VoidType); + else if (streq(call->name, "shuffled")) return self_value_t; + else if (streq(call->name, "slice")) return self_value_t; + else if (streq(call->name, "sort")) return Type(VoidType); + else if (streq(call->name, "sorted")) return self_value_t; + else if (streq(call->name, "to")) return self_value_t; + else if (streq(call->name, "unique")) return Type(SetType, .item_type=item_type); + else code_err(ast, "There is no '%s' method for arrays", call->name); + } + case SetType: { + if (streq(call->name, "add")) return Type(VoidType); + else if (streq(call->name, "add_all")) return Type(VoidType); + else if (streq(call->name, "clear")) return Type(VoidType); + else if (streq(call->name, "has")) return Type(BoolType); + else if (streq(call->name, "is_subset_of")) return Type(BoolType); + else if (streq(call->name, "is_superset_of")) return Type(BoolType); + else if (streq(call->name, "overlap")) return self_value_t; + else if (streq(call->name, "remove")) return Type(VoidType); + else if (streq(call->name, "remove_all")) return Type(VoidType); + else if (streq(call->name, "with")) return self_value_t; + else if (streq(call->name, "without")) return self_value_t; + else code_err(ast, "There is no '%s' method for sets", call->name); + } + case TableType: { + auto table = Match(self_value_t, TableType); + if (streq(call->name, "bump")) return Type(VoidType); + else if (streq(call->name, "clear")) return Type(VoidType); + else if (streq(call->name, "get")) return Type(OptionalType, .type=table->value_type); + else if (streq(call->name, "has")) return Type(BoolType); + else if (streq(call->name, "remove")) return Type(VoidType); + else if (streq(call->name, "set")) return Type(VoidType); + else if (streq(call->name, "sorted")) return self_value_t; + code_err(ast, "There is no '%s' method for %T tables", call->name, self_value_t); + } + default: { + type_t *fn_type_t = get_method_type(env, call->self, call->name); + if (!fn_type_t) + code_err(ast, "No such method!"); + if (fn_type_t->tag != FunctionType) + code_err(ast, "This isn't a method, it's a %T", fn_type_t); + auto fn_type = Match(fn_type_t, FunctionType); + return fn_type->ret; + } + } + } + case Block: { + auto block = Match(ast, Block); + ast_list_t *last = block->statements; + if (!last) + return Type(VoidType); + while (last->next) + last = last->next; + + // Early out if the type is knowable without any context from the block: + switch (last->ast->tag) { + case UpdateAssign: case Assign: case Declare: case FunctionDef: case ConvertDef: case StructDef: case EnumDef: case LangDef: + return Type(VoidType); + default: break; + } + + env_t *block_env = fresh_scope(env); + for (ast_list_t *stmt = block->statements; stmt; stmt = stmt->next) { + prebind_statement(block_env, stmt->ast); + } + for (ast_list_t *stmt = block->statements; stmt; stmt = stmt->next) { + bind_statement(block_env, stmt->ast); + if (stmt->next) { // Check for unreachable code: + if (stmt->ast->tag == Return) + code_err(stmt->ast, "This statement will always return, so the rest of the code in this block is unreachable!"); + type_t *statement_type = get_type(block_env, stmt->ast); + if (statement_type && statement_type->tag == AbortType && stmt->next) + code_err(stmt->ast, "This statement will always abort, so the rest of the code in this block is unreachable!"); + } + } + return get_type(block_env, last->ast); + } + case Extern: { + return parse_type_ast(env, Match(ast, Extern)->type); + } + case Declare: case Assign: case DocTest: { + return Type(VoidType); + } + case Use: { + switch (Match(ast, Use)->what) { + case USE_LOCAL: + return Type(ModuleType, resolve_path(Match(ast, Use)->path, ast->file->filename, ast->file->filename)); + default: + return Type(ModuleType, Match(ast, Use)->path); + } + } + case Return: { + ast_t *val = Match(ast, Return)->value; + if (env->fn_ret) + env = with_enum_scope(env, env->fn_ret); + return Type(ReturnType, .ret=(val ? get_type(env, val) : Type(VoidType))); + } + case Stop: case Skip: { + return Type(AbortType); + } + case Pass: case Defer: case PrintStatement: return Type(VoidType); + case Negative: { + ast_t *value = Match(ast, Negative)->value; + type_t *t = get_type(env, value); + if (t->tag == IntType || t->tag == NumType) + return t; + + binding_t *b = get_namespace_binding(env, value, "negative"); + if (b && b->type->tag == FunctionType) { + auto fn = Match(b->type, FunctionType); + if (fn->args && type_eq(t, get_arg_type(env, fn->args)) && type_eq(t, fn->ret)) + return t; + } + + code_err(ast, "I don't know how to get the negative value of type %T", t); + } + case Not: { + type_t *t = get_type(env, Match(ast, Not)->value); + if (t->tag == IntType || t->tag == NumType || t->tag == BoolType) + return t; + if (t->tag == OptionalType) + return Type(BoolType); + + ast_t *value = Match(ast, Not)->value; + binding_t *b = get_namespace_binding(env, value, "negated"); + if (b && b->type->tag == FunctionType) { + auto fn = Match(b->type, FunctionType); + if (fn->args && type_eq(t, get_arg_type(env, fn->args)) && type_eq(t, fn->ret)) + return t; + } + code_err(ast, "I only know how to get 'not' of boolean, numeric, and optional pointer types, not %T", t); + } + case Mutexed: { + type_t *item_type = get_type(env, Match(ast, Mutexed)->value); + return Type(MutexedType, .type=item_type); + } + case Holding: { + ast_t *held = Match(ast, Holding)->mutexed; + type_t *held_type = get_type(env, held); + if (held_type->tag != MutexedType) + code_err(held, "This is a %t, not a mutexed value", held_type); + if (held->tag == Var) { + env = fresh_scope(env); + set_binding(env, Match(held, Var)->name, Type(PointerType, .pointed=Match(held_type, MutexedType)->type, .is_stack=true), CORD_EMPTY); + } + return get_type(env, Match(ast, Holding)->body); + } + case BinaryOp: { + auto binop = Match(ast, BinaryOp); + type_t *lhs_t = get_type(env, binop->lhs), + *rhs_t = get_type(env, binop->rhs); + + if (lhs_t->tag == BigIntType && rhs_t->tag != BigIntType && is_numeric_type(rhs_t) && binop->lhs->tag == Int) { + lhs_t = rhs_t; + } else if (rhs_t->tag == BigIntType && lhs_t->tag != BigIntType && is_numeric_type(lhs_t) && binop->rhs->tag == Int) { + + rhs_t = lhs_t; + } + +#define binding_works(name, self, lhs_t, rhs_t, ret_t) \ + ({ binding_t *b = get_namespace_binding(env, self, name); \ + (b && b->type->tag == FunctionType && ({ auto fn = Match(b->type, FunctionType); \ + (type_eq(fn->ret, ret_t) \ + && (fn->args && type_eq(fn->args->type, lhs_t)) \ + && (fn->args->next && can_promote(rhs_t, fn->args->next->type))); })); }) + // Check for a binop method like plus() etc: + switch (binop->op) { + case BINOP_MULT: { + if (is_numeric_type(lhs_t) && binding_works("scaled_by", binop->rhs, rhs_t, lhs_t, rhs_t)) + return rhs_t; + else if (is_numeric_type(rhs_t) && binding_works("scaled_by", binop->lhs, lhs_t, rhs_t, lhs_t)) + return lhs_t; + else if (type_eq(lhs_t, rhs_t) && binding_works(binop_method_names[binop->op], binop->lhs, lhs_t, rhs_t, lhs_t)) + return lhs_t; + break; + } + case BINOP_PLUS: case BINOP_MINUS: case BINOP_AND: case BINOP_OR: case BINOP_XOR: case BINOP_CONCAT: { + if (type_eq(lhs_t, rhs_t) && binding_works(binop_method_names[binop->op], binop->lhs, lhs_t, rhs_t, lhs_t)) + return lhs_t; + break; + } + case BINOP_DIVIDE: case BINOP_MOD: case BINOP_MOD1: { + if (is_numeric_type(rhs_t) && binding_works(binop_method_names[binop->op], binop->lhs, lhs_t, rhs_t, lhs_t)) + return lhs_t; + break; + } + case BINOP_LSHIFT: case BINOP_RSHIFT: case BINOP_ULSHIFT: case BINOP_URSHIFT: { + return lhs_t; + } + case BINOP_POWER: { + if (is_numeric_type(rhs_t) && binding_works(binop_method_names[binop->op], binop->lhs, lhs_t, rhs_t, lhs_t)) + return lhs_t; + break; + } + default: break; + } +#undef binding_works + + switch (binop->op) { + case BINOP_AND: { + if (lhs_t->tag == BoolType && rhs_t->tag == BoolType) { + return lhs_t; + } else if ((lhs_t->tag == BoolType && rhs_t->tag == OptionalType) || + (lhs_t->tag == OptionalType && rhs_t->tag == BoolType)) { + return Type(BoolType); + } else if (lhs_t->tag == BoolType && (rhs_t->tag == AbortType || rhs_t->tag == ReturnType)) { + return lhs_t; + } else if (rhs_t->tag == AbortType || rhs_t->tag == ReturnType) { + return lhs_t; + } else if (rhs_t->tag == OptionalType) { + if (can_promote(lhs_t, rhs_t)) + return rhs_t; + } else if (lhs_t->tag == PointerType && rhs_t->tag == PointerType) { + auto lhs_ptr = Match(lhs_t, PointerType); + auto rhs_ptr = Match(rhs_t, PointerType); + if (type_eq(lhs_ptr->pointed, rhs_ptr->pointed)) + return Type(PointerType, .pointed=lhs_ptr->pointed); + } else if ((is_int_type(lhs_t) && is_int_type(rhs_t)) + || (lhs_t->tag == ByteType && rhs_t->tag == ByteType)) { + return get_math_type(env, ast, lhs_t, rhs_t); + } + code_err(ast, "I can't figure out the type of this `and` expression between a %T and a %T", lhs_t, rhs_t); + } + case BINOP_OR: { + if (lhs_t->tag == BoolType && rhs_t->tag == BoolType) { + return lhs_t; + } else if ((lhs_t->tag == BoolType && rhs_t->tag == OptionalType) || + (lhs_t->tag == OptionalType && rhs_t->tag == BoolType)) { + return Type(BoolType); + } else if (lhs_t->tag == BoolType && (rhs_t->tag == AbortType || rhs_t->tag == ReturnType)) { + return lhs_t; + } else if ((is_int_type(lhs_t) && is_int_type(rhs_t)) + || (lhs_t->tag == ByteType && rhs_t->tag == ByteType)) { + return get_math_type(env, ast, lhs_t, rhs_t); + } else if (lhs_t->tag == OptionalType) { + if (rhs_t->tag == AbortType || rhs_t->tag == ReturnType) + return Match(lhs_t, OptionalType)->type; + if (can_promote(rhs_t, lhs_t)) + return rhs_t; + } else if (lhs_t->tag == PointerType) { + auto lhs_ptr = Match(lhs_t, PointerType); + if (rhs_t->tag == AbortType || rhs_t->tag == ReturnType) { + return Type(PointerType, .pointed=lhs_ptr->pointed); + } else if (rhs_t->tag == PointerType) { + auto rhs_ptr = Match(rhs_t, PointerType); + if (type_eq(rhs_ptr->pointed, lhs_ptr->pointed)) + return Type(PointerType, .pointed=lhs_ptr->pointed); + } + } else if (rhs_t->tag == OptionalType) { + return type_or_type(lhs_t, rhs_t); + } + code_err(ast, "I can't figure out the type of this `or` expression between a %T and a %T", lhs_t, rhs_t); + } + case BINOP_XOR: { + if (lhs_t->tag == BoolType && rhs_t->tag == BoolType) { + return lhs_t; + } else if ((lhs_t->tag == BoolType && rhs_t->tag == OptionalType) || + (lhs_t->tag == OptionalType && rhs_t->tag == BoolType)) { + return Type(BoolType); + } else if ((is_int_type(lhs_t) && is_int_type(rhs_t)) + || (lhs_t->tag == ByteType && rhs_t->tag == ByteType)) { + return get_math_type(env, ast, lhs_t, rhs_t); + } + + code_err(ast, "I can't figure out the type of this `xor` expression between a %T and a %T", lhs_t, rhs_t); + } + case BINOP_CONCAT: { + if (!type_eq(lhs_t, rhs_t)) + code_err(ast, "The type on the left side of this concatenation doesn't match the right side: %T vs. %T", + lhs_t, rhs_t); + if (lhs_t->tag == ArrayType || lhs_t->tag == TextType || lhs_t->tag == SetType) + return lhs_t; + + code_err(ast, "Only array/set/text value types support concatenation, not %T", lhs_t); + } + case BINOP_EQ: case BINOP_NE: case BINOP_LT: case BINOP_LE: case BINOP_GT: case BINOP_GE: { + if (!can_promote(lhs_t, rhs_t) && !can_promote(rhs_t, lhs_t)) + code_err(ast, "I can't compare these two different types: %T vs %T", lhs_t, rhs_t); + return Type(BoolType); + } + case BINOP_CMP: + return Type(IntType, .bits=TYPE_IBITS32); + case BINOP_POWER: { + type_t *result = get_math_type(env, ast, lhs_t, rhs_t); + if (result->tag == NumType) + return result; + return Type(NumType, .bits=TYPE_NBITS64); + } + case BINOP_MULT: case BINOP_DIVIDE: { + type_t *math_type = get_math_type(env, ast, value_type(lhs_t), value_type(rhs_t)); + if (value_type(lhs_t)->tag == NumType || value_type(rhs_t)->tag == NumType) { + if (risks_zero_or_inf(binop->lhs) && risks_zero_or_inf(binop->rhs)) + return Type(OptionalType, math_type); + else + return math_type; + } + return math_type; + } + default: { + return get_math_type(env, ast, lhs_t, rhs_t); + } + } + } + + case Reduction: { + auto reduction = Match(ast, Reduction); + type_t *iter_t = get_type(env, reduction->iter); + + if (reduction->op == BINOP_EQ || reduction->op == BINOP_NE || reduction->op == BINOP_LT + || reduction->op == BINOP_LE || reduction->op == BINOP_GT || reduction->op == BINOP_GE) + return Type(OptionalType, .type=Type(BoolType)); + + type_t *iterated = get_iterated_type(iter_t); + if (!iterated) + code_err(reduction->iter, "I don't know how to do a reduction over %T values", iter_t); + return iterated->tag == OptionalType ? iterated : Type(OptionalType, .type=iterated); + } + + case UpdateAssign: + return Type(VoidType); + + case Min: case Max: { + // Unsafe! These types *should* have the same fields and this saves a lot of duplicate code: + ast_t *lhs = ast->__data.Min.lhs, *rhs = ast->__data.Min.rhs; + // Okay safe again + + type_t *lhs_t = get_type(env, lhs), *rhs_t = get_type(env, rhs); + type_t *t = type_or_type(lhs_t, rhs_t); + if (!t) + code_err(ast, "The two sides of this operation are not compatible: %T vs %T", lhs_t, rhs_t); + return t; + } + + case Lambda: { + auto lambda = Match(ast, Lambda); + arg_t *args = NULL; + env_t *scope = fresh_scope(env); // For now, just use closed variables in scope normally + for (arg_ast_t *arg = lambda->args; arg; arg = arg->next) { + type_t *t = get_arg_ast_type(env, arg); + args = new(arg_t, .name=arg->name, .type=t, .next=args); + set_binding(scope, arg->name, t, CORD_EMPTY); + } + REVERSE_LIST(args); + + type_t *ret = get_type(scope, lambda->body); + if (ret->tag == ReturnType) + ret = Match(ret, ReturnType)->ret; + if (ret->tag == AbortType) + ret = Type(VoidType); + + if (ret->tag == OptionalType && !Match(ret, OptionalType)->type) + code_err(lambda->body, "This function doesn't return a specific optional type"); + + if (lambda->ret_type) { + type_t *declared = parse_type_ast(env, lambda->ret_type); + if (can_promote(ret, declared)) + ret = declared; + else + code_err(ast, "This function was declared to return a value of type %T, but actually returns a value of type %T", + declared, ret); + } + + if (has_stack_memory(ret)) + code_err(ast, "Functions can't return stack references because the reference may outlive its stack frame."); + return Type(ClosureType, Type(FunctionType, .args=args, .ret=ret)); + } + + case FunctionDef: case ConvertDef: case StructDef: case EnumDef: case LangDef: { + return Type(VoidType); + } + + case If: { + auto if_ = Match(ast, If); + if (!if_->else_body) + return Type(VoidType); + + env_t *truthy_scope = env; + env_t *falsey_scope = env; + if (if_->condition->tag == Declare) { + type_t *condition_type = get_type(env, Match(if_->condition, Declare)->value); + const char *varname = Match(Match(if_->condition, Declare)->var, Var)->name; + if (streq(varname, "_")) + code_err(if_->condition, "To use `if var := ...:`, you must choose a real variable name, not `_`"); + + truthy_scope = fresh_scope(env); + if (condition_type->tag == OptionalType) + set_binding(truthy_scope, varname, + Match(condition_type, OptionalType)->type, CORD_EMPTY); + else + set_binding(truthy_scope, varname, condition_type, CORD_EMPTY); + } else if (if_->condition->tag == Var) { + type_t *condition_type = get_type(env, if_->condition); + if (condition_type->tag == OptionalType) { + truthy_scope = fresh_scope(env); + const char *varname = Match(if_->condition, Var)->name; + set_binding(truthy_scope, varname, + Match(condition_type, OptionalType)->type, CORD_EMPTY); + } + } + + type_t *true_t = get_type(truthy_scope, if_->body); + type_t *false_t = get_type(falsey_scope, if_->else_body); + type_t *t_either = type_or_type(true_t, false_t); + if (!t_either) + code_err(if_->else_body, + "I was expecting this block to have a %T value (based on earlier clauses), but it actually has a %T value.", + true_t, false_t); + return t_either; + } + + case When: { + auto when = Match(ast, When); + type_t *subject_t = get_type(env, when->subject); + if (subject_t->tag != EnumType) { + type_t *t = NULL; + for (when_clause_t *clause = when->clauses; clause; clause = clause->next) { + t = type_or_type(t, get_type(env, clause->body)); + } + if (when->else_body) + t = type_or_type(t, get_type(env, when->else_body)); + else if (t->tag != OptionalType) + t = Type(OptionalType, .type=t); + return t; + } + + type_t *overall_t = NULL; + tag_t * const tags = Match(subject_t, EnumType)->tags; + + typedef struct match_s { + tag_t *tag; + bool handled; + struct match_s *next; + } match_t; + match_t *matches = NULL; + for (tag_t *tag = tags; tag; tag = tag->next) + matches = new(match_t, .tag=tag, .handled=false, .next=matches); + + for (when_clause_t *clause = when->clauses; clause; clause = clause->next) { + const char *tag_name; + if (clause->pattern->tag == Var) + tag_name = Match(clause->pattern, Var)->name; + else if (clause->pattern->tag == FunctionCall && Match(clause->pattern, FunctionCall)->fn->tag == Var) + tag_name = Match(Match(clause->pattern, FunctionCall)->fn, Var)->name; + else + code_err(clause->pattern, "This is not a valid pattern for a %T enum", subject_t); + + CORD valid_tags = CORD_EMPTY; + for (match_t *m = matches; m; m = m->next) { + if (streq(m->tag->name, tag_name)) { + if (m->handled) + code_err(clause->pattern, "This tag was already handled earlier"); + m->handled = true; + goto found_matching_tag; + } + if (valid_tags) valid_tags = CORD_cat(valid_tags, ", "); + valid_tags = CORD_cat(valid_tags, m->tag->name); + } + + code_err(clause->pattern, "There is no tag '%s' for the type %T (valid tags: %s)", + tag_name, subject_t, CORD_to_char_star(valid_tags)); + found_matching_tag:; + } + + for (when_clause_t *clause = when->clauses; clause; clause = clause->next) { + env_t *clause_scope = when_clause_scope(env, subject_t, clause); + type_t *clause_type = get_type(clause_scope, clause->body); + type_t *merged = type_or_type(overall_t, clause_type); + if (!merged) + code_err(clause->body, "The type of this branch is %T, which conflicts with the earlier branch type of %T", + clause_type, overall_t); + overall_t = merged; + } + + if (when->else_body) { + bool any_unhandled = false; + for (match_t *m = matches; m; m = m->next) { + if (!m->handled) { + any_unhandled = true; + break; + } + } + // HACK: `while when ...` is handled by the parser adding an implicit + // `else: stop`, which has an empty source code span. + if (!any_unhandled && when->else_body->end > when->else_body->start) + code_err(when->else_body, "This 'else' block will never run because every tag is handled"); + + type_t *else_t = get_type(env, when->else_body); + type_t *merged = type_or_type(overall_t, else_t); + if (!merged) + code_err(when->else_body, + "I was expecting this block to have a %T value (based on earlier clauses), but it actually has a %T value.", + overall_t, else_t); + return merged; + } else { + CORD unhandled = CORD_EMPTY; + for (match_t *m = matches; m; m = m->next) { + if (!m->handled) + unhandled = unhandled ? CORD_all(unhandled, ", ", m->tag->name) : m->tag->name; + } + if (unhandled) + code_err(ast, "This 'when' statement doesn't handle the tags: %s", CORD_to_const_char_star(unhandled)); + return overall_t; + } + } + + case While: case Repeat: case For: return Type(VoidType); + case InlineCCode: { + auto inline_code = Match(ast, InlineCCode); + if (inline_code->type) + return inline_code->type; + type_ast_t *type_ast = inline_code->type_ast; + return type_ast ? parse_type_ast(env, type_ast) : Type(VoidType); + } + case Moment: return Type(MomentType); + case Unknown: code_err(ast, "I can't figure out the type of: %W", ast); + case Deserialize: return parse_type_ast(env, Match(ast, Deserialize)->type); + } +#pragma GCC diagnostic pop + code_err(ast, "I can't figure out the type of: %W", ast); +} + +PUREFUNC bool is_discardable(env_t *env, ast_t *ast) +{ + switch (ast->tag) { + case UpdateAssign: case Assign: case Declare: case FunctionDef: case ConvertDef: case StructDef: case EnumDef: + case LangDef: case Use: + return true; + default: break; + } + type_t *t = get_type(env, ast); + return (t->tag == VoidType || t->tag == AbortType || t->tag == ReturnType); +} + +type_t *get_arg_ast_type(env_t *env, arg_ast_t *arg) +{ + assert(arg->type || arg->value); + if (arg->type) + return parse_type_ast(env, arg->type); + return get_type(env, arg->value); +} + +type_t *get_arg_type(env_t *env, arg_t *arg) +{ + assert(arg->type || arg->default_val); + if (arg->type) return arg->type; + return get_type(env, arg->default_val); +} + +Table_t *get_arg_bindings(env_t *env, arg_t *spec_args, arg_ast_t *call_args, bool promotion_allowed) +{ + Table_t used_args = {}; + + // Populate keyword args: + for (arg_ast_t *call_arg = call_args; call_arg; call_arg = call_arg->next) { + if (!call_arg->name) continue; + + type_t *call_type = get_arg_ast_type(env, call_arg); + for (arg_t *spec_arg = spec_args; spec_arg; spec_arg = spec_arg->next) { + if (!streq(call_arg->name, spec_arg->name)) continue; + type_t *spec_type = get_arg_type(env, spec_arg); + if (!(type_eq(call_type, spec_type) || (promotion_allowed && can_promote(call_type, spec_type)) + || (promotion_allowed && call_arg->value->tag == Int && is_numeric_type(spec_type)) + || (promotion_allowed && call_arg->value->tag == Num && spec_type->tag == NumType))) + return NULL; + Table$str_set(&used_args, call_arg->name, call_arg); + goto next_call_arg; + } + return NULL; + next_call_arg:; + } + + arg_ast_t *unused_args = call_args; + for (arg_t *spec_arg = spec_args; spec_arg; spec_arg = spec_arg->next) { + arg_ast_t *keyworded = Table$str_get(used_args, spec_arg->name); + if (keyworded) continue; + + type_t *spec_type = get_arg_type(env, spec_arg); + for (; unused_args; unused_args = unused_args->next) { + if (unused_args->name) continue; // Already handled the keyword args + type_t *call_type = get_arg_ast_type(env, unused_args); + if (!(type_eq(call_type, spec_type) || (promotion_allowed && can_promote(call_type, spec_type)) + || (promotion_allowed && unused_args->value->tag == Int && is_numeric_type(spec_type)) + || (promotion_allowed && unused_args->value->tag == Num && spec_type->tag == NumType))) + return NULL; // Positional arg trying to fill in + Table$str_set(&used_args, spec_arg->name, unused_args); + unused_args = unused_args->next; + goto found_it; + } + + if (spec_arg->default_val) + goto found_it; + + return NULL; + found_it: continue; + } + + while (unused_args && unused_args->name) + unused_args = unused_args->next; + + if (unused_args != NULL) + return NULL; + + Table_t *ret = new(Table_t); + *ret = used_args; + return ret; +} + +bool is_valid_call(env_t *env, arg_t *spec_args, arg_ast_t *call_args, bool promotion_allowed) +{ + Table_t *arg_bindings = get_arg_bindings(env, spec_args, call_args, promotion_allowed); + return (arg_bindings != NULL); +} + +PUREFUNC bool can_be_mutated(env_t *env, ast_t *ast) +{ + switch (ast->tag) { + case Var: return true; + case InlineCCode: return true; + case FieldAccess: { + auto access = Match(ast, FieldAccess); + type_t *fielded_type = get_type(env, access->fielded); + if (fielded_type->tag == PointerType) { + return true; + } else if (fielded_type->tag == StructType) { + return can_be_mutated(env, access->fielded); + } else { + return false; + } + } + case Index: { + auto index = Match(ast, Index); + type_t *indexed_type = get_type(env, index->indexed); + return (indexed_type->tag == PointerType); + } + default: return false; + } +} + +type_t *parse_type_string(env_t *env, const char *str) +{ + type_ast_t *ast = parse_type_str(str); + return ast ? parse_type_ast(env, ast) : NULL; +} + +PUREFUNC bool is_constant(env_t *env, ast_t *ast) +{ + switch (ast->tag) { + case Bool: case Num: case None: return true; + case Int: { + auto info = Match(ast, Int); + Int_t int_val = Int$parse(Text$from_str(info->str)); + if (int_val.small == 0) return false; // Failed to parse + return (Int$compare_value(int_val, I(BIGGEST_SMALL_INT)) <= 0); + } + case TextJoin: { + auto text = Match(ast, TextJoin); + if (!text->children) return true; // Empty string, OK + if (text->children->next) return false; // Concatenation, not constant + return is_constant(env, text->children->ast); + } + case TextLiteral: { + CORD literal = Match(ast, TextLiteral)->cord; + CORD_pos i; +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wsign-conversion" + CORD_FOR(i, literal) { + if (!isascii(CORD_pos_fetch(i))) + return false; // Non-ASCII requires grapheme logic, not constant + } +#pragma GCC diagnostic pop + return true; // Literal ASCII string, OK + } + case Not: return is_constant(env, Match(ast, Not)->value); + case Negative: return is_constant(env, Match(ast, Negative)->value); + case BinaryOp: { + auto binop = Match(ast, BinaryOp); + switch (binop->op) { + case BINOP_UNKNOWN: case BINOP_POWER: case BINOP_CONCAT: case BINOP_MIN: case BINOP_MAX: case BINOP_CMP: + return false; + default: + return is_constant(env, binop->lhs) && is_constant(env, binop->rhs); + } + } + case Use: return true; + case FunctionCall: return false; + case InlineCCode: return true; + default: return false; + } +} + +// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 diff --git a/src/typecheck.h b/src/typecheck.h new file mode 100644 index 00000000..cc5cb18c --- /dev/null +++ b/src/typecheck.h @@ -0,0 +1,33 @@ +#pragma once + +// Type-checking functions + +#include <gc.h> +#include <stdarg.h> +#include <stdlib.h> +#include <string.h> + +#include "ast.h" +#include "environment.h" +#include "stdlib/datatypes.h" +#include "types.h" + +type_t *parse_type_ast(env_t *env, type_ast_t *ast); +type_t *get_type(env_t *env, ast_t *ast); +void prebind_statement(env_t *env, ast_t *statement); +void bind_statement(env_t *env, ast_t *statement); +PUREFUNC type_t *get_math_type(env_t *env, ast_t *ast, type_t *lhs_t, type_t *rhs_t); +PUREFUNC bool is_discardable(env_t *env, ast_t *ast); +type_t *get_function_def_type(env_t *env, ast_t *ast); +type_t *get_arg_type(env_t *env, arg_t *arg); +type_t *get_arg_ast_type(env_t *env, arg_ast_t *arg); +env_t *when_clause_scope(env_t *env, type_t *subject_t, when_clause_t *clause); +type_t *get_clause_type(env_t *env, type_t *subject_t, when_clause_t *clause); +PUREFUNC bool can_be_mutated(env_t *env, ast_t *ast); +type_t *parse_type_string(env_t *env, const char *str); +type_t *get_method_type(env_t *env, ast_t *self, const char *name); +PUREFUNC bool is_constant(env_t *env, ast_t *ast); +Table_t *get_arg_bindings(env_t *env, arg_t *spec_args, arg_ast_t *call_args, bool promotion_allowed); +bool is_valid_call(env_t *env, arg_t *spec_args, arg_ast_t *call_args, bool promotion_allowed); + +// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 diff --git a/src/types.c b/src/types.c new file mode 100644 index 00000000..506850e8 --- /dev/null +++ b/src/types.c @@ -0,0 +1,739 @@ +// Logic for handling type_t types +#include <gc/cord.h> +#include <limits.h> +#include <math.h> +#include <signal.h> +#include <stdint.h> +#include <sys/param.h> + +#include "cordhelpers.h" +#include "environment.h" +#include "stdlib/integers.h" +#include "stdlib/tables.h" +#include "stdlib/util.h" +#include "types.h" + +CORD type_to_cord(type_t *t) { + if (!t) + return "(Unknown type)"; + + switch (t->tag) { + case UnknownType: return "???"; + case AbortType: return "Abort"; + case ReturnType: { + type_t *ret = Match(t, ReturnType)->ret; + return CORD_all("Return(", ret ? type_to_cord(ret) : "Void", ")"); + } + case VoidType: return "Void"; + case MemoryType: return "Memory"; + case BoolType: return "Bool"; + case ByteType: return "Byte"; + case CStringType: return "CString"; + case MomentType: return "Moment"; + case TextType: return Match(t, TextType)->lang ? Match(t, TextType)->lang : "Text"; + case BigIntType: return "Int"; + case IntType: return CORD_asprintf("Int%d", Match(t, IntType)->bits); + case NumType: return Match(t, NumType)->bits == TYPE_NBITS32 ? "Num32" : "Num"; + case ArrayType: { + auto array = Match(t, ArrayType); + return CORD_asprintf("[%r]", type_to_cord(array->item_type)); + } + case TableType: { + auto table = Match(t, TableType); + if (table->default_value) + return CORD_asprintf("{%r=%.*s}", type_to_cord(table->key_type), + table->default_value->end - table->default_value->start, table->default_value->start); + else + return CORD_asprintf("{%r:%r}", type_to_cord(table->key_type), type_to_cord(table->value_type)); + } + case SetType: { + auto set = Match(t, SetType); + return CORD_asprintf("{%r}", type_to_cord(set->item_type)); + } + case ClosureType: { + return type_to_cord(Match(t, ClosureType)->fn); + } + case FunctionType: { + CORD c = "func("; + auto fn = Match(t, FunctionType); + for (arg_t *arg = fn->args; arg; arg = arg->next) { + c = CORD_cat(c, type_to_cord(arg->type)); + if (arg->next) c = CORD_cat(c, ","); + } + if (fn->ret && fn->ret->tag != VoidType) + c = CORD_all(c, fn->args ? " -> " : "-> ", type_to_cord(fn->ret)); + c = CORD_all(c, ")"); + return c; + } + case StructType: { + auto struct_ = Match(t, StructType); + return struct_->name; + } + case PointerType: { + auto ptr = Match(t, PointerType); + CORD sigil = ptr->is_stack ? "&" : "@"; + return CORD_all(sigil, type_to_cord(ptr->pointed)); + } + case EnumType: { + auto tagged = Match(t, EnumType); + return tagged->name; + } + case OptionalType: { + type_t *opt = Match(t, OptionalType)->type; + if (opt) + return CORD_all(type_to_cord(opt), "?"); + else + return "(Unknown optional type)"; + } + case MutexedType: { + type_t *opt = Match(t, MutexedType)->type; + if (opt) + return CORD_all("mutexed ", type_to_cord(opt)); + else + return "(Unknown optional type)"; + } + case TypeInfoType: { + return CORD_all("Type$info(", Match(t, TypeInfoType)->name, ")"); + } + case ModuleType: { + return CORD_all("Module(", Match(t, ModuleType)->name, ")"); + } + default: { + raise(SIGABRT); + return CORD_asprintf("Unknown type: %d", t->tag); + } + } +} + +PUREFUNC const char *get_type_name(type_t *t) +{ + switch (t->tag) { + case TextType: return Match(t, TextType)->lang; + case StructType: return Match(t, StructType)->name; + case EnumType: return Match(t, EnumType)->name; + default: return NULL; + } +} + +int printf_pointer_size(const struct printf_info *info, size_t n, int argtypes[n], int sizes[n]) +{ + if (n < 1) return -1; + (void)info; + argtypes[0] = PA_POINTER; + sizes[0] = sizeof(void*); + return 1; +} + +int printf_type(FILE *stream, const struct printf_info *info, const void *const args[]) +{ + (void)info; + type_t *t = *(type_t**)args[0]; + if (!t) return fputs("(null)", stream); + return CORD_put(type_to_cord(t), stream); +} + +bool type_eq(type_t *a, type_t *b) +{ + if (a == b) return true; + if (a->tag != b->tag) return false; + return (CORD_cmp(type_to_cord(a), type_to_cord(b)) == 0); +} + +bool type_is_a(type_t *t, type_t *req) +{ + if (type_eq(t, req)) return true; + if (req->tag == OptionalType && Match(req, OptionalType)->type) + return type_is_a(t, Match(req, OptionalType)->type); + if (t->tag == PointerType && req->tag == PointerType) { + auto t_ptr = Match(t, PointerType); + auto req_ptr = Match(req, PointerType); + if (type_eq(t_ptr->pointed, req_ptr->pointed)) + return (!t_ptr->is_stack && req_ptr->is_stack) || (!t_ptr->is_stack); + } + return false; +} + +type_t *non_optional(type_t *t) +{ + return t->tag == OptionalType ? Match(t, OptionalType)->type : t; +} + +PUREFUNC type_t *value_type(type_t *t) +{ + while (t->tag == PointerType) + t = Match(t, PointerType)->pointed; + return t; +} + +type_t *type_or_type(type_t *a, type_t *b) +{ + if (!a) return b; + if (!b) return a; + if (a->tag == OptionalType && !Match(a, OptionalType)->type) + return b->tag == OptionalType ? b : Type(OptionalType, b); + if (b->tag == OptionalType && !Match(b, OptionalType)->type) + return a->tag == OptionalType ? a : Type(OptionalType, a); + if (a->tag == ReturnType && b->tag == ReturnType) + return Type(ReturnType, .ret=type_or_type(Match(a, ReturnType)->ret, Match(b, ReturnType)->ret)); + if (type_is_a(b, a)) return a; + if (type_is_a(a, b)) return b; + if (a->tag == AbortType || a->tag == ReturnType) return non_optional(b); + if (b->tag == AbortType || b->tag == ReturnType) return non_optional(a); + if ((a->tag == IntType || a->tag == NumType) && (b->tag == IntType || b->tag == NumType)) { + switch (compare_precision(a, b)) { + case NUM_PRECISION_EQUAL: case NUM_PRECISION_MORE: return a; + case NUM_PRECISION_LESS: return b; + default: return NULL; + } + return NULL; + } + return NULL; +} + +static PUREFUNC INLINE double type_min_magnitude(type_t *t) +{ + switch (t->tag) { + case BoolType: return (double)false; + case ByteType: return 0; + case BigIntType: return -1./0.; + case IntType: { + switch (Match(t, IntType)->bits) { + case TYPE_IBITS8: return (double)INT8_MIN; + case TYPE_IBITS16: return (double)INT16_MIN; + case TYPE_IBITS32: return (double)INT32_MIN; + case TYPE_IBITS64: return (double)INT64_MIN; + default: errx(1, "Invalid integer bit size"); + } + } + case NumType: return -1./0.; + default: return NAN; + } +} + +static PUREFUNC INLINE double type_max_magnitude(type_t *t) +{ + switch (t->tag) { + case BoolType: return (double)true; + case ByteType: return (double)UINT8_MAX; + case BigIntType: return 1./0.; + case IntType: { + switch (Match(t, IntType)->bits) { + case TYPE_IBITS8: return (double)INT8_MAX; + case TYPE_IBITS16: return (double)INT16_MAX; + case TYPE_IBITS32: return (double)INT32_MAX; + case TYPE_IBITS64: return (double)INT64_MAX; + default: errx(1, "Invalid integer bit size"); + } + } + case NumType: return 1./0.; + default: return NAN; + } +} + +PUREFUNC precision_cmp_e compare_precision(type_t *a, type_t *b) +{ + if (a->tag == OptionalType && Match(a, OptionalType)->type->tag == NumType) + a = Match(a, OptionalType)->type; + if (b->tag == OptionalType && Match(b, OptionalType)->type->tag == NumType) + b = Match(b, OptionalType)->type; + + if (is_int_type(a) && b->tag == NumType) + return NUM_PRECISION_LESS; + else if (a->tag == NumType && is_int_type(b)) + return NUM_PRECISION_MORE; + + double a_min = type_min_magnitude(a), + b_min = type_min_magnitude(b), + a_max = type_max_magnitude(a), + b_max = type_max_magnitude(b); + + if (isnan(a_min) || isnan(b_min) || isnan(a_max) || isnan(b_max)) + return NUM_PRECISION_INCOMPARABLE; + else if (a_min == b_min && a_max == b_max) return NUM_PRECISION_EQUAL; + else if (a_min <= b_min && b_max <= a_max) return NUM_PRECISION_MORE; + else if (b_min <= a_min && a_max <= b_max) return NUM_PRECISION_LESS; + else return NUM_PRECISION_INCOMPARABLE; +} + +PUREFUNC bool has_heap_memory(type_t *t) +{ + switch (t->tag) { + case ArrayType: return true; + case TableType: return true; + case SetType: return true; + case PointerType: return true; + case OptionalType: return has_heap_memory(Match(t, OptionalType)->type); + case MutexedType: return true; + case BigIntType: return true; + case StructType: { + for (arg_t *field = Match(t, StructType)->fields; field; field = field->next) { + if (has_heap_memory(field->type)) + return true; + } + return false; + } + case EnumType: { + for (tag_t *tag = Match(t, EnumType)->tags; tag; tag = tag->next) { + if (tag->type && has_heap_memory(tag->type)) + return true; + } + return false; + } + default: return false; + } +} + +PUREFUNC bool has_stack_memory(type_t *t) +{ + switch (t->tag) { + case PointerType: return Match(t, PointerType)->is_stack; + case OptionalType: return has_stack_memory(Match(t, OptionalType)->type); + case MutexedType: return has_stack_memory(Match(t, MutexedType)->type); + default: return false; + } +} + +PUREFUNC const char *enum_single_value_tag(type_t *enum_type, type_t *t) +{ + const char *found = NULL; + for (tag_t *tag = Match(enum_type, EnumType)->tags; tag; tag = tag->next) { + if (tag->type->tag != StructType) continue; + auto s = Match(tag->type, StructType); + if (!s->fields || s->fields->next || !s->fields->type) + continue; + + if (can_promote(t, s->fields->type)) { + if (found) // Ambiguous case, multiple matches + return NULL; + found = tag->name; + // Continue searching to check for ambiguous cases + } + } + return found; +} + +PUREFUNC bool can_promote(type_t *actual, type_t *needed) +{ + if (!actual || !needed) + return false; + + // No promotion necessary: + if (type_eq(actual, needed)) + return true; + + if (actual->tag == NumType && needed->tag == IntType) + return false; + + if (actual->tag == IntType && (needed->tag == NumType || needed->tag == BigIntType)) + return true; + + if (actual->tag == BigIntType && needed->tag == NumType) + return true; + + if (actual->tag == IntType && needed->tag == IntType) { + auto cmp = compare_precision(actual, needed); + return cmp == NUM_PRECISION_EQUAL || cmp == NUM_PRECISION_LESS; + } + + if (needed->tag == EnumType) + return (enum_single_value_tag(needed, actual) != NULL); + + // Lang to Text: + if (actual->tag == TextType && needed->tag == TextType && streq(Match(needed, TextType)->lang, "Text")) + return true; + + // Text to C String + if (actual->tag == TextType && !Match(actual, TextType)->lang && needed->tag == CStringType) + return true; + + // Automatic dereferencing: + if (actual->tag == PointerType && can_promote(Match(actual, PointerType)->pointed, needed)) + return true; + + if (actual->tag == OptionalType) { + if (needed->tag == BoolType) + return true; + + // Ambiguous `none` to concrete optional + if (Match(actual, OptionalType)->type == NULL) + return (needed->tag == OptionalType); + + // Optional num -> num + if (needed->tag == NumType && actual->tag == OptionalType && Match(actual, OptionalType)->type->tag == NumType) + return can_promote(Match(actual, OptionalType)->type, needed); + } + + // Optional promotion: + if (needed->tag == OptionalType && Match(needed, OptionalType)->type != NULL && can_promote(actual, Match(needed, OptionalType)->type)) + return true; + + if (needed->tag == PointerType && actual->tag == PointerType) { + auto needed_ptr = Match(needed, PointerType); + auto actual_ptr = Match(actual, PointerType); + + if (actual_ptr->is_stack && !needed_ptr->is_stack) + // Can't use &x for a function that wants a @Foo or ?Foo + return false; + + if (needed_ptr->pointed->tag == TableType && actual_ptr->pointed->tag == TableType) + return can_promote(actual_ptr->pointed, needed_ptr->pointed); + else if (needed_ptr->pointed->tag != MemoryType && !type_eq(needed_ptr->pointed, actual_ptr->pointed)) + // Can't use @Foo for a function that wants @Baz + // But you *can* use @Foo for a function that wants @Memory + return false; + else + return true; + } + + // Cross-promotion between tables with default values and without + if (needed->tag == TableType && actual->tag == TableType) { + auto actual_table = Match(actual, TableType); + auto needed_table = Match(needed, TableType); + if (type_eq(needed_table->key_type, actual_table->key_type) + && type_eq(needed_table->value_type, actual_table->value_type)) + return true; + } + + if (needed->tag == ClosureType && actual->tag == FunctionType) + return can_promote(actual, Match(needed, ClosureType)->fn); + + if (needed->tag == ClosureType && actual->tag == ClosureType) + return can_promote(Match(actual, ClosureType)->fn, Match(needed, ClosureType)->fn); + + if (actual->tag == FunctionType && needed->tag == FunctionType) { + for (arg_t *actual_arg = Match(actual, FunctionType)->args, *needed_arg = Match(needed, FunctionType)->args; + actual_arg || needed_arg; actual_arg = actual_arg->next, needed_arg = needed_arg->next) { + if (!actual_arg || !needed_arg) return false; + if (type_eq(actual_arg->type, needed_arg->type)) continue; + if (actual_arg->type->tag == PointerType && needed_arg->type->tag == PointerType + && can_promote(actual_arg->type, needed_arg->type)) + continue; + return false; + } + type_t *actual_ret = Match(actual, FunctionType)->ret; + if (!actual_ret) actual_ret = Type(VoidType); + type_t *needed_ret = Match(needed, FunctionType)->ret; + if (!needed_ret) needed_ret = Type(VoidType); + + return ( + (type_eq(actual_ret, needed_ret)) + || (actual_ret->tag == PointerType && needed_ret->tag == PointerType + && can_promote(actual_ret, needed_ret))); + } + + // Set -> Array promotion + if (needed->tag == ArrayType && actual->tag == SetType + && type_eq(Match(needed, ArrayType)->item_type, Match(actual, SetType)->item_type)) + return true; + + return false; +} + +PUREFUNC bool is_int_type(type_t *t) +{ + return t->tag == IntType || t->tag == BigIntType; +} + +PUREFUNC bool is_numeric_type(type_t *t) +{ + return t->tag == IntType || t->tag == BigIntType || t->tag == NumType || t->tag == ByteType; +} + +PUREFUNC bool is_packed_data(type_t *t) +{ + if (t->tag == IntType || t->tag == NumType || t->tag == ByteType || t->tag == PointerType || t->tag == BoolType || t->tag == FunctionType) { + return true; + } else if (t->tag == StructType) { + for (arg_t *field = Match(t, StructType)->fields; field; field = field->next) { + if (!is_packed_data(field->type)) + return false; + } + return true; + } else if (t->tag == EnumType) { + for (tag_t *tag = Match(t, EnumType)->tags; tag; tag = tag->next) { + if (!is_packed_data(tag->type)) + return false; + } + return true; + } else { + return false; + } +} + +PUREFUNC size_t unpadded_struct_size(type_t *t) +{ + if (Match(t, StructType)->opaque) + compiler_err(NULL, NULL, NULL, "The struct type %s is opaque, so I can't get the size of it", Match(t, StructType)->name); + arg_t *fields = Match(t, StructType)->fields; + size_t size = 0; + size_t bit_offset = 0; + for (arg_t *field = fields; field; field = field->next) { + type_t *field_type = field->type; + if (field_type->tag == BoolType) { + bit_offset += 1; + if (bit_offset >= 8) { + size += 1; + bit_offset = 0; + } + } else { + if (bit_offset > 0) { + size += 1; + bit_offset = 0; + } + size_t align = type_align(field_type); + if (align > 1 && size % align > 0) + size += align - (size % align); // Padding + size += type_size(field_type); + } + } + if (bit_offset > 0) { + size += 1; + bit_offset = 0; + } + return size; +} + +PUREFUNC size_t type_size(type_t *t) +{ + if (t == THREAD_TYPE) return sizeof(pthread_t*); + if (t == PATH_TYPE) return sizeof(Path_t); + if (t == PATH_TYPE_TYPE) return sizeof(PathType_t); +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wswitch-default" + switch (t->tag) { + case UnknownType: case AbortType: case ReturnType: case VoidType: return 0; + case MemoryType: errx(1, "Memory has undefined type size"); + case BoolType: return sizeof(bool); + case ByteType: return sizeof(uint8_t); + case CStringType: return sizeof(char*); + case MomentType: return sizeof(Moment_t); + case BigIntType: return sizeof(Int_t); + case IntType: { + switch (Match(t, IntType)->bits) { + case TYPE_IBITS64: return sizeof(int64_t); + case TYPE_IBITS32: return sizeof(int32_t); + case TYPE_IBITS16: return sizeof(int16_t); + case TYPE_IBITS8: return sizeof(int8_t); + default: errx(1, "Invalid integer bit size"); + } + } + case NumType: return Match(t, NumType)->bits == TYPE_NBITS64 ? sizeof(double) : sizeof(float); + case TextType: return sizeof(Text_t); + case ArrayType: return sizeof(Array_t); + case SetType: return sizeof(Table_t); + case TableType: return sizeof(Table_t); + case FunctionType: return sizeof(void*); + case ClosureType: return sizeof(struct {void *fn, *userdata;}); + case PointerType: return sizeof(void*); + case MutexedType: return sizeof(MutexedData_t); + case OptionalType: { + type_t *nonnull = Match(t, OptionalType)->type; + switch (nonnull->tag) { + case IntType: + switch (Match(nonnull, IntType)->bits) { + case TYPE_IBITS64: return sizeof(OptionalInt64_t); + case TYPE_IBITS32: return sizeof(OptionalInt32_t); + case TYPE_IBITS16: return sizeof(OptionalInt16_t); + case TYPE_IBITS8: return sizeof(OptionalInt8_t); + default: errx(1, "Invalid integer bit size"); + } + case StructType: { + size_t size = unpadded_struct_size(nonnull); + size += sizeof(bool); // is_null flag + size_t align = type_align(nonnull); + if (align > 0 && (size % align) > 0) + size = (size + align) - (size % align); + return size; + } + default: return type_size(nonnull); + } + } + case StructType: { + if (Match(t, StructType)->opaque) + compiler_err(NULL, NULL, NULL, "The struct type %s is opaque, so I can't get the size of it", Match(t, StructType)->name); + size_t size = unpadded_struct_size(t); + size_t align = type_align(t); + if (size > 0 && align > 0 && (size % align) > 0) + size = (size + align) - (size % align); + return size; + } + case EnumType: { + size_t max_align = 0; + size_t max_size = 0; + for (tag_t *tag = Match(t, EnumType)->tags; tag; tag = tag->next) { + size_t align = type_align(tag->type); + if (align > max_align) max_align = align; + size_t size = type_size(tag->type); + if (size > max_size) max_size = size; + } + size_t size = sizeof(UnknownType); // generic enum + if (max_align > 1 && size % max_align > 0) // Padding before first union field + size += max_align - (size % max_align); + size += max_size; + size_t align = MAX(__alignof__(UnknownType), max_align); + if (size % align > 0) // Padding after union + size += align - (size % align); + return size; + } + case TypeInfoType: return sizeof(TypeInfo_t); + case ModuleType: return 0; + } +#pragma GCC diagnostic pop + errx(1, "This should not be reachable"); +} + +PUREFUNC size_t type_align(type_t *t) +{ + if (t == THREAD_TYPE) return __alignof__(pthread_t*); + if (t == PATH_TYPE) return __alignof__(Path_t); + if (t == PATH_TYPE_TYPE) return __alignof__(PathType_t); +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wswitch-default" + switch (t->tag) { + case UnknownType: case AbortType: case ReturnType: case VoidType: return 0; + case MemoryType: errx(1, "Memory has undefined type alignment"); + case BoolType: return __alignof__(bool); + case ByteType: return __alignof__(uint8_t); + case CStringType: return __alignof__(char*); + case MomentType: return __alignof__(Moment_t); + case BigIntType: return __alignof__(Int_t); + case IntType: { + switch (Match(t, IntType)->bits) { + case TYPE_IBITS64: return __alignof__(int64_t); + case TYPE_IBITS32: return __alignof__(int32_t); + case TYPE_IBITS16: return __alignof__(int16_t); + case TYPE_IBITS8: return __alignof__(int8_t); + default: return 0; + } + } + case NumType: return Match(t, NumType)->bits == TYPE_NBITS64 ? __alignof__(double) : __alignof__(float); + case TextType: return __alignof__(Text_t); + case SetType: return __alignof__(Table_t); + case ArrayType: return __alignof__(Array_t); + case TableType: return __alignof__(Table_t); + case FunctionType: return __alignof__(void*); + case ClosureType: return __alignof__(struct {void *fn, *userdata;}); + case PointerType: return __alignof__(void*); + case MutexedType: return __alignof__(MutexedData_t); + case OptionalType: { + type_t *nonnull = Match(t, OptionalType)->type; + switch (nonnull->tag) { + case IntType: + switch (Match(nonnull, IntType)->bits) { + case TYPE_IBITS64: return __alignof__(OptionalInt64_t); + case TYPE_IBITS32: return __alignof__(OptionalInt32_t); + case TYPE_IBITS16: return __alignof__(OptionalInt16_t); + case TYPE_IBITS8: return __alignof__(OptionalInt8_t); + default: errx(1, "Invalid integer bit size"); + } + case StructType: return MAX(1, type_align(nonnull)); + default: return type_align(nonnull); + } + } + case StructType: { + if (Match(t, StructType)->opaque) + compiler_err(NULL, NULL, NULL, "The struct type %s is opaque, so I can't get the alignment of it", + Match(t, StructType)->name); + arg_t *fields = Match(t, StructType)->fields; + size_t align = t->tag == StructType ? 0 : sizeof(void*); + for (arg_t *field = fields; field; field = field->next) { + size_t field_align = type_align(field->type); + if (field_align > align) align = field_align; + } + return align; + } + case EnumType: { + size_t align = __alignof__(UnknownType); + for (tag_t *tag = Match(t, EnumType)->tags; tag; tag = tag->next) { + size_t tag_align = type_align(tag->type); + if (tag_align > align) align = tag_align; + } + return align; + } + case TypeInfoType: return __alignof__(TypeInfo_t); + case ModuleType: return 0; + } +#pragma GCC diagnostic pop + errx(1, "This should not be reachable"); +} + +type_t *get_field_type(type_t *t, const char *field_name) +{ + t = value_type(t); + switch (t->tag) { + case PointerType: + return get_field_type(Match(t, PointerType)->pointed, field_name); + case TextType: { + if (Match(t, TextType)->lang && streq(field_name, "text")) + return TEXT_TYPE; + else if (streq(field_name, "length")) return INT_TYPE; + return NULL; + } + case StructType: { + auto struct_t = Match(t, StructType); + for (arg_t *field = struct_t->fields; field; field = field->next) { + if (streq(field->name, field_name)) + return field->type; + } + return NULL; + } + case EnumType: { + auto e = Match(t, EnumType); + for (tag_t *tag = e->tags; tag; tag = tag->next) { + if (streq(field_name, tag->name)) + return Type(BoolType); + } + return NULL; + } + case SetType: { + if (streq(field_name, "length")) + return INT_TYPE; + else if (streq(field_name, "items")) + return Type(ArrayType, .item_type=Match(t, SetType)->item_type); + return NULL; + } + case TableType: { + if (streq(field_name, "length")) + return INT_TYPE; + else if (streq(field_name, "keys")) + return Type(ArrayType, Match(t, TableType)->key_type); + else if (streq(field_name, "values")) + return Type(ArrayType, Match(t, TableType)->value_type); + else if (streq(field_name, "fallback")) + return Type(OptionalType, .type=t); + return NULL; + } + case ArrayType: { + if (streq(field_name, "length")) return INT_TYPE; + return NULL; + } + case MomentType: { + if (streq(field_name, "seconds")) return Type(IntType, .bits=TYPE_IBITS64); + else if (streq(field_name, "microseconds")) return Type(IntType, .bits=TYPE_IBITS64); + return NULL; + } + default: return NULL; + } +} + +PUREFUNC type_t *get_iterated_type(type_t *t) +{ + type_t *iter_value_t = value_type(t); + switch (iter_value_t->tag) { + case BigIntType: case IntType: return iter_value_t; break; + case ArrayType: return Match(iter_value_t, ArrayType)->item_type; break; + case SetType: return Match(iter_value_t, SetType)->item_type; break; + case TableType: return NULL; + case FunctionType: case ClosureType: { + // Iterator function + auto fn = iter_value_t->tag == ClosureType ? + Match(Match(iter_value_t, ClosureType)->fn, FunctionType) : Match(iter_value_t, FunctionType); + if (fn->args || fn->ret->tag != OptionalType) + return NULL; + return Match(fn->ret, OptionalType)->type; + } + default: return NULL; + } +} + +// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 diff --git a/src/types.h b/src/types.h new file mode 100644 index 00000000..780c4d5c --- /dev/null +++ b/src/types.h @@ -0,0 +1,162 @@ +#pragma once + +// Logic for defining and working with types + +#include <printf.h> +#include <stdlib.h> + +#include "ast.h" +#include "stdlib/arrays.h" + +typedef struct type_s type_t; + +typedef struct arg_s { + const char *name; + type_t *type; + ast_t *default_val; + struct arg_s *next; +} arg_t; + +#define ARG_LIST(...) ({\ + arg_t *args[] = {__VA_ARGS__}; \ + for (size_t i = 0; i < sizeof(args)/sizeof(args[0])-1; i++) \ + args[i]->next = args[i+1]; \ + args[0]; }) + +typedef struct tag_s { + const char *name; + int64_t tag_value; + type_t *type; + struct tag_s *next; +} tag_t; + +typedef struct type_list_s { + type_t *type; + struct type_list_s *next; +} type_list_t; + +struct type_s { + enum { + UnknownType, + AbortType, + ReturnType, + VoidType, + MemoryType, + BoolType, + ByteType, + BigIntType, + IntType, + NumType, + CStringType, + MomentType, + TextType, + ArrayType, + SetType, + TableType, + FunctionType, + ClosureType, + PointerType, + StructType, + EnumType, + OptionalType, + TypeInfoType, + MutexedType, + ModuleType, + } tag; + + union { + struct { + } UnknownType, AbortType, VoidType, MemoryType, BoolType; + struct { + type_t *ret; + } ReturnType; + struct {} BigIntType; + struct { + enum { TYPE_IBITS8=8, TYPE_IBITS16=16, TYPE_IBITS32=32, TYPE_IBITS64=64 } bits; + } IntType; + struct {} ByteType; + struct { + enum { TYPE_NBITS32=32, TYPE_NBITS64=64 } bits; + } NumType; + struct {} CStringType, MomentType; + struct { + const char *lang; + struct env_s *env; + } TextType; + struct { + type_t *item_type; + } ArrayType; + struct { + type_t *item_type; + } SetType; + struct { + type_t *key_type, *value_type; + struct env_s *env; + ast_t *default_value; + } TableType; + struct { + arg_t *args; + type_t *ret; + } FunctionType; + struct { + type_t *fn; + } ClosureType; + struct { + type_t *pointed; + bool is_stack:1; + } PointerType; + struct { + const char *name; + arg_t *fields; + struct env_s *env; + bool opaque:1, external:1; + } StructType; + struct { + const char *name; + tag_t *tags; + struct env_s *env; + bool opaque; + } EnumType; + struct { + type_t *type; + } OptionalType, MutexedType; + struct { + const char *name; + type_t *type; + struct env_s *env; + } TypeInfoType; + struct { + const char *name; + } ModuleType; + } __data; +}; + +#define Type(typetag, ...) new(type_t, .tag=typetag, .__data.typetag={__VA_ARGS__}) +#define INT_TYPE Type(BigIntType) +#define NUM_TYPE Type(NumType, .bits=TYPE_NBITS64) + +int printf_pointer_size(const struct printf_info *info, size_t n, int argtypes[n], int size[n]); +int printf_type(FILE *stream, const struct printf_info *info, const void *const args[]); +CORD type_to_cord(type_t *t); +const char *get_type_name(type_t *t); +PUREFUNC bool type_eq(type_t *a, type_t *b); +PUREFUNC bool type_is_a(type_t *t, type_t *req); +type_t *type_or_type(type_t *a, type_t *b); +type_t *value_type(type_t *a); +typedef enum {NUM_PRECISION_EQUAL, NUM_PRECISION_LESS, NUM_PRECISION_MORE, NUM_PRECISION_INCOMPARABLE} precision_cmp_e; +PUREFUNC precision_cmp_e compare_precision(type_t *a, type_t *b); +PUREFUNC bool has_heap_memory(type_t *t); +PUREFUNC bool has_stack_memory(type_t *t); +PUREFUNC bool can_promote(type_t *actual, type_t *needed); +PUREFUNC const char *enum_single_value_tag(type_t *enum_type, type_t *t); +PUREFUNC bool is_int_type(type_t *t); +PUREFUNC bool is_numeric_type(type_t *t); +PUREFUNC bool is_packed_data(type_t *t); +PUREFUNC size_t type_size(type_t *t); +PUREFUNC size_t type_align(type_t *t); +PUREFUNC size_t unpadded_struct_size(type_t *t); +PUREFUNC type_t *non_optional(type_t *t); +type_t *get_field_type(type_t *t, const char *field_name); +PUREFUNC type_t *get_iterated_type(type_t *t); + +// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 |
