diff options
| author | Bruce Hill <bruce@bruce-hill.com> | 2025-09-06 14:46:15 -0400 |
|---|---|---|
| committer | Bruce Hill <bruce@bruce-hill.com> | 2025-09-06 14:46:15 -0400 |
| commit | d8a48f64111f542f3afeb5d6e47ff092f9278d9f (patch) | |
| tree | 07c364503025bb2a26edd7b26f1ba9e8d25340f6 /src | |
| parent | 12345a85d9c7d7a56ddf323247a4bdf347021b73 (diff) | |
| parent | 73246764f88f6f652316ee0c138a990d836698a7 (diff) | |
Merge branch 'main' into optional-list-indexing
Diffstat (limited to 'src')
42 files changed, 1749 insertions, 425 deletions
@@ -5,77 +5,76 @@ #include "ast.h" #include "stdlib/datatypes.h" +#include "stdlib/optionals.h" #include "stdlib/tables.h" #include "stdlib/text.h" -static Text_t quoted_text(const char *text) { return Text$quoted(Text$from_str(text), false, Text("\"")); } - -CONSTFUNC const char *binop_method_name(ast_e tag) { - switch (tag) { - case Power: - case PowerUpdate: return "power"; - case Multiply: - case MultiplyUpdate: return "times"; - case Divide: - case DivideUpdate: return "divided_by"; - case Mod: - case ModUpdate: return "modulo"; - case Mod1: - case Mod1Update: return "modulo1"; - case Plus: - case PlusUpdate: return "plus"; - case Minus: - case MinusUpdate: return "minus"; - case Concat: - case ConcatUpdate: return "concatenated_with"; - case LeftShift: - case LeftShiftUpdate: return "left_shifted"; - case RightShift: - case RightShiftUpdate: return "right_shifted"; - case UnsignedLeftShift: - case UnsignedLeftShiftUpdate: return "unsigned_left_shifted"; - case UnsignedRightShift: - case UnsignedRightShiftUpdate: return "unsigned_right_shifted"; - case And: - case AndUpdate: return "bit_and"; - case Or: - case OrUpdate: return "bit_or"; - case Xor: - case XorUpdate: return "bit_xor"; - default: return NULL; - } +const int op_tightness[NUM_AST_TAGS] = { + [Power] = 9, + [Multiply] = 8, + [Divide] = 8, + [Mod] = 8, + [Mod1] = 8, + [Plus] = 7, + [Minus] = 7, + [Concat] = 6, + [LeftShift] = 5, + [RightShift] = 5, + [UnsignedLeftShift] = 5, + [UnsignedRightShift] = 5, + [Min] = 4, + [Max] = 4, + [Equals] = 3, + [NotEquals] = 3, + [LessThan] = 2, + [LessThanOrEquals] = 2, + [GreaterThan] = 2, + [GreaterThanOrEquals] = 2, + [Compare] = 2, + [And] = 1, + [Or] = 1, + [Xor] = 1, }; -CONSTFUNC const char *binop_operator(ast_e tag) { - switch (tag) { - case Multiply: - case MultiplyUpdate: return "*"; - case Divide: - case DivideUpdate: return "/"; - case Mod: - case ModUpdate: return "%"; - case Plus: - case PlusUpdate: return "+"; - case Minus: - case MinusUpdate: return "-"; - case LeftShift: - case LeftShiftUpdate: return "<<"; - case RightShift: - case RightShiftUpdate: return ">>"; - case And: - case AndUpdate: return "&"; - case Or: - case OrUpdate: return "|"; - case Xor: - case XorUpdate: return "^"; - case Equals: return "=="; - case NotEquals: return "!="; - case LessThan: return "<"; - case LessThanOrEquals: return "<="; - case GreaterThan: return ">"; - case GreaterThanOrEquals: return ">="; - default: return NULL; - } +const binop_info_t binop_info[NUM_AST_TAGS] = { + [Power] = {"power", "^"}, + [PowerUpdate] = {"power", "^="}, + [Multiply] = {"times", "*"}, + [MultiplyUpdate] = {"times", "*="}, + [Divide] = {"divided_by", "/"}, + [DivideUpdate] = {"divided_by", "/="}, + [Mod] = {"modulo", "mod"}, + [ModUpdate] = {"modulo", "mod="}, + [Mod1] = {"modulo1", "mod1"}, + [Mod1Update] = {"modulo1", "mod1="}, + [Plus] = {"plus", "+"}, + [PlusUpdate] = {"plus", "+="}, + [Minus] = {"minus", "-"}, + [MinusUpdate] = {"minus", "-="}, + [Concat] = {"concatenated_with", "++"}, + [ConcatUpdate] = {"concatenated_with", "++="}, + [LeftShift] = {"left_shifted", "<<"}, + [LeftShiftUpdate] = {"left_shifted", "<<="}, + [RightShift] = {"right_shifted", ">>"}, + [RightShiftUpdate] = {"right_shifted", ">>="}, + [UnsignedLeftShift] = {"unsigned_left_shifted", NULL}, + [UnsignedLeftShiftUpdate] = {"unsigned_left_shifted", NULL}, + [UnsignedRightShift] = {"unsigned_right_shifted", NULL}, + [UnsignedRightShiftUpdate] = {"unsigned_right_shifted", NULL}, + [And] = {"bit_and", "and"}, + [AndUpdate] = {"bit_and", "and="}, + [Or] = {"bit_or", "or"}, + [OrUpdate] = {"bit_or", "or="}, + [Xor] = {"bit_xor", "xor"}, + [XorUpdate] = {"bit_xor", "xor="}, + [Equals] = {NULL, "=="}, + [NotEquals] = {NULL, "!="}, + [LessThan] = {NULL, "<"}, + [LessThanOrEquals] = {NULL, "<="}, + [GreaterThan] = {NULL, ">"}, + [GreaterThanOrEquals] = {NULL, ">="}, + [Min] = {NULL, "_min_"}, + [Max] = {NULL, "_max_"}, }; static Text_t ast_list_to_sexp(ast_list_t *asts); @@ -86,6 +85,8 @@ static Text_t tags_to_sexp(tag_ast_t *tags); static Text_t optional_sexp(const char *tag, ast_t *ast); static Text_t optional_type_sexp(const char *tag, type_ast_t *ast); +static Text_t quoted_text(const char *text) { return Text$quoted(Text$from_str(text), false, Text("\"")); } + Text_t ast_list_to_sexp(ast_list_t *asts) { Text_t c = EMPTY_TEXT; for (; asts; asts = asts->next) { @@ -175,8 +176,8 @@ Text_t ast_to_sexp(ast_t *ast) { T(None, "(None)"); T(Bool, "(Bool ", data.b ? "yes" : "no", ")"); T(Var, "(Var ", quoted_text(data.name), ")"); - T(Int, "(Int ", quoted_text(ast_source(ast)), ")"); - T(Num, "(Num ", quoted_text(ast_source(ast)), ")"); + T(Int, "(Int ", Text$quoted(ast_source(ast), false, Text("\"")), ")"); + T(Num, "(Num ", Text$quoted(ast_source(ast), false, Text("\"")), ")"); T(TextLiteral, Text$quoted(data.text, false, Text("\""))); T(TextJoin, "(Text", data.lang ? Texts(" :lang ", quoted_text(data.lang)) : EMPTY_TEXT, ast_list_to_sexp(data.children), ")"); @@ -255,7 +256,7 @@ Text_t ast_to_sexp(ast_t *ast) { ")"); T(When, "(When ", ast_to_sexp(data.subject), when_clauses_to_sexp(data.clauses), optional_sexp("else", data.else_body), ")"); - T(Reduction, "(Reduction ", quoted_text(binop_method_name(data.op)), " ", ast_to_sexp(data.key), " ", + T(Reduction, "(Reduction ", quoted_text(binop_info[data.op].operator), " ", ast_to_sexp(data.key), " ", ast_to_sexp(data.iter), ")"); T(Skip, "(Skip ", quoted_text(data.target), ")"); T(Stop, "(Stop ", quoted_text(data.target), ")"); @@ -285,13 +286,9 @@ Text_t ast_to_sexp(ast_t *ast) { const char *ast_to_sexp_str(ast_t *ast) { return Text$as_c_string(ast_to_sexp(ast)); } -const char *ast_source(ast_t *ast) { - if (!ast) return NULL; - size_t len = (size_t)(ast->end - ast->start); - char *source = GC_MALLOC_ATOMIC(len + 1); - memcpy(source, ast->start, len); - source[len] = '\0'; - return source; +OptionalText_t ast_source(ast_t *ast) { + if (ast == NULL || ast->start == NULL || ast->end == NULL) return NONE_TEXT; + return Text$from_strn(ast->start, (size_t)(ast->end - ast->start)); } PUREFUNC bool is_idempotent(ast_t *ast) { @@ -400,6 +397,8 @@ void visit_topologically(ast_list_t *asts, Closure_t fn) { CONSTFUNC bool is_binary_operation(ast_t *ast) { switch (ast->tag) { + case Min: + case Max: case BINOP_CASES: return true; default: return false; } @@ -65,6 +65,8 @@ typedef struct ast_list_s { } ast_list_t; typedef struct arg_ast_s { + file_t *file; + const char *start, *end; const char *name, *alias; type_ast_t *type; ast_t *value; @@ -88,6 +90,7 @@ typedef enum { } type_ast_e; typedef struct tag_ast_s { + const char *start, *end; const char *name; arg_ast_t *fields; struct tag_ast_s *next; @@ -160,7 +163,12 @@ struct type_ast_s { case MinusUpdate: \ case ConcatUpdate: \ case LeftShiftUpdate: \ - case UnsignedLeftShiftUpdate + case UnsignedLeftShiftUpdate: \ + case RightShiftUpdate: \ + case UnsignedRightShiftUpdate: \ + case AndUpdate: \ + case OrUpdate: \ + case XorUpdate #define UPDATE_CASES \ PowerUpdate: \ case MultiplyUpdate: \ @@ -271,6 +279,7 @@ typedef enum { Extend, ExplicitlyTyped, } ast_e; +#define NUM_AST_TAGS (ExplicitlyTyped + 1) struct ast_s { ast_e tag; @@ -469,7 +478,16 @@ struct ast_s { } __data; }; -const char *ast_source(ast_t *ast); +extern const int op_tightness[NUM_AST_TAGS]; + +typedef struct { + const char *method_name; + const char *operator; +} binop_info_t; + +extern const binop_info_t binop_info[NUM_AST_TAGS]; + +OptionalText_t ast_source(ast_t *ast); Text_t ast_to_sexp(ast_t *ast); const char *ast_to_sexp_str(ast_t *ast); @@ -478,7 +496,5 @@ Text_t type_ast_to_sexp(type_ast_t *ast); PUREFUNC bool is_idempotent(ast_t *ast); void visit_topologically(ast_list_t *ast, Closure_t fn); CONSTFUNC bool is_update_assignment(ast_t *ast); -CONSTFUNC const char *binop_method_name(ast_e tag); -CONSTFUNC const char *binop_operator(ast_e tag); CONSTFUNC ast_e binop_tag(ast_e tag); CONSTFUNC bool is_binary_operation(ast_t *ast); diff --git a/src/compile/binops.c b/src/compile/binops.c index 87fd2c7a..ed4aaeba 100644 --- a/src/compile/binops.c +++ b/src/compile/binops.c @@ -67,7 +67,7 @@ Text_t compile_binary_op(env_t *env, ast_t *ast) { } } } else if ((ast->tag == Divide || ast->tag == Mod || ast->tag == Mod1) && is_numeric_type(rhs_t)) { - b = get_namespace_binding(env, binop.lhs, binop_method_name(ast->tag)); + b = get_namespace_binding(env, binop.lhs, binop_info[ast->tag].method_name); if (b && b->type->tag == FunctionType) { DeclareMatch(fn, b->type, FunctionType); if (type_eq(fn->ret, lhs_t)) { diff --git a/src/compile/comparisons.c b/src/compile/comparisons.c index d73664de..d7531261 100644 --- a/src/compile/comparisons.c +++ b/src/compile/comparisons.c @@ -7,6 +7,18 @@ #include "../typecheck.h" #include "compilation.h" +static CONSTFUNC const char *comparison_operator(ast_e tag) { + switch (tag) { + case Equals: return "=="; + case NotEquals: return "!="; + case LessThan: return "<"; + case LessThanOrEquals: return "<="; + case GreaterThan: return ">"; + case GreaterThanOrEquals: return ">="; + default: return NULL; + } +} + Text_t compile_comparison(env_t *env, ast_t *ast) { switch (ast->tag) { @@ -75,7 +87,7 @@ Text_t compile_comparison(env_t *env, ast_t *ast) { if (ast->tag == Compare) return Texts("generic_compare(stack(", lhs, "), stack(", rhs, "), ", compile_type_info(operand_t), ")"); - const char *op = binop_operator(ast->tag); + const char *op = comparison_operator(ast->tag); switch (operand_t->tag) { case BigIntType: return Texts("(Int$compare_value(", lhs, ", ", rhs, ") ", op, " 0)"); case BoolType: diff --git a/src/compile/expressions.c b/src/compile/expressions.c index e21ee263..e888ce16 100644 --- a/src/compile/expressions.c +++ b/src/compile/expressions.c @@ -152,7 +152,7 @@ Text_t compile(env_t *env, ast_t *ast) { ast_t *key = ast->tag == Min ? Match(ast, Min)->key : Match(ast, Max)->key; ast_t *lhs = ast->tag == Min ? Match(ast, Min)->lhs : Match(ast, Max)->lhs; ast_t *rhs = ast->tag == Min ? Match(ast, Min)->rhs : Match(ast, Max)->rhs; - const char *key_name = "$"; + const char *key_name = ast->tag == Min ? "_min_" : "_max_"; if (key == NULL) key = FakeAST(Var, key_name); env_t *expr_env = fresh_scope(env); @@ -237,7 +237,8 @@ Text_t compile(env_t *env, ast_t *ast) { case Index: return compile_indexing(env, ast, false); case InlineCCode: { type_t *t = get_type(env, ast); - if (t->tag == VoidType) return Texts("{\n", compile_statement(env, ast), "\n}"); + if (Match(ast, InlineCCode)->type_ast != NULL) return Texts("({", compile_statement(env, ast), "; })"); + else if (t->tag == VoidType) return Texts("{\n", compile_statement(env, ast), "\n}"); else return compile_statement(env, ast); } case Use: code_err(ast, "Compiling 'use' as expression!"); diff --git a/src/compile/files.c b/src/compile/files.c index a6af2300..4d6fb1a8 100644 --- a/src/compile/files.c +++ b/src/compile/files.c @@ -12,7 +12,16 @@ #include "../types.h" #include "compilation.h" -static void initialize_vars_and_statics(env_t *env, ast_t *ast) { +static void initialize_vars_and_statics(env_t *env, ast_t *ast); +static void initialize_namespace(env_t *env, const char *name, ast_t *namespace); +static Text_t compile_top_level_code(env_t *env, ast_t *ast); +static Text_t compile_namespace(env_t *env, const char *name, ast_t *namespace); + +void initialize_namespace(env_t *env, const char *name, ast_t *namespace) { + initialize_vars_and_statics(namespace_env(env, name), namespace); +} + +void initialize_vars_and_statics(env_t *env, ast_t *ast) { if (!ast) return; for (ast_list_t *stmt = Match(ast, Block)->statements; stmt; stmt = stmt->next) { @@ -34,17 +43,13 @@ static void initialize_vars_and_statics(env_t *env, ast_t *ast) { Texts(full_name, " = ", val_code, ",\n", initialized_name, " = true;\n"))); } } else if (stmt->ast->tag == StructDef) { - initialize_vars_and_statics(namespace_env(env, Match(stmt->ast, StructDef)->name), - Match(stmt->ast, StructDef)->namespace); + initialize_namespace(env, Match(stmt->ast, StructDef)->name, Match(stmt->ast, StructDef)->namespace); } else if (stmt->ast->tag == EnumDef) { - initialize_vars_and_statics(namespace_env(env, Match(stmt->ast, EnumDef)->name), - Match(stmt->ast, EnumDef)->namespace); + initialize_namespace(env, Match(stmt->ast, EnumDef)->name, Match(stmt->ast, EnumDef)->namespace); } else if (stmt->ast->tag == LangDef) { - initialize_vars_and_statics(namespace_env(env, Match(stmt->ast, LangDef)->name), - Match(stmt->ast, LangDef)->namespace); + initialize_namespace(env, Match(stmt->ast, LangDef)->name, Match(stmt->ast, LangDef)->namespace); } else if (stmt->ast->tag == Extend) { - initialize_vars_and_statics(namespace_env(env, Match(stmt->ast, Extend)->name), - Match(stmt->ast, Extend)->body); + initialize_namespace(env, Match(stmt->ast, Extend)->name, Match(stmt->ast, Extend)->body); } else if (stmt->ast->tag == Use) { continue; } else { @@ -54,7 +59,12 @@ static void initialize_vars_and_statics(env_t *env, ast_t *ast) { } } -static Text_t compile_top_level_code(env_t *env, ast_t *ast) { +Text_t compile_namespace(env_t *env, const char *name, ast_t *namespace) { + env_t *ns_env = namespace_env(env, name); + return namespace ? compile_top_level_code(ns_env, namespace) : EMPTY_TEXT; +} + +Text_t compile_top_level_code(env_t *env, ast_t *ast) { if (!ast) return EMPTY_TEXT; switch (ast->tag) { @@ -95,13 +105,13 @@ static Text_t compile_top_level_code(env_t *env, ast_t *ast) { return compile_function(env, name_code, ast, &env->code->staticdefs); } case ConvertDef: { - type_t *type = get_function_def_type(env, ast); - const char *name = get_type_name(Match(type, FunctionType)->ret); + type_t *type = get_function_return_type(env, ast); + const char *name = get_type_name(type); if (!name) code_err(ast, "Conversions are only supported for text, struct, and enum " "types, not ", - type_to_str(Match(type, FunctionType)->ret)); + type_to_str(type)); Text_t name_code = namespace_name(env, env->namespace, Texts(name, "$", get_line_number(ast->file, ast->start))); return compile_function(env, name_code, ast, &env->code->staticdefs); @@ -111,15 +121,13 @@ static Text_t compile_top_level_code(env_t *env, ast_t *ast) { type_t *t = Table$str_get(*env->types, def->name); assert(t && t->tag == StructType); Text_t code = compile_struct_typeinfo(env, t, def->name, def->fields, def->secret, def->opaque); - env_t *ns_env = namespace_env(env, def->name); - return Texts(code, def->namespace ? compile_top_level_code(ns_env, def->namespace) : EMPTY_TEXT); + return Texts(code, compile_namespace(env, def->name, def->namespace)); } case EnumDef: { DeclareMatch(def, ast, EnumDef); Text_t code = compile_enum_typeinfo(env, ast); code = Texts(code, compile_enum_constructors(env, ast)); - env_t *ns_env = namespace_env(env, def->name); - return Texts(code, def->namespace ? compile_top_level_code(ns_env, def->namespace) : EMPTY_TEXT); + return Texts(code, compile_namespace(env, def->name, def->namespace)); } case LangDef: { DeclareMatch(def, ast, LangDef); @@ -127,8 +135,7 @@ static Text_t compile_top_level_code(env_t *env, ast_t *ast) { Texts("public const TypeInfo_t ", namespace_name(env, env->namespace, Texts(def->name, "$$info")), " = {", (int64_t)sizeof(Text_t), ", ", (int64_t)__alignof__(Text_t), ", .metamethods=Text$metamethods, .tag=TextInfo, .TextInfo={", quoted_str(def->name), "}};\n"); - env_t *ns_env = namespace_env(env, def->name); - return Texts(code, def->namespace ? compile_top_level_code(ns_env, def->namespace) : EMPTY_TEXT); + return Texts(code, compile_namespace(env, def->name, def->namespace)); } case Extend: { DeclareMatch(extend, ast, Extend); diff --git a/src/compile/functions.c b/src/compile/functions.c index 6caefa8b..f04a3b59 100644 --- a/src/compile/functions.c +++ b/src/compile/functions.c @@ -256,18 +256,7 @@ Text_t compile_lambda(env_t *env, ast_t *ast) { set_binding(body_scope, arg->name, arg_type, Texts("_$", arg->name)); } - type_t *ret_t = get_type(body_scope, lambda->body); - if (ret_t->tag == ReturnType) ret_t = Match(ret_t, ReturnType)->ret; - - if (lambda->ret_type) { - type_t *declared = parse_type_ast(env, lambda->ret_type); - if (can_promote(ret_t, declared)) ret_t = declared; - else - code_err(ast, "This function was declared to return a value of type ", type_to_str(declared), - ", but actually returns a value of type ", type_to_str(ret_t)); - } - - body_scope->fn_ret = ret_t; + body_scope->fn = ast; Table_t closed_vars = get_closed_vars(env, lambda->args, ast); if (Table$length(closed_vars) > 0) { // Create a typedef for the lambda's closure userdata @@ -289,6 +278,7 @@ Text_t compile_lambda(env_t *env, ast_t *ast) { env->code->local_typedefs = Texts(env->code->local_typedefs, def); } + type_t *ret_t = get_function_return_type(env, ast); Text_t code = Texts("static ", compile_type(ret_t), " ", name, "("); for (arg_ast_t *arg = lambda->args; arg; arg = arg->next) { type_t *arg_type = get_arg_ast_type(env, arg); @@ -623,7 +613,7 @@ Text_t compile_function(env_t *env, Text_t name_code, ast_t *ast, Text_t *static bool is_private = false; const char *function_name; arg_ast_t *args; - type_t *ret_t; + type_t *ret_t = get_function_return_type(env, ast); ast_t *body; ast_t *cache; bool is_inline; @@ -632,14 +622,12 @@ Text_t compile_function(env_t *env, Text_t name_code, ast_t *ast, Text_t *static function_name = Match(fndef->name, Var)->name; is_private = function_name[0] == '_'; args = fndef->args; - ret_t = fndef->ret_type ? parse_type_ast(env, fndef->ret_type) : Type(VoidType); body = fndef->body; cache = fndef->cache; is_inline = fndef->is_inline; } else { DeclareMatch(convertdef, ast, ConvertDef); args = convertdef->args; - ret_t = convertdef->ret_type ? parse_type_ast(env, convertdef->ret_type) : Type(VoidType); function_name = get_type_name(ret_t); if (!function_name) code_err(ast, @@ -689,7 +677,7 @@ Text_t compile_function(env_t *env, Text_t name_code, ast_t *ast, Text_t *static set_binding(body_scope, arg->name, arg_type, Texts("_$", arg->name)); } - body_scope->fn_ret = ret_t; + body_scope->fn = ast; type_t *body_type = get_type(body_scope, body); if (ret_t->tag == AbortType) { diff --git a/src/compile/reductions.c b/src/compile/reductions.c index e0477a9c..438e072b 100644 --- a/src/compile/reductions.c +++ b/src/compile/reductions.c @@ -12,6 +12,7 @@ public Text_t compile_reduction(env_t *env, ast_t *ast) { DeclareMatch(reduction, ast, Reduction); ast_e op = reduction->op; + const char *op_str = binop_info[op].operator; type_t *iter_t = get_type(env, reduction->iter); type_t *item_t = get_iterated_type(iter_t); @@ -29,7 +30,7 @@ Text_t compile_reduction(env_t *env, ast_t *ast) { type_t *item_value_type = item_t; ast_t *item_value = item; if (reduction->key) { - set_binding(body_scope, "$", item_t, compile(body_scope, item)); + set_binding(body_scope, op_str, item_t, compile(body_scope, item)); item_value = reduction->key; item_value_type = get_type(body_scope, reduction->key); } @@ -67,7 +68,7 @@ Text_t compile_reduction(env_t *env, ast_t *ast) { ast_e cmp_op = op == Min ? LessThan : GreaterThan; if (reduction->key) { env_t *key_scope = fresh_scope(env); - set_binding(key_scope, "$", item_t, item_code); + set_binding(key_scope, op_str, item_t, item_code); type_t *key_type = get_type(key_scope, reduction->key); Text_t superlative_key = op == Min ? Text("min_key") : Text("max_key"); code = Texts(code, compile_declaration(key_type, superlative_key), ";\n"); @@ -111,7 +112,7 @@ Text_t compile_reduction(env_t *env, ast_t *ast) { type_t *reduction_type = Match(get_type(env, ast), OptionalType)->type; ast_t *item_value = item; if (reduction->key) { - set_binding(body_scope, "$", item_t, compile(body_scope, item)); + set_binding(body_scope, op_str, item_t, compile(body_scope, item)); item_value = reduction->key; } diff --git a/src/compile/statements.c b/src/compile/statements.c index a7c5214a..bde9ae36 100644 --- a/src/compile/statements.c +++ b/src/compile/statements.c @@ -25,6 +25,14 @@ Text_t with_source_info(env_t *env, ast_t *ast, Text_t code) { return Texts("\n#line ", line, "\n", code); } +static Text_t compile_simple_update_assignment(env_t *env, ast_t *ast, const char *op) { + binary_operands_t update = BINARY_OPERANDS(ast); + type_t *lhs_t = get_type(env, update.lhs); + if (is_idempotent(update.lhs) && (lhs_t->tag == IntType || lhs_t->tag == NumType || lhs_t->tag == ByteType)) + return Texts(compile_lvalue(env, update.lhs), " ", op, "= ", compile_to_type(env, update.rhs, lhs_t), ";"); + return compile_update_assignment(env, ast); +} + static Text_t _compile_statement(env_t *env, ast_t *ast) { switch (ast->tag) { case When: return compile_when_statement(env, ast); @@ -47,41 +55,12 @@ static Text_t _compile_statement(env_t *env, ast_t *ast) { } } case Assign: return compile_assignment_statement(env, ast); - case PlusUpdate: { - DeclareMatch(update, ast, PlusUpdate); - type_t *lhs_t = get_type(env, update->lhs); - if (is_idempotent(update->lhs) && (lhs_t->tag == IntType || lhs_t->tag == NumType || lhs_t->tag == ByteType)) - return Texts(compile_lvalue(env, update->lhs), " += ", compile_to_type(env, update->rhs, lhs_t), ";"); - return compile_update_assignment(env, ast); - } - case MinusUpdate: { - DeclareMatch(update, ast, MinusUpdate); - type_t *lhs_t = get_type(env, update->lhs); - if (is_idempotent(update->lhs) && (lhs_t->tag == IntType || lhs_t->tag == NumType || lhs_t->tag == ByteType)) - return Texts(compile_lvalue(env, update->lhs), " -= ", compile_to_type(env, update->rhs, lhs_t), ";"); - return compile_update_assignment(env, ast); - } - case MultiplyUpdate: { - DeclareMatch(update, ast, MultiplyUpdate); - type_t *lhs_t = get_type(env, update->lhs); - if (is_idempotent(update->lhs) && (lhs_t->tag == IntType || lhs_t->tag == NumType || lhs_t->tag == ByteType)) - return Texts(compile_lvalue(env, update->lhs), " *= ", compile_to_type(env, update->rhs, lhs_t), ";"); - return compile_update_assignment(env, ast); - } - case DivideUpdate: { - DeclareMatch(update, ast, DivideUpdate); - type_t *lhs_t = get_type(env, update->lhs); - if (is_idempotent(update->lhs) && (lhs_t->tag == IntType || lhs_t->tag == NumType || lhs_t->tag == ByteType)) - return Texts(compile_lvalue(env, update->lhs), " /= ", compile_to_type(env, update->rhs, lhs_t), ";"); - return compile_update_assignment(env, ast); - } - case ModUpdate: { - DeclareMatch(update, ast, ModUpdate); - type_t *lhs_t = get_type(env, update->lhs); - if (is_idempotent(update->lhs) && (lhs_t->tag == IntType || lhs_t->tag == NumType || lhs_t->tag == ByteType)) - return Texts(compile_lvalue(env, update->lhs), " %= ", compile_to_type(env, update->rhs, lhs_t), ";"); - return compile_update_assignment(env, ast); - } + case PlusUpdate: return compile_simple_update_assignment(env, ast, "+"); + case MinusUpdate: return compile_simple_update_assignment(env, ast, "-"); + case MultiplyUpdate: return compile_simple_update_assignment(env, ast, "*"); + case DivideUpdate: return compile_simple_update_assignment(env, ast, "/"); + case ModUpdate: return compile_simple_update_assignment(env, ast, "%"); + case PowerUpdate: case Mod1Update: case ConcatUpdate: @@ -131,7 +110,7 @@ static Text_t _compile_statement(env_t *env, ast_t *ast) { return code; } case Return: { - if (!env->fn_ret) code_err(ast, "This return statement is not inside any function"); + if (!env->fn) code_err(ast, "This return statement is not inside any function"); ast_t *ret = Match(ast, Return)->value; Text_t code = EMPTY_TEXT; @@ -139,22 +118,30 @@ static Text_t _compile_statement(env_t *env, ast_t *ast) { code = Texts(code, compile_statement(deferred->defer_env, deferred->block)); } + type_t *ret_type = get_function_return_type(env, env->fn); if (ret) { - if (env->fn_ret->tag == VoidType || env->fn_ret->tag == AbortType) + if (ret_type->tag == VoidType || ret_type->tag == AbortType) code_err(ast, "This function is not supposed to return any values, " "according to its type signature"); - env = with_enum_scope(env, env->fn_ret); - Text_t value = compile_to_type(env, ret, env->fn_ret); + env = with_enum_scope(env, ret_type); + if (env->fn->tag == ConvertDef) { + type_t *value_type = get_type(env, ret); + if (!type_eq(value_type, ret_type)) { + code_err(ret, "This value is a ", type_to_text(value_type), + " but this conversion needs an explicit ", type_to_text(ret_type)); + } + } + Text_t value = compile_to_type(env, ret, ret_type); if (env->deferred) { - code = Texts(compile_declaration(env->fn_ret, Text("ret")), " = ", value, ";\n", code); + code = Texts(compile_declaration(ret_type, Text("ret")), " = ", value, ";\n", code); value = Text("ret"); } return Texts(code, "return ", value, ";"); } else { - if (env->fn_ret->tag != VoidType) - code_err(ast, "This function expects you to return a ", type_to_str(env->fn_ret), " value"); + if (ret_type->tag != VoidType) + code_err(ast, "This function expects you to return a ", type_to_text(ret_type), " value"); return Texts(code, "return;"); } } diff --git a/src/environment.c b/src/environment.c index 5efedfbe..7ac54a7a 100644 --- a/src/environment.c +++ b/src/environment.c @@ -73,7 +73,9 @@ env_t *global_env(bool source_mapping) { } ns_entry_t; #define MAKE_TYPE(name, type, type_name, type_info, ...) \ - {name, type, type_name, type_info, TypedList(ns_entry_t, __VA_ARGS__)} + { \ + name, type, type_name, type_info, TypedList(ns_entry_t, __VA_ARGS__) \ + } struct { const char *name; type_t *type; @@ -736,7 +738,7 @@ PUREFUNC binding_t *get_constructor(env_t *env, type_t *t, arg_ast_t *args, bool } PUREFUNC binding_t *get_metamethod_binding(env_t *env, ast_e tag, ast_t *lhs, ast_t *rhs, type_t *ret) { - const char *method_name = binop_method_name(tag); + const char *method_name = binop_info[tag].method_name; if (!method_name) return NULL; binding_t *b = get_namespace_binding(env, lhs, method_name); if (!b || b->type->tag != FunctionType) return NULL; diff --git a/src/environment.h b/src/environment.h index 1ef9c1f9..c726508d 100644 --- a/src/environment.h +++ b/src/environment.h @@ -43,7 +43,7 @@ typedef struct env_s { Text_t id_suffix; Table_t *imports; compilation_unit_t *code; - type_t *fn_ret; + ast_t *fn; loop_ctx_t *loop_ctx; deferral_t *deferred; Closure_t *comprehension_action; diff --git a/src/formatter/args.c b/src/formatter/args.c new file mode 100644 index 00000000..997a1e39 --- /dev/null +++ b/src/formatter/args.c @@ -0,0 +1,57 @@ +// Logic for formatting arguments and argument lists + +#include "../ast.h" +#include "../stdlib/datatypes.h" +#include "../stdlib/optionals.h" +#include "../stdlib/text.h" +#include "formatter.h" +#include "types.h" +#include "utils.h" + +OptionalText_t format_inline_arg(arg_ast_t *arg, Table_t comments) { + if (range_has_comment(arg->start, arg->end, comments)) return NONE_TEXT; + if (arg->name == NULL && arg->value) return must(format_inline_code(arg->value, comments)); + Text_t code = Text$from_str(arg->name); + if (arg->type) code = Texts(code, ":", must(format_type(arg->type))); + if (arg->value) code = Texts(code, "=", must(format_inline_code(arg->value, comments))); + return code; +} + +Text_t format_arg(arg_ast_t *arg, Table_t comments, Text_t indent) { + OptionalText_t inline_arg = format_inline_arg(arg, comments); + if (inline_arg.length >= 0 && inline_arg.length <= MAX_WIDTH) return inline_arg; + if (arg->name == NULL && arg->value) return format_code(arg->value, comments, indent); + Text_t code = Text$from_str(arg->name); + if (arg->type) code = Texts(code, ":", format_type(arg->type)); + if (arg->value) code = Texts(code, "=", format_code(arg->value, comments, indent)); + return code; +} + +OptionalText_t format_inline_args(arg_ast_t *args, Table_t comments) { + Text_t code = EMPTY_TEXT; + for (arg_ast_t *arg = args; arg; arg = arg->next) { + if (arg->name && arg->next && arg->type == arg->next->type && arg->value == arg->next->value) { + code = Texts(code, Text$from_str(arg->name), ","); + } else { + code = Texts(code, must(format_inline_arg(arg, comments))); + if (arg->next) code = Texts(code, ", "); + } + if (arg->next && range_has_comment(arg->end, arg->next->start, comments)) return NONE_TEXT; + } + return code; +} + +Text_t format_args(arg_ast_t *args, Table_t comments, Text_t indent) { + OptionalText_t inline_args = format_inline_args(args, comments); + if (inline_args.length >= 0 && inline_args.length <= MAX_WIDTH) return inline_args; + Text_t code = EMPTY_TEXT; + for (arg_ast_t *arg = args; arg; arg = arg->next) { + if (arg->name && arg->next && arg->type == arg->next->type && arg->value == arg->next->value) { + code = Texts(code, Text$from_str(arg->name), ","); + } else { + code = Texts(code, "\n", indent, single_indent, format_arg(arg, comments, Texts(indent, single_indent))); + if (args->next) code = Texts(code, ","); + } + } + return code; +} diff --git a/src/formatter/args.h b/src/formatter/args.h new file mode 100644 index 00000000..c902684b --- /dev/null +++ b/src/formatter/args.h @@ -0,0 +1,11 @@ +// Logic for formatting arguments and argument lists + +#pragma once + +#include "../ast.h" +#include "../stdlib/datatypes.h" + +OptionalText_t format_inline_arg(arg_ast_t *arg, Table_t comments); +Text_t format_arg(arg_ast_t *arg, Table_t comments, Text_t indent); +OptionalText_t format_inline_args(arg_ast_t *args, Table_t comments); +Text_t format_args(arg_ast_t *args, Table_t comments, Text_t indent); diff --git a/src/formatter/enums.c b/src/formatter/enums.c new file mode 100644 index 00000000..893f055b --- /dev/null +++ b/src/formatter/enums.c @@ -0,0 +1,51 @@ +// Logic for formatting enums and enum tags + +#include "../ast.h" +#include "../stdlib/datatypes.h" +#include "../stdlib/optionals.h" +#include "../stdlib/text.h" +#include "args.h" +#include "utils.h" + +OptionalText_t format_inline_tag(tag_ast_t *tag, Table_t comments) { + if (range_has_comment(tag->start, tag->end, comments)) return NONE_TEXT; + Text_t code = Text$from_str(tag->name); + if (tag->fields || tag->secret) { + code = Texts(code, "(", must(format_inline_args(tag->fields, comments))); + if (tag->secret) code = Texts(code, "; secret"); + code = Texts(code, ")"); + } + return code; +} + +Text_t format_tag(tag_ast_t *tag, Table_t comments, Text_t indent) { + OptionalText_t inline_tag = format_inline_tag(tag, comments); + if (inline_tag.length >= 0) return inline_tag; + Text_t code = Text$from_str(tag->name); + if (tag->fields || tag->secret) { + code = Texts(code, "(", format_args(tag->fields, comments, Texts(indent, single_indent))); + if (tag->secret) code = Texts(code, "; secret"); + code = Texts(code, ")"); + } + return code; +} + +OptionalText_t format_inline_tags(tag_ast_t *tags, Table_t comments) { + Text_t code = EMPTY_TEXT; + for (; tags; tags = tags->next) { + code = Texts(code, must(format_inline_tag(tags, comments))); + if (tags->next) code = Texts(code, ", "); + if (tags->next && range_has_comment(tags->end, tags->next->start, comments)) return NONE_TEXT; + } + return code; +} + +Text_t format_tags(tag_ast_t *tags, Table_t comments, Text_t indent) { + OptionalText_t inline_tags = format_inline_tags(tags, comments); + if (inline_tags.length >= 0) return inline_tags; + Text_t code = EMPTY_TEXT; + for (; tags; tags = tags->next) { + add_line(&code, Texts(format_tag(tags, comments, indent), ","), indent); + } + return code; +} diff --git a/src/formatter/enums.h b/src/formatter/enums.h new file mode 100644 index 00000000..e7233df4 --- /dev/null +++ b/src/formatter/enums.h @@ -0,0 +1,11 @@ +// Logic for formatting enums and enum tags + +#pragma once + +#include "../ast.h" +#include "../stdlib/datatypes.h" + +OptionalText_t format_inline_tag(tag_ast_t *tag, Table_t comments); +Text_t format_tag(tag_ast_t *tag, Table_t comments, Text_t indent); +OptionalText_t format_inline_tags(tag_ast_t *tags, Table_t comments); +Text_t format_tags(tag_ast_t *tags, Table_t comments, Text_t indent); diff --git a/src/formatter/formatter.c b/src/formatter/formatter.c new file mode 100644 index 00000000..b68b3874 --- /dev/null +++ b/src/formatter/formatter.c @@ -0,0 +1,884 @@ +// This code defines functions for transforming ASTs back into Tomo source text + +#include <assert.h> +#include <setjmp.h> +#include <stdbool.h> +#include <stdint.h> +#include <unictype.h> + +#include "../ast.h" +#include "../parse/context.h" +#include "../parse/files.h" +#include "../parse/utils.h" +#include "../stdlib/datatypes.h" +#include "../stdlib/integers.h" +#include "../stdlib/optionals.h" +#include "../stdlib/stdlib.h" +#include "../stdlib/text.h" +#include "args.h" +#include "enums.h" +#include "formatter.h" +#include "types.h" +#include "utils.h" + +#define fmt_inline(...) must(format_inline_code(__VA_ARGS__)) +#define fmt(...) format_code(__VA_ARGS__) + +Text_t format_namespace(ast_t *namespace, Table_t comments, Text_t indent) { + if (unwrap_block(namespace) == NULL) return EMPTY_TEXT; + return Texts("\n", indent, single_indent, fmt(namespace, comments, Texts(indent, single_indent))); +} + +typedef struct { + Text_t quote, unquote, interp; +} text_opts_t; + +PUREFUNC text_opts_t choose_text_options(ast_list_t *chunks) { + int double_quotes = 0, single_quotes = 0, backticks = 0; + for (ast_list_t *chunk = chunks; chunk; chunk = chunk->next) { + if (chunk->ast->tag == TextLiteral) { + Text_t literal = Match(chunk->ast, TextLiteral)->text; + if (Text$has(literal, Text("\""))) double_quotes += 1; + if (Text$has(literal, Text("'"))) single_quotes += 1; + if (Text$has(literal, Text("`"))) backticks += 1; + } + } + Text_t quote; + if (double_quotes == 0) quote = Text("\""); + else if (single_quotes == 0) quote = Text("'"); + else if (backticks == 0) quote = Text("`"); + else quote = Text("\""); + + text_opts_t opts = {.quote = quote, .unquote = quote, .interp = Text("$")}; + return opts; +} + +static bool starts_with_id(Text_t text) { + List_t codepoints = Text$utf32_codepoints(Text$slice(text, I_small(1), I_small(1))); + return uc_is_property_xid_continue(*(ucs4_t *)codepoints.data); +} + +static OptionalText_t format_inline_text(text_opts_t opts, ast_list_t *chunks, Table_t comments) { + Text_t code = opts.quote; + for (ast_list_t *chunk = chunks; chunk; chunk = chunk->next) { + if (chunk->ast->tag == TextLiteral) { + Text_t literal = Match(chunk->ast, TextLiteral)->text; + Text_t segment = Text$escaped(literal, false, Texts(opts.unquote, opts.interp)); + code = Texts(code, segment); + } else { + if (chunk->ast->tag == Var + && (!chunk->next || chunk->next->ast->tag != TextLiteral + || !starts_with_id(Match(chunk->next->ast, TextLiteral)->text))) { + code = Texts(code, opts.interp, fmt_inline(chunk->ast, comments)); + } else { + code = Texts(code, opts.interp, "(", fmt_inline(chunk->ast, comments), ")"); + } + } + } + return Texts(code, opts.unquote); +} + +static Text_t format_text(text_opts_t opts, ast_list_t *chunks, Table_t comments, Text_t indent) { + Text_t code = EMPTY_TEXT; + Text_t current_line = EMPTY_TEXT; + for (ast_list_t *chunk = chunks; chunk; chunk = chunk->next) { + if (chunk->ast->tag == TextLiteral) { + Text_t literal = Match(chunk->ast, TextLiteral)->text; + List_t lines = Text$lines(literal); + if (lines.length == 0) continue; + current_line = Texts(current_line, Text$escaped(*(Text_t *)lines.data, false, opts.interp)); + for (int64_t i = 1; i < lines.length; i += 1) { + add_line(&code, current_line, Texts(indent, single_indent)); + current_line = Text$escaped(*(Text_t *)(lines.data + i * lines.stride), false, opts.interp); + } + } else { + current_line = Texts(current_line, opts.interp, "(", fmt(chunk->ast, comments, indent), ")"); + } + } + add_line(&code, current_line, Texts(indent, single_indent)); + code = Texts(opts.quote, "\n", indent, single_indent, code, "\n", indent, opts.unquote); + return code; +} + +OptionalText_t format_inline_code(ast_t *ast, Table_t comments) { + if (range_has_comment(ast->start, ast->end, comments)) return NONE_TEXT; + switch (ast->tag) { + /*inline*/ case Unknown: + fail("Invalid AST"); + /*inline*/ case Block: { + ast_list_t *statements = Match(ast, Block)->statements; + if (statements == NULL) return Text("pass"); + else if (statements->next == NULL) return fmt_inline(statements->ast, comments); + else return NONE_TEXT; + } + /*inline*/ case StructDef: + /*inline*/ case EnumDef: + /*inline*/ case LangDef: + /*inline*/ case Extend: + /*inline*/ case FunctionDef: + /*inline*/ case ConvertDef: + /*inline*/ case DocTest: + /*inline*/ case Extern: + return NONE_TEXT; + /*inline*/ case Assert: { + DeclareMatch(assert, ast, Assert); + Text_t expr = fmt_inline(assert->expr, comments); + if (!assert->message) return Texts("assert ", expr); + Text_t message = fmt_inline(assert->message, comments); + return Texts("assert ", expr, ", ", message); + } + /*inline*/ case Defer: + return Texts("defer ", fmt_inline(Match(ast, Defer)->body, comments)); + /*inline*/ case Lambda: { + DeclareMatch(lambda, ast, Lambda); + Text_t code = Texts("func(", format_inline_args(lambda->args, comments)); + if (lambda->ret_type) + code = Texts(code, lambda->args ? Text(" -> ") : Text("-> "), format_type(lambda->ret_type)); + code = Texts(code, ") ", fmt_inline(lambda->body, comments)); + return Texts(code); + } + /*inline*/ case If: { + DeclareMatch(if_, ast, If); + + Text_t if_condition = if_->condition->tag == Not + ? Texts("unless ", fmt_inline(Match(if_->condition, Not)->value, comments)) + : Texts("if ", fmt_inline(if_->condition, comments)); + + if (if_->else_body == NULL && if_->condition->tag != Declare) { + ast_t *stmt = unwrap_block(if_->body); + if (!stmt) return Texts("pass ", if_condition); + switch (stmt->tag) { + case Return: + case Skip: + case Stop: return Texts(fmt_inline(stmt, comments), " ", if_condition); + default: break; + } + } + + Text_t code = Texts(if_condition, " then ", fmt_inline(if_->body, comments)); + if (if_->else_body) code = Texts(code, " else ", fmt_inline(if_->else_body, comments)); + return code; + } + /*inline*/ case When: { + DeclareMatch(when, ast, When); + Text_t code = Texts("when ", fmt_inline(when->subject, comments)); + for (when_clause_t *clause = when->clauses; clause; clause = clause->next) { + code = Texts(code, " is ", fmt_inline(clause->pattern, comments)); + while (clause->next && clause->next->body == clause->body) { + clause = clause->next; + code = Texts(code, ", ", fmt_inline(clause->pattern, comments)); + } + code = Texts(code, " then ", fmt_inline(clause->body, comments)); + } + if (when->else_body) code = Texts(code, " else ", fmt_inline(when->else_body, comments)); + return code; + } + /*inline*/ case Repeat: + return Texts("repeat ", fmt_inline(Match(ast, Repeat)->body, comments)); + /*inline*/ case While: { + DeclareMatch(loop, ast, While); + return Texts("while ", fmt_inline(loop->condition, comments), " do ", fmt_inline(loop->body, comments)); + } + /*inline*/ case For: { + DeclareMatch(loop, ast, For); + Text_t code = Text("for "); + for (ast_list_t *var = loop->vars; var; var = var->next) { + code = Texts(code, fmt_inline(var->ast, comments)); + if (var->next) code = Texts(code, ", "); + } + code = Texts(code, " in ", fmt_inline(loop->iter, comments), " do ", fmt_inline(loop->body, comments)); + if (loop->empty) code = Texts(code, " else ", fmt_inline(loop->empty, comments)); + return code; + } + /*inline*/ case Comprehension: { + DeclareMatch(comp, ast, Comprehension); + Text_t code = Texts(fmt_inline(comp->expr, comments), " for "); + for (ast_list_t *var = comp->vars; var; var = var->next) { + code = Texts(code, fmt_inline(var->ast, comments)); + if (var->next) code = Texts(code, ", "); + } + code = Texts(code, " in ", fmt_inline(comp->iter, comments)); + if (comp->filter) code = Texts(code, " if ", fmt_inline(comp->filter, comments)); + return code; + } + /*inline*/ case List: + /*inline*/ case Set: { + ast_list_t *items = ast->tag == List ? Match(ast, List)->items : Match(ast, Set)->items; + Text_t code = EMPTY_TEXT; + for (ast_list_t *item = items; item; item = item->next) { + code = Texts(code, fmt_inline(item->ast, comments)); + if (item->next) code = Texts(code, ", "); + } + return ast->tag == List ? Texts("[", code, "]") : Texts("|", code, "|"); + } + /*inline*/ case Table: { + DeclareMatch(table, ast, Table); + Text_t code = EMPTY_TEXT; + for (ast_list_t *entry = table->entries; entry; entry = entry->next) { + code = Texts(code, fmt_inline(entry->ast, comments)); + if (entry->next) code = Texts(code, ", "); + } + if (table->fallback) code = Texts(code, "; fallback=", fmt_inline(table->fallback, comments)); + if (table->default_value) code = Texts(code, "; default=", fmt_inline(table->default_value, comments)); + return Texts("{", code, "}"); + } + /*inline*/ case TableEntry: { + DeclareMatch(entry, ast, TableEntry); + return Texts(fmt_inline(entry->key, comments), "=", fmt_inline(entry->value, comments)); + } + /*inline*/ case Declare: { + DeclareMatch(decl, ast, Declare); + Text_t code = fmt_inline(decl->var, comments); + if (decl->type) code = Texts(code, " : ", format_type(decl->type)); + if (decl->value) code = Texts(code, decl->type ? Text(" = ") : Text(" := "), fmt_inline(decl->value, comments)); + return code; + } + /*inline*/ case Assign: { + DeclareMatch(assign, ast, Assign); + Text_t code = EMPTY_TEXT; + for (ast_list_t *target = assign->targets; target; target = target->next) { + code = Texts(code, fmt_inline(target->ast, comments)); + if (target->next) code = Texts(code, ", "); + } + code = Texts(code, " = "); + for (ast_list_t *value = assign->values; value; value = value->next) { + code = Texts(code, fmt_inline(value->ast, comments)); + if (value->next) code = Texts(code, ", "); + } + return code; + } + /*inline*/ case Pass: + return Text("pass"); + /*inline*/ case Return: { + ast_t *value = Match(ast, Return)->value; + return value ? Texts("return ", fmt_inline(value, comments)) : Text("return"); + } + /*inline*/ case Not: { + ast_t *val = Match(ast, Not)->value; + return Texts("not ", must(termify_inline(val, comments))); + } + /*inline*/ case Negative: { + ast_t *val = Match(ast, Negative)->value; + return Texts("-", must(termify_inline(val, comments))); + } + /*inline*/ case HeapAllocate: { + ast_t *val = Match(ast, HeapAllocate)->value; + return Texts("@", must(termify_inline(val, comments))); + } + /*inline*/ case StackReference: { + ast_t *val = Match(ast, StackReference)->value; + return Texts("&", must(termify_inline(val, comments))); + } + /*inline*/ case Optional: { + ast_t *val = Match(ast, Optional)->value; + return Texts(must(termify_inline(val, comments)), "?"); + } + /*inline*/ case NonOptional: { + ast_t *val = Match(ast, NonOptional)->value; + return Texts(must(termify_inline(val, comments)), "!"); + } + /*inline*/ case FieldAccess: { + DeclareMatch(access, ast, FieldAccess); + return Texts(must(termify_inline(access->fielded, comments)), ".", Text$from_str(access->field)); + } + /*inline*/ case Index: { + DeclareMatch(index, ast, Index); + Text_t indexed = must(termify_inline(index->indexed, comments)); + if (index->index) return Texts(indexed, "[", fmt_inline(index->index, comments), "]"); + else return Texts(indexed, "[]"); + } + /*inline*/ case TextJoin: { + text_opts_t opts = choose_text_options(Match(ast, TextJoin)->children); + Text_t ret = must(format_inline_text(opts, Match(ast, TextJoin)->children, comments)); + const char *lang = Match(ast, TextJoin)->lang; + return lang ? Texts("$", Text$from_str(lang), ret) : ret; + } + /*inline*/ case InlineCCode: { + DeclareMatch(c_code, ast, InlineCCode); + Text_t code = c_code->type_ast ? Texts("C_code:", format_type(c_code->type_ast)) : Text("C_code"); + text_opts_t opts = {.quote = Text("`"), .unquote = Text("`"), .interp = Text("@")}; + return Texts(code, must(format_inline_text(opts, Match(ast, InlineCCode)->chunks, comments))); + } + /*inline*/ case TextLiteral: { fail("Something went wrong, we shouldn't be formatting text literals directly"); } + /*inline*/ case Path: { + return Texts("(", Text$escaped(Text$from_str(Match(ast, Path)->path), false, Text("()")), ")"); + } + /*inline*/ case Stop: { + const char *target = Match(ast, Stop)->target; + return target ? Texts("stop ", Text$from_str(target)) : Text("stop"); + } + /*inline*/ case Skip: { + const char *target = Match(ast, Skip)->target; + return target ? Texts("skip ", Text$from_str(target)) : Text("skip"); + } + /*inline*/ case Min: + /*inline*/ case Max: { + Text_t lhs = fmt_inline(ast->tag == Min ? Match(ast, Min)->lhs : Match(ast, Max)->lhs, comments); + Text_t rhs = fmt_inline(ast->tag == Min ? Match(ast, Min)->rhs : Match(ast, Max)->rhs, comments); + ast_t *key = ast->tag == Min ? Match(ast, Min)->key : Match(ast, Max)->key; + return Texts(lhs, key ? fmt_inline(key, comments) : (ast->tag == Min ? Text(" _min_ ") : Text(" _max_ ")), rhs); + } + /*inline*/ case Reduction: { + DeclareMatch(reduction, ast, Reduction); + if (reduction->key) { + return Texts("(", fmt_inline(reduction->key, comments), ": ", fmt_inline(reduction->iter, comments)); + } else { + return Texts("(", Text$from_str(binop_info[reduction->op].operator), ": ", + fmt_inline(reduction->iter, comments)); + } + } + /*inline*/ case None: + return Text("none"); + /*inline*/ case Bool: + return Match(ast, Bool)->b ? Text("yes") : Text("no"); + /*inline*/ case Int: { + OptionalText_t source = ast_source(ast); + return source.length > 0 ? source : Text$from_str(Match(ast, Int)->str); + } + /*inline*/ case Num: { + OptionalText_t source = ast_source(ast); + return source.length > 0 ? source : Text$from_str(String(Match(ast, Num)->n)); + } + /*inline*/ case Var: + return Text$from_str(Match(ast, Var)->name); + /*inline*/ case FunctionCall: { + DeclareMatch(call, ast, FunctionCall); + return Texts(fmt_inline(call->fn, comments), "(", must(format_inline_args(call->args, comments)), ")"); + } + /*inline*/ case MethodCall: { + DeclareMatch(call, ast, MethodCall); + Text_t self = fmt_inline(call->self, comments); + if (is_binary_operation(call->self) || call->self->tag == Negative || call->self->tag == Not) + self = parenthesize(self, EMPTY_TEXT); + return Texts(self, ".", Text$from_str(call->name), "(", must(format_inline_args(call->args, comments)), ")"); + } + /*inline*/ case BINOP_CASES: { + binary_operands_t operands = BINARY_OPERANDS(ast); + const char *op = binop_info[ast->tag].operator; + + Text_t lhs = fmt_inline(operands.lhs, comments); + Text_t rhs = fmt_inline(operands.rhs, comments); + + if (is_update_assignment(ast)) { + return Texts(lhs, " ", Text$from_str(op), " ", rhs); + } + + if (is_binary_operation(operands.lhs) && op_tightness[operands.lhs->tag] < op_tightness[ast->tag]) + lhs = parenthesize(lhs, EMPTY_TEXT); + if (is_binary_operation(operands.rhs) && op_tightness[operands.rhs->tag] < op_tightness[ast->tag]) + rhs = parenthesize(rhs, EMPTY_TEXT); + + Text_t space = op_tightness[ast->tag] >= op_tightness[Multiply] ? EMPTY_TEXT : Text(" "); + return Texts(lhs, space, Text$from_str(binop_info[ast->tag].operator), space, rhs); + } + /*inline*/ case Deserialize: { + DeclareMatch(deserialize, ast, Deserialize); + return Texts("deserialize(", fmt_inline(deserialize->value, comments), " -> ", format_type(deserialize->type), + ")"); + } + /*inline*/ case Use: { + DeclareMatch(use, ast, Use); + // struct { + // ast_t *var; + // const char *path; + // enum { USE_LOCAL, USE_MODULE, USE_SHARED_OBJECT, USE_HEADER, USE_C_CODE, USE_ASM } what; + // } Use; + return Texts("use ", use->path); + } + /*inline*/ case ExplicitlyTyped: + fail("Explicitly typed AST nodes are only meant to be used internally."); + default: { + fail("Formatting not implemented for: ", ast_to_sexp(ast)); + } + } +} + +PUREFUNC static int64_t trailing_line_len(Text_t text) { + TextIter_t state = NEW_TEXT_ITER_STATE(text); + int64_t len = 0; + for (int64_t i = text.length - 1; i >= 0; i--) { + int32_t g = Text$get_grapheme_fast(&state, i); + if (g == '\n' || g == '\r') break; + len += 1; + } + return len; +} + +Text_t format_code(ast_t *ast, Table_t comments, Text_t indent) { + OptionalText_t inlined = format_inline_code(ast, comments); + bool inlined_fits = (inlined.length >= 0 && indent.length + inlined.length <= MAX_WIDTH); + + switch (ast->tag) { + /*multiline*/ case Unknown: + fail("Invalid AST"); + /*multiline*/ case Block: { + Text_t code = EMPTY_TEXT; + bool gap_before_comment = false; + const char *comment_pos = ast->start; + for (ast_list_t *stmt = Match(ast, Block)->statements; stmt; stmt = stmt->next) { + for (OptionalText_t comment; + (comment = next_comment(comments, &comment_pos, stmt->ast->start)).length > 0;) { + if (gap_before_comment) { + add_line(&code, Text(""), indent); + gap_before_comment = false; + } + add_line(&code, Text$trim(comment, Text(" \t\r\n"), false, true), indent); + } + + if (stmt->ast->tag == Block) { + add_line(&code, + Texts("do\n", indent, single_indent, fmt(stmt->ast, comments, Texts(indent, single_indent))), + indent); + } else { + add_line(&code, fmt(stmt->ast, comments, indent), indent); + } + comment_pos = stmt->ast->end; + + if (stmt->next) { + int suggested_blanks = suggested_blank_lines(stmt->ast, stmt->next->ast); + for (int blanks = suggested_blanks; blanks > 0; blanks--) + add_line(&code, Text(""), indent); + gap_before_comment = (suggested_blanks == 0); + } else gap_before_comment = true; + } + + for (OptionalText_t comment; (comment = next_comment(comments, &comment_pos, ast->end)).length > 0;) { + if (gap_before_comment) { + add_line(&code, Text(""), indent); + gap_before_comment = false; + } + add_line(&code, Text$trim(comment, Text(" \t\r\n"), false, true), indent); + } + return code; + } + /*multiline*/ case If: { + DeclareMatch(if_, ast, If); + Text_t code = if_->condition->tag == Not + ? Texts("unless ", fmt(Match(if_->condition, Not)->value, comments, indent)) + : Texts("if ", fmt(if_->condition, comments, indent)); + + code = Texts(code, "\n", indent, single_indent, fmt(if_->body, comments, Texts(indent, single_indent))); + if (if_->else_body) { + if (if_->else_body->tag != If) { + code = Texts(code, "\n", indent, "else\n", indent, single_indent, + fmt(if_->else_body, comments, Texts(indent, single_indent))); + } else { + code = Texts(code, "\n", indent, "else ", fmt(if_->else_body, comments, indent)); + } + } + return code; + } + /*multiline*/ case When: { + DeclareMatch(when, ast, When); + Text_t code = Texts("when ", fmt(when->subject, comments, indent)); + for (when_clause_t *clause = when->clauses; clause; clause = clause->next) { + code = Texts(code, "\n", indent, "is ", fmt(clause->pattern, comments, indent)); + while (clause->next && clause->next->body == clause->body) { + clause = clause->next; + code = Texts(code, ", ", fmt(clause->pattern, comments, indent)); + } + code = Texts(code, format_namespace(clause->body, comments, indent)); + } + if (when->else_body) + code = Texts(code, "\n", indent, "else", format_namespace(when->else_body, comments, indent)); + return code; + } + /*multiline*/ case Repeat: { + return Texts("repeat\n", indent, single_indent, + fmt(Match(ast, Repeat)->body, comments, Texts(indent, single_indent))); + } + /*multiline*/ case While: { + DeclareMatch(loop, ast, While); + return Texts("while ", fmt(loop->condition, comments, indent), "\n", indent, single_indent, + fmt(loop->body, comments, Texts(indent, single_indent))); + } + /*multiline*/ case For: { + DeclareMatch(loop, ast, For); + Text_t code = Text("for "); + for (ast_list_t *var = loop->vars; var; var = var->next) { + code = Texts(code, fmt(var->ast, comments, indent)); + if (var->next) code = Texts(code, ", "); + } + code = Texts(code, " in ", fmt(loop->iter, comments, indent), format_namespace(loop->body, comments, indent)); + if (loop->empty) code = Texts(code, "\n", indent, "else", format_namespace(loop->empty, comments, indent)); + return code; + } + /*multiline*/ case Comprehension: { + if (inlined_fits) return inlined; + DeclareMatch(comp, ast, Comprehension); + Text_t code = Texts("(", fmt(comp->expr, comments, indent)); + if (code.length >= MAX_WIDTH) code = Texts(code, "\n", indent, "for "); + else code = Texts(code, " for "); + + for (ast_list_t *var = comp->vars; var; var = var->next) { + code = Texts(code, fmt(var->ast, comments, indent)); + if (var->next) code = Texts(code, ", "); + } + + code = Texts(code, " in ", fmt(comp->iter, comments, indent)); + + if (comp->filter) { + if (code.length >= MAX_WIDTH) code = Texts(code, "\n", indent, "if "); + else code = Texts(code, " if "); + code = Texts(code, fmt(comp->filter, comments, indent)); + } + return code; + } + /*multiline*/ case FunctionDef: { + DeclareMatch(func, ast, FunctionDef); + Text_t code = Texts("func ", fmt(func->name, comments, indent), "(", format_args(func->args, comments, indent)); + if (func->ret_type) code = Texts(code, func->args ? Text(" -> ") : Text("-> "), format_type(func->ret_type)); + if (func->cache) code = Texts(code, "; cache=", fmt(func->cache, comments, indent)); + if (func->is_inline) code = Texts(code, "; inline"); + code = Texts(code, Text$has(code, Text("\n")) ? Texts("\n", indent, ")") : Text(")"), "\n", indent, + single_indent, fmt(func->body, comments, Texts(indent, single_indent))); + return Texts(code); + } + /*multiline*/ case Lambda: { + if (inlined_fits) return inlined; + DeclareMatch(lambda, ast, Lambda); + Text_t code = Texts("func(", format_args(lambda->args, comments, indent)); + if (lambda->ret_type) + code = Texts(code, lambda->args ? Text(" -> ") : Text("-> "), format_type(lambda->ret_type)); + code = Texts(code, Text$has(code, Text("\n")) ? Texts("\n", indent, ")") : Text(")"), "\n", indent, + single_indent, fmt(lambda->body, comments, Texts(indent, single_indent))); + return Texts(code); + } + /*multiline*/ case ConvertDef: { + DeclareMatch(convert, ast, ConvertDef); + Text_t code = Texts("convert (", format_args(convert->args, comments, indent)); + if (convert->ret_type) + code = Texts(code, convert->args ? Text(" -> ") : Text("-> "), format_type(convert->ret_type)); + if (convert->cache) code = Texts(code, "; cache=", fmt(convert->cache, comments, indent)); + if (convert->is_inline) code = Texts(code, "; inline"); + code = Texts(code, Text$has(code, Text("\n")) ? Texts("\n", indent, ")") : Text(")"), "\n", indent, + single_indent, fmt(convert->body, comments, Texts(indent, single_indent))); + return Texts(code); + } + /*multiline*/ case StructDef: { + DeclareMatch(def, ast, StructDef); + Text_t args = format_args(def->fields, comments, indent); + Text_t code = Texts("struct ", Text$from_str(def->name), "(", args); + if (def->secret) code = Texts(code, "; secret"); + if (def->external) code = Texts(code, "; external"); + if (def->opaque) code = Texts(code, "; opaque"); + code = Texts(code, Text$has(code, Text("\n")) ? Texts("\n", indent, ")") : Text(")")); + return Texts(code, format_namespace(def->namespace, comments, indent)); + } + /*multiline*/ case EnumDef: { + DeclareMatch(def, ast, EnumDef); + Text_t code = Texts("enum ", Text$from_str(def->name), "(", format_tags(def->tags, comments, indent)); + return Texts(code, Text$has(code, Text("\n")) ? Texts("\n", indent, ")") : Text(")"), + format_namespace(def->namespace, comments, indent)); + } + /*multiline*/ case LangDef: { + DeclareMatch(def, ast, LangDef); + return Texts("lang ", Text$from_str(def->name), format_namespace(def->namespace, comments, indent)); + } + /*multiline*/ case Extend: { + DeclareMatch(extend, ast, Extend); + return Texts("lang ", Text$from_str(extend->name), format_namespace(extend->body, comments, indent)); + } + /*multiline*/ case Extern: { + DeclareMatch(ext, ast, Extern); + return Texts("extern ", Text$from_str(ext->name), " : ", format_type(ext->type)); + } + /*multiline*/ case Defer: + return Texts("defer ", format_namespace(Match(ast, Defer)->body, comments, indent)); + /*multiline*/ case List: + /*multiline*/ case Set: { + if (inlined_fits) return inlined; + ast_list_t *items = ast->tag == List ? Match(ast, List)->items : Match(ast, Set)->items; + Text_t code = ast->tag == List ? Text("[") : Text("|"); + const char *comment_pos = ast->start; + for (ast_list_t *item = items; item; item = item->next) { + for (OptionalText_t comment; + (comment = next_comment(comments, &comment_pos, item->ast->start)).length > 0;) { + add_line(&code, Text$trim(comment, Text(" \t\r\n"), false, true), Texts(indent, single_indent)); + } + Text_t item_text = fmt(item->ast, comments, Texts(indent, single_indent)); + if (Text$ends_with(code, Text(","), NULL)) { + if (!Text$has(item_text, Text("\n")) && trailing_line_len(code) + 1 + item_text.length + 1 <= MAX_WIDTH) + code = Texts(code, " ", item_text, ","); + else code = Texts(code, "\n", indent, single_indent, item_text, ","); + } else { + add_line(&code, Texts(item_text, ","), Texts(indent, single_indent)); + } + } + for (OptionalText_t comment; (comment = next_comment(comments, &comment_pos, ast->end)).length > 0;) { + add_line(&code, Text$trim(comment, Text(" \t\r\n"), false, true), Texts(indent, single_indent)); + } + return ast->tag == List ? Texts(code, "\n", indent, "]") : Texts(code, "\n", indent, "|"); + } + /*multiline*/ case Table: { + if (inlined_fits) return inlined; + DeclareMatch(table, ast, Table); + Text_t code = Texts("{"); + const char *comment_pos = ast->start; + for (ast_list_t *entry = table->entries; entry; entry = entry->next) { + for (OptionalText_t comment; + (comment = next_comment(comments, &comment_pos, entry->ast->start)).length > 0;) { + add_line(&code, Text$trim(comment, Text(" \t\r\n"), false, true), Texts(indent, single_indent)); + } + + Text_t entry_text = fmt(entry->ast, comments, Texts(indent, single_indent)); + if (Text$ends_with(code, Text(","), NULL)) { + if (!Text$has(entry_text, Text("\n")) + && trailing_line_len(code) + 1 + entry_text.length + 1 <= MAX_WIDTH) + code = Texts(code, " ", entry_text, ","); + else code = Texts(code, "\n", indent, single_indent, entry_text, ","); + } else { + add_line(&code, Texts(entry_text, ","), Texts(indent, single_indent)); + } + + add_line(&code, Texts(entry_text, ","), Texts(indent, single_indent)); + } + for (OptionalText_t comment; (comment = next_comment(comments, &comment_pos, ast->end)).length > 0;) { + add_line(&code, Text$trim(comment, Text(" \t\r\n"), false, true), Texts(indent, single_indent)); + } + + if (table->fallback) + code = Texts(code, ";\n", indent, single_indent, "fallback=", fmt(table->fallback, comments, indent)); + + if (table->default_value) + code = Texts(code, ";\n", indent, single_indent, "default=", fmt(table->default_value, comments, indent)); + + return Texts(code, "\n", indent, "}"); + } + /*multiline*/ case TableEntry: { + if (inlined_fits) return inlined; + DeclareMatch(entry, ast, TableEntry); + return Texts(fmt(entry->key, comments, indent), "=", fmt(entry->value, comments, indent)); + } + /*multiline*/ case Declare: { + if (inlined_fits) return inlined; + DeclareMatch(decl, ast, Declare); + Text_t code = fmt(decl->var, comments, indent); + if (decl->type) code = Texts(code, " : ", format_type(decl->type)); + if (decl->value) + code = Texts(code, decl->type ? Text(" = ") : Text(" := "), fmt(decl->value, comments, indent)); + return code; + } + /*multiline*/ case Assign: { + if (inlined_fits) return inlined; + DeclareMatch(assign, ast, Assign); + Text_t code = EMPTY_TEXT; + for (ast_list_t *target = assign->targets; target; target = target->next) { + code = Texts(code, fmt(target->ast, comments, indent)); + if (target->next) code = Texts(code, ", "); + } + code = Texts(code, " = "); + for (ast_list_t *value = assign->values; value; value = value->next) { + code = Texts(code, fmt(value->ast, comments, indent)); + if (value->next) code = Texts(code, ", "); + } + return code; + } + /*multiline*/ case Pass: + return Text("pass"); + /*multiline*/ case Return: { + if (inlined_fits) return inlined; + ast_t *value = Match(ast, Return)->value; + return value ? Texts("return ", fmt(value, comments, indent)) : Text("return"); + } + /*inline*/ case Not: { + if (inlined_fits) return inlined; + ast_t *val = Match(ast, Not)->value; + if (is_binary_operation(val)) return Texts("not ", termify(val, comments, indent)); + else return Texts("not ", fmt(val, comments, indent)); + } + /*inline*/ case Negative: { + if (inlined_fits) return inlined; + ast_t *val = Match(ast, Negative)->value; + if (is_binary_operation(val)) return Texts("-", termify(val, comments, indent)); + else return Texts("-", fmt(val, comments, indent)); + } + /*multiline*/ case HeapAllocate: { + if (inlined_fits) return inlined; + ast_t *val = Match(ast, HeapAllocate)->value; + return Texts("@", termify(val, comments, indent), ""); + } + /*multiline*/ case StackReference: { + if (inlined_fits) return inlined; + ast_t *val = Match(ast, StackReference)->value; + return Texts("&(", termify(val, comments, indent), ")"); + } + /*multiline*/ case Optional: { + if (inlined_fits) return inlined; + ast_t *val = Match(ast, Optional)->value; + return Texts(termify(val, comments, indent), "?"); + } + /*multiline*/ case NonOptional: { + if (inlined_fits) return inlined; + ast_t *val = Match(ast, NonOptional)->value; + return Texts(termify(val, comments, indent), "!"); + } + /*multiline*/ case FieldAccess: { + if (inlined_fits) return inlined; + DeclareMatch(access, ast, FieldAccess); + return Texts(termify(access->fielded, comments, indent), ".", Text$from_str(access->field)); + } + /*multiline*/ case Index: { + if (inlined_fits) return inlined; + DeclareMatch(index, ast, Index); + if (index->index) + return Texts(termify(index->indexed, comments, indent), "[", fmt(index->index, comments, indent), "]"); + else return Texts(termify(index->indexed, comments, indent), "[]"); + } + /*multiline*/ case TextJoin: { + if (inlined_fits) return inlined; + + text_opts_t opts = choose_text_options(Match(ast, TextJoin)->children); + Text_t ret = format_text(opts, Match(ast, TextJoin)->children, comments, indent); + const char *lang = Match(ast, TextJoin)->lang; + return lang ? Texts("$", Text$from_str(lang), ret) : ret; + } + /*multiline*/ case InlineCCode: { + DeclareMatch(c_code, ast, InlineCCode); + if (inlined_fits && c_code->type != NULL) return inlined; + Text_t code = c_code->type_ast ? Texts("C_code:", format_type(c_code->type_ast)) : Text("C_code"); + text_opts_t opts = {.quote = Text("`"), .unquote = Text("`"), .interp = Text("@")}; + return Texts(code, format_text(opts, Match(ast, InlineCCode)->chunks, comments, indent)); + } + /*multiline*/ case TextLiteral: { fail("Something went wrong, we shouldn't be formatting text literals directly"); } + /*multiline*/ case Path: { + assert(inlined.length > 0); + return inlined; + } + /*multiline*/ case Min: + /*multiline*/ case Max: { + if (inlined_fits) return inlined; + Text_t lhs = termify(ast->tag == Min ? Match(ast, Min)->lhs : Match(ast, Max)->lhs, comments, indent); + Text_t rhs = termify(ast->tag == Min ? Match(ast, Min)->rhs : Match(ast, Max)->rhs, comments, indent); + ast_t *key = ast->tag == Min ? Match(ast, Min)->key : Match(ast, Max)->key; + Text_t op = key ? fmt(key, comments, indent) : (ast->tag == Min ? Text("_min_") : Text("_max_")); + return Texts(lhs, " ", op, " ", rhs); + } + /*multiline*/ case Reduction: { + if (inlined_fits) return inlined; + DeclareMatch(reduction, ast, Reduction); + if (reduction->key) { + return Texts("(", fmt(reduction->key, comments, Texts(indent, single_indent)), ": ", + fmt(reduction->iter, comments, Texts(indent, single_indent))); + } else { + return Texts("(", binop_info[reduction->op].operator, ": ", + fmt(reduction->iter, comments, Texts(indent, single_indent))); + } + } + /*multiline*/ case Stop: + /*multiline*/ case Skip: + /*multiline*/ case None: + /*multiline*/ case Bool: + /*multiline*/ case Int: + /*multiline*/ case Num: + /*multiline*/ case Var: { + assert(inlined.length >= 0); + return inlined; + } + /*multiline*/ case FunctionCall: { + if (inlined_fits) return inlined; + DeclareMatch(call, ast, FunctionCall); + Text_t args = format_args(call->args, comments, indent); + return Texts(fmt(call->fn, comments, indent), "(", args, + Text$has(args, Text("\n")) ? Texts("\n", indent) : EMPTY_TEXT, ")"); + } + /*multiline*/ case MethodCall: { + if (inlined_fits) return inlined; + DeclareMatch(call, ast, MethodCall); + Text_t args = format_args(call->args, comments, indent); + return Texts(termify(call->self, comments, indent), ".", Text$from_str(call->name), "(", args, + Text$has(args, Text("\n")) ? Texts("\n", indent) : EMPTY_TEXT, ")"); + } + /*multiline*/ case DocTest: { + DeclareMatch(test, ast, DocTest); + Text_t expr = fmt(test->expr, comments, indent); + Text_t code = Texts(">> ", expr); + if (test->expected) { + Text_t expected = fmt(test->expected, comments, indent); + code = Texts(code, "\n", indent, "= ", expected); + } + return code; + } + /*multiline*/ case Assert: { + DeclareMatch(assert, ast, Assert); + Text_t expr = fmt(assert->expr, comments, indent); + if (!assert->message) return Texts("assert ", expr); + Text_t message = fmt(assert->message, comments, indent); + return Texts("assert ", expr, ", ", message); + } + /*multiline*/ case BINOP_CASES: { + if (inlined_fits) return inlined; + binary_operands_t operands = BINARY_OPERANDS(ast); + const char *op = binop_info[ast->tag].operator; + Text_t lhs = fmt(operands.lhs, comments, indent); + Text_t rhs = fmt(operands.rhs, comments, indent); + + if (is_update_assignment(ast)) { + return Texts(lhs, " ", Text$from_str(op), " ", rhs); + } + + if (is_binary_operation(operands.lhs) && op_tightness[operands.lhs->tag] < op_tightness[ast->tag]) + lhs = parenthesize(lhs, indent); + if (is_binary_operation(operands.rhs) && op_tightness[operands.rhs->tag] < op_tightness[ast->tag]) + rhs = parenthesize(rhs, indent); + + Text_t space = op_tightness[ast->tag] >= op_tightness[Multiply] ? EMPTY_TEXT : Text(" "); + return Texts(lhs, space, Text$from_str(binop_info[ast->tag].operator), space, rhs); + } + /*multiline*/ case Deserialize: { + if (inlined_fits) return inlined; + DeclareMatch(deserialize, ast, Deserialize); + return Texts("deserialize(", fmt(deserialize->value, comments, indent), " -> ", format_type(deserialize->type), + ")"); + } + /*multiline*/ case Use: { + assert(inlined.length > 0); + return inlined; + } + /*multiline*/ case ExplicitlyTyped: + fail("Explicitly typed AST nodes are only meant to be used internally."); + default: { + if (inlined_fits) return inlined; + fail("Formatting not implemented for: ", ast_to_sexp(ast)); + } + } +} + +Text_t format_file(const char *path) { + file_t *file = load_file(path); + if (!file) return EMPTY_TEXT; + + jmp_buf on_err; + if (setjmp(on_err) != 0) { + return Text$from_str(file->text); + } + parse_ctx_t ctx = { + .file = file, + .on_err = &on_err, + .comments = {}, + }; + + const char *pos = file->text; + if (match(&pos, "#!")) // shebang + some_not(&pos, "\r\n"); + + whitespace(&ctx, &pos); + ast_t *ast = parse_file_body(&ctx, pos); + if (!ast) return Text$from_str(file->text); + pos = ast->end; + whitespace(&ctx, &pos); + if (pos < file->text + file->len && *pos != '\0') { + return Text$from_str(file->text); + } + + const char *fmt_pos = file->text; + Text_t code = EMPTY_TEXT; + for (OptionalText_t comment; (comment = next_comment(ctx.comments, &fmt_pos, ast->start)).length > 0;) { + code = Texts(code, Text$trim(comment, Text(" \t\r\n"), false, true), "\n"); + } + code = Texts(code, fmt(ast, ctx.comments, EMPTY_TEXT)); + for (OptionalText_t comment; (comment = next_comment(ctx.comments, &fmt_pos, ast->start)).length > 0;) { + code = Texts(code, Text$trim(comment, Text(" \t\r\n"), false, true), "\n"); + } + return code; +} diff --git a/src/formatter/formatter.h b/src/formatter/formatter.h new file mode 100644 index 00000000..a8f9013a --- /dev/null +++ b/src/formatter/formatter.h @@ -0,0 +1,13 @@ +// This code defines functions for transforming ASTs back into Tomo source text + +#pragma once + +#include <stdbool.h> + +#include "../ast.h" +#include "../stdlib/datatypes.h" + +Text_t format_file(const char *path); +Text_t format_code(ast_t *ast, Table_t comments, Text_t indentation); +Text_t format_namespace(ast_t *namespace, Table_t comments, Text_t indent); +OptionalText_t format_inline_code(ast_t *ast, Table_t comments); diff --git a/src/formatter/types.c b/src/formatter/types.c new file mode 100644 index 00000000..e52faf70 --- /dev/null +++ b/src/formatter/types.c @@ -0,0 +1,45 @@ +// Logic for formatting types + +#include "../ast.h" +#include "../stdlib/datatypes.h" +#include "../stdlib/stdlib.h" +#include "../stdlib/text.h" +#include "args.h" +#include "formatter.h" + +Text_t format_type(type_ast_t *type) { + switch (type->tag) { + case VarTypeAST: return Text$from_str(Match(type, VarTypeAST)->name); + case PointerTypeAST: { + DeclareMatch(ptr, type, PointerTypeAST); + return Texts(ptr->is_stack ? Text("&") : Text("@"), format_type(ptr->pointed)); + } + case ListTypeAST: { + return Texts("[", format_type(Match(type, ListTypeAST)->item), "]"); + } + case SetTypeAST: { + return Texts("|", format_type(Match(type, SetTypeAST)->item), "|"); + } + case TableTypeAST: { + DeclareMatch(table, type, TableTypeAST); + Text_t code = Texts("{", format_type(table->key), "=", format_type(table->value)); + if (table->default_value) { + OptionalText_t val = format_inline_code(table->default_value, (Table_t){}); + assert(val.length >= 0); + code = Texts(code, "; default=", val); + } + return Texts(code, "}"); + } + case FunctionTypeAST: { + DeclareMatch(func, type, FunctionTypeAST); + Text_t code = Texts("func(", format_inline_args(func->args, (Table_t){})); + if (func->ret) code = Texts(code, func->args ? Text(" -> ") : Text("-> "), format_type(func->ret)); + return Texts(code, ")"); + } + case OptionalTypeAST: { + return Texts(format_type(Match(type, OptionalTypeAST)->type), "?"); + } + case UnknownTypeAST: + default: fail("Invalid Type AST"); + } +} diff --git a/src/formatter/types.h b/src/formatter/types.h new file mode 100644 index 00000000..2571f880 --- /dev/null +++ b/src/formatter/types.h @@ -0,0 +1,8 @@ +// Logic for formatting types + +#pragma once + +#include "../ast.h" +#include "../stdlib/datatypes.h" + +Text_t format_type(type_ast_t *type); diff --git a/src/formatter/utils.c b/src/formatter/utils.c new file mode 100644 index 00000000..bbe74d7f --- /dev/null +++ b/src/formatter/utils.c @@ -0,0 +1,154 @@ +// This file defines utility functions for autoformatting code + +#include <stdbool.h> +#include <stdint.h> + +#include "../ast.h" +#include "../parse/context.h" +#include "../stdlib/datatypes.h" +#include "../stdlib/optionals.h" +#include "../stdlib/tables.h" +#include "../stdlib/text.h" +#include "formatter.h" + +const Text_t single_indent = Text(" "); + +void add_line(Text_t *code, Text_t line, Text_t indent) { + if (code->length == 0) { + *code = line; + } else { + if (line.length > 0) *code = Texts(*code, "\n", indent, line); + else *code = Texts(*code, "\n"); + } +} + +OptionalText_t next_comment(Table_t comments, const char **pos, const char *end) { + for (const char *p = *pos; p < end; p++) { + const char **comment_end = Table$get(comments, &p, parse_comments_info); + if (comment_end) { + *pos = *comment_end; + return Text$from_strn(p, (size_t)(*comment_end - p)); + } + } + return NONE_TEXT; +} + +bool range_has_comment(const char *start, const char *end, Table_t comments) { + OptionalText_t comment = next_comment(comments, &start, end); + return (comment.length >= 0); +} + +CONSTFUNC int suggested_blank_lines(ast_t *first, ast_t *second) { + if (second == NULL) return 0; + + for (;;) { + if (first->tag == Declare && Match(first, Declare)->value) { + first = Match(first, Declare)->value; + } else if (first->tag == DocTest && Match(first, DocTest)->expr && Match(first, DocTest)->expected == NULL) { + first = Match(first, DocTest)->expr; + } else break; + } + + for (;;) { + if (second->tag == Declare && Match(second, Declare)->value) { + second = Match(second, Declare)->value; + } else if (second->tag == DocTest && Match(second, DocTest)->expr && Match(second, DocTest)->expected == NULL) { + second = Match(second, DocTest)->expr; + } else break; + } + + switch (first->tag) { + case If: + case When: + case Repeat: + case While: + case For: + case Block: + case Defer: + case ConvertDef: + case FunctionDef: + case Lambda: + case StructDef: + case EnumDef: + case LangDef: + case Extend: return 1; + case Use: { + if (second->tag != Use) return 1; + break; + } + case Declare: { + DeclareMatch(decl, first, Declare); + if (decl->value) return suggested_blank_lines(decl->value, second); + break; + } + case Assign: { + DeclareMatch(assign, first, Assign); + for (ast_list_t *val = assign->values; val; val = val->next) { + if (suggested_blank_lines(val->ast, second) > 0) return 1; + } + break; + } + default: break; + } + + switch (second->tag) { + case If: + case When: + case Repeat: + case While: + case For: + case Block: + case Defer: + case ConvertDef: + case FunctionDef: + case Lambda: + case StructDef: + case EnumDef: + case LangDef: + case Extend: return 1; + default: break; + } + return 0; +} + +Text_t indent_code(Text_t code) { + if (code.length <= 0) return code; + return Texts(single_indent, Text$replace(code, Text("\n"), Texts("\n", single_indent))); +} + +Text_t parenthesize(Text_t code, Text_t indent) { + if (Text$has(code, Text("\n"))) return Texts("(\n", indent, indent_code(code), "\n", indent, ")"); + else return Texts("(", code, ")"); +} + +CONSTFUNC ast_t *unwrap_block(ast_t *ast) { + if (ast == NULL) return NULL; + while (ast->tag == Block && Match(ast, Block)->statements && Match(ast, Block)->statements->next == NULL) { + ast = Match(ast, Block)->statements->ast; + } + if (ast->tag == Block && Match(ast, Block)->statements == NULL) return NULL; + return ast; +} + +OptionalText_t termify_inline(ast_t *ast, Table_t comments) { + if (range_has_comment(ast->start, ast->end, comments)) return NONE_TEXT; + switch (ast->tag) { + case BINOP_CASES: + case Not: + case Negative: + case HeapAllocate: + case StackReference: return parenthesize(format_inline_code(ast, comments), EMPTY_TEXT); + default: return format_inline_code(ast, comments); + } +} + +Text_t termify(ast_t *ast, Table_t comments, Text_t indent) { + switch (ast->tag) { + case BINOP_CASES: + case Not: + case Negative: + case HeapAllocate: + case StackReference: return parenthesize(format_code(ast, comments, indent), indent); + default: return format_inline_code(ast, comments); + } +} diff --git a/src/formatter/utils.h b/src/formatter/utils.h new file mode 100644 index 00000000..880da0a9 --- /dev/null +++ b/src/formatter/utils.h @@ -0,0 +1,30 @@ +// This file defines utility functions for autoformatting code + +#pragma once + +#include <stdbool.h> + +#include "../ast.h" +#include "../stdlib/datatypes.h" +#include "../stdlib/optionals.h" + +#define MAX_WIDTH 100 + +#define must(expr) \ + ({ \ + OptionalText_t _expr = expr; \ + if (_expr.length < 0) return NONE_TEXT; \ + (Text_t) _expr; \ + }) + +extern const Text_t single_indent; + +void add_line(Text_t *code, Text_t line, Text_t indent); +OptionalText_t next_comment(Table_t comments, const char **pos, const char *end); +bool range_has_comment(const char *start, const char *end, Table_t comments); +CONSTFUNC int suggested_blank_lines(ast_t *first, ast_t *second); +Text_t indent_code(Text_t code); +Text_t parenthesize(Text_t code, Text_t indent); +CONSTFUNC ast_t *unwrap_block(ast_t *ast); +OptionalText_t termify_inline(ast_t *ast, Table_t comments); +Text_t termify(ast_t *ast, Table_t comments, Text_t indent); diff --git a/src/parse/binops.c b/src/parse/binops.c index 7ccf1379..4676b249 100644 --- a/src/parse/binops.c +++ b/src/parse/binops.c @@ -9,33 +9,6 @@ #include "suffixes.h" #include "utils.h" -int op_tightness[] = { - [Power] = 9, - [Multiply] = 8, - [Divide] = 8, - [Mod] = 8, - [Mod1] = 8, - [Plus] = 7, - [Minus] = 7, - [Concat] = 6, - [LeftShift] = 5, - [RightShift] = 5, - [UnsignedLeftShift] = 5, - [UnsignedRightShift] = 5, - [Min] = 4, - [Max] = 4, - [Equals] = 3, - [NotEquals] = 3, - [LessThan] = 2, - [LessThanOrEquals] = 2, - [GreaterThan] = 2, - [GreaterThanOrEquals] = 2, - [Compare] = 2, - [And] = 1, - [Or] = 1, - [Xor] = 1, -}; - ast_e match_binary_operator(const char **pos) { switch (**pos) { case '+': { @@ -94,7 +67,7 @@ ast_t *parse_infix_expr(parse_ctx_t *ctx, const char *pos, int min_tightness) { for (ast_e op; (op = match_binary_operator(&pos)) != Unknown && op_tightness[op] >= min_tightness; spaces(&pos)) { ast_t *key = NULL; if (op == Min || op == Max) { - key = NewAST(ctx->file, pos, pos, Var, .name = "$"); + key = NewAST(ctx->file, pos, pos, Var, .name = (op == Min ? "_min_" : "_max_")); for (bool progress = true; progress;) { ast_t *new_term; progress = @@ -108,7 +81,7 @@ ast_t *parse_infix_expr(parse_ctx_t *ctx, const char *pos, int min_tightness) { else if (key) pos = key->end; } - whitespace(&pos); + whitespace(ctx, &pos); if (get_line_number(ctx->file, pos) != starting_line && get_indent(ctx, pos) < starting_indent) parser_err(ctx, pos, eol(pos), "I expected this line to be at least as indented than the line above it"); diff --git a/src/parse/containers.c b/src/parse/containers.c index 821cbdd4..73d30ecd 100644 --- a/src/parse/containers.c +++ b/src/parse/containers.c @@ -16,7 +16,7 @@ ast_t *parse_list(parse_ctx_t *ctx, const char *pos) { const char *start = pos; if (!match(&pos, "[")) return NULL; - whitespace(&pos); + whitespace(ctx, &pos); ast_list_t *items = NULL; for (;;) { @@ -29,9 +29,9 @@ ast_t *parse_list(parse_ctx_t *ctx, const char *pos) { suffixed = parse_comprehension_suffix(ctx, item); } items = new (ast_list_t, .ast = item, .next = items); - if (!match_separator(&pos)) break; + if (!match_separator(ctx, &pos)) break; } - whitespace(&pos); + whitespace(ctx, &pos); expect_closing(ctx, &pos, "]", "I wasn't able to parse the rest of this list"); REVERSE_LIST(items); @@ -42,14 +42,14 @@ ast_t *parse_table(parse_ctx_t *ctx, const char *pos) { const char *start = pos; if (!match(&pos, "{")) return NULL; - whitespace(&pos); + whitespace(ctx, &pos); ast_list_t *entries = NULL; for (;;) { const char *entry_start = pos; ast_t *key = optional(ctx, &pos, parse_extended_expr); if (!key) break; - whitespace(&pos); + whitespace(ctx, &pos); if (!match(&pos, "=")) return NULL; ast_t *value = expect(ctx, pos - 1, &pos, parse_expr, "I couldn't parse the value for this table entry"); ast_t *entry = NewAST(ctx->file, entry_start, pos, TableEntry, .key = key, .value = value); @@ -60,37 +60,37 @@ ast_t *parse_table(parse_ctx_t *ctx, const char *pos) { suffixed = parse_comprehension_suffix(ctx, entry); } entries = new (ast_list_t, .ast = entry, .next = entries); - if (!match_separator(&pos)) break; + if (!match_separator(ctx, &pos)) break; } REVERSE_LIST(entries); - whitespace(&pos); + whitespace(ctx, &pos); ast_t *fallback = NULL, *default_value = NULL; if (match(&pos, ";")) { for (;;) { - whitespace(&pos); + whitespace(ctx, &pos); const char *attr_start = pos; if (match_word(&pos, "fallback")) { - whitespace(&pos); + whitespace(ctx, &pos); if (!match(&pos, "=")) parser_err(ctx, attr_start, pos, "I expected an '=' after 'fallback'"); if (fallback) parser_err(ctx, attr_start, pos, "This table already has a fallback"); fallback = expect(ctx, attr_start, &pos, parse_expr, "I expected a fallback table"); } else if (match_word(&pos, "default")) { - whitespace(&pos); + whitespace(ctx, &pos); if (!match(&pos, "=")) parser_err(ctx, attr_start, pos, "I expected an '=' after 'default'"); if (default_value) parser_err(ctx, attr_start, pos, "This table already has a default"); default_value = expect(ctx, attr_start, &pos, parse_expr, "I expected a default value"); } else { break; } - whitespace(&pos); + whitespace(ctx, &pos); if (!match(&pos, ",")) break; } } - whitespace(&pos); + whitespace(ctx, &pos); expect_closing(ctx, &pos, "}", "I wasn't able to parse the rest of this table"); return NewAST(ctx->file, start, pos, Table, .default_value = default_value, .entries = entries, @@ -102,13 +102,13 @@ ast_t *parse_set(parse_ctx_t *ctx, const char *pos) { if (match(&pos, "||")) return NewAST(ctx->file, start, pos, Set); if (!match(&pos, "|")) return NULL; - whitespace(&pos); + whitespace(ctx, &pos); ast_list_t *items = NULL; for (;;) { ast_t *item = optional(ctx, &pos, parse_extended_expr); if (!item) break; - whitespace(&pos); + whitespace(ctx, &pos); ast_t *suffixed = parse_comprehension_suffix(ctx, item); while (suffixed) { item = suffixed; @@ -116,12 +116,12 @@ ast_t *parse_set(parse_ctx_t *ctx, const char *pos) { suffixed = parse_comprehension_suffix(ctx, item); } items = new (ast_list_t, .ast = item, .next = items); - if (!match_separator(&pos)) break; + if (!match_separator(ctx, &pos)) break; } REVERSE_LIST(items); - whitespace(&pos); + whitespace(ctx, &pos); expect_closing(ctx, &pos, "|", "I wasn't able to parse the rest of this set"); return NewAST(ctx->file, start, pos, Set, .items = items); diff --git a/src/parse/context.c b/src/parse/context.c new file mode 100644 index 00000000..cd8d16bc --- /dev/null +++ b/src/parse/context.c @@ -0,0 +1,8 @@ +// A context parameter that gets passed around during parsing. + +#include "../stdlib/memory.h" +#include "../stdlib/pointers.h" +#include "../stdlib/tables.h" +#include "../stdlib/types.h" + +const TypeInfo_t *parse_comments_info = Table$info(Pointer$info("@", &Memory$info), Pointer$info("@", &Memory$info)); diff --git a/src/parse/context.h b/src/parse/context.h index 6008060e..f1e3be2f 100644 --- a/src/parse/context.h +++ b/src/parse/context.h @@ -4,10 +4,15 @@ #include <setjmp.h> #include <stdint.h> +#include "../stdlib/datatypes.h" #include "../stdlib/files.h" +#include "../stdlib/types.h" + +extern const TypeInfo_t *parse_comments_info; typedef struct { file_t *file; jmp_buf *on_err; int64_t next_lambda_id; + Table_t comments; // Map of <start pos> -> <end pos> } parse_ctx_t; diff --git a/src/parse/controlflow.c b/src/parse/controlflow.c index 6f6292af..1087e20e 100644 --- a/src/parse/controlflow.c +++ b/src/parse/controlflow.c @@ -36,7 +36,7 @@ ast_t *parse_block(parse_ctx_t *ctx, const char *pos) { if (indent(ctx, &pos)) { indented:; int64_t block_indent = get_indent(ctx, pos); - whitespace(&pos); + whitespace(ctx, &pos); while (*pos) { ast_t *stmt = optional(ctx, &pos, parse_statement); if (!stmt) { @@ -55,7 +55,7 @@ ast_t *parse_block(parse_ctx_t *ctx, const char *pos) { break; } statements = new (ast_list_t, .ast = stmt, .next = statements); - whitespace(&pos); + whitespace(ctx, &pos); // Guard against having two valid statements on the same line, separated by spaces (but no newlines): if (!memchr(stmt->end, '\n', (size_t)(pos - stmt->end))) { @@ -131,18 +131,8 @@ ast_t *parse_while(parse_ctx_t *ctx, const char *pos) { // while condition ["do"] [<indent>] body const char *start = pos; if (!match_word(&pos, "while")) return NULL; - - const char *tmp = pos; - // Shorthand form: `while when ...` - if (match_word(&tmp, "when")) { - ast_t *when = expect(ctx, start, &pos, parse_when, "I expected a 'when' block after this"); - if (!when->__data.When.else_body) when->__data.When.else_body = NewAST(ctx->file, pos, pos, Stop); - return NewAST(ctx->file, start, pos, While, .body = when); - } - - (void)match_word(&pos, "do"); // Optional 'do' - ast_t *condition = expect(ctx, start, &pos, parse_expr, "I don't see a viable condition for this 'while'"); + (void)match_word(&pos, "do"); // Optional 'do' ast_t *body = expect(ctx, start, &pos, parse_block, "I expected a body for this 'while'"); return NewAST(ctx->file, start, pos, While, .condition = condition, .body = body); } @@ -174,7 +164,7 @@ ast_t *parse_if(parse_ctx_t *ctx, const char *pos) { ast_t *body = expect(ctx, start, &pos, parse_block, "I expected a body for this 'if' statement"); const char *tmp = pos; - whitespace(&tmp); + whitespace(ctx, &tmp); ast_t *else_body = NULL; const char *else_start = pos; if (get_indent(ctx, tmp) == starting_indent && match_word(&tmp, "else")) { @@ -198,7 +188,7 @@ ast_t *parse_when(parse_ctx_t *ctx, const char *pos) { when_clause_t *clauses = NULL; const char *tmp = pos; - whitespace(&tmp); + whitespace(ctx, &tmp); while (get_indent(ctx, tmp) == starting_indent && match_word(&tmp, "is")) { pos = tmp; spaces(&pos); @@ -217,7 +207,7 @@ ast_t *parse_when(parse_ctx_t *ctx, const char *pos) { } clauses = new_clauses; tmp = pos; - whitespace(&tmp); + whitespace(ctx, &tmp); } REVERSE_LIST(clauses); @@ -255,7 +245,7 @@ ast_t *parse_for(parse_ctx_t *ctx, const char *pos) { ast_t *body = expect(ctx, start, &pos, parse_block, "I expected a body for this 'for'"); const char *else_start = pos; - whitespace(&else_start); + whitespace(ctx, &else_start); ast_t *empty = NULL; if (match_word(&else_start, "else") && get_indent(ctx, else_start) == starting_indent) { pos = else_start; diff --git a/src/parse/expressions.c b/src/parse/expressions.c index d643d4e7..df0a10a7 100644 --- a/src/parse/expressions.c +++ b/src/parse/expressions.c @@ -10,10 +10,10 @@ #include "context.h" #include "controlflow.h" #include "errors.h" +#include "expressions.h" #include "files.h" #include "functions.h" #include "numbers.h" -#include "expressions.h" #include "suffixes.h" #include "text.h" #include "types.h" @@ -23,7 +23,7 @@ ast_t *parse_parens(parse_ctx_t *ctx, const char *pos) { const char *start = pos; spaces(&pos); if (!match(&pos, "(")) return NULL; - whitespace(&pos); + whitespace(ctx, &pos); ast_t *expr = optional(ctx, &pos, parse_extended_expr); if (!expr) return NULL; @@ -34,7 +34,7 @@ ast_t *parse_parens(parse_ctx_t *ctx, const char *pos) { comprehension = parse_comprehension_suffix(ctx, expr); } - whitespace(&pos); + whitespace(ctx, &pos); expect_closing(ctx, &pos, ")", "I wasn't able to parse the rest of this expression"); // Update the span to include the parens: @@ -45,11 +45,13 @@ ast_t *parse_reduction(parse_ctx_t *ctx, const char *pos) { const char *start = pos; if (!match(&pos, "(")) return NULL; - whitespace(&pos); + whitespace(ctx, &pos); ast_e op = match_binary_operator(&pos); if (op == Unknown) return NULL; - ast_t *key = NewAST(ctx->file, pos, pos, Var, .name = "$"); + const char *op_str = binop_info[op].operator; + assert(op_str); + ast_t *key = NewAST(ctx->file, pos, pos, Var, .name = op_str); for (bool progress = true; progress;) { ast_t *new_term; progress = @@ -61,7 +63,7 @@ ast_t *parse_reduction(parse_ctx_t *ctx, const char *pos) { if (key && key->tag == Var) key = NULL; else if (key) pos = key->end; - whitespace(&pos); + whitespace(ctx, &pos); if (!match(&pos, ":")) return NULL; ast_t *iter = optional(ctx, &pos, parse_extended_expr); @@ -73,7 +75,7 @@ ast_t *parse_reduction(parse_ctx_t *ctx, const char *pos) { suffixed = parse_comprehension_suffix(ctx, iter); } - whitespace(&pos); + whitespace(ctx, &pos); expect_closing(ctx, &pos, ")", "I wasn't able to parse the rest of this reduction"); return NewAST(ctx->file, start, pos, Reduction, .iter = iter, .op = op, .key = key); @@ -164,14 +166,14 @@ ast_t *parse_deserialize(parse_ctx_t *ctx, const char *pos) { spaces(&pos); expect_str(ctx, start, &pos, "(", "I expected arguments for this `deserialize` call"); - whitespace(&pos); + whitespace(ctx, &pos); ast_t *value = expect(ctx, start, &pos, parse_extended_expr, "I expected an expression here"); - whitespace(&pos); + whitespace(ctx, &pos); expect_str(ctx, start, &pos, "->", "I expected a `-> Type` for this `deserialize` call so I know what it deserializes to"); - whitespace(&pos); + whitespace(ctx, &pos); type_ast_t *type = expect(ctx, start, &pos, parse_type, "I couldn't parse the type for this deserialization"); - whitespace(&pos); + whitespace(ctx, &pos); expect_closing(ctx, &pos, ")", "I expected a closing ')' for this `deserialize` call"); return NewAST(ctx->file, start, pos, Deserialize, .value = value, .type = type); } @@ -238,10 +240,10 @@ ast_t *parse_expr_str(const char *str) { }; const char *pos = file->text; - whitespace(&pos); + whitespace(&ctx, &pos); ast_t *ast = parse_extended_expr(&ctx, pos); pos = ast->end; - whitespace(&pos); + whitespace(&ctx, &pos); if (pos < file->text + file->len && *pos != '\0') parser_err(&ctx, pos, pos + strlen(pos), "I couldn't parse this part of the string"); return ast; diff --git a/src/parse/files.c b/src/parse/files.c index 8078d544..5ff41c68 100644 --- a/src/parse/files.c +++ b/src/parse/files.c @@ -11,9 +11,9 @@ #include "../stdlib/util.h" #include "context.h" #include "errors.h" +#include "expressions.h" #include "files.h" #include "functions.h" -#include "expressions.h" #include "statements.h" #include "text.h" #include "typedefs.h" @@ -33,11 +33,11 @@ static ast_t *parse_top_declaration(parse_ctx_t *ctx, const char *pos) { ast_t *parse_file_body(parse_ctx_t *ctx, const char *pos) { const char *start = pos; - whitespace(&pos); + whitespace(ctx, &pos); ast_list_t *statements = NULL; for (;;) { const char *next = pos; - whitespace(&next); + whitespace(ctx, &next); if (get_indent(ctx, next) != 0) break; ast_t *stmt; if ((stmt = optional(ctx, &pos, parse_struct_def)) || (stmt = optional(ctx, &pos, parse_func_def)) @@ -47,12 +47,12 @@ ast_t *parse_file_body(parse_ctx_t *ctx, const char *pos) { || (stmt = optional(ctx, &pos, parse_inline_c)) || (stmt = optional(ctx, &pos, parse_top_declaration))) { statements = new (ast_list_t, .ast = stmt, .next = statements); pos = stmt->end; - whitespace(&pos); // TODO: check for newline + whitespace(ctx, &pos); // TODO: check for newline } else { break; } } - whitespace(&pos); + whitespace(ctx, &pos); if (pos < ctx->file->text + ctx->file->len && *pos != '\0') { parser_err(ctx, pos, eol(pos), "I expect all top-level statements to be declarations of some kind"); } @@ -90,10 +90,10 @@ ast_t *parse_file(const char *path, jmp_buf *on_err) { if (match(&pos, "#!")) // shebang some_not(&pos, "\r\n"); - whitespace(&pos); + whitespace(&ctx, &pos); ast = parse_file_body(&ctx, pos); pos = ast->end; - whitespace(&pos); + whitespace(&ctx, &pos); if (pos < file->text + file->len && *pos != '\0') { parser_err(&ctx, pos, pos + strlen(pos), "I couldn't parse this part of the file"); } @@ -171,10 +171,10 @@ ast_t *parse_file_str(const char *str) { }; const char *pos = file->text; - whitespace(&pos); + whitespace(&ctx, &pos); ast_t *ast = parse_file_body(&ctx, pos); pos = ast->end; - whitespace(&pos); + whitespace(&ctx, &pos); if (pos < file->text + file->len && *pos != '\0') parser_err(&ctx, pos, pos + strlen(pos), "I couldn't parse this part of the string"); return ast; diff --git a/src/parse/functions.c b/src/parse/functions.c index 37505ac5..ceb0a8bc 100644 --- a/src/parse/functions.c +++ b/src/parse/functions.c @@ -26,37 +26,42 @@ arg_ast_t *parse_args(parse_ctx_t *ctx, const char **pos) { type_ast_t *type = NULL; typedef struct name_list_s { + const char *start, *end; const char *name, *alias; struct name_list_s *next; } name_list_t; name_list_t *names = NULL; for (;;) { - whitespace(pos); + whitespace(ctx, pos); const char *name = get_id(pos); if (!name) break; - whitespace(pos); + const char *name_start = *pos; + whitespace(ctx, pos); const char *alias = NULL; if (match(pos, "|")) { - whitespace(pos); + whitespace(ctx, pos); alias = get_id(pos); if (!alias) parser_err(ctx, *pos, *pos, "I expected an argument alias after `|`"); } if (match(pos, ":")) { type = expect(ctx, *pos - 1, pos, parse_type, "I expected a type here"); - names = new (name_list_t, .name = name, .alias = alias, .next = names); - whitespace(pos); + whitespace(ctx, pos); if (match(pos, "=")) default_val = expect(ctx, *pos - 1, pos, parse_term, "I expected a value after this '='"); + names = + new (name_list_t, .start = name_start, .end = *pos, .name = name, .alias = alias, .next = names); break; } else if (strncmp(*pos, "==", 2) != 0 && match(pos, "=")) { default_val = expect(ctx, *pos - 1, pos, parse_term, "I expected a value after this '='"); - names = new (name_list_t, .name = name, .alias = alias, .next = names); + names = + new (name_list_t, .start = name_start, .end = *pos, .name = name, .alias = alias, .next = names); break; } else if (name) { - names = new (name_list_t, .name = name, .alias = alias, .next = names); + names = + new (name_list_t, .start = name_start, .end = *pos, .name = name, .alias = alias, .next = names); spaces(pos); if (!match(pos, ",")) break; } else { @@ -71,10 +76,10 @@ arg_ast_t *parse_args(parse_ctx_t *ctx, const char **pos) { REVERSE_LIST(names); for (; names; names = names->next) - args = new (arg_ast_t, .name = names->name, .alias = names->alias, .type = type, .value = default_val, - .next = args); + args = new (arg_ast_t, .start = names->start, .end = names->end, .name = names->name, .alias = names->alias, + .type = type, .value = default_val, .next = args); - if (!match_separator(pos)) break; + if (!match_separator(ctx, pos)) break; } REVERSE_LIST(args); @@ -95,19 +100,19 @@ ast_t *parse_func_def(parse_ctx_t *ctx, const char *pos) { arg_ast_t *args = parse_args(ctx, &pos); spaces(&pos); type_ast_t *ret_type = match(&pos, "->") ? optional(ctx, &pos, parse_type) : NULL; - whitespace(&pos); + whitespace(ctx, &pos); bool is_inline = false; ast_t *cache_ast = NULL; - for (bool specials = match(&pos, ";"); specials; specials = match_separator(&pos)) { + for (bool specials = match(&pos, ";"); specials; specials = match_separator(ctx, &pos)) { const char *flag_start = pos; if (match_word(&pos, "inline")) { is_inline = true; } else if (match_word(&pos, "cached")) { if (!cache_ast) cache_ast = NewAST(ctx->file, pos, pos, Int, .str = "-1"); } else if (match_word(&pos, "cache_size")) { - whitespace(&pos); + whitespace(ctx, &pos); if (!match(&pos, "=")) parser_err(ctx, flag_start, pos, "I expected a value for 'cache_size'"); - whitespace(&pos); + whitespace(ctx, &pos); cache_ast = expect(ctx, start, &pos, parse_expr, "I expected a maximum size for the cache"); } } @@ -129,19 +134,19 @@ ast_t *parse_convert_def(parse_ctx_t *ctx, const char *pos) { arg_ast_t *args = parse_args(ctx, &pos); spaces(&pos); type_ast_t *ret_type = match(&pos, "->") ? optional(ctx, &pos, parse_type) : NULL; - whitespace(&pos); + whitespace(ctx, &pos); bool is_inline = false; ast_t *cache_ast = NULL; - for (bool specials = match(&pos, ";"); specials; specials = match_separator(&pos)) { + for (bool specials = match(&pos, ";"); specials; specials = match_separator(ctx, &pos)) { const char *flag_start = pos; if (match_word(&pos, "inline")) { is_inline = true; } else if (match_word(&pos, "cached")) { if (!cache_ast) cache_ast = NewAST(ctx->file, pos, pos, Int, .str = "-1"); } else if (match_word(&pos, "cache_size")) { - whitespace(&pos); + whitespace(ctx, &pos); if (!match(&pos, "=")) parser_err(ctx, flag_start, pos, "I expected a value for 'cache_size'"); - whitespace(&pos); + whitespace(ctx, &pos); cache_ast = expect(ctx, start, &pos, parse_expr, "I expected a maximum size for the cache"); } } diff --git a/src/parse/statements.c b/src/parse/statements.c index a30231f0..9606acdc 100644 --- a/src/parse/statements.c +++ b/src/parse/statements.c @@ -8,8 +8,8 @@ #include "../stdlib/util.h" #include "context.h" #include "errors.h" -#include "files.h" #include "expressions.h" +#include "files.h" #include "statements.h" #include "suffixes.h" #include "types.h" @@ -46,7 +46,7 @@ ast_t *parse_assignment(parse_ctx_t *ctx, const char *pos) { targets = new (ast_list_t, .ast = lhs, .next = targets); spaces(&pos); if (!match(&pos, ",")) break; - whitespace(&pos); + whitespace(ctx, &pos); } if (!targets) return NULL; @@ -62,7 +62,7 @@ ast_t *parse_assignment(parse_ctx_t *ctx, const char *pos) { values = new (ast_list_t, .ast = rhs, .next = values); spaces(&pos); if (!match(&pos, ",")) break; - whitespace(&pos); + whitespace(ctx, &pos); } REVERSE_LIST(targets); @@ -101,7 +101,7 @@ ast_t *parse_doctest(parse_ctx_t *ctx, const char *pos) { if (!match(&pos, ">>")) return NULL; spaces(&pos); ast_t *expr = expect(ctx, start, &pos, parse_statement, "I couldn't parse the expression for this doctest"); - whitespace(&pos); + whitespace(ctx, &pos); ast_t *expected = NULL; if (match(&pos, "=")) { spaces(&pos); @@ -120,7 +120,7 @@ ast_t *parse_assert(parse_ctx_t *ctx, const char *pos) { spaces(&pos); ast_t *message = NULL; if (match(&pos, ",")) { - whitespace(&pos); + whitespace(ctx, &pos); message = expect(ctx, start, &pos, parse_extended_expr, "I couldn't parse the error message for this assert"); } else { pos = expr->end; diff --git a/src/parse/suffixes.c b/src/parse/suffixes.c index 4aa09cb8..312f958f 100644 --- a/src/parse/suffixes.c +++ b/src/parse/suffixes.c @@ -14,10 +14,10 @@ ast_t *parse_field_suffix(parse_ctx_t *ctx, ast_t *lhs) { if (!lhs) return NULL; const char *pos = lhs->end; - whitespace(&pos); + whitespace(ctx, &pos); if (!match(&pos, ".")) return NULL; if (*pos == '.') return NULL; - whitespace(&pos); + whitespace(ctx, &pos); bool dollar = match(&pos, "$"); const char *field = get_id(&pos); if (!field) return NULL; @@ -44,9 +44,9 @@ ast_t *parse_index_suffix(parse_ctx_t *ctx, ast_t *lhs) { const char *start = lhs->start; const char *pos = lhs->end; if (!match(&pos, "[")) return NULL; - whitespace(&pos); + whitespace(ctx, &pos); ast_t *index = optional(ctx, &pos, parse_extended_expr); - whitespace(&pos); + whitespace(ctx, &pos); expect_closing(ctx, &pos, "]", "I wasn't able to parse the rest of this index"); return NewAST(ctx->file, start, pos, Index, .indexed = lhs, .index = index); } @@ -56,7 +56,7 @@ ast_t *parse_comprehension_suffix(parse_ctx_t *ctx, ast_t *expr) { if (!expr) return NULL; const char *start = expr->start; const char *pos = expr->end; - whitespace(&pos); + whitespace(ctx, &pos); if (!match_word(&pos, "for")) return NULL; ast_list_t *vars = NULL; @@ -72,7 +72,7 @@ ast_t *parse_comprehension_suffix(parse_ctx_t *ctx, ast_t *expr) { expect_str(ctx, start, &pos, "in", "I expected an 'in' for this 'for'"); ast_t *iter = expect(ctx, start, &pos, parse_expr, "I expected an iterable value for this 'for'"); const char *next_pos = pos; - whitespace(&next_pos); + whitespace(ctx, &next_pos); ast_t *filter = NULL; if (match_word(&next_pos, "if")) { pos = next_pos; @@ -114,13 +114,13 @@ ast_t *parse_method_call_suffix(parse_ctx_t *ctx, ast_t *self) { if (!fn) return NULL; spaces(&pos); if (!match(&pos, "(")) return NULL; - whitespace(&pos); + whitespace(ctx, &pos); arg_ast_t *args = NULL; for (;;) { const char *arg_start = pos; const char *name = get_id(&pos); - whitespace(&pos); + whitespace(ctx, &pos); if (!name || !match(&pos, "=")) { name = NULL; pos = arg_start; @@ -131,12 +131,12 @@ ast_t *parse_method_call_suffix(parse_ctx_t *ctx, ast_t *self) { if (name) parser_err(ctx, arg_start, pos, "I expected an argument here"); break; } - args = new (arg_ast_t, .name = name, .value = arg, .next = args); - if (!match_separator(&pos)) break; + args = new (arg_ast_t, .start = arg_start, .end = arg->end, .name = name, .value = arg, .next = args); + if (!match_separator(ctx, &pos)) break; } REVERSE_LIST(args); - whitespace(&pos); + whitespace(ctx, &pos); if (!match(&pos, ")")) parser_err(ctx, start, pos, "This parenthesis is unclosed"); @@ -151,13 +151,13 @@ ast_t *parse_fncall_suffix(parse_ctx_t *ctx, ast_t *fn) { if (!match(&pos, "(")) return NULL; - whitespace(&pos); + whitespace(ctx, &pos); arg_ast_t *args = NULL; for (;;) { const char *arg_start = pos; const char *name = get_id(&pos); - whitespace(&pos); + whitespace(ctx, &pos); if (!name || !match(&pos, "=")) { name = NULL; pos = arg_start; @@ -169,10 +169,10 @@ ast_t *parse_fncall_suffix(parse_ctx_t *ctx, ast_t *fn) { break; } args = new (arg_ast_t, .name = name, .value = arg, .next = args); - if (!match_separator(&pos)) break; + if (!match_separator(ctx, &pos)) break; } - whitespace(&pos); + whitespace(ctx, &pos); if (!match(&pos, ")")) parser_err(ctx, start, pos, "This parenthesis is unclosed"); diff --git a/src/parse/text.c b/src/parse/text.c index c554273f..6e7201bb 100644 --- a/src/parse/text.c +++ b/src/parse/text.c @@ -162,23 +162,19 @@ ast_t *parse_inline_c(parse_ctx_t *ctx, const char *pos) { spaces(&pos); type_ast_t *type = NULL; - ast_list_t *chunks; if (match(&pos, ":")) { type = expect(ctx, start, &pos, parse_type, "I couldn't parse the type for this C_code code"); spaces(&pos); - if (!match(&pos, "(")) parser_err(ctx, start, pos, "I expected a '(' here"); - chunks = new (ast_list_t, .ast = NewAST(ctx->file, pos, pos, TextLiteral, Text("({")), - .next = _parse_text_helper(ctx, &pos, '(', ')', '@', false)); - if (type) { - REVERSE_LIST(chunks); - chunks = new (ast_list_t, .ast = NewAST(ctx->file, pos, pos, TextLiteral, Text("; })")), .next = chunks); - REVERSE_LIST(chunks); - } - } else { - if (!match(&pos, "{")) parser_err(ctx, start, pos, "I expected a '{' here"); - chunks = _parse_text_helper(ctx, &pos, '{', '}', '@', false); } + static const char *quote_chars = "\"'`|/;([{<"; + if (!strchr(quote_chars, *pos)) + parser_err(ctx, pos, pos + 1, + "This is not a valid string quotation character. Valid characters are: \"'`|/;([{<"); + + char quote = *(pos++); + char unquote = closing[(int)quote] ? closing[(int)quote] : quote; + ast_list_t *chunks = _parse_text_helper(ctx, &pos, quote, unquote, '@', false); return NewAST(ctx->file, start, pos, InlineCCode, .chunks = chunks, .type_ast = type); } diff --git a/src/parse/typedefs.c b/src/parse/typedefs.c index 73fe9d7c..6e5e40d0 100644 --- a/src/parse/typedefs.c +++ b/src/parse/typedefs.c @@ -16,12 +16,12 @@ ast_t *parse_namespace(parse_ctx_t *ctx, const char *pos) { const char *start = pos; - whitespace(&pos); + whitespace(ctx, &pos); int64_t indent = get_indent(ctx, pos); ast_list_t *statements = NULL; for (;;) { const char *next = pos; - whitespace(&next); + whitespace(ctx, &next); if (get_indent(ctx, next) != indent) break; ast_t *stmt; if ((stmt = optional(ctx, &pos, parse_struct_def)) || (stmt = optional(ctx, &pos, parse_func_def)) @@ -31,7 +31,7 @@ ast_t *parse_namespace(parse_ctx_t *ctx, const char *pos) { || (stmt = optional(ctx, &pos, parse_inline_c)) || (stmt = optional(ctx, &pos, parse_declaration))) { statements = new (ast_list_t, .ast = stmt, .next = statements); pos = stmt->end; - whitespace(&pos); // TODO: check for newline + whitespace(ctx, &pos); // TODO: check for newline // if (!(space_types & WHITESPACE_NEWLINES)) { // pos = stmt->end; // break; @@ -62,10 +62,10 @@ ast_t *parse_struct_def(parse_ctx_t *ctx, const char *pos) { arg_ast_t *fields = parse_args(ctx, &pos); - whitespace(&pos); + whitespace(ctx, &pos); bool secret = false, external = false, opaque = false; if (match(&pos, ";")) { // Extra flags - whitespace(&pos); + whitespace(ctx, &pos); for (;;) { if (match_word(&pos, "secret")) { secret = true; @@ -79,7 +79,7 @@ ast_t *parse_struct_def(parse_ctx_t *ctx, const char *pos) { break; } - if (!match_separator(&pos)) break; + if (!match_separator(ctx, &pos)) break; } } @@ -87,7 +87,7 @@ ast_t *parse_struct_def(parse_ctx_t *ctx, const char *pos) { ast_t *namespace = NULL; const char *ns_pos = pos; - whitespace(&ns_pos); + whitespace(ctx, &ns_pos); int64_t ns_indent = get_indent(ctx, ns_pos); if (ns_indent > starting_indent) { pos = ns_pos; @@ -110,9 +110,10 @@ ast_t *parse_enum_def(parse_ctx_t *ctx, const char *pos) { if (!match(&pos, "(")) return NULL; tag_ast_t *tags = NULL; - whitespace(&pos); + whitespace(ctx, &pos); for (;;) { spaces(&pos); + const char *tag_start = pos; const char *tag_name = get_id(&pos); if (!tag_name) break; @@ -120,25 +121,26 @@ ast_t *parse_enum_def(parse_ctx_t *ctx, const char *pos) { arg_ast_t *fields; bool secret = false; if (match(&pos, "(")) { - whitespace(&pos); + whitespace(ctx, &pos); fields = parse_args(ctx, &pos); - whitespace(&pos); + whitespace(ctx, &pos); if (match(&pos, ";")) { // Extra flags - whitespace(&pos); + whitespace(ctx, &pos); secret = match_word(&pos, "secret"); - whitespace(&pos); + whitespace(ctx, &pos); } expect_closing(ctx, &pos, ")", "I wasn't able to parse the rest of this tagged union member"); } else { fields = NULL; } - tags = new (tag_ast_t, .name = tag_name, .fields = fields, .secret = secret, .next = tags); + tags = new (tag_ast_t, .start = tag_start, .end = pos, .name = tag_name, .fields = fields, .secret = secret, + .next = tags); - if (!match_separator(&pos)) break; + if (!match_separator(ctx, &pos)) break; } - whitespace(&pos); + whitespace(ctx, &pos); expect_closing(ctx, &pos, ")", "I wasn't able to parse the rest of this enum definition"); REVERSE_LIST(tags); @@ -147,7 +149,7 @@ ast_t *parse_enum_def(parse_ctx_t *ctx, const char *pos) { ast_t *namespace = NULL; const char *ns_pos = pos; - whitespace(&ns_pos); + whitespace(ctx, &ns_pos); int64_t ns_indent = get_indent(ctx, ns_pos); if (ns_indent > starting_indent) { pos = ns_pos; @@ -170,7 +172,7 @@ ast_t *parse_lang_def(parse_ctx_t *ctx, const char *pos) { ast_t *namespace = NULL; const char *ns_pos = pos; - whitespace(&ns_pos); + whitespace(ctx, &ns_pos); int64_t ns_indent = get_indent(ctx, ns_pos); if (ns_indent > starting_indent) { pos = ns_pos; @@ -192,7 +194,7 @@ ast_t *parse_extend(parse_ctx_t *ctx, const char *pos) { ast_t *body = NULL; const char *ns_pos = pos; - whitespace(&ns_pos); + whitespace(ctx, &ns_pos); int64_t ns_indent = get_indent(ctx, ns_pos); if (ns_indent > starting_indent) { pos = ns_pos; diff --git a/src/parse/types.c b/src/parse/types.c index 54bc0c03..ffb7d869 100644 --- a/src/parse/types.c +++ b/src/parse/types.c @@ -9,19 +9,19 @@ #include "../stdlib/print.h" #include "context.h" #include "errors.h" -#include "functions.h" #include "expressions.h" +#include "functions.h" #include "types.h" #include "utils.h" type_ast_t *parse_table_type(parse_ctx_t *ctx, const char *pos) { const char *start = pos; if (!match(&pos, "{")) return NULL; - whitespace(&pos); + whitespace(ctx, &pos); type_ast_t *key_type = parse_type(ctx, pos); if (!key_type) return NULL; pos = key_type->end; - whitespace(&pos); + whitespace(ctx, &pos); type_ast_t *value_type = NULL; if (match(&pos, "=")) { value_type = expect(ctx, start, &pos, parse_type, "I couldn't parse the rest of this table type"); @@ -35,7 +35,7 @@ type_ast_t *parse_table_type(parse_ctx_t *ctx, const char *pos) { default_value = expect(ctx, start, &pos, parse_extended_expr, "I couldn't parse the default value for this table"); } - whitespace(&pos); + whitespace(ctx, &pos); expect_closing(ctx, &pos, "}", "I wasn't able to parse the rest of this table type"); return NewTypeAST(ctx->file, start, pos, TableTypeAST, .key = key_type, .value = value_type, .default_value = default_value); @@ -44,11 +44,11 @@ type_ast_t *parse_table_type(parse_ctx_t *ctx, const char *pos) { type_ast_t *parse_set_type(parse_ctx_t *ctx, const char *pos) { const char *start = pos; if (!match(&pos, "|")) return NULL; - whitespace(&pos); + whitespace(ctx, &pos); type_ast_t *item_type = parse_type(ctx, pos); if (!item_type) return NULL; pos = item_type->end; - whitespace(&pos); + whitespace(ctx, &pos); expect_closing(ctx, &pos, "|", "I wasn't able to parse the rest of this set type"); return NewTypeAST(ctx->file, start, pos, SetTypeAST, .item = item_type); } @@ -113,10 +113,10 @@ type_ast_t *parse_non_optional_type(parse_ctx_t *ctx, const char *pos) { || (type = parse_table_type(ctx, pos)) || (type = parse_set_type(ctx, pos)) || (type = parse_type_name(ctx, pos)) || (type = parse_func_type(ctx, pos))); if (!success && match(&pos, "(")) { - whitespace(&pos); + whitespace(ctx, &pos); type = optional(ctx, &pos, parse_type); if (!type) return NULL; - whitespace(&pos); + whitespace(ctx, &pos); expect_closing(ctx, &pos, ")", "I wasn't able to parse the rest of this type"); type->start = start; type->end = pos; @@ -144,11 +144,11 @@ type_ast_t *parse_type_str(const char *str) { }; const char *pos = file->text; - whitespace(&pos); + whitespace(&ctx, &pos); type_ast_t *ast = parse_type(&ctx, pos); if (!ast) return ast; pos = ast->end; - whitespace(&pos); + whitespace(&ctx, &pos); if (strlen(pos) > 0) { parser_err(&ctx, pos, pos + strlen(pos), "I couldn't parse this part of the type"); } diff --git a/src/parse/utils.c b/src/parse/utils.c index 7e827ac6..0644bfa0 100644 --- a/src/parse/utils.c +++ b/src/parse/utils.c @@ -6,6 +6,7 @@ #include <unictype.h> #include <uniname.h> +#include "../stdlib/tables.h" #include "../stdlib/util.h" #include "errors.h" #include "utils.h" @@ -43,8 +44,8 @@ size_t some_not(const char **pos, const char *forbid) { size_t spaces(const char **pos) { return some_of(pos, " \t"); } -void whitespace(const char **pos) { - while (some_of(pos, " \t\r\n") || comment(pos)) +void whitespace(parse_ctx_t *ctx, const char **pos) { + while (some_of(pos, " \t\r\n") || comment(ctx, pos)) continue; } @@ -95,9 +96,12 @@ const char *get_id(const char **inout) { PUREFUNC const char *eol(const char *str) { return str + strcspn(str, "\r\n"); } -bool comment(const char **pos) { +bool comment(parse_ctx_t *ctx, const char **pos) { if ((*pos)[0] == '#') { + const char *start = *pos; *pos += strcspn(*pos, "\r\n"); + const char *end = *pos; + Table$set(&ctx->comments, &start, &end, parse_comments_info); return true; } else { return false; @@ -129,7 +133,7 @@ PUREFUNC int64_t get_indent(parse_ctx_t *ctx, const char *pos) { bool indent(parse_ctx_t *ctx, const char **out) { const char *pos = *out; int64_t starting_indent = get_indent(ctx, pos); - whitespace(&pos); + whitespace(ctx, &pos); const char *next_line = get_line(ctx->file, get_line_number(ctx->file, pos)); if (next_line <= *out) return false; @@ -239,12 +243,12 @@ const char *unescape(parse_ctx_t *ctx, const char **out) { #pragma GCC diagnostic pop #endif -bool match_separator(const char **pos) { // Either comma or newline +bool match_separator(parse_ctx_t *ctx, const char **pos) { // Either comma or newline const char *p = *pos; int separators = 0; for (;;) { if (some_of(&p, "\r\n,")) ++separators; - else if (!comment(&p) && !some_of(&p, " \t")) break; + else if (!comment(ctx, &p) && !some_of(&p, " \t")) break; } if (separators > 0) { *pos = p; diff --git a/src/parse/utils.h b/src/parse/utils.h index ba54120a..b8fb0756 100644 --- a/src/parse/utils.h +++ b/src/parse/utils.h @@ -12,16 +12,16 @@ CONSTFUNC bool is_keyword(const char *word); size_t some_of(const char **pos, const char *allow); size_t some_not(const char **pos, const char *forbid); size_t spaces(const char **pos); -void whitespace(const char **pos); +void whitespace(parse_ctx_t *ctx, const char **pos); size_t match(const char **pos, const char *target); size_t match_word(const char **pos, const char *word); const char *get_word(const char **pos); const char *get_id(const char **pos); -bool comment(const char **pos); +bool comment(parse_ctx_t *ctx, const char **pos); bool indent(parse_ctx_t *ctx, const char **pos); const char *eol(const char *str); PUREFUNC int64_t get_indent(parse_ctx_t *ctx, const char *pos); const char *unescape(parse_ctx_t *ctx, const char **out); bool is_xid_continue_next(const char *pos); bool newline_with_indentation(const char **out, int64_t target); -bool match_separator(const char **pos); +bool match_separator(parse_ctx_t *ctx, const char **pos); diff --git a/src/stdlib/integers.c b/src/stdlib/integers.c index 5dc9ac55..863bb42d 100644 --- a/src/stdlib/integers.c +++ b/src/stdlib/integers.c @@ -430,7 +430,7 @@ OptionalInt_t Int$parse(Text_t text, Text_t *remainder) { else if (*end != '\0') return NONE_INT; result = mpz_init_set_str(i, str + 2, 2); } else { - const char *end = str + 2 + strspn(str + 2, "0123456789"); + const char *end = str + strspn(str, "0123456789"); if (remainder) *remainder = Text$from_str(end); else if (*end != '\0') return NONE_INT; result = mpz_init_set_str(i, str, 10); diff --git a/src/stdlib/text.c b/src/stdlib/text.c index 1dd2003e..bc5d4af3 100644 --- a/src/stdlib/text.c +++ b/src/stdlib/text.c @@ -1393,17 +1393,8 @@ Text_t Text$title(Text_t text, Text_t language) { } public -Text_t Text$quoted(Text_t text, bool colorize, Text_t quotation_mark) { - if (quotation_mark.length != 1) fail("Invalid quote text: ", quotation_mark, " (must have length == 1)"); - +Text_t Text$escaped(Text_t text, bool colorize, Text_t extra_escapes) { Text_t ret = colorize ? Text("\x1b[35m") : EMPTY_TEXT; - if (!Text$equal_values(quotation_mark, Text("\"")) && !Text$equal_values(quotation_mark, Text("'")) - && !Text$equal_values(quotation_mark, Text("`"))) - ret = concat2_assuming_safe(ret, Text("$")); - - ret = concat2_assuming_safe(ret, quotation_mark); - int32_t quote_char = Text$get_grapheme(quotation_mark, 0); - #define flush_unquoted() \ ({ \ if (unquoted_span > 0) { \ @@ -1457,15 +1448,18 @@ Text_t Text$quoted(Text_t text, bool colorize, Text_t quotation_mark) { break; } default: { - if (g == quote_char) { - flush_unquoted(); - if (colorize) ret = concat2_assuming_safe(ret, Text("\x1b[34;1m")); - ret = concat2_assuming_safe(ret, Text("\\")); - ret = concat2_assuming_safe(ret, quotation_mark); - if (colorize) ret = concat2_assuming_safe(ret, Text("\x1b[0;35m")); - } else { - unquoted_span += 1; + TextIter_t esc_state = NEW_TEXT_ITER_STATE(extra_escapes); + for (int64_t j = 0; j < extra_escapes.length; j++) { + int32_t esc = Text$get_grapheme_fast(&esc_state, j); + if (g == esc) { + flush_unquoted(); + if (colorize) ret = concat2_assuming_safe(ret, Text("\x1b[34;1m")); + ret = concat2_assuming_safe(ret, Text("\\")); + if (colorize) ret = concat2_assuming_safe(ret, Text("\x1b[0;35m")); + break; + } } + unquoted_span += 1; break; } } @@ -1473,10 +1467,19 @@ Text_t Text$quoted(Text_t text, bool colorize, Text_t quotation_mark) { flush_unquoted(); #undef add_escaped #undef flush_unquoted - - ret = concat2_assuming_safe(ret, quotation_mark); if (colorize) ret = concat2_assuming_safe(ret, Text("\x1b[m")); + return ret; +} + +public +Text_t Text$quoted(Text_t text, bool colorize, Text_t quotation_mark) { + if (quotation_mark.length != 1) fail("Invalid quote text: ", quotation_mark, " (must have length == 1)"); + Text_t ret = Text$escaped(text, colorize, quotation_mark); + if (!(Text$equal_values(quotation_mark, Text("\"")) || Text$equal_values(quotation_mark, Text("'")) + || Text$equal_values(quotation_mark, Text("`")))) + ret = Texts("$", quotation_mark, ret, quotation_mark); + else ret = Texts(quotation_mark, ret, quotation_mark); return ret; } diff --git a/src/stdlib/text.h b/src/stdlib/text.h index 97b8f90f..0b341eb2 100644 --- a/src/stdlib/text.h +++ b/src/stdlib/text.h @@ -76,6 +76,7 @@ Text_t Text$upper(Text_t text, Text_t language); Text_t Text$lower(Text_t text, Text_t language); Text_t Text$title(Text_t text, Text_t language); Text_t Text$as_text(const void *text, bool colorize, const TypeInfo_t *info); +Text_t Text$escaped(Text_t text, bool colorize, Text_t extra_escapes); Text_t Text$quoted(Text_t str, bool colorize, Text_t quotation_mark); PUREFUNC bool Text$starts_with(Text_t text, Text_t prefix, Text_t *remainder); PUREFUNC bool Text$ends_with(Text_t text, Text_t suffix, Text_t *remainder); @@ -19,6 +19,7 @@ #include "compile/files.h" #include "compile/headers.h" #include "config.h" +#include "formatter/formatter.h" #include "modules.h" #include "naming.h" #include "parse/files.h" @@ -75,9 +76,9 @@ static const char *paths_str(List_t paths) { static OptionalList_t files = NONE_LIST, args = NONE_LIST, uninstall = NONE_LIST, libraries = NONE_LIST; static OptionalBool_t verbose = false, quiet = false, show_version = false, show_parse_tree = false, - show_prefix = false, stop_at_transpile = false, stop_at_obj_compilation = false, - compile_exe = false, should_install = false, clean_build = false, source_mapping = true, - show_changelog = false; + do_format_code = false, format_inplace = false, show_prefix = false, stop_at_transpile = false, + stop_at_obj_compilation = false, compile_exe = false, should_install = false, clean_build = false, + source_mapping = true, show_changelog = false; static OptionalText_t show_codegen = NONE_TEXT, cflags = Text("-Werror -fdollars-in-identifiers -std=c2x -Wno-trigraphs " @@ -183,30 +184,59 @@ int main(int argc, char *argv[]) { "\x1b[1mUninstall libraries:\x1b[m tomo -u lib...\n" "\x1b[1mOther flags:\x1b[m\n" " --verbose|-v: verbose output\n" + " --prefix: print the Tomo prefix directory\n" " --quiet|-q: quiet output\n" " --parse|-p: show parse tree\n" + " --transpile|-t: transpile C code without compiling\n" + " --show-codegen|-c <pager>: show generated code\n" + " --compile-obj|-c: compile C code for object file\n" + " --compile-exe|-e: compile to standalone executable without running\n" + " --format: print formatted code\n" + " --format-inplace: format the code in a file (in place)\n" + " --library|-L: build a folder as a library\n" " --install|-I: install the executable or library\n" + " --uninstall|-u: uninstall an executable or library\n" " --optimization|-O <level>: set optimization level\n" + " --force-rebuild|-f: force rebuilding\n" + " --source-mapping|-m <yes|no>: toggle source mapping in generated code\n" + " --changelog: show the Tomo changelog\n" " --run|-r: run a program from ", - TOMO_PATH, "/lib/tomo_" TOMO_VERSION "\n"); + TOMO_PATH, "/share/tomo_" TOMO_VERSION "/installed\n"); Text_t help = Texts(Text("\x1b[1mtomo\x1b[m: a compiler for the Tomo programming language"), Text("\n\n"), usage); - tomo_parse_args( - argc, argv, usage, help, TOMO_VERSION, {"files", true, List$info(&Path$info), &files}, - {"args", true, List$info(&Text$info), &args}, {"verbose", false, &Bool$info, &verbose}, - {"v", false, &Bool$info, &verbose}, {"version", false, &Bool$info, &show_version}, - {"parse", false, &Bool$info, &show_parse_tree}, {"p", false, &Bool$info, &show_parse_tree}, - {"prefix", false, &Bool$info, &show_prefix}, {"quiet", false, &Bool$info, &quiet}, - {"q", false, &Bool$info, &quiet}, {"transpile", false, &Bool$info, &stop_at_transpile}, - {"t", false, &Bool$info, &stop_at_transpile}, {"compile-obj", false, &Bool$info, &stop_at_obj_compilation}, - {"c", false, &Bool$info, &stop_at_obj_compilation}, {"compile-exe", false, &Bool$info, &compile_exe}, - {"e", false, &Bool$info, &compile_exe}, {"uninstall", false, List$info(&Text$info), &uninstall}, - {"u", false, List$info(&Text$info), &uninstall}, {"library", false, List$info(&Path$info), &libraries}, - {"L", false, List$info(&Path$info), &libraries}, {"show-codegen", false, &Text$info, &show_codegen}, - {"C", false, &Text$info, &show_codegen}, {"install", false, &Bool$info, &should_install}, - {"I", false, &Bool$info, &should_install}, {"optimization", false, &Text$info, &optimization}, - {"O", false, &Text$info, &optimization}, {"force-rebuild", false, &Bool$info, &clean_build}, - {"f", false, &Bool$info, &clean_build}, {"source-mapping", false, &Bool$info, &source_mapping}, - {"m", false, &Bool$info, &source_mapping}, {"changelog", false, &Bool$info, &show_changelog}, ); + tomo_parse_args(argc, argv, usage, help, TOMO_VERSION, // + {"files", true, List$info(&Path$info), &files}, // + {"args", true, List$info(&Text$info), &args}, // + {"verbose", false, &Bool$info, &verbose}, // + {"v", false, &Bool$info, &verbose}, // + {"version", false, &Bool$info, &show_version}, // + {"parse", false, &Bool$info, &show_parse_tree}, // + {"p", false, &Bool$info, &show_parse_tree}, // + {"format", false, &Bool$info, &do_format_code}, // + {"format-inplace", false, &Bool$info, &format_inplace}, // + {"prefix", false, &Bool$info, &show_prefix}, // + {"quiet", false, &Bool$info, &quiet}, // + {"q", false, &Bool$info, &quiet}, // + {"transpile", false, &Bool$info, &stop_at_transpile}, // + {"t", false, &Bool$info, &stop_at_transpile}, // + {"compile-obj", false, &Bool$info, &stop_at_obj_compilation}, // + {"c", false, &Bool$info, &stop_at_obj_compilation}, // + {"compile-exe", false, &Bool$info, &compile_exe}, // + {"e", false, &Bool$info, &compile_exe}, // + {"uninstall", false, List$info(&Text$info), &uninstall}, // + {"u", false, List$info(&Text$info), &uninstall}, // + {"library", false, List$info(&Path$info), &libraries}, // + {"L", false, List$info(&Path$info), &libraries}, // + {"show-codegen", false, &Text$info, &show_codegen}, // + {"C", false, &Text$info, &show_codegen}, // + {"install", false, &Bool$info, &should_install}, // + {"I", false, &Bool$info, &should_install}, // + {"optimization", false, &Text$info, &optimization}, // + {"O", false, &Text$info, &optimization}, // + {"force-rebuild", false, &Bool$info, &clean_build}, // + {"f", false, &Bool$info, &clean_build}, // + {"source-mapping", false, &Bool$info, &source_mapping}, + {"m", false, &Bool$info, &source_mapping}, // + {"changelog", false, &Bool$info, &show_changelog}, ); if (show_prefix) { print(TOMO_PATH); @@ -214,7 +244,7 @@ int main(int argc, char *argv[]) { } if (show_changelog) { - print_inline(string_slice((const char *)CHANGES_md, CHANGES_md_len)); + print_inline(string_slice((const char *)CHANGES_md, (size_t)CHANGES_md_len)); return 0; } @@ -313,6 +343,17 @@ int main(int argc, char *argv[]) { continue; } + if (do_format_code || format_inplace) { + Text_t formatted = format_file(Path$as_c_string(path)); + if (format_inplace) { + print("Formatted ", path); + Path$write(path, formatted, 0644); + } else { + print(formatted); + } + continue; + } + Path_t exe_path = compile_exe ? Path$with_extension(path, Text(""), true) : build_file(Path$with_extension(path, Text(""), true), ""); diff --git a/src/typecheck.c b/src/typecheck.c index 695f7cbc..d7e87e65 100644 --- a/src/typecheck.c +++ b/src/typecheck.c @@ -336,12 +336,12 @@ void bind_statement(env_t *env, ast_t *statement) { case FunctionDef: { DeclareMatch(def, statement, FunctionDef); const char *name = Match(def->name, Var)->name; - type_t *type = get_function_def_type(env, statement); + type_t *type = get_function_type(env, statement); set_binding(env, name, type, namespace_name(env, env->namespace, Text$from_str(name))); break; } case ConvertDef: { - type_t *type = get_function_def_type(env, statement); + type_t *type = get_function_type(env, statement); type_t *ret_t = Match(type, FunctionType)->ret; const char *name = get_type_name(ret_t); if (!name) @@ -577,10 +577,24 @@ void bind_statement(env_t *env, ast_t *statement) { } } -type_t *get_function_def_type(env_t *env, ast_t *ast) { - arg_ast_t *arg_asts = ast->tag == FunctionDef ? Match(ast, FunctionDef)->args : Match(ast, ConvertDef)->args; - type_ast_t *ret_type = - ast->tag == FunctionDef ? Match(ast, FunctionDef)->ret_type : Match(ast, ConvertDef)->ret_type; +type_t *get_function_type(env_t *env, ast_t *ast) { + arg_ast_t *arg_asts; + type_ast_t *ret_ast; + switch (ast->tag) { + case FunctionDef: + arg_asts = Match(ast, FunctionDef)->args; + ret_ast = Match(ast, FunctionDef)->ret_type; + break; + case ConvertDef: + arg_asts = Match(ast, ConvertDef)->args; + ret_ast = Match(ast, ConvertDef)->ret_type; + break; + case Lambda: + arg_asts = Match(ast, Lambda)->args; + ret_ast = Match(ast, Lambda)->ret_type; + break; + default: code_err(ast, "This was expected to be a function definition of some sort"); + } arg_t *args = NULL; env_t *scope = fresh_scope(env); for (arg_ast_t *arg = arg_asts; arg; arg = arg->next) { @@ -590,10 +604,40 @@ type_t *get_function_def_type(env_t *env, ast_t *ast) { } REVERSE_LIST(args); - type_t *ret = ret_type ? parse_type_ast(scope, ret_type) : Type(VoidType); - if (has_stack_memory(ret)) - code_err(ast, "Functions can't return stack references because the reference may outlive its stack frame."); - return Type(FunctionType, .args = args, .ret = ret); + if (ast->tag == Lambda) { + ast_t *body = Match(ast, Lambda)->body; + + scope->fn = NULL; + type_t *ret_t = get_type(scope, body); + if (ret_t->tag == ReturnType) ret_t = Match(ret_t, ReturnType)->ret; + if (ret_t->tag == AbortType) ret_t = Type(VoidType); + + if (ret_t->tag == OptionalType && !Match(ret_t, OptionalType)->type) + code_err(body, "This function doesn't return a specific optional type"); + + if (ret_ast) { + type_t *declared = parse_type_ast(env, ret_ast); + if (can_promote(ret_t, declared)) ret_t = declared; + else + code_err(ast, "This function was declared to return a value of type ", type_to_str(declared), + ", but actually returns a value of type ", type_to_str(ret_t)); + } + + if (has_stack_memory(ret_t)) + code_err(ast, "Functions can't return stack references because the reference may outlive its stack frame."); + return Type(ClosureType, Type(FunctionType, .args = args, .ret = ret_t)); + } else { + type_t *ret_t = ret_ast ? parse_type_ast(scope, ret_ast) : Type(VoidType); + if (has_stack_memory(ret_t)) + code_err(ast, "Functions can't return stack references because the reference may outlive its stack frame."); + return Type(FunctionType, .args = args, .ret = ret_t); + } +} + +type_t *get_function_return_type(env_t *env, ast_t *ast) { + type_t *fn_t = get_function_type(env, ast); + if (fn_t->tag == ClosureType) fn_t = Match(fn_t, ClosureType)->fn; + return Match(fn_t, FunctionType)->ret; } type_t *get_method_type(env_t *env, ast_t *self, const char *name) { @@ -1085,7 +1129,7 @@ type_t *get_type(env_t *env, ast_t *ast) { } case Return: { ast_t *val = Match(ast, Return)->value; - if (env->fn_ret) env = with_enum_scope(env, env->fn_ret); + if (env->fn) env = with_enum_scope(env, get_function_return_type(env, env->fn)); return Type(ReturnType, .ret = (val ? get_type(env, val) : Type(VoidType))); } case Stop: @@ -1314,7 +1358,7 @@ type_t *get_type(env_t *env, ast_t *ast) { } } } else if ((ast->tag == Divide || ast->tag == Mod || ast->tag == Mod1) && is_numeric_type(rhs_t)) { - binding_t *b = get_namespace_binding(env, binop.lhs, binop_method_name(ast->tag)); + binding_t *b = get_namespace_binding(env, binop.lhs, binop_info[ast->tag].method_name); if (b && b->type->tag == FunctionType) { DeclareMatch(fn, b->type, FunctionType); if (type_eq(fn->ret, lhs_t)) { @@ -1373,7 +1417,8 @@ type_t *get_type(env_t *env, ast_t *ast) { code_err(reduction->iter, "I don't know how to do a reduction over ", type_to_str(iter_t), " values"); if (reduction->key && !(reduction->op == Min || reduction->op == Max)) { env_t *item_scope = fresh_scope(env); - set_binding(item_scope, "$", iterated, EMPTY_TEXT); + const char *op_str = binop_info[reduction->op].operator; + set_binding(item_scope, op_str, iterated, EMPTY_TEXT); iterated = get_type(item_scope, reduction->key); } return iterated->tag == OptionalType ? iterated : Type(OptionalType, .type = iterated); @@ -1393,36 +1438,7 @@ type_t *get_type(env_t *env, ast_t *ast) { return t; } - case Lambda: { - DeclareMatch(lambda, ast, Lambda); - arg_t *args = NULL; - env_t *scope = fresh_scope(env); // For now, just use closed variables in scope normally - for (arg_ast_t *arg = lambda->args; arg; arg = arg->next) { - type_t *t = get_arg_ast_type(env, arg); - args = new (arg_t, .name = arg->name, .alias = arg->alias, .type = t, .next = args); - set_binding(scope, arg->name, t, EMPTY_TEXT); - } - REVERSE_LIST(args); - - type_t *ret = get_type(scope, lambda->body); - if (ret->tag == ReturnType) ret = Match(ret, ReturnType)->ret; - if (ret->tag == AbortType) ret = Type(VoidType); - - if (ret->tag == OptionalType && !Match(ret, OptionalType)->type) - code_err(lambda->body, "This function doesn't return a specific optional type"); - - if (lambda->ret_type) { - type_t *declared = parse_type_ast(env, lambda->ret_type); - if (can_promote(ret, declared)) ret = declared; - else - code_err(ast, "This function was declared to return a value of type ", type_to_str(declared), - ", but actually returns a value of type ", type_to_str(ret)); - } - - if (has_stack_memory(ret)) - code_err(ast, "Functions can't return stack references because the reference may outlive its stack frame."); - return Type(ClosureType, Type(FunctionType, .args = args, .ret = ret)); - } + case Lambda: return get_function_type(env, ast); case FunctionDef: case ConvertDef: diff --git a/src/typecheck.h b/src/typecheck.h index 8fc30333..d64bb316 100644 --- a/src/typecheck.h +++ b/src/typecheck.h @@ -16,7 +16,8 @@ void prebind_statement(env_t *env, ast_t *statement); void bind_statement(env_t *env, ast_t *statement); PUREFUNC type_t *get_math_type(env_t *env, ast_t *ast, type_t *lhs_t, type_t *rhs_t); PUREFUNC bool is_discardable(env_t *env, ast_t *ast); -type_t *get_function_def_type(env_t *env, ast_t *ast); +type_t *get_function_type(env_t *env, ast_t *ast); +type_t *get_function_return_type(env_t *env, ast_t *ast); type_t *get_arg_type(env_t *env, arg_t *arg); type_t *get_arg_ast_type(env_t *env, arg_ast_t *arg); env_t *when_clause_scope(env_t *env, type_t *subject_t, when_clause_t *clause); |
