diff --git a/compile.c b/compile.c index 83f8854..ce133c1 100644 --- a/compile.c +++ b/compile.c @@ -16,7 +16,7 @@ #include "typecheck.h" #include "builtins/util.h" -static CORD compile_to_pointer_depth(env_t *env, ast_t *ast, int64_t target_depth, bool allow_optional); +static CORD compile_to_pointer_depth(env_t *env, ast_t *ast, int64_t target_depth); static env_t *with_enum_scope(env_t *env, type_t *t); static CORD compile_math_method(env_t *env, binop_e op, ast_t *lhs, ast_t *rhs, type_t *required_type); static CORD compile_string(env_t *env, ast_t *ast, CORD color); @@ -214,7 +214,7 @@ static CORD compile_lvalue(env_t *env, ast_t *ast) } container_t = value_type(container_t); if (container_t->tag == ArrayType) { - CORD target_code = compile_to_pointer_depth(env, index->indexed, 1, false); + CORD target_code = compile_to_pointer_depth(env, index->indexed, 1); type_t *item_type = Match(container_t, ArrayType)->item_type; if (index->unchecked) { return CORD_all("Array_lvalue_unchecked(", compile_type(item_type), ", ", target_code, ", ", @@ -1216,7 +1216,7 @@ CORD compile_string(env_t *env, ast_t *ast, CORD color) return expr_as_text(env, expr, t, color); } -CORD compile_to_pointer_depth(env_t *env, ast_t *ast, int64_t target_depth, bool allow_optional) +CORD compile_to_pointer_depth(env_t *env, ast_t *ast, int64_t target_depth) { CORD val = compile(env, ast); type_t *t = get_type(env, ast); @@ -1241,13 +1241,12 @@ CORD compile_to_pointer_depth(env_t *env, ast_t *ast, int64_t target_depth, bool --depth; } } - if (!allow_optional) { - while (t->tag == PointerType) { - auto ptr = Match(t, PointerType); - if (ptr->is_optional) - code_err(ast, "You can't dereference this value, since it's not guaranteed to be non-null"); - t = ptr->pointed; - } + + while (t->tag == PointerType) { + auto ptr = Match(t, PointerType); + if (ptr->is_optional) + code_err(ast, "You can't dereference this value, since it's not guaranteed to be non-null"); + t = ptr->pointed; } return val; @@ 
-2148,55 +2147,55 @@ CORD compile(env_t *env, ast_t *ast) type_t *item_t = Match(self_value_t, ArrayType)->item_type; CORD padded_item_size = CORD_asprintf("%ld", padded_type_size(item_t)); if (streq(call->name, "insert")) { - CORD self = compile_to_pointer_depth(env, call->self, 1, false); + CORD self = compile_to_pointer_depth(env, call->self, 1); arg_t *arg_spec = new(arg_t, .name="item", .type=item_t, .next=new(arg_t, .name="at", .type=INT_TYPE, .default_val=FakeAST(Int, .str="0"))); return CORD_all("Array$insert_value(", self, ", ", compile_arguments(env, ast, arg_spec, call->args), ", ", padded_item_size, ")"); } else if (streq(call->name, "insert_all")) { - CORD self = compile_to_pointer_depth(env, call->self, 1, false); + CORD self = compile_to_pointer_depth(env, call->self, 1); arg_t *arg_spec = new(arg_t, .name="items", .type=self_value_t, .next=new(arg_t, .name="at", .type=INT_TYPE, .default_val=FakeAST(Int, .str="0"))); return CORD_all("Array$insert_all(", self, ", ", compile_arguments(env, ast, arg_spec, call->args), ", ", padded_item_size, ")"); } else if (streq(call->name, "remove_at")) { - CORD self = compile_to_pointer_depth(env, call->self, 1, false); + CORD self = compile_to_pointer_depth(env, call->self, 1); arg_t *arg_spec = new(arg_t, .name="index", .type=INT_TYPE, .default_val=FakeAST(Int, .str="-1"), .next=new(arg_t, .name="count", .type=INT_TYPE, .default_val=FakeAST(Int, .str="1"))); return CORD_all("Array$remove_at(", self, ", ", compile_arguments(env, ast, arg_spec, call->args), ", ", padded_item_size, ")"); } else if (streq(call->name, "remove_item")) { - CORD self = compile_to_pointer_depth(env, call->self, 1, false); + CORD self = compile_to_pointer_depth(env, call->self, 1); arg_t *arg_spec = new(arg_t, .name="item", .type=item_t, .next=new(arg_t, .name="max_count", .type=INT_TYPE, .default_val=FakeAST(Int, .str="-1"))); return CORD_all("Array$remove_item_value(", self, ", ", compile_arguments(env, ast, arg_spec, call->args), ", ", 
compile_type_info(env, self_value_t), ")"); } else if (streq(call->name, "random")) { - CORD self = compile_to_pointer_depth(env, call->self, 0, false); + CORD self = compile_to_pointer_depth(env, call->self, 0); (void)compile_arguments(env, ast, NULL, call->args); return CORD_all("Array$random_value(", self, ", ", compile_type(item_t), ")"); } else if (streq(call->name, "has")) { - CORD self = compile_to_pointer_depth(env, call->self, 0, false); + CORD self = compile_to_pointer_depth(env, call->self, 0); arg_t *arg_spec = new(arg_t, .name="item", .type=item_t); return CORD_all("Array$has_value(", self, ", ", compile_arguments(env, ast, arg_spec, call->args), ", ", compile_type_info(env, self_value_t), ")"); } else if (streq(call->name, "sample")) { - CORD self = compile_to_pointer_depth(env, call->self, 0, false); + CORD self = compile_to_pointer_depth(env, call->self, 0); arg_t *arg_spec = new(arg_t, .name="count", .type=INT_TYPE, .next=new(arg_t, .name="weights", .type=Type(ArrayType, .item_type=Type(NumType)), .default_val=FakeAST(Array, .item_type=new(type_ast_t, .tag=VarTypeAST, .__data.VarTypeAST.name="Num")))); return CORD_all("Array$sample(", self, ", ", compile_arguments(env, ast, arg_spec, call->args), ", ", padded_item_size, ")"); } else if (streq(call->name, "shuffle")) { - CORD self = compile_to_pointer_depth(env, call->self, 1, false); + CORD self = compile_to_pointer_depth(env, call->self, 1); (void)compile_arguments(env, ast, NULL, call->args); return CORD_all("Array$shuffle(", self, ", ", padded_item_size, ")"); } else if (streq(call->name, "shuffled")) { - CORD self = compile_to_pointer_depth(env, call->self, 0, false); + CORD self = compile_to_pointer_depth(env, call->self, 0); (void)compile_arguments(env, ast, NULL, call->args); return CORD_all("Array$shuffled(", self, ", ", padded_item_size, ")"); } else if (streq(call->name, "sort") || streq(call->name, "sorted")) { - CORD self = compile_to_pointer_depth(env, call->self, streq(call->name, 
"sort") ? 1 : 0, false); + CORD self = compile_to_pointer_depth(env, call->self, streq(call->name, "sort") ? 1 : 0); CORD comparison; if (call->args) { type_t *item_ptr = Type(PointerType, .pointed=item_t, .is_stack=true, .is_readonly=true); @@ -2209,7 +2208,7 @@ CORD compile(env_t *env, ast_t *ast) } return CORD_all("Array$", call->name, "(", self, ", ", comparison, ", ", padded_item_size, ")"); } else if (streq(call->name, "heapify")) { - CORD self = compile_to_pointer_depth(env, call->self, 1, false); + CORD self = compile_to_pointer_depth(env, call->self, 1); CORD comparison; if (call->args) { type_t *item_ptr = Type(PointerType, .pointed=item_t, .is_stack=true); @@ -2222,7 +2221,7 @@ CORD compile(env_t *env, ast_t *ast) } return CORD_all("Array$heapify(", self, ", ", comparison, ", ", padded_item_size, ")"); } else if (streq(call->name, "heap_push")) { - CORD self = compile_to_pointer_depth(env, call->self, 1, false); + CORD self = compile_to_pointer_depth(env, call->self, 1); type_t *item_ptr = Type(PointerType, .pointed=item_t, .is_stack=true); type_t *fn_t = Type(FunctionType, .args=new(arg_t, .name="x", .type=item_ptr, .next=new(arg_t, .name="y", .type=item_ptr)), .ret=Type(IntType, .bits=TYPE_IBITS32)); @@ -2235,7 +2234,7 @@ CORD compile(env_t *env, ast_t *ast) CORD arg_code = compile_arguments(env, ast, arg_spec, call->args); return CORD_all("Array$heap_push_value(", self, ", ", arg_code, ", ", padded_item_size, ")"); } else if (streq(call->name, "heap_pop")) { - CORD self = compile_to_pointer_depth(env, call->self, 1, false); + CORD self = compile_to_pointer_depth(env, call->self, 1); type_t *item_ptr = Type(PointerType, .pointed=item_t, .is_stack=true); type_t *fn_t = Type(FunctionType, .args=new(arg_t, .name="x", .type=item_ptr, .next=new(arg_t, .name="y", .type=item_ptr)), .ret=Type(IntType, .bits=TYPE_IBITS32)); @@ -2247,7 +2246,7 @@ CORD compile(env_t *env, ast_t *ast) CORD arg_code = compile_arguments(env, ast, arg_spec, call->args); return 
CORD_all("Array$heap_pop_value(", self, ", ", arg_code, ", ", padded_item_size, ", ", compile_type(item_t), ")"); } else if (streq(call->name, "binary_search")) { - CORD self = compile_to_pointer_depth(env, call->self, 0, false); + CORD self = compile_to_pointer_depth(env, call->self, 0); type_t *item_ptr = Type(PointerType, .pointed=item_t, .is_stack=true); type_t *fn_t = Type(FunctionType, .args=new(arg_t, .name="x", .type=item_ptr, .next=new(arg_t, .name="y", .type=item_ptr)), .ret=Type(IntType, .bits=TYPE_IBITS32)); @@ -2260,43 +2259,43 @@ CORD compile(env_t *env, ast_t *ast) CORD arg_code = compile_arguments(env, ast, arg_spec, call->args); return CORD_all("Array$binary_search_value(", self, ", ", arg_code, ")"); } else if (streq(call->name, "clear")) { - CORD self = compile_to_pointer_depth(env, call->self, 1, false); + CORD self = compile_to_pointer_depth(env, call->self, 1); (void)compile_arguments(env, ast, NULL, call->args); return CORD_all("Array$clear(", self, ")"); } else if (streq(call->name, "find")) { - CORD self = compile_to_pointer_depth(env, call->self, 0, false); + CORD self = compile_to_pointer_depth(env, call->self, 0); arg_t *arg_spec = new(arg_t, .name="item", .type=item_t); return CORD_all("Array$find_value(", self, ", ", compile_arguments(env, ast, arg_spec, call->args), ", ", compile_type_info(env, self_value_t), ")"); } else if (streq(call->name, "first")) { - CORD self = compile_to_pointer_depth(env, call->self, 0, false); + CORD self = compile_to_pointer_depth(env, call->self, 0); type_t *item_ptr = Type(PointerType, .pointed=item_t, .is_stack=true); type_t *predicate_type = Type( ClosureType, .fn=Type(FunctionType, .args=new(arg_t, .name="item", .type=item_ptr), .ret=Type(BoolType))); arg_t *arg_spec = new(arg_t, .name="predicate", .type=predicate_type); return CORD_all("Array$first(", self, ", ", compile_arguments(env, ast, arg_spec, call->args), ")"); } else if (streq(call->name, "from")) { - CORD self = 
compile_to_pointer_depth(env, call->self, 0, false); + CORD self = compile_to_pointer_depth(env, call->self, 0); arg_t *arg_spec = new(arg_t, .name="first", .type=INT_TYPE); return CORD_all("Array$from(", self, ", ", compile_arguments(env, ast, arg_spec, call->args), ")"); } else if (streq(call->name, "to")) { - CORD self = compile_to_pointer_depth(env, call->self, 0, false); + CORD self = compile_to_pointer_depth(env, call->self, 0); arg_t *arg_spec = new(arg_t, .name="last", .type=INT_TYPE); return CORD_all("Array$to(", self, ", ", compile_arguments(env, ast, arg_spec, call->args), ")"); } else if (streq(call->name, "by")) { - CORD self = compile_to_pointer_depth(env, call->self, 0, false); + CORD self = compile_to_pointer_depth(env, call->self, 0); arg_t *arg_spec = new(arg_t, .name="stride", .type=INT_TYPE); return CORD_all("Array$by(", self, ", ", compile_arguments(env, ast, arg_spec, call->args), ", ", padded_item_size, ")"); } else if (streq(call->name, "reversed")) { - CORD self = compile_to_pointer_depth(env, call->self, 0, false); + CORD self = compile_to_pointer_depth(env, call->self, 0); (void)compile_arguments(env, ast, NULL, call->args); return CORD_all("Array$reversed(", self, ", ", padded_item_size, ")"); } else if (streq(call->name, "unique")) { - CORD self = compile_to_pointer_depth(env, call->self, 0, false); + CORD self = compile_to_pointer_depth(env, call->self, 0); (void)compile_arguments(env, ast, NULL, call->args); return CORD_all("Table$from_entries(", self, ", $SetInfo(", compile_type_info(env, item_t), "))"); } else if (streq(call->name, "counts")) { - CORD self = compile_to_pointer_depth(env, call->self, 0, false); + CORD self = compile_to_pointer_depth(env, call->self, 0); (void)compile_arguments(env, ast, NULL, call->args); return CORD_all("Array$counts(", self, ", ", compile_type_info(env, self_value_t), ")"); } else code_err(ast, "There is no '%s' method for arrays", call->name); @@ -2304,61 +2303,61 @@ CORD compile(env_t *env, ast_t 
*ast) case SetType: { auto set = Match(self_value_t, SetType); if (streq(call->name, "has")) { - CORD self = compile_to_pointer_depth(env, call->self, 0, false); + CORD self = compile_to_pointer_depth(env, call->self, 0); arg_t *arg_spec = new(arg_t, .name="key", .type=set->item_type); return CORD_all("Table$has_value(", self, ", ", compile_arguments(env, ast, arg_spec, call->args), ", ", compile_type_info(env, self_value_t), ")"); } else if (streq(call->name, "add")) { - CORD self = compile_to_pointer_depth(env, call->self, 1, false); + CORD self = compile_to_pointer_depth(env, call->self, 1); arg_t *arg_spec = new(arg_t, .name="item", .type=set->item_type); return CORD_all("Table$set_value(", self, ", ", compile_arguments(env, ast, arg_spec, call->args), ", NULL, ", compile_type_info(env, self_value_t), ")"); } else if (streq(call->name, "add_all")) { arg_t *arg_spec = new(arg_t, .name="items", .type=Type(ArrayType, .item_type=Match(self_value_t, SetType)->item_type)); - return CORD_all("({ table_t *set = ", compile_to_pointer_depth(env, call->self, 1, false), "; ", + return CORD_all("({ table_t *set = ", compile_to_pointer_depth(env, call->self, 1), "; ", "array_t to_add = ", compile_arguments(env, ast, arg_spec, call->args), "; ", "for (int64_t i = 0; i < to_add.length; i++)\n" "Table$set(set, to_add.data + i*to_add.stride, NULL, ", compile_type_info(env, self_value_t), ");\n", "(void)0; })"); } else if (streq(call->name, "remove")) { - CORD self = compile_to_pointer_depth(env, call->self, 1, false); + CORD self = compile_to_pointer_depth(env, call->self, 1); arg_t *arg_spec = new(arg_t, .name="item", .type=set->item_type); return CORD_all("Table$remove_value(", self, ", ", compile_arguments(env, ast, arg_spec, call->args), ", ", compile_type_info(env, self_value_t), ")"); } else if (streq(call->name, "remove_all")) { arg_t *arg_spec = new(arg_t, .name="items", .type=Type(ArrayType, .item_type=Match(self_value_t, SetType)->item_type)); - return CORD_all("({ 
table_t *set = ", compile_to_pointer_depth(env, call->self, 1, false), "; ", + return CORD_all("({ table_t *set = ", compile_to_pointer_depth(env, call->self, 1), "; ", "array_t to_add = ", compile_arguments(env, ast, arg_spec, call->args), "; ", "for (int64_t i = 0; i < to_add.length; i++)\n" "Table$remove(set, to_add.data + i*to_add.stride, ", compile_type_info(env, self_value_t), ");\n", "(void)0; })"); } else if (streq(call->name, "clear")) { - CORD self = compile_to_pointer_depth(env, call->self, 1, false); + CORD self = compile_to_pointer_depth(env, call->self, 1); (void)compile_arguments(env, ast, NULL, call->args); return CORD_all("Table$clear(", self, ")"); } else if (streq(call->name, "with")) { - CORD self = compile_to_pointer_depth(env, call->self, 0, false); + CORD self = compile_to_pointer_depth(env, call->self, 0); arg_t *arg_spec = new(arg_t, .name="other", .type=self_value_t); return CORD_all("Table$with(", self, ", ", compile_arguments(env, ast, arg_spec, call->args), ", ", compile_type_info(env, self_value_t), ")"); } else if (streq(call->name, "overlap")) { - CORD self = compile_to_pointer_depth(env, call->self, 0, false); + CORD self = compile_to_pointer_depth(env, call->self, 0); arg_t *arg_spec = new(arg_t, .name="other", .type=self_value_t); return CORD_all("Table$overlap(", self, ", ", compile_arguments(env, ast, arg_spec, call->args), ", ", compile_type_info(env, self_value_t), ")"); } else if (streq(call->name, "without")) { - CORD self = compile_to_pointer_depth(env, call->self, 0, false); + CORD self = compile_to_pointer_depth(env, call->self, 0); arg_t *arg_spec = new(arg_t, .name="other", .type=self_value_t); return CORD_all("Table$without(", self, ", ", compile_arguments(env, ast, arg_spec, call->args), ", ", compile_type_info(env, self_value_t), ")"); } else if (streq(call->name, "is_subset_of")) { - CORD self = compile_to_pointer_depth(env, call->self, 0, false); + CORD self = compile_to_pointer_depth(env, call->self, 0); arg_t 
*arg_spec = new(arg_t, .name="other", .type=self_value_t, .next=new(arg_t, .name="strict", .type=Type(BoolType), .default_val=FakeAST(Bool, false))); return CORD_all("Table$is_subset_of(", self, ", ", compile_arguments(env, ast, arg_spec, call->args), ", ", compile_type_info(env, self_value_t), ")"); } else if (streq(call->name, "is_superset_of")) { - CORD self = compile_to_pointer_depth(env, call->self, 0, false); + CORD self = compile_to_pointer_depth(env, call->self, 0); arg_t *arg_spec = new(arg_t, .name="other", .type=self_value_t, .next=new(arg_t, .name="strict", .type=Type(BoolType), .default_val=FakeAST(Bool, false))); return CORD_all("Table$is_superset_of(", self, ", ", compile_arguments(env, ast, arg_spec, call->args), @@ -2373,31 +2372,31 @@ CORD compile(env_t *env, ast_t *ast) arg_t *where_default_start = new(arg_t, .name="where", .type=WHERE_TYPE, .default_val=FakeAST(FieldAccess, .fielded=FakeAST(Var, "Where"), .field="Start")); if (streq(call->name, "give")) { - CORD self = compile_to_pointer_depth(env, call->self, 0, false); + CORD self = compile_to_pointer_depth(env, call->self, 0); arg_t *arg_spec = new(arg_t, .name="item", .type=item_t, .next=where_default_end); return CORD_all("Channel$give_value(", self, ", ", compile_arguments(env, ast, arg_spec, call->args), ", ", padded_item_size, ")"); } else if (streq(call->name, "give_all")) { - CORD self = compile_to_pointer_depth(env, call->self, 0, false); + CORD self = compile_to_pointer_depth(env, call->self, 0); arg_t *arg_spec = new(arg_t, .name="to_give", .type=Type(ArrayType, .item_type=item_t), .next=where_default_end); return CORD_all("Channel$give_all(", self, ", ", compile_arguments(env, ast, arg_spec, call->args), ", ", padded_item_size, ")"); } else if (streq(call->name, "get")) { - CORD self = compile_to_pointer_depth(env, call->self, 0, false); + CORD self = compile_to_pointer_depth(env, call->self, 0); arg_t *arg_spec = where_default_start; return CORD_all("Channel$get_value(", self, ", 
", compile_arguments(env, ast, arg_spec, call->args), ", ", compile_type(item_t), ", ", padded_item_size, ")"); } else if (streq(call->name, "peek")) { - CORD self = compile_to_pointer_depth(env, call->self, 0, false); + CORD self = compile_to_pointer_depth(env, call->self, 0); arg_t *arg_spec = where_default_start; return CORD_all("Channel$peek_value(", self, ", ", compile_arguments(env, ast, arg_spec, call->args), ", ", compile_type(item_t), ")"); } else if (streq(call->name, "clear")) { - CORD self = compile_to_pointer_depth(env, call->self, 0, false); + CORD self = compile_to_pointer_depth(env, call->self, 0); (void)compile_arguments(env, ast, NULL, call->args); return CORD_all("Channel$clear(", self, ")"); } else if (streq(call->name, "view")) { - CORD self = compile_to_pointer_depth(env, call->self, 0, false); + CORD self = compile_to_pointer_depth(env, call->self, 0); (void)compile_arguments(env, ast, NULL, call->args); return CORD_all("Channel$view(", self, ")"); } else code_err(ast, "There is no '%s' method for channels", call->name); @@ -2405,7 +2404,7 @@ CORD compile(env_t *env, ast_t *ast) case TableType: { auto table = Match(self_value_t, TableType); if (streq(call->name, "get")) { - CORD self = compile_to_pointer_depth(env, call->self, 0, false); + CORD self = compile_to_pointer_depth(env, call->self, 0); if (call->args->next) { arg_t *arg_spec = new(arg_t, .name="key", .type=table->key_type, .next=new(arg_t, .name="default", .type=table->value_type)); return CORD_all("Table$get_value_or_default(", self, ", ", compile_type(table->key_type), ", ", compile_type(table->value_type), ", ", @@ -2422,23 +2421,23 @@ CORD compile(env_t *env, ast_t *ast) } else if (streq(call->name, "get_or_null")) { if (table->value_type->tag != PointerType) code_err(ast, "The table method :get_or_null() is only supported for tables whose value type is a pointer, not %T", table->value_type); - CORD self = compile_to_pointer_depth(env, call->self, 0, false); + CORD self = 
compile_to_pointer_depth(env, call->self, 0); arg_t *arg_spec = new(arg_t, .name="key", .type=table->key_type); return CORD_all("Table$get_value_or_default(", self, ", ", compile_type(table->key_type), ", ", compile_type(table->value_type), ", ", compile_arguments(env, ast, arg_spec, call->args), ", NULL, ", compile_type_info(env, self_value_t), ")"); } else if (streq(call->name, "has")) { - CORD self = compile_to_pointer_depth(env, call->self, 0, false); + CORD self = compile_to_pointer_depth(env, call->self, 0); arg_t *arg_spec = new(arg_t, .name="key", .type=table->key_type); return CORD_all("Table$has_value(", self, ", ", compile_arguments(env, ast, arg_spec, call->args), ", ", compile_type_info(env, self_value_t), ")"); } else if (streq(call->name, "set")) { - CORD self = compile_to_pointer_depth(env, call->self, 1, false); + CORD self = compile_to_pointer_depth(env, call->self, 1); arg_t *arg_spec = new(arg_t, .name="key", .type=table->key_type, .next=new(arg_t, .name="value", .type=table->value_type)); return CORD_all("Table$set_value(", self, ", ", compile_arguments(env, ast, arg_spec, call->args), ", ", compile_type_info(env, self_value_t), ")"); } else if (streq(call->name, "bump")) { - CORD self = compile_to_pointer_depth(env, call->self, 1, false); + CORD self = compile_to_pointer_depth(env, call->self, 1); if (!(table->value_type->tag == IntType || table->value_type->tag == NumType)) code_err(ast, "bump() is only supported for tables with numeric value types, not %T", self_value_t); ast_t *one = table->value_type->tag == IntType @@ -2449,16 +2448,16 @@ CORD compile(env_t *env, ast_t *ast) return CORD_all("Table$bump(", self, ", ", compile_arguments(env, ast, arg_spec, call->args), ", ", compile_type_info(env, self_value_t), ")"); } else if (streq(call->name, "remove")) { - CORD self = compile_to_pointer_depth(env, call->self, 1, false); + CORD self = compile_to_pointer_depth(env, call->self, 1); arg_t *arg_spec = new(arg_t, .name="key", 
.type=table->key_type); return CORD_all("Table$remove_value(", self, ", ", compile_arguments(env, ast, arg_spec, call->args), ", ", compile_type_info(env, self_value_t), ")"); } else if (streq(call->name, "clear")) { - CORD self = compile_to_pointer_depth(env, call->self, 1, false); + CORD self = compile_to_pointer_depth(env, call->self, 1); (void)compile_arguments(env, ast, NULL, call->args); return CORD_all("Table$clear(", self, ")"); } else if (streq(call->name, "sorted")) { - CORD self = compile_to_pointer_depth(env, call->self, 0, false); + CORD self = compile_to_pointer_depth(env, call->self, 0); (void)compile_arguments(env, ast, NULL, call->args); return CORD_all("Table$sorted(", self, ", ", compile_type_info(env, self_value_t), ")"); } else code_err(ast, "There is no '%s' method for tables", call->name); @@ -2639,7 +2638,7 @@ CORD compile(env_t *env, ast_t *ast) case TextType: { const char *lang = Match(value_t, TextType)->lang; if (lang && streq(f->field, "text_content")) { - CORD text = compile_to_pointer_depth(env, f->fielded, 0, false); + CORD text = compile_to_pointer_depth(env, f->fielded, 0); return CORD_all("((Text_t)", text, ")"); } code_err(ast, "There is no '%s' field on %T values", f->field, value_t); @@ -2648,7 +2647,7 @@ CORD compile(env_t *env, ast_t *ast) for (arg_t *field = Match(value_t, StructType)->fields; field; field = field->next) { if (streq(field->name, f->field)) { if (fielded_t->tag == PointerType) { - CORD fielded = compile_to_pointer_depth(env, f->fielded, 1, false); + CORD fielded = compile_to_pointer_depth(env, f->fielded, 1); return CORD_asprintf("(%r)->$%s", fielded, f->field); } else { CORD fielded = compile(env, f->fielded); @@ -2664,7 +2663,7 @@ CORD compile(env_t *env, ast_t *ast) if (streq(f->field, tag->name)) { CORD prefix = namespace_prefix(e->env->libname, e->env->namespace); if (fielded_t->tag == PointerType) { - CORD fielded = compile_to_pointer_depth(env, f->fielded, 1, false); + CORD fielded = 
compile_to_pointer_depth(env, f->fielded, 1); return CORD_all("((", fielded, ")->tag == ", prefix, "tag$", tag->name, ")"); } else { CORD fielded = compile(env, f->fielded); @@ -2676,39 +2675,39 @@ CORD compile(env_t *env, ast_t *ast) } case ArrayType: { if (streq(f->field, "length")) - return CORD_all("Int64_to_Int((", compile_to_pointer_depth(env, f->fielded, 0, false), ").length)"); + return CORD_all("Int64_to_Int((", compile_to_pointer_depth(env, f->fielded, 0), ").length)"); code_err(ast, "There is no %s field on arrays", f->field); } case ChannelType: { if (streq(f->field, "max_size")) - return CORD_all("Int64_to_Int((", compile_to_pointer_depth(env, f->fielded, 0, false), ")->max_size)"); + return CORD_all("Int64_to_Int((", compile_to_pointer_depth(env, f->fielded, 0), ")->max_size)"); code_err(ast, "There is no %s field on arrays", f->field); } case SetType: { if (streq(f->field, "items")) - return CORD_all("(", compile_to_pointer_depth(env, f->fielded, 0, false), ").entries"); + return CORD_all("(", compile_to_pointer_depth(env, f->fielded, 0), ").entries"); else if (streq(f->field, "length")) - return CORD_all("Int64_to_Int((", compile_to_pointer_depth(env, f->fielded, 0, false), ").entries.length)"); + return CORD_all("Int64_to_Int((", compile_to_pointer_depth(env, f->fielded, 0), ").entries.length)"); code_err(ast, "There is no '%s' field on sets", f->field); } case TableType: { if (streq(f->field, "length")) { - return CORD_all("Int64_to_Int((", compile_to_pointer_depth(env, f->fielded, 0, false), ").entries.length)"); + return CORD_all("Int64_to_Int((", compile_to_pointer_depth(env, f->fielded, 0), ").entries.length)"); } else if (streq(f->field, "keys")) { - return CORD_all("(", compile_to_pointer_depth(env, f->fielded, 0, false), ").entries"); + return CORD_all("(", compile_to_pointer_depth(env, f->fielded, 0), ").entries"); } else if (streq(f->field, "values")) { auto table = Match(value_t, TableType); size_t offset = type_size(table->key_type); 
size_t align = type_align(table->value_type); if (align > 1 && offset % align > 0) offset += align - (offset % align); - return CORD_all("({ array_t *entries = &(", compile_to_pointer_depth(env, f->fielded, 0, false), ").entries;\n" + return CORD_all("({ array_t *entries = &(", compile_to_pointer_depth(env, f->fielded, 0), ").entries;\n" "ARRAY_INCREF(*entries);\n" "array_t values = *entries;\n" "values.data += ", CORD_asprintf("%zu", offset), ";\n" "values; })"); } else if (streq(f->field, "fallback")) { - return CORD_all("(", compile_to_pointer_depth(env, f->fielded, 0, false), ").fallback"); + return CORD_all("(", compile_to_pointer_depth(env, f->fielded, 0), ").fallback"); } code_err(ast, "There is no '%s' field on tables", f->field); } @@ -2745,7 +2744,7 @@ CORD compile(env_t *env, ast_t *ast) if (index_t->tag != IntType && index_t->tag != BigIntType) code_err(indexing->index, "Arrays can only be indexed by integers, not %T", index_t); type_t *item_type = Match(container_t, ArrayType)->item_type; - CORD arr = compile_to_pointer_depth(env, indexing->indexed, 0, false); + CORD arr = compile_to_pointer_depth(env, indexing->indexed, 0); file_t *f = indexing->index->file; if (indexing->unchecked) return CORD_all("Array_get_unchecked(", compile_type(item_type), ", ", arr, ", ", diff --git a/docs/arrays.md b/docs/arrays.md index 0453feb..55d6024 100644 --- a/docs/arrays.md +++ b/docs/arrays.md @@ -107,6 +107,129 @@ the addition operator `+`, which does not work with arrays. = [1, 2, 3, 4] ``` +## Implementation Details + +Under the hood, arrays are implemented as a struct that contains a pointer to a +contiguous chunk of memory storing the elements of the array and some other +metadata. 
Since Tomo has datatypes with different sizes, like `Bool`s which +take one byte and `struct`s which can take up many bytes, it's worth noting +that arrays store the elements compactly and inline, without the need for each +array cell to hold a pointer to where the data actually lives. + +The other metadata stored with an array includes its length as well as the +_stride_ of the array. The stride is not exposed to the user, but it's the gap +in bytes between consecutive elements in the array. The reason this is +mentioned is that it is possible to create immutable slices of arrays in +constant time by creating a new struct that points to the appropriate starting +place for the array items and has the appropriate stride. The upshot is that a +method like `array:reversed()` does not actually copy the array; it simply +returns a struct that points to the back of the array with a negative stride. +Arrays adhere to copy-on-write semantics, so we can cheaply create many +read-only references to the same data, and only need to do copying if we plan +to modify data. After doing a modification, future modifications can be done +in-place as long as there is only one reference to that data. + +Internally, we also take advantage of this inside of tables, which compactly +store all of the key/value pairs in a contiguous array, so we can return an +immutable slice of that array showing only the keys or only the values by +choosing the right starting point and stride. + +## Copy on Write + +Arrays can be thought of as values with copy-on-write semantics. As a +performance optimization, reference counting is used to perform efficient +in-place mutations instead of copying when doing so wouldn't affect the +program's semantics. Without getting too deep into the details, suffice it to +say that when you create an array, that array can be thought of as a singular +"value" in the same way that `123` is a value.
A variable that holds an array value will never change unless you +explicitly perform an assignment operation on the variable or call a method on +the variable. + +Because it would be tedious to require users to write all array operations as +pure functions like `array = array:with_value_at_index(value=x, index=i)`, Tomo +provides the familiar imperative syntax for modifying arrays, but keeps the +semantics of the pure functional style. Writing `array[i] = x` is +_semantically_ equivalent to `array = array:with_value_at_index(value=x, +index=i)`, but much more readable and easier to write. Similarly, +`array:insert(x)` is semantically equivalent to `array = +array:with_value_inserted(x)`. We implement these mutating methods as functions +that take a pointer to an array variable, which then either mutate the array's +data in-place (if this is the only thing referencing that data) or construct a +new array and store its value in the memory where the array variable is stored. + +When there is only a single reference to an array value, we can perform these +modifications in-place (arrays typically have a little bit of spare capacity at +the end, so appending usually doesn't trigger a reallocation). When there are +shared references, we must create a copy of the array's data before modifying +it so the other references don't see the effects of the mutation.
Here are some +simple examples: + +```tomo +nums := [10, 20, 30, 39] + +// Efficient in-place mutation because data references are not shared: +nums[4] = 40 + +// Constant time operation, but increments the reference count: +tmp := nums +>> tmp += [10, 20, 30, 40] + +// Now, a mutation will trigger a copy-on-write, +// which resets the reference count to zero: +nums[4] = 999 +>> nums += [10, 20, 30, 999] + +// Because of the copy-on-write, `tmp` is unchanged: +>> tmp += [10, 20, 30, 40] + +// Since the reference count has been reset, we can do more +// mutations without triggering another copy-on-write: +nums[4] = -1 +>> nums += [10, 20, 30, -1] +``` + +Array reference counting is _approximate_, but will only ever err on the side +of correctness at the expense of performance, not the other way around. +Occasionally, unnecessary copying may occur, but you should never experience an +array value changing because of some operation performed on a different array +value. + +## Array Pointers + +Since the normal case of arrays is to treat them like immutable values, what do +we do if we actually want to have a shared reference to an array whose contents +change over time? In that case, we want to use the `@` operator to create a +pointer to a heap-allocated array and pass that pointer around. This is the same +behavior that you get in Python when you create a `list`: + +```tomo +nums := @[10, 20, 30] +tmp := nums + +nums:insert(40) +>> tmp += @[10, 20, 30, 40] +``` + +Having multiple pointers to the same heap-allocated array does not cause the +array's reference count to increase, because there is only one "value" in play: +the one stored on the heap. 
It's only when we store the "value" in multiple +places that we need to increment the reference count: + +```tomo +// Increment the reference count, because `value` now has to hold +// whatever data was at the pointer's location at this point in time: +value := nums[] +``` + +The TL;DR is: you can cheaply modify `@`-allocated arrays and local array +variables that aren't aliased, but if you assign a local array variable to +another variable or dereference a heap pointer, the next modification may +trigger a copy-on-write. + ## Array Methods ### `binary_search`