Fix up some import issues and improve arrays to use saturating refcounts

instead of .copy_on_write
Bruce Hill 2024-02-29 12:37:09 -05:00
parent a7bbbe9584
commit ec75208980
8 changed files with 62 additions and 55 deletions

View File

@@ -38,9 +38,7 @@ public void Array__compact(array_t *arr, const TypeInfo *type)
         .data=copy,
         .length=arr->length,
         .stride=item_size,
-        .free=0,
         .atomic=arr->atomic,
-        .copy_on_write=0,
     };
 }
@@ -64,10 +62,10 @@ public void Array__insert(array_t *arr, const void *item, int64_t index, const TypeInfo *type)
         for (int64_t i = index-1; i < (int64_t)arr->length; i++)
             memcpy(copy + (i+1)*item_size, arr->data + arr->stride*i, item_size);
         arr->data = copy;
-        arr->copy_on_write = 0;
+        arr->data_refcount = 0;
         arr->stride = item_size;
     } else {
-        if (arr->copy_on_write)
+        if (arr->data_refcount)
             Array__compact(arr, type);
         if (index != arr->length+1)
@@ -98,9 +96,9 @@ public void Array__insert_all(array_t *arr, array_t to_insert, int64_t index, const TypeInfo *type)
         for (int64_t i = index-1; i < (int64_t)arr->length; i++)
             memcpy(copy + (i+to_insert.length)*item_size, arr->data + arr->stride*i, item_size);
         arr->data = copy;
-        arr->copy_on_write = 0;
+        arr->data_refcount = 0;
     } else {
-        if (arr->copy_on_write)
+        if (arr->data_refcount)
             Array__compact(arr, type);
         if (index != arr->length+1)
@@ -127,7 +125,7 @@ public void Array__remove(array_t *arr, int64_t index, int64_t count, const TypeInfo *type)
     if (index + count > arr->length) {
         if (arr->free >= 0)
             arr->free += count;
-    } else if (arr->copy_on_write || (int64_t)arr->stride != item_size) {
+    } else if (arr->data_refcount || (int64_t)arr->stride != item_size) {
         void *copy = arr->atomic ? GC_MALLOC_ATOMIC((arr->length-1) * item_size) : GC_MALLOC((arr->length-1) * item_size);
         for (int64_t src = 1, dest = 1; src <= (int64_t)arr->length; src++) {
             if (src < index || src >= index + count) {
@@ -137,7 +135,7 @@ public void Array__remove(array_t *arr, int64_t index, int64_t count, const TypeInfo *type)
         }
         arr->data = copy;
         arr->free = 0;
-        arr->copy_on_write = 0;
+        arr->data_refcount = 0;
     } else {
         memmove((void*)arr->data + (index-1)*item_size, arr->data + (index-1 + count)*item_size, (arr->length - index + count - 1)*item_size);
         arr->free += count;
@@ -152,7 +150,7 @@ public void Array__sort(array_t *arr, const TypeInfo *type)
     if (item_type->align > 1 && item_size % item_type->align)
         item_size += item_type->align - (item_size % item_type->align); // padding
-    if (arr->copy_on_write || (int64_t)arr->stride != item_size)
+    if (arr->data_refcount || (int64_t)arr->stride != item_size)
         Array__compact(arr, type);
     qsort_r(arr->data, arr->length, item_size, (void*)generic_compare, (void*)item_type);
@@ -161,7 +159,7 @@ public void Array__sort(array_t *arr, const TypeInfo *type)
 public void Array__shuffle(array_t *arr, const TypeInfo *type)
 {
     int64_t item_size = get_item_size(type);
-    if (arr->copy_on_write || (int64_t)arr->stride != item_size)
+    if (arr->data_refcount || (int64_t)arr->stride != item_size)
         Array__compact(arr, type);
     char tmp[item_size];
@@ -173,14 +171,12 @@ public void Array__shuffle(array_t *arr, const TypeInfo *type)
     }
 }
-public array_t Array__slice(array_t *array, int64_t first, int64_t stride, int64_t length, bool readonly, const TypeInfo *type)
+public array_t Array__slice(array_t *array, int64_t first, int64_t stride, int64_t length, const TypeInfo *type)
 {
-    if (stride > INT16_MAX)
-        stride = INT16_MAX;
-    else if (stride < INT16_MIN)
-        stride = INT16_MIN;
+    if (stride > MAX_STRIDE || stride < MIN_STRIDE)
+        fail("Stride is too big: %ld", stride);
-    if (stride == 0) {
+    if (stride == 0 || length <= 0) {
         // Zero stride
         return (array_t){.atomic=array->atomic};
     } else if (stride < 0) {
@@ -210,13 +206,12 @@ public array_t Array__slice(array_t *array, int64_t first, int64_t stride, int64_t length, bool readonly, const TypeInfo *type)
     // If less than zero, set to zero (without a conditional branch)
-    if (length < 0) length = -length;
-    // Sometimes, we want to create a readonly slice (e.g. during iteration)
-    // and we don't actually need to set the COW flag because the slice will
-    // never do modifications
-    array->copy_on_write = !readonly;
+    length = length & ~(length >> 63);
+    if (length > array->length/labs(stride) + 1) length = array->length/labs(stride) + 1;
+    // Saturating add:
+    array->data_refcount |= (array->data_refcount << 1) | 1;
     int64_t item_size = get_item_size(type);
     return (array_t){
@@ -224,7 +219,7 @@ public array_t Array__slice(array_t *array, int64_t first, int64_t stride, int64_t length, bool readonly, const TypeInfo *type)
         .data=array->data + item_size*(first-1),
         .length=length,
         .stride=(array->stride * stride),
-        .copy_on_write=1, // slice is always copy-on-write
+        .data_refcount=array->data_refcount,
     };
 }
@@ -250,14 +245,13 @@ public array_t Array__concat(array_t x, array_t y, const TypeInfo *type)
         .data=data,
         .length=x.length + y.length,
         .stride=item_size,
-        .copy_on_write=0,
         .atomic=x.atomic,
     };
 }

 public bool Array__contains(array_t array, void *item, const TypeInfo *type)
 {
-    TypeInfo *item_type = type->ArrayInfo.item;
+    const TypeInfo *item_type = type->ArrayInfo.item;
     for (int64_t i = 0; i < array.length; i++)
         if (generic_equal(array.data + i*array.stride, item, item_type))
             return true;
@@ -276,7 +270,7 @@ public int32_t Array__compare(const array_t *x, const array_t *y, const TypeInfo *type)
     if (x->data == y->data && x->stride == y->stride)
         return (x->length > y->length) - (x->length < y->length);
-    TypeInfo *item = type->ArrayInfo.item;
+    const TypeInfo *item = type->ArrayInfo.item;
     if (item->tag == PointerInfo || (item->tag == CustomInfo && item->CustomInfo.compare == NULL)) { // data comparison
         int64_t item_size = item->size;
         if (x->stride == (int32_t)item_size && y->stride == (int32_t)item_size) {
@@ -307,7 +301,7 @@ public CORD Array__as_str(const array_t *arr, bool colorize, const TypeInfo *type)
     if (!arr)
         return CORD_all("[", generic_as_str(NULL, false, type->ArrayInfo.item), "]");
-    TypeInfo *item_type = type->ArrayInfo.item;
+    const TypeInfo *item_type = type->ArrayInfo.item;
     CORD c = "[";
     for (int64_t i = 0; i < arr->length; i++) {
         if (i > 0)
@@ -326,7 +320,7 @@ public uint32_t Array__hash(const array_t *arr, const TypeInfo *type)
     // In other words, it reads in a chunk of items or item hashes, then when it fills up the chunk,
     // hashes it down to a single item to start the next chunk. This repeats until the end, when it
     // hashes the last chunk down to a uint32_t.
-    TypeInfo *item = type->ArrayInfo.item;
+    const TypeInfo *item = type->ArrayInfo.item;
     if (item->tag == PointerInfo || (item->tag == CustomInfo && item->CustomInfo.hash == NULL)) { // Raw data hash
         int64_t item_size = item->size;
         uint8_t hash_batch[4 + 8*item_size] = {};
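
The 2-bit data_refcount replaces the old boolean copy_on_write flag with a saturating counter: 0 means the data is unshared, and any nonzero value means some other array_t may alias it, so mutation must compact or copy first. A minimal standalone sketch of the saturation behavior (the helper names are illustrative, not part of this commit):

#include <assert.h>
#include <stdint.h>

// Mirrors $ARRAY_INCREF/$ARRAY_DECREF from array.h, with an explicit 2-bit
// mask since we aren't using a real bitfield here.
static uint8_t incref(uint8_t rc) { return (rc | (rc << 1) | 1) & 3; }
static uint8_t decref(uint8_t rc) { return rc & 2; }

int main(void) {
    assert(incref(0) == 1);                    // first reference
    assert(incref(1) == 3);                    // second reference saturates
    assert(incref(3) == 3);                    // stays saturated
    assert(decref(1) == 0);                    // a lone reference can be released
    assert(decref(3) == 2 && decref(2) == 2);  // saturated counts never reach 0
    return 0;
}

Once two or more references exist the count sticks at a nonzero value forever, which errs on the safe side: the worst case is an unnecessary copy, never a missed one.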

View File

@@ -23,7 +23,16 @@
         .stride=(int64_t)&$items[1] - (int64_t)&$items[0], \
         .data=memcpy($is_atomic(x) ? GC_MALLOC_ATOMIC(sizeof($items)) : GC_MALLOC(sizeof($items)), $items, sizeof($items)), \
         .atomic=$is_atomic(x), \
-        .copy_on_write=1}; })
+        .data_refcount=1}; })
+
+#define $ARRAY_INCREF(arr) (arr).data_refcount |= ((arr).data_refcount << 1) | 1
+#define $ARRAY_DECREF(arr) (arr).data_refcount &= 2
+
+#define $ARRAY_FOREACH(arr_expr, i, item_type, x, body, else_body) {\
+    array_t $arr = arr_expr; \
+    $ARRAY_INCREF($arr); \
+    if ($arr.length == 0) else_body \
+    else for (int64_t i = 1; i <= $arr.length; i++) { item_type x = *(item_type*)($arr.data + (i-1)*$arr.stride); body } \
+    $ARRAY_DECREF($arr); \
+}

 void Array__insert(array_t *arr, const void *item, int64_t index, const TypeInfo *type);
 void Array__insert_all(array_t *arr, array_t to_insert, int64_t index, const TypeInfo *type);
@@ -33,7 +42,7 @@ void Array__shuffle(array_t *arr, const TypeInfo *type);
 void Array__clear(array_t *array, const TypeInfo *type);
 void Array__compact(array_t *arr, const TypeInfo *type);
 bool Array__contains(array_t array, void *item, const TypeInfo *type);
-array_t Array__slice(array_t *array, int64_t first, int64_t stride, int64_t length, bool readonly, const TypeInfo *type);
+array_t Array__slice(array_t *array, int64_t first, int64_t stride, int64_t length, const TypeInfo *type);
 array_t Array__concat(array_t x, array_t y, const TypeInfo *type);
 uint32_t Array__hash(const array_t *arr, const TypeInfo *type);
 int32_t Array__compare(const array_t *x, const array_t *y, const TypeInfo *type);
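
For reference, here is roughly what $ARRAY_FOREACH expands to for an int64_t array (the array name nums and the printf body are illustrative):

// $ARRAY_FOREACH(nums, i, int64_t, x, { printf("%ld\n", x); }, {}) expands to
// (reformatted for readability):
{
    array_t $arr = nums;
    $ARRAY_INCREF($arr);    // mark the backing data shared for the loop's duration
    if ($arr.length == 0) {}
    else for (int64_t i = 1; i <= $arr.length; i++) {
        int64_t x = *(int64_t*)($arr.data + (i-1)*$arr.stride);
        { printf("%ld\n", x); }  // mutating nums inside the body now triggers a
                                 // copy instead of invalidating this iteration
    }
    $ARRAY_DECREF($arr);    // release the loop's reference
}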

View File

@@ -2,12 +2,14 @@
 #include <stdint.h>
 #include <stdbool.h>

+#define MIN_STRIDE (INT16_MIN>>1)
+#define MAX_STRIDE (INT16_MAX>>1)
+
 typedef struct {
     void *data;
     int64_t length:42;
-    uint8_t free:4;
-    bool copy_on_write:1, atomic:1;
-    int16_t stride:16;
+    uint8_t free:4, data_refcount:2;
+    bool atomic:1;
+    int16_t stride:15;
 } array_t;
typedef struct {
@@ -16,8 +18,8 @@ typedef struct {
 } bucket_t;

 typedef struct {
-    uint32_t count:32, last_free:31;
-    bool copy_on_write:1;
+    uint32_t count:31, last_free:31;
+    uint8_t data_refcount:2;
     bucket_t buckets[0];
 } bucket_info_t;
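
Narrowing stride from 16 to 15 bits is what frees the extra bit for the refcount, and it is why MIN_STRIDE and MAX_STRIDE above are INT16_MIN>>1 and INT16_MAX>>1: that is exactly the range of a 15-bit signed field. On common ABIs the bitfields still total 42+4+2+1+15 = 64 bits, so array_t stays two 64-bit words. A sketch of a compile-time check (the assertion is illustrative, not in this commit, and bitfield packing across base types is implementation-defined):

#include <stdbool.h>
#include <stdint.h>

typedef struct {
    void *data;                        // 8 bytes
    int64_t length:42;                 // 42 bits
    uint8_t free:4, data_refcount:2;   //  6 bits
    bool atomic:1;                     //  1 bit
    int16_t stride:15;                 // 15 bits -> 64 bits of bitfields total
} array_sketch_t;

_Static_assert(sizeof(array_sketch_t) <= 16, "expected array_t to fit in 16 bytes");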

View File

@@ -98,21 +98,21 @@ static inline void hshow(const table_t *t)
 static void maybe_copy_on_write(table_t *t, const TypeInfo *type)
 {
-    if (t->entries.copy_on_write) {
+    if (t->entries.data_refcount) {
         Array__compact(&t->entries, ENTRIES_TYPE(type));
     }

-    if (t->bucket_info && t->bucket_info->copy_on_write) {
+    if (t->bucket_info && t->bucket_info->data_refcount) {
         int64_t size = sizeof(bucket_info_t) + t->bucket_info->count*sizeof(bucket_t);
         t->bucket_info = memcpy(GC_MALLOC(size), t->bucket_info, size);
-        t->bucket_info->copy_on_write = 0;
+        t->bucket_info->data_refcount = 0;
     }
 }

 public void Table_mark_copy_on_write(table_t *t)
 {
-    t->entries.copy_on_write = 1;
-    if (t->bucket_info) t->bucket_info->copy_on_write = 1;
+    t->entries.data_refcount = 3;
+    if (t->bucket_info) t->bucket_info->data_refcount = 3;
 }

 // Return address of value or NULL
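
Note that Table_mark_copy_on_write writes 3, the saturated value, rather than 1. That matters because a later decref (refcount &= 2) can take a count of 1 back down to 0, but can never zero a saturated count, so a table marked copy-on-write stays copy-on-write. A short trace, assuming the $ARRAY_DECREF definition from array.h:

#include <assert.h>

int main(void) {
    unsigned rc = 3;   // Table_mark_copy_on_write: permanently shared
    rc &= 2;           // some iteration's $ARRAY_DECREF runs: 3 -> 2
    assert(rc != 0);   // still treated as shared, so writes still copy
    rc &= 2;           // any further decrefs: 2 -> 2
    assert(rc == 2);
    return 0;
}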

View File

@@ -24,7 +24,19 @@
     if (__builtin_expect($v == NULL, 0)) \
         fail_source(filename, start, end, "The key %r is not in this table\n", generic_as_str(&$k, USE_COLOR, $info->TableInfo.key)); \
     *$v; })
+
+#define $TABLE_FOREACH(table_expr, key_type, k, value_type, v, value_offset, body, else_body) {\
+    array_t $entries = (table_expr).entries; \
+    if ($entries.length == 0) else_body \
+    else { \
+        $ARRAY_INCREF($entries); \
+        for (int64_t $i = 0; $i < $entries.length; $i++) { \
+            key_type k = *(key_type*)($entries.data + $i*$entries.stride); \
+            value_type v = *(value_type*)($entries.data + $i*$entries.stride + value_offset); \
+            body \
+        } \
+        $ARRAY_DECREF($entries); \
+    } \
+}

 table_t Table_from_entries(array_t entries, const TypeInfo *type);
 void *Table_get(const table_t *t, const void *key, const TypeInfo *type);
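
As with arrays, a rough expansion makes the macro easier to audit. For a hypothetical table whose entries pack an 8-byte key followed by an 8-byte value (so value_offset is 8; all identifiers here are illustrative):

// $TABLE_FOREACH(t, int64_t, key, int64_t, val, 8, { use(key, val); }, {})
// expands to (reformatted):
{
    array_t $entries = t.entries;
    if ($entries.length == 0) {}
    else {
        $ARRAY_INCREF($entries);   // entries array is shared while we walk it
        for (int64_t $i = 0; $i < $entries.length; $i++) {
            int64_t key = *(int64_t*)($entries.data + $i*$entries.stride);
            int64_t val = *(int64_t*)($entries.data + $i*$entries.stride + 8);
            { use(key, val); }
        }
        $ARRAY_DECREF($entries);   // release the loop's reference
    }
}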

View File

@@ -25,13 +25,13 @@ typedef struct TypeInfo {
     } CustomInfo;
     struct {
         const char *sigil;
-        struct TypeInfo *pointed;
+        const struct TypeInfo *pointed;
     } PointerInfo;
     struct {
-        struct TypeInfo *item;
+        const struct TypeInfo *item;
     } ArrayInfo;
     struct {
-        struct TypeInfo *key, *value;
+        const struct TypeInfo *key, *value;
     } TableInfo;
     struct {
         const char *type_str;

View File

@@ -711,18 +711,8 @@ CORD compile(env_t *env, ast_t *ast)
         set_binding(scope, CORD_to_const_char_star(index), new(binding_t, .type=Type(IntType, .bits=64)));
         CORD value = compile(env, for_->value);
         set_binding(scope, CORD_to_const_char_star(value), new(binding_t, .type=item_t));
-        CORD loop = CORD_all(
-            "for (Int64_t ", index, " = 1; ", index, " <= $iter.length; ++", index, ") {\n"
-            "\t", compile_type(item_t), " ", value, " = *(", compile_type(item_t), "*)($iter.data + (", index, "-1)*$iter.stride);\n"
-            "\t", compile(scope, for_->body), "\n"
-            "}\n");
-        if (for_->empty)
-            loop = CORD_all("if ($iter.length == 0)\n", compile(env, for_->empty), "\nelse\n", loop);
-        return CORD_all(
-            "{ // For loop:\n"
-            "array_t $iter = ", compile(env, for_->iter), ";\n",
-            loop,
-            "}\n");
+        return CORD_all("$ARRAY_FOREACH(", compile(env, for_->iter), ", ", index, ", ", compile_type(item_t), ", ", value,
+                        ", ", compile(scope, for_->body), ", ", for_->empty ? compile(scope, for_->empty) : "{}", ")");
     }
     case TableType: {
         type_t *key_t = Match(iter_t, TableType)->key_type;
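
With this change, a Tomo for-loop over an array compiles to a single macro call instead of an open-coded C loop, so the refcounting and empty-array handling live in one audited place. Illustrative output for a loop with an else-clause (identifiers and bodies are hypothetical):

// A Tomo loop `for x in nums ... else ...` now compiles to roughly:
$ARRAY_FOREACH(nums, $i, int64_t, $x,
    { /* compiled loop body using $x */ },
    { /* compiled `empty` block, or {} when absent */ })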

tomo.c
View File

@@ -71,7 +71,7 @@ int main(int argc, char *argv[])
     const char *cflags = getenv("CFLAGS");
     if (!cflags)
-        cflags = "-std=c11 -fdollars-in-identifiers -fsanitize=signed-integer-overflow -fno-sanitize-recover";
+        cflags = "-std=c11 -fdollars-in-identifiers -fsanitize=signed-integer-overflow -fno-sanitize-recover -D_XOPEN_SOURCE=700 -D_POSIX_C_SOURCE=200809L -D_DEFAULT_SOURCE";

     const char *ldlibs = "-lgc -lcord -lm -L. -ltomo";
     if (getenv("LDLIBS"))