diff options
| author | Bruce Hill <bruce@bruce-hill.com> | 2024-02-04 21:13:50 -0500 |
|---|---|---|
| committer | Bruce Hill <bruce@bruce-hill.com> | 2024-02-04 21:13:50 -0500 |
| commit | adde91636f04ae7544dba1ca5c6c1a40c074edb9 (patch) | |
| tree | dfeb8c0c16fda6a87ef30b048b070ee4cb175a78 /builtins | |
| parent | b08a0d3e2bf45bae11c982dd24d0292d6436b993 (diff) | |
Builtins
Diffstat (limited to 'builtins')
| -rw-r--r-- | builtins/array.c | 332 | ||||
| -rw-r--r-- | builtins/array.h | 46 | ||||
| -rw-r--r-- | builtins/bool.c | 39 | ||||
| -rw-r--r-- | builtins/builtins.c | 10 | ||||
| -rw-r--r-- | builtins/char.c | 86 | ||||
| -rw-r--r-- | builtins/datatypes.h | 31 | ||||
| -rw-r--r-- | builtins/floats.c | 211 | ||||
| -rw-r--r-- | builtins/functions.c | 83 | ||||
| -rw-r--r-- | builtins/functions.h | 15 | ||||
| -rw-r--r-- | builtins/integers.c | 89 | ||||
| -rw-r--r-- | builtins/memory.c | 33 | ||||
| -rw-r--r-- | builtins/string.c | 597 | ||||
| -rw-r--r-- | builtins/string.h | 23 | ||||
| -rw-r--r-- | builtins/table.c | 558 | ||||
| -rw-r--r-- | builtins/table.h | 34 | ||||
| -rw-r--r-- | builtins/types.c | 349 | ||||
| -rw-r--r-- | builtins/types.h | 55 |
17 files changed, 2591 insertions, 0 deletions
diff --git a/builtins/array.c b/builtins/array.c new file mode 100644 index 00000000..d2c5444e --- /dev/null +++ b/builtins/array.c @@ -0,0 +1,332 @@ + +#include <ctype.h> +#include <err.h> +#include <gc.h> +#include <gc/cord.h> +#include <stdalign.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdlib.h> +#include <sys/param.h> + +#include "array.h" +#include "types.h" +#include "functions.h" +#include "../SipHash/halfsiphash.h" +#include "../util.h" + +extern const void *SSS_HASH_VECTOR; + +// Replace the array's .data pointer with a new pointer to a copy of the +// data that is compacted and has a stride of exactly `item_size` +public void Array_compact(array_t *arr, int64_t item_size) +{ + void *copy = NULL; + if (arr->length > 0) { + copy = arr->atomic ? GC_MALLOC_ATOMIC(arr->length * item_size) : GC_MALLOC(arr->length * item_size); + if ((int64_t)arr->stride == item_size) { + memcpy(copy, arr->data, arr->length * item_size); + } else { + for (int64_t i = 0; i < arr->length; i++) + memcpy(copy + i*item_size, arr->data + arr->stride*i, item_size); + } + } + *arr = (array_t){ + .data=copy, + .length=arr->length, + .stride=item_size, + .free=0, + .atomic=arr->atomic, + .copy_on_write=0, + }; +} + +public void Array_insert(array_t *arr, const void *item, int64_t index, int64_t item_size) +{ + if (index < 1) index = arr->length - index + 1; + + if (index < 1) index = 1; + else if (index > (int64_t)arr->length + 1) index = (int64_t)arr->length + 1; + + if (!arr->data) { + arr->free = 4; + arr->data = arr->atomic ? GC_MALLOC_ATOMIC(arr->free * item_size) : GC_MALLOC(arr->free * item_size); + arr->stride = item_size; + } else if (arr->free < 1 || (int64_t)arr->stride != item_size) { + arr->free = MAX(15, MIN(1, arr->length/4)); + void *copy = arr->atomic ? GC_MALLOC_ATOMIC((arr->length + arr->free) * item_size) : GC_MALLOC((arr->length + arr->free) * item_size); + for (int64_t i = 0; i < index-1; i++) + memcpy(copy + i*item_size, arr->data + arr->stride*i, item_size); + for (int64_t i = index-1; i < (int64_t)arr->length; i++) + memcpy(copy + (i+1)*item_size, arr->data + arr->stride*i, item_size); + arr->data = copy; + arr->copy_on_write = 0; + arr->stride = item_size; + } else { + if (arr->copy_on_write) + Array_compact(arr, item_size); + + if (index != arr->length+1) + memmove((void*)arr->data + index*item_size, arr->data + (index-1)*item_size, (arr->length - index)*item_size); + } + assert(arr->free > 0); + --arr->free; + ++arr->length; + memcpy((void*)arr->data + (index-1)*item_size, item, item_size); +} + +public void Array_insert_all(array_t *arr, array_t to_insert, int64_t index, int64_t item_size) +{ + if (index < 1) index = arr->length - index + 1; + + if (index < 1) index = 1; + else if (index > (int64_t)arr->length + 1) index = (int64_t)arr->length + 1; + + if (!arr->data) { + arr->free = to_insert.length; + arr->data = arr->atomic ? GC_MALLOC_ATOMIC(item_size*arr->free) : GC_MALLOC(item_size*arr->free); + } else if ((int64_t)arr->free < (int64_t)to_insert.length || (int64_t)arr->stride != item_size) { + arr->free = to_insert.length; + void *copy = arr->atomic ? GC_MALLOC_ATOMIC((arr->length + arr->free) * item_size) : GC_MALLOC((arr->length + arr->free) * item_size); + for (int64_t i = 0; i < index-1; i++) + memcpy(copy + i*item_size, arr->data + arr->stride*i, item_size); + for (int64_t i = index-1; i < (int64_t)arr->length; i++) + memcpy(copy + (i+to_insert.length)*item_size, arr->data + arr->stride*i, item_size); + arr->data = copy; + arr->copy_on_write = 0; + } else { + if (arr->copy_on_write) + Array_compact(arr, item_size); + + if (index != arr->length+1) + memmove((void*)arr->data + index*item_size, arr->data + (index-1)*item_size, (arr->length - index + to_insert.length-1)*item_size); + } + arr->free -= to_insert.length; + arr->length += to_insert.length; + for (int64_t i = 0; i < to_insert.length; i++) + memcpy((void*)arr->data + (index-1 + i)*item_size, to_insert.data + i*to_insert.stride, item_size); +} + +public void Array_remove(array_t *arr, int64_t index, int64_t count, int64_t item_size) +{ + if (index < 1) index = arr->length - index + 1; + + if (index < 1 || index > (int64_t)arr->length || count < 1) return; + + if (count > arr->length - index + 1) + count = (arr->length - index) + 1; + + // TODO: optimize arr.remove(1) by just updating the .data and .length values + + if (index + count > arr->length) { + if (arr->free >= 0) + arr->free += count; + } else if (arr->copy_on_write || (int64_t)arr->stride != item_size) { + void *copy = arr->atomic ? GC_MALLOC_ATOMIC((arr->length-1) * item_size) : GC_MALLOC((arr->length-1) * item_size); + for (int64_t src = 1, dest = 1; src <= (int64_t)arr->length; src++) { + if (src < index || src >= index + count) { + memcpy(copy + (dest - 1)*item_size, arr->data + arr->stride*(src - 1), item_size); + ++dest; + } + } + arr->data = copy; + arr->free = 0; + arr->copy_on_write = 0; + } else { + memmove((void*)arr->data + (index-1)*item_size, arr->data + (index-1 + count)*item_size, (arr->length - index + count - 1)*item_size); + arr->free += count; + } + arr->length -= count; +} + +public void Array_sort(array_t *arr, const TypeInfo *type) +{ + const TypeInfo *item_type = type->ArrayInfo.item; + int64_t item_size = item_type->size; + if (item_type->align > 1 && item_size % item_type->align) + item_size += item_type->align - (item_size % item_type->align); // padding + + if (arr->copy_on_write || (int64_t)arr->stride != item_size) + Array_compact(arr, item_size); + + qsort_r(arr->data, arr->length, item_size, (void*)generic_compare, (void*)item_type); +} + +public void Array_shuffle(array_t *arr, int64_t item_size) +{ + if (arr->copy_on_write || (int64_t)arr->stride != item_size) + Array_compact(arr, item_size); + + char tmp[item_size]; + for (int64_t i = arr->length-1; i > 1; i--) { + int32_t j = arc4random_uniform(i+1); + memcpy(tmp, arr->data + i*item_size, item_size); + memcpy((void*)arr->data + i*item_size, arr->data + j*item_size, item_size); + memcpy((void*)arr->data + j*item_size, tmp, item_size); + } +} + +public array_t Array_slice(array_t *array, int64_t first, int64_t stride, int64_t length, bool readonly, const TypeInfo *type) +{ + TypeInfo *item = type->ArrayInfo.item; + int64_t item_size = item->size; + + if (stride > INT16_MAX) + stride = INT16_MAX; + else if (stride < INT16_MIN) + stride = INT16_MIN; + + if (stride == 0) { + // Zero stride + return (array_t){.atomic=array->atomic}; + } else if (stride < 0) { + if (first == INT64_MIN) first = array->length; + if (first > array->length) { + // Range starting after array + int64_t residual = first % -stride; + first = array->length - (array->length % -stride) + residual; + } + if (first > array->length) first += stride; + if (first < 1) { + // Range outside array + return (array_t){.atomic=array->atomic}; + } + } else { + if (first == INT64_MIN) first = 1; + if (first < 1) { + // Range starting before array + first = first % stride; + } + while (first < 1) first += stride; + if (first > array->length) { + // Range outside array + return (array_t){.atomic=array->atomic}; + } + } + + // If less than zero, set to zero (without a conditional branch) + length = length & ~(length >> 63); + if (length > array->length/labs(stride) + 1) length = array->length/labs(stride) + 1; + if (length < 0) length = -length; + + // Sometimes, we want to create a readonly slice (e.g. during iteration) + // and we don't actually need to set the COW flag because the slice will + // never do modifictions + array->copy_on_write = !readonly; + + return (array_t){ + .atomic=array->atomic, + .data=array->data + item_size*(first-1), + .length=length, + .stride=(array->stride * stride), + .copy_on_write=1, // slice is always copy-on-write + }; +} + +public bool Array_contains(array_t array, void *item, const TypeInfo *type) +{ + TypeInfo *item_type = type->ArrayInfo.item; + for (int64_t i = 0; i < array.length; i++) + if (generic_equal(array.data + i*array.stride, item, item_type)) + return true; + return false; +} + +public void Array_clear(array_t *array) +{ + *array = (array_t){.data=0, .length=0}; +} + +public int32_t Array_compare(const array_t *x, const array_t *y, const TypeInfo *type) +{ + // Early out for arrays with the same data, e.g. two copies of the same array: + if (x->data == y->data && x->stride == y->stride) + return (x->length > y->length) - (x->length < y->length); + + TypeInfo *item = type->ArrayInfo.item; + if (item->tag == PointerInfo || (item->tag == CustomInfo && item->CustomInfo.compare == NULL)) { // data comparison + int64_t item_size = item->size; + if (x->stride == (int32_t)item_size && y->stride == (int32_t)item_size) { + int32_t cmp = (int32_t)memcmp(x->data, y->data, MIN(x->length, y->length)*item_size); + if (cmp != 0) return cmp; + } else { + for (int32_t i = 0, len = MIN(x->length, y->length); i < len; i++) { + int32_t cmp = (int32_t)memcmp(x->data+ x->stride*i, y->data + y->stride*i, item_size); + if (cmp != 0) return cmp; + } + } + } else { + for (int32_t i = 0, len = MIN(x->length, y->length); i < len; i++) { + int32_t cmp = generic_compare(x->data + x->stride*i, y->data + y->stride*i, item); + if (cmp != 0) return cmp; + } + } + return (x->length > y->length) - (x->length < y->length); +} + +public bool Array_equal(const array_t *x, const array_t *y, const TypeInfo *type) +{ + return (Array_compare(x, y, type) == 0); +} + +public CORD Array_cord(const array_t *arr, bool colorize, const TypeInfo *type) +{ + TypeInfo *item_type = type->ArrayInfo.item; + CORD c = "["; + for (int64_t i = 0; i < arr->length; i++) { + if (i > 0) + c = CORD_cat(c, ", "); + CORD item_cord = generic_cord(arr->data + i*arr->stride, colorize, item_type); + c = CORD_cat(c, item_cord); + } + c = CORD_cat(c, "]"); + return c; +} + +public uint32_t Array_hash(const array_t *arr, const TypeInfo *type) +{ + // Array hash is calculated as a rolling, compacting hash of the length of the array, followed by + // the hashes of its items (or the items themselves if they're small plain data) + // In other words, it reads in a chunk of items or item hashes, then when it fills up the chunk, + // hashes it down to a single item to start the next chunk. This repeats until the end, when it + // hashes the last chunk down to a uint32_t. + TypeInfo *item = type->ArrayInfo.item; + if (item->tag == PointerInfo || (item->tag == CustomInfo && item->CustomInfo.hash == NULL)) { // Raw data hash + int64_t item_size = item->size; + uint8_t hash_batch[4 + 8*item_size] = {}; + uint8_t *p = hash_batch, *end = hash_batch + sizeof(hash_batch); + int64_t length = arr->length; + *p = (uint32_t)length; + p += sizeof(uint32_t); + for (int64_t i = 0; i < arr->length; i++) { + if (p >= end) { + uint32_t chunk_hash; + halfsiphash(&hash_batch, sizeof(hash_batch), SSS_HASH_VECTOR, (uint8_t*)&chunk_hash, sizeof(chunk_hash)); + p = hash_batch; + *(uint32_t*)p = chunk_hash; + p += sizeof(uint32_t); + } + memcpy((p += item_size), arr->data + i*arr->stride, item_size); + } + uint32_t hash; + halfsiphash(&hash_batch, ((int64_t)p) - ((int64_t)hash_batch), SSS_HASH_VECTOR, (uint8_t*)&hash, sizeof(hash)); + return hash; + } else { + uint32_t hash_batch[16] = {(uint32_t)arr->length}; + uint32_t *p = &hash_batch[1], *end = hash_batch + sizeof(hash_batch)/sizeof(hash_batch[0]); + for (int64_t i = 0; i < arr->length; i++) { + if (p >= end) { + uint64_t chunk_hash; + halfsiphash(&hash_batch, sizeof(hash_batch), SSS_HASH_VECTOR, (uint8_t*)&chunk_hash, sizeof(chunk_hash)); + p = hash_batch; + *(p++) = chunk_hash; + } + *(p++) = generic_hash(arr->data + i*arr->stride, item); + } + uint32_t hash; + halfsiphash(&hash_batch, ((int64_t)p) - ((int64_t)hash_batch), SSS_HASH_VECTOR, (uint8_t*)&hash, sizeof(hash)); + return hash; + } +} + +// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 diff --git a/builtins/array.h b/builtins/array.h new file mode 100644 index 00000000..5d0a2ee4 --- /dev/null +++ b/builtins/array.h @@ -0,0 +1,46 @@ +#pragma once +#include <stdbool.h> +#include <gc/cord.h> + +#include "../util.h" +#include "datatypes.h" +#include "functions.h" +#include "types.h" + +void Array_insert(array_t *arr, const void *item, int64_t index, int64_t item_size); +void Array_insert_all(array_t *arr, array_t to_insert, int64_t index, int64_t item_size); +void Array_remove(array_t *arr, int64_t index, int64_t count, int64_t item_size); +void Array_sort(array_t *arr, const TypeInfo *type); +void Array_shuffle(array_t *arr, int64_t item_size); +void Array_clear(array_t *array); +void Array_compact(array_t *arr, int64_t item_size); +bool Array_contains(array_t array, void *item, const TypeInfo *type); +array_t Array_slice(array_t *array, int64_t first, int64_t stride, int64_t length, bool readonly, const TypeInfo *type); +uint32_t Array_hash(const array_t *arr, const TypeInfo *type); +int32_t Array_compare(const array_t *x, const array_t *y, const TypeInfo *type); +bool Array_equal(const array_t *x, const array_t *y, const TypeInfo *type); +CORD Array_cord(const array_t *arr, bool colorize, const TypeInfo *type); + +// Due to some C language weirdness, the type of "foo" is inferred to be `char[3]` instead of `const char*` +// This is a hacky workaround to ensure that __typeof("foo") => `const char *` +#define FIX_STR_LITERAL(s) _Generic(((void)0, s), char*: (const char*)s, default: s) + +#define ARRAY_OF(t) t** +#define EMPTY_ARRAY(t) (t**)new(array_t) +#define LENGTH(arr) (((array_t*)(arr))->length) +#define ARRAY(x, ...) (__typeof(FIX_STR_LITERAL(x))**)new(array_t, \ + .data=memcpy(GC_MALLOC(sizeof((__typeof(FIX_STR_LITERAL(x))[]){x, __VA_ARGS__})), (__typeof(FIX_STR_LITERAL(x))[]){x, __VA_ARGS__}, \ + sizeof((__typeof(FIX_STR_LITERAL(x))[]){x, __VA_ARGS__})), \ + .length=(sizeof((__typeof(FIX_STR_LITERAL(x))[]){x, __VA_ARGS__})) / sizeof(FIX_STR_LITERAL(x)), \ + .stride=sizeof(FIX_STR_LITERAL(x))) +#define STATIC_ARRAY(x, ...) ((array_t){ \ + .data=(__typeof(FIX_STR_LITERAL(x))[]){x, __VA_ARGS__}, \ + .length=(sizeof((__typeof(FIX_STR_LITERAL(x))[]){x, __VA_ARGS__})) / sizeof(FIX_STR_LITERAL(x)), \ + .stride=sizeof(FIX_STR_LITERAL(x))}) +#define foreach(arr, var, last) for (__typeof(arr[0]) var = arr[0], last = ith_addr(arr, LENGTH(arr)-1); var && var <= last; var = ((void*)var) + ((array_t*)(arr))->stride) +#define ith_addr(arr, i) ((__typeof(arr[0]))(((array_t*)(arr))->data + (i)*((array_t*)(arr))->stride)) +#define ith(arr, i) (*ith_addr(arr,i)) +#define append(arr, obj) Array_insert((array_t*)(arr), (__typeof((arr)[0][0])[]){obj}, 0, sizeof((arr)[0][0])) +#define remove(arr, i) Array_remove((array_t*)(arr), (i)+1, 1, sizeof(arr[0][0])) + +// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 diff --git a/builtins/bool.c b/builtins/bool.c new file mode 100644 index 00000000..7614113f --- /dev/null +++ b/builtins/bool.c @@ -0,0 +1,39 @@ + +#include <gc.h> +#include <gc/cord.h> +#include <stdalign.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdlib.h> +#include <ctype.h> +#include <sys/param.h> +#include <err.h> + +#include "types.h" +#include "../util.h" +#include "../SipHash/halfsiphash.h" + +extern const void *SSS_HASH_VECTOR; + +static CORD Bool_cord(const bool *b, bool colorize, const TypeInfo *type) +{ + (void)type; + if (colorize) + return *b ? "\x1b[35myes\x1b[m" : "\x1b[35mno\x1b[m"; + else + return *b ? "yes" : "no"; +} + +public struct { + TypeInfo type; +} Bool_type = { + .type={ + .name="Bool", + .size=sizeof(bool), + .align=alignof(bool), + .tag=CustomInfo, + .CustomInfo={.cord=(void*)Bool_cord}, + }, +}; + +// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 diff --git a/builtins/builtins.c b/builtins/builtins.c new file mode 100644 index 00000000..f01addfd --- /dev/null +++ b/builtins/builtins.c @@ -0,0 +1,10 @@ +#include <gc.h> +#include <string.h> +#include <stdio.h> + +#include "array.h" +#include "types.h" +#include "functions.h" +#include "table.h" + +public const char *SSS_HASH_VECTOR = "sss hash vector ----------------------------------------------"; diff --git a/builtins/char.c b/builtins/char.c new file mode 100644 index 00000000..4df519bb --- /dev/null +++ b/builtins/char.c @@ -0,0 +1,86 @@ + +#include <gc.h> +#include <gc/cord.h> +#include <stdalign.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdlib.h> +#include <ctype.h> +#include <sys/param.h> +#include <err.h> + +#include "array.h" +#include "string.h" +#include "types.h" + +static CORD Char_cord(const char *c, bool colorize, const TypeInfo *type) { + (void)type; + CORD cord = 0; + switch (*c) { + case '\a': return "\\a"; + case '\b': return "\\b"; + case '\x1b': return "\\e"; + case '\f': return "\\f"; + case '\n': return "\\n"; + case '\t': return "\\t"; + case '\r': return "\\r"; + case '\v': return "\\v"; + case '\\': return "\\\\"; + case '"': return "\\\""; + default: { + if (!isprint(*c)) + CORD_sprintf(&cord, "\\x%02X", (int)*c); + else + cord = CORD_cat_char(0, *c); + } + } + if (colorize) { + if (CORD_len(cord) > 1) + CORD_sprintf(&cord, "\x1b[34m%r\x1b[m", cord); + else + CORD_sprintf(&cord, "\x1b[35m%r\x1b[m", cord); + } + return cord; +} + +// For some reason, the C functions from ctypes.h return integers instead of +// booleans, and what's worse, the integers are not limited to [0-1]. So, +// it's necessary to cast them to bools to clamp them to those values. +#define BOOLIFY(fn) public bool Char__ ## fn(char c) { return (bool)fn(c); } +BOOLIFY(isalnum) +BOOLIFY(isalpha) +BOOLIFY(iscntrl) +BOOLIFY(isdigit) +BOOLIFY(isgraph) +BOOLIFY(islower) +BOOLIFY(isprint) +BOOLIFY(ispunct) +BOOLIFY(isspace) +BOOLIFY(isupper) +BOOLIFY(isxdigit) +BOOLIFY(isascii) +BOOLIFY(isblank) + +typedef bool (*char_pred_t)(char); +typedef char (*char_map_t)(char); + +public struct { + TypeInfo type; + char_pred_t isalnum, isalpha, iscntrl, isdigit, isgraph, islower, isprint, ispunct, + isspace, isupper, isxdigit, isascii, isblank; + char_map_t tolower, toupper; +} Char_type = { + .type={ + .name="Char", + .size=sizeof(char), + .align=alignof(char), + .tag=CustomInfo, + .CustomInfo={.cord=(void*)Char_cord}, + }, + .isalnum=Char__isalnum, .isalpha=Char__isalpha, .iscntrl=Char__iscntrl, .isdigit=Char__isdigit, .isgraph=Char__isgraph, + .islower=Char__islower, .isprint=Char__isprint, .ispunct=Char__ispunct, .isspace=Char__isspace, .isupper=Char__isupper, + .isxdigit=Char__isxdigit, .isascii=Char__isascii, .isblank=Char__isblank, + .tolower=(char_map_t)(void*)tolower, .toupper=(char_map_t)(void*)toupper, +}; + +// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 diff --git a/builtins/datatypes.h b/builtins/datatypes.h new file mode 100644 index 00000000..133b273f --- /dev/null +++ b/builtins/datatypes.h @@ -0,0 +1,31 @@ +#pragma once +#include <stdint.h> +#include <stdbool.h> + +typedef struct { + void *data; + int64_t length:42; + uint8_t free:4; + bool copy_on_write:1, atomic:1; + int16_t stride:16; +} array_t; + +typedef struct { + uint32_t occupied:1, index:31; + uint32_t next_bucket; +} bucket_t; + +typedef struct { + uint32_t count:32, last_free:31; + bool copy_on_write:1; + bucket_t buckets[0]; +} bucket_info_t; + +typedef struct table_s { + array_t entries; + bucket_info_t *bucket_info; + struct table_s *fallback; + void *default_value; +} table_t; + +// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 diff --git a/builtins/floats.c b/builtins/floats.c new file mode 100644 index 00000000..a2188e68 --- /dev/null +++ b/builtins/floats.c @@ -0,0 +1,211 @@ +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif +#include <gc.h> +#include <gc/cord.h> +#include <math.h> +#include <stdalign.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdlib.h> + +#include "../SipHash/halfsiphash.h" +#include "array.h" +#include "string.h" +#include "types.h" + +public CORD Num__cord(const double *f, bool colorize, const TypeInfo *type) { + (void)type; + CORD c; + if (colorize) CORD_sprintf(&c, "\x1b[35m%g\x1b[33;2m\x1b[m", *f); + else CORD_sprintf(&c, "%g", *f); + return c; +} + +static int32_t Num__compare(const double *x, const double *y, const TypeInfo *type) { + (void)type; + return (*x > *y) - (*x < *y); +} + +static bool Num__equal(const double *x, const double *y, const TypeInfo *type) { + (void)type; + return *x == *y; +} + +public Str_t Num__format(double f, int64_t precision) { + int len = snprintf(NULL, 0, "%.*f", (int)precision, f); + char *str = GC_MALLOC_ATOMIC(len + 1); + snprintf(str, len+1, "%.*f", (int)precision, f); + return (Str_t){.data=str, .length=len, .stride=1}; +} + +public Str_t Num__scientific(double f, int64_t precision) { + int len = snprintf(NULL, 0, "%.*e", (int)precision, f); + char *str = GC_MALLOC_ATOMIC(len + 1); + snprintf(str, len+1, "%.*e", (int)precision, f); + return (Str_t){.data=str, .length=len, .stride=1}; +} + +public double Num__mod(double num, double modulus) { + double result = fmod(num, modulus); + return (result < 0) != (modulus < 0) ? result + modulus : result; +} + +public bool Num__isinf(double n) { return isinf(n); } +public bool Num__finite(double n) { return finite(n); } +public bool Num__isnan(double n) { return isnan(n); } + +typedef bool (*double_pred_t)(double); +typedef double (*double_unary_fn_t)(double); +typedef double (*double_binary_fn_t)(double, double); + +public struct { + TypeInfo type; + // Constants: + double NaN, _2_sqrt_pi, e, half_pi, inf, inverse_half_pi, inverse_pi, ln10, ln2, + log2e, pi, quarter_pi, sqrt2, sqrt_half, tau; + // Nullary functions: + double (*random)(void); + // Predicates: + double_pred_t finite, isinf, isnan; + // Unary functions: + double_unary_fn_t abs, acos, acosh, asin, asinh, atan, atanh, cbrt, ceil, cos, cosh, erf, erfc, + exp, exp10, exp2, expm1, floor, j0, j1, log, log10, log1p, log2, logb, + nextdown, nextup, rint, round, roundeven, significand, sin, sinh, sqrt, + tan, tanh, tgamma, trunc, y0, y1; + // Binary functions: + double_binary_fn_t atan2, copysign, dist, hypot, maxmag, minmag, mod, nextafter, pow, remainder; + // Odds and ends: + Str_t (*format)(double f, int64_t precision); + Str_t (*scientific)(double f, int64_t precision); +} Num_type = { + .type=(TypeInfo){ + .name="Num", + .size=sizeof(double), + .align=alignof(double), + .tag=CustomInfo, + .CustomInfo={ + .compare=(void*)Num__compare, + .equal=(void*)Num__equal, + .cord=(void*)Num__cord, + }, + }, + .NaN=NAN, ._2_sqrt_pi=M_2_SQRTPI, .e=M_E, .half_pi=M_PI_2, .inf=1./0., .inverse_half_pi=M_2_PI, + .inverse_pi=M_1_PI, .ln10=M_LN10, .ln2=M_LN2, .log2e=M_LOG2E, .pi=M_PI, .quarter_pi=M_PI_4, + .sqrt2=M_SQRT2, .sqrt_half=M_SQRT1_2, .tau=2.*M_PI, + .random=drand48, + .finite=Num__finite, + .isinf=Num__isinf, + .isnan=Num__isnan, + .atan2=atan2, .copysign=copysign, .dist=fdim, .hypot=hypot, .maxmag=fmaxmag, .minmag=fminmag, + .mod=Num__mod, .nextafter=nextafter, .pow=pow, .remainder=remainder, + .abs=fabs, .acos=acos, .acosh=acosh, .asin=asin, .asinh=asinh, .atan=atan, .atanh=atanh, + .cbrt=cbrt, .ceil=ceil, .cos=cos, .cosh=cosh, .erf=erf, .erfc=erfc, .exp=exp, + .exp10=exp10, .exp2=exp2, .expm1=expm1, .floor=floor, .j0=j0, .j1=j1, .log=log, + .log10=log10, .log1p=log1p, .log2=log2, .logb=logb, .nextdown=nextdown, .nextup=nextup, + .rint=rint, .round=round, .roundeven=roundeven, .significand=significand, .sin=sin, + .sinh=sinh, .sqrt=sqrt, .tan=tan, .tanh=tanh, .tgamma=tgamma, .trunc=trunc, .y0=y0, .y1=y1, + .format=Num__format, + .scientific=Num__scientific, +}; + +public CORD Num32__cord(float *f, bool colorize, const TypeInfo *type) { + (void)type; + CORD c; + if (colorize) CORD_sprintf(&c, "\x1b[35m%g_f32\x1b[m", *f); + else CORD_sprintf(&c, "%g_f32", *f); + return c; +} + +static int32_t Num32__compare(const float *x, const float *y, const TypeInfo *type) { + (void)type; + return (*x > *y) - (*x < *y); +} + +static bool Num32__equal(const float *x, const float *y, const TypeInfo *type) { + (void)type; + return *x == *y; +} + +public Str_t Num32__format(float f, int64_t precision) { + int len = snprintf(NULL, 0, "%.*f", (int)precision, f); + char *str = GC_MALLOC_ATOMIC(len + 1); + snprintf(str, len+1, "%.*f", (int)precision, f); + return (Str_t){.data=str, .length=len, .stride=1}; +} + +public Str_t Num32__scientific(float f, int64_t precision) { + int len = snprintf(NULL, 0, "%.*e", (int)precision, f); + char *str = GC_MALLOC_ATOMIC(len + 1); + snprintf(str, len+1, "%.*e", (int)precision, f); + return (Str_t){.data=str, .length=len, .stride=1}; +} + +public float Num32__mod(float num, float modulus) { + float result = fmodf(num, modulus); + return (result < 0) != (modulus < 0) ? result + modulus : result; +} + +public float Num32__random(void) { + return (float)drand48(); +} + +public bool Num32__isinf(float n) { return isinf(n); } +public bool Num32__finite(float n) { return finite(n); } +public bool Num32__isnan(float n) { return isnan(n); } + +typedef bool (*float_pred_t)(float); +typedef float (*float_unary_fn_t)(float); +typedef float (*float_binary_fn_t)(float, float); + +public struct { + TypeInfo type; + // Alphabetized: + float NaN, _2_sqrt_pi, e, half_pi, inf, inverse_half_pi, inverse_pi, ln10, ln2, + log2e, pi, quarter_pi, sqrt2, sqrt_half, tau; + // Nullary functions: + float (*random)(void); + // Predicates: + float_pred_t finite, isinf, isnan; + // Unary functions: + float_unary_fn_t abs, acos, acosh, asin, asinh, atan, atanh, cbrt, ceil, cos, cosh, erf, erfc, + exp, exp10, exp2, expm1, floor, j0, j1, log, log10, log1p, log2, logb, + nextdown, nextup, rint, round, roundeven, significand, sin, sinh, sqrt, + tan, tanh, tgamma, trunc, y0, y1; + // Binary functions: + float_binary_fn_t atan2, copysign, dist, hypot, maxmag, minmag, mod, nextafter, pow, remainder; + // Odds and ends: + Str_t (*format)(float f, int64_t precision); + Str_t (*scientific)(float f, int64_t precision); +} Num32_type = { + .type=(TypeInfo){ + .name="Num32", + .size=sizeof(float), + .align=alignof(float), + .tag=CustomInfo, + .CustomInfo={ + .compare=(void*)Num32__compare, + .equal=(void*)Num32__equal, + .cord=(void*)Num32__cord, + }, + }, + .NaN=NAN, ._2_sqrt_pi=M_2_SQRTPI, .e=M_E, .half_pi=M_PI_2, .inf=1./0., .inverse_half_pi=M_2_PI, + .inverse_pi=M_1_PI, .ln10=M_LN10, .ln2=M_LN2, .log2e=M_LOG2E, .pi=M_PI, .quarter_pi=M_PI_4, + .sqrt2=M_SQRT2, .sqrt_half=M_SQRT1_2, .tau=2.*M_PI, + .random=Num32__random, + .finite=Num32__finite, + .isinf=Num32__isinf, + .isnan=Num32__isnan, + .atan2=atan2f, .copysign=copysignf, .dist=fdimf, .hypot=hypotf, .maxmag=fmaxmagf, .minmag=fminmagf, + .mod=Num32__mod, .nextafter=nextafterf, .pow=powf, .remainder=remainderf, + .abs=fabsf, .acos=acosf, .acosh=acoshf, .asin=asinf, .asinh=asinhf, .atan=atanf, .atanh=atanhf, + .cbrt=cbrtf, .ceil=ceilf, .cos=cosf, .cosh=coshf, .erf=erff, .erfc=erfcf, .exp=expf, + .exp10=exp10f, .exp2=exp2f, .expm1=expm1f, .floor=floorf, .j0=j0f, .j1=j1f, .log=logf, + .log10=log10f, .log1p=log1pf, .log2=log2f, .logb=logbf, .nextdown=nextdownf, .nextup=nextupf, + .rint=rintf, .round=roundf, .roundeven=roundevenf, .significand=significandf, .sin=sinf, + .sinh=sinhf, .sqrt=sqrtf, .tan=tanf, .tanh=tanhf, .tgamma=tgammaf, .trunc=truncf, .y0=y0f, .y1=y1f, + .format=Num32__format, + .scientific=Num32__scientific, +}; + +// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 diff --git a/builtins/functions.c b/builtins/functions.c new file mode 100644 index 00000000..1e3e1066 --- /dev/null +++ b/builtins/functions.c @@ -0,0 +1,83 @@ +#include <gc.h> +#include <gc/cord.h> +#include <stdbool.h> +#include <stdint.h> +#include <errno.h> +#include <stdlib.h> +#include <sys/param.h> + +#include "../SipHash/halfsiphash.h" +#include "../files.h" +#include "../util.h" +#include "functions.h" +#include "string.h" + +void fail(const char *fmt, ...) +{ + va_list args; + va_start(args, fmt); + vfprintf(stderr, fmt, args); + va_end(args); + raise(SIGABRT); +} + +Str_t builtin_last_err() +{ + const char *str = strerror(errno); + char *copy = GC_MALLOC_ATOMIC(strlen(str)+1); + strcpy(copy, str); + return (Str_t){.data=copy, .length=strlen(str), .stride=1}; +} + +static inline char *without_colors(const char *str) +{ + // Strip out color escape sequences: "\x1b[" ... "m" + size_t fmt_len = strlen(str); + char *buf = GC_malloc_atomic(fmt_len+1); + char *dest = buf; + for (const char *src = str; *src; ++src) { + if (src[0] == '\x1b' && src[1] == '[') { + src += 2; + while (*src && *src != 'm') + ++src; + } else { + *(dest++) = *src; + } + } + *dest = '\0'; + return buf; +} + +void __doctest(const char *label, CORD expr, const char *type, bool use_color, const char *expected, const char *filename, int start, int end) +{ + static sss_file_t *file = NULL; + if (filename && (file == NULL || strcmp(file->filename, filename) != 0)) + file = sss_load_file(filename); + + if (filename && file) + CORD_fprintf(stderr, use_color ? "\x1b[33;1m>>> \x1b[0m%.*s\x1b[m\n" : ">>> %.*s\n", (end - start), file->text + start); + + if (expr) { + const char *expr_str = CORD_to_const_char_star(expr); + if (!use_color) + expr_str = without_colors(expr_str); + + CORD_fprintf(stderr, use_color ? "\x1b[2m%s\x1b[0m %s \x1b[2m: %s\x1b[m\n" : "%s %s : %s\n", label, expr_str, type); + if (expected) { + const char *actual = use_color ? without_colors(expr_str) : expr_str; + bool success = (strcmp(actual, expected) == 0); + if (!success && strchr(expected, ':')) { + actual = heap_strf("%s : %s", actual, type); + success = (strcmp(actual, expected) == 0); + } + + if (!success) { + if (filename && file) + fprint_span(stderr, file, file->text+start, file->text+end, "\x1b[31;1m", 2, use_color); + builtin_fail(use_color ? "\x1b[31;1mExpected: \x1b[32;7m%s\x1b[0m\n\x1b[31;1m But got: \x1b[31;7m%s\x1b[0m\n" : "Expected: %s\n But got: %s\n", expected, actual); + } + } + } +} + +// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 diff --git a/builtins/functions.h b/builtins/functions.h new file mode 100644 index 00000000..c79f7fba --- /dev/null +++ b/builtins/functions.h @@ -0,0 +1,15 @@ +#pragma once + +#include <gc/cord.h> +#include <stdbool.h> +#include <stdint.h> + +#include "string.h" + +void builtin_say(Str_t str, Str_t end); +void builtin_fail(const char *fmt, ...); +void builtin_fail_array(Str_t fmt, ...); +Str_t builtin_last_err(); +void builtin_doctest(const char *label, CORD expr, const char *type, bool use_color, const char *expected, const char *filename, int start, int end); + +// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 diff --git a/builtins/integers.c b/builtins/integers.c new file mode 100644 index 00000000..f221d104 --- /dev/null +++ b/builtins/integers.c @@ -0,0 +1,89 @@ +#include <gc.h> +#include <gc/cord.h> +#include <stdalign.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdlib.h> + +#include "../SipHash/halfsiphash.h" +#include "array.h" +#include "types.h" +#include "string.h" + +extern const void *SSS_HASH_VECTOR; + +#define xstr(a) str(a) +#define str(a) #a + +#define DEFINE_INT_TYPE(c_type, KindOfInt, fmt, abs_fn, min_val, max_val)\ + public CORD KindOfInt ## __cord(const c_type *i, bool colorize, const TypeInfo *type) { \ + (void)type; \ + CORD c; \ + if (colorize) CORD_sprintf(&c, "\x1b[35m%"fmt"\x1b[33;2m\x1b[m", *i); \ + else CORD_sprintf(&c, "%"fmt, *i); \ + return c; \ + } \ + public int32_t KindOfInt ## __compare(const c_type *x, const c_type *y, const TypeInfo *type) { \ + (void)type; \ + return (*x > *y) - (*x < *y); \ + } \ + public Str_t KindOfInt ## __format(c_type i, int64_t digits) { \ + int len = snprintf(NULL, 0, "%0*" fmt, (int)digits, i); \ + char *str = GC_MALLOC_ATOMIC(len + 1); \ + snprintf(str, len+1, "%0*" fmt, (int)digits, i); \ + return (Str_t){.data=str, .length=len, .stride=1}; \ + } \ + public Str_t KindOfInt ## __hex(c_type i, int64_t digits, bool uppercase, bool prefix) { \ + const char *hex_fmt = uppercase ? (prefix ? "0x%0.*lX" : "%0.*lX") : (prefix ? "0x%0.*lx" : "%0.*lx"); \ + int len = snprintf(NULL, 0, hex_fmt, (int)digits, (uint64_t)i); \ + char *str = GC_MALLOC_ATOMIC(len + 1); \ + snprintf(str, len+1, hex_fmt, (int)digits, (uint64_t)i); \ + return (Str_t){.data=str, .length=len, .stride=1}; \ + } \ + public Str_t KindOfInt ## __octal(c_type i, int64_t digits, bool prefix) { \ + const char *octal_fmt = prefix ? "0o%0.*lo" : "%0.*lo"; \ + int len = snprintf(NULL, 0, octal_fmt, (int)digits, (uint64_t)i); \ + char *str = GC_MALLOC_ATOMIC(len + 1); \ + snprintf(str, len+1, octal_fmt, (int)digits, (uint64_t)i); \ + return (Str_t){.data=str, .length=len, .stride=1}; \ + } \ + public c_type KindOfInt ## __random(int64_t min, int64_t max) { \ + if (min > max) builtin_fail("Random min (%ld) is larger than max (%ld)", min, max); \ + if (min < (int64_t)min_val) builtin_fail("Random min (%ld) is smaller than the minimum "#KindOfInt" value", min); \ + if (max > (int64_t)max_val) builtin_fail("Random max (%ld) is smaller than the maximum "#KindOfInt" value", max); \ + int64_t range = max - min; \ + if (range > UINT32_MAX) builtin_fail("Random range (%ld) is larger than the maximum allowed (%ld)", range, UINT32_MAX); \ + uint32_t r = arc4random_uniform((uint32_t)range); \ + return min + (c_type)r; \ + } \ + public struct { \ + TypeInfo type; \ + c_type min, max; \ + c_type (*abs)(c_type i); \ + Str_t (*format)(c_type i, int64_t digits); \ + Str_t (*hex)(c_type i, int64_t digits, bool uppercase, bool prefix); \ + Str_t (*octal)(c_type i, int64_t digits, bool prefix); \ + c_type (*random)(int64_t min, int64_t max); \ + } KindOfInt##_type = { \ + .type={ \ + .name=#KindOfInt, \ + .size=sizeof(c_type), \ + .align=alignof(c_type), \ + .tag=CustomInfo, \ + .CustomInfo={.compare=(void*)KindOfInt##__compare, .cord=(void*)KindOfInt##__cord}, \ + }, \ + .min=min_val, \ + .max=max_val, \ + .abs=(void*)abs_fn, \ + .format=KindOfInt##__format, \ + .hex=KindOfInt##__hex, \ + .octal=KindOfInt##__octal, \ + .random=KindOfInt##__random, \ + }; + +DEFINE_INT_TYPE(int64_t, Int, "ld", labs, INT64_MIN, INT64_MAX); +DEFINE_INT_TYPE(int32_t, Int32, "d_i32", abs, INT32_MIN, INT32_MAX); +DEFINE_INT_TYPE(int16_t, Int16, "d_i16", abs, INT16_MIN, INT16_MAX); +DEFINE_INT_TYPE(int8_t, Int8, "d_i8", abs, INT8_MIN, INT8_MAX); + +// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 diff --git a/builtins/memory.c b/builtins/memory.c new file mode 100644 index 00000000..4db6ca39 --- /dev/null +++ b/builtins/memory.c @@ -0,0 +1,33 @@ + +#include <gc.h> +#include <gc/cord.h> +#include <stdalign.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdlib.h> +#include <ctype.h> +#include <sys/param.h> +#include <err.h> + +#include "types.h" +#include "../util.h" +#include "../SipHash/halfsiphash.h" + +extern const void *SSS_HASH_VECTOR; + +public CORD Memory__cord(const void *p, bool colorize, const TypeInfo *type) { + (void)type; + CORD cord; + CORD_sprintf(&cord, colorize ? "\x1b[0;34;1mMemory<%p>\x1b[m" : "Memory<%p>", p); + return cord; +} + +public TypeInfo Memory_type = { + .name="Memory", + .size=0, + .align=0, + .tag=CustomInfo, + .CustomInfo={.cord=(void*)Memory__cord}, +}; + +// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 diff --git a/builtins/string.c b/builtins/string.c new file mode 100644 index 00000000..92741903 --- /dev/null +++ b/builtins/string.c @@ -0,0 +1,597 @@ +#include <assert.h> +#include <gc.h> +#include <gc/cord.h> +#include <stdalign.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdlib.h> +#include <ctype.h> +#include <sys/param.h> +#include <err.h> + +#include "../SipHash/halfsiphash.h" +#include "types.h" +#include "array.h" +#include "string.h" + +#define CLAMP(x, lo, hi) MIN(hi, MAX(x,lo)) + +extern const void *SSS_HASH_VECTOR; + +typedef struct { + uint8_t success; + int32_t index; +} find_result_t; + +static Str_t compacted(const Str_t str) +{ + if (str.stride == 1) return str; + char *buf = GC_MALLOC_ATOMIC(str.length + 1); + for (int64_t i = 0; i < str.length; i++) + buf[i] = str.data[i*str.stride]; + return (Str_t){.data=buf, .length=str.length, .stride=1}; +} + +public CORD Str__cord(const Str_t *s, bool colorize, const TypeInfo *type) +{ + (void)type; + char *data = s->data; + // Note: it's important to have unicode strings not get broken up with + // escapes, otherwise they won't print right. + if (colorize) { + CORD cord = "\x1b[35m\""; + for (int32_t i = 0; i < s->length; i++) { + char c = data[i*s->stride]; + switch (c) { +#define BACKSLASHED(esc) "\x1b[34m\\\x1b[1m" esc "\x1b[0;35m" + case '\a': cord = CORD_cat(cord, BACKSLASHED("a")); break; + case '\b': cord = CORD_cat(cord, BACKSLASHED("b")); break; + case '\x1b': cord = CORD_cat(cord, BACKSLASHED("e")); break; + case '\f': cord = CORD_cat(cord, BACKSLASHED("f")); break; + case '\n': cord = CORD_cat(cord, BACKSLASHED("n")); break; + case '\r': cord = CORD_cat(cord, BACKSLASHED("r")); break; + case '\t': cord = CORD_cat(cord, BACKSLASHED("t")); break; + case '\v': cord = CORD_cat(cord, BACKSLASHED("v")); break; + case '"': cord = CORD_cat(cord, BACKSLASHED("\"")); break; + case '\\': cord = CORD_cat(cord, BACKSLASHED("\\")); break; + case '\x00' ... '\x06': case '\x0E' ... '\x1A': + case '\x1C' ... '\x1F': case '\x7F' ... '\x7F': + CORD_sprintf(&cord, "%r" BACKSLASHED("x%02X"), cord, c); + break; + default: cord = CORD_cat_char(cord, c); break; +#undef BACKSLASHED + } + } + cord = CORD_cat(cord, "\"\x1b[m"); + if (strcmp(type->name, "Str") != 0) + CORD_sprintf(&cord, "\x1b[0;1m%s::\x1b[m%r", type->name, cord); + return cord; + } else { + CORD cord = "\""; + for (int32_t i = 0; i < s->length; i++) { + char c = data[i*s->stride]; + switch (c) { + case '\a': cord = CORD_cat(cord, "\\a"); break; + case '\b': cord = CORD_cat(cord, "\\b"); break; + case '\x1b': cord = CORD_cat(cord, "\\e"); break; + case '\f': cord = CORD_cat(cord, "\\f"); break; + case '\n': cord = CORD_cat(cord, "\\n"); break; + case '\r': cord = CORD_cat(cord, "\\r"); break; + case '\t': cord = CORD_cat(cord, "\\t"); break; + case '\v': cord = CORD_cat(cord, "\\v"); break; + case '"': cord = CORD_cat(cord, "\\\""); break; + case '\\': cord = CORD_cat(cord, "\\\\"); break; + case '\x00' ... '\x06': case '\x0E' ... '\x1A': + case '\x1C' ... '\x1F': case '\x7F' ... '\x7F': + CORD_sprintf(&cord, "%r\\x%02X", cord, c); + break; + default: cord = CORD_cat_char(cord, c); break; + } + } + cord = CORD_cat_char(cord, '"'); + if (strcmp(type->name, "Str") != 0) + CORD_sprintf(&cord, "%s::%r", type->name, cord); + return cord; + } +} + +public int32_t Str__compare(const Str_t *x, const Str_t *y) +{ + int64_t length = x->length < y->length ? x->length : y->length; + for (int64_t i = 0; i < length; i++) { + char xc = x->data[i*x->stride], yc = y->data[i*y->stride]; + if (xc != yc) + return (xc > yc) ? 1 : -1; + } + return (x->length > y->length) - (x->length < y->length); +} + +public bool Str__equal(const Str_t *x, const Str_t *y) +{ + return (Str__compare(x, y) == 0); +} + +public int Str__hash(const Str_t *s, const TypeInfo *type) +{ + (void)type; + if (s->length == 0 || !s->data) return 0; + + const char *data; + if (s->stride == 1) { + data = s->data; + } else { + char *buf = alloca(s->length); + for (int64_t i = 0; i < s->length; i++) + buf[i] = s->data[i*s->stride]; + data = buf; + } + + uint32_t hash; + halfsiphash(data, s->length, SSS_HASH_VECTOR, (uint8_t*)&hash, sizeof(hash)); + return hash; +} + +public Str_t Str__uppercased(const Str_t s) +{ + char *s2 = GC_MALLOC_ATOMIC(s.length + 1); + for (int64_t i = 0; i < s.length; i++) + s2[i] = toupper(s.data[i*s.stride]); + return (Str_t){.data=s2, .length=s.length, .stride=1}; +} + +public Str_t Str__lowercased(const Str_t s) +{ + char *s2 = GC_MALLOC_ATOMIC(s.length + 1); + for (int64_t i = 0; i < s.length; i++) + s2[i] = tolower(s.data[i*s.stride]); + return (Str_t){.data=s2, .length=s.length, .stride=1}; +} + +public Str_t Str__capitalized(const Str_t s) +{ + char *s2 = GC_MALLOC_ATOMIC(s.length + 1); + int64_t i; + for (i = 0; i < s.length; i++) { + if (isalpha(s.data[i*s.stride])) { + s2[i] = toupper(s.data[i*s.stride]); + ++i; + break; + } else { + s2[i] = s.data[i*s.stride]; + } + } + for (; i < s.length; i++) + s2[i] = s.data[i*s.stride]; + return (Str_t){.data=s2, .length=s.length, .stride=1}; +} + +public Str_t Str__titlecased(const Str_t s) +{ + char *s2 = GC_MALLOC_ATOMIC(s.length + 1); + bool should_uppercase = true; + for (int64_t i = 0; i < s.length; i++) { + if (isalpha(s.data[i*s.stride])) { + if (should_uppercase) { + s2[i] = toupper(s.data[i*s.stride]); + should_uppercase = false; + } else { + s2[i] = tolower(s.data[i*s.stride]); + } + } else { + should_uppercase = true; + s2[i] = s.data[i*s.stride]; + } + } + return (Str_t){.data=s2, .length=s.length, .stride=1}; +} + +public bool Str__starts_with(const Str_t s, const Str_t prefix) +{ + if (s.length < prefix.length) return false; + for (int64_t i = 0; i < prefix.length; i++) { + if (s.data[i*s.stride] != prefix.data[i*prefix.stride]) + return false; + } + return true; +} + +public bool Str__ends_with(const Str_t s, const Str_t suffix) +{ + if (s.length < suffix.length) return false; + for (int64_t i = 0; i < suffix.length; i++) { + if (s.data[(s.length-suffix.length+i)*s.stride] != suffix.data[i*suffix.stride]) + return false; + } + return true; +} + +public Str_t Str__without_prefix(const Str_t s, const Str_t prefix) +{ + if (s.length < prefix.length) return s; + for (int64_t i = 0; i < prefix.length; i++) { + if (s.data[i*s.stride] != prefix.data[i*prefix.stride]) + return s; + } + return (Str_t){ + .data=s.data + prefix.length*s.stride, + .length=s.length - prefix.length, + .stride=s.stride, + .cow=0, .atomic=1, + }; +} + +public Str_t Str__without_suffix(const Str_t s, const Str_t suffix) +{ + if (s.length < suffix.length) return s; + for (int64_t i = 0; i < suffix.length; i++) { + if (s.data[(s.length - suffix.length + i)*s.stride] != suffix.data[i*suffix.stride]) + return s; + } + return (Str_t){ + .data=s.data, + .length=s.length - suffix.length, + .stride=s.stride, + .cow=0, .atomic=1, + }; +} + +public Str_t Str__trimmed(const Str_t s, const Str_t trim_chars, bool trim_left, bool trim_right) +{ + int64_t length = s.length; + int64_t start = 0; + if (trim_left) { + for (; start < s.length; start++) { + for (int64_t t = 0; t < trim_chars.length; t++) { + if (s.data[start*s.stride] == trim_chars.data[t*trim_chars.stride]) + goto found_ltrim; + } + goto done_trimming_left; + found_ltrim: + --length; + } + } + done_trimming_left:; + if (trim_right) { + while (length > 0) { + for (int64_t t = 0; t < trim_chars.length; t++) { + if (s.data[(start+length-1)*s.stride] == trim_chars.data[t*trim_chars.stride]) + goto found_rtrim; + } + goto done_trimming_right; + found_rtrim: + --length; + } + } + done_trimming_right:; + return (Str_t){.data=s.data+start*s.stride, .length=length, .stride=s.stride}; +} + +public const char *Str__c_string(const Str_t str) +{ + if (str.length == 0) + return ""; + else if (str.stride == 1 + // Verify that the '\0' would be on the same page, so unlikely to segfault: + && (((uint64_t)str.data + str.length + 1) & 0xFFF) != 0 + // Check for nul-termination + && str.data[str.length+1] == '\0') + return str.data; + + char *buf = GC_MALLOC_ATOMIC(str.length + 1); + for (int64_t i = 0; i < str.length; i++) + buf[i] = str.data[i*str.stride]; + buf[str.length] = '\0'; + return buf; +} + +public Str_t Str__from_c_string(const char *str) +{ + size_t length = str ? strlen(str) : 0; + if (length == 0) return (Str_t){.length=0, .stride=0}; + char *buf = GC_MALLOC_ATOMIC(length + 1); + memcpy(buf, str, length+1); + buf[length+1] = '\0'; + return (Str_t){.data=buf, .length=(int64_t)length, .stride=1}; +} + +public find_result_t Str__find(const Str_t str, const Str_t pat) +{ + if (str.length < pat.length) return (find_result_t){.success=0}; + if (pat.length == 0) return (find_result_t){.success=1, .index=1}; + + // For short strings, do naive approach: + // if (str.length*pat.length < UCHAR_MAX) { + for (int64_t s = 0; s < str.length; s++) { + for (int64_t p = 0; p < pat.length; p++) { + if (str.data[s*str.stride] != pat.data[p*pat.stride]) + goto not_a_match; + } + return (find_result_t){.success=1, .index=s+1}; + not_a_match:; + } + return (find_result_t){.success=0}; + // } + + // // Boyer-Moore algorithm: + // static int skip[UCHAR_MAX]; + // for (int32_t i = 0; i <= UCHAR_MAX; ++i) + // skip[i] = str.length; + // for (int32_t i = 0; i < pat.length; ++i) + // skip[(unsigned char)pat.data[i*pat.stride]] = pat.length - i - 1; + // char lastpatchar = pat.data[(pat.length - 1)*pat.stride]; + // int32_t min_skip = pat.length; + // for (int32_t p = 0; p < pat.length - 1; ++p) { + // if (pat.data[p*pat.stride] == lastpatchar) + // min_skip = pat.length - p - 1; + // } + + // for (int32_t i = pat.length - 1; i < str.length; ) { + // // Use skip table: + // int32_t can_skip = skip[(unsigned char)str.data[i*str.stride]]; + // if (can_skip != 0) { + // i += can_skip; + // continue; + // } + // // Check for exact match: + // for (int32_t j = 0; j < pat.length; j++) { + // if (str.data[(i-pat.length+j)*str.stride] != pat.data[j*pat.stride]) { + // // Mismatch: + // i += min_skip; + // goto keep_going; + // } + // } + // return i - pat.length + 1; + // keep_going: continue; + // } + // return 0; +} + +public Str_t Str__replace(Str_t text, Str_t pat, Str_t replacement, int64_t limit) +{ + text = compacted(text); + pat = compacted(pat); + replacement = compacted(replacement); + char *buf; + size_t size; + FILE *mem = open_memstream(&buf, &size); + for (const char *pos = text.data; ; --limit) { + const char *match = limit == 0 ? NULL : memmem(pos, (int64_t)text.length - (int64_t)(pos - text.data), pat.data, pat.length); + if (match) { + fwrite(pos, 1, (size_t)(match - pos), mem); + fwrite(replacement.data, 1, replacement.length, mem); + pos = match + pat.length; + } else { + fwrite(pos, 1, (size_t)text.length - (size_t)(pos - text.data), mem); + break; + } + } + fflush(mem); + char *str = GC_MALLOC_ATOMIC(size + 1); + memcpy(str, buf, size+1); + fclose(mem); + free(buf); + return (Str_t){.data=str, .length=size, .stride=1}; +} + +public Str_t Str__quoted(const Str_t text, const char *dsl, bool colorize) +{ + char *buf; + size_t size; + FILE *mem = open_memstream(&buf, &size); + if (colorize) fputs("\x1b[35m", mem); + if (dsl && dsl[0]) fprintf(mem, "$%s", dsl); + const char *escape_color = colorize ? "\x1b[1;34m" : ""; + const char *reset_color = colorize ? "\x1b[0;35m" : ""; + fputc('"', mem); + for (int64_t i = 0; i < text.length; i++) { + char c = text.data[i*text.stride]; + switch (c) { + case '\\': fprintf(mem, "%s\\\\%s", escape_color, reset_color); break; + case '"': fprintf(mem, "%s\\\"%s", escape_color, reset_color); break; + case '\n': fprintf(mem, "%s\\n%s", escape_color, reset_color); break; + case '\t': fprintf(mem, "%s\\t%s", escape_color, reset_color); break; + case '\r': fprintf(mem, "%s\\r%s", escape_color, reset_color); break; + case '\a': fprintf(mem, "%s\\a%s", escape_color, reset_color); break; + case '\b': fprintf(mem, "%s\\b%s", escape_color, reset_color); break; + case '\v': fprintf(mem, "%s\\v%s", escape_color, reset_color); break; + default: { + if (isprint(c)) + fputc(c, mem); + else + fprintf(mem, "%s\\x%02X%s", escape_color, (int)c, reset_color); + } + } + } + fputc('"', mem); + if (colorize) fputs("\x1b[m", mem); + fflush(mem); + char *str = GC_MALLOC_ATOMIC(size + 1); + memcpy(str, buf, size+1); + fclose(mem); + free(buf); + return (Str_t){.data=str, .length=size, .stride=1}; +} + +public Str_Array_t Str__split(const Str_t str, const Str_t split_chars) +{ + if (str.length == 0) return (Str_Array_t){.stride=sizeof(Str_t)}; + Str_Array_t strings = {.stride=sizeof(Str_t)}; + int64_t capacity = 0; + bool separators[256] = {0}; + for (int64_t i = 0; i < split_chars.length; i++) + separators[(int)split_chars.data[split_chars.stride*i]] = true; + + for (int64_t i = 0; i < str.length; i++) { + if (separators[(int)str.data[str.stride*i]]) continue; + int64_t length = 0; + while (i < str.length && !separators[(int)str.data[str.stride*i]]) { + ++length; + ++i; + } + strings.data = GC_REALLOC(strings.data, sizeof(Str_t)*(capacity += 1)); + strings.data[strings.length++] = (Str_t){ + .data=&str.data[str.stride*(i-length)], + .length=length, + .stride=str.stride, + }; + } + return strings; +} + +public Str_t Str__join(Str_t glue, Str_Array_t pieces) +{ + if (pieces.length == 0) return (Str_t){.stride=1}; + + int64_t length = 0; + for (int64_t i = 0; i < pieces.length; i++) { + if (i > 0) length += glue.length; + length += ((Str_t*)((void*)pieces.data + i*pieces.stride))->length; + } + char *data = GC_MALLOC_ATOMIC((size_t)length+1); + char *ptr = data; + for (int64_t i = 0; i < pieces.length; i++) { + if (i > 0) { + for (int64_t j = 0; j < glue.length; j++) + *(ptr++) = glue.data[j*glue.stride]; + } + Str_t piece = *(Str_t*)((void*)pieces.data + i*pieces.stride); + for (int64_t j = 0; j < piece.length; j++) + *(ptr++) = piece.data[j*piece.stride]; + } + return (Str_t){.data=data, .length=length, .stride=1}; +} + +public struct { + TypeInfo type; + Str_t (*uppercased)(const Str_t s); + Str_t (*lowercased)(const Str_t s); + Str_t (*capitalized)(const Str_t s); + Str_t (*titlecased)(const Str_t s); + bool (*starts_with)(const Str_t s, const Str_t prefix); + bool (*ends_with)(const Str_t s, const Str_t suffix); + Str_t (*without_prefix)(const Str_t s, const Str_t prefix); + Str_t (*without_suffix)(const Str_t s, const Str_t suffix); + Str_t (*trimmed)(const Str_t s, const Str_t trim_chars, bool trim_left, bool trim_right); + Str_t (*slice)(Str_t *s, int64_t first, int64_t stride, int64_t length, bool readonly, const TypeInfo *type); + const char *(*c_string)(const Str_t str); + Str_t (*from_c_string)(const char *str); + find_result_t (*find)(const Str_t str, const Str_t pat); + Str_t (*replace)(Str_t text, Str_t pat, Str_t replacement, int64_t limit); + Str_t (*quoted)(const Str_t text, const char *dsl, bool colorize); + Str_Array_t (*split)(const Str_t str, const Str_t split_chars); + Str_t (*join)(Str_t glue, Str_Array_t pieces); + bool (*equal)(const Str_t *x, const Str_t *y); + int32_t (*compare)(const Str_t *x, const Str_t *y); + int (*hash)(const Str_t *s, const TypeInfo *type); + CORD (*cord)(const Str_t *s, bool colorize, const TypeInfo *type); +} Str_type = { + .type={ + .name="Str", + .size=sizeof(Str_t), + .align=alignof(Str_t), + .tag=CustomInfo, + .CustomInfo={ + .cord=(void*)Str__cord, + .compare=(void*)Str__compare, + .equal=(void*)Str__equal, + .hash=(void*)Str__hash, + }, + }, + .uppercased=Str__uppercased, + .lowercased=Str__lowercased, + .capitalized=Str__capitalized, + .titlecased=Str__titlecased, + .starts_with=Str__starts_with, + .ends_with=Str__ends_with, + .without_prefix=Str__without_prefix, + .without_suffix=Str__without_suffix, + .trimmed=Str__trimmed, + .slice=(void*)Array_slice, + .c_string=Str__c_string, + .from_c_string=Str__from_c_string, + .find=Str__find, + .replace=Str__replace, + .quoted=Str__quoted, + .split=Str__split, + .join=Str__join, +}; + +public CORD CString_cord(const char **s, bool colorize, const TypeInfo *type) +{ + Str_t str = {.data=(char*)*s, .length=*s ? strlen(*s) : 0, .stride=1}; + return Str__cord(&str, colorize, type); +} + +public uint32_t CString_hash(const char **s, const TypeInfo *type) +{ + (void)type; + if (!*s) return 0; + uint32_t hash; + halfsiphash(*s, strlen(*s)+1, SSS_HASH_VECTOR, (uint8_t*)&hash, sizeof(hash)); + assert(strlen(*s) > 0); + return hash; +} + +public uint32_t CString_compare(const char **x, const char **y, const TypeInfo *type) +{ + (void)type; + if (!*x || !*y) + return (!*x) - (!*y); + return strcmp(*x, *y); +} + +public struct { + TypeInfo type; + const char *(*from_str)(const Str_t str); + Str_t (*as_str)(const char *str); +} CString_type = { + .type={ + .name="CString", + .size=sizeof(char*), + .align=alignof(char*), + .tag=CustomInfo, + .CustomInfo={ + .cord=(void*)CString_cord, + .hash=(void*)CString_hash, + .compare=(void*)CString_compare, + }, + }, + .from_str=Str__c_string, + .as_str=Str__from_c_string, +}; + +static CORD Cord_cord(const CORD *c, bool colorize, const TypeInfo *type) +{ + const char *str = CORD_to_const_char_star(*c); + return CString_cord(&str, colorize, type); +} + +static uint32_t Cord_hash(const CORD *c, const TypeInfo *type) +{ + const char *str = CORD_to_const_char_star(*c); + return CString_hash(&str, type); +} + +static uint32_t Cord_compare(const char **x, const char **y, const TypeInfo *type) +{ + (void)type; + return CORD_cmp(*x, *y); +} + +public struct { + TypeInfo type; +} Cord_type = { + .type={ + .name="Cord", + .size=sizeof(CORD), + .align=alignof(CORD), + .tag=CustomInfo, + .CustomInfo={ + .cord=(void*)Cord_cord, + .hash=(void*)Cord_hash, + .compare=(void*)Cord_compare, + }, + }, +}; + +// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 diff --git a/builtins/string.h b/builtins/string.h new file mode 100644 index 00000000..1d6a91a4 --- /dev/null +++ b/builtins/string.h @@ -0,0 +1,23 @@ +#pragma once +#include <gc/cord.h> +#include <stdbool.h> +#include <stdint.h> + +typedef struct { + char *data; + int64_t length:42; + uint8_t free:4, cow:1, atomic:1; + int16_t stride:16; +} Str_t; + +#define STRING(s) ((Str_t){.data=s, .length=(int32_t)(sizeof(s)), .stride=1, .free=0}) + +typedef struct { + Str_t *data; + unsigned long int length:42; + unsigned short int free:4, cow:1, atomic:1; + short int stride:16; +} Str_Array_t; + + +// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 diff --git a/builtins/table.c b/builtins/table.c new file mode 100644 index 00000000..7ba33b1c --- /dev/null +++ b/builtins/table.c @@ -0,0 +1,558 @@ + +// table.c - C Hash table implementation for SSS +// Copyright 2023 Bruce Hill +// Provided under the MIT license with the Commons Clause +// See included LICENSE for details. + +// Hash table (aka Dictionary) Implementation +// Hash keys and values are stored *by value* +// The hash insertion/lookup implementation is based on Lua's tables, +// which use a chained scatter with Brent's variation. + +#include <assert.h> +#include <gc.h> +#include <stdalign.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/param.h> + +#include "../SipHash/halfsiphash.h" +#include "../util.h" +#include "array.h" +#include "table.h" +#include "types.h" + +// #define DEBUG_TABLES + +#ifdef DEBUG_TABLES +#define hdebug(fmt, ...) printf("\x1b[2m" fmt "\x1b[m" __VA_OPT__(,) __VA_ARGS__) +#else +#define hdebug(...) (void)0 +#endif + +// Helper accessors for type functions/values: +#define HASH_KEY(t, k) (generic_hash((k), type->TableInfo.key) % ((t)->bucket_info->count)) +#define EQUAL_KEYS(x, y) (generic_equal((x), (y), type->TableInfo.key)) +#define ENTRY_SIZE (type->TableInfo.entry_size) +#define VALUE_OFFSET (type->TableInfo.value_offset) +#define END_OF_CHAIN UINT32_MAX + +#define GET_ENTRY(t, i) ((t)->entries.data + (t)->entries.stride*(i)) + +extern const void *SSS_HASH_VECTOR; + +extern CORD CString_cord(const char **s, bool colorize, const TypeInfo *type); +extern uint32_t CString_hash(const char **s, const TypeInfo *type); +extern uint32_t CString_compare(const char **x, const char **y, const TypeInfo *type); +static TypeInfo CString_typeinfo = { + .name="CString", + .size=sizeof(char*), + .align=alignof(char*), + .tag=CustomInfo, + .CustomInfo={ + .cord=(void*)CString_cord, + .hash=(void*)CString_hash, + .compare=(void*)CString_compare, + }, +}; + +TypeInfo MemoryPointer_typeinfo = { + .name="@Memory", + .size=sizeof(void*), + .align=alignof(void*), + .tag=PointerInfo, + .PointerInfo={ + .sigil="@", + .pointed=NULL, + }, +}; + +TypeInfo CStringToVoidStarTable_type = { + .name="{CString=>@Memory}", + .size=sizeof(table_t), + .align=alignof(table_t), + .tag=TableInfo, + .TableInfo={.key=&CString_typeinfo,.value=&MemoryPointer_typeinfo, + .entry_size=16, .value_offset=8}, +}; + +static inline void hshow(const table_t *t) +{ + hdebug("{"); + for (uint32_t i = 0; t->bucket_info && i < t->bucket_info->count; i++) { + if (i > 0) hdebug(" "); + if (t->bucket_info->buckets[i].occupied) + hdebug("[%d]=%d(%d)", i, t->bucket_info->buckets[i].index, t->bucket_info->buckets[i].next_bucket); + else + hdebug("[%d]=_", i); + } + hdebug("}\n"); +} + +static void maybe_copy_on_write(table_t *t, const TypeInfo *type) +{ + if (t->entries.copy_on_write) { + Array_compact(&t->entries, type->TableInfo.entry_size); + } + + if (t->bucket_info && t->bucket_info->copy_on_write) { + int64_t size = sizeof(bucket_info_t) + t->bucket_info->count*sizeof(bucket_t); + t->bucket_info = memcpy(GC_MALLOC(size), t->bucket_info, size); + t->bucket_info->copy_on_write = 0; + } +} + +public void Table_mark_copy_on_write(table_t *t) +{ + t->entries.copy_on_write = 1; + if (t->bucket_info) t->bucket_info->copy_on_write = 1; +} + +// Return address of value or NULL +public void *Table_get_raw(const table_t *t, const void *key, const TypeInfo *type) +{ + assert(type->tag == TableInfo); + if (!t || !key || !t->bucket_info) return NULL; + + uint32_t hash = HASH_KEY(t, key); + hshow(t); + hdebug("Getting value with initial probe at %u\n", hash); + bucket_t *buckets = t->bucket_info->buckets; + for (uint32_t i = hash; buckets[i].occupied; i = buckets[i].next_bucket) { + hdebug("Checking against key in bucket %u\n", i); + void *entry = GET_ENTRY(t, buckets[i].index); + if (EQUAL_KEYS(entry, key)) { + hdebug("Found key!\n"); + return entry + VALUE_OFFSET; + } + if (buckets[i].next_bucket == END_OF_CHAIN) + break; + } + return NULL; +} + +public void *Table_get(const table_t *t, const void *key, const TypeInfo *type) +{ + assert(type->tag == TableInfo); + for (const table_t *iter = t; iter; iter = iter->fallback) { + void *ret = Table_get_raw(iter, key, type); + if (ret) return ret; + } + for (const table_t *iter = t; iter; iter = iter->fallback) { + if (iter->default_value) return iter->default_value; + } + return NULL; +} + +static void Table_set_bucket(table_t *t, const void *entry, int32_t index, const TypeInfo *type) +{ + assert(t->bucket_info); + hshow(t); + const void *key = entry; + bucket_t *buckets = t->bucket_info->buckets; + uint32_t hash = HASH_KEY(t, key); + hdebug("Hash value (mod %u) = %u\n", t->bucket_info->count, hash); + bucket_t *bucket = &buckets[hash]; + if (!bucket->occupied) { + hdebug("Got an empty space\n"); + // Empty space: + bucket->occupied = 1; + bucket->index = index; + bucket->next_bucket = END_OF_CHAIN; + hshow(t); + return; + } + + hdebug("Collision detected in bucket %u (entry %u)\n", hash, bucket->index); + + while (buckets[t->bucket_info->last_free].occupied) { + assert(t->bucket_info->last_free > 0); + --t->bucket_info->last_free; + } + + uint32_t collided_hash = HASH_KEY(t, GET_ENTRY(t, bucket->index)); + if (collided_hash != hash) { // Collided with a mid-chain entry + hdebug("Hit a mid-chain entry at bucket %u (chain starting at %u)\n", hash, collided_hash); + // Find chain predecessor + uint32_t predecessor = collided_hash; + while (buckets[predecessor].next_bucket != hash) + predecessor = buckets[predecessor].next_bucket; + + // Move mid-chain entry to free space and update predecessor + buckets[predecessor].next_bucket = t->bucket_info->last_free; + buckets[t->bucket_info->last_free] = *bucket; + } else { // Collided with the start of a chain + hdebug("Hit start of a chain\n"); + uint32_t end_of_chain = hash; + while (buckets[end_of_chain].next_bucket != END_OF_CHAIN) + end_of_chain = buckets[end_of_chain].next_bucket; + hdebug("Appending to chain\n"); + // Chain now ends on the free space: + buckets[end_of_chain].next_bucket = t->bucket_info->last_free; + bucket = &buckets[t->bucket_info->last_free]; + } + + bucket->occupied = 1; + bucket->index = index; + bucket->next_bucket = END_OF_CHAIN; + hshow(t); +} + +static void hashmap_resize_buckets(table_t *t, uint32_t new_capacity, const TypeInfo *type) +{ + hdebug("About to resize from %u to %u\n", t->bucket_info ? t->bucket_info->count : 0, new_capacity); + hshow(t); + int64_t alloc_size = sizeof(bucket_info_t) + (int64_t)(new_capacity)*sizeof(bucket_t); + t->bucket_info = GC_MALLOC_ATOMIC(alloc_size); + memset(t->bucket_info->buckets, 0, (int64_t)new_capacity * sizeof(bucket_t)); + t->bucket_info->count = new_capacity; + t->bucket_info->last_free = new_capacity-1; + // Rehash: + for (int64_t i = 0; i < Table_length(t); i++) { + hdebug("Rehashing %u\n", i); + Table_set_bucket(t, GET_ENTRY(t, i), i, type); + } + + hshow(t); + hdebug("Finished resizing\n"); +} + +// Return address of value +public void *Table_reserve(table_t *t, const void *key, const void *value, const TypeInfo *type) +{ + assert(type->tag == TableInfo); + if (!t || !key) return NULL; + hshow(t); + + int64_t key_size = type->TableInfo.key->size, + value_size = type->TableInfo.value->size; + if (!t->bucket_info || t->bucket_info->count == 0) { + hashmap_resize_buckets(t, 4, type); + } else { + // Check if we are clobbering a value: + void *value_home = Table_get_raw(t, key, type); + if (value_home) { // Update existing slot + // Ensure that `value_home` is still inside t->entries, even if COW occurs + ptrdiff_t offset = value_home - t->entries.data; + maybe_copy_on_write(t, type); + value_home = t->entries.data + offset; + + if (value && value_size > 0) + memcpy(value_home, value, value_size); + + return value_home; + } + } + // Otherwise add a new entry: + + // Resize buckets if necessary + if (t->entries.length >= (int64_t)t->bucket_info->count) { + uint32_t newsize = t->bucket_info->count + MIN(t->bucket_info->count, 64); + hashmap_resize_buckets(t, newsize, type); + } + + if (!value && value_size > 0) { + for (table_t *iter = t->fallback; iter; iter = iter->fallback) { + value = Table_get_raw(iter, key, type); + if (value) break; + } + for (table_t *iter = t; !value && iter; iter = iter->fallback) { + if (iter->default_value) value = iter->default_value; + } + } + + maybe_copy_on_write(t, type); + + char buf[ENTRY_SIZE] = {}; + memcpy(buf, key, key_size); + if (value && value_size > 0) + memcpy(buf + VALUE_OFFSET, value, value_size); + else + memset(buf + VALUE_OFFSET, 0, value_size); + Array_insert(&t->entries, buf, 0, ENTRY_SIZE); + + int64_t entry_index = t->entries.length-1; + void *entry = GET_ENTRY(t, entry_index); + Table_set_bucket(t, entry, entry_index, type); + return entry + VALUE_OFFSET; +} + +public void Table_set(table_t *t, const void *key, const void *value, const TypeInfo *type) +{ + assert(type->tag == TableInfo); + (void)Table_reserve(t, key, value, type); +} + +public void Table_remove(table_t *t, const void *key, const TypeInfo *type) +{ + assert(type->tag == TableInfo); + if (!t || Table_length(t) == 0) return; + + // TODO: this work doesn't need to be done if the key is already missing + maybe_copy_on_write(t, type); + + // If unspecified, pop a random key: + if (!key) { + hdebug("Popping random key\n"); + uint32_t index = arc4random_uniform(t->entries.length); + key = GET_ENTRY(t, index); + } + + // Steps: look up the bucket for the removed key + // If missing, then return immediately + // Swap last key/value into the removed bucket's index1 + // Zero out the last key/value and decrement the count + // Find the last key/value's bucket and update its index1 + // Look up the bucket for the removed key + // If bucket is first in chain: + // Move bucket->next to bucket's spot + // zero out bucket->next's old spot + // maybe update lastfree_index1 to second-in-chain's index + // Else: + // set prev->next = bucket->next + // zero out bucket + // maybe update lastfree_index1 to removed bucket's index + + uint32_t hash = HASH_KEY(t, key); + hdebug("Removing key with hash %u\n", hash); + bucket_t *bucket, *prev = NULL; + for (uint32_t i = hash; t->bucket_info->buckets[i].occupied; i = t->bucket_info->buckets[i].next_bucket) { + if (EQUAL_KEYS(GET_ENTRY(t, t->bucket_info->buckets[i].index), key)) { + bucket = &t->bucket_info->buckets[i]; + hdebug("Found key to delete in bucket %u\n", i); + goto found_it; + } + if (t->bucket_info->buckets[i].next_bucket == END_OF_CHAIN) + return; + prev = &t->bucket_info->buckets[i]; + } + return; + + found_it:; + assert(bucket->occupied); + + // Always remove the last entry. If we need to remove some other entry, + // swap the other entry into the last position and then remove the last + // entry. This disturbs the ordering of the table, but keeps removal O(1) + // instead of O(N) + int64_t last_entry = t->entries.length-1; + if (bucket->index != last_entry) { + hdebug("Removing key/value from the middle of the entries array\n"); + + // Find the bucket that points to the last entry's index: + uint32_t i = HASH_KEY(t, GET_ENTRY(t, last_entry)); + while (t->bucket_info->buckets[i].index != last_entry) + i = t->bucket_info->buckets[i].next_bucket; + // Update the bucket to point to the last entry's new home (the space + // where the removed entry currently sits): + t->bucket_info->buckets[i].index = bucket->index; + + // Clobber the entry being removed (in the middle of the array) with + // the last entry: + memcpy(GET_ENTRY(t, bucket->index), GET_ENTRY(t, last_entry), ENTRY_SIZE); + } + + // Last entry is being removed, so clear it out to be safe: + memset(GET_ENTRY(t, last_entry), 0, ENTRY_SIZE); + + Array_remove(&t->entries, t->entries.length, 1, ENTRY_SIZE); + + int64_t bucket_to_clear; + if (prev) { // Middle (or end) of a chain + hdebug("Removing from middle of a chain\n"); + bucket_to_clear = (bucket - t->bucket_info->buckets); + prev->next_bucket = bucket->next_bucket; + } else if (bucket->next_bucket != END_OF_CHAIN) { // Start of a chain + hdebug("Removing from start of a chain\n"); + bucket_to_clear = bucket->next_bucket; + *bucket = t->bucket_info->buckets[bucket_to_clear]; + } else { // Empty chain + hdebug("Removing from empty chain\n"); + bucket_to_clear = (bucket - t->bucket_info->buckets); + } + + t->bucket_info->buckets[bucket_to_clear] = (bucket_t){0}; + if (bucket_to_clear > t->bucket_info->last_free) + t->bucket_info->last_free = bucket_to_clear; + + hshow(t); +} + +public void *Table_entry(const table_t *t, int64_t n) +{ + if (n < 1 || n > Table_length(t)) + return NULL; + return GET_ENTRY(t, n-1); +} + +public void Table_clear(table_t *t) +{ + memset(t, 0, sizeof(table_t)); +} + +public bool Table_equal(const table_t *x, const table_t *y, const TypeInfo *type) +{ + assert(type->tag == TableInfo); + if (Table_length(x) != Table_length(y)) + return false; + + if ((x->default_value != NULL) != (y->default_value != NULL)) + return false; + + if ((x->fallback != NULL) != (y->fallback != NULL)) + return false; + + const TypeInfo *value_type = type->TableInfo.value; + for (int64_t i = 0, length = Table_length(x); i < length; i++) { + void *x_key = GET_ENTRY(x, i); + void *x_value = x_key + VALUE_OFFSET; + void *y_value = Table_get_raw(y, x_key, type); + if (!y_value) + return false; + if (!generic_equal(x_value, y_value, value_type)) + return false; + } + + if (x->default_value && y->default_value + && !generic_equal(x->default_value, y->default_value, value_type)) + return false; + + if (x->fallback && y->fallback + && !Table_equal(x->fallback, y->fallback, type)) + return false; + + return true; +} + +public int32_t Table_compare(const table_t *x, const table_t *y, const TypeInfo *type) +{ + assert(type->tag == TableInfo); + auto table = type->TableInfo; + struct { + const char *name; + const TypeInfo *type; + } member_data[] = {{"key", table.key}, {"value", table.value}}; + TypeInfo entry_type = { + .name="Entry", + .size=ENTRY_SIZE, + .align=MAX(table.key->align, table.value->align), + .tag=StructInfo, + .StructInfo={ + .members=(array_t){.data=member_data, .length=2, .stride=sizeof(member_data[0])}, + } + }; + array_t x_entries = x->entries, y_entries = y->entries; + Array_sort(&x_entries, &entry_type); + Array_sort(&y_entries, &entry_type); + return Array_compare(&x_entries, &y_entries, &entry_type); +} + +public uint32_t Table_hash(const table_t *t, const TypeInfo *type) +{ + assert(type->tag == TableInfo); + // Table hashes are computed as: + // hash(#t, xor(hash(k) for k in t.keys), xor(hash(v) for v in t.values), hash(t.fallback), hash(t.default)) + // Where fallback and default hash to zero if absent + auto table = type->TableInfo; + int64_t value_offset = table.value_offset; + + uint32_t key_hashes = 0, value_hashes = 0, fallback_hash = 0, default_hash = 0; + for (int64_t i = 0, length = Table_length(t); i < length; i++) { + void *entry = GET_ENTRY(t, i); + key_hashes ^= generic_hash(entry, table.key); + value_hashes ^= generic_hash(entry + value_offset, table.value); + } + + if (t->fallback) + fallback_hash = Table_hash(t->fallback, type); + + if (t->default_value) + default_hash = generic_hash(t->default_value, table.value); + + struct { int64_t len; uint32_t k, v, f, d; } components = { + Table_length(t), + key_hashes, + value_hashes, + fallback_hash, + default_hash, + }; + uint32_t hash; + halfsiphash(&components, sizeof(components), SSS_HASH_VECTOR, (uint8_t*)&hash, sizeof(hash)); + return hash; +} + +public CORD Table_cord(const table_t *t, bool colorize, const TypeInfo *type) +{ + assert(type->tag == TableInfo); + auto table = type->TableInfo; + int64_t value_offset = table.value_offset; + CORD c = "{"; + for (int64_t i = 0, length = Table_length(t); i < length; i++) { + if (i > 0) + c = CORD_cat(c, ", "); + void *entry = GET_ENTRY(t, i); + c = CORD_cat(c, generic_cord(entry, colorize, table.key)); + c = CORD_cat(c, "=>"); + c = CORD_cat(c, generic_cord(entry + value_offset, colorize, table.value)); + } + + if (t->fallback) { + c = CORD_cat(c, "; fallback="); + c = CORD_cat(c, Table_cord(t->fallback, colorize, type)); + } + + if (t->default_value) { + c = CORD_cat(c, "; default="); + c = CORD_cat(c, generic_cord(t->default_value, colorize, table.value)); + } + + c = CORD_cat(c, "}"); + return c; +} + +public table_t Table_from_entries(array_t entries, const TypeInfo *type) +{ + assert(type->tag == TableInfo); + table_t t = {.entries=entries}; + for (int64_t i = 0; i < Table_length(&t); i++) { + hdebug("Rehashing %u\n", i); + Table_set_bucket(&t, GET_ENTRY(&t, i), i, type); + } + return t; +} + +void *Table_str_get(const table_t *t, const char *key) +{ + void **ret = Table_get(t, &key, &CStringToVoidStarTable_type); + return ret ? *ret : NULL; +} + +void *Table_str_get_raw(const table_t *t, const char *key) +{ + void **ret = Table_get_raw(t, &key, &CStringToVoidStarTable_type); + return ret ? *ret : NULL; +} + +void *Table_str_reserve(table_t *t, const char *key, const void *value) +{ + return Table_reserve(t, &key, &value, &CStringToVoidStarTable_type); +} + +void Table_str_set(table_t *t, const char *key, const void *value) +{ + Table_set(t, &key, &value, &CStringToVoidStarTable_type); +} + +void Table_str_remove(table_t *t, const char *key) +{ + return Table_remove(t, &key, &CStringToVoidStarTable_type); +} + +void *Table_str_entry(const table_t *t, int64_t n) +{ + return Table_entry(t, n); +} + +// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1 diff --git a/builtins/table.h b/builtins/table.h new file mode 100644 index 00000000..974d1191 --- /dev/null +++ b/builtins/table.h @@ -0,0 +1,34 @@ +#pragma once +#include <stdalign.h> +#include <stdint.h> +#include <stdbool.h> +#include <string.h> + +#include "types.h" +#include "datatypes.h" +#include "array.h" + +table_t Table_from_entries(array_t entries, const TypeInfo *type); +void *Table_get(const table_t *t, const void *key, const TypeInfo *type); +void *Table_get_raw(const table_t *t, const void *key, const TypeInfo *type); +void *Table_entry(const table_t *t, int64_t n); +void *Table_reserve(table_t *t, const void *key, const void *value, const TypeInfo *type); +void Table_set(table_t *t, const void *key, const void *value, const TypeInfo *type); +void Table_remove(table_t *t, const void *key, const TypeInfo *type); +void Table_clear(table_t *t); +void Table_mark_copy_on_write(table_t *t); +int32_t Table_compare(const table_t *x, const table_t *y, const TypeInfo *type); +bool Table_equal(const table_t *x, const table_t *y, const TypeInfo *type); +uint32_t Table_hash(const table_t *t, const TypeInfo *type); +CORD Table_cord(const table_t *t, bool colorize, const TypeInfo *type); + +void *Table_str_entry(const table_t *t, int64_t n); +void *Table_str_get(const table_t *t, const char *key); +void *Table_str_get_raw(const table_t *t, const char *key); +void Table_str_set(table_t *t, const char *key, const void *value); +void *Table_str_reserve(table_t *t, const char *key, const void *value); +void Table_str_remove(table_t *t, const char *key); + +#define Table_length(t) ((t)->entries.length) + +// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1 diff --git a/builtins/types.c b/builtins/types.c new file mode 100644 index 00000000..965bb067 --- /dev/null +++ b/builtins/types.c @@ -0,0 +1,349 @@ +// Generic type constructor +#include <err.h> +#include <gc.h> +#include <string.h> +#include <stdalign.h> +#include <stdlib.h> +#include <sys/param.h> + +#include "array.h" +#include "table.h" +#include "types.h" +#include "../util.h" +#include "../SipHash/halfsiphash.h" + +extern const void *SSS_HASH_VECTOR; + +public uint32_t generic_hash(const void *obj, const TypeInfo *type) +{ + switch (type->tag) { + case ArrayInfo: return Array_hash(obj, type); + case TableInfo: return Table_hash(obj, type); + case StructInfo: { + auto info = type->StructInfo; + if (info.members.length == 0) + return 0; + + uint32_t hash_values[info.members.length] = {}; + int64_t offset = 0; + for (int64_t i = 0; i < info.members.length; i++) { + typedef struct {const char *name; const TypeInfo *type;} struct_member_t; + struct_member_t *member = info.members.data + i*info.members.stride; + if (member->type->align > 1 && (offset % member->type->align)) + offset += member->type->align - (offset % member->type->align); + hash_values[i] = generic_hash(obj + offset, member->type); + offset += member->type->size; + } + uint32_t hash; + halfsiphash(hash_values, sizeof(uint32_t)*info.members.length, SSS_HASH_VECTOR, (uint8_t*)&hash, sizeof(hash)); + return hash; + } + case TaggedUnionInfo: { + auto info = type->TaggedUnionInfo; + int32_t tag = *(int32_t*)obj; + int64_t offset = 4; + typedef struct { int64_t tag; const char *name; const TypeInfo *type;} tu_member_t; + for (int64_t i = 0; i < info.members.length; i++) { + tu_member_t *member = info.members.data + i*info.members.stride; + offset = MAX(offset, member->type->align); + } + for (int64_t i = 0; i < info.members.length; i++) { + tu_member_t *member = info.members.data + i*info.members.stride; + if (member->tag != tag) continue; + struct { + int64_t tag; + uint32_t value_hash; + } data = {tag, generic_hash(obj + offset, member->type)}; + uint32_t hash; + halfsiphash(&data, sizeof(data), SSS_HASH_VECTOR, (uint8_t*)&hash, sizeof(hash)); + return hash; + } + uint32_t hash; + halfsiphash(&tag, sizeof(tag), SSS_HASH_VECTOR, (uint8_t*)&hash, sizeof(hash)); + return hash; + } + case CustomInfo: + if (!type->CustomInfo.hash) + goto hash_data; + return type->CustomInfo.hash(obj, type); + case PointerInfo: + default: { + hash_data:; + uint32_t hash; + halfsiphash((void*)obj, type->size, SSS_HASH_VECTOR, (uint8_t*)&hash, sizeof(hash)); + return hash; + } + } +} + +public int32_t generic_compare(const void *x, const void *y, const TypeInfo *type) +{ + switch (type->tag) { + case ArrayInfo: return Array_compare(x, y, type); + case TableInfo: return Table_compare(x, y, type); + case StructInfo: { + auto info = type->StructInfo; + typedef struct {const char *name; const TypeInfo *type;} struct_member_t; + auto members = (ARRAY_OF(struct_member_t))&info.members; + int64_t offset = 0; + foreach (members, member, last) { + if (member->type->align > 1 && (offset % member->type->align)) + offset += member->type->align - (offset % member->type->align); + int32_t cmp = generic_compare(x + offset, y + offset, member->type); + if (cmp != 0) return cmp; + offset += member->type->size; + } + return 0; + } + case TaggedUnionInfo: { + auto info = type->TaggedUnionInfo; + int32_t xtag = *(int32_t*)x, + ytag = *(int32_t*)y; + + if (xtag != ytag) + return xtag - ytag; + + int64_t offset = 4; + typedef struct { int64_t tag; const char *name; const TypeInfo *type;} tu_member_t; + for (int64_t i = 0; i < info.members.length; i++) { + tu_member_t *member = info.members.data + i*info.members.stride; + offset = MAX(offset, member->type->align); + } + for (int64_t i = 0; i < info.members.length; i++) { + tu_member_t *member = info.members.data + i*info.members.stride; + if (member->tag != xtag) continue; + return generic_compare(x + offset, y + offset, member->type); + } + return 0; + } + case CustomInfo: + if (!type->CustomInfo.compare) + goto compare_data; + return type->CustomInfo.compare(x, y, type); + case PointerInfo: + default: + compare_data: + return (int32_t)memcmp((void*)x, (void*)y, type->size); + } +} + +public bool generic_equal(const void *x, const void *y, const TypeInfo *type) +{ + switch (type->tag) { + case ArrayInfo: return Array_equal(x, y, type); + case TableInfo: return Table_equal(x, y, type); + case StructInfo: { + auto info = type->StructInfo; + typedef struct {const char *name; const TypeInfo *type;} struct_member_t; + auto members = (ARRAY_OF(struct_member_t))&info.members; + int64_t offset = 0; + foreach (members, member, last) { + if (member->type->align > 1 && (offset % member->type->align)) + offset += member->type->align - (offset % member->type->align); + bool equal = generic_equal(x + offset, y + offset, member->type); + if (!equal) return false; + offset += member->type->size; + } + return true; + } + case TaggedUnionInfo: { + auto info = type->TaggedUnionInfo; + int32_t xtag = *(int32_t*)x, + ytag = *(int32_t*)y; + + if (xtag != ytag) + return false; + + int64_t offset = 4; + typedef struct { int64_t tag; const char *name; const TypeInfo *type;} tu_member_t; + for (int64_t i = 0; i < info.members.length; i++) { + tu_member_t *member = info.members.data + i*info.members.stride; + offset = MAX(offset, member->type->align); + } + for (int64_t i = 0; i < info.members.length; i++) { + tu_member_t *member = info.members.data + i*info.members.stride; + if (member->tag != xtag) continue; + return generic_equal(x + offset, y + offset, member->type); + } + return true; + } + case CustomInfo: + if (!type->CustomInfo.equal) + goto use_generic_compare; + return type->CustomInfo.equal(x, y, type); + case PointerInfo: + default: + use_generic_compare: + return (generic_compare(x, y, type) == 0); + } +} + +public CORD generic_cord(const void *obj, bool colorize, const TypeInfo *type) +{ + switch (type->tag) { + case PointerInfo: { + auto ptr_info = type->PointerInfo; + static TypeInfo ptr_type = {.name="@Memory", .size=sizeof(void*), .align=alignof(void*)}; + static TypeInfo int_type = {.name="Int", .size=sizeof(int64_t), .align=alignof(int64_t)}; + static TypeInfo ptr_to_int_type = { + .name="{@Memory=>Int}", + .size=sizeof(table_t), + .align=alignof(table_t), + .tag=TableInfo, + .TableInfo={.key=&ptr_type, .value=&int_type, .entry_size=16, .value_offset=8}, + }; + + static int64_t zero = 0; + static table_t recursion = {.default_value=&zero}; + static table_t *current_recursion = NULL; + + CORD c; + bool first_ptr_call = (current_recursion == NULL); + if (first_ptr_call && ptr_info.cyclic) { + current_recursion = &recursion; + } + + if (ptr_info.cyclic) { + int64_t *found = Table_reserve(current_recursion, obj, NULL, &ptr_to_int_type); + if (*found) { + CORD_sprintf(&c, colorize ? "\x1b[34;1m%s%s#%ld\x1b[m" : "%s%s#%ld", + ptr_info.sigil, ptr_info.pointed ? ptr_info.pointed->name : "", *found); + goto done; + } else { + *found = Table_length(current_recursion); + } + } + + void *ptr = *(void**)obj; + if (ptr == NULL) { + CORD_sprintf(&c, colorize ? "\x1b[34;1m!%s\x1b[m" : "!%s", ptr_info.pointed ? ptr_info.pointed->name : ""); + } else { + CORD_sprintf(&c, colorize ? "\x1b[34;1m%s\x1b[m%r" : "%s%r", + ptr_info.sigil, ptr_info.pointed ? generic_cord(ptr, colorize, ptr_info.pointed) : "???"); + } + + done:; + if (first_ptr_call && ptr_info.cyclic) { + recursion = (table_t){.default_value=&zero}; + current_recursion = NULL; + } + + return c; + } + case StructInfo: { + auto info = type->StructInfo; + CORD c = "{"; + int64_t offset = 0; + for (int64_t i = 0; i < info.members.length; i++) { + if (i > 0) c = CORD_cat(c, ", "); + typedef struct {const char *name; const TypeInfo *type;} struct_member_t; + struct_member_t *member = info.members.data + i*info.members.stride; + if (member->type->align > 1 && (offset % member->type->align)) + offset += member->type->align - (offset % member->type->align); + + // Print the field name only if it's not an anonymous field ("_%ld" format) + char *end; + if (member->name[0] != '_' || strtol(member->name+1, &end, 10) != i+1 || *end) { + c = CORD_cat(c, member->name); + c = CORD_cat(c, "="); + } + + c = CORD_cat(c, generic_cord(obj + offset, colorize, member->type)); + offset += member->type->size; + } + c = CORD_cat(c, "}"); + if (strncmp(type->name, "struct(", strlen("struct(")) != 0) + CORD_sprintf(&c, colorize ? "\x1b[1m%s\x1b[m%r" : "%s%r", type->name, c); + return c; + } + case TaggedUnionInfo: { + auto info = type->TaggedUnionInfo; + int32_t tag = *(int32_t*)obj; + int64_t offset = 4; + typedef struct { int64_t tag; const char *name; const TypeInfo *type;} tu_member_t; + for (int64_t i = 0; i < info.members.length; i++) { + tu_member_t *member = info.members.data + i*info.members.stride; + offset = MAX(offset, member->type->align); + } + for (int64_t i = 0; i < info.members.length; i++) { + tu_member_t *member = info.members.data + i*info.members.stride; + if (member->tag == tag) { + CORD c; + CORD_sprintf(&c, colorize ? "\x1b[36m%s\x1b[m" : "%s", member->name); + if (member->type && member->type->size > 0) + c = CORD_cat(c, generic_cord(obj + offset, colorize, member->type)); + return c; + } + } + + CORD c = colorize ? "\x1b[36m" : ""; + int32_t tag_remainder = tag; + for (int64_t i = 0; tag_remainder && i < info.members.length; i++) { + tu_member_t *member = info.members.data + i*info.members.stride; + if (tag_remainder & member->tag) { + if (tag_remainder != tag) + c = CORD_cat(c, "+"); + c = CORD_cat(c, member->name); + tag_remainder &= ~member->tag; + } + } + if (tag_remainder) { + if (tag_remainder != tag) + c = CORD_cat(c, "+"); + c = CORD_cat(c, "???"); + } + + if (colorize) + c = CORD_cat(c, "\x1b[m"); + + return c; + } + case ArrayInfo: return Array_cord(obj, colorize, type); + case TableInfo: return Table_cord(obj, colorize, type); + case CustomInfo: + if (!type->CustomInfo.cord) + builtin_fail("No cord function provided for type: %s!\n", type->name); + return type->CustomInfo.cord(obj, colorize, type); + default: errx(1, "Invalid type tag: %d", type->tag); + } +} + +public CORD Type__cord(void *type_namespace, bool colorize, const TypeInfo *type) +{ + (void)type_namespace; + if (!colorize) + return CORD_from_char_star(type->name); + CORD c; + CORD_sprintf(&c, "\x1b[36;1m%s\x1b[m", type->name); + return c; +} + +public struct { + TypeInfo type; +} TypeInfo_type = { + .type={ + .name="TypeInfo", + .size=sizeof(TypeInfo), + .align=alignof(TypeInfo), + .tag=CustomInfo, + .CustomInfo={.cord=(void*)Type__cord}, + }, +}; + +public struct { + TypeInfo type; +} Void_type = {.type={.name="Void", .size=0, .align=0}}; +public struct { + TypeInfo type; +} Abort_type = {.type={.name="Abort", .size=0, .align=0}}; + +public CORD Func__cord(const void *fn, bool colorize, const TypeInfo *type) +{ + (void)fn; + CORD c = type->name; + if (colorize) + CORD_sprintf(&c, "\x1b[32;1m%r\x1b[m", c); + return c; +} + +// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 diff --git a/builtins/types.h b/builtins/types.h new file mode 100644 index 00000000..72f78846 --- /dev/null +++ b/builtins/types.h @@ -0,0 +1,55 @@ +#pragma once +#include <gc/cord.h> +#include <stdbool.h> +#include <stdint.h> + +#include "datatypes.h" +#include "string.h" + +struct TypeInfo; + +typedef uint32_t (*hash_fn_t)(const void*, const struct TypeInfo*); +typedef int32_t (*compare_fn_t)(const void*, const void*, const struct TypeInfo*); +typedef bool (*equal_fn_t)(const void*, const void*, const struct TypeInfo*); +typedef CORD (*cord_fn_t)(const void*, bool, const struct TypeInfo*); + +typedef struct TypeInfo { + const char *name; + int64_t size, align; + struct { // Anonymous tagged union for convenience + enum { CustomInfo, PointerInfo, ArrayInfo, TableInfo, StructInfo, TaggedUnionInfo } tag; + union { + struct { + equal_fn_t equal; + compare_fn_t compare; + hash_fn_t hash; + cord_fn_t cord; + } CustomInfo; + struct { + const char *sigil; + struct TypeInfo *pointed; + bool cyclic; + } PointerInfo; + struct { + struct TypeInfo *item; + } ArrayInfo; + struct { + struct TypeInfo *key, *value; + int64_t entry_size, value_offset; + } TableInfo; + struct { + array_t members; // [{name, type}] + } StructInfo; + struct { + array_t members; // [{tag, name, type}] + } TaggedUnionInfo; + }; + }; +} TypeInfo; + +uint32_t generic_hash(const void *obj, const TypeInfo *type); +int32_t generic_compare(const void *x, const void *y, const TypeInfo *type); +bool generic_equal(const void *x, const void *y, const TypeInfo *type); +CORD generic_cord(const void *obj, bool colorize, const TypeInfo *type); + +// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 |
