aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorBruce Hill <bruce@bruce-hill.com>2025-03-21 21:48:53 -0400
committerBruce Hill <bruce@bruce-hill.com>2025-03-21 21:48:53 -0400
commit5ee185a4896e43c67b6d299becfa616da78fb9f4 (patch)
tree183ceef2fd21230c89334d7d039255d1c86c5dca /src
parentf4aaf7b73481248f6768302be688700a364a1af8 (diff)
Move stdlib into src/
Diffstat (limited to 'src')
-rw-r--r--src/stdlib/README.md35
-rw-r--r--src/stdlib/arrays.c747
-rw-r--r--src/stdlib/arrays.h137
-rw-r--r--src/stdlib/bools.c57
-rw-r--r--src/stdlib/bools.h26
-rw-r--r--src/stdlib/bytes.c64
-rw-r--r--src/stdlib/bytes.h38
-rw-r--r--src/stdlib/c_strings.c86
-rw-r--r--src/stdlib/c_strings.h18
-rw-r--r--src/stdlib/chacha.h201
-rw-r--r--src/stdlib/datatypes.h120
-rw-r--r--src/stdlib/enums.c120
-rw-r--r--src/stdlib/enums.h38
-rw-r--r--src/stdlib/files.c335
-rw-r--r--src/stdlib/files.h37
-rw-r--r--src/stdlib/functiontype.c97
-rw-r--r--src/stdlib/functiontype.h34
-rw-r--r--src/stdlib/integers.c652
-rw-r--r--src/stdlib/integers.h430
-rw-r--r--src/stdlib/memory.c31
-rw-r--r--src/stdlib/memory.h13
-rw-r--r--src/stdlib/metamethods.c124
-rw-r--r--src/stdlib/metamethods.h22
-rw-r--r--src/stdlib/moments.c323
-rw-r--r--src/stdlib/moments.h44
-rw-r--r--src/stdlib/mutexeddata.c38
-rw-r--r--src/stdlib/mutexeddata.h17
-rw-r--r--src/stdlib/nums.c186
-rw-r--r--src/stdlib/nums.h127
-rw-r--r--src/stdlib/optionals.c94
-rw-r--r--src/stdlib/optionals.h49
-rw-r--r--src/stdlib/paths.c822
-rw-r--r--src/stdlib/paths.h71
-rw-r--r--src/stdlib/patterns.c1307
-rw-r--r--src/stdlib/patterns.h47
-rw-r--r--src/stdlib/pointers.c123
-rw-r--r--src/stdlib/pointers.h36
-rw-r--r--src/stdlib/rng.c268
-rw-r--r--src/stdlib/rng.h31
-rw-r--r--src/stdlib/siphash-internals.h126
-rw-r--r--src/stdlib/siphash.c79
-rw-r--r--src/stdlib/siphash.h15
-rw-r--r--src/stdlib/stdlib.c732
-rw-r--r--src/stdlib/stdlib.h57
-rw-r--r--src/stdlib/structs.c237
-rw-r--r--src/stdlib/structs.h40
-rw-r--r--src/stdlib/tables.c798
-rw-r--r--src/stdlib/tables.h110
-rw-r--r--src/stdlib/text.c1499
-rw-r--r--src/stdlib/text.h99
-rw-r--r--src/stdlib/threads.c80
-rw-r--r--src/stdlib/threads.h22
-rw-r--r--src/stdlib/tomo.h35
-rw-r--r--src/stdlib/types.c31
-rw-r--r--src/stdlib/types.h93
-rw-r--r--src/stdlib/util.c26
-rw-r--r--src/stdlib/util.h61
57 files changed, 11185 insertions, 0 deletions
diff --git a/src/stdlib/README.md b/src/stdlib/README.md
new file mode 100644
index 00000000..ba47dd0a
--- /dev/null
+++ b/src/stdlib/README.md
@@ -0,0 +1,35 @@
+# Tomo Standard Library
+
+This directory contains all of the standard library functionality that is built
+into each Tomo program. It has all the logic for the core data structures, as
+well as some common functionality.
+
+## Core Functions
+
+- Tomo headers all in one place: [header](stdlib/tomo.h)
+- Tomo standard library functions: [header](stdlib/stdlib.h), [implementation](stdlib/stdlib.c)
+- Metamethods: [header](stdlib/metamethods.h), [implementation](stdlib/metamethods.c)
+- Siphash: [header](stdlib/siphash.h), [implementation](stdlib/siphash.c)
+- Siphash-internals: [header](stdlib/siphash-internals.h) (header-only; there is no separate implementation file)
+- Util: [header](stdlib/util.h), [implementation](stdlib/util.c)
+
+## Core Data Types
+
+- Datatypes (type definitions): [header](stdlib/datatypes.h) (header-only; there is no separate implementation file)
+- Arrays: [header](stdlib/arrays.h), [implementation](stdlib/arrays.c)
+- Bools: [header](stdlib/bools.h), [implementation](stdlib/bools.c)
+- Bytes: [header](stdlib/bytes.h), [implementation](stdlib/bytes.c)
+- C Strings: [header](stdlib/c_strings.h), [implementation](stdlib/c_strings.c)
+- Files (used internally only): [header](stdlib/files.h), [implementation](stdlib/files.c)
+- Functiontype: [header](stdlib/functiontype.h), [implementation](stdlib/functiontype.c)
+- Integers: [header](stdlib/integers.h), [implementation](stdlib/integers.c)
+- Memory: [header](stdlib/memory.h), [implementation](stdlib/memory.c)
+- Nums: [header](stdlib/nums.h), [implementation](stdlib/nums.c)
+- Optionals: [header](stdlib/optionals.h), [implementation](stdlib/optionals.c)
+- Paths: [header](stdlib/paths.h), [implementation](stdlib/paths.c)
+- Patterns: [header](stdlib/patterns.h), [implementation](stdlib/patterns.c)
+- Pointers: [header](stdlib/pointers.h), [implementation](stdlib/pointers.c)
+- Tables: [header](stdlib/tables.h), [implementation](stdlib/tables.c)
+- Text: [header](stdlib/text.h), [implementation](stdlib/text.c)
+- Threads: [header](stdlib/threads.h), [implementation](stdlib/threads.c)
+- Type Infos (for representing types as values): [header](stdlib/types.h), [implementation](stdlib/types.c)
diff --git a/src/stdlib/arrays.c b/src/stdlib/arrays.c
new file mode 100644
index 00000000..cd403c5f
--- /dev/null
+++ b/src/stdlib/arrays.c
@@ -0,0 +1,747 @@
+// Functions that operate on arrays
+
+#include <gc.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <sys/param.h>
+
+#include "arrays.h"
+#include "integers.h"
+#include "math.h"
+#include "metamethods.h"
+#include "optionals.h"
+#include "rng.h"
+#include "tables.h"
+#include "text.h"
+#include "util.h"
+
+// Use inline version of siphash code:
+#include "siphash.h"
+#include "siphash-internals.h"
+
+// Return the size of one item of the given array type. The size is expected
+// to already be a multiple of the item's alignment; if it is not, the program
+// exits with an error.
+PUREFUNC static INLINE int64_t get_padded_item_size(const TypeInfo_t *info)
+{
+    const TypeInfo_t *item_info = info->ArrayInfo.item;
+    int64_t item_size = item_info->size;
+    bool has_alignment = item_info->align > 1;
+    if (has_alignment && (item_size % item_info->align) != 0)
+        errx(1, "Item size is not padded!");
+    return item_size;
+}
+
+// Give `arr` a freshly allocated, tightly packed copy of its items, so that
+// consecutive items are exactly `padded_item_size` bytes apart. An empty array
+// ends up with a NULL data pointer. Only the length and the atomic flag are
+// carried over; every other field of the array is reset to zero.
+public void Array$compact(Array_t *arr, int64_t padded_item_size)
+{
+    const int64_t count = arr->length;
+    void *packed = NULL;
+    if (count > 0) {
+        const size_t item_bytes = (size_t)padded_item_size;
+        const size_t total_bytes = (size_t)count * item_bytes;
+        packed = arr->atomic ? GC_MALLOC_ATOMIC(total_bytes) : GC_MALLOC(total_bytes);
+        if ((int64_t)arr->stride != padded_item_size) {
+            // Strided source: gather the items one at a time.
+            for (int64_t i = 0; i < count; i++)
+                memcpy(packed + i*padded_item_size, arr->data + arr->stride*i, item_bytes);
+        } else {
+            // Already contiguous: a single bulk copy is enough.
+            memcpy(packed, arr->data, total_bytes);
+        }
+    }
+    Array_t compacted = {
+        .data=packed,
+        .length=count,
+        .stride=padded_item_size,
+        .atomic=arr->atomic,
+    };
+    *arr = compacted;
+}
+
+// Insert a copy of `item` into `arr` at the 1-based position `int_index`.
+// Zero or negative indices count back from the end (so 0 appends); anything
+// still below 1 after that is clamped to the front. Fails if the index is more
+// than one past the end of the array.
+public void Array$insert(Array_t *arr, const void *item, Int_t int_index, int64_t padded_item_size)
+{
+    int64_t pos = Int64$from_int(int_index, false);
+    if (pos <= 0)
+        pos = arr->length + pos + 1;
+
+    if (pos < 1)
+        pos = 1;
+    else if (pos > (int64_t)arr->length + 1)
+        fail("Invalid insertion index %ld for an array with length %ld", pos, arr->length);
+
+    const size_t item_bytes = (size_t)padded_item_size;
+    if (!arr->data) {
+        // No storage yet: start out with room for four items.
+        arr->free = 4;
+        arr->data = arr->atomic ? GC_MALLOC_ATOMIC((size_t)arr->free * item_bytes)
+            : GC_MALLOC((size_t)arr->free * item_bytes);
+        arr->stride = padded_item_size;
+    } else if (arr->free >= 1 && arr->data_refcount == 0 && (int64_t)arr->stride == padded_item_size) {
+        // Unshared, contiguous, and there is spare room: open a one-item gap in place.
+        if (pos != arr->length+1) {
+            memmove(
+                arr->data + pos*padded_item_size,
+                arr->data + (pos-1)*padded_item_size,
+                (size_t)((arr->length - pos + 1)*padded_item_size));
+        }
+    } else {
+        // Need new storage. Spare capacity is half of max(8, length), capped
+        // at ARRAY_MAX_FREE_ENTRIES.
+        arr->free = MIN(ARRAY_MAX_FREE_ENTRIES, MAX(8, arr->length)/2);
+        void *grown = arr->atomic ? GC_MALLOC_ATOMIC((size_t)(arr->length + arr->free) * item_bytes)
+            : GC_MALLOC((size_t)(arr->length + arr->free) * item_bytes);
+        // Items before the insertion point keep their slot; the rest shift up by one.
+        for (int64_t i = 0; i < (int64_t)arr->length; i++) {
+            int64_t dest = (i < pos-1) ? i : i+1;
+            memcpy(grown + dest*padded_item_size, arr->data + arr->stride*i, item_bytes);
+        }
+        arr->data = grown;
+        arr->data_refcount = 0;
+        arr->stride = padded_item_size;
+    }
+    assert(arr->free > 0);
+    --arr->free;
+    ++arr->length;
+    memcpy((void*)arr->data + (pos-1)*padded_item_size, item, item_bytes);
+}
+
+// Insert copies of every item of `to_insert` into `arr`, starting at the
+// 1-based position `int_index`. Indices below 1 count back from the end (so 0
+// appends); anything still below 1 is clamped to the front. Fails if the index
+// is more than one past the end of the array.
+//
+// If `arr` has no storage at all, it simply becomes a reference-counted alias
+// of `to_insert`. Otherwise the items are placed in the array's spare capacity
+// when that is possible (enough free slots, unshared data, contiguous layout),
+// and in a newly allocated buffer when it is not.
+public void Array$insert_all(Array_t *arr, Array_t to_insert, Int_t int_index, int64_t padded_item_size)
+{
+    int64_t index = Int64$from_int(int_index, false);
+    if (to_insert.length == 0)
+        return;
+
+    if (!arr->data) {
+        *arr = to_insert;
+        ARRAY_INCREF(*arr);
+        return;
+    }
+
+    if (index < 1) index = arr->length + index + 1;
+
+    if (index < 1) index = 1;
+    else if (index > (int64_t)arr->length + 1)
+        fail("Invalid insertion index %ld for an array with length %ld", index, arr->length);
+
+    const int64_t old_len = arr->length;
+    const int64_t added = to_insert.length;
+
+    if ((int64_t)arr->free >= added // Adequate free space
+        && arr->data_refcount == 0 // Not aliased memory
+        && (int64_t)arr->stride == padded_item_size) { // Contiguous array
+        // If we can fit this within the array's preallocated free space, do that.
+        // First slide the tail (the items at and after the insertion point) up
+        // by `added` slots to open a gap, measured against the ORIGINAL length:
+        if (index != old_len + 1)
+            memmove((void*)arr->data + (index-1 + added)*padded_item_size,
+                    arr->data + (index-1)*padded_item_size,
+                    (size_t)((old_len - index + 1)*padded_item_size));
+        // Then fill the gap with the new items:
+        for (int64_t i = 0; i < added; i++)
+            memcpy((void*)arr->data + (index-1 + i)*padded_item_size,
+                   to_insert.data + i*to_insert.stride, (size_t)padded_item_size);
+        arr->free -= added;
+        arr->length += added;
+    } else {
+        // Otherwise, allocate a new chunk of memory for the array and populate it:
+        int64_t new_len = old_len + added;
+        arr->free = MIN(ARRAY_MAX_FREE_ENTRIES, MAX(8, new_len/4));
+        void *data = arr->atomic ? GC_MALLOC_ATOMIC((size_t)((new_len + arr->free) * padded_item_size))
+            : GC_MALLOC((size_t)((new_len + arr->free) * padded_item_size));
+        void *p = data;
+
+        // Copy first chunk of `arr` if needed:
+        if (index > 1) {
+            if (arr->stride == padded_item_size) {
+                p = mempcpy(p, arr->data, (size_t)((index-1)*padded_item_size));
+            } else {
+                for (int64_t i = 0; i < index-1; i++)
+                    p = mempcpy(p, arr->data + arr->stride*i, (size_t)padded_item_size);
+            }
+        }
+
+        // Copy `to_insert` (every one of its items, whatever the insertion index is):
+        if (to_insert.stride == padded_item_size) {
+            p = mempcpy(p, to_insert.data, (size_t)(added*padded_item_size));
+        } else {
+            for (int64_t i = 0; i < added; i++)
+                p = mempcpy(p, to_insert.data + to_insert.stride*i, (size_t)padded_item_size);
+        }
+
+        // Copy last chunk of `arr` if needed (through the final item, inclusive):
+        if (index < old_len + 1) {
+            if (arr->stride == padded_item_size) {
+                p = mempcpy(p, arr->data + padded_item_size*(index-1), (size_t)((old_len - index + 1)*padded_item_size));
+            } else {
+                for (int64_t i = index-1; i < old_len; i++)
+                    p = mempcpy(p, arr->data + arr->stride*i, (size_t)padded_item_size);
+            }
+        }
+        arr->length = new_len;
+        arr->stride = padded_item_size;
+        arr->data = data;
+        arr->data_refcount = 0;
+    }
+}
+
+// Remove up to `int_count` items from `arr`, starting at the 1-based position
+// `int_index`. Indices below 1 count back from the end. Nothing happens if the
+// resulting index is outside the array or the count is below 1; a count that
+// runs past the end is trimmed to the end of the array.
+//
+// Removing from the front just advances the data pointer, and removing from
+// the back just shortens the array. Removing from the middle either shifts the
+// tail down in place (unshared, contiguous data) or builds a compact copy of
+// the surviving items.
+public void Array$remove_at(Array_t *arr, Int_t int_index, Int_t int_count, int64_t padded_item_size)
+{
+    int64_t index = Int64$from_int(int_index, false);
+    if (index < 1) index = arr->length + index + 1;
+
+    int64_t count = Int64$from_int(int_count, false);
+    if (index < 1 || index > (int64_t)arr->length || count < 1) return;
+
+    if (count > arr->length - index + 1)
+        count = (arr->length - index) + 1;
+
+    if (index == 1) {
+        arr->data += arr->stride * count;
+    } else if (index + count > arr->length) {
+        if (arr->free >= 0)
+            arr->free += count;
+    } else if (arr->data_refcount != 0 || (int64_t)arr->stride != padded_item_size) {
+        int64_t remaining = arr->length - count;
+        void *copy = arr->atomic ? GC_MALLOC_ATOMIC((size_t)(remaining * padded_item_size))
+            : GC_MALLOC((size_t)(remaining * padded_item_size));
+        for (int64_t src = 1, dest = 1; src <= (int64_t)arr->length; src++) {
+            if (src < index || src >= index + count) {
+                memcpy(copy + (dest - 1)*padded_item_size, arr->data + arr->stride*(src - 1), (size_t)padded_item_size);
+                ++dest;
+            }
+        }
+        arr->data = copy;
+        arr->free = 0;
+        arr->data_refcount = 0;
+        // The copy is tightly packed, so the stride must be updated to match:
+        arr->stride = padded_item_size;
+    } else {
+        // Slide down exactly the items that come after the removed range:
+        memmove((void*)arr->data + (index-1)*padded_item_size, arr->data + (index-1 + count)*padded_item_size,
+                (size_t)((arr->length - index - count + 1)*padded_item_size));
+        arr->free += count;
+    }
+    arr->length -= count;
+    if (arr->length == 0) arr->data = NULL;
+}
+
+// Remove items equal to `item` (as judged by generic_equal() on the array's
+// item type), scanning from the front. Each removal decrements `max_removals`,
+// and scanning stops as soon as it reaches zero.
+public void Array$remove_item(Array_t *arr, void *item, Int_t max_removals, const TypeInfo_t *type)
+{
+    const TypeInfo_t *item_type = type->ArrayInfo.item;
+    const Int_t ZERO = (Int_t){.small=(0<<2)|1};
+    const Int_t ONE = (Int_t){.small=(1<<2)|1};
+    int64_t padded_item_size = get_padded_item_size(type);
+
+    int64_t i = 0;
+    while (i < arr->length && max_removals.small != ZERO.small) {
+        if (!generic_equal(item, arr->data + i*arr->stride, item_type)) {
+            i++;
+            continue;
+        }
+        // Do not advance `i`: the next item has just moved into this slot.
+        Array$remove_at(arr, I(i+1), ONE, padded_item_size);
+        max_removals = Int$minus(max_removals, ONE);
+    }
+}
+
+// Return the 1-based index of the first item that generic_equal() reports as
+// equal to `item`, or NONE_INT if there is no such item.
+public OptionalInt_t Array$find(Array_t arr, void *item, const TypeInfo_t *type)
+{
+    const TypeInfo_t *elem_type = type->ArrayInfo.item;
+    int64_t i = 0;
+    while (i < arr.length) {
+        if (generic_equal(item, arr.data + i*arr.stride, elem_type))
+            return I(i+1);
+        i++;
+    }
+    return NONE_INT;
+}
+
+// Return the 1-based index of the first item for which the `predicate`
+// closure returns true, or NONE_INT if it never does. The closure's function
+// is called with a pointer to the item and the closure's userdata.
+public OptionalInt_t Array$first(Array_t arr, Closure_t predicate)
+{
+    bool (*matches)(void*, void*) = (void*)predicate.fn;
+    int64_t i = 0;
+    while (i < arr.length) {
+        if (matches(arr.data + i*arr.stride, predicate.userdata))
+            return I(i+1);
+        i++;
+    }
+    return NONE_INT;
+}
+
+// Sort `arr` in place with qsort_r(), using the `comparison` closure. If the
+// data is shared or not tightly packed, the array is compacted first so that
+// the sort operates on a private, contiguous buffer.
+public void Array$sort(Array_t *arr, Closure_t comparison, int64_t padded_item_size)
+{
+    bool is_shared = arr->data_refcount != 0;
+    bool is_strided = (int64_t)arr->stride != padded_item_size;
+    if (is_shared || is_strided)
+        Array$compact(arr, padded_item_size);
+
+    size_t item_count = (size_t)arr->length;
+    size_t item_bytes = (size_t)padded_item_size;
+    qsort_r(arr->data, item_count, item_bytes, comparison.fn, comparison.userdata);
+}
+
+// Return a sorted copy of `arr`. The copy is always made (via compaction), so
+// the caller's data is left untouched.
+public Array_t Array$sorted(Array_t arr, Closure_t comparison, int64_t padded_item_size)
+{
+    Array_t result = arr;
+    Array$compact(&result, padded_item_size);
+    size_t item_count = (size_t)result.length;
+    size_t item_bytes = (size_t)padded_item_size;
+    qsort_r(result.data, item_count, item_bytes, comparison.fn, comparison.userdata);
+    return result;
+}
+
+// Shuffle `arr` in place using random numbers drawn from `rng`. If the data
+// is shared or not tightly packed, the array is compacted first so that the
+// swaps operate on a private, contiguous buffer.
+//
+// Walks from the last slot down to slot 1, swapping each slot `i` with a slot
+// `j` drawn from RNG$int64(rng, 0, i). The walk has to include i == 1:
+// stopping earlier would leave a two-item array permanently unshuffled and
+// would bias the first two slots of longer arrays.
+// NOTE(review): this assumes RNG$int64's upper bound is inclusive, as its use
+// in Array$random (0 .. length-1) suggests -- confirm.
+public void Array$shuffle(Array_t *arr, RNG_t rng, int64_t padded_item_size)
+{
+    if (arr->data_refcount != 0 || (int64_t)arr->stride != padded_item_size)
+        Array$compact(arr, padded_item_size);
+
+    char tmp[padded_item_size];
+    for (int64_t i = arr->length-1; i > 0; i--) {
+        int64_t j = RNG$int64(rng, 0, i);
+        memcpy(tmp, arr->data + i*padded_item_size, (size_t)padded_item_size);
+        memcpy((void*)arr->data + i*padded_item_size, arr->data + j*padded_item_size, (size_t)padded_item_size);
+        memcpy((void*)arr->data + j*padded_item_size, tmp, (size_t)padded_item_size);
+    }
+}
+
+// Return a shuffled copy of `arr`. The copy is always made (via compaction),
+// so the caller's data is left untouched.
+public Array_t Array$shuffled(Array_t arr, RNG_t rng, int64_t padded_item_size)
+{
+    Array_t result = arr;
+    Array$compact(&result, padded_item_size);
+    Array$shuffle(&result, rng, padded_item_size);
+    return result;
+}
+
+// Return a pointer to one item of `arr`, chosen with RNG$int64() over the
+// index range 0 .. length-1. Returns NULL for an empty array.
+public void *Array$random(Array_t arr, RNG_t rng)
+{
+    if (arr.length != 0) {
+        int64_t picked = RNG$int64(rng, 0, arr.length-1);
+        return arr.data + arr.stride*picked;
+    }
+    return NULL; // fail("Cannot get a random item from an empty array!");
+}
+
+// Build a table that maps each distinct item of `arr` to the number of times
+// it occurs in the array.
+// NOTE(review): the table's value type is given as Int$info, but the values
+// read and written here are raw int64_t -- confirm that this matches the
+// representation the table's users expect.
+public Table_t Array$counts(Array_t arr, const TypeInfo_t *type)
+{
+    const TypeInfo_t count_type = *Table$info(type->ArrayInfo.item, &Int$info);
+    Table_t counts = {};
+    int64_t i = 0;
+    while (i < arr.length) {
+        void *key = arr.data + i*arr.stride;
+        int64_t *seen = Table$get(counts, key, &count_type);
+        int64_t val = 1;
+        if (seen)
+            val = *seen + 1;
+        Table$set(&counts, key, &val, &count_type);
+        i++;
+    }
+    return counts;
+}
+
+// Pick `int_n` items from `arr`, with replacement, and return them as a new
+// tightly packed array (stride `padded_item_size`, same atomic flag as `arr`).
+//
+// - If `weights.length` is negative, every pick is a uniform draw using
+//   RNG$int64() over the index range 0 .. length-1.
+// - Otherwise `weights` is read as one double per item, and picks are made
+//   through a table of (alias, odds) pairs built from those weights.
+//
+// Fails if `int_n` is negative, if `arr` is empty (and `int_n` is nonzero), if
+// the number of weights differs from the number of items, if any weight is
+// infinite, NaN, or negative, or if the weights sum to infinity or to zero.
+// Requesting zero items returns an empty array before any other check.
+public Array_t Array$sample(Array_t arr, Int_t int_n, Array_t weights, RNG_t rng, int64_t padded_item_size)
+{
+ int64_t n = Int64$from_int(int_n, false);
+ if (n < 0)
+ fail("Cannot select a negative number of values");
+
+ if (n == 0)
+ return (Array_t){};
+
+ if (arr.length == 0)
+ fail("There are no elements in this array!");
+
+ // Storage for the result; every slot is filled by one of the loops below.
+ Array_t selected = {
+ .data=arr.atomic ? GC_MALLOC_ATOMIC((size_t)(n * padded_item_size)) : GC_MALLOC((size_t)(n * padded_item_size)),
+ .length=n,
+ .stride=padded_item_size, .atomic=arr.atomic};
+
+ // A negative weights length means no weights were supplied: sample uniformly.
+ if (weights.length < 0) {
+ for (int64_t i = 0; i < n; i++) {
+ int64_t index = RNG$int64(rng, 0, arr.length-1);
+ memcpy(selected.data + i*padded_item_size, arr.data + arr.stride*index, (size_t)padded_item_size);
+ }
+ return selected;
+ }
+
+ if (weights.length != arr.length)
+ fail("Array has %ld elements, but there are %ld weights given", arr.length, weights.length);
+
+ // Validate every weight and accumulate the total.
+ double total = 0.0;
+ for (int64_t i = 0; i < weights.length && i < arr.length; i++) {
+ double weight = *(double*)(weights.data + weights.stride*i);
+ if (isinf(weight))
+ fail("Infinite weight!");
+ else if (isnan(weight))
+ fail("NaN weight!");
+ else if (weight < 0.0)
+ fail("Negative weight!");
+ else
+ total += weight;
+ }
+
+ if (isinf(total))
+ fail("Sample weights have overflowed to infinity");
+
+ if (total == 0.0)
+ fail("None of the given weights are nonzero");
+
+ // Scaling factor that makes the average of the scaled weights equal to 1.0.
+ double inverse_average = (double)arr.length / total;
+
+ // One entry per item: `odds` is the scaled weight, and `alias` is the index
+ // used instead of this one when a draw exceeds `odds` (-1 = not assigned yet).
+ struct {
+ int64_t alias;
+ double odds;
+ } aliases[arr.length];
+
+ for (int64_t i = 0; i < arr.length; i++) {
+ // The lengths were checked to be equal above, so the 0.0 fallback is
+ // only a safeguard.
+ double weight = i >= weights.length ? 0.0 : *(double*)(weights.data + weights.stride*i);
+ aliases[i].odds = weight * inverse_average;
+ aliases[i].alias = -1;
+ }
+
+ // Pair each entry whose odds are at least 1.0 ("big") with entries whose
+ // odds are below 1.0 and that have no alias yet ("small"): the small entry
+ // gets the big one as its alias, and the big entry's odds are reduced by
+ // the portion handed over.
+ int64_t small = 0;
+ for (int64_t big = 0; big < arr.length; big++) {
+ while (aliases[big].odds >= 1.0) {
+ while (small < arr.length && (aliases[small].odds >= 1.0 || aliases[small].alias != -1))
+ ++small;
+
+ // No small entries are left: this entry simply keeps itself.
+ if (small >= arr.length) {
+ aliases[big].odds = 1.0;
+ aliases[big].alias = big;
+ break;
+ }
+
+ aliases[small].alias = big;
+ aliases[big].odds = (aliases[small].odds + aliases[big].odds) - 1.0;
+ }
+ // If this entry dropped below 1.0 and lies before the scan position,
+ // rewind so that it can be used as a small entry itself.
+ if (big < small) small = big;
+ }
+
+ // Any entry from `small` onward that is still unassigned aliases to itself.
+ for (int64_t i = small; i < arr.length; i++)
+ if (aliases[i].alias == -1)
+ aliases[i].alias = i;
+
+ // Draw: the integer part of `r` selects an entry, and the fractional part
+ // decides between that entry and its alias.
+ // NOTE(review): if RNG$num() can return exactly arr.length, `index` would be
+ // one past the end of `aliases` -- confirm that its upper bound is exclusive.
+ for (int64_t i = 0; i < n; i++) {
+ double r = RNG$num(rng, 0, arr.length);
+ int64_t index = (int64_t)r;
+ if ((r - (double)index) > aliases[index].odds)
+ index = aliases[index].alias;
+ memcpy(selected.data + i*selected.stride, arr.data + index*arr.stride, (size_t)padded_item_size);
+ }
+ return selected;
+}
+
+// Return the slice of `array` that runs from the 1-based index `first`
+// through the last item (expressed as index -1).
+public Array_t Array$from(Array_t array, Int_t first)
+{
+    Array_t tail = Array$slice(array, first, I_small(-1));
+    return tail;
+}
+
+// Return the slice of `array` that runs from the first item through the
+// 1-based index `last`.
+public Array_t Array$to(Array_t array, Int_t last)
+{
+    Array_t head = Array$slice(array, I_small(1), last);
+    return head;
+}
+
+// Return every `int_stride`-th item of `array`. A negative step walks backward
+// from the last item, and a step of zero gives an empty array. Normally the
+// result shares the original data and only has a different stride; when the
+// combined stride falls outside ARRAY_MIN_STRIDE .. ARRAY_MAX_STRIDE, the
+// selected items are copied into a new tightly packed buffer instead.
+public Array_t Array$by(Array_t array, Int_t int_stride, int64_t padded_item_size)
+{
+    int64_t step = Int64$from_int(int_stride, false);
+    // In the unlikely event that the combined stride does not fit in the
+    // array's stride field, fall back to creating a copy of the array:
+    if (unlikely(array.stride*step < ARRAY_MIN_STRIDE || array.stride*step > ARRAY_MAX_STRIDE)) {
+        // Number of selected items: length / |step|, rounded up.
+        int64_t len = (step < 0 ? array.length / -step : array.length / step) + ((array.length % step) != 0);
+        void *copy = NULL;
+        if (len > 0) {
+            const size_t item_bytes = (size_t)padded_item_size;
+            copy = array.atomic ? GC_MALLOC_ATOMIC((size_t)(len * padded_item_size)) : GC_MALLOC((size_t)(len * padded_item_size));
+            void *start = array.data;
+            if (step < 0)
+                start = array.data + (array.stride * (array.length - 1));
+            for (int64_t i = 0; i < len; i++)
+                memcpy(copy + i*padded_item_size, start + array.stride*step*i, item_bytes);
+        }
+        Array_t copied = {
+            .data=copy,
+            .length=len,
+            .stride=padded_item_size,
+            .atomic=array.atomic,
+        };
+        return copied;
+    }
+
+    if (step == 0)
+        return (Array_t){.atomic=array.atomic};
+
+    // Share the original data: start at the last item for a negative step.
+    Array_t view = {
+        .atomic=array.atomic,
+        .data=(step < 0 ? array.data + (array.stride * (array.length - 1)) : array.data),
+        .length=(step < 0 ? array.length / -step : array.length / step) + ((array.length % step) != 0),
+        .stride=array.stride * step,
+        .data_refcount=array.data_refcount,
+    };
+    return view;
+}
+
+// Return the items of `array` from the 1-based index `int_first` through
+// `int_last`, inclusive, sharing the original data. Negative indices count
+// back from the end (-1 is the last item), and `int_last` is clamped to the
+// array's length. The result is an empty array when `first` is outside the
+// array or when the range is empty (`last` before `first`).
+public Array_t Array$slice(Array_t array, Int_t int_first, Int_t int_last)
+
+{
+    int64_t first = Int64$from_int(int_first, false);
+    if (first < 0)
+        first = array.length + first + 1;
+
+    int64_t last = Int64$from_int(int_last, false);
+    if (last < 0)
+        last = array.length + last + 1;
+
+    if (last > array.length)
+        last = array.length;
+
+    // `last < first` covers last == 0 as well as a `last` that is still
+    // negative after wrap-around. Without this check the length computed below
+    // could come out negative, and a negative length means "none" elsewhere.
+    if (first < 1 || first > array.length || last < first)
+        return (Array_t){.atomic=array.atomic};
+
+    return (Array_t){
+        .atomic=array.atomic,
+        .data=array.data + array.stride*(first-1),
+        .length=last - first + 1,
+        .stride=array.stride,
+        .data_refcount=array.data_refcount,
+    };
+}
+
+// Return `array` in reverse order. Normally this shares the original data:
+// the result points at the last item and uses the negated stride. If the
+// negated stride falls outside ARRAY_MIN_STRIDE .. ARRAY_MAX_STRIDE, the work
+// is handed to Array$by() with a step of -1, which copies the items instead.
+public Array_t Array$reversed(Array_t array, int64_t padded_item_size)
+{
+    bool negation_fits = !(-array.stride < ARRAY_MIN_STRIDE || -array.stride > ARRAY_MAX_STRIDE);
+    if (unlikely(!negation_fits))
+        return Array$by(array, I(-1), padded_item_size);
+
+    Array_t flipped = array;
+    flipped.data = array.data + (array.length-1)*array.stride;
+    flipped.stride = -array.stride;
+    return flipped;
+}
+
+// Return a new, tightly packed array holding the items of `x` followed by the
+// items of `y`. The result takes its atomic flag from `x`. Each input is
+// copied in bulk when it is already contiguous, and item by item (honoring its
+// own stride) when it is not.
+public Array_t Array$concat(Array_t x, Array_t y, int64_t padded_item_size)
+{
+    void *data = x.atomic ? GC_MALLOC_ATOMIC((size_t)(padded_item_size*(x.length + y.length)))
+        : GC_MALLOC((size_t)(padded_item_size*(x.length + y.length)));
+    if (x.stride == padded_item_size) {
+        memcpy(data, x.data, (size_t)(padded_item_size*x.length));
+    } else {
+        // Step through the source by x's own stride, not by the packed size:
+        for (int64_t i = 0; i < x.length; i++)
+            memcpy(data + i*padded_item_size, x.data + i*x.stride, (size_t)padded_item_size);
+    }
+
+    void *dest = data + padded_item_size*x.length;
+    if (y.stride == padded_item_size) {
+        memcpy(dest, y.data, (size_t)(padded_item_size*y.length));
+    } else {
+        for (int64_t i = 0; i < y.length; i++)
+            memcpy(dest + i*padded_item_size, y.data + i*y.stride, (size_t)padded_item_size);
+    }
+
+    return (Array_t){
+        .data=data,
+        .length=x.length + y.length,
+        .stride=padded_item_size,
+        .atomic=x.atomic,
+    };
+}
+
+// Report whether any item of `array` is equal to `item`, as judged by
+// generic_equal() on the array's item type.
+public bool Array$has(Array_t array, void *item, const TypeInfo_t *type)
+{
+    const TypeInfo_t *elem_type = type->ArrayInfo.item;
+    bool found = false;
+    for (int64_t i = 0; !found && i < array.length; i++)
+        found = generic_equal(array.data + i*array.stride, item, elem_type);
+    return found;
+}
+
+// Reset `array` to an empty array: no data, zero length, and every other
+// field zeroed as well.
+public void Array$clear(Array_t *array)
+{
+    Array_t empty = {.data=0, .length=0};
+    *array = empty;
+}
+
+// Three-way comparison of two arrays (`vx` and `vy` point to Array_t values).
+// Items are compared pairwise over the shared prefix; if the prefix is equal,
+// the shorter array orders first. Returns a negative, zero, or positive value.
+//
+// Items that are pointers, or whose type has no `compare` metamethod, are
+// compared as raw bytes with memcmp(); all other items go through
+// generic_compare().
+public int32_t Array$compare(const void *vx, const void *vy, const TypeInfo_t *type)
+{
+    const Array_t *x = (Array_t*)vx, *y = (Array_t*)vy;
+    // Early out for arrays with the same data, e.g. two copies of the same array:
+    if (x->data == y->data && x->stride == y->stride)
+        return (x->length > y->length) - (x->length < y->length);
+
+    const TypeInfo_t *item = type->ArrayInfo.item;
+    // 64-bit counters, so arrays longer than INT32_MAX are compared in full:
+    const int64_t shared_len = MIN(x->length, y->length);
+    if (item->tag == PointerInfo || !item->metamethods.compare) { // data comparison
+        int64_t item_padded_size = get_padded_item_size(type);
+
+        if ((int64_t)x->stride == item_padded_size && (int64_t)y->stride == item_padded_size) {
+            // Both arrays are tightly packed: compare the whole prefix at once.
+            int32_t cmp = (int32_t)memcmp(x->data, y->data, (size_t)(shared_len*item_padded_size));
+            if (cmp != 0) return cmp;
+        } else {
+            for (int64_t i = 0; i < shared_len; i++) {
+                int32_t cmp = (int32_t)memcmp(x->data + x->stride*i, y->data + y->stride*i, (size_t)(item->size));
+                if (cmp != 0) return cmp;
+            }
+        }
+    } else {
+        for (int64_t i = 0; i < shared_len; i++) {
+            int32_t cmp = generic_compare(x->data + x->stride*i, y->data + y->stride*i, item);
+            if (cmp != 0) return cmp;
+        }
+    }
+    return (x->length > y->length) - (x->length < y->length);
+}
+
+// Report whether two arrays (`x` and `y` point to Array_t values) are equal:
+// either they are the very same object, or they have the same length and
+// Array$compare() finds no difference.
+public bool Array$equal(const void *x, const void *y, const TypeInfo_t *type)
+{
+    if (x == y)
+        return true;
+    if (((Array_t*)x)->length != ((Array_t*)y)->length)
+        return false;
+    return Array$compare(x, y, type) == 0;
+}
+
+// Render an array as text: "[", the items' text separated by ", ", then "]".
+// When `obj` is NULL, the brackets enclose whatever generic_as_text() yields
+// for a NULL item of the array's item type instead.
+public Text_t Array$as_text(const void *obj, bool colorize, const TypeInfo_t *type)
+{
+    Array_t *arr = (Array_t*)obj;
+    if (arr == NULL)
+        return Text$concat(Text("["), generic_as_text(NULL, false, type->ArrayInfo.item), Text("]"));
+
+    const TypeInfo_t *elem_type = type->ArrayInfo.item;
+    Text_t result = Text("[");
+    int64_t i = 0;
+    while (i < arr->length) {
+        if (i != 0)
+            result = Text$concat(result, Text(", "));
+        result = Text$concat(result, generic_as_text(arr->data + i*arr->stride, colorize, elem_type));
+        i++;
+    }
+    return Text$concat(result, Text("]"));
+}
+
+// Hash an array (`obj` points to an Array_t) with siphash, feeding one 64-bit
+// word per item. Items that are pointers, or pointer-sized items whose type
+// has no `hash` metamethod, contribute their raw contents; all other items
+// contribute their generic_hash() value.
+public uint64_t Array$hash(const void *obj, const TypeInfo_t *type)
+{
+    const Array_t *arr = (Array_t*)obj;
+    const TypeInfo_t *item = type->ArrayInfo.item;
+    siphash sh;
+    siphashinit(&sh, sizeof(uint64_t) * (size_t)arr->length);
+    if (item->tag == PointerInfo || (!item->metamethods.hash && item->size == sizeof(void*))) { // Raw data hash
+        for (int64_t i = 0; i < arr->length; i++) {
+            // Hash the value stored in the slot, not the slot's address, so
+            // that arrays with equal contents hash the same wherever they live.
+            uint64_t raw = 0;
+            memcpy(&raw, arr->data + i*arr->stride, sizeof(void*));
+            siphashadd64bits(&sh, raw);
+        }
+    } else {
+        for (int64_t i = 0; i < arr->length; i++) {
+            uint64_t item_hash = generic_hash(arr->data + i*arr->stride, item);
+            siphashadd64bits(&sh, item_hash);
+        }
+    }
+    return siphashfinish_last_part(&sh, 0);
+}
+
+// Heap helper: take the item at `pos` and move it toward the root, shifting
+// each parent down one level as long as the item compares less than that
+// parent and `pos` is still beyond `startpos`. The item is then stored in the
+// slot where it came to rest.
+static void siftdown(Array_t *heap, int64_t startpos, int64_t pos, Closure_t comparison, int64_t padded_item_size)
+{
+    typedef int32_t (*cmp_fn_t)(void*, void*, void*);
+    assert(pos > 0 && pos < heap->length);
+
+    const size_t item_bytes = (size_t)(padded_item_size);
+    char moving[padded_item_size];
+    memcpy(moving, heap->data + heap->stride*pos, item_bytes);
+
+    for (;;) {
+        if (pos <= startpos)
+            break;
+
+        int64_t parent = (pos - 1) >> 1;
+        int32_t order = ((cmp_fn_t)comparison.fn)(moving, heap->data + heap->stride*parent, comparison.userdata);
+        if (order >= 0)
+            break;
+
+        // The parent is larger: pull it down into the current slot.
+        memcpy(heap->data + heap->stride*pos, heap->data + heap->stride*parent, item_bytes);
+        pos = parent;
+    }
+    memcpy(heap->data + heap->stride*pos, moving, item_bytes);
+}
+
+// Heap helper: restore heap order below `pos`. The item at `pos` is saved,
+// then the smaller child is repeatedly moved up into the vacated slot until a
+// position without children is reached. The saved item is written there, and
+// siftdown() moves it back up toward `pos` to its proper place.
+static void siftup(Array_t *heap, int64_t pos, Closure_t comparison, int64_t padded_item_size)
+{
+ int64_t endpos = heap->length;
+ int64_t startpos = pos;
+ assert(pos < endpos);
+
+ // Save the item that is being displaced from `pos`.
+ char old_top[padded_item_size];
+ memcpy(old_top, heap->data + heap->stride*pos, (size_t)(padded_item_size));
+ // Bubble up the smallest leaf node
+ // Positions at or beyond `limit` have no children.
+ int64_t limit = endpos >> 1;
+ while (pos < limit) {
+ int64_t childpos = 2*pos + 1; // Smaller of the two child nodes
+ // If a right child exists, switch to it unless the left child compares
+ // strictly less than it.
+ if (childpos + 1 < endpos) {
+ typedef int32_t (*cmp_fn_t)(void*, void*, void*);
+ int32_t cmp = ((cmp_fn_t)comparison.fn)(
+ heap->data + heap->stride*childpos,
+ heap->data + heap->stride*(childpos + 1),
+ comparison.userdata);
+ childpos += (cmp >= 0);
+ }
+
+ // Move the child node up:
+ memcpy(heap->data + heap->stride*pos, heap->data + heap->stride*childpos, (size_t)(padded_item_size));
+ pos = childpos;
+ }
+ // Put the saved item into the vacated bottom slot.
+ memcpy(heap->data + heap->stride*pos, old_top, (size_t)(padded_item_size));
+ // Shift the node's parents down:
+ siftdown(heap, startpos, pos, comparison, padded_item_size);
+}
+
+// Add `item` to a heap: append it to the array, then move it up toward the
+// root until heap order (as defined by `comparison`) holds again.
+public void Array$heap_push(Array_t *heap, const void *item, Closure_t comparison, int64_t padded_item_size)
+{
+    // Index 0 means "one past the end", i.e. append.
+    Array$insert(heap, item, I(0), padded_item_size);
+
+    if (heap->length <= 1)
+        return; // A single item is trivially a heap.
+
+    if (heap->data_refcount != 0)
+        Array$compact(heap, padded_item_size);
+    int64_t last = heap->length-1;
+    siftdown(heap, 0, last, comparison, padded_item_size);
+}
+
+// Remove the root (first) item of a heap. Fails on an empty array. With one
+// item the array is reset to empty; with two, the data pointer simply moves on
+// to the second item. Otherwise the last item is copied over the root, the
+// array is shortened by one, and heap order is restored from the root down.
+public void Array$heap_pop(Array_t *heap, Closure_t comparison, int64_t padded_item_size)
+{
+    if (heap->length == 0)
+        fail("Attempt to pop from an empty array");
+
+    if (heap->length == 1) {
+        *heap = (Array_t){};
+        return;
+    }
+
+    if (heap->length == 2) {
+        heap->data += heap->stride;
+        --heap->length;
+        return;
+    }
+
+    // The data is about to be modified in place, so make sure it is not shared.
+    if (heap->data_refcount != 0)
+        Array$compact(heap, padded_item_size);
+    int64_t last = heap->length-1;
+    memcpy(heap->data, heap->data + heap->stride*last, (size_t)(padded_item_size));
+    --heap->length;
+    siftup(heap, 0, comparison, padded_item_size);
+}
+
+// Rearrange the items of `heap` in place so that they satisfy heap order
+// under `comparison`. Shared data is compacted into a private copy first.
+public void Array$heapify(Array_t *heap, Closure_t comparison, int64_t padded_item_size)
+{
+    if (heap->data_refcount != 0)
+        Array$compact(heap, padded_item_size);
+
+    // It's necessary to bump the refcount because the user's comparison
+    // function could do stuff that modifies the heap's data.
+    ARRAY_INCREF(*heap);
+    const int64_t count = heap->length;
+    // Work backward from the last position that has a child to the root.
+    for (int64_t node = (count >> 1) - 1; node >= 0; node--)
+        siftup(heap, node, comparison, padded_item_size);
+    ARRAY_DECREF(*heap);
+}
+
+// Binary-search `array` for `target` using the `comparison` closure, which is
+// called as (item, target, userdata). Returns the 1-based index of an item
+// that compares equal, or, when there is none, the 1-based index at which the
+// target would be inserted.
+public Int_t Array$binary_search(Array_t array, void *target, Closure_t comparison)
+{
+    typedef int32_t (*cmp_fn_t)(void*, void*, void*);
+    int64_t lo = 0;
+    int64_t hi = array.length-1;
+    while (lo <= hi) {
+        int64_t mid = (lo + hi) / 2;
+        int32_t order = ((cmp_fn_t)comparison.fn)(
+            array.data + array.stride*mid, target, comparison.userdata);
+        if (order < 0)
+            lo = mid + 1;
+        else if (order > 0)
+            hi = mid - 1;
+        else
+            return I(mid+1);
+    }
+    return I(lo+1); // Return the index where the target would be inserted
+}
+
+// Report whether the array that `obj` points to is a "none" value, which is
+// represented by a negative length.
+public PUREFUNC bool Array$is_none(const void *obj, const TypeInfo_t*)
+{
+    const Array_t *arr = (Array_t*)obj;
+    return arr->length < 0;
+}
+
+// Write an array (`obj` points to an Array_t) to `out`: first its length as
+// an Int64, then its items. Items go through the item type's `serialize`
+// metamethod when there is one; otherwise their raw bytes are written, in a
+// single fwrite() when the array is contiguous and one item at a time when it
+// is not.
+public void Array$serialize(const void *obj, FILE *out, Table_t *pointers, const TypeInfo_t *type)
+{
+    Array_t arr = *(Array_t*)obj;
+    int64_t count = arr.length;
+    Int64$serialize(&count, out, pointers, &Int64$info);
+
+    auto serialize_item = type->ArrayInfo.item->metamethods.serialize;
+    if (serialize_item) {
+        for (int64_t i = 0; i < count; i++)
+            serialize_item(arr.data + i*arr.stride, out, pointers, type->ArrayInfo.item);
+        return;
+    }
+
+    if (arr.stride == type->ArrayInfo.item->size) {
+        fwrite(arr.data, (size_t)type->ArrayInfo.item->size, (size_t)count, out);
+        return;
+    }
+
+    for (int64_t i = 0; i < count; i++)
+        fwrite(arr.data + i*arr.stride, (size_t)type->ArrayInfo.item->size, 1, out);
+}
+
+// Read an array from `in` and store it in `*obj` (an Array_t): first the
+// length as an Int64, then the items. Items go through the item type's
+// `deserialize` metamethod when there is one; otherwise their raw bytes are
+// read directly into freshly allocated storage whose stride is the item size
+// rounded up to the item's alignment.
+//
+// Fails if the stored length is negative or if the raw item data cannot be
+// read in full.
+public void Array$deserialize(FILE *in, void *obj, Array_t *pointers, const TypeInfo_t *type)
+{
+    int64_t len = -1;
+    Int64$deserialize(in, &len, pointers, &Int64$info);
+    // The length comes straight from the input stream; a negative value would
+    // turn into an enormous allocation and an array that looks like "none".
+    if (len < 0)
+        fail("Cannot deserialize an array with a negative length: %ld", len);
+
+    int64_t padded_size = type->ArrayInfo.item->size;
+    if (type->ArrayInfo.item->align > 0 && padded_size % type->ArrayInfo.item->align > 0)
+        padded_size += type->ArrayInfo.item->align - (padded_size % type->ArrayInfo.item->align);
+    Array_t arr = {
+        .length=len,
+        .data=GC_MALLOC((size_t)(len*padded_size)),
+        .stride=padded_size,
+    };
+    auto item_deserialize = type->ArrayInfo.item->metamethods.deserialize;
+    if (item_deserialize) {
+        for (int64_t i = 0; i < len; i++)
+            item_deserialize(in, arr.data + i*arr.stride, pointers, type->ArrayInfo.item);
+    } else if (type->ArrayInfo.item->size > 0 && len > 0) {
+        // Raw item bytes. (Zero-sized items have nothing to read, and fread()
+        // reports 0 for them, so they are skipped entirely.)
+        size_t item_bytes = (size_t)type->ArrayInfo.item->size;
+        if (arr.stride == type->ArrayInfo.item->size) {
+            if (fread(arr.data, item_bytes, (size_t)len, in) != (size_t)len)
+                fail("Failed to read array data while deserializing");
+        } else {
+            for (int64_t i = 0; i < len; i++) {
+                if (fread(arr.data + i*arr.stride, item_bytes, 1, in) != 1)
+                    fail("Failed to read array data while deserializing");
+            }
+        }
+    }
+    *(Array_t*)obj = arr;
+}
+
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/src/stdlib/arrays.h b/src/stdlib/arrays.h
new file mode 100644
index 00000000..dc7efee6
--- /dev/null
+++ b/src/stdlib/arrays.h
@@ -0,0 +1,137 @@
+#pragma once
+
+// Functions that operate on arrays
+
+#include <stdbool.h>
+
+#include "datatypes.h"
+#include "integers.h"
+#include "types.h"
+#include "util.h"
+
+// Convert negative indices to back-indexed offsets without branching: index0 = index + (index < 0)*(len+1) - 1
+// Bounds-checked element access by value-copy of the array struct. Indices are
+// 1-based; a negative index counts back from the end. `arr_expr` and `index_expr`
+// are each evaluated once. An out-of-range index calls fail_source() with the
+// `start`/`end` arguments passed through. Expands to `*(item_type*)...` using a
+// GNU statement expression.
+#define Array_get(item_type, arr_expr, index_expr, start, end) *({ \
+ const Array_t arr = arr_expr; int64_t index = index_expr; \
+ int64_t off = index + (index < 0) * (arr.length + 1) - 1; \
+ if (unlikely(off < 0 || off >= arr.length)) \
+ fail_source(__SOURCE_FILE__, start, end, "Invalid array index: %s (array has length %ld)\n", Text$as_c_string(Int64$as_text(&index, no, NULL)), arr.length); \
+ (item_type*)(arr.data + arr.stride * off);})
+// Same index arithmetic as Array_get (1-based, negative counts from the end), but
+// with no range check: an out-of-range index yields an out-of-bounds pointer.
+#define Array_get_unchecked(type, x, i) *({ const Array_t arr = x; int64_t index = i; \
+ int64_t off = index + (index < 0) * (arr.length + 1) - 1; \
+ (type*)(arr.data + arr.stride * off);})
+// Bounds-checked access to an element through a pointer to the array
+// (`arr_expr` is an Array_t*). Indices are 1-based; negative indices count back
+// from the end. An out-of-range index calls fail_source(). If the array's
+// data_refcount is nonzero, Array$compact() is called before the element pointer
+// is computed — presumably so the write lands in data not shared with other
+// arrays; confirm against Array$compact's implementation.
+#define Array_lvalue(item_type, arr_expr, index_expr, start, end) *({ \
+ Array_t *arr = arr_expr; int64_t index = index_expr; \
+ int64_t off = index + (index < 0) * (arr->length + 1) - 1; \
+ if (unlikely(off < 0 || off >= arr->length)) \
+ fail_source(__SOURCE_FILE__, start, end, "Invalid array index: %s (array has length %ld)\n", Text$as_c_string(Int64$as_text(&index, no, NULL)), arr->length); \
+ if (arr->data_refcount > 0) \
+ Array$compact(arr, sizeof(item_type)); \
+ (item_type*)(arr->data + arr->stride * off); })
+// Like Array_lvalue, but without the range check: the index is converted to an
+// offset and used as-is. Array$compact() is still called first when the array's
+// data_refcount is nonzero.
+#define Array_lvalue_unchecked(item_type, arr_expr, index_expr) *({ \
+ Array_t *arr = arr_expr; int64_t index = index_expr; \
+ int64_t off = index + (index < 0) * (arr->length + 1) - 1; \
+ if (arr->data_refcount > 0) \
+ Array$compact(arr, sizeof(item_type)); \
+ (item_type*)(arr->data + arr->stride * off); })
+// Assign `value` to the element at `index` of the array pointed to by `arr`,
+// with the same index handling and bounds check as Array_lvalue.
+// (The array argument is forwarded as `arr`; the expansion previously named a
+// nonexistent `arr_expr` and could not compile when used.)
+#define Array_set(item_type, arr, index, value, start, end) \
+    Array_lvalue(item_type, arr, index, start, end) = value
+// Compile-time test of the static type of `x`: true for bool, the fixed-width
+// signed integer types, float and double; false for every other type.
+// Used by the Array() macro to choose GC_MALLOC_ATOMIC over GC_MALLOC.
+#define is_atomic(x) _Generic(x, bool: true, int8_t: true, int16_t: true, int32_t: true, int64_t: true, float: true, double: true, default: false)
+// Build an Array_t of element type `t` from a list of values. The values are
+// placed in a local C array, then copied into a GC_MALLOC'd buffer. The stride
+// is the distance between consecutive elements of that C array. The result is
+// always marked non-atomic with a data refcount of 0.
+#define TypedArray(t, ...) ({ t items[] = {__VA_ARGS__}; \
+ (Array_t){.length=sizeof(items)/sizeof(items[0]), \
+ .stride=(int64_t)&items[1] - (int64_t)&items[0], \
+ .data=memcpy(GC_MALLOC(sizeof(items)), items, sizeof(items)), \
+ .atomic=0, \
+ .data_refcount=0}; })
+// Like TypedArray, but with an explicit element count `N`: the local C array is
+// declared as `t items[N]`, so any elements not listed are zero-initialized by
+// the usual C initializer rules, and the resulting length is N.
+#define TypedArrayN(t, N, ...) ({ t items[N] = {__VA_ARGS__}; \
+ (Array_t){.length=N, \
+ .stride=(int64_t)&items[1] - (int64_t)&items[0], \
+ .data=memcpy(GC_MALLOC(sizeof(items)), items, sizeof(items)), \
+ .atomic=0, \
+ .data_refcount=0}; })
+// Build an Array_t whose element type is inferred from the first value `x`
+// (so at least one value is required, and `x` appears several times in the
+// expansion). When is_atomic(x) is true the buffer comes from GC_MALLOC_ATOMIC
+// and the array is flagged atomic; otherwise GC_MALLOC is used.
+#define Array(x, ...) ({ __typeof(x) items[] = {x, __VA_ARGS__}; \
+ (Array_t){.length=sizeof(items)/sizeof(items[0]), \
+ .stride=(int64_t)&items[1] - (int64_t)&items[0], \
+ .data=memcpy(is_atomic(x) ? GC_MALLOC_ATOMIC(sizeof(items)) : GC_MALLOC(sizeof(items)), items, sizeof(items)), \
+ .atomic=is_atomic(x), \
+ .data_refcount=0}; })
+// Array refcounts use a saturating add, where once it's at the max value, it stays there.
+// Both macros add/subtract the result of the comparison (0 or 1), so they change
+// nothing once the count equals ARRAY_MAX_DATA_REFCOUNT. `arr` is expanded twice.
+// NOTE(review): ARRAY_DECREF also subtracts 1 when the count is already 0; on the
+// unsigned 3-bit data_refcount field that wraps to the max value — confirm callers
+// never decrement a zero count.
+#define ARRAY_INCREF(arr) (arr).data_refcount += ((arr).data_refcount < ARRAY_MAX_DATA_REFCOUNT)
+#define ARRAY_DECREF(arr) (arr).data_refcount -= ((arr).data_refcount < ARRAY_MAX_DATA_REFCOUNT)
+// Increment the refcount of `arr` and yield its value (a struct copy that shares the same data pointer).
+#define ARRAY_COPY(arr) ({ ARRAY_INCREF(arr); arr; })
+
+// Insertion/removal API. The `*_value` macros wrap an expression in a one-element
+// compound literal so that its address can be passed to the pointer-taking function.
+// `padded_item_size` is the per-item size supplied by the caller (the macros in this
+// header pass sizeof(item_type) for it).
+#define Array$insert_value(arr, item_expr, index, padded_item_size) Array$insert(arr, (__typeof(item_expr)[1]){item_expr}, index, padded_item_size)
+void Array$insert(Array_t *arr, const void *item, Int_t index, int64_t padded_item_size);
+void Array$insert_all(Array_t *arr, Array_t to_insert, Int_t index, int64_t padded_item_size);
+void Array$remove_at(Array_t *arr, Int_t index, Int_t count, int64_t padded_item_size);
+void Array$remove_item(Array_t *arr, void *item, Int_t max_removals, const TypeInfo_t *type);
+#define Array$remove_item_value(arr, item_expr, max, type) Array$remove_item(arr, (__typeof(item_expr)[1]){item_expr}, max, type)
+
+// Remove and return one element. `index_expr` is an Int_t (1-based; negative counts
+// from the end) that is converted with Int64$from_int(index, false). If the
+// resulting offset is in range, the element is copied into a local named
+// `nonnone_var`, removed with Array$remove_at(), and the macro yields
+// `nonnone_expr` (which may refer to `nonnone_var`). Otherwise it yields
+// `none_expr` and the array is left untouched.
+#define Array$pop(arr_expr, index_expr, item_type, nonnone_var, nonnone_expr, none_expr) ({ \
+ Array_t *arr = arr_expr; \
+ Int_t index = index_expr; \
+ int64_t index64 = Int64$from_int(index, false); \
+ int64_t off = index64 + (index64 < 0) * (arr->length + 1) - 1; \
+ (off >= 0 && off < arr->length) ? ({ \
+ item_type nonnone_var = *(item_type*)(arr->data + off*arr->stride); \
+ Array$remove_at(arr, index, I_small(1), sizeof(item_type)); \
+ nonnone_expr; \
+ }) : none_expr; })
+
+// Declarations for the remaining array operations; their definitions are not part
+// of this header. Functions taking `Array_t *` receive the array by pointer, while
+// those taking `Array_t` receive a copy of the array struct. The `*_value` macros
+// copy an expression into a local so its address can be passed to the
+// pointer-taking function of the same name.
+OptionalInt_t Array$find(Array_t arr, void *item, const TypeInfo_t *type);
+#define Array$find_value(arr, item_expr, type) ({ __typeof(item_expr) item = item_expr; Array$find(arr, &item, type); })
+OptionalInt_t Array$first(Array_t arr, Closure_t predicate);
+void Array$sort(Array_t *arr, Closure_t comparison, int64_t padded_item_size);
+Array_t Array$sorted(Array_t arr, Closure_t comparison, int64_t padded_item_size);
+void Array$shuffle(Array_t *arr, RNG_t rng, int64_t padded_item_size);
+Array_t Array$shuffled(Array_t arr, RNG_t rng, int64_t padded_item_size);
+void *Array$random(Array_t arr, RNG_t rng);
+// Fails with an error message on an empty array; otherwise dereferences the pointer returned by Array$random() as a `t`.
+#define Array$random_value(arr, rng, t) ({ Array_t _arr = arr; if (_arr.length == 0) fail("Cannot get a random value from an empty array!"); *(t*)Array$random(_arr, rng); })
+Array_t Array$sample(Array_t arr, Int_t n, Array_t weights, RNG_t rng, int64_t padded_item_size);
+Table_t Array$counts(Array_t arr, const TypeInfo_t *type);
+void Array$clear(Array_t *array);
+void Array$compact(Array_t *arr, int64_t padded_item_size);
+PUREFUNC bool Array$has(Array_t array, void *item, const TypeInfo_t *type);
+#define Array$has_value(arr, item_expr, type) ({ __typeof(item_expr) item = item_expr; Array$has(arr, &item, type); })
+PUREFUNC Array_t Array$from(Array_t array, Int_t first);
+PUREFUNC Array_t Array$to(Array_t array, Int_t last);
+PUREFUNC Array_t Array$by(Array_t array, Int_t stride, int64_t padded_item_size);
+PUREFUNC Array_t Array$slice(Array_t array, Int_t int_first, Int_t int_last);
+PUREFUNC Array_t Array$reversed(Array_t array, int64_t padded_item_size);
+Array_t Array$concat(Array_t x, Array_t y, int64_t padded_item_size);
+// Metamethod implementations (referenced by Array$metamethods in this header).
+PUREFUNC uint64_t Array$hash(const void *arr, const TypeInfo_t *type);
+PUREFUNC int32_t Array$compare(const void *x, const void *y, const TypeInfo_t *type);
+PUREFUNC bool Array$equal(const void *x, const void *y, const TypeInfo_t *type);
+PUREFUNC bool Array$is_none(const void *obj, const TypeInfo_t*);
+Text_t Array$as_text(const void *arr, bool colorize, const TypeInfo_t *type);
+// Heap operations; each takes the comparison closure that defines the ordering.
+void Array$heapify(Array_t *heap, Closure_t comparison, int64_t padded_item_size);
+void Array$heap_push(Array_t *heap, const void *item, Closure_t comparison, int64_t padded_item_size);
+#define Array$heap_push_value(heap, _value, comparison, padded_item_size) ({ __typeof(_value) value = _value; Array$heap_push(heap, &value, comparison, padded_item_size); })
+void Array$heap_pop(Array_t *heap, Closure_t comparison, int64_t padded_item_size);
+// Pop from a heap and yield the popped value. If the heap is non-empty, the first
+// element of its data is copied into a local named `nonnone_var`, Array$heap_pop()
+// is called, and the macro yields `nonnone_expr` (which may refer to
+// `nonnone_var`). On an empty heap it yields `none_expr`.
+// `heap` is evaluated once.
+#define Array$heap_pop_value(heap, comparison, type, nonnone_var, nonnone_expr, none_expr) \
+ ({ Array_t *_heap = heap; \
+ (_heap->length > 0) ? ({ \
+ type nonnone_var = *(type*)_heap->data; \
+ Array$heap_pop(_heap, comparison, sizeof(type)); \
+ nonnone_expr; \
+ }) : none_expr; })
+// Binary search using a caller-supplied comparison closure. The `_value` macro
+// copies `target` into a local so that its address can be passed.
+Int_t Array$binary_search(Array_t array, void *target, Closure_t comparison);
+#define Array$binary_search_value(array, target, comparison) \
+ ({ __typeof(target) _target = target; Array$binary_search(array, &_target, comparison); })
+// Serialization metamethods: write an array to / read an array from a stream.
+void Array$serialize(const void *obj, FILE *out, Table_t *pointers, const TypeInfo_t *type);
+void Array$deserialize(FILE *in, void *obj, Array_t *pointers, const TypeInfo_t *type);
+
+// Brace initializer for the metamethods table of an array type; every slot is
+// filled with the corresponding Array$ function declared in this header.
+#define Array$metamethods { \
+ .as_text=Array$as_text, \
+ .compare=Array$compare, \
+ .equal=Array$equal, \
+ .hash=Array$hash, \
+ .is_none=Array$is_none, \
+ .serialize=Array$serialize, \
+ .deserialize=Array$deserialize, \
+}
+
+// Yields a pointer to a TypeInfo_t compound literal describing "array of
+// item_info": size/alignment of Array_t, the ArrayInfo tag, the item's type info,
+// and the array metamethods. The literal has the storage duration of the scope
+// where the macro is expanded.
+#define Array$info(item_info) &((TypeInfo_t){.size=sizeof(Array_t), .align=__alignof__(Array_t), \
+ .tag=ArrayInfo, .ArrayInfo.item=item_info, \
+ .metamethods=Array$metamethods})
+
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/src/stdlib/bools.c b/src/stdlib/bools.c
new file mode 100644
index 00000000..bf820664
--- /dev/null
+++ b/src/stdlib/bools.c
@@ -0,0 +1,57 @@
+// Boolean methods/type info
+#include <ctype.h>
+#include <err.h>
+#include <gc.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <sys/param.h>
+
+#include "bools.h"
+#include "optionals.h"
+#include "text.h"
+#include "util.h"
+
+// Render a boolean as the text "yes" or "no", wrapped in ANSI color escapes when
+// `colorize` is set. A NULL pointer yields the type's name, "Bool".
+PUREFUNC public Text_t Bool$as_text(const void *b, bool colorize, const TypeInfo_t*)
+{
+    if (b == NULL)
+        return Text("Bool");
+
+    if (*(Bool_t*)b)
+        return colorize ? Text("\x1b[35myes\x1b[m") : Text("yes");
+    return colorize ? Text("\x1b[35mno\x1b[m") : Text("no");
+}
+
+// Parse text into an optional boolean, ignoring case.
+// "yes", "on", "true" and "1" give yes; "no", "off", "false" and "0" give no;
+// anything else gives NONE_BOOL.
+PUREFUNC public OptionalBool_t Bool$parse(Text_t text)
+{
+    bool is_truthy = Text$equal_ignoring_case(text, Text("yes"), NONE_TEXT)
+        || Text$equal_ignoring_case(text, Text("on"), NONE_TEXT)
+        || Text$equal_ignoring_case(text, Text("true"), NONE_TEXT)
+        || Text$equal_ignoring_case(text, Text("1"), NONE_TEXT);
+    if (is_truthy)
+        return yes;
+
+    bool is_falsy = Text$equal_ignoring_case(text, Text("no"), NONE_TEXT)
+        || Text$equal_ignoring_case(text, Text("off"), NONE_TEXT)
+        || Text$equal_ignoring_case(text, Text("false"), NONE_TEXT)
+        || Text$equal_ignoring_case(text, Text("0"), NONE_TEXT);
+    if (is_falsy)
+        return no;
+
+    return NONE_BOOL;
+}
+
+// An optional boolean is "none" when it holds the NONE_BOOL sentinel.
+static bool Bool$is_none(const void *b, const TypeInfo_t*)
+{
+    OptionalBool_t value = *(OptionalBool_t*)b;
+    return value == NONE_BOOL;
+}
+
+// Metamethods for Bool: only text conversion and the none-check are set here;
+// the remaining slots are left zero-initialized.
+static const metamethods_t Bool$metamethods = {
+ .as_text=Bool$as_text,
+ .is_none=Bool$is_none,
+};
+
+// Runtime type descriptor for Bool (size and alignment of C `bool`).
+public const TypeInfo_t Bool$info = {
+ .size=sizeof(bool),
+ .align=__alignof__(bool),
+ .metamethods=Bool$metamethods,
+};
+
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/src/stdlib/bools.h b/src/stdlib/bools.h
new file mode 100644
index 00000000..6d0300d5
--- /dev/null
+++ b/src/stdlib/bools.h
@@ -0,0 +1,26 @@
+#pragma once
+
+// Boolean functions/type info
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "types.h"
+#include "optionals.h"
+#include "util.h"
+
+// Spelling of the two boolean constants used throughout the runtime.
+#define yes (Bool_t)true
+#define no (Bool_t)false
+
+PUREFUNC Text_t Bool$as_text(const void *b, bool colorize, const TypeInfo_t *type);
+OptionalBool_t Bool$parse(Text_t text);
+// Conversions to Bool: any nonzero input is true.
+// NOTE(review): Bool$from_int inspects only the `small` member of the Int_t union;
+// confirm that this is the intended test for every Int_t representation.
+MACROLIKE Bool_t Bool$from_int(Int_t i) { return (i.small != 0); }
+MACROLIKE Bool_t Bool$from_int64(Int64_t i) { return (i != 0); }
+MACROLIKE Bool_t Bool$from_int32(Int32_t i) { return (i != 0); }
+MACROLIKE Bool_t Bool$from_int16(Int16_t i) { return (i != 0); }
+MACROLIKE Bool_t Bool$from_int8(Int8_t i) { return (i != 0); }
+MACROLIKE Bool_t Bool$from_byte(uint8_t b) { return (b != 0); }
+
+extern const TypeInfo_t Bool$info;
+
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/src/stdlib/bytes.c b/src/stdlib/bytes.c
new file mode 100644
index 00000000..b24a721b
--- /dev/null
+++ b/src/stdlib/bytes.c
@@ -0,0 +1,64 @@
+// The logic for unsigned bytes
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "bytes.h"
+#include "stdlib.h"
+#include "text.h"
+#include "util.h"
+
+// Smallest and largest values representable in a Byte (an unsigned 8-bit value).
+public const Byte_t Byte$min = 0;
+public const Byte_t Byte$max = UINT8_MAX;
+
+// Render a byte as two uppercase hex digits with a "0x" prefix, wrapped in ANSI
+// color escapes when `colorize` is set. A NULL pointer yields the type name "Byte".
+PUREFUNC public Text_t Byte$as_text(const void *b, bool colorize, const TypeInfo_t*)
+{
+    if (b == NULL)
+        return Text("Byte");
+
+    Byte_t value = *(Byte_t*)b;
+    return Text$format(colorize ? "\x1b[35m0x%02X\x1b[m" : "0x%02X", value);
+}
+
+// Format a byte as two hex digits in an ASCII text, with an optional "0x" prefix
+// and a choice of uppercase or lowercase digits. The characters are written into a
+// fresh 8-byte GC_MALLOC_ATOMIC buffer.
+public Text_t Byte$hex(Byte_t byte, bool uppercase, bool prefix) {
+    char *buf = GC_MALLOC_ATOMIC(8);
+    int written;
+    if (prefix) {
+        written = uppercase ? snprintf(buf, 8, "0x%02X", byte)
+                            : snprintf(buf, 8, "0x%02x", byte);
+    } else {
+        written = uppercase ? snprintf(buf, 8, "%02X", byte)
+                            : snprintf(buf, 8, "%02x", byte);
+    }
+
+    struct Text_s result = {.tag=TEXT_ASCII};
+    result.ascii = buf;
+    result.length = (int64_t)written;
+    return result;
+}
+
+// Convert an arbitrary-size Int to a Byte.
+// When `truncate` is false, values outside 0..0xFF are rejected with fail();
+// when `truncate` is true, the value is narrowed to its low 8 bits.
+// (Previously the checks ran only when truncation had been requested, and the
+// function returned the truth value `i.small != 0` rather than the byte.)
+public PUREFUNC Byte_t Byte$from_int(Int_t i, bool truncate) {
+    if unlikely (!truncate && Int$compare_value(i, I_small(0xFF)) > 0)
+        fail("This value is too large to convert to a byte without truncation: %k", (Text_t[1]){Int$value_as_text(i)});
+    else if unlikely (!truncate && Int$compare_value(i, I_small(0)) < 0)
+        fail("Negative values can't be converted to bytes: %k", (Text_t[1]){Int$value_as_text(i)});
+    // Go through the Int -> Int64 conversion (allowing truncation, since the range
+    // was already enforced above when required), then narrow to 8 bits.
+    return (Byte_t)Int64$from_int(i, true);
+}
+// Convert a 64-bit integer to a Byte. Unless `truncate` is set, a value that does
+// not survive the round trip through Byte_t is rejected with fail().
+// (The check used to run only when `truncate` was true, contradicting its message.)
+public PUREFUNC Byte_t Byte$from_int64(Int64_t i, bool truncate) {
+    if unlikely (!truncate && i != (Int64_t)(Byte_t)i)
+        fail("This value can't be converted to a byte without truncation: %ld", i);
+    return (Byte_t)i;
+}
+// Convert a 32-bit integer to a Byte. Unless `truncate` is set, a value that does
+// not survive the round trip through Byte_t is rejected with fail().
+// (The check used to run only when `truncate` was true, contradicting its message.)
+public PUREFUNC Byte_t Byte$from_int32(Int32_t i, bool truncate) {
+    if unlikely (!truncate && i != (Int32_t)(Byte_t)i)
+        fail("This value can't be converted to a byte without truncation: %d", i);
+    return (Byte_t)i;
+}
+// Convert a 16-bit integer to a Byte. Unless `truncate` is set, a value that does
+// not survive the round trip through Byte_t is rejected with fail().
+// (The check used to run only when `truncate` was true, contradicting its message.)
+public PUREFUNC Byte_t Byte$from_int16(Int16_t i, bool truncate) {
+    if unlikely (!truncate && i != (Int16_t)(Byte_t)i)
+        fail("This value can't be converted to a byte without truncation: %d", i);
+    return (Byte_t)i;
+}
+
+// Runtime type descriptor for Byte. Only the text-conversion metamethod is set;
+// the other metamethod slots are left zero-initialized.
+public const TypeInfo_t Byte$info = {
+ .size=sizeof(Byte_t),
+ .align=__alignof__(Byte_t),
+ .metamethods={
+ .as_text=Byte$as_text,
+ },
+};
+
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/src/stdlib/bytes.h b/src/stdlib/bytes.h
new file mode 100644
index 00000000..ac1b61a3
--- /dev/null
+++ b/src/stdlib/bytes.h
@@ -0,0 +1,38 @@
+#pragma once
+
+// An unsigned byte datatype
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "datatypes.h"
+#include "stdlib.h"
+#include "types.h"
+#include "util.h"
+
+// Cast any expression to a Byte_t.
+#define Byte(b) ((Byte_t)(b))
+
+PUREFUNC Text_t Byte$as_text(const void *b, bool colorize, const TypeInfo_t *type);
+
+// Conversions from integer types. The `truncate` flag controls whether an
+// out-of-range value is tolerated or reported; see the definitions in bytes.c
+// for the exact behavior.
+Byte_t Byte$from_int(Int_t i, bool truncate);
+Byte_t Byte$from_int64(int64_t i, bool truncate);
+Byte_t Byte$from_int32(int32_t i, bool truncate);
+Byte_t Byte$from_int16(int16_t i, bool truncate);
+// These two are plain casts with no range check.
+MACROLIKE Byte_t Byte$from_int8(int8_t i) { return (Byte_t)i; }
+MACROLIKE Byte_t Byte$from_bool(bool b) { return (Byte_t)b; }
+
+extern const Byte_t Byte$min;
+extern const Byte_t Byte$max;
+
+extern const TypeInfo_t Byte$info;
+
+// Format a byte as two hex digits, optionally uppercase and/or "0x"-prefixed.
+Text_t Byte$hex(Byte_t byte, bool uppercase, bool prefix);
+
+// An optional byte: since every 8-bit pattern is a valid Byte, presence is
+// tracked in a separate one-bit flag rather than a sentinel value.
+typedef struct {
+ Byte_t value;
+ bool has_value:1;
+} OptionalByte_t;
+
+// The "no value" OptionalByte_t (has_value cleared; value zero-initialized).
+#define NONE_BYTE ((OptionalByte_t){.has_value=false})
+
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/src/stdlib/c_strings.c b/src/stdlib/c_strings.c
new file mode 100644
index 00000000..7987a234
--- /dev/null
+++ b/src/stdlib/c_strings.c
@@ -0,0 +1,86 @@
+// Type info and methods for CString datatype (char*)
+#include <ctype.h>
+#include <err.h>
+#include <gc.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "text.h"
+#include "siphash.h"
+#include "util.h"
+
+// Render a C string as `CString("...")`, with the contents quoted by Text$quoted().
+// `c_string` points at a `const char*`; a NULL `c_string` yields the type name.
+// When `colorize` is set, the "CString" label is wrapped in ANSI color escapes.
+public Text_t CString$as_text(const void *c_string, bool colorize, const TypeInfo_t *info)
+{
+    (void)info;
+    if (c_string == NULL)
+        return Text("CString");
+
+    const char *str = *(const char**)c_string;
+    Text_t contents = Text$from_str(str);
+    Text_t label = colorize ? Text("\x1b[34mCString\x1b[m(") : Text("CString(");
+    return Text$concat(label, Text$quoted(contents, colorize), Text(")"));
+}
+
+// Convert a C string to a Text by formatting it with "%s" (no quoting, no label).
+public Text_t CString$as_text_simple(const char *str)
+{
+    Text_t converted = Text$format("%s", str);
+    return converted;
+}
+
+// Three-way comparison of two C strings. `x` and `y` each point at a `const char*`.
+// A NULL string orders before any non-NULL string, and two NULL strings compare
+// equal; otherwise the result is that of strcmp().
+PUREFUNC public int32_t CString$compare(const void *x, const void *y, const TypeInfo_t*)
+{
+    if (x == y)
+        return 0;
+
+    const char *lhs = *(const char**)x;
+    const char *rhs = *(const char**)y;
+
+    // Handle NULL before strcmp(): passing it a null pointer is undefined behavior.
+    // Previously two distinct slots that both held NULL fell through to strcmp().
+    if (lhs == NULL || rhs == NULL)
+        return (rhs == NULL) - (lhs == NULL);
+
+    return strcmp(lhs, rhs);
+}
+
+// Two C strings are equal exactly when CString$compare() reports 0 for them.
+PUREFUNC public bool CString$equal(const void *x, const void *y, const TypeInfo_t *type)
+{
+    int32_t ordering = CString$compare(x, y, type);
+    return ordering == 0;
+}
+
+// Hash a C string: siphash24 over its bytes (excluding the terminator).
+// A NULL string hashes to 0.
+PUREFUNC public uint64_t CString$hash(const void *c_str, const TypeInfo_t*)
+{
+    const char *str = *(const char**)c_str;
+    if (str == NULL)
+        return 0;
+    return siphash24(*(void**)c_str, strlen(str));
+}
+
+// An optional C string is "none" when the stored pointer is NULL.
+PUREFUNC public bool CString$is_none(const void *c_str, const TypeInfo_t*)
+{
+    const char *str = *(const char**)c_str;
+    return str == NULL;
+}
+
+// Write a C string to `out` as its byte length (an Int64) followed by the bytes
+// themselves, without the terminating NUL.
+// NOTE(review): the string pointer is passed straight to strlen(), so a NULL
+// string is not handled here — confirm callers never serialize one.
+static void CString$serialize(const void *obj, FILE *out, Table_t *pointers, const TypeInfo_t*)
+{
+    const char *bytes = *(const char **)obj;
+    size_t nbytes = strlen(bytes);
+    int64_t len = (int64_t)nbytes;
+    Int64$serialize(&len, out, pointers, &Int64$info);
+    fwrite(bytes, sizeof(char), (size_t)len, out);
+}
+
+// Read a C string from `in`: an Int64 byte length followed by that many bytes.
+// The result is stored into `*out` (a `const char*` slot) as a NUL-terminated
+// string in a fresh GC_MALLOC_ATOMIC buffer.
+static void CString$deserialize(FILE *in, void *out, Array_t *pointers, const TypeInfo_t *)
+{
+    int64_t len = -1;
+    Int64$deserialize(in, &len, pointers, &Int64$info);
+    // A negative length would wrap to a huge size_t below; treat it as empty.
+    if (len < 0)
+        len = 0;
+
+    char *str = GC_MALLOC_ATOMIC((size_t)len+1);
+    size_t got = fread(str, sizeof(char), (size_t)len, in);
+    // Terminate right after the bytes actually read. The buffer holds len+1 bytes,
+    // so the last valid index is `len` (the old code wrote to index len+1, one past
+    // the end, and left the real terminator slot uninitialized).
+    str[got] = '\0';
+    *(const char**)out = str;
+}
+
+// Runtime type descriptor for CString (a `const char*`), wiring up every
+// metamethod defined in this file.
+public const TypeInfo_t CString$info = {
+ .size=sizeof(const char*),
+ .align=__alignof__(const char*),
+ .metamethods={
+ .hash=CString$hash,
+ .compare=CString$compare,
+ .equal=CString$equal,
+ .as_text=CString$as_text,
+ .is_none=CString$is_none,
+ .serialize=CString$serialize,
+ .deserialize=CString$deserialize,
+ },
+};
+
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/src/stdlib/c_strings.h b/src/stdlib/c_strings.h
new file mode 100644
index 00000000..24cf99da
--- /dev/null
+++ b/src/stdlib/c_strings.h
@@ -0,0 +1,18 @@
+#pragma once
+
+// Type info and methods for CString datatype, which represents C's `char*`
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "types.h"
+
+// Prototypes match the definitions in c_strings.c: every metamethod takes its
+// object as `const void *` (pointing at a `const char*`), and compare returns int32_t.
+// Callers that pass a `const char **` still convert implicitly to `const void *`.
+Text_t CString$as_text(const void *str, bool colorize, const TypeInfo_t *info);
+// Convert a plain C string to Text without quoting or a type label.
+Text_t CString$as_text_simple(const char *str);
+PUREFUNC int32_t CString$compare(const void *x, const void *y, const TypeInfo_t *type);
+PUREFUNC bool CString$equal(const void *x, const void *y, const TypeInfo_t *type);
+PUREFUNC uint64_t CString$hash(const void *str, const TypeInfo_t *type);
+// True when the stored `const char*` is NULL.
+PUREFUNC bool CString$is_none(const void *str, const TypeInfo_t *type);
+
+extern const TypeInfo_t CString$info;
+
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/src/stdlib/chacha.h b/src/stdlib/chacha.h
new file mode 100644
index 00000000..69d79ea3
--- /dev/null
+++ b/src/stdlib/chacha.h
@@ -0,0 +1,201 @@
+/*
+chacha-merged.c version 20080118
+D. J. Bernstein
+Public domain.
+*/
+
+/* $OpenBSD: chacha_private.h,v 1.3 2022/02/28 21:56:29 dtucker Exp $ */
+/* Tomo: chacha.h,v 1.0 2024/11/03 Bruce Hill */
+
+/* Short integer aliases used throughout this header. */
+typedef unsigned char u8;
+typedef unsigned int u32;
+
+/* Cipher state: sixteen 32-bit words. As filled in by the setup functions in
+ * this header, words 0-3 hold the constants, 4-11 the key, 12-13 the block
+ * counter and 14-15 the IV. */
+typedef struct
+{
+ u32 input[16]; /* could be compressed */
+} chacha_ctx;
+
+/* Append an unsigned suffix to an integer literal. */
+#define U8C(v) (v##U)
+#define U32C(v) (v##U)
+
+/* Narrow a value to 8 / 32 bits by casting and masking. */
+#define U8V(v) ((u8)(v) & U8C(0xFF))
+#define U32V(v) ((u32)(v) & U32C(0xFFFFFFFF))
+
+/* Rotate a 32-bit value left by n bits (n is expected to be in 1..31). */
+#define ROTL32(v, n) \
+ (U32V((v) << (n)) | ((v) >> (32 - (n))))
+
+/* Assemble a 32-bit word from four bytes at p, least significant byte first. */
+#define U8TO32_LITTLE(p) \
+ (((u32)((p)[0]) ) | \
+ ((u32)((p)[1]) << 8) | \
+ ((u32)((p)[2]) << 16) | \
+ ((u32)((p)[3]) << 24))
+
+/* Store a 32-bit word as four bytes at p, least significant byte first. */
+#define U32TO8_LITTLE(p, v) \
+ do { \
+ (p)[0] = U8V((v) ); \
+ (p)[1] = U8V((v) >> 8); \
+ (p)[2] = U8V((v) >> 16); \
+ (p)[3] = U8V((v) >> 24); \
+ } while (0)
+
+/* Primitive operations on 32-bit words (addition wraps modulo 2^32). */
+#define ROTATE(v, c) (ROTL32(v, c))
+#define XOR(v, w) ((v) ^ (w))
+#define PLUS(v, w) (U32V((v) + (w)))
+#define PLUSONE(v) (PLUS((v), 1))
+
+/* One add/xor/rotate mixing step over four state words, with rotation amounts
+ * 16, 12, 8 and 7. Expands to several statements that assign to all four
+ * arguments, so each argument must be a plain variable and the macro must be
+ * used as a statement on its own. */
+#define QUARTERROUND(a, b, c, d) \
+ a = PLUS(a, b); d = ROTATE(XOR(d, a), 16); \
+ c = PLUS(c, d); b = ROTATE(XOR(b, c), 12); \
+ a = PLUS(a, b); d = ROTATE(XOR(d, a), 8); \
+ c = PLUS(c, d); b = ROTATE(XOR(b, c), 7);
+
+/* 16-byte constants loaded into state words 0-3 by chacha_keysetup(): `sigma`
+ * for 256-bit keys, `tau` for 128-bit keys. The arrays are exactly 16 chars, so
+ * they hold the text without a terminating NUL. */
+static const char sigma[16] = "expand 32-byte k";
+static const char tau[16] = "expand 16-byte k";
+
+/* Load a key into the cipher state.
+ * With kbits == 256, `k` supplies 32 bytes: the first 16 fill words 4-7 and the
+ * next 16 fill words 8-11, and the `sigma` constants fill words 0-3.
+ * With any other kbits (128 expected), the same 16 key bytes fill both words 4-7
+ * and words 8-11, and the `tau` constants are used. */
+static void
+chacha_keysetup(chacha_ctx *chacha, const u8 *k, u32 kbits)
+{
+    const u8 *second_half = (kbits == 256) ? k + 16 : k;
+    const char *constants = (kbits == 256) ? sigma : tau;
+
+    for (u32 word = 0; word < 4; word++) {
+        chacha->input[word]     = U8TO32_LITTLE(constants + 4*word);
+        chacha->input[4 + word] = U8TO32_LITTLE(k + 4*word);
+        chacha->input[8 + word] = U8TO32_LITTLE(second_half + 4*word);
+    }
+}
+
+/* Reset the 64-bit block counter (words 12-13) to zero and load the 8-byte IV
+ * into words 14-15, least significant byte first. */
+static void
+chacha_ivsetup(chacha_ctx *chacha, const u8 *iv)
+{
+    chacha->input[12] = 0;
+    chacha->input[13] = 0;
+    for (u32 word = 0; word < 2; word++)
+        chacha->input[14 + word] = U8TO32_LITTLE(iv + 4*word);
+}
+
+/* Produce `bytes` bytes of cipher output into `c`, 64 bytes per block, and
+ * advance the block counter stored in words 12-13 of the state.
+ *
+ * Each block is 10 iterations of a column round followed by a diagonal round
+ * (20 rounds in total), after which the original state words are added back in.
+ *
+ * In this version the bytes of `m` are never combined with the generated
+ * block: the output written to `c` is the raw keystream. `m` is only read when
+ * the final block is partial (it is copied into a scratch buffer), so it must
+ * still point at `bytes` readable bytes.
+ *
+ * The loop counter is declared as `u32` (this header's own typedef) instead of
+ * the BSD-only `u_int`, which is not available without <sys/types.h>. */
+static void
+chacha_encrypt_bytes(chacha_ctx *chacha, const u8 *m, u8 *c, u32 bytes)
+{
+    u32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
+    u32 j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15;
+    u8 *ctarget = NULL;
+    u8 tmp[64];
+    u32 i;
+
+    if (!bytes) return;
+
+    /* Working copy of the state; j12/j13 are the running block counter. */
+    j0 = chacha->input[0];
+    j1 = chacha->input[1];
+    j2 = chacha->input[2];
+    j3 = chacha->input[3];
+    j4 = chacha->input[4];
+    j5 = chacha->input[5];
+    j6 = chacha->input[6];
+    j7 = chacha->input[7];
+    j8 = chacha->input[8];
+    j9 = chacha->input[9];
+    j10 = chacha->input[10];
+    j11 = chacha->input[11];
+    j12 = chacha->input[12];
+    j13 = chacha->input[13];
+    j14 = chacha->input[14];
+    j15 = chacha->input[15];
+
+    for (;;) {
+        /* Partial final block: generate into the scratch buffer and remember
+         * the real destination so only `bytes` bytes are copied out below. */
+        if (bytes < 64) {
+            for (i = 0;i < bytes;++i) tmp[i] = m[i];
+            m = tmp;
+            ctarget = c;
+            c = tmp;
+        }
+        x0 = j0;
+        x1 = j1;
+        x2 = j2;
+        x3 = j3;
+        x4 = j4;
+        x5 = j5;
+        x6 = j6;
+        x7 = j7;
+        x8 = j8;
+        x9 = j9;
+        x10 = j10;
+        x11 = j11;
+        x12 = j12;
+        x13 = j13;
+        x14 = j14;
+        x15 = j15;
+        for (i = 20;i > 0;i -= 2) {
+            /* Column round. */
+            QUARTERROUND( x0, x4, x8, x12)
+            QUARTERROUND( x1, x5, x9, x13)
+            QUARTERROUND( x2, x6, x10, x14)
+            QUARTERROUND( x3, x7, x11, x15)
+            /* Diagonal round. */
+            QUARTERROUND( x0, x5, x10, x15)
+            QUARTERROUND( x1, x6, x11, x12)
+            QUARTERROUND( x2, x7, x8, x13)
+            QUARTERROUND( x3, x4, x9, x14)
+        }
+        x0 = PLUS(x0, j0);
+        x1 = PLUS(x1, j1);
+        x2 = PLUS(x2, j2);
+        x3 = PLUS(x3, j3);
+        x4 = PLUS(x4, j4);
+        x5 = PLUS(x5, j5);
+        x6 = PLUS(x6, j6);
+        x7 = PLUS(x7, j7);
+        x8 = PLUS(x8, j8);
+        x9 = PLUS(x9, j9);
+        x10 = PLUS(x10, j10);
+        x11 = PLUS(x11, j11);
+        x12 = PLUS(x12, j12);
+        x13 = PLUS(x13, j13);
+        x14 = PLUS(x14, j14);
+        x15 = PLUS(x15, j15);
+
+        /* Advance the 64-bit block counter, carrying from j12 into j13. */
+        j12 = PLUSONE(j12);
+        if (!j12) {
+            j13 = PLUSONE(j13);
+            /* stopping at 2^70 bytes per nonce is user's responsibility */
+        }
+
+        U32TO8_LITTLE(c + 0, x0);
+        U32TO8_LITTLE(c + 4, x1);
+        U32TO8_LITTLE(c + 8, x2);
+        U32TO8_LITTLE(c + 12, x3);
+        U32TO8_LITTLE(c + 16, x4);
+        U32TO8_LITTLE(c + 20, x5);
+        U32TO8_LITTLE(c + 24, x6);
+        U32TO8_LITTLE(c + 28, x7);
+        U32TO8_LITTLE(c + 32, x8);
+        U32TO8_LITTLE(c + 36, x9);
+        U32TO8_LITTLE(c + 40, x10);
+        U32TO8_LITTLE(c + 44, x11);
+        U32TO8_LITTLE(c + 48, x12);
+        U32TO8_LITTLE(c + 52, x13);
+        U32TO8_LITTLE(c + 56, x14);
+        U32TO8_LITTLE(c + 60, x15);
+
+        if (bytes <= 64) {
+            if (bytes < 64) {
+                for (i = 0;i < bytes;++i) ctarget[i] = c[i];
+            }
+            /* Persist the advanced counter for the next call. */
+            chacha->input[12] = j12;
+            chacha->input[13] = j13;
+            return;
+        }
+        bytes -= 64;
+        c += 64;
+    }
+}
diff --git a/src/stdlib/datatypes.h b/src/stdlib/datatypes.h
new file mode 100644
index 00000000..b81ff741
--- /dev/null
+++ b/src/stdlib/datatypes.h
@@ -0,0 +1,120 @@
+#pragma once
+
+// Common datastructures (arrays, tables, closures)
+
+#include <gmp.h>
+#include <pthread.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <time.h>
+
+// Bit widths of the packed fields of Array_t. Together with the 1-bit `atomic`
+// flag they total 64 bits (42 + 6 + 1 + 3 + 12).
+#define ARRAY_LENGTH_BITS 42
+#define ARRAY_FREE_BITS 6
+#define ARRAY_REFCOUNT_BITS 3
+#define ARRAY_STRIDE_BITS 12
+
+// Largest value representable in N unsigned bits. The shift is done on a plain
+// `int`, so N must stay below the width of int.
+#define MAX_FOR_N_BITS(N) ((1<<(N))-1)
+// Stride is a signed field, so one bit is reserved for the sign.
+#define ARRAY_MAX_STRIDE MAX_FOR_N_BITS(ARRAY_STRIDE_BITS-1)
+#define ARRAY_MIN_STRIDE (~MAX_FOR_N_BITS(ARRAY_STRIDE_BITS-1))
+#define ARRAY_MAX_DATA_REFCOUNT MAX_FOR_N_BITS(ARRAY_REFCOUNT_BITS)
+#define ARRAY_MAX_FREE_ENTRIES MAX_FOR_N_BITS(ARRAY_FREE_BITS)
+
+// Names for the primitive types, mapped onto C types with macros.
+#define Num_t double
+#define Num32_t float
+
+#define Int64_t int64_t
+#define Int32_t int32_t
+#define Int16_t int16_t
+#define Int8_t int8_t
+#define Byte_t uint8_t
+#define Bool_t bool
+
+// Integer value that is either a 64-bit word (`small`) or a pointer to a GMP
+// big integer (`big`), sharing the same storage.
+// NOTE(review): how the two cases are told apart is not visible in this header.
+typedef union {
+ int64_t small;
+ mpz_t *big;
+} Int_t;
+
+// Array value: a data pointer plus 64 bits of packed metadata.
+typedef struct {
+ void *data;
+ // All of the following fields add up to 64 bits, which means that array
+ // structs can be passed in two 64-bit registers. C will handle doing the
+ // bit arithmetic to extract the necessary values, which is cheaper than
+ // spilling onto the stack and needing to retrieve data from the stack.
+ // length: number of items (signed; a negative length marks a "none" array)
+ int64_t length:ARRAY_LENGTH_BITS;
+ // free: NOTE(review) presumably spare item slots past the end of the data — confirm
+ uint8_t free:ARRAY_FREE_BITS;
+ // atomic: set when the data was allocated with GC_MALLOC_ATOMIC (see the Array() macro)
+ bool atomic:1;
+ // data_refcount: saturating count, nonzero when the data may be shared
+ uint8_t data_refcount:ARRAY_REFCOUNT_BITS;
+ // stride: signed byte distance between consecutive items
+ int16_t stride:ARRAY_STRIDE_BITS;
+} Array_t;
+
+// One hash-table bucket: an occupancy flag, a 31-bit index, and a 32-bit link to
+// another bucket.
+typedef struct {
+ uint32_t occupied:1, index:31;
+ uint32_t next_bucket;
+} bucket_t;
+
+// Limits implied by the field widths: `index` is 31 bits wide, and the table's
+// data_refcount field is 2 bits wide.
+#define TABLE_MAX_BUCKETS 0x7fffffff
+#define TABLE_MAX_DATA_REFCOUNT 3
+
+// Header for a table's bucket storage, followed by the buckets themselves in a
+// flexible array member.
+typedef struct {
+ uint32_t count:31, last_free:31;
+ uint8_t data_refcount:2;
+ bucket_t buckets[];
+} bucket_info_t;
+
+// Table value: an array of entries, a hash field, a pointer to the bucket
+// storage, and an optional pointer to another table used as a fallback.
+typedef struct table_s {
+ Array_t entries;
+ uint64_t hash;
+ bucket_info_t *bucket_info;
+ struct table_s *fallback;
+} Table_t;
+
+// A function pointer paired with an opaque userdata pointer.
+typedef struct {
+ void *fn, *userdata;
+} Closure_t;
+
+// Which member of the Text_t union is active.
+enum text_type { TEXT_ASCII, TEXT_GRAPHEMES, TEXT_CONCAT };
+
+// Text value: 64 bits of packed metadata (54 + 2 + 8) followed by a union whose
+// active member is selected by `tag`: an ASCII byte string, an array of 32-bit
+// grapheme values, or a pair of child texts (left/right).
+typedef struct Text_s {
+ int64_t length:54; // Number of grapheme clusters
+ uint8_t tag:2;
+ uint8_t depth:8;
+ union {
+ struct {
+ const char *ascii;
+ // char ascii_buf[8];
+ };
+ struct {
+ const int32_t *graphemes;
+ // int32_t grapheme_buf[2];
+ };
+ struct {
+ const struct Text_s *left, *right;
+ };
+ };
+} Text_t;
+
+// Patterns share the representation of Text.
+#define Pattern_t Text_t
+#define OptionalPattern_t Text_t
+
+// Kind of path. PATH_NONE is the first enumerator (value 0), and the optional
+// variant below is the same type, so a separate "none" flag is not needed.
+typedef struct {
+ enum { PATH_NONE, PATH_RELATIVE, PATH_ABSOLUTE, PATH_HOME } $tag;
+} PathType_t;
+#define OptionalPathType_t PathType_t
+
+// A path: its kind plus an array of components.
+typedef struct {
+ PathType_t type;
+ Array_t components;
+} Path_t;
+#define OptionalPath_t Path_t
+
+// A moment in time, represented as a `struct timeval`.
+typedef struct timeval Moment_t;
+#define OptionalMoment_t Moment_t
+
+// Opaque handle to random-number-generator state (defined elsewhere).
+typedef struct RNGState_t* RNG_t;
+
+// A data pointer bundled with the mutex associated with it; the typedef is a
+// pointer to the struct.
+typedef struct MutexedData_s {
+ pthread_mutex_t mutex;
+ void *data;
+} *MutexedData_t;
+
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/src/stdlib/enums.c b/src/stdlib/enums.c
new file mode 100644
index 00000000..b66a1711
--- /dev/null
+++ b/src/stdlib/enums.c
@@ -0,0 +1,120 @@
+// Metamethods for enums
+
+#include <stdint.h>
+#include <string.h>
+
+#include "arrays.h"
+#include "bools.h"
+#include "functiontype.h"
+#include "integers.h"
+#include "metamethods.h"
+#include "optionals.h"
+#include "pointers.h"
+#include "siphash.h"
+#include "tables.h"
+#include "text.h"
+#include "util.h"
+
+// Hash an enum value: siphash24 over the pair (tag, hash of the payload).
+// The payload hash stays 0 when the tag's type is missing or has zero size.
+PUREFUNC public uint64_t Enum$hash(const void *obj, const TypeInfo_t *type)
+{
+    int32_t tag = *(int32_t*)obj;
+    uint32_t parts[2] = {(uint32_t)tag, 0};
+
+    const TypeInfo_t *payload = type->EnumInfo.tags[tag-1].type;
+    if (payload && payload->size > 0) {
+        // The payload follows the 32-bit tag, rounded up to the payload's alignment.
+        ptrdiff_t offset = sizeof(int32_t);
+        if (payload->align && offset % payload->align > 0)
+            offset += payload->align - (offset % payload->align);
+        parts[1] = generic_hash(obj + offset, payload);
+    }
+
+    return siphash24((void*)parts, sizeof(parts));
+}
+
+// Three-way comparison of two enum values of the same type: ordered first by
+// tag, then (for equal tags that carry a non-empty payload) by generic_compare()
+// on the payloads.
+PUREFUNC public int32_t Enum$compare(const void *x, const void *y, const TypeInfo_t *type)
+{
+    if (x == y) return 0;
+
+    int32_t lhs_tag = *(int32_t*)x;
+    int32_t rhs_tag = *(int32_t*)y;
+    if (lhs_tag > rhs_tag) return 1;
+    if (lhs_tag < rhs_tag) return -1;
+
+    const TypeInfo_t *payload = type->EnumInfo.tags[lhs_tag-1].type;
+    if (!(payload && payload->size > 0))
+        return 0;
+
+    // The payload follows the 32-bit tag, rounded up to the payload's alignment.
+    ptrdiff_t offset = sizeof(int32_t);
+    if (payload->align && offset % payload->align > 0)
+        offset += payload->align - (offset % payload->align);
+    return generic_compare(x + offset, y + offset, payload);
+}
+
+// Equality of two enum values of the same type: the tags must match, and when
+// the tag carries a non-empty payload the payloads must be generic_equal().
+PUREFUNC public bool Enum$equal(const void *x, const void *y, const TypeInfo_t *type)
+{
+    if (x == y) return true;
+
+    int32_t lhs_tag = *(int32_t*)x;
+    int32_t rhs_tag = *(int32_t*)y;
+    if (lhs_tag != rhs_tag)
+        return false;
+
+    const TypeInfo_t *payload = type->EnumInfo.tags[lhs_tag-1].type;
+    if (!(payload && payload->size > 0))
+        return true;
+
+    // The payload follows the 32-bit tag, rounded up to the payload's alignment.
+    ptrdiff_t offset = sizeof(int32_t);
+    if (payload->align && offset % payload->align > 0)
+        offset += payload->align - (offset % payload->align);
+    return generic_equal(x + offset, y + offset, payload);
+}
+
+// Convert an enum value to text. A NULL object yields the enum type's name.
+// A tag without a payload (no type, or a zero-size type) is rendered as the
+// tag's name (bold when `colorize` is set); otherwise the payload is rendered
+// with generic_as_text().
+public Text_t Enum$as_text(const void *obj, bool colorize, const TypeInfo_t *type)
+{
+    if (obj == NULL)
+        return Text$from_str(type->EnumInfo.name);
+
+    int32_t tag = *(int32_t*)obj;
+    NamedType_t variant = type->EnumInfo.tags[tag-1];
+    if (!variant.type || variant.type->size == 0)
+        return Text$format(colorize ? "\x1b[1m%s\x1b[m" : "%s", variant.name);
+
+    // The payload follows the 32-bit tag, rounded up to the payload's alignment.
+    const TypeInfo_t *payload = variant.type;
+    ptrdiff_t offset = sizeof(int32_t);
+    if (payload->align && offset % payload->align > 0)
+        offset += payload->align - (offset % payload->align);
+    return generic_as_text(obj + offset, colorize, payload);
+}
+
+// An optional enum is "none" when its tag is 0.
+PUREFUNC public bool Enum$is_none(const void *x, const TypeInfo_t*)
+{
+    int32_t tag = *(int32_t*)x;
+    return tag == 0;
+}
+
+// Write an enum value to `out`: the tag as an Int32, followed by the payload
+// (via _serialize) when the tag carries a non-empty payload type.
+public void Enum$serialize(const void *obj, FILE *out, Table_t *pointers, const TypeInfo_t *type)
+{
+    int32_t tag = *(int32_t*)obj;
+    Int32$serialize(&tag, out, pointers, &Int32$info);
+
+    NamedType_t variant = type->EnumInfo.tags[tag-1];
+    if (!(variant.type && variant.type->size > 0))
+        return;
+
+    // The payload follows the 32-bit tag, rounded up to the payload's alignment.
+    const TypeInfo_t *payload = variant.type;
+    ptrdiff_t offset = sizeof(int32_t);
+    if (payload->align && offset % payload->align > 0)
+        offset += payload->align - (offset % payload->align);
+    _serialize(obj + offset, out, pointers, payload);
+}
+
+// Read an enum value from `in` into `outval`: the tag as an Int32, followed by
+// the payload (via _deserialize) when the tag carries a non-empty payload type.
+// NOTE(review): the tag read from the stream is used to index the tags table
+// without a range check — confirm the input is trusted.
+public void Enum$deserialize(FILE *in, void *outval, Array_t *pointers, const TypeInfo_t *type)
+{
+    int32_t tag = 0;
+    Int32$deserialize(in, &tag, pointers, &Int32$info);
+    *(int32_t*)outval = tag;
+
+    NamedType_t variant = type->EnumInfo.tags[tag-1];
+    if (!(variant.type && variant.type->size > 0))
+        return;
+
+    // The payload follows the 32-bit tag, rounded up to the payload's alignment.
+    const TypeInfo_t *payload = variant.type;
+    ptrdiff_t offset = sizeof(int32_t);
+    if (payload->align && offset % payload->align > 0)
+        offset += payload->align - (offset % payload->align);
+    _deserialize(in, outval + offset, pointers, payload);
+}
+
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/src/stdlib/enums.h b/src/stdlib/enums.h
new file mode 100644
index 00000000..b5427711
--- /dev/null
+++ b/src/stdlib/enums.h
@@ -0,0 +1,38 @@
+#pragma once
+
+// Metamethods for enums
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#include "datatypes.h"
+#include "types.h"
+#include "util.h"
+
+// Each function takes a pointer to the enum value (a 32-bit tag followed by the
+// payload for that tag) and the enum's TypeInfo_t.
+PUREFUNC uint64_t Enum$hash(const void *obj, const TypeInfo_t *type);
+PUREFUNC int32_t Enum$compare(const void *x, const void *y, const TypeInfo_t *type);
+PUREFUNC bool Enum$equal(const void *x, const void *y, const TypeInfo_t *type);
+// Declared without PUREFUNC to match its definition in enums.c.
+Text_t Enum$as_text(const void *obj, bool colorize, const TypeInfo_t *type);
+PUREFUNC bool Enum$is_none(const void *obj, const TypeInfo_t *type);
+void Enum$serialize(const void *obj, FILE *out, Table_t *pointers, const TypeInfo_t *type);
+void Enum$deserialize(FILE *in, void *outval, Array_t *pointers, const TypeInfo_t *type);
+
+// Brace initializer for the metamethods table of an enum type, using the
+// Enum$ functions for every slot.
+#define Enum$metamethods { \
+ .as_text=Enum$as_text, \
+ .compare=Enum$compare, \
+ .equal=Enum$equal, \
+ .hash=Enum$hash, \
+ .is_none=Enum$is_none, \
+ .serialize=Enum$serialize, \
+ .deserialize=Enum$deserialize, \
+}
+
+// Variant of the table above that uses PackedData$hash and PackedData$equal for
+// hashing and equality; every other slot is the same as Enum$metamethods.
+// NOTE(review): presumably for enums whose in-memory bytes can be hashed and
+// compared directly — confirm against the PackedData$ implementations.
+#define PackedDataEnum$metamethods { \
+ .hash=PackedData$hash, \
+ .compare=Enum$compare, \
+ .equal=PackedData$equal, \
+ .as_text=Enum$as_text, \
+ .is_none=Enum$is_none, \
+ .serialize=Enum$serialize, \
+ .deserialize=Enum$deserialize, \
+}
+
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/src/stdlib/files.c b/src/stdlib/files.c
new file mode 100644
index 00000000..cf777689
--- /dev/null
+++ b/src/stdlib/files.c
@@ -0,0 +1,335 @@
+//
+// files.c - Implementation of some file loading functionality.
+//
+
+#include <ctype.h>
+#include <err.h>
+#include <fcntl.h>
+#include <gc.h>
+#include <libgen.h>
+#include <limits.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/param.h>
+
+#include "files.h"
+#include "util.h"
+
+static const int tabstop = 4; // Column width that fputc_column() expands a tab character to
+
+//
+// Resolve `path` to a canonical absolute path using realpath().
+//  - "~" and "~/..." are expanded with $HOME.
+//  - ".", "./..." and "../..." are taken relative to the directory of
+//    `relative_to` (or "." if that is NULL or "/dev/stdin").
+//  - Paths starting with "/" are resolved as-is.
+//  - Anything else is searched for in each ':'-separated entry of
+//    `system_path` (default "."), returning the first entry that resolves.
+// Returns a GC-allocated string, or NULL if nothing resolves.
+//
+public char *resolve_path(const char *path, const char *relative_to, const char *system_path)
+{
+    if (!relative_to || streq(relative_to, "/dev/stdin")) relative_to = ".";
+    if (!path || strlen(path) == 0) return NULL;
+
+    // $HOME may be unset; never hand a NULL to a "%s" format
+    const char *home = getenv("HOME");
+
+    // Resolve the path to an absolute path, assuming it's relative to the file
+    // it was found in:
+    char buf[PATH_MAX] = {0};
+    if (streq(path, "~") || starts_with(path, "~/")) {
+        if (!home) return NULL;
+        char *resolved = realpath(heap_strf("%s%s", home, path+1), buf);
+        if (resolved) return GC_strdup(resolved);
+    } else if (streq(path, ".") || starts_with(path, "./") || starts_with(path, "../")) {
+        char *relative_dir = dirname(GC_strdup(relative_to));
+        char *resolved = realpath(heap_strf("%s/%s", relative_dir, path), buf);
+        if (resolved) return GC_strdup(resolved);
+    } else if (path[0] == '/') {
+        // Absolute path:
+        char *resolved = realpath(path, buf);
+        if (resolved) return GC_strdup(resolved);
+    } else {
+        // Relative path:
+        char *relative_dir = dirname(GC_strdup(relative_to));
+        if (!system_path) system_path = ".";
+        char *copy = GC_strdup(system_path);
+        for (char *dir, *pos = copy; (dir = strsep(&pos, ":")); ) {
+            const char *candidate;
+            if (dir[0] == '~' && (dir[1] == '\0' || dir[1] == '/')) {
+                // Home-relative search directory; unusable without $HOME
+                if (!home) continue;
+                candidate = heap_strf("%s%s/%s", home, dir+1, path);
+            } else if (streq(dir, ".") || streq(dir, "..") || strncmp(dir, "./", 2) == 0 || strncmp(dir, "../", 3) == 0) {
+                // Search directory relative to the referencing file. The whole
+                // of `dir` is kept so that "./lib" searches relative_dir/lib
+                // rather than relative_dir itself.
+                candidate = heap_strf("%s/%s/%s", relative_dir, dir, path);
+            } else {
+                // Absolute directory, or a plain name relative to the working directory
+                candidate = heap_strf("%s/%s", dir, path);
+            }
+            char *resolved = realpath(candidate, buf);
+            if (resolved) return GC_strdup(resolved);
+        }
+    }
+    return NULL;
+}
+
+//
+// Return a GC-allocated copy of the file name in `path`, without its
+// directory part and without anything from the first '.' onward.
+//
+public char *file_base_name(const char *path)
+{
+    const char *slash = strrchr(path, '/');
+    if (slash) path = slash + 1;
+    // <ctype.h> functions need a value representable as unsigned char
+    assert(!isdigit((unsigned char)*path));
+    const char *end = strchrnul(path, '.');
+    size_t len = (size_t)(end - path);
+    char *buf = GC_MALLOC_ATOMIC(len+1);
+    memcpy(buf, path, len);
+    buf[len] = '\0';
+    return buf;
+}
+
+//
+// Like file_base_name(), but every character that is not alphanumeric is
+// replaced with '_'.
+//
+public char *file_base_id(const char *path)
+{
+    // file_base_name() returns a fresh buffer, so it can be edited in place
+    char *buf = file_base_name(path);
+    for (char *p = buf; *p; p++) {
+        // Cast first: bytes >= 0x80 are negative when char is signed
+        if (!isalnum((unsigned char)*p))
+            *p = '_';
+    }
+    return buf;
+}
+
+//
+// Read all of `file` into a new file_t, recording the byte offset at which
+// each line starts. `file` is closed before returning. Returns NULL if `file`
+// is NULL or the in-memory buffer stream cannot be created.
+//
+static file_t *_load_file(const char* filename, FILE *file)
+{
+    if (!file) return NULL;
+
+    file_t *ret = new(file_t, .filename=filename);
+
+    size_t file_size = 0, line_cap = 0;
+    char *file_buf = NULL, *line_buf = NULL;
+    FILE *mem = open_memstream(&file_buf, &file_size);
+    if (!mem) {
+        fclose(file);
+        return NULL;
+    }
+    int64_t line_len = 0;
+    while ((line_len = getline(&line_buf, &line_cap, file)) >= 0) {
+        if (ret->line_capacity <= ret->num_lines) {
+            ret->line_capacity += 32;
+            ret->line_offsets = GC_REALLOC(ret->line_offsets, sizeof(int64_t) * (size_t)ret->line_capacity);
+        }
+        // file_size is current here because the stream is flushed after every line
+        ret->line_offsets[ret->num_lines++] = (int64_t)file_size;
+        fwrite(line_buf, sizeof(char), (size_t)line_len, mem);
+        fflush(mem);
+    }
+    free(line_buf); // getline() allocates this with malloc()
+    fclose(file);
+    fclose(mem); // Finalizes file_buf and file_size
+
+    char *copy = GC_MALLOC_ATOMIC(file_size+1);
+    if (file_size > 0)
+        memcpy(copy, file_buf, file_size);
+    copy[file_size] = '\0';
+    ret->text = copy;
+    ret->len = (int64_t)file_size;
+
+    free(file_buf);
+    ret->relative_filename = filename;
+    if (filename && filename[0] != '<' && !streq(filename, "/dev/stdin")) {
+        // Convert to relative path (if applicable). Both lookups can fail
+        // (e.g. the file does not exist on disk), in which case the name is
+        // left as given.
+        const char *resolved = resolve_path(filename, ".", ".");
+        char buf[PATH_MAX];
+        char *cwd = getcwd(buf, sizeof(buf));
+        if (resolved && cwd) {
+            size_t cwd_len = strlen(cwd);
+            if (strncmp(cwd, resolved, cwd_len) == 0 && resolved[cwd_len] == '/')
+                ret->relative_filename = &resolved[cwd_len+1];
+        }
+    }
+    return ret;
+}
+
+//
+// Load the named file into memory. An empty filename means "read stdin".
+//
+public file_t *load_file(const char* filename)
+{
+    FILE *input;
+    if (filename[0] == '\0')
+        input = stdin;
+    else
+        input = fopen(filename, "r");
+    return _load_file(filename, input);
+}
+
+//
+// Build a file_t from an in-memory string, as if it had been read from a
+// file called `filename`.
+//
+public file_t *spoof_file(const char* filename, const char *text)
+{
+    // The stream covers the text and its NUL terminator
+    size_t stream_size = strlen(text) + 1;
+    FILE *stream = fmemopen((char*)text, stream_size, "r");
+    return _load_file(filename, stream);
+}
+
+//
+// Find the 1-indexed number of the line containing `p`, by binary search
+// over the line start offsets. Returns 0 if `p` is before the file's text.
+//
+public int64_t get_line_number(file_t *f, const char *p)
+{
+    if (p < f->text) return 0;
+
+    const int64_t target = (int64_t)(p - f->text);
+    int64_t low = 0, high = (int64_t)f->num_lines-1;
+    while (low <= high) {
+        const int64_t middle = (low + high) / 2;
+        const int64_t line_start = f->line_offsets[middle];
+        if (line_start == target)
+            return middle + 1;
+
+        if (line_start < target)
+            low = middle + 1;
+        else
+            high = middle - 1;
+    }
+    // No line starts exactly at p: `low` is the number of lines that start before it
+    return low;
+}
+
+//
+// Given a pointer, determine which line column it points to (1-indexed).
+// Returns 0 if no line contains `p` (get_line_number() reported line 0).
+//
+public int64_t get_line_column(file_t *f, const char *p)
+{
+    int64_t line_no = get_line_number(f, p);
+    // Line 0 has no entry in line_offsets
+    if (line_no < 1) return 0;
+    int64_t line_offset = f->line_offsets[line_no-1];
+    return 1 + (int64_t)(p - (f->text + line_offset));
+}
+
+//
+// Return a pointer to the line with the specified line number (1-indexed),
+// or NULL if the number is below 1 or past the last line.
+//
+public const char *get_line(file_t *f, int64_t line_number)
+{
+    if (line_number < 1 || line_number > (int64_t)f->num_lines) return NULL;
+    int64_t line_offset = f->line_offsets[line_number-1];
+    return f->text + line_offset;
+}
+
+//
+// Describe the position of `p` as "filename:line:column".
+//
+public const char *get_file_pos(file_t *f, const char *p)
+{
+    const int64_t line = get_line_number(f, p);
+    const int64_t column = get_line_column(f, p);
+    return heap_strf("%s:%ld:%ld", f->filename, line, column);
+}
+
+//
+// Write `print_char` to `out` in the place of source character `c`, keeping
+// `*column` up to date. If `c` is a tab, `print_char` is repeated up to the
+// next tab stop. A tab passed as `print_char` is written as a space.
+// Returns the number of characters successfully written.
+//
+static int fputc_column(FILE *out, char c, char print_char, int *column)
+{
+    int printed = 0;
+    if (print_char == '\t') print_char = ' ';
+    int width = (c == '\t') ? tabstop - (*column % tabstop) : 1;
+    for (; width > 0; --width) {
+        // fputc() returns the character written (or EOF), not a count
+        if (fputc(print_char, out) != EOF)
+            ++printed;
+        ++*column;
+    }
+    return printed;
+}
+
+//
+// Print a span from a file to stderr, preceded by the file's name.
+//  - start/end: the span to highlight. An endpoint outside this file's text
+//    is replaced by the other one (or by the start of the text if both are
+//    outside).
+//  - hl_color: escape sequence written before highlighted text (ignored when
+//    use_color is false, where a marker line is printed under the span's
+//    first line instead).
+//  - context_lines: 0 prints only the span itself; otherwise
+//    context_lines-1 extra lines are shown on each side, with long runs
+//    abbreviated.
+// Returns the sum of the counts reported by the individual output calls.
+//
+public int highlight_error(file_t *file, const char *start, const char *end, const char *hl_color, int64_t context_lines, bool use_color)
+{
+    if (!file) return 0;
+
+    // Handle spans that come from multiple files:
+    if (start < file->text || start > file->text + file->len)
+        start = end;
+    if (end < file->text || end > file->text + file->len)
+        end = start;
+    // Just in case neither end of the span came from this file:
+    if (end < file->text || end > file->text + file->len)
+        start = end = file->text;
+
+    const char *lineno_fmt, *normal_color, *empty_marker;
+    bool print_carets = false;
+    int printed = 0;
+    if (use_color) {
+        lineno_fmt = "\x1b[0;2m%*lu\x1b(0\x78\x1b(B\x1b[m ";
+        normal_color = "\x1b[m";
+        empty_marker = "\x1b(0\x61\x1b(B";
+        printed += fprintf(stderr, "\x1b[33;4;1m%s\x1b[m\n", file->relative_filename);
+    } else {
+        lineno_fmt = "%*lu| ";
+        hl_color = "";
+        normal_color = "";
+        empty_marker = " ";
+        print_carets = true;
+        printed += fprintf(stderr, "%s\n", file->relative_filename);
+    }
+
+    // The header printed above counts towards the total here as well
+    if (context_lines == 0)
+        return printed + fprintf(stderr, "%s%.*s%s", hl_color, (int)(end - start), start, normal_color);
+
+    int64_t start_line = get_line_number(file, start),
+            end_line = get_line_number(file, end);
+
+    int64_t first_line = start_line - (context_lines - 1),
+            last_line = end_line + (context_lines - 1);
+
+    if (first_line < 1) first_line = 1;
+    if (last_line > file->num_lines) last_line = file->num_lines;
+
+    int digits = 1;
+    for (int64_t i = last_line; i > 0; i /= 10) ++digits;
+
+    for (int64_t line_no = first_line; line_no <= last_line; ++line_no) {
+        // Keep six lines at each end of a long range and summarize the rest
+        if (line_no > first_line + 5 && line_no < last_line - 5) {
+            long omitted = (long)((last_line - first_line) - 11);
+            if (use_color)
+                printed += fprintf(stderr, "\x1b[0;2;3;4m ... %ld lines omitted ... \x1b[m\n", omitted);
+            else
+                printed += fprintf(stderr, " ... %ld lines omitted ...\n", omitted);
+            line_no = last_line - 6;
+            continue;
+        }
+
+        printed += fprintf(stderr, lineno_fmt, digits, (unsigned long)line_no);
+        const char *line = get_line(file, line_no);
+        if (!line) break;
+
+        int column = 0;
+        const char *p = line;
+        // Before match
+        for (; *p && *p != '\r' && *p != '\n' && p < start; ++p)
+            printed += fputc_column(stderr, *p, *p, &column);
+
+        // Zero-width matches
+        if (p == start && start == end) {
+            printed += fprintf(stderr, "%s%s%s", hl_color, empty_marker, normal_color);
+            column += 1;
+        }
+
+        // Inside match
+        if (start <= p && p < end) {
+            // fprintf() is used instead of fputs() because it reports how much it wrote
+            printed += fprintf(stderr, "%s", hl_color);
+            for (; *p && *p != '\r' && *p != '\n' && p < end; ++p)
+                printed += fputc_column(stderr, *p, *p, &column);
+            printed += fprintf(stderr, "%s", normal_color);
+        }
+
+        // After match
+        for (; *p && *p != '\r' && *p != '\n'; ++p)
+            printed += fputc_column(stderr, *p, *p, &column);
+
+        printed += fprintf(stderr, "\n");
+
+        // Without color, mark the span on a separate line under the line it starts on
+        const char *eol = strchrnul(line, '\n');
+        if (print_carets && start >= line && start < eol) {
+            // Blank gutter as wide as the line number, then ": "
+            printed += fprintf(stderr, "%*s: ", digits, "");
+            int col = 0;
+            for (const char *sp = line; *sp && *sp != '\n'; ++sp) {
+                char print_char;
+                if (sp < start)
+                    print_char = ' ';
+                else if (sp == start && sp == end)
+                    print_char = '^';
+                else if (sp >= start && sp < end)
+                    print_char = '-';
+                else
+                    print_char = ' ';
+                printed += fputc_column(stderr, *sp, print_char, &col);
+            }
+            printed += fprintf(stderr, "\n");
+        }
+    }
+    fflush(stderr);
+    return printed;
+}
+
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/src/stdlib/files.h b/src/stdlib/files.h
new file mode 100644
index 00000000..68827c2a
--- /dev/null
+++ b/src/stdlib/files.h
@@ -0,0 +1,37 @@
+//
+// files.h - Definitions of an API for loading files.
+//
+#pragma once
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <unistd.h>
+
+typedef struct { // A file's contents held in memory, plus an index of where each line starts
+ const char *filename, *relative_filename; // Name as given to the loader; name relative to the working directory when one could be derived
+ const char *text; // NUL-terminated contents
+ int64_t len; // Number of bytes in text, not counting the terminator
+ int64_t num_lines, line_capacity; // Used and allocated entries of line_offsets
+ int64_t *line_offsets; // Byte offset into text at which each line begins
+} file_t;
+
+char *resolve_path(const char *path, const char *relative_to, const char *system_path); // GC-allocated canonical path, or NULL if nothing resolves
+__attribute__((pure, nonnull))
+char *file_base_name(const char *path); // File name without its directory or anything from the first '.' onward
+__attribute__((pure, nonnull))
+char *file_base_id(const char *path); // As file_base_name(), with non-alphanumeric characters replaced by '_'
+__attribute__((nonnull))
+file_t *load_file(const char *filename); // Reads stdin when filename is the empty string
+__attribute__((nonnull, returns_nonnull))
+file_t *spoof_file(const char *filename, const char *text); // Builds a file_t from an in-memory string
+__attribute__((pure, nonnull))
+int64_t get_line_number(file_t *f, const char *p); // 1-indexed; 0 if p is before the start of the text
+__attribute__((pure, nonnull))
+int64_t get_line_column(file_t *f, const char *p); // 1-indexed column of p within its line
+__attribute__((pure, nonnull))
+const char *get_line(file_t *f, int64_t line_number); // Start of the given 1-indexed line; NULL for line 0 or past the last line
+__attribute__((pure, nonnull))
+const char *get_file_pos(file_t *f, const char *p); // Formatted as filename:line:column
+int highlight_error(file_t *file, const char *start, const char *end, const char *hl_color, int64_t context_lines, bool use_color); // Prints the span [start, end) with context to stderr
+
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/src/stdlib/functiontype.c b/src/stdlib/functiontype.c
new file mode 100644
index 00000000..b02739a2
--- /dev/null
+++ b/src/stdlib/functiontype.c
@@ -0,0 +1,97 @@
+// Logic for handling function type values
+
+#include <stdbool.h>
+
+#include "datatypes.h"
+#include "functiontype.h"
+#include "optionals.h"
+#include "structs.h"
+#include "tables.h"
+#include "text.h"
+#include "types.h"
+#include "util.h"
+
+typedef struct { // Source-location record kept for each registered function
+ Text_t filename, name; // File and function name, as passed to register_function()
+ int64_t line_num; // Line number, as passed to register_function()
+} func_info_t;
+
+static NamedType_t fields[] = { // Field list for func_info_type, in the same order as func_info_t's members
+ {.name="filename", .type=&Text$info},
+ {.name="name", .type=&Text$info},
+ {.name="line_num", .type=&Int64$info},
+};
+
+static const TypeInfo_t func_info_type = {.size=sizeof(func_info_t), .align=__alignof__(func_info_t), .metamethods=Struct$metamethods, // Struct type descriptor for func_info_t
+ .tag=StructInfo, .StructInfo.name="FuncInfo",
+ .StructInfo.num_fields=3, .StructInfo.fields=fields};
+static Table_t function_info = {}; // Maps a function pointer to its func_info_t; filled by register_function()
+
+// Record the source location and name of `fn` so it can be looked up later by address.
+public void register_function(void *fn, Text_t filename, int64_t line_num, Text_t name)
+{
+    func_info_t record = {.filename=filename, .name=name, .line_num=line_num};
+    const TypeInfo_t *table_type = Table$info(Function$info("???"), &func_info_type);
+    Table$set(&function_info, &fn, &record, table_type);
+}
+
+// Look up the record for `fn`. If `fn` itself was never registered, fall back
+// to the registered function with the highest address that does not exceed
+// `fn`. Returns NULL if there is no such function.
+PUREFUNC static func_info_t *get_function_info(void *fn)
+{
+    func_info_t *exact = Table$get(function_info, &fn, Table$info(Function$info("???"), &func_info_type));
+    if (exact) return exact;
+
+    func_info_t *best = NULL;
+    void *best_fn = NULL;
+    for (int64_t i = 0; i < function_info.entries.length; i++) {
+        struct { void *fn; func_info_t info; } *entry = function_info.entries.data + i*function_info.entries.stride;
+        // Keep entries at or below `fn` that are no lower than the best one so far
+        if (entry->fn <= fn && entry->fn >= best_fn) {
+            best_fn = entry->fn;
+            best = &entry->info;
+        }
+    }
+    return best;
+}
+
+// Name recorded for `fn`, or NONE_TEXT if no record is found.
+PUREFUNC public OptionalText_t get_function_name(void *fn)
+{
+    func_info_t *found = get_function_info(fn);
+    if (!found) return NONE_TEXT;
+    return found->name;
+}
+
+// File name recorded for `fn`, or NONE_TEXT if no record is found.
+PUREFUNC public OptionalText_t get_function_filename(void *fn)
+{
+    func_info_t *found = get_function_info(fn);
+    if (!found) return NONE_TEXT;
+    return found->filename;
+}
+
+// Line number recorded for `fn`, or -1 if no record is found.
+PUREFUNC public int64_t get_function_line_num(void *fn)
+{
+    func_info_t *found = get_function_info(fn);
+    if (!found) return -1;
+    return found->line_num;
+}
+
+// Text form of a function value. With a NULL `fn` this is just the type
+// string. Otherwise the registered name (if any) replaces the type string,
+// a " [file:line]" suffix is added when a file name is known, and the result
+// is wrapped in color escapes when `colorize` is set.
+public Text_t Func$as_text(const void *fn, bool colorize, const TypeInfo_t *type)
+{
+    Text_t text = Text$from_str(type->FunctionInfo.type_str);
+    if (!fn) return text;
+
+    // One lookup provides the name, file name and line number
+    func_info_t *info = get_function_info(*(void**)fn);
+    if (info) {
+        if (info->name.length >= 0)
+            text = info->name;
+
+        OptionalText_t filename = info->filename;
+        if (filename.length >= 0)
+            text = Text$format("%k [%k:%ld]", &text, &filename, info->line_num);
+    }
+    if (colorize)
+        text = Text$concat(Text("\x1b[32;1m"), text, Text("\x1b[m"));
+    return text;
+}
+
+// A function value is "none" when the stored function pointer is NULL.
+public PUREFUNC bool Func$is_none(const void *obj, const TypeInfo_t*)
+{
+    void *stored_fn = *(void**)obj;
+    return stored_fn == NULL;
+}
+
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/src/stdlib/functiontype.h b/src/stdlib/functiontype.h
new file mode 100644
index 00000000..d308be96
--- /dev/null
+++ b/src/stdlib/functiontype.h
@@ -0,0 +1,34 @@
+#pragma once
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "metamethods.h"
+#include "optionals.h"
+#include "types.h"
+#include "util.h"
+
+// Logic for handling function type values
+
+void register_function(void *fn, Text_t filename, int64_t line_num, Text_t name); // Records where `fn` was defined and what it is called
+OptionalText_t get_function_name(void *fn); // Recorded name, or none
+OptionalText_t get_function_filename(void *fn); // Recorded file name, or none
+int64_t get_function_line_num(void *fn); // Recorded line number, or -1
+Text_t Func$as_text(const void *fn, bool colorize, const TypeInfo_t *type); // .as_text slot of Func$metamethods
+PUREFUNC bool Func$is_none(const void *obj, const TypeInfo_t*); // .is_none slot of Func$metamethods
+
+#define Func$metamethods { /* Metamethod initializer for function values; the (de)serialize slots point at cannot_serialize/cannot_deserialize */ \
+ .as_text=Func$as_text, \
+ .is_none=Func$is_none, \
+ .serialize=cannot_serialize, \
+ .deserialize=cannot_deserialize, \
+}
+
+#define Function$info(typestr) &((TypeInfo_t){.size=sizeof(void*), .align=__alignof__(void*), /* Pointer to a TypeInfo_t literal sized as one pointer */ \
+ .tag=FunctionInfo, .FunctionInfo.type_str=typestr, \
+ .metamethods=Func$metamethods})
+#define Closure$info(typestr) &((TypeInfo_t){.size=sizeof(void*[2]), .align=__alignof__(void*), /* As Function$info, but sized as two pointers */ \
+ .tag=FunctionInfo, .FunctionInfo.type_str=typestr, \
+ .metamethods=Func$metamethods})
+
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/src/stdlib/integers.c b/src/stdlib/integers.c
new file mode 100644
index 00000000..4d5d0a80
--- /dev/null
+++ b/src/stdlib/integers.c
@@ -0,0 +1,652 @@
+// Integer type infos and methods
+#include <stdio.h> // Must be before gmp.h
+
+#include <ctype.h>
+#include <gc.h>
+#include <gmp.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "arrays.h"
+#include "datatypes.h"
+#include "integers.h"
+#include "optionals.h"
+#include "siphash.h"
+#include "text.h"
+#include "types.h"
+
+// Decimal text of an Int, for both the small (tagged) and big (GMP) representations.
+public Text_t Int$value_as_text(Int_t i) {
+    if (unlikely(!(i.small & 1L))) {
+        char *decimal = mpz_get_str(NULL, 10, *i.big);
+        return Text$from_str(decimal);
+    }
+    return Text$format("%ld", (i.small)>>2L);
+}
+
+// as_text metamethod: a NULL pointer yields the type name "Int"; otherwise the
+// value's decimal text, wrapped in color escapes when `colorize` is set.
+public Text_t Int$as_text(const void *i, bool colorize, const TypeInfo_t*) {
+    if (!i) return Text("Int");
+    Text_t plain = Int$value_as_text(*(Int_t*)i);
+    if (!colorize) return plain;
+    return Text$concat(Text("\x1b[35m"), plain, Text("\x1b[m"));
+}
+
+// An Int is "none" when its raw word is zero.
+static bool Int$is_none(const void *i, const TypeInfo_t*)
+{
+    const Int_t *value = (const Int_t*)i;
+    return value->small == 0L;
+}
+
+// Three-way comparison of two Ints; the result is always -1, 0 or 1.
+public PUREFUNC int32_t Int$compare_value(const Int_t x, const Int_t y) {
+    // Two small ints: the tagged words order the same way as the values
+    if (likely(x.small & y.small & 1L))
+        return (x.small > y.small) - (x.small < y.small);
+
+    if (x.small & 1L) {
+        // Compare against the untagged value, not the tagged word
+        int y_vs_x = mpz_cmp_si(*y.big, x.small >> 2L);
+        return (y_vs_x < 0) - (y_vs_x > 0);
+    }
+
+    int x_vs_y;
+    if (y.small & 1L)
+        x_vs_y = mpz_cmp_si(*x.big, y.small >> 2L);
+    else
+        x_vs_y = (x.big == y.big) ? 0 : mpz_cmp(*x.big, *y.big);
+    // GMP only promises the sign of its result, so normalize it
+    return (x_vs_y > 0) - (x_vs_y < 0);
+}
+
+// compare metamethod: dereference both operands and compare by value.
+public PUREFUNC int32_t Int$compare(const void *x, const void *y, const TypeInfo_t*) {
+    const Int_t lhs = *(Int_t*)x;
+    const Int_t rhs = *(Int_t*)y;
+    return Int$compare_value(lhs, rhs);
+}
+
+// Whether two Ints hold the same value.
+public PUREFUNC bool Int$equal_value(const Int_t x, const Int_t y) {
+    // If either operand is small, the raw words are compared directly
+    if (likely((x.small | y.small) & 1L))
+        return x.small == y.small;
+    // Two big ints: the same object is trivially equal, otherwise ask GMP
+    return x.big == y.big || mpz_cmp(*x.big, *y.big) == 0;
+}
+
+// equal metamethod: dereference both operands and test equality by value.
+public PUREFUNC bool Int$equal(const void *x, const void *y, const TypeInfo_t*) {
+    const Int_t lhs = *(Int_t*)x;
+    const Int_t rhs = *(Int_t*)y;
+    return Int$equal_value(lhs, rhs);
+}
+
+// hash metamethod: small ints hash their raw bytes, big ints hash their
+// hexadecimal digit string.
+public PUREFUNC uint64_t Int$hash(const void *vx, const TypeInfo_t*) {
+    Int_t *value = (Int_t*)vx;
+    if (unlikely(!(value->small & 1L))) {
+        char *hex_digits = mpz_get_str(NULL, 16, *value->big);
+        return siphash24((void*)hex_digits, strlen(hex_digits));
+    }
+    return siphash24((void*)value, sizeof(Int_t));
+}
+
+// Decimal text of `i`, zero-padded to at least `digits_int` digits (a leading
+// '-' is not counted as a digit).
+public Text_t Int$format(Int_t i, Int_t digits_int) {
+    int64_t digits = Int64$from_int(digits_int, false);
+    if (likely(i.small & 1L)) {
+        // A ".*" precision takes an int argument
+        return Text$format("%0.*ld", (int)digits, (int64_t)((i.small)>>2L));
+    } else {
+        char *str = mpz_get_str(NULL, 10, *i.big);
+        bool negative = (str[0] == '-');
+        // Count digits only, so negative values are padded like positive ones
+        const char *magnitude = negative ? str + 1 : str;
+        int64_t needed_zeroes = digits - (int64_t)strlen(magnitude);
+        if (needed_zeroes <= 0)
+            return Text$from_str(str);
+
+        // One extra byte: Text$from_str() takes a NUL-terminated string
+        char *zeroes = GC_MALLOC_ATOMIC((size_t)(needed_zeroes) + 1);
+        memset(zeroes, '0', (size_t)(needed_zeroes));
+        zeroes[needed_zeroes] = '\0';
+        if (negative)
+            return Text$concat(Text("-"), Text$from_str(zeroes), Text$from_str(magnitude));
+        else
+            return Text$concat(Text$from_str(zeroes), Text$from_str(str));
+    }
+}
+
+// Hexadecimal text of `i`, zero-padded to at least `digits_int` digits, with
+// optional upper-case digits and "0x" prefix. Negative values are written as
+// "-" followed by the text of their magnitude.
+public Text_t Int$hex(Int_t i, Int_t digits_int, bool uppercase, bool prefix) {
+    if (Int$is_negative(i))
+        return Text$concat(Text("-"), Int$hex(Int$negative(i), digits_int, uppercase, prefix));
+
+    int64_t digits = Int64$from_int(digits_int, false);
+    if (likely(i.small & 1L)) {
+        const char *hex_fmt = uppercase ? (prefix ? "0x%0.*lX" : "%0.*lX") : (prefix ? "0x%0.*lx" : "%0.*lx");
+        // A ".*" precision takes an int argument
+        return Text$format(hex_fmt, (int)digits, (i.small)>>2L);
+    } else {
+        char *str = mpz_get_str(NULL, 16, *i.big);
+        if (uppercase) {
+            for (char *c = str; *c; c++)
+                *c = (char)toupper((unsigned char)*c);
+        }
+        int64_t needed_zeroes = digits - (int64_t)strlen(str);
+        if (needed_zeroes <= 0)
+            return prefix ? Text$concat(Text("0x"), Text$from_str(str)) : Text$from_str(str);
+
+        // One extra byte: Text$from_str() takes a NUL-terminated string
+        char *zeroes = GC_MALLOC_ATOMIC((size_t)(needed_zeroes) + 1);
+        memset(zeroes, '0', (size_t)(needed_zeroes));
+        zeroes[needed_zeroes] = '\0';
+        if (prefix)
+            return Text$concat(Text("0x"), Text$from_str(zeroes), Text$from_str(str));
+        else
+            return Text$concat(Text$from_str(zeroes), Text$from_str(str));
+    }
+}
+
+// Octal text of `i`, zero-padded to at least `digits_int` digits, with an
+// optional "0o" prefix. Negative values are written as "-" followed by the
+// text of their magnitude.
+public Text_t Int$octal(Int_t i, Int_t digits_int, bool prefix) {
+    if (Int$is_negative(i))
+        return Text$concat(Text("-"), Int$octal(Int$negative(i), digits_int, prefix));
+
+    int64_t digits = Int64$from_int(digits_int, false);
+    if (likely(i.small & 1L)) {
+        const char *octal_fmt = prefix ? "0o%0.*lo" : "%0.*lo";
+        // A ".*" precision takes an int argument
+        return Text$format(octal_fmt, (int)digits, (i.small)>>2L);
+    } else {
+        char *str = mpz_get_str(NULL, 8, *i.big);
+        int64_t needed_zeroes = digits - (int64_t)strlen(str);
+        if (needed_zeroes <= 0)
+            return prefix ? Text$concat(Text("0o"), Text$from_str(str)) : Text$from_str(str);
+
+        // One extra byte: Text$from_str() takes a NUL-terminated string
+        char *zeroes = GC_MALLOC_ATOMIC((size_t)(needed_zeroes) + 1);
+        memset(zeroes, '0', (size_t)(needed_zeroes));
+        zeroes[needed_zeroes] = '\0';
+        if (prefix)
+            return Text$concat(Text("0o"), Text$from_str(zeroes), Text$from_str(str));
+        else
+            return Text$concat(Text$from_str(zeroes), Text$from_str(str));
+    }
+}
+
+// x + y computed with GMP.
+public Int_t Int$slow_plus(Int_t x, Int_t y) {
+    mpz_t sum;
+    mpz_init_set_int(sum, x);
+    if (!(y.small & 1L)) {
+        mpz_add(sum, sum, *y.big);
+    } else if (y.small < 0L) {
+        // The *_ui functions take a magnitude, so add a negative by subtracting
+        mpz_sub_ui(sum, sum, (uint64_t)(-(y.small >> 2L)));
+    } else {
+        mpz_add_ui(sum, sum, (uint64_t)(y.small >> 2L));
+    }
+    return Int$from_mpz(sum);
+}
+
+// x - y computed with GMP.
+public Int_t Int$slow_minus(Int_t x, Int_t y) {
+    mpz_t difference;
+    mpz_init_set_int(difference, x);
+    if (!(y.small & 1L)) {
+        mpz_sub(difference, difference, *y.big);
+    } else if (y.small < 0L) {
+        // The *_ui functions take a magnitude, so subtract a negative by adding
+        mpz_add_ui(difference, difference, (uint64_t)(-(y.small >> 2L)));
+    } else {
+        mpz_sub_ui(difference, difference, (uint64_t)(y.small >> 2L));
+    }
+    return Int$from_mpz(difference);
+}
+
+// x * y computed with GMP.
+public Int_t Int$slow_times(Int_t x, Int_t y) {
+    mpz_t product;
+    mpz_init_set_int(product, x);
+    if (!(y.small & 1L)) {
+        mpz_mul(product, product, *y.big);
+    } else {
+        mpz_mul_si(product, product, y.small >> 2L);
+    }
+    return Int$from_mpz(product);
+}
+
+// dividend / divisor computed with GMP.
+public Int_t Int$slow_divided_by(Int_t dividend, Int_t divisor) {
+    // Euclidean division, see: https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/divmodnote-letter.pdf
+    mpz_t q, r;
+    mpz_init_set_int(q, dividend);
+    mpz_init_set_int(r, divisor);
+    // Truncating division; q and r are overwritten with quotient and remainder
+    mpz_tdiv_qr(q, r, q, r);
+    if (mpz_sgn(r) >= 0)
+        return Int$from_mpz(q);
+
+    // Negative remainder: adjust the quotient by one, in the direction given
+    // by the divisor's sign
+    bool divisor_is_positive;
+    if (likely(divisor.small & 1L))
+        divisor_is_positive = divisor.small > 0x1L;
+    else
+        divisor_is_positive = mpz_sgn(*divisor.big) > 0;
+
+    if (divisor_is_positive)
+        mpz_sub_ui(q, q, 1);
+    else
+        mpz_add_ui(q, q, 1);
+    return Int$from_mpz(q);
+}
+
+// x mod modulus computed with mpz_mod().
+public Int_t Int$slow_modulo(Int_t x, Int_t modulus)
+{
+    mpz_t remainder, divisor;
+    mpz_init_set_int(remainder, x);
+    mpz_init_set_int(divisor, modulus);
+    mpz_mod(remainder, remainder, divisor);
+    return Int$from_mpz(remainder);
+}
+
+// 1-based modulo: ((x - 1) mod modulus) + 1, computed with GMP.
+public Int_t Int$slow_modulo1(Int_t x, Int_t modulus)
+{
+    mpz_t wrapped, divisor;
+    mpz_init_set_int(wrapped, x);
+    mpz_init_set_int(divisor, modulus);
+    mpz_sub_ui(wrapped, wrapped, 1);
+    mpz_mod(wrapped, wrapped, divisor);
+    mpz_add_ui(wrapped, wrapped, 1);
+    return Int$from_mpz(wrapped);
+}
+
+// x << y computed as x * 2^y with GMP.
+public Int_t Int$slow_left_shifted(Int_t x, Int_t y)
+{
+    const mp_bitcnt_t shift = (mp_bitcnt_t)Int64$from_int(y, false);
+    mpz_t shifted;
+    mpz_init_set_int(shifted, x);
+    mpz_mul_2exp(shifted, shifted, shift);
+    return Int$from_mpz(shifted);
+}
+
+// x >> y computed as a truncating division by 2^y with GMP.
+public Int_t Int$slow_right_shifted(Int_t x, Int_t y)
+{
+    const mp_bitcnt_t shift = (mp_bitcnt_t)Int64$from_int(y, false);
+    mpz_t shifted;
+    mpz_init_set_int(shifted, x);
+    mpz_tdiv_q_2exp(shifted, shifted, shift);
+    return Int$from_mpz(shifted);
+}
+
+// Bitwise AND of x and y computed with GMP.
+public Int_t Int$slow_bit_and(Int_t x, Int_t y)
+{
+    mpz_t lhs, rhs;
+    mpz_init_set_int(lhs, x);
+    mpz_init_set_int(rhs, y);
+    mpz_and(lhs, lhs, rhs);
+    return Int$from_mpz(lhs);
+}
+
+// Bitwise inclusive OR of x and y computed with GMP.
+public Int_t Int$slow_bit_or(Int_t x, Int_t y)
+{
+    mpz_t lhs, rhs;
+    mpz_init_set_int(lhs, x);
+    mpz_init_set_int(rhs, y);
+    mpz_ior(lhs, lhs, rhs);
+    return Int$from_mpz(lhs);
+}
+
+// Bitwise exclusive OR of x and y computed with GMP.
+public Int_t Int$slow_bit_xor(Int_t x, Int_t y)
+{
+    mpz_t lhs, rhs;
+    mpz_init_set_int(lhs, x);
+    mpz_init_set_int(rhs, y);
+    mpz_xor(lhs, lhs, rhs);
+    return Int$from_mpz(lhs);
+}
+
+// Computes -x - 1 with GMP.
+public Int_t Int$slow_negated(Int_t x)
+{
+    mpz_t flipped;
+    mpz_init_set_int(flipped, x);
+    mpz_neg(flipped, flipped);
+    mpz_sub_ui(flipped, flipped, 1);
+    return Int$from_mpz(flipped);
+}
+
+// Arithmetic negation (-x).
+public Int_t Int$slow_negative(Int_t x)
+{
+    if (unlikely(!(x.small & 1L))) {
+        mpz_t negated;
+        mpz_init_set_int(negated, x);
+        mpz_neg(negated, negated);
+        return Int$from_mpz(negated);
+    }
+    // Small int: negate the untagged value and re-tag it
+    return (Int_t){.small=4L*-((x.small)>>2L) + 1L};
+}
+
+// Absolute value of x.
+public Int_t Int$abs(Int_t x)
+{
+    if (unlikely(!(x.small & 1L))) {
+        mpz_t magnitude;
+        mpz_init_set_int(magnitude, x);
+        mpz_abs(magnitude, magnitude);
+        return Int$from_mpz(magnitude);
+    }
+    // Small int: take the magnitude of the untagged value and re-tag it
+    return (Int_t){.small=4L*labs((x.small)>>2L) + 1L};
+}
+
+// base raised to a non-negative exponent; calls fail() for a negative exponent.
+public Int_t Int$power(Int_t base, Int_t exponent)
+{
+    const int64_t power = Int64$from_int(exponent, false);
+    if (unlikely(power < 0))
+        fail("Cannot take a negative power of an integer!");
+
+    mpz_t raised;
+    mpz_init_set_int(raised, base);
+    mpz_pow_ui(raised, raised, (uint64_t)power);
+    return Int$from_mpz(raised);
+}
+
+// Greatest common divisor of x and y.
+public Int_t Int$gcd(Int_t x, Int_t y)
+{
+    const bool x_is_small = (x.small & 0x1L) != 0;
+    const bool y_is_small = (y.small & 0x1L) != 0;
+    if (likely(x_is_small && y_is_small))
+        return I_small(Int32$gcd(x.small >> 2L, y.small >> 2L));
+
+    mpz_t divisor;
+    mpz_init(divisor);
+    if (x_is_small) {
+        mpz_gcd_ui(divisor, *y.big, (uint64_t)labs(x.small>>2L));
+    } else if (y_is_small) {
+        mpz_gcd_ui(divisor, *x.big, (uint64_t)labs(y.small>>2L));
+    } else {
+        mpz_gcd(divisor, *x.big, *y.big);
+    }
+    return Int$from_mpz(divisor);
+}
+
+// Square root of i as computed by mpz_sqrt(), or none if i is negative.
+public OptionalInt_t Int$sqrt(Int_t i)
+{
+    const bool is_negative = Int$compare_value(i, I(0)) < 0;
+    if (is_negative)
+        return NONE_INT;
+
+    mpz_t root;
+    mpz_init_set_int(root, i);
+    mpz_sqrt(root, root);
+    return Int$from_mpz(root);
+}
+
+typedef struct { // Iteration state shared by the closures returned from Int$to() and Int$onward()
+ OptionalInt_t current, last; // Next value to yield (none once exhausted); final value (none for an unbounded range)
+ Int_t step; // Amount added to `current` on each call of _next_int()
+} IntRange_t;
+
+// Iterator step for Int ranges: return the current value and advance by
+// `step`. Once the advanced value has passed `last` in the direction of
+// `step`, the range is marked exhausted by storing none.
+static OptionalInt_t _next_int(IntRange_t *info)
+{
+    OptionalInt_t i = info->current;
+    if (!Int$is_none(&i, &Int$info)) {
+        Int_t next = Int$plus(i, info->step);
+        if (!Int$is_none(&info->last, &Int$info)) {
+            // Compare signs only: comparison results are not guaranteed to be
+            // exactly -1/0/1 when big integers are involved
+            int32_t vs_last = Int$compare_value(next, info->last);
+            int32_t direction = Int$compare_value(info->step, I(0));
+            if (((vs_last > 0) - (vs_last < 0)) == ((direction > 0) - (direction < 0)))
+                next = NONE_INT;
+        }
+        info->current = next;
+    }
+    return i;
+}
+
+// Build an iterator closure over the integers from `first` to `last`. When
+// `step` is none it defaults to 1 if last >= first, otherwise -1.
+public PUREFUNC Closure_t Int$to(Int_t first, Int_t last, OptionalInt_t step) {
+    IntRange_t *range = GC_MALLOC(sizeof(IntRange_t));
+    range->current = first;
+    range->last = last;
+    // I_small() produces the canonical small-int encoding for both defaults
+    range->step = Int$is_none(&step, &Int$info) ?
+        (Int$compare_value(last, first) >= 0 ? I_small(1) : I_small(-1))
+        : step;
+    return (Closure_t){.fn=_next_int, .userdata=range};
+}
+
+// Build an iterator closure that starts at `first` and advances by `step`
+// with no final value.
+public PUREFUNC Closure_t Int$onward(Int_t first, Int_t step) {
+    IntRange_t *state = GC_MALLOC(sizeof(IntRange_t));
+    state->step = step;
+    state->last = NONE_INT;
+    state->current = first;
+    return (Closure_t){.fn=_next_int, .userdata=state};
+}
+
+// Parse an Int from a C string. The prefixes "0x", "0o" and "0b" select
+// base 16, 8 and 2; anything else is parsed as base 10. Returns NONE_INT if
+// GMP rejects the string.
+public Int_t Int$from_str(const char *str) {
+    int base = 10;
+    const char *digits = str;
+    if (strncmp(str, "0x", 2) == 0) {
+        base = 16;
+        digits = str + 2;
+    } else if (strncmp(str, "0o", 2) == 0) {
+        base = 8;
+        digits = str + 2;
+    } else if (strncmp(str, "0b", 2) == 0) {
+        base = 2;
+        digits = str + 2;
+    }
+
+    mpz_t parsed;
+    if (mpz_init_set_str(parsed, digits, base) != 0)
+        return NONE_INT;
+    return Int$from_mpz(parsed);
+}
+
+// Parse an Int from text via its C-string form; none if parsing fails.
+public OptionalInt_t Int$parse(Text_t text) {
+    const char *c_string = Text$as_c_string(text);
+    return Int$from_str(c_string);
+}
+
+// Probabilistic primality test using mpz_probab_prime_p() with `reps`
+// repetitions; calls fail() if `reps` is above 9999.
+public bool Int$is_prime(Int_t x, Int_t reps)
+{
+    if (unlikely(Int$compare_value(reps, I(9999)) > 0))
+        fail("Number of prime-test repetitions should not be above 9999");
+
+    mpz_t candidate;
+    mpz_init_set_int(candidate, x);
+    const int repetitions = Int32$from_int(reps, false);
+    const int verdict = mpz_probab_prime_p(candidate, repetitions);
+    return verdict != 0;
+}
+
+// The next prime greater than x, as found by mpz_nextprime().
+public Int_t Int$next_prime(Int_t x)
+{
+    mpz_t prime;
+    mpz_init_set_int(prime, x);
+    mpz_nextprime(prime, prime);
+    return Int$from_mpz(prime);
+}
+
+// Int$prev_prime() uses mpz_prevprime(), so it is only compiled for GMP 6.3
+// or newer. Major and minor are tested together so that a later major
+// version with a small minor number still qualifies.
+#if (__GNU_MP_VERSION > 6) || (__GNU_MP_VERSION == 6 && __GNU_MP_VERSION_MINOR >= 3)
+// The largest prime below x; calls fail() if mpz_prevprime() finds none.
+public Int_t Int$prev_prime(Int_t x)
+{
+    mpz_t p;
+    mpz_init_set_int(p, x);
+    if (unlikely(mpz_prevprime(p, p) == 0))
+        fail("There is no prime number before %k", (Text_t[1]){Int$as_text(&x, false, &Int$info)});
+    return Int$from_mpz(p);
+}
+#endif
+
+// Binomial coefficient "n choose k"; calls fail() if either input is negative.
+public Int_t Int$choose(Int_t n, Int_t k)
+{
+    if (unlikely(Int$compare_value(n, I_small(0)) < 0))
+        fail("Negative inputs are not supported for choose()");
+
+    mpz_t coefficient;
+    mpz_init(coefficient);
+
+    const int64_t k_value = Int64$from_int(k, false);
+    if (unlikely(k_value < 0))
+        fail("Negative inputs are not supported for choose()");
+
+    if (unlikely(!(n.small & 1L))) {
+        mpz_t big_n;
+        mpz_init_set_int(big_n, n);
+        mpz_bin_ui(coefficient, big_n, (unsigned long)k_value);
+    } else {
+        mpz_bin_uiui(coefficient, (unsigned long)(n.small >> 2L), (unsigned long)k_value);
+    }
+    return Int$from_mpz(coefficient);
+}
+
+// n! computed with mpz_fac_ui(); calls fail() for negative n.
+public Int_t Int$factorial(Int_t n)
+{
+    mpz_t product;
+    mpz_init(product);
+    const int64_t count = Int64$from_int(n, false);
+    if (unlikely(count < 0))
+        fail("Factorials are not defined for negative numbers");
+    mpz_fac_ui(product, (unsigned long)count);
+    return Int$from_mpz(product);
+}
+
+// serialize metamethod: writes a tag byte (0 = small, 1 = big), followed by
+// the value as a serialized Int64 (small) or in GMP's raw format (big).
+static void Int$serialize(const void *obj, FILE *out, Table_t *pointers, const TypeInfo_t*)
+{
+    Int_t value = *(Int_t*)obj;
+    if (unlikely(!(value.small & 1L))) {
+        fputc(1, out);
+        mpz_t big;
+        mpz_init_set_int(big, value);
+        mpz_out_raw(out, big);
+        return;
+    }
+
+    fputc(0, out);
+    int64_t untagged = value.small >> 2L;
+    Int64$serialize(&untagged, out, pointers, &Int64$info);
+}
+
+// deserialize metamethod: reads the tag byte written by Int$serialize and
+// then the value in the matching format.
+static void Int$deserialize(FILE *in, void *obj, Array_t *pointers, const TypeInfo_t*)
+{
+    Int_t *dest = (Int_t*)obj;
+    const int tag = fgetc(in);
+    if (tag != 0) {
+        mpz_t big;
+        mpz_init(big);
+        mpz_inp_raw(big, in);
+        *dest = Int$from_mpz(big);
+        return;
+    }
+
+    int64_t untagged = 0;
+    Int64$deserialize(in, &untagged, pointers, &Int64$info);
+    *dest = (Int_t){.small=(untagged<<2L) | 1L};
+}
+
+public const TypeInfo_t Int$info = { // Type descriptor for Int_t
+ .size=sizeof(Int_t),
+ .align=__alignof__(Int_t),
+ .metamethods={ // Every slot below points at the Int$ function of the same name in this file
+ .compare=Int$compare,
+ .equal=Int$equal,
+ .hash=Int$hash,
+ .as_text=Int$as_text,
+ .is_none=Int$is_none,
+ .serialize=Int$serialize,
+ .deserialize=Int$deserialize,
+ },
+};
+
+// Write a 64-bit integer as a zigzag-encoded varint: 7 bits per byte, lowest
+// bits first, with the high bit set on every byte except the last.
+public void Int64$serialize(const void *obj, FILE *out, Table_t*, const TypeInfo_t*)
+{
+    int64_t i = *(int64_t*)obj;
+    // Zigzag encode. The left shift is done on the unsigned value because
+    // left-shifting a negative signed value is undefined.
+    uint64_t z = ((uint64_t)i << 1) ^ (uint64_t)(i >> 63);
+    while (z >= 0x80) {
+        fputc((uint8_t)(z | 0x80), out);
+        z >>= 7;
+    }
+    fputc((uint8_t)z, out);
+}
+
+// Read a zigzag-encoded varint (as written by Int64$serialize) into the
+// int64_t at `outval`. Reading stops at the first byte without the
+// continuation bit, at end of input, or after the 10 bytes a 64-bit value
+// can occupy.
+public void Int64$deserialize(FILE *in, void *outval, Array_t*, const TypeInfo_t*)
+{
+    uint64_t z = 0;
+    for (size_t shift = 0; shift < 64; shift += 7) {
+        int c = fgetc(in);
+        // EOF must be checked before narrowing: as a byte it would look like
+        // a continuation byte and the loop would never end
+        if (c == EOF) break;
+        z |= ((uint64_t)(c & 0x7F)) << shift;
+        if ((c & 0x80) == 0) break;
+    }
+    *(int64_t*)outval = (int64_t)((z >> 1) ^ -(z & 1)); // Zigzag decode
+}
+
+// Write a 32-bit integer as a zigzag-encoded varint: 7 bits per byte, lowest
+// bits first, with the high bit set on every byte except the last.
+public void Int32$serialize(const void *obj, FILE *out, Table_t*, const TypeInfo_t*)
+{
+    int32_t i = *(int32_t*)obj;
+    // Zigzag encode. The left shift is done on the unsigned value because a
+    // signed left shift is undefined for negative or large inputs.
+    uint32_t z = ((uint32_t)i << 1) ^ (uint32_t)(i >> 31);
+    while (z >= 0x80) {
+        fputc((uint8_t)(z | 0x80), out);
+        z >>= 7;
+    }
+    fputc((uint8_t)z, out);
+}
+
+// Read a zigzag-encoded varint (as written by Int32$serialize) into the
+// int32_t at `outval`. Reading stops at the first byte without the
+// continuation bit, at end of input, or after the 5 bytes a 32-bit value
+// can occupy.
+public void Int32$deserialize(FILE *in, void *outval, Array_t*, const TypeInfo_t*)
+{
+    uint32_t z = 0;
+    for (size_t shift = 0; shift < 32; shift += 7) {
+        int c = fgetc(in);
+        // EOF must be checked before narrowing: as a byte it would look like
+        // a continuation byte and the loop would never end
+        if (c == EOF) break;
+        z |= ((uint32_t)(c & 0x7F)) << shift;
+        if ((c & 0x80) == 0) break;
+    }
+    *(int32_t*)outval = (int32_t)((z >> 1) ^ -(z & 1u)); // Zigzag decode
+}
+
+// The space savings for smaller ints are not worth having, so the Int16 and Int8 serializer slots filled in by DEFINE_INT_TYPE are NULL:
+#define Int16$serialize NULL
+#define Int16$deserialize NULL
+#define Int8$serialize NULL
+#define Int8$deserialize NULL
+
+// Generate the functions, range iterator, constants and type descriptor for
+// one fixed-width integer type.
+//   c_type:   the C type (e.g. int32_t)
+//   KindOfInt: the name prefix (e.g. Int32)
+//   fmt:      printf format used to print one value
+//   min_val/max_val: the type's limits
+//   to_attr:  attribute applied to the generated $to() and $onward()
+#define DEFINE_INT_TYPE(c_type, KindOfInt, fmt, min_val, max_val, to_attr)\
+    public Text_t KindOfInt ## $as_text(const void *i, bool colorize, const TypeInfo_t*) { \
+        if (!i) return Text(#KindOfInt); \
+        return Text$format(colorize ? "\x1b[35m" fmt "\x1b[m" : fmt, *(c_type*)i); \
+    } \
+    public PUREFUNC int32_t KindOfInt ## $compare(const void *x, const void *y, const TypeInfo_t*) { \
+        return (*(c_type*)x > *(c_type*)y) - (*(c_type*)x < *(c_type*)y); \
+    } \
+    public PUREFUNC bool KindOfInt ## $equal(const void *x, const void *y, const TypeInfo_t*) { \
+        return *(c_type*)x == *(c_type*)y; \
+    } \
+    public Text_t KindOfInt ## $format(c_type i, Int_t digits_int) { \
+        return Text$format("%0*ld", Int32$from_int(digits_int, false), (int64_t)i); \
+    } \
+    public Text_t KindOfInt ## $hex(c_type i, Int_t digits_int, bool uppercase, bool prefix) { \
+        Int_t as_int = Int$from_int64((int64_t)i); \
+        return Int$hex(as_int, digits_int, uppercase, prefix); \
+    } \
+    public Text_t KindOfInt ## $octal(c_type i, Int_t digits_int, bool prefix) { \
+        Int_t as_int = Int$from_int64((int64_t)i); \
+        return Int$octal(as_int, digits_int, prefix); \
+    } \
+    /* Array of bools, one per bit of x, most significant bit first */ \
+    public Array_t KindOfInt ## $bits(c_type x) { \
+        Array_t bit_array = (Array_t){.data=GC_MALLOC_ATOMIC(sizeof(bool[8*sizeof(c_type)])), .atomic=1, .stride=sizeof(bool), .length=8*sizeof(c_type)}; \
+        bool *bits = bit_array.data; \
+        /* Fill from the last element (lowest bit) back to element 0 */ \
+        for (size_t i = 8*sizeof(c_type); i-- > 0; ) { \
+            bits[i] = x & 1; \
+            x >>= 1; \
+        } \
+        return bit_array; \
+    } \
+    typedef struct { \
+        Optional##KindOfInt##_t current, last; \
+        KindOfInt##_t step; \
+    } KindOfInt##Range_t; \
+    /* Iterator step: return the current value and advance, stopping on overflow or after passing `last` */ \
+    static Optional##KindOfInt##_t _next_##KindOfInt(KindOfInt##Range_t *info) \
+    { \
+        Optional##KindOfInt##_t i = info->current; \
+        if (!i.is_none) { \
+            KindOfInt##_t next; bool overflow = __builtin_add_overflow(i.i, info->step, &next); \
+            if (overflow || (!info->last.is_none && (info->step >= 0 ? next > info->last.i : next < info->last.i))) \
+                info->current = (Optional##KindOfInt##_t){.is_none=true}; \
+            else \
+                info->current = (Optional##KindOfInt##_t){.i=next}; \
+        } \
+        return i; \
+    } \
+    public to_attr Closure_t KindOfInt ## $to(c_type first, c_type last, Optional ## KindOfInt ## _t step) { \
+        KindOfInt##Range_t *range = GC_MALLOC(sizeof(KindOfInt##Range_t)); \
+        range->current = (Optional##KindOfInt##_t){.i=first}; \
+        range->last = (Optional##KindOfInt##_t){.i=last}; \
+        range->step = step.is_none ? (last >= first ? 1 : -1) : step.i; \
+        return (Closure_t){.fn=_next_##KindOfInt, .userdata=range}; \
+    } \
+    public to_attr Closure_t KindOfInt ## $onward(c_type first, c_type step) { \
+        KindOfInt##Range_t *range = GC_MALLOC(sizeof(KindOfInt##Range_t)); \
+        range->current = (Optional##KindOfInt##_t){.i=first}; \
+        range->last = (Optional##KindOfInt##_t){.is_none=true}; \
+        range->step = step; \
+        return (Closure_t){.fn=_next_##KindOfInt, .userdata=range}; \
+    } \
+    /* Parse text as an Int, then reject values outside [min_val, max_val] */ \
+    public PUREFUNC Optional ## KindOfInt ## _t KindOfInt ## $parse(Text_t text) { \
+        OptionalInt_t full_int = Int$parse(text); \
+        if (full_int.small == 0L) return (Optional ## KindOfInt ## _t){.is_none=true}; \
+        if (Int$compare_value(full_int, I(min_val)) < 0) { \
+            return (Optional ## KindOfInt ## _t){.is_none=true}; \
+        } \
+        if (Int$compare_value(full_int, I(max_val)) > 0) { \
+            return (Optional ## KindOfInt ## _t){.is_none=true}; \
+        } \
+        return (Optional ## KindOfInt ## _t){.i=KindOfInt##$from_int(full_int, true)}; \
+    } \
+    /* Greatest common divisor by the remainder form of Euclid's algorithm; returns 0 if either input is 0 */ \
+    public CONSTFUNC c_type KindOfInt ## $gcd(c_type x, c_type y) { \
+        if (x == 0 || y == 0) return 0; \
+        x = KindOfInt##$abs(x); \
+        y = KindOfInt##$abs(y); \
+        while (y != 0) { \
+            c_type remainder = x % y; \
+            x = y; \
+            y = remainder; \
+        } \
+        return x; \
+    } \
+    public const c_type KindOfInt##$min = min_val; \
+    public const c_type KindOfInt##$max = max_val; \
+    public const TypeInfo_t KindOfInt##$info = { \
+        .size=sizeof(c_type), \
+        .align=__alignof__(c_type), \
+        .metamethods={ \
+            .compare=KindOfInt##$compare, \
+            .as_text=KindOfInt##$as_text, \
+            .serialize=KindOfInt##$serialize, \
+            .deserialize=KindOfInt##$deserialize, \
+        }, \
+    };
+
+DEFINE_INT_TYPE(int64_t, Int64, "%ld", INT64_MIN, INT64_MAX, __attribute__(()))
+DEFINE_INT_TYPE(int32_t, Int32, "%d", INT32_MIN, INT32_MAX, CONSTFUNC)
+DEFINE_INT_TYPE(int16_t, Int16, "%d", INT16_MIN, INT16_MAX, CONSTFUNC)
+DEFINE_INT_TYPE(int8_t, Int8, "%d", INT8_MIN, INT8_MAX, CONSTFUNC)
+#undef DEFINE_INT_TYPE
+
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/src/stdlib/integers.h b/src/stdlib/integers.h
new file mode 100644
index 00000000..e0586882
--- /dev/null
+++ b/src/stdlib/integers.h
@@ -0,0 +1,430 @@
+#pragma once
+
+// Integer type infos and methods
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <gmp.h>
+
+#include "datatypes.h"
+#include "stdlib.h"
+#include "types.h"
+#include "util.h"
+
+// Cast helpers for fixed-width integers. The argument is parenthesized so
+// that a compound expression such as I32(a + b) casts the whole sum rather
+// than only its first operand.
+#define I64(x) ((int64_t)(x))
+#define I32(x) ((int32_t)(x))
+#define I16(x) ((int16_t)(x))
+#define I8(x) ((int8_t)(x))
+
+// Declares the API shared by every fixed-width integer type:
+//  - Optional<type_name>_t: the value plus an `is_none` flag
+//  - prototypes for the text/compare/format/range/parse functions
+//  - inline helpers: clamping, byte/bool conversion, Euclidean-style
+//    division and modulus, wrapping arithmetic and unsigned shifts.
+// The wrapping/unsigned helpers compute in the unsigned counterpart of
+// c_type (`u##c_type`, e.g. uint32_t) so overflow wraps instead of being
+// undefined. `$wrapping_minus` subtracts; it previously added its operands.
+// `$divided_by`/`$modulo` divide by `d` without checking it for zero.
+#define DEFINE_INT_TYPE(c_type, type_name) \
+    typedef struct { \
+        c_type i; \
+        bool is_none:1; \
+    } Optional ## type_name ## _t; \
+    Text_t type_name ## $as_text(const void *i, bool colorize, const TypeInfo_t *type); \
+    PUREFUNC int32_t type_name ## $compare(const void *x, const void *y, const TypeInfo_t *type); \
+    PUREFUNC bool type_name ## $equal(const void *x, const void *y, const TypeInfo_t *type); \
+    Text_t type_name ## $format(c_type i, Int_t digits); \
+    Text_t type_name ## $hex(c_type i, Int_t digits, bool uppercase, bool prefix); \
+    Text_t type_name ## $octal(c_type i, Int_t digits, bool prefix); \
+    Array_t type_name ## $bits(c_type x); \
+    Closure_t type_name ## $to(c_type first, c_type last, Optional ## type_name ## _t step); \
+    Closure_t type_name ## $onward(c_type first, c_type step); \
+    PUREFUNC Optional ## type_name ## _t type_name ## $parse(Text_t text); \
+    MACROLIKE PUREFUNC c_type type_name ## $clamped(c_type x, c_type min, c_type max) { \
+        return x < min ? min : (x > max ? max : x); \
+    } \
+    MACROLIKE CONSTFUNC c_type type_name ## $from_byte(Byte_t b) { return (c_type)b; } \
+    MACROLIKE CONSTFUNC c_type type_name ## $from_bool(Bool_t b) { return (c_type)b; } \
+    CONSTFUNC c_type type_name ## $gcd(c_type x, c_type y); \
+    extern const c_type type_name ## $min, type_name##$max; \
+    extern const TypeInfo_t type_name ## $info; \
+    MACROLIKE c_type type_name ## $divided_by(c_type D, c_type d) { \
+        c_type q = D/d, r = D%d; \
+        q -= (r < 0) * (2*(d > 0) - 1); \
+        return q; \
+    } \
+    MACROLIKE c_type type_name ## $modulo(c_type D, c_type d) { \
+        c_type r = D%d; \
+        r -= (r < 0) * (2*(d < 0) - 1) * d; \
+        return r; \
+    } \
+    MACROLIKE c_type type_name ## $modulo1(c_type D, c_type d) { \
+        return type_name ## $modulo(D-1, d) + 1; \
+    } \
+    MACROLIKE PUREFUNC c_type type_name ## $wrapping_plus(c_type x, c_type y) { \
+        return (c_type)((u##c_type)x + (u##c_type)y); \
+    } \
+    MACROLIKE PUREFUNC c_type type_name ## $wrapping_minus(c_type x, c_type y) { \
+        return (c_type)((u##c_type)x - (u##c_type)y); \
+    } \
+    MACROLIKE PUREFUNC c_type type_name ## $unsigned_left_shifted(c_type x, c_type y) { \
+        return (c_type)((u##c_type)x << y); \
+    } \
+    MACROLIKE PUREFUNC c_type type_name ## $unsigned_right_shifted(c_type x, c_type y) { \
+        return (c_type)((u##c_type)x >> y); \
+    }
+
+DEFINE_INT_TYPE(int64_t, Int64)
+DEFINE_INT_TYPE(int32_t, Int32)
+DEFINE_INT_TYPE(int16_t, Int16)
+DEFINE_INT_TYPE(int8_t, Int8)
+#undef DEFINE_INT_TYPE
+
+// "None" values for each optional fixed-width integer: only `is_none` is set,
+// so the payload `i` is zero-initialized by the compound literal.
+#define NONE_INT64 ((OptionalInt64_t){.is_none=true})
+#define NONE_INT32 ((OptionalInt32_t){.is_none=true})
+#define NONE_INT16 ((OptionalInt16_t){.is_none=true})
+#define NONE_INT8 ((OptionalInt8_t){.is_none=true})
+
+// Absolute value for each fixed-width type, built on the C library's
+// labs()/abs() and cast back to the fixed-width type.
+#define Int64$abs(...) I64(labs(__VA_ARGS__))
+#define Int32$abs(...) I32(abs(__VA_ARGS__))
+#define Int16$abs(...) I16(abs(__VA_ARGS__))
+#define Int8$abs(...) I8(abs(__VA_ARGS__))
+
+// Serialization hooks for the 64- and 32-bit types.
+void Int64$serialize(const void *obj, FILE *out, Table_t*, const TypeInfo_t*);
+void Int64$deserialize(FILE *in, void *outval, Array_t*, const TypeInfo_t*);
+void Int32$serialize(const void *obj, FILE *out, Table_t*, const TypeInfo_t*);
+void Int32$deserialize(FILE *in, void *outval, Array_t*, const TypeInfo_t*);
+
+// An optional Int shares the representation of Int_t (users in this codebase
+// test `.small == 0` to detect "none").
+#define OptionalInt_t Int_t
+
+// Out-of-line API for the arbitrary-precision Int type.
+// NOTE(review): Int$to/Int$onward are declared PUREFUNC here, while the
+// fixed-width `$to`/`$onward` declarations carry no such attribute. If these
+// also allocate mutable iterator state, the attribute may allow the compiler
+// to merge repeated calls -- confirm against the definitions.
+Text_t Int$as_text(const void *i, bool colorize, const TypeInfo_t *type);
+Text_t Int$value_as_text(Int_t i);
+PUREFUNC uint64_t Int$hash(const void *x, const TypeInfo_t *type);
+PUREFUNC int32_t Int$compare(const void *x, const void *y, const TypeInfo_t *type);
+PUREFUNC int32_t Int$compare_value(const Int_t x, const Int_t y);
+PUREFUNC bool Int$equal(const void *x, const void *y, const TypeInfo_t *type);
+PUREFUNC bool Int$equal_value(const Int_t x, const Int_t y);
+Text_t Int$format(Int_t i, Int_t digits);
+Text_t Int$hex(Int_t i, Int_t digits, bool uppercase, bool prefix);
+Text_t Int$octal(Int_t i, Int_t digits, bool prefix);
+PUREFUNC Closure_t Int$to(Int_t first, Int_t last, OptionalInt_t step);
+PUREFUNC Closure_t Int$onward(Int_t first, Int_t step);
+OptionalInt_t Int$from_str(const char *str);
+OptionalInt_t Int$parse(Text_t text);
+Int_t Int$abs(Int_t x);
+Int_t Int$power(Int_t base, Int_t exponent);
+Int_t Int$gcd(Int_t x, Int_t y);
+OptionalInt_t Int$sqrt(Int_t i);
+
+// Range of values that Int$from_int64() stores inline as a "small" int.
+#define BIGGEST_SMALL_INT 0x3fffffff
+#define SMALLEST_SMALL_INT -0x40000000
+
+// Build an Int_t from an mpz_t: values whose magnitude is at most
+// BIGGEST_SMALL_INT are stored inline (value shifted left by two with the low
+// tag bit set); anything else is copied into a new mpz_t allocated with
+// new() and referenced through `.big`.
+#define Int$from_mpz(mpz) (\
+ mpz_cmpabs_ui(mpz, BIGGEST_SMALL_INT) <= 0 ? ( \
+ (Int_t){.small=(mpz_get_si(mpz)<<2L)|1L} \
+ ) : ( \
+ (Int_t){.big=memcpy(new(mpz_t), &mpz, sizeof(mpz_t))} \
+ ))
+
+// Initialize `mpz` from an Int_t: a set low bit marks an inline small int
+// (payload is `.small >> 2`); otherwise `.big` points at an mpz_t to copy.
+#define mpz_init_set_int(mpz, i) do { \
+ if likely ((i).small & 1L) mpz_init_set_si(mpz, (i).small >> 2L); \
+ else mpz_init_set(mpz, *(i).big); \
+} while (0)
+
+// I_small: tag a value as an inline small int without any range check (the
+// shift is done on uint64_t). I: int8_t/int16_t take the I_small path and
+// every other type goes through Int$from_int64(). I_is_zero: tagged zero is
+// (0<<2)|1 == 1.
+#define I_small(i) ((Int_t){.small=(int64_t)((uint64_t)(i)<<2L)|1L})
+#define I(i) _Generic(i, int8_t: I_small(i), int16_t: I_small(i), default: Int$from_int64(i))
+#define I_is_zero(i) ((i).small == 1L)
+
+// Out-of-line fallbacks used by the inline fast paths below whenever an
+// operand is not an inline small int or the result does not pass the fast
+// path's range check.
+Int_t Int$slow_plus(Int_t x, Int_t y);
+Int_t Int$slow_minus(Int_t x, Int_t y);
+Int_t Int$slow_times(Int_t x, Int_t y);
+Int_t Int$slow_divided_by(Int_t x, Int_t y);
+Int_t Int$slow_modulo(Int_t x, Int_t y);
+Int_t Int$slow_modulo1(Int_t x, Int_t y);
+Int_t Int$slow_left_shifted(Int_t x, Int_t y);
+Int_t Int$slow_right_shifted(Int_t x, Int_t y);
+Int_t Int$slow_bit_and(Int_t x, Int_t y);
+Int_t Int$slow_bit_or(Int_t x, Int_t y);
+Int_t Int$slow_bit_xor(Int_t x, Int_t y);
+Int_t Int$slow_negative(Int_t x);
+Int_t Int$slow_negated(Int_t x);
+bool Int$is_prime(Int_t x, Int_t reps);
+Int_t Int$next_prime(Int_t x);
+// Int$prev_prime is only declared when the GMP headers report major >= 6 and
+// minor >= 3.
+// NOTE(review): as written, the nested test also excludes any future
+// 7.0-7.2 release (minor < 3); confirm whether
+// "major > 6 || (major == 6 && minor >= 3)" was intended, and keep the
+// definition's guard in sync.
+#if __GNU_MP_VERSION >= 6
+#if __GNU_MP_VERSION_MINOR >= 3
+Int_t Int$prev_prime(Int_t x);
+#endif
+#endif
+Int_t Int$choose(Int_t n, Int_t k);
+Int_t Int$factorial(Int_t n);
+
+extern const TypeInfo_t Int$info;
+
+// Clamp `x` to the inclusive range [low, high] using the generic Int
+// comparison. The lower bound is checked first, so it wins if low > high.
+MACROLIKE PUREFUNC Int_t Int$clamped(Int_t x, Int_t low, Int_t high) {
+    if (Int$compare(&x, &low, &Int$info) <= 0)
+        return low;
+    if (Int$compare(&x, &high, &Int$info) >= 0)
+        return high;
+    return x;
+}
+
+// Fast-path inline versions for the common case where integer arithmetic is
+// between two small ints.
+
+// Add two Ints. Two tagged small ints (4a+1) and (4b+1) sum to 4(a+b)+2, so
+// the sum having bit 1 set and fitting in 32 bits is the fast-path test;
+// subtracting one restores the tag. Everything else goes to the slow path.
+MACROLIKE Int_t Int$plus(Int_t x, Int_t y) {
+    const int64_t tagged_sum = (int64_t)((uint64_t)x.small + (uint64_t)y.small);
+    if (unlikely((tagged_sum|2L) != (int32_t)tagged_sum))
+        return Int$slow_plus(x, y);
+    return (Int_t){.small=(tagged_sum-1L)};
+}
+
+// Subtract two Ints. Flipping the low two bits of a tagged small int turns
+// 4a+1 into 4a+2; subtracting 4b+1 then yields 4(a-b)+1, which is already
+// correctly tagged. The fast path requires bit 1 to be clear and the result
+// to fit in 32 bits.
+MACROLIKE Int_t Int$minus(Int_t x, Int_t y) {
+    const int64_t tagged_diff = (int64_t)(((uint64_t)x.small ^ 3L) - (uint64_t)y.small);
+    if (unlikely((tagged_diff & ~2L) != (int32_t)tagged_diff))
+        return Int$slow_minus(x, y);
+    return (Int_t){.small=tagged_diff};
+}
+
+// Multiply two Ints. For small operands, shifting each tagged value right by
+// one gives 2a and 2b, whose product 4ab only needs the tag bit added back.
+// Falls back to the slow path for big operands or a product that does not
+// fit in 32 bits.
+MACROLIKE Int_t Int$times(Int_t x, Int_t y) {
+    const bool both_small = ((x.small & y.small) & 1L) != 0;
+    if likely (both_small) {
+        const int64_t shifted_product = (x.small>>1L) * (y.small>>1L);
+        if likely (shifted_product == (int32_t)shifted_product)
+            return (Int_t){.small=shifted_product+1L};
+    }
+    return Int$slow_times(x, y);
+}
+
+// Divide two Ints, adjusting C's truncating quotient so that the matching
+// remainder is never negative (Euclidean division, see:
+// https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/divmodnote-letter.pdf).
+// NOTE(review): the fast path divides without checking for a zero divisor;
+// presumably callers rule that out -- confirm.
+MACROLIKE Int_t Int$divided_by(Int_t x, Int_t y) {
+    if likely (x.small & y.small & 1L) {
+        const int64_t dividend = (x.small>>2L);
+        const int64_t divisor = (y.small>>2L);
+        int64_t quotient = dividend/divisor;
+        const int64_t remainder = dividend%divisor;
+        if (remainder < 0L)
+            quotient -= (divisor > 0L) ? 1L : -1L;
+        if likely (quotient == (int32_t)quotient)
+            return (Int_t){.small=(quotient<<2L)|1L};
+    }
+    return Int$slow_divided_by(x, y);
+}
+
+// Euclidean modulus: for small operands the result is always in
+// [0, |divisor|), see:
+// https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/divmodnote-letter.pdf
+// Big operands go to the slow path.
+MACROLIKE Int_t Int$modulo(Int_t x, Int_t y) {
+    if (unlikely(!(x.small & y.small & 1L)))
+        return Int$slow_modulo(x, y);
+
+    const int64_t dividend = (x.small>>2L);
+    const int64_t divisor = (y.small>>2L);
+    int64_t remainder = dividend%divisor;
+    // A negative C remainder is shifted up by |divisor|
+    if (remainder < 0L)
+        remainder -= (divisor < 0L) ? divisor : -divisor;
+    return (Int_t){.small=(remainder<<2L)|1L};
+}
+
+// 1-based Euclidean modulus: computes ((x-1) mod y) + 1, so results for a
+// positive divisor fall in [1, y]. See:
+// https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/divmodnote-letter.pdf
+MACROLIKE Int_t Int$modulo1(Int_t x, Int_t y) {
+    if (unlikely(!(x.small & y.small & 1L)))
+        return Int$slow_modulo1(x, y);
+
+    const int64_t dividend = (x.small>>2L)-1L;
+    const int64_t divisor = (y.small>>2L);
+    int64_t remainder = dividend%divisor;
+    // A negative C remainder is shifted up by |divisor|
+    if (remainder < 0L)
+        remainder -= (divisor < 0L) ? divisor : -divisor;
+    return (Int_t){.small=((remainder+1L)<<2L)|1L};
+}
+
+// Shift `x` left by `y` bits.
+// The fast path is only taken when both operands are small ints, the shift
+// count is in [0, 30) and the value fits in 32 bits, so the intermediate
+// product cannot overflow int64_t. Previously the count was used unchecked:
+// a negative count or one >= 64 is undefined behavior in C, and on hardware
+// that masks the count, `1 << 64` could come back as 1. The scaling is done
+// by multiplication because left-shifting a negative value is also
+// undefined. Every other case is delegated to the slow path.
+MACROLIKE Int_t Int$left_shifted(Int_t x, Int_t y) {
+    if likely (x.small & y.small & 1L) {
+        const int64_t value = (x.small>>2L);
+        const int64_t shift = (y.small>>2L);
+        if likely (shift >= 0L && shift < 30L && value == (int32_t)value) {
+            // The extra factor of 4 makes room for the two tag bits
+            const int64_t z = value * ((int64_t)1 << shift) * 4L;
+            if likely (z == (int32_t)z)
+                return (Int_t){.small=z+1L};
+        }
+    }
+    return Int$slow_left_shifted(x, y);
+}
+
+// Shift `x` right by `y` bits.
+// The fast path is only taken when both operands are small ints and the
+// shift count is in [0, 64). Previously the count was used unchecked: a
+// negative count or one >= 64 is undefined behavior in C, and on hardware
+// that masks the count, `8 >> 64` could come back as 8. Re-tagging uses a
+// multiplication because left-shifting a negative value is also undefined.
+// Every other case is delegated to the slow path.
+MACROLIKE Int_t Int$right_shifted(Int_t x, Int_t y) {
+    if likely (x.small & y.small & 1L) {
+        const int64_t value = (x.small>>2L);
+        const int64_t shift = (y.small>>2L);
+        if likely (shift >= 0L && shift < 64L) {
+            // |value| < 2^61, so scaling by 4 for the tag bits cannot overflow
+            const int64_t z = (value >> shift) * 4L;
+            if likely (z == (int32_t)z)
+                return (Int_t){.small=z+1L};
+        }
+    }
+    return Int$slow_right_shifted(x, y);
+}
+
+// Bitwise AND. ANDing two tagged small ints keeps the tag bit (1 & 1), so a
+// set low bit in the result proves both operands were small.
+MACROLIKE Int_t Int$bit_and(Int_t x, Int_t y) {
+    const int64_t anded = x.small & y.small;
+    if (unlikely(!(anded & 1L)))
+        return Int$slow_bit_and(x, y);
+    return (Int_t){.small=anded};
+}
+
+// Bitwise OR. For two small ints the tag bits OR together into a valid tag.
+MACROLIKE Int_t Int$bit_or(Int_t x, Int_t y) {
+    if (unlikely(!(x.small & y.small & 1L)))
+        return Int$slow_bit_or(x, y);
+    return (Int_t){.small=(x.small | y.small)};
+}
+
+// Bitwise XOR. The two tag bits cancel each other out, so the tag is set
+// again explicitly.
+MACROLIKE Int_t Int$bit_xor(Int_t x, Int_t y) {
+    if (unlikely(!(x.small & y.small & 1L)))
+        return Int$slow_bit_xor(x, y);
+    return (Int_t){.small=(x.small ^ y.small) | 1L};
+}
+
+// Bitwise NOT. Complementing a tagged small int flips the two low bits as
+// well, so they are flipped back to restore the tag.
+MACROLIKE Int_t Int$negated(Int_t x) {
+    if (unlikely(!(x.small & 1L)))
+        return Int$slow_negated(x);
+    return (Int_t){.small=(~x.small) ^ 3L};
+}
+
+// Arithmetic negation: untag, negate, and tag again.
+MACROLIKE Int_t Int$negative(Int_t x) {
+    if (unlikely(!(x.small & 1L)))
+        return Int$slow_negative(x);
+    return (Int_t){.small=((-((x.small)>>2L))<<2L) | 1L};
+}
+
+// True when `x` is below zero. A tagged small int has the same sign as its
+// payload; big ints are compared against a small zero.
+MACROLIKE PUREFUNC bool Int$is_negative(Int_t x) {
+    if (unlikely(!(x.small & 1L)))
+        return Int$compare_value(x, I_small(0)) < 0L;
+    return x.small < 0L;
+}
+
+// Constructors/conversion functions:
+
+// Int constructors:
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wfloat-equal"
+// Convert a double to an Int via GMP. Unless `truncate` is set, the
+// conversion fails if converting the result back to a double does not
+// reproduce `n`.
+MACROLIKE PUREFUNC Int_t Int$from_num(double n, bool truncate) {
+    mpz_t converted;
+    mpz_init_set_d(converted, n);
+    if (!truncate) {
+        if (unlikely(mpz_get_d(converted) != n))
+            fail("Could not convert to an integer without truncation: %g", n);
+    }
+    return Int$from_mpz(converted);
+}
+// Convert a float to an Int by widening it to double first.
+MACROLIKE PUREFUNC Int_t Int$from_num32(float n, bool truncate) {
+    const double widened = (double)n;
+    return Int$from_num(widened, truncate);
+}
+// Convert an int64_t to an Int: values inside
+// [SMALLEST_SMALL_INT, BIGGEST_SMALL_INT] are tagged inline, and anything
+// else goes through an mpz_t.
+MACROLIKE Int_t Int$from_int64(int64_t i) {
+    const bool in_small_range = (i >= SMALLEST_SMALL_INT && i <= BIGGEST_SMALL_INT);
+    if likely (in_small_range)
+        return (Int_t){.small=(i<<2L)|1L};
+
+    mpz_t big_value;
+    mpz_init_set_si(big_value, i);
+    return Int$from_mpz(big_value);
+}
+// Widening conversions into Int. 32-bit values are range-checked by
+// Int$from_int64(); 16-bit, 8-bit, byte and bool values are tagged directly
+// with I_small().
+MACROLIKE CONSTFUNC Int_t Int$from_int32(Int32_t i) {
+    return Int$from_int64((Int32_t)i);
+}
+MACROLIKE CONSTFUNC Int_t Int$from_int16(Int16_t i) {
+    return I_small(i);
+}
+MACROLIKE CONSTFUNC Int_t Int$from_int8(Int8_t i) {
+    return I_small(i);
+}
+MACROLIKE CONSTFUNC Int_t Int$from_byte(Byte_t b) {
+    return I_small(b);
+}
+MACROLIKE CONSTFUNC Int_t Int$from_bool(Bool_t b) {
+    return I_small(b);
+}
+
+// Int64 constructors
+// Num -> Int64. Unless `truncate` is set, fails when converting the result
+// back to Num does not reproduce `n`.
+MACROLIKE PUREFUNC Int64_t Int64$from_num(Num_t n, bool truncate) {
+    const int64_t whole = (int64_t)n;
+    if (!truncate) {
+        if (unlikely((Num_t)whole != n))
+            fail("Could not convert Num to Int64 without truncation: %g\n", n);
+    }
+    return whole;
+}
+// Num32 -> Int64, with the same round-trip check as above.
+MACROLIKE PUREFUNC Int64_t Int64$from_num32(Num32_t n, bool truncate) {
+    const int64_t whole = (int64_t)n;
+    if (!truncate) {
+        if (unlikely((Num32_t)whole != n))
+            fail("Could not convert Num32 to Int64 without truncation: %g\n", (double)n);
+    }
+    return whole;
+}
+// Int -> Int64. Small ints are untagged directly; big ints must fit in a
+// signed long unless `truncate` is set, and are then read with mpz_get_si().
+MACROLIKE PUREFUNC Int64_t Int64$from_int(Int_t i, bool truncate) {
+    const bool is_small = (i.small & 1L) != 0;
+    if likely (is_small)
+        return (int64_t)(i.small >> 2L);
+    if (!truncate) {
+        if (unlikely(!mpz_fits_slong_p(*i.big)))
+            fail("Integer is too big to fit in a 64-bit integer: %k", (Text_t[1]){Int$value_as_text(i)});
+    }
+    return mpz_get_si(*i.big);
+}
+// Lossless widening conversions.
+MACROLIKE CONSTFUNC Int64_t Int64$from_int32(Int32_t i) {
+    return (Int64_t)i;
+}
+MACROLIKE CONSTFUNC Int64_t Int64$from_int16(Int16_t i) {
+    return (Int64_t)i;
+}
+MACROLIKE CONSTFUNC Int64_t Int64$from_int8(Int8_t i) {
+    return (Int64_t)i;
+}
+
+// Int32 constructors
+// Num -> Int32. Unless `truncate` is set, fails when converting the result
+// back to Num does not reproduce `n`.
+MACROLIKE PUREFUNC Int32_t Int32$from_num(Num_t n, bool truncate) {
+    const int32_t whole = (int32_t)n;
+    if (!truncate) {
+        if (unlikely((Num_t)whole != n))
+            fail("Could not convert Num to Int32 without truncation: %g\n", n);
+    }
+    return whole;
+}
+// Num32 -> Int32, with the same round-trip check as above.
+MACROLIKE PUREFUNC Int32_t Int32$from_num32(Num32_t n, bool truncate) {
+    const int32_t whole = (int32_t)n;
+    if (!truncate) {
+        if (unlikely((Num32_t)whole != n))
+            fail("Could not convert Num32 to Int32 without truncation: %g\n", (double)n);
+    }
+    return whole;
+}
+// Int -> Int32: narrow through Int64 and verify nothing was lost.
+MACROLIKE PUREFUNC Int32_t Int32$from_int(Int_t i, bool truncate) {
+    const int64_t wide = Int64$from_int(i, truncate);
+    const int32_t narrow = (int32_t)wide;
+    if (!truncate) {
+        if (unlikely((int64_t)narrow != wide))
+            fail("Integer is too big to fit in a 32-bit integer: %k", (Text_t[1]){Int$value_as_text(i)});
+    }
+    return narrow;
+}
+// Int64 -> Int32, failing on overflow unless `truncate` is set.
+MACROLIKE PUREFUNC Int32_t Int32$from_int64(Int64_t i64, bool truncate) {
+    const int32_t narrow = (int32_t)i64;
+    if (!truncate) {
+        if (unlikely((int64_t)narrow != i64))
+            fail("Integer is too big to fit in a 32-bit integer: %ld", i64);
+    }
+    return narrow;
+}
+// Lossless widening conversions.
+MACROLIKE CONSTFUNC Int32_t Int32$from_int16(Int16_t i) {
+    return (Int32_t)i;
+}
+MACROLIKE CONSTFUNC Int32_t Int32$from_int8(Int8_t i) {
+    return (Int32_t)i;
+}
+
+// Int16 constructors
+// Num -> Int16. Unless `truncate` is set, fails when converting the result
+// back to Num does not reproduce `n`.
+MACROLIKE PUREFUNC Int16_t Int16$from_num(Num_t n, bool truncate) {
+    const int16_t whole = (int16_t)n;
+    if (!truncate) {
+        if (unlikely((Num_t)whole != n))
+            fail("Could not convert Num to Int16 without truncation: %g\n", n);
+    }
+    return whole;
+}
+// Num32 -> Int16, with the same round-trip check as above.
+MACROLIKE PUREFUNC Int16_t Int16$from_num32(Num32_t n, bool truncate) {
+    const int16_t whole = (int16_t)n;
+    if (!truncate) {
+        if (unlikely((Num32_t)whole != n))
+            fail("Could not convert Num32 to Int16 without truncation: %g\n", (double)n);
+    }
+    return whole;
+}
+// Int -> Int16: narrow through Int64 and verify nothing was lost.
+MACROLIKE PUREFUNC Int16_t Int16$from_int(Int_t i, bool truncate) {
+    const int64_t wide = Int64$from_int(i, truncate);
+    const int16_t narrow = (int16_t)wide;
+    if (!truncate) {
+        if (unlikely((int64_t)narrow != wide))
+            fail("Integer is too big to fit in a 16-bit integer!");
+    }
+    return narrow;
+}
+// Int64 -> Int16, failing on overflow unless `truncate` is set.
+MACROLIKE PUREFUNC Int16_t Int16$from_int64(Int64_t i64, bool truncate) {
+    const int16_t narrow = (int16_t)i64;
+    if (!truncate) {
+        if (unlikely((int64_t)narrow != i64))
+            fail("Integer is too big to fit in a 16-bit integer: %ld", i64);
+    }
+    return narrow;
+}
+// Int32 -> Int16, failing on overflow unless `truncate` is set.
+// The failure message formats the 32-bit value with %d; the previous %ld
+// did not match the `int`-sized variadic argument.
+MACROLIKE PUREFUNC Int16_t Int16$from_int32(Int32_t i32, bool truncate) {
+    int16_t i16 = (int16_t)i32;
+    if (!truncate && unlikely((int32_t)i16 != i32))
+        fail("Integer is too big to fit in a 16-bit integer: %d", i32);
+    return i16;
+}
+// Int8 -> Int16 is a lossless widening conversion.
+MACROLIKE CONSTFUNC Int16_t Int16$from_int8(Int8_t i) { return (Int16_t)i; }
+
+// Int8 constructors
+// Num -> Int8. Unless `truncate` is set, fails when converting the result
+// back to Num does not reproduce `n`.
+MACROLIKE PUREFUNC Int8_t Int8$from_num(Num_t n, bool truncate) {
+    const int8_t whole = (int8_t)n;
+    if (!truncate) {
+        if (unlikely((Num_t)whole != n))
+            fail("Could not convert Num to Int8 without truncation: %g\n", n);
+    }
+    return whole;
+}
+// Num32 -> Int8, with the same round-trip check as above.
+MACROLIKE PUREFUNC Int8_t Int8$from_num32(Num32_t n, bool truncate) {
+    const int8_t whole = (int8_t)n;
+    if (!truncate) {
+        if (unlikely((Num32_t)whole != n))
+            fail("Could not convert Num32 to Int8 without truncation: %g\n", (double)n);
+    }
+    return whole;
+}
+// Int -> Int8: narrow through Int64 and verify nothing was lost.
+MACROLIKE PUREFUNC Int8_t Int8$from_int(Int_t i, bool truncate) {
+    const int64_t wide = Int64$from_int(i, truncate);
+    const int8_t narrow = (int8_t)wide;
+    if (!truncate) {
+        if (unlikely((int64_t)narrow != wide))
+            fail("Integer is too big to fit in an 8-bit integer!");
+    }
+    return narrow;
+}
+// Int64 -> Int8, failing on overflow unless `truncate` is set.
+MACROLIKE PUREFUNC Int8_t Int8$from_int64(Int64_t i64, bool truncate) {
+    const int8_t narrow = (int8_t)i64;
+    if (!truncate) {
+        if (unlikely((int64_t)narrow != i64))
+            fail("Integer is too big to fit in a 8-bit integer: %ld", i64);
+    }
+    return narrow;
+}
+// Int32 -> Int8, failing on overflow unless `truncate` is set.
+// The failure message formats the value with %d; the previous %ld did not
+// match the `int`-sized variadic argument.
+MACROLIKE PUREFUNC Int8_t Int8$from_int32(Int32_t i32, bool truncate) {
+    int8_t i8 = (int8_t)i32;
+    if (!truncate && unlikely((int32_t)i8 != i32))
+        fail("Integer is too big to fit in a 8-bit integer: %d", i32);
+    return i8;
+}
+// Int16 -> Int8, failing on overflow unless `truncate` is set.
+// The 16-bit argument is promoted to `int` when passed to fail(), so it is
+// formatted with %d (previously %ld).
+MACROLIKE PUREFUNC Int8_t Int8$from_int16(Int16_t i16, bool truncate) {
+    int8_t i8 = (int8_t)i16;
+    if (!truncate && unlikely((int16_t)i8 != i16))
+        fail("Integer is too big to fit in a 8-bit integer: %d", i16);
+    return i8;
+}
+#pragma GCC diagnostic pop
+
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/src/stdlib/memory.c b/src/stdlib/memory.c
new file mode 100644
index 00000000..1805fb6f
--- /dev/null
+++ b/src/stdlib/memory.c
@@ -0,0 +1,31 @@
+// Type info and methods for "Memory" opaque type
+#include <gc.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <sys/param.h>
+#include <err.h>
+
+#include "memory.h"
+#include "metamethods.h"
+#include "text.h"
+#include "types.h"
+#include "util.h"
+
+// Text form of an opaque Memory value: the bare type name when `p` is NULL,
+// otherwise the address wrapped in `Memory<...>` (with ANSI color escapes
+// when `colorize` is set).
+public Text_t Memory$as_text(const void *p, bool colorize, const TypeInfo_t *) {
+    if (p == NULL)
+        return Text("Memory");
+
+    if (colorize)
+        return Text$format("\x1b[0;34;1mMemory<%p>\x1b[m", p);
+    return Text$format("Memory<%p>", p);
+}
+
+// Type info for the opaque Memory type: zero size and alignment, textified
+// by Memory$as_text, with serialization wired to the always-failing
+// cannot_serialize/cannot_deserialize handlers.
+public const TypeInfo_t Memory$info = {
+ .size=0,
+ .align=0,
+ .metamethods={
+ .as_text=Memory$as_text,
+ .serialize=cannot_serialize,
+ .deserialize=cannot_deserialize,
+ },
+};
+
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/src/stdlib/memory.h b/src/stdlib/memory.h
new file mode 100644
index 00000000..e03d5931
--- /dev/null
+++ b/src/stdlib/memory.h
@@ -0,0 +1,13 @@
+#pragma once
+
+// Type info and methods for "Memory" opaque type
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "types.h"
+
+// Type info object for Memory, and its text conversion: given a NULL `p` it
+// returns the type name, otherwise the formatted address.
+extern const TypeInfo_t Memory$info;
+Text_t Memory$as_text(const void *p, bool colorize, const TypeInfo_t *type);
+
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/src/stdlib/metamethods.c b/src/stdlib/metamethods.c
new file mode 100644
index 00000000..c0e11cfc
--- /dev/null
+++ b/src/stdlib/metamethods.c
@@ -0,0 +1,124 @@
+// Metamethods are methods that all types share for hashing, equality, comparison, and textifying
+
+#include <stdint.h>
+#include <string.h>
+
+#include "arrays.h"
+#include "bools.h"
+#include "bytes.h"
+#include "functiontype.h"
+#include "integers.h"
+#include "metamethods.h"
+#include "optionals.h"
+#include "pointers.h"
+#include "siphash.h"
+#include "tables.h"
+#include "text.h"
+#include "types.h"
+#include "util.h"
+
+// Hash a value: use the type's own `hash` metamethod when it has one,
+// otherwise SipHash the raw `type->size` bytes of the object.
+PUREFUNC public uint64_t generic_hash(const void *obj, const TypeInfo_t *type)
+{
+    if (type->metamethods.hash == NULL) {
+        const size_t num_bytes = (size_t)(type->size);
+        return siphash24((void*)obj, num_bytes);
+    }
+    return type->metamethods.hash(obj, type);
+}
+
+// Three-way comparison: identical pointers compare equal; otherwise use the
+// type's `compare` metamethod, falling back to memcmp() over `type->size`
+// bytes.
+PUREFUNC public int32_t generic_compare(const void *x, const void *y, const TypeInfo_t *type)
+{
+    if (x == y)
+        return 0;
+
+    if (type->metamethods.compare == NULL) {
+        const size_t num_bytes = (size_t)(type->size);
+        return (int32_t)memcmp((void*)x, (void*)y, num_bytes);
+    }
+    return type->metamethods.compare(x, y, type);
+}
+
+// Equality test: identical pointers are equal; otherwise use the type's
+// `equal` metamethod, falling back to generic_compare() == 0.
+PUREFUNC public bool generic_equal(const void *x, const void *y, const TypeInfo_t *type)
+{
+    if (x == y)
+        return true;
+
+    if (type->metamethods.equal == NULL) {
+        const int32_t ordering = generic_compare(x, y, type);
+        return (ordering == 0);
+    }
+    return type->metamethods.equal(x, y, type);
+}
+
+// Convert a value to text through the type's `as_text` metamethod; fails if
+// the type does not provide one.
+public Text_t generic_as_text(const void *obj, bool colorize, const TypeInfo_t *type)
+{
+    const bool has_as_text = (type->metamethods.as_text != NULL);
+    if (!has_as_text)
+        fail("No text metamethod provided for type!");
+
+    return type->metamethods.as_text(obj, colorize, type);
+}
+
+// Write one value to `out`: use the type's `serialize` metamethod when
+// present, otherwise dump the raw `type->size` bytes of the object.
+public void _serialize(const void *obj, FILE *out, Table_t *pointers, const TypeInfo_t *type)
+{
+    if (type->metamethods.serialize) {
+        type->metamethods.serialize(obj, out, pointers, type);
+        return;
+    }
+
+    const size_t num_bytes = (size_t)type->size;
+    fwrite(obj, num_bytes, 1, out);
+}
+
+// Serialize a value into a byte array.
+// The value is written to an in-memory stream via _serialize(), then the
+// stream's buffer is copied into GC-managed atomic memory and the malloc'd
+// stream buffer is freed.
+// Fails if the memory stream cannot be opened or flushed; previously a NULL
+// stream would have been passed straight to the writer.
+public Array_t generic_serialize(const void *x, const TypeInfo_t *type)
+{
+    char *buf = NULL;
+    size_t size = 0;
+    FILE *stream = open_memstream(&buf, &size);
+    if (!stream)
+        fail("Could not open a memory stream for serialization!");
+
+    Table_t pointers = {};
+    _serialize(x, stream, &pointers, type);
+    // `buf` and `size` are only guaranteed to be up to date after a
+    // successful flush/close of the stream
+    if (fclose(stream) != 0)
+        fail("Could not finish writing serialized data!");
+
+    Array_t bytes = {
+        .data=GC_MALLOC_ATOMIC(size),
+        .length=(int64_t)size,
+        .stride=1,
+        .atomic=1,
+    };
+    memcpy(bytes.data, buf, size);
+    free(buf);
+    return bytes;
+}
+
+// Read one value from `input` into `outval`: use the type's `deserialize`
+// metamethod when present, otherwise read `type->size` raw bytes.
+// A short read now fails instead of silently leaving `outval` partially
+// filled. Zero-sized types read nothing (fread() reports 0 items for a
+// zero-sized element, so that case is skipped rather than treated as an
+// error).
+public void _deserialize(FILE *input, void *outval, Array_t *pointers, const TypeInfo_t *type)
+{
+    if (type->metamethods.deserialize) {
+        type->metamethods.deserialize(input, outval, pointers, type);
+        return;
+    }
+
+    size_t size = (size_t)type->size;
+    if (size > 0 && fread(outval, size, 1, input) != 1)
+        fail("Not enough data to deserialize a value!");
+}
+
+// Deserialize a value of the given type from a byte array into `outval`.
+// The array is compacted first if its stride is not 1, so that its data can
+// be read as one contiguous buffer through fmemopen().
+// Fails if the in-memory stream cannot be opened; previously a NULL stream
+// would have been passed straight to the reader.
+public void generic_deserialize(Array_t bytes, void *outval, const TypeInfo_t *type)
+{
+    if (bytes.stride != 1)
+        Array$compact(&bytes, 1);
+
+    FILE *input = fmemopen(bytes.data, (size_t)bytes.length, "r");
+    if (!input)
+        fail("Could not open serialized data for reading!");
+
+    Array_t pointers = {};
+    _deserialize(input, outval, &pointers, type);
+    fclose(input);
+}
+
+// Print a value's text form to stdout followed by a newline; returns the sum
+// of what Text$print() and printf() report.
+public int generic_print(const void *obj, bool colorize, const TypeInfo_t *type)
+{
+    Text_t rendered = generic_as_text(obj, colorize, type);
+    int written = Text$print(stdout, rendered);
+    written += printf("\n");
+    return written;
+}
+
+// Serialize metamethod for types that cannot be serialized: always fails,
+// naming the type (obtained by textifying a NULL value).
+__attribute__((noreturn))
+public void cannot_serialize(const void*, FILE*, Table_t*, const TypeInfo_t *type)
+{
+    Text_t type_name = generic_as_text(NULL, false, type);
+    fail("Values of type %k cannot be serialized or deserialized!", &type_name);
+}
+
+// Deserialize counterpart of cannot_serialize(): always fails with the same
+// message.
+__attribute__((noreturn))
+public void cannot_deserialize(FILE*, void*, Array_t*, const TypeInfo_t *type)
+{
+    Text_t type_name = generic_as_text(NULL, false, type);
+    fail("Values of type %k cannot be serialized or deserialized!", &type_name);
+}
+
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/src/stdlib/metamethods.h b/src/stdlib/metamethods.h
new file mode 100644
index 00000000..a75fcf7f
--- /dev/null
+++ b/src/stdlib/metamethods.h
@@ -0,0 +1,22 @@
+#pragma once
+// Metamethods are methods that all types share:
+
+#include <stdint.h>
+
+#include "datatypes.h"
+#include "types.h"
+#include "util.h"
+
+// Generic operations that dispatch on a type's metamethods. Where a
+// metamethod is absent, hash/compare/serialize/deserialize work on the raw
+// bytes of the value, equality falls back to comparison, and as_text fails.
+PUREFUNC uint64_t generic_hash(const void *obj, const TypeInfo_t *type);
+PUREFUNC int32_t generic_compare(const void *x, const void *y, const TypeInfo_t *type);
+PUREFUNC bool generic_equal(const void *x, const void *y, const TypeInfo_t *type);
+Text_t generic_as_text(const void *obj, bool colorize, const TypeInfo_t *type);
+// Stream-level (de)serialization of a single value, and the array-based
+// wrappers around them.
+void _serialize(const void *obj, FILE *out, Table_t *pointers, const TypeInfo_t *type);
+Array_t generic_serialize(const void *x, const TypeInfo_t *type);
+void _deserialize(FILE *input, void *outval, Array_t *pointers, const TypeInfo_t *type);
+void generic_deserialize(Array_t bytes, void *outval, const TypeInfo_t *type);
+// Print the text form of a value to stdout followed by a newline.
+int generic_print(const void *obj, bool colorize, const TypeInfo_t *type);
+// Metamethods for types that do not support serialization; both always fail.
+void cannot_serialize(const void*, FILE*, Table_t*, const TypeInfo_t *type);
+void cannot_deserialize(FILE*, void*, Array_t*, const TypeInfo_t *type);
+
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/src/stdlib/moments.c b/src/stdlib/moments.c
new file mode 100644
index 00000000..bb3d70a9
--- /dev/null
+++ b/src/stdlib/moments.c
@@ -0,0 +1,323 @@
+// Moment methods/type info
+#include <ctype.h>
+#include <gc.h>
+#include <err.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "datatypes.h"
+#include "math.h"
+#include "moments.h"
+#include "optionals.h"
+#include "patterns.h"
+#include "stdlib.h"
+#include "text.h"
+#include "util.h"
+
+// The timezone most recently installed with Moment$set_local_timezone(), or
+// none if it has not been set or looked up yet.
+static OptionalText_t _local_timezone = NONE_TEXT;
+
+// Run `body` with `tz` installed as the process-wide local timezone, then
+// restore the previously recorded one. When `tz` is none (negative length),
+// `body` runs without touching the timezone. `body` is expanded twice, once
+// per branch. The switch goes through Moment$set_local_timezone(), which
+// modifies the TZ environment variable.
+#define WITH_TIMEZONE(tz, body) ({ if (tz.length >= 0) { \
+ OptionalText_t old_timezone = _local_timezone; \
+ Moment$set_local_timezone(tz); \
+ body; \
+ Moment$set_local_timezone(old_timezone); \
+ } else { \
+ body; \
+ }})
+
+// Text form of a Moment: the type name when `moment` is NULL, otherwise the
+// local time formatted with strftime's "%c %Z", wrapped in ANSI color
+// escapes when `colorize` is set.
+// The scratch buffer is an ordinary local: the formatted bytes are copied
+// out by Text$format() before returning, so a shared static buffer only made
+// the function non-reentrant.
+public Text_t Moment$as_text(const void *moment, bool colorize, const TypeInfo_t*)
+{
+    if (!moment)
+        return Text("Moment");
+
+    struct tm info;
+    struct tm *final_info = localtime_r(&((Moment_t*)moment)->tv_sec, &info);
+    char buf[256];
+    size_t len = strftime(buf, sizeof(buf), "%c %Z", final_info);
+    Text_t text = Text$format("%.*s", (int)len, buf);
+    if (colorize)
+        text = Text$concat(Text("\x1b[36m"), text, Text("\x1b[m"));
+    return text;
+}
+
+// Order two Moments chronologically: by whole seconds first, then by the
+// sub-second field. Returns -1, 0 or 1.
+PUREFUNC public int32_t Moment$compare(const void *va, const void *vb, const TypeInfo_t*)
+{
+    const Moment_t *lhs = (Moment_t*)va;
+    const Moment_t *rhs = (Moment_t*)vb;
+    if (lhs->tv_sec == rhs->tv_sec)
+        return (lhs->tv_usec > rhs->tv_usec) - (lhs->tv_usec < rhs->tv_usec);
+    return (lhs->tv_sec > rhs->tv_sec) - (lhs->tv_sec < rhs->tv_sec);
+}
+
+// A Moment with a negative microsecond field is the "none" value.
+CONSTFUNC public bool Moment$is_none(const void *m, const TypeInfo_t*)
+{
+    const Moment_t *moment = (Moment_t*)m;
+    return moment->tv_usec < 0;
+}
+
+// Current wall-clock time from CLOCK_REALTIME, with the nanosecond part
+// reduced to microseconds. Fails if the clock cannot be read.
+public Moment_t Moment$now(void)
+{
+    struct timespec current;
+    const int status = clock_gettime(CLOCK_REALTIME, &current);
+    if (status != 0)
+        fail("Couldn't get the time!");
+
+    Moment_t now = {.tv_sec=current.tv_sec, .tv_usec=current.tv_nsec/1000};
+    return now;
+}
+
+// Build a Moment from calendar fields, interpreted in `timezone` (or in the
+// current local timezone when `timezone` is none).
+// `month` is 1-based and `year` is the full year; both are converted to
+// struct tm conventions. Each integer field must fit in 32 bits.
+// `second` may be fractional: its whole part is added to the result of
+// mktime() and the fraction is stored in `tv_usec` as microseconds. The
+// fraction was previously scaled by 1e9, which disagrees with Moment$now()
+// and Moment$microsecond(). The split uses floor() so a negative `second`
+// cannot produce a negative `tv_usec`, which is the "none" marker.
+public Moment_t Moment$new(Int_t year, Int_t month, Int_t day, Int_t hour, Int_t minute, double second, OptionalText_t timezone)
+{
+    struct tm info = {
+        .tm_min=Int32$from_int(minute, false),
+        .tm_hour=Int32$from_int(hour, false),
+        .tm_mday=Int32$from_int(day, false),
+        .tm_mon=Int32$from_int(month, false) - 1,
+        .tm_year=Int32$from_int(year, false) - 1900,
+        .tm_isdst=-1,
+    };
+
+    time_t t;
+    WITH_TIMEZONE(timezone, t = mktime(&info));
+
+    double whole_seconds = floor(second);
+    return (Moment_t){
+        .tv_sec=t + (time_t)whole_seconds,
+        .tv_usec=(suseconds_t)((second - whole_seconds) * 1e6),
+    };
+}
+
+// Return `moment` shifted by the given offsets.
+// seconds/minutes/hours are applied as an exact duration; days, weeks,
+// months and years are applied to the calendar fields in `timezone` (or the
+// current local timezone when `timezone` is none) and renormalized by
+// mktime().
+// Fixes relative to the previous version:
+//  - the fractional offset is stored as microseconds (it was scaled by 1e9);
+//  - microseconds carry into seconds, and the floor()-based split keeps
+//    `tv_usec` from going negative (negative marks "none");
+//  - mktime() now runs inside the same timezone as localtime_r(), so a
+//    non-local `timezone` no longer skews the result.
+// NOTE(review): `tm_isdst` is left as reported for the original moment;
+// confirm whether shifts across a DST change should reset it to -1.
+public Moment_t Moment$after(Moment_t moment, double seconds, double minutes, double hours, Int_t days, Int_t weeks, Int_t months, Int_t years, OptionalText_t timezone)
+{
+    double offset = seconds + 60.*minutes + 3600.*hours;
+    double whole_seconds = floor(offset);
+    moment.tv_sec += (time_t)whole_seconds;
+
+    suseconds_t usec = moment.tv_usec + (suseconds_t)((offset - whole_seconds) * 1e6);
+    if (usec >= 1000000) {
+        usec -= 1000000;
+        moment.tv_sec += 1;
+    }
+
+    Int32_t day_delta = Int32$from_int(days, false) + 7*Int32$from_int(weeks, false);
+    Int32_t month_delta = Int32$from_int(months, false);
+    Int32_t year_delta = Int32$from_int(years, false);
+
+    struct tm info = {};
+    time_t t;
+    WITH_TIMEZONE(timezone, {
+        localtime_r(&moment.tv_sec, &info);
+        info.tm_mday += day_delta;
+        info.tm_mon += month_delta;
+        info.tm_year += year_delta;
+        t = mktime(&info);
+    });
+
+    return (Moment_t){
+        .tv_sec=t,
+        .tv_usec=usec,
+    };
+}
+
+// Seconds elapsed from `now` until `then` (negative if `then` is earlier).
+// `tv_usec` holds microseconds (see Moment$now), so the sub-second
+// difference is scaled by 1e-6; it was previously scaled by 1e-9.
+CONSTFUNC public double Moment$seconds_till(Moment_t now, Moment_t then)
+{
+    return (double)(then.tv_sec - now.tv_sec) + 1e-6*(double)(then.tv_usec - now.tv_usec);
+}
+
+// Minutes elapsed from `now` until `then`.
+CONSTFUNC public double Moment$minutes_till(Moment_t now, Moment_t then)
+{
+    const double elapsed_seconds = Moment$seconds_till(now, then);
+    return elapsed_seconds/60.;
+}
+
+// Hours elapsed from `now` until `then`.
+CONSTFUNC public double Moment$hours_till(Moment_t now, Moment_t then)
+{
+    const double elapsed_seconds = Moment$seconds_till(now, then);
+    return elapsed_seconds/3600.;
+}
+
+// Break a Moment into calendar fields in `timezone` (or the current local
+// timezone when `timezone` is none). Every output pointer is optional; NULL
+// outputs are skipped.
+// `month` and `weekday` are reported 1-based and `year` as the full year.
+// `nanosecond` is derived from the stored microseconds (tv_usec * 1000); it
+// previously received the raw microsecond count.
+public void Moment$get(
+    Moment_t moment, Int_t *year, Int_t *month, Int_t *day, Int_t *hour, Int_t *minute, Int_t *second,
+    Int_t *nanosecond, Int_t *weekday, OptionalText_t timezone)
+{
+    struct tm info = {};
+    WITH_TIMEZONE(timezone, localtime_r(&moment.tv_sec, &info));
+
+    if (year) *year = I(info.tm_year + 1900);
+    if (month) *month = I(info.tm_mon + 1);
+    if (day) *day = I(info.tm_mday);
+    if (hour) *hour = I(info.tm_hour);
+    if (minute) *minute = I(info.tm_min);
+    if (second) *second = I(info.tm_sec);
+    if (nanosecond) *nanosecond = I((int64_t)moment.tv_usec * 1000);
+    if (weekday) *weekday = I(info.tm_wday + 1);
+}
+
+// Shared helper: break `moment` into calendar fields in `timezone` (or the
+// current local timezone when `timezone` is none).
+static struct tm moment_fields_in_zone(Moment_t moment, OptionalText_t timezone)
+{
+    struct tm fields = {};
+    WITH_TIMEZONE(timezone, localtime_r(&moment.tv_sec, &fields));
+    return fields;
+}
+
+// Full calendar year (e.g. 2025).
+public Int_t Moment$year(Moment_t moment, OptionalText_t timezone)
+{
+    struct tm fields = moment_fields_in_zone(moment, timezone);
+    return I(fields.tm_year + 1900);
+}
+
+// Month of the year, 1-based.
+public Int_t Moment$month(Moment_t moment, OptionalText_t timezone)
+{
+    struct tm fields = moment_fields_in_zone(moment, timezone);
+    return I(fields.tm_mon + 1);
+}
+
+// Day of the week, 1-based (tm_wday + 1).
+public Int_t Moment$day_of_week(Moment_t moment, OptionalText_t timezone)
+{
+    struct tm fields = moment_fields_in_zone(moment, timezone);
+    return I(fields.tm_wday + 1);
+}
+
+// Day of the month.
+public Int_t Moment$day_of_month(Moment_t moment, OptionalText_t timezone)
+{
+    struct tm fields = moment_fields_in_zone(moment, timezone);
+    return I(fields.tm_mday);
+}
+
+// Day of the year, exactly as reported in tm_yday.
+// NOTE(review): unlike month and weekday, no +1 is applied here, so the
+// result is 0-based -- confirm that this is intended.
+public Int_t Moment$day_of_year(Moment_t moment, OptionalText_t timezone)
+{
+    struct tm fields = moment_fields_in_zone(moment, timezone);
+    return I(fields.tm_yday);
+}
+
+// Hour of the day.
+public Int_t Moment$hour(Moment_t moment, OptionalText_t timezone)
+{
+    struct tm fields = moment_fields_in_zone(moment, timezone);
+    return I(fields.tm_hour);
+}
+
+// Minute of the hour.
+public Int_t Moment$minute(Moment_t moment, OptionalText_t timezone)
+{
+    struct tm fields = moment_fields_in_zone(moment, timezone);
+    return I(fields.tm_min);
+}
+
+// Second of the minute.
+public Int_t Moment$second(Moment_t moment, OptionalText_t timezone)
+{
+    struct tm fields = moment_fields_in_zone(moment, timezone);
+    return I(fields.tm_sec);
+}
+
+// Sub-second part in microseconds; the timezone does not affect it.
+public Int_t Moment$microsecond(Moment_t moment, OptionalText_t timezone)
+{
+    (void)timezone;
+    return I(moment.tv_usec);
+}
+
+// Format a Moment with a strftime() format string, in `timezone` (or the
+// current local timezone when `timezone` is none).
+// `info` is zero-initialized like in the other accessors, so strftime()
+// never reads indeterminate fields if localtime_r() fails. The scratch
+// buffer is an ordinary local because Text$format() copies the bytes out.
+// strftime() returns 0 when the output does not fit in the 256-byte buffer,
+// in which case the result is empty text.
+public Text_t Moment$format(Moment_t moment, Text_t fmt, OptionalText_t timezone)
+{
+    struct tm info = {};
+    WITH_TIMEZONE(timezone, localtime_r(&moment.tv_sec, &info));
+    char buf[256];
+    size_t len = strftime(buf, sizeof(buf), Text$as_c_string(fmt), &info);
+    return Text$format("%.*s", (int)len, buf);
+}
+
+// Date portion of a Moment, formatted with strftime's "%F".
+public Text_t Moment$date(Moment_t moment, OptionalText_t timezone)
+{
+    Text_t date_format = Text("%F");
+    return Moment$format(moment, date_format, timezone);
+}
+
+// Time-of-day portion of a Moment. `seconds` selects whether seconds are
+// shown and `am_pm` selects the 12-hour form with an am/pm suffix; spaces
+// are trimmed from both ends of the result.
+public Text_t Moment$time(Moment_t moment, bool seconds, bool am_pm, OptionalText_t timezone)
+{
+    Text_t time_format;
+    if (seconds) {
+        if (am_pm) time_format = Text("%l:%M:%S%P");
+        else time_format = Text("%T");
+    } else {
+        if (am_pm) time_format = Text("%l:%M%P");
+        else time_format = Text("%H:%M");
+    }
+    Text_t formatted = Moment$format(moment, time_format, timezone);
+    return Text$trim(formatted, Pattern(" "), true, true);
+}
+
+// Parse `text` according to a strptime() `format`. Returns none when the
+// text does not match or has trailing characters; fails outright if the
+// format uses %Z.
+//
+// Offset handling: mktime() interprets the parsed fields as local wall time,
+// i.e. it returns (wall - local_offset). The instant actually described is
+// (wall - parsed_offset), so the correction is
+//     t + local_offset - parsed_offset.
+// The previous code applied this correction with the opposite sign. When the
+// format has no %z, the parsed offset stays 0 and the fields are treated as
+// UTC.
+public OptionalMoment_t Moment$parse(Text_t text, Text_t format)
+{
+    struct tm info = {.tm_isdst=-1};
+    const char *str = Text$as_c_string(text);
+    const char *fmt = Text$as_c_string(format);
+    if (strstr(fmt, "%Z"))
+        fail("The %%Z specifier is not supported for time parsing!");
+
+    char *invalid = strptime(str, fmt, &info);
+    if (!invalid || invalid[0] != '\0')
+        return NONE_MOMENT;
+
+    long parsed_offset = info.tm_gmtoff; // Need to cache this because mktime() mutates it to local timezone >:(
+    time_t t = mktime(&info);
+    long local_offset = info.tm_gmtoff;
+    return (Moment_t){.tv_sec=t + local_offset - parsed_offset};
+}
+
+// Render a signed count of `unit`s as relative-time text: "now" for zero,
+// otherwise "<n> <unit>[s] ago" for negative counts and
+// "<n> <unit>[s] later" for positive ones.
+static INLINE Text_t num_format(long n, const char *unit)
+{
+    if (n == 0)
+        return Text("now");
+
+    const long magnitude = n < 0 ? -n : n;
+    const char *direction = n < 0 ? "ago" : "later";
+    const bool singular = (n == 1 || n == -1);
+    return Text$format(singular ? "%ld %s %s" : "%ld %ss %s", magnitude, unit, direction);
+}
+
+// Describe `moment` relative to `relative_to` ("3 days ago", "1 hour later",
+// "now", ...). The largest unit is chosen whose calendar field differs
+// between the two moments AND whose span the elapsed time exceeds; below one
+// minute the unit is picked from the elapsed time alone.
+public Text_t Moment$relative(Moment_t moment, Moment_t relative_to, OptionalText_t timezone)
+{
+    struct tm info = {};
+    struct tm relative_info = {};
+    WITH_TIMEZONE(timezone, {
+        localtime_r(&moment.tv_sec, &info);
+        localtime_r(&relative_to.tv_sec, &relative_info);
+    });
+
+    const double second_diff = Moment$seconds_till(relative_to, moment);
+    const double magnitude = fabs(second_diff);
+    const long year_diff = (long)info.tm_year - (long)relative_info.tm_year;
+
+    if (info.tm_year != relative_info.tm_year && magnitude > 365.*24.*60.*60.)
+        return num_format(year_diff, "year");
+
+    if (info.tm_mon != relative_info.tm_mon && magnitude > 31.*24.*60.*60.)
+        return num_format(12*year_diff + (long)info.tm_mon - (long)relative_info.tm_mon, "month");
+
+    if (info.tm_yday != relative_info.tm_yday && magnitude > 24.*60.*60.)
+        return num_format(round(second_diff/(24.*60.*60.)), "day");
+
+    if (info.tm_hour != relative_info.tm_hour && magnitude > 60.*60.)
+        return num_format(round(second_diff/(60.*60.)), "hour");
+
+    if (info.tm_min != relative_info.tm_min && magnitude > 60.)
+        return num_format(round(second_diff/(60.)), "minute");
+
+    // Sub-minute differences: choose the unit from the magnitude alone
+    if (magnitude < 1e-6)
+        return num_format((long)(second_diff*1e9), "nanosecond");
+    if (magnitude < 1e-3)
+        return num_format((long)(second_diff*1e6), "microsecond");
+    if (magnitude < 1.0)
+        return num_format((long)(second_diff*1e3), "millisecond");
+    return num_format((long)(second_diff), "second");
+}
+
+// Whole seconds of the Moment as a Unix timestamp (the sub-second part is
+// dropped).
+CONSTFUNC public Int64_t Moment$unix_timestamp(Moment_t moment)
+{
+    const Int64_t seconds = (Int64_t)moment.tv_sec;
+    return seconds;
+}
+
+// Moment for a Unix timestamp; the sub-second part is left at zero.
+CONSTFUNC public Moment_t Moment$from_unix_timestamp(Int64_t timestamp)
+{
+    Moment_t moment = {.tv_sec=(time_t)timestamp};
+    return moment;
+}
+
+// Install `timezone` as the process-wide local timezone by setting the TZ
+// environment variable (or unsetting it when `timezone` is none), remember
+// it in `_local_timezone`, and call tzset() to apply the change.
+public void Moment$set_local_timezone(OptionalText_t timezone)
+{
+    const bool is_none = (timezone.length < 0);
+    if (is_none)
+        unsetenv("TZ");
+    else
+        setenv("TZ", Text$as_c_string(timezone), 1);
+
+    _local_timezone = timezone;
+    tzset();
+}
+
+// Name of the local timezone. If none has been recorded yet, it is derived
+// from the target of the /etc/localtime symlink (the part after
+// "/zoneinfo/") and cached in `_local_timezone`. Fails if the link cannot be
+// read or does not point into a zoneinfo directory.
+// readlink() does not NUL-terminate its output, so one byte of the buffer is
+// reserved and the terminator is written explicitly before the string
+// search; previously a link target filling the buffer left it unterminated.
+public Text_t Moment$get_local_timezone(void)
+{
+    if (_local_timezone.length < 0) {
+        // Kept static as before, in case Text$from_str() retains the pointer
+        static char buf[PATH_MAX];
+        ssize_t len = readlink("/etc/localtime", buf, sizeof(buf) - 1);
+        if (len < 0)
+            fail("Could not get local timezone!");
+        buf[len] = '\0';
+
+        char *zoneinfo = strstr(buf, "/zoneinfo/");
+        if (zoneinfo)
+            _local_timezone = Text$from_str(zoneinfo + strlen("/zoneinfo/"));
+        else
+            fail("Could not resolve local timezone!");
+    }
+    return _local_timezone;
+}
+
+// Type info for Moment: sized and aligned as Moment_t, with custom text
+// conversion, chronological comparison and "none" detection. No hash, equal,
+// serialize or deserialize metamethods are set here.
+public const TypeInfo_t Moment$info = {
+ .size=sizeof(Moment_t),
+ .align=__alignof__(Moment_t),
+ .metamethods={
+ .as_text=Moment$as_text,
+ .compare=Moment$compare,
+ .is_none=Moment$is_none,
+ },
+};
+
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/src/stdlib/moments.h b/src/stdlib/moments.h
new file mode 100644
index 00000000..ff6d4119
--- /dev/null
+++ b/src/stdlib/moments.h
@@ -0,0 +1,44 @@
+#pragma once
+
+// Moment objects
+
+#include <stdint.h>
+
+#include "datatypes.h"
+#include "integers.h"
+#include "optionals.h"
+#include "types.h"
+#include "util.h"
+
+Text_t Moment$as_text(const void *moment, bool colorize, const TypeInfo_t *type); // metamethod registered in Moment$info
+PUREFUNC int32_t Moment$compare(const void *a, const void *b, const TypeInfo_t *type); // metamethod registered in Moment$info
+CONSTFUNC public bool Moment$is_none(const void *m, const TypeInfo_t*); // NOTE(review): only prototype in this header marked `public` — confirm that is intended
+Moment_t Moment$now(void);
+Moment_t Moment$new(Int_t year, Int_t month, Int_t day, Int_t hour, Int_t minute, double second, OptionalText_t timezone);
+Moment_t Moment$after(Moment_t moment, double seconds, double minutes, double hours, Int_t days, Int_t weeks, Int_t months, Int_t years, OptionalText_t timezone);
+CONSTFUNC double Moment$seconds_till(Moment_t now, Moment_t then);
+CONSTFUNC double Moment$minutes_till(Moment_t now, Moment_t then);
+CONSTFUNC double Moment$hours_till(Moment_t now, Moment_t then);
+Int_t Moment$year(Moment_t moment, OptionalText_t timezone);
+Int_t Moment$month(Moment_t moment, OptionalText_t timezone);
+Int_t Moment$day_of_week(Moment_t moment, OptionalText_t timezone);
+Int_t Moment$day_of_month(Moment_t moment, OptionalText_t timezone);
+Int_t Moment$day_of_year(Moment_t moment, OptionalText_t timezone);
+Int_t Moment$hour(Moment_t moment, OptionalText_t timezone);
+Int_t Moment$minute(Moment_t moment, OptionalText_t timezone);
+Int_t Moment$second(Moment_t moment, OptionalText_t timezone);
+Int_t Moment$microsecond(Moment_t moment, OptionalText_t timezone);
+Text_t Moment$format(Moment_t moment, Text_t fmt, OptionalText_t timezone);
+Text_t Moment$date(Moment_t moment, OptionalText_t timezone);
+Text_t Moment$time(Moment_t moment, bool seconds, bool am_pm, OptionalText_t timezone);
+OptionalMoment_t Moment$parse(Text_t text, Text_t format);
+Text_t Moment$relative(Moment_t moment, Moment_t relative_to, OptionalText_t timezone);
+CONSTFUNC Int64_t Moment$unix_timestamp(Moment_t moment); // returns moment.tv_sec
+CONSTFUNC Moment_t Moment$from_unix_timestamp(Int64_t timestamp); // sets only tv_sec
+void Moment$set_local_timezone(OptionalText_t timezone); // sets or unsets TZ, then calls tzset()
+Text_t Moment$get_local_timezone(void); // cached; otherwise derived from the /etc/localtime symlink
+
+extern const TypeInfo_t Moment$info;
+
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
+
diff --git a/src/stdlib/mutexeddata.c b/src/stdlib/mutexeddata.c
new file mode 100644
index 00000000..f47adfc1
--- /dev/null
+++ b/src/stdlib/mutexeddata.c
@@ -0,0 +1,38 @@
+// Mutexed data methods/type info
+#include <ctype.h>
+#include <err.h>
+#include <gc.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <sys/param.h>
+
+#include "bools.h"
+#include "metamethods.h"
+#include "optionals.h"
+#include "text.h"
+#include "util.h"
+
+// Render mutexed data as text.
+// With a NULL value only the type is described ("mutexed(T)"); otherwise the result is
+// "mutexed T<ptr>" using the pointer stored in the value. `colorize` adds ANSI escapes.
+static Text_t MutexedData$as_text(const void *m, bool colorize, const TypeInfo_t *type)
+{
+    Text_t inner_name = generic_as_text(NULL, false, type->MutexedDataInfo.type);
+    if (m)
+        return Text$format(colorize ? "\x1b[34;1mmutexed %k<%p>\x1b[m" : "mutexed %k<%p>", &inner_name, *((MutexedData_t*)m));
+
+    Text_t prefix = colorize ? Text("\x1b[34;1mmutexed\x1b[m(") : Text("mutexed(");
+    return Texts(prefix, inner_name, Text(")"));
+}
+
+// A mutexed-data value counts as "none" when the pointer it holds is NULL.
+static bool MutexedData$is_none(const void *m, const TypeInfo_t *)
+{
+    MutexedData_t held = *((MutexedData_t*)m);
+    return held == NULL;
+}
+
+public const metamethods_t MutexedData$metamethods = { // Shared metamethod table referenced by the MutexedData$info(t) macro
+ .as_text=MutexedData$as_text,
+ .is_none=MutexedData$is_none,
+ .serialize=cannot_serialize, // serialization is routed to the cannot_* handlers
+ .deserialize=cannot_deserialize,
+};
+
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/src/stdlib/mutexeddata.h b/src/stdlib/mutexeddata.h
new file mode 100644
index 00000000..47686195
--- /dev/null
+++ b/src/stdlib/mutexeddata.h
@@ -0,0 +1,17 @@
+#pragma once
+
+// Metamethods and type info for mutexed data
+
+#include "types.h"
+#include "optionals.h"
+#include "util.h"
+
+#define NONE_MUTEXED_DATA ((MutexedData_t)NULL) // the "none" value is a NULL pointer (matches MutexedData$is_none)
+
+extern const metamethods_t MutexedData$metamethods;
+
+#define MutexedData$info(t) &((TypeInfo_t){.size=sizeof(MutexedData_t), .align=__alignof(MutexedData_t), /* address of a TypeInfo_t compound literal wrapping inner type `t` */ \
+ .tag=MutexedDataInfo, .MutexedDataInfo.type=t, \
+ .metamethods=MutexedData$metamethods})
+
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/src/stdlib/nums.c b/src/stdlib/nums.c
new file mode 100644
index 00000000..98f7b509
--- /dev/null
+++ b/src/stdlib/nums.c
@@ -0,0 +1,186 @@
+// Type infos and methods for Nums (floating point)
+
+#include <float.h>
+#include <gc.h>
+#include <math.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "arrays.h"
+#include "nums.h"
+#include "string.h"
+#include "text.h"
+#include "types.h"
+
+// Text form of a 64-bit float, printed with up to 16 significant digits.
+// A NULL pointer asks for the type's name instead of a value.
+public PUREFUNC Text_t Num$as_text(const void *f, bool colorize, const TypeInfo_t*) {
+    if (f == NULL)
+        return Text("Num");
+
+    double value = *(double*)f;
+    return Text$format(colorize ? "\x1b[35m%.16g\x1b[33;2m\x1b[m" : "%.16g", value);
+}
+
+// Three-way comparison of two doubles based on their bit patterns.
+// Returns -1, 0 or 1. Identical bit patterns compare equal; distinct patterns are
+// always ordered, so -0.0 sorts before 0.0 here even though `==` treats them as equal.
+public PUREFUNC int32_t Num$compare(const void *x, const void *y, const TypeInfo_t*) {
+    // Copy the bits out with memcpy: reading a double through an int64_t pointer
+    // breaks the strict-aliasing rule and is undefined behavior.
+    int64_t rx, ry;
+    memcpy(&rx, x, sizeof(rx));
+    memcpy(&ry, y, sizeof(ry));
+
+    if (rx == ry) return 0;
+
+    // Negative floats are sign-magnitude, so their integer order is reversed;
+    // flipping every non-sign bit restores numeric order.
+    if (rx < 0) rx ^= INT64_MAX;
+    if (ry < 0) ry ^= INT64_MAX;
+
+    return (rx > ry) - (rx < ry);
+}
+
+// Equality of the two pointed-to doubles using the `==` operator.
+public PUREFUNC bool Num$equal(const void *x, const void *y, const TypeInfo_t*) {
+    double lhs = *(double*)x;
+    double rhs = *(double*)y;
+    return lhs == rhs;
+}
+
+// Approximate equality: true when `a` and `b` are equal, differ by less than `absolute`,
+// or differ by less than `ratio` (clamped to [0, 1]) times the sum of their magnitudes.
+// A NaN difference is never "near".
+public CONSTFUNC bool Num$near(double a, double b, double ratio, double absolute) {
+    if (a == b)
+        return true;
+
+    double gap = fabs(a - b);
+    if (gap < absolute)
+        return true;
+    if (isnan(gap))
+        return false;
+
+    if (ratio < 0) ratio = 0;
+    else if (ratio > 1) ratio = 1;
+
+    double tolerance = fabs(a * ratio) + fabs(b * ratio);
+    if (isinf(tolerance))
+        tolerance = DBL_MAX; // keep the bound finite when the sum overflows
+    return gap < tolerance;
+}
+
+// Fixed-point text ("%.*f") with `precision` digits after the decimal point.
+public Text_t Num$format(double f, Int_t precision) {
+    int digits = (int)Int64$from_int(precision, false);
+    return Text$format("%.*f", digits, f);
+}
+
+// Scientific-notation text ("%.*e") with `precision` digits after the decimal point.
+public Text_t Num$scientific(double f, Int_t precision) {
+    int digits = (int)Int64$from_int(precision, false);
+    return Text$format("%.*e", digits, f);
+}
+
+public CONSTFUNC double Num$mod(double num, double modulus) { // Remainder of num by modulus, with a negative remainder shifted up by |modulus|
+ // Euclidean division, see: https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/divmodnote-letter.pdf
+ double r = remainder(num, modulus); // remainder() can return a negative value
+ r -= (r < 0) * (2*(modulus < 0) - 1) * modulus; // branch-free: when r < 0 this adds |modulus|; otherwise the multiplier is 0
+ return r;
+}
+
+// One-based modulus: shift down by one, take Num$mod, then shift back up by one.
+public CONSTFUNC double Num$mod1(double num, double modulus) {
+    double zero_based = Num$mod(num-1, modulus);
+    return 1.0 + zero_based;
+}
+
+public CONSTFUNC double Num$mix(double amount, double x, double y) { // Linear blend of x and y: amount=0 gives x, amount=1 gives y
+ return (1.0-amount)*x + amount*y;
+}
+
+// Parse text as a double with strtod().
+// Succeeds only when at least one character was consumed and nothing is left over;
+// otherwise returns a NaN, which Num$is_none treats as "none".
+public OptionalNum_t Num$parse(Text_t text) {
+    const char *str = Text$as_c_string(text);
+    char *end = NULL;
+    double d = strtod(str, &end);
+    if (end <= str || end[0] != '\0')
+        return nan("null");
+    return d;
+}
+
+// A Num is "none" exactly when the stored value is NaN.
+static bool Num$is_none(const void *n, const TypeInfo_t*)
+{
+    Num_t value = *(Num_t*)n;
+    return isnan(value);
+}
+
+// True for positive or negative infinity.
+public CONSTFUNC bool Num$isinf(double n) {
+    return isinf(n) != 0;
+}
+// True only for finite values. Both infinities and NaN are not finite
+// (the previous `!= FP_INFINITE` test wrongly reported NaN as finite).
+public CONSTFUNC bool Num$finite(double n) {
+    return isfinite(n) != 0;
+}
+// True when the value is NaN.
+public CONSTFUNC bool Num$isnan(double n) {
+    return isnan(n) != 0;
+}
+
+public const TypeInfo_t Num$info = { // Type descriptor for 64-bit floats
+ .size=sizeof(double),
+ .align=__alignof__(double),
+ .metamethods={
+ .compare=Num$compare, // bit-pattern ordering
+ .equal=Num$equal, // `==` on the values
+ .as_text=Num$as_text,
+ .is_none=Num$is_none, // NaN is the "none" value
+ },
+};
+
+// Text form of a 32-bit float, printed with up to 8 significant digits.
+// A NULL pointer asks for the type's name instead of a value.
+public PUREFUNC Text_t Num32$as_text(const void *f, bool colorize, const TypeInfo_t*) {
+    if (f == NULL)
+        return Text("Num32");
+
+    double value = (double)*(float*)f;
+    return Text$format(colorize ? "\x1b[35m%.8g\x1b[33;2m\x1b[m" : "%.8g", value);
+}
+
+// Three-way comparison of two floats using `<` and `>`; yields -1, 0 or 1.
+public PUREFUNC int32_t Num32$compare(const void *x, const void *y, const TypeInfo_t*) {
+    float lhs = *(float*)x;
+    float rhs = *(float*)y;
+    return (lhs > rhs) - (lhs < rhs);
+}
+
+// Equality of the two pointed-to floats using the `==` operator.
+public PUREFUNC bool Num32$equal(const void *x, const void *y, const TypeInfo_t*) {
+    float lhs = *(float*)x;
+    float rhs = *(float*)y;
+    return lhs == rhs;
+}
+
+// Approximate equality for 32-bit floats: true when `a` and `b` are equal, differ by
+// less than `absolute`, or differ by less than `ratio` (clamped to [0, 1]) times the
+// sum of their magnitudes. A NaN difference is never "near".
+public CONSTFUNC bool Num32$near(float a, float b, float ratio, float absolute) {
+    if (a == b)
+        return true;
+
+    float gap = fabs(a - b);
+    if (gap < absolute)
+        return true;
+    if (isnan(gap))
+        return false;
+
+    if (ratio < 0) ratio = 0;
+    else if (ratio > 1) ratio = 1;
+
+    float tolerance = fabs(a * ratio) + fabs(b * ratio);
+    if (isinf(tolerance))
+        tolerance = FLT_MAX; // keep the bound finite when the sum overflows
+    return gap < tolerance;
+}
+
+// Fixed-point text ("%.*f") with `precision` digits after the decimal point.
+public Text_t Num32$format(float f, Int_t precision) {
+    int digits = (int)Int64$from_int(precision, false);
+    return Text$format("%.*f", digits, (double)f);
+}
+
+// Scientific-notation text ("%.*e") with `precision` digits after the decimal point.
+public Text_t Num32$scientific(float f, Int_t precision) {
+    int digits = (int)Int64$from_int(precision, false);
+    return Text$format("%.*e", digits, (double)f);
+}
+
+public CONSTFUNC float Num32$mod(float num, float modulus) { // Remainder of num by modulus, with a negative remainder shifted up by |modulus|
+ // Euclidean division, see: https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/divmodnote-letter.pdf
+ float r = remainderf(num, modulus); // remainderf() can return a negative value
+ r -= (r < 0) * (2*(modulus < 0) - 1) * modulus; // branch-free: when r < 0 this adds |modulus|; otherwise the multiplier is 0
+ return r;
+}
+
+// One-based modulus: shift down by one, take Num32$mod, then shift back up by one.
+public CONSTFUNC float Num32$mod1(float num, float modulus) {
+    float zero_based = Num32$mod(num-1, modulus);
+    return 1.0f + zero_based;
+}
+
+public CONSTFUNC float Num32$mix(float amount, float x, float y) { // Linear blend of x and y: amount=0 gives x, amount=1 gives y
+ return (1.0f-amount)*x + amount*y;
+}
+
+// Parse text as a 32-bit float.
+// Succeeds only when at least one character was consumed and nothing is left over;
+// otherwise returns a NaN, which Num32$is_none treats as "none".
+public OptionalNum32_t Num32$parse(Text_t text) {
+    const char *str = Text$as_c_string(text);
+    char *end = NULL;
+    // Parse straight to float: going through strtod() and narrowing afterwards
+    // rounds twice and converts out-of-range doubles to float.
+    float parsed = strtof(str, &end);
+    if (end > str && end[0] == '\0')
+        return parsed;
+    else
+        return nanf("null");
+}
+
+// A Num32 is "none" exactly when the stored value is NaN.
+static bool Num32$is_none(const void *n, const TypeInfo_t*)
+{
+    Num32_t value = *(Num32_t*)n;
+    return isnan(value);
+}
+
+// True for positive or negative infinity.
+public CONSTFUNC bool Num32$isinf(float n) {
+    return isinf(n) != 0;
+}
+// True only for finite values. Both infinities and NaN are not finite
+// (the previous `!= FP_INFINITE` test wrongly reported NaN as finite).
+public CONSTFUNC bool Num32$finite(float n) {
+    return isfinite(n) != 0;
+}
+// True when the value is NaN.
+public CONSTFUNC bool Num32$isnan(float n) {
+    return isnan(n) != 0;
+}
+
+public const TypeInfo_t Num32$info = { // Type descriptor for 32-bit floats
+ .size=sizeof(float),
+ .align=__alignof__(float),
+ .metamethods={
+ .compare=Num32$compare, // ordering via `<` / `>`
+ .equal=Num32$equal, // `==` on the values
+ .as_text=Num32$as_text,
+ .is_none=Num32$is_none, // NaN is the "none" value
+ },
+};
+
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/src/stdlib/nums.h b/src/stdlib/nums.h
new file mode 100644
index 00000000..af0e895b
--- /dev/null
+++ b/src/stdlib/nums.h
@@ -0,0 +1,127 @@
+#pragma once
+
+// Type infos and methods for Nums (floating point)
+
+#include <math.h>
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "datatypes.h"
+#include "integers.h"
+#include "stdlib.h"
+#include "types.h"
+#include "util.h"
+
+#define OptionalNum_t double // optional Nums reuse the plain type; nums.c treats NaN as "none"
+#define OptionalNum32_t float
+#define N32(n) ((float)(n))
+#define N64(n) ((double)(n))
+
+Text_t Num$as_text(const void *f, bool colorize, const TypeInfo_t *type);
+PUREFUNC int32_t Num$compare(const void *x, const void *y, const TypeInfo_t *type);
+PUREFUNC bool Num$equal(const void *x, const void *y, const TypeInfo_t *type);
+CONSTFUNC bool Num$near(double a, double b, double ratio, double absolute);
+Text_t Num$format(double f, Int_t precision);
+Text_t Num$scientific(double f, Int_t precision);
+double Num$mod(double num, double modulus); // NOTE(review): defined with CONSTFUNC in nums.c but declared without it here
+double Num$mod1(double num, double modulus); // NOTE(review): same CONSTFUNC mismatch as Num$mod
+CONSTFUNC bool Num$isinf(double n);
+CONSTFUNC bool Num$finite(double n);
+CONSTFUNC bool Num$isnan(double n);
+double Num$nan(Text_t tag); // NOTE(review): no definition appears in nums.c — confirm where this is implemented
+CONSTFUNC double Num$mix(double amount, double x, double y);
+OptionalNum_t Num$parse(Text_t text); // returns NaN when the text is not entirely a number
+// Limit x to the range [low, high]; the lower bound is checked first.
+MACROLIKE CONSTFUNC double Num$clamped(double x, double low, double high) {
+    if (x <= low)
+        return low;
+    if (x >= high)
+        return high;
+    return x;
+}
+// Widen a 32-bit float to a double.
+MACROLIKE CONSTFUNC double Num$from_num32(Num32_t n) {
+    return (double)n;
+}
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wfloat-equal"
+MACROLIKE CONSTFUNC double Num$from_int(Int_t i, bool truncate) { // Convert an Int_t to double; unless `truncate` is set, fail when the value does not round-trip
+ if likely (i.small & 0x1) { // low bit set: the value is held inline and read as i.small >> 2
+ double ret = (double)(i.small >> 2);
+ if unlikely (!truncate && (int64_t)ret != (i.small >> 2)) // converting back must reproduce the integer
+ fail("Could not convert integer to 64-bit floating point without losing precision: %ld", i.small >> 2);
+ return ret;
+ } else { // otherwise i.big is dereferenced as a GMP integer
+ double ret = mpz_get_d(*i.big);
+ if (!truncate) {
+ mpz_t roundtrip;
+ mpz_init_set_d(roundtrip, ret); // rebuild an integer from the double to compare against the original
+ if unlikely (mpz_cmp(*i.big, roundtrip) != 0)
+ fail("Could not convert integer to 64-bit floating point without losing precision: %k", (Text_t[1]){Int$value_as_text(i)});
+ }
+ return ret;
+ }
+}
+#pragma GCC diagnostic pop
+// Convert a 64-bit integer to double.
+// Unless `truncate` is set, fails when the double does not convert back to the same integer.
+MACROLIKE CONSTFUNC double Num$from_int64(Int64_t i, bool truncate) {
+    double n = (double)i;
+    // Values near INT64_MAX round up to 2^63, which does not fit in Int64_t; casting
+    // that back is undefined behavior, so rule it out before the round-trip cast.
+    if unlikely (!truncate && (n >= 0x1p63 || (Int64_t)n != i))
+        fail("Could not convert integer to 64-bit floating point without losing precision: %ld", i);
+    return n;
+}
+// Convert a 32-bit integer to double (no precision check is performed).
+MACROLIKE CONSTFUNC double Num$from_int32(Int32_t i) {
+    return (double)i;
+}
+// Convert a 16-bit integer to double (no precision check is performed).
+MACROLIKE CONSTFUNC double Num$from_int16(Int16_t i) {
+    return (double)i;
+}
+// Convert an 8-bit integer to double (no precision check is performed).
+MACROLIKE CONSTFUNC double Num$from_int8(Int8_t i) {
+    return (double)i;
+}
+// Convert a byte to double (no precision check is performed).
+MACROLIKE CONSTFUNC double Num$from_byte(Byte_t i) {
+    return (double)i;
+}
+
+extern const TypeInfo_t Num$info;
+
+Text_t Num32$as_text(const void *f, bool colorize, const TypeInfo_t *type);
+PUREFUNC int32_t Num32$compare(const void *x, const void *y, const TypeInfo_t *type);
+PUREFUNC bool Num32$equal(const void *x, const void *y, const TypeInfo_t *type);
+CONSTFUNC bool Num32$near(float a, float b, float ratio, float absolute);
+Text_t Num32$format(float f, Int_t precision);
+Text_t Num32$scientific(float f, Int_t precision);
+float Num32$mod(float num, float modulus); // NOTE(review): defined with CONSTFUNC in nums.c but declared without it here
+float Num32$mod1(float num, float modulus); // NOTE(review): same CONSTFUNC mismatch as Num32$mod
+CONSTFUNC bool Num32$isinf(float n);
+CONSTFUNC bool Num32$finite(float n);
+CONSTFUNC bool Num32$isnan(float n);
+CONSTFUNC float Num32$mix(float amount, float x, float y);
+OptionalNum32_t Num32$parse(Text_t text); // returns NaN when the text is not entirely a number
+float Num32$nan(Text_t tag); // NOTE(review): no definition appears in nums.c — confirm where this is implemented
+// Limit x to the range [low, high]; the lower bound is checked first.
+MACROLIKE CONSTFUNC float Num32$clamped(float x, float low, float high) {
+    if (x <= low)
+        return low;
+    if (x >= high)
+        return high;
+    return x;
+}
+// Narrow a double to a 32-bit float with a plain cast.
+MACROLIKE CONSTFUNC float Num32$from_num(Num_t n) {
+    return (float)n;
+}
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wfloat-equal"
+MACROLIKE CONSTFUNC float Num32$from_int(Int_t i, bool truncate) { // Convert an Int_t to float; unless `truncate` is set, fail when the value does not round-trip
+ if likely (i.small & 0x1) { // low bit set: the value is held inline and read as i.small >> 2
+ float ret = (float)(i.small >> 2);
+ if unlikely (!truncate && (int64_t)ret != (i.small >> 2)) // converting back must reproduce the integer
+ fail("Could not convert integer to 32-bit floating point without losing precision: %ld", i.small >> 2);
+ return ret;
+ } else { // otherwise i.big is dereferenced as a GMP integer
+ float ret = (float)mpz_get_d(*i.big);
+ if (!truncate) {
+ mpz_t roundtrip;
+ mpz_init_set_d(roundtrip, ret); // rebuild an integer from the float to compare against the original
+ if unlikely (mpz_cmp(*i.big, roundtrip) != 0)
+ fail("Could not convert integer to 32-bit floating point without losing precision: %k", (Text_t[1]){Int$value_as_text(i)});
+ }
+ return ret;
+ }
+}
+#pragma GCC diagnostic pop
+// Convert a 64-bit integer to a 32-bit float.
+// Unless `truncate` is set, fails when the float does not convert back to the same integer.
+MACROLIKE CONSTFUNC float Num32$from_int64(Int64_t i, bool truncate) {
+    float n = (float)i;
+    // Values near INT64_MAX round up to 2^63, which does not fit in Int64_t; casting
+    // that back is undefined behavior, so rule it out before the round-trip cast.
+    if unlikely (!truncate && (n >= 0x1p63f || (Int64_t)n != i))
+        fail("Could not convert integer to 32-bit floating point without losing precision: %ld", i);
+    return n;
+}
+// Convert a 32-bit integer to a 32-bit float.
+// Unless `truncate` is set, fails when the float does not convert back to the same integer.
+MACROLIKE CONSTFUNC float Num32$from_int32(Int32_t i, bool truncate) {
+    float n = (float)i;
+    // Values near INT32_MAX round up to 2^31, which does not fit in Int32_t; casting
+    // that back is undefined behavior, so rule it out before the round-trip cast.
+    if unlikely (!truncate && (n >= 0x1p31f || (Int32_t)n != i))
+        fail("Could not convert integer to 32-bit floating point without losing precision: %d", i);
+    return n;
+}
+// Convert a 16-bit integer to float (no precision check is performed).
+MACROLIKE CONSTFUNC float Num32$from_int16(Int16_t i) {
+    return (float)i;
+}
+// Convert an 8-bit integer to float (no precision check is performed).
+MACROLIKE CONSTFUNC float Num32$from_int8(Int8_t i) {
+    return (float)i;
+}
+// Convert a byte to float (no precision check is performed).
+MACROLIKE CONSTFUNC float Num32$from_byte(Byte_t i) {
+    return (float)i;
+}
+
+extern const TypeInfo_t Num32$info;
+
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/src/stdlib/optionals.c b/src/stdlib/optionals.c
new file mode 100644
index 00000000..d91ebffc
--- /dev/null
+++ b/src/stdlib/optionals.c
@@ -0,0 +1,94 @@
+// Optional types
+
+#include <pthread.h>
+
+#include "bools.h"
+#include "bytes.h"
+#include "datatypes.h"
+#include "integers.h"
+#include "metamethods.h"
+#include "moments.h"
+#include "nums.h"
+#include "patterns.h"
+#include "text.h"
+#include "threads.h"
+#include "util.h"
+
+// Decide whether `obj` holds the "none" value of an optional.
+// Types that supply an `is_none` metamethod are asked directly; for all others the
+// flag stored `size` bytes past the start of the value is read.
+public PUREFUNC bool is_none(const void *obj, const TypeInfo_t *non_optional_type)
+{
+    if (!non_optional_type->metamethods.is_none) {
+        const void *flag = obj + non_optional_type->size;
+        return *(bool*)flag;
+    }
+    return non_optional_type->metamethods.is_none(obj, non_optional_type);
+}
+
+// Hash an optional: none hashes to 0, anything else uses the wrapped type's hash.
+PUREFUNC public uint64_t Optional$hash(const void *obj, const TypeInfo_t *type)
+{
+    const TypeInfo_t *inner = type->OptionalInfo.type;
+    if (is_none(obj, inner))
+        return 0;
+    return generic_hash(obj, inner);
+}
+
+// Three-way comparison of two optionals.
+// Two nones are equal, a none sorts before any present value, and two present
+// values are compared with the wrapped type's comparison.
+PUREFUNC public int32_t Optional$compare(const void *x, const void *y, const TypeInfo_t *type)
+{
+    if (x == y) return 0;
+
+    const TypeInfo_t *inner = type->OptionalInfo.type;
+    bool x_none = is_none(x, inner);
+    bool y_none = is_none(y, inner);
+    if (x_none != y_none)
+        return (int32_t)y_none - (int32_t)x_none;
+    if (x_none) // both are none
+        return 0;
+    return generic_compare(x, y, inner);
+}
+
+// Equality of two optionals: both none, or both present and equal under the wrapped type.
+PUREFUNC public bool Optional$equal(const void *x, const void *y, const TypeInfo_t *type)
+{
+    if (x == y) return true;
+
+    const TypeInfo_t *inner = type->OptionalInfo.type;
+    bool x_none = is_none(x, inner);
+    bool y_none = is_none(y, inner);
+    if (x_none != y_none)
+        return false;
+    if (x_none) // both are none
+        return true;
+    return generic_equal(x, y, inner);
+}
+
+// Text form of an optional.
+// NULL `obj` yields the type name (wrapped type's name followed by "?"); a none value
+// yields "none" (red when colorized); otherwise the wrapped value is rendered.
+public Text_t Optional$as_text(const void *obj, bool colorize, const TypeInfo_t *type)
+{
+    const TypeInfo_t *inner = type->OptionalInfo.type;
+    if (obj == NULL)
+        return Text$concat(generic_as_text(obj, colorize, inner), Text("?"));
+
+    if (!is_none(obj, inner))
+        return generic_as_text(obj, colorize, inner);
+    return colorize ? Text("\x1b[31mnone\x1b[m") : Text("none");
+}
+
+// Serialize an optional as one presence byte (1 = value follows, 0 = none),
+// followed by the wrapped value when present.
+public void Optional$serialize(const void *obj, FILE *out, Table_t *pointers, const TypeInfo_t *type)
+{
+    const TypeInfo_t *inner = type->OptionalInfo.type;
+    if (is_none(obj, inner)) {
+        fputc(0, out);
+        return;
+    }
+    fputc(1, out);
+    _serialize(obj, out, pointers, inner);
+}
+
+// Deserialize an optional written by Optional$serialize.
+// Reads one presence byte; when set, the wrapped value is deserialized into `outval`.
+// Otherwise `outval` is filled with the "none" representation for the wrapped type.
+public void Optional$deserialize(FILE *in, void *outval, Array_t *pointers, const TypeInfo_t *type)
+{
+    bool has_value = (bool)fgetc(in);
+    const TypeInfo_t *nonnull = type->OptionalInfo.type;
+    if (has_value) {
+        memset(outval, 0, (size_t)type->size);
+        _deserialize(in, outval, pointers, nonnull);
+    } else {
+        if (nonnull->tag == TextInfo)
+            *(Text_t*)outval = NONE_TEXT;
+        else if (nonnull->tag == ArrayInfo)
+            *(Array_t*)outval = (Array_t){.length=-1};
+        else if (nonnull->tag == TableInfo)
+            *(Table_t*)outval = (Table_t){.entries={.length=-1}};
+        else if (nonnull == &Num$info)
+            *(double*)outval = NAN;
+        else if (nonnull == &Num32$info)
+            *(float*)outval = NAN;
+        else if (nonnull->tag == StructInfo || (nonnull->tag == OpaqueInfo && type->size > nonnull->size))
+            // The none flag occupies the bytes after the wrapped value (is_none() reads it at
+            // offset nonnull->size). Starting at type->size would write past the end of `outval`.
+            memset(outval + nonnull->size, -1, (size_t)(type->size - nonnull->size));
+        else
+            memset(outval, 0, (size_t)type->size);
+    }
+}
+
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
diff --git a/src/stdlib/optionals.h b/src/stdlib/optionals.h
new file mode 100644
index 00000000..ccf1b963
--- /dev/null
+++ b/src/stdlib/optionals.h
@@ -0,0 +1,49 @@
+#pragma once
+
+// Optional types
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "datatypes.h"
+#include "integers.h"
+#include "types.h"
+#include "util.h"
+
+#define OptionalBool_t uint8_t // wide enough for false, true and the extra NONE_BOOL value
+#define OptionalArray_t Array_t
+#define OptionalTable_t Table_t
+#define OptionalText_t Text_t
+#define OptionalClosure_t Closure_t
+
+#define NONE_ARRAY ((Array_t){.length=-1}) // negative length marks none
+#define NONE_BOOL ((OptionalBool_t)2)
+#define NONE_INT ((OptionalInt_t){.small=0})
+#define NONE_TABLE ((OptionalTable_t){.entries.length=-1})
+#define NONE_CLOSURE ((OptionalClosure_t){.fn=NULL})
+#define NONE_TEXT ((OptionalText_t){.length=-1}) // negative length marks none
+#define NONE_MOMENT ((OptionalMoment_t){.tv_usec=-1})
+#define NONE_PATH ((Path_t){.type=PATH_NONE}) // NOTE(review): paths.c sets `.type.$tag`; confirm `.type=PATH_NONE` matches the current Path_t layout
+
+// Whether `obj` holds the "none" value for the given non-optional type.
+// Declared as `is_none` to match the function optionals.c actually defines;
+// the previous `is_null` prototype had no matching definition there.
+PUREFUNC bool is_none(const void *obj, const TypeInfo_t *non_optional_type);
+PUREFUNC uint64_t Optional$hash(const void *obj, const TypeInfo_t *type);
+PUREFUNC int32_t Optional$compare(const void *x, const void *y, const TypeInfo_t *type);
+PUREFUNC bool Optional$equal(const void *x, const void *y, const TypeInfo_t *type);
+Text_t Optional$as_text(const void *obj, bool colorize, const TypeInfo_t *type);
+void Optional$serialize(const void *obj, FILE *out, Table_t *pointers, const TypeInfo_t *type);
+void Optional$deserialize(FILE *in, void *outval, Array_t *pointers, const TypeInfo_t *type);
+
+#define Optional$metamethods { /* initializer listing the Optional$* functions as metamethods */ \
+ .hash=Optional$hash, \
+ .compare=Optional$compare, \
+ .equal=Optional$equal, \
+ .as_text=Optional$as_text, \
+ .serialize=Optional$serialize, \
+ .deserialize=Optional$deserialize, \
+}
+
+#define Optional$info(_size, _align, t) &((TypeInfo_t){.size=_size, .align=_align, /* address of a TypeInfo_t compound literal; `t` is the wrapped (non-optional) type */ \
+ .tag=OptionalInfo, .OptionalInfo.type=t, \
+ .metamethods=Optional$metamethods})
+
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/src/stdlib/paths.c b/src/stdlib/paths.c
new file mode 100644
index 00000000..c7743759
--- /dev/null
+++ b/src/stdlib/paths.c
@@ -0,0 +1,822 @@
+// A lang for filesystem paths
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <gc.h>
+#include <glob.h>
+#include <grp.h>
+#include <pwd.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/param.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <unistr.h>
+
+#include "arrays.h"
+#include "enums.h"
+#include "files.h"
+#include "integers.h"
+#include "optionals.h"
+#include "paths.h"
+#include "patterns.h"
+#include "structs.h"
+#include "text.h"
+#include "types.h"
+#include "util.h"
+
+// Use inline version of the siphash code for performance:
+#include "siphash.h"
+#include "siphash-internals.h"
+
+static const Path_t HOME_PATH = {.type.$tag=PATH_HOME}, // "~": home-based, no components
+ ROOT_PATH = {.type.$tag=PATH_ABSOLUTE}, // "/": absolute, no components
+ CURDIR_PATH = {.type.$tag=PATH_RELATIVE}; // ".": relative, no components
+
+static void clean_components(Array_t *components) // Normalize in place: drop empty and "." components and cancel "name/.." pairs
+{
+ for (int64_t i = 0; i < components->length; ) { // no automatic increment: each branch decides how i moves
+ Text_t *component = (Text_t*)(components->data + i*components->stride);
+ if (component->length == 0 || Text$equal_values(*component, Text("."))) {
+ Array$remove_at(components, I(i+1), I(1), sizeof(Text_t)); // removes the current entry (index passed is i+1 — presumably 1-based; confirm against Array$remove_at); i stays put
+ } else if (i > 0 && Text$equal_values(*component, Text(".."))) {
+ Text_t *prev = (Text_t*)(components->data + (i-1)*components->stride);
+ if (!Text$equal_values(*prev, Text(".."))) { // a real name followed by "..": drop both
+ Array$remove_at(components, I(i), I(2), sizeof(Text_t));
+ i -= 1; // step back to the slot that now follows the removed pair's predecessor
+ } else { // ".." after ".." cannot be cancelled; keep it
+ i += 1;
+ }
+ } else {
+ i += 1;
+ }
+ }
+}
+
+public Path_t Path$from_str(const char *str) // Parse a C string into a Path_t: a type tag plus a list of components
+{
+ if (!str || str[0] == '\0' || streq(str, "/")) return ROOT_PATH; // NULL and "" are treated the same as "/"
+ else if (streq(str, "~")) return HOME_PATH;
+ else if (streq(str, ".")) return CURDIR_PATH;
+
+ Path_t result = {.components={}};
+ if (str[0] == '/') { // record the path kind and skip its prefix
+ result.type.$tag = PATH_ABSOLUTE;
+ str += 1;
+ } else if (str[0] == '~' && str[1] == '/') {
+ result.type.$tag = PATH_HOME;
+ str += 2;
+ } else if (str[0] == '.' && str[1] == '/') {
+ result.type.$tag = PATH_RELATIVE;
+ str += 2;
+ } else { // no recognized prefix: plain relative path
+ result.type.$tag = PATH_RELATIVE;
+ }
+
+ while (str && *str) {
+ size_t component_len = strcspn(str, "/"); // length up to the next '/' or the end of the string
+ if (component_len > 0) {
+ if (component_len == 1 && str[0] == '.') {
+ // ignore /./
+ } else if (component_len == 2 && strncmp(str, "..", 2) == 0
+ && result.components.length > 1 // NOTE(review): with exactly one component (e.g. "foo/..") nothing is popped and ".." is appended; confirm `> 1` rather than `> 0` is intended
+ && !Text$equal_values(Text(".."), *(Text_t*)(result.components.data + result.components.stride*(result.components.length-1)))) {
+ // Pop off /foo/baz/.. -> /foo
+ Array$remove_at(&result.components, I(result.components.length), I(1), sizeof(Text_t));
+ } else {
+ Text_t component = Text$from_strn(str, component_len);
+ Array$insert_value(&result.components, component, I(0), sizeof(Text_t)); // index 0 is used here to add at the end — presumably "append"; confirm against Array$insert
+ }
+ str += component_len;
+ }
+ str += strspn(str, "/"); // skip one or more separators
+ }
+ return result;
+}
+
+// Parse a Text_t as a path by converting it to a C string first.
+public Path_t Path$from_text(Text_t text)
+{
+    const char *c_str = Text$as_c_string(text);
+    return Path$from_str(c_str);
+}
+
+// Turn a home-based path into an absolute one by prefixing the components of $HOME.
+// Paths of any other kind are returned unchanged.
+public Path_t Path$expand_home(Path_t path)
+{
+    if (path.type.$tag != PATH_HOME)
+        return path;
+
+    Path_t home = Path$from_str(getenv("HOME"));
+    Array_t components = Array$concat(home.components, path.components, sizeof(Text_t));
+    assert(components.length == path.components.length + home.components.length);
+    clean_components(&components);
+    return (Path_t){.type.$tag=PATH_ABSOLUTE, .components=components};
+}
+
+// Join `n` paths: the first supplies the base, every later one must be relative and
+// has its components added to the result, which is then normalized.
+public Path_t Path$_concat(int n, Path_t items[n])
+{
+    assert(n > 0);
+    Path_t joined = items[0];
+    ARRAY_INCREF(joined.components);
+    for (Path_t *item = &items[1]; item < &items[n]; item++) {
+        if (item->type.$tag != PATH_RELATIVE)
+            fail("Cannot concatenate an absolute or home-based path onto another path: (%s)\n",
+                 Path$as_c_string(*item));
+        Array$insert_all(&joined.components, item->components, I(0), sizeof(Text_t));
+    }
+    clean_components(&joined.components);
+    return joined;
+}
+
+// Resolve a relative `path` against `relative_to`.
+// The path is returned unchanged when it is not relative, or when the base is the
+// bare current directory (relative with no components).
+public Path_t Path$resolved(Path_t path, Path_t relative_to)
+{
+    bool base_is_bare_curdir = (relative_to.type.$tag == PATH_RELATIVE && relative_to.components.length == 0);
+    if (path.type.$tag != PATH_RELATIVE || base_is_bare_curdir)
+        return path;
+
+    Path_t result = {.type.$tag=relative_to.type.$tag};
+    result.components = relative_to.components;
+    ARRAY_INCREF(result.components);
+    Array$insert_all(&result.components, path.components, I(0), sizeof(Text_t));
+    clean_components(&result.components);
+    return result;
+}
+
+// Express `path` relative to `relative_to`; both must have the same path type.
+// The shared leading components are skipped, one ".." is emitted per remaining
+// component of `relative_to`, then the remaining components of `path` follow.
+public Path_t Path$relative_to(Path_t path, Path_t relative_to)
+{
+    if (path.type.$tag != relative_to.type.$tag)
+        fail("Cannot create a path relative to a different path with a mismatching type: (%k) relative to (%k)",
+             (Text_t[1]){Path$as_text(&path, false, &Path$info)}, (Text_t[1]){Path$as_text(&relative_to, false, &Path$info)});
+
+    // Count how many leading components the two paths have in common
+    int64_t shared = 0;
+    while (shared < path.components.length && shared < relative_to.components.length) {
+        Text_t *p = (Text_t*)(path.components.data + shared*path.components.stride);
+        Text_t *r = (Text_t*)(relative_to.components.data + shared*relative_to.components.stride);
+        if (!Text$equal_values(*p, *r))
+            break;
+        shared++;
+    }
+
+    Path_t result = {.type.$tag=PATH_RELATIVE};
+    for (int64_t ups = relative_to.components.length - shared; ups > 0; ups--)
+        Array$insert_value(&result.components, Text(".."), I(1), sizeof(Text_t));
+
+    for (int64_t i = shared; i < path.components.length; i++) {
+        Text_t *p = (Text_t*)(path.components.data + i*path.components.stride);
+        Array$insert(&result.components, p, I(0), sizeof(Text_t));
+    }
+    return result;
+}
+
+// True when stat() succeeds on the home-expanded path.
+public bool Path$exists(Path_t path)
+{
+    struct stat sb;
+    path = Path$expand_home(path);
+    int status = stat(Path$as_c_string(path), &sb);
+    return status == 0;
+}
+
+// stat() or lstat() the home-expanded path, depending on `follow_symlinks`.
+// Returns whatever the underlying call returns.
+static INLINE int path_stat(Path_t path, bool follow_symlinks, struct stat *sb)
+{
+    path = Path$expand_home(path);
+    const char *path_str = Path$as_c_string(path);
+    if (follow_symlinks)
+        return stat(path_str, sb);
+    return lstat(path_str, sb);
+}
+
+// True when the path can be stat'ed and is a regular file.
+public bool Path$is_file(Path_t path, bool follow_symlinks)
+{
+    struct stat sb;
+    if (path_stat(path, follow_symlinks, &sb) != 0)
+        return false;
+    return S_ISREG(sb.st_mode);
+}
+
+// True when the path can be stat'ed and is a directory.
+public bool Path$is_directory(Path_t path, bool follow_symlinks)
+{
+    struct stat sb;
+    if (path_stat(path, follow_symlinks, &sb) != 0)
+        return false;
+    return S_ISDIR(sb.st_mode);
+}
+
+// True when the path can be stat'ed and is a FIFO.
+public bool Path$is_pipe(Path_t path, bool follow_symlinks)
+{
+    struct stat sb;
+    if (path_stat(path, follow_symlinks, &sb) != 0)
+        return false;
+    return S_ISFIFO(sb.st_mode);
+}
+
+// True when the path can be stat'ed and is a socket.
+public bool Path$is_socket(Path_t path, bool follow_symlinks)
+{
+    struct stat sb;
+    if (path_stat(path, follow_symlinks, &sb) != 0)
+        return false;
+    return S_ISSOCK(sb.st_mode);
+}
+
+// True when the path itself (symlinks are not followed) is a symbolic link.
+public bool Path$is_symlink(Path_t path)
+{
+    struct stat sb;
+    if (path_stat(path, false, &sb) != 0)
+        return false;
+    return S_ISLNK(sb.st_mode);
+}
+
+// True when euidaccess() grants read permission on the home-expanded path.
+public bool Path$can_read(Path_t path)
+{
+    path = Path$expand_home(path);
+    int status = euidaccess(Path$as_c_string(path), R_OK);
+    return status == 0;
+}
+
+// True when euidaccess() grants write permission on the home-expanded path.
+public bool Path$can_write(Path_t path)
+{
+    path = Path$expand_home(path);
+    int status = euidaccess(Path$as_c_string(path), W_OK);
+    return status == 0;
+}
+
+// True when euidaccess() grants execute permission on the home-expanded path.
+public bool Path$can_execute(Path_t path)
+{
+    path = Path$expand_home(path);
+    int status = euidaccess(Path$as_c_string(path), X_OK);
+    return status == 0;
+}
+
+// Modification time (st_mtime, whole seconds) of the path, or none if it cannot be stat'ed.
+public OptionalMoment_t Path$modified(Path_t path, bool follow_symlinks)
+{
+    struct stat sb;
+    if (path_stat(path, follow_symlinks, &sb) != 0)
+        return NONE_MOMENT;
+    return (Moment_t){.tv_sec=sb.st_mtime};
+}
+
+// Access time (st_atime, whole seconds) of the path, or none if it cannot be stat'ed.
+public OptionalMoment_t Path$accessed(Path_t path, bool follow_symlinks)
+{
+    struct stat sb;
+    if (path_stat(path, follow_symlinks, &sb) != 0)
+        return NONE_MOMENT;
+    return (Moment_t){.tv_sec=sb.st_atime};
+}
+
+// Status-change time (st_ctime, whole seconds) of the path, or none if it cannot be stat'ed.
+public OptionalMoment_t Path$changed(Path_t path, bool follow_symlinks)
+{
+    struct stat sb;
+    if (path_stat(path, follow_symlinks, &sb) != 0)
+        return NONE_MOMENT;
+    return (Moment_t){.tv_sec=sb.st_ctime};
+}
+
+// Open the home-expanded path with the given open() flags and permissions and write
+// all of `bytes` to it. Fails (with the system error text) if the file cannot be
+// opened or the data cannot be written in full.
+static void _write(Path_t path, Array_t bytes, int mode, int permissions)
+{
+    path = Path$expand_home(path);
+    const char *path_str = Path$as_c_string(path);
+    int fd = open(path_str, mode, permissions);
+    if (fd == -1)
+        fail("Could not write to file: %s\n%s", path_str, strerror(errno));
+
+    if (bytes.stride != 1)
+        Array$compact(&bytes, 1);
+
+    // write() may store fewer bytes than requested or be interrupted by a signal,
+    // so keep going until everything is written or a real error occurs.
+    const char *cursor = bytes.data;
+    size_t remaining = (size_t)bytes.length;
+    while (remaining > 0) {
+        ssize_t written = write(fd, cursor, remaining);
+        if (written < 0 && errno == EINTR)
+            continue;
+        if (written <= 0) {
+            int saved_errno = errno; // close() may overwrite errno
+            close(fd);
+            fail("Could not write to file: %s\n%s", path_str, strerror(saved_errno));
+        }
+        cursor += written;
+        remaining -= (size_t)written;
+    }
+    close(fd);
+}
+
+// Replace the contents of the file at `path` with the UTF-8 bytes of `text`,
+// creating the file with `permissions` if it does not exist.
+public void Path$write(Path_t path, Text_t text, int permissions)
+{
+    Array_t bytes = Text$utf8_bytes(text);
+    // O_TRUNC: without it, writing shorter content over an existing file
+    // would leave the tail of the old content in place.
+    _write(path, bytes, O_WRONLY | O_CREAT | O_TRUNC, permissions);
+}
+
+// Replace the contents of the file at `path` with `bytes`,
+// creating the file with `permissions` if it does not exist.
+public void Path$write_bytes(Path_t path, Array_t bytes, int permissions)
+{
+    // O_TRUNC: without it, writing shorter content over an existing file
+    // would leave the tail of the old content in place.
+    _write(path, bytes, O_WRONLY | O_CREAT | O_TRUNC, permissions);
+}
+
+// Append the UTF-8 bytes of `text` to the file at `path`, creating it if needed.
+public void Path$append(Path_t path, Text_t text, int permissions)
+{
+    int flags = O_WRONLY | O_APPEND | O_CREAT;
+    Array_t bytes = Text$utf8_bytes(text);
+    _write(path, bytes, flags, permissions);
+}
+
+// Append `bytes` to the file at `path`, creating it if needed.
+public void Path$append_bytes(Path_t path, Array_t bytes, int permissions)
+{
+    int flags = O_WRONLY | O_APPEND | O_CREAT;
+    _write(path, bytes, flags, permissions);
+}
+
+// Read bytes from the file at `path`.
+// `count` is optional: when given (non-zero `small`), exactly that many bytes must be
+// available or the call fails; when absent, everything is read.
+// Returns none when the file cannot be opened, stat'ed, mapped or read.
+// Regular files are copied out of a temporary memory mapping; anything else
+// (pipes, devices, ...) is read in 256-byte chunks into a growing buffer.
+public OptionalArray_t Path$read_bytes(Path_t path, OptionalInt_t count)
+{
+    path = Path$expand_home(path);
+    int fd = open(Path$as_c_string(path), O_RDONLY);
+    if (fd == -1)
+        return NONE_ARRAY;
+
+    struct stat sb;
+    if (fstat(fd, &sb) != 0) {
+        close(fd); // don't leak the descriptor on the error path
+        return NONE_ARRAY;
+    }
+
+    int64_t const target_count = count.small ? Int64$from_int(count, false) : INT64_MAX;
+    if (target_count < 0) {
+        close(fd);
+        fail("Cannot read a negative number of bytes!");
+    }
+
+    if ((sb.st_mode & S_IFMT) == S_IFREG) { // Use memory mapping if it's a real file:
+        size_t file_size = (size_t)sb.st_size;
+        char *content = GC_MALLOC_ATOMIC(file_size+1);
+        if (file_size > 0) { // mmap() rejects zero-length mappings
+            void *mem = mmap(NULL, file_size, PROT_READ, MAP_PRIVATE, fd, 0);
+            if (mem == MAP_FAILED) {
+                close(fd);
+                return NONE_ARRAY;
+            }
+            memcpy(content, mem, file_size);
+            munmap(mem, file_size); // the copy is all we keep; release the mapping
+        }
+        content[file_size] = '\0';
+        close(fd);
+        if (count.small && (int64_t)sb.st_size < target_count)
+            fail("Could not read %ld bytes from %k (only got %zu)", target_count,
+                 (Text_t[1]){Path$as_text(&path, false, &Path$info)}, file_size);
+        int64_t len = count.small ? target_count : (int64_t)sb.st_size;
+        return (Array_t){.data=content, .atomic=1, .stride=1, .length=len};
+    } else {
+        size_t capacity = 256, len = 0;
+        char *content = GC_MALLOC_ATOMIC(capacity);
+        int64_t count_remaining = target_count;
+        while (count_remaining > 0) {
+            char chunk[256];
+            size_t to_read = count_remaining < (int64_t)sizeof(chunk) ? (size_t)count_remaining : sizeof(chunk);
+            ssize_t just_read = read(fd, chunk, to_read);
+            if (just_read < 0) {
+                // errno is only meaningful after a failed call: retry transient errors here
+                if (errno == EAGAIN || errno == EINTR)
+                    continue;
+                close(fd);
+                return NONE_ARRAY;
+            } else if (just_read == 0) {
+                break; // end of input
+            }
+            count_remaining -= (int64_t)just_read;
+
+            if (len + (size_t)just_read >= capacity) {
+                content = GC_REALLOC(content, (capacity *= 2));
+            }
+
+            memcpy(&content[len], chunk, (size_t)just_read);
+            len += (size_t)just_read;
+        }
+        close(fd);
+        if (count.small != 0 && (int64_t)len < target_count)
+            fail("Could not read %ld bytes from %k (only got %zu)", target_count,
+                 (Text_t[1]){Path$as_text(&path, false, &Path$info)}, len);
+        return (Array_t){.data=content, .atomic=1, .stride=1, .length=(int64_t)len};
+    }
+}
+
+// Read the whole file as text; none when the bytes could not be read.
+public OptionalText_t Path$read(Path_t path)
+{
+    Array_t bytes = Path$read_bytes(path, NONE_INT);
+    if (bytes.length >= 0)
+        return Text$from_bytes(bytes);
+    return NONE_TEXT;
+}
+
+// Name of the user owning the path, or none when the path cannot be stat'ed
+// or the user id has no passwd entry.
+public OptionalText_t Path$owner(Path_t path, bool follow_symlinks)
+{
+    struct stat sb;
+    if (path_stat(path, follow_symlinks, &sb) != 0)
+        return NONE_TEXT;
+
+    struct passwd *pw = getpwuid(sb.st_uid);
+    if (pw == NULL)
+        return NONE_TEXT;
+    return Text$from_str(pw->pw_name);
+}
+
+// Name of the group owning `path`.
+// Yields NONE_TEXT if path_stat() fails or the gid has no group database entry.
+public OptionalText_t Path$group(Path_t path, bool follow_symlinks)
+{
+    struct stat sb;
+    int status = path_stat(path, follow_symlinks, &sb);
+    if (status != 0) return NONE_TEXT;
+    // The group lookup must use the file's gid (the uid was being passed here by mistake):
+    struct group *gr = getgrgid(sb.st_gid);
+    return gr ? Text$from_str(gr->gr_name) : NONE_TEXT;
+}
+
+// Change the owning user and/or group of `path`.
+// A none `owner` or `group` (negative length) leaves that id unchanged by passing -1
+// to chown()/lchown(). Fails if a given name is unknown or the ownership change fails.
+// When `follow_symlinks` is false, lchown() is used so the link itself is changed.
+public void Path$set_owner(Path_t path, OptionalText_t owner, OptionalText_t group, bool follow_symlinks)
+{
+    uid_t owner_id = (uid_t)-1;
+    if (owner.length >= 0) {
+        struct passwd *pwd = getpwnam(Text$as_c_string(owner));
+        if (pwd == NULL) fail("Not a valid user: %k", &owner);
+        owner_id = pwd->pw_uid;
+    }
+
+    gid_t group_id = (gid_t)-1;
+    if (group.length >= 0) {
+        struct group *grp = getgrnam(Text$as_c_string(group));
+        if (grp == NULL) fail("Not a valid group: %k", &group);
+        group_id = grp->gr_gid;
+    }
+
+    // Path$as_c_string() renders home paths with a literal "~/" prefix, which the
+    // kernel does not understand, so expand it first (as the sibling functions do):
+    path = Path$expand_home(path);
+    const char *path_str = Path$as_c_string(path);
+    int result = follow_symlinks ? chown(path_str, owner_id, group_id) : lchown(path_str, owner_id, group_id);
+    if (result < 0)
+        fail("Could not set owner!");
+}
+
+// Remove the file, symlink, or (empty) directory at `path`.
+// With `ignore_missing` set, a path that can't be lstat'ed and failures of
+// unlink()/rmdir() are silently ignored. Any other kind of filesystem entry is an error.
+public void Path$remove(Path_t path, bool ignore_missing)
+{
+    path = Path$expand_home(path);
+    const char *path_str = Path$as_c_string(path);
+    struct stat sb;
+    if (lstat(path_str, &sb) != 0) {
+        if (!ignore_missing)
+            fail("Could not remove file: %s (%s)", path_str, strerror(errno));
+        // Nothing to remove, and `sb` was never filled in, so stop here:
+        return;
+    }
+
+    if ((sb.st_mode & S_IFMT) == S_IFREG || (sb.st_mode & S_IFMT) == S_IFLNK) {
+        if (unlink(path_str) != 0 && !ignore_missing)
+            fail("Could not remove file: %s (%s)", path_str, strerror(errno));
+    } else if ((sb.st_mode & S_IFMT) == S_IFDIR) {
+        if (rmdir(path_str) != 0 && !ignore_missing)
+            fail("Could not remove directory: %s (%s)", path_str, strerror(errno));
+    } else {
+        fail("Could not remove path: %s (not a file or directory)", path_str);
+    }
+}
+
+// Create a single directory at `path` with the given permission bits.
+// An already-existing entry (EEXIST) is not treated as an error.
+public void Path$create_directory(Path_t path, int permissions)
+{
+    const char *dir_str = Path$as_c_string(Path$expand_home(path));
+    if (mkdir(dir_str, (mode_t)permissions) == 0)
+        return;
+    if (errno == EEXIST)
+        return;
+    fail("Could not create directory: %s (%s)", dir_str, strerror(errno));
+}
+
+// List the entries of the directory `path` as an array of Path_t.
+//   include_hidden: when false, names starting with '.' are skipped.
+//   filter: an S_IFMT file type (e.g. S_IFREG, S_IFDIR) that entries must have,
+//           or (mode_t)-1 to accept every type.
+// "." and ".." are never returned, and entries that can't be stat'ed are skipped.
+// Since stat() is used, symlinks are classified by what they point to.
+static Array_t _filtered_children(Path_t path, bool include_hidden, mode_t filter)
+{
+    path = Path$expand_home(path);
+    struct dirent *dir;
+    Array_t children = {};
+    const char *path_str = Path$as_c_string(path);
+    size_t path_len = strlen(path_str);
+    DIR *d = opendir(path_str);
+    if (!d)
+        fail("Could not open directory: %k (%s)", &path, strerror(errno));
+
+    // Drop a trailing slash so that joining with "/" doesn't double it:
+    if (path_len > 0 && path_str[path_len-1] == '/')
+        --path_len;
+
+    // File types are enumerated values inside S_IFMT, not independent bit flags
+    // (e.g. S_IFSOCK shares bits with S_IFREG), so they must be compared for equality.
+    const bool accept_all = (filter == (mode_t)-1);
+
+    while ((dir = readdir(d)) != NULL) {
+        if (!include_hidden && dir->d_name[0] == '.')
+            continue;
+        if (streq(dir->d_name, ".") || streq(dir->d_name, ".."))
+            continue;
+
+        // "%.*s" takes an int precision, not a size_t:
+        const char *child_str = heap_strf("%.*s/%s", (int)path_len, path_str, dir->d_name);
+        struct stat sb;
+        if (stat(child_str, &sb) != 0)
+            continue;
+        if (!accept_all && (sb.st_mode & S_IFMT) != filter)
+            continue;
+
+        Path_t child = Path$from_str(child_str);
+        Array$insert(&children, &child, I(0), sizeof(Path_t));
+    }
+    closedir(d);
+    return children;
+}
+
+// Every entry in the directory `path`: the filter passed down has all bits set.
+public Array_t Path$children(Path_t path, bool include_hidden)
+{
+    const mode_t any_type = (mode_t)-1;
+    return _filtered_children(path, include_hidden, any_type);
+}
+
+// Entries of the directory `path`, filtered with the regular-file type S_IFREG.
+public Array_t Path$files(Path_t path, bool include_hidden)
+{
+    const mode_t wanted_type = S_IFREG;
+    return _filtered_children(path, include_hidden, wanted_type);
+}
+
+// Entries of the directory `path`, filtered with the directory type S_IFDIR.
+public Array_t Path$subdirectories(Path_t path, bool include_hidden)
+{
+    const mode_t wanted_type = S_IFDIR;
+    return _filtered_children(path, include_hidden, wanted_type);
+}
+
+// Create a uniquely named directory from the mkdtemp() template in `path`
+// and return the path that was actually created.
+// Fails if the template is PATH_MAX bytes or longer, or if mkdtemp() fails.
+public Path_t Path$unique_directory(Path_t path)
+{
+    const char *template_str = Path$as_c_string(Path$expand_home(path));
+    size_t template_len = strlen(template_str);
+    if (template_len >= PATH_MAX)
+        fail("Path is too long: %s", template_str);
+
+    // mkdtemp() rewrites its argument in place, so work on a private copy:
+    char buf[PATH_MAX] = {};
+    memcpy(buf, template_str, template_len + 1);
+    if (buf[template_len-1] == '/')
+        buf[template_len-1] = '\0'; // strip one trailing slash
+
+    char *created = mkdtemp(buf);
+    if (created == NULL)
+        fail("Failed to create temporary directory: %s (%s)", template_str, strerror(errno));
+    return Path$from_str(created);
+}
+
+// Create a uniquely named file from the mkstemps() template in `path`, write
+// `bytes` to it, and return the path of the file that was created.
+// The template's last 'X' may be followed by a fixed suffix (e.g. "foo_XXXXXX.tmp").
+// Fails if the template is too long, the file can't be created, or the write fails.
+public Path_t Path$write_unique_bytes(Path_t path, Array_t bytes)
+{
+    path = Path$expand_home(path);
+    const char *path_str = Path$as_c_string(path);
+    size_t len = strlen(path_str);
+    if (len >= PATH_MAX) fail("Path is too long: %s", path_str);
+    char buf[PATH_MAX] = {};
+    strcpy(buf, path_str);
+
+    // Count the number of trailing characters leading up to the last "X"
+    // (e.g. "foo_XXXXXX.tmp" would yield suffixlen = 4)
+    size_t suffixlen = 0;
+    while (suffixlen < len && buf[len - 1 - suffixlen] != 'X')
+        ++suffixlen;
+
+    int fd = mkstemps(buf, (int)suffixlen);
+    if (fd == -1)
+        fail("Could not write to unique file: %s\n%s", buf, strerror(errno));
+
+    if (bytes.stride != 1)
+        Array$compact(&bytes, 1);
+
+    // write() may be interrupted or write fewer bytes than requested, so keep
+    // going until everything is written or a real error occurs:
+    const char *data = bytes.data;
+    size_t remaining = (size_t)bytes.length;
+    while (remaining > 0) {
+        ssize_t written = write(fd, data, remaining);
+        if (written < 0 && errno == EINTR)
+            continue;
+        if (written <= 0) {
+            int saved_errno = errno; // close() may clobber errno
+            close(fd);
+            fail("Could not write to file: %s\n%s", buf, strerror(saved_errno));
+        }
+        data += written;
+        remaining -= (size_t)written;
+    }
+    close(fd);
+    return Path$from_str(buf);
+}
+
+// Text variant of Path$write_unique_bytes(): writes the UTF-8 bytes of `text`.
+public Path_t Path$write_unique(Path_t path, Text_t text)
+{
+    Array_t utf8 = Text$utf8_bytes(text);
+    return Path$write_unique_bytes(path, utf8);
+}
+
+// Compute the parent of a path without touching the filesystem:
+//   - an absolute path with no components is returned unchanged
+//   - a path whose last component isn't ".." gets a slice of its components from I(1) to I(-2)
+//   - otherwise (no components, or already ending in "..") a ".." component is inserted
+public Path_t Path$parent(Path_t path)
+{
+    const int64_t count = path.components.length;
+    if (count == 0 && path.type.$tag == PATH_ABSOLUTE)
+        return path;
+
+    if (count > 0) {
+        Text_t *last = (Text_t*)(path.components.data + path.components.stride*(count-1));
+        if (!Text$equal_values(*last, Text("..")))
+            return (Path_t){.type.$tag=path.type.$tag, .components=Array$slice(path.components, I(1), I(-2))};
+    }
+
+    Path_t result = {.type.$tag=path.type.$tag, .components=path.components};
+    ARRAY_INCREF(result.components);
+    Array$insert_value(&result.components, Text(".."), I(0), sizeof(Text_t));
+    return result;
+}
+
+// The final component of a path. For a path with no components this is "~" for
+// home paths, "." for relative paths, and empty text for anything else.
+public PUREFUNC Text_t Path$base_name(Path_t path)
+{
+    const int64_t count = path.components.length;
+    if (count < 1) {
+        switch (path.type.$tag) {
+        case PATH_HOME: return Text("~");
+        case PATH_RELATIVE: return Text(".");
+        default: return EMPTY_TEXT;
+        }
+    }
+    return *(Text_t*)(path.components.data + path.components.stride*(count-1));
+}
+
+// Extract the extension of the path's base name using text patterns.
+// A pattern with a leading "." is tried first, then the same pattern without it.
+// The result is the element at index 1 of whichever match array is non-empty,
+// or empty text if neither pattern matches.
+// `full` selects between the two pattern variants written out below.
+public Text_t Path$extension(Path_t path, bool full)
+{
+    Text_t base = Path$base_name(path);
+    Array_t captures = Text$matches(base, full ? Pattern(".{!.}.{..}") : Pattern(".{..}.{!.}{end}"));
+    if (captures.length <= 0) {
+        captures = Text$matches(base, full ? Pattern("{!.}.{..}") : Pattern("{..}.{!.}{end}"));
+        if (captures.length <= 0)
+            return Text("");
+    }
+    return *((Text_t*)(captures.data + captures.stride*1));
+}
+
+// Return a copy of `path` with `component` inserted into its component list,
+// after which the list is passed through clean_components().
+public Path_t Path$with_component(Path_t path, Text_t component)
+{
+    Path_t extended = {.type.$tag=path.type.$tag, .components=path.components};
+    // The component array is shared with `path`, so bump its refcount before modifying:
+    ARRAY_INCREF(extended.components);
+    Array$insert(&extended.components, &component, I(0), sizeof(Text_t));
+    clean_components(&extended.components);
+    return extended;
+}
+
+// Return a copy of `path` whose last component has `extension` concatenated onto it.
+// With `replace` set, the last component is first rewritten with a pattern
+// replacement (a separate pattern is used for names starting with ".").
+// `extension` is appended verbatim; no "." is added here.
+// Fails for paths that have no components.
+public Path_t Path$with_extension(Path_t path, Text_t extension, bool replace)
+{
+    const int64_t count = path.components.length;
+    if (count == 0)
+        fail("A path with no components can't have an extension!");
+
+    // Work out the new final component:
+    Text_t name = *(Text_t*)(path.components.data + path.components.stride*(count-1));
+    if (replace) {
+        if (Text$starts_with(name, Text(".")))
+            name = Text$replace(name, Pattern(".{!.}.{..}"), Text(".@1"), Pattern("@"), false);
+        else
+            name = Text$replace(name, Pattern("{!.}.{..}"), Text("@1"), Pattern("@"), false);
+    }
+    name = Text$concat(name, extension);
+
+    // Swap it in for the old final component:
+    Path_t result = {.type.$tag=path.type.$tag, .components=path.components};
+    ARRAY_INCREF(result.components);
+    Array$remove_at(&result.components, I(-1), I(1), sizeof(Text_t));
+    Array$insert(&result.components, &name, I(0), sizeof(Text_t));
+    return result;
+}
+
+// Close the wrapped FILE (if any) and clear the slot so it is only closed once.
+static void _line_reader_cleanup(FILE **f)
+{
+    if (f == NULL || *f == NULL)
+        return;
+    fclose(*f);
+    *f = NULL;
+}
+
+// Read the next line from the wrapped FILE and return it as text with any
+// trailing '\r' / '\n' characters removed.
+// Returns NONE_TEXT (and closes the file) once getline() reports end of input
+// or an error. Fails if the line is not valid UTF-8.
+static Text_t _next_line(FILE **f)
+{
+    if (!f || !*f) return NONE_TEXT;
+
+    char *line = NULL;
+    size_t size = 0;
+    ssize_t len = getline(&line, &size, *f);
+    if (len <= 0) {
+        // getline() can allocate a buffer even when it fails, so always release it:
+        free(line);
+        _line_reader_cleanup(f);
+        return NONE_TEXT;
+    }
+
+    while (len > 0 && (line[len-1] == '\r' || line[len-1] == '\n'))
+        --len;
+
+    if (u8_check((uint8_t*)line, (size_t)len) != NULL) {
+        free(line);
+        fail("Invalid UTF8!");
+    }
+
+    Text_t line_text = Text$from_strn(line, (size_t)len);
+    free(line);
+    return line_text;
+}
+
+// Open `path` for reading and return a closure around _next_line().
+// The FILE* lives in a GC-allocated cell with _line_reader_cleanup() registered
+// as its finalizer. Returns NONE_CLOSURE if the file can't be opened.
+public OptionalClosure_t Path$by_line(Path_t path)
+{
+    FILE *stream = fopen(Path$as_c_string(Path$expand_home(path)), "r");
+    if (!stream)
+        return NONE_CLOSURE;
+
+    FILE **handle = GC_MALLOC(sizeof(FILE*));
+    *handle = stream;
+    GC_register_finalizer(handle, (void*)_line_reader_cleanup, NULL, NULL, NULL);
+    return (Closure_t){.fn=(void*)_next_line, .userdata=handle};
+}
+
+// Expand a glob pattern (with brace and tilde expansion) into an array of paths.
+// Results ending in "/." or "/.." are left out. No matches gives an empty array;
+// any other glob() failure is fatal.
+public Array_t Path$glob(Path_t path)
+{
+    glob_t glob_result;
+    int status = glob(Path$as_c_string(path), GLOB_BRACE | GLOB_TILDE | GLOB_TILDE_CHECK, NULL, &glob_result);
+    if (status != 0 && status != GLOB_NOMATCH)
+        fail("Failed to perform globbing");
+
+    Array_t glob_files = {};
+    for (size_t i = 0; i < glob_result.gl_pathc; i++) {
+        const char *match = glob_result.gl_pathv[i];
+        size_t len = strlen(match);
+        bool ends_in_dot = (len >= 2 && match[len-1] == '.' && match[len-2] == '/');
+        // Needs at least 3 characters, otherwise match[len-3] is out of bounds:
+        bool ends_in_dotdot = (len >= 3 && match[len-1] == '.' && match[len-2] == '.' && match[len-3] == '/');
+        if (ends_in_dot || ends_in_dotdot)
+            continue;
+        Path_t p = Path$from_str(match);
+        Array$insert(&glob_files, &p, I(0), sizeof(Path_t));
+    }
+    // Release the match list allocated by glob():
+    globfree(&glob_result);
+    return glob_files;
+}
+
+// Hash a path: siphash seeded with the path type tag, fed the hash of each
+// component in order, and finished with the component count.
+public PUREFUNC uint64_t Path$hash(const void *obj, const TypeInfo_t *type)
+{
+    (void)type;
+    Path_t *p = (Path_t*)obj;
+    const int64_t count = p->components.length;
+
+    siphash hasher;
+    siphashinit(&hasher, (uint64_t)p->type.$tag);
+    for (int64_t idx = 0; idx < count; idx++) {
+        uint64_t component_hash = Text$hash(p->components.data + idx*p->components.stride, &Text$info);
+        siphashadd64bits(&hasher, component_hash);
+    }
+    return siphashfinish_last_part(&hasher, (uint64_t)count);
+}
+
+// Order two paths: first by the numeric difference of their type tags, then by
+// comparing their component arrays as arrays of text.
+public PUREFUNC int32_t Path$compare(const void *va, const void *vb, const TypeInfo_t *type)
+{
+    (void)type;
+    Path_t *lhs = (Path_t*)va;
+    Path_t *rhs = (Path_t*)vb;
+    int tag_diff = (int)lhs->type.$tag - (int)rhs->type.$tag;
+    if (tag_diff == 0)
+        return Array$compare(&lhs->components, &rhs->components, Array$info(&Text$info));
+    return tag_diff;
+}
+
+// Two paths are equal when their type tags match and their component arrays are equal.
+public PUREFUNC bool Path$equal(const void *va, const void *vb, const TypeInfo_t *type)
+{
+    (void)type;
+    Path_t *lhs = (Path_t*)va;
+    Path_t *rhs = (Path_t*)vb;
+    return lhs->type.$tag == rhs->type.$tag
+        && Array$equal(&lhs->components, &rhs->components, Array$info(&Text$info));
+}
+
+// By-value variant of Path$equal(): same tag and equal component arrays.
+public PUREFUNC bool Path$equal_values(Path_t a, Path_t b)
+{
+    bool same_kind = (a.type.$tag == b.type.$tag);
+    return same_kind && Array$equal(&a.components, &b.components, Array$info(&Text$info));
+}
+
+// Render a path as a NUL-terminated C string.
+// Paths without components give the literals "/", "." or "~" (by path type).
+// Otherwise the result is built in GC-managed memory: absolute paths start with
+// "/", home paths with "~/" (the home directory is NOT expanded here), and
+// relative paths with "./" unless their first component is "..".
+public const char *Path$as_c_string(Path_t path)
+{
+    if (path.components.length == 0) {
+        if (path.type.$tag == PATH_ABSOLUTE) return "/";
+        else if (path.type.$tag == PATH_RELATIVE) return ".";
+        else if (path.type.$tag == PATH_HOME) return "~";
+    }
+
+    size_t len = 0, capacity = 16;
+    char *buf = GC_MALLOC_ATOMIC(capacity);
+    if (path.type.$tag == PATH_ABSOLUTE) {
+        buf[len++] = '/';
+    } else if (path.type.$tag == PATH_HOME) {
+        buf[len++] = '~';
+        buf[len++] = '/';
+    } else if (path.type.$tag == PATH_RELATIVE) {
+        if (!Text$equal_values(*(Text_t*)path.components.data, Text(".."))) {
+            buf[len++] = '.';
+            buf[len++] = '/';
+        }
+    }
+
+    for (int64_t i = 0; i < path.components.length; i++) {
+        Text_t *comp = (Text_t*)(path.components.data + i*path.components.stride);
+        const char *comp_str = Text$as_c_string(*comp);
+        size_t comp_len = strlen(comp_str);
+        // Make room for the component plus a '/' separator and the final NUL.
+        // (Growth used to be capped at 16 bytes per component, which overflowed
+        // the buffer for longer components.)
+        size_t needed = len + comp_len + 2;
+        if (needed > capacity) {
+            capacity *= 2;
+            if (capacity < needed)
+                capacity = needed;
+            buf = GC_REALLOC(buf, capacity);
+        }
+        memcpy(&buf[len], comp_str, comp_len);
+        len += comp_len;
+        if (i + 1 < path.components.length)
+            buf[len++] = '/';
+    }
+    buf[len++] = '\0';
+    return buf;
+}
+
+// Text form of a path: the components joined by "/", prefixed according to the
+// path type ("~" or "~/", "/", "." or "./"). A relative path whose first component
+// is ".." gets no prefix. A NULL `obj` yields the type name "Path".
+// With `color` set, the text is wrapped in ANSI escape sequences.
+public Text_t Path$as_text(const void *obj, bool color, const TypeInfo_t *type)
+{
+    (void)type;
+    if (obj == NULL) return Text("Path");
+
+    Path_t *path = (Path_t*)obj;
+    const bool has_components = path->components.length > 0;
+    Text_t text = Text$join(Text("/"), path->components);
+    switch (path->type.$tag) {
+    case PATH_HOME:
+        text = Text$concat(has_components ? Text("~/") : Text("~"), text);
+        break;
+    case PATH_ABSOLUTE:
+        text = Text$concat(Text("/"), text);
+        break;
+    case PATH_RELATIVE:
+        if (!has_components || !Text$equal_values(*(Text_t*)(path->components.data), Text("..")))
+            text = Text$concat(has_components ? Text("./") : Text("."), text);
+        break;
+    default:
+        break;
+    }
+
+    if (!color)
+        return text;
+    return Texts(Text("\033[32;1m"), text, Text("\033[m"));
+}
+
+// A path counts as "none" when its type tag is PATH_NONE.
+public CONSTFUNC bool Path$is_none(const void *obj, const TypeInfo_t *type)
+{
+    (void)type;
+    const Path_t *path = (const Path_t*)obj;
+    return path->type.$tag == PATH_NONE;
+}
+
+// Write a path to `out`: one byte holding the type tag, followed by the
+// component array serialized as an array of text.
+public void Path$serialize(const void *obj, FILE *out, Table_t *pointers, const TypeInfo_t *type)
+{
+    (void)type;
+    Path_t *path = (Path_t*)obj;
+    int tag_byte = (int)path->type.$tag;
+    fputc(tag_byte, out);
+    Array$serialize(&path->components, out, pointers, Array$info(&Text$info));
+}
+
+// Read a path from `in`, mirroring Path$serialize(): a tag byte, then the
+// component array. The result is stored into `obj` only once fully read.
+public void Path$deserialize(FILE *in, void *obj, Array_t *pointers, const TypeInfo_t *type)
+{
+    (void)type;
+    Path_t *dest = (Path_t*)obj;
+    Path_t loaded = {};
+    loaded.type.$tag = fgetc(in);
+    Array$deserialize(in, &loaded.components, pointers, Array$info(&Text$info));
+    *dest = loaded;
+}
+
+public const TypeInfo_t Path$info = { // Type descriptor for Path_t: size/alignment plus the metamethod table
+ .size=sizeof(Path_t),
+ .align=__alignof__(Path_t),
+ .tag=OpaqueInfo, // Paths are described as an opaque type; all behavior comes from the metamethods below
+ .metamethods={
+ .as_text=Path$as_text,
+ .hash=Path$hash,
+ .compare=Path$compare,
+ .equal=Path$equal,
+ .is_none=Path$is_none,
+ .serialize=Path$serialize,
+ .deserialize=Path$deserialize,
+ }
+};
+
+public const TypeInfo_t PathType$info = { // Type descriptor for PathType_t, described as an enum
+ .size=sizeof(PathType_t),
+ .align=__alignof__(PathType_t),
+ .metamethods=PackedDataEnum$metamethods,
+ .tag=EnumInfo,
+ .EnumInfo={
+ .name="PathType",
+ .num_tags=3, // Must match the number of entries in .tags
+ .tags=((NamedType_t[3]){{.name="Relative"}, {.name="Absolute"}, {.name="Home"}}), // NOTE(review): PATH_NONE has no named entry here; confirm that is intended
+ },
+};
+
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/src/stdlib/paths.h b/src/stdlib/paths.h
new file mode 100644
index 00000000..6c6cebd3
--- /dev/null
+++ b/src/stdlib/paths.h
@@ -0,0 +1,71 @@
+#pragma once
+
+// A lang for filesystem paths
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "types.h"
+#include "datatypes.h"
+#include "optionals.h"
+
+Path_t Path$from_str(const char *str);
+Path_t Path$from_text(Text_t text);
+const char *Path$as_c_string(Path_t path);
+#define Path(str) Path$from_str(str)
+Path_t Path$_concat(int n, Path_t items[n]);
+// Concatenate any number of paths. The argument count is computed from the size of a
+// compound-literal array; the whole quotient is cast to int (casting only the sizeof
+// would leave the division result as a size_t).
+#define Path$concat(...) Path$_concat((int)(sizeof((Path_t[]){__VA_ARGS__})/sizeof(Path_t)), ((Path_t[]){__VA_ARGS__}))
+Path_t Path$resolved(Path_t path, Path_t relative_to);
+Path_t Path$relative_to(Path_t path, Path_t relative_to);
+Path_t Path$expand_home(Path_t path);
+bool Path$exists(Path_t path);
+bool Path$is_file(Path_t path, bool follow_symlinks);
+bool Path$is_directory(Path_t path, bool follow_symlinks);
+bool Path$is_pipe(Path_t path, bool follow_symlinks);
+bool Path$is_socket(Path_t path, bool follow_symlinks);
+bool Path$is_symlink(Path_t path);
+bool Path$can_read(Path_t path);
+bool Path$can_write(Path_t path);
+bool Path$can_execute(Path_t path);
+OptionalMoment_t Path$modified(Path_t path, bool follow_symlinks);
+OptionalMoment_t Path$accessed(Path_t path, bool follow_symlinks);
+OptionalMoment_t Path$changed(Path_t path, bool follow_symlinks);
+void Path$write(Path_t path, Text_t text, int permissions);
+void Path$write_bytes(Path_t path, Array_t bytes, int permissions);
+void Path$append(Path_t path, Text_t text, int permissions);
+void Path$append_bytes(Path_t path, Array_t bytes, int permissions);
+OptionalText_t Path$read(Path_t path);
+OptionalArray_t Path$read_bytes(Path_t path, OptionalInt_t limit);
+void Path$set_owner(Path_t path, OptionalText_t owner, OptionalText_t group, bool follow_symlinks);
+OptionalText_t Path$owner(Path_t path, bool follow_symlinks);
+OptionalText_t Path$group(Path_t path, bool follow_symlinks);
+void Path$remove(Path_t path, bool ignore_missing);
+void Path$create_directory(Path_t path, int permissions);
+Array_t Path$children(Path_t path, bool include_hidden);
+Array_t Path$files(Path_t path, bool include_hidden);
+Array_t Path$subdirectories(Path_t path, bool include_hidden);
+Path_t Path$unique_directory(Path_t path);
+Path_t Path$write_unique(Path_t path, Text_t text);
+Path_t Path$write_unique_bytes(Path_t path, Array_t bytes);
+Path_t Path$parent(Path_t path);
+Text_t Path$base_name(Path_t path);
+Text_t Path$extension(Path_t path, bool full);
+Path_t Path$with_component(Path_t path, Text_t component);
+Path_t Path$with_extension(Path_t path, Text_t extension, bool replace);
+Closure_t Path$by_line(Path_t path);
+Array_t Path$glob(Path_t path);
+
+uint64_t Path$hash(const void *obj, const TypeInfo_t*);
+int32_t Path$compare(const void *a, const void *b, const TypeInfo_t *type);
+bool Path$equal(const void *a, const void *b, const TypeInfo_t *type);
+bool Path$equal_values(Path_t a, Path_t b);
+Text_t Path$as_text(const void *obj, bool color, const TypeInfo_t *type);
+bool Path$is_none(const void *obj, const TypeInfo_t *type);
+void Path$serialize(const void *obj, FILE *out, Table_t *pointers, const TypeInfo_t *type);
+void Path$deserialize(FILE *in, void *obj, Array_t *pointers, const TypeInfo_t *type);
+
+extern const TypeInfo_t Path$info;
+extern const TypeInfo_t PathType$info;
+
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
+
diff --git a/src/stdlib/patterns.c b/src/stdlib/patterns.c
new file mode 100644
index 00000000..7f7d711b
--- /dev/null
+++ b/src/stdlib/patterns.c
@@ -0,0 +1,1307 @@
+// Logic for text pattern matching
+
+#include <ctype.h>
+#include <sys/param.h>
+#include <unictype.h>
+#include <uniname.h>
+
+#include "arrays.h"
+#include "integers.h"
+#include "optionals.h"
+#include "patterns.h"
+#include "structs.h"
+#include "tables.h"
+#include "text.h"
+#include "types.h"
+
+#define MAX_BACKREFS 100 // NOTE(review): presumably the cap on captures/backreferences per match; its users are further down the file, confirm there
+
+typedef struct { // Bookkeeping for one capture. NOTE(review): users are outside this part of the file, field meanings below are presumed
+ int64_t index, length; // presumably the start position and length of the captured span
+ bool occupied, recursive; // presumably: slot in use / capture should be processed recursively (confirm)
+} capture_t;
+
+typedef struct { // One parsed pattern element, as consumed by match_pat()
+ enum { PAT_START, PAT_END, PAT_ANY, PAT_GRAPHEME, PAT_PROPERTY, PAT_QUOTE, PAT_PAIR, PAT_FUNCTION } tag; // Kind of element; selects which union member is meaningful
+ bool negated, non_capturing; // negated: set by the parser when the pattern name is prefixed with '!'
+ int64_t min, max; // Repetition bounds; the parser uses -1/-1 when a named pattern has no explicit count
+ union {
+ int32_t grapheme; // PAT_GRAPHEME: the literal grapheme to compare against
+ uc_property_t property; // PAT_PROPERTY: the Unicode property to test
+ int64_t (*fn)(TextIter_t *, int64_t); // PAT_FUNCTION: matcher taking (state, index), returning the match length or a negative value
+ int32_t quote_graphemes[2]; // PAT_QUOTE: {open, close}
+ int32_t pair_graphemes[2]; // PAT_PAIR: {open, close}
+ };
+} pat_t;
+
+static Text_t Text$replace_array(Text_t text, Array_t replacements, Text_t backref_pat, bool recursive);
+
+// Advance *i past whitespace. Scanning stops at the first grapheme that is
+// positive and lacks the Unicode white-space property, or at the end of the text.
+// Note that graphemes with non-positive values are stepped over as well.
+static INLINE void skip_whitespace(TextIter_t *state, int64_t *i)
+{
+    for (; *i < state->stack[0].text.length; *i += 1) {
+        int32_t g = Text$get_grapheme_fast(state, *i);
+        bool skippable = (g <= 0) || uc_is_property_white_space((ucs4_t)g);
+        if (!skippable)
+            return;
+    }
+}
+
+// If the grapheme at *i equals `grapheme`, step over it and return true.
+// Otherwise (including at the end of the text) leave *i alone and return false.
+static INLINE bool match_grapheme(TextIter_t *state, int64_t *i, int32_t grapheme)
+{
+    if (*i >= state->stack[0].text.length)
+        return false;
+    if (Text$get_grapheme_fast(state, *i) != grapheme)
+        return false;
+    *i += 1;
+    return true;
+}
+
+// Match the C string `str` one char per grapheme, starting at *i.
+// On success *i is advanced past the match; on failure it is left untouched.
+static INLINE bool match_str(TextIter_t *state, int64_t *i, const char *str)
+{
+    int64_t n = 0;
+    for (; str[n] != '\0'; n++) {
+        int64_t pos = *i + n;
+        if (pos >= state->stack[0].text.length)
+            return false;
+        if (Text$get_grapheme_fast(state, pos) != str[n])
+            return false;
+    }
+    *i += n;
+    return true;
+}
+
+// If the main grapheme at *i has the Unicode property `prop`, step over it
+// and return true; otherwise return false without moving.
+static INLINE bool match_property(TextIter_t *state, int64_t *i, uc_property_t prop)
+{
+    if (*i < state->stack[0].text.length) {
+        // TODO: check every codepoint in the cluster?
+        uint32_t main_codepoint = Text$get_main_grapheme_fast(state, *i);
+        if (uc_is_property(main_codepoint, prop)) {
+            *i += 1;
+            return true;
+        }
+    }
+    return false;
+}
+
+// Parse a run of decimal digits starting at *i and return its value, advancing
+// *i past the digits consumed. Parsing also stops (without consuming the digit)
+// once the accumulated value reaches INT64_MAX/10, to stay clear of overflow.
+static int64_t parse_int(TextIter_t *state, int64_t *i)
+{
+    int64_t value = 0;
+    for (;;) {
+        uint32_t codepoint = Text$get_main_grapheme_fast(state, *i);
+        int digit = uc_digit_value(codepoint);
+        if (digit < 0 || value >= INT64_MAX/10)
+            break;
+        value = 10*value + digit;
+        *i += 1;
+    }
+    return value;
+}
+
+// Read a property/pattern name at *i (after skipping leading whitespace).
+// A name is made of graphemes in the 0-255 range that are alphanumeric, ' ', '_' or '-'.
+// Trailing spaces are trimmed. Returns a GC-allocated C string, or NULL if no
+// name characters were found. At most UNINAME_MAX-1 characters are collected.
+static const char *get_property_name(TextIter_t *state, int64_t *i)
+{
+    skip_whitespace(state, i);
+    char *name = GC_MALLOC_ATOMIC(UNINAME_MAX);
+    size_t n = 0;
+    while (*i < state->stack[0].text.length) {
+        int32_t g = Text$get_grapheme_fast(state, *i);
+        bool is_name_char = !(g & ~0xFF) && (isalnum(g) || g == ' ' || g == '_' || g == '-');
+        if (!is_name_char)
+            break;
+        name[n++] = (char)g;
+        // When the buffer fills up, stop without stepping past the character just stored:
+        if (n >= (size_t)(UNINAME_MAX - 1))
+            break;
+        *i += 1;
+    }
+
+    // Trim trailing spaces:
+    while (n > 0 && name[n-1] == ' ') {
+        name[n] = '\0';
+        n -= 1;
+    }
+
+    if (n == 0) return NULL;
+    name[n] = '\0';
+    return name;
+}
+
+#define EAT1(state, index, cond) ({ /* Bind `grapheme` to the grapheme at `index`, evaluate `cond` (which may refer to `grapheme`); on success advance `index` by 1. Yields the bool. `index` must be an lvalue. */ \
+ int32_t grapheme = Text$get_grapheme_fast(state, index); \
+ bool success = (cond); \
+ if (success) index += 1; \
+ success; })
+
+#define EAT2(state, index, cond1, cond2) ({ /* Two-grapheme version of EAT1: cond1 is checked against the grapheme at `index`, cond2 against the one at `index + 1`; `index` advances by 2 only if both hold. */ \
+ int32_t grapheme = Text$get_grapheme_fast(state, index); \
+ bool success = (cond1); \
+ if (success) { \
+ grapheme = Text$get_grapheme_fast(state, index + 1); \
+ success = (cond2); \
+ if (success) \
+ index += 2; \
+ } \
+ success; })
+
+
+#define EAT_MANY(state, index, cond) ({ int64_t _n = 0; while (EAT1(state, index, cond)) { _n += 1; } _n; }) // Apply EAT1 repeatedly; yields how many graphemes were consumed (possibly 0)
+
+// Match an email address starting at `index`.
+// Returns the length of the match in graphemes, or -1 if there is no match.
+// No match is reported when the preceding grapheme is alphabetic (i.e. when
+// `index` is in the middle of a word).
+static int64_t match_email(TextIter_t *state, int64_t index)
+{
+    // email = local "@" domain
+    // local = 1-64 ([a-zA-Z0-9!#$%&'*+-/=?^_`.{|}~] | non-ascii)
+    // domain = dns-label ("." dns-label)*
+    // dns-label = 1-63 ([a-zA-Z0-9-] | non-ascii)
+
+    if (index > 0) {
+        uint32_t prev_codepoint = Text$get_main_grapheme_fast(state, index - 1);
+        if (uc_is_property_alphabetic(prev_codepoint))
+            return -1;
+    }
+
+    int64_t start_index = index;
+
+    // Local part:
+    int64_t local_len = 0;
+    // Plain ASCII apostrophe and hyphen: the typographic quote/dash that used to
+    // be in this list are multi-byte in UTF-8 and could never match via strchr().
+    static const char *allowed_local = "!#$%&'*+-/=?^_`.{|}~";
+    while (EAT1(state, index,
+                (grapheme & ~0x7F) || isalnum((char)grapheme)
+                // strchr() also finds the terminating NUL, so exclude grapheme 0 explicitly:
+                || (grapheme != 0 && strchr(allowed_local, (char)grapheme)))) {
+        local_len += 1;
+        if (local_len > 64) return -1;
+    }
+
+    // The local part must not be empty:
+    if (local_len == 0)
+        return -1;
+
+    if (!EAT1(state, index, grapheme == '@'))
+        return -1;
+
+    // Host
+    int64_t host_len = 0;
+    do {
+        int64_t label_len = 0;
+        while (EAT1(state, index,
+                    (grapheme & ~0x7F) || isalnum((char)grapheme) || grapheme == '-')) {
+            label_len += 1;
+            if (label_len > 63) return -1;
+        }
+
+        if (label_len == 0)
+            return -1;
+
+        host_len += label_len;
+        if (host_len > 255)
+            return -1;
+        host_len += 1; // account for the "." separator
+    } while (EAT1(state, index, grapheme == '.'));
+
+    return index - start_index;
+}
+
+// Match an IPv6 address (up to 8 colon-separated clusters of at most 4 hex
+// digits, with at most one "::") starting at `index`.
+// Returns the match length, or -1 for no match. No match is reported when the
+// preceding grapheme is an ASCII hex digit or ':'.
+static int64_t match_ipv6(TextIter_t *state, int64_t index)
+{
+    if (index > 0) {
+        int32_t prev_codepoint = Text$get_grapheme_fast(state, index - 1);
+        // Only ASCII values may be handed to isxdigit():
+        if (!(prev_codepoint & ~0x7F) && (isxdigit(prev_codepoint) || prev_codepoint == ':'))
+            return -1;
+    }
+    int64_t start_index = index;
+    const int NUM_CLUSTERS = 8;
+    bool double_colon_used = false;
+    for (int cluster = 0; cluster < NUM_CLUSTERS; cluster++) {
+        for (int digits = 0; digits < 4; digits++) {
+            // `!` (not bitwise `~`): the grapheme has to be ASCII before it is
+            // truncated to char, otherwise e.g. U+0141 would pass as 'A'.
+            if (!EAT1(state, index, !(grapheme & ~0x7F) && isxdigit((char)grapheme)))
+                break;
+        }
+        if (EAT1(state, index, !(grapheme & ~0x7F) && isxdigit((char)grapheme)))
+            return -1; // Too many digits
+
+        if (cluster == NUM_CLUSTERS-1) {
+            break;
+        } else if (!EAT1(state, index, grapheme == ':')) {
+            // Fewer than 8 clusters is only acceptable if "::" stood in for the rest:
+            if (double_colon_used)
+                break;
+            return -1;
+        }
+
+        if (EAT1(state, index, grapheme == ':')) {
+            if (double_colon_used)
+                return -1;
+            double_colon_used = true;
+        }
+    }
+    return index - start_index;
+}
+
+// Match a dotted-quad IPv4 address (4 clusters of 1-3 decimal digits) starting
+// at `index`. Returns the match length, or -1 for no match. No match is
+// reported when the preceding grapheme is an ASCII digit or '.'.
+static int64_t match_ipv4(TextIter_t *state, int64_t index)
+{
+    if (index > 0) {
+        int32_t prev_codepoint = Text$get_grapheme_fast(state, index - 1);
+        // Only ASCII values may be handed to isdigit():
+        if (!(prev_codepoint & ~0x7F) && (isdigit(prev_codepoint) || prev_codepoint == '.'))
+            return -1;
+    }
+    int64_t start_index = index;
+
+    const int NUM_CLUSTERS = 4;
+    for (int cluster = 0; cluster < NUM_CLUSTERS; cluster++) {
+        for (int digits = 0; digits < 3; digits++) {
+            // `!` (not bitwise `~`): the grapheme has to be ASCII before it is truncated to char.
+            if (!EAT1(state, index, !(grapheme & ~0x7F) && isdigit((char)grapheme))) {
+                if (digits == 0) return -1;
+                break;
+            }
+        }
+
+        if (EAT1(state, index, !(grapheme & ~0x7F) && isdigit((char)grapheme)))
+            return -1; // Too many digits
+
+        if (cluster == NUM_CLUSTERS-1)
+            break;
+        else if (!EAT1(state, index, grapheme == '.'))
+            return -1;
+    }
+    return (index - start_index);
+}
+
+// Match an IP address, trying IPv6 first and falling back to IPv4.
+// Returns the match length, or -1 if neither matches.
+static int64_t match_ip(TextIter_t *state, int64_t index)
+{
+    int64_t len = match_ipv6(state, index);
+    if (len < 0)
+        len = match_ipv4(state, index);
+    return (len >= 0) ? len : -1;
+}
+
+// Match a host: a bare IP address, an IP address in [brackets], or a name that
+// starts with a letter and runs until a character that can't be in a host.
+// Returns the match length, or -1 for no match.
+static int64_t match_host(TextIter_t *state, int64_t index)
+{
+    int64_t ip_len = match_ip(state, index);
+    if (ip_len > 0) return ip_len;
+
+    int64_t start_index = index;
+    if (match_grapheme(state, &index, '[')) {
+        ip_len = match_ip(state, index);
+        if (ip_len <= 0) return -1;
+        index += ip_len;
+        if (match_grapheme(state, &index, ']'))
+            return (index - start_index);
+        return -1;
+    }
+
+    // isalpha() is only defined for unsigned char values, so it is used for
+    // ASCII only; other graphemes are checked with the Unicode alphabetic property.
+    if (!EAT1(state, index,
+              (grapheme & ~0x7F) ? uc_is_property_alphabetic((ucs4_t)grapheme) : (isalpha(grapheme) != 0)))
+        return -1;
+
+    static const char *non_host_chars = "/#?:@ \t\r\n<>[]{}\\^|\"`";
+    EAT_MANY(state, index, (grapheme & ~0x7F) || !strchr(non_host_chars, (char)grapheme));
+    return (index - start_index);
+}
+
+// Match a URI authority: [userinfo "@"] host [":" port].
+// Returns the match length, or -1 if there is no valid host or if a ':' is
+// not followed by at least one digit.
+static int64_t match_authority(TextIter_t *state, int64_t index)
+{
+    const int64_t authority_start = index;
+    static const char *non_segment_chars = "/#?:@ \t\r\n<>[]{}\\^|\"`.";
+
+    // Optional user@ prefix (rewind if it turns out not to be one):
+    int64_t username_len = EAT_MANY(state, index, (grapheme & ~0x7F) || !strchr(non_segment_chars, (char)grapheme));
+    bool has_userinfo = (username_len >= 1) && EAT1(state, index, grapheme == '@');
+    if (!has_userinfo)
+        index = authority_start;
+
+    // Host:
+    int64_t host_len = match_host(state, index);
+    if (host_len <= 0)
+        return -1;
+    index += host_len;
+
+    // Port:
+    if (EAT1(state, index, grapheme == ':')) {
+        int64_t port_digits = EAT_MANY(state, index, !(grapheme & ~0x7F) && isdigit(grapheme));
+        if (port_digits == 0)
+            return -1;
+    }
+    return (index - authority_start);
+}
+
+// Match a URI starting at `index`. Returns the match length, or -1 for no match.
+// No match is reported when the preceding grapheme is alphabetic.
+static int64_t match_uri(TextIter_t *state, int64_t index)
+{
+    // URI = scheme ":" ["//" authority] path ["?" query] ["#" fragment]
+    // scheme = [a-zA-Z] [a-zA-Z0-9+.-]
+    // authority = [userinfo "@"] host [":" port]
+
+    if (index > 0) {
+        // Don't match if we're not at a word edge:
+        uint32_t prev_codepoint = Text$get_main_grapheme_fast(state, index - 1);
+        if (uc_is_property_alphabetic(prev_codepoint))
+            return -1;
+    }
+
+    int64_t start_index = index;
+
+    // Scheme (ASCII only; isalpha() must not be given values outside the unsigned char range):
+    if (!EAT1(state, index, !(grapheme & ~0x7F) && isalpha(grapheme)))
+        return -1;
+    EAT_MANY(state, index, !(grapheme & ~0x7F) && (isalnum(grapheme) || grapheme == '+' || grapheme == '.' || grapheme == '-'));
+    if (!match_grapheme(state, &index, ':'))
+        return -1;
+
+    // Authority:
+    int64_t authority_len;
+    if (match_str(state, &index, "//")) {
+        authority_len = match_authority(state, index);
+        if (authority_len > 0)
+            index += authority_len;
+    } else {
+        authority_len = 0;
+    }
+
+    // Path:
+    int64_t path_start = index;
+    if (EAT1(state, index, grapheme == '/') || authority_len <= 0) {
+        static const char *non_path = " \"#?<>[]{}\\^`|";
+        EAT_MANY(state, index, (grapheme & ~0x7F) || !strchr(non_path, (char)grapheme));
+
+        if (EAT1(state, index, grapheme == '?')) { // Query
+            static const char *non_query = " \"#<>[]{}\\^`|";
+            EAT_MANY(state, index, (grapheme & ~0x7F) || !strchr(non_query, (char)grapheme));
+        }
+
+        if (EAT1(state, index, grapheme == '#')) { // Fragment
+            static const char *non_fragment = " \"#<>[]{}\\^`|";
+            EAT_MANY(state, index, (grapheme & ~0x7F) || !strchr(non_fragment, (char)grapheme));
+        }
+    }
+
+    // Without an authority there has to be a non-empty path:
+    if (authority_len <= 0 && index == path_start)
+        return -1;
+
+    return index - start_index;
+}
+
+// Match a URI whose scheme is one of https, http, ftp, wss or ws.
+// Returns the match length, or -1 for no match.
+static int64_t match_url(TextIter_t *state, int64_t index)
+{
+    static const char *schemes[] = {"https:", "http:", "ftp:", "wss:", "ws:"};
+    for (size_t s = 0; s < sizeof(schemes)/sizeof(schemes[0]); s++) {
+        // Only peek at the scheme; the full match restarts from `index`:
+        int64_t lookahead = index;
+        if (match_str(state, &lookahead, schemes[s]))
+            return match_uri(state, index);
+    }
+    return -1;
+}
+
+// Match an identifier: one XID_Start grapheme followed by any number of
+// XID_Continue graphemes. Returns the match length, or -1 for no match.
+static int64_t match_id(TextIter_t *state, int64_t index)
+{
+    bool starts_ok = EAT1(state, index, uc_is_property((ucs4_t)grapheme, UC_PROPERTY_XID_START));
+    if (!starts_ok)
+        return -1;
+    int64_t rest = EAT_MANY(state, index, uc_is_property((ucs4_t)grapheme, UC_PROPERTY_XID_CONTINUE));
+    return rest + 1;
+}
+
+// Match an integer: an optional '-' followed by one or more decimal digits.
+// Returns the match length (including the sign), or -1 for no match.
+static int64_t match_int(TextIter_t *state, int64_t index)
+{
+    int64_t sign_len = 0;
+    if (EAT1(state, index, grapheme == '-'))
+        sign_len = 1;
+
+    int64_t digit_count = EAT_MANY(state, index, uc_is_property((ucs4_t)grapheme, UC_PROPERTY_DECIMAL_DIGIT));
+    if (digit_count <= 0)
+        return -1;
+    return sign_len + digit_count;
+}
+
+// Match a single grapheme that is alphabetic or numeric (Unicode properties).
+// Returns 1 on a match, -1 otherwise.
+static int64_t match_alphanumeric(TextIter_t *state, int64_t index)
+{
+    bool matched = EAT1(state, index,
+                        uc_is_property_alphabetic((ucs4_t)grapheme) || uc_is_property_numeric((ucs4_t)grapheme));
+    return matched ? 1 : -1;
+}
+
+// Match a decimal number: optional '-', digits, optional '.', more digits.
+// At least one digit is required on one side of the decimal point.
+// Returns the match length (sign and '.' included), or -1 for no match.
+static int64_t match_num(TextIter_t *state, int64_t index)
+{
+    int64_t punctuation_len = 0; // counts the '-' and the '.'
+    if (EAT1(state, index, grapheme == '-'))
+        punctuation_len += 1;
+
+    int64_t whole_digits = EAT_MANY(state, index,
+                                    uc_is_property((ucs4_t)grapheme, UC_PROPERTY_DECIMAL_DIGIT));
+    int64_t fraction_digits = 0;
+    if (EAT1(state, index, grapheme == '.')) {
+        punctuation_len += 1;
+        fraction_digits = EAT_MANY(state, index,
+                                   uc_is_property((ucs4_t)grapheme, UC_PROPERTY_DECIMAL_DIGIT));
+    }
+
+    if (whole_digits == 0 && fraction_digits == 0)
+        return -1;
+    return punctuation_len + whole_digits + fraction_digits;
+}
+
+// Match a line ending at `index`: "\n" (length 1) or "\r\n" (length 2).
+// Returns -1 at the end of the text or when there is no line ending here.
+static int64_t match_newline(TextIter_t *state, int64_t index)
+{
+    if (index >= state->stack[0].text.length)
+        return -1;
+
+    uint32_t first = Text$get_main_grapheme_fast(state, index);
+    switch (first) {
+    case '\n':
+        return 1;
+    case '\r':
+        return (Text$get_grapheme_fast(state, index + 1) == '\n') ? 2 : -1;
+    default:
+        return -1;
+    }
+}
+
+static int64_t match_pat(TextIter_t *state, int64_t index, pat_t pat) // Try one pattern element at `index`; returns the match length (may be 0) or -1 for no match
+{
+ Text_t text = state->stack[0].text;
+ int32_t grapheme = index >= text.length ? 0 : Text$get_grapheme_fast(state, index); // 0 stands in for "past the end"
+
+ switch (pat.tag) {
+ case PAT_START: { // Zero-width: succeeds only at index 0 (inverted when negated)
+ if (index == 0)
+ return pat.negated ? -1 : 0;
+ return pat.negated ? 0 : -1;
+ }
+ case PAT_END: { // Zero-width: succeeds only at/after the end of the text (inverted when negated)
+ if (index >= text.length)
+ return pat.negated ? -1 : 0;
+ return pat.negated ? 0 : -1;
+ }
+ case PAT_ANY: { // Any single grapheme; negation is not supported for this tag
+ assert(!pat.negated);
+ return (index < text.length) ? 1 : -1;
+ }
+ case PAT_GRAPHEME: { // One specific grapheme; a negated match still consumes one grapheme
+ if (index >= text.length)
+ return -1;
+ else if (grapheme == pat.grapheme)
+ return pat.negated ? -1 : 1;
+ return pat.negated ? 1 : -1;
+ }
+ case PAT_PROPERTY: { // One grapheme with the given Unicode property
+ if (index >= text.length)
+ return -1;
+ else if (uc_is_property((ucs4_t)grapheme, pat.property))
+ return pat.negated ? -1 : 1;
+ return pat.negated ? 1 : -1;
+ }
+ case PAT_PAIR: {
+ // Nested punctuation: (?), [?], etc
+ if (index >= text.length)
+ return -1;
+
+ int32_t open = pat.pair_graphemes[0];
+ if (grapheme != open)
+ return pat.negated ? 1 : -1;
+
+ int32_t close = pat.pair_graphemes[1];
+ int64_t depth = 1; // Nesting depth; the opener at `index` is already counted
+ int64_t match_len = 1;
+ for (; depth > 0; match_len++) {
+ if (index + match_len >= text.length) // Ran out of text before the pair was balanced
+ return pat.negated ? 1 : -1;
+
+ int32_t c = Text$get_grapheme_fast(state, index + match_len);
+ if (c == open)
+ depth += 1;
+ else if (c == close)
+ depth -= 1;
+ }
+ return pat.negated ? -1 : match_len;
+ }
+ case PAT_QUOTE: {
+ // Nested quotes: "?", '?', etc
+ if (index >= text.length)
+ return -1;
+
+ int32_t open = pat.quote_graphemes[0];
+ if (grapheme != open)
+ return pat.negated ? 1 : -1;
+
+ int32_t close = pat.quote_graphemes[1];
+ for (int64_t i = index + 1; i < text.length; i++) {
+ int32_t c = Text$get_grapheme_fast(state, i);
+ if (c == close) {
+ return pat.negated ? -1 : (i - index) + 1; // Length includes both quote marks
+ } else if (c == '\\' && index + 1 < text.length) { // NOTE(review): this tests `index + 1`, which is always in range inside this loop; `i + 1` may have been intended
+ i += 1; // Skip ahead an extra step
+ }
+ }
+ return pat.negated ? 1 : -1; // No closing quote found
+ }
+ case PAT_FUNCTION: { // Delegate to a matcher function; any non-negative result counts as a match
+ int64_t match_len = pat.fn(state, index);
+ if (match_len >= 0)
+ return pat.negated ? -1 : match_len;
+ return pat.negated ? 1 : -1;
+ }
+ default: errx(1, "Invalid pattern");
+ }
+ errx(1, "Unreachable");
+}
+
+ // Parse one pattern element from the pattern text held by `state`, beginning
+ // at `*index`, and advance `*index` past everything that was consumed.
+ //
+ // Recognized forms:
+ //   - <quote>?<closing quote>  -> PAT_QUOTE (e.g. "?", '?')
+ //   - <open>?<close>           -> PAT_PAIR  (e.g. (?), [?])
+ //   - {[count] [!]name}        -> named pattern, unicode property, or character
+ //   - anything else            -> a literal, non-capturing grapheme
+ //
+ // For `{...}` elements written without a repetition count, both `min` and
+ // `max` are left at -1 so the caller can choose the default.
+ // Calls fail() on malformed patterns (missing closers, bad counts, unknown names).
+ static pat_t parse_next_pat(TextIter_t *state, int64_t *index)
+ {
+     if (EAT2(state, *index,
+              uc_is_property((ucs4_t)grapheme, UC_PROPERTY_QUOTATION_MARK),
+              grapheme == '?')) {
+         // Quotations: "?", '?', etc
+         int32_t open = Text$get_grapheme_fast(state, *index-2);
+         int32_t close = open;
+         uc_mirror_char((ucs4_t)open, (ucs4_t*)&close);
+         if (!match_grapheme(state, index, close))
+             fail("Pattern's closing quote is missing: %k", &state->stack[0].text);
+
+         return (pat_t){
+             .tag=PAT_QUOTE,
+             .min=1, .max=1,
+             .quote_graphemes={open, close},
+         };
+     } else if (EAT2(state, *index,
+                     uc_is_property((ucs4_t)grapheme, UC_PROPERTY_PAIRED_PUNCTUATION),
+                     grapheme == '?')) {
+         // Nested punctuation: (?), [?], etc
+         int32_t open = Text$get_grapheme_fast(state, *index-2);
+         int32_t close = open;
+         uc_mirror_char((ucs4_t)open, (ucs4_t*)&close);
+         if (!match_grapheme(state, index, close))
+             fail("Pattern's closing brace is missing: %k", &state->stack[0].text);
+
+         return (pat_t){
+             .tag=PAT_PAIR,
+             .min=1, .max=1,
+             .pair_graphemes={open, close},
+         };
+     } else if (EAT1(state, *index, grapheme == '{')) { // named patterns {id}, {2-3 hex}, etc.
+         skip_whitespace(state, index);
+         int64_t min, max;
+         if (uc_is_digit((ucs4_t)Text$get_grapheme_fast(state, *index))) {
+             // Explicit repetition count: {N name}, {N+ name}, or {N-M name}
+             min = parse_int(state, index);
+             skip_whitespace(state, index);
+             if (match_grapheme(state, index, '+')) {
+                 max = INT64_MAX;
+             } else if (match_grapheme(state, index, '-')) {
+                 max = parse_int(state, index);
+             } else {
+                 max = min;
+             }
+             if (min > max) fail("Minimum repetitions (%ld) is greater than the maximum (%ld)", min, max);
+         } else {
+             // No count given; -1/-1 tells the caller to pick the default
+             min = -1, max = -1;
+         }
+
+         skip_whitespace(state, index);
+
+         bool negated = match_grapheme(state, index, '!');
+ #define PAT(_tag, ...) ((pat_t){.min=min, .max=max, .negated=negated, .tag=_tag, __VA_ARGS__})
+         const char *prop_name;
+         if (match_str(state, index, ".."))
+             prop_name = "..";
+         else
+             prop_name = get_property_name(state, index);
+
+         if (!prop_name) {
+             // Literal character, e.g. {1?}
+             skip_whitespace(state, index);
+             int32_t grapheme = Text$get_grapheme_fast(state, (*index)++);
+             if (!match_grapheme(state, index, '}'))
+                 fail("Missing closing '}' in pattern: %k", &state->stack[0].text);
+             return PAT(PAT_GRAPHEME, .grapheme=grapheme);
+         } else if (strlen(prop_name) == 1) {
+             // Single letter names: {1+ A}
+             skip_whitespace(state, index);
+             if (!match_grapheme(state, index, '}'))
+                 fail("Missing closing '}' in pattern: %k", &state->stack[0].text);
+             return PAT(PAT_GRAPHEME, .grapheme=prop_name[0]);
+         }
+
+         skip_whitespace(state, index);
+         if (!match_grapheme(state, index, '}'))
+             fail("Missing closing '}' in pattern: %k", &state->stack[0].text);
+
+         // Dispatch on the first letter to keep the number of string compares small.
+         // The cast avoids passing a negative `char` to tolower().
+         switch (tolower((unsigned char)prop_name[0])) {
+         case '.':
+             if (prop_name[1] == '.') {
+                 if (negated)
+                     return ((pat_t){.tag=PAT_END, .min=min, .max=max, .non_capturing=true});
+                 else
+                     return PAT(PAT_ANY);
+             }
+             break;
+         case 'a':
+             if (strcasecmp(prop_name, "authority") == 0) {
+                 return PAT(PAT_FUNCTION, .fn=match_authority);
+             } else if (strcasecmp(prop_name, "alphanum") == 0 || strcasecmp(prop_name, "anum") == 0
+                        || strcasecmp(prop_name, "alphanumeric") == 0) {
+                 return PAT(PAT_FUNCTION, .fn=match_alphanumeric);
+             }
+             break;
+         case 'c':
+             if (strcasecmp(prop_name, "crlf") == 0)
+                 return PAT(PAT_FUNCTION, .fn=match_newline);
+             break;
+         case 'd':
+             if (strcasecmp(prop_name, "digit") == 0) {
+                 return PAT(PAT_PROPERTY, .property=UC_PROPERTY_DECIMAL_DIGIT);
+             }
+             break;
+         case 'e':
+             if (strcasecmp(prop_name, "end") == 0) {
+                 return PAT(PAT_END, .non_capturing=!negated);
+             } else if (strcasecmp(prop_name, "email") == 0) {
+                 return PAT(PAT_FUNCTION, .fn=match_email);
+             } else if (strcasecmp(prop_name, "emoji") == 0) {
+                 return PAT(PAT_PROPERTY, .property=UC_PROPERTY_EMOJI);
+             }
+             break;
+         case 'h':
+             if (strcasecmp(prop_name, "host") == 0) {
+                 return PAT(PAT_FUNCTION, .fn=match_host);
+             }
+             break;
+         case 'i':
+             if (strcasecmp(prop_name, "id") == 0) {
+                 return PAT(PAT_FUNCTION, .fn=match_id);
+             } else if (strcasecmp(prop_name, "int") == 0) {
+                 return PAT(PAT_FUNCTION, .fn=match_int);
+             } else if (strcasecmp(prop_name, "ipv4") == 0) {
+                 return PAT(PAT_FUNCTION, .fn=match_ipv4);
+             } else if (strcasecmp(prop_name, "ipv6") == 0) {
+                 return PAT(PAT_FUNCTION, .fn=match_ipv6);
+             } else if (strcasecmp(prop_name, "ip") == 0) {
+                 return PAT(PAT_FUNCTION, .fn=match_ip);
+             }
+             break;
+         case 'n':
+             if (strcasecmp(prop_name, "nl") == 0 || strcasecmp(prop_name, "newline") == 0) {
+                 return PAT(PAT_FUNCTION, .fn=match_newline);
+             } else if (strcasecmp(prop_name, "num") == 0) {
+                 return PAT(PAT_FUNCTION, .fn=match_num);
+             }
+             break;
+         case 's':
+             if (strcasecmp(prop_name, "start") == 0) {
+                 return PAT(PAT_START, .non_capturing=!negated);
+             }
+             break;
+         case 'u':
+             if (strcasecmp(prop_name, "uri") == 0) {
+                 return PAT(PAT_FUNCTION, .fn=match_uri);
+             } else if (strcasecmp(prop_name, "url") == 0) {
+                 return PAT(PAT_FUNCTION, .fn=match_url);
+             }
+             break;
+         case 'w':
+             if (strcasecmp(prop_name, "word") == 0) {
+                 return PAT(PAT_FUNCTION, .fn=match_id);
+             }
+             break;
+         default: break;
+         }
+
+         // Not a built-in name: try a unicode property, then a unicode character name
+         uc_property_t prop = uc_property_byname(prop_name);
+         if (uc_property_is_valid(prop))
+             return PAT(PAT_PROPERTY, .property=prop);
+
+         ucs4_t grapheme = unicode_name_character(prop_name);
+         if (grapheme == UNINAME_INVALID)
+             fail("Not a valid property or character name: %s", prop_name);
+         return PAT(PAT_GRAPHEME, .grapheme=(int32_t)grapheme);
+ #undef PAT
+     } else {
+         return (pat_t){.tag=PAT_GRAPHEME, .non_capturing=true, .min=1, .max=1, .grapheme=Text$get_grapheme_fast(state, (*index)++)};
+     }
+ }
+
+ // Try to match `pattern` (from `pattern_index` onward) against `text`
+ // beginning at `text_index`. One pattern element is parsed and matched, and
+ // the rest of the pattern is handled by recursive calls.
+ // Returns the total matched length, 0 when the pattern is already exhausted,
+ // or -1 when there is no match. When `captures` is non-NULL, capturing
+ // elements record their span starting at slot `capture_index`.
+ static int64_t match(Text_t text, int64_t text_index, Pattern_t pattern, int64_t pattern_index, capture_t *captures, int64_t capture_index)
+ {
+ if (pattern_index >= pattern.length) // End of the pattern
+ return 0;
+
+ int64_t start_index = text_index;
+ TextIter_t pattern_state = NEW_TEXT_ITER_STATE(pattern), text_state = NEW_TEXT_ITER_STATE(text);
+ pat_t pat = parse_next_pat(&pattern_state, &pattern_index);
+
+ // min == max == -1 means the pattern gave no explicit repetition count
+ if (pat.min == -1 && pat.max == -1) {
+ if (pat.tag == PAT_ANY && pattern_index >= pattern.length) {
+ // A trailing "any" element takes everything that is left (at least 1)
+ pat.min = pat.max = MAX(1, text.length - text_index);
+ } else {
+ pat.min = 1;
+ pat.max = INT64_MAX;
+ }
+ }
+
+ int64_t capture_start = text_index;
+ int64_t count = 0, capture_len = 0, next_match_len = 0;
+
+ // Shortcut: a trailing "any" element needs no per-grapheme matching
+ // NOTE(review): when fewer than pat.min graphemes remain, capture_len is -1,
+ // so the success path returns -1 but may first record a capture of length -1
+ // -- confirm this is intended.
+ if (pat.tag == PAT_ANY && pattern_index >= pattern.length) {
+ int64_t remaining = text.length - text_index;
+ capture_len = remaining >= pat.min ? MIN(remaining, pat.max) : -1;
+ text_index += capture_len;
+ goto success;
+ }
+
+ // Zero repetitions are allowed: first try skipping this element entirely
+ if (pat.min == 0 && pattern_index < pattern.length) {
+ next_match_len = match(text, text_index, pattern, pattern_index, captures, capture_index + (pat.non_capturing ? 0 : 1));
+ if (next_match_len >= 0) {
+ capture_len = 0;
+ goto success;
+ }
+ }
+
+ // Repeat the element, stopping at the first count (>= min) for which the
+ // remainder of the pattern also matches
+ while (count < pat.max) {
+ int64_t match_len = match_pat(&text_state, text_index, pat);
+ if (match_len < 0)
+ break;
+ capture_len += match_len;
+ text_index += match_len;
+ count += 1;
+
+ if (pattern_index < pattern.length) { // More stuff after this
+ if (count < pat.min)
+ next_match_len = -1;
+ else
+ next_match_len = match(text, text_index, pattern, pattern_index, captures, capture_index + (pat.non_capturing ? 0 : 1));
+ } else {
+ next_match_len = 0;
+ }
+
+ if (match_len == 0) {
+ if (next_match_len >= 0) {
+ // If we're good to go, no need to keep re-matching zero-length
+ // matches till we hit max:
+ count = pat.max;
+ break;
+ } else {
+ return -1;
+ }
+ }
+
+ if (pattern_index < pattern.length && next_match_len >= 0)
+ break; // Next guy exists and wants to stop here
+
+ if (text_index >= text.length)
+ break;
+ }
+
+ if (count < pat.min || next_match_len < 0)
+ return -1;
+
+ success:
+ // Record this element's capture (if capturing and a slot is available)
+ if (captures && capture_index < MAX_BACKREFS && !pat.non_capturing) {
+ if (pat.tag == PAT_PAIR || pat.tag == PAT_QUOTE) {
+ assert(capture_len > 0);
+ captures[capture_index] = (capture_t){
+ .index=capture_start + 1, // Skip leading quote/paren
+ .length=capture_len - 2, // Skip open/close
+ .occupied=true,
+ .recursive=(pat.tag == PAT_PAIR),
+ };
+ } else {
+ captures[capture_index] = (capture_t){
+ .index=capture_start,
+ .length=capture_len,
+ .occupied=true,
+ .recursive=false,
+ };
+ }
+ }
+ // Length consumed by this element plus the length matched by the rest
+ return (text_index - start_index) + next_match_len;
+ }
+
+#undef EAT1
+#undef EAT2
+#undef EAT_MANY
+
+ // Scan `text` for the first position in [first, last] (0-indexed) at which
+ // `pattern` matches. Returns that position, or -1 if there is none. When
+ // `match_length` is non-NULL it receives the match length (-1 on failure).
+ static int64_t _find(Text_t text, Pattern_t pattern, int64_t first, int64_t last, int64_t *match_length, capture_t *captures)
+ {
+     const int32_t lead = Text$get_grapheme(pattern, 0);
+     // Patterns that open with '{', a quotation mark, or paired punctuation may
+     // be pattern syntax, so their first grapheme can't be used to skip ahead.
+     const bool lead_is_syntax = (lead == '{'
+                                  || uc_is_property((ucs4_t)lead, UC_PROPERTY_QUOTATION_MARK)
+                                  || uc_is_property((ucs4_t)lead, UC_PROPERTY_PAIRED_PUNCTUATION));
+
+     int64_t found_at = -1, found_len = -1;
+     TextIter_t iter = NEW_TEXT_ITER_STATE(text);
+     int64_t pos = first;
+     while (pos <= last) {
+         if (!lead_is_syntax) {
+             // Optimization: jump straight to the next occurrence of the literal first grapheme
+             for (; pos < text.length && Text$get_grapheme_fast(&iter, pos) != lead; ++pos)
+                 ;
+         }
+
+         int64_t len = match(text, pos, pattern, 0, captures, 0);
+         if (len >= 0) {
+             found_at = pos;
+             found_len = len;
+             break;
+         }
+         pos++;
+     }
+
+     if (match_length)
+         *match_length = found_len;
+     return found_at;
+ }
+
+ // Find the first match of `pattern` in `text` at or after the 1-indexed
+ // position `from_index` (negative values count back from the end of the text).
+ // Fails on index 0. Returns NONE_MATCH when the index is out of range or the
+ // pattern does not match; otherwise returns the matched text, its 1-indexed
+ // position, and the captured sub-texts.
+ public OptionalMatch_t Text$find(Text_t text, Pattern_t pattern, Int_t from_index)
+ {
+     int64_t first = Int64$from_int(from_index, false);
+     if (first == 0) fail("Invalid index: 0");
+     if (first < 0) first = text.length + first + 1;
+     if (first > text.length || first < 1)
+         return NONE_MATCH;
+
+     capture_t captures[MAX_BACKREFS] = {};
+     int64_t len = 0;
+     int64_t found = _find(text, pattern, first-1, text.length-1, &len, captures);
+     if (found == -1)
+         return NONE_MATCH;
+
+     Array_t capture_array = {};
+     // Bounded by MAX_BACKREFS so a fully occupied capture table can't be overrun
+     for (int i = 0; i < MAX_BACKREFS && captures[i].occupied; i++) {
+         Text_t capture = Text$slice(text, I(captures[i].index+1), I(captures[i].index+captures[i].length));
+         Array$insert(&capture_array, &capture, I(0), sizeof(Text_t));
+     }
+     return (OptionalMatch_t){
+         .text=Text$slice(text, I(found+1), I(found+len)),
+         .index=I(found+1),
+         .captures=capture_array,
+     };
+ }
+
+ // Report whether `pattern` matches anywhere in `text`.
+ // Patterns beginning with "{start}" are only tried at the start of the text.
+ // Patterns ending with "{end}" are tried from the back, requiring the match
+ // to extend to the end of the text.
+ PUREFUNC public bool Text$has(Text_t text, Pattern_t pattern)
+ {
+     if (Text$starts_with(pattern, Text("{start}"))) {
+         int64_t m = match(text, 0, pattern, 0, NULL, 0);
+         return m >= 0;
+     } else if (Text$ends_with(pattern, Text("{end}"))) {
+         // The anchor lives in the pattern, so it is the pattern (not the
+         // searched text) that must be tested for the "{end}" suffix.
+         for (int64_t i = text.length-1; i >= 0; i--) {
+             int64_t match_len = match(text, i, pattern, 0, NULL, 0);
+             if (match_len >= 0 && i + match_len == text.length)
+                 return true;
+         }
+         return false;
+     } else {
+         int64_t found = _find(text, pattern, 0, text.length-1, NULL, NULL);
+         return (found >= 0);
+     }
+ }
+
+ // Match `pattern` against the whole of `text`. Returns the array of captured
+ // sub-texts when the match covers the entire text, or NONE_ARRAY otherwise.
+ public OptionalArray_t Text$matches(Text_t text, Pattern_t pattern)
+ {
+     capture_t captures[MAX_BACKREFS] = {};
+     int64_t match_len = match(text, 0, pattern, 0, captures, 0);
+     if (match_len != text.length)
+         return NONE_ARRAY;
+
+     Array_t capture_array = {};
+     // Bounded by MAX_BACKREFS so a fully occupied capture table can't be overrun
+     for (int i = 0; i < MAX_BACKREFS && captures[i].occupied; i++) {
+         Text_t capture = Text$slice(text, I(captures[i].index+1), I(captures[i].index+captures[i].length));
+         Array$insert(&capture_array, &capture, I(0), sizeof(Text_t));
+     }
+     return capture_array;
+ }
+
+ // Collect every non-overlapping match of `pattern` in `text`, in order.
+ // An empty pattern yields an empty array.
+ public Array_t Text$find_all(Text_t text, Pattern_t pattern)
+ {
+     if (pattern.length == 0) // special case
+         return (Array_t){.length=0};
+
+     Array_t matches = {};
+     for (int64_t i = 1; ; ) {
+         OptionalMatch_t m = Text$find(text, pattern, I(i));
+         if (!m.index.small)
+             break;
+         // Always advance by at least one grapheme: a zero-length match would
+         // otherwise be found again at the same index forever.
+         i = Int64$from_int(m.index, false) + MAX(1, m.text.length);
+         Array$insert(&matches, &m, I_small(0), sizeof(Match_t));
+     }
+     return matches;
+ }
+
+ // Iteration state for the closure returned by Text$by_match()
+ typedef struct {
+ TextIter_t state; // Iterator over the text being searched
+ Int_t i; // 1-indexed position where the next search starts
+ Pattern_t pattern; // Pattern to search for
+ } match_iter_state_t;
+
+ // Step function for Text$by_match(): returns the next match at or after the
+ // saved position and moves the position past it (by at least one grapheme).
+ // Returns NONE_MATCH once the text is exhausted or nothing more matches.
+ static OptionalMatch_t next_match(match_iter_state_t *state)
+ {
+     Text_t haystack = state->state.stack[0].text;
+     if (Int64$from_int(state->i, false) > haystack.length)
+         return NONE_MATCH;
+
+     OptionalMatch_t result = Text$find(haystack, state->pattern, state->i);
+     if (result.index.small != 0) {
+         // Resume just after this match
+         state->i = Int$plus(result.index, I(MAX(1, result.text.length)));
+     } else {
+         // No match: park the cursor past the end so later calls return early
+         state->i = I(haystack.length + 1);
+     }
+     return result;
+ }
+
+ // Build an iterator closure over the matches of `pattern` in `text`.
+ // Each call of the closure runs next_match() on freshly allocated state
+ // that starts searching at index 1.
+ public Closure_t Text$by_match(Text_t text, Pattern_t pattern)
+ {
+ return (Closure_t){
+ .fn=(void*)next_match,
+ .userdata=new(match_iter_state_t, .state=NEW_TEXT_ITER_STATE(text), .i=I_small(1), .pattern=pattern),
+ };
+ }
+
+ // Expand backreferences inside `replacement`.
+ // A backreference is a match of `backref_pat` immediately followed by a
+ // capture number and an optional ';'. Each one is replaced by the text of
+ // the corresponding entry in `captures` (spans into `text`). Captures marked
+ // recursive are additionally run through `recursive_replacements` when that
+ // array is non-empty. With an empty `backref_pat` the replacement is returned
+ // unchanged. Fails on an out-of-range or unoccupied capture number.
+ static Text_t apply_backrefs(Text_t text, Array_t recursive_replacements, Text_t replacement, Pattern_t backref_pat, capture_t *captures)
+ {
+     if (backref_pat.length == 0)
+         return replacement;
+
+     int32_t first_grapheme = Text$get_grapheme(backref_pat, 0);
+     bool find_first = (first_grapheme != '{'
+                        && !uc_is_property((ucs4_t)first_grapheme, UC_PROPERTY_QUOTATION_MARK)
+                        && !uc_is_property((ucs4_t)first_grapheme, UC_PROPERTY_PAIRED_PUNCTUATION));
+
+     Text_t ret = Text("");
+     TextIter_t replacement_state = NEW_TEXT_ITER_STATE(replacement);
+     int64_t nonmatching_pos = 0; // Start of replacement text not yet copied into `ret`
+     for (int64_t pos = 0; pos < replacement.length; ) {
+         // Optimization: quickly skip ahead to first char in the backref pattern:
+         if (find_first) {
+             while (pos < replacement.length && Text$get_grapheme_fast(&replacement_state, pos) != first_grapheme)
+                 ++pos;
+         }
+
+         int64_t backref_len = match(replacement, pos, backref_pat, 0, NULL, 0);
+         if (backref_len < 0) {
+             pos += 1;
+             continue;
+         }
+
+         int64_t after_backref = pos + backref_len;
+         int64_t backref = parse_int(&replacement_state, &after_backref);
+         if (after_backref == pos + backref_len) { // Not actually a backref if there's no number
+             pos += 1;
+             continue;
+         }
+         // Validate against the real size of the capture table, as the message states
+         if (backref < 0 || backref >= MAX_BACKREFS)
+             fail("Invalid backref index: %ld (only 0-%d are allowed)", backref, MAX_BACKREFS-1);
+         backref_len = (after_backref - pos);
+
+         // Skip the optional semicolon, but never peek past the end of the replacement
+         if (pos + backref_len < replacement.length
+             && Text$get_grapheme_fast(&replacement_state, pos + backref_len) == ';')
+             backref_len += 1;
+
+         if (!captures[backref].occupied)
+             fail("There is no capture number %ld!", backref);
+
+         Text_t backref_text = Text$slice(text, I(captures[backref].index+1), I(captures[backref].index + captures[backref].length));
+
+         if (captures[backref].recursive && recursive_replacements.length > 0)
+             backref_text = Text$replace_array(backref_text, recursive_replacements, backref_pat, true);
+
+         if (pos > nonmatching_pos) {
+             Text_t before_slice = Text$slice(replacement, I(nonmatching_pos+1), I(pos));
+             ret = Text$concat(ret, before_slice, backref_text);
+         } else {
+             ret = Text$concat(ret, backref_text);
+         }
+
+         pos += backref_len;
+         nonmatching_pos = pos;
+     }
+     // Copy whatever follows the last backreference
+     if (nonmatching_pos < replacement.length) {
+         Text_t last_slice = Text$slice(replacement, I(nonmatching_pos+1), I(replacement.length));
+         ret = Text$concat(ret, last_slice);
+     }
+     return ret;
+ }
+
+ // Replace every match of `pattern` in `text` with `replacement`, expanding
+ // backreferences (introduced by `backref_pat`) in the replacement. Capture 0
+ // refers to the whole match. When `recursive` is set, recursive captures are
+ // themselves run through the same pattern/replacement pair.
+ public Text_t Text$replace(Text_t text, Pattern_t pattern, Text_t replacement, Pattern_t backref_pat, bool recursive)
+ {
+     const int32_t lead = Text$get_grapheme(pattern, 0);
+     // A pattern whose first grapheme is plain text lets us skip ahead quickly
+     const bool lead_is_syntax = (lead == '{'
+                                  || uc_is_property((ucs4_t)lead, UC_PROPERTY_QUOTATION_MARK)
+                                  || uc_is_property((ucs4_t)lead, UC_PROPERTY_PAIRED_PUNCTUATION));
+
+     // One-entry (pattern, replacement) array used for recursive replacement
+     Text_t pair[2] = {pattern, replacement};
+     Array_t self_replacements = {
+         .data=pair,
+         .length=1,
+         .stride=sizeof(pair),
+     };
+
+     Text_t result = EMPTY_TEXT;
+     TextIter_t iter = NEW_TEXT_ITER_STATE(text);
+     int64_t copied_upto = 0; // Everything before this offset is already in `result`
+     int64_t pos = 0;
+     while (pos < text.length) {
+         if (!lead_is_syntax) {
+             for (; pos < text.length && Text$get_grapheme_fast(&iter, pos) != lead; ++pos)
+                 ;
+         }
+
+         capture_t captures[MAX_BACKREFS] = {};
+         int64_t match_len = match(text, pos, pattern, 0, captures, 1);
+         if (match_len >= 0) {
+             // Slot 0 is reserved for the whole match
+             captures[0] = (capture_t){
+                 .index = pos, .length = match_len,
+                 .occupied = true, .recursive = false,
+             };
+
+             Text_t substituted = apply_backrefs(text, recursive ? self_replacements : (Array_t){}, replacement, backref_pat, captures);
+             if (pos > copied_upto) {
+                 Text_t unmatched = Text$slice(text, I(copied_upto+1), I(pos));
+                 result = Text$concat(result, unmatched, substituted);
+             } else {
+                 result = Text$concat(result, substituted);
+             }
+             copied_upto = pos + match_len;
+             pos += MAX(match_len, 1);
+         } else {
+             pos += 1;
+         }
+     }
+
+     if (copied_upto < text.length) {
+         Text_t tail = Text$slice(text, I(copied_upto+1), I(text.length));
+         result = Text$concat(result, tail);
+     }
+     return result;
+ }
+
+ // Strip text matching `pattern` from the start (`trim_left`) and/or the end
+ // (`trim_right`) of `text` and return what remains.
+ public Text_t Text$trim(Text_t text, Pattern_t pattern, bool trim_left, bool trim_right)
+ {
+     int64_t keep_from = 0;
+     int64_t keep_to = text.length-1;
+
+     if (trim_left) {
+         int64_t prefix_len = match(text, 0, pattern, 0, NULL, 0);
+         if (prefix_len > 0)
+             keep_from = prefix_len;
+     }
+
+     if (trim_right) {
+         // Walk backwards; each start position whose match reaches the end of
+         // the text pulls the kept range in further.
+         int64_t start = text.length-1;
+         while (start >= keep_from) {
+             int64_t suffix_len = match(text, start, pattern, 0, NULL, 0);
+             if (suffix_len > 0 && start + suffix_len == text.length)
+                 keep_to = start-1;
+             start--;
+         }
+     }
+     return Text$slice(text, I(keep_from+1), I(keep_to+1));
+ }
+
+ // Return a copy of `text` in which every match of `pattern` is replaced by
+ // the result of calling `fn` with the match (and `fn.userdata`). When
+ // `recursive` is set, each capture is itself mapped before being handed to `fn`.
+ public Text_t Text$map(Text_t text, Pattern_t pattern, Closure_t fn, bool recursive)
+ {
+     Text_t ret = EMPTY_TEXT;
+
+     int32_t first_grapheme = Text$get_grapheme(pattern, 0);
+     bool find_first = (first_grapheme != '{'
+                        && !uc_is_property((ucs4_t)first_grapheme, UC_PROPERTY_QUOTATION_MARK)
+                        && !uc_is_property((ucs4_t)first_grapheme, UC_PROPERTY_PAIRED_PUNCTUATION));
+
+     TextIter_t text_state = NEW_TEXT_ITER_STATE(text);
+     int64_t nonmatching_pos = 0; // Start of text not yet copied into `ret`
+
+     Text_t (*text_mapper)(Match_t, void*) = fn.fn;
+     for (int64_t pos = 0; pos < text.length; pos++) {
+         // Optimization: quickly skip ahead to first char in pattern:
+         if (find_first) {
+             while (pos < text.length && Text$get_grapheme_fast(&text_state, pos) != first_grapheme)
+                 ++pos;
+         }
+
+         capture_t captures[MAX_BACKREFS] = {};
+         int64_t match_len = match(text, pos, pattern, 0, captures, 0);
+         if (match_len < 0) continue;
+
+         Match_t m = {
+             .text=Text$slice(text, I(pos+1), I(pos+match_len)),
+             .index=I(pos+1),
+             .captures={},
+         };
+         // Bounded by MAX_BACKREFS so a fully occupied capture table can't be overrun
+         for (int i = 0; i < MAX_BACKREFS && captures[i].occupied; i++) {
+             Text_t capture = Text$slice(text, I(captures[i].index+1), I(captures[i].index+captures[i].length));
+             if (recursive)
+                 capture = Text$map(capture, pattern, fn, recursive);
+             Array$insert(&m.captures, &capture, I(0), sizeof(Text_t));
+         }
+
+         Text_t replacement = text_mapper(m, fn.userdata);
+         if (pos > nonmatching_pos) {
+             Text_t before_slice = Text$slice(text, I(nonmatching_pos+1), I(pos));
+             ret = Text$concat(ret, before_slice, replacement);
+         } else {
+             ret = Text$concat(ret, replacement);
+         }
+         nonmatching_pos = pos + match_len;
+         // The loop's pos++ supplies the final step. Clamp to 1 so that a
+         // zero-length match still advances instead of looping forever.
+         pos += MAX(match_len, 1) - 1;
+     }
+     if (nonmatching_pos < text.length) {
+         Text_t last_slice = Text$slice(text, I(nonmatching_pos+1), I(text.length));
+         ret = Text$concat(ret, last_slice);
+     }
+     return ret;
+ }
+
+ // Call `fn` (with `fn.userdata`) once for every match of `pattern` in `text`.
+ // When `recursive` is set, each capture is also searched for matches before
+ // the enclosing match is reported.
+ public void Text$each(Text_t text, Pattern_t pattern, Closure_t fn, bool recursive)
+ {
+     int32_t first_grapheme = Text$get_grapheme(pattern, 0);
+     bool find_first = (first_grapheme != '{'
+                        && !uc_is_property((ucs4_t)first_grapheme, UC_PROPERTY_QUOTATION_MARK)
+                        && !uc_is_property((ucs4_t)first_grapheme, UC_PROPERTY_PAIRED_PUNCTUATION));
+
+     TextIter_t text_state = NEW_TEXT_ITER_STATE(text);
+     void (*action)(Match_t, void*) = fn.fn;
+     for (int64_t pos = 0; pos < text.length; pos++) {
+         // Optimization: quickly skip ahead to first char in pattern:
+         if (find_first) {
+             while (pos < text.length && Text$get_grapheme_fast(&text_state, pos) != first_grapheme)
+                 ++pos;
+         }
+
+         capture_t captures[MAX_BACKREFS] = {};
+         int64_t match_len = match(text, pos, pattern, 0, captures, 0);
+         if (match_len < 0) continue;
+
+         Match_t m = {
+             .text=Text$slice(text, I(pos+1), I(pos+match_len)),
+             .index=I(pos+1),
+             .captures={},
+         };
+         // Bounded by MAX_BACKREFS so a fully occupied capture table can't be overrun
+         for (int i = 0; i < MAX_BACKREFS && captures[i].occupied; i++) {
+             Text_t capture = Text$slice(text, I(captures[i].index+1), I(captures[i].index+captures[i].length));
+             if (recursive)
+                 Text$each(capture, pattern, fn, recursive);
+             Array$insert(&m.captures, &capture, I(0), sizeof(Text_t));
+         }
+
+         action(m, fn.userdata);
+         // The loop's pos++ supplies the final step. Clamp to 1 so that a
+         // zero-length match still advances instead of looping forever.
+         pos += MAX(match_len, 1) - 1;
+     }
+ }
+
+ // Apply a list of (pattern, replacement) pairs to `text`.
+ // `replacements` holds entries laid out as a Pattern_t followed by a Text_t.
+ // At each position the first pattern that matches wins; its replacement has
+ // backreferences expanded (capture 0 is the whole match) and is appended to
+ // the result. Text that no pattern matches is copied through unchanged.
+ Text_t Text$replace_array(Text_t text, Array_t replacements, Text_t backref_pat, bool recursive)
+ {
+     if (replacements.length == 0) return text;
+
+     Text_t ret = EMPTY_TEXT;
+
+     int64_t nonmatch_pos = 0; // Start of text not yet copied into `ret`
+     for (int64_t pos = 0; pos < text.length; ) {
+         // Find the first matching pattern at this position:
+         for (int64_t i = 0; i < replacements.length; i++) {
+             Pattern_t pattern = *(Pattern_t*)(replacements.data + i*replacements.stride);
+             capture_t captures[MAX_BACKREFS] = {};
+             int64_t len = match(text, pos, pattern, 0, captures, 1);
+             if (len < 0) continue;
+             // Slot 0 is the whole match; it must be marked occupied or a
+             // backreference to capture 0 would be rejected as missing.
+             captures[0] = (capture_t){
+                 .index = pos, .length = len,
+                 .occupied = true, .recursive = false,
+             };
+
+             // If we skipped over some non-matching text before finding a match, insert it here:
+             if (pos > nonmatch_pos) {
+                 Text_t before_slice = Text$slice(text, I(nonmatch_pos+1), I(pos));
+                 ret = Text$concat(ret, before_slice);
+             }
+
+             // Concatenate the replacement:
+             Text_t replacement = *(Text_t*)(replacements.data + i*replacements.stride + sizeof(Text_t));
+             Text_t replacement_text = apply_backrefs(text, recursive ? replacements : (Array_t){}, replacement, backref_pat, captures);
+             ret = Text$concat(ret, replacement_text);
+             pos += MAX(len, 1);
+             nonmatch_pos = pos;
+             goto next_pos;
+         }
+
+         pos += 1;
+       next_pos:
+         continue;
+     }
+
+     // Copy whatever follows the last replacement
+     if (nonmatch_pos < text.length) {
+         Text_t last_slice = Text$slice(text, I(nonmatch_pos+1), I(text.length));
+         ret = Text$concat(ret, last_slice);
+     }
+     return ret;
+ }
+
+ // Table-based front end for Text$replace_array(): forwards the table's
+ // entries array as the list of replacements.
+ public Text_t Text$replace_all(Text_t text, Table_t replacements, Text_t backref_pat, bool recursive)
+ {
+ return Text$replace_array(text, replacements.entries, backref_pat, recursive);
+ }
+
+ // Split `text` into the pieces that lie between matches of `pattern`.
+ // Empty text gives an empty array; an empty pattern splits into clusters.
+ public Array_t Text$split(Text_t text, Pattern_t pattern)
+ {
+     if (text.length == 0) // special case
+         return (Array_t){.length=0};
+
+     if (pattern.length == 0) // special case
+         return Text$clusters(text);
+
+     Array_t pieces = {};
+     int64_t cursor = 0;
+     while (true) {
+         int64_t sep_len = 0;
+         int64_t sep_at = _find(text, pattern, cursor, text.length-1, &sep_len, NULL);
+         // A zero-length separator right at the cursor would make no progress,
+         // so look again starting one position later.
+         if (sep_at == cursor && sep_len == 0)
+             sep_at = _find(text, pattern, cursor + 1, text.length-1, &sep_len, NULL);
+         if (sep_at < 0)
+             break;
+
+         Text_t piece = Text$slice(text, I(cursor+1), I(sep_at));
+         Array$insert(&pieces, &piece, I_small(0), sizeof(Text_t));
+         cursor = MAX(sep_at + sep_len, cursor + 1);
+     }
+
+     // Whatever follows the final separator
+     Text_t remainder = Text$slice(text, I(cursor+1), I(text.length));
+     Array$insert(&pieces, &remainder, I_small(0), sizeof(Text_t));
+
+     return pieces;
+ }
+
+ // Iteration state for the closure returned by Text$by_split()
+ typedef struct {
+ TextIter_t state; // Iterator over the text being split
+ int64_t i; // 0-indexed position where the next chunk starts
+ Pattern_t pattern; // Separator pattern
+ } split_iter_state_t;
+
+ // Step function for Text$by_split(): returns the next chunk of the text, or
+ // NONE_TEXT when the iteration is finished. With an empty pattern the text
+ // is yielded one cluster at a time.
+ static OptionalText_t next_split(split_iter_state_t *state)
+ {
+     Text_t text = state->state.stack[0].text;
+     const int64_t begin = state->i;
+
+     if (begin >= text.length) {
+         // Landing exactly on the end (with a non-empty pattern) yields one
+         // final empty chunk; after that the iteration is over.
+         if (state->pattern.length > 0 && begin == text.length) { // special case
+             state->i = text.length + 1;
+             return EMPTY_TEXT;
+         }
+         return NONE_TEXT;
+     }
+
+     if (state->pattern.length == 0) { // special case
+         Text_t cluster = Text$cluster(text, I(begin+1));
+         state->i = begin + 1;
+         return cluster;
+     }
+
+     int64_t sep_len = 0;
+     int64_t sep_at = _find(text, state->pattern, begin, text.length-1, &sep_len, NULL);
+     // Skip a zero-length separator sitting right at the cursor
+     if (sep_at == begin && sep_len == 0)
+         sep_at = _find(text, state->pattern, begin + 1, text.length-1, &sep_len, NULL);
+
+     if (sep_at < 0) {
+         // No more separators: the rest of the text is the last chunk
+         state->i = text.length + 1;
+         return Text$slice(text, I(begin+1), I(text.length));
+     }
+
+     state->i = MAX(sep_at + sep_len, begin + 1);
+     return Text$slice(text, I(begin+1), I(sep_at));
+ }
+
+ // Build an iterator closure over the chunks of `text` separated by `pattern`.
+ // Each call of the closure runs next_split() on freshly allocated state that
+ // starts at offset 0.
+ public Closure_t Text$by_split(Text_t text, Pattern_t pattern)
+ {
+ return (Closure_t){
+ .fn=(void*)next_split,
+ .userdata=new(split_iter_state_t, .state=NEW_TEXT_ITER_STATE(text), .i=0, .pattern=pattern),
+ };
+ }
+
+ // Type info for Pattern: a text type tagged with the language name "Pattern"
+ // that reuses the Text metamethods.
+ public const TypeInfo_t Pattern$info = {
+ .size=sizeof(Pattern_t),
+ .align=__alignof__(Pattern_t),
+ .tag=TextInfo,
+ .TextInfo={.lang="Pattern"},
+ .metamethods=Text$metamethods,
+ };
+
+ // Type info for an array of Text, used for the `captures` field of Match
+ static const TypeInfo_t _text_array = {
+ .size=sizeof(Array_t),
+ .align=__alignof__(Array_t),
+ .tag=ArrayInfo,
+ .ArrayInfo.item=&Text$info,
+ .metamethods=Array$metamethods,
+ };
+
+ // Field names and types of the Match struct, in declaration order
+ static NamedType_t _match_fields[3] = {
+ {"text", &Text$info},
+ {"index", &Int$info},
+ {"captures", &_text_array},
+ };
+
+ // An optional Match counts as "none" when the `small` member of its index is zero.
+ static bool Match$is_none(const void *m, const TypeInfo_t*)
+ {
+     const OptionalMatch_t *maybe_match = (OptionalMatch_t*)m;
+     return maybe_match->index.small == 0;
+ }
+
+ // Type info for Match: a three-field struct using the generic struct
+ // metamethods plus a Match-specific "is none" check.
+ public const TypeInfo_t Match$info = {
+ .size=sizeof(Match_t),
+ .align=__alignof__(Match_t),
+ .tag=StructInfo,
+ .StructInfo={
+ .name="Match",
+ .num_fields=3,
+ .fields=_match_fields,
+ },
+ .metamethods={
+ .as_text=Struct$as_text,
+ .hash=Struct$hash,
+ .compare=Struct$compare,
+ .equal=Struct$equal,
+ .is_none=Match$is_none,
+ },
+ };
+
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/src/stdlib/patterns.h b/src/stdlib/patterns.h
new file mode 100644
index 00000000..53db0978
--- /dev/null
+++ b/src/stdlib/patterns.h
@@ -0,0 +1,47 @@
+ #pragma once
+
+ // The type representing text patterns for pattern matching.
+
+ #include <stdbool.h>
+ #include <printf.h>
+ #include <stdint.h>
+
+ #include "datatypes.h"
+ #include "integers.h"
+ #include "optionals.h"
+ #include "types.h"
+
+ // Construct a Pattern_t from a text literal / from several text pieces
+ #define Pattern(text) ((Pattern_t)Text(text))
+ #define Patterns(...) ((Pattern_t)Texts(__VA_ARGS__))
+
+ // The result of a successful pattern search
+ typedef struct {
+ Text_t text; // The matched text
+ Int_t index; // Position of the match in the searched text
+ Array_t captures; // Captured sub-texts
+ } Match_t;
+
+ // An optional match; the "none" value is marked by a NONE_INT index
+ typedef Match_t OptionalMatch_t;
+ #define NONE_MATCH ((OptionalMatch_t){.index=NONE_INT})
+
+ // Pattern-based operations on text
+ Text_t Text$replace(Text_t str, Pattern_t pat, Text_t replacement, Pattern_t backref_pat, bool recursive);
+ Pattern_t Pattern$escape_text(Text_t text);
+ Text_t Text$replace_all(Text_t text, Table_t replacements, Pattern_t backref_pat, bool recursive);
+ Array_t Text$split(Text_t text, Pattern_t pattern);
+ Closure_t Text$by_split(Text_t text, Pattern_t pattern);
+ Text_t Text$trim(Text_t text, Pattern_t pattern, bool trim_left, bool trim_right);
+ OptionalMatch_t Text$find(Text_t text, Pattern_t pattern, Int_t i);
+ Array_t Text$find_all(Text_t text, Pattern_t pattern);
+ Closure_t Text$by_match(Text_t text, Pattern_t pattern);
+ PUREFUNC bool Text$has(Text_t text, Pattern_t pattern);
+ OptionalArray_t Text$matches(Text_t text, Pattern_t pattern);
+ Text_t Text$map(Text_t text, Pattern_t pattern, Closure_t fn, bool recursive);
+ void Text$each(Text_t text, Pattern_t pattern, Closure_t fn, bool recursive);
+
+ // Patterns hash and compare exactly like Text
+ #define Pattern$hash Text$hash
+ #define Pattern$compare Text$compare
+ #define Pattern$equal Text$equal
+
+ extern const TypeInfo_t Match$info;
+ extern const TypeInfo_t Pattern$info;
+
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/src/stdlib/pointers.c b/src/stdlib/pointers.c
new file mode 100644
index 00000000..76e882ec
--- /dev/null
+++ b/src/stdlib/pointers.c
@@ -0,0 +1,123 @@
+// Type infos and methods for Pointer types
+#include <ctype.h>
+#include <err.h>
+#include <gc.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <sys/param.h>
+
+#include "integers.h"
+#include "metamethods.h"
+#include "tables.h"
+#include "text.h"
+#include "types.h"
+#include "util.h"
+
+ // Render a pointer value as text: the pointer type's sigil followed by the
+ // text form of the pointed-to value.
+ //  - `x == NULL` yields the type's name (sigil + pointed type name).
+ //  - A NULL pointer value yields "!" followed by the pointed type name.
+ //  - A pointer already being printed higher up the call chain yields a
+ //    "~N" back-reference instead of recursing.
+ // `colorize` wraps the sigil/markers in ANSI color escapes.
+ // NOTE(review): `root` and `pending` are function-level statics, so this
+ // recursion state is shared by all callers -- confirm it is never used from
+ // multiple threads at once.
+ public Text_t Pointer$as_text(const void *x, bool colorize, const TypeInfo_t *type) {
+ auto ptr_info = type->PointerInfo;
+ if (!x) {
+ Text_t typename = generic_as_text(NULL, false, ptr_info.pointed);
+ if (colorize)
+ return Text$concat(Text("\x1b[34;1m"), Text$from_str(ptr_info.sigil), typename, Text("\x1b[m"));
+ else
+ return Text$concat(Text$from_str(ptr_info.sigil), typename);
+ }
+ const void *ptr = *(const void**)x;
+ if (!ptr) {
+ Text_t typename = generic_as_text(NULL, false, ptr_info.pointed);
+ if (colorize)
+ return Text$concat(Text("\x1b[34;1m!"), typename, Text("\x1b[m"));
+ else
+ return Text$concat(Text("!"), typename);
+ }
+
+ // `root` is the pointer of the outermost call in progress; `pending` maps
+ // pointers seen during that call to their back-reference ids.
+ static const void *root = NULL;
+ static Table_t pending = {};
+ bool top_level = (root == NULL);
+
+ // Check for recursive references, so if `x.foo = x`, then it prints as
+ // `@Foo{foo=@~1}` instead of overflowing the stack:
+ if (top_level) {
+ root = ptr;
+ } else if (ptr == root) {
+ return Text$format(colorize ? "\x1b[34;1m%s~1\x1b[m" : "%s~1", ptr_info.sigil);
+ } else {
+ TypeInfo_t rec_table = *Table$info(type, &Int64$info);
+ int64_t *id = Table$get(pending, x, &rec_table);
+ if (id)
+ return Text$format(colorize ? "\x1b[34;1m%s~%ld\x1b[m" : "%s~%ld", ptr_info.sigil, *id);
+ // Ids start at 2 because ~1 is reserved for the root pointer
+ int64_t next_id = pending.entries.length + 2;
+ Table$set(&pending, x, &next_id, &rec_table);
+ }
+
+ Text_t pointed = generic_as_text(ptr, colorize, ptr_info.pointed);
+
+ // The outermost call clears the shared recursion state when it finishes
+ if (top_level) {
+ pending = (Table_t){}; // Restore
+ root = NULL;
+ }
+
+ Text_t text;
+ if (colorize)
+ text = Text$concat(Text("\x1b[34;1m"), Text$from_str(ptr_info.sigil), Text("\x1b[m"), pointed);
+ else
+ text = Text$concat(Text$from_str(ptr_info.sigil), pointed);
+ return text;
+ }
+
+ // Order two pointer values by address: returns 1, -1, or 0.
+ PUREFUNC public int32_t Pointer$compare(const void *x, const void *y, const TypeInfo_t*) {
+     const void *lhs = *(const void**)x;
+     const void *rhs = *(const void**)y;
+     if (lhs > rhs)
+         return 1;
+     if (lhs < rhs)
+         return -1;
+     return 0;
+ }
+
+ // Two pointer values are equal when they hold the same address.
+ PUREFUNC public bool Pointer$equal(const void *x, const void *y, const TypeInfo_t*) {
+     return *(const void**)x == *(const void**)y;
+ }
+
+ // A pointer value is "none" when the stored address is NULL.
+ PUREFUNC public bool Pointer$is_none(const void *x, const TypeInfo_t*)
+ {
+     void *stored = *(void**)x;
+     return stored == NULL;
+ }
+
+ // Serialize a (non-NULL) pointer as an integer id, followed by the pointed-to
+ // value the first time that pointer is encountered. `pointers` maps pointers
+ // already written to their ids, so repeated references are written as the
+ // id alone.
+ public void Pointer$serialize(const void *obj, FILE *out, Table_t *pointers, const TypeInfo_t *type)
+ {
+ void *ptr = *(void**)obj;
+ assert(ptr != NULL);
+
+ const TypeInfo_t ptr_to_int_table = {.size=sizeof(Table_t), .align=__alignof__(Table_t),
+ .tag=TableInfo, .TableInfo.key=type, .TableInfo.value=&Int64$info};
+
+ int64_t *id_ptr = Table$get(*pointers, &ptr, &ptr_to_int_table);
+ int64_t id;
+ if (id_ptr) {
+ id = *id_ptr;
+ } else {
+ // First sighting: assign the next id (ids start at 1) and register it
+ // before the contents are written
+ id = pointers->entries.length + 1;
+ Table$set(pointers, &ptr, &id, &ptr_to_int_table);
+ }
+
+ Int64$serialize(&id, out, pointers, &Int64$info);
+
+ // Only a newly registered pointer has its contents written out
+ if (!id_ptr)
+ _serialize(ptr, out, pointers, type->PointerInfo.pointed);
+ }
+
+ // Read a pointer written by Pointer$serialize(): an integer id, followed by
+ // the pointed-to value when the id has not been seen before. `pointers` holds
+ // the objects read so far; an id within its length is resolved by lookup.
+ public void Pointer$deserialize(FILE *in, void *outval, Array_t *pointers, const TypeInfo_t *type)
+ {
+ int64_t id = 0;
+ Int64$deserialize(in, &id, pointers, &Int64$info);
+ assert(id != 0);
+
+ if (id > pointers->length) {
+ // New object: allocate and register it before reading its contents
+ void *obj = GC_MALLOC((size_t)type->PointerInfo.pointed->size);
+ Array$insert(pointers, &obj, I(0), sizeof(void*));
+ _deserialize(in, obj, pointers, type->PointerInfo.pointed);
+ *(void**)outval = obj;
+ } else {
+ // Already-seen object: ids are 1-based indices into `pointers`
+ *(void**)outval = *(void**)(pointers->data + (id-1)*pointers->stride);
+ }
+ }
+
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/src/stdlib/pointers.h b/src/stdlib/pointers.h
new file mode 100644
index 00000000..165a5184
--- /dev/null
+++ b/src/stdlib/pointers.h
@@ -0,0 +1,36 @@
+ #pragma once
+
+ // Type infos and methods for Pointer types
+
+ #include <stdbool.h>
+ #include <stdint.h>
+
+ #include "datatypes.h"
+ #include "types.h"
+ #include "util.h"
+
+ // Metamethod implementations for pointer types
+ Text_t Pointer$as_text(const void *x, bool colorize, const TypeInfo_t *type);
+ PUREFUNC int32_t Pointer$compare(const void *x, const void *y, const TypeInfo_t *type);
+ PUREFUNC bool Pointer$equal(const void *x, const void *y, const TypeInfo_t *type);
+ PUREFUNC bool Pointer$is_none(const void *x, const TypeInfo_t*);
+ void Pointer$serialize(const void *obj, FILE *out, Table_t *pointers, const TypeInfo_t *type);
+ void Pointer$deserialize(FILE *in, void *outval, Array_t *pointers, const TypeInfo_t *type);
+
+ // A NULL pointer of type `t*`
+ #define Null(t) (t*)NULL
+ // Pointer type info built from a sigil and pointed-to type info.
+ // Unlike Pointer$info() below, this does not set `.metamethods`.
+ #define POINTER_TYPE(_sigil, _pointed) (&(TypeInfo_t){\
+ .size=sizeof(void*), .align=alignof(void*), .tag=PointerInfo, .PointerInfo.sigil=_sigil, .PointerInfo.pointed=_pointed})
+
+ // Initializer for the metamethod table shared by all pointer types
+ #define Pointer$metamethods { \
+ .as_text=Pointer$as_text, \
+ .compare=Pointer$compare, \
+ .equal=Pointer$equal, \
+ .is_none=Pointer$is_none, \
+ .serialize=Pointer$serialize, \
+ .deserialize=Pointer$deserialize, \
+ }
+
+ // Pointer type info (with metamethods) for the given sigil and pointed-to type
+ #define Pointer$info(sigil_expr, pointed_info) &((TypeInfo_t){.size=sizeof(void*), .align=__alignof__(void*), \
+ .tag=PointerInfo, .PointerInfo={.sigil=sigil_expr, .pointed=pointed_info}, \
+ .metamethods=Pointer$metamethods})
+
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/src/stdlib/rng.c b/src/stdlib/rng.c
new file mode 100644
index 00000000..07b2f36c
--- /dev/null
+++ b/src/stdlib/rng.c
@@ -0,0 +1,268 @@
+// Random Number Generator (RNG) implementation based on ChaCha
+
+#include <ctype.h>
+#include <err.h>
+#include <gc.h>
+#include <gmp.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/param.h>
+
+#include "arrays.h"
+#include "datatypes.h"
+#include "rng.h"
+#include "text.h"
+#include "util.h"
+
+#include "chacha.h"
+
+ struct RNGState_t { // Complete state of one random number generator
+ chacha_ctx chacha; // cipher context; (re)keyed by RNG$set_seed() and rekey()
+ size_t unused_bytes; // number of bytes at the END of random_bytes not yet handed out
+ uint8_t random_bytes[1024]; // buffered generator output, consumed from lower to higher offsets
+ };
+
+public _Thread_local RNG_t default_rng = (struct RNGState_t[1]){};
+
+ // Text representation of an RNG value. With a null `rng` only the type name
+ // is produced; otherwise the address of the generator state is shown.
+ PUREFUNC static Text_t RNG$as_text(const void *rng, bool colorize, const TypeInfo_t*)
+ {
+     if (rng == NULL)
+         return Text("RNG");
+
+     // `rng` points at an RNG_t, which is itself a pointer to the state struct.
+     const void *state = *(void *const *)rng;
+     if (colorize)
+         return Text$format("\x1b[34;1mRNG(%p)\x1b[m", state);
+     return Text$format("RNG(%p)", state);
+ }
+
+#define KEYSZ 32
+#define IVSZ 8
+
+ // Seed (or re-seed) `rng` from the bytes of `seed`.
+ // Only the first KEYSZ + IVSZ bytes of the seed are used; a shorter seed is
+ // padded with zeros. Any previously buffered output is discarded.
+ public void RNG$set_seed(RNG_t rng, Array_t seed)
+ {
+     uint8_t seed_bytes[KEYSZ + IVSZ] = {};
+     for (int64_t i = 0; i < (int64_t)sizeof(seed_bytes); i++)
+         seed_bytes[i] = i < seed.length ? *(uint8_t*)(seed.data + i*seed.stride) : 0;
+
+     // rekey() passes `random_bytes` to chacha_encrypt_bytes() as both input and
+     // output, so whatever is in the buffer influences the generated bytes.
+     // Clear it here so the output depends only on the seed, not on stale
+     // (or never-initialized) buffer contents.
+     memset(rng->random_bytes, 0, sizeof(rng->random_bytes));
+     rng->unused_bytes = 0;
+     chacha_keysetup(&rng->chacha, seed_bytes, KEYSZ/8);
+     chacha_ivsetup(&rng->chacha, seed_bytes + KEYSZ);
+ }
+
+ // Return a newly allocated generator whose state is a byte-for-byte copy of
+ // `rng`'s state at the time of the call.
+ public RNG_t RNG$copy(RNG_t rng)
+ {
+     struct RNGState_t *duplicate = GC_MALLOC_ATOMIC(sizeof(*duplicate));
+     memcpy(duplicate, rng, sizeof(*duplicate));
+     return duplicate;
+ }
+
+ // Allocate a new generator and initialize it from `seed` via RNG$set_seed().
+ public RNG_t RNG$new(Array_t seed)
+ {
+     RNG_t fresh = GC_MALLOC_ATOMIC(sizeof(*fresh));
+     RNG$set_seed(fresh, seed);
+     return fresh;
+ }
+
+ static void rekey(RNG_t rng) // Refill rng->random_bytes and re-key the cipher from its own output
+ {
+ // Fill the buffer with the keystream
+ chacha_encrypt_bytes(&rng->chacha, rng->random_bytes, rng->random_bytes, sizeof(rng->random_bytes)); // in place: the buffer is both input and output
+ // Immediately reinitialize for backtracking resistance
+ chacha_keysetup(&rng->chacha, rng->random_bytes, KEYSZ/8); // the start of the fresh output supplies the next key
+ chacha_ivsetup(&rng->chacha, rng->random_bytes + KEYSZ); // bytes starting at offset KEYSZ supply the next IV
+ explicit_bzero(rng->random_bytes, KEYSZ + IVSZ); // wipe the region that was just used as key/IV material
+ rng->unused_bytes = sizeof(rng->random_bytes) - KEYSZ - IVSZ; // only the bytes after that region are handed out
+ assert(rng->unused_bytes <= sizeof(rng->random_bytes));
+ }
+
+ // Write `needed` random bytes to `dest`, drawing from the generator's buffer
+ // and calling rekey() to refill it each time it is exhausted.
+ static void random_bytes(RNG_t rng, uint8_t *dest, size_t needed)
+ {
+     uint8_t *out = dest;
+     for (size_t remaining = needed; remaining > 0; ) {
+         assert(rng->unused_bytes <= sizeof(rng->random_bytes));
+         if (rng->unused_bytes == 0)
+             rekey(rng);
+
+         // Unused bytes live at the tail of the buffer
+         size_t chunk = MIN(remaining, rng->unused_bytes);
+         uint8_t *chunk_start = rng->random_bytes + (sizeof(rng->random_bytes) - rng->unused_bytes);
+         memcpy(out, chunk_start, chunk);
+         // Bytes that have been handed out are erased from the buffer
+         memset(chunk_start, 0, chunk);
+
+         rng->unused_bytes -= chunk;
+         out += chunk;
+         remaining -= chunk;
+         assert(rng->unused_bytes <= sizeof(rng->random_bytes));
+     }
+ }
+
+ // Return a random boolean that is true with probability `p`.
+ // The common p == 0.5 case uses a single random bit instead of a random Num.
+ public Bool_t RNG$bool(RNG_t rng, Num_t p)
+ {
+     if (p != 0.5)
+         return RNG$num(rng, 0.0, 1.0) < p;
+
+     uint8_t coin;
+     random_bytes(rng, &coin, sizeof(coin));
+     return coin & 1;
+ }
+
+ // Return a uniformly distributed random integer in the inclusive range
+ // [min, max]. Fails if min > max.
+ public Int_t RNG$int(RNG_t rng, Int_t min, Int_t max)
+ {
+     // Fast path: both bounds have the low tag bit set, i.e. both are small
+     // integers whose value is stored in `.small >> 2`.
+     if (likely(((min.small & max.small) & 1) != 0)) {
+         int32_t r = RNG$int32(rng, (int32_t)(min.small >> 2), (int32_t)(max.small >> 2));
+         return I_small(r);
+     }
+
+     int32_t cmp = Int$compare_value(min, max);
+     if (cmp > 0) {
+         Text_t min_text = Int$as_text(&min, false, &Int$info), max_text = Int$as_text(&max, false, &Int$info);
+         fail("Random minimum value (%k) is larger than the maximum value (%k)",
+              &min_text, &max_text);
+     }
+     if (cmp == 0) return min;
+
+     // range_size = max - min + 1, the number of distinct values in [min, max]
+     mpz_t range_size;
+     mpz_init_set_int(range_size, max);
+     if (min.small & 1) {
+         mpz_t min_mpz;
+         mpz_init_set_si(min_mpz, min.small >> 2);
+         mpz_sub(range_size, range_size, min_mpz);
+     } else {
+         mpz_sub(range_size, range_size, *min.big);
+     }
+     // mpz_urandomm() yields values in [0, n-1], so the +1 is required for
+     // `max` itself to be a possible result.
+     mpz_add_ui(range_size, range_size, 1);
+
+     gmp_randstate_t gmp_rng;
+     gmp_randinit_default(gmp_rng);
+     gmp_randseed_ui(gmp_rng, (unsigned long)RNG$int64(rng, INT64_MIN, INT64_MAX));
+
+     mpz_t r;
+     mpz_init(r);
+     mpz_urandomm(r, gmp_rng, range_size);
+
+     gmp_randclear(gmp_rng);
+     return Int$plus(min, Int$from_mpz(r));
+ }
+
+ // Return a uniformly distributed random 64-bit integer in the inclusive range
+ // [min, max]. Fails if min > max.
+ public Int64_t RNG$int64(RNG_t rng, Int64_t min, Int64_t max)
+ {
+     if (min > max) fail("Random minimum value (%ld) is larger than the maximum value (%ld)", min, max);
+     if (min == max) return min;
+
+     // Full range: every bit pattern is a valid result
+     if (min == INT64_MIN && max == INT64_MAX) {
+         int64_t any;
+         random_bytes(rng, (uint8_t*)&any, sizeof(any));
+         return any;
+     }
+
+     uint64_t span = (uint64_t)max - (uint64_t)min + 1;
+     // 2^64 mod span: samples below this threshold are rejected so that the
+     // remaining samples map evenly onto [0, span)
+     uint64_t threshold = -span % span;
+     uint64_t sample;
+     do {
+         random_bytes(rng, (uint8_t*)&sample, sizeof(sample));
+     } while (sample < threshold);
+     return (int64_t)((uint64_t)min + (sample % span));
+ }
+
+ // Return a uniformly distributed random 32-bit integer in the inclusive range
+ // [min, max]. Fails if min > max.
+ public Int32_t RNG$int32(RNG_t rng, Int32_t min, Int32_t max)
+ {
+     if (min > max) fail("Random minimum value (%d) is larger than the maximum value (%d)", min, max);
+     if (min == max) return min;
+
+     // Full range: every bit pattern is a valid result
+     if (min == INT32_MIN && max == INT32_MAX) {
+         int32_t any;
+         random_bytes(rng, (uint8_t*)&any, sizeof(any));
+         return any;
+     }
+
+     uint32_t span = (uint32_t)max - (uint32_t)min + 1;
+     // 2^32 mod span: samples below this threshold are rejected so that the
+     // remaining samples map evenly onto [0, span)
+     uint32_t threshold = -span % span;
+     uint32_t sample;
+     do {
+         random_bytes(rng, (uint8_t*)&sample, sizeof(sample));
+     } while (sample < threshold);
+     return (int32_t)((uint32_t)min + (sample % span));
+ }
+
+ // Return a uniformly distributed random 16-bit integer in the inclusive range
+ // [min, max]. Fails if min > max.
+ public Int16_t RNG$int16(RNG_t rng, Int16_t min, Int16_t max)
+ {
+     if (min > max) fail("Random minimum value (%d) is larger than the maximum value (%d)", min, max);
+     if (min == max) return min;
+     if (min == INT16_MIN && max == INT16_MAX) {
+         int16_t r;
+         random_bytes(rng, (uint8_t*)&r, sizeof(r));
+         return r;
+     }
+     uint16_t range = (uint16_t)((uint16_t)max - (uint16_t)min + 1);
+     // Rejection threshold = 2^16 mod range. uint16_t operands are promoted to
+     // int, so the negation must be truncated back to 16 bits before taking the
+     // remainder; otherwise `-range % range` is always 0 and nothing is ever
+     // rejected, which biases the result.
+     uint16_t min_r = (uint16_t)((uint16_t)(-range) % range);
+     uint16_t r;
+     for (;;) {
+         random_bytes(rng, (uint8_t*)&r, sizeof(r));
+         if (r >= min_r) break;
+     }
+     return (int16_t)((uint16_t)min + (r % range));
+ }
+
+ // Return a uniformly distributed random 8-bit integer in the inclusive range
+ // [min, max]. Fails if min > max.
+ public Int8_t RNG$int8(RNG_t rng, Int8_t min, Int8_t max)
+ {
+     if (min > max) fail("Random minimum value (%d) is larger than the maximum value (%d)", min, max);
+     if (min == max) return min;
+     if (min == INT8_MIN && max == INT8_MAX) {
+         int8_t r;
+         random_bytes(rng, (uint8_t*)&r, sizeof(r));
+         return r;
+     }
+     uint8_t range = (uint8_t)((uint8_t)max - (uint8_t)min + 1);
+     // Rejection threshold = 2^8 mod range. uint8_t operands are promoted to
+     // int, so the negation must be truncated back to 8 bits before taking the
+     // remainder; otherwise `-range % range` is always 0 and nothing is ever
+     // rejected, which biases the result.
+     uint8_t min_r = (uint8_t)((uint8_t)(-range) % range);
+     uint8_t r;
+     for (;;) {
+         random_bytes(rng, (uint8_t*)&r, sizeof(r));
+         if (r >= min_r) break;
+     }
+     return (int8_t)((uint8_t)min + (r % range));
+ }
+
+ // Return a random Num between `min` and `max`, built by linearly interpolating
+ // with a random fraction in [0, 1). Fails if min > max.
+ public Num_t RNG$num(RNG_t rng, Num_t min, Num_t max)
+ {
+     if (min > max) fail("Random minimum value (%g) is larger than the maximum value (%g)", min, max);
+     if (min == max) return min;
+
+     union {
+         Num_t num;
+         uint64_t bits;
+     } sample = {.bits=0}, unit = {.num=1.0};
+     random_bytes(rng, (void*)&sample, sizeof(sample));
+
+     // Keep the random low 52 bits and copy the top 12 bits from 1.0, giving a
+     // value of the form 1.<random-bits>
+     const uint64_t top_bits = 0xFFFULL << 52;
+     sample.bits &= ~top_bits;
+     sample.bits |= (unit.bits & top_bits);
+
+     sample.num -= 1.0;
+
+     if (min == 0.0 && max == 1.0)
+         return sample.num;
+
+     return (1.0-sample.num)*min + sample.num*max;
+ }
+
+ // 32-bit variant of RNG$num(): the value is generated at Num_t precision and
+ // then converted to Num32_t.
+ public Num32_t RNG$num32(RNG_t rng, Num32_t min, Num32_t max)
+ {
+     Num_t wide = RNG$num(rng, (Num_t)min, (Num_t)max);
+     return (Num32_t)wide;
+ }
+
+ // Return a single random byte.
+ public Byte_t RNG$byte(RNG_t rng)
+ {
+     Byte_t result;
+     random_bytes(rng, &result, sizeof(result));
+     return result;
+ }
+
+ // Return a newly allocated array of `count` random bytes.
+ // Fails if `count` is negative.
+ public Array_t RNG$bytes(RNG_t rng, Int_t count)
+ {
+     int64_t n = Int64$from_int(count, false);
+     // A negative count cannot be turned into an allocation size
+     if (n < 0) fail("Cannot generate a negative number of random bytes (%ld)", n);
+     size_t num_bytes = (size_t)n * sizeof(Byte_t);
+     Byte_t *r = GC_MALLOC_ATOMIC(num_bytes);
+     random_bytes(rng, r, num_bytes);
+     return (Array_t){.data=r, .length=n, .stride=1, .atomic=1};
+ }
+
+public const TypeInfo_t RNG$info = {
+ .size=sizeof(void*),
+ .align=__alignof__(void*),
+ .metamethods={
+ .as_text=RNG$as_text,
+ },
+};
+
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/src/stdlib/rng.h b/src/stdlib/rng.h
new file mode 100644
index 00000000..5bc4794f
--- /dev/null
+++ b/src/stdlib/rng.h
@@ -0,0 +1,31 @@
+#pragma once
+
+// Random Number Generator (RNG) functions/type info
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "datatypes.h"
+#include "types.h"
+#include "bools.h"
+#include "bytes.h"
+#include "util.h"
+
+RNG_t RNG$new(Array_t seed);
+void RNG$set_seed(RNG_t rng, Array_t seed);
+RNG_t RNG$copy(RNG_t rng);
+Bool_t RNG$bool(RNG_t rng, Num_t p);
+Int_t RNG$int(RNG_t rng, Int_t min, Int_t max);
+Int64_t RNG$int64(RNG_t rng, Int64_t min, Int64_t max);
+Int32_t RNG$int32(RNG_t rng, Int32_t min, Int32_t max);
+Int16_t RNG$int16(RNG_t rng, Int16_t min, Int16_t max);
+Int8_t RNG$int8(RNG_t rng, Int8_t min, Int8_t max);
+Byte_t RNG$byte(RNG_t rng);
+Array_t RNG$bytes(RNG_t rng, Int_t count);
+Num_t RNG$num(RNG_t rng, Num_t min, Num_t max);
+Num32_t RNG$num32(RNG_t rng, Num32_t min, Num32_t max);
+
+extern const TypeInfo_t RNG$info;
+extern _Thread_local RNG_t default_rng;
+
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/src/stdlib/siphash-internals.h b/src/stdlib/siphash-internals.h
new file mode 100644
index 00000000..b359cea7
--- /dev/null
+++ b/src/stdlib/siphash-internals.h
@@ -0,0 +1,126 @@
+#pragma once
+
+// This file holds the internals for the SipHash implementation. For a few
+// cases, we want to include this for incrementally computing hashes.
+// Otherwise, it suffices to just use the siphash24() function from siphash.h
+
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "siphash.h"
+
+/* <MIT License>
+ Copyright (c) 2013 Marek Majkowski <marek@popcount.org>
+ Copyright (c) 2018 Samantha McVey <samantham@posteo.net>
+ Copyright (c) 2024 Bruce Hill <bruce@bruce-hill.com>
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in
+ all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ THE SOFTWARE.
+ </MIT License>
+
+ Original location:
+ https://github.com/majek/csiphash/
+
+ Original solution inspired by code from:
+ Samuel Neves (supercop/crypto_auth/siphash24/little)
+ djb (supercop/crypto_auth/siphash24/little2)
+ Jean-Philippe Aumasson (https://131002.net/siphash/siphash24.c)
+
+ Extensive modifications for MoarVM by Samantha McVey
+
+ Further modifications for Tomo by Bruce Hill
+*/
+struct siphash {
+ uint64_t v0;
+ uint64_t v1;
+ uint64_t v2;
+ uint64_t v3;
+ uint64_t b;
+};
+typedef struct siphash siphash;
+#define ROTATE(x, b) (uint64_t)( ((x) << (b)) | ( (x) >> (64 - (b))) )
+
+#define HALF_ROUND(a,b,c,d,s,t) \
+ a += b; c += d; \
+ b = ROTATE(b, s) ^ a; \
+ d = ROTATE(d, t) ^ c; \
+ a = ROTATE(a, 32);
+
+#define DOUBLE_ROUND(v0,v1,v2,v3) \
+ HALF_ROUND(v0,v1,v2,v3,13,16); \
+ HALF_ROUND(v2,v1,v0,v3,17,21); \
+ HALF_ROUND(v0,v1,v2,v3,13,16); \
+ HALF_ROUND(v2,v1,v0,v3,17,21);
+
+MACROLIKE void siphashinit (siphash *sh, size_t src_sz) {
+ const uint64_t k0 = TOMO_HASH_KEY[0];
+ const uint64_t k1 = TOMO_HASH_KEY[1];
+ sh->b = (uint64_t)src_sz << 56;
+ sh->v0 = k0 ^ 0x736f6d6570736575ULL;
+ sh->v1 = k1 ^ 0x646f72616e646f6dULL;
+ sh->v2 = k0 ^ 0x6c7967656e657261ULL;
+ sh->v3 = k1 ^ 0x7465646279746573ULL;
+}
+MACROLIKE void siphashadd64bits (siphash *sh, const uint64_t in) {
+ const uint64_t mi = in;
+ sh->v3 ^= mi;
+ DOUBLE_ROUND(sh->v0,sh->v1,sh->v2,sh->v3);
+ sh->v0 ^= mi;
+}
+MACROLIKE uint64_t siphashfinish_last_part (siphash *sh, uint64_t t) {
+ sh->b |= t;
+ sh->v3 ^= sh->b;
+ DOUBLE_ROUND(sh->v0,sh->v1,sh->v2,sh->v3);
+ sh->v0 ^= sh->b;
+ sh->v2 ^= 0xff;
+ DOUBLE_ROUND(sh->v0,sh->v1,sh->v2,sh->v3);
+ DOUBLE_ROUND(sh->v0,sh->v1,sh->v2,sh->v3);
+ return (sh->v0 ^ sh->v1) ^ (sh->v2 ^ sh->v3);
+}
+/* This union helps us avoid doing weird things with pointers that can cause old
+ * compilers like GCC 4 to generate bad code. In addition it is nicely more C
+ * standards compliant to keep type punning to a minimum. */
+union SipHash64_union {
+ uint64_t u64;
+ uint32_t u32;
+ uint8_t u8[8];
+};
+ /* Finish a hash: pack the 1..7 trailing input bytes (if any) into the low
+  * bytes of a zeroed 64-bit word and run the final rounds on it. Any other
+  * value of src_sz contributes no trailing bytes. */
+ MACROLIKE uint64_t siphashfinish (siphash *sh, const uint8_t *src, size_t src_sz) {
+     union SipHash64_union tail = { 0 };
+     if (src_sz >= 1 && src_sz <= 7)
+         memcpy(tail.u8, src, src_sz);
+     return siphashfinish_last_part(sh, tail.u64);
+ }
+
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/src/stdlib/siphash.c b/src/stdlib/siphash.c
new file mode 100644
index 00000000..44e8b6eb
--- /dev/null
+++ b/src/stdlib/siphash.c
@@ -0,0 +1,79 @@
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "siphash.h"
+#include "util.h"
+
+public uint64_t TOMO_HASH_KEY[2] = {23, 42}; // Randomized in tomo_init()
+
+/* <MIT License>
+ Copyright (c) 2013 Marek Majkowski <marek@popcount.org>
+ Copyright (c) 2018 Samantha McVey <samantham@posteo.net>
+ Copyright (c) 2024 Bruce Hill <bruce@bruce-hill.com>
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in
+ all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ THE SOFTWARE.
+ </MIT License>
+
+ Original location:
+ https://github.com/majek/csiphash/
+
+ Original solution inspired by code from:
+ Samuel Neves (supercop/crypto_auth/siphash24/little)
+ djb (supercop/crypto_auth/siphash24/little2)
+ Jean-Philippe Aumasson (https://131002.net/siphash/siphash24.c)
+
+ Extensive modifications for MoarVM by Samantha McVey
+
+ Further modifications for Tomo by Bruce Hill
+*/
+
+#include "siphash-internals.h"
+
+ // Compute the SipHash of `src_sz` bytes at `src`, keyed with TOMO_HASH_KEY.
+ // Input is consumed in 8-byte words; each word is loaded with memcpy() so the
+ // same code is valid for any alignment of `src` and never reads the byte
+ // buffer through a uint64_t pointer. The leftover 0..7 bytes are handled by
+ // siphashfinish().
+ PUREFUNC public uint64_t siphash24(const uint8_t *src, size_t src_sz) {
+     siphash sh;
+     siphashinit(&sh, src_sz);
+
+     const uint8_t *in = src;
+     while (src_sz >= 8) {
+         uint64_t in_64;
+         memcpy(&in_64, in, sizeof(uint64_t));
+         siphashadd64bits(&sh, in_64);
+         in += 8; src_sz -= 8;
+     }
+     return siphashfinish(&sh, in, src_sz);
+ }
+
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/src/stdlib/siphash.h b/src/stdlib/siphash.h
new file mode 100644
index 00000000..67bad582
--- /dev/null
+++ b/src/stdlib/siphash.h
@@ -0,0 +1,15 @@
+#pragma once
+
+// An implementation of the SipHash algorithm.
+
+#include <stdint.h>
+#include <stddef.h>
+
+#include "util.h"
+
+// This value will be randomized on startup in tomo_init():
+extern uint64_t TOMO_HASH_KEY[2];
+
+PUREFUNC uint64_t siphash24(const uint8_t *src, size_t src_sz);
+
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/src/stdlib/stdlib.c b/src/stdlib/stdlib.c
new file mode 100644
index 00000000..819414fa
--- /dev/null
+++ b/src/stdlib/stdlib.c
@@ -0,0 +1,732 @@
+// Built-in functions
+
+#include <errno.h>
+#include <execinfo.h>
+#include <fcntl.h>
+#include <gc.h>
+#include <locale.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <sys/param.h>
+#include <sys/random.h>
+#include <time.h>
+
+#include "bools.h"
+#include "files.h"
+#include "functiontype.h"
+#include "integers.h"
+#include "optionals.h"
+#include "metamethods.h"
+#include "nums.h"
+#include "patterns.h"
+#include "paths.h"
+#include "rng.h"
+#include "siphash.h"
+#include "stdlib.h"
+#include "tables.h"
+#include "text.h"
+#include "util.h"
+
+public bool USE_COLOR;
+
+ static void signal_handler(int sig, siginfo_t *, void *) // Installed for SIGILL by tomo_init(): print a banner and stack trace, then abort
+ {
+ assert(sig == SIGILL);
+ fflush(stdout); // flush pending stdout output before the report is written to stderr
+ if (USE_COLOR) fputs("\x1b[31;7m ===== ILLEGAL INSTRUCTION ===== \n\n\x1b[m", stderr);
+ else fputs("===== ILLEGAL INSTRUCTION =====\n\n", stderr);
+ print_stack_trace(stderr, 3, 4); // skip the first 3 and the last 4 backtrace frames
+ fflush(stderr);
+ raise(SIGABRT);
+ _exit(1); // fallback in case raising SIGABRT did not end the process
+ }
+
+ // One-time runtime initialization: starts the garbage collector, decides
+ // whether to use colored output, sets the locale, randomizes the hash key,
+ // seeds the default RNG, registers the `%k` printf specifier, and installs the
+ // SIGILL handler.
+ public void tomo_init(void)
+ {
+     GC_INIT();
+     // COLOR=1 forces color on, any other COLOR value forces it off; without
+     // COLOR, color is used when stdout is a terminal. A non-empty NO_COLOR
+     // always disables it.
+     USE_COLOR = getenv("COLOR") ? strcmp(getenv("COLOR"), "1") == 0 : isatty(STDOUT_FILENO);
+     if (getenv("NO_COLOR") && getenv("NO_COLOR")[0] != '\0')
+         USE_COLOR = false;
+
+     setlocale(LC_ALL, "");
+     if (getrandom(TOMO_HASH_KEY, sizeof(TOMO_HASH_KEY), 0) != (ssize_t)sizeof(TOMO_HASH_KEY))
+         errx(1, "Couldn't get random bytes for the hash key");
+
+     // 40 bytes of seed material (a byte array, not an array of pointers)
+     uint8_t random_bytes[40] = {};
+     if (getrandom(random_bytes, sizeof(random_bytes), 0) != (ssize_t)sizeof(random_bytes))
+         errx(1, "Couldn't get random bytes to seed the random number generator");
+     Array_t rng_seed = {.length=sizeof(random_bytes), .data=random_bytes, .stride=1, .atomic=1};
+     RNG$set_seed(default_rng, rng_seed);
+
+     if (register_printf_specifier('k', printf_text, printf_text_size))
+         errx(1, "Couldn't set printf specifier");
+
+     struct sigaction sigact = {};
+     sigact.sa_sigaction = signal_handler;
+     sigemptyset(&sigact.sa_mask);
+     // SA_SIGINFO is required for the three-argument sa_sigaction handler to be
+     // the one that gets used
+     sigact.sa_flags = SA_SIGINFO;
+     sigaction(SIGILL, &sigact, (struct sigaction *)NULL);
+ }
+
+ // Parse one command line argument string `arg` as a value of type `info` and
+ // store the result at `dest`.
+ // Returns true on success and false if `arg` is NULL or could not be parsed.
+ // For an optional type, the literal "none" succeeds without writing to `dest`.
+ // Unsupported types and invalid enum values terminate the program via errx().
+ static bool parse_single_arg(const TypeInfo_t *info, char *arg, void *dest)
+ {
+     if (!arg) return false;
+
+     if (info->tag == OptionalInfo && streq(arg, "none"))
+         return true;
+
+     while (info->tag == OptionalInfo)
+         info = info->OptionalInfo.type;
+
+     if (info == &Int$info) {
+         OptionalInt_t parsed = Int$from_str(arg);
+         if (parsed.small != 0)
+             *(OptionalInt_t*)dest = parsed;
+         return parsed.small != 0;
+     } else if (info == &Int64$info) {
+         OptionalInt64_t parsed = Int64$parse(Text$from_str(arg));
+         if (!parsed.is_none)
+             *(OptionalInt64_t*)dest = parsed;
+         return !parsed.is_none;
+     } else if (info == &Int32$info) {
+         OptionalInt32_t parsed = Int32$parse(Text$from_str(arg));
+         if (!parsed.is_none)
+             *(OptionalInt32_t*)dest = parsed;
+         return !parsed.is_none;
+     } else if (info == &Int16$info) {
+         OptionalInt16_t parsed = Int16$parse(Text$from_str(arg));
+         if (!parsed.is_none)
+             *(OptionalInt16_t*)dest = parsed;
+         return !parsed.is_none;
+     } else if (info == &Int8$info) {
+         OptionalInt8_t parsed = Int8$parse(Text$from_str(arg));
+         if (!parsed.is_none)
+             *(OptionalInt8_t*)dest = parsed;
+         return !parsed.is_none;
+     } else if (info == &Bool$info) {
+         OptionalBool_t parsed = Bool$parse(Text$from_str(arg));
+         if (parsed != NONE_BOOL)
+             *(OptionalBool_t*)dest = parsed;
+         return parsed != NONE_BOOL;
+     } else if (info == &Num$info) {
+         OptionalNum_t parsed = Num$parse(Text$from_str(arg));
+         if (!isnan(parsed))
+             *(OptionalNum_t*)dest = parsed;
+         return !isnan(parsed);
+     } else if (info == &Num32$info) {
+         OptionalNum32_t parsed = Num32$parse(Text$from_str(arg));
+         if (!isnan(parsed))
+             *(OptionalNum32_t*)dest = parsed;
+         return !isnan(parsed);
+     } else if (info == &Path$info) {
+         *(OptionalPath_t*)dest = Path$from_str(arg);
+         return true;
+     } else if (info->tag == TextInfo) {
+         *(OptionalText_t*)dest = Text$from_str(arg);
+         return true;
+     } else if (info->tag == EnumInfo) {
+         // Accepts either `TagName` or `TagName:value`
+         for (int t = 0; t < info->EnumInfo.num_tags; t++) {
+             NamedType_t named = info->EnumInfo.tags[t];
+             size_t len = strlen(named.name);
+             if (strncmp(arg, named.name, len) == 0 && (arg[len] == '\0' || arg[len] == ':')) {
+                 // The tag is stored as a 1-based index at the start of the value
+                 *(int32_t*)dest = (t + 1);
+
+                 // Simple tag (no associated data):
+                 if (!named.type || (named.type->tag == StructInfo && named.type->StructInfo.num_fields == 0))
+                     return true;
+
+                 // Single-argument tag:
+                 if (arg[len] != ':')
+                     errx(1, "Invalid value for %s.%s: %s", info->EnumInfo.name, named.name, arg);
+                 // The payload follows the tag, rounded up to the payload's alignment
+                 size_t offset = sizeof(int32_t);
+                 if (named.type->align > 0 && offset % (size_t)named.type->align > 0)
+                     offset += (size_t)named.type->align - (offset % (size_t)named.type->align);
+                 if (!parse_single_arg(named.type, arg + len + 1, dest + offset))
+                     return false;
+                 return true;
+             }
+         }
+         errx(1, "Invalid value for %s: %s", info->EnumInfo.name, arg);
+     } else if (info->tag == StructInfo) {
+         if (info->StructInfo.num_fields == 0)
+             return true;
+         else if (info->StructInfo.num_fields == 1)
+             return parse_single_arg(info->StructInfo.fields[0].type, arg, dest);
+
+         Text_t t = generic_as_text(NULL, false, info);
+         errx(1, "Unsupported multi-argument struct type for argument parsing: %k", &t);
+     } else if (info->tag == ArrayInfo) {
+         errx(1, "Array arguments must be specified as `--flag ...` not `--flag=...`");
+     } else if (info->tag == TableInfo) {
+         errx(1, "Table arguments must be specified as `--flag ...` not `--flag=...`");
+     } else {
+         Text_t t = generic_as_text(NULL, false, info);
+         errx(1, "Unsupported type for argument parsing: %k", &t);
+     }
+ }
+
+ // Parse `n` argument strings as items of type `item_info` and return them as
+ // an array. Exits via errx() if any item cannot be parsed.
+ static Array_t parse_array(const TypeInfo_t *item_info, int n, char *args[])
+ {
+     // Round the item size up to a multiple of its alignment to get the stride.
+     // As in parse_single_arg(), a non-positive alignment means "no padding".
+     int64_t padded_size = item_info->size;
+     if (item_info->align > 0 && (padded_size % item_info->align) > 0)
+         padded_size = padded_size + item_info->align - (padded_size % item_info->align);
+
+     Array_t items = {
+         .stride=padded_size,
+         .length=n,
+         .data=GC_MALLOC((size_t)(padded_size*n)),
+     };
+     for (int i = 0; i < n; i++) {
+         bool success = parse_single_arg(item_info, args[i], items.data + items.stride*i);
+         if (!success)
+             errx(1, "Couldn't parse argument: %s", args[i]);
+     }
+     return items;
+ }
+
+ // Parse `n` argument strings into a table of type `table`.
+ // Arguments take the form key=value, with a guarantee that there is an '='
+ // Exits via errx() if any key or value cannot be parsed.
+ static Table_t parse_table(const TypeInfo_t *table, int n, char *args[])
+ {
+     const TypeInfo_t *key = table->TableInfo.key, *value = table->TableInfo.value;
+     // Entry layout: key, padding up to the value's alignment, value, padding
+     // up to the key's alignment. As in parse_single_arg(), a non-positive
+     // alignment means "no padding".
+     int64_t padded_size = key->size;
+     if (value->align > 0 && (padded_size % value->align) > 0)
+         padded_size = padded_size + value->align - (padded_size % value->align);
+     int64_t value_offset = padded_size;
+     padded_size += value->size;
+     if (key->align > 0 && (padded_size % key->align) > 0)
+         padded_size = padded_size + key->align - (padded_size % key->align);
+
+     Array_t entries = {
+         .stride=padded_size,
+         .length=n,
+         .data=GC_MALLOC((size_t)(padded_size*n)),
+     };
+     for (int i = 0; i < n; i++) {
+         char *key_arg = args[i];
+         char *equals = strchr(key_arg, '=');
+         assert(equals);
+         char *value_arg = equals + 1;
+         // Temporarily split the argument in place at the first '='
+         *equals = '\0';
+
+         bool success = parse_single_arg(key, key_arg, entries.data + entries.stride*i);
+         if (!success)
+             errx(1, "Couldn't parse table key: %s", key_arg);
+
+         success = parse_single_arg(value, value_arg, entries.data + entries.stride*i + value_offset);
+         if (!success)
+             errx(1, "Couldn't parse table value: %s", value_arg);
+
+         // Restore the caller's argument string
+         *equals = '=';
+     }
+     return Table$from_entries(entries, table);
+ }
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wstack-protector"
+ // Parse the program's command line according to `spec` and store each parsed
+ // value at its `spec[s].dest`.
+ //
+ // Pass 1 handles `--name value`, `--name=value`, `--no-name` (booleans) and
+ // single-letter `-f` flags, stopping at a literal `--`. Pass 2 assigns the
+ // remaining (unused) arguments positionally to the spec entries that have not
+ // been populated yet, skipping booleans. Finally, required entries that were
+ // never populated are either defaulted (arrays/tables become empty) or
+ // reported as an error. All errors exit via errx(); `--help`/`-h` print
+ // `help` and exit(0) when no spec entry claims them.
+ public void _tomo_parse_args(int argc, char *argv[], Text_t usage, Text_t help, int spec_len, cli_arg_t spec[spec_len])
+ {
+     // At least one element so the arrays are never zero-length
+     bool populated_args[spec_len > 0 ? spec_len : 1];
+     bool used_args[argc > 0 ? argc : 1];
+     memset(populated_args, 0, sizeof(populated_args));
+     memset(used_args, 0, sizeof(used_args));
+     for (int i = 1; i < argc; ) {
+         if (argv[i][0] == '-' && argv[i][1] == '-') {
+             if (argv[i][2] == '\0') { // "--" signals the rest of the arguments are literal
+                 used_args[i] = true;
+                 i += 1;
+                 break;
+             }
+
+             for (int s = 0; s < spec_len; s++) {
+                 const TypeInfo_t *non_opt_type = spec[s].type;
+                 while (non_opt_type->tag == OptionalInfo)
+                     non_opt_type = non_opt_type->OptionalInfo.type;
+
+                 if (non_opt_type == &Bool$info
+                     && strncmp(argv[i], "--no-", strlen("--no-")) == 0
+                     && strcmp(argv[i] + strlen("--no-"), spec[s].name) == 0) {
+                     *(OptionalBool_t*)spec[s].dest = false;
+                     populated_args[s] = true;
+                     used_args[i] = true;
+                     goto next_arg;
+                 }
+
+                 if (strncmp(spec[s].name, argv[i] + 2, strlen(spec[s].name)) != 0)
+                     continue;
+
+                 char after_name = argv[i][2+strlen(spec[s].name)];
+                 if (after_name == '\0') { // --foo val
+                     used_args[i] = true;
+                     if (non_opt_type->tag == ArrayInfo) {
+                         int num_args = 0;
+                         while (i + 1 + num_args < argc) {
+                             if (argv[i+1+num_args][0] == '-')
+                                 break;
+                             used_args[i+1+num_args] = true;
+                             num_args += 1;
+                         }
+                         populated_args[s] = true;
+                         *(OptionalArray_t*)spec[s].dest = parse_array(non_opt_type->ArrayInfo.item, num_args, &argv[i+1]);
+                     } else if (non_opt_type->tag == TableInfo) {
+                         int num_args = 0;
+                         while (i + 1 + num_args < argc) {
+                             if (argv[i+1+num_args][0] == '-' || !strchr(argv[i+1+num_args], '='))
+                                 break;
+                             used_args[i+1+num_args] = true;
+                             num_args += 1;
+                         }
+                         populated_args[s] = true;
+                         *(OptionalTable_t*)spec[s].dest = parse_table(non_opt_type, num_args, &argv[i+1]);
+                     } else if (non_opt_type == &Bool$info) { // --flag
+                         populated_args[s] = true;
+                         *(OptionalBool_t*)spec[s].dest = true;
+                     } else {
+                         if (i + 1 >= argc)
+                             errx(1, "Missing argument: %s\n%k", argv[i], &usage);
+                         used_args[i+1] = true;
+                         populated_args[s] = parse_single_arg(spec[s].type, argv[i+1], spec[s].dest);
+                         if (!populated_args[s])
+                             errx(1, "Couldn't parse argument: %s %s\n%k", argv[i], argv[i+1], &usage);
+                     }
+                     goto next_arg;
+                 } else if (after_name == '=') { // --foo=val
+                     used_args[i] = true;
+                     populated_args[s] = parse_single_arg(spec[s].type, 2 + argv[i] + strlen(spec[s].name) + 1, spec[s].dest);
+                     if (!populated_args[s])
+                         errx(1, "Couldn't parse argument: %s\n%k", argv[i], &usage);
+                     goto next_arg;
+                 } else {
+                     continue;
+                 }
+             }
+
+             if (streq(argv[i], "--help")) {
+                 say(help, true);
+                 exit(0);
+             }
+             errx(1, "Unrecognized argument: %s\n%k", argv[i], &usage);
+         } else if (argv[i][0] == '-' && argv[i][1] && argv[i][1] != '-') { // Single flag args
+             used_args[i] = true;
+             for (char *f = argv[i] + 1; *f; f++) {
+                 for (int s = 0; s < spec_len; s++) {
+                     if (spec[s].name[0] != *f || strlen(spec[s].name) > 1)
+                         continue;
+
+                     const TypeInfo_t *non_opt_type = spec[s].type;
+                     while (non_opt_type->tag == OptionalInfo)
+                         non_opt_type = non_opt_type->OptionalInfo.type;
+
+                     if (non_opt_type->tag == ArrayInfo) {
+                         if (f[1]) errx(1, "No value provided for -%c\n%k", *f, &usage);
+                         int num_args = 0;
+                         while (i + 1 + num_args < argc) {
+                             if (argv[i+1+num_args][0] == '-')
+                                 break;
+                             used_args[i+1+num_args] = true;
+                             num_args += 1;
+                         }
+                         populated_args[s] = true;
+                         *(OptionalArray_t*)spec[s].dest = parse_array(non_opt_type->ArrayInfo.item, num_args, &argv[i+1]);
+                     } else if (non_opt_type->tag == TableInfo) {
+                         int num_args = 0;
+                         while (i + 1 + num_args < argc) {
+                             if (argv[i+1+num_args][0] == '-' || !strchr(argv[i+1+num_args], '='))
+                                 break;
+                             used_args[i+1+num_args] = true;
+                             num_args += 1;
+                         }
+                         populated_args[s] = true;
+                         *(OptionalTable_t*)spec[s].dest = parse_table(non_opt_type, num_args, &argv[i+1]);
+                     } else if (non_opt_type == &Bool$info) { // -f
+                         populated_args[s] = true;
+                         *(OptionalBool_t*)spec[s].dest = true;
+                     } else {
+                         if (f[1] || i+1 >= argc) errx(1, "No value provided for -%c\n%k", *f, &usage);
+                         used_args[i+1] = true;
+                         populated_args[s] = parse_single_arg(spec[s].type, argv[i+1], spec[s].dest);
+                         if (!populated_args[s])
+                             errx(1, "Couldn't parse argument: %s %s\n%k", argv[i], argv[i+1], &usage);
+                     }
+                     goto next_flag;
+                 }
+
+                 if (*f == 'h') {
+                     say(help, true);
+                     exit(0);
+                 }
+                 errx(1, "Unrecognized flag: -%c\n%k", *f, &usage);
+               next_flag:;
+             }
+         } else {
+             // Handle positional args later
+             i += 1;
+             continue;
+         }
+
+       next_arg:
+         // Skip everything this flag consumed. The bounds check must come first
+         // so used_args[argc] is never read.
+         while (i < argc && used_args[i])
+             i += 1;
+     }
+
+     // Get remaining positional arguments
+     bool ignore_dashes = false;
+     for (int i = 1, s = 0; i < argc; i++) {
+         if (!ignore_dashes && streq(argv[i], "--")) {
+             ignore_dashes = true;
+             continue;
+         }
+         if (used_args[i]) continue;
+
+         // Advance `s` to the next spec entry that is still unpopulated and is
+         // not a boolean; running out of entries means this argument is extra.
+         const TypeInfo_t *non_opt_type = NULL;
+         for (;; ++s) {
+             if (s >= spec_len)
+                 errx(1, "Extra argument: %s\n%k", argv[i], &usage);
+             if (populated_args[s])
+                 continue;
+
+             non_opt_type = spec[s].type;
+             while (non_opt_type->tag == OptionalInfo)
+                 non_opt_type = non_opt_type->OptionalInfo.type;
+
+             // You can't specify boolean flags positionally
+             if (non_opt_type != &Bool$info)
+                 break;
+         }
+
+         if (non_opt_type->tag == ArrayInfo) {
+             int num_args = 0;
+             while (i + num_args < argc) {
+                 if (!ignore_dashes && argv[i+num_args][0] == '-')
+                     break;
+                 used_args[i+num_args] = true;
+                 num_args += 1;
+             }
+             populated_args[s] = true;
+             *(OptionalArray_t*)spec[s].dest = parse_array(non_opt_type->ArrayInfo.item, num_args, &argv[i]);
+         } else if (non_opt_type->tag == TableInfo) {
+             int num_args = 0;
+             while (i + num_args < argc) {
+                 if (argv[i+num_args][0] == '-' || !strchr(argv[i+num_args], '='))
+                     break;
+                 used_args[i+num_args] = true;
+                 num_args += 1;
+             }
+             populated_args[s] = true;
+             *(OptionalTable_t*)spec[s].dest = parse_table(non_opt_type, num_args, &argv[i]);
+         } else {
+             populated_args[s] = parse_single_arg(spec[s].type, argv[i], spec[s].dest);
+         }
+
+         if (!populated_args[s])
+             errx(1, "Invalid value for %s: %s\n%k", spec[s].name, argv[i], &usage);
+     }
+
+     for (int s = 0; s < spec_len; s++) {
+         if (!populated_args[s] && spec[s].required) {
+             if (spec[s].type->tag == ArrayInfo)
+                 *(OptionalArray_t*)spec[s].dest = (Array_t){};
+             else if (spec[s].type->tag == TableInfo)
+                 *(OptionalTable_t*)spec[s].dest = (Table_t){};
+             else
+                 errx(1, "The required argument '%s' was not provided\n%k", spec[s].name, &usage);
+         }
+     }
+ }
+#pragma GCC diagnostic pop
+
+ // Print one stack trace entry to `out`: the source file and line (when
+ // known), the function name (when known), and then the text of that source
+ // line if the file can be opened.
+ static void print_stack_line(FILE *out, OptionalText_t fn_name, const char *filename, int64_t line_num)
+ {
+     // NOTE: this function is a bit inefficient. Each time we print a line, we
+     // do a linear scan through the whole file. However, performance shouldn't
+     // really matter if we only print stack lines when there's a crash.
+     if (filename) {
+         fprintf(out, "\033[34mFile\033[m \033[35;1m%s\033[m", filename);
+         if (line_num >= 1)
+             fprintf(out, "\033[34m line\033[m \033[35;1m%ld\033[m", line_num);
+     }
+     if (fn_name.length > 0) {
+         fprintf(out, filename ? "\033[34m, in \033[m \033[36;1m%k\033[m" : "\033[36;1m%k\033[m", &fn_name);
+     }
+     fprintf(out, "\n");
+
+     // Without a file name and a valid (1-based) line number there is no source
+     // line to show, and fopen() must not be called with a NULL path.
+     if (!filename || line_num < 1) return;
+
+     FILE *f = fopen(filename, "r");
+     if (!f) return;
+     char *line = NULL;
+     size_t size = 0;
+     ssize_t nread;
+     int64_t cur_line = 1;
+     while ((nread = getline(&line, &size, f)) != -1) {
+         // Strip the trailing newline, if any
+         if (nread > 0 && line[nread-1] == '\n')
+             line[nread-1] = '\0';
+
+         if (cur_line >= line_num)
+             fprintf(out, "\033[33;1m%s\033[m\n", line);
+
+         cur_line += 1;
+         if (cur_line > line_num)
+             break;
+     }
+     free(line);
+     fclose(f);
+ }
+
+ // Print the current call stack to `out`, skipping the first `start` frames
+ // and the last `stop` frames. Each frame's source location is looked up by
+ // running `addr2line` on the binary/offset reported by backtrace_symbols().
+ void print_stack_trace(FILE *out, int start, int stop)
+ {
+     // Print stack trace:
+     void *stack[1024];
+     int size = backtrace(stack, sizeof(stack)/sizeof(stack[0]));
+     char **strings = backtrace_symbols(stack, size);
+     if (!strings) return;
+     for (int64_t i = start < 0 ? 0 : start; i < size - stop; i++) {
+         char *filename = strings[i];
+         // The text before '(' names the binary; the text after it is parsed
+         // as a hexadecimal offset
+         char *paren = strchrnul(filename, '(');
+         // If there is no '(', `paren` is the terminator: don't step past it
+         const char *addr_str = (*paren == '(') ? paren + 1 : paren;
+         long offset = strtol(addr_str, NULL, 16) - 1;
+         const char *cmd = heap_strf("addr2line -e %.*s -is +0x%lx", (int)(paren - filename), filename, (unsigned long)offset);
+         FILE *fp = popen(cmd, "r");
+         OptionalText_t fn_name = get_function_name(stack[i]);
+         // `src_filename` points into `buf`, so `buf` must stay alive until
+         // print_stack_line() has been called
+         char buf[PATH_MAX + 10] = {};
+         const char *src_filename = NULL;
+         int64_t line_number = 0;
+         if (fp) {
+             // Expected output form: <file>:<line>
+             if (fgets(buf, sizeof(buf), fp)) {
+                 char *saveptr, *line_num_str;
+                 if ((src_filename=strtok_r(buf, ":", &saveptr))
+                     && (line_num_str=strtok_r(NULL, ":", &saveptr)))
+                     line_number = atoi(line_num_str);
+             }
+             pclose(fp);
+         }
+         print_stack_line(out, fn_name, src_filename, line_number);
+     }
+     // backtrace_symbols() returns a single malloc()ed block owned by the caller
+     free(strings);
+ }
+
+__attribute__((format(printf, 1, 2)))
+public _Noreturn void fail(const char *fmt, ...)
+{
+ fflush(stdout);
+ if (USE_COLOR) fputs("\x1b[31;7m ==================== ERROR ==================== \n\n\x1b[0;1m", stderr);
+ else fputs("==================== ERROR ====================\n\n", stderr);
+ va_list args;
+ va_start(args, fmt);
+ vfprintf(stderr, fmt, args);
+ if (USE_COLOR) fputs("\x1b[m", stderr);
+ fputs("\n\n", stderr);
+ va_end(args);
+ print_stack_trace(stderr, 2, 4);
+ fflush(stderr);
+ raise(SIGABRT);
+ _exit(1);
+}
+
+// Like fail(), but takes the error message as a Text_t value, which is
+// formatted with the `%k` conversion.
+public _Noreturn void fail_text(Text_t message)
+{
+ fail("%k", &message);
+}
+
+__attribute__((format(printf, 4, 5)))
+public _Noreturn void fail_source(const char *filename, int64_t start, int64_t end, const char *fmt, ...)
+{
+ if (USE_COLOR) fputs("\n\x1b[31;7m ==================== ERROR ==================== \n\n\x1b[0;1m", stderr);
+ else fputs("\n==================== ERROR ====================\n\n", stderr);
+
+ va_list args;
+ va_start(args, fmt);
+ vfprintf(stderr, fmt, args);
+ va_end(args);
+
+ file_t *file = filename ? load_file(filename) : NULL;
+ if (filename && file) {
+ fputs("\n", stderr);
+ highlight_error(file, file->text+start, file->text+end, "\x1b[31;1m", 2, USE_COLOR);
+ fputs("\n", stderr);
+ }
+ if (USE_COLOR) fputs("\x1b[m", stderr);
+
+ print_stack_trace(stderr, 2, 4);
+ fflush(stderr);
+ raise(SIGABRT);
+ _exit(1);
+}
+
+// Return the strerror() description of the current `errno` as a Text_t.
+public Text_t builtin_last_err()
+{
+    const char *description = strerror(errno);
+    return Text$from_str(description);
+}
+
+// Nesting level of inspections in progress: incremented by start_inspect(),
+// decremented by end_inspect(), and used to indent their output.
+static int _inspect_depth = 0;
+// Most recently loaded source file, reused by start_inspect() while the
+// requested filename stays the same.
+static file_t *file = NULL;
+
+// Print the source text of an expression that is about to be inspected.
+// The first line is shown after a ">> " marker together with "[file:line]";
+// further lines of a multi-line expression are dedented and shown after
+// ".. ". Output goes to stderr, indented by three columns per nesting level,
+// and the nesting level is then incremented (end_inspect() decrements it).
+//   filename:   source file containing the expression
+//   start, end: offsets of the expression within the file's text
+__attribute__((nonnull))
+public void start_inspect(const char *filename, int64_t start, int64_t end)
+{
+    if (file == NULL || strcmp(file->filename, filename) != 0)
+        file = load_file(filename);
+
+    if (file) {
+        // printf's `*` width/precision arguments must have type int
+        int indent = MAX(0, 3*_inspect_depth);
+        const char *expr_start = file->text + start;
+        const char *expr_end = file->text + end;
+        int first_line_len = (int)strcspn(expr_start, "\r\n");
+        const char *slash = strrchr(filename, '/');
+        const char *file_base = slash ? slash + 1 : filename;
+        int padding = MAX(0, 35 - first_line_len - indent);
+
+        int64_t line_num = get_line_number(file, expr_start);
+        // "%*s" with an empty string emits exactly `width` spaces, so the
+        // indentation is not capped by the length of a literal string.
+        fprintf(stderr, USE_COLOR ? "%*s\x1b[33;1m>> \x1b[m%.*s %*s\x1b[32;2m[%s:%lld]\x1b[m\n" : "%*s>> %.*s %*s[%s:%lld]\n",
+                indent, "", first_line_len, expr_start,
+                padding, "", file_base, (long long)line_num);
+
+        // For multi-line expressions, dedent each and print it on a new line with ".. " in front:
+        if (end > start + first_line_len) {
+            const char *line_start = get_line(file, line_num);
+            int64_t indent_len = (int64_t)strspn(line_start, " \t");
+            for (const char *line = expr_start + first_line_len; line < expr_end; line += strcspn(line, "\r\n")) {
+                line += strspn(line, "\r\n");
+                // Stop once the newlines carried us past the expression or to
+                // the end of the text (where the loop could no longer advance).
+                if (line >= expr_end || *line == '\0')
+                    break;
+                if ((int64_t)strspn(line, " \t") >= indent_len)
+                    line += indent_len;
+                fprintf(stderr, USE_COLOR ? "%*s\x1b[33m.. \x1b[m%.*s\n" : "%*s.. %.*s\n",
+                        indent, "", (int)strcspn(line, "\r\n"), line);
+            }
+        }
+    }
+    _inspect_depth += 1;
+}
+
+// Finish an inspection started by start_inspect(): decrement the nesting
+// level and, if the type has an `as_text` metamethod, print
+// "= <value> : <type>" to stderr at the current indentation.
+//   expr: pointer to the value that was inspected
+//   type: type information for that value
+__attribute__((nonnull))
+public void end_inspect(const void *expr, const TypeInfo_t *type)
+{
+    _inspect_depth -= 1;
+
+    // Nothing is printed for types without a text conversion
+    if (!type->metamethods.as_text)
+        return;
+
+    Text_t expr_text = generic_as_text(expr, USE_COLOR, type);
+    Text_t type_name = generic_as_text(NULL, false, type);
+
+    int remaining = 3*_inspect_depth;
+    while (remaining-- > 0)
+        fputc(' ', stderr);
+
+    if (USE_COLOR)
+        fprintf(stderr, "\x1b[33;1m=\x1b[0m %k \x1b[2m: \x1b[36m%k\x1b[m\n", &expr_text, &type_name);
+    else
+        fprintf(stderr, "= %k : %k\n", &expr_text, &type_name);
+}
+
+// Check that a value's text form matches `expected`.
+// The uncolored text of the value must equal `expected`; if `expected`
+// contains a ":", the form "<value> : <type>" is accepted as well. On a
+// mismatch a stack trace and a "TEST FAILED" report are written to stderr and
+// SIGABRT is raised.
+//   expr:     pointer to the value under test
+//   type:     type information for the value
+//   expected: expected text
+__attribute__((nonnull))
+public void test_value(const void *expr, const TypeInfo_t *type, const char *expected)
+{
+    Text_t expr_text = generic_as_text(expr, USE_COLOR, type);
+    Text_t type_name = generic_as_text(NULL, false, type);
+
+    Text_t expected_text = Text$from_str(expected);
+    Text_t expr_plain = USE_COLOR ? generic_as_text(expr, false, type) : expr_text;
+
+    if (Text$equal_values(expr_plain, expected_text))
+        return;
+
+    // Second chance: the expectation may include a type annotation
+    OptionalMatch_t colon = Text$find(expected_text, Text(":"), I_small(1));
+    if (colon.index.small) {
+        Text_t with_type = Text$concat(expr_plain, Text(" : "), type_name);
+        if (Text$equal_values(with_type, expected_text))
+            return;
+    }
+
+    print_stack_trace(stderr, 2, 4);
+    if (USE_COLOR) {
+        fprintf(stderr,
+                "\n\x1b[31;7m ==================== TEST FAILED ==================== \x1b[0;1m\n\nExpected: \x1b[1;32m%s\x1b[0m\n\x1b[1m But got:\x1b[m %k\n\n",
+                expected, &expr_text);
+    } else {
+        fprintf(stderr,
+                "\n==================== TEST FAILED ====================\n\nExpected: %s\n But got: %k\n\n",
+                expected, &expr_text);
+    }
+
+    fflush(stderr);
+    raise(SIGABRT);
+}
+
+// Write `text` to stdout, optionally followed by a newline, then flush stdout.
+public void say(Text_t text, bool newline)
+{
+    FILE *dest = stdout;
+    Text$print(dest, text);
+    if (newline) {
+        fputc('\n', dest);
+    }
+    fflush(dest);
+}
+
+// Terminate the process with `status` via _exit(). A non-empty `text` is
+// first printed with say(), followed by a newline.
+public _Noreturn void tomo_exit(Text_t text, int32_t status)
+{
+    bool has_message = (text.length > 0);
+    if (has_message) {
+        say(text, true);
+    }
+    _exit(status);
+}
+
+// Print `prompt` and read one line of input.
+//   prompt:    text shown to the user
+//   bold:      wrap the prompt in bold escape codes
+//   force_tty: when stdout (or stdin) is not a terminal, use /dev/tty for
+//              that stream instead
+// Returns the line without its trailing newline, or NONE_TEXT if a stream
+// could not be opened or no input could be read.
+public OptionalText_t ask(Text_t prompt, bool bold, bool force_tty)
+{
+    OptionalText_t ret = NONE_TEXT;
+    FILE *out = stdout;
+    FILE *in = stdin;
+
+    char *line = NULL;
+    size_t bufsize = 0;
+    ssize_t length = 0;
+    char *gc_input = NULL;
+
+    if (force_tty && !isatty(STDOUT_FILENO)) {
+        out = fopen("/dev/tty", "w");
+        if (!out) goto cleanup;
+    }
+
+    if (bold) fputs("\x1b[1m", out);
+    Text$print(out, prompt);
+    if (bold) fputs("\x1b[m", out);
+    fflush(out);
+
+    if (force_tty && !isatty(STDIN_FILENO)) {
+        in = fopen("/dev/tty", "r");
+        if (!in) {
+            fputs("\n", out); // finish the line, since the user can't enter any input
+            goto cleanup;
+        }
+    }
+
+    length = getline(&line, &bufsize, in);
+    if (length == -1) {
+        fputs("\n", out); // finish the line, since we didn't get any input
+        goto cleanup;
+    }
+
+    if (length > 0 && line[length-1] == '\n') {
+        line[length-1] = '\0';
+        --length;
+    }
+
+    // Copy the input into GC-managed memory so the malloc()ed getline()
+    // buffer can be released below.
+    gc_input = GC_MALLOC_ATOMIC((size_t)(length + 1));
+    memcpy(gc_input, line, (size_t)(length + 1));
+
+    ret = Text$from_strn(gc_input, (size_t)(length));
+
+  cleanup:
+    // getline() may allocate its buffer even when it fails, so always free it
+    free(line);
+    if (out && out != stdout) fclose(out);
+    if (in && in != stdin) fclose(in);
+    return ret;
+}
+
+// Try to consume the command line argument argv[*i] as the flag `flag`.
+// Three spellings are recognized:
+//   --flag        -> *result = EMPTY_TEXT
+//   --no-flag     -> *result = Text("no")
+//   --flag=value  -> *result = the text after '='
+// On a match the argument slot is set to NULL, *i is advanced by one, and
+// true is returned. Otherwise nothing is modified and false is returned.
+public bool pop_flag(char **argv, int *i, const char *flag, Text_t *result)
+{
+    const char *arg = argv[*i];
+    if (arg[0] != '-' || arg[1] != '-')
+        return false;
+
+    const char *name = arg + 2;
+    Text_t value;
+    if (streq(name, flag)) {
+        value = EMPTY_TEXT;
+    } else if (strncmp(name, "no-", 3) == 0 && streq(name + 3, flag)) {
+        value = Text("no");
+    } else {
+        size_t flag_len = strlen(flag);
+        if (strncmp(name, flag, flag_len) != 0 || name[flag_len] != '=')
+            return false;
+        value = Text$from_str(name + flag_len + 1);
+    }
+
+    *result = value;
+    argv[*i] = NULL;
+    *i += 1;
+    return true;
+}
+
+// Suspend the calling thread for `seconds` (fractions allowed).
+// Zero, negative, and NaN durations return immediately. If the sleep is
+// interrupted by a signal, it is resumed for the remaining time.
+public void sleep_num(double seconds)
+{
+    // A single comparison rejects zero, negative values, and NaN
+    if (!(seconds > 0.0))
+        return;
+
+    // Clamp so that the double -> time_t conversion below is always in range
+    // (this also covers +infinity)
+    const double max_seconds = 2147483647.0;
+    if (seconds > max_seconds)
+        seconds = max_seconds;
+
+    struct timespec ts;
+    ts.tv_sec = (time_t)seconds;
+    ts.tv_nsec = (long)((seconds - (double)ts.tv_sec) * 1e9);
+    // On EINTR, nanosleep() stores the unslept time in its second argument,
+    // so passing &ts for both arguments resumes with what is left.
+    while (nanosleep(&ts, &ts) == -1 && errno == EINTR)
+        ;
+}
+
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/src/stdlib/stdlib.h b/src/stdlib/stdlib.h
new file mode 100644
index 00000000..1b633dff
--- /dev/null
+++ b/src/stdlib/stdlib.h
@@ -0,0 +1,57 @@
+#pragma once
+
+// Built-in functions
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#include "datatypes.h"
+#include "types.h"
+#include "util.h"
+
+extern bool USE_COLOR;
+
+// One element of the `spec` array passed to _tomo_parse_args().
+// NOTE(review): field meanings below are inferred from the names; confirm
+// against the _tomo_parse_args() implementation.
+typedef struct {
+ const char *name; // presumably the argument's name on the command line
+ bool required; // presumably whether the argument must be supplied
+ const TypeInfo_t *type; // type information for the argument's value
+ void *dest; // presumably where the parsed value is stored
+} cli_arg_t;
+
+void tomo_init(void);
+// Command line parsing entry point. `spec` is an array of `spec_len`
+// argument descriptions.
+void _tomo_parse_args(int argc, char *argv[], Text_t usage, Text_t help, int spec_len, cli_arg_t spec[spec_len]);
+// Convenience wrapper: the variadic arguments are cli_arg_t initializers; the
+// macro builds the array from them and computes its length with sizeof.
+#define tomo_parse_args(argc, argv, usage, help, ...) \
+ _tomo_parse_args(argc, argv, usage, help, sizeof((cli_arg_t[]){__VA_ARGS__})/sizeof(cli_arg_t), (cli_arg_t[]){__VA_ARGS__})
+__attribute__((format(printf, 1, 2)))
+_Noreturn void fail(const char *fmt, ...);
+_Noreturn void fail_text(Text_t message);
+__attribute__((format(printf, 4, 5)))
+_Noreturn void fail_source(const char *filename, int64_t start, int64_t end, const char *fmt, ...);
+Text_t builtin_last_err();
+__attribute__((nonnull))
+void start_inspect(const char *filename, int64_t start, int64_t end);
+__attribute__((nonnull))
+void end_inspect(const void *expr, const TypeInfo_t *type);
+// Evaluate `expr` between start_inspect() and end_inspect(), passing the
+// address of the stored result to end_inspect(). `start` and `end` are
+// forwarded to start_inspect() together with __SOURCE_FILE__.
+// NOTE: expands to a bare `{ ... }` block (not `do { } while (0)`), and `expr`
+// is substituted without added parentheses.
+#define inspect(expr, typeinfo, start, end) {\
+ start_inspect(__SOURCE_FILE__, start, end); \
+ auto _expr = expr; \
+ end_inspect(&_expr, typeinfo); \
+}
+__attribute__((nonnull))
+void test_value(const void *expr, const TypeInfo_t *type, const char *expected);
+// Evaluate `expr` and check its text form against `expected` via test_value().
+// NOTE: the `start` and `end` parameters are accepted but not used by this
+// expansion. Expands to a bare `{ ... }` block.
+#define test(expr, typeinfo, expected, start, end) {\
+ auto _expr = expr; \
+ test_value(&_expr, typeinfo, expected); \
+}
+
+void say(Text_t text, bool newline);
+Text_t ask(Text_t prompt, bool bold, bool force_tty);
+_Noreturn void tomo_exit(Text_t text, int32_t status);
+
+Closure_t spawn(Closure_t fn);
+bool pop_flag(char **argv, int *i, const char *flag, Text_t *result);
+void print_stack_trace(FILE *out, int start, int stop);
+void sleep_num(double seconds);
+
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/src/stdlib/structs.c b/src/stdlib/structs.c
new file mode 100644
index 00000000..ca88262c
--- /dev/null
+++ b/src/stdlib/structs.c
@@ -0,0 +1,237 @@
+// Metamethods for structs
+
+#include <stdint.h>
+#include <string.h>
+
+#include "arrays.h"
+#include "bools.h"
+#include "functiontype.h"
+#include "metamethods.h"
+#include "optionals.h"
+#include "pointers.h"
+#include "siphash.h"
+#include "tables.h"
+#include "text.h"
+#include "util.h"
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wstack-protector"
+// Hash a struct value.
+// A struct with no fields hashes to 0 and a struct with one field hashes like
+// that field. Otherwise each field is hashed on its own (bool fields are read
+// as single bits) and the array of per-field hashes is hashed with siphash24.
+//   obj:  pointer to the struct's memory
+//   type: type information describing the struct's fields
+PUREFUNC public uint64_t Struct$hash(const void *obj, const TypeInfo_t *type)
+{
+    switch (type->StructInfo.num_fields) {
+    case 0: return 0;
+    case 1: return generic_hash(obj, type->StructInfo.fields[0].type);
+    default: break;
+    }
+
+    uint64_t field_hashes[type->StructInfo.num_fields];
+    ptrdiff_t byte_offset = 0;
+    ptrdiff_t bit_offset = 0;
+    for (int i = 0; i < type->StructInfo.num_fields; i++) {
+        NamedType_t field = type->StructInfo.fields[i];
+        if (field.type == &Bool$info) {
+            // Bool fields occupy one bit each within the current byte
+            bool bit = ((*(char*)(obj + byte_offset)) >> bit_offset) & 0x1;
+            field_hashes[i] = (uint32_t)bit;
+            bit_offset += 1;
+            if (bit_offset >= 8) {
+                byte_offset += 1;
+                bit_offset = 0;
+            }
+            continue;
+        }
+
+        // A non-bool field begins on the next whole byte...
+        if (bit_offset > 0) {
+            byte_offset += 1;
+            bit_offset = 0;
+        }
+        // ...rounded up to the field's alignment
+        if (field.type->align && byte_offset % field.type->align > 0)
+            byte_offset += field.type->align - (byte_offset % field.type->align);
+        field_hashes[i] = generic_hash(obj + byte_offset, field.type);
+        byte_offset += field.type->size;
+    }
+    return siphash24((void*)field_hashes, sizeof(field_hashes));
+}
+#pragma GCC diagnostic pop
+
+// Hash a struct by hashing its `type->size` raw bytes with siphash24.
+// A struct with no fields hashes to 0.
+PUREFUNC public uint64_t PackedData$hash(const void *obj, const TypeInfo_t *type)
+{
+    return (type->StructInfo.num_fields == 0) ? 0 : siphash24(obj, (size_t)type->size);
+}
+
+// Three-way comparison of two structs of the same type.
+// Fields are compared in declaration order and the first non-zero field
+// comparison is returned; 0 means every field compared equal. Bool fields
+// are read as single bits (false orders before true).
+PUREFUNC public int32_t Struct$compare(const void *x, const void *y, const TypeInfo_t *type)
+{
+    if (x == y)
+        return 0;
+
+    ptrdiff_t byte_offset = 0;
+    ptrdiff_t bit_offset = 0;
+    for (int i = 0; i < type->StructInfo.num_fields; i++) {
+        NamedType_t field = type->StructInfo.fields[i];
+        if (field.type != &Bool$info) {
+            // Skip the rest of a partially used bool byte, then align
+            if (bit_offset > 0) {
+                byte_offset += 1;
+                bit_offset = 0;
+            }
+            if (field.type->align && byte_offset % field.type->align > 0)
+                byte_offset += field.type->align - (byte_offset % field.type->align);
+            int32_t result = generic_compare(x + byte_offset, y + byte_offset, field.type);
+            if (result != 0)
+                return result;
+            byte_offset += field.type->size;
+            continue;
+        }
+
+        bool bx = ((*(char*)(x + byte_offset)) >> bit_offset) & 0x1;
+        bool by = ((*(char*)(y + byte_offset)) >> bit_offset) & 0x1;
+        if (bx != by)
+            return (int32_t)bx - (int32_t)by;
+        bit_offset += 1;
+        if (bit_offset >= 8) {
+            byte_offset += 1;
+            bit_offset = 0;
+        }
+    }
+    return 0;
+}
+
+// Field-by-field equality of two structs of the same type.
+// Returns true when both pointers are the same or every field is equal.
+// Bool fields are compared as single bits.
+PUREFUNC public bool Struct$equal(const void *x, const void *y, const TypeInfo_t *type)
+{
+    if (x == y)
+        return true;
+
+    ptrdiff_t byte_offset = 0;
+    ptrdiff_t bit_offset = 0;
+    for (int i = 0; i < type->StructInfo.num_fields; i++) {
+        NamedType_t field = type->StructInfo.fields[i];
+        if (field.type == &Bool$info) {
+            bool x_bit = ((*(char*)(x + byte_offset)) >> bit_offset) & 0x1;
+            bool y_bit = ((*(char*)(y + byte_offset)) >> bit_offset) & 0x1;
+            if (x_bit != y_bit)
+                return false;
+            bit_offset += 1;
+            if (bit_offset >= 8) {
+                byte_offset += 1;
+                bit_offset = 0;
+            }
+            continue;
+        }
+
+        // Non-bool field: move to the next whole byte and align
+        if (bit_offset > 0) {
+            byte_offset += 1;
+            bit_offset = 0;
+        }
+        if (field.type->align && byte_offset % field.type->align > 0)
+            byte_offset += field.type->align - (byte_offset % field.type->align);
+        if (!generic_equal(x + byte_offset, y + byte_offset, field.type))
+            return false;
+        byte_offset += field.type->size;
+    }
+    return true;
+}
+
+// Equality by comparing the `type->size` raw bytes of both values.
+PUREFUNC public bool PackedData$equal(const void *x, const void *y, const TypeInfo_t *type)
+{
+    return x == y || memcmp(x, y, (size_t)type->size) == 0;
+}
+
+// Convert a struct to text of the form `Name(field=value, ...)`.
+//   obj:      the struct, or NULL to get just the type's name
+//   colorize: include terminal color escape codes
+//   type:     type information for the struct
+// Secret and opaque structs are shown as `Name(...)`. Field names are only
+// included when the struct has more than one field. Bools print as yes/no.
+PUREFUNC public Text_t Struct$as_text(const void *obj, bool colorize, const TypeInfo_t *type)
+{
+    if (!obj) return Text$from_str(type->StructInfo.name);
+
+    if (type->StructInfo.is_secret || type->StructInfo.is_opaque)
+        return Text$format(colorize ? "\x1b[0;1m%s\x1b[m(...)" : "%s(...)", type->StructInfo.name);
+
+    bool show_names = (type->StructInfo.num_fields > 1);
+    Text_t result = Text$format(colorize ? "\x1b[0;1m%s\x1b[m(" : "%s(", type->StructInfo.name);
+    ptrdiff_t byte_offset = 0;
+    ptrdiff_t bit_offset = 0;
+    for (int i = 0; i < type->StructInfo.num_fields; i++) {
+        NamedType_t field = type->StructInfo.fields[i];
+        if (i > 0)
+            result = Text$concat(result, Text(", "));
+
+        if (show_names)
+            result = Text$concat(result, Text$from_str(field.name), Text("="));
+
+        if (field.type != &Bool$info) {
+            // Non-bool field: move to the next whole byte and align
+            if (bit_offset > 0) {
+                byte_offset += 1;
+                bit_offset = 0;
+            }
+            if (field.type->align && byte_offset % field.type->align > 0)
+                byte_offset += field.type->align - (byte_offset % field.type->align);
+            result = Text$concat(result, generic_as_text(obj + byte_offset, colorize, field.type));
+            byte_offset += field.type->size;
+            continue;
+        }
+
+        bool b = ((*(char*)(obj + byte_offset)) >> bit_offset) & 0x1;
+        const char *bool_str;
+        if (colorize)
+            bool_str = b ? "\x1b[35myes\x1b[m" : "\x1b[35mno\x1b[m";
+        else
+            bool_str = b ? "yes" : "no";
+        result = Text$concat(result, Text$from_str(bool_str));
+        bit_offset += 1;
+        if (bit_offset >= 8) {
+            byte_offset += 1;
+            bit_offset = 0;
+        }
+    }
+    return Text$concat(result, Text(")"));
+}
+
+// Report whether an optional struct is "none" by reading the bool stored
+// `type->size` bytes past `obj`, i.e. directly after the struct's own data.
+PUREFUNC public bool Struct$is_none(const void *obj, const TypeInfo_t *type)
+{
+    bool *none_flag = (bool*)(obj + type->size);
+    return *none_flag;
+}
+
+// Serialize a struct to `out` one field at a time, in declaration order.
+// Each bool field is written as one byte (0 or 1); every other field is
+// written by _serialize() with the same `pointers` table.
+public void Struct$serialize(const void *obj, FILE *out, Table_t *pointers, const TypeInfo_t *type)
+{
+    ptrdiff_t byte_offset = 0;
+    ptrdiff_t bit_offset = 0;
+    for (int i = 0; i < type->StructInfo.num_fields; i++) {
+        NamedType_t field = type->StructInfo.fields[i];
+        if (field.type != &Bool$info) {
+            // Non-bool field: move to the next whole byte and align
+            if (bit_offset > 0) {
+                byte_offset += 1;
+                bit_offset = 0;
+            }
+            if (field.type->align && byte_offset % field.type->align > 0)
+                byte_offset += field.type->align - (byte_offset % field.type->align);
+            _serialize(obj + byte_offset, out, pointers, field.type);
+            byte_offset += field.type->size;
+            continue;
+        }
+
+        bool bit = ((*(char*)(obj + byte_offset)) >> bit_offset) & 0x1;
+        fputc((int)bit, out);
+        bit_offset += 1;
+        if (bit_offset >= 8) {
+            byte_offset += 1;
+            bit_offset = 0;
+        }
+    }
+}
+
+// Read a struct from `in` into `outval`, one field at a time in declaration
+// order (the inverse of Struct$serialize). Each bool field is read as one
+// byte and stored as a single bit; every other field is read by
+// _deserialize() with the same `pointers` array.
+public void Struct$deserialize(FILE *in, void *outval, Array_t *pointers, const TypeInfo_t *type)
+{
+    ptrdiff_t byte_offset = 0;
+    ptrdiff_t bit_offset = 0;
+    for (int i = 0; i < type->StructInfo.num_fields; i++) {
+        NamedType_t field = type->StructInfo.fields[i];
+        if (field.type == &Bool$info) {
+            int c = fgetc(in);
+            // NOTE(review): EOF is read as `false` here; converting EOF (-1)
+            // directly to bool would yield `true`.
+            bool b = (c != EOF && c != 0);
+            char *byte = (char*)(outval + byte_offset);
+            // Start every bool byte from zero so that stale bits already in
+            // `outval` cannot survive (OR-ing alone can only set bits).
+            if (bit_offset == 0)
+                *byte = 0;
+            if (b)
+                *byte |= (char)(1 << bit_offset);
+            bit_offset += 1;
+            if (bit_offset >= 8) {
+                byte_offset += 1;
+                bit_offset = 0;
+            }
+        } else {
+            if (bit_offset > 0) {
+                byte_offset += 1;
+                bit_offset = 0;
+            }
+            if (field.type->align && byte_offset % field.type->align > 0)
+                byte_offset += field.type->align - (byte_offset % field.type->align);
+            _deserialize(in, outval + byte_offset, pointers, field.type);
+            byte_offset += field.type->size;
+        }
+    }
+}
+
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/src/stdlib/structs.h b/src/stdlib/structs.h
new file mode 100644
index 00000000..bab702cd
--- /dev/null
+++ b/src/stdlib/structs.h
@@ -0,0 +1,40 @@
+// Metamethods for structs
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "datatypes.h"
+#include "types.h"
+#include "util.h"
+
+PUREFUNC uint64_t Struct$hash(const void *obj, const TypeInfo_t *type);
+PUREFUNC uint64_t PackedData$hash(const void *obj, const TypeInfo_t *type);
+PUREFUNC int32_t Struct$compare(const void *x, const void *y, const TypeInfo_t *type);
+PUREFUNC bool Struct$equal(const void *x, const void *y, const TypeInfo_t *type);
+PUREFUNC bool PackedData$equal(const void *x, const void *y, const TypeInfo_t *type);
+PUREFUNC Text_t Struct$as_text(const void *obj, bool colorize, const TypeInfo_t *type);
+PUREFUNC bool Struct$is_none(const void *obj, const TypeInfo_t *type);
+void Struct$serialize(const void *obj, FILE *out, Table_t *pointers, const TypeInfo_t *type);
+void Struct$deserialize(FILE *in, void *outval, Array_t *pointers, const TypeInfo_t *type);
+
+// Initializer for the metamethod table of a struct type, using the
+// field-by-field implementations declared above.
+#define Struct$metamethods { \
+ .hash=Struct$hash, \
+ .compare=Struct$compare, \
+ .equal=Struct$equal, \
+ .as_text=Struct$as_text, \
+ .is_none=Struct$is_none, \
+ .serialize=Struct$serialize, \
+ .deserialize=Struct$deserialize, \
+}
+
+// Same as Struct$metamethods, except that hashing and equality use the
+// PackedData variants. Comparison still uses Struct$compare.
+#define PackedData$metamethods { \
+ .hash=PackedData$hash, \
+ .compare=Struct$compare, \
+ .equal=PackedData$equal, \
+ .as_text=Struct$as_text, \
+ .is_none=Struct$is_none, \
+ .serialize=Struct$serialize, \
+ .deserialize=Struct$deserialize, \
+}
+
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/src/stdlib/tables.c b/src/stdlib/tables.c
new file mode 100644
index 00000000..97419327
--- /dev/null
+++ b/src/stdlib/tables.c
@@ -0,0 +1,798 @@
+// tables.c - C Hash table implementation
+// Copyright 2024 Bruce Hill
+// Provided under the MIT license with the Commons Clause
+// See included LICENSE for details.
+
+// Hash table (aka Dictionary) Implementation
+// Hash keys and values are stored *by value*
+// The hash insertion/lookup implementation is based on Lua's tables,
+// which use a chained scatter with Brent's variation.
+
+#include <assert.h>
+#include <gc.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/param.h>
+
+#include "arrays.h"
+#include "c_strings.h"
+#include "datatypes.h"
+#include "memory.h"
+#include "metamethods.h"
+#include "pointers.h"
+#include "siphash.h"
+#include "tables.h"
+#include "text.h"
+#include "types.h"
+#include "util.h"
+
+// #define DEBUG_TABLES
+
+// Debug tracing: with DEBUG_TABLES defined, hdebug() printf()s its arguments
+// wrapped in the "\x1b[2m" ... "\x1b[m" escape codes; otherwise it expands to
+// a no-op expression.
+#ifdef DEBUG_TABLES
+#define hdebug(fmt, ...) printf("\x1b[2m" fmt "\x1b[m" __VA_OPT__(,) __VA_ARGS__)
+#else
+#define hdebug(...) (void)0
+#endif
+
+// Helper accessors for type functions/values:
+// NOTE: HASH_KEY and EQUAL_KEYS expand to code that refers to a variable named
+// `type`, which must be in scope wherever they are used. HASH_KEY reduces the
+// key's hash modulo the table's bucket count.
+#define HASH_KEY(t, k) (generic_hash((k), type->TableInfo.key) % ((t).bucket_info->count))
+#define EQUAL_KEYS(x, y) (generic_equal((x), (y), type->TableInfo.key))
+// Value of `next_bucket` that marks the last bucket of a chain
+#define END_OF_CHAIN UINT32_MAX
+
+// Address of the i-th entry (0-based) in the table's entries array
+#define GET_ENTRY(t, i) ((t).entries.data + (t).entries.stride*(i))
+
+// Type information for a pointer (sigil "@") to Memory$info; used as the
+// value type of CStrToVoidStarTable.
+static TypeInfo_t MemoryPointer = {
+ .size=sizeof(void*),
+ .align=__alignof__(void*),
+ .tag=PointerInfo,
+ .PointerInfo={
+ .sigil="@",
+ .pointed=&Memory$info,
+ },
+ .metamethods=Pointer$metamethods,
+};
+
+// Type information for a table whose keys use CString$info and whose values
+// use the MemoryPointer type.
+const TypeInfo_t CStrToVoidStarTable = {
+ .size=sizeof(Table_t),
+ .align=__alignof__(Table_t),
+ .tag=TableInfo,
+ .TableInfo={.key=&CString$info, .value=&MemoryPointer},
+ .metamethods=Table$metamethods,
+};
+
+// Size in bytes of one table entry: the key, padding up to the value's
+// alignment, the value, and trailing padding up to the key's alignment.
+PUREFUNC static INLINE size_t entry_size(const TypeInfo_t *info)
+{
+    size_t total = (size_t)info->TableInfo.key->size;
+    if (info->TableInfo.value->align > 1) {
+        size_t over = total % (size_t)info->TableInfo.value->align;
+        if (over)
+            total += (size_t)info->TableInfo.value->align - over; // padding before the value
+    }
+    total += (size_t)info->TableInfo.value->size;
+    if (info->TableInfo.key->align > 1) {
+        size_t over = total % (size_t)info->TableInfo.key->align;
+        if (over)
+            total += (size_t)info->TableInfo.key->align - over; // padding after the value
+    }
+    return total;
+}
+
+// Alignment of a table entry: the larger of the key and value alignments.
+PUREFUNC static INLINE size_t entry_align(const TypeInfo_t *info)
+{
+    if (info->TableInfo.key->align > info->TableInfo.value->align)
+        return (size_t)info->TableInfo.key->align;
+    return (size_t)info->TableInfo.value->align;
+}
+
+// Byte offset of the value within a table entry: the key's size rounded up
+// to the value's alignment.
+PUREFUNC static INLINE size_t value_offset(const TypeInfo_t *info)
+{
+    size_t align = (size_t)info->TableInfo.value->align;
+    size_t where = (size_t)info->TableInfo.key->size;
+    if (align > 1) {
+        size_t over = where % align;
+        if (over)
+            where += align - over; // padding
+    }
+    return where;
+}
+
+// Debug helper: print every bucket of `t` through hdebug(). Occupied buckets
+// show as "[i]=index(next_bucket)" and empty ones as "[i]=_".
+static INLINE void hshow(const Table_t *t)
+{
+    hdebug("{");
+    if (t->bucket_info) {
+        for (uint32_t i = 0; i < t->bucket_info->count; i++) {
+            if (i > 0) hdebug(" ");
+            if (!t->bucket_info->buckets[i].occupied)
+                hdebug("[%d]=_", i);
+            else
+                hdebug("[%d]=%d(%d)", i, t->bucket_info->buckets[i].index, t->bucket_info->buckets[i].next_bucket);
+        }
+    }
+    hdebug("}\n");
+}
+
+// Prepare `t` for in-place modification. If the entries array has a non-zero
+// `data_refcount` it is compacted with Array$compact(); if the bucket info has
+// a non-zero `data_refcount` it is replaced by a fresh copy whose refcount
+// is 0.
+static void maybe_copy_on_write(Table_t *t, const TypeInfo_t *type)
+{
+    if (t->entries.data_refcount != 0)
+        Array$compact(&t->entries, (int64_t)entry_size(type));
+
+    if (!t->bucket_info || t->bucket_info->data_refcount == 0)
+        return;
+
+    size_t nbytes = sizeof(bucket_info_t) + sizeof(bucket_t[t->bucket_info->count]);
+    void *copy = GC_MALLOC(nbytes);
+    memcpy(copy, t->bucket_info, nbytes);
+    t->bucket_info = copy;
+    t->bucket_info->data_refcount = 0;
+}
+
+// Look up `key` in `t` itself (fallback tables are not consulted).
+// Returns the address of the value stored for the key, or NULL if the key is
+// NULL, the table has no buckets, or the key is not present.
+PUREFUNC public void *Table$get_raw(Table_t t, const void *key, const TypeInfo_t *type)
+{
+    assert(type->tag == TableInfo);
+    // HASH_KEY reduces modulo the bucket count, so a zero count must be
+    // rejected here to avoid a division by zero.
+    if (!key || !t.bucket_info || t.bucket_info->count == 0) return NULL;
+
+    uint64_t hash = HASH_KEY(t, key);
+    hshow(&t);
+    hdebug("Getting value with initial probe at %u\n", hash);
+    bucket_t *buckets = t.bucket_info->buckets;
+    // Walk the chain that starts at the key's home bucket
+    for (uint64_t i = hash; buckets[i].occupied; i = buckets[i].next_bucket) {
+        hdebug("Checking against key in bucket %u\n", i);
+        void *entry = GET_ENTRY(t, buckets[i].index);
+        if (EQUAL_KEYS(entry, key)) {
+            hdebug("Found key!\n");
+            return entry + value_offset(type);
+        }
+        if (buckets[i].next_bucket == END_OF_CHAIN)
+            break;
+    }
+    return NULL;
+}
+
+// Look up `key` in `t` and then, in turn, in each table along its chain of
+// `fallback` tables. Returns the address of the first value found, or NULL.
+PUREFUNC public void *Table$get(Table_t t, const void *key, const TypeInfo_t *type)
+{
+    assert(type->tag == TableInfo);
+    const Table_t *current = &t;
+    while (current) {
+        void *found = Table$get_raw(*current, key, type);
+        if (found)
+            return found;
+        current = current->fallback;
+    }
+    return NULL;
+}
+
+// Record in the bucket array that `entry` (whose key is at its start) lives
+// at position `index` of the entries array. Requires `t->bucket_info` to be
+// allocated. When the key's home bucket is already occupied, a free bucket is
+// found by scanning downward from `last_free` and either the occupant is
+// relocated there (if it belongs to a different chain) or the new entry is
+// appended there at the end of the home bucket's chain.
+static void Table$set_bucket(Table_t *t, const void *entry, int32_t index, const TypeInfo_t *type)
+{
+ assert(t->bucket_info);
+ hshow(t);
+ const void *key = entry;
+ bucket_t *buckets = t->bucket_info->buckets;
+ uint64_t hash = HASH_KEY(*t, key);
+ hdebug("Hash value (mod %u) = %u\n", t->bucket_info->count, hash);
+ bucket_t *bucket = &buckets[hash];
+ if (!bucket->occupied) {
+ hdebug("Got an empty space\n");
+ // Empty space:
+ bucket->occupied = 1;
+ bucket->index = index;
+ bucket->next_bucket = END_OF_CHAIN;
+ hshow(t);
+ return;
+ }
+
+ hdebug("Collision detected in bucket %u (entry %u)\n", hash, bucket->index);
+
+ // Move `last_free` down until it refers to an unoccupied bucket
+ while (buckets[t->bucket_info->last_free].occupied) {
+ assert(t->bucket_info->last_free > 0);
+ --t->bucket_info->last_free;
+ }
+
+ // Home bucket of the entry that currently occupies our home bucket
+ uint64_t collided_hash = HASH_KEY(*t, GET_ENTRY(*t, bucket->index));
+ if (collided_hash != hash) { // Collided with a mid-chain entry
+ hdebug("Hit a mid-chain entry at bucket %u (chain starting at %u)\n", hash, collided_hash);
+ // Find chain predecessor
+ uint64_t predecessor = collided_hash;
+ while (buckets[predecessor].next_bucket != hash)
+ predecessor = buckets[predecessor].next_bucket;
+
+ // Move mid-chain entry to free space and update predecessor
+ buckets[predecessor].next_bucket = t->bucket_info->last_free;
+ buckets[t->bucket_info->last_free] = *bucket;
+ // `bucket` still refers to the home bucket, which the new entry takes over below
+ } else { // Collided with the start of a chain
+ hdebug("Hit start of a chain\n");
+ uint64_t end_of_chain = hash;
+ while (buckets[end_of_chain].next_bucket != END_OF_CHAIN)
+ end_of_chain = buckets[end_of_chain].next_bucket;
+ hdebug("Appending to chain\n");
+ // Chain now ends on the free space:
+ buckets[end_of_chain].next_bucket = t->bucket_info->last_free;
+ bucket = &buckets[t->bucket_info->last_free];
+ }
+
+ bucket->occupied = 1;
+ bucket->index = index;
+ bucket->next_bucket = END_OF_CHAIN;
+ hshow(t);
+}
+
+// Replace the table's bucket array with a new one holding `new_capacity`
+// buckets and re-insert every existing entry into it.
+// Calls fail() if `new_capacity` exceeds TABLE_MAX_BUCKETS.
+static void hashmap_resize_buckets(Table_t *t, uint32_t new_capacity, const TypeInfo_t *type)
+{
+    if (unlikely(new_capacity > TABLE_MAX_BUCKETS))
+        fail("Table has exceeded the maximum table size (2^31) and cannot grow further!");
+    hdebug("About to resize from %u to %u\n", t->bucket_info ? t->bucket_info->count : 0, new_capacity);
+    hshow(t);
+    size_t alloc_size = sizeof(bucket_info_t) + sizeof(bucket_t[new_capacity]);
+    t->bucket_info = GC_MALLOC_ATOMIC(alloc_size);
+    // Zero the whole allocation, not just the buckets: GC_MALLOC_ATOMIC does
+    // not clear memory, and the header's `data_refcount` is read later by
+    // maybe_copy_on_write().
+    memset(t->bucket_info, 0, alloc_size);
+    t->bucket_info->count = new_capacity;
+    t->bucket_info->last_free = new_capacity-1;
+    // Rehash:
+    for (int64_t i = 0; i < Table$length(*t); i++) {
+        hdebug("Rehashing %u\n", i);
+        Table$set_bucket(t, GET_ENTRY(*t, i), i, type);
+    }
+
+    hshow(t);
+    hdebug("Finished resizing\n");
+}
+
+// Return address of value
+// Ensure `t` has an entry for `key` and return the address of its value slot.
+// - If the key already exists, the slot is overwritten with `value` (when
+// `value` is non-NULL) and returned.
+// - Otherwise a new entry is appended. With a NULL `value`, the value is
+// taken from the first fallback table containing the key, or zero-filled if
+// none does.
+// Returns NULL if `t` or `key` is NULL. Resets the table's cached `hash` to 0.
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wstack-protector"
+public void *Table$reserve(Table_t *t, const void *key, const void *value, const TypeInfo_t *type)
+{
+ assert(type->tag == TableInfo);
+ if (!t || !key) return NULL;
+ hshow(t);
+
+ t->hash = 0;
+
+ int64_t key_size = type->TableInfo.key->size,
+ value_size = type->TableInfo.value->size;
+ if (!t->bucket_info || t->bucket_info->count == 0) {
+ hashmap_resize_buckets(t, 8, type);
+ } else {
+ // Check if we are clobbering a value:
+ void *value_home = Table$get_raw(*t, key, type);
+ if (value_home) { // Update existing slot
+ // Ensure that `value_home` is still inside t->entries, even if COW occurs
+ ptrdiff_t offset = value_home - t->entries.data;
+ maybe_copy_on_write(t, type);
+ value_home = t->entries.data + offset;
+
+ if (value && value_size > 0)
+ memcpy(value_home, value, (size_t)value_size);
+
+ return value_home;
+ }
+ }
+ // Otherwise add a new entry:
+
+ // Resize buckets if necessary
+ if (t->entries.length >= (int64_t)t->bucket_info->count) {
+ // Current resize policy: +50% at a time:
+ // NOTE(review): `3*count` is computed in 32 bits and would wrap for
+ // counts above UINT32_MAX/3 -- confirm TABLE_MAX_BUCKETS rules this out.
+ uint32_t newsize = MAX(8, (uint32_t)(3*t->bucket_info->count)/2);
+ if (unlikely(newsize > TABLE_MAX_BUCKETS))
+ newsize = TABLE_MAX_BUCKETS;
+ hashmap_resize_buckets(t, newsize, type);
+ }
+
+ // No explicit value: inherit one from the fallback chain if possible
+ if (!value && value_size > 0) {
+ for (Table_t *iter = t->fallback; iter; iter = iter->fallback) {
+ value = Table$get_raw(*iter, key, type);
+ if (value) break;
+ }
+ }
+
+ maybe_copy_on_write(t, type);
+
+ // Assemble the new entry (key, padding, value) in a zeroed scratch buffer
+ char buf[entry_size(type)];
+ memset(buf, 0, sizeof(buf));
+ memcpy(buf, key, (size_t)key_size);
+ if (value && value_size > 0)
+ memcpy(buf + value_offset(type), value, (size_t)value_size);
+ else
+ memset(buf + value_offset(type), 0, (size_t)value_size);
+ Array$insert(&t->entries, buf, I(0), (int64_t)entry_size(type));
+
+ // The code below treats the inserted entry as the last one in the array
+ int64_t entry_index = t->entries.length-1;
+ void *entry = GET_ENTRY(*t, entry_index);
+ Table$set_bucket(t, entry, entry_index, type);
+ return entry + value_offset(type);
+}
+#pragma GCC diagnostic pop
+
+// Set `key` to `value` in `t`. Same as Table$reserve(), with the returned
+// value address discarded.
+public void Table$set(Table_t *t, const void *key, const void *value, const TypeInfo_t *type)
+{
+ assert(type->tag == TableInfo);
+ (void)Table$reserve(t, key, value, type);
+}
+
+// Remove `key` (and its value) from `t`. A NULL `key` removes the last entry.
+// Does nothing if `t` is NULL, the table is empty, or the key is not present.
+// The last entry is moved into the removed entry's position, so the order of
+// the remaining entries can change. On removal the table's cached `hash` is
+// reset to 0.
+public void Table$remove(Table_t *t, const void *key, const TypeInfo_t *type)
+{
+ assert(type->tag == TableInfo);
+ if (!t || Table$length(*t) == 0) return;
+
+ // TODO: this work doesn't need to be done if the key is already missing
+ maybe_copy_on_write(t, type);
+
+ // If unspecified, pop the last key:
+ if (!key)
+ key = GET_ENTRY(*t, t->entries.length-1);
+
+ // Steps: look up the bucket for the removed key
+ // If missing, then return immediately
+ // Swap last key/value into the removed bucket's index1
+ // Zero out the last key/value and decrement the count
+ // Find the last key/value's bucket and update its index1
+ // Look up the bucket for the removed key
+ // If bucket is first in chain:
+ // Move bucket->next to bucket's spot
+ // zero out bucket->next's old spot
+ // maybe update lastfree_index1 to second-in-chain's index
+ // Else:
+ // set prev->next = bucket->next
+ // zero out bucket
+ // maybe update lastfree_index1 to removed bucket's index
+
+ uint64_t hash = HASH_KEY(*t, key);
+ hdebug("Removing key with hash %u\n", hash);
+ // `prev` stays NULL when the key is found in the first bucket probed
+ bucket_t *bucket, *prev = NULL;
+ for (uint64_t i = hash; t->bucket_info->buckets[i].occupied; i = t->bucket_info->buckets[i].next_bucket) {
+ if (EQUAL_KEYS(GET_ENTRY(*t, t->bucket_info->buckets[i].index), key)) {
+ bucket = &t->bucket_info->buckets[i];
+ hdebug("Found key to delete in bucket %u\n", i);
+ goto found_it;
+ }
+ if (t->bucket_info->buckets[i].next_bucket == END_OF_CHAIN)
+ return;
+ prev = &t->bucket_info->buckets[i];
+ }
+ return;
+
+ found_it:;
+ assert(bucket->occupied);
+
+ t->hash = 0;
+
+ // Always remove the last entry. If we need to remove some other entry,
+ // swap the other entry into the last position and then remove the last
+ // entry. This disturbs the ordering of the table, but keeps removal O(1)
+ // instead of O(N)
+ int64_t last_entry = t->entries.length-1;
+ if (bucket->index != last_entry) {
+ hdebug("Removing key/value from the middle of the entries array\n");
+
+ // Find the bucket that points to the last entry's index:
+ uint64_t i = HASH_KEY(*t, GET_ENTRY(*t, last_entry));
+ while (t->bucket_info->buckets[i].index != last_entry)
+ i = t->bucket_info->buckets[i].next_bucket;
+ // Update the bucket to point to the last entry's new home (the space
+ // where the removed entry currently sits):
+ t->bucket_info->buckets[i].index = bucket->index;
+
+ // Clobber the entry being removed (in the middle of the array) with
+ // the last entry:
+ memcpy(GET_ENTRY(*t, bucket->index), GET_ENTRY(*t, last_entry), entry_size(type));
+ }
+
+ // Last entry is being removed, so clear it out to be safe:
+ memset(GET_ENTRY(*t, last_entry), 0, entry_size(type));
+
+ Array$remove_at(&t->entries, I(t->entries.length), I(1), (int64_t)entry_size(type));
+
+ // Unlink the removed key's bucket from its chain and pick which bucket slot becomes free
+ int64_t bucket_to_clear;
+ if (prev) { // Middle (or end) of a chain
+ hdebug("Removing from middle of a chain\n");
+ bucket_to_clear = (bucket - t->bucket_info->buckets);
+ prev->next_bucket = bucket->next_bucket;
+ } else if (bucket->next_bucket != END_OF_CHAIN) { // Start of a chain
+ hdebug("Removing from start of a chain\n");
+ // The second bucket of the chain is copied into the first one's slot, and the second's old slot is freed
+ bucket_to_clear = bucket->next_bucket;
+ *bucket = t->bucket_info->buckets[bucket_to_clear];
+ } else { // Empty chain
+ hdebug("Removing from empty chain\n");
+ bucket_to_clear = (bucket - t->bucket_info->buckets);
+ }
+
+ t->bucket_info->buckets[bucket_to_clear] = (bucket_t){0};
+ // Keep `last_free` at or above the highest bucket known to be free
+ if (bucket_to_clear > t->bucket_info->last_free)
+ t->bucket_info->last_free = bucket_to_clear;
+
+ hshow(t);
+}
+
+// Return the address of the n-th entry of `t` (1-based), or NULL when `n` is
+// outside the range 1..Table$length(t).
+CONSTFUNC public void *Table$entry(Table_t t, int64_t n)
+{
+    bool in_range = (n >= 1 && n <= Table$length(t));
+    if (!in_range)
+        return NULL;
+    return GET_ENTRY(t, n-1);
+}
+
+// Reset `t` by zero-filling the whole Table_t structure.
+public void Table$clear(Table_t *t)
+{
+    memset(t, 0, sizeof *t);
+}
+
+// Return a table built by Table$from_entries() from the entries of `t`,
+// sorted with generic_compare() using the key type.
+public Table_t Table$sorted(Table_t t, const TypeInfo_t *type)
+{
+    Closure_t key_order = (Closure_t){.fn=generic_compare, .userdata=(void*)type->TableInfo.key};
+    return Table$from_entries(Array$sorted(t.entries, key_order, (int64_t)entry_size(type)), type);
+}
+
+// Equality of two tables of the same type.
+// The tables are equal when they have the same number of entries, agree on
+// whether a fallback table is present, and every key of `x` is present in `y`
+// with an equal value. Differing non-zero cached hashes give an early `false`.
+PUREFUNC public bool Table$equal(const void *vx, const void *vy, const TypeInfo_t *type)
+{
+    if (vx == vy) return true;
+    Table_t *x = (Table_t*)vx, *y = (Table_t*)vy;
+
+    // Both hashes are cached (non-zero) and they disagree
+    if (x->hash && y->hash && x->hash != y->hash)
+        return false;
+
+    assert(type->tag == TableInfo);
+    bool x_has_fallback = (x->fallback != NULL);
+    bool y_has_fallback = (y->fallback != NULL);
+    if (x->entries.length != y->entries.length || x_has_fallback != y_has_fallback)
+        return false;
+
+    const TypeInfo_t *value_type = type->TableInfo.value;
+    size_t offset = value_offset(type);
+    for (int64_t i = 0; i < x->entries.length; i++) {
+        void *x_key = GET_ENTRY(*x, i);
+        void *y_value = Table$get_raw(*y, x_key, type);
+        if (!y_value || !generic_equal(y_value, x_key + offset, value_type))
+            return false;
+    }
+    return true;
+}
+
+// Three-way comparison of two tables: negative if x < y, zero if equal,
+// positive if x > y.
+//
+// Parameters:
+//   vx, vy - pointers to the Table_t values being compared
+//   type   - table type info (must have tag TableInfo)
+PUREFUNC public int32_t Table$compare(const void *vx, const void *vy, const TypeInfo_t *type)
+{
+    if (vx == vy) return 0;
+
+    Table_t *x = (Table_t*)vx, *y = (Table_t*)vy;
+    assert(type->tag == TableInfo);
+    auto table = type->TableInfo;
+
+    // Sort empty tables before non-empty tables:
+    if (x->entries.length == 0 || y->entries.length == 0)
+        return ((x->entries.length > 0) - (y->entries.length > 0));
+
+    // Table comparison rules:
+    // - If two tables have different keys, then compare as if comparing a
+    //   sorted array of the keys of the two tables:
+    //      `x.keys:sorted() <> y.keys:sorted()`
+    // - Otherwise, compare as if comparing arrays of values for the sorted key
+    //   arrays:
+    //      `[x[k] for k in x.keys:sorted()] <> [y[k] for k in y.keys:sorted()]`
+    //
+    // We can do this in _linear_ time if we find the smallest `k` such that
+    // `x[k] != y[k]`, as well as the largest key in `x` and `y`.
+
+    // Loop invariants: where values live inside an entry, and whether values
+    // occupy any bytes at all (zero-size values can never differ).
+    const size_t val_off = value_offset(type);
+    const bool has_values = (table.value->size > 0);
+
+    void *mismatched_key = NULL, *max_x_key = NULL;
+    for (int64_t i = 0; i < x->entries.length; i++) {
+        void *key = x->entries.data + x->entries.stride * i;
+        if (max_x_key == NULL || generic_compare(key, max_x_key, table.key) > 0)
+            max_x_key = key;
+
+        void *x_value = key + val_off;
+        void *y_value = Table$get_raw(*y, key, type);
+
+        if (!y_value || (has_values && !generic_equal(x_value, y_value, table.value))) {
+            if (mismatched_key == NULL || generic_compare(key, mismatched_key, table.key) < 0)
+                mismatched_key = key;
+        }
+    }
+
+    // If the keys are not all equal, we gotta check to see if there exists a
+    // `y[k]` such that `k` is smaller than all keys that `x` has and `y` doesn't:
+    void *max_y_key = NULL;
+    for (int64_t i = 0; i < y->entries.length; i++) {
+        void *key = y->entries.data + y->entries.stride * i;
+        if (max_y_key == NULL || generic_compare(key, max_y_key, table.key) > 0)
+            max_y_key = key;
+
+        void *y_value = key + val_off;
+        void *x_value = Table$get_raw(*x, key, type);
+        // Same zero-size guard as the first loop, so that compare(x, y) and
+        // compare(y, x) classify mismatches identically:
+        if (!x_value || (has_values && !generic_equal(x_value, y_value, table.value))) {
+            if (mismatched_key == NULL || generic_compare(key, mismatched_key, table.key) < 0)
+                mismatched_key = key;
+        }
+    }
+
+    if (mismatched_key) {
+        void *x_value = Table$get_raw(*x, mismatched_key, type);
+        void *y_value = Table$get_raw(*y, mismatched_key, type);
+        if (x_value && y_value) {
+            return generic_compare(x_value, y_value, table.value);
+        } else if (y_value) {
+            // The smallest mismatched key is in Y, but not X.
+            // In this case, we should judge if the key is smaller than *any*
+            // key in X or if it's bigger than *every* key in X.
+            // Example 1:
+            //     x={10, 20, 30} > y={10, 20, 25, 30}
+            // The smallest mismatched key is `25`, and we know that `x` is
+            // larger than `y` because `30 > 25`.
+            // Example 2:
+            //     x={10, 20, 30} < y={10, 20, 30, 999}
+            // The smallest mismatched key is `999`, and we know that `x` is
+            // smaller than `y` because `30 < 999`.
+            return max_x_key ? generic_compare(max_x_key, mismatched_key, table.key) : -1;
+        } else {
+            assert(x_value);
+            // The smallest mismatched key is in X, but not Y. The same logic
+            // above applies, but reversed.
+            return max_y_key ? -generic_compare(max_y_key, mismatched_key, table.key) : 1;
+        }
+    }
+
+    assert(x->entries.length == y->entries.length);
+
+    // Assuming keys are the same, compare values:
+    if (has_values) {
+        for (int64_t i = 0; i < x->entries.length; i++) {
+            void *key = x->entries.data + x->entries.stride * i;
+            void *x_value = key + val_off;
+            void *y_value = Table$get_raw(*y, key, type);
+            int32_t diff = generic_compare(x_value, y_value, table.value);
+            if (diff != 0) return diff;
+        }
+    }
+
+    // Entries are identical; a table without a fallback sorts after one with
+    // a fallback, and two fallbacks are compared as tables of the same type.
+    if (!x->fallback != !y->fallback) {
+        return (!x->fallback) - (!y->fallback);
+    } else if (x->fallback && y->fallback) {
+        return generic_compare(x->fallback, y->fallback, type);
+    }
+
+    return 0;
+}
+
+// Compute (and cache in `t->hash`) the hash of a table.
+//
+// The hash is siphash24 over the tuple
+//     (entry count, XOR of key hashes, XOR of value hashes, fallback pointer)
+// so it does not depend on entry order. Value hashes are skipped when the
+// value type has zero size. A cached hash of zero means "not computed", so a
+// computed zero is replaced by a fixed nonzero constant.
+PUREFUNC public uint64_t Table$hash(const void *obj, const TypeInfo_t *type)
+{
+    assert(type->tag == TableInfo);
+    Table_t *t = (Table_t*)obj;
+    if (t->hash != 0)
+        return t->hash;
+
+    auto table = type->TableInfo;
+    const size_t val_off = value_offset(type);
+    const bool has_values = (table.value->size > 0);
+
+    uint64_t keys_hash = 0, values_hash = 0;
+    for (int64_t i = 0; i < t->entries.length; i++) {
+        void *entry = t->entries.data + i*t->entries.stride;
+        keys_hash ^= generic_hash(entry, table.key);
+        if (has_values)
+            values_hash ^= generic_hash(entry + val_off, table.value);
+    }
+
+    struct {
+        int64_t length;
+        uint64_t keys_hash, values_hash;
+        Table_t *fallback;
+    } components = {
+        t->entries.length,
+        keys_hash,
+        values_hash,
+        t->fallback,
+    };
+    t->hash = siphash24((void*)&components, sizeof(components));
+    if (unlikely(t->hash == 0))
+        t->hash = 1234567;
+    return t->hash;
+}
+
+// Render a table as text: `{k=v, k=v}` (or `{k, k}` when the value type is
+// Void), followed by `; fallback=...` when a fallback table is present.
+// When `obj` is NULL, the key/value types are rendered instead (by calling
+// generic_as_text() with a NULL object), giving `{K,V}` or `{K}`.
+public Text_t Table$as_text(const void *obj, bool colorize, const TypeInfo_t *type)
+{
+    Table_t *t = (Table_t*)obj;
+    assert(type->tag == TableInfo);
+    auto table = type->TableInfo;
+    const bool has_values = (table.value != &Void$info);
+
+    if (t == NULL) {
+        if (!has_values)
+            return Text$concat(
+                Text("{"),
+                generic_as_text(NULL, false, table.key),
+                Text("}"));
+        return Text$concat(
+            Text("{"),
+            generic_as_text(NULL, false, table.key),
+            Text(","),
+            generic_as_text(NULL, false, table.value),
+            Text("}"));
+    }
+
+    const int64_t val_off = (int64_t)value_offset(type);
+    const int64_t count = Table$length(*t);
+    Text_t rendered = Text("{");
+    int64_t index = 0;
+    while (index < count) {
+        if (index != 0)
+            rendered = Text$concat(rendered, Text(", "));
+        void *entry = GET_ENTRY(*t, index);
+        rendered = Text$concat(rendered, generic_as_text(entry, colorize, table.key));
+        if (has_values)
+            rendered = Text$concat(rendered, Text("="), generic_as_text(entry + val_off, colorize, table.value));
+        index++;
+    }
+
+    if (t->fallback != NULL)
+        rendered = Text$concat(rendered, Text("; fallback="), Table$as_text(t->fallback, colorize, type));
+
+    return Text$concat(rendered, Text("}"));
+}
+
+// Build a table from an array of packed entries (key followed by value at
+// value_offset(type)). Buckets are pre-sized to 125% of the entry count and
+// each entry is then inserted with Table$set(), so later duplicates of a key
+// go through the normal "set" path.
+//
+// Returns an empty (all-zero) table when `entries` is empty.
+public Table_t Table$from_entries(Array_t entries, const TypeInfo_t *type)
+{
+    assert(type->tag == TableInfo);
+    if (entries.length == 0)
+        return (Table_t){};
+
+    Table_t t = {};
+    int64_t length = entries.length + entries.length / 4;
+    size_t alloc_size = sizeof(bucket_info_t) + sizeof(bucket_t[length]);
+    t.bucket_info = GC_MALLOC_ATOMIC(alloc_size);
+    memset(t.bucket_info->buckets, 0, sizeof(bucket_t[length]));
+    t.bucket_info->count = length;
+    t.bucket_info->last_free = length-1;
+    // GC_MALLOC_ATOMIC does not clear memory, so every header field that is
+    // read later (see TABLE_INCREF) must be initialized explicitly:
+    t.bucket_info->data_refcount = 0;
+
+    size_t offset = value_offset(type);
+    for (int64_t i = 0; i < entries.length; i++) {
+        void *key = entries.data + i*entries.stride;
+        Table$set(&t, key, key + offset, type);
+    }
+    return t;
+}
+
+// Overlap is "set intersection" in formal terms.
+// The result holds every entry of `a` whose key is also found in `b` (via
+// Table$get) with an equal value. If `a` has a fallback, the result gets a
+// freshly allocated fallback that is the overlap of `a`'s fallback with `b`.
+public Table_t Table$overlap(Table_t a, Table_t b, const TypeInfo_t *type)
+{
+    const TypeInfo_t *value_type = type->TableInfo.value;
+    const size_t val_off = value_offset(type);
+
+    Table_t common = {};
+    for (int64_t i = 0; i < Table$length(a); i++) {
+        void *entry = GET_ENTRY(a, i);
+        void *value_in_a = entry + val_off;
+        void *value_in_b = Table$get(b, entry, type);
+        if (value_in_b == NULL)
+            continue;
+        if (!generic_equal(value_in_a, value_in_b, value_type))
+            continue;
+        Table$set(&common, entry, value_in_a, type);
+    }
+
+    if (a.fallback != NULL) {
+        common.fallback = new(Table_t);
+        *common.fallback = Table$overlap(*a.fallback, b, type);
+    }
+    return common;
+}
+
+// With is "set union" in formal terms.
+// The result contains every entry of `a`, then every entry of `b`; because
+// `b` is inserted second, its value wins for keys present in both. When both
+// inputs have fallbacks the result's fallback is their union (newly
+// allocated); otherwise whichever fallback exists (if any) is shared.
+public Table_t Table$with(Table_t a, Table_t b, const TypeInfo_t *type)
+{
+    const size_t val_off = value_offset(type);
+    Table_t merged = {};
+
+    const Table_t sources[2] = {a, b};
+    for (int which = 0; which < 2; which++) {
+        for (int64_t i = 0; i < Table$length(sources[which]); i++) {
+            void *entry = GET_ENTRY(sources[which], i);
+            Table$set(&merged, entry, entry + val_off, type);
+        }
+    }
+
+    if (a.fallback == NULL) {
+        merged.fallback = b.fallback;
+    } else if (b.fallback == NULL) {
+        merged.fallback = a.fallback;
+    } else {
+        merged.fallback = new(Table_t);
+        *merged.fallback = Table$with(*a.fallback, *b.fallback, type);
+    }
+    return merged;
+}
+
+// Without is "set difference" in formal terms.
+// The result holds every entry of `a` whose key is absent from `b` (via
+// Table$get) or whose value in `b` differs. If `a` has a fallback, the result
+// gets a freshly allocated fallback equal to `a`'s fallback without `b`.
+public Table_t Table$without(Table_t a, Table_t b, const TypeInfo_t *type)
+{
+    const TypeInfo_t *value_type = type->TableInfo.value;
+    const size_t val_off = value_offset(type);
+
+    Table_t remaining = {};
+    for (int64_t i = 0; i < Table$length(a); i++) {
+        void *entry = GET_ENTRY(a, i);
+        void *value_in_a = entry + val_off;
+        void *value_in_b = Table$get(b, entry, type);
+        const bool keep = (value_in_b == NULL) || !generic_equal(value_in_a, value_in_b, value_type);
+        if (keep)
+            Table$set(&remaining, entry, value_in_a, type);
+    }
+
+    if (a.fallback != NULL) {
+        remaining.fallback = new(Table_t);
+        *remaining.fallback = Table$without(*a.fallback, b, type);
+    }
+    return remaining;
+}
+
+// True when every key of `a` is present in `b` (looked up with Table$get_raw).
+// With `strict` set, tables with the same number of entries are rejected, so
+// `a` must be a proper subset.
+PUREFUNC public bool Table$is_subset_of(Table_t a, Table_t b, bool strict, const TypeInfo_t *type)
+{
+    if (a.entries.length > b.entries.length)
+        return false;
+    if (strict && a.entries.length == b.entries.length)
+        return false;
+
+    for (int64_t i = 0; i < Table$length(a); i++) {
+        if (Table$get_raw(b, GET_ENTRY(a, i), type) == NULL)
+            return false;
+    }
+    return true;
+}
+
+// `a` is a superset of `b` exactly when `b` is a subset of `a`.
+PUREFUNC public bool Table$is_superset_of(Table_t a, Table_t b, bool strict, const TypeInfo_t *type)
+{
+    const bool b_within_a = Table$is_subset_of(b, a, strict, type);
+    return b_within_a;
+}
+
+// Look up a C-string key in a (const char*) -> (void*) table using
+// Table$get(). Returns the stored pointer, or NULL when the key is absent.
+PUREFUNC public void *Table$str_get(Table_t t, const char *key)
+{
+    void **slot = Table$get(t, &key, &CStrToVoidStarTable);
+    if (slot == NULL)
+        return NULL;
+    return *slot;
+}
+
+// Same as Table$str_get(), but performs the lookup with Table$get_raw().
+// Returns the stored pointer, or NULL when the key is absent.
+PUREFUNC public void *Table$str_get_raw(Table_t t, const char *key)
+{
+    void **slot = Table$get_raw(t, &key, &CStrToVoidStarTable);
+    if (slot == NULL)
+        return NULL;
+    return *slot;
+}
+
+// Table$reserve() for a (const char*) -> (void*) table; forwards the
+// addresses of `key` and `value` and returns whatever Table$reserve() returns.
+public void *Table$str_reserve(Table_t *t, const char *key, const void *value)
+{
+    void *reserved = Table$reserve(t, &key, &value, &CStrToVoidStarTable);
+    return reserved;
+}
+
+// Table$set() for a (const char*) -> (void*) table: stores the pointer
+// `value` under the C-string pointer `key`.
+public void Table$str_set(Table_t *t, const char *key, const void *value)
+{
+    const TypeInfo_t *info = &CStrToVoidStarTable;
+    Table$set(t, &key, &value, info);
+}
+
+// Table$remove() for a (const char*) -> (void*) table.
+public void Table$str_remove(Table_t *t, const char *key)
+{
+    // ISO C forbids `return expr;` in a function returning void, so the call
+    // is made as a plain statement.
+    Table$remove(t, &key, &CStrToVoidStarTable);
+}
+
+// Return the n-th entry (1-indexed) of a string-keyed table; identical to
+// Table$entry().
+CONSTFUNC public void *Table$str_entry(Table_t t, int64_t n)
+{
+    void *entry = Table$entry(t, n);
+    return entry;
+}
+
+// A table value is treated as "none" when its entry count is negative.
+PUREFUNC public bool Table$is_none(const void *obj, const TypeInfo_t*)
+{
+    const Table_t *t = (const Table_t*)obj;
+    return t->entries.length < 0;
+}
+
+// Write a table to `out` as: the entry count (Int64), then each entry's key
+// followed by its value, then the optional fallback pointer.
+public void Table$serialize(const void *obj, FILE *out, Table_t *pointers, const TypeInfo_t *type)
+{
+    Table_t *t = (Table_t*)obj;
+    int64_t count = t->entries.length;
+    Int64$serialize(&count, out, pointers, &Int64$info);
+
+    const TypeInfo_t *key_type = type->TableInfo.key;
+    const TypeInfo_t *value_type = type->TableInfo.value;
+    const size_t val_off = value_offset(type);
+    for (int64_t i = 0; i < count; i++) {
+        void *entry = t->entries.data + i*t->entries.stride;
+        _serialize(entry, out, pointers, key_type);
+        _serialize(entry + val_off, out, pointers, value_type);
+    }
+
+    Optional$serialize(&t->fallback, out, pointers, Optional$info(sizeof(void*), __alignof__(void*), Pointer$info("&", type)));
+}
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wstack-protector"
+// Read a table written by Table$serialize(): an Int64 entry count, then that
+// many key/value pairs, then the optional fallback pointer. The result is
+// stored in `*(Table_t*)outval`.
+public void Table$deserialize(FILE *in, void *outval, Array_t *pointers, const TypeInfo_t *type)
+{
+    // Initialized so a failed read cannot leave the loop bound indeterminate,
+    // and read with the same type info Table$serialize() writes it with:
+    int64_t len = 0;
+    Int64$deserialize(in, &len, pointers, &Int64$info);
+
+    const TypeInfo_t *key_type = type->TableInfo.key;
+    const TypeInfo_t *value_type = type->TableInfo.value;
+    // A variable-length array must have a size greater than zero, but value
+    // types may have zero size, so scratch buffers get at least one byte:
+    const size_t key_buf_size = key_type->size > 0 ? (size_t)key_type->size : 1;
+    const size_t value_buf_size = value_type->size > 0 ? (size_t)value_type->size : 1;
+
+    Table_t t = {};
+    for (int64_t i = 0; i < len; i++) {
+        char key[key_buf_size];
+        _deserialize(in, key, pointers, key_type);
+        char value[value_buf_size];
+        _deserialize(in, value, pointers, value_type);
+        Table$set(&t, key, value, type);
+    }
+
+    Optional$deserialize(in, &t.fallback, pointers, Optional$info(sizeof(void*), __alignof__(void*), Pointer$info("&", type)));
+
+    *(Table_t*)outval = t;
+}
+#pragma GCC diagnostic pop
+
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
diff --git a/src/stdlib/tables.h b/src/stdlib/tables.h
new file mode 100644
index 00000000..979da5e7
--- /dev/null
+++ b/src/stdlib/tables.h
@@ -0,0 +1,110 @@
+#pragma once
+
+// Hash table datastructure with methods and type information
+
+#include <stdint.h>
+#include <stdbool.h>
+#include <string.h>
+
+#include "arrays.h"
+#include "datatypes.h"
+#include "types.h"
+#include "util.h"
+
+// Build a Table_t from N literal {key, value} pairs. The pairs are copied
+// into GC-allocated memory, passed to Table$from_entries(), and `fb` is then
+// installed as the fallback of the resulting table.
+#define Table(key_t, val_t, key_info, value_info, fb, N, ...) ({ \
+    struct { key_t k; val_t v; } ents[N] = {__VA_ARGS__}; \
+    Table_t table = Table$from_entries( \
+        (Array_t){ \
+            .data=memcpy(GC_MALLOC(sizeof(ents)), ents, sizeof(ents)), \
+            .length=sizeof(ents)/sizeof(ents[0]), \
+            .stride=sizeof(ents[0]), \
+        }, \
+        Table$info(key_info, value_info)); \
+    table.fallback = fb; \
+    table; })
+// Build a set (a table whose value type is Void) from N literal items. The
+// items are copied into GC-allocated memory and passed to Table$from_entries().
+#define Set(item_t, item_info, N, ...) ({ \
+    item_t ents[N] = {__VA_ARGS__}; \
+    Table_t set = Table$from_entries( \
+        (Array_t){ \
+            .data=memcpy(GC_MALLOC(sizeof(ents)), ents, sizeof(ents)), \
+            .length=sizeof(ents)/sizeof(ents[0]), \
+            .stride=sizeof(ents[0]), \
+        }, \
+        Set$info(item_info)); \
+    set; })
+
+Table_t Table$from_entries(Array_t entries, const TypeInfo_t *type);
+void *Table$get(Table_t t, const void *key, const TypeInfo_t *type);
+// Look up `key_expr` in `table_expr`; bind the result pointer to the
+// caller-chosen name `nonnull_var` and evaluate `nonnull_expr` if the key was
+// found, `null_expr` otherwise.
+// Internal temporaries use prefixed names so that caller expressions which
+// mention variables called `t` or `k` are not captured by the macro.
+#define Table$get_optional(table_expr, key_t, val_t, key_expr, nonnull_var, nonnull_expr, null_expr, info_expr) ({ \
+    const Table_t _opt_table = table_expr; const key_t _opt_key = key_expr; \
+    val_t *nonnull_var = Table$get(_opt_table, &_opt_key, info_expr); \
+    nonnull_var ? nonnull_expr : null_expr; })
+// Return a pointer to the value stored under `key_expr` in the table pointed
+// to by `table_expr`; if the key is missing, reserve a slot for it holding
+// `default_expr` and return a pointer to that slot.
+// Internal temporaries use prefixed names so that caller expressions which
+// mention variables called `t`, `k` or `v` are not captured by the macro.
+#define Table$get_or_setdefault(table_expr, key_t, val_t, key_expr, default_expr, info_expr) ({ \
+    Table_t *_sd_table = table_expr; const key_t _sd_key = key_expr; \
+    val_t *_sd_value = Table$get(*_sd_table, &_sd_key, info_expr); \
+    _sd_value ? _sd_value : (val_t*)Table$reserve(_sd_table, &_sd_key, (val_t[1]){default_expr}, info_expr); })
+// Evaluate to the value stored under `key_expr`, or to `default_expr` when
+// the key is not found by Table$get().
+// Internal temporaries use prefixed names: with the old names, a call such as
+// `Table$get_or_default(t, ...)` expanded to `const Table_t t = t;`.
+#define Table$get_or_default(table_expr, key_t, val_t, key_expr, default_expr, info_expr) ({ \
+    const Table_t _gd_table = table_expr; const key_t _gd_key = key_expr; \
+    val_t *_gd_value = Table$get(_gd_table, &_gd_key, info_expr); \
+    _gd_value ? *_gd_value : default_expr; })
+// True when Table$get() finds `key_expr` in `table_expr`.
+// Internal temporaries use prefixed names so that caller expressions which
+// mention variables called `t` or `k` are not captured by the macro.
+#define Table$has_value(table_expr, key_expr, info_expr) ({ \
+    const Table_t _hv_table = table_expr; __typeof(key_expr) _hv_key = key_expr; \
+    (Table$get(_hv_table, &_hv_key, info_expr) != NULL); })
+PUREFUNC void *Table$get_raw(Table_t t, const void *key, const TypeInfo_t *type);
+CONSTFUNC void *Table$entry(Table_t t, int64_t n);
+void *Table$reserve(Table_t *t, const void *key, const void *value, const TypeInfo_t *type);
+void Table$set(Table_t *t, const void *key, const void *value, const TypeInfo_t *type);
+// Table$set() taking the key and value as expressions rather than pointers.
+// Temporaries use prefixed names so that a `t` or `type` argument mentioning
+// variables called `k` or `v` is not captured by the macro.
+#define Table$set_value(t, key_expr, value_expr, type) ({ \
+    __typeof(key_expr) _sv_key = key_expr; __typeof(value_expr) _sv_value = value_expr; \
+    Table$set(t, &_sv_key, &_sv_value, type); })
+// Table$reserve() with a NULL value, taking the key as an expression.
+// The temporary uses a prefixed name so that a `t` or `type` argument
+// mentioning a variable called `k` is not captured by the macro.
+#define Table$reserve_value(t, key_expr, type) ({ \
+    __typeof(key_expr) _rv_key = key_expr; \
+    Table$reserve(t, &_rv_key, NULL, type); })
+// Add `amount_expr` to the value stored under `key_expr`, or store
+// `amount_expr` as the initial value when Table$get_raw() does not find the
+// key. `amount_expr` is evaluated exactly once (only one branch runs).
+// Temporaries use prefixed names so that caller expressions mentioning
+// variables called `t`, `key`, `val` or `init` are not captured by the macro.
+#define Table$bump(t_expr, key_expr, amount_expr, type) ({ __typeof(key_expr) _bump_key = key_expr; \
+    Table_t *_bump_table = t_expr; \
+    __typeof(amount_expr) *_bump_val = Table$get_raw(*_bump_table, &_bump_key, type); \
+    if (_bump_val) *_bump_val += amount_expr; \
+    else { __typeof(amount_expr) _bump_init = amount_expr; Table$set(_bump_table, &_bump_key, &_bump_init, type); } (void)0; })
+
+void Table$remove(Table_t *t, const void *key, const TypeInfo_t *type);
+// Table$remove() taking the key as an expression rather than a pointer.
+// The temporary uses a prefixed name so that a `t` or `type` argument
+// mentioning a variable called `k` is not captured by the macro.
+#define Table$remove_value(t, key_expr, type) ({ \
+    __typeof(key_expr) _rm_key = key_expr; \
+    Table$remove(t, &_rm_key, type); })
+
+Table_t Table$overlap(Table_t a, Table_t b, const TypeInfo_t *type);
+Table_t Table$with(Table_t a, Table_t b, const TypeInfo_t *type);
+Table_t Table$without(Table_t a, Table_t b, const TypeInfo_t *type);
+PUREFUNC bool Table$is_subset_of(Table_t a, Table_t b, bool strict, const TypeInfo_t *type);
+PUREFUNC bool Table$is_superset_of(Table_t a, Table_t b, bool strict, const TypeInfo_t *type);
+
+void Table$clear(Table_t *t);
+Table_t Table$sorted(Table_t t, const TypeInfo_t *type);
+void Table$mark_copy_on_write(Table_t *t);
+// TABLE_INCREF: apply ARRAY_INCREF to the table's entries and, when bucket
+// info exists, bump its data_refcount by one unless it has already reached
+// TABLE_MAX_DATA_REFCOUNT. Note: `t` is expanded several times.
+#define TABLE_INCREF(t) ({ \
+    ARRAY_INCREF((t).entries); \
+    if ((t).bucket_info) \
+        (t).bucket_info->data_refcount += ((t).bucket_info->data_refcount < TABLE_MAX_DATA_REFCOUNT); \
+    })
+// TABLE_COPY: apply TABLE_INCREF to `t`, then evaluate to `t` itself.
+#define TABLE_COPY(t) ({ \
+    TABLE_INCREF(t); \
+    t; })
+PUREFUNC int32_t Table$compare(const void *x, const void *y, const TypeInfo_t *type);
+PUREFUNC bool Table$equal(const void *x, const void *y, const TypeInfo_t *type);
+PUREFUNC uint64_t Table$hash(const void *t, const TypeInfo_t *type);
+Text_t Table$as_text(const void *t, bool colorize, const TypeInfo_t *type);
+PUREFUNC bool Table$is_none(const void *obj, const TypeInfo_t*);
+
+CONSTFUNC void *Table$str_entry(Table_t t, int64_t n);
+PUREFUNC void *Table$str_get(Table_t t, const char *key);
+PUREFUNC void *Table$str_get_raw(Table_t t, const char *key);
+void Table$str_set(Table_t *t, const char *key, const void *value);
+void *Table$str_reserve(Table_t *t, const char *key, const void *value);
+void Table$str_remove(Table_t *t, const char *key);
+void Table$serialize(const void *obj, FILE *out, Table_t *pointers, const TypeInfo_t *type);
+void Table$deserialize(FILE *in, void *outval, Array_t *pointers, const TypeInfo_t *type);
+
+// Number of entries in table value `t` (the length of its entries array).
+#define Table$length(t) \
+    ((t).entries.length)
+
+extern const TypeInfo_t CStrToVoidStarTable;
+
+// Initializer for the `.metamethods` field of a table TypeInfo_t, wiring up
+// the Table$* implementations declared above.
+#define Table$metamethods { \
+    .as_text     = Table$as_text, \
+    .compare     = Table$compare, \
+    .equal       = Table$equal, \
+    .hash        = Table$hash, \
+    .is_none     = Table$is_none, \
+    .serialize   = Table$serialize, \
+    .deserialize = Table$deserialize, \
+}
+
+// Pointer to a compound-literal TypeInfo_t describing a table with the given
+// key and value type infos.
+#define Table$info(key_expr, value_expr) &((TypeInfo_t){ \
+    .size=sizeof(Table_t), \
+    .align=__alignof__(Table_t), \
+    .tag=TableInfo, \
+    .TableInfo.key=key_expr, \
+    .TableInfo.value=value_expr, \
+    .metamethods=Table$metamethods})
+// Pointer to a compound-literal TypeInfo_t describing a set: a table whose
+// key type is `item_info` and whose value type is Void.
+#define Set$info(item_info) &((TypeInfo_t){ \
+    .size=sizeof(Table_t), \
+    .align=__alignof__(Table_t), \
+    .tag=TableInfo, \
+    .TableInfo.key=item_info, \
+    .TableInfo.value=&Void$info, \
+    .metamethods=Table$metamethods})
+
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
diff --git a/src/stdlib/text.c b/src/stdlib/text.c
new file mode 100644
index 00000000..bfaa0581
--- /dev/null
+++ b/src/stdlib/text.c
@@ -0,0 +1,1499 @@
+// This file defines type info and methods for the Text datatype, which uses
+// libunistr for Unicode support and implements a datastructure based on a
+// hybrid of Raku/MoarVM's space-efficient grapheme cluster representation of
+// strings and Cords (Boehm et al), which have good runtime performance for
+// text constructed by a series of many concatenations.
+//
+// For more information on MoarVM's grapheme cluster strings, see:
+// https://docs.raku.org/language/unicode
+// https://github.com/MoarVM/MoarVM/blob/main/docs/strings.asciidoc
+// For more information on Cords, see the paper "Ropes: an Alternative to
+// Strings" (Boehm, Atkinson, Plass 1995):
+// https://www.cs.tufts.edu/comp/150FP/archive/hans-boehm/ropes.pdf
+//
+// A note on grapheme clusters: In Unicode, codepoints can be represented using
+// a 32-bit integer. Most codepoints correspond to the intuitive notion of a
+// "letter", which is more formally known as a "grapheme cluster". A grapheme
+// cluster is roughly speaking the amount of text that your cursor moves over
+// when you press the arrow key once. However, some codepoints act as modifiers
+// on other codepoints. For example, U+0301 (COMBINING ACUTE ACCENT) can modify
+// a letter like "e" to form "é". During normalization, this frequently
+// resolves down to a single unicode codepoint, in this case, "é" resolves to
+// the single codepoint U+00E9 (LATIN SMALL LETTER E WITH ACUTE). However, in
+// some cases, multiple codepoints make up a grapheme cluster but *don't*
+// normalize to a single codepoint. For example, LATIN SMALL LETTER E (U+0065)
+// + COMBINING VERTICAL LINE BELOW (U+0329) combine to form an unusual glyph
+// that is not used frequently enough to warrant its own unique codepoint (this
+// is basically what Zalgo text is).
+//
+// There are a lot of benefits to storing unicode text with one grapheme
+// cluster per index in a densely packed array instead of storing the text as
+// variable-width UTF8-encoded bytes. It lets us have one canonical length for
+// the text that can be precomputed and is meaningful to users. It lets us
+// quickly get the Nth "letter" in the text. Substring slicing is fast.
+// However, since not all grapheme clusters take up the same number of
+// codepoints, we're faced with the problem of how to jam multiple codepoints
+// into a single 32-bit slot. Inspired by Raku and MoarVM's approach, this
+// implementation uses "synthetic graphemes" (in Raku's terms, Normal Form
+// Graphemes, aka NFG). A synthetic grapheme is a negative 32-bit signed
+// integer that represents a multi-codepoint grapheme cluster that has been
+// encountered during the program's runtime. These clusters are stored in a
+// lookup array and hash map so that we can rapidly convert between the
+// synthetic grapheme integer ID and the unicode codepoints associated with it.
+// Essentially, it's like we create a supplement to the unicode standard with
+// things that would be nice if they had their own codepoint so things worked
+// out nicely because we're using them right now, and we'll give them a
+// negative number so it doesn't overlap with any real codepoints.
+//
+// Example 1: U+0048, U+00E9
+//   AKA: LATIN CAPITAL LETTER H, LATIN SMALL LETTER E WITH ACUTE
+//   This would be stored as: (int32_t[]){0x48, 0xE9}
+// Example 2: U+0048, U+0065, U+0329
+//   AKA: LATIN CAPITAL LETTER H, LATIN SMALL LETTER E, COMBINING VERTICAL LINE BELOW
+//   This would be stored as: (int32_t[]){0x48, -2}
+//   Where -2 is used as a lookup in an array that holds the actual unicode
+//   codepoints: (ucs4_t[]){0x65, 0x0329}
+
+#include <assert.h>
+#include <ctype.h>
+#include <gc.h>
+#include <printf.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <sys/param.h>
+
+#include <unicase.h>
+#include <unictype.h>
+#include <unigbrk.h>
+#include <uniname.h>
+#include <unistr.h>
+#include <unistring/version.h>
+#include <uniwidth.h>
+
+#include "arrays.h"
+#include "integers.h"
+#include "patterns.h"
+#include "tables.h"
+#include "text.h"
+
+// Use inline version of the siphash code for performance:
+#include "siphash.h"
+#include "siphash-internals.h"
+
+// Everything stored about one synthetic (multi-codepoint) grapheme cluster.
+typedef struct {
+    // First codepoint of the cluster that is not a prepended concatenation
+    // mark (see get_synthetic_grapheme()):
+    ucs4_t main_codepoint;
+    // Length-prefixed codepoints: [0] is the count, [1..count] the codepoints:
+    ucs4_t *utf32_cluster;
+    // NUL-terminated UTF8 encoding of the cluster:
+    const uint8_t *utf8;
+} synthetic_grapheme_t;
+
+// Synthetic grapheme clusters (clusters of more than one codepoint):
+static Table_t grapheme_ids_by_codepoints = {}; // ucs4_t* length-prefixed codepoints -> int32_t ID
+
+// This will hold a dynamically growing array of synthetic graphemes:
+static synthetic_grapheme_t *synthetic_graphemes = NULL;
+static int32_t synthetic_grapheme_capacity = 0;
+static int32_t num_synthetic_graphemes = 0;
+
+#define NUM_GRAPHEME_CODEPOINTS(id) (synthetic_graphemes[-(id)-1].utf32_cluster[0])
+#define GRAPHEME_CODEPOINTS(id) (&synthetic_graphemes[-(id)-1].utf32_cluster[1])
+#define GRAPHEME_UTF8(id) (synthetic_graphemes[-(id)-1].utf8)
+
+// Somewhat arbitrarily chosen, if two short literal ASCII or grapheme chunks
+// are concatenated below this length threshold, we just merge them into a
+// single literal node instead of a concatenation node.
+#define SHORT_ASCII_LENGTH 64
+#define SHORT_GRAPHEMES_LENGTH 16
+
+static Text_t text_from_u32(ucs4_t *codepoints, int64_t num_codepoints, bool normalize);
+static Text_t simple_concatenation(Text_t a, Text_t b);
+
+// The canonical zero-length text: an ASCII text with no backing bytes.
+public Text_t EMPTY_TEXT = {
+    .tag=TEXT_ASCII,
+    .length=0,
+    .ascii=NULL,
+};
+
+// Equality for length-prefixed codepoint clusters. `va` and `vb` each point
+// to a `ucs4_t*` whose element [0] is the codepoint count and whose elements
+// [1..count] are the codepoints. The TypeInfo_t argument is unused.
+PUREFUNC static bool graphemes_equal(const void *va, const void *vb, const TypeInfo_t*) {
+    const ucs4_t *a = *(ucs4_t**)va;
+    const ucs4_t *b = *(ucs4_t**)vb;
+    const ucs4_t count = a[0];
+    if (count != b[0]) return false;
+    // Codepoints occupy indices 1..count inclusive; stopping at count-1 would
+    // leave the last codepoint uncompared and make distinct clusters "equal".
+    for (ucs4_t i = 1; i <= count; i++)
+        if (a[i] != b[i]) return false;
+    return true;
+}
+
+// Hash a length-prefixed codepoint cluster: siphash24 over the codepoints
+// only (the length prefix at index 0 is not part of the hashed bytes).
+PUREFUNC static uint64_t grapheme_hash(const void *g, const TypeInfo_t*) {
+    ucs4_t *cluster = *(ucs4_t**)g;
+    ucs4_t *first_codepoint = cluster + 1;
+    size_t num_bytes = sizeof(ucs4_t[cluster[0]]);
+    return siphash24((void*)first_codepoint, num_bytes);
+}
+
+// Type info for table keys that are pointers to length-prefixed codepoint
+// clusters; only equality and hashing are provided.
+static const TypeInfo_t GraphemeClusterInfo = {
+    .size = sizeof(ucs4_t*),
+    .align = __alignof__(ucs4_t*),
+    .metamethods = {
+        .equal = graphemes_equal,
+        .hash = grapheme_hash,
+    },
+};
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wstack-protector"
+// Return the (negative) synthetic grapheme ID for a cluster of codepoints,
+// registering a new ID the first time a cluster is seen.
+//
+// Parameters:
+//   codepoints - the cluster's UTF32 codepoints
+//   utf32_len  - number of codepoints in `codepoints`
+// Returns: the ID `-(index+1)` of the cluster in `synthetic_graphemes`.
+public int32_t get_synthetic_grapheme(const ucs4_t *codepoints, int64_t utf32_len)
+{
+    ucs4_t length_prefixed[1+utf32_len];
+    length_prefixed[0] = (ucs4_t)utf32_len;
+    for (int64_t i = 0; i < utf32_len; i++)
+        length_prefixed[i+1] = codepoints[i];
+    ucs4_t *ptr = &length_prefixed[0];
+
+    // Optimization for common case of one frequently used synthetic grapheme:
+    static int32_t last_grapheme = 0;
+    if (last_grapheme != 0 && graphemes_equal(&ptr, &synthetic_graphemes[-last_grapheme-1].utf32_cluster, NULL))
+        return last_grapheme;
+
+    TypeInfo_t GraphemeIDLookupTableInfo = *Table$info(&GraphemeClusterInfo, &Int32$info);
+    int32_t *found = Table$get(grapheme_ids_by_codepoints, &ptr, &GraphemeIDLookupTableInfo);
+    if (found) return *found;
+
+    // New synthetic grapheme:
+    if (num_synthetic_graphemes >= synthetic_grapheme_capacity) {
+        // If we don't have space, allocate more:
+        synthetic_grapheme_capacity = MAX(128, synthetic_grapheme_capacity * 2);
+        // Each element stores pointers into GC-allocated arenas, so the array
+        // must be scanned by the collector (GC_MALLOC, not the pointer-free
+        // GC_MALLOC_ATOMIC):
+        synthetic_grapheme_t *grown = GC_MALLOC(sizeof(synthetic_grapheme_t[synthetic_grapheme_capacity]));
+        // The old array is NULL on first use; memcpy must not be given NULL:
+        if (num_synthetic_graphemes > 0)
+            memcpy(grown, synthetic_graphemes, sizeof(synthetic_grapheme_t[num_synthetic_graphemes]));
+        synthetic_graphemes = grown;
+    }
+
+    int32_t grapheme_id = -(num_synthetic_graphemes+1);
+    num_synthetic_graphemes += 1;
+
+    // Get UTF8 representation:
+    uint8_t u8_buf[64];
+    size_t u8_len = sizeof(u8_buf)/sizeof(u8_buf[0]);
+    uint8_t *u8 = u32_to_u8(codepoints, (size_t)utf32_len, u8_buf, &u8_len);
+
+    // For performance reasons, use an arena allocator here to ensure that
+    // synthetic graphemes store all of their information in a densely packed
+    // area with good cache locality:
+    static void *arena = NULL, *arena_end = NULL;
+    // Eat up any space needed to make arena 32-bit aligned:
+    if ((size_t)arena % __alignof__(ucs4_t) != 0)
+        arena += __alignof__(ucs4_t) - ((size_t)arena % __alignof__(ucs4_t));
+
+    // If we have filled up this arena, allocate a new one:
+    size_t needed_memory = sizeof(ucs4_t[1+utf32_len]) + sizeof(uint8_t[u8_len + 1]);
+    if (arena + needed_memory > arena_end) {
+        // Do reasonably big chunks at a time, so most synthetic codepoints are
+        // nearby each other in memory and cache locality is good. This is a
+        // rough guess at a good size:
+        size_t chunk_size = MAX(needed_memory, 512);
+        arena = GC_MALLOC_ATOMIC(chunk_size);
+        arena_end = arena + chunk_size;
+    }
+
+    // Copy length-prefixed UTF32 codepoints into the arena and store where they live:
+    ucs4_t *codepoint_copy = arena;
+    memcpy(codepoint_copy, length_prefixed, sizeof(ucs4_t[1+utf32_len]));
+    synthetic_graphemes[-grapheme_id-1].utf32_cluster = codepoint_copy;
+    arena += sizeof(ucs4_t[1+utf32_len]);
+
+    // Copy UTF8 bytes into the arena and store where they live:
+    uint8_t *utf8_final = arena;
+    memcpy(utf8_final, u8, sizeof(uint8_t[u8_len]));
+    utf8_final[u8_len] = '\0'; // Add a terminating NUL byte
+    synthetic_graphemes[-grapheme_id-1].utf8 = utf8_final;
+    arena += sizeof(uint8_t[u8_len + 1]);
+
+    // Sickos at the unicode consortium decreed that you can have grapheme clusters
+    // that begin with *prefix* modifiers, so we gotta check for that case:
+    synthetic_graphemes[-grapheme_id-1].main_codepoint = length_prefixed[1];
+    for (int64_t i = 0; i < utf32_len; i++) {
+#if _LIBUNISTRING_VERSION >= 0x010200
+// libunistring version 1.2.0 introduced uc_is_property_prepended_concatenation_mark()
+// It's not critical, but it's technically more correct to have this check:
+        if (unlikely(uc_is_property_prepended_concatenation_mark(length_prefixed[1+i])))
+            continue;
+#endif
+        synthetic_graphemes[-grapheme_id-1].main_codepoint = length_prefixed[1+i];
+        break;
+    }
+
+    // Cleanup from unicode API:
+    if (u8 != u8_buf) free(u8);
+
+    Table$set(&grapheme_ids_by_codepoints, &codepoint_copy, &grapheme_id, &GraphemeIDLookupTableInfo);
+
+    last_grapheme = grapheme_id;
+    return grapheme_id;
+}
+#pragma GCC diagnostic pop
+
+// Write `width` spaces to `stream`; returns how many were actually written.
+static int text_visualize_indent(FILE *stream, int width)
+{
+    int written = 0;
+    for (int i = 0; i < width; i++)
+        written += (fputc(' ', stream) != EOF);
+    return written;
+}
+
+// Debugging aid: print the internal tree structure of a text as nested
+// pseudo-XML (`<ascii>`, `<graphemes>`, `<concat>`), indenting the children
+// of a concatenation one space per level.
+//
+// Parameters:
+//   stream - where to print
+//   t      - the text to visualize
+//   depth  - current nesting level (pass 0 for the root)
+// Returns: the number of bytes printed.
+int text_visualize(FILE *stream, Text_t t, int depth)
+{
+    switch (t.tag) {
+    // `%.*s` requires an `int` precision and `%lld` a `long long`, so the
+    // arguments are cast to exactly those types:
+    case TEXT_ASCII: return fprintf(stream, "<ascii length=%lld>%.*s</ascii>", (long long)t.length, (int)t.length, t.ascii);
+    case TEXT_GRAPHEMES: {
+        int printed = fprintf(stream, "<graphemes length=%lld>", (long long)t.length);
+        printed += Text$print(stream, t);
+        printed += fprintf(stream, "</graphemes>");
+        return printed;
+    }
+    case TEXT_CONCAT: {
+        int printed = fprintf(stream, "<concat depth=%lld length=%lld>\n", (long long)t.depth, (long long)t.length);
+        // fputc() returns the character written (not a count), so each
+        // successful call is counted as one byte:
+        printed += text_visualize_indent(stream, depth+1);
+        printed += text_visualize(stream, *t.left, depth+1);
+        printed += (fputc('\n', stream) != EOF);
+        printed += text_visualize_indent(stream, depth+1);
+        printed += text_visualize(stream, *t.right, depth+1);
+        printed += (fputc('\n', stream) != EOF);
+        printed += text_visualize_indent(stream, depth);
+        printed += fprintf(stream, "</concat>");
+        return printed;
+    }
+    default: return 0;
+    }
+}
+
+// Write a text to `stream` as UTF8 and return the number of bytes written.
+// ASCII texts are written directly, grapheme texts are encoded one grapheme
+// at a time (synthetic graphemes use their stored UTF8 bytes), and
+// concatenations print their left half followed by their right half.
+public int Text$print(FILE *stream, Text_t t)
+{
+    if (t.length == 0) return 0;
+
+    switch (t.tag) {
+    case TEXT_ASCII: return (int)fwrite(t.ascii, sizeof(char), (size_t)t.length, stream);
+    case TEXT_GRAPHEMES: {
+        const int32_t *graphemes = t.graphemes;
+        int written = 0;
+        for (int64_t i = 0; i < t.length; i++) {
+            int32_t grapheme = graphemes[i];
+            if (grapheme >= 0) {
+                uint8_t buf[8];
+                size_t len = sizeof(buf);
+                uint8_t *u8 = u32_to_u8((ucs4_t*)&grapheme, 1, buf, &len);
+                // u32_to_u8() returns NULL on failure (e.g. an invalid
+                // codepoint); skip the grapheme rather than pass NULL on:
+                if (u8 == NULL) continue;
+                written += (int)fwrite(u8, sizeof(char), len, stream);
+                if (u8 != buf) free(u8);
+            } else {
+                const uint8_t *u8 = GRAPHEME_UTF8(grapheme);
+                assert(u8);
+                written += (int)fwrite(u8, sizeof(uint8_t), strlen((char*)u8), stream);
+            }
+        }
+        return written;
+    }
+    case TEXT_CONCAT: {
+        // The operands of `+` may be evaluated in either order, so the two
+        // halves are printed in separate statements to guarantee that the
+        // left half reaches the stream first:
+        int written = Text$print(stream, *t.left);
+        written += Text$print(stream, *t.right);
+        return written;
+    }
+    default: return 0;
+    }
+}
+
+static const int64_t min_len_for_depth[MAX_TEXT_DEPTH] = {
+ // Fibonacci numbers (skipping first two)
+ 1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144, 233, 377, 610, 987, 1597, 2584, 4181, 6765, 10946,
+ 17711, 28657, 46368, 75025, 121393, 196418, 317811, 514229, 832040, 1346269, 2178309, 3524578,
+ 5702887, 9227465, 14930352, 24157817, 39088169, 63245986, 102334155, 165580141, 267914296,
+ 433494437, 701408733, 1134903170, 1836311903, 2971215073, 4807526976, 7778742049,
+};
+
+#define IS_BALANCED_TEXT(t) ((t).length >= min_len_for_depth[(t).depth])
+
+// Insert `to_insert` into the array of balanced partial texts (slot `i`
+// holds a text whose length is at least min_len_for_depth[i]), merging it
+// with any shorter texts already present so the slots stay ordered by length.
+// Texts already in the array are logically to the left of `to_insert`.
+static void insert_balanced(Text_t balanced_texts[MAX_TEXT_DEPTH], Text_t to_insert)
+{
+    // An empty text would fall through both loops and be stored at index -1:
+    if (to_insert.length == 0) return;
+
+    int i = 0;
+    Text_t accumulator = EMPTY_TEXT;
+    // Gather every slot that is too small to sit to the left of `to_insert`
+    // (bounded so the lookup table is never indexed past its end):
+    for (; i + 1 < MAX_TEXT_DEPTH && to_insert.length > min_len_for_depth[i + 1]; i++) {
+        if (balanced_texts[i].length) {
+            accumulator = simple_concatenation(balanced_texts[i], accumulator);
+            balanced_texts[i] = EMPTY_TEXT;
+        }
+    }
+
+    accumulator = simple_concatenation(accumulator, to_insert);
+
+    // Keep absorbing occupied slots while the accumulated text is long
+    // enough to belong in a later slot:
+    while (i < MAX_TEXT_DEPTH && accumulator.length >= min_len_for_depth[i]) {
+        if (balanced_texts[i].length) {
+            accumulator = simple_concatenation(balanced_texts[i], accumulator);
+            balanced_texts[i] = EMPTY_TEXT;
+        }
+        i++;
+    }
+    // The loop above runs at least once for a non-empty text, so i >= 1 here:
+    i--;
+    balanced_texts[i] = accumulator;
+}
+
+// Feed `text` into the balanced-texts array. Concatenation nodes that are too
+// deep or not balanced are split and their halves inserted recursively (left
+// first); everything else is inserted as a single unit.
+static void insert_balanced_recursive(Text_t balanced_texts[MAX_TEXT_DEPTH], Text_t text)
+{
+    // The depth bound is tested first: IS_BALANCED_TEXT() indexes
+    // min_len_for_depth[] with the depth, which is only valid below
+    // MAX_TEXT_DEPTH.
+    if (text.tag == TEXT_CONCAT && (text.depth >= MAX_TEXT_DEPTH || !IS_BALANCED_TEXT(text))) {
+        insert_balanced_recursive(balanced_texts, *text.left);
+        insert_balanced_recursive(balanced_texts, *text.right);
+    } else {
+        insert_balanced(balanced_texts, text);
+    }
+}
+
+// Concatenate `a` and `b` into a rebalanced tree: both texts are broken into
+// pieces held in a per-depth array, which is then folded back together from
+// the lowest slot upward until the combined length is reached.
+static Text_t rebalanced(Text_t a, Text_t b)
+{
+    Text_t slots[MAX_TEXT_DEPTH];
+    memset(slots, 0, sizeof(slots));
+    insert_balanced_recursive(slots, a);
+    insert_balanced_recursive(slots, b);
+
+    Text_t joined = EMPTY_TEXT;
+    int slot = 0;
+    while (joined.length < a.length + b.length) {
+        if (slots[slot].length)
+            joined = simple_concatenation(slots[slot], joined);
+        slot++;
+    }
+    return joined;
+}
+
+// Join two texts with a single concatenation node (no merging of short
+// literals). An empty operand yields the other operand unchanged. If the new
+// node's depth would reach MAX_TEXT_DEPTH, the result is rebalanced instead.
+Text_t simple_concatenation(Text_t a, Text_t b)
+{
+    if (a.length == 0) return b;
+    if (b.length == 0) return a;
+
+    // Rebalancing is deferred until the depth limit is hit, since many
+    // concatenations are ephemeral (e.g. intermediate values built up inside
+    // a loop) and never need to be balanced.
+    uint16_t new_depth = 1 + MAX(a.depth, b.depth);
+    if (new_depth >= MAX_TEXT_DEPTH)
+        return rebalanced(a, b);
+
+    // Both halves live in one allocation: [0] is the left, [1] the right.
+    Text_t *halves = GC_MALLOC(sizeof(Text_t[2]));
+    halves[0] = a;
+    halves[1] = b;
+
+    Text_t node = {
+        .tag=TEXT_CONCAT,
+        .length=a.length + b.length,
+        .depth=new_depth,
+        .left=&halves[0],
+        .right=&halves[1],
+    };
+    return node;
+}
+
+// Concatenate two texts whose boundary is already known not to need
+// re-normalization. Short leaf texts are merged into a single flat buffer
+// instead of allocating a concatenation node.
+static Text_t concat2_assuming_safe(Text_t a, Text_t b)
+{
+    if (a.length == 0) return b;
+    if (b.length == 0) return a;
+
+    const size_t total = (size_t)(a.length + b.length);
+
+    if (a.tag == TEXT_ASCII && b.tag == TEXT_ASCII && total <= SHORT_ASCII_LENGTH) {
+        // Two short ASCII texts: copy both into one ASCII buffer
+        char *merged = GC_MALLOC_ATOMIC(sizeof(char[total]));
+        memcpy(merged, a.ascii, sizeof(char[a.length]));
+        memcpy(merged + a.length, b.ascii, sizeof(char[b.length]));
+        struct Text_s ret = {
+            .tag=TEXT_ASCII,
+            .length=a.length + b.length,
+            .ascii=merged,
+        };
+        return ret;
+    }
+
+    if (a.tag != TEXT_CONCAT && b.tag != TEXT_CONCAT && total <= SHORT_GRAPHEMES_LENGTH) {
+        // Two short leaves (any mix of ASCII and graphemes): produce one
+        // grapheme buffer, widening ASCII bytes to int32_t where needed.
+        int32_t *merged = GC_MALLOC_ATOMIC(sizeof(int32_t[total]));
+        int32_t *out = merged;
+        const Text_t *parts[2] = {&a, &b};
+        for (int p = 0; p < 2; p++) {
+            const Text_t *part = parts[p];
+            if (part->tag == TEXT_GRAPHEMES) {
+                memcpy(out, part->graphemes, sizeof(int32_t[part->length]));
+                out += part->length;
+            } else {
+                for (int64_t k = 0; k < part->length; k++)
+                    *(out++) = (int32_t)part->ascii[k];
+            }
+        }
+        struct Text_s ret = {
+            .tag=TEXT_GRAPHEMES,
+            .length=a.length + b.length,
+            .graphemes=merged,
+        };
+        return ret;
+    }
+
+    // If `a` ends in a leaf, try to merge that leaf with `b` first so that
+    // repeated small appends don't each add a new node.
+    if (a.tag == TEXT_CONCAT && b.tag != TEXT_CONCAT && a.right->tag != TEXT_CONCAT) {
+        Text_t tail = concat2_assuming_safe(*a.right, b);
+        return concat2_assuming_safe(*a.left, tail);
+    }
+
+    return simple_concatenation(a, b);
+}
+
+// Concatenate two texts, re-normalizing the graphemes on either side of the
+// seam when joining them could change their NFC form or fuse them into a
+// single grapheme cluster.
+static Text_t concat2(Text_t a, Text_t b)
+{
+    if (a.length == 0) return b;
+    if (b.length == 0) return a;
+
+    int32_t last_a = Text$get_grapheme(a, a.length-1);
+    int32_t first_b = Text$get_grapheme(b, 0);
+
+    // Magic number, we know that no codepoints below here trigger instability:
+    static const int32_t LOWEST_CODEPOINT_TO_CHECK = 0x300; // COMBINING GRAVE ACCENT
+    if (last_a >= 0 && last_a < LOWEST_CODEPOINT_TO_CHECK && first_b >= 0 && first_b < LOWEST_CODEPOINT_TO_CHECK)
+        return concat2_assuming_safe(a, b);
+
+    // Negative grapheme values are synthetic graphemes made of several codepoints.
+    size_t len = (last_a >= 0) ? 1 : NUM_GRAPHEME_CODEPOINTS(last_a);
+    len += (first_b >= 0) ? 1 : NUM_GRAPHEME_CODEPOINTS(first_b);
+
+    // Gather the codepoints of both boundary graphemes into one buffer:
+    ucs4_t codepoints[len];
+    ucs4_t *dest = codepoints;
+    if (last_a < 0)
+        dest = mempcpy(dest, GRAPHEME_CODEPOINTS(last_a), sizeof(ucs4_t[NUM_GRAPHEME_CODEPOINTS(last_a)]));
+    else
+        *(dest++) = (ucs4_t)last_a;
+
+    if (first_b < 0)
+        dest = mempcpy(dest, GRAPHEME_CODEPOINTS(first_b), sizeof(ucs4_t[NUM_GRAPHEME_CODEPOINTS(first_b)]));
+    else
+        *(dest++) = (ucs4_t)first_b;
+
+    // Do a normalization run for these two graphemes and see if it looks different.
+    // Normalization should not exceed 3x the input length; if it does,
+    // u32_normalize() returns a separately allocated buffer, which is why
+    // everything below reads from `normalized` and never from `norm_buf`.
+    ucs4_t norm_buf[3*len];
+    size_t norm_length = sizeof(norm_buf)/sizeof(norm_buf[0]);
+    ucs4_t *normalized = u32_normalize(UNINORM_NFC, codepoints, len, norm_buf, &norm_length);
+    bool stable = (norm_length == len && memcmp(codepoints, normalized, sizeof(codepoints)) == 0);
+
+    if (stable) {
+        // Unchanged codepoints can still fuse into one grapheme cluster:
+        const void *second_grapheme = u32_grapheme_next(normalized, &normalized[norm_length]);
+        if (second_grapheme == &normalized[norm_length])
+            stable = false;
+    }
+
+    if likely (stable) {
+        if (normalized != norm_buf)
+            free(normalized);
+        return concat2_assuming_safe(a, b);
+    }
+
+    // Fix: build the glue from the buffer that actually holds the normalized
+    // output (the original read `norm_buf` even when it had not been written).
+    Text_t glue = text_from_u32(normalized, (int64_t)norm_length, false);
+
+    if (normalized != norm_buf)
+        free(normalized);
+
+    // Replace the last grapheme of `a` and the first of `b` with the glue:
+    if (a.length == 1 && b.length == 1)
+        return glue;
+    else if (a.length == 1)
+        return concat2_assuming_safe(glue, Text$slice(b, I(2), I(b.length)));
+    else if (b.length == 1)
+        return concat2_assuming_safe(Text$slice(a, I(1), I(a.length-1)), glue);
+    else
+        return concat2_assuming_safe(
+            concat2_assuming_safe(Text$slice(a, I(1), I(a.length-1)), glue),
+            Text$slice(b, I(2), I(b.length)));
+}
+
+// Concatenate `n` texts in order, skipping empty items after the first.
+public Text_t Text$_concat(int n, Text_t items[n])
+{
+    if (n == 0) return EMPTY_TEXT;
+
+    Text_t joined = items[0];
+    int idx = 1;
+    while (idx < n) {
+        Text_t piece = items[idx++];
+        if (piece.length > 0)
+            joined = concat2(joined, piece);
+    }
+    return joined;
+}
+
+// Return `text` repeated `count` times. An empty text, or a zero or negative
+// count, yields the empty text. Fails if the result would exceed 2^40 graphemes.
+public Text_t Text$repeat(Text_t text, Int_t count)
+{
+    if (text.length == 0 || Int$is_negative(count))
+        return EMPTY_TEXT;
+
+    Int_t result_len = Int$times(count, I(text.length));
+    if (Int$compare_value(result_len, I(1l<<40)) > 0)
+        fail("Text repeating would produce too big of an result!");
+
+    int64_t count64 = Int64$from_int(count, false);
+    // Fix: a count of zero used to fall through and return one copy of `text`.
+    if (count64 <= 0)
+        return EMPTY_TEXT;
+
+    Text_t ret = text;
+    for (int64_t c = 1; c < count64; c++)
+        ret = concat2(ret, text);
+    return ret;
+}
+
+// Display width of `text` as reported by u8_strwidth() for the given language.
+public Int_t Text$width(Text_t text, Text_t language)
+{
+    const uint8_t *utf8 = (const uint8_t*)Text$as_c_string(text);
+    const char *lang = Text$as_c_string(language);
+    int columns = u8_strwidth(utf8, lang);
+    return Int$from_int32(columns);
+}
+
+// Build padding exactly `target_width` columns wide by repeating `to_repeat`.
+// Whole copies are used while they fit, then individual graphemes of
+// `to_repeat`. If the next grapheme would overshoot, spaces fill the rest.
+// Returns the empty text when `target_width` is not positive.
+static Text_t Text$repeat_to_width(Text_t to_repeat, int64_t target_width, Text_t language)
+{
+    if (target_width <= 0)
+        return EMPTY_TEXT;
+
+    const char *lang_str = Text$as_c_string(language);
+    int64_t width = (int64_t)u8_strwidth((const uint8_t*)Text$as_c_string(to_repeat), lang_str);
+    // Fix: padding with no display width (e.g. only zero-width or combining
+    // characters) never advances `repeated_width`, so the loop below would
+    // never terminate.
+    if (width <= 0)
+        fail("Cannot pad with zero-width text!");
+
+    Text_t repeated = EMPTY_TEXT;
+    int64_t repeated_width = 0;
+    while (repeated_width + width <= target_width) {
+        repeated = concat2(repeated, to_repeat);
+        repeated_width += width;
+    }
+
+    if (repeated_width < target_width) {
+        // Fill the remainder one grapheme at a time:
+        for (int64_t i = 0; repeated_width < target_width && i < to_repeat.length; i++) {
+            Text_t c = Text$slice(to_repeat, I_small(i+1), I_small(i+1));
+            int64_t w = (int64_t)u8_strwidth((const uint8_t*)Text$as_c_string(c), lang_str);
+            if (repeated_width + w > target_width) {
+                // This grapheme is too wide for what is left; use spaces instead
+                repeated = concat2(repeated, Text$repeat(Text(" "), I(target_width - repeated_width)));
+                repeated_width = target_width;
+                break;
+            }
+            repeated = concat2(repeated, c);
+            repeated_width += w;
+        }
+    }
+
+    return repeated;
+}
+
+// Prepend copies of `padding` so the result is `width` columns wide.
+public Text_t Text$left_pad(Text_t text, Int_t width, Text_t padding, Text_t language)
+{
+    if (padding.length == 0)
+        fail("Cannot pad with an empty text!");
+
+    int64_t target = Int64$from_int(width, false);
+    int64_t current = Int64$from_int(Text$width(text, language), false);
+    Text_t fill = Text$repeat_to_width(padding, target - current, language);
+    return concat2(fill, text);
+}
+
+// Append copies of `padding` so the result is `width` columns wide.
+public Text_t Text$right_pad(Text_t text, Int_t width, Text_t padding, Text_t language)
+{
+    if (padding.length == 0)
+        fail("Cannot pad with an empty text!");
+
+    int64_t target = Int64$from_int(width, false);
+    int64_t current = Int64$from_int(Text$width(text, language), false);
+    Text_t fill = Text$repeat_to_width(padding, target - current, language);
+    return concat2(text, fill);
+}
+
+// Pad `text` on both sides so the result is `width` columns wide. When the
+// amount of padding is odd, the extra column goes on the right.
+public Text_t Text$middle_pad(Text_t text, Int_t width, Text_t padding, Text_t language)
+{
+    if (padding.length == 0)
+        fail("Cannot pad with an empty text!");
+
+    int64_t target = Int64$from_int(width, false);
+    int64_t current = Int64$from_int(Text$width(text, language), false);
+    int64_t missing = target - current;
+    Text_t left_fill = Text$repeat_to_width(padding, missing/2, language);
+    Text_t right_fill = Text$repeat_to_width(padding, (missing+1)/2, language);
+    return Texts(left_fill, text, right_fill);
+}
+
+// Return the graphemes of `text` from `first_int` to `last_int` (1-indexed,
+// inclusive). Negative indices count back from the end of the text. An index
+// of 0 for `first_int` is an error; `last_int` of 0 gives the empty text.
+// Ranges that extend past either end of the text are clamped.
+public Text_t Text$slice(Text_t text, Int_t first_int, Int_t last_int)
+{
+    int64_t first = Int64$from_int(first_int, false);
+    int64_t last = Int64$from_int(last_int, false);
+    if (first == 0) fail("Invalid index: 0");
+    if (last == 0) return EMPTY_TEXT;
+
+    if (first < 0) first = text.length + first + 1;
+    if (last < 0) last = text.length + last + 1;
+
+    // Fix: a negative `first` that reaches before the start of the text used
+    // to stay below 1, which produced an out-of-bounds pointer/length on leaf
+    // texts and was re-read as a negative index by the recursive calls.
+    if (first < 1) first = 1;
+    if (last > text.length) last = text.length;
+
+    if (first > text.length || last < first)
+        return EMPTY_TEXT;
+
+    if (first == 1 && last == text.length)
+        return text;
+
+    // Descend into whichever side holds the whole range; if the range
+    // straddles both children, slice each side and join the pieces.
+    while (text.tag == TEXT_CONCAT) {
+        if (last < text.left->length) {
+            text = *text.left;
+        } else if (first > text.left->length) {
+            first -= text.left->length;
+            last -= text.left->length;
+            text = *text.right;
+        } else {
+            return concat2(Text$slice(*text.left, I(first), I(text.length)),
+                           Text$slice(*text.right, I(1), I(last-text.left->length)));
+        }
+    }
+
+    // Leaf texts are sliced without copying: the result points into the
+    // original buffer.
+    switch (text.tag) {
+    case TEXT_ASCII: {
+        return (Text_t){
+            .tag=TEXT_ASCII,
+            .length=last - first + 1,
+            .ascii=text.ascii + (first-1),
+        };
+    }
+    case TEXT_GRAPHEMES: {
+        return (Text_t){
+            .tag=TEXT_GRAPHEMES,
+            .length=last - first + 1,
+            .graphemes=text.graphemes + (first-1),
+        };
+    }
+    default: errx(1, "Invalid tag");
+    }
+}
+
+// Slice of `text` starting at index `first` and running to the end.
+public Text_t Text$from(Text_t text, Int_t first)
+{
+    const Int_t through_end = I_small(-1);
+    return Text$slice(text, first, through_end);
+}
+
+// Slice of `text` from the beginning up to and including index `last`.
+public Text_t Text$to(Text_t text, Int_t last)
+{
+    const Int_t from_start = I_small(1);
+    return Text$slice(text, from_start, last);
+}
+
+// Return a new text with the graphemes of `text` in reverse order.
+public Text_t Text$reversed(Text_t text)
+{
+    switch (text.tag) {
+    case TEXT_ASCII: {
+        char *flipped = GC_MALLOC_ATOMIC(sizeof(char[text.length]));
+        for (int64_t src = 0, dst = text.length - 1; src < text.length; src++, dst--)
+            flipped[dst] = text.ascii[src];
+        struct Text_s ret = {
+            .tag=TEXT_ASCII,
+            .length=text.length,
+            .ascii=flipped,
+        };
+        return ret;
+    }
+    case TEXT_GRAPHEMES: {
+        int32_t *flipped = GC_MALLOC_ATOMIC(sizeof(int32_t[text.length]));
+        for (int64_t src = 0, dst = text.length - 1; src < text.length; src++, dst--)
+            flipped[dst] = text.graphemes[src];
+        struct Text_s ret = {
+            .tag=TEXT_GRAPHEMES,
+            .length=text.length,
+            .graphemes=flipped,
+        };
+        return ret;
+    }
+    case TEXT_CONCAT: {
+        // Reverse each half and swap their order
+        Text_t new_front = Text$reversed(*text.right);
+        Text_t new_back = Text$reversed(*text.left);
+        return concat2(new_front, new_back);
+    }
+    default: errx(1, "Invalid tag");
+    }
+}
+
+// Return the single grapheme cluster at `index_int` (1-indexed; negative
+// indices count back from the end) as a new one-grapheme text. Fails on
+// index 0 or an index outside the text.
+public PUREFUNC Text_t Text$cluster(Text_t text, Int_t index_int)
+{
+    int64_t index = Int64$from_int(index_int, false);
+    if (index == 0) fail("Invalid index: 0");
+
+    if (index < 0) index = text.length + index + 1;
+
+    if (index > text.length || index < 1)
+        fail("Invalid index: %ld is beyond the length of the text (length = %ld)",
+             Int64$from_int(index_int, false), text.length);
+
+    while (text.tag == TEXT_CONCAT) {
+        if (index <= text.left->length) {
+            text = *text.left;
+        } else {
+            // Fix: make the index relative to the right child. The original
+            // kept the parent-relative index and read past the leaf buffer.
+            index -= text.left->length;
+            text = *text.right;
+        }
+    }
+
+    switch (text.tag) {
+    case TEXT_ASCII: {
+        struct Text_s ret = {
+            .tag=TEXT_ASCII,
+            .length=1,
+            .ascii=GC_MALLOC_ATOMIC(sizeof(char)),
+        };
+        *(char*)&ret.ascii[0] = text.ascii[index-1];
+        return ret;
+    }
+    case TEXT_GRAPHEMES: {
+        struct Text_s ret = {
+            .tag=TEXT_GRAPHEMES,
+            .length=1,
+            .graphemes=GC_MALLOC_ATOMIC(sizeof(int32_t)),
+        };
+        *(int32_t*)&ret.graphemes[0] = text.graphemes[index-1];
+        return ret;
+    }
+    default: errx(1, "Invalid tag");
+    }
+}
+
+// Build a grapheme-based text from `num_codepoints` UTF-32 codepoints. When
+// `normalize` is true the input is first converted to NFC. Codepoint runs that
+// form a multi-codepoint grapheme cluster are stored as synthetic graphemes.
+Text_t text_from_u32(ucs4_t *codepoints, int64_t num_codepoints, bool normalize)
+{
+    // Normalization is apparently guaranteed to never exceed 3x in the input length.
+    // Fix: keep the array at least one element long, since a zero-length
+    // variable-length array (empty input) is undefined behavior.
+    ucs4_t norm_buf[MAX(1, MIN(256, 3*num_codepoints))];
+    if (normalize) {
+        size_t norm_length = sizeof(norm_buf)/sizeof(norm_buf[0]);
+        ucs4_t *normalized = u32_normalize(UNINORM_NFC, codepoints, (size_t)num_codepoints, norm_buf, &norm_length);
+        codepoints = normalized;
+        num_codepoints = (int64_t)norm_length;
+    }
+
+    // Intentionally overallocate here: allocate assuming each codepoint is a
+    // grapheme cluster. If that's not true, we'll have extra space at the end
+    // of the array, but the length will still be calculated correctly.
+    int32_t *graphemes = GC_MALLOC_ATOMIC(sizeof(int32_t[num_codepoints]));
+    struct Text_s ret = {
+        .tag=TEXT_GRAPHEMES,
+        .length=0,
+        .graphemes=graphemes,
+    };
+    const ucs4_t *src = codepoints;
+    while (src < &codepoints[num_codepoints]) {
+        // TODO: use grapheme breaks instead of u32_grapheme_next()?
+        const ucs4_t *next = u32_grapheme_next(src, &codepoints[num_codepoints]);
+        if (next == &src[1]) {
+            // Single-codepoint cluster: store the codepoint itself
+            graphemes[ret.length] = (int32_t)*src;
+        } else {
+            // Synthetic grapheme
+            graphemes[ret.length] = get_synthetic_grapheme(src, next-src);
+        }
+        ++ret.length;
+        src = next;
+    }
+    // u32_normalize() hands back its own buffer when norm_buf was too small.
+    if (normalize && codepoints != norm_buf) free(codepoints);
+    return ret;
+}
+
+// Create a text from the first `len` bytes of `str`, which need not be
+// NUL-terminated. Pure-ASCII input is copied as-is; otherwise the bytes must
+// be valid UTF-8 and are decoded and NFC-normalized. Returns NONE_TEXT for
+// invalid UTF-8.
+public OptionalText_t Text$from_strn(const char *str, size_t len)
+{
+    int64_t ascii_span = 0;
+    for (size_t i = 0; i < len && isascii(str[i]); i++)
+        ascii_span++;
+
+    if (ascii_span == (int64_t)len) { // All ASCII
+        char *copy = GC_MALLOC_ATOMIC(len);
+        memcpy(copy, str, len);
+        return (Text_t){
+            .tag=TEXT_ASCII,
+            .length=ascii_span,
+            .ascii=copy,
+        };
+    } else {
+        if (u8_check((uint8_t*)str, len) != NULL)
+            return NONE_TEXT;
+
+        ucs4_t buf[128];
+        size_t length = sizeof(buf)/sizeof(buf[0]);
+
+        // Fix: decode exactly `len` bytes, the same span u8_check() validated.
+        // The original used strlen() here, which reads past the end of
+        // buffers that are not NUL-terminated.
+        ucs4_t *codepoints = u8_to_u32((uint8_t*)str, len, buf, &length);
+        Text_t ret = text_from_u32(codepoints, (int64_t)length, true);
+        if (codepoints != buf) free(codepoints);
+        return ret;
+    }
+}
+
+// Create a text from a NUL-terminated C string; NULL gives the empty text.
+public OptionalText_t Text$from_str(const char *str)
+{
+    if (!str)
+        return Text("");
+    return Text$from_strn(str, strlen(str));
+}
+
+// Grow `*buf` if `extra` more bytes written at offset `used` would not fit.
+static void u8_buf_reserve(char **buf, int64_t *capacity, int64_t used, int64_t extra)
+{
+    if (used + extra > (int64_t)*capacity) {
+        *capacity = used + extra + 1;
+        *buf = GC_REALLOC(*buf, (size_t)*capacity);
+    }
+}
+
+// Append the UTF-8 encoding of `text` to `*buf` at offset `*i`, growing the
+// buffer as needed and advancing `*i` past the bytes written.
+static void u8_buf_append(Text_t text, char **buf, int64_t *capacity, int64_t *i)
+{
+    switch (text.tag) {
+    case TEXT_ASCII: {
+        u8_buf_reserve(buf, capacity, *i, text.length);
+        memcpy(*buf + *i, text.ascii, (size_t)text.length);
+        *i += text.length;
+        break;
+    }
+    case TEXT_GRAPHEMES: {
+        for (int64_t g = 0; g < text.length; g++) {
+            const int32_t *grapheme = &text.graphemes[g];
+            if (*grapheme < 0) {
+                // Synthetic grapheme: its UTF-8 form is stored alongside it
+                const uint8_t *stored = GRAPHEME_UTF8(*grapheme);
+                size_t stored_len = u8_strlen(stored);
+                u8_buf_reserve(buf, capacity, *i, (int64_t)stored_len);
+                memcpy(*buf + *i, stored, stored_len);
+                *i += (int64_t)stored_len;
+                continue;
+            }
+
+            // Plain codepoint: encode it on the spot
+            uint8_t scratch[64];
+            size_t encoded_len = sizeof(scratch);
+            uint8_t *encoded = u32_to_u8((ucs4_t*)grapheme, 1, scratch, &encoded_len);
+            u8_buf_reserve(buf, capacity, *i, (int64_t)encoded_len);
+            memcpy(*buf + *i, encoded, encoded_len);
+            *i += (int64_t)encoded_len;
+            if (encoded != scratch) free(encoded);
+        }
+        break;
+    }
+    case TEXT_CONCAT: {
+        u8_buf_append(*text.left, buf, capacity, i);
+        u8_buf_append(*text.right, buf, capacity, i);
+        break;
+    }
+    default: break;
+    }
+}
+
+// Encode `text` as a freshly allocated, NUL-terminated UTF-8 C string.
+public char *Text$as_c_string(Text_t text)
+{
+    int64_t capacity = text.length + 1;
+    int64_t used = 0;
+    char *bytes = GC_MALLOC_ATOMIC((size_t)capacity);
+    u8_buf_append(text, &bytes, &capacity, &used);
+
+    // Make sure there is room for the terminator
+    if (used >= (int64_t)capacity) {
+        capacity = used + 1;
+        bytes = GC_REALLOC(bytes, (size_t)capacity);
+    }
+    bytes[used] = '\0';
+    return bytes;
+}
+
+// Hash a text with siphash, feeding graphemes in pairs packed into 64 bits.
+// Every representation (ASCII, graphemes, concatenation) feeds the same
+// sequence of grapheme values, so texts with equal graphemes hash equally
+// regardless of how they are stored.
+PUREFUNC public uint64_t Text$hash(const void *obj, const TypeInfo_t*)
+{
+    Text_t text = *(Text_t*)obj;
+    siphash sh;
+    siphashinit(&sh, sizeof(int32_t[text.length]));
+
+    union {
+        int32_t chunks[2];
+        uint64_t whole;
+    } tmp = {.whole=0};
+    switch (text.tag) {
+    case TEXT_ASCII: {
+        const char *bytes = text.ascii;
+        for (int64_t i = 0; i + 1 < text.length; i += 2) {
+            tmp.chunks[0] = (int32_t)bytes[i];
+            tmp.chunks[1] = (int32_t)bytes[i+1];
+            siphashadd64bits(&sh, tmp.whole);
+        }
+        int32_t last = text.length & 0x1 ? (int32_t)bytes[text.length-1] : 0; // Odd number of graphemes
+        return siphashfinish_last_part(&sh, (uint64_t)last);
+    }
+    case TEXT_GRAPHEMES: {
+        const int32_t *graphemes = text.graphemes;
+        for (int64_t i = 0; i + 1 < text.length; i += 2) {
+            tmp.chunks[0] = graphemes[i];
+            // Fix: the second half of the pair is the *next* grapheme (the
+            // original hashed graphemes[i] twice).
+            tmp.chunks[1] = graphemes[i+1];
+            siphashadd64bits(&sh, tmp.whole);
+        }
+        int32_t last = text.length & 0x1 ? graphemes[text.length-1] : 0; // Odd number of graphemes
+        return siphashfinish_last_part(&sh, (uint64_t)last);
+    }
+    case TEXT_CONCAT: {
+        TextIter_t state = NEW_TEXT_ITER_STATE(text);
+        for (int64_t i = 0; i < (text.length & ~0x1); i += 2) {
+            tmp.chunks[0] = Text$get_grapheme_fast(&state, i);
+            // Fix: store into chunks[1] (the original overwrote chunks[0] and
+            // hashed an uninitialized chunks[1]).
+            tmp.chunks[1] = Text$get_grapheme_fast(&state, i+1);
+            siphashadd64bits(&sh, tmp.whole);
+        }
+
+        int32_t last = (text.length & 0x1) ? Text$get_grapheme_fast(&state, text.length-1) : 0;
+        return siphashfinish_last_part(&sh, (uint64_t)last);
+    }
+    default: errx(1, "Invalid text");
+    }
+}
+
+// Look up the grapheme at 0-based `index` using an iterator state that keeps
+// a stack of (text, offset) entries, with stack[0] holding the whole text.
+// The stack is left positioned on the leaf containing `index`.
+// Returns 0 when `index` is negative or past the end of the text.
+public int32_t Text$get_grapheme_fast(TextIter_t *state, int64_t index)
+{
+ if (index < 0) return 0;
+ if (index >= state->stack[0].text.length) return 0;
+
+ assert(state->stack[0].text.depth <= MAX_TEXT_DEPTH);
+
+ // Go up the stack as needed:
+ // (pop entries until the current one covers `index`)
+ while (index < state->stack[state->stack_index].offset
+ || index >= state->stack[state->stack_index].offset + state->stack[state->stack_index].text.length) {
+ state->stack_index -= 1;
+ assert(state->stack_index >= 0);
+ }
+
+ assert(state->stack_index >= 0 && state->stack_index <= MAX_TEXT_DEPTH);
+
+ // Go down the stack as needed:
+ // (push the child containing `index` until a non-concat text is on top)
+ while (state->stack[state->stack_index].text.tag == TEXT_CONCAT) {
+ Text_t text = state->stack[state->stack_index].text;
+ int64_t offset = state->stack[state->stack_index].offset;
+ assert(state->stack_index <= MAX_TEXT_DEPTH);
+ assert(index >= offset);
+ assert(index < offset + text.length);
+
+ state->stack_index += 1;
+ if (index < offset + text.left->length) {
+ state->stack[state->stack_index].text = *text.left;
+ state->stack[state->stack_index].offset = offset;
+ } else {
+ // The right child starts where the left child ends
+ state->stack[state->stack_index].text = *text.right;
+ state->stack[state->stack_index].offset = offset + text.left->length;
+ }
+ assert(state->stack_index >= 0 && state->stack_index <= MAX_TEXT_DEPTH);
+ }
+
+ Text_t text = state->stack[state->stack_index].text;
+ int64_t offset = state->stack[state->stack_index].offset;
+
+ if (index < offset || index >= offset + text.length) {
+ return 0;
+ }
+
+ // `offset` is the leaf's starting index within the whole text
+ switch (text.tag) {
+ case TEXT_ASCII: return (int32_t)text.ascii[index - offset];
+ case TEXT_GRAPHEMES: return text.graphemes[index - offset];
+ default: errx(1, "Invalid text");
+ }
+ return 0;
+}
+
+// Like Text$get_grapheme_fast(), but a synthetic (negative) grapheme is
+// replaced by its recorded main codepoint.
+public uint32_t Text$get_main_grapheme_fast(TextIter_t *state, int64_t index)
+{
+    int32_t grapheme = Text$get_grapheme_fast(state, index);
+    if (grapheme >= 0)
+        return (ucs4_t)grapheme;
+    return synthetic_graphemes[-grapheme-1].main_codepoint;
+}
+
+// Three-way comparison of two texts, grapheme by grapheme, comparing the
+// underlying codepoints. A text that is a strict prefix of the other sorts
+// first. Returns negative, zero, or positive.
+PUREFUNC public int32_t Text$compare(const void *va, const void *vb, const TypeInfo_t*)
+{
+    if (va == vb) return 0;
+    const Text_t a = *(const Text_t*)va;
+    const Text_t b = *(const Text_t*)vb;
+
+    // TODO: make this smarter and more efficient
+    int64_t len = MAX(a.length, b.length);
+    TextIter_t a_state = NEW_TEXT_ITER_STATE(a), b_state = NEW_TEXT_ITER_STATE(b);
+    for (int64_t i = 0; i < len; i++) {
+        // Past the end of the shorter text this yields 0
+        int32_t ai = Text$get_grapheme_fast(&a_state, i);
+        int32_t bi = Text$get_grapheme_fast(&b_state, i);
+        if (ai == bi) continue;
+        int32_t cmp;
+        // Fix: only negative values are synthetic graphemes. The original
+        // tested `> 0`, so a value of 0 (NUL, or past-the-end) was looked up
+        // in the synthetic grapheme table at index -1.
+        if (ai >= 0 && bi >= 0) {
+            cmp = u32_cmp((ucs4_t*)&ai, (ucs4_t*)&bi, 1);
+        } else if (ai >= 0) {
+            cmp = u32_cmp2(
+                (ucs4_t*)&ai, 1,
+                GRAPHEME_CODEPOINTS(bi),
+                NUM_GRAPHEME_CODEPOINTS(bi));
+        } else if (bi >= 0) {
+            cmp = u32_cmp2(
+                GRAPHEME_CODEPOINTS(ai),
+                NUM_GRAPHEME_CODEPOINTS(ai),
+                (ucs4_t*)&bi, 1);
+        } else {
+            cmp = u32_cmp2(
+                GRAPHEME_CODEPOINTS(ai),
+                NUM_GRAPHEME_CODEPOINTS(ai),
+                GRAPHEME_CODEPOINTS(bi),
+                NUM_GRAPHEME_CODEPOINTS(bi));
+        }
+        if (cmp != 0) return cmp;
+    }
+    // Only reachable with unequal lengths when the longer text's extra
+    // graphemes are all NUL; order those by length.
+    return (int32_t)(a.length > b.length) - (int32_t)(a.length < b.length);
+}
+
+// True when the first graphemes of `text` are exactly those of `prefix`.
+PUREFUNC public bool Text$starts_with(Text_t text, Text_t prefix)
+{
+    if (prefix.length > text.length)
+        return false;
+
+    TextIter_t text_iter = NEW_TEXT_ITER_STATE(text);
+    TextIter_t prefix_iter = NEW_TEXT_ITER_STATE(prefix);
+    int64_t pos = 0;
+    while (pos < prefix.length) {
+        int32_t from_text = Text$get_grapheme_fast(&text_iter, pos);
+        int32_t from_prefix = Text$get_grapheme_fast(&prefix_iter, pos);
+        if (from_text != from_prefix)
+            return false;
+        pos++;
+    }
+    return true;
+}
+
+// True when the last graphemes of `text` are exactly those of `suffix`.
+PUREFUNC public bool Text$ends_with(Text_t text, Text_t suffix)
+{
+    if (suffix.length > text.length)
+        return false;
+
+    TextIter_t text_iter = NEW_TEXT_ITER_STATE(text);
+    TextIter_t suffix_iter = NEW_TEXT_ITER_STATE(suffix);
+    int64_t pos = 0;
+    while (pos < suffix.length) {
+        // Position in `text` that lines up with position `pos` of the suffix
+        int32_t from_text = Text$get_grapheme_fast(&text_iter, text.length - suffix.length + pos);
+        int32_t from_suffix = Text$get_grapheme_fast(&suffix_iter, pos);
+        if (from_text != from_suffix)
+            return false;
+        pos++;
+    }
+    return true;
+}
+
+// True when both texts have the same length and identical graphemes.
+PUREFUNC public bool Text$equal_values(Text_t a, Text_t b)
+{
+    if (a.length != b.length)
+        return false;
+
+    TextIter_t a_iter = NEW_TEXT_ITER_STATE(a);
+    TextIter_t b_iter = NEW_TEXT_ITER_STATE(b);
+    // TODO: make this smarter and more efficient
+    int64_t pos = 0;
+    while (pos < a.length) {
+        int32_t from_a = Text$get_grapheme_fast(&a_iter, pos);
+        int32_t from_b = Text$get_grapheme_fast(&b_iter, pos);
+        if (from_a != from_b)
+            return false;
+        pos++;
+    }
+    return true;
+}
+
+// Equality on pointers to texts; identical pointers are equal without
+// inspecting the contents.
+PUREFUNC public bool Text$equal(const void *a, const void *b, const TypeInfo_t*)
+{
+    if (a == b)
+        return true;
+    Text_t lhs = *(Text_t*)a;
+    Text_t rhs = *(Text_t*)b;
+    return Text$equal_values(lhs, rhs);
+}
+
+// Case-insensitive equality for texts of the same length. Graphemes that
+// differ are compared with u32_casecmp() for the given language.
+PUREFUNC public bool Text$equal_ignoring_case(Text_t a, Text_t b, Text_t language)
+{
+    if (a.length != b.length)
+        return false;
+
+    TextIter_t a_iter = NEW_TEXT_ITER_STATE(a);
+    TextIter_t b_iter = NEW_TEXT_ITER_STATE(b);
+    const char *uc_language = Text$as_c_string(language);
+    for (int64_t pos = 0; pos < a.length; pos++) {
+        int32_t ai = Text$get_grapheme_fast(&a_iter, pos);
+        int32_t bi = Text$get_grapheme_fast(&b_iter, pos);
+        if (ai == bi)
+            continue;
+
+        // Expand each grapheme to its codepoints (negative = synthetic)
+        const ucs4_t *a_codepoints = (ucs4_t*)&ai;
+        int64_t a_len = 1;
+        if (ai < 0) {
+            a_codepoints = GRAPHEME_CODEPOINTS(ai);
+            a_len = NUM_GRAPHEME_CODEPOINTS(ai);
+        }
+
+        const ucs4_t *b_codepoints = (ucs4_t*)&bi;
+        int64_t b_len = 1;
+        if (bi < 0) {
+            b_codepoints = GRAPHEME_CODEPOINTS(bi);
+            b_len = NUM_GRAPHEME_CODEPOINTS(bi);
+        }
+
+        int cmp = 0;
+        (void)u32_casecmp(a_codepoints, (size_t)a_len, b_codepoints, (size_t)b_len, uc_language, UNINORM_NFC, &cmp);
+        if (cmp != 0)
+            return false;
+    }
+    return true;
+}
+
+// Uppercase `text` according to the rules of `language`.
+public Text_t Text$upper(Text_t text, Text_t language)
+{
+    if (text.length == 0)
+        return text;
+
+    Array_t input = Text$utf32_codepoints(text);
+    const char *uc_language = Text$as_c_string(language);
+    ucs4_t stack_buf[128];
+    size_t converted_len = sizeof(stack_buf)/sizeof(stack_buf[0]);
+    ucs4_t *converted = u32_toupper(input.data, (size_t)input.length, uc_language, UNINORM_NFC, stack_buf, &converted_len);
+    Text_t result = text_from_u32(converted, (int64_t)converted_len, false);
+    // The library allocates its own buffer when the stack one is too small
+    if (converted != stack_buf)
+        free(converted);
+    return result;
+}
+
+// Lowercase `text` according to the rules of `language`.
+public Text_t Text$lower(Text_t text, Text_t language)
+{
+    if (text.length == 0)
+        return text;
+
+    Array_t input = Text$utf32_codepoints(text);
+    const char *uc_language = Text$as_c_string(language);
+    ucs4_t stack_buf[128];
+    size_t converted_len = sizeof(stack_buf)/sizeof(stack_buf[0]);
+    ucs4_t *converted = u32_tolower(input.data, (size_t)input.length, uc_language, UNINORM_NFC, stack_buf, &converted_len);
+    Text_t result = text_from_u32(converted, (int64_t)converted_len, false);
+    // The library allocates its own buffer when the stack one is too small
+    if (converted != stack_buf)
+        free(converted);
+    return result;
+}
+
+// Titlecase `text` according to the rules of `language`.
+public Text_t Text$title(Text_t text, Text_t language)
+{
+    if (text.length == 0)
+        return text;
+
+    Array_t input = Text$utf32_codepoints(text);
+    const char *uc_language = Text$as_c_string(language);
+    ucs4_t stack_buf[128];
+    size_t converted_len = sizeof(stack_buf)/sizeof(stack_buf[0]);
+    ucs4_t *converted = u32_totitle(input.data, (size_t)input.length, uc_language, UNINORM_NFC, stack_buf, &converted_len);
+    Text_t result = text_from_u32(converted, (int64_t)converted_len, false);
+    // The library allocates its own buffer when the stack one is too small
+    if (converted != stack_buf)
+        free(converted);
+    return result;
+}
+
+// printf argument-info callback for the text specifier: reports a single
+// pointer-typed argument of size sizeof(Text_t).
+public int printf_text_size(const struct printf_info *info, size_t n, int argtypes[n], int sizes[n])
+{
+    (void)info;
+    if (n < 1)
+        return -1;
+
+    sizes[0] = sizeof(Text_t);
+    argtypes[0] = PA_POINTER;
+    return 1;
+}
+
+// printf output callback for the text specifier. The alternate-form flag
+// selects text_visualize() instead of plain printing.
+public int printf_text(FILE *stream, const struct printf_info *info, const void *const args[])
+{
+    Text_t *text_ptr = *(Text_t**)args[0];
+    if (!info->alt)
+        return Text$print(stream, *text_ptr);
+    return text_visualize(stream, *text_ptr, 0);
+}
+
+// Render `text` as a quoted literal delimited by `quote_char`, escaping
+// control characters. When `colorize` is set, ANSI color codes are wrapped
+// around the literal and its escape sequences. Quote characters other than
+// ", ' and ` are prefixed with "$".
+static INLINE Text_t _quoted(Text_t text, bool colorize, char quote_char)
+{
+    Text_t ret = colorize ? Text("\x1b[35m") : EMPTY_TEXT;
+    if (quote_char != '"' && quote_char != '\'' && quote_char != '`')
+        ret = concat2_assuming_safe(ret, Text("$"));
+
+    Text_t quote_text = Text$from_strn(&quote_char, 1);
+    ret = concat2_assuming_safe(ret, quote_text);
+
+// Append a backslash escape; the first escape in a run is preceded by "$".
+#define add_escaped(str) ({ if (colorize) ret = concat2_assuming_safe(ret, Text("\x1b[34;1m")); \
+                          if (!just_escaped) ret = concat2_assuming_safe(ret, Text("$")); \
+                          ret = concat2_assuming_safe(ret, Text("\\" str)); \
+                          just_escaped = true; \
+                          if (colorize) ret = concat2_assuming_safe(ret, Text("\x1b[0;35m")); })
+    TextIter_t state = NEW_TEXT_ITER_STATE(text);
+    bool just_escaped = false;
+    // TODO: optimize for spans of non-escaped text
+    for (int64_t i = 0; i < text.length; i++) {
+        int32_t g = Text$get_grapheme_fast(&state, i);
+        switch (g) {
+        case '\a': add_escaped("a"); break;
+        case '\b': add_escaped("b"); break;
+        case '\x1b': add_escaped("e"); break;
+        case '\f': add_escaped("f"); break;
+        case '\n': add_escaped("n"); break;
+        case '\r': add_escaped("r"); break;
+        case '\t': add_escaped("t"); break;
+        case '\v': add_escaped("v"); break;
+        case '\\': {
+            if (just_escaped) {
+                add_escaped("\\");
+            } else {
+                ret = concat2_assuming_safe(ret, Text("\\"));
+                just_escaped = false;
+            }
+            break;
+        }
+        case '$': {
+            if (quote_char == '\'') {
+                ret = concat2_assuming_safe(ret, Text("$"));
+                just_escaped = false;
+            } else {
+                add_escaped("$");
+            }
+            break;
+        }
+        case '\x00' ... '\x06': case '\x0E' ... '\x1A':
+        case '\x1C' ... '\x1F': case '\x7F' ... '\x7F': {
+            if (colorize) ret = concat2_assuming_safe(ret, Text("\x1b[34;1m"));
+            ret = concat2_assuming_safe(ret, Text("\\x"));
+            // Fix: two hex digits plus the terminating NUL need 3 bytes; the
+            // original sprintf'd into a 2-byte buffer.
+            char tmp[3];
+            snprintf(tmp, sizeof(tmp), "%02X", (unsigned)g);
+            ret = concat2_assuming_safe(ret, Text$from_strn(tmp, 2));
+            if (colorize)
+                ret = concat2_assuming_safe(ret, Text("\x1b[0;35m"));
+            just_escaped = true;
+            break;
+        }
+        default: {
+            if (g == quote_char) {
+                ret = concat2_assuming_safe(ret, quote_text);
+            } else {
+                ret = concat2_assuming_safe(ret, Text$slice(text, I(i+1), I(i+1)));
+                just_escaped = false;
+            }
+            break;
+        }
+        }
+    }
+#undef add_escaped
+
+    ret = concat2_assuming_safe(ret, quote_text);
+    if (colorize)
+        ret = concat2_assuming_safe(ret, Text("\x1b[m"));
+
+    return ret;
+}
+
+// Produce the printable representation of a text value. With a NULL `vtext`
+// the name of the type is returned instead. Paths are shown in parentheses,
+// patterns are delimited with / or |, and other texts are quoted with
+// whichever quote character needs the least escaping. Texts with a custom
+// language get a "$lang" prefix.
+public Text_t Text$as_text(const void *vtext, bool colorize, const TypeInfo_t *info)
+{
+    // Fix: `info` is treated as optional further down, so guard this
+    // dereference the same way.
+    if (info && info->TextInfo.lang && streq(info->TextInfo.lang, "Path")) {
+        if (!vtext) return Text("Path");
+        Text_t text = *(Text_t*)vtext;
+        return Text$format("(%s%k%s)", colorize ? "\x1b[35m" : "", &text, colorize ? "\x1b[m" : "");
+    }
+
+    if (!vtext) return info && info->TextInfo.lang ? Text$from_str(info->TextInfo.lang) : Text("Text");
+
+    Text_t text = *(Text_t*)vtext;
+    char quote_char;
+    if (info == &Pattern$info) {
+        quote_char = Text$has(text, Pattern("/")) && !Text$has(text, Pattern("|")) ? '|' : '/';
+    } else {
+        // Figure out the best quotation mark to use:
+        bool has_dollar = false, has_double_quote = false, has_backtick = false,
+             has_single_quote = false, needs_escapes = false;
+        TextIter_t state = NEW_TEXT_ITER_STATE(text);
+        for (int64_t i = 0; i < text.length; i++) {
+            int32_t g = Text$get_grapheme_fast(&state, i);
+            if (g == '$') {
+                has_dollar = true;
+            } else if (g == '"') {
+                has_double_quote = true;
+            } else if (g == '`') {
+                has_backtick = true;
+            } else if (g == '\'') {
+                // Previously folded into `needs_escapes`; either flag rules
+                // out single-quoting below, so the chosen quote is unchanged.
+                has_single_quote = true;
+            } else if (g == (g & 0x7F) && (g == '\n' || g == '\r' || g == '\t' || !isprint((char)g))) {
+                needs_escapes = true;
+            }
+        }
+
+        // If there's dollar signs and/or double quotes in the string, it would
+        // be nice to avoid needing to escape them by using single quotes, but
+        // only if we don't have single quotes or need to escape anything else
+        // (because single quotes don't have interpolation):
+        if ((has_dollar || has_double_quote) && !has_single_quote && !needs_escapes)
+            quote_char = '\'';
+        // If there is a double quote, but no backtick, we can save a bit of
+        // escaping by using backtick instead of double quote:
+        else if (has_double_quote && !has_backtick)
+            quote_char = '`';
+        // Otherwise fall back to double quotes as the default quoting style:
+        else
+            quote_char = '"';
+    }
+
+    Text_t as_text = _quoted(text, colorize, quote_char);
+    if (info && info->TextInfo.lang && info != &Text$info && info != &Pattern$info)
+        as_text = Text$concat(
+            colorize ? Text("\x1b[1m$") : Text("$"),
+            Text$from_str(info->TextInfo.lang),
+            colorize ? Text("\x1b[0m") : Text(""),
+            as_text);
+    return as_text;
+}
+
+// Quote `text` with double quotes, escaping as needed.
+public Text_t Text$quoted(Text_t text, bool colorize)
+{
+    const char double_quote = '"';
+    return _quoted(text, colorize, double_quote);
+}
+
+// Join the texts in `pieces` with `glue` between consecutive items.
+public Text_t Text$join(Text_t glue, Array_t pieces)
+{
+    if (pieces.length == 0)
+        return EMPTY_TEXT;
+
+    Text_t joined = *(Text_t*)pieces.data;
+    int64_t idx = 1;
+    while (idx < pieces.length) {
+        // Items are `stride` bytes apart in the array's storage
+        Text_t *piece = (Text_t*)(pieces.data + idx*pieces.stride);
+        joined = Text$concat(joined, glue, *piece);
+        idx++;
+    }
+    return joined;
+}
+
+// printf-style formatting into a new text. The arguments are formatted
+// twice: once to measure the output and once to produce it.
+__attribute__((format(printf, 1, 2)))
+public Text_t Text$format(const char *fmt, ...)
+{
+    va_list args;
+    va_start(args, fmt);
+
+    // Fix: a va_list cannot be reused after vsnprintf() has consumed it, so
+    // take a copy for the second pass.
+    va_list args_copy;
+    va_copy(args_copy, args);
+
+    int len = vsnprintf(NULL, 0, fmt, args);
+    va_end(args);
+    if (len < 0) {
+        // Formatting failed; there is nothing sensible to render
+        va_end(args_copy);
+        return EMPTY_TEXT;
+    }
+
+    char *str = GC_MALLOC_ATOMIC((size_t)(len+1));
+    vsnprintf(str, (size_t)(len+1), fmt, args_copy);
+    va_end(args_copy);
+
+    Text_t ret = Text$from_str(str);
+    return ret;
+}
+
+// Split `text` into an array of one-grapheme texts.
+public Array_t Text$clusters(Text_t text)
+{
+    Array_t result = {};
+    int64_t pos = 1;
+    while (pos <= text.length) {
+        Text_t single = Text$slice(text, I(pos), I(pos));
+        Array$insert(&result, &single, I_small(0), sizeof(Text_t));
+        pos++;
+    }
+    return result;
+}
+
+// Return the UTF-32 codepoints of `text`, expanding synthetic graphemes into
+// their constituent codepoints.
+public Array_t Text$utf32_codepoints(Text_t text)
+{
+    Array_t result = {.atomic=1};
+    TextIter_t iter = NEW_TEXT_ITER_STATE(text);
+    for (int64_t pos = 0; pos < text.length; pos++) {
+        int32_t grapheme = Text$get_grapheme_fast(&iter, pos);
+        if (grapheme >= 0) {
+            Array$insert(&result, &grapheme, I_small(0), sizeof(ucs4_t));
+            continue;
+        }
+
+        // Synthetic grapheme: emit each of its codepoints
+        for (int64_t c = 0; c < NUM_GRAPHEME_CODEPOINTS(grapheme); c++) {
+            ucs4_t codepoint = GRAPHEME_CODEPOINTS(grapheme)[c];
+            Array$insert(&result, &codepoint, I_small(0), sizeof(ucs4_t));
+        }
+    }
+    return result;
+}
+
+// Return the UTF-8 encoding of `text` as a byte array (no NUL terminator in
+// the reported length).
+public Array_t Text$utf8_bytes(Text_t text)
+{
+    const char *utf8 = Text$as_c_string(text);
+    Array_t bytes = {.length=strlen(utf8), .stride=1, .atomic=1, .data=(void*)utf8};
+    return bytes;
+}
+
+// Look up the Unicode name of codepoint `c`. When the character has no name,
+// fall back to "<block name>-<hex value>".
+static INLINE const char *codepoint_name(ucs4_t c)
+{
+    char *buffer = GC_MALLOC_ATOMIC(UNINAME_MAX);
+    char *official = unicode_character_name(c, buffer);
+    if (!official) {
+        const uc_block_t *block = uc_block(c);
+        assert(block);
+        snprintf(buffer, UNINAME_MAX, "%s-%X", block->name, c);
+        return buffer;
+    }
+    return official;
+}
+
+// Return the Unicode names of every codepoint in `text`, expanding synthetic
+// graphemes into their constituent codepoints.
+public Array_t Text$codepoint_names(Text_t text)
+{
+    Array_t result = {};
+    TextIter_t iter = NEW_TEXT_ITER_STATE(text);
+    for (int64_t pos = 0; pos < text.length; pos++) {
+        int32_t grapheme = Text$get_grapheme_fast(&iter, pos);
+        if (grapheme >= 0) {
+            Text_t name_text = Text$from_str(codepoint_name((ucs4_t)grapheme));
+            Array$insert(&result, &name_text, I_small(0), sizeof(Text_t));
+            continue;
+        }
+
+        // Synthetic grapheme: name each codepoint it is made of
+        for (int64_t c = 0; c < NUM_GRAPHEME_CODEPOINTS(grapheme); c++) {
+            Text_t name_text = Text$from_str(codepoint_name(GRAPHEME_CODEPOINTS(grapheme)[c]));
+            Array$insert(&result, &name_text, I_small(0), sizeof(Text_t));
+        }
+    }
+    return result;
+}
+
+// Build a normalized text from an array of UTF-32 codepoints.
+public Text_t Text$from_codepoints(Array_t codepoints)
+{
+    // text_from_u32() needs the codepoints to be contiguous
+    const bool is_packed = (codepoints.stride == sizeof(int32_t));
+    if (!is_packed)
+        Array$compact(&codepoints, sizeof(int32_t));
+
+    Text_t result = text_from_u32(codepoints.data, codepoints.length, true);
+    return result;
+}
+
+// Build a text from an array of Unicode character names. Returns NONE_TEXT
+// if any name is not recognized.
+public OptionalText_t Text$from_codepoint_names(Array_t codepoint_names)
+{
+    Array_t resolved = {};
+    int64_t idx = 0;
+    while (idx < codepoint_names.length) {
+        Text_t *name = (Text_t*)(codepoint_names.data + idx*codepoint_names.stride);
+        ucs4_t codepoint = unicode_name_character(Text$as_c_string(*name));
+        if (codepoint == UNINAME_INVALID)
+            return NONE_TEXT;
+        Array$insert(&resolved, &codepoint, I_small(0), sizeof(ucs4_t));
+        idx++;
+    }
+    return Text$from_codepoints(resolved);
+}
+
+// Interpret an array of bytes as a string and convert it with
+// Text$from_strn(). The array is compacted first when its stride is not one
+// byte, so the bytes are contiguous.
+public OptionalText_t Text$from_bytes(Array_t bytes)
+{
+    const bool contiguous = (bytes.stride == sizeof(int8_t));
+    if (!contiguous)
+        Array$compact(&bytes, sizeof(int8_t));
+
+    size_t nbytes = (size_t)bytes.length;
+    return Text$from_strn(bytes.data, nbytes);
+}
+
+// Split `text` into an array of lines (Text values). Both "\n" and "\r\n"
+// terminate a line and are not included in the result. A final line without a
+// trailing newline is included; a trailing newline does not produce an extra
+// empty line.
+public Array_t Text$lines(Text_t text)
+{
+    Array_t lines = {};
+    TextIter_t state = NEW_TEXT_ITER_STATE(text);
+    for (int64_t i = 0, line_start = 0; i < text.length; i++) {
+        int32_t grapheme = Text$get_grapheme_fast(&state, i);
+        // Only look ahead for the LF of a CRLF pair when a next grapheme exists
+        if (grapheme == '\r' && i + 1 < text.length
+            && Text$get_grapheme_fast(&state, i + 1) == '\n') { // CRLF
+            Text_t line = Text$slice(text, I(line_start+1), I(i));
+            Array$insert(&lines, &line, I_small(0), sizeof(Text_t));
+            i += 1; // skip one extra for CR
+            line_start = i + 1;
+        } else if (grapheme == '\n') { // newline
+            Text_t line = Text$slice(text, I(line_start+1), I(i));
+            Array$insert(&lines, &line, I_small(0), sizeof(Text_t));
+            line_start = i + 1;
+        } else if (i == text.length-1) { // last line (no trailing newline)
+            // line_start <= i always holds here, so this slice is never empty.
+            // (Requiring line_start != i used to drop a final line that was
+            // exactly one grapheme long, e.g. the "b" in "a\nb".)
+            Text_t line = Text$slice(text, I(line_start+1), I(i+1));
+            Array$insert(&lines, &line, I_small(0), sizeof(Text_t));
+        }
+    }
+    return lines;
+}
+
+// Iteration state shared by next_line() and Text$by_line(): a grapheme
+// iterator over the text being split, plus the index where the next line starts.
+typedef struct {
+ TextIter_t state; // stack[0].text is the text being split (read by next_line)
+ int64_t i; // 0-based grapheme index at which the next line begins
+} line_iter_state_t;
+
+// Return the next line of the text held in `state`, advancing `state->i` past
+// the line and its terminator ("\n" or "\r\n"). Returns NONE_TEXT once the
+// text is exhausted. A final line without a trailing newline is returned too.
+static OptionalText_t next_line(line_iter_state_t *state)
+{
+    Text_t text = state->state.stack[0].text;
+    for (int64_t i = state->i; i < text.length; i++) {
+        int32_t grapheme = Text$get_grapheme_fast(&state->state, i);
+        // Only look ahead for the LF of a CRLF pair when a next grapheme exists
+        if (grapheme == '\r' && i + 1 < text.length
+            && Text$get_grapheme_fast(&state->state, i + 1) == '\n') { // CRLF
+            Text_t line = Text$slice(text, I(state->i+1), I(i));
+            state->i = i + 2; // skip one extra for CR
+            return line;
+        } else if (grapheme == '\n') { // newline
+            Text_t line = Text$slice(text, I(state->i+1), I(i));
+            state->i = i + 1;
+            return line;
+        } else if (i == text.length-1) { // last line (no trailing newline)
+            // state->i <= i always holds here, so the slice is never empty.
+            // (Requiring state->i != i used to drop a final line that was
+            // exactly one grapheme long.)
+            Text_t line = Text$slice(text, I(state->i+1), I(i+1));
+            state->i = i + 1;
+            return line;
+        }
+    }
+    return NONE_TEXT;
+}
+
+// Create a closure that yields the lines of `text` one at a time: the
+// function is next_line() and the userdata is a freshly allocated
+// line_iter_state_t positioned at index 0.
+public Closure_t Text$by_line(Text_t text)
+{
+    line_iter_state_t *iter = new(line_iter_state_t, .state=NEW_TEXT_ITER_STATE(text), .i=0);
+    Closure_t line_closure = {.fn=(void*)next_line, .userdata=iter};
+    return line_closure;
+}
+
+// `is_none` metamethod for Text: a Text with negative length is treated as
+// the "none" value.
+PUREFUNC public bool Text$is_none(const void *t, const TypeInfo_t*)
+{
+    const Text_t *maybe_text = (const Text_t*)t;
+    return maybe_text->length < 0;
+}
+
+// `serialize` metamethod for Text: writes the byte length of the text's C
+// string (via Int64$serialize) followed by the raw bytes, without the NUL
+// terminator.
+public void Text$serialize(const void *obj, FILE *out, Table_t *pointers, const TypeInfo_t *)
+{
+    const Text_t *text = obj;
+    const char *utf8 = Text$as_c_string(*text);
+    int64_t nbytes = (int64_t)strlen(utf8);
+    Int64$serialize(&nbytes, out, pointers, &Int64$info);
+    fwrite(utf8, sizeof(char), (size_t)nbytes, out);
+}
+
+// `deserialize` metamethod for Text: reads a byte length (Int64) followed by
+// that many bytes from `in`, and stores the resulting Text in `*out`.
+// Exits with an error message if the stored length is negative or the stream
+// ends before all of the bytes have been read.
+public void Text$deserialize(FILE *in, void *out, Array_t *pointers, const TypeInfo_t *)
+{
+    int64_t len = -1;
+    Int64$deserialize(in, &len, pointers, &Int64$info);
+    if (len < 0)
+        errx(1, "Failed to deserialize Text: invalid length");
+
+    char *buf = GC_MALLOC_ATOMIC((size_t)len+1);
+    if (fread(buf, sizeof(char), (size_t)len, in) != (size_t)len)
+        errx(1, "Failed to deserialize Text: unexpected end of input");
+    // The buffer holds len+1 bytes, so the terminator belongs at index len
+    // (index len+1 would be one past the end of the allocation).
+    buf[len] = '\0';
+    *(Text_t*)out = Text$from_strn(buf, (size_t)len);
+}
+
+// Runtime type descriptor for Text: size and alignment of Text_t, the
+// TextInfo tag with language name "Text", and the Text$metamethods table.
+public const TypeInfo_t Text$info = {
+ .size=sizeof(Text_t),
+ .align=__alignof__(Text_t),
+ .tag=TextInfo,
+ .TextInfo={.lang="Text"},
+ .metamethods=Text$metamethods,
+};
+
+// Build a pattern from `text` with special graphemes escaped. A literal "{"
+// becomes "{1{}"; a grapheme whose first codepoint is '?', a quotation mark,
+// or the left half of paired punctuation is wrapped as "{1" + grapheme + "}";
+// every other grapheme is copied through unchanged.
+public Pattern_t Pattern$escape_text(Text_t text)
+{
+    // TODO: optimize for spans of non-escaped text
+    Text_t escaped = EMPTY_TEXT;
+    TextIter_t iter = NEW_TEXT_ITER_STATE(text);
+    for (int64_t pos = 0; pos < text.length; pos++) {
+        int32_t g = Text$get_grapheme_fast(&iter, pos);
+        if (g == '{') {
+            escaped = concat2_assuming_safe(escaped, Text("{1{}"));
+            continue;
+        }
+
+        // For clusters (negative IDs), classify by the leading codepoint
+        ucs4_t lead = g < 0 ? GRAPHEME_CODEPOINTS(g)[0] : (ucs4_t)g;
+        bool opens_pair = uc_is_property_paired_punctuation(lead) && uc_is_property_left_of_pair(lead);
+        bool needs_escape = (lead == '?') || uc_is_property_quotation_mark(lead) || opens_pair;
+
+        Text_t cluster = Text$slice(text, I(pos+1), I(pos+1));
+        if (needs_escape)
+            escaped = Text$concat(escaped, Text("{1"), cluster, Text("}"));
+        else
+            escaped = concat2_assuming_safe(escaped, cluster);
+    }
+    return escaped;
+}
+
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/src/stdlib/text.h b/src/stdlib/text.h
new file mode 100644
index 00000000..0a44f4e4
--- /dev/null
+++ b/src/stdlib/text.h
@@ -0,0 +1,99 @@
+#pragma once
+
+// Type info and methods for Text datatype, which uses a struct inspired by
+// Raku's string representation and libunistr
+
+#include <stdbool.h>
+#include <printf.h>
+#include <stdint.h>
+
+#include "datatypes.h"
+#include "integers.h"
+#include "optionals.h"
+#include "types.h"
+#include "util.h"
+
+// Capacity of the TextIter_t stack below.
+#define MAX_TEXT_DEPTH 48
+
+// Iteration state passed to Text$get_grapheme_fast(). It holds a fixed-size
+// stack of (text, offset) entries and the index of the current entry.
+// NOTE(review): presumably the stack tracks the position inside nested text
+// nodes so sequential lookups avoid restarting from the root -- confirm in text.c.
+typedef struct {
+ struct {
+ Text_t text;
+ int64_t offset;
+ } stack[MAX_TEXT_DEPTH];
+ int64_t stack_index;
+} TextIter_t;
+
+// Fresh iterator over text `t`: stack[0] is {t, offset 0} and stack_index is 0.
+#define NEW_TEXT_ITER_STATE(t) (TextIter_t){.stack={{t, 0}}, .stack_index=0}
+
+int printf_text(FILE *stream, const struct printf_info *info, const void *const args[]);
+int printf_text_size(const struct printf_info *info, size_t n, int argtypes[n], int sizes[n]);
+
+// Build a Text_t from a string literal: the length is sizeof(str)-1 and the
+// tag is TEXT_ASCII. The `"" str` concatenation only compiles when `str` is a
+// string literal.
+#define Text(str) ((Text_t){.length=sizeof(str)-1, .tag=TEXT_ASCII, .ascii="" str})
+
+int Text$print(FILE *stream, Text_t t);
+// Concatenate `n` texts; normally called through the Text$concat()/Texts() macros.
+Text_t Text$_concat(int n, Text_t items[n]);
+// Variadic front end for Text$_concat(): the argument count is computed from
+// the size of a Text_t[] compound literal built from the arguments.
+#define Text$concat(...) Text$_concat(sizeof((Text_t[]){__VA_ARGS__})/sizeof(Text_t), (Text_t[]){__VA_ARGS__})
+// Shorthand alias for Text$concat().
+#define Texts(...) Text$concat(__VA_ARGS__)
+Text_t Text$slice(Text_t text, Int_t first_int, Int_t last_int);
+Text_t Text$from(Text_t text, Int_t first);
+Text_t Text$to(Text_t text, Int_t last);
+Text_t Text$reversed(Text_t text);
+Text_t Text$cluster(Text_t text, Int_t index_int);
+OptionalText_t Text$from_str(const char *str);
+OptionalText_t Text$from_strn(const char *str, size_t len);
+PUREFUNC uint64_t Text$hash(const void *text, const TypeInfo_t*);
+PUREFUNC int32_t Text$compare(const void *va, const void *vb, const TypeInfo_t*);
+PUREFUNC bool Text$equal(const void *a, const void *b, const TypeInfo_t*);
+PUREFUNC bool Text$equal_values(Text_t a, Text_t b);
+PUREFUNC bool Text$equal_ignoring_case(Text_t a, Text_t b, Text_t language);
+PUREFUNC bool Text$is_none(const void *t, const TypeInfo_t*);
+Text_t Text$upper(Text_t text, Text_t language);
+Text_t Text$lower(Text_t text, Text_t language);
+Text_t Text$title(Text_t text, Text_t language);
+Text_t Text$as_text(const void *text, bool colorize, const TypeInfo_t *info);
+Text_t Text$quoted(Text_t str, bool colorize);
+PUREFUNC bool Text$starts_with(Text_t text, Text_t prefix);
+PUREFUNC bool Text$ends_with(Text_t text, Text_t suffix);
+char *Text$as_c_string(Text_t text);
+__attribute__((format(printf, 1, 2)))
+public Text_t Text$format(const char *fmt, ...);
+Array_t Text$clusters(Text_t text);
+Array_t Text$utf32_codepoints(Text_t text);
+Array_t Text$utf8_bytes(Text_t text);
+Array_t Text$codepoint_names(Text_t text);
+Text_t Text$from_codepoints(Array_t codepoints);
+OptionalText_t Text$from_codepoint_names(Array_t codepoint_names);
+OptionalText_t Text$from_bytes(Array_t bytes);
+Array_t Text$lines(Text_t text);
+Closure_t Text$by_line(Text_t text);
+Text_t Text$join(Text_t glue, Array_t pieces);
+Text_t Text$repeat(Text_t text, Int_t count);
+Int_t Text$width(Text_t text, Text_t language);
+Text_t Text$left_pad(Text_t text, Int_t width, Text_t padding, Text_t language);
+Text_t Text$right_pad(Text_t text, Int_t width, Text_t padding, Text_t language);
+Text_t Text$middle_pad(Text_t text, Int_t width, Text_t padding, Text_t language);
+int32_t Text$get_grapheme_fast(TextIter_t *state, int64_t index);
+uint32_t Text$get_main_grapheme_fast(TextIter_t *state, int64_t index);
+void Text$serialize(const void *obj, FILE *out, Table_t *, const TypeInfo_t *);
+void Text$deserialize(FILE *in, void *out, Array_t *, const TypeInfo_t *);
+
+// One-off grapheme lookup: creates a fresh iterator for `text` and asks
+// Text$get_grapheme_fast() for the grapheme at `index`.
+MACROLIKE int32_t Text$get_grapheme(Text_t text, int64_t index)
+{
+    TextIter_t fresh_iter = NEW_TEXT_ITER_STATE(text);
+    return Text$get_grapheme_fast(&fresh_iter, index);
+}
+
+extern const TypeInfo_t Text$info;
+extern Text_t EMPTY_TEXT;
+
+// Initializer for the metamethods_t table of Text (used by Text$info): wires
+// up text conversion, hashing, comparison, equality, none-checking, and
+// (de)serialization.
+#define Text$metamethods { \
+ .as_text=Text$as_text, \
+ .hash=Text$hash, \
+ .compare=Text$compare, \
+ .equal=Text$equal, \
+ .is_none=Text$is_none, \
+ .serialize=Text$serialize, \
+ .deserialize=Text$deserialize, \
+}
+
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/src/stdlib/threads.c b/src/stdlib/threads.c
new file mode 100644
index 00000000..9ad68c81
--- /dev/null
+++ b/src/stdlib/threads.c
@@ -0,0 +1,80 @@
+// Logic for the Thread type, representing a pthread
+
+#include <ctype.h>
+#include <err.h>
+#include <fcntl.h>
+#include <gc.h>
+#include <math.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <pthread.h>
+#include <sys/param.h>
+#include <sys/random.h>
+
+#include "arrays.h"
+#include "datatypes.h"
+#include "metamethods.h"
+#include "rng.h"
+#include "text.h"
+#include "threads.h"
+#include "types.h"
+#include "util.h"
+
+// Entry point handed to pthread_create() by Thread$new(). Seeds `default_rng`
+// with 40 bytes from getrandom(), then calls the closure's function with its
+// userdata. Always returns NULL.
+static void *run_thread(Closure_t *closure)
+{
+    enum { SEED_BYTES = 40 };
+    uint8_t *seed = GC_MALLOC_ATOMIC(SEED_BYTES);
+    getrandom(seed, SEED_BYTES, 0);
+    Array_t rng_seed = {.length=SEED_BYTES, .data=seed, .stride=1, .atomic=1};
+    default_rng = RNG$new(rng_seed);
+
+    void (*entry)(void*) = (void(*)(void*))closure->fn;
+    entry(closure->userdata);
+    return NULL;
+}
+
+// Start a new thread that runs the closure `fn` (via run_thread) and return a
+// GC-allocated handle to it. The closure is copied to the heap so it remains
+// valid after this function returns. Exits with an error message if the
+// thread cannot be created, rather than returning a handle to a thread that
+// was never started.
+public Thread_t Thread$new(Closure_t fn)
+{
+    Thread_t thread = GC_MALLOC(sizeof(pthread_t));
+    Closure_t *closure = new(Closure_t, .fn=fn.fn, .userdata=fn.userdata);
+    // pthread_create() reports failure through its return value, not errno
+    int status = pthread_create(thread, NULL, (void*)run_thread, closure);
+    if (status != 0)
+        errx(1, "Failed to create thread: %s", strerror(status));
+    return thread;
+}
+
+// Wait for `thread` to finish via pthread_join(). The thread's return value
+// is discarded and the result of pthread_join() is not checked.
+public void Thread$join(Thread_t thread)
+{
+ pthread_join(*thread, NULL);
+}
+
+// Request cancellation of `thread` via pthread_cancel(). The result of
+// pthread_cancel() is not checked.
+public void Thread$cancel(Thread_t thread)
+{
+ pthread_cancel(*thread);
+}
+
+// Detach `thread` via pthread_detach(). The result of pthread_detach() is not
+// checked.
+public void Thread$detach(Thread_t thread)
+{
+ pthread_detach(*thread);
+}
+
+// `as_text` metamethod for Thread. With a NULL `thread` it returns the type
+// name "Thread"; otherwise `thread` points at a Thread_t handle and the
+// result is "Thread(<pointer>)". ANSI color codes are added when `colorize`
+// is set.
+public Text_t Thread$as_text(const void *thread, bool colorize, const TypeInfo_t*)
+{
+    if (!thread) {
+        return colorize ? Text("\x1b[34;1mThread\x1b[m") : Text("Thread");
+    }
+    // `thread` points at a Thread_t (as in Thread$is_none), and %p expects a void*
+    void *handle = (void*)*(Thread_t*)thread;
+    return Text$format(colorize ? "\x1b[34;1mThread(%p)\x1b[m" : "Thread(%p)", handle);
+}
+
+// `is_none` metamethod for Thread: a NULL handle is the "none" value.
+static bool Thread$is_none(const void *obj, const TypeInfo_t*)
+{
+    Thread_t handle = *(Thread_t*)obj;
+    return handle == NULL;
+}
+
+// Runtime type descriptor for Thread (a pointer-sized handle). Threads can be
+// printed and none-checked, but serialization and deserialization are routed
+// to cannot_serialize / cannot_deserialize.
+public const TypeInfo_t Thread$info = {
+ .size=sizeof(Thread_t), .align=__alignof(Thread_t),
+ .metamethods={
+ .as_text=Thread$as_text,
+ .is_none=Thread$is_none,
+ .serialize=cannot_serialize,
+ .deserialize=cannot_deserialize,
+ },
+};
+
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/src/stdlib/threads.h b/src/stdlib/threads.h
new file mode 100644
index 00000000..9f1c3d33
--- /dev/null
+++ b/src/stdlib/threads.h
@@ -0,0 +1,22 @@
+#pragma once
+
+// Logic for the Thread type, representing a pthread
+
+#include <pthread.h>
+#include <stdbool.h>
+
+#include "datatypes.h"
+#include "types.h"
+#include "util.h"
+
+// A Thread is a pointer to a pthread_t. Because this is a macro rather than a
+// typedef, `Thread_t a, b;` would declare `b` as a plain pthread_t -- declare
+// one variable per statement.
+#define Thread_t pthread_t*
+
+Thread_t Thread$new(Closure_t fn);
+void Thread$cancel(Thread_t thread);
+void Thread$join(Thread_t thread);
+void Thread$detach(Thread_t thread);
+Text_t Thread$as_text(const void *thread, bool colorize, const TypeInfo_t *type);
+
+extern const TypeInfo_t Thread$info;
+
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/src/stdlib/tomo.h b/src/stdlib/tomo.h
new file mode 100644
index 00000000..61dba404
--- /dev/null
+++ b/src/stdlib/tomo.h
@@ -0,0 +1,35 @@
+#pragma once
+
+// All of the different builtin modules can be included by including this one
+// import
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <sys/param.h>
+
+#include "arrays.h"
+#include "bools.h"
+#include "bytes.h"
+#include "c_strings.h"
+#include "datatypes.h"
+#include "enums.h"
+#include "functiontype.h"
+#include "integers.h"
+#include "memory.h"
+#include "metamethods.h"
+#include "moments.h"
+#include "mutexeddata.h"
+#include "nums.h"
+#include "optionals.h"
+#include "paths.h"
+#include "patterns.h"
+#include "pointers.h"
+#include "rng.h"
+#include "siphash.h"
+#include "structs.h"
+#include "tables.h"
+#include "text.h"
+#include "threads.h"
+#include "types.h"
+
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/src/stdlib/types.c b/src/stdlib/types.c
new file mode 100644
index 00000000..8ced9051
--- /dev/null
+++ b/src/stdlib/types.c
@@ -0,0 +1,31 @@
+// Type information and methods for TypeInfos (i.e. runtime representations of types)
+#include <err.h>
+#include <gc.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/param.h>
+
+#include "util.h"
+#include "arrays.h"
+#include "pointers.h"
+#include "tables.h"
+#include "text.h"
+#include "types.h"
+
+// `as_text` metamethod for type values. With a NULL `typeinfo` it returns the
+// generic name "Type"; otherwise it returns the type string stored in
+// `type->TypeInfoInfo.type_str`, wrapped in ANSI color codes when `colorize`
+// is set.
+public Text_t Type$as_text(const void *typeinfo, bool colorize, const TypeInfo_t *type)
+{
+    if (!typeinfo) return Text("Type");
+
+    Text_t type_name = Text$from_str(type->TypeInfoInfo.type_str);
+    if (!colorize)
+        return type_name;
+
+    return Text$concat(Text("\x1b[36;1m"), type_name, Text("\x1b[m"));
+}
+
+// Type descriptors for Void and Abort: zero-sized, tagged as StructInfo, with
+// no metamethods set.
+public const TypeInfo_t Void$info = {.size=0, .align=0, .tag=StructInfo};
+public const TypeInfo_t Abort$info = {.size=0, .align=0, .tag=StructInfo};
+
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/src/stdlib/types.h b/src/stdlib/types.h
new file mode 100644
index 00000000..c7b938a0
--- /dev/null
+++ b/src/stdlib/types.h
@@ -0,0 +1,93 @@
+#pragma once
+
+// Type information and methods for TypeInfos (i.e. runtime representations of types)
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#include "datatypes.h"
+
+typedef struct TypeInfo_s TypeInfo_t;
+
+// Per-type table of function pointers ("metamethods"). Every entry receives
+// the TypeInfo_t of the value it operates on as its last argument.
+typedef struct {
+ uint64_t (*hash)(const void*, const TypeInfo_t*);
+ int32_t (*compare)(const void*, const void*, const TypeInfo_t*);
+ bool (*equal)(const void*, const void*, const TypeInfo_t*);
+ Text_t (*as_text)(const void*, bool, const TypeInfo_t*); // the bool is `colorize`
+ bool (*is_none)(const void*, const TypeInfo_t*);
+ void (*serialize)(const void*, FILE*, Table_t*, const TypeInfo_t*);
+ void (*deserialize)(FILE*, void*, Array_t*, const TypeInfo_t*);
+} metamethods_t;
+
+// A (name, type) pair; used for the `tags` of EnumInfo and the `fields` of
+// StructInfo in TypeInfo_t.
+typedef struct {
+ const char *name;
+ const TypeInfo_t *type;
+} NamedType_t;
+
+// Runtime description of a type: its size and alignment, its metamethod
+// table, and a tagged union carrying kind-specific details. `tag` selects
+// which union member is meaningful; the members are named after the tags.
+struct TypeInfo_s {
+ int64_t size, align;
+ metamethods_t metamethods;
+ struct { // Anonymous tagged union for convenience
+ enum { OpaqueInfo, StructInfo, EnumInfo, PointerInfo, TextInfo, ArrayInfo, TableInfo, FunctionInfo,
+ OptionalInfo, MutexedDataInfo, TypeInfoInfo } tag;
+ union {
+ struct {} OpaqueInfo;
+ struct {
+ const char *sigil;
+ const TypeInfo_t *pointed;
+ } PointerInfo;
+ struct {
+ const char *lang;
+ } TextInfo;
+ struct {
+ const TypeInfo_t *item;
+ } ArrayInfo;
+ struct {
+ const TypeInfo_t *key, *value;
+ } TableInfo;
+ struct {
+ const char *type_str;
+ } FunctionInfo;
+ struct {
+ const char *type_str;
+ } TypeInfoInfo;
+ struct {
+ const TypeInfo_t *type;
+ } OptionalInfo, MutexedDataInfo;
+ struct {
+ const char *name;
+ int num_tags;
+ NamedType_t *tags;
+ } EnumInfo;
+ struct {
+ const char *name;
+ int num_fields;
+ bool is_secret:1, is_opaque:1;
+ NamedType_t *fields;
+ } StructInfo;
+ };
+ };
+};
+
+extern const TypeInfo_t Void$info;
+extern const TypeInfo_t Abort$info;
+#define Void_t void
+
+Text_t Type$as_text(const void *typeinfo, bool colorize, const TypeInfo_t *type);
+
+// Expands to a pointer to an anonymous TypeInfo_t (a compound literal) that
+// describes a type value whose printed name is `typestr`. Such values are
+// printed with Type$as_text and cannot be serialized or deserialized.
+#define Type$info(typestr) &((TypeInfo_t){.size=sizeof(TypeInfo_t), .align=__alignof__(TypeInfo_t), \
+ .tag=TypeInfoInfo, .TypeInfoInfo.type_str=typestr, \
+ .metamethods={.serialize=cannot_serialize, .deserialize=cannot_deserialize, .as_text=Type$as_text}})
+
+// Declare a struct type `name` that overlays a value of type `t` with a
+// layout consisting of `unpadded_size` bytes of padding followed by a 1-bit
+// `is_none` flag, so the flag sits after the first `unpadded_size` bytes of
+// the value.
+#define DEFINE_OPTIONAL_TYPE(t, unpadded_size, name) \
+ typedef struct { \
+ union { \
+ t value; \
+ struct { \
+ char _padding[unpadded_size]; \
+ Bool_t is_none:1; \
+ }; \
+ }; \
+ } name
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/src/stdlib/util.c b/src/stdlib/util.c
new file mode 100644
index 00000000..1fe33dfa
--- /dev/null
+++ b/src/stdlib/util.c
@@ -0,0 +1,26 @@
+// Built-in utility functions
+#include <ctype.h>
+#include <gc.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "text.h"
+#include "util.h"
+
+// printf-style formatting into a garbage-collected string. The text is first
+// formatted into a malloc'd buffer with vasprintf(), copied into GC memory
+// with GC_strndup(), and the temporary buffer is freed.
+// Returns NULL if formatting fails.
+__attribute__((format(printf, 1, 2)))
+public char *heap_strf(const char *fmt, ...)
+{
+    va_list args;
+    va_start(args, fmt);
+    char *tmp = NULL;
+    int len = vasprintf(&tmp, fmt, args);
+    // Every va_start() must be paired with va_end(), including on the error path
+    va_end(args);
+    if (len < 0) return NULL;
+    char *ret = GC_strndup(tmp, (size_t)len);
+    free(tmp);
+    return ret;
+}
+
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/src/stdlib/util.h b/src/stdlib/util.h
new file mode 100644
index 00000000..6f79bed6
--- /dev/null
+++ b/src/stdlib/util.h
@@ -0,0 +1,61 @@
+#pragma once
+
+// Built-in utility functions
+
+#include <assert.h>
+#include <gc.h>
+#include <stdbool.h>
+#include <string.h>
+#include <err.h>
+
+// NULL-tolerant string equality: true when both are NULL, or both are non-NULL
+// and strcmp() reports them equal. Arguments are evaluated more than once.
+#define streq(a, b) (((a) == NULL && (b) == NULL) || (((a) == NULL) == ((b) == NULL) && strcmp(a, b) == 0))
+// True when `line` begins with `prefix` (both must be non-NULL C strings).
+#define starts_with(line, prefix) (strncmp(line, prefix, strlen(prefix)) == 0)
+// True when `line` ends with `suffix`. Arguments are evaluated more than once.
+#define ends_with(line, suffix) (strlen(line) >= strlen(suffix) && strcmp(line + strlen(line) - strlen(suffix), suffix) == 0)
+// Allocate a `t` with GC_MALLOC() and initialize it from the designated
+// initializers given in the remaining arguments.
+#define new(t, ...) ((t*)memcpy(GC_MALLOC(sizeof(t)), &(t){__VA_ARGS__}, sizeof(t)))
+// Copy the value `x` into freshly GC-allocated memory and return a pointer to it.
+#define heap(x) (__typeof(x)*)memcpy(GC_MALLOC(sizeof(x)), (__typeof(x)[1]){x}, sizeof(x))
+// Pointer to a temporary copy of `x` held in a one-element array compound literal.
+#define stack(x) (__typeof(x)*)((__typeof(x)[1]){x})
+#define check_initialized(var, name) *({ if (!var ## $initialized) fail("The variable " name " is being accessed before it has been initialized!"); \
+ &var; })
+
+#define IF_DECLARE(decl, expr, block) if (({ decl; expr ? ({ block; 1; }) : 0; })) {}
+
+#define WHEN(subj, var, body) { auto var = subj; switch (var.$tag) body }
+
+#ifndef auto
+#define auto __auto_type
+#endif
+
+#ifndef public
+#define public __attribute__ ((visibility ("default")))
+#endif
+
+#ifndef PUREFUNC
+#define PUREFUNC __attribute__ ((pure))
+#endif
+
+#ifndef CONSTFUNC
+#define CONSTFUNC __attribute__ ((const))
+#endif
+
+#ifndef INLINE
+#define INLINE inline __attribute__ ((always_inline))
+#endif
+
+#ifndef likely
+#define likely(x) (__builtin_expect(!!(x), 1))
+#endif
+
+#ifndef unlikely
+#define unlikely(x) (__builtin_expect(!!(x), 0))
+#endif
+
+// GCC lets you define macro-like functions which are always inlined and never
+// compiled using this combination of flags. See: https://gcc.gnu.org/onlinedocs/gcc/Inline.html
+#ifndef MACROLIKE
+#define MACROLIKE extern inline __attribute__((gnu_inline, always_inline))
+#endif
+
+__attribute__((format(printf, 1, 2)))
+char *heap_strf(const char *fmt, ...);
+
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0