Autoformat everything with clang-format

author: Bruce Hill <bruce@bruce-hill.com> 2025-08-23 19:28:08 -0400
committer: Bruce Hill <bruce@bruce-hill.com> 2025-08-23 19:28:08 -0400
commit: fcda36561d668f43bac91ea31cd55cbbd605d330 (patch)
tree: eb74c0b17df584af0fd8154422ad924e04c96cc2 /src/stdlib/text.c
parent: 414b0c7472c87c5a013029aefef49e2dbc41e700 (diff)
1 files changed, 405 insertions, 475 deletions
diff --git a/src/stdlib/text.c b/src/stdlib/text.c
index d9793eb8..aad3fd76 100644
--- a/src/stdlib/text.c
+++ b/src/stdlib/text.c
@@ -116,8 +116,8 @@
 #include "text.h"
 
 // Use inline version of the siphash code for performance:
-#include "siphash.h"
 #include "siphash-internals.h"
+#include "siphash.h"
 
 typedef struct {
     ucs4_t main_codepoint;
@@ -133,9 +133,9 @@ static synthetic_grapheme_t *synthetic_graphemes = NULL;
 static int32_t synthetic_grapheme_capacity = 0;
 static int32_t num_synthetic_graphemes = 0;
 
-#define NUM_GRAPHEME_CODEPOINTS(id) (synthetic_graphemes[-(id)-1].utf32_cluster[0])
-#define GRAPHEME_CODEPOINTS(id) (&synthetic_graphemes[-(id)-1].utf32_cluster[1])
-#define GRAPHEME_UTF8(id) (synthetic_graphemes[-(id)-1].utf8)
+#define NUM_GRAPHEME_CODEPOINTS(id) (synthetic_graphemes[-(id) - 1].utf32_cluster[0])
+#define GRAPHEME_CODEPOINTS(id) (&synthetic_graphemes[-(id) - 1].utf32_cluster[1])
+#define GRAPHEME_UTF8(id) (synthetic_graphemes[-(id) - 1].utf8)
 
 // Somewhat arbitrarily chosen, if two short literal ASCII or grapheme chunks
 // are concatenated below this length threshold, we just merge them into a
@@ -145,16 +145,17 @@ static int32_t num_synthetic_graphemes = 0;
 
 static Text_t simple_concatenation(Text_t a, Text_t b);
 
-public Text_t EMPTY_TEXT = {
-    .length=0,
-    .tag=TEXT_ASCII,
-    .ascii=0,
+public
+Text_t EMPTY_TEXT = {
+    .length = 0,
+    .tag = TEXT_ASCII,
+    .ascii = 0,
 };
 
 PUREFUNC static bool graphemes_equal(const void *va, const void *vb, const TypeInfo_t *info) {
     (void)info;
-    ucs4_t *a = *(ucs4_t**)va;
-    ucs4_t *b = *(ucs4_t**)vb;
+    ucs4_t *a = *(ucs4_t **)va;
+    ucs4_t *b = *(ucs4_t **)vb;
     if (a[0] != b[0]) return false;
     for (int i = 0; i < (int)a[0]; i++)
         if (a[i] != b[i]) return false;
@@ -163,37 +164,37 @@ PUREFUNC static bool graphemes_equal(const void *va, const void *vb, const TypeI
 
 PUREFUNC static uint64_t grapheme_hash(const void *g, const TypeInfo_t *info) {
     (void)info;
-    ucs4_t *cluster = *(ucs4_t**)g;
-    return siphash24((void*)&cluster[1], sizeof(ucs4_t[cluster[0]]));
+    ucs4_t *cluster = *(ucs4_t **)g;
+    return siphash24((void *)&cluster[1], sizeof(ucs4_t[cluster[0]]));
 }
 
 static const TypeInfo_t GraphemeClusterInfo = {
-    .size=sizeof(ucs4_t*),
-    .align=__alignof__(ucs4_t*),
-    .metamethods={
-        .equal=graphemes_equal,
-        .hash=grapheme_hash,
-    },
+    .size = sizeof(ucs4_t *),
+    .align = __alignof__(ucs4_t *),
+    .metamethods =
+        {
+            .equal = graphemes_equal,
+            .hash = grapheme_hash,
+        },
 };
 
 #ifdef __GNUC__
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wstack-protector"
 #endif
-public int32_t get_synthetic_grapheme(const ucs4_t *codepoints, int64_t utf32_len)
-{
-    if (utf32_len == 1)
-        return (int32_t)*codepoints;
+public
+int32_t get_synthetic_grapheme(const ucs4_t *codepoints, int64_t utf32_len) {
+    if (utf32_len == 1) return (int32_t)*codepoints;
 
-    ucs4_t length_prefixed[1+utf32_len];
+    ucs4_t length_prefixed[1 + utf32_len];
     length_prefixed[0] = (ucs4_t)utf32_len;
     for (int i = 0; i < utf32_len; i++)
-        length_prefixed[i+1] = codepoints[i];
+        length_prefixed[i + 1] = codepoints[i];
     ucs4_t *ptr = &length_prefixed[0];
 
     // Optimization for common case of one frequently used synthetic grapheme:
     static int32_t last_grapheme = 0;
-    if (last_grapheme != 0 && graphemes_equal(&ptr, &synthetic_graphemes[-last_grapheme-1].utf32_cluster, NULL))
+    if (last_grapheme != 0 && graphemes_equal(&ptr, &synthetic_graphemes[-last_grapheme - 1].utf32_cluster, NULL))
         return last_grapheme;
 
     TypeInfo_t GraphemeIDLookupTableInfo = *Table$info(&GraphemeClusterInfo, &Int32$info);
@@ -209,12 +210,12 @@ public int32_t get_synthetic_grapheme(const ucs4_t *codepoints, int64_t utf32_le
         synthetic_graphemes = new;
     }
 
-    int32_t grapheme_id = -(num_synthetic_graphemes+1);
+    int32_t grapheme_id = -(num_synthetic_graphemes + 1);
     num_synthetic_graphemes += 1;
 
     // Get UTF8 representation:
     uint8_t u8_buf[64];
-    size_t u8_len = sizeof(u8_buf)/sizeof(u8_buf[0]);
+    size_t u8_len = sizeof(u8_buf) / sizeof(u8_buf[0]);
     uint8_t *u8 = u32_to_u8(codepoints, (size_t)utf32_len, u8_buf, &u8_len);
     if (u8 == NULL) fail("Invalid graphemes encountered!");
 
@@ -223,11 +224,10 @@ public int32_t get_synthetic_grapheme(const ucs4_t *codepoints, int64_t utf32_le
     // area with good cache locality:
     static void *arena = NULL, *arena_end = NULL;
     // Eat up any space needed to make arena 32-bit aligned:
-    if ((size_t)arena % __alignof__(ucs4_t) != 0)
-        arena += __alignof__(ucs4_t) - ((size_t)arena % __alignof__(ucs4_t));
+    if ((size_t)arena % __alignof__(ucs4_t) != 0) arena += __alignof__(ucs4_t) - ((size_t)arena % __alignof__(ucs4_t));
 
     // If we have filled up this arena, allocate a new one:
-    size_t needed_memory = sizeof(ucs4_t[1+utf32_len]) + sizeof(uint8_t[u8_len + 1]);
+    size_t needed_memory = sizeof(ucs4_t[1 + utf32_len]) + sizeof(uint8_t[u8_len + 1]);
     if (arena + needed_memory > arena_end) {
         // Do reasonably big chunks at a time, so most synthetic codepoints are
         // nearby each other in memory and cache locality is good. This is a
@@ -239,28 +239,27 @@ public int32_t get_synthetic_grapheme(const ucs4_t *codepoints, int64_t utf32_le
 
     // Copy length-prefixed UTF32 codepoints into the arena and store where they live:
     ucs4_t *codepoint_copy = arena;
-    memcpy(codepoint_copy, length_prefixed, sizeof(ucs4_t[1+utf32_len]));
-    synthetic_graphemes[-grapheme_id-1].utf32_cluster = codepoint_copy;
-    arena += sizeof(ucs4_t[1+utf32_len]);
+    memcpy(codepoint_copy, length_prefixed, sizeof(ucs4_t[1 + utf32_len]));
+    synthetic_graphemes[-grapheme_id - 1].utf32_cluster = codepoint_copy;
+    arena += sizeof(ucs4_t[1 + utf32_len]);
 
     // Copy UTF8 bytes into the arena and store where they live:
     uint8_t *utf8_final = arena;
     memcpy(utf8_final, u8, sizeof(uint8_t[u8_len]));
     utf8_final[u8_len] = '\0'; // Add a terminating NUL byte
-    synthetic_graphemes[-grapheme_id-1].utf8 = utf8_final;
+    synthetic_graphemes[-grapheme_id - 1].utf8 = utf8_final;
     arena += sizeof(uint8_t[u8_len + 1]);
 
     // Sickos at the unicode consortium decreed that you can have grapheme clusters
     // that begin with *prefix* modifiers, so we gotta check for that case:
-    synthetic_graphemes[-grapheme_id-1].main_codepoint = length_prefixed[1];
+    synthetic_graphemes[-grapheme_id - 1].main_codepoint = length_prefixed[1];
     for (ucs4_t i = 0; i < utf32_len; i++) {
 #if _LIBUNISTRING_VERSION >= 0x010200
-// libuinstring version 1.2.0 introduced uc_is_property_prepended_concatenation_mark()
-// It's not critical, but it's technically more correct to have this check:
-        if (unlikely(uc_is_property_prepended_concatenation_mark(length_prefixed[1+i])))
-            continue;
+        // libunistring version 1.2.0 introduced uc_is_property_prepended_concatenation_mark()
+        // It's not critical, but it's technically more correct to have this check:
+        if (unlikely(uc_is_property_prepended_concatenation_mark(length_prefixed[1 + i]))) continue;
 #endif
-        synthetic_graphemes[-grapheme_id-1].main_codepoint = length_prefixed[1+i];
+        synthetic_graphemes[-grapheme_id - 1].main_codepoint = length_prefixed[1 + i];
         break;
     }
 
@@ -276,8 +275,8 @@ public int32_t get_synthetic_grapheme(const ucs4_t *codepoints, int64_t utf32_le
 #pragma GCC diagnostic pop
 #endif
 
-public int Text$print(FILE *stream, Text_t t)
-{
+public
+int Text$print(FILE *stream, Text_t t) {
     if (t.length == 0) return 0;
 
     switch (t.tag) {
@@ -290,14 +289,14 @@ public int Text$print(FILE *stream, Text_t t)
             if (grapheme >= 0) {
                 uint8_t buf[8];
                 size_t len = sizeof(buf);
-                uint8_t *u8 = u32_to_u8((ucs4_t*)&grapheme, 1, buf, &len);
+                uint8_t *u8 = u32_to_u8((ucs4_t *)&grapheme, 1, buf, &len);
                 if (u8 == NULL) fail("Invalid grapheme encountered: ", grapheme);
                 written += (int)fwrite(u8, sizeof(char), len, stream);
                 if (u8 != buf) free(u8);
             } else {
                 const uint8_t *u8 = GRAPHEME_UTF8(grapheme);
                 assert(u8);
-                written += (int)fwrite(u8, sizeof(uint8_t), strlen((char*)u8), stream);
+                written += (int)fwrite(u8, sizeof(uint8_t), strlen((char *)u8), stream);
             }
         }
         return written;
@@ -309,14 +308,14 @@ public int Text$print(FILE *stream, Text_t t)
             if (grapheme >= 0) {
                 uint8_t buf[8];
                 size_t len = sizeof(buf);
-                uint8_t *u8 = u32_to_u8((ucs4_t*)&grapheme, 1, buf, &len);
+                uint8_t *u8 = u32_to_u8((ucs4_t *)&grapheme, 1, buf, &len);
                 if (u8 == NULL) fail("Invalid grapheme encountered: ", grapheme);
                 written += (int)fwrite(u8, sizeof(char), len, stream);
                 if (u8 != buf) free(u8);
             } else {
                 const uint8_t *u8 = GRAPHEME_UTF8(grapheme);
                 assert(u8);
-                written += (int)fwrite(u8, sizeof(uint8_t), strlen((char*)u8), stream);
+                written += (int)fwrite(u8, sizeof(uint8_t), strlen((char *)u8), stream);
             }
         }
         return written;
@@ -332,16 +331,16 @@ public int Text$print(FILE *stream, Text_t t)
 
 static const int64_t min_len_for_depth[MAX_TEXT_DEPTH] = {
     // Fibonacci numbers (skipping first two)
-    1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144, 233, 377, 610, 987, 1597, 2584, 4181, 6765, 10946,
-    17711, 28657, 46368, 75025, 121393, 196418, 317811, 514229, 832040, 1346269, 2178309, 3524578,
-    5702887, 9227465, 14930352, 24157817, 39088169, 63245986, 102334155, 165580141, 267914296,
-    433494437, 701408733, 1134903170, 1836311903, 2971215073, 4807526976, 7778742049,
+    1,         2,         3,         5,          8,          13,         21,         34,         55,        89,
+    144,       233,       377,       610,        987,        1597,       2584,       4181,       6765,      10946,
+    17711,     28657,     46368,     75025,      121393,     196418,     317811,     514229,     832040,    1346269,
+    2178309,   3524578,   5702887,   9227465,    14930352,   24157817,   39088169,   63245986,   102334155, 165580141,
+    267914296, 433494437, 701408733, 1134903170, 1836311903, 2971215073, 4807526976, 7778742049,
 };
 
 #define IS_BALANCED_TEXT(t) ((t).length >= min_len_for_depth[(t).depth])
 
-static void insert_balanced_recursive(Text_t balanced_texts[MAX_TEXT_DEPTH], Text_t text)
-{
+static void insert_balanced_recursive(Text_t balanced_texts[MAX_TEXT_DEPTH], Text_t text) {
     if (text.tag == TEXT_CONCAT && (!IS_BALANCED_TEXT(text) || text.depth >= MAX_TEXT_DEPTH)) {
         insert_balanced_recursive(balanced_texts, *text.left);
         insert_balanced_recursive(balanced_texts, *text.right);
@@ -370,8 +369,7 @@ static void insert_balanced_recursive(Text_t balanced_texts[MAX_TEXT_DEPTH], Tex
     balanced_texts[i] = accumulator;
 }
 
-static Text_t rebalanced(Text_t a, Text_t b)
-{
+static Text_t rebalanced(Text_t a, Text_t b) {
     Text_t balanced_texts[MAX_TEXT_DEPTH];
     memset(balanced_texts, 0, sizeof(balanced_texts));
     insert_balanced_recursive(balanced_texts, a);
@@ -379,14 +377,12 @@ static Text_t rebalanced(Text_t a, Text_t b)
 
     Text_t ret = EMPTY_TEXT;
     for (int i = 0; ret.length < a.length + b.length; i++) {
-        if (balanced_texts[i].length)
-            ret = simple_concatenation(balanced_texts[i], ret);
+        if (balanced_texts[i].length) ret = simple_concatenation(balanced_texts[i], ret);
     }
     return ret;
 }
 
-Text_t simple_concatenation(Text_t a, Text_t b)
-{
+Text_t simple_concatenation(Text_t a, Text_t b) {
     if (a.length == 0) return b;
     if (b.length == 0) return a;
 
@@ -395,53 +391,53 @@ Text_t simple_concatenation(Text_t a, Text_t b)
     // every concatenation to yield a balanced text, since many concatenations
     // are ephemeral (e.g. doing a loop repeatedly concatenating without using
     // the intermediary values).
-    if (new_depth >= MAX_TEXT_DEPTH)
-        return rebalanced(a, b);
+    if (new_depth >= MAX_TEXT_DEPTH) return rebalanced(a, b);
 
     Text_t *children = GC_MALLOC(sizeof(Text_t[2]));
     children[0] = a;
     children[1] = b;
     return (Text_t){
-        .tag=TEXT_CONCAT,
-        .length=a.length + b.length,
-        .depth=new_depth,
-        .left=&children[0],
-        .right=&children[1],
+        .tag = TEXT_CONCAT,
+        .length = a.length + b.length,
+        .depth = new_depth,
+        .left = &children[0],
+        .right = &children[1],
     };
 }
 
-static Text_t concat2_assuming_safe(Text_t a, Text_t b)
-{
+static Text_t concat2_assuming_safe(Text_t a, Text_t b) {
     if (a.length == 0) return b;
     if (b.length == 0) return a;
 
     if (a.tag == TEXT_ASCII && b.tag == TEXT_ASCII && (size_t)(a.length + b.length) <= SHORT_ASCII_LENGTH) {
         struct Text_s ret = {
-            .tag=TEXT_ASCII,
-            .length=a.length + b.length,
+            .tag = TEXT_ASCII,
+            .length = a.length + b.length,
         };
         ret.ascii = GC_MALLOC_ATOMIC(sizeof(char[ret.length]));
-        memcpy((char*)ret.ascii, a.ascii, sizeof(char[a.length]));
-        memcpy((char*)&ret.ascii[a.length], b.ascii, sizeof(char[b.length]));
+        memcpy((char *)ret.ascii, a.ascii, sizeof(char[a.length]));
+        memcpy((char *)&ret.ascii[a.length], b.ascii, sizeof(char[b.length]));
         return ret;
-    } else if (a.tag == TEXT_GRAPHEMES && b.tag == TEXT_GRAPHEMES && (size_t)(a.length + b.length) <= SHORT_GRAPHEMES_LENGTH) {
+    } else if (a.tag == TEXT_GRAPHEMES && b.tag == TEXT_GRAPHEMES
+               && (size_t)(a.length + b.length) <= SHORT_GRAPHEMES_LENGTH) {
         struct Text_s ret = {
-            .tag=TEXT_GRAPHEMES,
-            .length=a.length + b.length,
+            .tag = TEXT_GRAPHEMES,
+            .length = a.length + b.length,
         };
         ret.graphemes = GC_MALLOC_ATOMIC(sizeof(int32_t[ret.length]));
-        memcpy((int32_t*)ret.graphemes, a.graphemes, sizeof(int32_t[a.length]));
-        memcpy((int32_t*)&ret.graphemes[a.length], b.graphemes, sizeof(int32_t[b.length]));
+        memcpy((int32_t *)ret.graphemes, a.graphemes, sizeof(int32_t[a.length]));
+        memcpy((int32_t *)&ret.graphemes[a.length], b.graphemes, sizeof(int32_t[b.length]));
         return ret;
-    } else if (a.tag != TEXT_CONCAT && b.tag != TEXT_CONCAT && (size_t)(a.length + b.length) <= SHORT_GRAPHEMES_LENGTH) {
+    } else if (a.tag != TEXT_CONCAT && b.tag != TEXT_CONCAT
+               && (size_t)(a.length + b.length) <= SHORT_GRAPHEMES_LENGTH) {
         // Turn a small bit of ASCII into graphemes if it helps make things smaller
         // Text structs come with an extra 8 bytes, so allocate enough to hold the text
         struct Text_s ret = {
-            .tag=TEXT_GRAPHEMES,
-            .length=a.length + b.length,
+            .tag = TEXT_GRAPHEMES,
+            .length = a.length + b.length,
         };
         ret.graphemes = GC_MALLOC_ATOMIC(sizeof(int32_t[ret.length]));
-        int32_t *dest = (int32_t*)ret.graphemes;
+        int32_t *dest = (int32_t *)ret.graphemes;
         if (a.tag == TEXT_GRAPHEMES) {
             memcpy(dest, a.graphemes, sizeof(int32_t[a.length]));
             dest += a.length;
@@ -474,12 +470,11 @@ static Text_t concat2_assuming_safe(Text_t a, Text_t b)
     return simple_concatenation(a, b);
 }
 
-static Text_t concat2(Text_t a, Text_t b)
-{
+static Text_t concat2(Text_t a, Text_t b) {
     if (a.length == 0) return b;
     if (b.length == 0) return a;
 
-    int32_t last_a = Text$get_grapheme(a, a.length-1);
+    int32_t last_a = Text$get_grapheme(a, a.length - 1);
     int32_t first_b = Text$get_grapheme(b, 0);
 
     // Magic number, we know that no codepoints below here trigger instability:
@@ -509,60 +504,51 @@ static Text_t concat2(Text_t a, Text_t b)
     // Do a normalization run for these two codepoints and see if it looks different.
     // Normalization should not exceed 3x in the input length (but if it does, it will be
     // handled gracefully)
-    ucs4_t norm_buf[3*len];
-    size_t norm_length = sizeof(norm_buf)/sizeof(norm_buf[0]);
+    ucs4_t norm_buf[3 * len];
+    size_t norm_length = sizeof(norm_buf) / sizeof(norm_buf[0]);
     ucs4_t *normalized = u32_normalize(UNINORM_NFC, codepoints, len, norm_buf, &norm_length);
     bool stable = (norm_length == len && memcmp(codepoints, normalized, sizeof(codepoints)) == 0);
 
     if (stable) {
         const void *second_grapheme = u32_grapheme_next(normalized, &normalized[norm_length]);
-        if (second_grapheme == &normalized[norm_length])
-            stable = false;
+        if (second_grapheme == &normalized[norm_length]) stable = false;
     }
 
     if likely (stable) {
-        if (normalized != norm_buf)
-            free(normalized);
+        if (normalized != norm_buf) free(normalized);
         return concat2_assuming_safe(a, b);
     }
 
-    Text_t glue = Text$from_codepoints((List_t){.data=norm_buf, .length=(int64_t)norm_length, .stride=sizeof(int32_t)});
+    Text_t glue =
+        Text$from_codepoints((List_t){.data = norm_buf, .length = (int64_t)norm_length, .stride = sizeof(int32_t)});
 
-    if (normalized != norm_buf)
-        free(normalized);
+    if (normalized != norm_buf) free(normalized);
 
-    if (a.length == 1 && b.length == 1)
-        return glue;
-    else if (a.length == 1)
-        return concat2_assuming_safe(glue, Text$slice(b, I(2), I(b.length)));
-    else if (b.length == 1)
-        return concat2_assuming_safe(Text$slice(a, I(1), I(a.length-1)), glue);
+    if (a.length == 1 && b.length == 1) return glue;
+    else if (a.length == 1) return concat2_assuming_safe(glue, Text$slice(b, I(2), I(b.length)));
+    else if (b.length == 1) return concat2_assuming_safe(Text$slice(a, I(1), I(a.length - 1)), glue);
     else
-        return concat2_assuming_safe(
-            concat2_assuming_safe(Text$slice(a, I(1), I(a.length-1)), glue),
-            Text$slice(b, I(2), I(b.length)));
+        return concat2_assuming_safe(concat2_assuming_safe(Text$slice(a, I(1), I(a.length - 1)), glue),
+                                     Text$slice(b, I(2), I(b.length)));
 }
 
-public Text_t Text$_concat(int n, Text_t items[n])
-{
+public
+Text_t Text$_concat(int n, Text_t items[n]) {
     if (n == 0) return EMPTY_TEXT;
 
     Text_t ret = items[0];
     for (int i = 1; i < n; i++) {
-        if (items[i].length > 0)
-            ret = concat2(ret, items[i]);
+        if (items[i].length > 0) ret = concat2(ret, items[i]);
     }
     return ret;
 }
 
-public Text_t Text$repeat(Text_t text, Int_t count)
-{
-    if (text.length == 0 || Int$is_negative(count))
-        return EMPTY_TEXT;
+public
+Text_t Text$repeat(Text_t text, Int_t count) {
+    if (text.length == 0 || Int$is_negative(count)) return EMPTY_TEXT;
 
     Int_t result_len = Int$times(count, I(text.length));
-    if (Int$compare_value(result_len, I(1l<<40)) > 0)
-        fail("Text repeating would produce too big of an result!");
+    if (Int$compare_value(result_len, I(1l << 40)) > 0) fail("Text repeating would produce too big of an result!");
 
     int64_t count64 = Int64$from_int(count, false);
     Text_t ret = text;
@@ -571,19 +557,17 @@ public Text_t Text$repeat(Text_t text, Int_t count)
     return ret;
 }
 
-public Int_t Text$width(Text_t text, Text_t language)
-{
-    int width = u8_strwidth((const uint8_t*)Text$as_c_string(text), Text$as_c_string(language));
+public
+Int_t Text$width(Text_t text, Text_t language) {
+    int width = u8_strwidth((const uint8_t *)Text$as_c_string(text), Text$as_c_string(language));
     return Int$from_int32(width);
 }
 
-static Text_t Text$repeat_to_width(Text_t to_repeat, int64_t target_width, Text_t language)
-{
-    if (target_width <= 0)
-        return EMPTY_TEXT;
+static Text_t Text$repeat_to_width(Text_t to_repeat, int64_t target_width, Text_t language) {
+    if (target_width <= 0) return EMPTY_TEXT;
 
     const char *lang_str = Text$as_c_string(language);
-    int64_t width = (int64_t)u8_strwidth((const uint8_t*)Text$as_c_string(to_repeat), lang_str);
+    int64_t width = (int64_t)u8_strwidth((const uint8_t *)Text$as_c_string(to_repeat), lang_str);
     Text_t repeated = EMPTY_TEXT;
     int64_t repeated_width = 0;
     while (repeated_width + width <= target_width) {
@@ -593,8 +577,8 @@ static Text_t Text$repeat_to_width(Text_t to_repeat, int64_t target_width, Text_
 
     if (repeated_width < target_width) {
         for (int64_t i = 0; repeated_width < target_width && i < to_repeat.length; i++) {
-            Text_t c = Text$slice(to_repeat, I_small(i+1), I_small(i+1));
-            int64_t w = (int64_t)u8_strwidth((const uint8_t*)Text$as_c_string(c), lang_str);
+            Text_t c = Text$slice(to_repeat, I_small(i + 1), I_small(i + 1));
+            int64_t w = (int64_t)u8_strwidth((const uint8_t *)Text$as_c_string(c), lang_str);
             if (repeated_width + w > target_width) {
                 repeated = concat2(repeated, Text$repeat(Text(" "), I(target_width - repeated_width)));
                 repeated_width = target_width;
@@ -608,35 +592,33 @@ static Text_t Text$repeat_to_width(Text_t to_repeat, int64_t target_width, Text_
     return repeated;
 }
 
-public Text_t Text$left_pad(Text_t text, Int_t width, Text_t padding, Text_t language)
-{
-    if (padding.length == 0)
-        fail("Cannot pad with an empty text!");
+public
+Text_t Text$left_pad(Text_t text, Int_t width, Text_t padding, Text_t language) {
+    if (padding.length == 0) fail("Cannot pad with an empty text!");
 
     int64_t needed = Int64$from_int(width, false) - Int64$from_int(Text$width(text, language), false);
     return concat2(Text$repeat_to_width(padding, needed, language), text);
 }
 
-public Text_t Text$right_pad(Text_t text, Int_t width, Text_t padding, Text_t language)
-{
-    if (padding.length == 0)
-        fail("Cannot pad with an empty text!");
+public
+Text_t Text$right_pad(Text_t text, Int_t width, Text_t padding, Text_t language) {
+    if (padding.length == 0) fail("Cannot pad with an empty text!");
 
     int64_t needed = Int64$from_int(width, false) - Int64$from_int(Text$width(text, language), false);
     return concat2(text, Text$repeat_to_width(padding, needed, language));
 }
 
-public Text_t Text$middle_pad(Text_t text, Int_t width, Text_t padding, Text_t language)
-{
-    if (padding.length == 0)
-        fail("Cannot pad with an empty text!");
+public
+Text_t Text$middle_pad(Text_t text, Int_t width, Text_t padding, Text_t language) {
+    if (padding.length == 0) fail("Cannot pad with an empty text!");
 
     int64_t needed = Int64$from_int(width, false) - Int64$from_int(Text$width(text, language), false);
-    return Texts(Text$repeat_to_width(padding, needed/2, language), text, Text$repeat_to_width(padding, (needed+1)/2, language));
+    return Texts(Text$repeat_to_width(padding, needed / 2, language), text,
+                 Text$repeat_to_width(padding, (needed + 1) / 2, language));
 }
 
-public Text_t Text$slice(Text_t text, Int_t first_int, Int_t last_int)
-{
+public
+Text_t Text$slice(Text_t text, Int_t first_int, Int_t last_int) {
     int64_t first = Int64$from_int(first_int, false);
     int64_t last = Int64$from_int(last_int, false);
     if (first == 0) fail("Invalid index: 0");
@@ -647,11 +629,9 @@ public Text_t Text$slice(Text_t text, Int_t first_int, Int_t last_int)
 
     if (last > text.length) last = text.length;
 
-    if (first > text.length || last < first)
-        return EMPTY_TEXT;
+    if (first > text.length || last < first) return EMPTY_TEXT;
 
-    if (first == 1 && last == text.length)
-        return text;
+    if (first == 1 && last == text.length) return text;
 
     while (text.tag == TEXT_CONCAT) {
         if (last < text.left->length) {
@@ -662,31 +642,31 @@ public Text_t Text$slice(Text_t text, Int_t first_int, Int_t last_int)
             text = *text.right;
         } else {
             return concat2_assuming_safe(Text$slice(*text.left, I(first), I(text.length)),
-                                         Text$slice(*text.right, I(1), I(last-text.left->length)));
+                                         Text$slice(*text.right, I(1), I(last - text.left->length)));
         }
     }
 
     switch (text.tag) {
     case TEXT_ASCII: {
         return (Text_t){
-            .tag=TEXT_ASCII,
-            .length=last - first + 1,
-            .ascii=text.ascii + (first-1),
+            .tag = TEXT_ASCII,
+            .length = last - first + 1,
+            .ascii = text.ascii + (first - 1),
         };
     }
     case TEXT_GRAPHEMES: {
         return (Text_t){
-            .tag=TEXT_GRAPHEMES,
-            .length=last - first + 1,
-            .graphemes=text.graphemes + (first-1),
+            .tag = TEXT_GRAPHEMES,
+            .length = last - first + 1,
+            .graphemes = text.graphemes + (first - 1),
         };
     }
     case TEXT_BLOB: {
         Text_t ret = (Text_t){
-            .tag=TEXT_BLOB,
-            .length=last - first + 1,
-            .blob.map=text.blob.map,
-            .blob.bytes=text.blob.bytes + (first-1),
+            .tag = TEXT_BLOB,
+            .length = last - first + 1,
+            .blob.map = text.blob.map,
+            .blob.bytes = text.blob.bytes + (first - 1),
         };
         return ret;
     }
@@ -695,48 +675,44 @@ public Text_t Text$slice(Text_t text, Int_t first_int, Int_t last_int)
     return EMPTY_TEXT;
 }
 
-public Text_t Text$from(Text_t text, Int_t first)
-{
-    return Text$slice(text, first, I_small(-1));
-}
+public
+Text_t Text$from(Text_t text, Int_t first) { return Text$slice(text, first, I_small(-1)); }
 
-public Text_t Text$to(Text_t text, Int_t last)
-{
-    return Text$slice(text, I_small(1), last);
-}
+public
+Text_t Text$to(Text_t text, Int_t last) { return Text$slice(text, I_small(1), last); }
 
-public Text_t Text$reversed(Text_t text)
-{
+public
+Text_t Text$reversed(Text_t text) {
     switch (text.tag) {
     case TEXT_ASCII: {
         struct Text_s ret = {
-            .tag=TEXT_ASCII,
-            .length=text.length,
+            .tag = TEXT_ASCII,
+            .length = text.length,
         };
         ret.ascii = GC_MALLOC_ATOMIC(sizeof(char[ret.length]));
         for (int64_t i = 0; i < text.length; i++)
-            ((char*)ret.ascii)[text.length-1-i] = text.ascii[i];
+            ((char *)ret.ascii)[text.length - 1 - i] = text.ascii[i];
         return ret;
     }
     case TEXT_GRAPHEMES: {
         struct Text_s ret = {
-            .tag=TEXT_GRAPHEMES,
-            .length=text.length,
+            .tag = TEXT_GRAPHEMES,
+            .length = text.length,
         };
         ret.graphemes = GC_MALLOC_ATOMIC(sizeof(int32_t[ret.length]));
         for (int64_t i = 0; i < text.length; i++)
-            ((int32_t*)ret.graphemes)[text.length-1-i] = text.graphemes[i];
+            ((int32_t *)ret.graphemes)[text.length - 1 - i] = text.graphemes[i];
         return ret;
     }
     case TEXT_BLOB: {
         struct Text_s ret = {
-            .tag=TEXT_BLOB,
-            .length=text.length,
-            .blob.map=text.blob.map,
+            .tag = TEXT_BLOB,
+            .length = text.length,
+            .blob.map = text.blob.map,
         };
         ret.blob.bytes = GC_MALLOC_ATOMIC(sizeof(uint8_t[ret.length]));
         for (int64_t i = 0; i < text.length; i++)
-            ((uint8_t*)ret.blob.bytes)[text.length-1-i] = text.graphemes[i];
+            ((uint8_t *)ret.blob.bytes)[text.length - 1 - i] = text.graphemes[i];
         return ret;
     }
     case TEXT_CONCAT: {
@@ -747,32 +723,30 @@ public Text_t Text$reversed(Text_t text)
     return EMPTY_TEXT;
 }
 
-public PUREFUNC Text_t Text$cluster(Text_t text, Int_t index)
-{
-    return Text$slice(text, index, index);
-}
+public
+PUREFUNC Text_t Text$cluster(Text_t text, Int_t index) { return Text$slice(text, index, index); }
 
-static Text_t Text$from_components(List_t graphemes, Table_t unique_clusters)
-{
-    size_t blob_size = (
-        sizeof(int32_t[unique_clusters.entries.length])
-        + sizeof(uint8_t[graphemes.length]));
+static Text_t Text$from_components(List_t graphemes, Table_t unique_clusters) {
+    size_t blob_size = (sizeof(int32_t[unique_clusters.entries.length]) + sizeof(uint8_t[graphemes.length]));
     // If blob optimization will save at least 200 bytes:
     if (unique_clusters.entries.length <= 256 && blob_size + 200 < sizeof(int32_t[graphemes.length])) {
         Text_t ret = {
-            .tag=TEXT_BLOB,
-            .length=graphemes.length,
-            .depth=0,
+            .tag = TEXT_BLOB,
+            .length = graphemes.length,
+            .depth = 0,
         };
         void *blob = GC_MALLOC_ATOMIC(blob_size);
         int32_t *map = blob;
         uint8_t *bytes = blob + sizeof(int32_t[unique_clusters.entries.length]);
         for (int64_t i = 0; i < unique_clusters.entries.length; i++) {
-            struct { int32_t g; uint8_t b; } *entry = unique_clusters.entries.data + i*unique_clusters.entries.stride;
+            struct {
+                int32_t g;
+                uint8_t b;
+            } *entry = unique_clusters.entries.data + i * unique_clusters.entries.stride;
             map[entry->b] = entry->g;
         }
         for (int64_t i = 0; i < graphemes.length; i++) {
-            int32_t g = *(int32_t*)(graphemes.data + i*graphemes.stride);
+            int32_t g = *(int32_t *)(graphemes.data + i * graphemes.stride);
             uint8_t *byte = Table$get(unique_clusters, &g, Table$info(&Int32$info, &Byte$info));
             assert(byte);
             bytes[i] = *byte;
@@ -782,15 +756,15 @@ static Text_t Text$from_components(List_t graphemes, Table_t unique_clusters)
         return ret;
     } else {
         return (Text_t){
-            .tag=TEXT_GRAPHEMES,
-            .length=graphemes.length,
-            .graphemes=graphemes.data,
+            .tag = TEXT_GRAPHEMES,
+            .length = graphemes.length,
+            .graphemes = graphemes.data,
         };
     }
 }
 
-public OptionalText_t Text$from_strn(const char *str, size_t len)
-{
+public
+OptionalText_t Text$from_strn(const char *str, size_t len) {
     int64_t ascii_span = 0;
     for (size_t i = 0; i < len && isascii(str[i]); i++)
         ascii_span++;
@@ -799,52 +773,48 @@ public OptionalText_t Text$from_strn(const char *str, size_t len)
         char *copy = GC_MALLOC_ATOMIC(len);
         memcpy(copy, str, len);
         return (Text_t){
-            .tag=TEXT_ASCII,
-            .length=ascii_span,
-            .ascii=copy,
+            .tag = TEXT_ASCII,
+            .length = ascii_span,
+            .ascii = copy,
         };
     }
-    if (u8_check((uint8_t*)str, len) != NULL)
-        return NONE_TEXT;
+    if (u8_check((uint8_t *)str, len) != NULL) return NONE_TEXT;
 
     List_t graphemes = {};
     Table_t unique_clusters = {};
-    const uint8_t *pos = (const uint8_t*)str;
-    const uint8_t *end = (const uint8_t*)&str[len];
+    const uint8_t *pos = (const uint8_t *)str;
+    const uint8_t *end = (const uint8_t *)&str[len];
     // Iterate over grapheme clusters
-    for (const uint8_t *next; (next=u8_grapheme_next(pos, end)); pos = next) {
+    for (const uint8_t *next; (next = u8_grapheme_next(pos, end)); pos = next) {
         uint32_t buf[256];
-        size_t u32_len = sizeof(buf)/sizeof(buf[0]);
-        uint32_t *u32s = u8_to_u32(pos, (size_t)(next-pos), buf, &u32_len);
+        size_t u32_len = sizeof(buf) / sizeof(buf[0]);
+        uint32_t *u32s = u8_to_u32(pos, (size_t)(next - pos), buf, &u32_len);
 
         uint32_t buf2[256];
-        size_t u32_normlen = sizeof(buf2)/sizeof(buf2[0]);
+        size_t u32_normlen = sizeof(buf2) / sizeof(buf2[0]);
         uint32_t *u32s_normalized = u32_normalize(UNINORM_NFC, u32s, u32_len, buf2, &u32_normlen);
 
         int32_t g = get_synthetic_grapheme(u32s_normalized, (int64_t)u32_normlen);
         List$insert(&graphemes, &g, I(0), sizeof(int32_t));
-        Table$get_or_setdefault(&unique_clusters, int32_t, uint8_t, g, (uint8_t)unique_clusters.entries.length, Table$info(&Int32$info, &Byte$info));
+        Table$get_or_setdefault(&unique_clusters, int32_t, uint8_t, g, (uint8_t)unique_clusters.entries.length,
+                                Table$info(&Int32$info, &Byte$info));
 
         if (u32s != buf) free(u32s);
         if (u32s_normalized != buf2) free(u32s_normalized);
 
         if (unique_clusters.entries.length >= 256) {
-            return concat2_assuming_safe(
-                Text$from_components(graphemes, unique_clusters),
-                Text$from_strn((const char*)next, (size_t)(end-next)));
+            return concat2_assuming_safe(Text$from_components(graphemes, unique_clusters),
+                                         Text$from_strn((const char *)next, (size_t)(end - next)));
         }
     }
 
     return Text$from_components(graphemes, unique_clusters);
 }
 
-public OptionalText_t Text$from_str(const char *str)
-{
-    return str ? Text$from_strn(str, strlen(str)) : Text("");
-}
+public
+OptionalText_t Text$from_str(const char *str) { return str ? Text$from_strn(str, strlen(str)) : Text(""); }
 
-static void u8_buf_append(Text_t text, char **buf, int64_t *capacity, int64_t *i)
-{
+static void u8_buf_append(Text_t text, char **buf, int64_t *capacity, int64_t *i) {
     switch (text.tag) {
     case TEXT_ASCII: {
         if (*i + text.length > (int64_t)*capacity) {
@@ -863,7 +833,7 @@ static void u8_buf_append(Text_t text, char **buf, int64_t *capacity, int64_t *i
             if (graphemes[g] >= 0) {
                 uint8_t u8_buf[64];
                 size_t u8_len = sizeof(u8_buf);
-                uint8_t *u8 = u32_to_u8((ucs4_t*)&graphemes[g], 1, u8_buf, &u8_len);
+                uint8_t *u8 = u32_to_u8((ucs4_t *)&graphemes[g], 1, u8_buf, &u8_len);
                 if (u8 == NULL) fail("Invalid grapheme encountered: ", graphemes[g]);
 
                 if (*i + (int64_t)u8_len > (int64_t)*capacity) {
@@ -894,7 +864,7 @@ static void u8_buf_append(Text_t text, char **buf, int64_t *capacity, int64_t *i
             if (grapheme >= 0) {
                 uint8_t u8_buf[64];
                 size_t u8_len = sizeof(u8_buf);
-                uint8_t *u8 = u32_to_u8((ucs4_t*)&grapheme, 1, u8_buf, &u8_len);
+                uint8_t *u8 = u32_to_u8((ucs4_t *)&grapheme, 1, u8_buf, &u8_len);
                 if (u8 == NULL) fail("Invalid grapheme encountered: ", grapheme);
 
                 if (*i + (int64_t)u8_len > (int64_t)*capacity) {
@@ -928,8 +898,8 @@ static void u8_buf_append(Text_t text, char **buf, int64_t *capacity, int64_t *i
     }
 }
 
-public char *Text$as_c_string(Text_t text)
-{
+public
+char *Text$as_c_string(Text_t text) {
     int64_t capacity = text.length + 1;
     char *buf = GC_MALLOC_ATOMIC((size_t)capacity);
     int64_t i = 0;
@@ -943,10 +913,9 @@ public char *Text$as_c_string(Text_t text)
     return buf;
 }
 
-PUREFUNC public uint64_t Text$hash(const void *obj, const TypeInfo_t *info)
-{
+PUREFUNC public uint64_t Text$hash(const void *obj, const TypeInfo_t *info) {
     (void)info;
-    Text_t text = *(Text_t*)obj;
+    Text_t text = *(Text_t *)obj;
     siphash sh;
     siphashinit(&sh, sizeof(int32_t[text.length]));
 
@@ -959,40 +928,41 @@ PUREFUNC public uint64_t Text$hash(const void *obj, const TypeInfo_t *info)
         const char *bytes = text.ascii;
         for (int64_t i = 0; i + 1 < text.length; i += 2) {
             tmp.chunks[0] = (int32_t)bytes[i];
-            tmp.chunks[1] = (int32_t)bytes[i+1];
+            tmp.chunks[1] = (int32_t)bytes[i + 1];
             siphashadd64bits(&sh, tmp.whole);
         }
-        int32_t last = text.length & 0x1 ? (int32_t)bytes[text.length-1] : 0; // Odd number of graphemes
+        int32_t last = text.length & 0x1 ? (int32_t)bytes[text.length - 1] : 0; // Odd number of graphemes
         return siphashfinish_last_part(&sh, (uint64_t)last);
     }
     case TEXT_GRAPHEMES: {
         const int32_t *graphemes = text.graphemes;
         for (int64_t i = 0; i + 1 < text.length; i += 2) {
             tmp.chunks[0] = graphemes[i];
-            tmp.chunks[1] = graphemes[i+1];
+            tmp.chunks[1] = graphemes[i + 1];
             siphashadd64bits(&sh, tmp.whole);
         }
-        int32_t last = text.length & 0x1 ? graphemes[text.length-1] : 0; // Odd number of graphemes
+        int32_t last = text.length & 0x1 ? graphemes[text.length - 1] : 0; // Odd number of graphemes
         return siphashfinish_last_part(&sh, (uint64_t)last);
     }
     case TEXT_BLOB: {
         for (int64_t i = 0; i + 1 < text.length; i += 2) {
             tmp.chunks[0] = text.blob.map[text.blob.bytes[i]];
-            tmp.chunks[1] = text.blob.map[text.blob.bytes[i+1]];
+            tmp.chunks[1] = text.blob.map[text.blob.bytes[i + 1]];
             siphashadd64bits(&sh, tmp.whole);
         }
-        int32_t last = text.length & 0x1 ? text.blob.map[text.blob.bytes[text.length-1]] : 0; // Odd number of graphemes
+        int32_t last =
+            text.length & 0x1 ? text.blob.map[text.blob.bytes[text.length - 1]] : 0; // Odd number of graphemes
         return siphashfinish_last_part(&sh, (uint64_t)last);
     }
     case TEXT_CONCAT: {
         TextIter_t state = NEW_TEXT_ITER_STATE(text);
         for (int64_t i = 0; i + 1 < text.length; i += 2) {
             tmp.chunks[0] = Text$get_grapheme_fast(&state, i);
-            tmp.chunks[1] = Text$get_grapheme_fast(&state, i+1);
+            tmp.chunks[1] = Text$get_grapheme_fast(&state, i + 1);
             siphashadd64bits(&sh, tmp.whole);
         }
 
-        int32_t last = (text.length & 0x1) ? Text$get_grapheme_fast(&state, text.length-1) : 0;
+        int32_t last = (text.length & 0x1) ? Text$get_grapheme_fast(&state, text.length - 1) : 0;
         return siphashfinish_last_part(&sh, (uint64_t)last);
     }
     default: errx(1, "Invalid text");
@@ -1000,8 +970,8 @@ PUREFUNC public uint64_t Text$hash(const void *obj, const TypeInfo_t *info)
     return 0;
 }
 
-public int32_t Text$get_grapheme_fast(TextIter_t *state, int64_t index)
-{
+public
+int32_t Text$get_grapheme_fast(TextIter_t *state, int64_t index) {
     if (index < 0) return 0;
     if (index >= state->stack[0].text.length) return 0;
 
@@ -1051,18 +1021,17 @@ public int32_t Text$get_grapheme_fast(TextIter_t *state, int64_t index)
     return 0;
 }
 
-public uint32_t Text$get_main_grapheme_fast(TextIter_t *state, int64_t index)
-{
+public
+uint32_t Text$get_main_grapheme_fast(TextIter_t *state, int64_t index) {
     int32_t g = Text$get_grapheme_fast(state, index);
     return (g) >= 0 ? (ucs4_t)(g) : synthetic_graphemes[-(g)-1].main_codepoint;
 }
 
-PUREFUNC public int32_t Text$compare(const void *va, const void *vb, const TypeInfo_t *info)
-{
+PUREFUNC public int32_t Text$compare(const void *va, const void *vb, const TypeInfo_t *info) {
     (void)info;
     if (va == vb) return 0;
-    const Text_t a = *(const Text_t*)va;
-    const Text_t b = *(const Text_t*)vb;
+    const Text_t a = *(const Text_t *)va;
+    const Text_t b = *(const Text_t *)vb;
 
     // TODO: make this smarter and more efficient
     int64_t len = MAX(a.length, b.length);
@@ -1073,31 +1042,21 @@ PUREFUNC public int32_t Text$compare(const void *va, const void *vb, const TypeI
         if (ai == bi) continue;
         int32_t cmp;
         if (ai > 0 && bi > 0) {
-            cmp = u32_cmp((ucs4_t*)&ai, (ucs4_t*)&bi, 1);
+            cmp = u32_cmp((ucs4_t *)&ai, (ucs4_t *)&bi, 1);
         } else if (ai > 0) {
-            cmp = u32_cmp2(
-                (ucs4_t*)&ai, 1,
-                GRAPHEME_CODEPOINTS(bi),
-                NUM_GRAPHEME_CODEPOINTS(bi));
+            cmp = u32_cmp2((ucs4_t *)&ai, 1, GRAPHEME_CODEPOINTS(bi), NUM_GRAPHEME_CODEPOINTS(bi));
         } else if (bi > 0) {
-            cmp = u32_cmp2(
-                GRAPHEME_CODEPOINTS(ai),
-                NUM_GRAPHEME_CODEPOINTS(ai),
-                (ucs4_t*)&bi, 1);
+            cmp = u32_cmp2(GRAPHEME_CODEPOINTS(ai), NUM_GRAPHEME_CODEPOINTS(ai), (ucs4_t *)&bi, 1);
         } else {
-            cmp = u32_cmp2(
-                GRAPHEME_CODEPOINTS(ai),
-                NUM_GRAPHEME_CODEPOINTS(ai),
-                GRAPHEME_CODEPOINTS(bi),
-                NUM_GRAPHEME_CODEPOINTS(bi));
+            cmp = u32_cmp2(GRAPHEME_CODEPOINTS(ai), NUM_GRAPHEME_CODEPOINTS(ai), GRAPHEME_CODEPOINTS(bi),
+                           NUM_GRAPHEME_CODEPOINTS(bi));
         }
         if (cmp != 0) return cmp;
     }
     return 0;
 }
 
-bool _matches(TextIter_t *text_state, TextIter_t *target_state, int64_t pos)
-{
+bool _matches(TextIter_t *text_state, TextIter_t *target_state, int64_t pos) {
     for (int64_t i = 0; i < target_state->stack[0].text.length; i++) {
         int32_t text_i = Text$get_grapheme_fast(text_state, pos + i);
         int32_t prefix_i = Text$get_grapheme_fast(target_state, i);
@@ -1106,10 +1065,8 @@ bool _matches(TextIter_t *text_state, TextIter_t *target_state, int64_t pos)
     return true;
 }
 
-PUREFUNC public bool Text$starts_with(Text_t text, Text_t prefix, Text_t *remainder)
-{
-    if (text.length < prefix.length)
-        return false;
+PUREFUNC public bool Text$starts_with(Text_t text, Text_t prefix, Text_t *remainder) {
+    if (text.length < prefix.length) return false;
     TextIter_t text_state = NEW_TEXT_ITER_STATE(text), prefix_state = NEW_TEXT_ITER_STATE(prefix);
     if (_matches(&text_state, &prefix_state, 0)) {
         if (remainder) *remainder = Text$from(text, Int$from_int64(prefix.length + 1));
@@ -1120,10 +1077,8 @@ PUREFUNC public bool Text$starts_with(Text_t text, Text_t prefix, Text_t *remain
     }
 }
 
-PUREFUNC public bool Text$ends_with(Text_t text, Text_t suffix, Text_t *remainder)
-{
-    if (text.length < suffix.length)
-        return false;
+PUREFUNC public bool Text$ends_with(Text_t text, Text_t suffix, Text_t *remainder) {
+    if (text.length < suffix.length) return false;
     TextIter_t text_state = NEW_TEXT_ITER_STATE(text), suffix_state = NEW_TEXT_ITER_STATE(suffix);
     if (_matches(&text_state, &suffix_state, text.length - suffix.length)) {
         if (remainder) *remainder = Text$to(text, Int$from_int64(text.length - suffix.length));
@@ -1134,18 +1089,17 @@ PUREFUNC public bool Text$ends_with(Text_t text, Text_t suffix, Text_t *remainde
     }
 }
 
-public Text_t Text$without_prefix(Text_t text, Text_t prefix)
-{
+public
+Text_t Text$without_prefix(Text_t text, Text_t prefix) {
     return Text$starts_with(text, prefix, NULL) ? Text$slice(text, I(prefix.length + 1), I(text.length)) : text;
 }
 
-public Text_t Text$without_suffix(Text_t text, Text_t suffix)
-{
+public
+Text_t Text$without_suffix(Text_t text, Text_t suffix) {
     return Text$ends_with(text, suffix, NULL) ? Text$slice(text, I(1), I(text.length - suffix.length)) : text;
 }
 
-static bool _has_grapheme(TextIter_t *text, int32_t g)
-{
+static bool _has_grapheme(TextIter_t *text, int32_t g) {
     for (int64_t t = 0; t < text->stack[0].text.length; t++) {
         if (g == Text$get_grapheme_fast(text, t)) {
             return true;
@@ -1154,8 +1108,8 @@ static bool _has_grapheme(TextIter_t *text, int32_t g)
     return false;
 }
 
-public Text_t Text$trim(Text_t text, Text_t to_trim, bool left, bool right)
-{
+public
+Text_t Text$trim(Text_t text, Text_t to_trim, bool left, bool right) {
     int64_t first = 0;
     TextIter_t text_state = NEW_TEXT_ITER_STATE(text), trim_state = NEW_TEXT_ITER_STATE(to_trim);
     if (left) {
@@ -1163,28 +1117,29 @@ public Text_t Text$trim(Text_t text, Text_t to_trim, bool left, bool right)
             first += 1;
         }
     }
-    int64_t last = text.length-1;
+    int64_t last = text.length - 1;
     if (right) {
         while (last >= first && _has_grapheme(&trim_state, Text$get_grapheme_fast(&text_state, last))) {
             last -= 1;
         }
     }
-    return (first != 0 || last != text.length-1) ? Text$slice(text, I(first+1), I(last+1)) : text;
+    return (first != 0 || last != text.length - 1) ? Text$slice(text, I(first + 1), I(last + 1)) : text;
 }
 
-public Text_t Text$translate(Text_t text, Table_t translations)
-{
+public
+Text_t Text$translate(Text_t text, Table_t translations) {
     TextIter_t text_state = NEW_TEXT_ITER_STATE(text);
     Text_t result = EMPTY_TEXT;
     int64_t span_start = 0;
     List_t replacement_list = translations.entries;
-    for (int64_t i = 0; i < text.length; ) {
+    for (int64_t i = 0; i < text.length;) {
         for (int64_t r = 0; r < replacement_list.length; r++) {
-            struct { Text_t target, replacement; } *entry = replacement_list.data + r*replacement_list.stride;
+            struct {
+                Text_t target, replacement;
+            } *entry = replacement_list.data + r * replacement_list.stride;
             TextIter_t target_state = NEW_TEXT_ITER_STATE(entry->target);
             if (_matches(&text_state, &target_state, i)) {
-                if (i > span_start)
-                    result = concat2(result, Text$slice(text, I(span_start+1), I(i)));
+                if (i > span_start) result = concat2(result, Text$slice(text, I(span_start + 1), I(i)));
 
                 result = concat2(result, entry->replacement);
                 i += entry->target.length;
@@ -1193,22 +1148,21 @@ public Text_t Text$translate(Text_t text, Table_t translations)
             }
         }
         i += 1;
-      found_match: continue;
+    found_match:
+        continue;
     }
-    if (span_start < text.length)
-        result = concat2(result, Text$slice(text, I(span_start+1), I(text.length)));
+    if (span_start < text.length) result = concat2(result, Text$slice(text, I(span_start + 1), I(text.length)));
     return result;
 }
 
-public Text_t Text$replace(Text_t text, Text_t target, Text_t replacement)
-{
+public
+Text_t Text$replace(Text_t text, Text_t target, Text_t replacement) {
     TextIter_t text_state = NEW_TEXT_ITER_STATE(text), target_state = NEW_TEXT_ITER_STATE(target);
     Text_t result = EMPTY_TEXT;
     int64_t span_start = 0;
-    for (int64_t i = 0; i < text.length; ) {
+    for (int64_t i = 0; i < text.length;) {
         if (_matches(&text_state, &target_state, i)) {
-            if (i > span_start)
-                result = concat2(result, Text$slice(text, I(span_start+1), I(i)));
+            if (i > span_start) result = concat2(result, Text$slice(text, I(span_start + 1), I(i)));
 
             result = concat2(result, replacement);
             i += target.length;
@@ -1217,34 +1171,31 @@ public Text_t Text$replace(Text_t text, Text_t target, Text_t replacement)
             i += 1;
         }
     }
-    if (span_start < text.length)
-        result = concat2(result, Text$slice(text, I(span_start+1), I(text.length)));
+    if (span_start < text.length) result = concat2(result, Text$slice(text, I(span_start + 1), I(text.length)));
     return result;
 }
 
-public PUREFUNC bool Text$has(Text_t text, Text_t target)
-{
+public
+PUREFUNC bool Text$has(Text_t text, Text_t target) {
     TextIter_t text_state = NEW_TEXT_ITER_STATE(text), target_state = NEW_TEXT_ITER_STATE(target);
     for (int64_t i = 0; i < text.length; i++) {
-        if (_matches(&text_state, &target_state, i))
-            return true;
+        if (_matches(&text_state, &target_state, i)) return true;
     }
     return false;
 }
 
-public List_t Text$split(Text_t text, Text_t delimiters)
-{
-    if (delimiters.length == 0)
-        return Text$clusters(text);
+public
+List_t Text$split(Text_t text, Text_t delimiters) {
+    if (delimiters.length == 0) return Text$clusters(text);
 
     TextIter_t text_state = NEW_TEXT_ITER_STATE(text), delim_state = NEW_TEXT_ITER_STATE(delimiters);
     List_t splits = {};
-    for (int64_t i = 0; i < text.length; ) {
+    for (int64_t i = 0; i < text.length;) {
         int64_t span_len = 0;
         while (i + span_len < text.length && !_matches(&text_state, &delim_state, i + span_len)) {
             span_len += 1;
         }
-        Text_t slice = Text$slice(text, I(i+1), I(i+span_len));
+        Text_t slice = Text$slice(text, I(i + 1), I(i + span_len));
         List$insert(&splits, &slice, I(0), sizeof(slice));
         i += span_len + delimiters.length;
         if (i == text.length) {
@@ -1255,20 +1206,20 @@ public List_t Text$split(Text_t text, Text_t delimiters)
     return splits;
 }
 
-public List_t Text$split_any(Text_t text, Text_t delimiters)
-{
-    if (delimiters.length == 0)
-        return List(text);
+public
+List_t Text$split_any(Text_t text, Text_t delimiters) {
+    if (delimiters.length == 0) return List(text);
 
     TextIter_t text_state = NEW_TEXT_ITER_STATE(text), delim_state = NEW_TEXT_ITER_STATE(delimiters);
     List_t splits = {};
-    for (int64_t i = 0; i < text.length; ) {
+    for (int64_t i = 0; i < text.length;) {
         int64_t span_len = 0;
-        while (i + span_len < text.length && !_has_grapheme(&delim_state, Text$get_grapheme_fast(&text_state, i + span_len))) {
+        while (i + span_len < text.length
+               && !_has_grapheme(&delim_state, Text$get_grapheme_fast(&text_state, i + span_len))) {
             span_len += 1;
         }
         bool trailing_delim = i + span_len < text.length;
-        Text_t slice = Text$slice(text, I(i+1), I(i+span_len));
+        Text_t slice = Text$slice(text, I(i + 1), I(i + span_len));
         List$insert(&splits, &slice, I(0), sizeof(slice));
         i += span_len + 1;
         while (i < text.length && _has_grapheme(&delim_state, Text$get_grapheme_fast(&text_state, i))) {
@@ -1288,8 +1239,7 @@ typedef struct {
     Text_t delimiter;
 } split_iter_state_t;
 
-static OptionalText_t next_split(split_iter_state_t *state)
-{
+static OptionalText_t next_split(split_iter_state_t *state) {
     Text_t text = state->state.stack[0].text;
     if (state->i >= text.length) {
         if (state->delimiter.length > 0 && state->i == text.length) { // special case
@@ -1310,21 +1260,20 @@ static OptionalText_t next_split(split_iter_state_t *state)
     while (i + span_len < text.length && !_matches(&state->state, &delim_state, i + span_len)) {
         span_len += 1;
     }
-    Text_t slice = Text$slice(text, I(i+1), I(i+span_len));
+    Text_t slice = Text$slice(text, I(i + 1), I(i + span_len));
     state->i = i + span_len + state->delimiter.length;
     return slice;
 }
 
-public Closure_t Text$by_split(Text_t text, Text_t delimiter)
-{
+public
+Closure_t Text$by_split(Text_t text, Text_t delimiter) {
     return (Closure_t){
-        .fn=(void*)next_split,
-        .userdata=new(split_iter_state_t, .state=NEW_TEXT_ITER_STATE(text), .i=0, .delimiter=delimiter),
+        .fn = (void *)next_split,
+        .userdata = new (split_iter_state_t, .state = NEW_TEXT_ITER_STATE(text), .i = 0, .delimiter = delimiter),
     };
 }
 
-static OptionalText_t next_split_any(split_iter_state_t *state)
-{
+static OptionalText_t next_split_any(split_iter_state_t *state) {
     Text_t text = state->state.stack[0].text;
     if (state->i >= text.length) {
         if (state->delimiter.length > 0 && state->i == text.length) { // special case
@@ -1335,7 +1284,7 @@ static OptionalText_t next_split_any(split_iter_state_t *state)
     }
 
     if (state->delimiter.length == 0) { // special case
-        Text_t ret = Text$cluster(text, I(state->i+1));
+        Text_t ret = Text$cluster(text, I(state->i + 1));
         state->i += 1;
         return ret;
     }
@@ -1343,10 +1292,11 @@ static OptionalText_t next_split_any(split_iter_state_t *state)
     TextIter_t delim_state = NEW_TEXT_ITER_STATE(state->delimiter);
     int64_t i = state->i;
     int64_t span_len = 0;
-    while (i + span_len < text.length && !_has_grapheme(&delim_state, Text$get_grapheme_fast(&state->state, i + span_len))) {
+    while (i + span_len < text.length
+           && !_has_grapheme(&delim_state, Text$get_grapheme_fast(&state->state, i + span_len))) {
         span_len += 1;
     }
-    Text_t slice = Text$slice(text, I(i+1), I(i+span_len));
+    Text_t slice = Text$slice(text, I(i + 1), I(i + span_len));
     i += span_len + 1;
     while (i < text.length && _has_grapheme(&delim_state, Text$get_grapheme_fast(&state->state, i))) {
         i += 1;
@@ -1355,18 +1305,16 @@ static OptionalText_t next_split_any(split_iter_state_t *state)
     return slice;
 }
 
-public Closure_t Text$by_split_any(Text_t text, Text_t delimiters)
-{
+public
+Closure_t Text$by_split_any(Text_t text, Text_t delimiters) {
     return (Closure_t){
-        .fn=(void*)next_split_any,
-        .userdata=new(split_iter_state_t, .state=NEW_TEXT_ITER_STATE(text), .i=0, .delimiter=delimiters),
+        .fn = (void *)next_split_any,
+        .userdata = new (split_iter_state_t, .state = NEW_TEXT_ITER_STATE(text), .i = 0, .delimiter = delimiters),
     };
 }
 
-PUREFUNC public bool Text$equal_values(Text_t a, Text_t b)
-{
-    if (a.length != b.length)
-        return false;
+PUREFUNC public bool Text$equal_values(Text_t a, Text_t b) {
+    if (a.length != b.length) return false;
     int64_t len = a.length;
     TextIter_t a_state = NEW_TEXT_ITER_STATE(a), b_state = NEW_TEXT_ITER_STATE(b);
     // TODO: make this smarter and more efficient
@@ -1378,17 +1326,14 @@ PUREFUNC public bool Text$equal_values(Text_t a, Text_t b)
     return true;
 }
 
-PUREFUNC public bool Text$equal(const void *a, const void *b, const TypeInfo_t *info)
-{
+PUREFUNC public bool Text$equal(const void *a, const void *b, const TypeInfo_t *info) {
     (void)info;
     if (a == b) return true;
-    return Text$equal_values(*(Text_t*)a, *(Text_t*)b);
+    return Text$equal_values(*(Text_t *)a, *(Text_t *)b);
 }
 
-PUREFUNC public bool Text$equal_ignoring_case(Text_t a, Text_t b, Text_t language)
-{
-    if (a.length != b.length)
-        return false;
+PUREFUNC public bool Text$equal_ignoring_case(Text_t a, Text_t b, Text_t language) {
+    if (a.length != b.length) return false;
     int64_t len = a.length;
     TextIter_t a_state = NEW_TEXT_ITER_STATE(a), b_state = NEW_TEXT_ITER_STATE(b);
     const char *uc_language = Text$as_c_string(language);
@@ -1396,76 +1341,79 @@ PUREFUNC public bool Text$equal_ignoring_case(Text_t a, Text_t b, Text_t languag
         int32_t ai = Text$get_grapheme_fast(&a_state, i);
         int32_t bi = Text$get_grapheme_fast(&b_state, i);
         if (ai != bi) {
-            const ucs4_t *a_codepoints = ai >= 0 ? (ucs4_t*)&ai : GRAPHEME_CODEPOINTS(ai);
+            const ucs4_t *a_codepoints = ai >= 0 ? (ucs4_t *)&ai : GRAPHEME_CODEPOINTS(ai);
             int64_t a_len = ai >= 0 ? 1 : NUM_GRAPHEME_CODEPOINTS(ai);
 
-            const ucs4_t *b_codepoints = bi >= 0 ? (ucs4_t*)&bi : GRAPHEME_CODEPOINTS(bi);
+            const ucs4_t *b_codepoints = bi >= 0 ? (ucs4_t *)&bi : GRAPHEME_CODEPOINTS(bi);
             int64_t b_len = bi >= 0 ? 1 : NUM_GRAPHEME_CODEPOINTS(bi);
 
             int cmp = 0;
             (void)u32_casecmp(a_codepoints, (size_t)a_len, b_codepoints, (size_t)b_len, uc_language, UNINORM_NFC, &cmp);
-            if (cmp != 0)
-                return false;
+            if (cmp != 0) return false;
         }
     }
     return true;
 }
 
-public Text_t Text$upper(Text_t text, Text_t language)
-{
+public
+Text_t Text$upper(Text_t text, Text_t language) {
     if (text.length == 0) return text;
     List_t codepoints = Text$utf32_codepoints(text);
     const char *uc_language = Text$as_c_string(language);
     size_t out_len = 0;
     ucs4_t *upper = u32_toupper(codepoints.data, (size_t)codepoints.length, uc_language, UNINORM_NFC, NULL, &out_len);
-    Text_t ret = Text$from_codepoints((List_t){.data=upper, .length=(int64_t)out_len, .stride=sizeof(int32_t)});
+    Text_t ret = Text$from_codepoints((List_t){.data = upper, .length = (int64_t)out_len, .stride = sizeof(int32_t)});
     return ret;
 }
 
-public Text_t Text$lower(Text_t text, Text_t language)
-{
+public
+Text_t Text$lower(Text_t text, Text_t language) {
     if (text.length == 0) return text;
     List_t codepoints = Text$utf32_codepoints(text);
     const char *uc_language = Text$as_c_string(language);
     size_t out_len = 0;
     ucs4_t *lower = u32_tolower(codepoints.data, (size_t)codepoints.length, uc_language, UNINORM_NFC, NULL, &out_len);
-    Text_t ret = Text$from_codepoints((List_t){.data=lower, .length=(int64_t)out_len, .stride=sizeof(int32_t)});
+    Text_t ret = Text$from_codepoints((List_t){.data = lower, .length = (int64_t)out_len, .stride = sizeof(int32_t)});
     return ret;
 }
 
-public Text_t Text$title(Text_t text, Text_t language)
-{
+public
+Text_t Text$title(Text_t text, Text_t language) {
     if (text.length == 0) return text;
     List_t codepoints = Text$utf32_codepoints(text);
     const char *uc_language = Text$as_c_string(language);
     size_t out_len = 0;
     ucs4_t *title = u32_totitle(codepoints.data, (size_t)codepoints.length, uc_language, UNINORM_NFC, NULL, &out_len);
-    Text_t ret = Text$from_codepoints((List_t){.data=title, .length=(int64_t)out_len, .stride=sizeof(int32_t)});
+    Text_t ret = Text$from_codepoints((List_t){.data = title, .length = (int64_t)out_len, .stride = sizeof(int32_t)});
     return ret;
 }
 
-public Text_t Text$quoted(Text_t text, bool colorize, Text_t quotation_mark)
-{
-    if (quotation_mark.length != 1)
-        fail("Invalid quote text: ", quotation_mark, " (must have length == 1)");
+public
+Text_t Text$quoted(Text_t text, bool colorize, Text_t quotation_mark) {
+    if (quotation_mark.length != 1) fail("Invalid quote text: ", quotation_mark, " (must have length == 1)");
 
     Text_t ret = colorize ? Text("\x1b[35m") : EMPTY_TEXT;
-    if (!Text$equal_values(quotation_mark, Text("\"")) && !Text$equal_values(quotation_mark, Text("'")) && !Text$equal_values(quotation_mark, Text("`")))
+    if (!Text$equal_values(quotation_mark, Text("\"")) && !Text$equal_values(quotation_mark, Text("'"))
+        && !Text$equal_values(quotation_mark, Text("`")))
         ret = concat2_assuming_safe(ret, Text("$"));
 
     ret = concat2_assuming_safe(ret, quotation_mark);
     int32_t quote_char = Text$get_grapheme(quotation_mark, 0);
 
-#define flush_unquoted() ({ \
-                          if (unquoted_span > 0) { \
-                              ret = concat2_assuming_safe(ret, Text$slice(text, I(i-unquoted_span+1), I(i))); \
-                              unquoted_span = 0; \
-                          } })
-#define add_escaped(str) ({ \
-                          flush_unquoted(); \
-                          if (colorize) ret = concat2_assuming_safe(ret, Text("\x1b[34;1m")); \
-                          ret = concat2_assuming_safe(ret, Text("\\" str)); \
-                          if (colorize) ret = concat2_assuming_safe(ret, Text("\x1b[0;35m")); })
+#define flush_unquoted()                                                                                               \
+    ({                                                                                                                 \
+        if (unquoted_span > 0) {                                                                                       \
+            ret = concat2_assuming_safe(ret, Text$slice(text, I(i - unquoted_span + 1), I(i)));                        \
+            unquoted_span = 0;                                                                                         \
+        }                                                                                                              \
+    })
+#define add_escaped(str)                                                                                               \
+    ({                                                                                                                 \
+        flush_unquoted();                                                                                              \
+        if (colorize) ret = concat2_assuming_safe(ret, Text("\x1b[34;1m"));                                            \
+        ret = concat2_assuming_safe(ret, Text("\\" str));                                                              \
+        if (colorize) ret = concat2_assuming_safe(ret, Text("\x1b[0;35m"));                                            \
+    })
     TextIter_t state = NEW_TEXT_ITER_STATE(text);
     int64_t unquoted_span = 0;
     int64_t i = 0;
@@ -1488,8 +1436,10 @@ public Text_t Text$quoted(Text_t text, bool colorize, Text_t quotation_mark)
             add_escaped("$");
             break;
         }
-        case '\x00' ... '\x06': case '\x0E' ... '\x1A':
-        case '\x1C' ... '\x1F': case '\x7F' ... '\x7F': {
+        case '\x00' ... '\x06':
+        case '\x0E' ... '\x1A':
+        case '\x1C' ... '\x1F':
+        case '\x7F' ... '\x7F': {
             flush_unquoted();
             if (colorize) ret = concat2_assuming_safe(ret, Text("\x1b[34;1m"));
             ret = concat2_assuming_safe(ret, Text("\\x"));
@@ -1499,8 +1449,7 @@ public Text_t Text$quoted(Text_t text, bool colorize, Text_t quotation_mark)
                 '\0',
             };
             ret = concat2_assuming_safe(ret, Text$from_strn(tmp, 2));
-            if (colorize)
-                ret = concat2_assuming_safe(ret, Text("\x1b[0;35m"));
+            if (colorize) ret = concat2_assuming_safe(ret, Text("\x1b[0;35m"));
             break;
         }
         default: {
@@ -1522,21 +1471,19 @@ public Text_t Text$quoted(Text_t text, bool colorize, Text_t quotation_mark)
 #undef flush_unquoted
 
     ret = concat2_assuming_safe(ret, quotation_mark);
-    if (colorize)
-        ret = concat2_assuming_safe(ret, Text("\x1b[m"));
+    if (colorize) ret = concat2_assuming_safe(ret, Text("\x1b[m"));
 
     return ret;
 }
 
-public Text_t Text$as_text(const void *vtext, bool colorize, const TypeInfo_t *info)
-{
+public
+Text_t Text$as_text(const void *vtext, bool colorize, const TypeInfo_t *info) {
     (void)info;
     if (!vtext) return info && info->TextInfo.lang ? Text$from_str(info->TextInfo.lang) : Text("Text");
 
-    Text_t text = *(Text_t*)vtext;
+    Text_t text = *(Text_t *)vtext;
     // Figure out the best quotation mark to use:
-    bool has_double_quote = false, has_backtick = false,
-         has_single_quote = false, needs_escapes = false;
+    bool has_double_quote = false, has_backtick = false, has_single_quote = false, needs_escapes = false;
     TextIter_t state = NEW_TEXT_ITER_STATE(text);
     for (int64_t i = 0; i < text.length; i++) {
         int32_t g = Text$get_grapheme_fast(&state, i);
@@ -1554,39 +1501,33 @@ public Text_t Text$as_text(const void *vtext, bool colorize, const TypeInfo_t *i
     // needing to escape them by using single quotes, but only if we don't have
     // single quotes or need to escape anything else (because single quotes
     // don't have interpolation):
-    if (has_double_quote && !has_single_quote)
-        quote = Text("'");
+    if (has_double_quote && !has_single_quote) quote = Text("'");
     // If there is a double quote, but no backtick, we can save a bit of
     // escaping by using backtick instead of double quote:
-    else if (has_double_quote && has_single_quote && !has_backtick && !needs_escapes)
-        quote = Text("`");
+    else if (has_double_quote && has_single_quote && !has_backtick && !needs_escapes) quote = Text("`");
     // Otherwise fall back to double quotes as the default quoting style:
-    else
-        quote = Text("\"");
+    else quote = Text("\"");
 
     Text_t as_text = Text$quoted(text, colorize, quote);
     if (info && info->TextInfo.lang && info != &Text$info)
-        as_text = Text$concat(
-            colorize ? Text("\x1b[1m$") : Text("$"),
-            Text$from_str(info->TextInfo.lang),
-            colorize ? Text("\x1b[0m") : Text(""),
-            as_text);
+        as_text = Text$concat(colorize ? Text("\x1b[1m$") : Text("$"), Text$from_str(info->TextInfo.lang),
+                              colorize ? Text("\x1b[0m") : Text(""), as_text);
     return as_text;
 }
 
-public Text_t Text$join(Text_t glue, List_t pieces)
-{
+public
+Text_t Text$join(Text_t glue, List_t pieces) {
     if (pieces.length == 0) return EMPTY_TEXT;
 
-    Text_t result = *(Text_t*)pieces.data;
+    Text_t result = *(Text_t *)pieces.data;
     for (int64_t i = 1; i < pieces.length; i++) {
-        result = Text$concat(result, glue, *(Text_t*)(pieces.data + i*pieces.stride));
+        result = Text$concat(result, glue, *(Text_t *)(pieces.data + i * pieces.stride));
     }
     return result;
 }
 
-public List_t Text$clusters(Text_t text)
-{
+public
+List_t Text$clusters(Text_t text) {
     List_t clusters = {};
     for (int64_t i = 1; i <= text.length; i++) {
         Text_t cluster = Text$slice(text, I(i), I(i));
@@ -1595,9 +1536,9 @@ public List_t Text$clusters(Text_t text)
     return clusters;
 }
 
-public List_t Text$utf32_codepoints(Text_t text)
-{
-    List_t codepoints = {.atomic=1};
+public
+List_t Text$utf32_codepoints(Text_t text) {
+    List_t codepoints = {.atomic = 1};
     TextIter_t state = NEW_TEXT_ITER_STATE(text);
     for (int64_t i = 0; i < text.length; i++) {
         int32_t grapheme = Text$get_grapheme_fast(&state, i);
@@ -1613,24 +1554,23 @@ public List_t Text$utf32_codepoints(Text_t text)
     return codepoints;
 }
 
-public List_t Text$utf8_bytes(Text_t text)
-{
+public
+List_t Text$utf8_bytes(Text_t text) {
     const char *str = Text$as_c_string(text);
-    return (List_t){.length=(int64_t)strlen(str), .stride=1, .atomic=1, .data=(void*)str};
+    return (List_t){.length = (int64_t)strlen(str), .stride = 1, .atomic = 1, .data = (void *)str};
 }
 
-static INLINE const char *codepoint_name(ucs4_t c)
-{
+static INLINE const char *codepoint_name(ucs4_t c) {
     char *name = GC_MALLOC_ATOMIC(UNINAME_MAX);
     char *found_name = unicode_character_name(c, name);
     if (found_name) return found_name;
     const uc_block_t *block = uc_block(c);
     assert(block);
-    return String(block->name, "-", hex(c, .no_prefix=true, .uppercase=true));
+    return String(block->name, "-", hex(c, .no_prefix = true, .uppercase = true));
 }
 
-public List_t Text$codepoint_names(Text_t text)
-{
+public
+List_t Text$codepoint_names(Text_t text) {
     List_t names = {};
     TextIter_t state = NEW_TEXT_ITER_STATE(text);
     for (int64_t i = 0; i < text.length; i++) {
@@ -1650,81 +1590,78 @@ public List_t Text$codepoint_names(Text_t text)
     return names;
 }
 
-public Text_t Text$from_codepoints(List_t codepoints)
-{
-    if (codepoints.stride != sizeof(uint32_t))
-        List$compact(&codepoints, sizeof(uint32_t));
+public
+Text_t Text$from_codepoints(List_t codepoints) {
+    if (codepoints.stride != sizeof(uint32_t)) List$compact(&codepoints, sizeof(uint32_t));
 
     List_t graphemes = {};
     Table_t unique_clusters = {};
-    const uint32_t *pos = (const uint32_t*)codepoints.data;
-    const uint32_t *end = (const uint32_t*)&pos[codepoints.length];
+    const uint32_t *pos = (const uint32_t *)codepoints.data;
+    const uint32_t *end = (const uint32_t *)&pos[codepoints.length];
     // Iterate over grapheme clusters
-    for (const uint32_t *next; (next=u32_grapheme_next(pos, end)); pos = next) {
+    for (const uint32_t *next; (next = u32_grapheme_next(pos, end)); pos = next) {
         // Buffer for normalized cluster:
         uint32_t buf[256];
-        size_t u32_normlen = sizeof(buf)/sizeof(buf[0]);
-        uint32_t *u32s_normalized = u32_normalize(UNINORM_NFC, pos, (size_t)(next-pos), buf, &u32_normlen);
+        size_t u32_normlen = sizeof(buf) / sizeof(buf[0]);
+        uint32_t *u32s_normalized = u32_normalize(UNINORM_NFC, pos, (size_t)(next - pos), buf, &u32_normlen);
 
         int32_t g = get_synthetic_grapheme(u32s_normalized, (int64_t)u32_normlen);
         List$insert(&graphemes, &g, I(0), sizeof(int32_t));
-        Table$get_or_setdefault(
-            &unique_clusters, int32_t, uint8_t, g, (uint8_t)unique_clusters.entries.length,
-            Table$info(&Int32$info, &Byte$info));
+        Table$get_or_setdefault(&unique_clusters, int32_t, uint8_t, g, (uint8_t)unique_clusters.entries.length,
+                                Table$info(&Int32$info, &Byte$info));
 
         if (u32s_normalized != buf) free(u32s_normalized);
 
         if (unique_clusters.entries.length == 256) {
             List_t remaining_codepoints = {
-                .length=(int64_t)(end-next),
-                .data=(void*)next,
-                .stride=sizeof(int32_t),
+                .length = (int64_t)(end - next),
+                .data = (void *)next,
+                .stride = sizeof(int32_t),
             };
-            return concat2_assuming_safe(Text$from_components(graphemes, unique_clusters), Text$from_codepoints(remaining_codepoints));
+            return concat2_assuming_safe(Text$from_components(graphemes, unique_clusters),
+                                         Text$from_codepoints(remaining_codepoints));
         }
     }
     return Text$from_components(graphemes, unique_clusters);
 }
 
-public OptionalText_t Text$from_codepoint_names(List_t codepoint_names)
-{
+public
+OptionalText_t Text$from_codepoint_names(List_t codepoint_names) {
     List_t codepoints = {};
     for (int64_t i = 0; i < codepoint_names.length; i++) {
-        Text_t *name = ((Text_t*)(codepoint_names.data + i*codepoint_names.stride));
+        Text_t *name = ((Text_t *)(codepoint_names.data + i * codepoint_names.stride));
         const char *name_str = Text$as_c_string(*name);
         ucs4_t codepoint = unicode_name_character(name_str);
-        if (codepoint == UNINAME_INVALID)
-            return NONE_TEXT;
+        if (codepoint == UNINAME_INVALID) return NONE_TEXT;
         List$insert(&codepoints, &codepoint, I_small(0), sizeof(ucs4_t));
     }
     return Text$from_codepoints(codepoints);
 }
 
-public OptionalText_t Text$from_bytes(List_t bytes)
-{
-    if (bytes.stride != sizeof(int8_t))
-        List$compact(&bytes, sizeof(int8_t));
+public
+OptionalText_t Text$from_bytes(List_t bytes) {
+    if (bytes.stride != sizeof(int8_t)) List$compact(&bytes, sizeof(int8_t));
 
     return Text$from_strn(bytes.data, (size_t)bytes.length);
 }
 
-public List_t Text$lines(Text_t text)
-{
+public
+List_t Text$lines(Text_t text) {
     List_t lines = {};
     TextIter_t state = NEW_TEXT_ITER_STATE(text);
     for (int64_t i = 0, line_start = 0; i < text.length; i++) {
         int32_t grapheme = Text$get_grapheme_fast(&state, i);
         if (grapheme == '\r' && Text$get_grapheme_fast(&state, i + 1) == '\n') { // CRLF
-            Text_t line = Text$slice(text, I(line_start+1), I(i));
+            Text_t line = Text$slice(text, I(line_start + 1), I(i));
             List$insert(&lines, &line, I_small(0), sizeof(Text_t));
             i += 1; // skip one extra for CR
             line_start = i + 1;
         } else if (grapheme == '\n') { // newline
-            Text_t line = Text$slice(text, I(line_start+1), I(i));
+            Text_t line = Text$slice(text, I(line_start + 1), I(i));
             List$insert(&lines, &line, I_small(0), sizeof(Text_t));
             line_start = i + 1;
-        } else if (i == text.length-1 && line_start != i) { // last line
-            Text_t line = Text$slice(text, I(line_start+1), I(i+1));
+        } else if (i == text.length - 1 && line_start != i) { // last line
+            Text_t line = Text$slice(text, I(line_start + 1), I(i + 1));
             List$insert(&lines, &line, I_small(0), sizeof(Text_t));
         }
     }
@@ -1736,21 +1673,20 @@ typedef struct {
     int64_t i;
 } line_iter_state_t;
 
-static OptionalText_t next_line(line_iter_state_t *state)
-{
+static OptionalText_t next_line(line_iter_state_t *state) {
     Text_t text = state->state.stack[0].text;
     for (int64_t i = state->i; i < text.length; i++) {
         int32_t grapheme = Text$get_grapheme_fast(&state->state, i);
         if (grapheme == '\r' && Text$get_grapheme_fast(&state->state, i + 1) == '\n') { // CRLF
-            Text_t line = Text$slice(text, I(state->i+1), I(i));
+            Text_t line = Text$slice(text, I(state->i + 1), I(i));
             state->i = i + 2; // skip one extra for CR
             return line;
         } else if (grapheme == '\n') { // newline
-            Text_t line = Text$slice(text, I(state->i+1), I(i));
+            Text_t line = Text$slice(text, I(state->i + 1), I(i));
             state->i = i + 1;
             return line;
-        } else if (i == text.length-1 && state->i != i) { // last line
-            Text_t line = Text$slice(text, I(state->i+1), I(i+1));
+        } else if (i == text.length - 1 && state->i != i) { // last line
+            Text_t line = Text$slice(text, I(state->i + 1), I(i + 1));
             state->i = i + 1;
             return line;
         }
@@ -1758,81 +1694,75 @@ static OptionalText_t next_line(line_iter_state_t *state)
     return NONE_TEXT;
 }
 
-public Closure_t Text$by_line(Text_t text)
-{
+public
+Closure_t Text$by_line(Text_t text) {
     return (Closure_t){
-        .fn=(void*)next_line,
-        .userdata=new(line_iter_state_t, .state=NEW_TEXT_ITER_STATE(text), .i=0),
+        .fn = (void *)next_line,
+        .userdata = new (line_iter_state_t, .state = NEW_TEXT_ITER_STATE(text), .i = 0),
     };
 }
 
-PUREFUNC public bool Text$is_none(const void *t, const TypeInfo_t *info)
-{
+PUREFUNC public bool Text$is_none(const void *t, const TypeInfo_t *info) {
     (void)info;
-    return ((Text_t*)t)->length < 0;
+    return ((Text_t *)t)->length < 0;
 }
 
-public Int_t Text$memory_size(Text_t text)
-{
+public
+Int_t Text$memory_size(Text_t text) {
     switch (text.tag) {
-    case TEXT_ASCII:
-        return Int$from_int64((int64_t)sizeof(Text_t) + (int64_t)sizeof(char[text.length]));
-    case TEXT_GRAPHEMES:
-        return Int$from_int64((int64_t)sizeof(Text_t) + (int64_t)sizeof(int32_t[text.length]));
+    case TEXT_ASCII: return Int$from_int64((int64_t)sizeof(Text_t) + (int64_t)sizeof(char[text.length]));
+    case TEXT_GRAPHEMES: return Int$from_int64((int64_t)sizeof(Text_t) + (int64_t)sizeof(int32_t[text.length]));
     case TEXT_BLOB:
-        return Int$from_int64((int64_t)sizeof(Text_t) + (int64_t)((void*)text.blob.bytes - (void*)text.blob.map) + (int64_t)sizeof(uint8_t[text.length]));
+        return Int$from_int64((int64_t)sizeof(Text_t) + (int64_t)((void *)text.blob.bytes - (void *)text.blob.map)
+                              + (int64_t)sizeof(uint8_t[text.length]));
     case TEXT_CONCAT:
-        return Int$plus(
-            Int$from_int64((int64_t)sizeof(Text_t)),
-            Int$plus(Text$memory_size(*text.left), Text$memory_size(*text.right)));
+        return Int$plus(Int$from_int64((int64_t)sizeof(Text_t)),
+                        Int$plus(Text$memory_size(*text.left), Text$memory_size(*text.right)));
     default: errx(1, "Invalid text tag: %d", text.tag);
     }
 }
 
-public Text_t Text$layout(Text_t text)
-{
+public
+Text_t Text$layout(Text_t text) {
     switch (text.tag) {
-    case TEXT_ASCII:
-        return Texts(Text("ASCII("), Int64$as_text((int64_t[1]){text.length}, false, NULL), Text(")"));
+    case TEXT_ASCII: return Texts(Text("ASCII("), Int64$as_text((int64_t[1]){text.length}, false, NULL), Text(")"));
     case TEXT_GRAPHEMES:
         return Texts(Text("Graphemes("), Int64$as_text((int64_t[1]){text.length}, false, NULL), Text(")"));
-    case TEXT_BLOB:
-        return Texts(Text("Blob("), Int64$as_text((int64_t[1]){text.length}, false, NULL), Text(")"));
+    case TEXT_BLOB: return Texts(Text("Blob("), Int64$as_text((int64_t[1]){text.length}, false, NULL), Text(")"));
     case TEXT_CONCAT:
         return Texts(Text("Concat("), Text$layout(*text.left), Text(", "), Text$layout(*text.right), Text(")"));
     default: errx(1, "Invalid text tag: %d", text.tag);
     }
 }
 
-public void Text$serialize(const void *obj, FILE *out, Table_t *pointers, const TypeInfo_t *info)
-{
+public
+void Text$serialize(const void *obj, FILE *out, Table_t *pointers, const TypeInfo_t *info) {
     (void)info;
-    const char *str = Text$as_c_string(*(Text_t*)obj);
+    const char *str = Text$as_c_string(*(Text_t *)obj);
     int64_t len = (int64_t)strlen(str);
     Int64$serialize(&len, out, pointers, &Int64$info);
     fwrite(str, sizeof(char), (size_t)len, out);
 }
 
-public void Text$deserialize(FILE *in, void *out, List_t *pointers, const TypeInfo_t *info)
-{
+public
+void Text$deserialize(FILE *in, void *out, List_t *pointers, const TypeInfo_t *info) {
     (void)info;
     int64_t len = 0;
     Int64$deserialize(in, &len, pointers, &Int64$info);
-    if (len < 0)
-        fail("Cannot deserialize text with a negative length!");
-    char *buf = GC_MALLOC_ATOMIC((size_t)len+1);
-    if (fread(buf, sizeof(char), (size_t)len, in) != (size_t)len)
-        fail("Not enough data in stream to deserialize");
-    buf[len+1] = '\0';
-    *(Text_t*)out = Text$from_strn(buf, (size_t)len);
-}
-
-public const TypeInfo_t Text$info = {
-    .size=sizeof(Text_t),
-    .align=__alignof__(Text_t),
-    .tag=TextInfo,
-    .TextInfo={.lang="Text"},
-    .metamethods=Text$metamethods,
+    if (len < 0) fail("Cannot deserialize text with a negative length!");
+    char *buf = GC_MALLOC_ATOMIC((size_t)len + 1);
+    if (fread(buf, sizeof(char), (size_t)len, in) != (size_t)len) fail("Not enough data in stream to deserialize");
+    buf[len] = '\0'; // buf holds len + 1 bytes, so the terminator belongs at index len (len + 1 is out of bounds)
+    *(Text_t *)out = Text$from_strn(buf, (size_t)len);
+}
+
+public
+const TypeInfo_t Text$info = {
+    .size = sizeof(Text_t),
+    .align = __alignof__(Text_t),
+    .tag = TextInfo,
+    .TextInfo = {.lang = "Text"},
+    .metamethods = Text$metamethods,
 };
 
 // vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
author	Bruce Hill <bruce@bruce-hill.com>	2025-08-23 19:28:08 -0400
committer	Bruce Hill <bruce@bruce-hill.com>	2025-08-23 19:28:08 -0400
commit	fcda36561d668f43bac91ea31cd55cbbd605d330 (patch)
tree	eb74c0b17df584af0fd8154422ad924e04c96cc2 /src/stdlib/text.c
parent	414b0c7472c87c5a013029aefef49e2dbc41e700 (diff)