Initial WIP first pass

author: Bruce Hill <bruce@bruce-hill.com> 2024-09-02 18:47:39 -0400
committer: Bruce Hill <bruce@bruce-hill.com> 2024-09-02 18:47:39 -0400
commit: 61e482f6f36aee6f72392a6188f2ec5c858b88fd (patch)
tree: bea4123fcc62dd834405ae89ce9fe260e90a0023
parent: f0f8f218703ebb4512b3cd3f9e06b86a7d9861b0 (diff)
38 files changed, 1857 insertions, 697 deletions
diff --git a/ast.c b/ast.c
index 6f1d20c9..b380a04f 100644
--- a/ast.c
+++ b/ast.c
@@ -35,9 +35,9 @@ static CORD optional_tagged_type(const char *tag, type_ast_t *ast);
 
 CORD xml_escape(CORD text)
 {
-    text = Text$replace(text, "&", "&amp;", I(-1));
-    text = Text$replace(text, "<", "&lt;", I(-1));
-    text = Text$replace(text, ">", "&gt;", I(-1));
+    text = CORD_replace(text, "&", "&amp;");
+    text = CORD_replace(text, "<", "&lt;");
+    text = CORD_replace(text, ">", "&gt;");
     return text;
 }
 
diff --git a/builtins/array.c b/builtins/array.c
index bf1fe4d8..63539559 100644
--- a/builtins/array.c
+++ b/builtins/array.c
@@ -12,12 +12,14 @@
 
 #include "array.h"
 #include "functions.h"
-#include "halfsiphash.h"
 #include "integers.h"
 #include "table.h"
+#include "text.h"
 #include "types.h"
 #include "util.h"
 
+#include "siphash.c"
+
 static inline int64_t get_padded_item_size(const TypeInfo *info)
 {
     int64_t size = info->ArrayInfo.item->size;
@@ -532,67 +534,38 @@ public bool Array$equal(const array_t *x, const array_t *y, const TypeInfo *type
     return (Array$compare(x, y, type) == 0);
 }
 
-public CORD Array$as_text(const array_t *arr, bool colorize, const TypeInfo *type)
+public Text_t Array$as_text(const array_t *arr, bool colorize, const TypeInfo *type)
 {
     if (!arr)
-        return CORD_all("[", generic_as_text(NULL, false, type->ArrayInfo.item), "]");
+        return Text$concat(Text$from_str("["), generic_as_text(NULL, false, type->ArrayInfo.item), Text$from_str("]"));
 
     const TypeInfo *item_type = type->ArrayInfo.item;
-    CORD c = "[";
+    Text_t text = Text$from_str("[");
     for (int64_t i = 0; i < arr->length; i++) {
         if (i > 0)
-            c = CORD_cat(c, ", ");
-        CORD item_cord = generic_as_text(arr->data + i*arr->stride, colorize, item_type);
-        c = CORD_cat(c, item_cord);
+            text = Text$concat(text, Text$from_str(", "));
+        Text_t item_text = generic_as_text(arr->data + i*arr->stride, colorize, item_type);
+        text = Text$concat(text, item_text);
     }
-    c = CORD_cat(c, "]");
-    return c;
+    text = Text$concat(text, Text$from_str("]"));
+    return text;
 }
 
-public uint32_t Array$hash(const array_t *arr, const TypeInfo *type)
+public uint64_t Array$hash(const array_t *arr, const TypeInfo *type)
 {
-    // Array hash is calculated as a rolling, compacting hash of the length of the array, followed by
-    // the hashes of its items (or the items themselves if they're small plain data)
-    // In other words, it reads in a chunk of items or item hashes, then when it fills up the chunk,
-    // hashes it down to a single item to start the next chunk. This repeats until the end, when it
-    // hashes the last chunk down to a uint32_t.
     const TypeInfo *item = type->ArrayInfo.item;
-    if (item->tag == PointerInfo || (item->tag == CustomInfo && item->CustomInfo.hash == NULL)) { // Raw data hash
-        uint8_t hash_batch[4 + 8*item->size];
-        memset(hash_batch, 0, sizeof(hash_batch));
-        uint8_t *p = hash_batch, *end = hash_batch + sizeof(hash_batch);
-        int64_t length = arr->length;
-        *p = (uint32_t)length;
-        p += sizeof(uint32_t);
-        for (int64_t i = 0; i < arr->length; i++) {
-            if (p >= end) {
-                uint32_t chunk_hash;
-                halfsiphash(&hash_batch, sizeof(hash_batch), TOMO_HASH_KEY, (uint8_t*)&chunk_hash, sizeof(chunk_hash));
-                p = hash_batch;
-                *(uint32_t*)p = chunk_hash;
-                p += sizeof(uint32_t);
-            }
-            memcpy((p += item->size), arr->data + i*arr->stride, item->size);
-        }
-        uint32_t hash;
-        halfsiphash(&hash_batch, ((int64_t)p) - ((int64_t)hash_batch), TOMO_HASH_KEY, (uint8_t*)&hash, sizeof(hash));
-        return hash;
+    siphash sh;
+    siphashinit(&sh, sizeof(uint64_t[arr->length]), (uint64_t*)TOMO_HASH_KEY);
+    if (item->tag == PointerInfo || (item->tag == CustomInfo && item->CustomInfo.hash == NULL && item->size == sizeof(void*))) { // Raw data hash
+        for (int64_t i = 0; i < arr->length; i++)
+            siphashadd64bits(&sh, (uint64_t)(arr->data + i*arr->stride));
     } else {
-        uint32_t hash_batch[16] = {(uint32_t)arr->length};
-        uint32_t *p = &hash_batch[1], *end = hash_batch + sizeof(hash_batch)/sizeof(hash_batch[0]);
         for (int64_t i = 0; i < arr->length; i++) {
-            if (p >= end) {
-                uint64_t chunk_hash;
-                halfsiphash(&hash_batch, sizeof(hash_batch), TOMO_HASH_KEY, (uint8_t*)&chunk_hash, sizeof(chunk_hash));
-                p = hash_batch;
-                *(p++) = chunk_hash;
-            }
-            *(p++) = generic_hash(arr->data + i*arr->stride, item);
+            uint64_t item_hash = generic_hash(arr->data + i*arr->stride, item);
+            siphashadd64bits(&sh, item_hash);
         }
-        uint32_t hash;
-        halfsiphash(&hash_batch, ((int64_t)p) - ((int64_t)hash_batch), TOMO_HASH_KEY, (uint8_t*)&hash, sizeof(hash));
-        return hash;
     }
+    return siphashfinish_last_part(&sh, 0);
 }
 
 static void siftdown(array_t *heap, int64_t startpos, int64_t pos, closure_t comparison, int64_t padded_item_size)
diff --git a/builtins/array.h b/builtins/array.h
index 47d10fd1..9dcdca6f 100644
--- a/builtins/array.h
+++ b/builtins/array.h
@@ -16,7 +16,7 @@
     const array_t arr = arr_expr; int64_t index = index_expr; \
     int64_t off = index + (index < 0) * (arr.length + 1) - 1; \
     if (__builtin_expect(off < 0 || off >= arr.length, 0)) \
-        fail_source(filename, start, end, "Invalid array index: %r (array has length %ld)\n", Int64$as_text(&index, no, NULL), arr.length); \
+        fail_source(filename, start, end, "Invalid array index: %s (array has length %ld)\n", Text$as_c_string(Int64$as_text(&index, no, NULL)), arr.length); \
     (item_type*)(arr.data + arr.stride * off);})
 #define Array_get_unchecked(type, x, i) *({ const array_t arr = x; int64_t index = i; \
                                           int64_t off = index + (index < 0) * (arr.length + 1) - 1; \
@@ -25,7 +25,7 @@
     array_t *arr = arr_expr; int64_t index = index_expr; \
     int64_t off = index + (index < 0) * (arr->length + 1) - 1; \
     if (__builtin_expect(off < 0 || off >= arr->length, 0)) \
-        fail_source(filename, start, end, "Invalid array index: %r (array has length %ld)\n", Int64$as_text(&index, no, NULL), arr->length); \
+        fail_source(filename, start, end, "Invalid array index: %s (array has length %ld)\n", Text$as_c_string(Int64$as_text(&index, no, NULL)), arr->length); \
     if (arr->data_refcount > 0) \
         Array$compact(arr, padded_item_size); \
     (item_type*)(arr->data + arr->stride * off); })
@@ -87,10 +87,10 @@ array_t Array$to(array_t array, Int_t last);
 array_t Array$by(array_t array, Int_t stride, int64_t padded_item_size);
 array_t Array$reversed(array_t array, int64_t padded_item_size);
 array_t Array$concat(array_t x, array_t y, int64_t padded_item_size);
-uint32_t Array$hash(const array_t *arr, const TypeInfo *type);
+uint64_t Array$hash(const array_t *arr, const TypeInfo *type);
 int32_t Array$compare(const array_t *x, const array_t *y, const TypeInfo *type);
 bool Array$equal(const array_t *x, const array_t *y, const TypeInfo *type);
-CORD Array$as_text(const array_t *arr, bool colorize, const TypeInfo *type);
+Text_t Array$as_text(const array_t *arr, bool colorize, const TypeInfo *type);
 void Array$heapify(array_t *heap, closure_t comparison, int64_t padded_item_size);
 void Array$heap_push(array_t *heap, const void *item, closure_t comparison, int64_t padded_item_size);
 #define Array$heap_push_value(heap, _value, comparison, padded_item_size) ({ __typeof(_value) value = _value; Array$heap_push(heap, &value, comparison, padded_item_size); })
diff --git a/builtins/bool.c b/builtins/bool.c
index af2f0ac7..488c6ddc 100644
--- a/builtins/bool.c
+++ b/builtins/bool.c
@@ -13,25 +13,28 @@
 #include "types.h"
 #include "util.h"
 
-public CORD Bool$as_text(const bool *b, bool colorize, const TypeInfo *type)
+public Text_t Bool$as_text(const bool *b, bool colorize, const TypeInfo *type)
 {
     (void)type;
-    if (!b) return "Bool";
+    if (!b) return Text$from_str("Bool");
     if (colorize)
-        return *b ? "\x1b[35myes\x1b[m" : "\x1b[35mno\x1b[m";
+        return *b ? Text$from_str("\x1b[35myes\x1b[m") : Text$from_str("\x1b[35mno\x1b[m");
     else
-        return *b ? "yes" : "no";
+        return *b ? Text$from_str("yes") : Text$from_str("no");
 }
 
-public Bool_t Bool$from_text(CORD text, bool *success)
+public Bool_t Bool$from_text(Text_t text, bool *success)
 {
-    CORD lower = Text$lower(text);
-    if (CORD_cmp(lower, "yes") == 0 || CORD_cmp(lower, "on") == 0
-        || CORD_cmp(lower, "true") == 0 || CORD_cmp(lower, "1") == 0) {
+    if (Text$equal_ignoring_case(text, Text$from_str("yes"))
+        || Text$equal_ignoring_case(text, Text$from_str("on"))
+        || Text$equal_ignoring_case(text, Text$from_str("true"))
+        || Text$equal_ignoring_case(text, Text$from_str("1"))) {
         if (success) *success = yes;
         return yes;
-    } else if (CORD_cmp(lower, "no") == 0 || CORD_cmp(lower, "off") == 0
-               || CORD_cmp(lower, "false") == 0 || CORD_cmp(lower, "0") == 0) {
+    } else if (Text$equal_ignoring_case(text, Text$from_str("no"))
+        || Text$equal_ignoring_case(text, Text$from_str("off"))
+        || Text$equal_ignoring_case(text, Text$from_str("false"))
+        || Text$equal_ignoring_case(text, Text$from_str("0"))) {
         if (success) *success = yes;
         return no;
     } else {
diff --git a/builtins/bool.h b/builtins/bool.h
index 716ddd5b..578cad6c 100644
--- a/builtins/bool.h
+++ b/builtins/bool.h
@@ -12,8 +12,8 @@
 #define yes (Bool_t)true
 #define no (Bool_t)false
 
-CORD Bool$as_text(const bool *b, bool colorize, const TypeInfo *type);
-bool Bool$from_text(CORD text, bool *success);
+Text_t Bool$as_text(const bool *b, bool colorize, const TypeInfo *type);
+bool Bool$from_text(Text_t text, bool *success);
 Bool_t Bool$random(double p);
 
 extern const TypeInfo $Bool;
diff --git a/builtins/c_string.c b/builtins/c_string.c
index 3b258aad..8abb2b9f 100644
--- a/builtins/c_string.c
+++ b/builtins/c_string.c
@@ -13,12 +13,12 @@
 #include "types.h"
 #include "util.h"
 
-public CORD CString$as_text(const void *c_string, bool colorize, const TypeInfo *info)
+public Text_t CString$as_text(const void *c_string, bool colorize, const TypeInfo *info)
 {
     (void)info;
-    if (!c_string) return "CString";
-    CORD text = CORD_from_char_star(*(char**)c_string);
-    return CORD_all(colorize ? "\x1b[34mCString\x1b[m(" : "CString(", Text$quoted(text, colorize), ")");
+    if (!c_string) return Text$from_str("CString");
+    Text_t text = Text$from_str(*(char**)c_string);
+    return Text$concat(Text$from_str(colorize ? "\x1b[34mCString\x1b[m(" : "CString("), Text$quoted(text, colorize), Text$from_str(")"));
 }
 
 public int CString$compare(const char **x, const char **y)
diff --git a/builtins/c_string.h b/builtins/c_string.h
index 6b4b0aad..d909083d 100644
--- a/builtins/c_string.h
+++ b/builtins/c_string.h
@@ -8,10 +8,10 @@
 
 #include "types.h"
 
-CORD CString$as_text(const void *str, bool colorize, const TypeInfo *info);
+Text_t CString$as_text(const void *str, bool colorize, const TypeInfo *info);
 int CString$compare(const char **x, const char **y);
 bool CString$equal(const char **x, const char **y);
-uint32_t CString$hash(const char **str);
+uint64_t CString$hash(const char **str);
 
 extern const TypeInfo $CString;
 
diff --git a/builtins/channel.c b/builtins/channel.c
index c2e2cf82..a0a0ddc5 100644
--- a/builtins/channel.c
+++ b/builtins/channel.c
@@ -15,6 +15,7 @@
 #include "functions.h"
 #include "halfsiphash.h"
 #include "integers.h"
+#include "text.h"
 #include "types.h"
 #include "util.h"
 #include "where.h"
@@ -120,15 +121,21 @@ bool Channel$equal(const channel_t **x, const channel_t **y, const TypeInfo *typ
     return (*x == *y);
 }
 
-CORD Channel$as_text(const channel_t **channel, bool colorize, const TypeInfo *type)
+Text_t Channel$as_text(const channel_t **channel, bool colorize, const TypeInfo *type)
 {
     const TypeInfo *item_type = type->ChannelInfo.item;
     if (!channel) {
-        CORD typename = generic_as_text(NULL, false, item_type);
-        return colorize ? CORD_asprintf("\x1b[34;1m|:%s|\x1b[m", typename) : CORD_all("|:", typename, "|");
+        Text_t typename = generic_as_text(NULL, false, item_type);
+        return Text$concat(Text$from_str(colorize ? "\x1b[34;1m|:" : "|:"), typename, Text$from_str(colorize ? "|\x1b[m" : "|"));
     }
-    CORD typename = generic_as_text(NULL, false, item_type);
-    return CORD_asprintf(colorize ? "\x1b[34;1m|:%s|<%p>\x1b[m" : "|:%s|<%p>", typename, *channel);
+    Text_t typename = generic_as_text(NULL, false, item_type);
+    return Text$concat(
+        Text$from_str(colorize ? "\x1b[34;1m|:" : "|:"),
+        typename,
+        Text$from_str("|<"),
+        Int64$hex((int64_t)(void*)*channel, I_small(0), true, true),
+        Text$from_str(colorize ? ">\x1b[m" : ">")
+    );
 }
 
 // vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/builtins/channel.h b/builtins/channel.h
index 241e7ec9..bf24f806 100644
--- a/builtins/channel.h
+++ b/builtins/channel.h
@@ -22,9 +22,9 @@ void Channel$peek(channel_t *channel, void *out, Where_t where, int64_t item_siz
 #define Channel$peek_value(channel, where, t) ({ t _val; Channel$peek(channel, &_val, where, sizeof(t)); _val; })
 void Channel$clear(channel_t *channel);
 array_t Channel$view(channel_t *channel);
-uint32_t Channel$hash(const channel_t **channel, const TypeInfo *type);
+uint64_t Channel$hash(const channel_t **channel, const TypeInfo *type);
 int32_t Channel$compare(const channel_t **x, const channel_t **y, const TypeInfo *type);
 bool Channel$equal(const channel_t **x, const channel_t **y, const TypeInfo *type);
-CORD Channel$as_text(const channel_t **channel, bool colorize, const TypeInfo *type);
+Text_t Channel$as_text(const channel_t **channel, bool colorize, const TypeInfo *type);
 
 // vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/builtins/datatypes.h b/builtins/datatypes.h
index 699c40e0..433e1dd9 100644
--- a/builtins/datatypes.h
+++ b/builtins/datatypes.h
@@ -71,4 +71,19 @@ typedef struct {
     int64_t max_size;
 } channel_t;
 
+enum text_type { TEXT_SHORT_ASCII, TEXT_ASCII, TEXT_SHORT_GRAPHEMES, TEXT_GRAPHEMES, TEXT_SUBTEXT };
+
+typedef struct Text_s {
+    int64_t length; // Number of grapheme clusters
+    uint64_t hash:61;
+    uint8_t tag:3;
+    union {
+        char short_ascii[8];
+        const char *ascii;
+        int32_t short_graphemes[2];
+        int32_t *graphemes;
+        struct Text_s *subtexts;
+    };
+} Text_t;
+
 // vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/builtins/functions.c b/builtins/functions.c
index 3eea3c89..06636cba 100644
--- a/builtins/functions.c
+++ b/builtins/functions.c
@@ -2,7 +2,6 @@
 #include <errno.h>
 #include <execinfo.h>
 #include <gc.h>
-#include <gc/cord.h>
 #include <stdbool.h>
 #include <stdint.h>
 #include <stdlib.h>
@@ -16,7 +15,6 @@
 #include "channel.h"
 #include "files.h"
 #include "functions.h"
-#include "halfsiphash.h"
 #include "integers.h"
 #include "pointer.h"
 #include "string.h"
@@ -25,7 +23,9 @@
 #include "types.h"
 #include "util.h"
 
-public uint8_t TOMO_HASH_KEY[8] = {0};
+#include "siphash.c"
+
+public uint8_t TOMO_HASH_KEY[16] = {0};
 
 public void tomo_init(void)
 {
@@ -37,6 +37,9 @@ public void tomo_init(void)
    srand(seed);
    srand48(seed);
    Int$init_random(seed);
+
+    if (register_printf_specifier('k', printf_text, printf_text_size))
+        errx(1, "Couldn't set printf specifier");
 }
 
 static void print_stack_trace(FILE *out)
@@ -60,13 +63,13 @@ static void print_stack_trace(FILE *out)
     fprintf(out, "\x1b[m");
 }
 
-public void fail(CORD fmt, ...)
+public void fail(const char *fmt, ...)
 {
     if (USE_COLOR) fputs("\x1b[31;7m ==================== ERROR ==================== \n\n\x1b[0;1m", stderr);
     else fputs("==================== ERROR ====================\n\n", stderr);
     va_list args;
     va_start(args, fmt);
-    CORD_vfprintf(stderr, fmt, args);
+    vfprintf(stderr, fmt, args);
     if (USE_COLOR) fputs("\x1b[m", stderr);
     fputs("\n\n", stderr);
     va_end(args);
@@ -75,14 +78,14 @@ public void fail(CORD fmt, ...)
     raise(SIGABRT);
 }
 
-public void fail_source(const char *filename, int64_t start, int64_t end, CORD fmt, ...)
+public void fail_source(const char *filename, int64_t start, int64_t end, const char *fmt, ...)
 {
     if (USE_COLOR) fputs("\n\x1b[31;7m ==================== ERROR ==================== \n\n\x1b[0;1m", stderr);
     else fputs("\n==================== ERROR ====================\n\n", stderr);
 
     va_list args;
     va_start(args, fmt);
-    CORD_vfprintf(stderr, fmt, args);
+    vfprintf(stderr, fmt, args);
     va_end(args);
 
     file_t *file = filename ? load_file(filename) : NULL;
@@ -98,11 +101,10 @@ public void fail_source(const char *filename, int64_t start, int64_t end, CORD f
     raise(SIGABRT);
 }
 
-public uint32_t generic_hash(const void *obj, const TypeInfo *type)
+public uint64_t generic_hash(const void *obj, const TypeInfo *type)
 {
     switch (type->tag) {
-    case PointerInfo: case FunctionInfo: return Pointer$hash(obj, type);
-    case TextInfo: return Text$hash(obj);
+    case TextInfo: return Text$hash((void*)obj);
     case ArrayInfo: return Array$hash(obj, type);
     case ChannelInfo: return Channel$hash((const channel_t**)obj, type);
     case TableInfo: return Table$hash(obj, type);
@@ -113,9 +115,7 @@ public uint32_t generic_hash(const void *obj, const TypeInfo *type)
         return type->CustomInfo.hash(obj, type);
     default: {
       hash_data:;
-        uint32_t hash;
-        halfsiphash((void*)obj, type->size, TOMO_HASH_KEY, (uint8_t*)&hash, sizeof(hash));
-        return hash;
+        return siphash24((void*)obj, type->size, (uint64_t*)TOMO_HASH_KEY);
     }
     }
 }
@@ -158,7 +158,7 @@ public bool generic_equal(const void *x, const void *y, const TypeInfo *type)
     }
 }
 
-public CORD generic_as_text(const void *obj, bool colorize, const TypeInfo *type)
+public Text_t generic_as_text(const void *obj, bool colorize, const TypeInfo *type)
 {
     switch (type->tag) {
     case PointerInfo: return Pointer$as_text(obj, colorize, type);
@@ -168,19 +168,21 @@ public CORD generic_as_text(const void *obj, bool colorize, const TypeInfo *type
     case ChannelInfo: return Channel$as_text((const channel_t**)obj, colorize, type);
     case TableInfo: return Table$as_text(obj, colorize, type);
     case TypeInfoInfo: return Type$as_text(obj, colorize, type);
-    case EmptyStruct: return colorize ? CORD_all("\x1b[0;1m", type->EmptyStruct.name, "\x1b[m()") : CORD_all(type->EmptyStruct.name, "()");
+    case EmptyStruct: return colorize ?
+                      Text$concat(Text$from_str("\x1b[0;1m"), Text$from_str(type->EmptyStruct.name), Text$from_str("\x1b[m()"))
+                          : Text$concat(Text$from_str(type->EmptyStruct.name), Text$from_str("()"));
     case CustomInfo:
         if (!type->CustomInfo.as_text)
-            fail("No cord function provided for type!\n");
+            fail("No text function provided for type!\n");
         return type->CustomInfo.as_text(obj, colorize, type);
     default: errx(1, "Invalid type tag: %d", type->tag);
     }
 }
 
 
-public CORD builtin_last_err()
+public Text_t builtin_last_err()
 {
-    return CORD_from_char_star(strerror(errno));
+    return Text$from_str(strerror(errno));
 }
 
 static int TEST_DEPTH = 0;
@@ -193,12 +195,12 @@ public void start_test(const char *filename, int64_t start, int64_t end)
 
     if (filename && file) {
         for (int i = 0; i < 3*TEST_DEPTH; i++) fputc(' ', stderr);
-        CORD_fprintf(stderr, USE_COLOR ? "\x1b[33;1m>> \x1b[0m%.*s\x1b[m\n" : ">> %.*s\n", (end - start), file->text + start);
+        fprintf(stderr, USE_COLOR ? "\x1b[33;1m>> \x1b[0m%.*s\x1b[m\n" : ">> %.*s\n", (end - start), file->text + start);
     }
     ++TEST_DEPTH;
 }
 
-public void end_test(void *expr, const TypeInfo *type, CORD expected, const char *filename, int64_t start, int64_t end)
+public void end_test(void *expr, const TypeInfo *type, const char *expected, const char *filename, int64_t start, int64_t end)
 {
     (void)filename;
     (void)start;
@@ -206,25 +208,29 @@ public void end_test(void *expr, const TypeInfo *type, CORD expected, const char
     --TEST_DEPTH;
     if (!expr) return;
 
-    CORD expr_cord = generic_as_text(expr, USE_COLOR, type);
-    CORD type_name = generic_as_text(NULL, false, type);
+    Text_t expr_text = generic_as_text(expr, USE_COLOR, type);
+    Text_t type_name = generic_as_text(NULL, false, type);
 
     for (int i = 0; i < 3*TEST_DEPTH; i++) fputc(' ', stderr);
-    CORD_fprintf(stderr, USE_COLOR ? "\x1b[2m=\x1b[0m %r \x1b[2m: %r\x1b[m\n" : "= %r : %r\n", expr_cord, type_name);
-    if (expected) {
-        CORD expr_plain = USE_COLOR ? generic_as_text(expr, false, type) : expr_cord;
-        bool success = Text$equal(&expr_plain, &expected);
-        if (!success && CORD_chr(expected, 0, ':')) {
-            CORD with_type = CORD_catn(3, expr_plain, " : ", type_name);
-            success = Text$equal(&with_type, &expected);
+    fprintf(stderr, USE_COLOR ? "\x1b[2m=\x1b[0m %k \x1b[2m: %k\x1b[m\n" : "= %k : %k\n", &expr_text, &type_name);
+    if (expected && expected[0]) {
+        Text_t expected_text = Text$from_str(expected);
+        Text_t expr_plain = USE_COLOR ? generic_as_text(expr, false, type) : expr_text;
+        bool success = Text$equal(&expr_plain, &expected_text);
+        if (!success) {
+            Int_t colon = Text$find(expected_text, Text$from_str(":"), I_small(0), NULL);
+            if (colon.small != I_small(0).small) {
+                Text_t with_type = Text$concat(expr_plain, Text$from_str(" : "), type_name);
+                success = Text$equal(&with_type, &expected_text);
+            }
         }
 
         if (!success) {
             fprintf(stderr, 
                     USE_COLOR
-                    ? "\n\x1b[31;7m ==================== TEST FAILED ==================== \x1b[0;1m\n\nExpected: \x1b[1;32m%s\x1b[0m\n\x1b[1m But got:\x1b[m %s\n\n"
-                    : "\n==================== TEST FAILED ====================\nExpected: %s\n\n But got: %s\n\n",
-                    CORD_to_const_char_star(expected), CORD_to_const_char_star(expr_cord));
+                    ? "\n\x1b[31;7m ==================== TEST FAILED ==================== \x1b[0;1m\n\nExpected: \x1b[1;32m%s\x1b[0m\n\x1b[1m But got:\x1b[m %k\n\n"
+                    : "\n==================== TEST FAILED ====================\nExpected: %s\n\n But got: %k\n\n",
+                    expected, &expr_text);
 
             print_stack_trace(stderr);
             fflush(stderr);
@@ -233,37 +239,29 @@ public void end_test(void *expr, const TypeInfo *type, CORD expected, const char
     }
 }
 
-public void say(CORD text, bool newline)
+public void say(Text_t text, bool newline)
 {
-    uint8_t buf[512] = {0};
-    size_t buf_len = sizeof(buf)-1;
-    const char *str = CORD_to_const_char_star(text);
-    uint8_t *normalized = u8_normalize(UNINORM_NFD, (uint8_t*)str, strlen(str), buf, &buf_len);
-    if (normalized) {
-        write(STDOUT_FILENO, normalized, buf_len);
-        if (newline)
-            write(STDOUT_FILENO, "\n", 1);
-        if (normalized != buf)
-            free(normalized);
-    }
+    Text$print(stdout, text);
+    if (newline)
+        fputc('\n', stdout);
 }
 
-public bool pop_flag(char **argv, int *i, const char *flag, CORD *result)
+public bool pop_flag(char **argv, int *i, const char *flag, Text_t *result)
 {
     if (argv[*i][0] != '-' || argv[*i][1] != '-') {
         return false;
     } else if (streq(argv[*i] + 2, flag)) {
-        *result = CORD_EMPTY;
+        *result = (Text_t){.length=0};
         argv[*i] = NULL;
         *i += 1;
         return true;
     } else if (strncmp(argv[*i] + 2, "no-", 3) == 0 && streq(argv[*i] + 5, flag)) {
-        *result = "no";
+        *result = Text$from_str("no");
         argv[*i] = NULL;
         *i += 1;
         return true;
     } else if (strncmp(argv[*i] + 2, flag, strlen(flag)) == 0 && argv[*i][2 + strlen(flag)] == '=') {
-        *result = CORD_from_char_star(argv[*i] + 2 + strlen(flag) + 1);
+        *result = Text$from_str(argv[*i] + 2 + strlen(flag) + 1);
         argv[*i] = NULL;
         *i += 1;
         return true;
diff --git a/builtins/functions.h b/builtins/functions.h
index 70266ba6..96837249 100644
--- a/builtins/functions.h
+++ b/builtins/functions.h
@@ -9,25 +9,25 @@
 #include "datatypes.h"
 #include "types.h"
 
-extern uint8_t TOMO_HASH_KEY[8];
+extern uint8_t TOMO_HASH_KEY[16];
 
 void tomo_init(void);
 
-void fail(CORD fmt, ...);
-void fail_source(const char *filename, int64_t start, int64_t end, CORD fmt, ...);
-CORD builtin_last_err();
+void fail(const char *fmt, ...);
+void fail_source(const char *filename, int64_t start, int64_t end, const char *fmt, ...);
+Text_t builtin_last_err();
 void start_test(const char *filename, int64_t start, int64_t end);
-void end_test(void *expr, const TypeInfo *type, CORD expected, const char *filename, int64_t start, int64_t end);
+void end_test(void *expr, const TypeInfo *type, const char *expected, const char *filename, int64_t start, int64_t end);
 #define test(expr, type, expected, filename, start, end) {\
     start_test(filename, start, end); \
     end_test(expr, type, expected, filename, start, end); }
-void say(CORD text, bool newline);
+void say(Text_t text, bool newline);
 
-uint32_t generic_hash(const void *obj, const TypeInfo *type);
+uint64_t generic_hash(const void *obj, const TypeInfo *type);
 int32_t generic_compare(const void *x, const void *y, const TypeInfo *type);
 bool generic_equal(const void *x, const void *y, const TypeInfo *type);
-CORD generic_as_text(const void *obj, bool colorize, const TypeInfo *type);
+Text_t generic_as_text(const void *obj, bool colorize, const TypeInfo *type);
 closure_t spawn(closure_t fn);
-bool pop_flag(char **argv, int *i, const char *flag, CORD *result);
+bool pop_flag(char **argv, int *i, const char *flag, Text_t *result);
 
 // vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/builtins/integers.c b/builtins/integers.c
index 4a7b5c3a..45db160d 100644
--- a/builtins/integers.c
+++ b/builtins/integers.c
@@ -1,4 +1,5 @@
 // Integer type infos and methods
+#include <ctype.h>
 #include <gc.h>
 #include <gc/cord.h>
 #include <gmp.h>
@@ -11,7 +12,8 @@
 #include "integers.h"
 #include "text.h"
 #include "types.h"
-#include "SipHash/halfsiphash.h"
+
+#include "siphash.c"
 
 static gmp_randstate_t Int_rng = {};
 
@@ -21,15 +23,17 @@ public void Int$init_random(long seed)
     gmp_randseed_ui(Int_rng, (unsigned long)seed);
 }
 
-public CORD Int$as_text(const Int_t *i, bool colorize, const TypeInfo *type) {
+public Text_t Int$as_text(const Int_t *i, bool colorize, const TypeInfo *type) {
     (void)type;
-    if (!i) return "Int";
+    if (!i) return Text$from_str("Int");
 
     if (__builtin_expect(i->small & 1, 1)) {
-        return CORD_asprintf(colorize ? "\x1b[35m%ld\x1b[33;2m\x1b[m" : "%ld", (i->small)>>2);
+        return Text$format(colorize ? "\x1b[35m%ld\x1b[m" : "%ld", (i->small)>>2);
     } else {
         char *str = mpz_get_str(NULL, 10, *i->big);
-        return CORD_asprintf(colorize ? "\x1b[35m%s\x1b[33;2m\x1b[m" : "%s", str);
+        Text_t text = Text$from_str(str);
+        if (colorize) text = Text$concat(Text$from_str("\x1b[35m"), text, Text$from_str("\x1b[m"));
+        return text;
     }
 }
 
@@ -55,62 +59,86 @@ public bool Int$equal_value(const Int_t x, const Int_t y) {
     return x.small == y.small || (__builtin_expect(((x.small | y.small) & 1) == 0, 0) && mpz_cmp(*x.big, *y.big) == 0);
 }
 
-public uint32_t Int$hash(const Int_t *x, const TypeInfo *type) {
+public uint64_t Int$hash(const Int_t *x, const TypeInfo *type) {
     (void)type;
-    uint32_t hash;
     if (__builtin_expect(x->small & 1, 1)) {
-        halfsiphash(&x->small, sizeof(x->small), TOMO_HASH_KEY, (uint8_t*)&hash, sizeof(hash));
+        int64_t i = (x->small>>2);
+        return siphash24((void*)&i, sizeof(i), (uint64_t*)TOMO_HASH_KEY);
     } else {
         char *str = mpz_get_str(NULL, 16, *x->big);
-        halfsiphash(str, strlen(str), TOMO_HASH_KEY, (uint8_t*)&hash, sizeof(hash));
+        return siphash24((void*)str, strlen(str), (uint64_t*)TOMO_HASH_KEY);
     }
-    return hash;
 }
 
-public CORD Int$format(Int_t i, Int_t digits_int)
+public Text_t Int$format(Int_t i, Int_t digits_int)
 {
     int64_t digits = Int_to_Int64(digits_int, false);
     if (__builtin_expect(i.small & 1, 1)) {
-        return CORD_asprintf("%0.*ld", digits, (i.small)>>2);
+        return Text$format("%0.*ld", digits, (i.small)>>2);
     } else {
-        CORD str = mpz_get_str(NULL, 10, *i.big);
+        char *str = mpz_get_str(NULL, 10, *i.big);
         bool negative = (str[0] == '-');
-        if (digits > (int64_t)CORD_len(str)) {
-            if (negative)
-                str = CORD_all("-", CORD_chars('0', digits - CORD_len(str)), CORD_substr(str, 1, ~0));
-            else
-                str = CORD_all(CORD_chars('0', digits - CORD_len(str)), str);
-        }
-        return str;
+        int64_t needed_zeroes = digits - (int64_t)strlen(str);
+        if (needed_zeroes <= 0)
+            return Text$from_str(str);
+
+        char *zeroes = GC_MALLOC_ATOMIC(needed_zeroes);
+        memset(zeroes, '0', needed_zeroes);
+        if (negative)
+            return Text$concat(Text$from_str("-"), Text$from_str(zeroes), Text$from_str(str + 1));
+        else
+            return Text$concat(Text$from_str(zeroes), Text$from_str(str));
     }
 }
 
-public CORD Int$hex(Int_t i, Int_t digits_int, bool uppercase, bool prefix) {
+public Text_t Int$hex(Int_t i, Int_t digits_int, bool uppercase, bool prefix) {
+    if (Int$compare(&i, (Int_t[1]){I_small(0)}, &$Int) < 0)
+        return Text$concat(Text$from_str("-"), Int$hex(Int$negative(i), digits_int, uppercase, prefix));
+
     int64_t digits = Int_to_Int64(digits_int, false);
-    const char *hex_fmt = uppercase ? (prefix ? "0x%0.*lX" : "%0.*lX") : (prefix ? "0x%0.*lx" : "%0.*lx");
     if (__builtin_expect(i.small & 1, 1)) {
-        return CORD_asprintf(hex_fmt, digits, (i.small)>>2);
+        const char *hex_fmt = uppercase ? (prefix ? "0x%0.*lX" : "%0.*lX") : (prefix ? "0x%0.*lx" : "%0.*lx");
+        return Text$format(hex_fmt, digits, (i.small)>>2);
     } else {
-        CORD str = mpz_get_str(NULL, 16, *i.big);
-        if (uppercase) str = Text$upper(str);
-        if (digits > (int64_t)CORD_len(str))
-            str = CORD_cat(CORD_chars('0', digits - CORD_len(str)), str);
-        if (prefix) str = CORD_cat("0x", str);
-        return str;
+        char *str = mpz_get_str(NULL, 16, *i.big);
+        if (uppercase) {
+            for (char *c = str; *c; c++)
+                *c = (char)toupper(*c);
+        }
+        int64_t needed_zeroes = digits - (int64_t)strlen(str);
+        if (needed_zeroes <= 0)
+            return prefix ? Text$concat(Text$from_str("0x"), Text$from_str(str)) : Text$from_str(str);
+
+        char *zeroes = GC_MALLOC_ATOMIC(needed_zeroes);
+        memset(zeroes, '0', needed_zeroes);
+        if (prefix)
+            return Text$concat(Text$from_str("0x"), Text$from_str(zeroes), Text$from_str(str));
+        else
+            return Text$concat(Text$from_str(zeroes), Text$from_str(str));
     }
 }
 
-public CORD Int$octal(Int_t i, Int_t digits_int, bool prefix) {
+public Text_t Int$octal(Int_t i, Int_t digits_int, bool prefix) {
+    Int_t zero = I_small(0);
+    if (Int$compare(&i, &zero, &$Int) < 0)
+        return Text$concat(Text$from_str("-"), Int$octal(Int$negative(i), digits_int, prefix));
+
     int64_t digits = Int_to_Int64(digits_int, false);
-    const char *octal_fmt = prefix ? "0o%0.*lo" : "%0.*lo";
     if (__builtin_expect(i.small & 1, 1)) {
-        return CORD_asprintf(octal_fmt, (int)digits, (uint64_t)(i.small >> 2));
+        const char *octal_fmt = prefix ? "0o%0.*lo" : "%0.*lo";
+        return Text$format(octal_fmt, digits, (i.small)>>2);
     } else {
-        CORD str = mpz_get_str(NULL, 8, *i.big);
-        if (digits > (int64_t)CORD_len(str))
-            str = CORD_cat(CORD_chars('0', digits - CORD_len(str)), str);
-        if (prefix) str = CORD_cat("0o", str);
-        return str;
+        char *str = mpz_get_str(NULL, 8, *i.big);
+        int64_t needed_zeroes = digits - (int64_t)strlen(str);
+        if (needed_zeroes <= 0)
+            return prefix ? Text$concat(Text$from_str("0o"), Text$from_str(str)) : Text$from_str(str);
+
+        char *zeroes = GC_MALLOC_ATOMIC(needed_zeroes);
+        memset(zeroes, '0', needed_zeroes);
+        if (prefix)
+            return Text$concat(Text$from_str("0o"), Text$from_str(zeroes), Text$from_str(str));
+        else
+            return Text$concat(Text$from_str(zeroes), Text$from_str(str));
     }
 }
 
@@ -290,9 +318,11 @@ public Int_t Int$sqrt(Int_t i)
 
 public Int_t Int$random(Int_t min, Int_t max) {
     int32_t cmp = Int$compare(&min, &max, &$Int);
-    if (cmp > 0)
-        fail("Random minimum value (%r) is larger than the maximum value (%r)",
-             Int$as_text(&min, false, &$Int), Int$as_text(&max, false, &$Int));
+    if (cmp > 0) {
+        Text_t min_text = Int$as_text(&min, false, &$Int), max_text = Int$as_text(&max, false, &$Int);
+        fail("Random minimum value (%k) is larger than the maximum value (%k)",
+             &min_text, &max_text);
+    }
     if (cmp == 0) return min;
 
     mpz_t range_size;
@@ -315,8 +345,8 @@ public Range_t Int$to(Int_t from, Int_t to) {
     return (Range_t){from, to, Int$compare(&to, &from, &$Int) >= 0 ? (Int_t){.small=(1<<2)|1} : (Int_t){.small=(-1>>2)|1}};
 }
 
-public Int_t Int$from_text(CORD text, bool *success) {
-    const char *str = CORD_to_const_char_star(text);
+public Int_t Int$from_text(Text_t text, bool *success) {
+    const char *str = Text$as_c_string(text);
     mpz_t i;
     int result;
     if (strncmp(str, "0x", 2) == 0) {
@@ -355,7 +385,7 @@ public Int_t Int$prev_prime(Int_t x)
     mpz_t p;
     mpz_init_set_int(p, x);
     if (mpz_prevprime(p, p) == 0)
-        fail("There is no prime number before %r", Int$as_text(&x, false, &$Int));
+        fail("There is no prime number before %k", (Text_t[1]){Int$as_text(&x, false, &$Int)});
     return Int$from_mpz(p);
 }
 
@@ -373,13 +403,11 @@ public const TypeInfo $Int = {
 
 
 #define DEFINE_INT_TYPE(c_type, KindOfInt, fmt, min_val, max_val)\
-    public CORD KindOfInt ## $as_text(const c_type *i, bool colorize, const TypeInfo *type) { \
+    public Text_t KindOfInt ## $as_text(const c_type *i, bool colorize, const TypeInfo *type) { \
         (void)type; \
-        if (!i) return #KindOfInt; \
-        CORD c; \
-        if (colorize) CORD_sprintf(&c, "\x1b[35m%"fmt"\x1b[33;2m\x1b[m", *i); \
-        else CORD_sprintf(&c, "%"fmt, *i); \
-        return c; \
+        if (!i) return Text$from_str(#KindOfInt); \
+        Int_t as_int = KindOfInt##_to_Int(*i); \
+        return Int$as_text(&as_int, colorize, type); \
     } \
     public int32_t KindOfInt ## $compare(const c_type *x, const c_type *y, const TypeInfo *type) { \
         (void)type; \
@@ -389,19 +417,17 @@ public const TypeInfo $Int = {
         (void)type; \
         return *x == *y; \
     } \
-    public CORD KindOfInt ## $format(c_type i, Int_t digits_int) { \
-        int64_t digits = Int_to_Int64(digits_int, false); \
-        return CORD_asprintf("%0*ld", (int)digits, (int64_t)i); \
+    public Text_t KindOfInt ## $format(c_type i, Int_t digits_int) { \
+        Int_t as_int = KindOfInt##_to_Int(i); \
+        return Int$format(as_int, digits_int); \
     } \
-    public CORD KindOfInt ## $hex(c_type i, Int_t digits_int, bool uppercase, bool prefix) { \
-        int64_t digits = Int_to_Int64(digits_int, false); \
-        const char *hex_fmt = uppercase ? (prefix ? "0x%0.*lX" : "%0.*lX") : (prefix ? "0x%0.*lx" : "%0.*lx"); \
-        return CORD_asprintf(hex_fmt, (int)digits, (uint64_t)i); \
+    public Text_t KindOfInt ## $hex(c_type i, Int_t digits_int, bool uppercase, bool prefix) { \
+        Int_t as_int = KindOfInt##_to_Int(i); \
+        return Int$hex(as_int, digits_int, uppercase, prefix); \
     } \
-    public CORD KindOfInt ## $octal(c_type i, Int_t digits_int, bool prefix) { \
-        int64_t digits = Int_to_Int64(digits_int, false); \
-        const char *octal_fmt = prefix ? "0o%0.*lo" : "%0.*lo"; \
-        return CORD_asprintf(octal_fmt, (int)digits, (uint64_t)i); \
+    public Text_t KindOfInt ## $octal(c_type i, Int_t digits_int, bool prefix) { \
+        Int_t as_int = KindOfInt##_to_Int(i); \
+        return Int$octal(as_int, digits_int, prefix); \
     } \
     public array_t KindOfInt ## $bits(c_type x) { \
         array_t bit_array = (array_t){.data=GC_MALLOC_ATOMIC(sizeof(bool[8*sizeof(c_type)])), .atomic=1, .stride=sizeof(bool), .length=8*sizeof(c_type)}; \
@@ -432,8 +458,8 @@ public const TypeInfo $Int = {
     public Range_t KindOfInt ## $to(c_type from, c_type to) { \
         return (Range_t){Int64_to_Int(from), Int64_to_Int(to), to >= from ? (Int_t){.small=(1<<2)&1} : (Int_t){.small=(1<<2)&1}}; \
     } \
-    public c_type KindOfInt ## $from_text(CORD text, CORD *the_rest) { \
-        const char *str = CORD_to_const_char_star(text); \
+    public c_type KindOfInt ## $from_text(Text_t text, Text_t *the_rest) { \
+        const char *str = Text$as_c_string(text); \
         long i; \
         char *end_ptr = NULL; \
         if (strncmp(str, "0x", 2) == 0) { \
@@ -445,7 +471,7 @@ public const TypeInfo $Int = {
         } else { \
             i = strtol(str, &end_ptr, 10); \
         } \
-        if (the_rest) *the_rest = CORD_from_char_star(end_ptr); \
+        if (the_rest) *the_rest = Text$from_str(end_ptr); \
         if (i < min_val) i = min_val; \
         else if (i > max_val) i = min_val; \
         return (c_type)i; \
diff --git a/builtins/integers.h b/builtins/integers.h
index e5a662cc..359b1d57 100644
--- a/builtins/integers.h
+++ b/builtins/integers.h
@@ -24,16 +24,16 @@
 #define I8(x) ((int8_t)x)
 
 #define DEFINE_INT_TYPE(c_type, type_name) \
-    CORD type_name ## $as_text(const c_type *i, bool colorize, const TypeInfo *type); \
+    Text_t type_name ## $as_text(const c_type *i, bool colorize, const TypeInfo *type); \
     int32_t type_name ## $compare(const c_type *x, const c_type *y, const TypeInfo *type); \
     bool type_name ## $equal(const c_type *x, const c_type *y, const TypeInfo *type); \
-    CORD type_name ## $format(c_type i, Int_t digits); \
-    CORD type_name ## $hex(c_type i, Int_t digits, bool uppercase, bool prefix); \
-    CORD type_name ## $octal(c_type i, Int_t digits, bool prefix); \
+    Text_t type_name ## $format(c_type i, Int_t digits); \
+    Text_t type_name ## $hex(c_type i, Int_t digits, bool uppercase, bool prefix); \
+    Text_t type_name ## $octal(c_type i, Int_t digits, bool prefix); \
     array_t type_name ## $bits(c_type x); \
     c_type type_name ## $random(c_type min, c_type max); \
     Range_t type_name ## $to(c_type from, c_type to); \
-    c_type type_name ## $from_text(CORD text, CORD *the_rest); \
+    c_type type_name ## $from_text(Text_t text, Text_t *the_rest); \
     static inline c_type type_name ## $clamped(c_type x, c_type min, c_type max) { \
         return x < min ? min : (x > max ? max : x); \
     } \
@@ -70,19 +70,19 @@ DEFINE_INT_TYPE(int8_t,  Int8);
 #define Int16$abs(...) I16(abs(__VA_ARGS__))
 #define Int8$abs(...) I8(abs(__VA_ARGS__))
 
-CORD Int$as_text(const Int_t *i, bool colorize, const TypeInfo *type);
-uint32_t Int$hash(const Int_t *x, const TypeInfo *type);
+Text_t Int$as_text(const Int_t *i, bool colorize, const TypeInfo *type);
+uint64_t Int$hash(const Int_t *x, const TypeInfo *type);
 int32_t Int$compare(const Int_t *x, const Int_t *y, const TypeInfo *type);
 int32_t Int$compare_value(const Int_t x, const Int_t y);
 bool Int$equal(const Int_t *x, const Int_t *y, const TypeInfo *type);
 bool Int$equal_value(const Int_t x, const Int_t y);
-CORD Int$format(Int_t i, Int_t digits);
-CORD Int$hex(Int_t i, Int_t digits, bool uppercase, bool prefix);
-CORD Int$octal(Int_t i, Int_t digits, bool prefix);
+Text_t Int$format(Int_t i, Int_t digits);
+Text_t Int$hex(Int_t i, Int_t digits, bool uppercase, bool prefix);
+Text_t Int$octal(Int_t i, Int_t digits, bool prefix);
 void Int$init_random(long seed);
 Int_t Int$random(Int_t min, Int_t max);
 Range_t Int$to(Int_t from, Int_t to);
-Int_t Int$from_text(CORD text, bool *success);
+Int_t Int$from_text(Text_t text, bool *success);
 Int_t Int$abs(Int_t x);
 Int_t Int$power(Int_t base, Int_t exponent);
 Int_t Int$sqrt(Int_t i);
diff --git a/builtins/memory.c b/builtins/memory.c
index 5b9f39ad..4e8e4c50 100644
--- a/builtins/memory.c
+++ b/builtins/memory.c
@@ -1,6 +1,5 @@
 // Type info and methods for "Memory" opaque type
 #include <gc.h>
-#include <gc/cord.h>
 #include <stdbool.h>
 #include <stdint.h>
 #include <stdlib.h>
@@ -8,17 +7,16 @@
 #include <sys/param.h>
 #include <err.h>
 
-#include "util.h"
 #include "halfsiphash.h"
 #include "memory.h"
+#include "text.h"
 #include "types.h"
+#include "util.h"
 
-public CORD Memory__as_text(const void *p, bool colorize, const TypeInfo *type) {
+public Text_t Memory__as_text(const void *p, bool colorize, const TypeInfo *type) { // Text form of an opaque Memory pointer. NOTE(review): memory.h declares `Memory$as_text`, not `Memory__as_text` -- confirm which name is intended
     (void)type;
-    if (!p) return "Memory";
-    CORD cord;
-    CORD_sprintf(&cord, colorize ? "\x1b[0;34;1mMemory<%p>\x1b[m" : "Memory<%p>", p);
-    return cord;
+    if (!p) return Text$from_str("Memory"); // a NULL `p` yields the literal "Memory"
+    return Text$format(colorize ? "\x1b[0;34;1mMemory<%p>\x1b[m" : "Memory<%p>", p); // formats the address `p` itself; ANSI escapes are added only when colorizing
 }
 
 public const TypeInfo $Memory = {
diff --git a/builtins/memory.h b/builtins/memory.h
index 48a2dafd..e3cb2983 100644
--- a/builtins/memory.h
+++ b/builtins/memory.h
@@ -9,6 +9,6 @@
 #include "types.h"
 
 extern const TypeInfo $Memory;
-CORD Memory$as_text(const void *p, bool colorize, const TypeInfo *type);
+Text_t Memory$as_text(const void *p, bool colorize, const TypeInfo *type);
 
 // vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/builtins/nums.c b/builtins/nums.c
index 6b4f6a8a..5848a589 100644
--- a/builtins/nums.c
+++ b/builtins/nums.c
@@ -11,15 +11,13 @@
 #include "array.h"
 #include "nums.h"
 #include "string.h"
+#include "text.h"
 #include "types.h"
 
-public CORD Num$as_text(const double *f, bool colorize, const TypeInfo *type) { 
+public Text_t Num$as_text(const double *f, bool colorize, const TypeInfo *type) { // Text form of the double at `f`; `type` is unused
     (void)type;
-    if (!f) return "Num";
-    CORD c;
-    if (colorize) CORD_sprintf(&c, "\x1b[35m%.16g\x1b[33;2m\x1b[m", *f); 
-    else CORD_sprintf(&c, "%.16g", *f); 
-    return c; 
+    if (!f) return Text$from_str("Num"); // a NULL `f` yields the literal "Num"
+    return Text$format(colorize ? "\x1b[35m%.16g\x1b[33;2m\x1b[m" : "%.16g", *f); // %.16g formatting, wrapped in ANSI escapes when colorizing
 } 
 
 public int32_t Num$compare(const double *x, const double *y, const TypeInfo *type) { 
@@ -47,12 +45,12 @@ public bool Num$near(double a, double b, double ratio, double absolute) {
     return (diff < epsilon);
 }
 
-public CORD Num$format(double f, Int_t precision) { 
-    return CORD_asprintf("%.*f", (int)Int_to_Int64(precision, false), f);
+public Text_t Num$format(double f, Int_t precision) { // Fixed-point text of `f` with `precision` digits after the decimal point
+    return Text$format("%.*f", (int)Int_to_Int64(precision, false), f); // precision is narrowed to int because `.*` consumes an int
 }
 
-public CORD Num$scientific(double f, Int_t precision) { 
-    return CORD_asprintf("%.*e", (int)Int_to_Int64(precision, false), f); 
+public Text_t Num$scientific(double f, Int_t precision) { // Exponent-notation text of `f` with `precision` digits after the decimal point
+    return Text$format("%.*e", (int)Int_to_Int64(precision, false), f); // precision is narrowed to int because `.*` consumes an int
 }
 
 public double Num$mod(double num, double modulus) { 
@@ -68,16 +66,16 @@ public double Num$mix(double amount, double x, double y) {
     return (1.0-amount)*x + amount*y;
 }
 
-public double Num$from_text(CORD text, CORD *the_rest) {
-    const char *str = CORD_to_const_char_star(text);
+public double Num$from_text(Text_t text, Text_t *the_rest) { // Parses a leading double from `text` with strtod() and returns it
+    const char *str = Text$as_c_string(text); // strtod() needs a C string
     char *end = NULL;
     double d = strtod(str, &end);
-    if (the_rest) *the_rest = CORD_from_char_star(end);
+    if (the_rest) *the_rest = Text$from_str(end); // optional out-param: the text where strtod() stopped parsing
     return d;
 }
 
-public double Num$nan(CORD tag) {
-    return nan(CORD_to_const_char_star(tag));
+public double Num$nan(Text_t tag) { // Returns nan() called with `tag` converted to a C string
+    return nan(Text$as_c_string(tag));
 }
 
 public bool Num$isinf(double n) { return !!isinf(n); }
@@ -95,13 +93,10 @@ public const TypeInfo $Num = {
     },
 };
 
-public CORD Num32$as_text(const float *f, bool colorize, const TypeInfo *type) { 
+public Text_t Num32$as_text(const float *f, bool colorize, const TypeInfo *type) { // Text form of the float at `f`, suffixed with "_f32"; `type` is unused
     (void)type;
-    if (!f) return "Num32";
-    CORD c;
-    if (colorize) CORD_sprintf(&c, "\x1b[35m%.8g_f32\x1b[m", *f);
-    else CORD_sprintf(&c, "%.8g_f32", *f);
-    return c;
+    if (!f) return Text$from_str("Num32"); // a NULL `f` yields the literal "Num32"
+    return Text$format(colorize ? "\x1b[35m%.8g_f32\x1b[33;2m\x1b[m" : "%.8g_f32", *f); // NOTE(review): the colorized format now carries an extra "\x1b[33;2m" that the removed version did not -- confirm intended
 }
 
 public int32_t Num32$compare(const float *x, const float *y, const TypeInfo *type) { 
@@ -129,12 +124,12 @@ public bool Num32$near(float a, float b, float ratio, float absolute) {
     return (diff < epsilon);
 }
 
-public CORD Num32$format(float f, Int_t precision) { 
-    return CORD_asprintf("%.*f", (int)Int_to_Int64(precision, false), f); 
+public Text_t Num32$format(float f, Int_t precision) { // Fixed-point text of `f` with `precision` digits after the decimal point
+    return Text$format("%.*f", (int)Int_to_Int64(precision, false), f); // precision is narrowed to int because `.*` consumes an int
 }
 
-public CORD Num32$scientific(float f, Int_t precision) { 
-    return CORD_asprintf("%.*e", (int)Int_to_Int64(precision, false), f); 
+public Text_t Num32$scientific(float f, Int_t precision) { // Exponent-notation text of `f` with `precision` digits after the decimal point
+    return Text$format("%.*e", (int)Int_to_Int64(precision, false), f); // precision is narrowed to int because `.*` consumes an int
 }
 
 public float Num32$mod(float num, float modulus) { 
@@ -150,16 +145,16 @@ public float Num32$mix(float amount, float x, float y) {
     return (1.0-amount)*x + amount*y;
 }
 
-public float Num32$from_text(CORD text, CORD *the_rest) {
-    const char *str = CORD_to_const_char_star(text);
+public float Num32$from_text(Text_t text, Text_t *the_rest) { // Parses a leading number from `text` with strtod() and returns it narrowed to float
+    const char *str = Text$as_c_string(text); // strtod() needs a C string
     char *end = NULL;
     double d = strtod(str, &end);
-    if (the_rest) *the_rest = CORD_from_char_star(end);
+    if (the_rest) *the_rest = Text$from_str(end); // optional out-param: the text where strtod() stopped parsing
     return (float)d;
 }
 
-public float Num32$nan(CORD tag) {
-    return nanf(CORD_to_const_char_star(tag));
+public float Num32$nan(Text_t tag) { // Returns nanf() called with `tag` converted to a C string
+    return nanf(Text$as_c_string(tag));
 }
 
 public bool Num32$isinf(float n) { return isinf(n); }
diff --git a/builtins/nums.h b/builtins/nums.h
index 94b11055..c5562f0a 100644
--- a/builtins/nums.h
+++ b/builtins/nums.h
@@ -14,39 +14,39 @@
 #define N32(n) ((float)n)
 #define N64(n) ((double)n)
 
-CORD Num$as_text(const double *f, bool colorize, const TypeInfo *type);
+Text_t Num$as_text(const double *f, bool colorize, const TypeInfo *type);
 int32_t Num$compare(const double *x, const double *y, const TypeInfo *type);
 bool Num$equal(const double *x, const double *y, const TypeInfo *type);
 bool Num$near(double a, double b, double ratio, double absolute);
-CORD Num$format(double f, Int_t precision);
-CORD Num$scientific(double f, Int_t precision);
+Text_t Num$format(double f, Int_t precision);
+Text_t Num$scientific(double f, Int_t precision);
 double Num$mod(double num, double modulus);
 bool Num$isinf(double n);
 bool Num$finite(double n);
 bool Num$isnan(double n);
-double Num$nan(CORD tag);
+double Num$nan(Text_t tag);
 double Num$random(void);
 double Num$mix(double amount, double x, double y);
-double Num$from_text(CORD text, CORD *the_rest);
+double Num$from_text(Text_t text, Text_t *the_rest);
 static inline double Num$clamped(double x, double low, double high) {
     return (x <= low) ? low : (x >= high ? high : x);
 }
 extern const TypeInfo $Num;
 
-CORD Num32$as_text(const float *f, bool colorize, const TypeInfo *type);
+Text_t Num32$as_text(const float *f, bool colorize, const TypeInfo *type);
 int32_t Num32$compare(const float *x, const float *y, const TypeInfo *type);
 bool Num32$equal(const float *x, const float *y, const TypeInfo *type);
 bool Num32$near(float a, float b, float ratio, float absolute);
-CORD Num32$format(float f, Int_t precision);
-CORD Num32$scientific(float f, Int_t precision);
+Text_t Num32$format(float f, Int_t precision);
+Text_t Num32$scientific(float f, Int_t precision);
 float Num32$mod(float num, float modulus);
 bool Num32$isinf(float n);
 bool Num32$finite(float n);
 bool Num32$isnan(float n);
 float Num32$random(void);
 float Num32$mix(float amount, float x, float y);
-float Num32$from_text(CORD text, CORD *the_rest);
-float Num32$nan(CORD tag);
+float Num32$from_text(Text_t text, Text_t *the_rest);
+float Num32$nan(Text_t tag);
 static inline float Num32$clamped(float x, float low, float high) {
     return (x <= low) ? low : (x >= high ? high : x);
 }
diff --git a/builtins/pointer.c b/builtins/pointer.c
index 73bd41be..41f4a2a1 100644
--- a/builtins/pointer.c
+++ b/builtins/pointer.c
@@ -8,27 +8,39 @@
 #include <stdlib.h>
 #include <sys/param.h>
 
-#include "util.h"
 #include "functions.h"
 #include "halfsiphash.h"
+#include "text.h"
 #include "types.h"
+#include "util.h"
 
 typedef struct recursion_s {
     const void *ptr;
     struct recursion_s *next;
 } recursion_t;
 
-public CORD Pointer$as_text(const void *x, bool colorize, const TypeInfo *type) {
+public Text_t Pointer$as_text(const void *x, bool colorize, const TypeInfo *type) {
     auto ptr_info = type->PointerInfo;
     if (!x) {
-        CORD typename = generic_as_text(NULL, false, ptr_info.pointed);
-        CORD c = colorize ? CORD_asprintf("\x1b[34;1m%s%s\x1b[m", ptr_info.sigil, typename) : CORD_cat(ptr_info.sigil, typename);
-        return ptr_info.is_optional ? CORD_cat(c, "?") : c;
+        Text_t typename = generic_as_text(NULL, false, ptr_info.pointed);
+        Text_t text;
+        if (colorize)
+            text = Text$concat(Text$from_str("\x1b[34;1m"), Text$from_str(ptr_info.sigil), typename, Text$from_str("\x1b[m"));
+        else
+            text = Text$concat(Text$from_str(ptr_info.sigil), typename);
+
+        if (ptr_info.is_optional)
+            text = Text$concat(text, Text$from_str("?"));
+
+        return text;
     }
     const void *ptr = *(const void**)x;
     if (!ptr) {
-        CORD typename = generic_as_text(NULL, false, ptr_info.pointed);
-        return colorize ? CORD_asprintf("\x1b[34;1m!%s\x1b[m", typename) : CORD_cat("!", typename);
+        Text_t typename = generic_as_text(NULL, false, ptr_info.pointed);
+        if (colorize)
+            return Text$concat(Text$from_str("\x1b[34;1m!"), typename, Text$from_str("\x1b[m"));
+        else
+            return Text$concat(Text$from_str("!"), typename);
     }
 
     // Check for recursive references, so if `x.foo = x`, then it prints as
@@ -38,22 +50,34 @@ public CORD Pointer$as_text(const void *x, bool colorize, const TypeInfo *type)
     for (recursion_t *r = recursion; r; r = r->next) {
         ++depth;
         if (r->ptr == ptr) {
-            CORD c = CORD_asprintf(colorize ? "\x1b[34;1m%s..%d\x1b[m" : "%s..%d", ptr_info.sigil, depth);
-            if (ptr_info.is_optional) c = CORD_cat(c, colorize ? "\x1b[34;1m?\x1b[m" : "?");
-            return c;
+            Text_t text = Text$concat(
+                Text$from_str(colorize ? "\x1b[34;1m" : ""),
+                Text$from_str(ptr_info.sigil),
+                Text$from_str(".."),
+                Int32$as_text(&depth, false, &$Int32),
+                Text$from_str(colorize ? "\x1b[m" : ""));
+            if (ptr_info.is_optional)
+                text = Text$concat(text, Text$from_str(colorize ? "\x1b[34;1m?\x1b[m" : "?"));
+            return text;
         }
     }
 
-    CORD pointed;
+    Text_t pointed;
     { // Stringify with this pointer flagged as a recursive one:
         recursion_t my_recursion = {.ptr=ptr, .next=recursion};
         recursion = &my_recursion;
         pointed = generic_as_text(ptr, colorize, ptr_info.pointed);
         recursion = recursion->next;
     }
-    CORD c = colorize ? CORD_asprintf("\x1b[34;1m%s\x1b[m%r", ptr_info.sigil, pointed) : CORD_cat(ptr_info.sigil, pointed);
-    if (ptr_info.is_optional) c = CORD_cat(c, colorize ? "\x1b[34;1m?\x1b[m" : "?");
-    return c;
+    Text_t text;
+    if (colorize)
+        text = Text$concat(Text$from_str("\x1b[34;1m"), Text$from_str(ptr_info.sigil), Text$from_str("\x1b[m"), pointed);
+    else
+        text = Text$concat(Text$from_str(ptr_info.sigil), pointed);
+
+    if (ptr_info.is_optional)
+        text = Text$concat(text, Text$from_str("?"));
+    return text;
 }
 
 public int32_t Pointer$compare(const void *x, const void *y, const TypeInfo *type) {
@@ -68,11 +92,4 @@ public bool Pointer$equal(const void *x, const void *y, const TypeInfo *type) {
     return xp == yp;
 }
 
-public uint32_t Pointer$hash(const void *x, const TypeInfo *type) {
-    (void)type;
-    uint32_t hash;
-    halfsiphash(x, sizeof(void*), TOMO_HASH_KEY, (uint8_t*)&hash, sizeof(hash));
-    return hash;
-}
-
 // vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/builtins/pointer.h b/builtins/pointer.h
index 538960b3..7748da4b 100644
--- a/builtins/pointer.h
+++ b/builtins/pointer.h
@@ -8,10 +8,9 @@
 
 #include "types.h"
 
-CORD Pointer$as_text(const void *x, bool colorize, const TypeInfo *type);
+Text_t Pointer$as_text(const void *x, bool colorize, const TypeInfo *type);
 int32_t Pointer$compare(const void *x, const void *y, const TypeInfo *type);
 bool Pointer$equal(const void *x, const void *y, const TypeInfo *type);
-uint32_t Pointer$hash(const void *x, const TypeInfo *type);
 
 #define Null(t) (t*)NULL
 #define POINTER_TYPE(_sigil, _pointed) (&(TypeInfo){\
diff --git a/builtins/range.c b/builtins/range.c
index 840397b9..9b5af8cd 100644
--- a/builtins/range.c
+++ b/builtins/range.c
@@ -4,15 +4,15 @@
 #include <err.h>
 #include <gmp.h>
 #include <gc.h>
-#include <gc/cord.h>
 #include <math.h>
 #include <stdbool.h>
 #include <stdint.h>
 #include <stdlib.h>
 #include <sys/param.h>
 
-#include "types.h"
 #include "integers.h"
+#include "text.h"
+#include "types.h"
 #include "util.h"
 
 
@@ -32,15 +32,15 @@ static bool Range$equal(const Range_t *x, const Range_t *y, const TypeInfo *type
     return Int$equal(&x->first, &y->first, &$Int) && Int$equal(&x->last, &y->last, &$Int) && Int$equal(&x->step, &y->step, &$Int);
 }
 
-static CORD Range$as_text(const Range_t *r, bool use_color, const TypeInfo *type)
+static Text_t Range$as_text(const Range_t *r, bool use_color, const TypeInfo *type) // Text form "Range(first=..., last=..., step=...)"; `type` is unused
 {
     (void)type;
-    if (!r) return "Range";
+    if (!r) return Text$from_str("Range"); // a NULL `r` yields the literal "Range"
 
-    return CORD_asprintf(use_color ? "\x1b[0;1mRange\x1b[m(first=%r, last=%r, step=%r)"
-                         : "Range(first=%r, last=%r, step=%r)",
-                         Int$as_text(&r->first, use_color, &$Int), Int$as_text(&r->last, use_color, &$Int),
-                         Int$as_text(&r->step, use_color, &$Int));
+    return Text$format(use_color ? "\x1b[0;1mRange\x1b[m(first=%k, last=%k, step=%k)" // %r was the CORD conversion; Text values use %k, as the fail() call sites do
+                       : "Range(first=%k, last=%k, step=%k)",
+                       (Text_t[1]){Int$as_text(&r->first, use_color, &$Int)}, (Text_t[1]){Int$as_text(&r->last, use_color, &$Int)}, // each %k is given a pointer to a Text_t temporary
+                       (Text_t[1]){Int$as_text(&r->step, use_color, &$Int)});
 }
 
 public Range_t Range$reversed(Range_t r)
diff --git a/builtins/table.c b/builtins/table.c
index 8de6532c..9bc3ded1 100644
--- a/builtins/table.c
+++ b/builtins/table.c
@@ -16,14 +16,15 @@
 #include <string.h>
 #include <sys/param.h>
 
-#include "util.h"
 #include "array.h"
+#include "c_string.h"
 #include "datatypes.h"
 #include "halfsiphash.h"
 #include "memory.h"
 #include "table.h"
 #include "text.h"
 #include "types.h"
+#include "util.h"
 
 // #define DEBUG_TABLES
 
@@ -51,11 +52,11 @@ static const TypeInfo MemoryPointer = {
     },
 };
 
-const TypeInfo StrToVoidStarTable = {
+const TypeInfo CStrToVoidStarTable = { // Table type descriptor used by the Table$str_* helpers, which pass `const char *` keys
     .size=sizeof(table_t),
     .align=__alignof__(table_t),
     .tag=TableInfo,
-    .TableInfo={.key=&$Text, .value=&MemoryPointer},
+    .TableInfo={.key=&$CString, .value=&MemoryPointer}, // keys are described by $CString, values by the file-local MemoryPointer type
 };
 
 static inline size_t entry_size(const TypeInfo *info)
@@ -450,36 +451,43 @@ public uint32_t Table$hash(const table_t *t, const TypeInfo *type)
     return hash;
 }
 
-public CORD Table$as_text(const table_t *t, bool colorize, const TypeInfo *type)
+public Text_t Table$as_text(const table_t *t, bool colorize, const TypeInfo *type) // Text form of table `t`; with a NULL `t`, text of the table type itself
 {
     assert(type->tag == TableInfo);
     auto table = type->TableInfo;
 
     if (!t) {
         if (table.value != &$Void) 
-            return CORD_all("{", generic_as_text(NULL, false, table.key), ":", generic_as_text(NULL, false, table.value), "}");
+            return Text$concat( // type form "{Key:Value}"
+                Text$from_str("{"),
+                generic_as_text(NULL, false, table.key),
+                Text$from_str(":"),
+                generic_as_text(NULL, false, table.value),
+                Text$from_str("}"));
         else
-            return CORD_all("{", generic_as_text(NULL, false, table.key), "}");
+            return Text$concat( // value type is $Void: type form "{Key}"
+                Text$from_str("{"),
+                generic_as_text(NULL, false, table.key),
+                Text$from_str("}"));
     }
 
     int64_t val_off = value_offset(type);
-    CORD c = "{";
+    Text_t text = Text$from_str("{"); // accumulates the output, one entry per loop iteration
     for (int64_t i = 0, length = Table$length(*t); i < length; i++) {
         if (i > 0)
-            c = CORD_cat(c, ", ");
+            text = Text$concat(text, Text$from_str(", ")); // separator before every entry except the first
         void *entry = GET_ENTRY(*t, i);
-        c = CORD_cat(c, generic_as_text(entry, colorize, table.key));
+        text = Text$concat(text, generic_as_text(entry, colorize, table.key)); // the key is read from the start of the entry
         if (table.value != &$Void) 
-            c = CORD_all(c, ":", generic_as_text(entry + val_off, colorize, table.value));
+            text = Text$concat(text, Text$from_str(":"), generic_as_text(entry + val_off, colorize, table.value)); // the value is read `val_off` bytes into the entry
     }
 
     if (t->fallback) {
-        c = CORD_cat(c, "; fallback=");
-        c = CORD_cat(c, Table$as_text(t->fallback, colorize, type));
+        text = Text$concat(text, Text$from_str("; fallback="), Table$as_text(t->fallback, colorize, type)); // the fallback table is rendered recursively with the same `type`
     }
 
-    c = CORD_cat(c, "}");
-    return c;
+    text = Text$concat(text, Text$from_str("}")); // closing brace
+    return text;
 }
 
 public table_t Table$from_entries(array_t entries, const TypeInfo *type)
@@ -592,29 +600,29 @@ public bool Table$is_superset_of(table_t a, table_t b, bool strict, const TypeIn
 
 public void *Table$str_get(table_t t, const char *key)
 {
-    void **ret = Table$get(t, &key, &StrToVoidStarTable);
+    void **ret = Table$get(t, &key, &CStrToVoidStarTable); // look up the C-string key via Table$get(); the stored pointer, or NULL when not found, is returned below
     return ret ? *ret : NULL;
 }
 }
 
 public void *Table$str_get_raw(table_t t, const char *key)
 {
-    void **ret = Table$get_raw(t, &key, &StrToVoidStarTable);
+    void **ret = Table$get_raw(t, &key, &CStrToVoidStarTable);
     return ret ? *ret : NULL;
 }
 
 public void *Table$str_reserve(table_t *t, const char *key, const void *value)
 {
-    return Table$reserve(t, &key, &value, &StrToVoidStarTable);
+    return Table$reserve(t, &key, &value, &CStrToVoidStarTable);
 }
 
 public void Table$str_set(table_t *t, const char *key, const void *value)
 {
-    Table$set(t, &key, &value, &StrToVoidStarTable);
+    Table$set(t, &key, &value, &CStrToVoidStarTable);
 }
 
 public void Table$str_remove(table_t *t, const char *key)
 {
-    return Table$remove(t, &key, &StrToVoidStarTable);
+    return Table$remove(t, &key, &CStrToVoidStarTable);
 }
 
 public void *Table$str_entry(table_t t, int64_t n)
diff --git a/builtins/table.h b/builtins/table.h
index 0ff4cb91..da60b3be 100644
--- a/builtins/table.h
+++ b/builtins/table.h
@@ -74,7 +74,7 @@ void Table$mark_copy_on_write(table_t *t);
 int32_t Table$compare(const table_t *x, const table_t *y, const TypeInfo *type);
 bool Table$equal(const table_t *x, const table_t *y, const TypeInfo *type);
 uint32_t Table$hash(const table_t *t, const TypeInfo *type);
-CORD Table$as_text(const table_t *t, bool colorize, const TypeInfo *type);
+Text_t Table$as_text(const table_t *t, bool colorize, const TypeInfo *type);
 
 void *Table$str_entry(table_t t, int64_t n);
 void *Table$str_get(table_t t, const char *key);
@@ -85,6 +85,6 @@ void Table$str_remove(table_t *t, const char *key);
 
 #define Table$length(t) ((t).entries.length)
 
-extern const TypeInfo StrToVoidStarTable;
+extern const TypeInfo CStrToVoidStarTable;
 
 // vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
diff --git a/builtins/text.c b/builtins/text.c
index 966018f1..ff709e02 100644
--- a/builtins/text.c
+++ b/builtins/text.c
@@ -1,417 +1,1454 @@
 // Type info and methods for Text datatype, which uses the Boehm "cord" library
 // and libunistr
+
 #include <assert.h>
 #include <ctype.h>
 #include <err.h>
 #include <gc.h>
-#include <gc/cord.h>
 #include <gmp.h>
 #include <limits.h>
+#include <printf.h>
 #include <readline/history.h>
 #include <readline/readline.h>
 #include <stdbool.h>
 #include <stdint.h>
 #include <stdlib.h>
 #include <sys/param.h>
+
 #include <unicase.h>
+#include <unictype.h>
 #include <unigbrk.h>
 #include <uniname.h>
 #include <uninorm.h>
+#include <unistd.h>
+#include <unistdio.h>
 #include <unistr.h>
 
 #include "array.h"
 #include "functions.h"
-#include "halfsiphash.h"
 #include "integers.h"
 #include "text.h"
 #include "types.h"
 
-#define CLAMP(x, lo, hi) MIN(hi, MAX(x,lo))
+static struct { // Table of multi-codepoint grapheme clusters; a negative grapheme value g refers to entry -g-1
+    size_t num_codepoints; // Number of entries in `codepoints`
+    const uint32_t *codepoints; // The codepoints that make up the cluster
+} synthetic_graphemes[1024] = {};
+
+static int32_t num_synthetic_graphemes = 0; // How many entries of synthetic_graphemes are in use
+
+static int32_t get_grapheme(Text_t text, int64_t index);
+
+typedef struct { // Cursor that _next_grapheme() keeps while walking a TEXT_SUBTEXT
+    int64_t subtext, sum_of_previous_subtexts; // Current subtext index; combined length of the subtexts before it
+} iteration_state_t;
 
-static inline uint8_t *_normalize(CORD str, uint8_t *buf, size_t *len)
+static int32_t _next_grapheme(Text_t text, iteration_state_t *state, int64_t index);
+
+// Look up a codepoint sequence in the synthetic grapheme table.
+// Returns the index of the matching entry, or num_synthetic_graphemes (the slot
+// a new entry would be appended to) when the sequence has not been registered.
+int32_t find_synthetic_grapheme(const uint32_t *codepoints, size_t len)
 {
-    const uint8_t *str_u8 = (const uint8_t*)CORD_to_const_char_star(str);
-    uint8_t *normalized = u8_normalize(UNINORM_NFD, str_u8, strlen((char*)str_u8)+1, buf, len);
-    if (!normalized) errx(1, "Unicode normalization error!");
-    return normalized;
+    // The table is append-only and scanned linearly, so an entry's index (and with
+    // it every grapheme ID already stored in some Text_t) never changes. A sorted
+    // table would need insertions that renumber existing entries.
+    for (int32_t i = 0; i < num_synthetic_graphemes; i++) {
+        if (synthetic_graphemes[i].num_codepoints == len
+            && memcmp(synthetic_graphemes[i].codepoints, codepoints, sizeof(uint32_t[len])) == 0)
+            return i;
+    }
+    return num_synthetic_graphemes;
 }
 
-public CORD Text$as_text(const void *text, bool colorize, const TypeInfo *info)
+// Return the grapheme ID for a multi-codepoint cluster, registering the cluster
+// on first use. IDs are negative: table entry i has ID -(i+1), which keeps them
+// distinct from plain (non-negative) codepoints.
+int32_t get_synthetic_grapheme(const uint32_t *codepoints, size_t len)
 {
-    if (!text) return info->TextInfo.lang;
-    CORD ret = Text$quoted(*(CORD*)text, colorize);
-    if (!streq(info->TextInfo.lang, "Text"))
-        ret = colorize ? CORD_all("\x1b[1m$", info->TextInfo.lang, "\x1b[m", ret) : CORD_all("$", info->TextInfo.lang, ret);
-    return ret;
+    // Reuse the existing entry when this exact codepoint sequence is already known
+    int32_t index = find_synthetic_grapheme(codepoints, len);
+    if (index < num_synthetic_graphemes)
+        return -(index+1);
+
+    const int32_t capacity = (int32_t)(sizeof(synthetic_graphemes)/sizeof(synthetic_graphemes[0]));
+    if (num_synthetic_graphemes >= capacity)
+        errx(1, "Too many synthetic graphemes");
+
+    // Keep a private copy: the caller's buffer may be temporary
+    uint32_t *buf = GC_MALLOC_ATOMIC(sizeof(uint32_t[len]));
+    memcpy(buf, codepoints, sizeof(uint32_t[len]));
+    synthetic_graphemes[index].codepoints = buf;
+    synthetic_graphemes[index].num_codepoints = len;
+
+    ++num_synthetic_graphemes;
+    return -(index+1);
 }
 
-public CORD Text$quoted(CORD str, bool colorize)
-{
-    // Note: it's important to have unicode strings not get broken up with
-    // escapes, otherwise they won't print right.
-    if (colorize) {
-        CORD quoted = "\x1b[35m\"";
-        CORD_pos i;
-        CORD_FOR(i, str) {
-            char c = CORD_pos_fetch(i);
-            switch (c) {
-#define BACKSLASHED(esc) "\x1b[34m\\\x1b[1m" esc "\x1b[0;35m"
-            case '\a': quoted = CORD_cat(quoted, BACKSLASHED("a")); break;
-            case '\b': quoted = CORD_cat(quoted, BACKSLASHED("b")); break;
-            case '\x1b': quoted = CORD_cat(quoted, BACKSLASHED("e")); break;
-            case '\f': quoted = CORD_cat(quoted, BACKSLASHED("f")); break;
-            case '\n': quoted = CORD_cat(quoted, BACKSLASHED("n")); break;
-            case '\r': quoted = CORD_cat(quoted, BACKSLASHED("r")); break;
-            case '\t': quoted = CORD_cat(quoted, BACKSLASHED("t")); break;
-            case '\v': quoted = CORD_cat(quoted, BACKSLASHED("v")); break;
-            case '"': quoted = CORD_cat(quoted, BACKSLASHED("\"")); break;
-            case '\\': quoted = CORD_cat(quoted, BACKSLASHED("\\")); break;
-            case '\x00' ... '\x06': case '\x0E' ... '\x1A':
-            case '\x1C' ... '\x1F': case '\x7F' ... '\x7F':
-                CORD_sprintf(&quoted, "%r" BACKSLASHED("x%02X"), quoted, c);
-                break;
-            default: quoted = CORD_cat_char(quoted, c); break;
-#undef BACKSLASHED
+static inline size_t num_subtexts(Text_t t)
+{
+    // A leaf is one piece; a TEXT_SUBTEXT is walked until its pieces account for all of t.length
+    if (t.tag == TEXT_SUBTEXT) {
+        size_t count = 0;
+        for (size_t remaining = t.length; remaining > 0; count++)
+            remaining -= t.subtexts[count].length;
+        return count;
+    }
+    return 1;
+}
+
+int text_visualize(FILE *stream, Text_t t) // Debug dump of a text's internal layout; returns the number of characters written
+{
+    switch (t.tag) { // The `*` precision of %.*s must be an int, hence the explicit casts below
+    case TEXT_SHORT_ASCII: return fprintf(stream, "<ascii length=%lld>%.*s</ascii>", (long long)t.length, (int)t.length, t.short_ascii);
+    case TEXT_ASCII: return fprintf(stream, "<ascii length=%lld>%.*s</ascii>", (long long)t.length, (int)t.length, t.ascii);
+    case TEXT_GRAPHEMES: case TEXT_SHORT_GRAPHEMES: {
+        int printed = fprintf(stream, "<graphemes length=%lld>", (long long)t.length);
+        printed += Text$print(stream, t);
+        printed += fprintf(stream, "</graphemes>");
+        return printed;
+    }
+    case TEXT_SUBTEXT: {
+        int printed = fprintf(stream, "<text length=%lld>", (long long)t.length);
+        size_t to_print = t.length;
+        for (int i = 0; to_print > 0; ++i) { // The subtext array has no stored count: stop once every grapheme is accounted for
+            printed += fprintf(stream, "\n  ");
+            printed += text_visualize(stream, t.subtexts[i]);
+            to_print -= t.subtexts[i].length;
+            if (t.subtexts[i].length == 0) break; // An empty piece would never reduce to_print; bail out instead of running off the array
+        }
+        printed += fprintf(stream, "\n</text>");
+        return printed;
+    }
+    default: return 0;
+    }
+}
+
+public int Text$print(FILE *stream, Text_t t) // Write the text to `stream`; returns the accumulated write counts
+{
+    switch (t.tag) {
+    case TEXT_SHORT_ASCII: return fwrite(t.short_ascii, sizeof(char), t.length, stream);
+    case TEXT_ASCII: return fwrite(t.ascii, sizeof(char), t.length, stream);
+    case TEXT_GRAPHEMES: case TEXT_SHORT_GRAPHEMES: {
+        int32_t *graphemes = t.tag == TEXT_SHORT_GRAPHEMES ? t.short_graphemes : t.graphemes;
+        int written = 0;
+        for (int64_t i = 0; i < t.length; i++) {
+            int32_t grapheme = graphemes[i];
+            if (grapheme >= 0) { // Plain codepoint: print it as a one-element UTF-32 string
+                written += ulc_fprintf(stream, "%.*llU", 1, &grapheme);
+            } else { // Synthetic grapheme: print all of its codepoints
+                written += ulc_fprintf(
+                    stream, "%.*llU",
+                    (int)synthetic_graphemes[-grapheme-1].num_codepoints, // `*` precision must be an int, not a size_t
+                    synthetic_graphemes[-grapheme-1].codepoints);
             }
         }
-        quoted = CORD_cat(quoted, "\"\x1b[m");
-        return quoted;
+        return written;
+    }
+    case TEXT_SUBTEXT: {
+        int written = 0;
+        int i = 0;
+        for (size_t to_print = t.length; to_print > 0; to_print -= t.subtexts[i].length, ++i)
+            written += Text$print(stream, t.subtexts[i]);
+        return written;
+    }
+    default: return 0;
+    }
+}
+
+static Text_t concat2(Text_t a, Text_t b) // Join two texts into one TEXT_SUBTEXT, splicing in the pieces of any TEXT_SUBTEXT operand
+{
+    if (a.length == 0) return b; // An empty operand contributes nothing, so the other one is returned as-is
+    if (b.length == 0) return a;
+
+    if (a.tag == TEXT_SUBTEXT && b.tag == TEXT_SUBTEXT) { // Both composite: copy a's pieces followed by b's
+        size_t na = num_subtexts(a);
+        size_t nb = num_subtexts(b);
+        Text_t ret = {
+            .length=a.length + b.length,
+            .tag=TEXT_SUBTEXT,
+            .subtexts=GC_MALLOC(sizeof(Text_t[na + nb])),
+        };
+        memcpy(&ret.subtexts[0], a.subtexts, sizeof(Text_t[na]));
+        memcpy(&ret.subtexts[na], b.subtexts, sizeof(Text_t[nb]));
+        return ret;
+    } else if (a.tag == TEXT_SUBTEXT) { // Composite + leaf: a's pieces, then b as the final piece
+        size_t n = num_subtexts(a);
+        Text_t ret = {
+            .length=a.length + b.length,
+            .tag=TEXT_SUBTEXT,
+            .subtexts=GC_MALLOC(sizeof(Text_t[n + 1])),
+        };
+        memcpy(ret.subtexts, a.subtexts, sizeof(Text_t[n]));
+        ret.subtexts[n] = b;
+        return ret;
+    } else if (b.tag == TEXT_SUBTEXT) { // Leaf + composite: a as the first piece, then b's pieces
+        size_t n = num_subtexts(b);
+        Text_t ret = {
+            .length=a.length + b.length,
+            .tag=TEXT_SUBTEXT,
+            .subtexts=GC_MALLOC(sizeof(Text_t[n + 1])),
+        };
+        ret.subtexts[0] = a;
+        memcpy(&ret.subtexts[1], b.subtexts, sizeof(Text_t[n]));
+        return ret;
     } else {
-        CORD quoted = "\"";
-        CORD_pos i;
-        CORD_FOR(i, str) {
-            char c = CORD_pos_fetch(i);
-            switch (c) {
-            case '\a': quoted = CORD_cat(quoted, "\\a"); break;
-            case '\b': quoted = CORD_cat(quoted, "\\b"); break;
-            case '\x1b': quoted = CORD_cat(quoted, "\\e"); break;
-            case '\f': quoted = CORD_cat(quoted, "\\f"); break;
-            case '\n': quoted = CORD_cat(quoted, "\\n"); break;
-            case '\r': quoted = CORD_cat(quoted, "\\r"); break;
-            case '\t': quoted = CORD_cat(quoted, "\\t"); break;
-            case '\v': quoted = CORD_cat(quoted, "\\v"); break;
-            case '"': quoted = CORD_cat(quoted, "\\\""); break;
-            case '\\': quoted = CORD_cat(quoted, "\\\\"); break;
-            case '\x00' ... '\x06': case '\x0E' ... '\x1A':
-            case '\x1C' ... '\x1F': case '\x7F' ... '\x7F':
-                CORD_sprintf(&quoted, "%r\\x%02X", quoted, c);
-                break;
-            default: quoted = CORD_cat_char(quoted, c); break;
+        Text_t ret = { // Two leaves: a fresh two-piece composite
+            .length=a.length + b.length,
+            .tag=TEXT_SUBTEXT,
+            .subtexts=GC_MALLOC(sizeof(Text_t[2])),
+        };
+        ret.subtexts[0] = a;
+        ret.subtexts[1] = b;
+        return ret;
+    }
+}
+
+public Text_t Text$_concat(int n, Text_t items[n]) // Concatenate n texts into a single text
+{
+    if (n == 0) return (Text_t){.length=0};
+    if (n == 1) return items[0];
+    if (n == 2) return concat2(items[0], items[1]);
+    // Count graphemes and pieces; empty items are ignored because they contribute nothing
+    int64_t len = 0, subtexts = 0;
+    for (int i = 0; i < n; i++) {
+        len += items[i].length;
+        if (items[i].length > 0) subtexts += num_subtexts(items[i]);
+    }
+    if (len == 0) return (Text_t){.length=0};
+    Text_t ret = {
+        .length=len,
+        .tag=TEXT_SUBTEXT,
+        .subtexts=GC_MALLOC(sizeof(Text_t[subtexts])), // Sized by the number of pieces, not the number of graphemes
+    };
+    int64_t sub_i = 0;
+    for (int i = 0; i < n; i++) {
+        if (items[i].tag == TEXT_SUBTEXT) { // Splice a composite's pieces in directly (none are copied if it is empty)
+            for (int64_t j = 0, remainder = items[i].length; remainder > 0; j++) {
+                ret.subtexts[sub_i++] = items[i].subtexts[j];
+                remainder -= items[i].subtexts[j].length;
             }
+        } else if (items[i].length > 0) {
+            ret.subtexts[sub_i++] = items[i];
         }
-        quoted = CORD_cat_char(quoted, '"');
-        return quoted;
     }
+    return ret;
 }
 
-public int Text$compare(const CORD *x, const CORD *y)
+public Text_t Text$slice(Text_t text, Int_t first_int, Int_t last_int)
 {
-    uint8_t *xx = (uint8_t*)CORD_to_const_char_star(*x);
-    uint8_t *yy = (uint8_t*)CORD_to_const_char_star(*y);
-    int result = 0;
-    if (u8_normcmp(xx, strlen((char*)xx), yy, strlen((char*)yy), UNINORM_NFD, &result))
-        fail("Something went wrong while comparing text");
-    return result;
+    // Indices are 1-based and inclusive; negative indices count back from the end (-1 is the last grapheme)
+    int64_t first = Int_to_Int64(first_int, false);
+    int64_t last = Int_to_Int64(last_int, false);
+    if (first == 0) errx(1, "Invalid index: 0");
+    if (last == 0) return (Text_t){.length=0};
+    if (first < 0) first = text.length + first + 1;
+    if (last < 0) last = text.length + last + 1;
+    if (first < 1) first = 1; // A negative index reaching past the start would otherwise read out of bounds
+    if (last > text.length) last = text.length;
+
+    if (first > text.length || last < first)
+        return (Text_t){.length=0};
+
+    if (first == 1 && last == text.length)
+        return text;
+
+    switch (text.tag) {
+    case TEXT_SHORT_ASCII: {
+        Text_t ret = text;
+        ret.length = last - first + 1;
+        if (first > 1)
+            memcpy(ret.short_ascii, text.short_ascii + (first-1), ret.length);
+        return ret;
+    }
+    case TEXT_ASCII: {
+        Text_t ret = {
+            .tag=TEXT_ASCII,
+            .length=last - first + 1,
+            .ascii=text.ascii + (first-1),
+        };
+        return ret;
+    }
+    case TEXT_SHORT_GRAPHEMES: {
+        assert((first == 1 && last == 1) || (first == 2 && last == 2));
+        Text_t ret = {
+            .tag=TEXT_SHORT_GRAPHEMES,
+            .length=1,
+            .short_graphemes={text.short_graphemes[first-1]},
+        };
+        return ret;
+    }
+    case TEXT_GRAPHEMES: {
+        Text_t ret = {
+            .tag=TEXT_GRAPHEMES,
+            .length=last - first + 1,
+            .graphemes=text.graphemes + (first-1),
+        };
+        return ret;
+    }
+    case TEXT_SUBTEXT: {
+        Text_t *subtexts = text.subtexts;
+        while (first > subtexts[0].length) { // Skip whole pieces that lie before the slice, rebasing both indices
+            first -= subtexts[0].length;
+            last -= subtexts[0].length;
+            ++subtexts;
+        }
+
+        int64_t needed_len = (last - first) + 1;
+        int64_t num_subtexts = 0;
+        for (int64_t included = 0; included < needed_len; ) { // Count the pieces the slice touches
+            if (included == 0)
+                included += subtexts[num_subtexts].length - first + 1;
+            else
+                included += subtexts[num_subtexts].length;
+            num_subtexts += 1;
+        }
+        if (num_subtexts == 1)
+            return Text$slice(subtexts[0], Int64_to_Int(first), Int64_to_Int(last));
+
+        Text_t ret = {
+            .length=needed_len,
+            .tag=TEXT_SUBTEXT,
+            .subtexts=GC_MALLOC(sizeof(Text_t[num_subtexts])),
+        };
+        for (int64_t i = 0; i < num_subtexts; i++) {
+            ret.subtexts[i] = Text$slice(subtexts[i], Int64_to_Int(first), Int64_to_Int(last)); // `last` is clamped to the piece's length by the callee
+            first = 1;
+            needed_len -= ret.subtexts[i].length;
+            last = first + needed_len - 1;
+        }
+        return ret;
+    }
+    default: errx(1, "Invalid tag");
+    }
 }
 
-public bool Text$equal(const CORD *x, const CORD *y)
+Text_t text_from_u32(uint32_t *codepoints, size_t num_codepoints, bool normalize) // Build a grapheme-based text from UTF-32 codepoints
 {
-    return Text$compare(x, y) == 0;
+    uint32_t norm_buf[128];
+    if (normalize) {
+        size_t norm_length = sizeof(norm_buf)/sizeof(norm_buf[0]);
+        uint32_t *normalized = u32_normalize(UNINORM_NFC, codepoints, num_codepoints, norm_buf, &norm_length);
+        if (!normalized) errx(1, "Unicode normalization error!");
+        codepoints = normalized;
+        num_codepoints = norm_length;
+    }
+
+    // Start in the inline two-grapheme form; it is promoted to a heap array below if a third grapheme shows up
+    Text_t ret = {
+        .length=0,
+        .tag=TEXT_SHORT_GRAPHEMES,
+    };
+    const uint32_t *src = codepoints;
+    int32_t *dest = &ret.short_graphemes[0];
+    while (src != &codepoints[num_codepoints]) {
+        ++ret.length;
+
+        if (ret.tag == TEXT_SHORT_GRAPHEMES && ret.length > 2) {
+            int32_t *graphemes = GC_MALLOC_ATOMIC(sizeof(int32_t[num_codepoints])); // May be a slight overallocation
+            graphemes[0] = ret.short_graphemes[0];
+            graphemes[1] = ret.short_graphemes[1];
+            ret.tag = TEXT_GRAPHEMES;
+            ret.graphemes = graphemes;
+            dest = &graphemes[2];
+        }
+
+        const uint32_t *next = u32_grapheme_next(src, &codepoints[num_codepoints]);
+        if (next == &src[1]) {
+            *dest = (int32_t)*src;
+        } else {
+            // Synthetic grapheme
+            *dest = get_synthetic_grapheme(src, next-src);
+        }
+        ++dest;
+        src = next;
+    }
+    if (normalize && codepoints != norm_buf) free(codepoints); // u32_normalize malloc()s when its result does not fit in norm_buf
+    return ret;
 }
 
-public uint32_t Text$hash(const CORD *cord)
+public Text_t Text$from_str(const char *str) // Build a text from a NUL-terminated UTF-8 C string
 {
-    if (!*cord) return 0;
+    size_t ascii_span = 0;
+    while (str[ascii_span] && isascii(str[ascii_span]))
+        ascii_span++;
+
-    uint8_t buf[128] = {0}; size_t norm_len = sizeof(buf);
-    uint8_t *normalized = _normalize(*cord, buf, &norm_len);
+    if (str[ascii_span] == '\0') { // All ASCII
+        Text_t ret = {.length=ascii_span};
+        if (ascii_span <= 8) {
+            ret.tag = TEXT_SHORT_ASCII;
+            memcpy(ret.short_ascii, str, ascii_span);
+        } else {
+            ret.tag = TEXT_ASCII;
+            ret.ascii = str; // NOTE(review): aliases the caller's buffer without copying -- presumably callers pass strings that outlive the text; confirm
+        }
+        return ret;
+    } else {
+        uint32_t buf[128];
+        size_t length = sizeof(buf)/sizeof(buf[0]);
+        uint32_t *codepoints = u8_to_u32((uint8_t*)str, ascii_span + strlen(str + ascii_span), buf, &length);
+        if (!codepoints) errx(1, "Invalid UTF-8 text"); // u8_to_u32 returns NULL on malformed input
+        Text_t ret = text_from_u32(codepoints, length, true);
+        if (codepoints != buf) free(codepoints);
+        return ret;
+    }
+}
+
+static void u8_buf_append(Text_t text, char **buf, size_t *capacity, int64_t *i) // Append text's UTF-8 bytes at (*buf)[*i], growing *buf as needed
+{
+    switch (text.tag) {
+    case TEXT_ASCII: case TEXT_SHORT_ASCII: {
+        if (*i + text.length > (int64_t)*capacity) {
+            *capacity = *i + text.length;
+            *buf = GC_REALLOC(*buf, *capacity);
+        }
+        const char *bytes = text.tag == TEXT_ASCII ? text.ascii : text.short_ascii;
+        memcpy(*buf + *i, bytes, text.length);
+        *i += text.length;
+        break;
+    }
+    case TEXT_GRAPHEMES: case TEXT_SHORT_GRAPHEMES: {
+        const int32_t *graphemes = text.tag == TEXT_GRAPHEMES ? text.graphemes : text.short_graphemes;
+        for (int64_t g = 0; g < text.length; g++) { // Every grapheme, including the last one
+            const uint32_t *codepoints = graphemes[g] < 0 ? synthetic_graphemes[-graphemes[g]-1].codepoints : (uint32_t*)&graphemes[g];
+            size_t num_codepoints = graphemes[g] < 0 ? synthetic_graphemes[-graphemes[g]-1].num_codepoints : 1;
+            uint8_t u8_buf[64];
+            size_t u8_len = sizeof(u8_buf);
+            uint8_t *u8 = u32_to_u8(codepoints, num_codepoints, u8_buf, &u8_len);
+            if (*i + (int64_t)u8_len > (int64_t)*capacity) {
+                *capacity = *i + u8_len;
+                *buf = GC_REALLOC(*buf, *capacity);
+            }
 
-    uint32_t hash;
-    halfsiphash(normalized, norm_len, TOMO_HASH_KEY, (uint8_t*)&hash, sizeof(hash));
-    if (normalized != buf) free(normalized);
-    return hash;
+            memcpy(*buf + *i, u8, u8_len);
+            *i += u8_len;
+            if (u8 != u8_buf) free(u8);
+        }
+        break;
+    }
+    case TEXT_SUBTEXT: {
+        for (int64_t s = 0, remaining = text.length; remaining > 0; s++) {
+            u8_buf_append(text.subtexts[s], buf, capacity, i);
+            remaining -= text.subtexts[s].length;
+        }
+        break;
+    }
+    default: break;
+    }
 }
 
-public CORD Text$upper(CORD str)
+public const char *Text$as_c_string(Text_t text) // UTF-8 encode the text into a GC-allocated, NUL-terminated C string
 {
-    if (!str) return str;
-    size_t len = strlen(str) + 1;
-    uint8_t *dest = GC_MALLOC_ATOMIC(len);
-    dest[len-1] = 0;
-    return (CORD)u8_toupper((const uint8_t*)str, len-1, uc_locale_language(), NULL, dest, &len);
+    size_t capacity = text.length + 1; // Exact for ASCII text (one byte per grapheme plus the NUL); grown on demand otherwise
+    char *buf = GC_MALLOC_ATOMIC(capacity);
+    int64_t i = 0;
+    u8_buf_append(text, &buf, &capacity, &i);
+    if (i + 1 > (int64_t)capacity) buf = GC_REALLOC(buf, i + 1); // u8_buf_append grows to an exact fit, leaving no room for the NUL
+    buf[i] = '\0';
+    return buf;
 }
 
-public CORD Text$lower(CORD str)
+uint32_t *text_to_u32(Text_t text, size_t *length) // Flatten a text into a GC-allocated codepoint array; *length receives the codepoint count
 {
-    if (!str) return str;
-    size_t len = strlen(str) + 1;
-    uint8_t *dest = GC_MALLOC_ATOMIC(len);
-    dest[len-1] = 0;
-    return (CORD)u8_tolower((const uint8_t*)str, len-1, uc_locale_language(), NULL, dest, &len);
+    // Precalculate size:
+    size_t len = 0;
+    if (text.tag == TEXT_ASCII) { // Shortcut: here the codepoint count is simply the text length
+        len = text.length;
+    } else {
+        iteration_state_t state = {0, 0};
+        for (int64_t i = 0; i < text.length; i++) {
+            int32_t grapheme = _next_grapheme(text, &state, i);
+            if (grapheme < 0) // Negative values index the synthetic grapheme table (entry -grapheme-1)
+                len += synthetic_graphemes[-grapheme-1].num_codepoints;
+            else
+                len += 1;
+        }
+    }
+    assert(length);
+    *length = len;
+
+    // Copy over codepoints one grapheme cluster at a time:
+    uint32_t *ret = GC_MALLOC_ATOMIC(sizeof(uint32_t[len]));
+    uint32_t *dest = ret;
+    iteration_state_t state = {0, 0};
+    for (int64_t i = 0; i < text.length; i++) {
+        int32_t grapheme = _next_grapheme(text, &state, i);
+        if (grapheme < 0) { // Synthetic grapheme: expand to its stored codepoints
+            const uint32_t *codepoints = synthetic_graphemes[-grapheme-1].codepoints;
+            size_t num_codepoints = synthetic_graphemes[-grapheme-1].num_codepoints;
+            for (size_t j = 0; j < num_codepoints; j++)
+                *(dest++) = codepoints[j];
+        } else { // Non-negative values are the codepoint itself
+            *(dest++) = (uint32_t)grapheme;
+        }
+    }
+    return ret;
 }
 
-public CORD Text$title(CORD str)
+#include "siphash.c"
+
+public uint64_t Text$hash(Text_t *text) // Hash of the grapheme sequence, cached in text->hash (0 means "not computed yet")
 {
-    if (!str) return str;
-    size_t len = strlen(str) + 1;
-    uint8_t *dest = GC_MALLOC_ATOMIC(len);
-    dest[len-1] = 0;
-    return (CORD)u8_totitle((const uint8_t*)str, len-1, uc_locale_language(), NULL, dest, &len);
+    if (text->hash != 0) return text->hash;
+    siphash sh;
+    siphashinit(&sh, sizeof(int32_t[text->length]), (uint64_t*)TOMO_HASH_KEY);
+    // Every representation feeds the same stream: graphemes in consecutive pairs, then a lone trailing grapheme (or 0)
+    union {
+        int32_t chunks[2];
+        uint64_t whole;
+    } tmp;
+    switch (text->tag) {
+    case TEXT_ASCII: case TEXT_SHORT_ASCII: {
+        const char *bytes = text->tag == TEXT_ASCII ? text->ascii : text->short_ascii;
+        for (int64_t i = 0; i + 1 < text->length; i += 2) { // Step by two: each grapheme belongs to exactly one pair
+            tmp.chunks[0] = (int32_t)bytes[i];
+            tmp.chunks[1] = (int32_t)bytes[i+1];
+            siphashadd64bits(&sh, tmp.whole);
+        }
+        int32_t last = text->length & 0x1 ? (int32_t)bytes[text->length-1] : 0; // Odd number of graphemes
+        text->hash = siphashfinish_last_part(&sh, (uint64_t)last);
+        break;
+    }
+    case TEXT_GRAPHEMES: {
+        const int32_t *graphemes = text->graphemes;
+        for (int64_t i = 0; i + 1 < text->length; i += 2) {
+            tmp.chunks[0] = graphemes[i];
+            tmp.chunks[1] = graphemes[i+1];
+            siphashadd64bits(&sh, tmp.whole);
+        }
+        int32_t last = text->length & 0x1 ? graphemes[text->length-1] : 0; // Odd number of graphemes
+        text->hash = siphashfinish_last_part(&sh, (uint64_t)last);
+        break;
+    }
+    case TEXT_SHORT_GRAPHEMES: {
+        tmp.chunks[0] = text->short_graphemes[0];
+        tmp.chunks[1] = text->length > 1 ? text->short_graphemes[1] : 0;
+        if (text->length > 1) siphashadd64bits(&sh, tmp.whole); // A full pair is added like any other pair
+        text->hash = siphashfinish_last_part(&sh, text->length == 1 ? (uint64_t)text->short_graphemes[0] : 0);
+        break;
+    }
+    case TEXT_SUBTEXT: {
+        int32_t leftover = 0; // Unpaired grapheme carried over from the previous piece (0 = none)
+        for (int64_t sub_i = 0, to_hash = text->length; to_hash > 0; ) {
+            Text_t subtext = text->subtexts[sub_i];
+            if (subtext.tag == TEXT_ASCII || subtext.tag == TEXT_SHORT_ASCII) {
+                const char *bytes = subtext.tag == TEXT_ASCII ? subtext.ascii : subtext.short_ascii;
+                int64_t grapheme = 0;
+                if (leftover) {
+                    tmp.chunks[0] = leftover;
+                    tmp.chunks[1] = (int32_t)bytes[0];
+                    siphashadd64bits(&sh, tmp.whole);
+                    grapheme += 1;
+                }
+                for (; grapheme + 1 < subtext.length; grapheme += 2) {
+                    tmp.chunks[0] = (int32_t)bytes[grapheme];
+                    tmp.chunks[1] = (int32_t)bytes[grapheme+1];
+                    siphashadd64bits(&sh, tmp.whole);
+                }
+                leftover = grapheme < subtext.length ? (int32_t)bytes[grapheme] : 0;
+            } else if (subtext.tag == TEXT_SHORT_GRAPHEMES) {
+                if (leftover) {
+                    tmp.chunks[0] = leftover;
+                    tmp.chunks[1] = subtext.short_graphemes[0];
+                    siphashadd64bits(&sh, tmp.whole);
+                    leftover = subtext.length > 1 ? subtext.short_graphemes[1] : 0;
+                } else if (subtext.length == 1) {
+                    leftover = subtext.short_graphemes[0];
+                } else {
+                    tmp.chunks[0] = subtext.short_graphemes[0];
+                    tmp.chunks[1] = subtext.short_graphemes[1];
+                    siphashadd64bits(&sh, tmp.whole);
+                }
+            } else if (subtext.tag == TEXT_GRAPHEMES) {
+                int32_t *graphemes = subtext.graphemes;
+                int64_t grapheme = 0;
+                if (leftover) {
+                    tmp.chunks[0] = leftover;
+                    tmp.chunks[1] = graphemes[0];
+                    siphashadd64bits(&sh, tmp.whole);
+                    grapheme += 1;
+                }
+                for (; grapheme + 1 < subtext.length; grapheme += 2) {
+                    tmp.chunks[0] = graphemes[grapheme];
+                    tmp.chunks[1] = graphemes[grapheme+1];
+                    siphashadd64bits(&sh, tmp.whole);
+                }
+                leftover = grapheme < subtext.length ? graphemes[grapheme] : 0;
+            }
+
+            to_hash -= text->subtexts[sub_i].length;
+
+            ++sub_i;
+        }
+
+        text->hash = siphashfinish_last_part(&sh, leftover);
+        break;
+    }
+    default: errx(1, "Invalid text");
+    }
+
+    if (text->hash == 0) // 0 is reserved for "not computed"
+        text->hash = 1;
+
+    return text->hash;
 }
 
-public bool Text$has(CORD str, CORD target, Where_t where)
+int32_t _next_grapheme(Text_t text, iteration_state_t *state, int64_t index) // Grapheme at 0-based `index` (0 past the end); `state` caches the position inside a TEXT_SUBTEXT
 {
-    if (!target) return true;
-    if (!str) return false;
+    switch (text.tag) {
+    case TEXT_ASCII: return index < text.length ? (int32_t)text.ascii[index] : 0;
+    case TEXT_SHORT_ASCII: return index < text.length ? (int32_t)text.short_ascii[index] : 0;
+    case TEXT_GRAPHEMES: return index < text.length ? text.graphemes[index] : 0;
+    case TEXT_SHORT_GRAPHEMES: return index < text.length ? text.short_graphemes[index] : 0;
+    case TEXT_SUBTEXT: {
+        iteration_state_t backup_state = {0, 0};
+        if (!state) state = &backup_state;
 
-    uint8_t str_buf[128] = {0}; size_t str_norm_len = sizeof(str_buf);
-    uint8_t *str_normalized = _normalize(str, str_buf, &str_norm_len);
+        if (index < 0 || index >= text.length)
+            return 0;
 
-    uint8_t target_buf[128] = {0}; size_t target_norm_len = sizeof(target_buf);
-    uint8_t *target_normalized = _normalize(target, target_buf, &target_norm_len);
+        while (index < state->sum_of_previous_subtexts && state->subtext > 0) { // Rewind: step back a piece, then drop that piece's length from the running sum
+            state->subtext -= 1;
+            state->sum_of_previous_subtexts -= text.subtexts[state->subtext].length;
+        }
+        for (;;) {
+            if (index < state->sum_of_previous_subtexts + text.subtexts[state->subtext].length)
+                return _next_grapheme(text.subtexts[state->subtext], NULL, index - state->sum_of_previous_subtexts); // Index relative to the piece's own start
+            state->sum_of_previous_subtexts += text.subtexts[state->subtext].length;
+            state->subtext += 1;
+        }
+        return 0;
+    }
+    default: errx(1, "Invalid text");
+    }
+    return 0;
+}
 
-    if (target_norm_len > str_norm_len) return false;
+int32_t get_grapheme(Text_t text, int64_t index)
+{
+    // One-off lookup: start from a zeroed cursor since nothing is carried between calls
+    return _next_grapheme(text, &(iteration_state_t){.subtext=0, .sum_of_previous_subtexts=0}, index);
+}
 
-    bool ret;
-    if (where.tag == $tag$Where$Start) {
-        ret = (u8_strncmp(str_normalized, target_normalized, target_norm_len-1) == 0);
-    } else if (where.tag == $tag$Where$End) {
-        ret = (u8_strcmp(str_normalized + str_norm_len - target_norm_len, target_normalized) == 0);
-    } else {
-        assert(where.tag == $tag$Where$Anywhere);
-        ret = (u8_strstr(str_normalized, target_normalized) != NULL);
+int32_t Text$compare(const Text_t *a, const Text_t *b) // Three-way comparison, grapheme by grapheme, by codepoint value
+{
+    int64_t len = MAX(a->length, b->length); // Past the end of the shorter text _next_grapheme() yields 0
+    iteration_state_t a_state = {0, 0}, b_state = {0, 0};
+    for (int64_t i = 0; i < len; i++) {
+        int32_t ai = _next_grapheme(*a, &a_state, i);
+        int32_t bi = _next_grapheme(*b, &b_state, i);
+        if (ai == bi) continue;
+        int32_t cmp;
+        if (ai >= 0 && bi >= 0) { // Only negative values are synthetic; 0 is a plain codepoint and must not index the table
+            cmp = u32_cmp((uint32_t*)&ai, (uint32_t*)&bi, 1);
+        } else if (ai >= 0) {
+            cmp = u32_cmp2(
+                (uint32_t*)&ai, 1,
+                synthetic_graphemes[-bi-1].codepoints,
+                synthetic_graphemes[-bi-1].num_codepoints);
+        } else if (bi >= 0) {
+            cmp = u32_cmp2(
+                synthetic_graphemes[-ai-1].codepoints,
+                synthetic_graphemes[-ai-1].num_codepoints,
+                (uint32_t*)&bi, 1);
+        } else {
+            cmp = u32_cmp2(
+                synthetic_graphemes[-ai-1].codepoints,
+                synthetic_graphemes[-ai-1].num_codepoints,
+                synthetic_graphemes[-bi-1].codepoints,
+                synthetic_graphemes[-bi-1].num_codepoints);
+        }
+        if (cmp != 0) return cmp;
     }
+    return 0;
+}
 
-    if (str_normalized != str_buf) free(str_normalized);
-    if (target_normalized != target_buf) free(target_normalized);
-    return ret;
+public bool Text$equal(const Text_t *a, const Text_t *b)
+{
+    // Cheap rejections first: differing lengths, or two cached (nonzero) hashes that disagree
+    if (a->length != b->length) return false;
+    if (a->hash != 0 && b->hash != 0 && a->hash != b->hash) return false;
+    // Otherwise walk both texts in lockstep and compare grapheme values
+    iteration_state_t cursor_a = {0, 0}, cursor_b = {0, 0};
+    for (int64_t pos = 0; pos < a->length; pos++) {
+        if (_next_grapheme(*a, &cursor_a, pos) != _next_grapheme(*b, &cursor_b, pos))
+            return false;
+    }
+    return true;
 }
 
-public CORD Text$without(CORD str, CORD target, Where_t where)
+public bool Text$equal_ignoring_case(Text_t a, Text_t b) // Case-insensitive equality, compared one grapheme at a time
 {
-    if (!str || !target) return str;
+    if (a.length != b.length)
+        return false;
+    int64_t len = a.length;
+    iteration_state_t a_state = {0, 0}, b_state = {0, 0};
+    const char *language = uc_locale_language();
+    for (int64_t i = 0; i < len; i++) {
+        int32_t ai = _next_grapheme(a, &a_state, i);
+        int32_t bi = _next_grapheme(b, &b_state, i);
+        if (ai != bi) { // Identical values need no case folding
+            const uint32_t *a_codepoints = ai >= 0 ? (uint32_t*)&ai : synthetic_graphemes[-ai-1].codepoints;
+            size_t a_len = ai >= 0 ? 1 : synthetic_graphemes[-ai-1].num_codepoints;
 
-    size_t target_len = CORD_len(target);
-    size_t str_len = CORD_len(str);
-    if (where.tag == $tag$Where$Start) {
-        if (CORD_ncmp(str, 0, target, 0, target_len) == 0)
-            return CORD_substr(str, target_len, str_len - target_len);
-        return str;
-    } else if (where.tag == $tag$Where$End) {
-        if (CORD_ncmp(str, str_len-target_len, target, 0, target_len) == 0)
-            return CORD_substr(str, 0, str_len - target_len);
-        return str;
-    } else {
-        CORD ret = CORD_EMPTY;
-        size_t i = 0;
-        for (;;) {
-            size_t match = CORD_str(str, i, target);
-            if (match == CORD_NOT_FOUND) {
-                if (i == 0) return str; // No matches!
-                ret = CORD_cat(ret, CORD_substr(str, i, str_len));
-                break;
-            }
-            ret = CORD_cat(ret, CORD_substr(str, i, (match-i)));
-            i = match + target_len;
+            const uint32_t *b_codepoints = bi >= 0 ? (uint32_t*)&bi : synthetic_graphemes[-bi-1].codepoints;
+            size_t b_len = bi >= 0 ? 1 : synthetic_graphemes[-bi-1].num_codepoints;
+
+            // A failed comparison leaves `cmp` unset, so failure is treated as "not equal"
+            int cmp;
+            if (u32_casecmp(a_codepoints, a_len, b_codepoints, b_len, language, UNINORM_NFC, &cmp) != 0 || cmp != 0)
+                return false;
         }
-        return ret;
     }
+    return true;
 }
 
-public CORD Text$trimmed(CORD str, CORD skip, Where_t where)
+public Text_t Text$upper(Text_t text) // Uppercase copy of the text, using the current locale's language rules
 {
-    if (!str || !skip) return str;
-    const uint8_t *ustr = (const uint8_t*)CORD_to_const_char_star(str);
-    const uint8_t *uskip = (const uint8_t*)CORD_to_const_char_star(skip);
-    // TODO: implement proper reverse iteration with u8_prev()
-    if (where.tag == $tag$Where$Start) {
-        size_t span = u8_strspn(ustr, uskip);
-        return (CORD)ustr + span;
-    } else if (where.tag == $tag$Where$End) {
-        size_t len = u8_strlen(ustr);
-        const uint8_t *back = ustr + len;
-        size_t back_span = 0;
-        while (back - back_span > ustr && u8_strspn(back-back_span-1, uskip) > back_span)
-            ++back_span;
-        return CORD_substr((CORD)ustr, 0, len - back_span);
-    } else {
-        size_t span = u8_strspn(ustr, uskip);
-        size_t len = u8_strlen(ustr);
-        const uint8_t *back = ustr + len;
-        size_t back_span = 0;
-        while (back - back_span > ustr + span && u8_strspn(back-back_span-1, uskip) > back_span)
-            ++back_span;
-        return CORD_substr((CORD)(ustr + span), 0, len - span - back_span);
+    size_t length;
+    uint32_t *codepoints = text_to_u32(text, &length);
+    const char *language = uc_locale_language();
+    uint32_t buf[128];
+    size_t out_len = sizeof(buf)/sizeof(buf[0]); // In: capacity of `buf`; out: number of codepoints produced
+    uint32_t *upper = u32_toupper(codepoints, length, language, UNINORM_NFC, buf, &out_len);
+    Text_t ret = text_from_u32(upper, out_len, false);
+    if (upper != buf) free(upper); // The result is malloc()ed only when it did not fit in `buf`
+    return ret;
+}
+
+// Return a copy of `text` converted to lowercase, using the case-mapping
+// rules of the current locale's language and NFC normalization.
+public Text_t Text$lower(Text_t text)
+{
+    size_t length;
+    uint32_t *codepoints = text_to_u32(text, &length);
+    const char *language = uc_locale_language();
+    uint32_t buf[128];
+    // libunistring reads *lengthp as the capacity of the result buffer on
+    // entry, so it has to be initialized before the call:
+    size_t out_len = sizeof(buf)/sizeof(buf[0]);
+    uint32_t *lower = u32_tolower(codepoints, length, language, UNINORM_NFC, buf, &out_len);
+    if (!lower) errx(1, "Unicode case conversion error!");
+    Text_t ret = text_from_u32(lower, out_len, false);
+    // The result is either `buf` (stack memory, which must never be freed) or
+    // a fresh malloc() allocation, so `buf` is the pointer to compare against:
+    if (lower != buf) free(lower);
+    return ret;
+}
+
+// Return a copy of `text` converted to title case, using the case-mapping
+// rules of the current locale's language and NFC normalization.
+public Text_t Text$title(Text_t text)
+{
+    size_t length;
+    uint32_t *codepoints = text_to_u32(text, &length);
+    const char *language = uc_locale_language();
+    uint32_t buf[128];
+    // libunistring reads *lengthp as the capacity of the result buffer on
+    // entry, so it has to be initialized before the call:
+    size_t out_len = sizeof(buf)/sizeof(buf[0]);
+    uint32_t *title = u32_totitle(codepoints, length, language, UNINORM_NFC, buf, &out_len);
+    if (!title) errx(1, "Unicode case conversion error!");
+    Text_t ret = text_from_u32(title, out_len, false);
+    // The result is either `buf` (stack memory, which must never be freed) or
+    // a fresh malloc() allocation, so `buf` is the pointer to compare against:
+    if (title != buf) free(title);
+    return ret;
+}
+
+// Advance `*i` past any whitespace graphemes in `text`, stopping at the first
+// non-whitespace grapheme or at the end of the text.
+static inline void skip_whitespace(Text_t text, int64_t *i)
+{
+    iteration_state_t state = {0, 0};
+    while (*i < text.length) {
+        int32_t grapheme = _next_grapheme(text, &state, *i);
+        // Synthetic clusters (negative IDs) are classified by their first
+        // codepoint instead of being skipped unconditionally:
+        if (grapheme < 0)
+            grapheme = synthetic_graphemes[-grapheme-1].codepoints[0];
+        if (!uc_is_property_white_space(grapheme))
+            return;
+        *i += 1;
     }
 }
 
-public find_result_t Text$find(CORD str, CORD pat)
+// Consume the grapheme of `text` at `*i` if it is exactly `grapheme`.
+// Returns whether it matched; `*i` is advanced only on a match.
+static inline bool match_grapheme(Text_t text, int64_t *i, int32_t grapheme)
 {
-    if (!pat) return (find_result_t){.status=FIND_SUCCESS, .index=1};
-    size_t pos = CORD_str(str, 0, pat);
-    return (pos == CORD_NOT_FOUND) ? (find_result_t){.status=FIND_FAILURE} : (find_result_t){.status=FIND_SUCCESS, .index=(int32_t)pos};
+    if (*i >= text.length)
+        return false;
+    if (get_grapheme(text, *i) != grapheme)
+        return false;
+    *i += 1;
+    return true;
 }
 
-public CORD Text$replace(CORD text, CORD pat, CORD replacement, Int_t int_limit)
+// Consume the C string `str` from `text` starting at `*i`, comparing one
+// grapheme per char. `*i` is advanced past the string only if all of it
+// matched; otherwise it is left untouched and false is returned.
+static inline bool match_str(Text_t text, int64_t *i, const char *str)
 {
-    if (!text || !pat) return text;
-    CORD ret = CORD_EMPTY;
-    size_t pos = 0, pat_len = CORD_len(pat);
-    int64_t limit = Int_to_Int64(int_limit, false);
-    for (size_t found; limit != 0 && (found=CORD_str(text, pos, pat)) != CORD_NOT_FOUND; --limit) {
-        ret = CORD_all(ret, CORD_substr(text, pos, found - pos), replacement);
-        pos = found + pat_len;
+    iteration_state_t state = {0, 0};
+    int64_t n = 0;
+    for (; str[n]; n += 1) {
+        int64_t pos = *i + n;
+        if (pos >= text.length)
+            return false;
+        if (_next_grapheme(text, &state, pos) != str[n])
+            return false;
     }
-    size_t str_len = CORD_len(text);
-    return CORD_cat(ret, CORD_substr(text, pos, str_len - pos));
+    *i += n;
+    return true;
 }
 
-public array_t Text$split(CORD str, CORD split)
+// Consume the grapheme of `text` at `*i` if it has the Unicode property
+// `prop`. Synthetic clusters are judged by their first codepoint only.
+// Returns whether it matched; `*i` is advanced only on a match.
+static inline bool match_property(Text_t text, int64_t *i, uc_property_t prop)
 {
-    if (!str) return (array_t){.data=GC_MALLOC(sizeof(CORD)), .atomic=1, .length=1, .stride=sizeof(CORD)};
-    array_t strings = {.stride=sizeof(CORD), .atomic=1};
+    if (*i >= text.length)
+        return false;
+    int32_t codepoint = get_grapheme(text, *i);
+    // TODO: check every codepoint in the cluster?
+    if (codepoint < 0)
+        codepoint = synthetic_graphemes[-codepoint-1].codepoints[0];
 
-    const uint8_t *ustr = (uint8_t*)CORD_to_const_char_star(str);
-    const uint8_t *usplit = (uint8_t*)CORD_to_const_char_star(split);
-    for (int64_t i = 0; ; ) {
-        size_t non_split = u8_strcspn(ustr + i, usplit);
-        CORD chunk = CORD_substr((CORD)ustr, i, non_split);
-        Array$insert(&strings, &chunk, I(0), sizeof(CORD));
+    if (!uc_is_property(codepoint, prop))
+        return false;
+    *i += 1;
+    return true;
+}
 
-        i += non_split;
+// Parse a run of Unicode digits from `text` starting at `*i` and return its
+// value. `*i` is left on the first grapheme that was not consumed. Parsing
+// stops at the end of the text, at the first non-digit, or before the value
+// could overflow an int64_t.
+static int64_t parse_int(Text_t text, int64_t *i)
+{
+    iteration_state_t state = {0, 0};
+    int64_t value = 0;
+    // Bounded by text.length so a number at the very end of the text doesn't
+    // cause a read past the last grapheme:
+    for (; *i < text.length; *i += 1) {
+        int32_t grapheme = _next_grapheme(text, &state, *i);
+        if (grapheme < 0)
+            grapheme = synthetic_graphemes[-grapheme-1].codepoints[0];
+        int digit = uc_digit_value(grapheme);
+        if (digit < 0) break;
+        if (value >= INT64_MAX/10) break;
+        value = 10*value + digit;
+    }
+    return value;
+}
 
-        size_t split_span = u8_strspn(ustr + i, usplit);
-        if (split_span == 0) break;
-        i += split_span;
+// Read a property or character name out of a pattern, starting at `*i`
+// (leading whitespace is skipped first). The name is made of alphanumeric
+// characters, spaces, underscores and hyphens and is copied into a freshly
+// allocated buffer of UNINAME_MAX bytes.
+//
+// Returns the NUL-terminated name, or NULL if no name characters were found.
+// If the first grapheme is some other non-synthetic character (except ']'),
+// it is treated as a literal and whatever unicode_character_name() produces
+// for it is returned instead. `*i` is advanced past the consumed graphemes.
+const char *get_property_name(Text_t text, int64_t *i)
+{
+    skip_whitespace(text, i);
+    char *name = GC_MALLOC_ATOMIC(UNINAME_MAX);
+    char *dest = name;
+    iteration_state_t state = {0, 0};
+    while (*i < text.length) {
+        int32_t grapheme = _next_grapheme(text, &state, *i);
+        // Only values in the range 0-255 are handed to isalnum():
+        if (!(grapheme & ~0xFF) && (isalnum(grapheme) || grapheme == ' ' || grapheme == '_' || grapheme == '-')) {
+            *dest = (char)grapheme;
+            ++dest;
+            // Stop while there is still room for the terminating NUL
+            // (note: this exit leaves `*i` on the character just stored)
+            if (dest >= name + UNINAME_MAX - 1)
+                break;
+        } else if (dest == name && grapheme >= 0 && grapheme != ']') {
+            // Literal character escape: [..[] --> "LEFT SQUARE BRACKET"
+            name = unicode_character_name(grapheme, name);
+            *i += 1;
+            return name;
+        } else {
+            break;
+        }
+        *i += 1;
     }
-    return strings;
+    // Nothing was copied, so there is no name here:
+    if (dest == name) return NULL;
+    *dest = '\0';
+    return name;
 }
 
-public CORD Text$join(CORD glue, array_t pieces)
+// Helper macros for the hand-written matchers below. Both expect a Text_t
+// named `text` and an int64_t cursor named `text_index` to be in scope where
+// they are expanded.
+//
+// EAT1: bind the grapheme at the cursor to `grapheme`, evaluate `cond`, and
+// advance the cursor by one if it holds. Evaluates to whether it advanced.
+// It never matches at or past the end of the text, so no grapheme is read
+// out of bounds.
+#define EAT1(state, cond) ({\
+        bool success = false; \
+        if (text_index < text.length) { \
+            int32_t grapheme = _next_grapheme(text, state, text_index); \
+            success = (cond); \
+            if (success) text_index += 1; \
+        } \
+        success; })
+
+// EAT_MANY: apply EAT1 repeatedly; evaluates to the number of graphemes eaten.
+#define EAT_MANY(state, cond) ({ int64_t n = 0; while (EAT1(state, cond)) { n += 1; } n; })
+
+// Try to match an email address in `text` starting at `text_index`.
+// Returns the number of graphemes matched, or -1 if there is no match
+// (including when the preceding grapheme is alphabetic).
+int64_t match_email(Text_t text, int64_t text_index)
 {
-    if (pieces.length == 0) return CORD_EMPTY;
+    // email = local "@" domain
+    // local = 1-64 ([a-zA-Z0-9!#$%&'*+-/=?^_`.{|}~] | non-ascii)
+    // domain = dns-label ("." dns-label)*
+    // dns-label = 1-63 ([a-zA-Z0-9-] | non-ascii)
 
-    CORD ret = CORD_EMPTY;
-    for (int64_t i = 0; i < pieces.length; i++) {
-        if (i > 0) ret = CORD_cat(ret, glue);
-        ret = CORD_cat(ret, *(CORD*)((void*)pieces.data + i*pieces.stride));
+    iteration_state_t state = {0, 0};
+    if (text_index > 0) {
+        int32_t prev_codepoint = _next_grapheme(text, &state, text_index - 1);
+        if (prev_codepoint < 0)
+            prev_codepoint = synthetic_graphemes[-prev_codepoint-1].codepoints[0];
+        if (uc_is_property_alphabetic(prev_codepoint))
+            return -1;
     }
-    return ret;
+
+    int64_t start_index = text_index;
+
+    // Local part:
+    int64_t local_len = 0;
+    // Plain ASCII apostrophe and hyphen (not their typographic lookalikes):
+    static const char *allowed_local = "!#$%&'*+-/=?^_`.{|}~";
+    // NUL is excluded explicitly because strchr() would otherwise find it at
+    // the string's terminator.
+    while (EAT1(&state, (grapheme & ~0x7F) || isalnum((char)grapheme)
+                || (grapheme != 0 && strchr(allowed_local, (char)grapheme)))) {
+        local_len += 1;
+        if (local_len > 64) return -1;
+    }
+
+    // The local part needs at least one character:
+    if (local_len == 0)
+        return -1;
+
+    if (!EAT1(&state, grapheme == '@'))
+        return -1;
+
+    // Host
+    int64_t host_len = 0;
+    do {
+        int64_t label_len = 0;
+        while (EAT1(&state, (grapheme & ~0x7F) || isalnum((char)grapheme) || grapheme == '-')) {
+            label_len += 1;
+            if (label_len > 63) return -1;
+        }
+
+        if (label_len == 0)
+            return -1;
+
+        host_len += label_len;
+        if (host_len > 255)
+            return -1;
+        host_len += 1; // Account for the "." separator
+    } while (EAT1(&state, grapheme == '.'));
+
+    return text_index - start_index;
 }
 
-public array_t Text$clusters(CORD text)
+// Try to match an IPv6 address in `text` starting at `text_index`: up to 8
+// colon-separated groups of at most 4 hex digits, with "::" allowed once.
+// Returns the number of graphemes matched, or -1 if there is no match.
+int64_t match_ipv6(Text_t text, int64_t text_index)
 {
-    array_t clusters = {.atomic=1};
-    uint8_t buf[128] = {0}; size_t norm_len = sizeof(buf);
-    uint8_t *normalized = _normalize(text, buf, &norm_len);
-    const uint8_t *end = normalized + strlen((char*)normalized);
-    for (const uint8_t *pos = normalized; pos != end; ) {
-        const uint8_t *next = u8_grapheme_next(pos, end);
-        size_t len = (size_t)(next - pos);
-        char cluster_buf[len+1];
-        strlcpy(cluster_buf, (char*)pos, len+1);
-        CORD cluster = CORD_from_char_star(cluster_buf);
-        Array$insert(&clusters, &cluster, I(0), sizeof(CORD));
-        pos = next;
+    iteration_state_t state = {0, 0};
+    if (text_index > 0) {
+        // Don't start a match in the middle of a run of hex digits/colons.
+        // (Only ASCII values may be passed to isxdigit().)
+        int32_t prev_codepoint = _next_grapheme(text, &state, text_index - 1);
+        if (!(prev_codepoint & ~0x7F) && (isxdigit(prev_codepoint) || prev_codepoint == ':'))
+            return -1;
     }
+    int64_t start_index = text_index;
+    const int NUM_CLUSTERS = 8;
+    bool double_colon_used = false;
+    for (int cluster = 0; cluster < NUM_CLUSTERS; cluster++) {
+        // `!(grapheme & ~0x7F)` restricts the test to ASCII, so a non-ASCII
+        // grapheme can't be truncated by the (char) cast into a hex digit:
+        for (int digits = 0; digits < 4; digits++) {
+            if (!EAT1(&state, !(grapheme & ~0x7F) && isxdigit((char)grapheme)))
+                break;
+        }
+        if (EAT1(&state, !(grapheme & ~0x7F) && isxdigit((char)grapheme)))
+            return -1; // Too many digits
+
+        if (cluster == NUM_CLUSTERS-1) {
+            break;
+        } else if (!EAT1(&state, grapheme == ':')) {
+            if (double_colon_used)
+                break;
+            return -1;
+        }
 
-    if (normalized != buf) free(normalized);
-    return clusters;
+        if (EAT1(&state, grapheme == ':')) {
+            if (double_colon_used)
+                return -1;
+            double_colon_used = true;
+        }
+    }
+    return text_index - start_index;
 }
 
-public array_t Text$codepoints(CORD text)
+// Try to match an IPv4 address in `text` starting at `text_index`: four
+// dot-separated groups of 1-3 ASCII digits.
+// Returns the number of graphemes matched, or -1 if there is no match.
+static int64_t match_ipv4(Text_t text, int64_t text_index)
 {
-    uint8_t norm_buf[128] = {0}; size_t norm_len = sizeof(norm_buf);
-    uint8_t *normalized = _normalize(text, norm_buf, &norm_len);
+    iteration_state_t state = {0, 0};
+    if (text_index > 0) {
+        // Don't start a match in the middle of a run of digits/dots.
+        // (Only ASCII values may be passed to isdigit().)
+        int32_t prev_codepoint = _next_grapheme(text, &state, text_index - 1);
+        if (!(prev_codepoint & ~0x7F) && (isdigit(prev_codepoint) || prev_codepoint == '.'))
+            return -1;
+    }
+    int64_t start_index = text_index;
 
-    uint32_t codepoint_buf[128] = {0};
-    size_t codepoint_len = sizeof(codepoint_buf);
-    uint32_t *codepoints = u8_to_u32(normalized, norm_len-1, codepoint_buf, &codepoint_len);
-    array_t ret = {
-        .length=codepoint_len,
-        .data=memcpy(GC_MALLOC_ATOMIC(sizeof(int32_t[codepoint_len])), codepoints, sizeof(int32_t[codepoint_len])),
-        .stride=sizeof(int32_t),
-        .atomic=1,
-    };
+    const int NUM_CLUSTERS = 4;
+    for (int cluster = 0; cluster < NUM_CLUSTERS; cluster++) {
+        // `!(grapheme & ~0x7F)` restricts the test to ASCII, so a non-ASCII
+        // grapheme can't be truncated by the (char) cast into a digit:
+        for (int digits = 0; digits < 3; digits++) {
+            if (!EAT1(&state, !(grapheme & ~0x7F) && isdigit((char)grapheme))) {
+                if (digits == 0) return -1;
+                break;
+            }
+        }
 
-    if (normalized != norm_buf) free(normalized);
-    if (codepoints != codepoint_buf) free(codepoints);
-    return ret;
+        if (EAT1(&state, !(grapheme & ~0x7F) && isdigit((char)grapheme)))
+            return -1; // Too many digits
+
+        if (cluster == NUM_CLUSTERS-1)
+            break;
+        else if (!EAT1(&state, grapheme == '.'))
+            return -1;
+    }
+    return (text_index - start_index);
 }
 
-public array_t Text$bytes(CORD text)
+// Try to match a URI in `text` starting at `text_index`.
+// Returns the number of graphemes matched, or -1 if there is no match
+// (including when the preceding grapheme is alphabetic).
+int64_t match_uri(Text_t text, int64_t text_index)
 {
-    uint8_t norm_buf[128] = {0}; size_t norm_len = sizeof(norm_buf);
-    uint8_t *normalized = _normalize(text, norm_buf, &norm_len);
+    // URI = scheme ":" ["//" authority] path ["?" query] ["#" fragment]
+    // scheme = [a-zA-Z] [a-zA-Z0-9+.-]
+    // authority = [userinfo "@"] host [":" port]
 
-    --norm_len; // NUL byte
-    array_t ret = {
-        .length=norm_len,
-        .data=memcpy(GC_MALLOC_ATOMIC(sizeof(uint8_t[norm_len])), normalized, sizeof(uint8_t[norm_len])),
-        .stride=sizeof(uint8_t),
-        .atomic=1,
-    };
+    iteration_state_t state = {0, 0};
+    if (text_index > 0) {
+        int32_t prev_codepoint = _next_grapheme(text, &state, text_index - 1);
+        if (prev_codepoint < 0)
+            prev_codepoint = synthetic_graphemes[-prev_codepoint-1].codepoints[0];
+        if (uc_is_property_alphabetic(prev_codepoint))
+            return -1;
+    }
 
-    if (normalized != norm_buf) free(normalized);
-    return ret;
+    int64_t start_index = text_index;
+
+    // Scheme (only ASCII values may be passed to isalpha()/isalnum()):
+    if (!EAT1(&state, !(grapheme & ~0x7F) && isalpha(grapheme)))
+        return -1;
+
+    EAT_MANY(&state, !(grapheme & ~0x7F) && (isalnum(grapheme) || grapheme == '+' || grapheme == '.' || grapheme == '-'));
+
+    if (!match_grapheme(text, &text_index, ':'))
+        return -1;
+
+    // Authority:
+    if (match_str(text, &text_index, "//")) {
+        static const char *forbidden = "#?:@ \t\r\n<>[]{}\\^|\"`/";
+
+        // A bracketed IPv6 host like "[::1]" has to be tried before the
+        // generic scan below, because '[' is a forbidden host character and
+        // the generic scan would reject it outright.
+        bool found_ipv6 = false;
+        int64_t ip = text_index;
+        if (match_grapheme(text, &ip, '[')) {
+            int64_t ipv6_len = match_ipv6(text, ip);
+            if (ipv6_len > 0) {
+                ip += ipv6_len;
+                found_ipv6 = match_grapheme(text, &ip, ']');
+            }
+        }
+
+        if (found_ipv6) {
+            text_index = ip;
+        } else {
+            // Username or host (this also covers dotted IPv4 hosts):
+            if (EAT_MANY(&state, (grapheme & ~0x7F) || !strchr(forbidden, (char)grapheme)) == 0)
+                return -1;
+
+            if (EAT1(&state, grapheme == '@')) {
+                // Found a username, now get a host:
+                if (EAT_MANY(&state, (grapheme & ~0x7F) || !strchr(forbidden, (char)grapheme)) == 0)
+                    return -1;
+            }
+        }
+
+        // Port:
+        if (EAT1(&state, grapheme == ':')) {
+            if (EAT_MANY(&state, !(grapheme & ~0x7F) && isdigit(grapheme)) == 0)
+                return -1;
+        }
+        if (!EAT1(&state, grapheme == '/'))
+            return (text_index - start_index); // No path
+    } else {
+        // Optional path root:
+        EAT1(&state, grapheme == '/');
+    }
+
+    // Path:
+    static const char *non_path = " \"#?<>[]{}\\^`|";
+    EAT_MANY(&state, (grapheme & ~0x7F) || !strchr(non_path, (char)grapheme));
+
+    if (EAT1(&state, grapheme == '?')) { // Query
+        static const char *non_query = " \"#<>[]{}\\^`|";
+        EAT_MANY(&state, (grapheme & ~0x7F) || !strchr(non_query, (char)grapheme));
+    }
+
+    if (EAT1(&state, grapheme == '#')) { // Fragment
+        static const char *non_fragment = " \"#<>[]{}\\^`|";
+        EAT_MANY(&state, (grapheme & ~0x7F) || !strchr(non_fragment, (char)grapheme));
+    }
+    return text_index - start_index;
 }
 
-public Int_t Text$num_clusters(CORD text)
+// Match `pattern` (from `pattern_index` onward) against `text` at exactly
+// `text_index`. Returns the number of graphemes of text consumed, or -1 if
+// the pattern does not match at that position.
+//
+// Pattern syntax handled here:
+//   [..name]       one or more graphemes with the named property/character
+//   [..N name]     exactly N; [..N+ name] at least N; [..N-M name] N to M
+//   [..!name]      negated property
+//   [..]           any graphemes
+//   "?" '?' etc.   a quoted span (backslash skips the following grapheme)
+//   (?) [?] etc.   a balanced, possibly nested, bracketed span
+//   anything else  a literal grapheme
+// Malformed patterns terminate the program via errx().
+int64_t match(Text_t text, Text_t pattern, int64_t text_index, int64_t pattern_index)
 {
-    const uint8_t *ustr = (const uint8_t*)CORD_to_const_char_star(text);
-    int64_t num_clusters = 0;
-    const uint8_t *end = ustr + u8_strlen(ustr);
-    for (const uint8_t *pos = ustr; pos != end; ) {
-        const uint8_t *next = u8_grapheme_next(pos, end);
-        ++num_clusters;
-        pos = next;
+    if (pattern_index >= pattern.length) return 0;
+    int64_t start_index = text_index;
+    iteration_state_t pattern_state = {0, 0}, text_state = {0, 0};
+    while (pattern_index < pattern.length) {
+        int64_t old_pat_index = pattern_index;
+        if (match_str(pattern, &pattern_index, "[..")) {
+            skip_whitespace(pattern, &pattern_index);
+            int64_t min, max;
+            // Don't peek past the end of an unterminated pattern like "[..":
+            if (pattern_index < pattern.length
+                && uc_is_digit(_next_grapheme(pattern, &pattern_state, pattern_index))) {
+                min = parse_int(pattern, &pattern_index);
+                skip_whitespace(pattern, &pattern_index);
+                if (match_grapheme(pattern, &pattern_index, '+')) {
+                    max = INT64_MAX;
+                } else if (match_grapheme(pattern, &pattern_index, '-')) {
+                    max = parse_int(pattern, &pattern_index);
+                } else {
+                    max = min;
+                }
+            } else {
+                min = 1, max = INT64_MAX;
+            }
+
+            skip_whitespace(pattern, &pattern_index);
+            bool want_to_match = !match_grapheme(pattern, &pattern_index, '!');
+            const char *prop_name = get_property_name(pattern, &pattern_index);
+
+            skip_whitespace(pattern, &pattern_index);
+            if (!match_grapheme(pattern, &pattern_index, ']'))
+                errx(1, "Missing closing ']' in pattern: \"%T\"", &pattern);
+
+            int64_t before_group = text_index;
+            bool any = false;
+            uc_property_t prop;
+            int32_t specific_codepoint = UNINAME_INVALID;
+
+// An optional group (min == 0) that fails is skipped; a required one fails the match
+#define FAIL() ({ if (min < 1) { text_index = before_group; continue; } else { return -1; } })
+            if (prop_name) {
+                switch (tolower(prop_name[0])) {
+                case 'd':
+                    if (strcasecmp(prop_name, "digit") == 0) {
+                        prop = UC_PROPERTY_DECIMAL_DIGIT;
+                        goto got_prop;
+                    }
+                    break;
+                case 'e':
+                    if (strcasecmp(prop_name, "end") == 0) {
+                        if (text_index != text.length)
+                            FAIL();
+                        continue;
+                    } else if (prop_name && strcasecmp(prop_name, "email") == 0) {
+                        int64_t len = match_email(text, text_index);
+                        if (len < 0)
+                            FAIL();
+                        text_index += len;
+                        continue;
+                    } else if (prop_name && strcasecmp(prop_name, "emoji") == 0) {
+                        prop = UC_PROPERTY_EMOJI;
+                        goto got_prop;
+                    }
+                    break;
+                case 'i':
+                    if (prop_name && strcasecmp(prop_name, "id") == 0) {
+                        if (!EAT1(&text_state, uc_is_property(grapheme, UC_PROPERTY_XID_START)))
+                            FAIL();
+                        EAT_MANY(&text_state, uc_is_property(grapheme, UC_PROPERTY_XID_CONTINUE));
+                        continue;
+                    } else if (prop_name && strcasecmp(prop_name, "ipv4") == 0) {
+                        int64_t len = match_ipv4(text, text_index);
+                        if (len < 0)
+                            FAIL();
+                        text_index += len;
+                        continue;
+                    } else if (prop_name && strcasecmp(prop_name, "ipv6") == 0) {
+                        int64_t len = match_ipv6(text, text_index);
+                        if (len < 0)
+                            FAIL();
+                        text_index += len;
+                        continue;
+                    } else if (prop_name && strcasecmp(prop_name, "ip") == 0) {
+                        int64_t len = match_ipv6(text, text_index);
+                        if (len < 0)
+                            len = match_ipv4(text, text_index);
+                        if (len < 0)
+                            FAIL();
+                        text_index += len;
+                        continue;
+                    }
+                    break;
+                case 's':
+                    if (strcasecmp(prop_name, "start") == 0) {
+                        if (text_index != 0) return -1;
+                        continue;
+                    }
+                    break;
+                case 'u':
+                    if (prop_name && strcasecmp(prop_name, "uri") == 0) {
+                        int64_t len = match_uri(text, text_index);
+                        if (len < 0)
+                            FAIL();
+                        text_index += len;
+                        continue;
+                    } else if (prop_name && strcasecmp(prop_name, "url") == 0) {
+                        int64_t lookahead = text_index;
+                        if (!(match_str(text, &lookahead, "https:")
+                            || match_str(text, &lookahead, "http:")
+                            || match_str(text, &lookahead, "ftp:")
+                            || match_str(text, &lookahead, "wss:")
+                            || match_str(text, &lookahead, "ws:")))
+                            FAIL();
+
+                        int64_t len = match_uri(text, text_index);
+                        if (len < 0)
+                            FAIL();
+                        text_index += len;
+                        continue;
+                    }
+                    break;
+                }
+
+                // Not a special name: try a Unicode property, then a character name
+                prop = uc_property_byname(prop_name);
+                if (!uc_property_is_valid(prop)) {
+                    specific_codepoint = unicode_name_character(prop_name);
+                    if (specific_codepoint == UNINAME_INVALID)
+                        errx(1, "Not a valid property or character name: %s", prop_name);
+                }
+            } else {
+                any = true;
+                prop = UC_PROPERTY_PRIVATE_USE;
+            }
+      got_prop:;
+#undef FAIL
+
+            // A zero-minimum group may match nothing, so try the rest of the pattern first:
+            if (min == 0 && pattern_index < pattern.length) {
+                int64_t match_len = match(text, pattern, text_index, pattern_index);
+                if (match_len >= 0)
+                    return (text_index - start_index) + match_len;
+            }
+
+            for (int64_t count = 0; count < max; ) {
+                // Never read (or keep counting) past the end of the text:
+                if (text_index >= text.length) {
+                    if (count < min) return -1;
+                    break;
+                }
+
+                int32_t grapheme = _next_grapheme(text, &text_state, text_index);
+                if (grapheme < 0)
+                    grapheme = synthetic_graphemes[-grapheme-1].codepoints[0];
+
+                bool success;
+                if (any)
+                    success = true;
+                else if (specific_codepoint != UNINAME_INVALID)
+                    success = (grapheme == specific_codepoint);
+                else
+                    success = uc_is_property(grapheme, prop);
+
+                if (success != want_to_match) {
+                    if (count < min) return -1;
+                    else break;
+                }
+
+                text_index += 1;
+                count += 1;
+
+                // Non-greedy: stop as soon as the rest of the pattern matches
+                if (count >= min) {
+                    if (pattern_index < pattern.length) {
+                        int64_t match_len = match(text, pattern, text_index, pattern_index);
+                        if (match_len >= 0) {
+                            return (text_index - start_index) + match_len;
+                        }
+                    } else if (text_index >= text.length) {
+                        break;
+                    }
+                }
+            }
+        } else if (uc_is_property(_next_grapheme(pattern, &pattern_state, pattern_index), UC_PROPERTY_QUOTATION_MARK)
+                   && (pattern_index += 1, match_grapheme(pattern, &pattern_index, '?'))) {
+            // Quotation: "?", '?', etc
+            int32_t open = _next_grapheme(pattern, &pattern_state, pattern_index-2);
+            if (!match_grapheme(text, &text_index, open)) return -1;
+            int32_t close = open;
+            uc_mirror_char(open, (uint32_t*)&close);
+            if (!match_grapheme(pattern, &pattern_index, close))
+                errx(1, "I expected a closing brace");
+            // Scan to the closing quote, consume it, and then carry on with
+            // the rest of the pattern:
+            bool closed = false;
+            while (text_index < text.length) {
+                int32_t c = _next_grapheme(text, &text_state, text_index);
+                if (c == close) {
+                    text_index += 1;
+                    closed = true;
+                    break;
+                }
+                // A backslash escapes (skips) the grapheme after it
+                text_index += (c == '\\') ? 2 : 1;
+            }
+            if (!closed) return -1;
+        } else if ((pattern_index = old_pat_index, // Undo any advance made by the failed test above
+                    uc_is_property(_next_grapheme(pattern, &pattern_state, pattern_index), UC_PROPERTY_PAIRED_PUNCTUATION))
+                   && (pattern_index += 1, match_grapheme(pattern, &pattern_index, '?'))) {
+            // Nested punctuation: (?), [?], etc
+            int32_t open = _next_grapheme(pattern, &pattern_state, pattern_index-2);
+            if (!match_grapheme(text, &text_index, open)) return -1;
+            int32_t close = open;
+            uc_mirror_char(open, (uint32_t*)&close);
+            if (!match_grapheme(pattern, &pattern_index, close))
+                errx(1, "I expected a closing brace");
+            int64_t depth = 1;
+            for (; depth > 0 && text_index < text.length; ++text_index) {
+                int32_t c = _next_grapheme(text, &text_state, text_index);
+                if (c == open)
+                    depth += 1;
+                else if (c == close)
+                    depth -= 1;
+            }
+            if (depth > 0) return -1;
+        } else {
+            // Plain character:
+            pattern_index = old_pat_index;
+            int32_t pat_grapheme = _next_grapheme(pattern, &pattern_state, pattern_index);
+
+            // A pattern that starts with a letter must not start mid-word:
+            if (pattern_index == 0 && text_index > 0) {
+                int32_t pat_codepoint = pat_grapheme;
+                if (pat_codepoint < 0)
+                    pat_codepoint = synthetic_graphemes[-pat_codepoint-1].codepoints[0];
+
+                int32_t prev_codepoint = _next_grapheme(text, &text_state, text_index - 1);
+                if (prev_codepoint < 0)
+                    prev_codepoint = synthetic_graphemes[-prev_codepoint-1].codepoints[0];
+                if (uc_is_property_alphabetic(pat_codepoint) && uc_is_property_alphabetic(prev_codepoint))
+                    return -1;
+            }
+
+            // Out of text while the pattern still needs a literal:
+            if (text_index >= text.length)
+                return -1;
+
+            int32_t text_grapheme = _next_grapheme(text, &text_state, text_index);
+            if (pat_grapheme != text_grapheme)
+                return -1;
+
+            pattern_index += 1;
+            text_index += 1;
+
+            // A pattern that ends with a letter must not end mid-word:
+            if (pattern_index == pattern.length && text_index < text.length) {
+                int32_t pat_codepoint = pat_grapheme;
+                if (pat_codepoint < 0)
+                    pat_codepoint = synthetic_graphemes[-pat_codepoint-1].codepoints[0];
+
+                int32_t next_codepoint = _next_grapheme(text, &text_state, text_index);
+                if (next_codepoint < 0)
+                    next_codepoint = synthetic_graphemes[-next_codepoint-1].codepoints[0];
+                if (uc_is_property_alphabetic(pat_codepoint) && uc_is_property_alphabetic(next_codepoint))
+                    return -1;
+            }
+        }
+    }
+    if (text_index >= text.length && pattern_index < pattern.length)
+        return -1;
+    return (text_index - start_index);
+}
+
+#undef EAT1
+#undef EAT_MANY
+
+// Find the first match of `pattern` in `text` at or after the 1-based
+// position `from_index`. Returns the 1-based index of the match, or 0 if
+// there is none. If `match_length` is non-NULL, it receives the length of the
+// match in graphemes (or -1 when nothing matched).
+public Int_t Text$find(Text_t text, Text_t pattern, Int_t from_index, int64_t *match_length)
+{
+    // An empty pattern has no first grapheme to inspect:
+    int32_t first = pattern.length > 0 ? get_grapheme(pattern, 0) : 0;
+    bool find_first = (pattern.length > 0
+                       && first != '['
+                       && !uc_is_property(first, UC_PROPERTY_QUOTATION_MARK)
+                       && !uc_is_property(first, UC_PROPERTY_PAIRED_PUNCTUATION));
+
+    // Convert to a 0-based offset, clamping so we never index before the text:
+    int64_t start = Int_to_Int64(from_index, false)-1;
+    if (start < 0) start = 0;
+
+    iteration_state_t text_state = {0, 0};
+    for (int64_t i = start; i < text.length; i++) {
+        // Optimization: quickly skip ahead to first char in pattern:
+        if (find_first) {
+            while (i < text.length && _next_grapheme(text, &text_state, i) != first)
+                ++i;
+            // The pattern's first grapheme doesn't occur in the rest of the text:
+            if (i >= text.length)
+                break;
+        }
+
+        int64_t m = match(text, pattern, i, 0);
+        if (m >= 0) {
+            if (match_length)
+                *match_length = m;
+            return I(i+1);
+        }
     }
-    return I(num_clusters);
+    if (match_length)
+        *match_length = -1;
+    return I(0);
+}
+
+// printf argument-size callback for Text values: reports that the conversion
+// takes a single pointer-sized argument (a Text_t*). Returns the number of
+// arguments used (1), or -1 if there is no room to describe it.
+public int printf_text_size(const struct printf_info *info, size_t n, int argtypes[n], int sizes[n])
+{
+    (void)info;
+    if (n == 0)
+        return -1;
+    sizes[0] = sizeof(Text_t*);
+    argtypes[0] = PA_POINTER;
+    return 1;
+}
+
+// printf output callback for Text values: the first argument slot holds a
+// pointer to a Text_t*, whose text is printed to `stream`. Returns whatever
+// Text$print() returns.
+public int printf_text(FILE *stream, const struct printf_info *info, const void *const args[])
+{
+    (void)info;
+    Text_t *const *text_arg = (Text_t *const *)args[0];
+    return Text$print(stream, **text_arg);
 }
 
-public Int_t Text$num_codepoints(CORD text)
+// Generic "as text" hook for the Text type. With a NULL `text` it returns the
+// type's name ("Text"); otherwise it returns the quoted form of the Text_t
+// that `text` points to.
+public Text_t Text$as_text(const void *text, bool colorize, const TypeInfo *info)
 {
-    uint8_t buf[128] = {0}; size_t norm_len = sizeof(buf);
-    uint8_t *normalized = _normalize(text, buf, &norm_len);
-    int64_t num_codepoints = u8_mbsnlen(normalized, norm_len-1);
-    if (normalized != buf) free(normalized);
-    return I(num_codepoints);
+    (void)info;
+    if (text == NULL)
+        return Text$from_str("Text");
+    const Text_t *value = text;
+    return Text$quoted(*value, colorize);
 }
 
-public Int_t Text$num_bytes(CORD text)
+// Return `text` wrapped in double quotes, with control characters, double
+// quotes and backslashes replaced by backslash escapes. When `colorize` is
+// set, ANSI color codes are inserted around the quoted text and around each
+// escape sequence. The result is built as an array of graphemes.
+public Text_t Text$quoted(Text_t text, bool colorize)
 {
-    uint8_t norm_buf[128] = {0}; size_t norm_len = sizeof(norm_buf);
-    uint8_t *normalized = _normalize(text, norm_buf, &norm_len);
-    --norm_len; // NUL byte
-    if (!normalized) errx(1, "Unicode normalization error!");
-    if (normalized != norm_buf) free(normalized);
-    return I(norm_len);
+    // TODO: optimize for ASCII and short strings
+    array_t graphemes = {.atomic=1};
+    // add_char: insert one grapheme into the output array
+    // (NOTE(review): index I_small(0) presumably means "append" -- confirm)
+#define add_char(c) Array$insert_value(&graphemes, (uint32_t)c, I_small(0), sizeof(uint32_t))
+    // add_str: insert every char of a C string, one grapheme per char
+#define add_str(s) ({ for (char *_c = s; *_c; ++_c) Array$insert_value(&graphemes, (uint32_t)*_c, I_small(0), sizeof(uint32_t)); })
+    // Opening quote (preceded by a color code when colorizing):
+    if (colorize)
+        add_str("\x1b[35m\"");
+    else
+        add_char('"');
+
+    // add_escaped: emit a backslash followed by `str`, wrapped in color codes
+    // when colorizing
+#define add_escaped(str) ({ if (colorize) add_str("\x1b[34;1m"); add_char('\\'); add_str(str); if (colorize) add_str("\x1b[0;35m"); })
+    iteration_state_t state = {0, 0};
+    for (int64_t i = 0; i < text.length; i++) {
+        int32_t g = _next_grapheme(text, &state, i);
+        switch (g) {
+        // Characters with a named escape:
+        case '\a': add_escaped("a"); break;
+        case '\b': add_escaped("b"); break;
+        case '\x1b': add_escaped("e"); break;
+        case '\f': add_escaped("f"); break;
+        case '\n': add_escaped("n"); break;
+        case '\r': add_escaped("r"); break;
+        case '\t': add_escaped("t"); break;
+        case '\v': add_escaped("v"); break;
+        case '"': add_escaped("\""); break;
+        case '\\': add_escaped("\\"); break;
+        // Remaining ASCII control characters become two-digit hex escapes (\xNN):
+        case '\x00' ... '\x06': case '\x0E' ... '\x1A':
+        case '\x1C' ... '\x1F': case '\x7F' ... '\x7F': {
+            if (colorize) add_str("\x1b[34;1m");
+            add_char('\\');
+            add_char('x');
+            char tmp[4];
+            sprintf(tmp, "%02X", g);
+            add_str(tmp);
+            if (colorize)
+                add_str("\x1b[0;35m");
+            break;
+        }
+        // Everything else (including synthetic grapheme IDs) is copied as-is:
+        default: add_char(g); break;
+        }
+    }
+
+    // Closing quote (followed by a color reset when colorizing):
+    if (colorize)
+        add_str("\"\x1b[m");
+    else
+        add_char('"');
+
+    return (Text_t){.length=graphemes.length, .tag=TEXT_GRAPHEMES, .graphemes=graphemes.data};
+#undef add_str
+#undef add_char
+#undef add_escaped
 }
 
-public array_t Text$character_names(CORD text)
+// Return a copy of `text` in which every match of `pattern` has been replaced
+// by `replacement`. Text that doesn't match is copied through unchanged.
+public Text_t Text$replace(Text_t text, Text_t pattern, Text_t replacement)
 {
-    array_t codepoints = Text$codepoints(text);
-    array_t ret = {.length=codepoints.length, .stride=sizeof(CORD), .data=GC_MALLOC(sizeof(CORD[codepoints.length]))};
-    for (int64_t i = 0; i < codepoints.length; i++) {
-        char buf[UNINAME_MAX];
-        unicode_character_name(*(ucs4_t*)(codepoints.data + codepoints.stride*i), buf);
-        *(CORD*)(ret.data + ret.stride*i) = CORD_from_char_star(buf);
+    Text_t ret = {.length=0};
+
+    // 1-based index of the first grapheme that hasn't been copied yet.
+    // (Text$find() takes and returns 1-based positions.)
+    int64_t pos = 1;
+    while (pos <= text.length) {
+        int64_t len;
+        int64_t found = Int_to_Int64(Text$find(text, pattern, Int64_to_Int(pos), &len), false);
+        if (found == 0) break; // No more matches
+
+        if (found > pos) {
+            // Copy the unmatched text before the match, then the replacement
+            ret = Text$concat(
+                ret,
+                Text$slice(text, Int64_to_Int(pos), Int64_to_Int(found - 1)),
+                replacement
+            );
+        } else {
+            ret = concat2(ret, replacement);
+        }
+
+        if (len > 0) {
+            // Resume searching right after the match
+            pos = found + len;
+        } else {
+            // Zero-length match: copy one grapheme through so the search
+            // always makes progress instead of looping forever
+            ret = concat2(ret, Text$slice(text, Int64_to_Int(found), Int64_to_Int(found)));
+            pos = found + 1;
+        }
+    }
+    // Copy whatever is left after the last match:
+    if (pos <= text.length) {
+        ret = concat2(ret, Text$slice(text, Int64_to_Int(pos), Int64_to_Int(text.length)));
     }
     return ret;
 }
 
-public CORD Text$read_line(CORD prompt)
+// Build a Text from a printf-style format string and arguments. Results of
+// up to 8 ASCII characters are stored inline as short ASCII text; anything
+// else is formatted into a GC-allocated string and converted with
+// Text$from_str(). A formatting error yields empty text.
+public Text_t Text$format(const char *fmt, ...)
 {
-    char *line = readline(CORD_to_const_char_star(prompt));
-    if (!line) return CORD_EMPTY;
-    CORD ret = CORD_from_char_star(line);
-    free(line);
+    va_list args, args_copy;
+    va_start(args, fmt);
+    // A va_list can only be traversed once, so keep a copy for a second pass:
+    va_copy(args_copy, args);
+
+    // Room for 8 characters plus the NUL terminator:
+    char buf[9];
+    int len = vsnprintf(buf, sizeof(buf), fmt, args);
+    va_end(args);
+
+    // The inline representation is only valid for short, pure-ASCII output:
+    bool fits_short_ascii = (len >= 0 && len < (int)sizeof(buf));
+    for (int i = 0; fits_short_ascii && i < len; i++) {
+        if ((unsigned char)buf[i] > 0x7F)
+            fits_short_ascii = false;
+    }
+
+    Text_t ret;
+    if (len < 0) {
+        ret = (Text_t){.length=0};
+    } else if (fits_short_ascii) {
+        ret = (Text_t){
+            .length=len,
+            .tag = TEXT_SHORT_ASCII,
+        };
+        for (int i = 0; i < len; i++)
+            ret.short_ascii[i] = buf[i];
+    } else {
+        // vsnprintf() returns the length without the NUL terminator, so the
+        // buffer needs one extra byte:
+        char *str = GC_MALLOC_ATOMIC((size_t)len + 1);
+        vsnprintf(str, (size_t)len + 1, fmt, args_copy);
+        ret = Text$from_str(str);
+    }
+    va_end(args_copy);
     return ret;
 }
 
+// Runtime type descriptor for Text values: records the size and alignment of
+// Text_t and tags the type as TextInfo with the language name "Text".
 public const TypeInfo $Text = {
-    .size=sizeof(CORD),
-    .align=__alignof__(CORD),
+    .size=sizeof(Text_t),
+    .align=__alignof__(Text_t),
     .tag=TextInfo,
     .TextInfo={.lang="Text"},
 };
diff --git a/builtins/text.h b/builtins/text.h
index 017a2804..1e671695 100644
--- a/builtins/text.h
+++ b/builtins/text.h
@@ -1,10 +1,10 @@
 #pragma once
 
-// Type info and methods for Text datatype, which uses the Boehm "cord" library
-// and libunistr
+// Type info and methods for Text datatype, which uses a struct inspired by
+// Raku's string representation and libunistr
 
-#include <gc/cord.h>
 #include <stdbool.h>
+#include <printf.h>
 #include <stdint.h>
 
 #include "datatypes.h"
@@ -12,36 +12,57 @@
 #include "types.h"
 #include "where.h"
 
-#define Text_t CORD
-
 typedef struct {
     enum { FIND_FAILURE, FIND_SUCCESS } status;
     int32_t index;
 } find_result_t;
 
-CORD Text$as_text(const void *str, bool colorize, const TypeInfo *info);
-CORD Text$quoted(CORD str, bool colorize);
-int Text$compare(const CORD *x, const CORD *y);
-bool Text$equal(const CORD *x, const CORD *y);
-uint32_t Text$hash(const CORD *cord);
-CORD Text$upper(CORD str);
-CORD Text$lower(CORD str);
-CORD Text$title(CORD str);
-bool Text$has(CORD str, CORD target, Where_t where);
-CORD Text$without(CORD str, CORD target, Where_t where);
-CORD Text$trimmed(CORD str, CORD skip, Where_t where);
-find_result_t Text$find(CORD str, CORD pat);
-CORD Text$replace(CORD text, CORD pat, CORD replacement, Int_t limit);
-array_t Text$split(CORD str, CORD split);
-CORD Text$join(CORD glue, array_t pieces);
-array_t Text$clusters(CORD text);
-array_t Text$codepoints(CORD text);
-array_t Text$bytes(CORD text);
-Int_t Text$num_clusters(CORD text);
-Int_t Text$num_codepoints(CORD text);
-Int_t Text$num_bytes(CORD text);
-array_t Text$character_names(CORD text);
-CORD Text$read_line(CORD prompt);
+// CORD Text$as_text(const void *str, bool colorize, const TypeInfo *info);
+// CORD Text$quoted(CORD str, bool colorize);
+// // int Text$compare(const CORD *x, const CORD *y);
+// // bool Text$equal(const CORD *x, const CORD *y);
+// // uint32_t Text$hash(const CORD *cord);
+// // CORD Text$upper(CORD str);
+// // CORD Text$lower(CORD str);
+// // CORD Text$title(CORD str);
+// bool Text$has(CORD str, CORD target, Where_t where);
+// CORD Text$without(CORD str, CORD target, Where_t where);
+// CORD Text$trimmed(CORD str, CORD skip, Where_t where);
+// find_result_t Text$find(CORD str, CORD pat);
+// CORD Text$replace(CORD text, CORD pat, CORD replacement, Int_t limit);
+// array_t Text$split(CORD str, CORD split);
+// CORD Text$join(CORD glue, array_t pieces);
+// array_t Text$clusters(CORD text);
+// array_t Text$codepoints(CORD text);
+// array_t Text$bytes(CORD text);
+// Int_t Text$num_clusters(CORD text);
+// Int_t Text$num_codepoints(CORD text);
+// Int_t Text$num_bytes(CORD text);
+// array_t Text$character_names(CORD text);
+// CORD Text$read_line(CORD prompt);
+
+int printf_text(FILE *stream, const struct printf_info *info, const void *const args[]); // custom printf() conversion handler for Text_t values
+int printf_text_size(const struct printf_info *info, size_t n, int argtypes[n], int sizes[n]); // companion argument-info callback for printf_text
+
+int Text$print(FILE *stream, Text_t t);
+void Text$visualize(Text_t t);
+Text_t Text$_concat(int n, Text_t items[n]);
+#define Text$concat(...) Text$_concat(sizeof((Text_t[]){__VA_ARGS__})/sizeof(Text_t), (Text_t[]){__VA_ARGS__}) // counts its Text_t arguments and forwards them as an array
+Text_t Text$slice(Text_t text, Int_t first_int, Int_t last_int);
+Text_t Text$from_str(const char *str);
+uint64_t Text$hash(Text_t *text);
+int32_t Text$compare(const Text_t *a, const Text_t *b);
+bool Text$equal(const Text_t *a, const Text_t *b);
+bool Text$equal_ignoring_case(Text_t a, Text_t b);
+Text_t Text$upper(Text_t text);
+Text_t Text$lower(Text_t text);
+Text_t Text$title(Text_t text);
+Text_t Text$as_text(const void *text, bool colorize, const TypeInfo *info);
+Text_t Text$quoted(Text_t str, bool colorize);
+Text_t Text$replace(Text_t str, Text_t pat, Text_t replacement);
+Int_t Text$find(Text_t text, Text_t pattern, Int_t i, int64_t *match_length);
+const char *Text$as_c_string(Text_t text);
+Text_t Text$format(const char *fmt, ...); // printf-style constructor; declared without `public`, like every other prototype in this header
 
 extern const TypeInfo $Text;
 
diff --git a/builtins/thread.c b/builtins/thread.c
index b9586917..793a0101 100644
--- a/builtins/thread.c
+++ b/builtins/thread.c
@@ -3,7 +3,6 @@
 #include <ctype.h>
 #include <err.h>
 #include <gc.h>
-#include <gc/cord.h>
 #include <math.h>
 #include <stdbool.h>
 #include <stdint.h>
@@ -14,6 +13,7 @@
 #include "array.h"
 #include "functions.h"
 #include "halfsiphash.h"
+#include "text.h"
 #include "types.h"
 #include "util.h"
 
@@ -39,13 +39,13 @@ public void Thread$detach(pthread_t *thread)
     pthread_detach(*thread);
 }
 
-CORD Thread$as_text(const pthread_t **thread, bool colorize, const TypeInfo *type)
+Text_t Thread$as_text(const pthread_t **thread, bool colorize, const TypeInfo *type) // Text form of a thread handle, optionally ANSI-colored
 {
     (void)type;
     if (!thread) {
-        return colorize ? "\x1b[34;1mThread\x1b[m" : "Thread";
+        return Text$from_str(colorize ? "\x1b[34;1mThread\x1b[m" : "Thread"); // NULL argument: only the type name
     }
-    return CORD_asprintf(colorize ? "\x1b[34;1mThread(%p)\x1b[m" : "Thread(%p)", *thread);
+    return Text$format(colorize ? "\x1b[34;1mThread(%p)\x1b[m" : "Thread(%p)", *thread); // otherwise include the pointer value
 }
 
 public const TypeInfo Thread = {
diff --git a/builtins/thread.h b/builtins/thread.h
index efccae33..2956dda6 100644
--- a/builtins/thread.h
+++ b/builtins/thread.h
@@ -14,7 +14,7 @@ pthread_t *Thread$new(closure_t fn);
 void Thread$cancel(pthread_t *thread);
 void Thread$join(pthread_t *thread);
 void Thread$detach(pthread_t *thread);
-CORD Thread$as_text(const pthread_t **thread, bool colorize, const TypeInfo *type);
+Text_t Thread$as_text(const pthread_t **thread, bool colorize, const TypeInfo *type);
 
 extern TypeInfo Thread;
 
diff --git a/builtins/types.c b/builtins/types.c
index 4fb2c523..ab1b8013 100644
--- a/builtins/types.c
+++ b/builtins/types.c
@@ -9,17 +9,20 @@
 #include "array.h"
 #include "pointer.h"
 #include "table.h"
+#include "text.h"
 #include "types.h"
 
-public CORD Type$as_text(const void *typeinfo, bool colorize, const TypeInfo *type)
+public Text_t Type$as_text(const void *typeinfo, bool colorize, const TypeInfo *type) // Text form of a TypeInfo value, taken from type->TypeInfoInfo.type_str
 {
-    if (!typeinfo) return "TypeInfo";
+    if (!typeinfo) return Text$from_str("TypeInfo"); // NULL argument: only the type name
 
-    if (!colorize)
-        return type->TypeInfoInfo.type_str;
-    CORD c;
-    CORD_sprintf(&c, "\x1b[36;1m%s\x1b[m", type->TypeInfoInfo.type_str);
-    return c;
+    if (colorize)
+        return Text$concat( // wrap the type string in ANSI color escape sequences
+            Text$from_str("\x1b[36;1m"),
+            Text$from_str(type->TypeInfoInfo.type_str),
+            Text$from_str("\x1b[m"));
+    else
+        return Text$from_str(type->TypeInfoInfo.type_str);
 }
 
 public const TypeInfo $TypeInfo = {
@@ -32,13 +35,13 @@ public const TypeInfo $TypeInfo = {
 public const TypeInfo $Void = {.size=0, .align=0, .tag=EmptyStruct};
 public const TypeInfo $Abort = {.size=0, .align=0, .tag=EmptyStruct};
 
-public CORD Func$as_text(const void *fn, bool colorize, const TypeInfo *type)
+public Text_t Func$as_text(const void *fn, bool colorize, const TypeInfo *type) // Text form of a function: its type string from type->FunctionInfo
 {
     (void)fn;
-    CORD c = type->FunctionInfo.type_str;
+    Text_t text = Text$from_str(type->FunctionInfo.type_str);
     if (fn && colorize)
-        CORD_sprintf(&c, "\x1b[32;1m%r\x1b[m", c);
-    return c;
+        text = Text$concat(Text$from_str("\x1b[32;1m"), text, Text$from_str("\x1b[m")); // color is applied only when a function value was passed
+    return text;
 }
 
 // vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/builtins/types.h b/builtins/types.h
index 70f8dc00..2647ef92 100644
--- a/builtins/types.h
+++ b/builtins/types.h
@@ -2,7 +2,6 @@
 
 // Type information and methods for TypeInfos (i.e. runtime representations of types)
 
-#include <gc/cord.h>
 #include <stdbool.h>
 #include <stdint.h>
 
@@ -13,7 +12,7 @@ struct TypeInfo;
 typedef uint32_t (*hash_fn_t)(const void*, const struct TypeInfo*);
 typedef int32_t (*compare_fn_t)(const void*, const void*, const struct TypeInfo*);
 typedef bool (*equal_fn_t)(const void*, const void*, const struct TypeInfo*);
-typedef CORD (*str_fn_t)(const void*, bool, const struct TypeInfo*);
+typedef Text_t (*text_fn_t)(const void*, bool, const struct TypeInfo*); // callback type stored in CustomInfo.as_text
 
 typedef struct TypeInfo {
     int64_t size, align;
@@ -24,7 +23,7 @@ typedef struct TypeInfo {
                 equal_fn_t equal;
                 compare_fn_t compare;
                 hash_fn_t hash;
-                str_fn_t as_text;
+                text_fn_t as_text;
             } CustomInfo;
             struct {
                 const char *sigil;
@@ -76,7 +75,7 @@ extern const TypeInfo $Void;
 extern const TypeInfo $Abort;
 #define Void_t void
 
-CORD Type$as_text(const void *typeinfo, bool colorize, const TypeInfo *type);
-CORD Func$as_text(const void *fn, bool colorize, const TypeInfo *type);
+Text_t Type$as_text(const void *typeinfo, bool colorize, const TypeInfo *type);
+Text_t Func$as_text(const void *fn, bool colorize, const TypeInfo *type);
 
 // vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/builtins/util.c b/builtins/util.c
index d4f3cd31..7fca15e3 100644
--- a/builtins/util.c
+++ b/builtins/util.c
@@ -7,6 +7,7 @@
 #include <stdlib.h>
 #include <string.h>
 
+#include "text.h"
 #include "util.h"
 
 public bool USE_COLOR;
@@ -67,4 +68,54 @@ public CORD CORD_asprintf(CORD fmt, ...)
     return c;
 }
 
+public CORD CORD_quoted(CORD str) // Wrap `str` in double quotes, backslash-escaping special characters
+{
+    CORD quoted = "\"";
+    CORD_pos i;
+    CORD_FOR(i, str) { // visit the cord one char at a time
+        char c = CORD_pos_fetch(i);
+        switch (c) {
+        case '\a': quoted = CORD_cat(quoted, "\\a"); break;
+        case '\b': quoted = CORD_cat(quoted, "\\b"); break;
+        case '\x1b': quoted = CORD_cat(quoted, "\\e"); break;
+        case '\f': quoted = CORD_cat(quoted, "\\f"); break;
+        case '\n': quoted = CORD_cat(quoted, "\\n"); break;
+        case '\r': quoted = CORD_cat(quoted, "\\r"); break;
+        case '\t': quoted = CORD_cat(quoted, "\\t"); break;
+        case '\v': quoted = CORD_cat(quoted, "\\v"); break;
+        case '"': quoted = CORD_cat(quoted, "\\\""); break;
+        case '\\': quoted = CORD_cat(quoted, "\\\\"); break;
+        case '\x00' ... '\x06': case '\x0E' ... '\x1A': // case ranges are a GNU C extension
+        case '\x1C' ... '\x1F': case '\x7F' ... '\x7F':
+            CORD_sprintf(&quoted, "%r\\x%02X", quoted, c); // remaining control characters become \xNN hex escapes
+            break;
+        default: quoted = CORD_cat_char(quoted, c); break; // everything else is copied through unchanged
+        }
+    }
+    quoted = CORD_cat_char(quoted, '"');
+    return quoted;
+}
+
+// Return a copy of `c` in which every occurrence of `to_replace` has been
+// swapped for `replacement`. An empty `to_replace` leaves `c` unchanged.
+public CORD CORD_replace(CORD c, CORD to_replace, CORD replacement)
+{
+    size_t len = CORD_len(c);
+    size_t replaced_len = CORD_len(to_replace);
+    if (replaced_len == 0) return c; // an empty pattern matches without advancing `pos`, which would loop forever
+    size_t pos = 0;
+    CORD ret = CORD_EMPTY;
+    while (pos < len) {
+        size_t found = CORD_str(c, pos, to_replace);
+        if (found == CORD_NOT_FOUND) // no more matches: keep the whole tail, even when it is a single character
+            return CORD_cat(ret, CORD_substr(c, pos, len - pos));
+        if (found > pos)
+            ret = CORD_cat(ret, CORD_substr(c, pos, found-pos));
+        ret = CORD_cat(ret, replacement);
+        pos = found + replaced_len;
+    }
+    return ret;
+}
+
+
 // vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/builtins/util.h b/builtins/util.h
index 271403ff..a3f5f2b4 100644
--- a/builtins/util.h
+++ b/builtins/util.h
@@ -31,6 +31,8 @@ extern bool USE_COLOR;
 
 char *heap_strf(const char *fmt, ...);
 CORD CORD_asprintf(CORD fmt, ...);
+CORD CORD_quoted(CORD str);
+CORD CORD_replace(CORD c, CORD to_replace, CORD replacement);
 #define CORD_appendf(cord, fmt, ...) CORD_sprintf(cord, "%r" fmt, *(cord) __VA_OPT__(,) __VA_ARGS__)
 #define CORD_all(...) CORD_catn(sizeof((CORD[]){__VA_ARGS__})/sizeof(CORD), __VA_ARGS__)
 
diff --git a/builtins/where.c b/builtins/where.c
index f7db2db3..d57f532e 100644
--- a/builtins/where.c
+++ b/builtins/where.c
@@ -1,27 +1,27 @@
 // A type called "Where" that is an enum for "Anywhere", "Start", or "End"
 // Mainly used for text methods
 
-#include <gc/cord.h>
 #include <stdbool.h>
 #include <stdint.h>
 
+#include "text.h"
 #include "types.h"
-#include "where.h"
 #include "util.h"
+#include "where.h"
 
-static CORD Where$as_text(Where_t *obj, bool use_color)
+static Text_t Where$as_text(Where_t *obj, bool use_color) // Text form of a Where_t tag, optionally ANSI-colored
 {
     if (!obj)
-        return "Where";
+        return Text$from_str("Where"); // NULL argument: only the type name
     switch (obj->tag) {
     case $tag$Where$Anywhere:
-        return use_color ? "\x1b[36;1mWhere.Anywhere\x1b[m" : "Where.Anywhere";
+        return Text$from_str(use_color ? "\x1b[36;1mWhere.Anywhere\x1b[m" : "Where.Anywhere");
     case $tag$Where$Start:
-        return use_color ? "\x1b[36;1mWhere.Start\x1b[m" : "Where.Start";
+        return Text$from_str(use_color ? "\x1b[36;1mWhere.Start\x1b[m" : "Where.Start");
     case $tag$Where$End:
-        return use_color ? "\x1b[36;1mWhere.End\x1b[m" : "Where.End";
+        return Text$from_str(use_color ? "\x1b[36;1mWhere.End\x1b[m" : "Where.End");
     default:
-        return CORD_EMPTY;
+        return (Text_t){.length=0}; // unrecognized tag: empty text
     }
 }
 
diff --git a/compile.c b/compile.c
index 1dfe54d9..0bc09c74 100644
--- a/compile.c
+++ b/compile.c
@@ -224,7 +224,7 @@ static CORD compile_lvalue(env_t *env, ast_t *ast)
                 return CORD_all("Array_lvalue(", compile_type(item_type), ", ", target_code, ", ", 
                                 compile_int_to_type(env, index->index, Type(IntType, .bits=TYPE_IBITS64)),
                                 ", ", CORD_asprintf("%ld", padded_type_size(item_type)),
-                                ", ", Text$quoted(ast->file->filename, false), ", ", heap_strf("%ld", ast->start - ast->file->text),
+                                ", ", CORD_quoted(ast->file->filename), ", ", heap_strf("%ld", ast->start - ast->file->text),
                                 ", ", heap_strf("%ld", ast->end - ast->file->text), ")");
             }
         } else {
@@ -320,7 +320,7 @@ CORD compile_statement(env_t *env, ast_t *ast)
         if (!expr_t)
             code_err(test->expr, "I couldn't figure out the type of this expression");
 
-        CORD output = NULL;
+        CORD output = CORD_EMPTY;
         if (test->output) {
             const uint8_t *raw = (const uint8_t*)CORD_to_const_char_star(test->output);
             uint8_t buf[128] = {0};
@@ -328,6 +328,7 @@ CORD compile_statement(env_t *env, ast_t *ast)
             uint8_t *norm = u8_normalize(UNINORM_NFD, (uint8_t*)raw, strlen((char*)raw)+1, buf, &norm_len);
             assert(norm[norm_len-1] == 0);
             output = CORD_from_char_star((char*)norm);
+            CORD_printf("OUTPUT: %r\n", output);
             if (norm && norm != buf) free(norm);
         }
 
@@ -337,8 +338,8 @@ CORD compile_statement(env_t *env, ast_t *ast)
                 assert(compile_statement(env, test->expr) == CORD_EMPTY);
                 return CORD_asprintf(
                     "test(NULL, NULL, %r, %r, %ld, %ld);",
-                    compile(env, WrapAST(test->expr, TextLiteral, .cord=output)),
-                    compile(env, WrapAST(test->expr, TextLiteral, .cord=test->expr->file->filename)),
+                    CORD_quoted(output),
+                    CORD_quoted(test->expr->file->filename),
                     (int64_t)(test->expr->start - test->expr->file->text),
                     (int64_t)(test->expr->end - test->expr->file->text));
             } else {
@@ -355,8 +356,8 @@ CORD compile_statement(env_t *env, ast_t *ast)
                     compile_declaration(t, var),
                     var, val_code, var,
                     compile_type_info(env, get_type(env, decl->value)),
-                    compile(env, WrapAST(test->expr, TextLiteral, .cord=output)),
-                    compile(env, WrapAST(test->expr, TextLiteral, .cord=test->expr->file->filename)),
+                    CORD_quoted(output),
+                    CORD_quoted(test->expr->file->filename),
                     (int64_t)(test->expr->start - test->expr->file->text),
                     (int64_t)(test->expr->end - test->expr->file->text));
             }
@@ -382,8 +383,8 @@ CORD compile_statement(env_t *env, ast_t *ast)
                     compile_assignment(env, assign->targets->ast, value),
                     compile(env, assign->targets->ast),
                     compile_type_info(env, lhs_t),
-                    compile(env, WrapAST(test->expr, TextLiteral, .cord=test->output)),
-                    compile(env, WrapAST(test->expr, TextLiteral, .cord=test->expr->file->filename)),
+                    CORD_quoted(test->output),
+                    CORD_quoted(test->expr->file->filename),
                     (int64_t)(test->expr->start - test->expr->file->text),
                     (int64_t)(test->expr->end - test->expr->file->text));
             } else {
@@ -415,8 +416,8 @@ CORD compile_statement(env_t *env, ast_t *ast)
 
                 CORD_appendf(&code, "&$1; }), %r, %r, %r, %ld, %ld);",
                     compile_type_info(env, get_type(env, assign->targets->ast)),
-                    compile(env, WrapAST(test->expr, TextLiteral, .cord=test->output)),
-                    compile(env, WrapAST(test->expr, TextLiteral, .cord=test->expr->file->filename)),
+                    CORD_quoted(test->output),
+                    CORD_quoted(test->expr->file->filename),
                     (int64_t)(test->expr->start - test->expr->file->text),
                     (int64_t)(test->expr->end - test->expr->file->text));
                 return code;
@@ -427,25 +428,25 @@ CORD compile_statement(env_t *env, ast_t *ast)
                 compile_statement(env, test->expr),
                 compile_lvalue(env, Match(test->expr, UpdateAssign)->lhs),
                 compile_type_info(env, get_type(env, Match(test->expr, UpdateAssign)->lhs)),
-                compile(env, WrapAST(test->expr, TextLiteral, .cord=test->output)),
-                compile(env, WrapAST(test->expr, TextLiteral, .cord=test->expr->file->filename)),
+                CORD_quoted(test->output),
+                CORD_quoted(test->expr->file->filename),
                 (int64_t)(test->expr->start - test->expr->file->text),
                 (int64_t)(test->expr->end - test->expr->file->text));
         } else if (expr_t->tag == VoidType || expr_t->tag == AbortType || expr_t->tag == ReturnType) {
             return CORD_asprintf(
                 "test(({ %r; NULL; }), NULL, NULL, %r, %ld, %ld);",
                 compile_statement(env, test->expr),
-                compile(env, WrapAST(test->expr, TextLiteral, .cord=test->expr->file->filename)),
+                CORD_quoted(test->expr->file->filename),
                 (int64_t)(test->expr->start - test->expr->file->text),
                 (int64_t)(test->expr->end - test->expr->file->text));
         } else {
             return CORD_asprintf(
                 "test(%r, %r, %r, %r, %ld, %ld);",
                 test->expr->tag == Var ? CORD_all("&", compile(env, test->expr))
-                : CORD_all("(", compile_type(expr_t), "[1]){", compile(env, test->expr), "}"),
+                  : CORD_all("(", compile_type(expr_t), "[1]){", compile(env, test->expr), "}"),
                 compile_type_info(env, expr_t),
-                compile(env, WrapAST(test->expr, TextLiteral, .cord=output)),
-                compile(env, WrapAST(test->expr, TextLiteral, .cord=test->expr->file->filename)),
+                CORD_quoted(output),
+                CORD_quoted(test->expr->file->filename),
                 (int64_t)(test->expr->start - test->expr->file->text),
                 (int64_t)(test->expr->end - test->expr->file->text));
         }
@@ -629,7 +630,7 @@ CORD compile_statement(env_t *env, ast_t *ast)
         auto def = Match(ast, LangDef);
         CORD_appendf(&env->code->typeinfos, "public const TypeInfo %r%s = {%zu, %zu, {.tag=TextInfo, .TextInfo={%r}}};\n",
                      namespace_prefix(env->libname, env->namespace), def->name, sizeof(CORD), __alignof__(CORD),
-                     Text$quoted(def->name, false));
+                     CORD_quoted(def->name));
         compile_namespace(env, def->name, def->namespace);
         return CORD_EMPTY;
     }
@@ -703,7 +704,7 @@ CORD compile_statement(env_t *env, ast_t *ast)
                 "}\n");
             env->code->funcs = CORD_cat(env->code->funcs, wrapper);
         } else if (fndef->cache && fndef->cache->tag == Int) {
-            int64_t cache_size = Int64$from_text(Match(fndef->cache, Int)->str, NULL);
+            int64_t cache_size = Int64$from_text(Text$from_str(Match(fndef->cache, Int)->str), NULL);
             const char *arg_type_name = heap_strf("%s$args", Match(fndef->name, Var)->name);
             ast_t *args_def = FakeAST(StructDef, .name=arg_type_name, .fields=fndef->args);
             prebind_statement(env, args_def);
@@ -1314,7 +1315,7 @@ CORD compile_int_to_type(env_t *env, ast_t *ast, type_t *target)
     }
 
     int64_t target_bits = (int64_t)Match(target, IntType)->bits;
-    Int_t int_val = Int$from_text(Match(ast, Int)->str, NULL);
+    Int_t int_val = Int$from_text(Text$from_str(Match(ast, Int)->str), NULL);
     mpz_t i;
     mpz_init_set_int(i, int_val);
 
@@ -1354,7 +1355,7 @@ CORD compile_arguments(env_t *env, ast_t *call_ast, arg_t *spec_args, arg_ast_t
                     if (spec_arg->type->tag == IntType && call_arg->value->tag == Int) {
                         value = compile_int_to_type(env, call_arg->value, spec_arg->type);
                     } else if (spec_arg->type->tag == NumType && call_arg->value->tag == Int) {
-                        Int_t int_val = Int$from_text(Match(call_arg->value, Int)->str, NULL);
+                        Int_t int_val = Int$from_text(Text$from_str(Match(call_arg->value, Int)->str), NULL);
                         double n = Int_to_Num(int_val);
                         value = CORD_asprintf(Match(spec_arg->type, NumType)->bits == TYPE_NBITS64
                                               ? "N64(%.20g)" : "N32(%.10g)", n);
@@ -1382,7 +1383,7 @@ CORD compile_arguments(env_t *env, ast_t *call_ast, arg_t *spec_args, arg_ast_t
                 if (spec_arg->type->tag == IntType && call_arg->value->tag == Int) {
                     value = compile_int_to_type(env, call_arg->value, spec_arg->type);
                 } else if (spec_arg->type->tag == NumType && call_arg->value->tag == Int) {
-                    Int_t int_val = Int$from_text(Match(call_arg->value, Int)->str, NULL);
+                    Int_t int_val = Int$from_text(Text$from_str(Match(call_arg->value, Int)->str), NULL);
                     double n = Int_to_Num(int_val);
                     value = CORD_asprintf(Match(spec_arg->type, NumType)->bits == TYPE_NBITS64
                                           ? "N64(%.20g)" : "N32(%.10g)", n);
@@ -1513,7 +1514,7 @@ CORD compile(env_t *env, ast_t *ast)
     }
     case Int: {
         const char *str = Match(ast, Int)->str;
-        Int_t int_val = Int$from_text(str, NULL);
+        Int_t int_val = Int$from_text(Text$from_str(str), NULL);
         mpz_t i;
         mpz_init_set_int(i, int_val);
 
@@ -1780,8 +1781,8 @@ CORD compile(env_t *env, ast_t *ast)
     case TextLiteral: {
         CORD literal = Match(ast, TextLiteral)->cord; 
         if (literal == CORD_EMPTY)
-            return "(CORD)CORD_EMPTY";
-        CORD code = "(CORD)\"";
+            return "((Text_t){.length=0})";
+        CORD code = "Text$from_str(\"";
         CORD_pos i;
         CORD_FOR(i, literal) {
             char c = CORD_pos_fetch(i);
@@ -1803,7 +1804,7 @@ CORD compile(env_t *env, ast_t *ast)
             }
             }
         }
-        return CORD_cat_char(code, '"');
+        return CORD_cat(code, "\")");
     }
     case TextJoin: {
         const char *lang = Match(ast, TextJoin)->lang;
@@ -1812,7 +1813,7 @@ CORD compile(env_t *env, ast_t *ast)
             code_err(ast, "%s is not a valid text language name", lang);
         ast_list_t *chunks = Match(ast, TextJoin)->children;
         if (!chunks) {
-            return "(CORD)CORD_EMPTY";
+            return "((Text_t){.length=0})";
         } else if (!chunks->next && chunks->ast->tag == TextLiteral) {
             return compile(env, chunks->ast);
         } else {
@@ -1839,7 +1840,7 @@ CORD compile(env_t *env, ast_t *ast)
                 if (chunk->next) code = CORD_cat(code, ", ");
             }
             if (chunks->next)
-                return CORD_all("CORD_all(", code, ")");
+                return CORD_all("Text$concat(", code, ")");
             else
                 return code;
         }
@@ -2447,7 +2448,8 @@ CORD compile(env_t *env, ast_t *ast)
                     file_t *f = ast->file;
                     return CORD_all("Table$get_value_or_fail(", self, ", ", compile_type(table->key_type), ", ", compile_type(table->value_type), ", ",
                                     compile_arguments(env, ast, arg_spec, call->args), ", ", compile_type_info(env, self_value_t), ", ",
-                                    Text$quoted(f->filename, false), ", ", CORD_asprintf("%ld", (int64_t)(ast->start - f->text)), ", ",
+                                    CORD_quoted(f->filename), ", ",
+                                    CORD_asprintf("%ld", (int64_t)(ast->start - f->text)), ", ",
                                     CORD_asprintf("%ld", (int64_t)(ast->end - f->text)),
                                     ")");
                 }
@@ -2630,8 +2632,9 @@ CORD compile(env_t *env, ast_t *ast)
         } else {
             empty = FakeAST(
                 InlineCCode, 
-                CORD_asprintf("fail_source(%r, %ld, %ld, \"This collection was empty!\");\n",
-                              Text$quoted(ast->file->filename, false), (long)(reduction->iter->start - reduction->iter->file->text),
+                CORD_asprintf("fail_source(%r, %ld, %ld, \"This collection was empty!\");\n",
+                              CORD_quoted(ast->file->filename), // a CORD, so it needs %r; %s expects a flat C string
+                              (long)(reduction->iter->start - reduction->iter->file->text),
                               (long)(reduction->iter->end - reduction->iter->file->text)));
         }
         ast_t *item = FakeAST(Var, "$iter_value");
@@ -2785,7 +2788,8 @@ CORD compile(env_t *env, ast_t *ast)
             else
                 return CORD_all("Array_get(", compile_type(item_type), ", ", arr, ", ",
                                 compile_int_to_type(env, indexing->index, Type(IntType, .bits=TYPE_IBITS64)), ", ",
-                                Text$quoted(f->filename, false), ", ", CORD_asprintf("%ld", (int64_t)(indexing->index->start - f->text)), ", ",
+                                CORD_quoted(f->filename), ", ",
+                                CORD_asprintf("%ld", (int64_t)(indexing->index->start - f->text)), ", ",
                                 CORD_asprintf("%ld", (int64_t)(indexing->index->end - f->text)),
                                 ")");
         } else {
@@ -2935,15 +2939,15 @@ CORD compile_type_info(env_t *env, type_t *t)
         CORD sigil = ptr->is_stack ? "&" : "@";
         if (ptr->is_readonly) sigil = CORD_cat(sigil, "%");
         return CORD_asprintf("$PointerInfo(%r, %r, %s)",
-                             Text$quoted(sigil, false),
+                             CORD_quoted(sigil),
                              compile_type_info(env, ptr->pointed),
                              ptr->is_optional ? "yes" : "no");
     }
     case FunctionType: {
-        return CORD_asprintf("$FunctionInfo(%r)", Text$quoted(type_to_cord(t), false));
+        return CORD_asprintf("$FunctionInfo(%r)", CORD_quoted(type_to_cord(t)));
     }
     case ClosureType: {
-        return CORD_asprintf("$ClosureInfo(%r)", Text$quoted(type_to_cord(t), false));
+        return CORD_asprintf("$ClosureInfo(%r)", CORD_quoted(type_to_cord(t)));
     }
     case TypeInfoType: return "&$TypeInfo";
     case MemoryType: return "&$Memory";
@@ -2968,7 +2972,7 @@ CORD compile_cli_arg_call(env_t *env, CORD fn_name, type_t *fn_type)
     for (arg_t *arg = fn_info->args; arg; arg = arg->next) {
         usage = CORD_cat(usage, " ");
         type_t *t = get_arg_type(main_env, arg);
-        CORD flag = Text$replace(arg->name, "_", "-", I(-1));
+        CORD flag = CORD_replace(arg->name, "_", "-");
         if (arg->default_val) {
             if (t->tag == BoolType)
                 usage = CORD_all(usage, "[--", flag, "]");
@@ -2983,7 +2987,7 @@ CORD compile_cli_arg_call(env_t *env, CORD fn_name, type_t *fn_type)
                 usage = CORD_all(usage, "<", flag, ">");
         }
     }
-    CORD code = CORD_all("CORD usage = CORD_all(\"Usage: \", argv[0], ", usage ? Text$quoted(usage, false) : "CORD_EMPTY", ");\n",
+    CORD code = CORD_all("CORD usage = CORD_all(\"Usage: \", argv[0], ", usage ? CORD_quoted(usage) : "CORD_EMPTY", ");\n",
                          "#define USAGE_ERR(...) errx(1, CORD_to_const_char_star(CORD_all(__VA_ARGS__)))\n"
                          "#define IS_FLAG(str, flag) (strncmp(str, flag, strlen(flag) == 0 && (str[strlen(flag)] == 0 || str[strlen(flag)] == '=')) == 0)\n");
 
@@ -3006,7 +3010,7 @@ CORD compile_cli_arg_call(env_t *env, CORD fn_name, type_t *fn_type)
                     "if (strncmp(argv[i], \"--\", 2) != 0) {\n++i;\ncontinue;\n}\n");
     for (arg_t *arg = fn_info->args; arg; arg = arg->next) {
         type_t *t = get_arg_type(main_env, arg);
-        CORD flag = Text$replace(arg->name, "_", "-", I(-1));
+        CORD flag = CORD_replace(arg->name, "_", "-");
         switch (t->tag) {
         case BoolType: {
             code = CORD_all(code, "else if (pop_flag(argv, &i, \"", flag, "\", &flag)) {\n"
diff --git a/parse.c b/parse.c
index af011877..8ab3688e 100644
--- a/parse.c
+++ b/parse.c
@@ -15,6 +15,7 @@
 
 #include "ast.h"
 #include "builtins/integers.h"
+#include "builtins/text.h"
 #include "builtins/table.h"
 #include "builtins/util.h"
 
@@ -1894,7 +1895,7 @@ ast_t *parse_enum_def(parse_ctx_t *ctx, const char *pos) {
         spaces(&pos);
         if (match(&pos, "=")) {
             ast_t *val = expect(ctx, tag_start, &pos, parse_int, "I expected an integer literal after this '='");
-            Int_t i = Int$from_text(Match(val, Int)->str, NULL);
+            Int_t i = Int$from_text(Text$from_str(Match(val, Int)->str), NULL);
             // TODO check for overflow
             next_value = (i.small >> 2);
         }
diff --git a/repl.c b/repl.c
index a2b77a81..85c1ec89 100644
--- a/repl.c
+++ b/repl.c
@@ -208,7 +208,7 @@ static double ast_to_num(env_t *env, ast_t *ast)
     }
 }
 
-static CORD obj_to_text(type_t *t, const void *obj, bool use_color)
+static Text_t obj_to_text(type_t *t, const void *obj, bool use_color)
 {
     const TypeInfo *info = type_to_type_info(t);
     return generic_as_text(obj, use_color, info);
@@ -272,8 +272,8 @@ void run(env_t *env, ast_t *ast)
         } else {
             void *value = GC_MALLOC(size);
             eval(env, doctest->expr, value);
-            CORD c = obj_to_text(t, value, true);
-            printf("= %s \x1b[2m: %T\x1b[m\n", CORD_to_const_char_star(c), t);
+            Text_t text = obj_to_text(t, value, true);
+            printf("= %k \x1b[2m: %T\x1b[m\n", &text, t);
             fflush(stdout);
         }
         break;
@@ -353,11 +353,11 @@ void eval(env_t *env, ast_t *ast, void *dest)
     case Int: {
         if (!dest) return;
         switch (Match(ast, Int)->bits) {
-        case 0: *(Int_t*)dest = Int$from_text(Match(ast, Int)->str, NULL); break;
-        case 64: *(int64_t*)dest = Int64$from_text(Match(ast, Int)->str, NULL); break;
-        case 32: *(int32_t*)dest = Int32$from_text(Match(ast, Int)->str, NULL); break;
-        case 16: *(int16_t*)dest = Int16$from_text(Match(ast, Int)->str, NULL); break;
-        case 8: *(int8_t*)dest = Int8$from_text(Match(ast, Int)->str, NULL); break;
+        case 0: *(Int_t*)dest = Int$from_text(Text$from_str(Match(ast, Int)->str), NULL); break;
+        case 64: *(int64_t*)dest = Int64$from_text(Text$from_str(Match(ast, Int)->str), NULL); break;
+        case 32: *(int32_t*)dest = Int32$from_text(Text$from_str(Match(ast, Int)->str), NULL); break;
+        case 16: *(int16_t*)dest = Int16$from_text(Text$from_str(Match(ast, Int)->str), NULL); break;
+        case 8: *(int8_t*)dest = Int8$from_text(Text$from_str(Match(ast, Int)->str), NULL); break;
         default: errx(1, "Invalid int bits: %ld", Match(ast, Int)->bits);
         }
         break;
@@ -386,7 +386,7 @@ void eval(env_t *env, ast_t *ast, void *dest)
                 size_t chunk_size = type_size(chunk_t);
                 char buf[chunk_size];
                 eval(env, chunk->ast, buf);
-                ret = CORD_cat(ret, obj_to_text(chunk_t, buf, false));
+                ret = CORD_cat(ret, Text$as_c_string(obj_to_text(chunk_t, buf, false)));
             }
         }
         if (dest) *(CORD*)dest = ret;
diff --git a/structs.c b/structs.c
index 8099012e..66ca3ff1 100644
--- a/structs.c
+++ b/structs.c
@@ -166,7 +166,7 @@ void compile_struct_def(env_t *env, ast_t *ast)
     } else {
         // If there are no fields, we can use an EmptyStruct typeinfo, which generates less code:
         CORD typeinfo = CORD_asprintf("public const TypeInfo %r = {%zu, %zu, {.tag=EmptyStruct, .EmptyStruct.name=%r}};\n",
-                                      full_name, type_size(t), type_align(t), Text$quoted(def->name, false));
+                                      full_name, type_size(t), type_align(t), Text$quoted(Text$from_str(def->name), false));
         env->code->typeinfos = CORD_all(env->code->typeinfos, typeinfo);
     }
 
diff --git a/tomo.c b/tomo.c
index 8b69f1f4..01bdf5ae 100644
--- a/tomo.c
+++ b/tomo.c
@@ -84,6 +84,8 @@ int main(int argc, char *argv[])
         errx(1, "Couldn't set printf specifier");
     if (register_printf_specifier('W', printf_ast, printf_pointer_size))
         errx(1, "Couldn't set printf specifier");
+    if (register_printf_specifier('k', printf_text, printf_text_size))
+        errx(1, "Couldn't set printf specifier");
 
     setenv("TOMO_IMPORT_PATH", "~/.local/src/tomo:.", 0);
     setenv("TOMO_LIB_PATH", "~/.local/lib/tomo:.", 0);
diff --git a/typecheck.c b/typecheck.c
index bb40666f..4d4c080e 100644
--- a/typecheck.c
+++ b/typecheck.c
@@ -9,11 +9,12 @@
 #include <sys/stat.h>
 
 #include "ast.h"
+#include "builtins/text.h"
+#include "builtins/util.h"
 #include "environment.h"
 #include "parse.h"
 #include "typecheck.h"
 #include "types.h"
-#include "builtins/util.h"
 
 type_t *parse_type_ast(env_t *env, type_ast_t *ast)
 {
@@ -1367,7 +1368,7 @@ bool is_constant(env_t *env, ast_t *ast)
     case Int: {
         auto info = Match(ast, Int);
         if (info->bits == IBITS_UNSPECIFIED) {
-            Int_t int_val = Int$from_text(info->str, NULL);
+            Int_t int_val = Int$from_text(Text$from_str(info->str), NULL);
             mpz_t i;
             mpz_init_set_int(i, int_val);
             return (mpz_cmpabs_ui(i, BIGGEST_SMALL_INT) <= 0);
author	Bruce Hill <bruce@bruce-hill.com>	2024-09-02 18:47:39 -0400
committer	Bruce Hill <bruce@bruce-hill.com>	2024-09-02 18:47:39 -0400
commit	61e482f6f36aee6f72392a6188f2ec5c858b88fd (patch)
tree	bea4123fcc62dd834405ae89ce9fe260e90a0023
parent	f0f8f218703ebb4512b3cd3f9e06b86a7d9861b0 (diff)