diff --git a/compile.c b/compile.c
index e6a22b0..ca89062 100644
--- a/compile.c
+++ b/compile.c
@@ -34,6 +34,7 @@ static CORD compile_none(type_t *t);
 static CORD compile_to_type(env_t *env, ast_t *ast, type_t *t);
 static CORD check_none(type_t *t, CORD value);
 static CORD optional_into_nonnone(type_t *t, CORD value);
+static CORD compile_string_literal(CORD literal);
 
 CORD promote_to_optional(type_t *t, CORD code)
 {
@@ -569,7 +570,7 @@ CORD compile_statement(env_t *env, ast_t *ast)
             const uint8_t *raw = (const uint8_t*)CORD_to_const_char_star(test->output);
             uint8_t buf[128] = {0};
             size_t norm_len = sizeof(buf);
-            uint8_t *norm = u8_normalize(UNINORM_NFD, (uint8_t*)raw, strlen((char*)raw)+1, buf, &norm_len);
+            uint8_t *norm = u8_normalize(UNINORM_NFC, (uint8_t*)raw, strlen((char*)raw)+1, buf, &norm_len);
             assert(norm[norm_len-1] == 0);
             output = CORD_from_char_star((char*)norm);
             if (norm && norm != buf) free(norm);
@@ -579,7 +580,7 @@ CORD compile_statement(env_t *env, ast_t *ast)
             auto decl = Match(test->expr, Declare);
             const char *varname = Match(decl->var, Var)->name;
             if (streq(varname, "_"))
-                return compile_statement(env, WrapAST(ast, DocTest, .expr=decl->value, .output=test->output, .skip_source=test->skip_source));
+                return compile_statement(env, WrapAST(ast, DocTest, .expr=decl->value, .output=output, .skip_source=test->skip_source));
             CORD var = CORD_all("$", Match(decl->var, Var)->name);
             type_t *t = get_type(env, decl->value);
             CORD val_code = compile_maybe_incref(env, decl->value, t);
@@ -593,7 +594,7 @@ CORD compile_statement(env_t *env, ast_t *ast)
                 compile_declaration(t, var),
                 var, val_code,
                 compile_type_info(env, get_type(env, decl->value)),
-                CORD_quoted(output),
+                compile_string_literal(output),
                 (int64_t)(test->expr->start - test->expr->file->text),
                 (int64_t)(test->expr->end - test->expr->file->text));
         } else if (test->expr->tag == Assign) {
@@ -612,12 +613,12 @@ CORD compile_statement(env_t *env, ast_t *ast)
                     "test((%r), %r, %r, %ld, %ld);",
                     compile_assignment(env, assign->targets->ast, value),
                     compile_type_info(env, lhs_t),
-                    CORD_quoted(test->output),
+                    compile_string_literal(output),
                     (int64_t)(test->expr->start - test->expr->file->text),
                     (int64_t)(test->expr->end - test->expr->file->text));
             } else {
                 // Multi-assign or assignment to potentially non-idempotent targets
-                if (test->output && assign->targets->next)
+                if (output && assign->targets->next)
                     code_err(ast, "Sorry, but doctesting with '=' is not supported for multi-assignments");
 
                 CORD code = "test(({ // Assignment\n";
@@ -643,7 +644,7 @@ CORD compile_statement(env_t *env, ast_t *ast)
 
                 CORD_appendf(&code, "$1; }), %r, %r, %ld, %ld);",
                     compile_type_info(env, first_type),
-                    CORD_quoted(test->output),
+                    compile_string_literal(output),
                     (int64_t)(test->expr->start - test->expr->file->text),
                     (int64_t)(test->expr->end - test->expr->file->text));
                 return code;
@@ -667,7 +668,7 @@ CORD compile_statement(env_t *env, ast_t *ast)
                 compile_lvalue(env, update->lhs),
                 compile_statement(env, update_var),
                 compile_type_info(env, lhs_t),
-                CORD_quoted(test->output),
+                compile_string_literal(output),
                 (int64_t)(test->expr->start - test->expr->file->text),
                 (int64_t)(test->expr->end - test->expr->file->text));
         } else if (expr_t->tag == VoidType || expr_t->tag == AbortType || expr_t->tag == ReturnType) {
@@ -681,7 +682,7 @@ CORD compile_statement(env_t *env, ast_t *ast)
                 "test(%r, %r, %r, %ld, %ld);",
                 compile(env, test->expr),
                 compile_type_info(env, expr_t),
-                CORD_quoted(output),
+                compile_string_literal(output),
                 (int64_t)(test->expr->start - test->expr->file->text),
                 (int64_t)(test->expr->end - test->expr->file->text));
         }
@@ -1953,7 +1954,7 @@ CORD compile_math_method(env_t *env, binop_e op, ast_t *lhs, ast_t *rhs, type_t
     return CORD_EMPTY;
 }
 
-static CORD compile_string_literal(CORD literal)
+CORD compile_string_literal(CORD literal)
 {
     CORD code = "\"";
     CORD_pos i;
diff --git a/docs/text.md b/docs/text.md
index 0384aa4..c8a105d 100644
--- a/docs/text.md
+++ b/docs/text.md
@@ -9,12 +9,22 @@ etc.).
 
 ## Implementation
 
-Internally, Tomo text's implementation is based on [Raku's
-strings](https://docs.raku.org/language/unicode). Strings store their grapheme
-cluster count and either a compact array of 8-bit ASCII characters (for ASCII
-text), an array of 32-bit normal-form grapheme cluster values (see below), or a
-flat subarray of multiple texts that are either ASCII or graphemes (the
-structure is not arbitrarily nested).
+Internally, Tomo text's implementation is based on [Raku/MoarVM's
+strings](https://docs.raku.org/language/unicode) and [Boehm et al's
+Cords](https://www.cs.tufts.edu/comp/150FP/archive/hans-boehm/ropes.pdf).
+Strings store their grapheme cluster count and either a compact array of 8-bit
+ASCII characters (for ASCII text), an array of 32-bit normal-form grapheme
+cluster values (see below), or a (roughly) balanced binary tree concatenation
+of two texts. The upside is that repeated concatenations are typically a
+constant-time operation, which will occasionally require a small rebalancing
+operation. Index-based text operations (like retrieving an arbitrary index or
+slicing) are very fast: typically a constant-time operation for arbitrary
+unicode text, but in the worst case scenario (text built from many
+concatenations), `O(log(n))` time with very generous constant factors typically
+amounting to only a handful of steps. Since concatenations use shared
+substructures, they are very memory-efficient and can be used efficiently for
+applications like implementing a text editor that stores a full edit history of
+a large file's contents.
 
 ### Normal-Form Graphemes
 
diff --git a/stdlib/bytes.c b/stdlib/bytes.c
index 6c94c05..1e889f6 100644
--- a/stdlib/bytes.c
+++ b/stdlib/bytes.c
@@ -17,15 +17,16 @@ PUREFUNC public Text_t Byte$as_text(const void *b, bool colorize, const TypeInfo
 }
 
 public Text_t Byte$hex(Byte_t byte, bool uppercase, bool prefix) {
-    Text_t text = {.tag=TEXT_SHORT_ASCII};
+    struct Text_s text = {.tag=TEXT_ASCII};
+    text.ascii = GC_MALLOC_ATOMIC(8);
     if (prefix && uppercase)
-        text.length = (int64_t)snprintf(text.short_ascii, sizeof(text.short_ascii), "0x%02X", byte);
+        text.length = (int64_t)snprintf((char*)text.ascii, 8, "0x%02X", byte);
     else if (prefix && !uppercase)
-        text.length = (int64_t)snprintf(text.short_ascii, sizeof(text.short_ascii), "0x%02x", byte);
+        text.length = (int64_t)snprintf((char*)text.ascii, 8, "0x%02x", byte);
     else if (!prefix && uppercase)
-        text.length = (int64_t)snprintf(text.short_ascii, sizeof(text.short_ascii), "%02X", byte);
+        text.length = (int64_t)snprintf((char*)text.ascii, 8, "%02X", byte);
     else if (!prefix && !uppercase)
-        text.length = (int64_t)snprintf(text.short_ascii, sizeof(text.short_ascii), "%02x", byte);
+        text.length = (int64_t)snprintf((char*)text.ascii, 8, "%02x", byte);
     return text;
 }
 
diff --git a/stdlib/datatypes.h b/stdlib/datatypes.h
index 11ae130..80591ad 100644
--- a/stdlib/datatypes.h
+++ b/stdlib/datatypes.h
@@ -66,18 +66,24 @@ typedef struct Range_s {
     Int_t first, last, step;
 } Range_t;
 
-enum text_type { TEXT_SHORT_ASCII, TEXT_ASCII, TEXT_SHORT_GRAPHEMES, TEXT_GRAPHEMES, TEXT_SUBTEXT };
+enum text_type { TEXT_ASCII, TEXT_GRAPHEMES, TEXT_CONCAT };
 
 typedef struct Text_s {
-    int64_t length; // Number of grapheme clusters
-    uint64_t hash:61;
-    uint8_t tag:3;
+    int64_t length:54; // Number of grapheme clusters
+    uint8_t depth:8;
+    uint8_t tag:2;
     union {
-        char short_ascii[8];
-        const char *ascii;
-        int32_t short_graphemes[2];
-        const int32_t *graphemes;
-        struct Text_s *subtexts;
+        struct {
+            const char *ascii;
+            // char ascii_buf[8];
+        };
+        struct {
+            const int32_t *graphemes;
+            // int32_t grapheme_buf[2];
+        };
+        struct {
+            const struct Text_s *left, *right;
+        };
     };
 } Text_t;
 
diff --git a/stdlib/optionals.c b/stdlib/optionals.c
index ccda980..990ca13 100644
--- a/stdlib/optionals.c
+++ b/stdlib/optionals.c
@@ -74,7 +74,7 @@ public void Optional$deserialize(FILE *in, void *outval, Array_t *pointers, cons
         _deserialize(in, outval, pointers, nonnull);
     } else {
         if (nonnull->tag == TextInfo)
-            *(Text_t*)outval = (Text_t){.length=-1};
+            *(Text_t*)outval = NONE_TEXT;
         else if (nonnull->tag == ArrayInfo)
             *(Array_t*)outval = (Array_t){.length=-1};
         else if (nonnull->tag == TableInfo)
diff --git a/stdlib/patterns.c b/stdlib/patterns.c
index 48f43ae..bee8476 100644
--- a/stdlib/patterns.c
+++ b/stdlib/patterns.c
@@ -36,7 +36,7 @@ typedef struct {
 
 static INLINE void skip_whitespace(TextIter_t *state, int64_t *i)
 {
-    while (*i < state->text.length) {
+    while (*i < state->stack[0].text.length) {
         int32_t grapheme = Text$get_grapheme_fast(state, *i);
         if (grapheme > 0 && !uc_is_property_white_space((ucs4_t)grapheme))
             return;
@@ -46,7 +46,7 @@ static INLINE void skip_whitespace(TextIter_t *state, int64_t *i)
 
 static INLINE bool match_grapheme(TextIter_t *state, int64_t *i, int32_t grapheme)
 {
-    if (*i < state->text.length && Text$get_grapheme_fast(state, *i) == grapheme) {
+    if (*i < state->stack[0].text.length && Text$get_grapheme_fast(state, *i) == grapheme) {
         *i += 1;
         return true;
     }
@@ -57,7 +57,7 @@ static INLINE bool match_str(TextIter_t *state, int64_t *i, const char *str)
 {
     int64_t matched = 0;
     while (matched[str]) {
-        if (*i + matched >= state->text.length || Text$get_grapheme_fast(state, *i + matched) != str[matched])
+        if (*i + matched >= state->stack[0].text.length || Text$get_grapheme_fast(state, *i + matched) != str[matched])
             return false;
         matched += 1;
     }
@@ -67,7 +67,7 @@ static INLINE bool match_str(TextIter_t *state, int64_t *i, const char *str)
 
 static INLINE bool match_property(TextIter_t *state, int64_t *i, uc_property_t prop)
 {
-    if (*i >= state->text.length) return false;
+    if (*i >= state->stack[0].text.length) return false;
     uint32_t grapheme = Text$get_main_grapheme_fast(state, *i);
     // TODO: check every codepoint in the cluster?
     if (uc_is_property(grapheme, prop)) {
@@ -95,7 +95,7 @@ static const char *get_property_name(TextIter_t *state, int64_t *i)
     skip_whitespace(state, i);
     char *name = GC_MALLOC_ATOMIC(UNINAME_MAX);
     char *dest = name;
-    while (*i < state->text.length) {
+    while (*i < state->stack[0].text.length) {
         int32_t grapheme = Text$get_grapheme_fast(state, *i);
         if (!(grapheme & ~0xFF) && (isalnum(grapheme) || grapheme == ' ' || grapheme == '_' || grapheme == '-')) {
             *dest = (char)grapheme;
@@ -406,10 +406,10 @@ static int64_t match_num(TextIter_t *state, int64_t index)
 
 static int64_t match_newline(TextIter_t *state, int64_t index)
 {
-    if (index >= state->text.length)
+    if (index >= state->stack[0].text.length)
         return -1;
 
-    uint32_t grapheme = index >= state->text.length ? 0 : Text$get_main_grapheme_fast(state, index);
+    uint32_t grapheme = index >= state->stack[0].text.length ? 0 : Text$get_main_grapheme_fast(state, index);
     if (grapheme == '\n')
         return 1;
     if (grapheme == '\r' && Text$get_grapheme_fast(state, index + 1) == '\n')
@@ -419,7 +419,7 @@ static int64_t match_newline(TextIter_t *state, int64_t index)
 
 static int64_t match_pat(TextIter_t *state, int64_t index, pat_t pat)
 {
-    Text_t text = state->text;
+    Text_t text = state->stack[0].text;
     int32_t grapheme = index >= text.length ? 0 : Text$get_grapheme_fast(state, index);
 
     switch (pat.tag) {
@@ -516,7 +516,7 @@ static pat_t parse_next_pat(TextIter_t *state, int64_t *index)
         int32_t close = open;
         uc_mirror_char((ucs4_t)open, (ucs4_t*)&close);
         if (!match_grapheme(state, index, close))
-            fail("Pattern's closing quote is missing: %k", &state->text);
+            fail("Pattern's closing quote is missing: %k", &state->stack[0].text);
 
         return (pat_t){
             .tag=PAT_QUOTE,
@@ -531,7 +531,7 @@ static pat_t parse_next_pat(TextIter_t *state, int64_t *index)
         int32_t close = open;
         uc_mirror_char((ucs4_t)open, (ucs4_t*)&close);
         if (!match_grapheme(state, index, close))
-            fail("Pattern's closing brace is missing: %k", &state->text);
+            fail("Pattern's closing brace is missing: %k", &state->stack[0].text);
         
         return (pat_t){
             .tag=PAT_PAIR,
@@ -571,19 +571,19 @@ static pat_t parse_next_pat(TextIter_t *state, int64_t *index)
             skip_whitespace(state, index);
             int32_t grapheme = Text$get_grapheme_fast(state, (*index)++);
             if (!match_grapheme(state, index, '}'))
-                fail("Missing closing '}' in pattern: %k", &state->text);
+                fail("Missing closing '}' in pattern: %k", &state->stack[0].text);
             return PAT(PAT_GRAPHEME, .grapheme=grapheme);
         } else if (strlen(prop_name) == 1) {
             // Single letter names: {1+ A}
             skip_whitespace(state, index);
             if (!match_grapheme(state, index, '}'))
-                fail("Missing closing '}' in pattern: %k", &state->text);
+                fail("Missing closing '}' in pattern: %k", &state->stack[0].text);
             return PAT(PAT_GRAPHEME, .grapheme=prop_name[0]);
         }
 
         skip_whitespace(state, index);
         if (!match_grapheme(state, index, '}'))
-            fail("Missing closing '}' in pattern: %k", &state->text);
+            fail("Missing closing '}' in pattern: %k", &state->stack[0].text);
 
         switch (tolower(prop_name[0])) {
         case '.':
@@ -677,7 +677,7 @@ static int64_t match(Text_t text, int64_t text_index, Pattern_t pattern, int64_t
         return 0;
 
     int64_t start_index = text_index;
-    TextIter_t pattern_state = {pattern, 0, 0}, text_state = {text, 0, 0};
+    TextIter_t pattern_state = NEW_TEXT_ITER_STATE(pattern), text_state = NEW_TEXT_ITER_STATE(text);
     pat_t pat = parse_next_pat(&pattern_state, &pattern_index);
 
     if (pat.min == -1 && pat.max == -1) {
@@ -778,7 +778,7 @@ static int64_t _find(Text_t text, Pattern_t pattern, int64_t first, int64_t last
                        && !uc_is_property((ucs4_t)first_grapheme, UC_PROPERTY_QUOTATION_MARK)
                        && !uc_is_property((ucs4_t)first_grapheme, UC_PROPERTY_PAIRED_PUNCTUATION));
 
-    TextIter_t text_state = {text, 0, 0};
+    TextIter_t text_state = NEW_TEXT_ITER_STATE(text);
     for (int64_t i = first; i <= last; i++) {
         // Optimization: quickly skip ahead to first char in pattern:
         if (find_first) {
@@ -881,12 +881,12 @@ typedef struct {
 
 static OptionalMatch_t next_match(match_iter_state_t *state)
 {
-    if (Int_to_Int64(state->i, false) > state->state.text.length)
+    if (Int_to_Int64(state->i, false) > state->state.stack[0].text.length)
         return NONE_MATCH;
 
-    OptionalMatch_t m = Text$find(state->state.text, state->pattern, state->i);
+    OptionalMatch_t m = Text$find(state->state.stack[0].text, state->pattern, state->i);
     if (m.index.small == 0) // No match
-        state->i = I(state->state.text.length + 1);
+        state->i = I(state->state.stack[0].text.length + 1);
     else
         state->i = Int$plus(m.index, I(MAX(1, m.text.length)));
     return m;
@@ -896,7 +896,7 @@ public Closure_t Text$by_match(Text_t text, Pattern_t pattern)
 {
     return (Closure_t){
         .fn=(void*)next_match,
-        .userdata=new(match_iter_state_t, .state={text, 0, 0}, .i=I_small(1), .pattern=pattern),
+        .userdata=new(match_iter_state_t, .state=NEW_TEXT_ITER_STATE(text), .i=I_small(1), .pattern=pattern),
     };
 }
 
@@ -911,7 +911,7 @@ static Text_t apply_backrefs(Text_t text, Pattern_t original_pattern, Text_t rep
                        && !uc_is_property((ucs4_t)first_grapheme, UC_PROPERTY_PAIRED_PUNCTUATION));
 
     Text_t ret = Text("");
-    TextIter_t replacement_state = {replacement, 0, 0};
+    TextIter_t replacement_state = NEW_TEXT_ITER_STATE(replacement);
     int64_t nonmatching_pos = 0;
     for (int64_t pos = 0; pos < replacement.length; ) {
         // Optimization: quickly skip ahead to first char in the backref pattern:
@@ -965,14 +965,14 @@ static Text_t apply_backrefs(Text_t text, Pattern_t original_pattern, Text_t rep
 
 public Text_t Text$replace(Text_t text, Pattern_t pattern, Text_t replacement, Pattern_t backref_pat, bool recursive)
 {
-    Text_t ret = {.length=0};
+    Text_t ret = EMPTY_TEXT;
 
     int32_t first_grapheme = Text$get_grapheme(pattern, 0);
     bool find_first = (first_grapheme != '{'
                        && !uc_is_property((ucs4_t)first_grapheme, UC_PROPERTY_QUOTATION_MARK)
                        && !uc_is_property((ucs4_t)first_grapheme, UC_PROPERTY_PAIRED_PUNCTUATION));
 
-    TextIter_t text_state = {text, 0, 0};
+    TextIter_t text_state = NEW_TEXT_ITER_STATE(text);
     int64_t nonmatching_pos = 0;
     for (int64_t pos = 0; pos < text.length; ) {
         // Optimization: quickly skip ahead to first char in pattern:
@@ -1030,14 +1030,14 @@ public Text_t Text$trim(Text_t text, Pattern_t pattern, bool trim_left, bool tri
 
 public Text_t Text$map(Text_t text, Pattern_t pattern, Closure_t fn)
 {
-    Text_t ret = {.length=0};
+    Text_t ret = EMPTY_TEXT;
 
     int32_t first_grapheme = Text$get_grapheme(pattern, 0);
     bool find_first = (first_grapheme != '{'
                        && !uc_is_property((ucs4_t)first_grapheme, UC_PROPERTY_QUOTATION_MARK)
                        && !uc_is_property((ucs4_t)first_grapheme, UC_PROPERTY_PAIRED_PUNCTUATION));
 
-    TextIter_t text_state = {text, 0, 0};
+    TextIter_t text_state = NEW_TEXT_ITER_STATE(text);
     int64_t nonmatching_pos = 0;
 
     Text_t (*text_mapper)(Match_t, void*) = fn.fn;
@@ -1086,7 +1086,7 @@ public void Text$each(Text_t text, Pattern_t pattern, Closure_t fn)
                        && !uc_is_property((ucs4_t)first_grapheme, UC_PROPERTY_QUOTATION_MARK)
                        && !uc_is_property((ucs4_t)first_grapheme, UC_PROPERTY_PAIRED_PUNCTUATION));
 
-    TextIter_t text_state = {text, 0, 0};
+    TextIter_t text_state = NEW_TEXT_ITER_STATE(text);
     void (*action)(Match_t, void*) = fn.fn;
     for (int64_t pos = 0; pos < text.length; pos++) {
         // Optimization: quickly skip ahead to first char in pattern:
@@ -1118,7 +1118,7 @@ public Text_t Text$replace_all(Text_t text, Table_t replacements, Text_t backref
 {
     if (replacements.entries.length == 0) return text;
 
-    Text_t ret = {.length=0};
+    Text_t ret = EMPTY_TEXT;
 
     int64_t nonmatch_pos = 0;
     for (int64_t pos = 0; pos < text.length; ) {
@@ -1194,11 +1194,11 @@ typedef struct {
 
 static OptionalText_t next_split(split_iter_state_t *state)
 {
-    Text_t text = state->state.text;
+    Text_t text = state->state.stack[0].text;
     if (state->i >= text.length) {
         if (state->pattern.length > 0 && state->i == text.length) { // special case
             state->i = text.length + 1;
-            return (Text_t){.length=0};
+            return EMPTY_TEXT;
         }
         return NONE_TEXT;
     }
@@ -1220,7 +1220,7 @@ static OptionalText_t next_split(split_iter_state_t *state)
         state->i = MAX(found + len, state->i + 1);
         return Text$slice(text, I(start+1), I(found));
     } else {
-        state->i = state->state.text.length + 1;
+        state->i = state->state.stack[0].text.length + 1;
         return Text$slice(text, I(start+1), I(text.length));
     }
 }
@@ -1229,7 +1229,7 @@ public Closure_t Text$by_split(Text_t text, Pattern_t pattern)
 {
     return (Closure_t){
         .fn=(void*)next_split,
-        .userdata=new(split_iter_state_t, .state={text, 0, 0}, .i=0, .pattern=pattern),
+        .userdata=new(split_iter_state_t, .state=NEW_TEXT_ITER_STATE(text), .i=0, .pattern=pattern),
     };
 }
 
diff --git a/stdlib/shell.c b/stdlib/shell.c
index 4a48f5c..694d155 100644
--- a/stdlib/shell.c
+++ b/stdlib/shell.c
@@ -14,24 +14,7 @@
 
 public Shell_t Shell$escape_text(Text_t text)
 {
-    // TODO: optimize for ASCII and short strings
-    Array_t shell_graphemes = {.atomic=1};
-#define add_char(c) Array$insert(&shell_graphemes, (uint32_t[1]){c}, I_small(0), sizeof(uint32_t))
-    add_char('\'');
-    const char *text_utf8 = Text$as_c_string(text);
-    for (const char *p = text_utf8; *p; p++) {
-        if (*p == '\'') {
-            add_char('\'');
-            add_char('"');
-            add_char('\'');
-            add_char('"');
-            add_char('\'');
-        } else
-            add_char((uint8_t)*p);
-    }
-    add_char('\'');
-#undef add_char
-    return (Text_t){.length=shell_graphemes.length, .tag=TEXT_GRAPHEMES, .graphemes=shell_graphemes.data};
+    return Text$replace(text, Text("'"), Text("'\"'\"'"), Text(""), false);
 }
 
 public Shell_t Shell$escape_text_array(Array_t texts)
diff --git a/stdlib/stdlib.c b/stdlib/stdlib.c
index cb9d221..bab9790 100644
--- a/stdlib/stdlib.c
+++ b/stdlib/stdlib.c
@@ -500,12 +500,12 @@ public void end_test(const void *expr, const TypeInfo_t *type, const char *expec
     if (expected && expected[0]) {
         Text_t expected_text = Text$from_str(expected);
         Text_t expr_plain = USE_COLOR ? generic_as_text(expr, false, type) : expr_text;
-        bool success = Text$equal(&expr_plain, &expected_text, &Text$info);
+        bool success = Text$equal_values(expr_plain, expected_text);
         if (!success) {
             OptionalMatch_t colon = Text$find(expected_text, Text(":"), I_small(1));
             if (colon.index.small) {
                 Text_t with_type = Text$concat(expr_plain, Text(" : "), type_name);
-                success = Text$equal(&with_type, &expected_text, &Text$info);
+                success = Text$equal_values(with_type, expected_text);
             }
         }
 
@@ -594,7 +594,7 @@ public bool pop_flag(char **argv, int *i, const char *flag, Text_t *result)
     if (argv[*i][0] != '-' || argv[*i][1] != '-') {
         return false;
     } else if (streq(argv[*i] + 2, flag)) {
-        *result = (Text_t){.length=0};
+        *result = EMPTY_TEXT;
         argv[*i] = NULL;
         *i += 1;
         return true;
diff --git a/stdlib/text.c b/stdlib/text.c
index e25e7bc..96ea02d 100644
--- a/stdlib/text.c
+++ b/stdlib/text.c
@@ -1,9 +1,15 @@
-// Type info and methods for Text datatype, which uses libunistr for Unicode
-// support and implements a datastructure based on Raku/MoarVM's strings to
-// efficiently store arbitrary unicode data using a mix of densely packed plain
-// ASCII, 32-bit integers representing grapheme clusters (see below), and ropes
-// that represent text that is a composite of multiple subtexts. Subtexts are
-// only nested one level deep, not arbitrarily deep trees.
+// This file defines type info and methods for the Text datatype, which uses
+// libunistr for Unicode support and implements a datastructure based on a
+// hybrid of Raku/MoarVM's space-efficient grapheme cluster representation of
+// strings and Cords (Boehm et al), which have good runtime performance for
+// text constructed by a series of many concatenations.
+//
+// For more information on MoarVM's grapheme cluster strings, see:
+//     https://docs.raku.org/language/unicode
+//     https://github.com/MoarVM/MoarVM/blob/main/docs/strings.asciidoc For more
+// information on Cords, see the paper "Ropes: an Alternative to Strings"
+// (Boehm, Atkinson, Plass 1995):
+//     https://www.cs.tufts.edu/comp/150FP/archive/hans-boehm/ropes.pdf
 //
 // A note on grapheme clusters: In Unicode, codepoints can be represented using
 // a 32-bit integer. Most codepoints correspond to the intuitive notion of a
@@ -20,8 +26,9 @@
 // that is not used frequently enough to warrant its own unique codepoint (this
 // is basically what Zalgo text is).
 //
-// There are a lot of benefits to storing text with one grapheme cluster per
-// index in a densely packed array. It lets us have one canonical length for
+// There are a lot of benefits to storing unicode text with one grapheme
+// cluster per index in a densely packed array instead of storing the text as
+// variable-width UTF8-encoded bytes. It lets us have one canonical length for
 // the text that can be precomputed and is meaningful to users. It lets us
 // quickly get the Nth "letter" in the text. Substring slicing is fast.
 // However, since not all grapheme clusters take up the same number of
@@ -38,14 +45,12 @@
 // out nicely because we're using them right now, and we'll give them a
 // negative number so it doesn't overlap with any real codepoints.
 //
-// Example 1: U+0048, U+00E9
-//    AKA: LATIN CAPITAL LETTER H, LATIN SMALL LETTER E WITH ACUTE
-//    This would be stored as: (int32_t[]){0x48, 0xE9}
-// Example 2: U+0048, U+0065, U+0309
-//    AKA: LATIN CAPITAL LETTER H, LATIN SMALL LETTER E, COMBINING VERTICAL LINE BELOW
-//    This would be stored as: (int32_t[]){0x48, -2}
-//    Where -2 is used as a lookup in an array that holds the actual unicode codepoints:
-//       (ucs4_t[]){0x65, 0x0309}
+// Example 1: U+0048, U+00E9 AKA: LATIN CAPITAL LETTER H, LATIN SMALL LETTER E
+// WITH ACUTE This would be stored as: (int32_t[]){0x48, 0xE9} Example 2:
+// U+0048, U+0065, U+0309 AKA: LATIN CAPITAL LETTER H, LATIN SMALL LETTER E,
+// COMBINING VERTICAL LINE BELOW This would be stored as: (int32_t[]){0x48, -2}
+// Where -2 is used as a lookup in an array that holds the actual unicode
+// codepoints: (ucs4_t[]){0x65, 0x0309}
 
 #include <assert.h>
 #include <ctype.h>
@@ -90,7 +95,20 @@ static int32_t num_synthetic_graphemes = 0;
 #define GRAPHEME_CODEPOINTS(id) (&synthetic_graphemes[-(id)-1].utf32_cluster[1])
 #define GRAPHEME_UTF8(id) (synthetic_graphemes[-(id)-1].utf8)
 
+// Somewhat arbitrarily chosen, if two short literal ASCII or grapheme chunks
+// are concatenated below this length threshold, we just merge them into a
+// single literal node instead of a concatenation node.
+#define SHORT_ASCII_LENGTH 64
+#define SHORT_GRAPHEMES_LENGTH 16
+
 static Text_t text_from_u32(ucs4_t *codepoints, int64_t num_codepoints, bool normalize);
+static Text_t simple_concatenation(Text_t a, Text_t b);
+
+public Text_t EMPTY_TEXT = {
+    .length=0,
+    .tag=TEXT_ASCII,
+    .ascii=0,
+};
 
 PUREFUNC static bool graphemes_equal(const void *va, const void *vb, const TypeInfo_t*) {
     ucs4_t *a = *(ucs4_t**)va;
@@ -138,7 +156,7 @@ public int32_t get_synthetic_grapheme(const ucs4_t *codepoints, int64_t utf32_le
     if (num_synthetic_graphemes >= synthetic_grapheme_capacity) {
         // If we don't have space, allocate more:
         synthetic_grapheme_capacity = MAX(128, synthetic_grapheme_capacity * 2);
-        synthetic_grapheme_t *new = GC_MALLOC(sizeof(synthetic_grapheme_t[synthetic_grapheme_capacity]));
+        synthetic_grapheme_t *new = GC_MALLOC_ATOMIC(sizeof(synthetic_grapheme_t[synthetic_grapheme_capacity]));
         memcpy(new, synthetic_graphemes, sizeof(synthetic_grapheme_t[num_synthetic_graphemes]));
         synthetic_graphemes = new;
     }
@@ -203,39 +221,29 @@ public int32_t get_synthetic_grapheme(const ucs4_t *codepoints, int64_t utf32_le
 }
 #pragma GCC diagnostic pop
 
-PUREFUNC static inline int64_t num_subtexts(Text_t t)
-{
-    if (t.tag != TEXT_SUBTEXT) return 1;
-    int64_t remaining = t.length;
-    int64_t subtexts = 0;
-    while (remaining > 0) {
-        remaining -= t.subtexts[subtexts].length;
-        ++subtexts;
-    }
-    return subtexts;
-}
-
-int text_visualize(FILE *stream, Text_t t)
+int text_visualize(FILE *stream, Text_t t, int depth)
 {
     switch (t.tag) {
-    case TEXT_SHORT_ASCII: return fprintf(stream, "<ascii length=%ld>%.*s</ascii>", t.length, t.length, t.short_ascii);
     case TEXT_ASCII: return fprintf(stream, "<ascii length=%ld>%.*s</ascii>", t.length, t.length, t.ascii);
-    case TEXT_GRAPHEMES: case TEXT_SHORT_GRAPHEMES: {
+    case TEXT_GRAPHEMES: {
         int printed = fprintf(stream, "<graphemes length=%ld>", t.length);
         printed += Text$print(stream, t);
         printed += fprintf(stream, "</graphemes>");
         return printed;
     }
-    case TEXT_SUBTEXT: {
-        int printed = fprintf(stream, "<text length=%ld>", t.length);
-        int64_t to_print = t.length;
-        for (int i = 0; to_print > 0; ++i) {
-            printed += fprintf(stream, "\n  ");
-            printed += text_visualize(stream, t.subtexts[i]);
-            to_print -= t.subtexts[i].length;
-            if (t.subtexts[i].length == 0) break;
-        }
-        printed += fprintf(stream, "\n</text>");
+    case TEXT_CONCAT: {
+        int printed = fprintf(stream, "<concat depth=%ld length=%ld>\n", t.depth, t.length);
+        for (int i = 0; i < depth+1; i++)
+            printed += fputc(' ', stream);
+        printed += text_visualize(stream, *t.left, depth+1);
+        printed += fputc('\n', stream);
+        for (int i = 0; i < depth+1; i++)
+            printed += fputc(' ', stream);
+        printed += text_visualize(stream, *t.right, depth+1);
+        printed += fputc('\n', stream);
+        for (int i = 0; i < depth; i++)
+            printed += fputc(' ', stream);
+        printed += fprintf(stream, "</concat>");
         return printed;
     }
     default: return 0;
@@ -247,10 +255,9 @@ public int Text$print(FILE *stream, Text_t t)
     if (t.length == 0) return 0;
 
     switch (t.tag) {
-    case TEXT_SHORT_ASCII: return fwrite(t.short_ascii, sizeof(char), (size_t)t.length, stream);
     case TEXT_ASCII: return fwrite(t.ascii, sizeof(char), (size_t)t.length, stream);
-    case TEXT_GRAPHEMES: case TEXT_SHORT_GRAPHEMES: {
-        const int32_t *graphemes = t.tag == TEXT_SHORT_GRAPHEMES ? t.short_graphemes : t.graphemes;
+    case TEXT_GRAPHEMES: {
+        const int32_t *graphemes = t.graphemes;
         int written = 0;
         for (int64_t i = 0; i < t.length; i++) {
             int32_t grapheme = graphemes[i];
@@ -268,12 +275,9 @@ public int Text$print(FILE *stream, Text_t t)
         }
         return written;
     }
-    case TEXT_SUBTEXT: {
-        int written = 0;
-        int i = 0;
-        for (int64_t to_print = t.length; to_print > 0; to_print -= t.subtexts[i].length, ++i)
-            written += Text$print(stream, t.subtexts[i]);
-        return written;
+    case TEXT_CONCAT: {
+        return (Text$print(stream, *t.left)
+                + Text$print(stream, *t.right));
     }
     default: return 0;
     }
@@ -314,52 +318,140 @@ static bool is_concat_stable(Text_t a, Text_t b)
     return (second_grapheme == &normalized[1]);
 }
 
+static const int64_t min_len_for_depth[MAX_TEXT_DEPTH] = {
+    // Fibonacci numbers (skipping first two)
+    1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144, 233, 377, 610, 987, 1597, 2584, 4181, 6765, 10946,
+    17711, 28657, 46368, 75025, 121393, 196418, 317811, 514229, 832040, 1346269, 2178309, 3524578,
+    5702887, 9227465, 14930352, 24157817, 39088169, 63245986, 102334155, 165580141, 267914296,
+    433494437, 701408733, 1134903170, 1836311903, 2971215073, 4807526976, 7778742049,
+};
+
+#define IS_BALANCED_TEXT(t) ((t).length >= min_len_for_depth[(t).depth])
+
+static void insert_balanced(Text_t balanced_texts[MAX_TEXT_DEPTH], Text_t to_insert)
+{
+    int i = 0;
+    Text_t accumulator = EMPTY_TEXT;
+    for (; to_insert.length > min_len_for_depth[i + 1]; i++) {
+        if (balanced_texts[i].length) {
+            accumulator = simple_concatenation(balanced_texts[i], accumulator);
+            balanced_texts[i] = EMPTY_TEXT;
+        }
+    }
+
+    accumulator = simple_concatenation(accumulator, to_insert);
+
+    while (accumulator.length >= min_len_for_depth[i]) {
+        if (balanced_texts[i].length) {
+            accumulator = simple_concatenation(balanced_texts[i], accumulator);
+            balanced_texts[i] = EMPTY_TEXT;
+        }
+        i++;
+    }
+    i--;
+    balanced_texts[i] = accumulator;
+}
+
+static void insert_balanced_recursive(Text_t balanced_texts[MAX_TEXT_DEPTH], Text_t text)
+{
+    if (text.tag == TEXT_CONCAT && (!IS_BALANCED_TEXT(text) || text.depth >= MAX_TEXT_DEPTH)) {
+        insert_balanced_recursive(balanced_texts, *text.left);
+        insert_balanced_recursive(balanced_texts, *text.right);
+    } else {
+        insert_balanced(balanced_texts, text);
+    }
+}
+
+static Text_t rebalanced(Text_t a, Text_t b)
+{
+    Text_t balanced_texts[MAX_TEXT_DEPTH] = {};
+    insert_balanced_recursive(balanced_texts, a);
+    insert_balanced_recursive(balanced_texts, b);
+
+    Text_t ret = EMPTY_TEXT;
+    for (int i = 0; ret.length < a.length + b.length; i++) {
+        if (balanced_texts[i].length)
+            ret = simple_concatenation(balanced_texts[i], ret);
+    }
+    return ret;
+}
+
+Text_t simple_concatenation(Text_t a, Text_t b)
+{
+    if (a.length == 0) return b;
+    if (b.length == 0) return a;
+
+    uint16_t new_depth = 1 + MAX(a.depth, b.depth);
+    // Rebalance only if depth exceeds the maximum allowed. We don't require
+    // every concatenation to yield a balanced text, since many concatenations
+    // are ephemeral (e.g. doing a loop repeatedly concatenating without using
+    // the intermediary values).
+    if (new_depth >= MAX_TEXT_DEPTH)
+        return rebalanced(a, b);
+
+    Text_t *children = GC_MALLOC(sizeof(Text_t[2]));
+    children[0] = a;
+    children[1] = b;
+    return (Text_t){
+        .tag=TEXT_CONCAT,
+        .length=a.length + b.length,
+        .depth=new_depth,
+        .left=&children[0],
+        .right=&children[1],
+    };
+}
+
 static Text_t concat2_assuming_safe(Text_t a, Text_t b)
 {
     if (a.length == 0) return b;
     if (b.length == 0) return a;
 
-    if (a.tag == TEXT_SUBTEXT && b.tag == TEXT_SUBTEXT) {
-        int64_t na = num_subtexts(a);
-        int64_t nb = num_subtexts(b);
-        Text_t ret = {
+    if (a.tag == TEXT_ASCII && b.tag == TEXT_ASCII && (size_t)(a.length + b.length) <= SHORT_ASCII_LENGTH) {
+        struct Text_s ret = {
+            .tag=TEXT_ASCII,
             .length=a.length + b.length,
-            .tag=TEXT_SUBTEXT,
-            .subtexts=GC_MALLOC(sizeof(Text_t[na + nb])),
         };
-        memcpy(&ret.subtexts[0], a.subtexts, sizeof(Text_t[na]));
-        memcpy(&ret.subtexts[na], b.subtexts, sizeof(Text_t[nb]));
+        ret.ascii = GC_MALLOC_ATOMIC(sizeof(char[ret.length]));
+        memcpy((char*)ret.ascii, a.ascii, sizeof(char[a.length]));
+        memcpy((char*)&ret.ascii[a.length], b.ascii, sizeof(char[b.length]));
         return ret;
-    } else if (a.tag == TEXT_SUBTEXT) {
-        int64_t n = num_subtexts(a);
-        Text_t ret = {
+    } else if (a.tag == TEXT_GRAPHEMES && b.tag == TEXT_GRAPHEMES && (size_t)(a.length + b.length) <= SHORT_GRAPHEMES_LENGTH) {
+        struct Text_s ret = {
+            .tag=TEXT_GRAPHEMES,
             .length=a.length + b.length,
-            .tag=TEXT_SUBTEXT,
-            .subtexts=GC_MALLOC(sizeof(Text_t[n + 1])),
         };
-        memcpy(ret.subtexts, a.subtexts, sizeof(Text_t[n]));
-        ret.subtexts[n] = b;
+        ret.graphemes = GC_MALLOC_ATOMIC(sizeof(int32_t[ret.length]));
+        memcpy((int32_t*)ret.graphemes, a.graphemes, sizeof(int32_t[a.length]));
+        memcpy((int32_t*)&ret.graphemes[a.length], b.graphemes, sizeof(int32_t[b.length]));
         return ret;
-    } else if (b.tag == TEXT_SUBTEXT) {
-        int64_t n = num_subtexts(b);
-        Text_t ret = {
+    } else if (a.tag != TEXT_CONCAT && b.tag != TEXT_CONCAT && (size_t)(a.length + b.length) <= SHORT_GRAPHEMES_LENGTH) {
+        // Turn a small bit of ASCII into graphemes if it helps make things smaller
+        // Text structs come with an extra 8 bytes, so allocate enough to hold the text
+        struct Text_s ret = {
+            .tag=TEXT_GRAPHEMES,
             .length=a.length + b.length,
-            .tag=TEXT_SUBTEXT,
-            .subtexts=GC_MALLOC(sizeof(Text_t[n + 1])),
         };
-        ret.subtexts[0] = a;
-        memcpy(&ret.subtexts[1], b.subtexts, sizeof(Text_t[n]));
-        return ret;
-    } else {
-        Text_t ret = {
-            .length=a.length + b.length,
-            .tag=TEXT_SUBTEXT,
-            .subtexts=GC_MALLOC(sizeof(Text_t[2])),
-        };
-        ret.subtexts[0] = a;
-        ret.subtexts[1] = b;
+        ret.graphemes = GC_MALLOC_ATOMIC(sizeof(int32_t[ret.length]));
+        int32_t *dest = (int32_t*)ret.graphemes;
+        if (a.tag == TEXT_GRAPHEMES) {
+            dest = mempcpy(dest, a.graphemes, sizeof(int32_t[a.length]));
+        } else {
+            for (int64_t i = 0; i < a.length; i++)
+                *(dest++) = (int32_t)a.ascii[i];
+        }
+        if (b.tag == TEXT_GRAPHEMES) {
+            memcpy(dest, b.graphemes, sizeof(int32_t[b.length]));
+        } else {
+            for (int64_t i = 0; i < b.length; i++)
+                *(dest++) = (int32_t)b.ascii[i];
+        }
         return ret;
     }
+
+    if (a.tag == TEXT_CONCAT && b.tag != TEXT_CONCAT && a.right->tag != TEXT_CONCAT)
+        return concat2_assuming_safe(*a.left, concat2_assuming_safe(*a.right, b));
+
+    return simple_concatenation(a, b);
 }
 
 static Text_t concat2(Text_t a, Text_t b)
@@ -398,41 +490,12 @@ static Text_t concat2(Text_t a, Text_t b)
 
 public Text_t Text$_concat(int n, Text_t items[n])
 {
-    if (n == 0) return (Text_t){.length=0};
-    if (n == 1) return items[0];
-    if (n == 2) return concat2(items[0], items[1]);
+    if (n == 0) return EMPTY_TEXT;
 
-    int64_t subtexts = 0;
-    for (int i = 0; i < n; i++) {
+    Text_t ret = items[0];
+    for (int i = 1; i < n; i++) {
         if (items[i].length > 0)
-            subtexts += num_subtexts(items[i]);
-    }
-
-    Text_t ret = {
-        .length=0,
-        .tag=TEXT_SUBTEXT,
-        .subtexts=GC_MALLOC(sizeof(Text_t[subtexts])),
-    };
-    int64_t sub_i = 0;
-    for (int i = 0; i < n; i++) {
-        if (items[i].length == 0)
-            continue;
-
-        if (i > 0 && unlikely(!is_concat_stable(ret, items[i]))) {
-            // Oops, guess this wasn't stable for concatenation, let's break it
-            // up into subtasks:
-            return concat2(ret, Text$_concat(n-i, &items[i]));
-        }
-
-        if (items[i].tag == TEXT_SUBTEXT) {
-            for (int64_t j = 0, remainder = items[i].length; remainder > 0; j++) {
-                ret.subtexts[sub_i++] = items[i].subtexts[j];
-                remainder -= items[i].subtexts[j].length;
-            }
-        } else {
-            ret.subtexts[sub_i++] = items[i];
-        }
-        ret.length += items[i].length;
+            ret = concat2(ret, items[i]);
     }
     return ret;
 }
@@ -440,37 +503,17 @@ public Text_t Text$_concat(int n, Text_t items[n])
 public Text_t Text$repeat(Text_t text, Int_t count)
 {
     if (text.length == 0 || Int$is_negative(count))
-        return Text("");
+        return EMPTY_TEXT;
 
     Int_t result_len = Int$times(count, I(text.length));
     if (Int$compare_value(result_len, I(1l<<40)) > 0)
         fail("Text repeating would produce too big of an result!");
 
     int64_t count64 = Int_to_Int64(count, false);
-    if (text.tag == TEXT_SUBTEXT) {
-        int64_t subtexts = num_subtexts(text);
-        Text_t ret = {
-            .length=text.length * count64,
-            .tag=TEXT_SUBTEXT,
-            .subtexts=GC_MALLOC(sizeof(Text_t[subtexts * count64])),
-        };
-        for (int64_t c = 0; c < count64; c++) {
-            for (int64_t i = 0; i < subtexts; i++) {
-                if (text.subtexts[i].length > 0)
-                    ret.subtexts[c*subtexts + i] = text.subtexts[i];
-            }
-        }
-        return ret;
-    } else {
-        Text_t ret = {
-            .length=text.length * count64,
-            .tag=TEXT_SUBTEXT,
-            .subtexts=GC_MALLOC(sizeof(Text_t[count64])),
-        };
-        for (int64_t i = 0; i < count64; i++)
-            ret.subtexts[i] = text;
-        return ret;
-    }
+    Text_t ret = text;
+    for (int64_t c = 1; c < count64; c++)
+        ret = concat2(ret, text);
+    return ret;
 }
 
 public Text_t Text$slice(Text_t text, Int_t first_int, Int_t last_int)
@@ -478,7 +521,7 @@ public Text_t Text$slice(Text_t text, Int_t first_int, Int_t last_int)
     int64_t first = Int_to_Int64(first_int, false);
     int64_t last = Int_to_Int64(last_int, false);
     if (first == 0) fail("Invalid index: 0");
-    if (last == 0) return (Text_t){.length=0};
+    if (last == 0) return EMPTY_TEXT;
 
     if (first < 0) first = text.length + first + 1;
     if (last < 0) last = text.length + last + 1;
@@ -486,77 +529,38 @@ public Text_t Text$slice(Text_t text, Int_t first_int, Int_t last_int)
     if (last > text.length) last = text.length;
 
     if (first > text.length || last < first)
-        return (Text_t){.length=0};
+        return EMPTY_TEXT;
 
     if (first == 1 && last == text.length)
         return text;
 
-    switch (text.tag) {
-    case TEXT_SHORT_ASCII: {
-        Text_t ret = (Text_t) {
-            .tag=TEXT_SHORT_ASCII,
-            .length=last - first + 1,
-        };
-        memcpy(ret.short_ascii, text.short_ascii + (first-1), (size_t)ret.length);
-        return ret;
+    while (text.tag == TEXT_CONCAT) {
+        if (last < text.left->length) {
+            text = *text.left;
+        } else if (first > text.left->length) {
+            first -= text.left->length;
+            last -= text.left->length;
+            text = *text.right;
+        } else {
+            return concat2(Text$slice(*text.left, I(first), I(text.length)),
+                           Text$slice(*text.right, I(1), I(last-text.left->length)));
+        }
     }
+
+    switch (text.tag) {
     case TEXT_ASCII: {
-        Text_t ret = {
+        return (Text_t){
             .tag=TEXT_ASCII,
             .length=last - first + 1,
             .ascii=text.ascii + (first-1),
         };
-        return ret;
-    }
-    case TEXT_SHORT_GRAPHEMES: {
-        assert((first == 1 && last == 1) || (first == 2 && last == 2));
-        Text_t ret = {
-            .tag=TEXT_SHORT_GRAPHEMES,
-            .length=1,
-            .short_graphemes={text.short_graphemes[first-1]},
-        };
-        return ret;
     }
     case TEXT_GRAPHEMES: {
-        Text_t ret = {
+        return (Text_t){
             .tag=TEXT_GRAPHEMES,
             .length=last - first + 1,
             .graphemes=text.graphemes + (first-1),
         };
-        return ret;
-    }
-    case TEXT_SUBTEXT: {
-        Text_t *subtexts = text.subtexts;
-        while (first > subtexts[0].length) {
-            first -= subtexts[0].length;
-            last -= subtexts[0].length;
-            ++subtexts;
-        }
-
-        int64_t needed_len = (last - first) + 1;
-        int64_t num_subtexts = 0;
-        for (int64_t included = 0; included < needed_len; ) {
-            if (included == 0)
-                included += subtexts[num_subtexts].length - first + 1;
-            else
-                included += subtexts[num_subtexts].length;
-            num_subtexts += 1;
-        }
-        if (num_subtexts == 1)
-            return Text$slice(subtexts[0], I(first), I(last));
-
-        Text_t ret = {
-            .length=needed_len,
-            .tag=TEXT_SUBTEXT,
-            .subtexts=GC_MALLOC(sizeof(Text_t[num_subtexts])),
-        };
-        for (int64_t i = 0; i < num_subtexts; i++) {
-            ret.subtexts[i] = Text$slice(subtexts[i], I(first), I(last));
-            first = 1;
-            needed_len -= ret.subtexts[i].length;
-            last = first + needed_len - 1;
-        }
-        return ret;
     }
     default: errx(1, "Invalid tag");
     }
@@ -575,45 +579,34 @@ public Text_t Text$to(Text_t text, Int_t last)
 public Text_t Text$reversed(Text_t text)
 {
     switch (text.tag) {
-    case TEXT_SHORT_ASCII: {
-        Text_t ret = text;
-        for (int64_t i = 0; i < text.length; i++)
-            ret.short_ascii[text.length-1-i] = text.short_ascii[i];
-        return ret;
-    }
     case TEXT_ASCII: {
-        Text_t ret = text;
-        ret.ascii = GC_MALLOC_ATOMIC(sizeof(uint8_t[text.length]));
+        struct Text_s ret = {
+            .tag=TEXT_ASCII,
+            .length=text.length,
+        };
+        ret.ascii = GC_MALLOC_ATOMIC(sizeof(char[ret.length]));
         for (int64_t i = 0; i < text.length; i++)
             ((char*)ret.ascii)[text.length-1-i] = text.ascii[i];
         return ret;
     }
-    case TEXT_SHORT_GRAPHEMES: {
-        Text_t ret = text;
-        for (int64_t i = 0; i < text.length; i++)
-            ret.short_graphemes[text.length-1-i] = text.short_graphemes[i];
-        return ret;
-    }
     case TEXT_GRAPHEMES: {
-        Text_t ret = text;
-        ret.graphemes = GC_MALLOC_ATOMIC(sizeof(int32_t[text.length]));
+        struct Text_s ret = {
+            .tag=TEXT_GRAPHEMES,
+            .length=text.length,
+        };
+        ret.graphemes = GC_MALLOC_ATOMIC(sizeof(int32_t[ret.length]));
         for (int64_t i = 0; i < text.length; i++)
             ((int32_t*)ret.graphemes)[text.length-1-i] = text.graphemes[i];
         return ret;
     }
-    case TEXT_SUBTEXT: {
-        Text_t ret = text;
-        int64_t n = num_subtexts(text);
-        ret.subtexts = GC_MALLOC(sizeof(Text_t*[n]));
-        for (int64_t i = 0; i < n; i++)
-            ret.subtexts[n-1-i] = Text$reversed(text.subtexts[i]);
-        return ret;
+    case TEXT_CONCAT: {
+        return concat2(Text$reversed(*text.right), Text$reversed(*text.left));
     }
     default: errx(1, "Invalid tag");
     }
 }
 
-public Text_t Text$cluster(Text_t text, Int_t index_int)
+public PUREFUNC Text_t Text$cluster(Text_t text, Int_t index_int)
 {
     int64_t index = Int_to_Int64(index_int, false);
     if (index == 0) fail("Invalid index: 0");
@@ -624,42 +617,31 @@ public Text_t Text$cluster(Text_t text, Int_t index_int)
         fail("Invalid index: %ld is beyond the length of the text (length = %ld)",
              Int_to_Int64(index_int, false), text.length);
 
+    while (text.tag == TEXT_CONCAT) {
+        if (index <= text.left->length)
+            text = *text.left;
+        else
+            text = *text.right;
+    }
+
     switch (text.tag) {
-    case TEXT_SHORT_ASCII: {
-        return (Text_t) {
-            .tag=TEXT_SHORT_ASCII,
-            .length=1,
-            .short_ascii={text.short_ascii[index-1]},
-        };
-    }
     case TEXT_ASCII: {
-        return (Text_t) {
-            .tag=TEXT_SHORT_ASCII,
+        struct Text_s ret = {
+            .tag=TEXT_ASCII,
             .length=1,
-            .short_ascii={text.ascii[index-1]},
-        };
-    }
-    case TEXT_SHORT_GRAPHEMES: {
-        return (Text_t) {
-            .tag=TEXT_SHORT_GRAPHEMES,
-            .length=1,
-            .short_graphemes={text.short_graphemes[index-1]},
+            .ascii=GC_MALLOC_ATOMIC(sizeof(char)),
         };
+        *(char*)&ret.ascii[0] = text.ascii[index-1];
+        return ret;
     }
     case TEXT_GRAPHEMES: {
-        return (Text_t) {
-            .tag=TEXT_SHORT_GRAPHEMES,
+        struct Text_s ret = {
+            .tag=TEXT_GRAPHEMES,
             .length=1,
-            .short_graphemes={text.graphemes[index-1]},
+            .graphemes=GC_MALLOC_ATOMIC(sizeof(int32_t)),
         };
-    }
-    case TEXT_SUBTEXT: {
-        Text_t *subtext = text.subtexts;
-        while (index > subtext[0].length) {
-            index -= subtext[0].length;
-            ++subtext;
-        }
-        return Text$cluster(*subtext, I(index));
+        *(int32_t*)&ret.graphemes[0] = text.graphemes[index-1];
+        return ret;
     }
     default: errx(1, "Invalid tag");
     }
@@ -676,25 +658,18 @@ Text_t text_from_u32(ucs4_t *codepoints, int64_t num_codepoints, bool normalize)
         num_codepoints = (int64_t)norm_length;
     }
 
-    // char breaks[num_codepoints];
-    // u32_grapheme_breaks(codepoints, num_codepoints, breaks);
-
-    Text_t ret = {
+    // Intentionally overallocate here: allocate assuming each codepoint is a
+    // grapheme cluster. If that's not true, we'll have extra space at the end
+    // of the array, but the length will still be calculated correctly.
+    int32_t *graphemes = GC_MALLOC_ATOMIC(sizeof(int32_t[num_codepoints]));
+    struct Text_s ret = {
+        .tag=TEXT_GRAPHEMES,
         .length=0,
-        .tag=TEXT_SHORT_GRAPHEMES,
+        .graphemes=graphemes,
     };
     const ucs4_t *src = codepoints;
-    int32_t *graphemes = ret.short_graphemes;
     while (src < &codepoints[num_codepoints]) {
-        if (ret.tag == TEXT_SHORT_GRAPHEMES && ret.length + 1 > 2) {
-            graphemes = GC_MALLOC_ATOMIC(sizeof(int32_t[num_codepoints])); // May be a slight overallocation
-            graphemes[0] = ret.short_graphemes[0];
-            graphemes[1] = ret.short_graphemes[1];
-            ret.tag = TEXT_GRAPHEMES;
-            ret.graphemes = graphemes;
-        }
-
-        // TODO: use grapheme breaks instead of u32_grapheme_next()
+        // TODO: use grapheme breaks instead of u32_grapheme_next()?
         const ucs4_t *next = u32_grapheme_next(src, &codepoints[num_codepoints]);
         if (next == &src[1]) {
             graphemes[ret.length] = (int32_t)*src;
@@ -716,16 +691,13 @@ public OptionalText_t Text$from_strn(const char *str, size_t len)
         ascii_span++;
 
     if (ascii_span == (int64_t)len) { // All ASCII
-        Text_t ret = {.length=ascii_span};
-        if (ascii_span <= 8) {
-            ret.tag = TEXT_SHORT_ASCII;
-            for (int64_t i = 0; i < ascii_span; i++)
-                ret.short_ascii[i] = str[i];
-        } else {
-            ret.tag = TEXT_ASCII;
-            ret.ascii = str;
-        }
-        return ret;
+        char *copy = GC_MALLOC_ATOMIC(len);
+        memcpy(copy, str, len);
+        return (Text_t){
+            .tag=TEXT_ASCII,
+            .length=ascii_span,
+            .ascii=copy,
+        };
     } else {
         if (u8_check((uint8_t*)str, len) != NULL)
             return NONE_TEXT;
@@ -748,19 +720,19 @@ public OptionalText_t Text$from_str(const char *str)
 static void u8_buf_append(Text_t text, char **buf, int64_t *capacity, int64_t *i)
 {
     switch (text.tag) {
-    case TEXT_ASCII: case TEXT_SHORT_ASCII: {
+    case TEXT_ASCII: {
         if (*i + text.length > (int64_t)*capacity) {
             *capacity = *i + text.length + 1;
             *buf = GC_REALLOC(*buf, (size_t)*capacity);
         }
 
-        const char *bytes = text.tag == TEXT_ASCII ? text.ascii : text.short_ascii;
+        const char *bytes = text.ascii;
         memcpy(*buf + *i, bytes, (size_t)text.length);
         *i += text.length;
         break;
     }
-    case TEXT_GRAPHEMES: case TEXT_SHORT_GRAPHEMES: {
-        const int32_t *graphemes = text.tag == TEXT_GRAPHEMES ? text.graphemes : text.short_graphemes;
+    case TEXT_GRAPHEMES: {
+        const int32_t *graphemes = text.graphemes;
         for (int64_t g = 0; g < text.length; g++) {
             if (graphemes[g] >= 0) {
                 uint8_t u8_buf[64];
@@ -789,11 +761,9 @@ static void u8_buf_append(Text_t text, char **buf, int64_t *capacity, int64_t *i
         }
         break;
     }
-    case TEXT_SUBTEXT: {
-        for (int64_t s = 0, remaining = text.length; remaining > 0; s++) {
-            u8_buf_append(text.subtexts[s], buf, capacity, i);
-            remaining -= text.subtexts[s].length;
-        }
+    case TEXT_CONCAT: {
+        u8_buf_append(*text.left, buf, capacity, i);
+        u8_buf_append(*text.right, buf, capacity, i);
         break;
     }
     default: break;
@@ -817,135 +787,95 @@ public char *Text$as_c_string(Text_t text)
 
 PUREFUNC public uint64_t Text$hash(const void *obj, const TypeInfo_t*)
 {
-    Text_t *text = (Text_t*)obj;
-    if (text->hash != 0) return text->hash;
+    Text_t text = *(Text_t*)obj;
     siphash sh;
-    siphashinit(&sh, sizeof(int32_t[text->length]));
+    siphashinit(&sh, sizeof(int32_t[text.length]));
 
     union {
         int32_t chunks[2];
         uint64_t whole;
     } tmp;
-    switch (text->tag) {
-    case TEXT_ASCII: case TEXT_SHORT_ASCII: {
-        const char *bytes = text->tag == TEXT_ASCII ? text->ascii : text->short_ascii;
-        for (int64_t i = 0; i + 1 < text->length; i += 2) {
+    switch (text.tag) {
+    case TEXT_ASCII: {
+        const char *bytes = text.ascii;
+        for (int64_t i = 0; i + 1 < text.length; i += 2) {
             tmp.chunks[0] = (int32_t)bytes[i];
             tmp.chunks[1] = (int32_t)bytes[i+1];
             siphashadd64bits(&sh, tmp.whole);
         }
-        int32_t last = text->length & 0x1 ? (int32_t)bytes[text->length-1] : 0; // Odd number of graphemes
-        text->hash = siphashfinish_last_part(&sh, (uint64_t)last);
-        break;
+        int32_t last = text.length & 0x1 ? (int32_t)bytes[text.length-1] : 0; // Odd number of graphemes
+        return siphashfinish_last_part(&sh, (uint64_t)last);
     }
     case TEXT_GRAPHEMES: {
-        const int32_t *graphemes = text->graphemes;
-        for (int64_t i = 0; i + 1 < text->length; i += 2) {
+        const int32_t *graphemes = text.graphemes;
+        for (int64_t i = 0; i + 1 < text.length; i += 2) {
             tmp.chunks[0] = graphemes[i];
             tmp.chunks[1] = graphemes[i];
             siphashadd64bits(&sh, tmp.whole);
         }
-        int32_t last = text->length & 0x1 ? graphemes[text->length-1] : 0; // Odd number of graphemes
-        text->hash = siphashfinish_last_part(&sh, (uint64_t)last);
-        break;
+        int32_t last = text.length & 0x1 ? graphemes[text.length-1] : 0; // Odd number of graphemes
+        return siphashfinish_last_part(&sh, (uint64_t)last);
     }
-    case TEXT_SHORT_GRAPHEMES: {
-        tmp.chunks[0] = text->short_graphemes[0];
-        if (text->length > 1)
-            tmp.chunks[1] = text->short_graphemes[1];
-        text->hash = siphashfinish_last_part(&sh, (uint64_t)tmp.whole);
-        break;
-    }
-    case TEXT_SUBTEXT: {
-        int32_t leftover = 0;
-        for (int64_t sub_i = 0, to_hash = text->length; to_hash > 0; ) {
-            Text_t subtext = text->subtexts[sub_i];
-            if (subtext.tag == TEXT_ASCII || subtext.tag == TEXT_SHORT_ASCII) {
-                const char *bytes = subtext.tag == TEXT_ASCII ? subtext.ascii : subtext.short_ascii;
-                int64_t grapheme = 0;
-                if (leftover) {
-                    tmp.chunks[0] = leftover;
-                    tmp.chunks[1] = (int32_t)bytes[0];
-                    siphashadd64bits(&sh, tmp.whole);
-                    grapheme += 1;
-                }
-                for (; grapheme + 1 < subtext.length; grapheme += 2) {
-                    tmp.chunks[0] = (int32_t)bytes[grapheme];
-                    tmp.chunks[1] = (int32_t)bytes[grapheme+1];
-                    siphashadd64bits(&sh, tmp.whole);
-                }
-                leftover = grapheme < subtext.length ? (int32_t)bytes[grapheme] : 0;
-            } else if (subtext.tag == TEXT_SHORT_GRAPHEMES) {
-                if (leftover) {
-                    tmp.chunks[0] = leftover;
-                    tmp.chunks[1] = subtext.short_graphemes[0];
-                    siphashadd64bits(&sh, tmp.whole);
-                    leftover = subtext.length > 1 ? subtext.short_graphemes[1] : 0;
-                } else if (subtext.length == 1) {
-                    leftover = subtext.short_graphemes[0];
-                } else {
-                    tmp.chunks[0] = subtext.short_graphemes[0];
-                    tmp.chunks[1] = subtext.short_graphemes[1];
-                    siphashadd64bits(&sh, tmp.whole);
-                }
-            } else if (subtext.tag == TEXT_GRAPHEMES) {
-                const int32_t *graphemes = subtext.graphemes;
-                int64_t grapheme = 0;
-                if (leftover) {
-                    tmp.chunks[0] = leftover;
-                    tmp.chunks[1] = graphemes[0];
-                    siphashadd64bits(&sh, tmp.whole);
-                    grapheme += 1;
-                }
-                for (; grapheme + 1 < subtext.length; grapheme += 2) {
-                    tmp.chunks[0] = graphemes[grapheme];
-                    tmp.chunks[1] = graphemes[grapheme+1];
-                    siphashadd64bits(&sh, tmp.whole);
-                }
-                leftover = grapheme < subtext.length ? graphemes[grapheme] : 0;
-            }
-        
-            to_hash -= text->subtexts[sub_i].length;
-
-            ++sub_i;
+    case TEXT_CONCAT: {
+        TextIter_t state = NEW_TEXT_ITER_STATE(text);
+        for (int64_t i = 0; i < (text.length & ~0x1); i += 2) {
+            tmp.chunks[0] = Text$get_grapheme_fast(&state, i);
+            tmp.chunks[0] = Text$get_grapheme_fast(&state, i+1);
+            siphashadd64bits(&sh, tmp.whole);
         }
 
-        text->hash = siphashfinish_last_part(&sh, (uint64_t)leftover);
-        break;
+        int32_t last = (text.length & 0x1) ? Text$get_grapheme_fast(&state, text.length-1) : 0;
+        return siphashfinish_last_part(&sh, (uint64_t)last);
     }
     default: errx(1, "Invalid text");
     }
-
-    if (text->hash == 0)
-        text->hash = 1;
-
-    return text->hash;
 }
 
 public int32_t Text$get_grapheme_fast(TextIter_t *state, int64_t index)
 {
-    Text_t text = state->text;
-    switch (text.tag) {
-    case TEXT_ASCII: return index < text.length ? (int32_t)text.ascii[index] : 0;
-    case TEXT_SHORT_ASCII: return index < text.length ? (int32_t)text.short_ascii[index] : 0;
-    case TEXT_GRAPHEMES: return index < text.length ? text.graphemes[index] : 0;
-    case TEXT_SHORT_GRAPHEMES: return index < text.length ? text.short_graphemes[index] : 0;
-    case TEXT_SUBTEXT: {
-        if (index < 0 || index >= text.length)
-            return 0;
+    if (index < 0) return 0;
+    if (index >= state->stack[0].text.length) return 0;
 
-        while (index < state->sum_of_previous_subtexts && state->subtext > 0) {
-            state->sum_of_previous_subtexts -= text.subtexts[state->subtext].length;
-            state->subtext -= 1;
-        }
-        for (;;) {
-            if (index < state->sum_of_previous_subtexts + text.subtexts[state->subtext].length)
-                return Text$get_grapheme(text.subtexts[state->subtext], index - state->sum_of_previous_subtexts);
-            state->sum_of_previous_subtexts += text.subtexts[state->subtext].length;
-            state->subtext += 1;
+    assert(state->stack[0].text.depth <= MAX_TEXT_DEPTH);
+
+    // Go up the stack as needed:
+    while (index < state->stack[state->stack_index].offset
+           || index >= state->stack[state->stack_index].offset + state->stack[state->stack_index].text.length) {
+        state->stack_index -= 1;
+        assert(state->stack_index >= 0);
+    }
+
+    assert(state->stack_index >= 0 && state->stack_index <= MAX_TEXT_DEPTH);
+
+    // Go down the stack as needed:
+    while (state->stack[state->stack_index].text.tag == TEXT_CONCAT) {
+        Text_t text = state->stack[state->stack_index].text;
+        int64_t offset = state->stack[state->stack_index].offset;
+        assert(state->stack_index <= MAX_TEXT_DEPTH);
+        assert(index >= offset);
+        assert(index < offset + text.length);
+
+        state->stack_index += 1;
+        if (index < offset + text.left->length) {
+            state->stack[state->stack_index].text = *text.left;
+            state->stack[state->stack_index].offset = offset;
+        } else {
+            state->stack[state->stack_index].text = *text.right;
+            state->stack[state->stack_index].offset = offset + text.left->length;
         }
+        assert(state->stack_index >= 0 && state->stack_index <= MAX_TEXT_DEPTH);
+    }
+
+    Text_t text = state->stack[state->stack_index].text;
+    int64_t offset = state->stack[state->stack_index].offset;
+
+    if (index < offset || index >= offset + text.length) {
         return 0;
     }
+
+    switch (text.tag) {
+    case TEXT_ASCII: return (int32_t)text.ascii[index - offset];
+    case TEXT_GRAPHEMES: return text.graphemes[index - offset];
     default: errx(1, "Invalid text");
     }
     return 0;
@@ -960,11 +890,12 @@ public uint32_t Text$get_main_grapheme_fast(TextIter_t *state, int64_t index)
 PUREFUNC public int32_t Text$compare(const void *va, const void *vb, const TypeInfo_t*)
 {
     if (va == vb) return 0;
-    const Text_t *a = (const Text_t*)va;
-    const Text_t *b = (const Text_t*)vb;
+    const Text_t a = *(const Text_t*)va;
+    const Text_t b = *(const Text_t*)vb;
 
-    int64_t len = MAX(a->length, b->length);
-    TextIter_t a_state = {*a, 0, 0}, b_state = {*b, 0, 0};
+    // TODO: make this smarter and more efficient
+    int64_t len = MAX(a.length, b.length);
+    TextIter_t a_state = NEW_TEXT_ITER_STATE(a), b_state = NEW_TEXT_ITER_STATE(b);
     for (int64_t i = 0; i < len; i++) {
         int32_t ai = Text$get_grapheme_fast(&a_state, i);
         int32_t bi = Text$get_grapheme_fast(&b_state, i);
@@ -998,7 +929,7 @@ PUREFUNC public bool Text$starts_with(Text_t text, Text_t prefix)
 {
     if (text.length < prefix.length)
         return false;
-    TextIter_t text_state = {text, 0, 0}, prefix_state = {prefix, 0, 0};
+    TextIter_t text_state = NEW_TEXT_ITER_STATE(text), prefix_state = NEW_TEXT_ITER_STATE(prefix);
     for (int64_t i = 0; i < prefix.length; i++) {
         int32_t text_i = Text$get_grapheme_fast(&text_state, i);
         int32_t prefix_i = Text$get_grapheme_fast(&prefix_state, i);
@@ -1011,7 +942,7 @@ PUREFUNC public bool Text$ends_with(Text_t text, Text_t suffix)
 {
     if (text.length < suffix.length)
         return false;
-    TextIter_t text_state = {text, 0, 0}, suffix_state = {suffix, 0, 0};
+    TextIter_t text_state = NEW_TEXT_ITER_STATE(text), suffix_state = NEW_TEXT_ITER_STATE(suffix);
     for (int64_t i = 0; i < suffix.length; i++) {
         int32_t text_i = Text$get_grapheme_fast(&text_state, text.length - suffix.length + i);
         int32_t suffix_i = Text$get_grapheme_fast(&suffix_state, i);
@@ -1022,10 +953,11 @@ PUREFUNC public bool Text$ends_with(Text_t text, Text_t suffix)
 
 PUREFUNC public bool Text$equal_values(Text_t a, Text_t b)
 {
-    if (a.length != b.length || (a.hash != 0 && b.hash != 0 && a.hash != b.hash))
+    if (a.length != b.length)
         return false;
     int64_t len = a.length;
-    TextIter_t a_state = {a, 0, 0}, b_state = {b, 0, 0};
+    TextIter_t a_state = NEW_TEXT_ITER_STATE(a), b_state = NEW_TEXT_ITER_STATE(b);
+    // TODO: make this smarter and more efficient
     for (int64_t i = 0; i < len; i++) {
         int32_t ai = Text$get_grapheme_fast(&a_state, i);
         int32_t bi = Text$get_grapheme_fast(&b_state, i);
@@ -1045,7 +977,7 @@ PUREFUNC public bool Text$equal_ignoring_case(Text_t a, Text_t b)
     if (a.length != b.length)
         return false;
     int64_t len = a.length;
-    TextIter_t a_state = {a, 0, 0}, b_state = {b, 0, 0};
+    TextIter_t a_state = NEW_TEXT_ITER_STATE(a), b_state = NEW_TEXT_ITER_STATE(b);
     const char *language = uc_locale_language();
     for (int64_t i = 0; i < len; i++) {
         int32_t ai = Text$get_grapheme_fast(&a_state, i);
@@ -1110,37 +1042,36 @@ public int printf_text_size(const struct printf_info *info, size_t n, int argtyp
     if (n < 1) return -1;
     (void)info;
     argtypes[0] = PA_POINTER;
-    sizes[0] = sizeof(Text_t*);
+    sizes[0] = sizeof(Text_t);
     return 1;
 }
 
 public int printf_text(FILE *stream, const struct printf_info *info, const void *const args[])
 {
-    Text_t t = **(Text_t**)args[0];
+    Text_t *t = *(Text_t**)args[0];
     if (info->alt)
-        return text_visualize(stream, t);
+        return text_visualize(stream, *t, 0);
     else
-        return Text$print(stream, t);
+        return Text$print(stream, *t);
 }
 
 static INLINE Text_t _quoted(Text_t text, bool colorize, char quote_char)
 {
-    // TODO: optimize for ASCII and short strings
-    Array_t graphemes = {.atomic=1};
-#define add_char(c) Array$insert_value(&graphemes, (ucs4_t)c, I_small(0), sizeof(ucs4_t))
-#define add_str(s) ({ for (const char *_c = s; *_c; ++_c) Array$insert_value(&graphemes, (ucs4_t)*_c, I_small(0), sizeof(ucs4_t)); })
-    if (colorize)
-        add_str("\x1b[35m");
+    Text_t ret = colorize ? Text("\x1b[35m") : EMPTY_TEXT;
     if (quote_char != '"' && quote_char != '\'' && quote_char != '`')
-        add_char('$');
-    add_char(quote_char);
+        ret = concat2_assuming_safe(ret, Text("$"));
 
-#define add_escaped(str) ({ if (colorize) add_str("\x1b[34;1m"); \
-                          if (!just_escaped) add_char('$'); \
-                          add_char('\\'); add_str(str); just_escaped = true; \
-                          if (colorize) add_str("\x1b[0;35m"); })
-    TextIter_t state = {text, 0, 0};
+    Text_t quote_text = Text$from_strn(&quote_char, 1);
+    ret = concat2_assuming_safe(ret, quote_text);
+
+#define add_escaped(str) ({ if (colorize) ret = concat2_assuming_safe(ret, Text("\x1b[34;1m")); \
+                          if (!just_escaped) ret = concat2_assuming_safe(ret, Text("$")); \
+                          ret = concat2_assuming_safe(ret, Text("\\" str)); \
+                          just_escaped = true; \
+                          if (colorize) ret = concat2_assuming_safe(ret, Text("\x1b[0;35m")); })
+    TextIter_t state = NEW_TEXT_ITER_STATE(text);
     bool just_escaped = false;
+    // TODO: optimize for spans of non-escaped text
     for (int64_t i = 0; i < text.length; i++) {
         int32_t g = Text$get_grapheme_fast(&state, i);
         switch (g) {
@@ -1156,14 +1087,14 @@ static INLINE Text_t _quoted(Text_t text, bool colorize, char quote_char)
             if (just_escaped) {
                 add_escaped("\\");
             } else {
-                add_char('\\');
+                ret = concat2_assuming_safe(ret, Text("\\"));
                 just_escaped = false;
             }
             break;
         }
         case '$': {
             if (quote_char == '\'') {
-                add_char('$');
+                ret = concat2_assuming_safe(ret, Text("$"));
                 just_escaped = false;
             } else {
                 add_escaped("$");
@@ -1172,57 +1103,57 @@ static INLINE Text_t _quoted(Text_t text, bool colorize, char quote_char)
         }
         case '\x00' ... '\x06': case '\x0E' ... '\x1A':
         case '\x1C' ... '\x1F': case '\x7F' ... '\x7F': {
-            if (colorize) add_str("\x1b[34;1m");
-            add_char('\\');
-            add_char('x');
-            char tmp[4];
+            if (colorize) ret = concat2_assuming_safe(ret, Text("\x1b[34;1m"));
+            ret = concat2_assuming_safe(ret, Text("\\x"));
+            char tmp[2];
             sprintf(tmp, "%02X", g);
-            add_str(tmp);
+            ret = concat2_assuming_safe(ret, Text$from_strn(tmp, 2));
             if (colorize)
-                add_str("\x1b[0;35m");
+                ret = concat2_assuming_safe(ret, Text("\x1b[0;35m"));
             just_escaped = true;
             break;
         }
         default: {
             if (g == quote_char) {
-                add_escaped(((char[2]){quote_char, 0}));
+                ret = concat2_assuming_safe(ret, quote_text);
             } else {
-               add_char(g);
-               just_escaped = false;
+                ret = concat2_assuming_safe(ret, Text$slice(text, I(i+1), I(i+1)));
+                just_escaped = false;
             }
             break;
         }
         }
     }
-
-    add_char(quote_char);
-    if (colorize)
-        add_str("\x1b[m");
-
-    return (Text_t){.length=graphemes.length, .tag=TEXT_GRAPHEMES, .graphemes=graphemes.data};
-#undef add_str
-#undef add_char
 #undef add_escaped
+
+    ret = concat2_assuming_safe(ret, quote_text);
+    if (colorize)
+        ret = concat2_assuming_safe(ret, Text("\x1b[m"));
+
+    return ret;
 }
 
-public Text_t Text$as_text(const void *text, bool colorize, const TypeInfo_t *info)
+public Text_t Text$as_text(const void *vtext, bool colorize, const TypeInfo_t *info)
 {
     (void)info;
     if (info->TextInfo.lang && streq(info->TextInfo.lang, "Path")) {
-        if (!text) return Text("Path");
-        return Text$format("(%s%k%s)", colorize ? "\x1b[35m" : "", text, colorize ? "\x1b[m" : "");
+        if (!vtext) return Text("Path");
+        Text_t text = *(Text_t*)vtext;
+        return Text$format("(%s%k%s)", colorize ? "\x1b[35m" : "", &text, colorize ? "\x1b[m" : "");
     }
 
-    if (!text) return info && info->TextInfo.lang ? Text$from_str(info->TextInfo.lang) : Text("Text");
+    if (!vtext) return info && info->TextInfo.lang ? Text$from_str(info->TextInfo.lang) : Text("Text");
+
+    Text_t text = *(Text_t*)vtext;
     char quote_char;
     if (info == &Pattern$info) {
-        quote_char = Text$has(*(Text_t*)text, Pattern("/")) && !Text$has(*(Text_t*)text, Pattern("|")) ? '|' : '/';
+        quote_char = Text$has(text, Pattern("/")) && !Text$has(text, Pattern("|")) ? '|' : '/';
     } else {
         // Figure out the best quotation mark to use:
         bool has_dollar = false, has_double_quote = false, has_backtick = false,
              has_single_quote = false, needs_escapes = false;
-        TextIter_t state = {*(Text_t*)text, 0, 0};
-        for (int64_t i = 0; i < state.text.length; i++) {
+        TextIter_t state = NEW_TEXT_ITER_STATE(text);
+        for (int64_t i = 0; i < text.length; i++) {
             int32_t g = Text$get_grapheme_fast(&state, i);
             if (g == '$') {
                 has_dollar = true;
@@ -1250,7 +1181,7 @@ public Text_t Text$as_text(const void *text, bool colorize, const TypeInfo_t *in
             quote_char = '"';
     }
 
-    Text_t as_text = _quoted(*(Text_t*)text, colorize, quote_char);
+    Text_t as_text = _quoted(text, colorize, quote_char);
     if (info && info->TextInfo.lang && info != &Text$info && info != &Pattern$info)
         as_text = Text$concat(
             colorize ? Text("\x1b[1m$") : Text("$"),
@@ -1267,7 +1198,7 @@ public Text_t Text$quoted(Text_t text, bool colorize)
 
 public Text_t Text$join(Text_t glue, Array_t pieces)
 {
-    if (pieces.length == 0) return (Text_t){.length=0};
+    if (pieces.length == 0) return EMPTY_TEXT;
 
     Text_t result = *(Text_t*)pieces.data;
     for (int64_t i = 1; i < pieces.length; i++) {
@@ -1284,19 +1215,9 @@ public Text_t Text$format(const char *fmt, ...)
 
     char buf[9];
     int len = vsnprintf(buf, sizeof(buf), fmt, args);
-    Text_t ret;
-    if (len <= 8) {
-        ret = (Text_t){
-            .length=len,
-            .tag=TEXT_SHORT_ASCII,
-        };
-        for (int i = 0; i < len; i++)
-            ret.short_ascii[i] = buf[i];
-    } else {
-        char *str = GC_MALLOC_ATOMIC((size_t)(len+1));
-        vsnprintf(str, (size_t)(len+1), fmt, args);
-        ret = Text$from_str(str);
-    }
+    char *str = GC_MALLOC_ATOMIC((size_t)(len+1));
+    vsnprintf(str, (size_t)(len+1), fmt, args);
+    Text_t ret = Text$from_str(str);
     va_end(args);
     return ret;
 }
@@ -1314,7 +1235,7 @@ public Array_t Text$clusters(Text_t text)
 public Array_t Text$utf32_codepoints(Text_t text)
 {
     Array_t codepoints = {.atomic=1};
-    TextIter_t state = {text, 0, 0};
+    TextIter_t state = NEW_TEXT_ITER_STATE(text);
     for (int64_t i = 0; i < text.length; i++) {
         int32_t grapheme = Text$get_grapheme_fast(&state, i);
         if (grapheme < 0) {
@@ -1349,18 +1270,18 @@ static INLINE const char *codepoint_name(ucs4_t c)
 public Array_t Text$codepoint_names(Text_t text)
 {
     Array_t names = {};
-    TextIter_t state = {text, 0, 0};
+    TextIter_t state = NEW_TEXT_ITER_STATE(text);
     for (int64_t i = 0; i < text.length; i++) {
         int32_t grapheme = Text$get_grapheme_fast(&state, i);
         if (grapheme < 0) {
             for (int64_t c = 0; c < NUM_GRAPHEME_CODEPOINTS(grapheme); c++) {
                 const char *name = codepoint_name(GRAPHEME_CODEPOINTS(grapheme)[c]);
-                Text_t name_text = (Text_t){.tag=TEXT_ASCII, .length=(int64_t)strlen(name), .ascii=name};
+                Text_t name_text = Text$from_str(name);
                 Array$insert(&names, &name_text, I_small(0), sizeof(Text_t));
             }
         } else {
             const char *name = codepoint_name((ucs4_t)grapheme);
-            Text_t name_text = (Text_t){.tag=TEXT_ASCII, .length=(int64_t)strlen(name), .ascii=name};
+            Text_t name_text = Text$from_str(name);
             Array$insert(&names, &name_text, I_small(0), sizeof(Text_t));
         }
     }
@@ -1394,15 +1315,13 @@ public OptionalText_t Text$from_bytes(Array_t bytes)
     if (bytes.stride != sizeof(int8_t))
         Array$compact(&bytes, sizeof(int8_t));
 
-    int8_t nul = 0;
-    Array$insert(&bytes, &nul, I_small(0), sizeof(int8_t));
-    return Text$from_str(bytes.data);
+    return Text$from_strn(bytes.data, (size_t)bytes.length);
 }
 
 public Array_t Text$lines(Text_t text)
 {
     Array_t lines = {};
-    TextIter_t state = {text, 0, 0};
+    TextIter_t state = NEW_TEXT_ITER_STATE(text);
     for (int64_t i = 0, line_start = 0; i < text.length; i++) {
         int32_t grapheme = Text$get_grapheme_fast(&state, i);
         if (grapheme == '\r' && Text$get_grapheme_fast(&state, i + 1) == '\n') { // CRLF
@@ -1429,7 +1348,7 @@ typedef struct {
 
 static OptionalText_t next_line(line_iter_state_t *state)
 {
-    Text_t text = state->state.text;
+    Text_t text = state->state.stack[0].text;
     for (int64_t i = state->i; i < text.length; i++) {
         int32_t grapheme = Text$get_grapheme_fast(&state->state, i);
         if (grapheme == '\r' && Text$get_grapheme_fast(&state->state, i + 1) == '\n') { // CRLF
@@ -1453,7 +1372,7 @@ public Closure_t Text$by_line(Text_t text)
 {
     return (Closure_t){
         .fn=(void*)next_line,
-        .userdata=new(line_iter_state_t, .state={text, 0, 0}, .i=0),
+        .userdata=new(line_iter_state_t, .state=NEW_TEXT_ITER_STATE(text), .i=0),
     };
 }
 
@@ -1490,32 +1409,24 @@ public const TypeInfo_t Text$info = {
 
 public Pattern_t Pattern$escape_text(Text_t text)
 {
-    // TODO: optimize for ASCII and short strings
-    Array_t graphemes = {.atomic=1};
-#define add_char(c) Array$insert_value(&graphemes, (ucs4_t)c, I_small(0), sizeof(ucs4_t))
-#define add_str(s) ({ for (const char *_c = s; *_c; ++_c) Array$insert_value(&graphemes, (ucs4_t)*_c, I_small(0), sizeof(ucs4_t)); })
-    TextIter_t state = {text, 0, 0};
+    // TODO: optimize for spans of non-escaped text
+    Text_t ret = EMPTY_TEXT;
+    TextIter_t state = NEW_TEXT_ITER_STATE(text);
     for (int64_t i = 0; i < text.length; i++) {
         int32_t g = Text$get_grapheme_fast(&state, i);
         ucs4_t g0 = g < 0 ? GRAPHEME_CODEPOINTS(g)[0] : (ucs4_t)g;
 
         if (g == '{') {
-            add_str("{1{}");
+            ret = concat2_assuming_safe(ret, Text("{1{}"));
         } else if (g0 == '?'
                    || uc_is_property_quotation_mark(g0)
                    || (uc_is_property_paired_punctuation(g0) && uc_is_property_left_of_pair(g0))) {
-            add_char('{');
-            add_char('1');
-            add_char(g);
-            add_char('}');
+            ret = Text$concat(ret, Text("{1"), Text$slice(text, I(i+1), I(i+1)), Text("}"));
         } else {
-            add_char(g);
+            ret = concat2_assuming_safe(ret, Text$slice(text, I(i+1), I(i+1)));
         }
     }
-    return (Text_t){.length=graphemes.length, .tag=TEXT_GRAPHEMES, .graphemes=graphemes.data};
-#undef add_str
-#undef add_char
-#undef add_escaped
+    return ret;
 }
 
 // vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/stdlib/text.h b/stdlib/text.h
index 79c094a..64cf86f 100644
--- a/stdlib/text.h
+++ b/stdlib/text.h
@@ -13,18 +13,24 @@
 #include "types.h"
 #include "util.h"
 
+#define MAX_TEXT_DEPTH 48
+
 typedef struct {
-    Text_t text;
-    int64_t subtext, sum_of_previous_subtexts;
+    struct {
+        Text_t text;
+        int64_t offset;
+    } stack[MAX_TEXT_DEPTH];
+    int64_t stack_index;
 } TextIter_t;
 
+#define NEW_TEXT_ITER_STATE(t) (TextIter_t){.stack={{t, 0}}, .stack_index=0}
+
 int printf_text(FILE *stream, const struct printf_info *info, const void *const args[]);
 int printf_text_size(const struct printf_info *info, size_t n, int argtypes[n], int sizes[n]);
 
 #define Text(str) ((Text_t){.length=sizeof(str)-1, .tag=TEXT_ASCII, .ascii="" str})
 
 int Text$print(FILE *stream, Text_t t);
-void Text$visualize(Text_t t);
 Text_t Text$_concat(int n, Text_t items[n]);
 #define Text$concat(...) Text$_concat(sizeof((Text_t[]){__VA_ARGS__})/sizeof(Text_t), (Text_t[]){__VA_ARGS__})
 #define Texts(...) Text$concat(__VA_ARGS__)
@@ -69,11 +75,12 @@ void Text$deserialize(FILE *in, void *out, Array_t *, const TypeInfo_t *);
 
 MACROLIKE int32_t Text$get_grapheme(Text_t text, int64_t index)
 {
-    TextIter_t state = {text, 0, 0};
+    TextIter_t state = NEW_TEXT_ITER_STATE(text);
     return Text$get_grapheme_fast(&state, index);
 }
 
 extern const TypeInfo_t Text$info;
+extern Text_t EMPTY_TEXT;
 
 #define Text$metamethods ((metamethods_t){ \
     .as_text=Text$as_text, \