From 9447ba8c4aff985f9238b3f4e138afd4526799b0 Mon Sep 17 00:00:00 2001
From: Bruce Hill <bruce@bruce-hill.com>
Date: Fri, 13 Sep 2024 13:34:04 -0400
Subject: [PATCH] Split pattern code into its own file

---
 Makefile             |    2 +-
 builtins/functions.c |    1 +
 builtins/path.c      |    1 +
 builtins/pattern.c   | 1065 +++++++++++++++++++++++++++++++++++++++
 builtins/pattern.h   |   33 ++
 builtins/shell.c     |    1 +
 builtins/text.c      | 1127 ++----------------------------------------
 builtins/text.h      |   32 +-
 builtins/tomo.h      |    1 +
 docs/text.md         |    8 +-
 10 files changed, 1157 insertions(+), 1114 deletions(-)
 create mode 100644 builtins/pattern.c
 create mode 100644 builtins/pattern.h

diff --git a/Makefile b/Makefile
index 1fd0dd4..1ec338e 100644
--- a/Makefile
+++ b/Makefile
@@ -31,7 +31,7 @@ LDLIBS=-lgc -lcord -lm -lunistring -lgmp -ldl
 BUILTIN_OBJS=builtins/siphash.o builtins/array.o builtins/bool.o builtins/channel.o builtins/nums.o builtins/functions.o builtins/integers.o \
 						 builtins/pointer.o builtins/memory.o builtins/text.o builtins/thread.o builtins/c_string.o builtins/table.o \
 						 builtins/types.o builtins/util.o builtins/files.o builtins/range.o builtins/shell.o builtins/path.o \
-						 builtins/optionals.o
+						 builtins/optionals.o builtins/pattern.o
 TESTS=$(patsubst %.tm,%.tm.testresult,$(wildcard test/*.tm))
 
 all: libtomo.so tomo
diff --git a/builtins/functions.c b/builtins/functions.c
index edbea33..08b04c9 100644
--- a/builtins/functions.c
+++ b/builtins/functions.c
@@ -18,6 +18,7 @@
 #include "functions.h"
 #include "integers.h"
 #include "optionals.h"
+#include "pattern.h"
 #include "pointer.h"
 #include "siphash.h"
 #include "string.h"
diff --git a/builtins/path.c b/builtins/path.c
index 8864b7d..09cdc4f 100644
--- a/builtins/path.c
+++ b/builtins/path.c
@@ -18,6 +18,7 @@
 #include "integers.h"
 #include "optionals.h"
 #include "path.h"
+#include "pattern.h"
 #include "text.h"
 #include "types.h"
 #include "util.h"
diff --git a/builtins/pattern.c b/builtins/pattern.c
new file mode 100644
index 0000000..6f46000
--- /dev/null
+++ b/builtins/pattern.c
@@ -0,0 +1,1065 @@
+// Logic for text pattern matching
+
+#include <ctype.h>
+#include <sys/param.h>
+#include <unictype.h>
+#include <uniname.h>
+
+#include "array.h"
+#include "functions.h"
+#include "integers.h"
+#include "pattern.h"
+#include "table.h"
+#include "text.h"
+#include "types.h"
+
+#define MAX_BACKREFS 100
+
+static inline void skip_whitespace(Text_t text, int64_t *i) // Advance *i past whitespace graphemes in `text`
+{
+    TextIter_t state = {0, 0};
+    while (*i < text.length) {
+        int32_t grapheme = Text$get_grapheme_fast(text, &state, *i);
+        if (grapheme > 0 && !uc_is_property_white_space((ucs4_t)grapheme)) // Stop at the first positive, non-whitespace grapheme
+            return;
+        *i += 1; // Note: non-positive grapheme values are skipped too
+    }
+}
+
+static inline bool match_grapheme(Text_t text, int64_t *i, int32_t grapheme) // Consume one grapheme at *i if it equals `grapheme`
+{
+    if (*i < text.length && Text$get_grapheme(text, *i) == grapheme) {
+        *i += 1; // Only advanced on a successful match
+        return true;
+    }
+    return false;
+}
+
+static inline bool match_str(Text_t text, int64_t *i, const char *str) // Consume the literal `str` at *i; *i is unchanged on failure
+{
+    TextIter_t state = {0, 0};
+    int64_t matched = 0;
+    while (str[matched]) { // Walk `str` up to its NUL terminator
+        if (*i + matched >= text.length || Text$get_grapheme_fast(text, &state, *i + matched) != str[matched])
+            return false;
+        matched += 1;
+    }
+    *i += matched; // The whole string matched: skip past it
+    return true;
+}
+
+static inline bool match_property(Text_t text, int64_t *i, uc_property_t prop) // Consume one grapheme at *i if its main codepoint has `prop`
+{
+    if (*i >= text.length) return false;
+    TextIter_t state = {};
+    ucs4_t grapheme = Text$get_main_grapheme_fast(text, &state, *i);
+    // TODO: check every codepoint in the cluster?
+    if (uc_is_property(grapheme, prop)) {
+        *i += 1; // Only advanced on a successful match
+        return true;
+    }
+    return false;
+}
+
+static int64_t parse_int(Text_t text, int64_t *i) // Parse a run of decimal digits at *i; leaves *i on the first unconsumed grapheme
+{
+    TextIter_t state = {0, 0};
+    int64_t value = 0;
+    for (; *i < text.length; *i += 1) { // Bounds check: digits may run right up to the end of the text
+        ucs4_t grapheme = Text$get_main_grapheme_fast(text, &state, *i);
+        int digit = uc_digit_value(grapheme);
+        if (digit < 0) break; // Not a decimal digit
+        if (value >= INT64_MAX/10) break; // Stop before 10*value + digit could overflow
+        value = 10*value + digit;
+    }
+    return value; // 0 if no digit was consumed; callers compare *i to detect that
+}
+
+const char *get_property_name(Text_t text, int64_t *i) // Read a name made of [A-Za-z0-9 _-] at *i; returns NULL if the name is empty
+{
+    skip_whitespace(text, i);
+    char *name = GC_MALLOC_ATOMIC(UNINAME_MAX);
+    char *dest = name; // Write cursor: always points one past the last stored byte
+    TextIter_t state = {0, 0};
+    while (*i < text.length) {
+        int32_t grapheme = Text$get_grapheme_fast(text, &state, *i);
+        if (!(grapheme & ~0xFF) && (isalnum(grapheme) || grapheme == ' ' || grapheme == '_' || grapheme == '-')) { // Only values in 0..255 reach isalnum()
+            *dest = (char)grapheme;
+            ++dest;
+            if (dest >= name + UNINAME_MAX - 1) // Buffer full: keep one byte for the terminator
+                break; // Note: this exit leaves *i on the grapheme that was just stored
+        } else {
+            break;
+        }
+        *i += 1;
+    }
+
+    while (dest > name && dest[-1] == ' ') // Trim trailing spaces
+        *(dest--) = '\0';
+
+    if (dest == name) return NULL;
+    *dest = '\0';
+    return name;
+}
+
+#define EAT1(text, state, index, cond) ({ /* True (and index += 1) if `cond` holds; `cond` may refer to the local `grapheme` */ \
+        int32_t grapheme = Text$get_grapheme_fast(text, state, index); \
+        bool success = (cond); \
+        if (success) index += 1; \
+        success; })
+
+#define EAT2(text, state, index, cond1, cond2) ({ /* True (and index += 2) if `cond1` holds at index and `cond2` at index+1 */ \
+        int32_t grapheme = Text$get_grapheme_fast(text, state, index); \
+        bool success = (cond1); \
+        if (success) { \
+            grapheme = Text$get_grapheme_fast(text, state, index + 1); \
+            success = (cond2); \
+            if (success) \
+                index += 2; \
+        } \
+        success; })
+
+
+#define EAT_MANY(text, state, index, cond) ({ int64_t _n = 0; while (EAT1(text, state, index, cond)) { _n += 1; } _n; }) // Repeats EAT1; evaluates to the number of graphemes consumed
+
+int64_t match_email(Text_t text, int64_t index) // Returns the length of the email address starting at `index`, or -1
+{
+    // email = local "@" domain
+    // local = 1-64 ([a-zA-Z0-9!#$%&'*+-/=?^_`.{|}~] | non-ascii)
+    // domain = dns-label ("." dns-label)*
+    // dns-label = 1-63 ([a-zA-Z0-9-] | non-ascii)
+
+    TextIter_t state = {0, 0};
+    if (index > 0) { // An address may not start directly after an alphabetic character
+        ucs4_t prev_codepoint = Text$get_main_grapheme_fast(text, &state, index - 1);
+        if (uc_is_property_alphabetic((ucs4_t)prev_codepoint))
+            return -1;
+    }
+
+    int64_t start_index = index;
+
+    // Local part:
+    int64_t local_len = 0;
+    static const char *allowed_local = "!#$%&'*+-/=?^_`.{|}~"; // ASCII apostrophe and hyphen (typographic lookalikes can never match an ASCII grapheme)
+    while (EAT1(text, &state, index, // NUL is rejected explicitly because strchr() would find it at the string terminator
+                grapheme != 0 && ((grapheme & ~0x7F) || isalnum((char)grapheme) || strchr(allowed_local, (char)grapheme)))) {
+        local_len += 1;
+        if (local_len > 64) return -1;
+    }
+    if (local_len == 0) return -1; // The local part needs at least one character
+    if (!EAT1(text, &state, index, grapheme == '@'))
+        return -1;
+
+    // Host
+    int64_t host_len = 0; // Running length of the domain, counting the separating dots
+    do {
+        int64_t label_len = 0;
+        while (EAT1(text, &state, index,
+                    (grapheme & ~0x7F) || isalnum((char)grapheme) || grapheme == '-')) {
+            label_len += 1;
+            if (label_len > 63) return -1;
+        }
+
+        if (label_len == 0) // Empty label, e.g. "a@" or "a@b..c"
+            return -1;
+
+        host_len += label_len;
+        if (host_len > 255)
+            return -1;
+        host_len += 1; // Account for the "." that may follow
+    } while (EAT1(text, &state, index, grapheme == '.'));
+
+    return index - start_index;
+}
+
+int64_t match_ipv6(Text_t text, int64_t index) // Returns the length of the IPv6 address starting at `index`, or -1
+{
+    TextIter_t state = {0, 0};
+    if (index > 0) { // Don't start in the middle of a longer hex/colon run
+        int32_t prev_codepoint = Text$get_grapheme_fast(text, &state, index - 1);
+        if (!(prev_codepoint & ~0x7F) && (isxdigit(prev_codepoint) || prev_codepoint == ':')) // ASCII-only, so isxdigit() gets a valid argument
+            return -1;
+    }
+    int64_t start_index = index;
+    const int NUM_CLUSTERS = 8;
+    bool double_colon_used = false; // "::" may appear at most once
+    for (int cluster = 0; cluster < NUM_CLUSTERS; cluster++) {
+        for (int digits = 0; digits < 4; digits++) { // Up to four hex digits per cluster
+            if (!EAT1(text, &state, index, !(grapheme & ~0x7F) && isxdigit((char)grapheme)))
+                break;
+        }
+        if (EAT1(text, &state, index, !(grapheme & ~0x7F) && isxdigit((char)grapheme)))
+            return -1; // Too many digits
+
+        if (cluster == NUM_CLUSTERS-1) {
+            break;
+        } else if (!EAT1(text, &state, index, grapheme == ':')) {
+            if (double_colon_used) // A "::" stands in for the missing clusters
+                break;
+            return -1;
+        }
+
+        if (EAT1(text, &state, index, grapheme == ':')) {
+            if (double_colon_used)
+                return -1;
+            double_colon_used = true;
+        }
+    }
+    return index - start_index;
+}
+
+static int64_t match_ipv4(Text_t text, int64_t index) // Returns the length of the dotted-quad IPv4 address starting at `index`, or -1
+{
+    TextIter_t state = {0, 0};
+    if (index > 0) { // Don't start in the middle of a longer digit/dot run
+        int32_t prev_codepoint = Text$get_grapheme_fast(text, &state, index - 1);
+        if (!(prev_codepoint & ~0x7F) && (isdigit(prev_codepoint) || prev_codepoint == '.')) // ASCII-only, so isdigit() gets a valid argument
+            return -1;
+    }
+    int64_t start_index = index;
+
+    const int NUM_CLUSTERS = 4;
+    for (int cluster = 0; cluster < NUM_CLUSTERS; cluster++) {
+        for (int digits = 0; digits < 3; digits++) { // One to three ASCII digits per cluster
+            if (!EAT1(text, &state, index, !(grapheme & ~0x7F) && isdigit((char)grapheme))) {
+                if (digits == 0) return -1;
+                break;
+            }
+        }
+
+        if (EAT1(text, &state, index, !(grapheme & ~0x7F) && isdigit((char)grapheme)))
+            return -1; // Too many digits
+
+        if (cluster == NUM_CLUSTERS-1)
+            break;
+        else if (!EAT1(text, &state, index, grapheme == '.'))
+            return -1;
+    }
+    return (index - start_index);
+}
+
+int64_t match_ip(Text_t text, int64_t index) // Returns the length of an IPv6 or IPv4 address at `index`, or -1
+{
+    int64_t len = match_ipv6(text, index); // IPv6 is tried first
+    if (len >= 0) return len;
+    len = match_ipv4(text, index);
+    return (len >= 0) ? len : -1;
+}
+
+int64_t match_uri(Text_t text, int64_t index) // Returns the length of the URI starting at `index`, or -1
+{
+    // URI = scheme ":" ["//" authority] path ["?" query] ["#" fragment]
+    // scheme = [a-zA-Z] [a-zA-Z0-9+.-]*
+    // authority = [userinfo "@"] host [":" port]
+
+    TextIter_t state = {0, 0};
+    if (index > 0) { // A URI may not start directly after an alphabetic character
+        ucs4_t prev_codepoint = Text$get_main_grapheme_fast(text, &state, index - 1);
+        if (uc_is_property_alphabetic(prev_codepoint))
+            return -1;
+    }
+
+    int64_t start_index = index;
+
+    // Scheme:
+    if (!EAT1(text, &state, index, !(grapheme & ~0x7F) && isalpha(grapheme))) // ASCII guard keeps isalpha()'s argument in range
+        return -1;
+
+    EAT_MANY(text, &state, index,
+             !(grapheme & ~0x7F) && (isalnum(grapheme) || grapheme == '+' || grapheme == '.' || grapheme == '-'));
+
+    if (index == start_index)
+        return -1;
+
+    if (!match_grapheme(text, &index, ':'))
+        return -1;
+
+    // Authority:
+    if (match_str(text, &index, "//")) {
+        int64_t authority_start = index;
+        // Username or host:
+        static const char *forbidden = "#?:@ \t\r\n<>[]{}\\^|\"`/";
+        if (EAT_MANY(text, &state, index, (grapheme & ~0x7F) || !strchr(forbidden, (char)grapheme)) == 0)
+            return -1;
+
+        if (EAT1(text, &state, index, grapheme == '@')) {
+            // Found a username, now get a host:
+            if (EAT_MANY(text, &state, index, (grapheme & ~0x7F) || !strchr(forbidden, (char)grapheme)) == 0)
+                return -1;
+        } else {
+            int64_t ip = authority_start; // Re-scan the authority as an IP literal
+            int64_t ipv4_len = match_ipv4(text, ip);
+            if (ipv4_len > 0) {
+                ip += ipv4_len;
+            } else if (match_grapheme(text, &ip, '[')) {
+                ip += match_ipv6(text, ip); // On failure this adds -1, which the check below rejects
+                if (ip > authority_start + 1 && match_grapheme(text, &ip, ']'))
+                    index = ip;
+            }
+        }
+
+        // Port:
+        if (EAT1(text, &state, index, grapheme == ':')) {
+            if (EAT_MANY(text, &state, index, !(grapheme & ~0x7F) && isdigit(grapheme)) == 0)
+                return -1;
+        }
+        if (!EAT1(text, &state, index, grapheme == '/'))
+            return (index - start_index); // No path
+    } else {
+        // Optional path root:
+        EAT1(text, &state, index, grapheme == '/');
+    }
+
+    // Path:
+    static const char *non_path = " \"#?<>[]{}\\^`|";
+    EAT_MANY(text, &state, index, (grapheme & ~0x7F) || !strchr(non_path, (char)grapheme));
+
+    if (EAT1(text, &state, index, grapheme == '?')) { // Query
+        static const char *non_query = " \"#<>[]{}\\^`|";
+        EAT_MANY(text, &state, index, (grapheme & ~0x7F) || !strchr(non_query, (char)grapheme));
+    }
+
+    if (EAT1(text, &state, index, grapheme == '#')) { // Fragment
+        static const char *non_fragment = " \"#<>[]{}\\^`|";
+        EAT_MANY(text, &state, index, (grapheme & ~0x7F) || !strchr(non_fragment, (char)grapheme));
+    }
+    return index - start_index;
+}
+
+int64_t match_url(Text_t text, int64_t index) // Like match_uri(), but only for the https/http/ftp/wss/ws schemes
+{
+    int64_t lookahead = index; // Scratch cursor: the scheme is only peeked at, not consumed
+    if (!(match_str(text, &lookahead, "https:")
+        || match_str(text, &lookahead, "http:")
+        || match_str(text, &lookahead, "ftp:")
+        || match_str(text, &lookahead, "wss:")
+        || match_str(text, &lookahead, "ws:")))
+        return -1;
+
+    return match_uri(text, index); // Full match restarts from the original index
+}
+
+int64_t match_id(Text_t text, int64_t index) // Returns the length of the identifier (XID_Start then XID_Continue*) at `index`, or -1
+{
+    TextIter_t state = {0, 0};
+    if (!EAT1(text, &state, index, uc_is_property((ucs4_t)grapheme, UC_PROPERTY_XID_START)))
+        return -1;
+    return 1 + EAT_MANY(text, &state, index, uc_is_property((ucs4_t)grapheme, UC_PROPERTY_XID_CONTINUE)); // 1 for the start grapheme
+}
+
+int64_t match_int(Text_t text, int64_t index) // Returns the number of decimal digits at `index`, or -1 if there are none
+{
+    TextIter_t state = {0, 0};
+    int64_t len = EAT_MANY(text, &state, index, uc_is_property((ucs4_t)grapheme, UC_PROPERTY_DECIMAL_DIGIT));
+    return len > 0 ? len : -1; // At least one digit is required (EAT_MANY never returns a negative count)
+}
+
+int64_t match_num(Text_t text, int64_t index) // Returns the length of an optionally negative decimal number at `index`, or -1
+{
+    TextIter_t state = {0, 0};
+    bool negative = EAT1(text, &state, index, grapheme == '-') ? 1 : 0;
+    int64_t pre_decimal = EAT_MANY(text, &state, index,
+                                   uc_is_property((ucs4_t)grapheme, UC_PROPERTY_DECIMAL_DIGIT));
+    bool decimal = (EAT1(text, &state, index, grapheme == '.') == 1);
+    int64_t post_decimal = decimal ? EAT_MANY(text, &state, index,
+                                              uc_is_property((ucs4_t)grapheme, UC_PROPERTY_DECIMAL_DIGIT)) : 0;
+    if (pre_decimal == 0 && post_decimal == 0) // Needs a digit on at least one side of the point
+        return -1;
+    return negative + pre_decimal + decimal + post_decimal; // The bools each contribute 0 or 1 grapheme
+}
+
+int64_t match_newline(Text_t text, int64_t index) // Returns 1 for "\n", 2 for "\r\n", or -1 if neither starts at `index`
+{
+    if (index >= text.length)
+        return -1;
+
+    TextIter_t state = {0, 0};
+    ucs4_t grapheme = Text$get_main_grapheme_fast(text, &state, index); // In bounds: checked above
+    if (grapheme == '\n')
+        return 1;
+    if (grapheme == '\r' && index + 1 < text.length && Text$get_grapheme_fast(text, &state, index + 1) == '\n') // Never peek past the end
+        return 2;
+    return -1;
+}
+
+typedef struct { // One captured span of the matched text
+    int64_t index, length; // Start offset and grapheme count of the capture
+    bool occupied, recursive; // occupied: slot was filled by match(); recursive: set for PAT_PAIR captures
+} capture_t;
+
+typedef struct { // One parsed pattern element, produced by parse_next_pat()
+    enum { PAT_START, PAT_END, PAT_ANY, PAT_GRAPHEME, PAT_PROPERTY, PAT_QUOTE, PAT_PAIR, PAT_FUNCTION } tag;
+    bool negated, non_capturing; // negated: written with '!'; non_capturing: match() records no capture for it
+    int64_t min, max; // Repetition bounds; both are -1 when the pattern gave no count
+    union { // Which member is meaningful depends on `tag`
+        int32_t grapheme; // PAT_GRAPHEME
+        uc_property_t property; // PAT_PROPERTY
+        int64_t (*fn)(Text_t, int64_t); // PAT_FUNCTION: returns a match length or a negative value
+        int32_t quote_graphemes[2]; // PAT_QUOTE: {open, close}
+        int32_t pair_graphemes[2]; // PAT_PAIR: {open, close}
+    };
+} pat_t;
+
+int64_t match_pat(Text_t text, TextIter_t *state, int64_t index, pat_t pat) // Match `pat` once at `index`; returns the match length or -1
+{
+    int32_t grapheme = index >= text.length ? 0 : Text$get_grapheme_fast(text, state, index); // 0 stands in for "past the end"
+
+    switch (pat.tag) {
+    case PAT_START: { // Zero-length match at index 0 (inverted when negated)
+        if (index == 0)
+            return pat.negated ? -1 : 0;
+        return pat.negated ? 0 : -1;
+    }
+    case PAT_END: { // Zero-length match at or past the end of the text (inverted when negated)
+        if (index >= text.length)
+            return pat.negated ? -1 : 0;
+        return pat.negated ? 0 : -1;
+    }
+    case PAT_ANY: {
+        assert(!pat.negated);
+        return (index < text.length) ? 1 : -1;
+    }
+    case PAT_GRAPHEME: {
+        if (index >= text.length)
+            return -1;
+        else if (grapheme == pat.grapheme)
+            return pat.negated ? -1 : 1;
+        return pat.negated ? 1 : -1;
+    }
+    case PAT_PROPERTY: {
+        if (index >= text.length)
+            return -1;
+        else if (uc_is_property((ucs4_t)grapheme, pat.property))
+            return pat.negated ? -1 : 1;
+        return pat.negated ? 1 : -1;
+    }
+    case PAT_PAIR: {
+        // Nested punctuation: (?), [?], etc
+        if (index >= text.length)
+            return -1;
+
+        int32_t open = pat.pair_graphemes[0];
+        if (grapheme != open)
+            return pat.negated ? 1 : -1;
+
+        int32_t close = pat.pair_graphemes[1];
+        int64_t depth = 1; // Nesting depth; the opener has already been seen
+        int64_t match_len = 1;
+        for (; depth > 0; match_len++) {
+            if (index + match_len >= text.length) // Ran out of text before the pair was balanced
+                return pat.negated ? 1 : -1;
+
+            int32_t c = Text$get_grapheme_fast(text, state, index + match_len);
+            if (c == open)
+                depth += 1;
+            else if (c == close)
+                depth -= 1;
+        }
+        return pat.negated ? -1 : match_len; // Length includes both delimiters
+    }
+    case PAT_QUOTE: {
+        // Nested quotes: "?", '?', etc
+        if (index >= text.length)
+            return -1;
+
+        int32_t open = pat.quote_graphemes[0];
+        if (grapheme != open)
+            return pat.negated ? 1 : -1;
+
+        int32_t close = pat.quote_graphemes[1];
+        for (int64_t i = index + 1; i < text.length; i++) {
+            int32_t c = Text$get_grapheme_fast(text, state, i);
+            if (c == close) {
+                return pat.negated ? -1 : (i - index) + 1; // Length includes both quote marks
+            } else if (c == '\\' && index + 1 < text.length) { // NOTE(review): the bound uses `index`, not `i` -- confirm intent
+                i += 1; // Skip ahead an extra step
+            }
+        }
+        return pat.negated ? 1 : -1; // No closing quote found
+    }
+    case PAT_FUNCTION: {
+        int64_t match_len = pat.fn(text, index);
+        if (match_len >= 0)
+            return pat.negated ? -1 : match_len;
+        return pat.negated ? 1 : -1; // A negated function pattern consumes one grapheme
+    }
+    default: errx(1, "Invalid pattern");
+    }
+    errx(1, "Unreachable");
+}
+
+pat_t parse_next_pat(Text_t pattern, TextIter_t *state, int64_t *index) // Parse one pattern element at *index and advance *index past it
+{
+    if (EAT2(pattern, state, *index,
+             uc_is_property((ucs4_t)grapheme, UC_PROPERTY_QUOTATION_MARK),
+             grapheme == '?')) {
+        // Quotations: "?", '?', etc
+        int32_t open = Text$get_grapheme_fast(pattern, state, *index-2); // EAT2 already skipped the quote and the '?'
+        int32_t close = open;
+        uc_mirror_char((ucs4_t)open, (ucs4_t*)&close);
+        if (!match_grapheme(pattern, index, close))
+            fail("Pattern's closing quote is missing: %k", &pattern);
+
+        return (pat_t){
+            .tag=PAT_QUOTE,
+            .min=1, .max=1,
+            .quote_graphemes={open, close},
+        };
+    } else if (EAT2(pattern, state, *index,
+                    uc_is_property((ucs4_t)grapheme, UC_PROPERTY_PAIRED_PUNCTUATION),
+                    grapheme == '?')) {
+        // Nested punctuation: (?), [?], etc
+        int32_t open = Text$get_grapheme_fast(pattern, state, *index-2);
+        int32_t close = open;
+        uc_mirror_char((ucs4_t)open, (ucs4_t*)&close);
+        if (!match_grapheme(pattern, index, close))
+            fail("Pattern's closing brace is missing: %k", &pattern);
+
+        return (pat_t){
+            .tag=PAT_PAIR,
+            .min=1, .max=1,
+            .pair_graphemes={open, close},
+        };
+    } else if (EAT1(pattern, state, *index,
+                    grapheme == '{')) { // named patterns {id}, {2-3 hex}, etc.
+        skip_whitespace(pattern, index);
+        int64_t min, max;
+        if (uc_is_digit((ucs4_t)Text$get_grapheme_fast(pattern, state, *index))) {
+            min = parse_int(pattern, index);
+            skip_whitespace(pattern, index);
+            if (match_grapheme(pattern, index, '+')) { // "N+": N or more
+                max = INT64_MAX;
+            } else if (match_grapheme(pattern, index, '-')) { // "N-M": between N and M
+                max = parse_int(pattern, index);
+            } else { // "N": exactly N
+                max = min;
+            }
+            if (min > max) fail("Minimum repetitions (%ld) is greater than the maximum (%ld)", min, max);
+        } else {
+            min = -1, max = -1; // No count given; match() picks the defaults
+        }
+
+        skip_whitespace(pattern, index);
+
+        bool negated = match_grapheme(pattern, index, '!');
+#define PAT(_tag, ...) ((pat_t){.min=min, .max=max, .negated=negated, .tag=_tag, __VA_ARGS__})
+        const char *prop_name;
+        if (match_str(pattern, index, ".."))
+            prop_name = "..";
+        else
+            prop_name = get_property_name(pattern, index);
+
+        if (!prop_name) {
+            // Literal character, e.g. {1?}
+            skip_whitespace(pattern, index);
+            int32_t grapheme = Text$get_grapheme_fast(pattern, state, (*index)++);
+            if (!match_grapheme(pattern, index, '}'))
+                fail("Missing closing '}' in pattern: %k", &pattern);
+            return PAT(PAT_GRAPHEME, .grapheme=grapheme);
+        } else if (strlen(prop_name) == 1) {
+            // Single letter names: {1+ A}
+            skip_whitespace(pattern, index);
+            if (!match_grapheme(pattern, index, '}'))
+                fail("Missing closing '}' in pattern: %k", &pattern);
+            return PAT(PAT_GRAPHEME, .grapheme=prop_name[0]);
+        }
+
+        skip_whitespace(pattern, index);
+        if (!match_grapheme(pattern, index, '}'))
+            fail("Missing closing '}' in pattern: %k", &pattern);
+
+        switch (tolower(prop_name[0])) { // Dispatch built-in names on their first letter
+        case '.':
+            if (prop_name[1] == '.') {
+                if (negated) // {!..} is treated as the end of the text
+                    return ((pat_t){.tag=PAT_END, .min=min, .max=max, .non_capturing=true});
+                else
+                    return PAT(PAT_ANY);
+            }
+            break;
+        case 'd':
+            if (strcasecmp(prop_name, "digit") == 0) {
+                return PAT(PAT_PROPERTY, .property=UC_PROPERTY_DECIMAL_DIGIT);
+            }
+            break;
+        case 'e':
+            if (strcasecmp(prop_name, "end") == 0) {
+                return PAT(PAT_END, .non_capturing=!negated);
+            } else if (strcasecmp(prop_name, "email") == 0) {
+                return PAT(PAT_FUNCTION, .fn=match_email);
+            } else if (strcasecmp(prop_name, "emoji") == 0) {
+                return PAT(PAT_PROPERTY, .property=UC_PROPERTY_EMOJI);
+            }
+            break;
+        case 'i':
+            if (strcasecmp(prop_name, "id") == 0) {
+                return PAT(PAT_FUNCTION, .fn=match_id);
+            } else if (strcasecmp(prop_name, "int") == 0) {
+                return PAT(PAT_FUNCTION, .fn=match_int);
+            } else if (strcasecmp(prop_name, "ipv4") == 0) {
+                return PAT(PAT_FUNCTION, .fn=match_ipv4);
+            } else if (strcasecmp(prop_name, "ipv6") == 0) {
+                return PAT(PAT_FUNCTION, .fn=match_ipv6);
+            } else if (strcasecmp(prop_name, "ip") == 0) {
+                return PAT(PAT_FUNCTION, .fn=match_ip);
+            }
+            break;
+        case 'n':
+            if (strcasecmp(prop_name, "nl") == 0 || strcasecmp(prop_name, "newline") == 0
+                || strcasecmp(prop_name, "crlf") == 0) { // NOTE(review): "crlf" starts with 'c', so it cannot reach this case
+                return PAT(PAT_FUNCTION, .fn=match_newline);
+            } else if (strcasecmp(prop_name, "num") == 0) {
+                return PAT(PAT_FUNCTION, .fn=match_num);
+            }
+            break;
+        case 's':
+            if (strcasecmp(prop_name, "start") == 0) {
+                return PAT(PAT_START, .non_capturing=!negated);
+            }
+            break;
+        case 'u':
+            if (strcasecmp(prop_name, "uri") == 0) {
+                return PAT(PAT_FUNCTION, .fn=match_uri);
+            } else if (strcasecmp(prop_name, "url") == 0) {
+                return PAT(PAT_FUNCTION, .fn=match_url);
+            }
+            break;
+        default: break;
+        }
+
+        uc_property_t prop = uc_property_byname(prop_name); // Not a built-in name: try a Unicode property...
+        if (uc_property_is_valid(prop))
+            return PAT(PAT_PROPERTY, .property=prop);
+
+        ucs4_t grapheme = unicode_name_character(prop_name); // ...then a Unicode character name
+        if (grapheme == UNINAME_INVALID)
+            fail("Not a valid property or character name: %s", prop_name);
+        return PAT(PAT_GRAPHEME, .grapheme=(int32_t)grapheme);
+#undef PAT
+    } else { // Anything else is a literal grapheme, matched exactly once
+        return (pat_t){.tag=PAT_GRAPHEME, .non_capturing=true, .min=1, .max=1, .grapheme=Text$get_grapheme_fast(pattern, state, (*index)++)};
+    }
+}
+
+int64_t match(Text_t text, int64_t text_index, Pattern_t pattern, int64_t pattern_index, capture_t *captures, int64_t capture_index) // Match pattern[pattern_index..] at text_index; returns the matched length or -1
+{
+    if (pattern_index >= pattern.length) // End of the pattern
+        return 0;
+
+    int64_t start_index = text_index;
+    TextIter_t pattern_state = {0, 0}, text_state = {0, 0};
+    pat_t pat = parse_next_pat(pattern, &pattern_state, &pattern_index); // pattern_index now points at the rest of the pattern
+
+    if (pat.min == -1 && pat.max == -1) { // No explicit repetition count was given
+        if (pat.tag == PAT_ANY && pattern_index >= pattern.length) { // Trailing {..}: take everything that is left
+            pat.min = pat.max = MAX(1, text.length - text_index);
+        } else {
+            pat.min = 1;
+            pat.max = INT64_MAX;
+        }
+    }
+
+    int64_t capture_start = text_index;
+    int64_t count = 0, capture_len = 0, next_match_len = 0;
+
+    if (pat.tag == PAT_ANY && pattern_index >= pattern.length) { // Fast path for a trailing {..}
+        int64_t remaining = text.length - text_index;
+        capture_len = remaining >= pat.min ? MIN(remaining, pat.max) : -1;
+        text_index += capture_len; // With capture_len == -1 the value returned at the end is -1
+        goto success;
+    }
+
+    if (pat.min == 0 && pattern_index < pattern.length) { // Zero repetitions allowed: first try skipping this element
+        next_match_len = match(text, text_index, pattern, pattern_index, captures, capture_index + (pat.non_capturing ? 0 : 1));
+        if (next_match_len >= 0) {
+            capture_len = 0;
+            goto success;
+        }
+    }
+
+    while (count < pat.max) { // Stops at the first repetition count for which the rest of the pattern matches
+        int64_t match_len = match_pat(text, &text_state, text_index, pat);
+        if (match_len < 0)
+            break;
+        capture_len += match_len;
+        text_index += match_len;
+        count += 1;
+
+        if (pattern_index < pattern.length) { // More stuff after this
+            if (count < pat.min)
+                next_match_len = -1; // Not enough repetitions yet to try the rest
+            else
+                next_match_len = match(text, text_index, pattern, pattern_index, captures, capture_index + (pat.non_capturing ? 0 : 1));
+        } else {
+            next_match_len = 0;
+        }
+
+        if (match_len == 0) {
+            if (next_match_len >= 0) {
+                // If we're good to go, no need to keep re-matching zero-length
+                // matches till we hit max:
+                count = pat.max;
+                break;
+            } else {
+                return -1;
+            }
+        }
+
+        if (pattern_index < pattern.length && next_match_len >= 0)
+            break; // Next guy exists and wants to stop here
+
+        if (text_index >= text.length)
+            break;
+    }
+
+    if (count < pat.min || next_match_len < 0)
+        return -1;
+
+  success:
+    if (captures && capture_index < MAX_BACKREFS && !pat.non_capturing) { // Record this element's capture, if the caller asked for captures
+        if (pat.tag == PAT_PAIR || pat.tag == PAT_QUOTE) {
+            assert(capture_len > 0);
+            captures[capture_index] = (capture_t){
+                .index=capture_start + 1, // Skip leading quote/paren
+                .length=capture_len - 2, // Skip open/close 
+                .occupied=true,
+                .recursive=(pat.tag == PAT_PAIR),
+            };
+        } else {
+            captures[capture_index] = (capture_t){
+                .index=capture_start,
+                .length=capture_len,
+                .occupied=true,
+                .recursive=false,
+            };
+        }
+    }
+    return (text_index - start_index) + next_match_len; // This element's length plus the rest of the pattern's
+}
+
+#undef EAT1
+#undef EAT2
+#undef EAT_MANY
+
+static int64_t _find(Text_t text, Pattern_t pattern, int64_t first, int64_t last, int64_t *match_length) // Returns the first 0-based start in [first, last] where `pattern` matches, or -1
+{
+    int32_t first_grapheme = Text$get_grapheme(pattern, 0);
+    bool find_first = (first_grapheme != '{' // True when the pattern begins with a plain literal grapheme
+                       && !uc_is_property((ucs4_t)first_grapheme, UC_PROPERTY_QUOTATION_MARK)
+                       && !uc_is_property((ucs4_t)first_grapheme, UC_PROPERTY_PAIRED_PUNCTUATION));
+
+    TextIter_t text_state = {0, 0};
+
+    for (int64_t i = first; i <= last; i++) {
+        // Optimization: quickly skip ahead to first char in pattern:
+        if (find_first) {
+            while (i < text.length && Text$get_grapheme_fast(text, &text_state, i) != first_grapheme)
+                ++i;
+        }
+
+        int64_t m = match(text, i, pattern, 0, NULL, 0); // No captures are collected while searching
+        if (m >= 0) {
+            if (match_length)
+                *match_length = m;
+            return i;
+        }
+    }
+    if (match_length) // Not found: report a length of -1 as well
+        *match_length = -1;
+    return -1;
+}
+
+public Int_t Text$find(Text_t text, Pattern_t pattern, Int_t from_index, int64_t *match_length) // Returns the 1-based index of the first match at or after from_index, or 0 if none
+{
+    int64_t first = Int_to_Int64(from_index, false);
+    if (first == 0) fail("Invalid index: 0");
+    if (first < 0) first = text.length + first + 1; // Negative indices count back from the end
+    if (first > text.length || first < 1)
+        return I(0); // Out of range; note *match_length is left untouched on this path
+    int64_t found = _find(text, pattern, first-1, text.length-1, match_length);
+    return I(found+1); // _find's -1 ("not found") becomes 0
+}
+
+PUREFUNC public bool Text$has(Text_t text, Pattern_t pattern) // True if `pattern` matches anywhere in `text`
+{
+    if (Text$starts_with(pattern, Text("{start}"))) { // Anchored at the start: only index 0 can match
+        int64_t m = match(text, 0, pattern, 0, NULL, 0);
+        return m >= 0;
+    } else if (Text$ends_with(pattern, Text("{end}"))) { // Anchored at the end: the anchor lives in the pattern, not the text
+        for (int64_t i = text.length-1; i >= 0; i--) { // Scan backwards for a match that reaches the end
+            int64_t match_len = match(text, i, pattern, 0, NULL, 0);
+            if (match_len >= 0 && i + match_len == text.length)
+                return true;
+        }
+        return false;
+    } else {
+        int64_t found = _find(text, pattern, 0, text.length-1, NULL);
+        return (found >= 0);
+    }
+}
+
+PUREFUNC public bool Text$matches(Text_t text, Pattern_t pattern) // True if `pattern`, matched from index 0, spans the whole text
+{
+    int64_t m = match(text, 0, pattern, 0, NULL, 0);
+    return m == text.length;
+}
+
+public Array_t Text$find_all(Text_t text, Pattern_t pattern) // Collects the non-overlapping matches of `pattern`, scanning left to right
+{
+    if (pattern.length == 0) // special case
+        return (Array_t){.length=0};
+
+    Array_t matches = {};
+
+    for (int64_t i = 0; ; ) {
+        int64_t len = 0;
+        int64_t found = _find(text, pattern, i, text.length-1, &len);
+        if (found < 0) break;
+        Text_t match = Text$slice(text, I(found+1), I(found + len)); // Text$slice is called with 1-based, inclusive bounds
+        Array$insert(&matches, &match, I_small(0), sizeof(Text_t));
+        i = found + MAX(len, 1); // Always advance, even past a zero-length match
+    }
+
+    return matches;
+}
+
+static Text_t apply_backrefs(Text_t text, Pattern_t original_pattern, Text_t replacement, Pattern_t backref_pat, capture_t *captures) // Expand "<backref_pat><number>[;]" in `replacement` using `captures`
+{
+    if (backref_pat.length == 0) // No backref marker: the replacement is used verbatim
+        return replacement;
+
+    int32_t first_grapheme = Text$get_grapheme(backref_pat, 0);
+    bool find_first = (first_grapheme != '{' // True when the marker begins with a plain literal grapheme
+                       && !uc_is_property((ucs4_t)first_grapheme, UC_PROPERTY_QUOTATION_MARK)
+                       && !uc_is_property((ucs4_t)first_grapheme, UC_PROPERTY_PAIRED_PUNCTUATION));
+
+    Text_t ret = Text("");
+    TextIter_t state = {0, 0};
+    int64_t nonmatching_pos = 0; // Start of the literal text not yet copied into `ret`
+    for (int64_t pos = 0; pos < replacement.length; ) {
+        // Optimization: quickly skip ahead to first char in the backref pattern:
+        if (find_first) {
+            while (pos < replacement.length && Text$get_grapheme_fast(replacement, &state, pos) != first_grapheme)
+                ++pos;
+        }
+
+        int64_t backref_len = match(replacement, pos, backref_pat, 0, NULL, 0);
+        if (backref_len < 0) {
+            pos += 1;
+            continue;
+        }
+
+        int64_t after_backref = pos + backref_len;
+        int64_t backref = parse_int(replacement, &after_backref);
+        if (after_backref == pos + backref_len) { // Not actually a backref if there's no number
+            pos += 1;
+            continue;
+        }
+        if (backref < 0 || backref >= MAX_BACKREFS) fail("Invalid backref index: %ld (only 0-%d are allowed)", backref, MAX_BACKREFS-1); // Same bound match() uses when storing captures
+        backref_len = (after_backref - pos);
+
+        if (pos + backref_len < replacement.length && Text$get_grapheme_fast(replacement, &state, pos + backref_len) == ';') // Bounds-checked peek
+            backref_len += 1; // skip optional semicolon
+
+        if (!captures[backref].occupied)
+            fail("There is no capture number %ld!", backref);
+
+        Text_t backref_text = Text$slice(text, I(captures[backref].index+1), I(captures[backref].index + captures[backref].length));
+
+        if (captures[backref].recursive && original_pattern.length > 0) // Recursive captures get the replacement applied inside them too
+            backref_text = Text$replace(backref_text, original_pattern, replacement, backref_pat, true);
+
+        if (pos > nonmatching_pos) { // Copy the literal text that precedes this backref
+            Text_t before_slice = Text$slice(replacement, I(nonmatching_pos+1), I(pos));
+            ret = Text$concat(ret, before_slice, backref_text);
+        } else {
+            ret = Text$concat(ret, backref_text);
+        }
+
+        pos += backref_len;
+        nonmatching_pos = pos;
+    }
+    if (nonmatching_pos < replacement.length) { // Trailing literal text after the last backref
+        Text_t last_slice = Text$slice(replacement, I(nonmatching_pos+1), I(replacement.length));
+        ret = Text$concat(ret, last_slice);
+    }
+    return ret;
+}
+
+public Text_t Text$replace(Text_t text, Pattern_t pattern, Text_t replacement, Pattern_t backref_pat, bool recursive)
+{
+    Text_t ret = {.length=0}; // accumulates `text` with every match of `pattern` swapped for the expanded `replacement`
+    // The skip-ahead below is only enabled when the pattern does not begin with '{', a quotation mark, or paired punctuation:
+    int32_t first_grapheme = Text$get_grapheme(pattern, 0);
+    bool find_first = (first_grapheme != '{'
+                       && !uc_is_property((ucs4_t)first_grapheme, UC_PROPERTY_QUOTATION_MARK)
+                       && !uc_is_property((ucs4_t)first_grapheme, UC_PROPERTY_PAIRED_PUNCTUATION));
+
+    TextIter_t text_state = {0, 0};
+    int64_t nonmatching_pos = 0; // start of the text not yet copied into `ret`
+    for (int64_t pos = 0; pos < text.length; ) {
+        // Optimization: quickly skip ahead to first char in pattern:
+        if (find_first) {
+            while (pos < text.length && Text$get_grapheme_fast(text, &text_state, pos) != first_grapheme)
+                ++pos;
+        }
+
+        capture_t captures[MAX_BACKREFS] = {};
+        int64_t match_len = match(text, pos, pattern, 0, captures, 1);
+        if (match_len < 0) {
+            pos += 1;
+            continue;
+        }
+        captures[0] = (capture_t){ // capture 0 records the whole match
+            .index = pos, .length = match_len,
+            .occupied = true, .recursive = false,
+        };
+        // When `recursive` is false an empty pattern is passed, which turns off re-replacement inside apply_backrefs():
+        Text_t replacement_text = apply_backrefs(text, recursive ? pattern : Text(""), replacement, backref_pat, captures);
+        if (pos > nonmatching_pos) {
+            Text_t before_slice = Text$slice(text, I(nonmatching_pos+1), I(pos));
+            ret = Text$concat(ret, before_slice, replacement_text);
+        } else {
+            ret = Text$concat(ret, replacement_text);
+        }
+        nonmatching_pos = pos + match_len;
+        pos += MAX(match_len, 1); // advance at least one position, even after an empty match
+    }
+    if (nonmatching_pos < text.length) { // copy whatever follows the last match
+        Text_t last_slice = Text$slice(text, I(nonmatching_pos+1), I(text.length));
+        ret = Text$concat(ret, last_slice);
+    }
+    return ret;
+}
+
+public Text_t Text$trim(Text_t text, Pattern_t pattern, bool trim_left, bool trim_right)
+{
+    // Drops a match of `pattern` from the front and/or back of `text`; `lo`..`hi` are the kept 0-based bounds.
+    int64_t lo = 0;
+    int64_t hi = text.length - 1;
+
+    if (trim_left) {
+        int64_t prefix_len = match(text, 0, pattern, 0, NULL, 0);
+        if (prefix_len > 0) lo = prefix_len;
+    }
+
+    // Walk backwards from the end; the last (leftmost) start whose match reaches the end of the text wins:
+    for (int64_t start = text.length-1; trim_right && start >= lo; start--) {
+        int64_t suffix_len = match(text, start, pattern, 0, NULL, 0);
+        if (suffix_len > 0 && start + suffix_len == text.length) hi = start - 1;
+    }
+    return Text$slice(text, I(lo+1), I(hi+1));
+}
+
+// Replace each match of `pattern` in `text` with the result of calling `fn` on the matched slice;
+// text between matches is copied through unchanged.
+public Text_t Text$map(Text_t text, Pattern_t pattern, Closure_t fn)
+{
+    Text_t ret = {.length=0};
+    int32_t first_grapheme = Text$get_grapheme(pattern, 0);
+    bool find_first = (first_grapheme != '{'
+                       && !uc_is_property((ucs4_t)first_grapheme, UC_PROPERTY_QUOTATION_MARK)
+                       && !uc_is_property((ucs4_t)first_grapheme, UC_PROPERTY_PAIRED_PUNCTUATION));
+
+    TextIter_t text_state = {0, 0};
+    int64_t nonmatching_pos = 0; // start of the text not yet copied into `ret`
+    Text_t (*text_mapper)(Text_t, void*) = fn.fn;
+    for (int64_t pos = 0; pos < text.length; ) {
+        // Optimization: quickly skip ahead to first char in pattern:
+        if (find_first) {
+            while (pos < text.length && Text$get_grapheme_fast(text, &text_state, pos) != first_grapheme)
+                ++pos;
+        }
+
+        int64_t match_len = match(text, pos, pattern, 0, NULL, 0);
+        if (match_len < 0) { pos += 1; continue; }
+
+        Text_t replacement = text_mapper(Text$slice(text, I(pos+1), I(pos+match_len)), fn.userdata);
+        if (pos > nonmatching_pos) {
+            Text_t before_slice = Text$slice(text, I(nonmatching_pos+1), I(pos));
+            ret = Text$concat(ret, before_slice, replacement);
+        } else {
+            ret = Text$concat(ret, replacement);
+        }
+        nonmatching_pos = pos + match_len;
+        pos += MAX(match_len, 1); // a zero-length match must still advance, otherwise this loop never terminates
+    }
+    if (nonmatching_pos < text.length) {
+        Text_t last_slice = Text$slice(text, I(nonmatching_pos+1), I(text.length));
+        ret = Text$concat(ret, last_slice);
+    }
+    return ret;
+}
+
+// Scan `text` once; at each position try the pattern key of every entry in `replacements` in entry order and
+// substitute the first one that matches with its value (after expanding backrefs marked by `backref_pat`).
+public Text_t Text$replace_all(Text_t text, Table_t replacements, Pattern_t backref_pat, bool recursive)
+{
+    if (replacements.entries.length == 0) return text;
+
+    Text_t ret = {.length=0};
+    int64_t nonmatch_pos = 0; // start of the text not yet copied into `ret`
+    for (int64_t pos = 0; pos < text.length; ) {
+        // Find the first matching pattern at this position:
+        for (int64_t i = 0; i < replacements.entries.length; i++) {
+            Pattern_t pattern = *(Pattern_t*)(replacements.entries.data + i*replacements.entries.stride);
+            capture_t captures[MAX_BACKREFS] = {};
+            int64_t len = match(text, pos, pattern, 0, captures, 1);
+            if (len < 0) continue;
+            // Capture 0 is the whole match; it must be flagged occupied or apply_backrefs() rejects references to it:
+            captures[0] = (capture_t){.index = pos, .length = len, .occupied = true, .recursive = false};
+
+            // If we skipped over some non-matching text before finding a match, insert it here:
+            if (pos > nonmatch_pos) {
+                Text_t before_slice = Text$slice(text, I(nonmatch_pos+1), I(pos));
+                ret = Text$concat(ret, before_slice);
+            }
+
+            // Concatenate the replacement:
+            Text_t replacement = *(Text_t*)(replacements.entries.data + i*replacements.entries.stride + sizeof(Text_t));
+            Text_t replacement_text = apply_backrefs(text, recursive ? pattern : Text(""), replacement, backref_pat, captures);
+            ret = Text$concat(ret, replacement_text);
+            nonmatch_pos = pos + len; // after an empty match the current character is still unconsumed and must be kept
+            pos += MAX(len, 1);
+            goto next_pos;
+        }
+
+        pos += 1;
+      next_pos:
+        continue;
+    }
+    if (nonmatch_pos < text.length) { // copy whatever follows the last match
+        Text_t last_slice = Text$slice(text, I(nonmatch_pos+1), I(text.length));
+        ret = Text$concat(ret, last_slice);
+    }
+    return ret;
+}
+
+public Array_t Text$split(Text_t text, Pattern_t pattern)
+{
+    // Breaks `text` into the pieces lying between successive matches of `pattern`.
+    if (text.length == 0) // special case: nothing to split
+        return (Array_t){.length=0};
+    if (pattern.length == 0) // special case: hand the whole job to Text$clusters()
+        return Text$clusters(text);
+
+    Array_t pieces = {};
+    Int_t piece_start = I_small(1); // index passed to Text$find/Text$slice where the current piece begins
+
+    while (true) {
+        int64_t sep_len = 0;
+        Int_t sep_at = Text$find(text, pattern, piece_start, &sep_len);
+        if (I_is_zero(sep_at)) // zero means no further separator was found
+            break;
+        Text_t piece = Text$slice(text, piece_start, Int$minus(sep_at, I_small(1)));
+        Array$insert(&pieces, &piece, I_small(0), sizeof(Text_t));
+        piece_start = Int$plus(sep_at, I(MAX(sep_len, 1))); // step over the separator (at least one position)
+    }
+
+    Text_t tail = Text$slice(text, piece_start, I(text.length)); // whatever follows the last separator
+    Array$insert(&pieces, &tail, I_small(0), sizeof(Text_t));
+    return pieces;
+}
+
+
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/builtins/pattern.h b/builtins/pattern.h
new file mode 100644
index 0000000..804fb28
--- /dev/null
+++ b/builtins/pattern.h
@@ -0,0 +1,33 @@
+#pragma once
+
+// The type representing text patterns for pattern matching.
+
+#include <stdbool.h>
+#include <printf.h>
+#include <stdint.h>
+
+#include "datatypes.h"
+#include "integers.h"
+#include "types.h"
+
+#define Pattern(text) ((Pattern_t)Text(text))
+#define Patterns(...) ((Pattern_t)Texts(__VA_ARGS__))
+
+Text_t Text$replace(Text_t str, Pattern_t pat, Text_t replacement, Pattern_t backref_pat, bool recursive); // swap each match of `pat` for `replacement`, expanding backrefs marked by `backref_pat`
+Pattern_t Pattern$escape_text(Text_t text);
+Text_t Text$replace_all(Text_t text, Table_t replacements, Pattern_t backref_pat, bool recursive); // like Text$replace, but tries every pattern key of `replacements` at each position
+Array_t Text$split(Text_t text, Pattern_t pattern); // pieces of `text` between matches of `pattern`
+Text_t Text$trim(Text_t text, Pattern_t pattern, bool trim_left, bool trim_right); // strip a match of `pattern` from the start and/or end
+Int_t Text$find(Text_t text, Pattern_t pattern, Int_t i, int64_t *match_length); // NOTE(review): Text$split treats a zero result as "no match" and passes a 1-based start in `i` -- confirm
+Array_t Text$find_all(Text_t text, Pattern_t pattern); // array of the matched slices of `text`
+PUREFUNC bool Text$has(Text_t text, Pattern_t pattern);
+PUREFUNC bool Text$matches(Text_t text, Pattern_t pattern);
+Text_t Text$map(Text_t text, Pattern_t pattern, Closure_t fn); // replace each match with the Text returned by `fn` for that match
+
+#define Pattern$hash Text$hash
+#define Pattern$compare Text$compare
+#define Pattern$equal Text$equal
+
+extern const TypeInfo Pattern$info;
+
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/builtins/shell.c b/builtins/shell.c
index 5bb34e8..d880718 100644
--- a/builtins/shell.c
+++ b/builtins/shell.c
@@ -5,6 +5,7 @@
 #include "array.h"
 #include "functions.h"
 #include "integers.h"
+#include "pattern.h"
 #include "shell.h"
 #include "text.h"
 #include "types.h"
diff --git a/builtins/text.c b/builtins/text.c
index 3902863..795f55d 100644
--- a/builtins/text.c
+++ b/builtins/text.c
@@ -84,12 +84,6 @@ typedef struct {
     const uint8_t *utf8;
 } synthetic_grapheme_t;
 
-typedef struct {
-    int64_t subtext, sum_of_previous_subtexts;
-} text_iter_t;
-
-#define MAX_BACKREFS 100
-
 // Synthetic grapheme clusters (clusters of more than one codepoint):
 static Table_t grapheme_ids_by_codepoints = {}; // ucs4_t* length-prefixed codepoints -> int32_t ID
 
@@ -103,9 +97,6 @@ static int32_t num_synthetic_graphemes = 0;
 #define GRAPHEME_CODEPOINTS(id) (&synthetic_graphemes[-(id)-1].utf32_cluster[1])
 #define GRAPHEME_UTF8(id) (synthetic_graphemes[-(id)-1].utf8)
 
-static int32_t get_grapheme(Text_t text, int64_t index);
-static int32_t _get_grapheme(Text_t text, text_iter_t *state, int64_t index);
-#define _get_main_grapheme(...) MAIN_GRAPHEME_CODEPOINT(_get_grapheme(__VA_ARGS__))
 static Text_t text_from_u32(ucs4_t *codepoints, int64_t num_codepoints, bool normalize);
 
 PUREFUNC static bool graphemes_equal(ucs4_t **a, ucs4_t **b) {
@@ -133,7 +124,7 @@ static const TypeInfo GraphemeIDLookupTableInfo = {
 };
 
 #pragma GCC diagnostic ignored "-Wstack-protector"
-int32_t get_synthetic_grapheme(const ucs4_t *codepoints, int64_t utf32_len)
+public int32_t get_synthetic_grapheme(const ucs4_t *codepoints, int64_t utf32_len)
 {
     ucs4_t length_prefixed[1+utf32_len] = {};
     length_prefixed[0] = (ucs4_t)utf32_len;
@@ -298,8 +289,8 @@ static bool is_concat_stable(Text_t a, Text_t b)
     if (a.length == 0 || b.length == 0)
         return true;
 
-    int32_t last_a = get_grapheme(a, a.length-1);
-    int32_t first_b = get_grapheme(b, 0);
+    int32_t last_a = Text$get_grapheme(a, a.length-1);
+    int32_t first_b = Text$get_grapheme(b, 0);
 
     // Synthetic graphemes are weird and probably need to check with normalization:
     if (last_a < 0 || first_b < 0)
@@ -385,8 +376,8 @@ static Text_t concat2(Text_t a, Text_t b)
         return concat2_assuming_safe(a, b);
 
     // Do full normalization of the last/first characters
-    int32_t last_a = get_grapheme(a, a.length-1);
-    int32_t first_b = get_grapheme(b, 0);
+    int32_t last_a = Text$get_grapheme(a, a.length-1);
+    int32_t first_b = Text$get_grapheme(b, 0);
 
     size_t utf32_len = (last_a >= 0 ? 1 : NUM_GRAPHEME_CODEPOINTS(last_a)) + (first_b >= 0 ? 1 : NUM_GRAPHEME_CODEPOINTS(first_b)); 
     ucs4_t join_graphemes[utf32_len] = {};
@@ -833,7 +824,7 @@ PUREFUNC public uint64_t Text$hash(Text_t *text)
     return text->hash;
 }
 
-int32_t _get_grapheme(Text_t text, text_iter_t *state, int64_t index)
+public int32_t Text$get_grapheme_fast(Text_t text, TextIter_t *state, int64_t index)
 {
     switch (text.tag) {
     case TEXT_ASCII: return index < text.length ? (int32_t)text.ascii[index] : 0;
@@ -841,7 +832,7 @@ int32_t _get_grapheme(Text_t text, text_iter_t *state, int64_t index)
     case TEXT_GRAPHEMES: return index < text.length ? text.graphemes[index] : 0;
     case TEXT_SHORT_GRAPHEMES: return index < text.length ? text.short_graphemes[index] : 0;
     case TEXT_SUBTEXT: {
-        text_iter_t backup_state = {0, 0};
+        TextIter_t backup_state = {0, 0};
         if (!state) state = &backup_state;
 
         if (index < 0 || index >= text.length)
@@ -853,7 +844,7 @@ int32_t _get_grapheme(Text_t text, text_iter_t *state, int64_t index)
         }
         for (;;) {
             if (index < state->sum_of_previous_subtexts + text.subtexts[state->subtext].length)
-                return _get_grapheme(text.subtexts[state->subtext], NULL, index - state->sum_of_previous_subtexts);
+                return Text$get_grapheme_fast(text.subtexts[state->subtext], NULL, index - state->sum_of_previous_subtexts);
             state->sum_of_previous_subtexts += text.subtexts[state->subtext].length;
             state->subtext += 1;
         }
@@ -864,10 +855,9 @@ int32_t _get_grapheme(Text_t text, text_iter_t *state, int64_t index)
     return 0;
 }
 
-int32_t get_grapheme(Text_t text, int64_t index)
+public ucs4_t Text$get_main_grapheme_fast(Text_t text, TextIter_t *state, int64_t index)
 {
-    text_iter_t state = {0, 0};
-    return _get_grapheme(text, &state, index);
+    return MAIN_GRAPHEME_CODEPOINT(Text$get_grapheme_fast(text, state, index));
 }
 
 PUREFUNC public int32_t Text$compare(const Text_t *a, const Text_t *b)
@@ -875,10 +865,10 @@ PUREFUNC public int32_t Text$compare(const Text_t *a, const Text_t *b)
     if (a == b) return 0;
 
     int64_t len = MAX(a->length, b->length);
-    text_iter_t a_state = {0, 0}, b_state = {0, 0};
+    TextIter_t a_state = {0, 0}, b_state = {0, 0};
     for (int64_t i = 0; i < len; i++) {
-        int32_t ai = _get_grapheme(*a, &a_state, i);
-        int32_t bi = _get_grapheme(*b, &b_state, i);
+        int32_t ai = Text$get_grapheme_fast(*a, &a_state, i);
+        int32_t bi = Text$get_grapheme_fast(*b, &b_state, i);
         if (ai == bi) continue;
         int32_t cmp;
         if (ai > 0 && bi > 0) {
@@ -909,10 +899,10 @@ PUREFUNC public bool Text$starts_with(Text_t text, Text_t prefix)
 {
     if (text.length < prefix.length)
         return false;
-    text_iter_t text_state = {0, 0}, prefix_state = {0, 0};
+    TextIter_t text_state = {0, 0}, prefix_state = {0, 0};
     for (int64_t i = 0; i < prefix.length; i++) {
-        int32_t text_i = _get_grapheme(text, &text_state, i);
-        int32_t prefix_i = _get_grapheme(prefix, &prefix_state, i);
+        int32_t text_i = Text$get_grapheme_fast(text, &text_state, i);
+        int32_t prefix_i = Text$get_grapheme_fast(prefix, &prefix_state, i);
         if (text_i != prefix_i) return false;
     }
     return true;
@@ -922,10 +912,10 @@ PUREFUNC public bool Text$ends_with(Text_t text, Text_t suffix)
 {
     if (text.length < suffix.length)
         return false;
-    text_iter_t text_state = {0, 0}, prefix_state = {0, 0};
+    TextIter_t text_state = {0, 0}, prefix_state = {0, 0};
     for (int64_t i = 0; i < suffix.length; i++) {
-        int32_t text_i = _get_grapheme(text, &text_state, text.length - suffix.length + i);
-        int32_t suffix_i = _get_grapheme(suffix, &prefix_state, i);
+        int32_t text_i = Text$get_grapheme_fast(text, &text_state, text.length - suffix.length + i);
+        int32_t suffix_i = Text$get_grapheme_fast(suffix, &prefix_state, i);
         if (text_i != suffix_i) return false;
     }
     return true;
@@ -936,10 +926,10 @@ PUREFUNC public bool Text$equal_values(Text_t a, Text_t b)
     if (a.length != b.length || (a.hash != 0 && b.hash != 0 && a.hash != b.hash))
         return false;
     int64_t len = a.length;
-    text_iter_t a_state = {0, 0}, b_state = {0, 0};
+    TextIter_t a_state = {0, 0}, b_state = {0, 0};
     for (int64_t i = 0; i < len; i++) {
-        int32_t ai = _get_grapheme(a, &a_state, i);
-        int32_t bi = _get_grapheme(b, &b_state, i);
+        int32_t ai = Text$get_grapheme_fast(a, &a_state, i);
+        int32_t bi = Text$get_grapheme_fast(b, &b_state, i);
         if (ai != bi) return false;
     }
     return true;
@@ -956,11 +946,11 @@ PUREFUNC public bool Text$equal_ignoring_case(Text_t a, Text_t b)
     if (a.length != b.length)
         return false;
     int64_t len = a.length;
-    text_iter_t a_state = {0, 0}, b_state = {0, 0};
+    TextIter_t a_state = {0, 0}, b_state = {0, 0};
     const char *language = uc_locale_language();
     for (int64_t i = 0; i < len; i++) {
-        int32_t ai = _get_grapheme(a, &a_state, i);
-        int32_t bi = _get_grapheme(b, &b_state, i);
+        int32_t ai = Text$get_grapheme_fast(a, &a_state, i);
+        int32_t bi = Text$get_grapheme_fast(b, &b_state, i);
         if (ai != bi) {
             const ucs4_t *a_codepoints = ai >= 0 ? (ucs4_t*)&ai : GRAPHEME_CODEPOINTS(ai);
             int64_t a_len = ai >= 0 ? 1 : NUM_GRAPHEME_CODEPOINTS(ai);
@@ -1016,794 +1006,6 @@ public Text_t Text$title(Text_t text)
     return ret;
 }
 
-static inline void skip_whitespace(Text_t text, int64_t *i)
-{
-    text_iter_t state = {0, 0};
-    while (*i < text.length) {
-        int32_t grapheme = _get_grapheme(text, &state, *i);
-        if (grapheme > 0 && !uc_is_property_white_space((ucs4_t)grapheme))
-            return;
-        *i += 1;
-    }
-}
-
-static inline bool match_grapheme(Text_t text, int64_t *i, int32_t grapheme)
-{
-    if (*i < text.length && get_grapheme(text, *i) == grapheme) {
-        *i += 1;
-        return true;
-    }
-    return false;
-}
-
-static inline bool match_str(Text_t text, int64_t *i, const char *str)
-{
-    text_iter_t state = {0, 0};
-    int64_t matched = 0;
-    while (matched[str]) {
-        if (*i + matched >= text.length || _get_grapheme(text, &state, *i + matched) != str[matched])
-            return false;
-        matched += 1;
-    }
-    *i += matched;
-    return true;
-}
-
-static inline bool match_property(Text_t text, int64_t *i, uc_property_t prop)
-{
-    if (*i >= text.length) return false;
-    int32_t grapheme = get_grapheme(text, *i);
-    // TODO: check every codepoint in the cluster?
-    if (uc_is_property(MAIN_GRAPHEME_CODEPOINT(grapheme), prop)) {
-        *i += 1;
-        return true;
-    }
-    return false;
-}
-
-static int64_t parse_int(Text_t text, int64_t *i)
-{
-    text_iter_t state = {0, 0};
-    int64_t value = 0;
-    for (;; *i += 1) {
-        ucs4_t grapheme = _get_main_grapheme(text, &state, *i);
-        int digit = uc_digit_value((ucs4_t)grapheme);
-        if (digit < 0) break;
-        if (value >= INT64_MAX/10) break;
-        value = 10*value + digit;
-    }
-    return value;
-}
-
-const char *get_property_name(Text_t text, int64_t *i)
-{
-    skip_whitespace(text, i);
-    char *name = GC_MALLOC_ATOMIC(UNINAME_MAX);
-    char *dest = name;
-    text_iter_t state = {0, 0};
-    while (*i < text.length) {
-        int32_t grapheme = _get_grapheme(text, &state, *i);
-        if (!(grapheme & ~0xFF) && (isalnum(grapheme) || grapheme == ' ' || grapheme == '_' || grapheme == '-')) {
-            *dest = (char)grapheme;
-            ++dest;
-            if (dest >= name + UNINAME_MAX - 1)
-                break;
-        } else {
-            break;
-        }
-        *i += 1;
-    }
-
-    while (dest > name && dest[-1] == ' ')
-        *(dest--) = '\0';
-
-    if (dest == name) return NULL;
-    *dest = '\0';
-    return name;
-}
-
-#define EAT1(text, state, index, cond) ({\
-        int32_t grapheme = _get_grapheme(text, state, index); \
-        bool success = (cond); \
-        if (success) index += 1; \
-        success; })
-
-#define EAT2(text, state, index, cond1, cond2) ({\
-        int32_t grapheme = _get_grapheme(text, state, index); \
-        bool success = (cond1); \
-        if (success) { \
-            grapheme = _get_grapheme(text, state, index + 1); \
-            success = (cond2); \
-            if (success) \
-                index += 2; \
-        } \
-        success; })
-
-
-#define EAT_MANY(text, state, index, cond) ({ int64_t _n = 0; while (EAT1(text, state, index, cond)) { _n += 1; } _n; })
-
-int64_t match_email(Text_t text, int64_t index)
-{
-    // email = local "@" domain
-    // local = 1-64 ([a-zA-Z0-9!#$%&‘*+–/=?^_`.{|}~] | non-ascii)
-    // domain = dns-label ("." dns-label)*
-    // dns-label = 1-63 ([a-zA-Z0-9-] | non-ascii)
-
-    text_iter_t state = {0, 0};
-    if (index > 0) {
-        ucs4_t prev_codepoint = _get_main_grapheme(text, &state, index - 1);
-        if (uc_is_property_alphabetic((ucs4_t)prev_codepoint))
-            return -1;
-    }
-
-    int64_t start_index = index;
-
-    // Local part:
-    int64_t local_len = 0;
-    static const char *allowed_local = "!#$%&‘*+–/=?^_`.{|}~";
-    while (EAT1(text, &state, index,
-                (grapheme & ~0x7F) || isalnum((char)grapheme) || strchr(allowed_local, (char)grapheme))) {
-        local_len += 1;
-        if (local_len > 64) return -1;
-    }
-    
-    if (!EAT1(text, &state, index, grapheme == '@'))
-        return -1;
-
-    // Host
-    int64_t host_len = 0;
-    do {
-        int64_t label_len = 0;
-        while (EAT1(text, &state, index,
-                    (grapheme & ~0x7F) || isalnum((char)grapheme) || grapheme == '-')) {
-            label_len += 1;
-            if (label_len > 63) return -1;
-        }
-
-        if (label_len == 0)
-            return -1;
-
-        host_len += label_len;
-        if (host_len > 255)
-            return -1;
-        host_len += 1;
-    } while (EAT1(text, &state, index, grapheme == '.'));
-
-    return index - start_index;
-}
-
-int64_t match_ipv6(Text_t text, int64_t index)
-{
-    text_iter_t state = {0, 0};
-    if (index > 0) {
-        int32_t prev_codepoint = _get_grapheme(text, &state, index - 1);
-        if ((prev_codepoint & ~0x7F) && (isxdigit(prev_codepoint) || prev_codepoint == ':'))
-            return -1;
-    }
-    int64_t start_index = index;
-    const int NUM_CLUSTERS = 8;
-    bool double_colon_used = false;
-    for (int cluster = 0; cluster < NUM_CLUSTERS; cluster++) {
-        for (int digits = 0; digits < 4; digits++) {
-            if (!EAT1(text, &state, index, ~(grapheme & ~0x7F) && isxdigit((char)grapheme)))
-                break;
-        }
-        if (EAT1(text, &state, index, ~(grapheme & ~0x7F) && isxdigit((char)grapheme)))
-            return -1; // Too many digits
-
-        if (cluster == NUM_CLUSTERS-1) {
-            break;
-        } else if (!EAT1(text, &state, index, grapheme == ':')) {
-            if (double_colon_used)
-                break;
-            return -1;
-        }
-
-        if (EAT1(text, &state, index, grapheme == ':')) {
-            if (double_colon_used)
-                return -1;
-            double_colon_used = true;
-        }
-    }
-    return index - start_index;
-}
-
-static int64_t match_ipv4(Text_t text, int64_t index)
-{
-    text_iter_t state = {0, 0};
-    if (index > 0) {
-        int32_t prev_codepoint = _get_grapheme(text, &state, index - 1);
-        if ((prev_codepoint & ~0x7F) && (isdigit(prev_codepoint) || prev_codepoint == '.'))
-            return -1;
-    }
-    int64_t start_index = index;
-
-    const int NUM_CLUSTERS = 4;
-    for (int cluster = 0; cluster < NUM_CLUSTERS; cluster++) {
-        for (int digits = 0; digits < 3; digits++) {
-            if (!EAT1(text, &state, index, ~(grapheme & ~0x7F) && isdigit((char)grapheme))) {
-                if (digits == 0) return -1;
-                break;
-            }
-        }
-
-        if (EAT1(text, &state, index, ~(grapheme & ~0x7F) && isdigit((char)grapheme)))
-            return -1; // Too many digits
-
-        if (cluster == NUM_CLUSTERS-1)
-            break;
-        else if (!EAT1(text, &state, index, grapheme == '.'))
-            return -1;
-    }
-    return (index - start_index);
-}
-
-int64_t match_ip(Text_t text, int64_t index)
-{
-    int64_t len = match_ipv6(text, index);
-    if (len >= 0) return len;
-    len = match_ipv4(text, index);
-    return (len >= 0) ? len : -1;
-}
-
-int64_t match_uri(Text_t text, int64_t index)
-{
-    // URI = scheme ":" ["//" authority] path ["?" query] ["#" fragment]
-    // scheme = [a-zA-Z] [a-zA-Z0-9+.-]
-    // authority = [userinfo "@"] host [":" port]
-
-    text_iter_t state = {0, 0};
-    if (index > 0) {
-        int32_t prev_codepoint = _get_grapheme(text, &state, index - 1);
-        if (uc_is_property_alphabetic(MAIN_GRAPHEME_CODEPOINT(prev_codepoint)))
-            return -1;
-    }
-
-    int64_t start_index = index;
-
-    // Scheme:
-    if (!EAT1(text, &state, index, isalpha(grapheme)))
-        return -1;
-
-    EAT_MANY(text, &state, index,
-             !(grapheme & ~0x7F) && (isalnum(grapheme) || grapheme == '+' || grapheme == '.' || grapheme == '-'));
-
-    if (index == start_index)
-        return -1;
-
-    if (!match_grapheme(text, &index, ':'))
-        return -1;
-
-    // Authority:
-    if (match_str(text, &index, "//")) {
-        int64_t authority_start = index;
-        // Username or host:
-        static const char *forbidden = "#?:@ \t\r\n<>[]{}\\^|\"`/";
-        if (EAT_MANY(text, &state, index, (grapheme & ~0x7F) || !strchr(forbidden, (char)grapheme)) == 0)
-            return -1;
-
-        if (EAT1(text, &state, index, grapheme == '@')) {
-            // Found a username, now get a host:
-            if (EAT_MANY(text, &state, index, (grapheme & ~0x7F) || !strchr(forbidden, (char)grapheme)) == 0)
-                return -1;
-        } else {
-            int64_t ip = authority_start;
-            int64_t ipv4_len = match_ipv4(text, ip);
-            if (ipv4_len > 0) {
-                ip += ipv4_len;
-            } else if (match_grapheme(text, &ip, '[')) {
-                ip += match_ipv6(text, ip);
-                if (ip > authority_start + 1 && match_grapheme(text, &ip, ']'))
-                    index = ip;
-            }
-        }
-
-        // Port:
-        if (EAT1(text, &state, index, grapheme == ':')) {
-            if (EAT_MANY(text, &state, index, !(grapheme & ~0x7F) && isdigit(grapheme)) == 0)
-                return -1;
-        }
-        if (!EAT1(text, &state, index, grapheme == '/'))
-            return (index - start_index); // No path
-    } else {
-        // Optional path root:
-        EAT1(text, &state, index, grapheme == '/');
-    }
-
-    // Path:
-    static const char *non_path = " \"#?<>[]{}\\^`|";
-    EAT_MANY(text, &state, index, (grapheme & ~0x7F) || !strchr(non_path, (char)grapheme));
-
-    if (EAT1(text, &state, index, grapheme == '?')) { // Query
-        static const char *non_query = " \"#<>[]{}\\^`|";
-        EAT_MANY(text, &state, index, (grapheme & ~0x7F) || !strchr(non_query, (char)grapheme));
-    }
-    
-    if (EAT1(text, &state, index, grapheme == '#')) { // Fragment
-        static const char *non_fragment = " \"#<>[]{}\\^`|";
-        EAT_MANY(text, &state, index, (grapheme & ~0x7F) || !strchr(non_fragment, (char)grapheme));
-    }
-    return index - start_index;
-}
-
-int64_t match_url(Text_t text, int64_t index)
-{
-    int64_t lookahead = index;
-    if (!(match_str(text, &lookahead, "https:")
-        || match_str(text, &lookahead, "http:")
-        || match_str(text, &lookahead, "ftp:")
-        || match_str(text, &lookahead, "wss:")
-        || match_str(text, &lookahead, "ws:")))
-        return -1;
-
-    return match_uri(text, index);
-}
-
-int64_t match_id(Text_t text, int64_t index)
-{
-    text_iter_t state = {0, 0};
-    if (!EAT1(text, &state, index, uc_is_property((ucs4_t)grapheme, UC_PROPERTY_XID_START)))
-        return -1;
-    return 1 + EAT_MANY(text, &state, index, uc_is_property((ucs4_t)grapheme, UC_PROPERTY_XID_CONTINUE));
-}
-
-int64_t match_int(Text_t text, int64_t index)
-{
-    text_iter_t state = {0, 0};
-    int64_t len = EAT_MANY(text, &state, index, uc_is_property((ucs4_t)grapheme, UC_PROPERTY_DECIMAL_DIGIT));
-    return len >= 0 ? len : -1;
-}
-
-int64_t match_num(Text_t text, int64_t index)
-{
-    text_iter_t state = {0, 0};
-    bool negative = EAT1(text, &state, index, grapheme == '-') ? 1 : 0;
-    int64_t pre_decimal = EAT_MANY(text, &state, index,
-                                   uc_is_property((ucs4_t)grapheme, UC_PROPERTY_DECIMAL_DIGIT));
-    bool decimal = (EAT1(text, &state, index, grapheme == '.') == 1);
-    int64_t post_decimal = decimal ? EAT_MANY(text, &state, index,
-                                              uc_is_property((ucs4_t)grapheme, UC_PROPERTY_DECIMAL_DIGIT)) : 0;
-    if (pre_decimal == 0 && post_decimal == 0)
-        return -1;
-    return negative + pre_decimal + decimal + post_decimal;
-}
-
-int64_t match_newline(Text_t text, int64_t index)
-{
-    if (index >= text.length)
-        return -1;
-
-    text_iter_t state = {0, 0};
-    ucs4_t grapheme = index >= text.length ? 0 : _get_main_grapheme(text, &state, index);
-    if (grapheme == '\n')
-        return 1;
-    if (grapheme == '\r' && _get_grapheme(text, &state, index + 1) == '\n')
-        return 2;
-    return -1;
-}
-
-typedef struct {
-    int64_t index, length;
-    bool occupied, recursive;
-} capture_t;
-
-typedef struct {
-    enum { PAT_START, PAT_END, PAT_ANY, PAT_GRAPHEME, PAT_PROPERTY, PAT_QUOTE, PAT_PAIR, PAT_FUNCTION } tag;
-    bool negated, non_capturing;
-    int64_t min, max;
-    union {
-        int32_t grapheme;
-        uc_property_t property;
-        int64_t (*fn)(Text_t, int64_t);
-        int32_t quote_graphemes[2];
-        int32_t pair_graphemes[2];
-    };
-} pat_t;
-
-int64_t match_pat(Text_t text, text_iter_t *state, int64_t index, pat_t pat)
-{
-    int32_t grapheme = index >= text.length ? 0 : _get_grapheme(text, state, index);
-
-    switch (pat.tag) {
-    case PAT_START: {
-        if (index == 0)
-            return pat.negated ? -1 : 0;
-        return pat.negated ? 0 : -1;
-    }
-    case PAT_END: {
-        if (index >= text.length)
-            return pat.negated ? -1 : 0;
-        return pat.negated ? 0 : -1;
-    }
-    case PAT_ANY: {
-        assert(!pat.negated);
-        return (index < text.length) ? 1 : -1;
-    }
-    case PAT_GRAPHEME: {
-        if (index >= text.length)
-            return -1;
-        else if (grapheme == pat.grapheme)
-            return pat.negated ? -1 : 1;
-        return pat.negated ? 1 : -1;
-    }
-    case PAT_PROPERTY: {
-        if (index >= text.length)
-            return -1;
-        else if (uc_is_property((ucs4_t)grapheme, pat.property))
-            return pat.negated ? -1 : 1;
-        return pat.negated ? 1 : -1;
-    }
-    case PAT_PAIR: {
-        // Nested punctuation: (?), [?], etc
-        if (index >= text.length)
-            return -1;
-
-        int32_t open = pat.pair_graphemes[0];
-        if (grapheme != open)
-            return pat.negated ? 1 : -1;
-
-        int32_t close = pat.pair_graphemes[1];
-        int64_t depth = 1;
-        int64_t match_len = 1;
-        for (; depth > 0; match_len++) {
-            if (index + match_len >= text.length)
-                return pat.negated ? 1 : -1;
-
-            int32_t c = _get_grapheme(text, state, index + match_len);
-            if (c == open)
-                depth += 1;
-            else if (c == close)
-                depth -= 1;
-        }
-        return pat.negated ? -1 : match_len;
-    }
-    case PAT_QUOTE: {
-        // Nested quotes: "?", '?', etc
-        if (index >= text.length)
-            return -1;
-
-        int32_t open = pat.quote_graphemes[0];
-        if (grapheme != open)
-            return pat.negated ? 1 : -1;
-
-        int32_t close = pat.quote_graphemes[1];
-        for (int64_t i = index + 1; i < text.length; i++) {
-            int32_t c = _get_grapheme(text, state, i);
-            if (c == close) {
-                return pat.negated ? -1 : (i - index) + 1;
-            } else if (c == '\\' && index + 1 < text.length) {
-                i += 1; // Skip ahead an extra step
-            }
-        }
-        return pat.negated ? 1 : -1;
-    }
-    case PAT_FUNCTION: {
-        int64_t match_len = pat.fn(text, index);
-        if (match_len >= 0)
-            return pat.negated ? -1 : match_len;
-        return pat.negated ? 1 : -1;
-    }
-    default: errx(1, "Invalid pattern");
-    }
-    errx(1, "Unreachable");
-}
-
-pat_t parse_next_pat(Text_t pattern, text_iter_t *state, int64_t *index)
-{
-    if (EAT2(pattern, state, *index,
-             uc_is_property((ucs4_t)grapheme, UC_PROPERTY_QUOTATION_MARK),
-             grapheme == '?')) {
-        // Quotations: "?", '?', etc
-        int32_t open = _get_grapheme(pattern, state, *index-2);
-        int32_t close = open;
-        uc_mirror_char((ucs4_t)open, (ucs4_t*)&close);
-        if (!match_grapheme(pattern, index, close))
-            fail("Pattern's closing quote is missing: %k", &pattern);
-
-        return (pat_t){
-            .tag=PAT_QUOTE,
-            .min=1, .max=1,
-            .quote_graphemes={open, close},
-        };
-    } else if (EAT2(pattern, state, *index,
-                    uc_is_property((ucs4_t)grapheme, UC_PROPERTY_PAIRED_PUNCTUATION),
-                    grapheme == '?')) {
-        // Nested punctuation: (?), [?], etc
-        int32_t open = _get_grapheme(pattern, state, *index-2);
-        int32_t close = open;
-        uc_mirror_char((ucs4_t)open, (ucs4_t*)&close);
-        if (!match_grapheme(pattern, index, close))
-            fail("Pattern's closing brace is missing: %k", &pattern);
-        
-        return (pat_t){
-            .tag=PAT_PAIR,
-            .min=1, .max=1,
-            .pair_graphemes={open, close},
-        };
-    } else if (EAT1(pattern, state, *index,
-                    grapheme == '{')) { // named patterns {id}, {2-3 hex}, etc.
-        skip_whitespace(pattern, index);
-        int64_t min, max;
-        if (uc_is_digit((ucs4_t)_get_grapheme(pattern, state, *index))) {
-            min = parse_int(pattern, index);
-            skip_whitespace(pattern, index);
-            if (match_grapheme(pattern, index, '+')) {
-                max = INT64_MAX;
-            } else if (match_grapheme(pattern, index, '-')) {
-                max = parse_int(pattern, index);
-            } else {
-                max = min;
-            }
-            if (min > max) fail("Minimum repetitions (%ld) is less than the maximum (%ld)", min, max);
-        } else {
-            min = -1, max = -1;
-        }
-
-        skip_whitespace(pattern, index);
-
-        bool negated = match_grapheme(pattern, index, '!');
-#define PAT(_tag, ...) ((pat_t){.min=min, .max=max, .negated=negated, .tag=_tag, __VA_ARGS__})
-        const char *prop_name;
-        if (match_str(pattern, index, ".."))
-            prop_name = "..";
-        else
-            prop_name = get_property_name(pattern, index);
-
-        if (!prop_name) {
-            // Literal character, e.g. {1?}
-            skip_whitespace(pattern, index);
-            int32_t grapheme = _get_grapheme(pattern, state, (*index)++);
-            if (!match_grapheme(pattern, index, '}'))
-                fail("Missing closing '}' in pattern: %k", &pattern);
-            return PAT(PAT_GRAPHEME, .grapheme=grapheme);
-        } else if (strlen(prop_name) == 1) {
-            // Single letter names: {1+ A}
-            skip_whitespace(pattern, index);
-            if (!match_grapheme(pattern, index, '}'))
-                fail("Missing closing '}' in pattern: %k", &pattern);
-            return PAT(PAT_GRAPHEME, .grapheme=prop_name[0]);
-        }
-
-        skip_whitespace(pattern, index);
-        if (!match_grapheme(pattern, index, '}'))
-            fail("Missing closing '}' in pattern: %k", &pattern);
-
-        switch (tolower(prop_name[0])) {
-        case '.':
-            if (prop_name[1] == '.') {
-                if (negated)
-                    return ((pat_t){.tag=PAT_END, .min=min, .max=max, .non_capturing=true});
-                else
-                    return PAT(PAT_ANY); 
-            }
-            break;
-        case 'd':
-            if (strcasecmp(prop_name, "digit") == 0) {
-                return PAT(PAT_PROPERTY, .property=UC_PROPERTY_DECIMAL_DIGIT);
-            }
-            break;
-        case 'e':
-            if (strcasecmp(prop_name, "end") == 0) {
-                return PAT(PAT_END, .non_capturing=!negated);
-            } else if (strcasecmp(prop_name, "email") == 0) {
-                return PAT(PAT_FUNCTION, .fn=match_email);
-            } else if (strcasecmp(prop_name, "emoji") == 0) {
-                return PAT(PAT_PROPERTY, .property=UC_PROPERTY_EMOJI);
-            }
-            break;
-        case 'i':
-            if (strcasecmp(prop_name, "id") == 0) {
-                return PAT(PAT_FUNCTION, .fn=match_id);
-            } else if (strcasecmp(prop_name, "int") == 0) {
-                return PAT(PAT_FUNCTION, .fn=match_int);
-            } else if (strcasecmp(prop_name, "ipv4") == 0) {
-                return PAT(PAT_FUNCTION, .fn=match_ipv4);
-            } else if (strcasecmp(prop_name, "ipv6") == 0) {
-                return PAT(PAT_FUNCTION, .fn=match_ipv6);
-            } else if (strcasecmp(prop_name, "ip") == 0) {
-                return PAT(PAT_FUNCTION, .fn=match_ip);
-            }
-            break;
-        case 'n':
-            if (strcasecmp(prop_name, "nl") == 0 || strcasecmp(prop_name, "newline") == 0
-                || strcasecmp(prop_name, "crlf")) {
-                return PAT(PAT_FUNCTION, .fn=match_newline);
-            } else if (strcasecmp(prop_name, "num") == 0) {
-                return PAT(PAT_FUNCTION, .fn=match_num);
-            }
-            break;
-        case 's':
-            if (strcasecmp(prop_name, "start") == 0) {
-                return PAT(PAT_START, .non_capturing=!negated);
-            }
-            break;
-        case 'u':
-            if (strcasecmp(prop_name, "uri") == 0) {
-                return PAT(PAT_FUNCTION, .fn=match_uri);
-            } else if (strcasecmp(prop_name, "url") == 0) {
-                return PAT(PAT_FUNCTION, .fn=match_url);
-            }
-            break;
-        default: break;
-        }
-
-        uc_property_t prop = uc_property_byname(prop_name);
-        if (uc_property_is_valid(prop))
-            return PAT(PAT_PROPERTY, .property=prop);
-
-        ucs4_t grapheme = unicode_name_character(prop_name);
-        if (grapheme == UNINAME_INVALID)
-            fail("Not a valid property or character name: %s", prop_name);
-        return PAT(PAT_GRAPHEME, .grapheme=(int32_t)grapheme);
-#undef PAT
-    } else {
-        return (pat_t){.tag=PAT_GRAPHEME, .non_capturing=true, .min=1, .max=1, .grapheme=_get_grapheme(pattern, state, (*index)++)};
-    }
-}
-
-int64_t match(Text_t text, int64_t text_index, Pattern_t pattern, int64_t pattern_index, capture_t *captures, int64_t capture_index)
-{
-    if (pattern_index >= pattern.length) // End of the pattern
-        return 0;
-
-    int64_t start_index = text_index;
-    text_iter_t pattern_state = {0, 0}, text_state = {0, 0};
-    pat_t pat = parse_next_pat(pattern, &pattern_state, &pattern_index);
-
-    if (pat.min == -1 && pat.max == -1) {
-        if (pat.tag == PAT_ANY && pattern_index >= pattern.length) {
-            pat.min = pat.max = MAX(1, text.length - text_index);
-        } else {
-            pat.min = 1;
-            pat.max = INT64_MAX;
-        }
-    }
-
-    int64_t capture_start = text_index;
-    int64_t count = 0, capture_len = 0, next_match_len = 0;
-
-    if (pat.tag == PAT_ANY && pattern_index >= pattern.length) {
-        int64_t remaining = text.length - text_index;
-        capture_len = remaining >= pat.min ? MIN(remaining, pat.max) : -1;
-        text_index += capture_len;
-        goto success;
-    }
-
-    if (pat.min == 0 && pattern_index < pattern.length) {
-        next_match_len = match(text, text_index, pattern, pattern_index, captures, capture_index + (pat.non_capturing ? 0 : 1));
-        if (next_match_len >= 0) {
-            capture_len = 0;
-            goto success;
-        }
-    }
-
-    while (count < pat.max) {
-        int64_t match_len = match_pat(text, &text_state, text_index, pat);
-        if (match_len < 0)
-            break;
-        capture_len += match_len;
-        text_index += match_len;
-        count += 1;
-
-        if (pattern_index < pattern.length) { // More stuff after this
-            if (count < pat.min)
-                next_match_len = -1;
-            else
-                next_match_len = match(text, text_index, pattern, pattern_index, captures, capture_index + (pat.non_capturing ? 0 : 1));
-        } else {
-            next_match_len = 0;
-        }
-
-        if (match_len == 0) {
-            if (next_match_len >= 0) {
-                // If we're good to go, no need to keep re-matching zero-length
-                // matches till we hit max:
-                count = pat.max;
-                break;
-            } else {
-                return -1;
-            }
-        }
-
-        if (pattern_index < pattern.length && next_match_len >= 0)
-            break; // Next guy exists and wants to stop here
-
-        if (text_index >= text.length)
-            break;
-    }
-
-    if (count < pat.min || next_match_len < 0)
-        return -1;
-
-  success:
-    if (captures && capture_index < MAX_BACKREFS && !pat.non_capturing) {
-        if (pat.tag == PAT_PAIR || pat.tag == PAT_QUOTE) {
-            assert(capture_len > 0);
-            captures[capture_index] = (capture_t){
-                .index=capture_start + 1, // Skip leading quote/paren
-                .length=capture_len - 2, // Skip open/close 
-                .occupied=true,
-                .recursive=(pat.tag == PAT_PAIR),
-            };
-        } else {
-            captures[capture_index] = (capture_t){
-                .index=capture_start,
-                .length=capture_len,
-                .occupied=true,
-                .recursive=false,
-            };
-        }
-    }
-    return (text_index - start_index) + next_match_len;
-}
-
-#undef EAT1
-#undef EAT2
-#undef EAT_MANY
-
-static int64_t _find(Text_t text, Pattern_t pattern, int64_t first, int64_t last, int64_t *match_length)
-{
-    int32_t first_grapheme = get_grapheme(pattern, 0);
-    bool find_first = (first_grapheme != '{'
-                       && !uc_is_property((ucs4_t)first_grapheme, UC_PROPERTY_QUOTATION_MARK)
-                       && !uc_is_property((ucs4_t)first_grapheme, UC_PROPERTY_PAIRED_PUNCTUATION));
-
-    text_iter_t text_state = {0, 0};
-
-    for (int64_t i = first; i <= last; i++) {
-        // Optimization: quickly skip ahead to first char in pattern:
-        if (find_first) {
-            while (i < text.length && _get_grapheme(text, &text_state, i) != first_grapheme)
-                ++i;
-        }
-
-        int64_t m = match(text, i, pattern, 0, NULL, 0);
-        if (m >= 0) {
-            if (match_length)
-                *match_length = m;
-            return i;
-        }
-    }
-    if (match_length)
-        *match_length = -1;
-    return -1;
-}
-
-public Int_t Text$find(Text_t text, Pattern_t pattern, Int_t from_index, int64_t *match_length)
-{
-    int64_t first = Int_to_Int64(from_index, false);
-    if (first == 0) fail("Invalid index: 0");
-    if (first < 0) first = text.length + first + 1;
-    if (first > text.length || first < 1)
-        return I(0);
-    int64_t found = _find(text, pattern, first-1, text.length-1, match_length);
-    return I(found+1);
-}
-
-PUREFUNC public bool Text$has(Text_t text, Pattern_t pattern)
-{
-    if (Text$starts_with(pattern, Text("{start}"))) {
-        int64_t m = match(text, 0, pattern, 0, NULL, 0);
-        return m >= 0;
-    } else if (Text$ends_with(text, Text("{end}"))) {
-        for (int64_t i = text.length-1; i >= 0; i--) {
-            int64_t match_len = match(text, i, pattern, 0, NULL, 0);
-            if (match_len >= 0 && i + match_len == text.length)
-                return true;
-        }
-        return false;
-    } else {
-        int64_t found = _find(text, pattern, 0, text.length-1, NULL);
-        return (found >= 0);
-    }
-}
-
-PUREFUNC public bool Text$matches(Text_t text, Pattern_t pattern)
-{
-    int64_t m = match(text, 0, pattern, 0, NULL, 0);
-    return m == text.length;
-}
-
 public int printf_text_size(const struct printf_info *info, size_t n, int argtypes[n], int sizes[n])
 {
     if (n < 1) return -1;
@@ -1835,9 +1037,9 @@ static inline Text_t _quoted(Text_t text, bool colorize, char quote_char)
     add_char(quote_char);
 
 #define add_escaped(str) ({ if (colorize) add_str("\x1b[34;1m"); add_char('\\'); add_str(str); if (colorize) add_str("\x1b[0;35m"); })
-    text_iter_t state = {0, 0};
+    TextIter_t state = {0, 0};
     for (int64_t i = 0; i < text.length; i++) {
-        int32_t g = _get_grapheme(text, &state, i);
+        int32_t g = Text$get_grapheme_fast(text, &state, i);
         switch (g) {
         case '\a': add_escaped("a"); break;
         case '\b': add_escaped("b"); break;
@@ -1904,263 +1106,6 @@ public Text_t Text$quoted(Text_t text, bool colorize)
     return _quoted(text, colorize, '"');
 }
 
-public Array_t Text$find_all(Text_t text, Pattern_t pattern)
-{
-    if (pattern.length == 0) // special case
-        return (Array_t){.length=0};
-
-    Array_t matches = {};
-
-    for (int64_t i = 0; ; ) {
-        int64_t len = 0;
-        int64_t found = _find(text, pattern, i, text.length-1, &len);
-        if (found < 0) break;
-        Text_t match = Text$slice(text, I(found+1), I(found + len));
-        Array$insert(&matches, &match, I_small(0), sizeof(Text_t));
-        i = found + MAX(len, 1);
-    }
-
-    return matches;
-}
-
-static Text_t apply_backrefs(Text_t text, Pattern_t original_pattern, Text_t replacement, Pattern_t backref_pat, capture_t *captures)
-{
-    if (backref_pat.length == 0)
-        return replacement;
-
-    int32_t first_grapheme = get_grapheme(backref_pat, 0);
-    bool find_first = (first_grapheme != '{'
-                       && !uc_is_property((ucs4_t)first_grapheme, UC_PROPERTY_QUOTATION_MARK)
-                       && !uc_is_property((ucs4_t)first_grapheme, UC_PROPERTY_PAIRED_PUNCTUATION));
-
-    Text_t ret = Text("");
-    text_iter_t state = {0, 0};
-    int64_t nonmatching_pos = 0;
-    for (int64_t pos = 0; pos < replacement.length; ) {
-        // Optimization: quickly skip ahead to first char in the backref pattern:
-        if (find_first) {
-            while (pos < replacement.length && _get_grapheme(replacement, &state, pos) != first_grapheme)
-                ++pos;
-        }
-
-        int64_t backref_len = match(replacement, pos, backref_pat, 0, NULL, 0);
-        if (backref_len < 0) {
-            pos += 1;
-            continue;
-        }
-
-        int64_t after_backref = pos + backref_len;
-        int64_t backref = parse_int(replacement, &after_backref);
-        if (after_backref == pos + backref_len) { // Not actually a backref if there's no number
-            pos += 1;
-            continue;
-        }
-        if (backref < 0 || backref > 9) fail("Invalid backref index: %ld (only 0-%d are allowed)", backref, MAX_BACKREFS-1);
-        backref_len = (after_backref - pos);
-
-        if (_get_grapheme(replacement, &state, pos + backref_len) == ';')
-            backref_len += 1; // skip optional semicolon
-
-        if (!captures[backref].occupied)
-            fail("There is no capture number %ld!", backref);
-
-        Text_t backref_text = Text$slice(text, I(captures[backref].index+1), I(captures[backref].index + captures[backref].length));
-
-        if (captures[backref].recursive && original_pattern.length > 0)
-            backref_text = Text$replace(backref_text, original_pattern, replacement, backref_pat, true);
-
-        if (pos > nonmatching_pos) {
-            Text_t before_slice = Text$slice(replacement, I(nonmatching_pos+1), I(pos));
-            ret = Text$concat(ret, before_slice, backref_text);
-        } else {
-            ret = concat2(ret, backref_text);
-        }
-
-        pos += backref_len;
-        nonmatching_pos = pos;
-    }
-    if (nonmatching_pos < replacement.length) {
-        Text_t last_slice = Text$slice(replacement, I(nonmatching_pos+1), I(replacement.length));
-        ret = concat2(ret, last_slice);
-    }
-    return ret;
-}
-
-public Text_t Text$replace(Text_t text, Pattern_t pattern, Text_t replacement, Pattern_t backref_pat, bool recursive)
-{
-    Text_t ret = {.length=0};
-
-    int32_t first_grapheme = get_grapheme(pattern, 0);
-    bool find_first = (first_grapheme != '{'
-                       && !uc_is_property((ucs4_t)first_grapheme, UC_PROPERTY_QUOTATION_MARK)
-                       && !uc_is_property((ucs4_t)first_grapheme, UC_PROPERTY_PAIRED_PUNCTUATION));
-
-    text_iter_t text_state = {0, 0};
-    int64_t nonmatching_pos = 0;
-    for (int64_t pos = 0; pos < text.length; ) {
-        // Optimization: quickly skip ahead to first char in pattern:
-        if (find_first) {
-            while (pos < text.length && _get_grapheme(text, &text_state, pos) != first_grapheme)
-                ++pos;
-        }
-
-        capture_t captures[MAX_BACKREFS] = {};
-        int64_t match_len = match(text, pos, pattern, 0, captures, 1);
-        if (match_len < 0) {
-            pos += 1;
-            continue;
-        }
-        captures[0] = (capture_t){
-            .index = pos, .length = match_len,
-            .occupied = true, .recursive = false,
-        };
-
-        Text_t replacement_text = apply_backrefs(text, recursive ? pattern : Text(""), replacement, backref_pat, captures);
-        if (pos > nonmatching_pos) {
-            Text_t before_slice = Text$slice(text, I(nonmatching_pos+1), I(pos));
-            ret = Text$concat(ret, before_slice, replacement_text);
-        } else {
-            ret = concat2(ret, replacement_text);
-        }
-        nonmatching_pos = pos + match_len;
-        pos += MAX(match_len, 1);
-    }
-    if (nonmatching_pos < text.length) {
-        Text_t last_slice = Text$slice(text, I(nonmatching_pos+1), I(text.length));
-        ret = concat2(ret, last_slice);
-    }
-    return ret;
-}
-
-public Text_t Text$trim(Text_t text, Pattern_t pattern, bool trim_left, bool trim_right)
-{
-    int64_t first = 0, last = text.length-1;
-    if (trim_left) {
-        int64_t match_len = match(text, 0, pattern, 0, NULL, 0);
-        if (match_len > 0)
-            first = match_len;
-    }
-
-    if (trim_right) {
-        for (int64_t i = text.length-1; i >= first; i--) {
-            int64_t match_len = match(text, i, pattern, 0, NULL, 0);
-            if (match_len > 0 && i + match_len == text.length)
-                last = i-1;
-        }
-    }
-    return Text$slice(text, I(first+1), I(last+1));
-}
-
-public Text_t Text$map(Text_t text, Pattern_t pattern, Closure_t fn)
-{
-    Text_t ret = {.length=0};
-
-    int32_t first_grapheme = get_grapheme(pattern, 0);
-    bool find_first = (first_grapheme != '{'
-                       && !uc_is_property((ucs4_t)first_grapheme, UC_PROPERTY_QUOTATION_MARK)
-                       && !uc_is_property((ucs4_t)first_grapheme, UC_PROPERTY_PAIRED_PUNCTUATION));
-
-    text_iter_t text_state = {0, 0};
-    int64_t nonmatching_pos = 0;
-
-    Text_t (*text_mapper)(Text_t, void*) = fn.fn;
-    for (int64_t pos = 0; pos < text.length; pos++) {
-        // Optimization: quickly skip ahead to first char in pattern:
-        if (find_first) {
-            while (pos < text.length && _get_grapheme(text, &text_state, pos) != first_grapheme)
-                ++pos;
-        }
-
-        int64_t match_len = match(text, pos, pattern, 0, NULL, 0);
-        if (match_len < 0) continue;
-
-        Text_t replacement = text_mapper(Text$slice(text, I(pos+1), I(pos+match_len)), fn.userdata);
-        if (pos > nonmatching_pos) {
-            Text_t before_slice = Text$slice(text, I(nonmatching_pos+1), I(pos));
-            ret = Text$concat(ret, before_slice, replacement);
-        } else {
-            ret = concat2(ret, replacement);
-        }
-        nonmatching_pos = pos + match_len;
-        pos += (match_len - 1);
-    }
-    if (nonmatching_pos < text.length) {
-        Text_t last_slice = Text$slice(text, I(nonmatching_pos+1), I(text.length));
-        ret = concat2(ret, last_slice);
-    }
-    return ret;
-}
-
-public Text_t Text$replace_all(Text_t text, Table_t replacements, Text_t backref_pat, bool recursive)
-{
-    if (replacements.entries.length == 0) return text;
-
-    Text_t ret = {.length=0};
-
-    int64_t nonmatch_pos = 0;
-    for (int64_t pos = 0; pos < text.length; ) {
-        // Find the first matching pattern at this position:
-        for (int64_t i = 0; i < replacements.entries.length; i++) {
-            Pattern_t pattern = *(Pattern_t*)(replacements.entries.data + i*replacements.entries.stride);
-            capture_t captures[MAX_BACKREFS] = {};
-            int64_t len = match(text, pos, pattern, 0, captures, 1);
-            if (len < 0) continue;
-            captures[0].index = pos;
-            captures[0].length = len;
-
-            // If we skipped over some non-matching text before finding a match, insert it here:
-            if (pos > nonmatch_pos) {
-                Text_t before_slice = Text$slice(text, I(nonmatch_pos+1), I(pos));
-                ret = concat2(ret, before_slice);
-            }
-
-            // Concatenate the replacement:
-            Text_t replacement = *(Text_t*)(replacements.entries.data + i*replacements.entries.stride + sizeof(Text_t));
-            Text_t replacement_text = apply_backrefs(text, recursive ? pattern : Text(""), replacement, backref_pat, captures);
-            ret = concat2(ret, replacement_text);
-            pos += MAX(len, 1);
-            nonmatch_pos = pos;
-            goto next_pos;
-        }
-
-        pos += 1;
-      next_pos:
-        continue;
-    }
-
-    if (nonmatch_pos <= text.length) {
-        Text_t last_slice = Text$slice(text, I(nonmatch_pos+1), I(text.length));
-        ret = concat2(ret, last_slice);
-    }
-    return ret;
-}
-
-public Array_t Text$split(Text_t text, Pattern_t pattern)
-{
-    if (text.length == 0) // special case
-        return (Array_t){.length=0};
-
-    if (pattern.length == 0) // special case
-        return Text$clusters(text);
-
-    Array_t chunks = {};
-
-    Int_t i = I_small(1);
-    for (;;) {
-        int64_t len = 0;
-        Int_t found = Text$find(text, pattern, i, &len);
-        if (I_is_zero(found)) break;
-        Text_t chunk = Text$slice(text, i, Int$minus(found, I_small(1)));
-        Array$insert(&chunks, &chunk, I_small(0), sizeof(Text_t));
-        i = Int$plus(found, I(MAX(len, 1)));
-    }
-
-    Text_t last_chunk = Text$slice(text, i, I(text.length));
-    Array$insert(&chunks, &last_chunk, I_small(0), sizeof(Text_t));
-
-    return chunks;
-}
-
 public Text_t Text$join(Text_t glue, Array_t pieces)
 {
     if (pieces.length == 0) return (Text_t){.length=0};
@@ -2210,9 +1155,9 @@ public Array_t Text$clusters(Text_t text)
 public Array_t Text$utf32_codepoints(Text_t text)
 {
     Array_t codepoints = {.atomic=1};
-    text_iter_t state = {0, 0};
+    TextIter_t state = {0, 0};
     for (int64_t i = 0; i < text.length; i++) {
-        int32_t grapheme = _get_grapheme(text, &state, i);
+        int32_t grapheme = Text$get_grapheme_fast(text, &state, i);
         if (grapheme < 0) {
             for (int64_t c = 0; c < NUM_GRAPHEME_CODEPOINTS(grapheme); c++) {
                 ucs4_t subg = GRAPHEME_CODEPOINTS(grapheme)[c];
@@ -2245,9 +1190,9 @@ static inline const char *codepoint_name(ucs4_t c)
 public Array_t Text$codepoint_names(Text_t text)
 {
     Array_t names = {};
-    text_iter_t state = {0, 0};
+    TextIter_t state = {0, 0};
     for (int64_t i = 0; i < text.length; i++) {
-        int32_t grapheme = _get_grapheme(text, &state, i);
+        int32_t grapheme = Text$get_grapheme_fast(text, &state, i);
         if (grapheme < 0) {
             for (int64_t c = 0; c < NUM_GRAPHEME_CODEPOINTS(grapheme); c++) {
                 const char *name = codepoint_name(GRAPHEME_CODEPOINTS(grapheme)[c]);
@@ -2297,10 +1242,10 @@ public Text_t Text$from_bytes(Array_t bytes)
 public Array_t Text$lines(Text_t text)
 {
     Array_t lines = {};
-    text_iter_t state = {0, 0};
+    TextIter_t state = {0, 0};
     for (int64_t i = 0, line_start = 0; i < text.length; i++) {
-        int32_t grapheme = _get_grapheme(text, &state, i);
-        if (grapheme == '\r' && _get_grapheme(text, &state, i + 1) == '\n') { // CRLF
+        int32_t grapheme = Text$get_grapheme_fast(text, &state, i);
+        if (grapheme == '\r' && Text$get_grapheme_fast(text, &state, i + 1) == '\n') { // CRLF
             Text_t line = Text$slice(text, I(line_start+1), I(i));
             Array$insert(&lines, &line, I_small(0), sizeof(Text_t));
             i += 1; // skip one extra for CR
@@ -2330,9 +1275,9 @@ public Pattern_t Pattern$escape_text(Text_t text)
     Array_t graphemes = {.atomic=1};
 #define add_char(c) Array$insert_value(&graphemes, (ucs4_t)c, I_small(0), sizeof(ucs4_t))
 #define add_str(s) ({ for (const char *_c = s; *_c; ++_c) Array$insert_value(&graphemes, (ucs4_t)*_c, I_small(0), sizeof(ucs4_t)); })
-    text_iter_t state = {0, 0};
+    TextIter_t state = {0, 0};
     for (int64_t i = 0; i < text.length; i++) {
-        int32_t g = _get_grapheme(text, &state, i);
+        int32_t g = Text$get_grapheme_fast(text, &state, i);
         ucs4_t g0 = g < 0 ? GRAPHEME_CODEPOINTS(g)[0] : (ucs4_t)g;
 
         if (g == '{') {
diff --git a/builtins/text.h b/builtins/text.h
index 2e58ad6..e5a7b70 100644
--- a/builtins/text.h
+++ b/builtins/text.h
@@ -6,11 +6,16 @@
 #include <stdbool.h>
 #include <printf.h>
 #include <stdint.h>
+#include <unistr.h>
 
 #include "datatypes.h"
 #include "integers.h"
 #include "types.h"
 
+typedef struct {
+    int64_t subtext, sum_of_previous_subtexts;
+} TextIter_t;
+
 int printf_text(FILE *stream, const struct printf_info *info, const void *const args[]);
 int printf_text_size(const struct printf_info *info, size_t n, int argtypes[n], int sizes[n]);
 
@@ -34,16 +39,8 @@ Text_t Text$lower(Text_t text);
 Text_t Text$title(Text_t text);
 Text_t Text$as_text(const void *text, bool colorize, const TypeInfo *info);
 Text_t Text$quoted(Text_t str, bool colorize);
-Text_t Text$replace(Text_t str, Pattern_t pat, Text_t replacement, Pattern_t backref_pat, bool recursive);
-Text_t Text$replace_all(Text_t text, Table_t replacements, Pattern_t backref_pat, bool recursive);
-Array_t Text$split(Text_t text, Pattern_t pattern);
-Text_t Text$trim(Text_t text, Pattern_t pattern, bool trim_left, bool trim_right);
-Int_t Text$find(Text_t text, Pattern_t pattern, Int_t i, int64_t *match_length);
-Array_t Text$find_all(Text_t text, Pattern_t pattern);
-PUREFUNC bool Text$has(Text_t text, Pattern_t pattern);
 PUREFUNC bool Text$starts_with(Text_t text, Text_t prefix);
 PUREFUNC bool Text$ends_with(Text_t text, Text_t suffix);
-PUREFUNC bool Text$matches(Text_t text, Pattern_t pattern);
 char *Text$as_c_string(Text_t text);
 __attribute__((format(printf, 1, 2)))
 public Text_t Text$format(const char *fmt, ...);
@@ -56,19 +53,16 @@ Text_t Text$from_codepoint_names(Array_t codepoint_names);
 Text_t Text$from_bytes(Array_t bytes);
 Array_t Text$lines(Text_t text);
 Text_t Text$join(Text_t glue, Array_t pieces);
-Text_t Text$map(Text_t text, Pattern_t pattern, Closure_t fn);
 Text_t Text$repeat(Text_t text, Int_t count);
+int32_t Text$get_grapheme_fast(Text_t text, TextIter_t *state, int64_t index);
+ucs4_t Text$get_main_grapheme_fast(Text_t text, TextIter_t *state, int64_t index);
+
+static inline int32_t Text$get_grapheme(Text_t text, int64_t index)
+{
+    TextIter_t state = {0, 0};
+    return Text$get_grapheme_fast(text, &state, index);
+}
 
 extern const TypeInfo Text$info;
 
-#define Pattern(text) ((Pattern_t)Text(text))
-#define Patterns(...) ((Pattern_t)Texts(__VA_ARGS__))
-Pattern_t Pattern$escape_text(Text_t text);
-
-#define Pattern$hash Text$hash
-#define Pattern$compare Text$compare
-#define Pattern$equal Text$equal
-
-extern const TypeInfo Pattern$info;
-
 // vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/builtins/tomo.h b/builtins/tomo.h
index 9354cca..c674ffe 100644
--- a/builtins/tomo.h
+++ b/builtins/tomo.h
@@ -22,6 +22,7 @@
 #include "nums.h"
 #include "optionals.h"
 #include "path.h"
+#include "pattern.h"
 #include "pointer.h"
 #include "range.h"
 #include "shell.h"
diff --git a/docs/text.md b/docs/text.md
index 8a9f641..18960b0 100644
--- a/docs/text.md
+++ b/docs/text.md
@@ -255,9 +255,11 @@ finding the value because the two texts are equivalent under normalization.
 # Patterns
 
 As an alternative to full regular expressions, Tomo provides a limited string
-matching pattern syntax that is intended to solve 80% of use cases in 2% of the
-code size (PCRE's codebase is roughly 150k lines of code, and Tomo's entire
-Text codebase is around 1.8K lines of code).
+matching pattern syntax that is intended to solve 80% of use cases in under 1%
+of the code size (PCRE's codebase is roughly 150k lines of code, and Tomo's
+pattern matching code is roughly 1k lines of code). Tomo's pattern matching
+syntax is highly readable and works well for matching literal text without
+getting [leaning toothpick syndrome](https://en.wikipedia.org/wiki/Leaning_toothpick_syndrome).
 
 For more advanced use cases, consider linking against a C library for regular
 expressions or pattern matching.