Clean up pattern code to make better use of TextIter_t and fix up URI/URL patterns
author: Bruce Hill <bruce@bruce-hill.com> 2024-09-14 00:12:52 -0400
committer: Bruce Hill <bruce@bruce-hill.com> 2024-09-14 00:12:52 -0400
commit: 6012a00763afdd467e71b1657bd9a39a4cba4493 (patch)
tree: 6994a7074b4182211c3beec8553c02f638976b7f /stdlib
parent: 2b0556084919ace0700e4480f7fa2886cf31b3e4 (diff)
3 files changed, 254 insertions, 237 deletions
diff --git a/stdlib/patterns.c b/stdlib/patterns.c
index 81beaffe..2de7fe3a 100644
--- a/stdlib/patterns.c
+++ b/stdlib/patterns.c
@@ -14,32 +14,48 @@
 
 #define MAX_BACKREFS 100
 
-static inline void skip_whitespace(Text_t text, int64_t *i)
+typedef struct {
+    int64_t index, length;
+    bool occupied, recursive;
+} capture_t;
+
+typedef struct {
+    enum { PAT_START, PAT_END, PAT_ANY, PAT_GRAPHEME, PAT_PROPERTY, PAT_QUOTE, PAT_PAIR, PAT_FUNCTION } tag;
+    bool negated, non_capturing;
+    int64_t min, max;
+    union {
+        int32_t grapheme;
+        uc_property_t property;
+        int64_t (*fn)(TextIter_t *, int64_t);
+        int32_t quote_graphemes[2];
+        int32_t pair_graphemes[2];
+    };
+} pat_t;
+
+static inline void skip_whitespace(TextIter_t *state, int64_t *i)
 {
-    TextIter_t state = {0, 0};
-    while (*i < text.length) {
-        int32_t grapheme = Text$get_grapheme_fast(text, &state, *i);
+    while (*i < state->text.length) {
+        int32_t grapheme = Text$get_grapheme_fast(state, *i);
         if (grapheme > 0 && !uc_is_property_white_space((ucs4_t)grapheme))
             return;
         *i += 1;
     }
 }
 
-static inline bool match_grapheme(Text_t text, int64_t *i, int32_t grapheme)
+static inline bool match_grapheme(TextIter_t *state, int64_t *i, int32_t grapheme)
 {
-    if (*i < text.length && Text$get_grapheme(text, *i) == grapheme) {
+    if (*i < state->text.length && Text$get_grapheme_fast(state, *i) == grapheme) {
         *i += 1;
         return true;
     }
     return false;
 }
 
-static inline bool match_str(Text_t text, int64_t *i, const char *str)
+static inline bool match_str(TextIter_t *state, int64_t *i, const char *str)
 {
-    TextIter_t state = {0, 0};
     int64_t matched = 0;
     while (matched[str]) {
-        if (*i + matched >= text.length || Text$get_grapheme_fast(text, &state, *i + matched) != str[matched])
+        if (*i + matched >= state->text.length || Text$get_grapheme_fast(state, *i + matched) != str[matched])
             return false;
         matched += 1;
     }
@@ -47,11 +63,10 @@ static inline bool match_str(Text_t text, int64_t *i, const char *str)
     return true;
 }
 
-static inline bool match_property(Text_t text, int64_t *i, uc_property_t prop)
+static inline bool match_property(TextIter_t *state, int64_t *i, uc_property_t prop)
 {
-    if (*i >= text.length) return false;
-    TextIter_t state = {};
-    ucs4_t grapheme = Text$get_main_grapheme_fast(text, &state, *i);
+    if (*i >= state->text.length) return false;
+    ucs4_t grapheme = Text$get_main_grapheme_fast(state, *i);
     // TODO: check every codepoint in the cluster?
     if (uc_is_property(grapheme, prop)) {
         *i += 1;
@@ -60,12 +75,11 @@ static inline bool match_property(Text_t text, int64_t *i, uc_property_t prop)
     return false;
 }
 
-static int64_t parse_int(Text_t text, int64_t *i)
+static int64_t parse_int(TextIter_t *state, int64_t *i)
 {
-    TextIter_t state = {0, 0};
     int64_t value = 0;
     for (;; *i += 1) {
-        ucs4_t grapheme = Text$get_main_grapheme_fast(text, &state, *i);
+        ucs4_t grapheme = Text$get_main_grapheme_fast(state, *i);
         int digit = uc_digit_value((ucs4_t)grapheme);
         if (digit < 0) break;
         if (value >= INT64_MAX/10) break;
@@ -74,14 +88,13 @@ static int64_t parse_int(Text_t text, int64_t *i)
     return value;
 }
 
-const char *get_property_name(Text_t text, int64_t *i)
+static const char *get_property_name(TextIter_t *state, int64_t *i)
 {
-    skip_whitespace(text, i);
+    skip_whitespace(state, i);
     char *name = GC_MALLOC_ATOMIC(UNINAME_MAX);
     char *dest = name;
-    TextIter_t state = {0, 0};
-    while (*i < text.length) {
-        int32_t grapheme = Text$get_grapheme_fast(text, &state, *i);
+    while (*i < state->text.length) {
+        int32_t grapheme = Text$get_grapheme_fast(state, *i);
         if (!(grapheme & ~0xFF) && (isalnum(grapheme) || grapheme == ' ' || grapheme == '_' || grapheme == '-')) {
             *dest = (char)grapheme;
             ++dest;
@@ -101,17 +114,17 @@ const char *get_property_name(Text_t text, int64_t *i)
     return name;
 }
 
-#define EAT1(text, state, index, cond) ({\
-        int32_t grapheme = Text$get_grapheme_fast(text, state, index); \
+#define EAT1(state, index, cond) ({\
+        int32_t grapheme = Text$get_grapheme_fast(state, index); \
         bool success = (cond); \
         if (success) index += 1; \
         success; })
 
-#define EAT2(text, state, index, cond1, cond2) ({\
-        int32_t grapheme = Text$get_grapheme_fast(text, state, index); \
+#define EAT2(state, index, cond1, cond2) ({\
+        int32_t grapheme = Text$get_grapheme_fast(state, index); \
         bool success = (cond1); \
         if (success) { \
-            grapheme = Text$get_grapheme_fast(text, state, index + 1); \
+            grapheme = Text$get_grapheme_fast(state, index + 1); \
             success = (cond2); \
             if (success) \
                 index += 2; \
@@ -119,18 +132,17 @@ const char *get_property_name(Text_t text, int64_t *i)
         success; })
 
 
-#define EAT_MANY(text, state, index, cond) ({ int64_t _n = 0; while (EAT1(text, state, index, cond)) { _n += 1; } _n; })
+#define EAT_MANY(state, index, cond) ({ int64_t _n = 0; while (EAT1(state, index, cond)) { _n += 1; } _n; })
 
-int64_t match_email(Text_t text, int64_t index)
+static int64_t match_email(TextIter_t *state, int64_t index)
 {
     // email = local "@" domain
     // local = 1-64 ([a-zA-Z0-9!#$%&‘*+–/=?^_`.{|}~] | non-ascii)
     // domain = dns-label ("." dns-label)*
     // dns-label = 1-63 ([a-zA-Z0-9-] | non-ascii)
 
-    TextIter_t state = {0, 0};
     if (index > 0) {
-        ucs4_t prev_codepoint = Text$get_main_grapheme_fast(text, &state, index - 1);
+        ucs4_t prev_codepoint = Text$get_main_grapheme_fast(state, index - 1);
         if (uc_is_property_alphabetic((ucs4_t)prev_codepoint))
             return -1;
     }
@@ -140,20 +152,20 @@ int64_t match_email(Text_t text, int64_t index)
     // Local part:
     int64_t local_len = 0;
     static const char *allowed_local = "!#$%&‘*+–/=?^_`.{|}~";
-    while (EAT1(text, &state, index,
+    while (EAT1(state, index,
                 (grapheme & ~0x7F) || isalnum((char)grapheme) || strchr(allowed_local, (char)grapheme))) {
         local_len += 1;
         if (local_len > 64) return -1;
     }
     
-    if (!EAT1(text, &state, index, grapheme == '@'))
+    if (!EAT1(state, index, grapheme == '@'))
         return -1;
 
     // Host
     int64_t host_len = 0;
     do {
         int64_t label_len = 0;
-        while (EAT1(text, &state, index,
+        while (EAT1(state, index,
                     (grapheme & ~0x7F) || isalnum((char)grapheme) || grapheme == '-')) {
             label_len += 1;
             if (label_len > 63) return -1;
@@ -166,16 +178,15 @@ int64_t match_email(Text_t text, int64_t index)
         if (host_len > 255)
             return -1;
         host_len += 1;
-    } while (EAT1(text, &state, index, grapheme == '.'));
+    } while (EAT1(state, index, grapheme == '.'));
 
     return index - start_index;
 }
 
-int64_t match_ipv6(Text_t text, int64_t index)
+static int64_t match_ipv6(TextIter_t *state, int64_t index)
 {
-    TextIter_t state = {0, 0};
     if (index > 0) {
-        int32_t prev_codepoint = Text$get_grapheme_fast(text, &state, index - 1);
+        int32_t prev_codepoint = Text$get_grapheme_fast(state, index - 1);
         if ((prev_codepoint & ~0x7F) && (isxdigit(prev_codepoint) || prev_codepoint == ':'))
             return -1;
     }
@@ -184,21 +195,21 @@ int64_t match_ipv6(Text_t text, int64_t index)
     bool double_colon_used = false;
     for (int cluster = 0; cluster < NUM_CLUSTERS; cluster++) {
         for (int digits = 0; digits < 4; digits++) {
-            if (!EAT1(text, &state, index, ~(grapheme & ~0x7F) && isxdigit((char)grapheme)))
+            if (!EAT1(state, index, ~(grapheme & ~0x7F) && isxdigit((char)grapheme)))
                 break;
         }
-        if (EAT1(text, &state, index, ~(grapheme & ~0x7F) && isxdigit((char)grapheme)))
+        if (EAT1(state, index, ~(grapheme & ~0x7F) && isxdigit((char)grapheme)))
             return -1; // Too many digits
 
         if (cluster == NUM_CLUSTERS-1) {
             break;
-        } else if (!EAT1(text, &state, index, grapheme == ':')) {
+        } else if (!EAT1(state, index, grapheme == ':')) {
             if (double_colon_used)
                 break;
             return -1;
         }
 
-        if (EAT1(text, &state, index, grapheme == ':')) {
+        if (EAT1(state, index, grapheme == ':')) {
             if (double_colon_used)
                 return -1;
             double_colon_used = true;
@@ -207,11 +218,10 @@ int64_t match_ipv6(Text_t text, int64_t index)
     return index - start_index;
 }
 
-static int64_t match_ipv4(Text_t text, int64_t index)
+static int64_t match_ipv4(TextIter_t *state, int64_t index)
 {
-    TextIter_t state = {0, 0};
     if (index > 0) {
-        int32_t prev_codepoint = Text$get_grapheme_fast(text, &state, index - 1);
+        int32_t prev_codepoint = Text$get_grapheme_fast(state, index - 1);
         if ((prev_codepoint & ~0x7F) && (isdigit(prev_codepoint) || prev_codepoint == '.'))
             return -1;
     }
@@ -220,40 +230,86 @@ static int64_t match_ipv4(Text_t text, int64_t index)
     const int NUM_CLUSTERS = 4;
     for (int cluster = 0; cluster < NUM_CLUSTERS; cluster++) {
         for (int digits = 0; digits < 3; digits++) {
-            if (!EAT1(text, &state, index, ~(grapheme & ~0x7F) && isdigit((char)grapheme))) {
+            if (!EAT1(state, index, ~(grapheme & ~0x7F) && isdigit((char)grapheme))) {
                 if (digits == 0) return -1;
                 break;
             }
         }
 
-        if (EAT1(text, &state, index, ~(grapheme & ~0x7F) && isdigit((char)grapheme)))
+        if (EAT1(state, index, ~(grapheme & ~0x7F) && isdigit((char)grapheme)))
             return -1; // Too many digits
 
         if (cluster == NUM_CLUSTERS-1)
             break;
-        else if (!EAT1(text, &state, index, grapheme == '.'))
+        else if (!EAT1(state, index, grapheme == '.'))
             return -1;
     }
     return (index - start_index);
 }
 
-int64_t match_ip(Text_t text, int64_t index)
+static int64_t match_ip(TextIter_t *state, int64_t index)
 {
-    int64_t len = match_ipv6(text, index);
+    int64_t len = match_ipv6(state, index);
     if (len >= 0) return len;
-    len = match_ipv4(text, index);
+    len = match_ipv4(state, index);
     return (len >= 0) ? len : -1;
 }
 
-int64_t match_uri(Text_t text, int64_t index)
+static int64_t match_host(TextIter_t *state, int64_t index)
+{
+    int64_t ip_len = match_ip(state, index);
+    if (ip_len > 0) return ip_len;
+
+    int64_t start_index = index;
+    if (match_grapheme(state, &index, '[')) {
+        ip_len = match_ip(state, index);
+        if (ip_len <= 0) return -1;
+        index += ip_len;
+        if (match_grapheme(state, &index, ']'))
+            return (index - start_index);
+        return -1;
+    }
+
+    if (!EAT1(state, index, isalpha(grapheme)))
+        return -1;
+
+    static const char *non_host_chars = "/#?:@ \t\r\n<>[]{}\\^|\"`";
+    EAT_MANY(state, index, (grapheme & ~0x7F) || !strchr(non_host_chars, (char)grapheme));
+    return (index - start_index);
+}
+
+static int64_t match_authority(TextIter_t *state, int64_t index)
+{
+    int64_t authority_start = index;
+    static const char *non_segment_chars = "/#?:@ \t\r\n<>[]{}\\^|\"`.";
+
+    // Optional user@ prefix:
+    int64_t username_len = EAT_MANY(state, index, (grapheme & ~0x7F) || !strchr(non_segment_chars, (char)grapheme));
+    if (username_len < 1 || !EAT1(state, index, grapheme == '@'))
+        index = authority_start; // No user@ part
+
+    // Host:
+    int64_t host_len = match_host(state, index);
+    if (host_len <= 0) return -1;
+    index += host_len;
+
+    // Port:
+    if (EAT1(state, index, grapheme == ':')) {
+        if (EAT_MANY(state, index, !(grapheme & ~0x7F) && isdigit(grapheme)) == 0)
+            return -1;
+    }
+    return (index - authority_start);
+}
+
+static int64_t match_uri(TextIter_t *state, int64_t index)
 {
     // URI = scheme ":" ["//" authority] path ["?" query] ["#" fragment]
     // scheme = [a-zA-Z] [a-zA-Z0-9+.-]
     // authority = [userinfo "@"] host [":" port]
 
-    TextIter_t state = {0, 0};
     if (index > 0) {
-        ucs4_t prev_codepoint = Text$get_main_grapheme_fast(text, &state, index - 1);
+        // Don't match if we're not at a word edge:
+        ucs4_t prev_codepoint = Text$get_main_grapheme_fast(state, index - 1);
         if (uc_is_property_alphabetic(prev_codepoint))
             return -1;
     }
@@ -261,147 +317,101 @@ int64_t match_uri(Text_t text, int64_t index)
     int64_t start_index = index;
 
     // Scheme:
-    if (!EAT1(text, &state, index, isalpha(grapheme)))
+    if (!EAT1(state, index, isalpha(grapheme)))
         return -1;
-
-    EAT_MANY(text, &state, index,
-             !(grapheme & ~0x7F) && (isalnum(grapheme) || grapheme == '+' || grapheme == '.' || grapheme == '-'));
-
-    if (index == start_index)
-        return -1;
-
-    if (!match_grapheme(text, &index, ':'))
+    EAT_MANY(state, index, !(grapheme & ~0x7F) && (isalnum(grapheme) || grapheme == '+' || grapheme == '.' || grapheme == '-'));
+    if (!match_grapheme(state, &index, ':'))
         return -1;
 
     // Authority:
-    if (match_str(text, &index, "//")) {
-        int64_t authority_start = index;
-        // Username or host:
-        static const char *forbidden = "#?:@ \t\r\n<>[]{}\\^|\"`/";
-        if (EAT_MANY(text, &state, index, (grapheme & ~0x7F) || !strchr(forbidden, (char)grapheme)) == 0)
-            return -1;
+    int64_t authority_len;
+    if (match_str(state, &index, "//")) {
+        authority_len = match_authority(state, index);
+        if (authority_len > 0)
+            index += authority_len;
+    } else {
+        authority_len = 0;
+    }
 
-        if (EAT1(text, &state, index, grapheme == '@')) {
-            // Found a username, now get a host:
-            if (EAT_MANY(text, &state, index, (grapheme & ~0x7F) || !strchr(forbidden, (char)grapheme)) == 0)
-                return -1;
-        } else {
-            int64_t ip = authority_start;
-            int64_t ipv4_len = match_ipv4(text, ip);
-            if (ipv4_len > 0) {
-                ip += ipv4_len;
-            } else if (match_grapheme(text, &ip, '[')) {
-                ip += match_ipv6(text, ip);
-                if (ip > authority_start + 1 && match_grapheme(text, &ip, ']'))
-                    index = ip;
-            }
+    // Path:
+    int64_t path_start = index;
+    if (EAT1(state, index, grapheme == '/') || authority_len <= 0) {
+        static const char *non_path = " \"#?<>[]{}\\^`|";
+        EAT_MANY(state, index, (grapheme & ~0x7F) || !strchr(non_path, (char)grapheme));
+
+        if (EAT1(state, index, grapheme == '?')) { // Query
+            static const char *non_query = " \"#<>[]{}\\^`|";
+            EAT_MANY(state, index, (grapheme & ~0x7F) || !strchr(non_query, (char)grapheme));
         }
-
-        // Port:
-        if (EAT1(text, &state, index, grapheme == ':')) {
-            if (EAT_MANY(text, &state, index, !(grapheme & ~0x7F) && isdigit(grapheme)) == 0)
-                return -1;
+        
+        if (EAT1(state, index, grapheme == '#')) { // Fragment
+            static const char *non_fragment = " \"#<>[]{}\\^`|";
+            EAT_MANY(state, index, (grapheme & ~0x7F) || !strchr(non_fragment, (char)grapheme));
         }
-        if (!EAT1(text, &state, index, grapheme == '/'))
-            return (index - start_index); // No path
-    } else {
-        // Optional path root:
-        EAT1(text, &state, index, grapheme == '/');
     }
 
-    // Path:
-    static const char *non_path = " \"#?<>[]{}\\^`|";
-    EAT_MANY(text, &state, index, (grapheme & ~0x7F) || !strchr(non_path, (char)grapheme));
+    if (authority_len <= 0 && index == path_start)
+        return -1;
 
-    if (EAT1(text, &state, index, grapheme == '?')) { // Query
-        static const char *non_query = " \"#<>[]{}\\^`|";
-        EAT_MANY(text, &state, index, (grapheme & ~0x7F) || !strchr(non_query, (char)grapheme));
-    }
-    
-    if (EAT1(text, &state, index, grapheme == '#')) { // Fragment
-        static const char *non_fragment = " \"#<>[]{}\\^`|";
-        EAT_MANY(text, &state, index, (grapheme & ~0x7F) || !strchr(non_fragment, (char)grapheme));
-    }
     return index - start_index;
 }
 
-int64_t match_url(Text_t text, int64_t index)
+static int64_t match_url(TextIter_t *state, int64_t index)
 {
     int64_t lookahead = index;
-    if (!(match_str(text, &lookahead, "https:")
-        || match_str(text, &lookahead, "http:")
-        || match_str(text, &lookahead, "ftp:")
-        || match_str(text, &lookahead, "wss:")
-        || match_str(text, &lookahead, "ws:")))
+    if (!(match_str(state, &lookahead, "https:")
+        || match_str(state, &lookahead, "http:")
+        || match_str(state, &lookahead, "ftp:")
+        || match_str(state, &lookahead, "wss:")
+        || match_str(state, &lookahead, "ws:")))
         return -1;
 
-    return match_uri(text, index);
+    return match_uri(state, index);
 }
 
-int64_t match_id(Text_t text, int64_t index)
+static int64_t match_id(TextIter_t *state, int64_t index)
 {
-    TextIter_t state = {0, 0};
-    if (!EAT1(text, &state, index, uc_is_property((ucs4_t)grapheme, UC_PROPERTY_XID_START)))
+    if (!EAT1(state, index, uc_is_property((ucs4_t)grapheme, UC_PROPERTY_XID_START)))
         return -1;
-    return 1 + EAT_MANY(text, &state, index, uc_is_property((ucs4_t)grapheme, UC_PROPERTY_XID_CONTINUE));
+    return 1 + EAT_MANY(state, index, uc_is_property((ucs4_t)grapheme, UC_PROPERTY_XID_CONTINUE));
 }
 
-int64_t match_int(Text_t text, int64_t index)
+static int64_t match_int(TextIter_t *state, int64_t index)
 {
-    TextIter_t state = {0, 0};
-    int64_t len = EAT_MANY(text, &state, index, uc_is_property((ucs4_t)grapheme, UC_PROPERTY_DECIMAL_DIGIT));
+    int64_t len = EAT_MANY(state, index, uc_is_property((ucs4_t)grapheme, UC_PROPERTY_DECIMAL_DIGIT));
     return len >= 0 ? len : -1;
 }
 
-int64_t match_num(Text_t text, int64_t index)
+static int64_t match_num(TextIter_t *state, int64_t index)
 {
-    TextIter_t state = {0, 0};
-    bool negative = EAT1(text, &state, index, grapheme == '-') ? 1 : 0;
-    int64_t pre_decimal = EAT_MANY(text, &state, index,
+    bool negative = EAT1(state, index, grapheme == '-') ? 1 : 0;
+    int64_t pre_decimal = EAT_MANY(state, index,
                                    uc_is_property((ucs4_t)grapheme, UC_PROPERTY_DECIMAL_DIGIT));
-    bool decimal = (EAT1(text, &state, index, grapheme == '.') == 1);
-    int64_t post_decimal = decimal ? EAT_MANY(text, &state, index,
+    bool decimal = (EAT1(state, index, grapheme == '.') == 1);
+    int64_t post_decimal = decimal ? EAT_MANY(state, index,
                                               uc_is_property((ucs4_t)grapheme, UC_PROPERTY_DECIMAL_DIGIT)) : 0;
     if (pre_decimal == 0 && post_decimal == 0)
         return -1;
     return negative + pre_decimal + decimal + post_decimal;
 }
 
-int64_t match_newline(Text_t text, int64_t index)
+static int64_t match_newline(TextIter_t *state, int64_t index)
 {
-    if (index >= text.length)
+    if (index >= state->text.length)
         return -1;
 
-    TextIter_t state = {0, 0};
-    ucs4_t grapheme = index >= text.length ? 0 : Text$get_main_grapheme_fast(text, &state, index);
+    ucs4_t grapheme = index >= state->text.length ? 0 : Text$get_main_grapheme_fast(state, index);
     if (grapheme == '\n')
         return 1;
-    if (grapheme == '\r' && Text$get_grapheme_fast(text, &state, index + 1) == '\n')
+    if (grapheme == '\r' && Text$get_grapheme_fast(state, index + 1) == '\n')
         return 2;
     return -1;
 }
 
-typedef struct {
-    int64_t index, length;
-    bool occupied, recursive;
-} capture_t;
-
-typedef struct {
-    enum { PAT_START, PAT_END, PAT_ANY, PAT_GRAPHEME, PAT_PROPERTY, PAT_QUOTE, PAT_PAIR, PAT_FUNCTION } tag;
-    bool negated, non_capturing;
-    int64_t min, max;
-    union {
-        int32_t grapheme;
-        uc_property_t property;
-        int64_t (*fn)(Text_t, int64_t);
-        int32_t quote_graphemes[2];
-        int32_t pair_graphemes[2];
-    };
-} pat_t;
-
-int64_t match_pat(Text_t text, TextIter_t *state, int64_t index, pat_t pat)
+static int64_t match_pat(TextIter_t *state, int64_t index, pat_t pat)
 {
-    int32_t grapheme = index >= text.length ? 0 : Text$get_grapheme_fast(text, state, index);
+    Text_t text = state->text;
+    int32_t grapheme = index >= text.length ? 0 : Text$get_grapheme_fast(state, index);
 
     switch (pat.tag) {
     case PAT_START: {
@@ -448,7 +458,7 @@ int64_t match_pat(Text_t text, TextIter_t *state, int64_t index, pat_t pat)
             if (index + match_len >= text.length)
                 return pat.negated ? 1 : -1;
 
-            int32_t c = Text$get_grapheme_fast(text, state, index + match_len);
+            int32_t c = Text$get_grapheme_fast(state, index + match_len);
             if (c == open)
                 depth += 1;
             else if (c == close)
@@ -467,7 +477,7 @@ int64_t match_pat(Text_t text, TextIter_t *state, int64_t index, pat_t pat)
 
         int32_t close = pat.quote_graphemes[1];
         for (int64_t i = index + 1; i < text.length; i++) {
-            int32_t c = Text$get_grapheme_fast(text, state, i);
+            int32_t c = Text$get_grapheme_fast(state, i);
             if (c == close) {
                 return pat.negated ? -1 : (i - index) + 1;
             } else if (c == '\\' && index + 1 < text.length) {
@@ -477,7 +487,7 @@ int64_t match_pat(Text_t text, TextIter_t *state, int64_t index, pat_t pat)
         return pat.negated ? 1 : -1;
     }
     case PAT_FUNCTION: {
-        int64_t match_len = pat.fn(text, index);
+        int64_t match_len = pat.fn(state, index);
         if (match_len >= 0)
             return pat.negated ? -1 : match_len;
         return pat.negated ? 1 : -1;
@@ -487,49 +497,48 @@ int64_t match_pat(Text_t text, TextIter_t *state, int64_t index, pat_t pat)
     errx(1, "Unreachable");
 }
 
-pat_t parse_next_pat(Text_t pattern, TextIter_t *state, int64_t *index)
+static pat_t parse_next_pat(TextIter_t *state, int64_t *index)
 {
-    if (EAT2(pattern, state, *index,
+    if (EAT2(state, *index,
              uc_is_property((ucs4_t)grapheme, UC_PROPERTY_QUOTATION_MARK),
              grapheme == '?')) {
         // Quotations: "?", '?', etc
-        int32_t open = Text$get_grapheme_fast(pattern, state, *index-2);
+        int32_t open = Text$get_grapheme_fast(state, *index-2);
         int32_t close = open;
         uc_mirror_char((ucs4_t)open, (ucs4_t*)&close);
-        if (!match_grapheme(pattern, index, close))
-            fail("Pattern's closing quote is missing: %k", &pattern);
+        if (!match_grapheme(state, index, close))
+            fail("Pattern's closing quote is missing: %k", &state->text);
 
         return (pat_t){
             .tag=PAT_QUOTE,
             .min=1, .max=1,
             .quote_graphemes={open, close},
         };
-    } else if (EAT2(pattern, state, *index,
+    } else if (EAT2(state, *index,
                     uc_is_property((ucs4_t)grapheme, UC_PROPERTY_PAIRED_PUNCTUATION),
                     grapheme == '?')) {
         // Nested punctuation: (?), [?], etc
-        int32_t open = Text$get_grapheme_fast(pattern, state, *index-2);
+        int32_t open = Text$get_grapheme_fast(state, *index-2);
         int32_t close = open;
         uc_mirror_char((ucs4_t)open, (ucs4_t*)&close);
-        if (!match_grapheme(pattern, index, close))
-            fail("Pattern's closing brace is missing: %k", &pattern);
+        if (!match_grapheme(state, index, close))
+            fail("Pattern's closing brace is missing: %k", &state->text);
         
         return (pat_t){
             .tag=PAT_PAIR,
             .min=1, .max=1,
             .pair_graphemes={open, close},
         };
-    } else if (EAT1(pattern, state, *index,
-                    grapheme == '{')) { // named patterns {id}, {2-3 hex}, etc.
-        skip_whitespace(pattern, index);
+    } else if (EAT1(state, *index, grapheme == '{')) { // named patterns {id}, {2-3 hex}, etc.
+        skip_whitespace(state, index);
         int64_t min, max;
-        if (uc_is_digit((ucs4_t)Text$get_grapheme_fast(pattern, state, *index))) {
-            min = parse_int(pattern, index);
-            skip_whitespace(pattern, index);
-            if (match_grapheme(pattern, index, '+')) {
+        if (uc_is_digit((ucs4_t)Text$get_grapheme_fast(state, *index))) {
+            min = parse_int(state, index);
+            skip_whitespace(state, index);
+            if (match_grapheme(state, index, '+')) {
                 max = INT64_MAX;
-            } else if (match_grapheme(pattern, index, '-')) {
-                max = parse_int(pattern, index);
+            } else if (match_grapheme(state, index, '-')) {
+                max = parse_int(state, index);
             } else {
                 max = min;
             }
@@ -538,34 +547,34 @@ pat_t parse_next_pat(Text_t pattern, TextIter_t *state, int64_t *index)
             min = -1, max = -1;
         }
 
-        skip_whitespace(pattern, index);
+        skip_whitespace(state, index);
 
-        bool negated = match_grapheme(pattern, index, '!');
+        bool negated = match_grapheme(state, index, '!');
 #define PAT(_tag, ...) ((pat_t){.min=min, .max=max, .negated=negated, .tag=_tag, __VA_ARGS__})
         const char *prop_name;
-        if (match_str(pattern, index, ".."))
+        if (match_str(state, index, ".."))
             prop_name = "..";
         else
-            prop_name = get_property_name(pattern, index);
+            prop_name = get_property_name(state, index);
 
         if (!prop_name) {
             // Literal character, e.g. {1?}
-            skip_whitespace(pattern, index);
-            int32_t grapheme = Text$get_grapheme_fast(pattern, state, (*index)++);
-            if (!match_grapheme(pattern, index, '}'))
-                fail("Missing closing '}' in pattern: %k", &pattern);
+            skip_whitespace(state, index);
+            int32_t grapheme = Text$get_grapheme_fast(state, (*index)++);
+            if (!match_grapheme(state, index, '}'))
+                fail("Missing closing '}' in pattern: %k", &state->text);
             return PAT(PAT_GRAPHEME, .grapheme=grapheme);
         } else if (strlen(prop_name) == 1) {
             // Single letter names: {1+ A}
-            skip_whitespace(pattern, index);
-            if (!match_grapheme(pattern, index, '}'))
-                fail("Missing closing '}' in pattern: %k", &pattern);
+            skip_whitespace(state, index);
+            if (!match_grapheme(state, index, '}'))
+                fail("Missing closing '}' in pattern: %k", &state->text);
             return PAT(PAT_GRAPHEME, .grapheme=prop_name[0]);
         }
 
-        skip_whitespace(pattern, index);
-        if (!match_grapheme(pattern, index, '}'))
-            fail("Missing closing '}' in pattern: %k", &pattern);
+        skip_whitespace(state, index);
+        if (!match_grapheme(state, index, '}'))
+            fail("Missing closing '}' in pattern: %k", &state->text);
 
         switch (tolower(prop_name[0])) {
         case '.':
@@ -576,6 +585,11 @@ pat_t parse_next_pat(Text_t pattern, TextIter_t *state, int64_t *index)
                     return PAT(PAT_ANY); 
             }
             break;
+        case 'a':
+            if (strcasecmp(prop_name, "authority") == 0) {
+                return PAT(PAT_FUNCTION, .fn=match_authority);
+            }
+            break;
         case 'd':
             if (strcasecmp(prop_name, "digit") == 0) {
                 return PAT(PAT_PROPERTY, .property=UC_PROPERTY_DECIMAL_DIGIT);
@@ -590,6 +604,11 @@ pat_t parse_next_pat(Text_t pattern, TextIter_t *state, int64_t *index)
                 return PAT(PAT_PROPERTY, .property=UC_PROPERTY_EMOJI);
             }
             break;
+        case 'h':
+            if (strcasecmp(prop_name, "host") == 0) {
+                return PAT(PAT_FUNCTION, .fn=match_host);
+            }
+            break;
         case 'i':
             if (strcasecmp(prop_name, "id") == 0) {
                 return PAT(PAT_FUNCTION, .fn=match_id);
@@ -636,18 +655,18 @@ pat_t parse_next_pat(Text_t pattern, TextIter_t *state, int64_t *index)
         return PAT(PAT_GRAPHEME, .grapheme=(int32_t)grapheme);
 #undef PAT
     } else {
-        return (pat_t){.tag=PAT_GRAPHEME, .non_capturing=true, .min=1, .max=1, .grapheme=Text$get_grapheme_fast(pattern, state, (*index)++)};
+        return (pat_t){.tag=PAT_GRAPHEME, .non_capturing=true, .min=1, .max=1, .grapheme=Text$get_grapheme_fast(state, (*index)++)};
     }
 }
 
-int64_t match(Text_t text, int64_t text_index, Pattern_t pattern, int64_t pattern_index, capture_t *captures, int64_t capture_index)
+static int64_t match(Text_t text, int64_t text_index, Pattern_t pattern, int64_t pattern_index, capture_t *captures, int64_t capture_index)
 {
     if (pattern_index >= pattern.length) // End of the pattern
         return 0;
 
     int64_t start_index = text_index;
-    TextIter_t pattern_state = {0, 0}, text_state = {0, 0};
-    pat_t pat = parse_next_pat(pattern, &pattern_state, &pattern_index);
+    TextIter_t pattern_state = {pattern, 0, 0}, text_state = {text, 0, 0};
+    pat_t pat = parse_next_pat(&pattern_state, &pattern_index);
 
     if (pat.min == -1 && pat.max == -1) {
         if (pat.tag == PAT_ANY && pattern_index >= pattern.length) {
@@ -677,7 +696,7 @@ int64_t match(Text_t text, int64_t text_index, Pattern_t pattern, int64_t patter
     }
 
     while (count < pat.max) {
-        int64_t match_len = match_pat(text, &text_state, text_index, pat);
+        int64_t match_len = match_pat(&text_state, text_index, pat);
         if (match_len < 0)
             break;
         capture_len += match_len;
@@ -747,12 +766,11 @@ static int64_t _find(Text_t text, Pattern_t pattern, int64_t first, int64_t last
                        && !uc_is_property((ucs4_t)first_grapheme, UC_PROPERTY_QUOTATION_MARK)
                        && !uc_is_property((ucs4_t)first_grapheme, UC_PROPERTY_PAIRED_PUNCTUATION));
 
-    TextIter_t text_state = {0, 0};
-
+    TextIter_t text_state = {text, 0, 0};
     for (int64_t i = first; i <= last; i++) {
         // Optimization: quickly skip ahead to first char in pattern:
         if (find_first) {
-            while (i < text.length && Text$get_grapheme_fast(text, &text_state, i) != first_grapheme)
+            while (i < text.length && Text$get_grapheme_fast(&text_state, i) != first_grapheme)
                 ++i;
         }
 
@@ -833,12 +851,12 @@ static Text_t apply_backrefs(Text_t text, Pattern_t original_pattern, Text_t rep
                        && !uc_is_property((ucs4_t)first_grapheme, UC_PROPERTY_PAIRED_PUNCTUATION));
 
     Text_t ret = Text("");
-    TextIter_t state = {0, 0};
+    TextIter_t replacement_state = {replacement, 0, 0};
     int64_t nonmatching_pos = 0;
     for (int64_t pos = 0; pos < replacement.length; ) {
         // Optimization: quickly skip ahead to first char in the backref pattern:
         if (find_first) {
-            while (pos < replacement.length && Text$get_grapheme_fast(replacement, &state, pos) != first_grapheme)
+            while (pos < replacement.length && Text$get_grapheme_fast(&replacement_state, pos) != first_grapheme)
                 ++pos;
         }
 
@@ -849,7 +867,7 @@ static Text_t apply_backrefs(Text_t text, Pattern_t original_pattern, Text_t rep
         }
 
         int64_t after_backref = pos + backref_len;
-        int64_t backref = parse_int(replacement, &after_backref);
+        int64_t backref = parse_int(&replacement_state, &after_backref);
         if (after_backref == pos + backref_len) { // Not actually a backref if there's no number
             pos += 1;
             continue;
@@ -857,7 +875,7 @@ static Text_t apply_backrefs(Text_t text, Pattern_t original_pattern, Text_t rep
         if (backref < 0 || backref > 9) fail("Invalid backref index: %ld (only 0-%d are allowed)", backref, MAX_BACKREFS-1);
         backref_len = (after_backref - pos);
 
-        if (Text$get_grapheme_fast(replacement, &state, pos + backref_len) == ';')
+        if (Text$get_grapheme_fast(&replacement_state, pos + backref_len) == ';')
             backref_len += 1; // skip optional semicolon
 
         if (!captures[backref].occupied)
@@ -894,12 +912,12 @@ public Text_t Text$replace(Text_t text, Pattern_t pattern, Text_t replacement, P
                        && !uc_is_property((ucs4_t)first_grapheme, UC_PROPERTY_QUOTATION_MARK)
                        && !uc_is_property((ucs4_t)first_grapheme, UC_PROPERTY_PAIRED_PUNCTUATION));
 
-    TextIter_t text_state = {0, 0};
+    TextIter_t text_state = {text, 0, 0};
     int64_t nonmatching_pos = 0;
     for (int64_t pos = 0; pos < text.length; ) {
         // Optimization: quickly skip ahead to first char in pattern:
         if (find_first) {
-            while (pos < text.length && Text$get_grapheme_fast(text, &text_state, pos) != first_grapheme)
+            while (pos < text.length && Text$get_grapheme_fast(&text_state, pos) != first_grapheme)
                 ++pos;
         }
 
@@ -959,14 +977,14 @@ public Text_t Text$map(Text_t text, Pattern_t pattern, Closure_t fn)
                        && !uc_is_property((ucs4_t)first_grapheme, UC_PROPERTY_QUOTATION_MARK)
                        && !uc_is_property((ucs4_t)first_grapheme, UC_PROPERTY_PAIRED_PUNCTUATION));
 
-    TextIter_t text_state = {0, 0};
+    TextIter_t text_state = {text, 0, 0};
     int64_t nonmatching_pos = 0;
 
     Text_t (*text_mapper)(Text_t, void*) = fn.fn;
     for (int64_t pos = 0; pos < text.length; pos++) {
         // Optimization: quickly skip ahead to first char in pattern:
         if (find_first) {
-            while (pos < text.length && Text$get_grapheme_fast(text, &text_state, pos) != first_grapheme)
+            while (pos < text.length && Text$get_grapheme_fast(&text_state, pos) != first_grapheme)
                 ++pos;
         }
 
diff --git a/stdlib/text.c b/stdlib/text.c
index 283dfb01..60b51962 100644
--- a/stdlib/text.c
+++ b/stdlib/text.c
@@ -817,17 +817,15 @@ PUREFUNC public uint64_t Text$hash(Text_t *text)
     return text->hash;
 }
 
-public int32_t Text$get_grapheme_fast(Text_t text, TextIter_t *state, int64_t index)
+public int32_t Text$get_grapheme_fast(TextIter_t *state, int64_t index)
 {
+    Text_t text = state->text;
     switch (text.tag) {
     case TEXT_ASCII: return index < text.length ? (int32_t)text.ascii[index] : 0;
     case TEXT_SHORT_ASCII: return index < text.length ? (int32_t)text.short_ascii[index] : 0;
     case TEXT_GRAPHEMES: return index < text.length ? text.graphemes[index] : 0;
     case TEXT_SHORT_GRAPHEMES: return index < text.length ? text.short_graphemes[index] : 0;
     case TEXT_SUBTEXT: {
-        TextIter_t backup_state = {0, 0};
-        if (!state) state = &backup_state;
-
         if (index < 0 || index >= text.length)
             return 0;
 
@@ -837,7 +835,7 @@ public int32_t Text$get_grapheme_fast(Text_t text, TextIter_t *state, int64_t in
         }
         for (;;) {
             if (index < state->sum_of_previous_subtexts + text.subtexts[state->subtext].length)
-                return Text$get_grapheme_fast(text.subtexts[state->subtext], NULL, index - state->sum_of_previous_subtexts);
+                return Text$get_grapheme(text.subtexts[state->subtext], index - state->sum_of_previous_subtexts);
             state->sum_of_previous_subtexts += text.subtexts[state->subtext].length;
             state->subtext += 1;
         }
@@ -848,9 +846,9 @@ public int32_t Text$get_grapheme_fast(Text_t text, TextIter_t *state, int64_t in
     return 0;
 }
 
-public ucs4_t Text$get_main_grapheme_fast(Text_t text, TextIter_t *state, int64_t index)
+public ucs4_t Text$get_main_grapheme_fast(TextIter_t *state, int64_t index)
 {
-    return MAIN_GRAPHEME_CODEPOINT(Text$get_grapheme_fast(text, state, index));
+    return MAIN_GRAPHEME_CODEPOINT(Text$get_grapheme_fast(state, index));
 }
 
 PUREFUNC public int32_t Text$compare(const Text_t *a, const Text_t *b)
@@ -858,10 +856,10 @@ PUREFUNC public int32_t Text$compare(const Text_t *a, const Text_t *b)
     if (a == b) return 0;
 
     int64_t len = MAX(a->length, b->length);
-    TextIter_t a_state = {0, 0}, b_state = {0, 0};
+    TextIter_t a_state = {*a, 0, 0}, b_state = {*b, 0, 0};
     for (int64_t i = 0; i < len; i++) {
-        int32_t ai = Text$get_grapheme_fast(*a, &a_state, i);
-        int32_t bi = Text$get_grapheme_fast(*b, &b_state, i);
+        int32_t ai = Text$get_grapheme_fast(&a_state, i);
+        int32_t bi = Text$get_grapheme_fast(&b_state, i);
         if (ai == bi) continue;
         int32_t cmp;
         if (ai > 0 && bi > 0) {
@@ -892,10 +890,10 @@ PUREFUNC public bool Text$starts_with(Text_t text, Text_t prefix)
 {
     if (text.length < prefix.length)
         return false;
-    TextIter_t text_state = {0, 0}, prefix_state = {0, 0};
+    TextIter_t text_state = {text, 0, 0}, prefix_state = {prefix, 0, 0};
     for (int64_t i = 0; i < prefix.length; i++) {
-        int32_t text_i = Text$get_grapheme_fast(text, &text_state, i);
-        int32_t prefix_i = Text$get_grapheme_fast(prefix, &prefix_state, i);
+        int32_t text_i = Text$get_grapheme_fast(&text_state, i);
+        int32_t prefix_i = Text$get_grapheme_fast(&prefix_state, i);
         if (text_i != prefix_i) return false;
     }
     return true;
@@ -905,10 +903,10 @@ PUREFUNC public bool Text$ends_with(Text_t text, Text_t suffix)
 {
     if (text.length < suffix.length)
         return false;
-    TextIter_t text_state = {0, 0}, prefix_state = {0, 0};
+    TextIter_t text_state = {text, 0, 0}, suffix_state = {suffix, 0, 0};
     for (int64_t i = 0; i < suffix.length; i++) {
-        int32_t text_i = Text$get_grapheme_fast(text, &text_state, text.length - suffix.length + i);
-        int32_t suffix_i = Text$get_grapheme_fast(suffix, &prefix_state, i);
+        int32_t text_i = Text$get_grapheme_fast(&text_state, text.length - suffix.length + i);
+        int32_t suffix_i = Text$get_grapheme_fast(&suffix_state, i);
         if (text_i != suffix_i) return false;
     }
     return true;
@@ -919,10 +917,10 @@ PUREFUNC public bool Text$equal_values(Text_t a, Text_t b)
     if (a.length != b.length || (a.hash != 0 && b.hash != 0 && a.hash != b.hash))
         return false;
     int64_t len = a.length;
-    TextIter_t a_state = {0, 0}, b_state = {0, 0};
+    TextIter_t a_state = {a, 0, 0}, b_state = {b, 0, 0};
     for (int64_t i = 0; i < len; i++) {
-        int32_t ai = Text$get_grapheme_fast(a, &a_state, i);
-        int32_t bi = Text$get_grapheme_fast(b, &b_state, i);
+        int32_t ai = Text$get_grapheme_fast(&a_state, i);
+        int32_t bi = Text$get_grapheme_fast(&b_state, i);
         if (ai != bi) return false;
     }
     return true;
@@ -939,11 +937,11 @@ PUREFUNC public bool Text$equal_ignoring_case(Text_t a, Text_t b)
     if (a.length != b.length)
         return false;
     int64_t len = a.length;
-    TextIter_t a_state = {0, 0}, b_state = {0, 0};
+    TextIter_t a_state = {a, 0, 0}, b_state = {b, 0, 0};
     const char *language = uc_locale_language();
     for (int64_t i = 0; i < len; i++) {
-        int32_t ai = Text$get_grapheme_fast(a, &a_state, i);
-        int32_t bi = Text$get_grapheme_fast(b, &b_state, i);
+        int32_t ai = Text$get_grapheme_fast(&a_state, i);
+        int32_t bi = Text$get_grapheme_fast(&b_state, i);
         if (ai != bi) {
             const ucs4_t *a_codepoints = ai >= 0 ? (ucs4_t*)&ai : GRAPHEME_CODEPOINTS(ai);
             int64_t a_len = ai >= 0 ? 1 : NUM_GRAPHEME_CODEPOINTS(ai);
@@ -1030,9 +1028,9 @@ static inline Text_t _quoted(Text_t text, bool colorize, char quote_char)
     add_char(quote_char);
 
 #define add_escaped(str) ({ if (colorize) add_str("\x1b[34;1m"); add_char('\\'); add_str(str); if (colorize) add_str("\x1b[0;35m"); })
-    TextIter_t state = {0, 0};
+    TextIter_t state = {text, 0, 0};
     for (int64_t i = 0; i < text.length; i++) {
-        int32_t g = Text$get_grapheme_fast(text, &state, i);
+        int32_t g = Text$get_grapheme_fast(&state, i);
         switch (g) {
         case '\a': add_escaped("a"); break;
         case '\b': add_escaped("b"); break;
@@ -1148,9 +1146,9 @@ public Array_t Text$clusters(Text_t text)
 public Array_t Text$utf32_codepoints(Text_t text)
 {
     Array_t codepoints = {.atomic=1};
-    TextIter_t state = {0, 0};
+    TextIter_t state = {text, 0, 0};
     for (int64_t i = 0; i < text.length; i++) {
-        int32_t grapheme = Text$get_grapheme_fast(text, &state, i);
+        int32_t grapheme = Text$get_grapheme_fast(&state, i);
         if (grapheme < 0) {
             for (int64_t c = 0; c < NUM_GRAPHEME_CODEPOINTS(grapheme); c++) {
                 ucs4_t subg = GRAPHEME_CODEPOINTS(grapheme)[c];
@@ -1183,9 +1181,9 @@ static inline const char *codepoint_name(ucs4_t c)
 public Array_t Text$codepoint_names(Text_t text)
 {
     Array_t names = {};
-    TextIter_t state = {0, 0};
+    TextIter_t state = {text, 0, 0};
     for (int64_t i = 0; i < text.length; i++) {
-        int32_t grapheme = Text$get_grapheme_fast(text, &state, i);
+        int32_t grapheme = Text$get_grapheme_fast(&state, i);
         if (grapheme < 0) {
             for (int64_t c = 0; c < NUM_GRAPHEME_CODEPOINTS(grapheme); c++) {
                 const char *name = codepoint_name(GRAPHEME_CODEPOINTS(grapheme)[c]);
@@ -1235,10 +1233,10 @@ public Text_t Text$from_bytes(Array_t bytes)
 public Array_t Text$lines(Text_t text)
 {
     Array_t lines = {};
-    TextIter_t state = {0, 0};
+    TextIter_t state = {text, 0, 0};
     for (int64_t i = 0, line_start = 0; i < text.length; i++) {
-        int32_t grapheme = Text$get_grapheme_fast(text, &state, i);
-        if (grapheme == '\r' && Text$get_grapheme_fast(text, &state, i + 1) == '\n') { // CRLF
+        int32_t grapheme = Text$get_grapheme_fast(&state, i);
+        if (grapheme == '\r' && Text$get_grapheme_fast(&state, i + 1) == '\n') { // CRLF
             Text_t line = Text$slice(text, I(line_start+1), I(i));
             Array$insert(&lines, &line, I_small(0), sizeof(Text_t));
             i += 1; // skip one extra for CR
@@ -1268,9 +1266,9 @@ public Pattern_t Pattern$escape_text(Text_t text)
     Array_t graphemes = {.atomic=1};
 #define add_char(c) Array$insert_value(&graphemes, (ucs4_t)c, I_small(0), sizeof(ucs4_t))
 #define add_str(s) ({ for (const char *_c = s; *_c; ++_c) Array$insert_value(&graphemes, (ucs4_t)*_c, I_small(0), sizeof(ucs4_t)); })
-    TextIter_t state = {0, 0};
+    TextIter_t state = {text, 0, 0};
     for (int64_t i = 0; i < text.length; i++) {
-        int32_t g = Text$get_grapheme_fast(text, &state, i);
+        int32_t g = Text$get_grapheme_fast(&state, i);
         ucs4_t g0 = g < 0 ? GRAPHEME_CODEPOINTS(g)[0] : (ucs4_t)g;
 
         if (g == '{') {
diff --git a/stdlib/text.h b/stdlib/text.h
index 841d51fe..bad0187b 100644
--- a/stdlib/text.h
+++ b/stdlib/text.h
@@ -12,6 +12,7 @@
 #include "integers.h"
 
 typedef struct {
+    Text_t text;
     int64_t subtext, sum_of_previous_subtexts;
 } TextIter_t;
 
@@ -53,13 +54,13 @@ Text_t Text$from_bytes(Array_t bytes);
 Array_t Text$lines(Text_t text);
 Text_t Text$join(Text_t glue, Array_t pieces);
 Text_t Text$repeat(Text_t text, Int_t count);
-int32_t Text$get_grapheme_fast(Text_t text, TextIter_t *state, int64_t index);
-ucs4_t Text$get_main_grapheme_fast(Text_t text, TextIter_t *state, int64_t index);
+int32_t Text$get_grapheme_fast(TextIter_t *state, int64_t index);
+ucs4_t Text$get_main_grapheme_fast(TextIter_t *state, int64_t index);
 
 static inline int32_t Text$get_grapheme(Text_t text, int64_t index)
 {
-    TextIter_t state = {0, 0};
-    return Text$get_grapheme_fast(text, &state, index);
+    TextIter_t state = {text, 0, 0};
+    return Text$get_grapheme_fast(&state, index);
 }
 
 extern const TypeInfo Text$info;
author	Bruce Hill <bruce@bruce-hill.com>	2024-09-14 00:12:52 -0400
committer	Bruce Hill <bruce@bruce-hill.com>	2024-09-14 00:12:52 -0400
commit	6012a00763afdd467e71b1657bd9a39a4cba4493 (patch)
tree	6994a7074b4182211c3beec8553c02f638976b7f /stdlib
parent	2b0556084919ace0700e4480f7fa2886cf31b3e4 (diff)