Change pattern syntax from [..pat] to {pat}

author: Bruce Hill <bruce@bruce-hill.com> 2024-09-03 14:27:09 -0400
committer: Bruce Hill <bruce@bruce-hill.com> 2024-09-03 14:27:09 -0400
commit: 91c5dc61c18d6e4c9825086b1ee96239010cad76 (patch)
tree: e599377d5d18cc9a3c96259a6b260b2698020749
parent: 64143f0a131a053414e4b73c17bff994522b11c2 (diff)
3 files changed, 203 insertions, 185 deletions
diff --git a/builtins/text.c b/builtins/text.c
index d9da1248..d23f2dc3 100644
--- a/builtins/text.c
+++ b/builtins/text.c
@@ -814,28 +814,39 @@ const char *get_property_name(Text_t text, int64_t *i)
             ++dest;
             if (dest >= name + UNINAME_MAX - 1)
                 break;
-        } else if (dest == name && grapheme >= 0 && grapheme != ']') {
-            // Literal character escape: [..[] --> "LEFT SQUARE BRACKET"
-            name = unicode_character_name(grapheme, name);
-            *i += 1;
-            return name;
         } else {
             break;
         }
         *i += 1;
     }
+
+    while (dest > name && dest[-1] == ' ')
+        *(dest--) = '\0';
+
     if (dest == name) return NULL;
     *dest = '\0';
     return name;
 }
 
-#define EAT1(state, cond) ({\
-        int32_t grapheme = _next_grapheme(text, state, text_index); \
+#define EAT1(text, state, index, cond) ({\
+        int32_t grapheme = _next_grapheme(text, state, index); \
         bool success = (cond); \
-        if (success) text_index += 1; \
+        if (success) index += 1; \
+        success; })
+
+#define EAT2(text, state, index, cond1, cond2) ({\
+        int32_t grapheme = _next_grapheme(text, state, index); \
+        bool success = (cond1); \
+        if (success) { \
+            grapheme = _next_grapheme(text, state, index + 1); \
+            success = (cond2); \
+            if (success) \
+                index += 2; \
+        } \
         success; })
 
-#define EAT_MANY(state, cond) ({ int64_t _n = 0; while (EAT1(state, cond)) { _n += 1; } _n; })
+
+#define EAT_MANY(text, state, index, cond) ({ int64_t _n = 0; while (EAT1(text, state, index, cond)) { _n += 1; } _n; })
 
 int64_t match_email(Text_t text, int64_t text_index)
 {
@@ -858,19 +869,21 @@ int64_t match_email(Text_t text, int64_t text_index)
     // Local part:
     int64_t local_len = 0;
     static const char *allowed_local = "!#$%&‘*+–/=?^_`.{|}~";
-    while (EAT1(&state, (grapheme & ~0x7F) || isalnum((char)grapheme) || strchr(allowed_local, (char)grapheme))) {
+    while (EAT1(text, &state, text_index,
+                (grapheme & ~0x7F) || isalnum((char)grapheme) || strchr(allowed_local, (char)grapheme))) {
         local_len += 1;
         if (local_len > 64) return -1;
     }
     
-    if (!EAT1(&state, grapheme == '@'))
+    if (!EAT1(text, &state, text_index, grapheme == '@'))
         return -1;
 
     // Host
     int64_t host_len = 0;
     do {
         int64_t label_len = 0;
-        while (EAT1(&state, (grapheme & ~0x7F) || isalnum((char)grapheme) || grapheme == '-')) {
+        while (EAT1(text, &state, text_index,
+                    (grapheme & ~0x7F) || isalnum((char)grapheme) || grapheme == '-')) {
             label_len += 1;
             if (label_len > 63) return -1;
         }
@@ -882,7 +895,7 @@ int64_t match_email(Text_t text, int64_t text_index)
         if (host_len > 255)
             return -1;
         host_len += 1;
-    } while (EAT1(&state, grapheme == '.'));
+    } while (EAT1(text, &state, text_index, grapheme == '.'));
 
     return text_index - start_index;
 }
@@ -900,21 +913,21 @@ int64_t match_ipv6(Text_t text, int64_t text_index)
     bool double_colon_used = false;
     for (int cluster = 0; cluster < NUM_CLUSTERS; cluster++) {
         for (int digits = 0; digits < 4; digits++) {
-            if (!EAT1(&state, ~(grapheme & ~0x7F) && isxdigit((char)grapheme)))
+            if (!EAT1(text, &state, text_index, ~(grapheme & ~0x7F) && isxdigit((char)grapheme)))
                 break;
         }
-        if (EAT1(&state, ~(grapheme & ~0x7F) && isxdigit((char)grapheme)))
+        if (EAT1(text, &state, text_index, ~(grapheme & ~0x7F) && isxdigit((char)grapheme)))
             return -1; // Too many digits
 
         if (cluster == NUM_CLUSTERS-1) {
             break;
-        } else if (!EAT1(&state, grapheme == ':')) {
+        } else if (!EAT1(text, &state, text_index, grapheme == ':')) {
             if (double_colon_used)
                 break;
             return -1;
         }
 
-        if (EAT1(&state, grapheme == ':')) {
+        if (EAT1(text, &state, text_index, grapheme == ':')) {
             if (double_colon_used)
                 return -1;
             double_colon_used = true;
@@ -936,18 +949,18 @@ static int64_t match_ipv4(Text_t text, int64_t text_index)
     const int NUM_CLUSTERS = 4;
     for (int cluster = 0; cluster < NUM_CLUSTERS; cluster++) {
         for (int digits = 0; digits < 3; digits++) {
-            if (!EAT1(&state, ~(grapheme & ~0x7F) && isdigit((char)grapheme))) {
+            if (!EAT1(text, &state, text_index, ~(grapheme & ~0x7F) && isdigit((char)grapheme))) {
                 if (digits == 0) return -1;
                 break;
             }
         }
 
-        if (EAT1(&state, ~(grapheme & ~0x7F) && isdigit((char)grapheme)))
+        if (EAT1(text, &state, text_index, ~(grapheme & ~0x7F) && isdigit((char)grapheme)))
             return -1; // Too many digits
 
         if (cluster == NUM_CLUSTERS-1)
             break;
-        else if (!EAT1(&state, grapheme == '.'))
+        else if (!EAT1(text, &state, text_index, grapheme == '.'))
             return -1;
     }
     return (text_index - start_index);
@@ -971,10 +984,11 @@ int64_t match_uri(Text_t text, int64_t text_index)
     int64_t start_index = text_index;
 
     // Scheme:
-    if (!EAT1(&state, isalpha(grapheme)))
+    if (!EAT1(text, &state, text_index, isalpha(grapheme)))
         return -1;
 
-    EAT_MANY(&state, !(grapheme & ~0x7F) && (isalnum(grapheme) || grapheme == '+' || grapheme == '.' || grapheme == '-'));
+    EAT_MANY(text, &state, text_index,
+             !(grapheme & ~0x7F) && (isalnum(grapheme) || grapheme == '+' || grapheme == '.' || grapheme == '-'));
 
     if (text_index == start_index)
         return -1;
@@ -987,12 +1001,12 @@ int64_t match_uri(Text_t text, int64_t text_index)
         int64_t authority_start = text_index;
         // Username or host:
         static const char *forbidden = "#?:@ \t\r\n<>[]{}\\^|\"`/";
-        if (EAT_MANY(&state, (grapheme & ~0x7F) || !strchr(forbidden, (char)grapheme)) == 0)
+        if (EAT_MANY(text, &state, text_index, (grapheme & ~0x7F) || !strchr(forbidden, (char)grapheme)) == 0)
             return -1;
 
-        if (EAT1(&state, grapheme == '@')) {
+        if (EAT1(text, &state, text_index, grapheme == '@')) {
             // Found a username, now get a host:
-            if (EAT_MANY(&state, (grapheme & ~0x7F) || !strchr(forbidden, (char)grapheme)) == 0)
+            if (EAT_MANY(text, &state, text_index, (grapheme & ~0x7F) || !strchr(forbidden, (char)grapheme)) == 0)
                 return -1;
         } else {
             int64_t ip = authority_start;
@@ -1007,29 +1021,29 @@ int64_t match_uri(Text_t text, int64_t text_index)
         }
 
         // Port:
-        if (EAT1(&state, grapheme == ':')) {
-            if (EAT_MANY(&state, !(grapheme & ~0x7F) && isdigit(grapheme)) == 0)
+        if (EAT1(text, &state, text_index, grapheme == ':')) {
+            if (EAT_MANY(text, &state, text_index, !(grapheme & ~0x7F) && isdigit(grapheme)) == 0)
                 return -1;
         }
-        if (!EAT1(&state, grapheme == '/'))
+        if (!EAT1(text, &state, text_index, grapheme == '/'))
             return (text_index - start_index); // No path
     } else {
         // Optional path root:
-        EAT1(&state, grapheme == '/');
+        EAT1(text, &state, text_index, grapheme == '/');
     }
 
     // Path:
     static const char *non_path = " \"#?<>[]{}\\^`|";
-    EAT_MANY(&state, (grapheme & ~0x7F) || !strchr(non_path, (char)grapheme));
+    EAT_MANY(text, &state, text_index, (grapheme & ~0x7F) || !strchr(non_path, (char)grapheme));
 
-    if (EAT1(&state, grapheme == '?')) { // Query
+    if (EAT1(text, &state, text_index, grapheme == '?')) { // Query
         static const char *non_query = " \"#<>[]{}\\^`|";
-        EAT_MANY(&state, (grapheme & ~0x7F) || !strchr(non_query, (char)grapheme));
+        EAT_MANY(text, &state, text_index, (grapheme & ~0x7F) || !strchr(non_query, (char)grapheme));
     }
     
-    if (EAT1(&state, grapheme == '#')) { // Fragment
+    if (EAT1(text, &state, text_index, grapheme == '#')) { // Fragment
         static const char *non_fragment = " \"#<>[]{}\\^`|";
-        EAT_MANY(&state, (grapheme & ~0x7F) || !strchr(non_fragment, (char)grapheme));
+        EAT_MANY(text, &state, text_index, (grapheme & ~0x7F) || !strchr(non_fragment, (char)grapheme));
     }
     return text_index - start_index;
 }
@@ -1041,7 +1055,49 @@ int64_t match(Text_t text, Pattern_t pattern, int64_t text_index, int64_t patter
     iteration_state_t pattern_state = {0, 0}, text_state = {0, 0};
     while (pattern_index < pattern.length) {
         int64_t old_pat_index = pattern_index;
-        if (match_str(pattern, &pattern_index, "[..")) {
+        if (EAT2(pattern, &pattern_state, pattern_index,
+                 uc_is_property(grapheme, UC_PROPERTY_QUOTATION_MARK),
+                 grapheme == '?')) {
+            // Quotations: "?", '?', etc
+            int32_t open = _next_grapheme(pattern, &pattern_state, pattern_index-2);
+            if (!match_grapheme(text, &text_index, open)) return -1;
+            int32_t close = open;
+            uc_mirror_char(open, (uint32_t*)&close);
+            if (!match_grapheme(pattern, &pattern_index, close))
+                fail("Pattern's closing brace is missing: %k", &pattern);
+            while (text_index < text.length) {
+                int32_t c = _next_grapheme(text, &text_state, text_index);
+                if (c == close)
+                    return (text_index - start_index);
+
+                if (c == '\\' && text_index < text.length) {
+                    text_index += 2;
+                } else {
+                    text_index += 1;
+                }
+            }
+            return -1;
+        } else if (EAT2(pattern, &pattern_state, pattern_index,
+                        uc_is_property(grapheme, UC_PROPERTY_PAIRED_PUNCTUATION),
+                        grapheme == '?')) {
+            // Nested punctuation: (?), [?], etc
+            int32_t open = _next_grapheme(pattern, &pattern_state, pattern_index-2);
+            if (!match_grapheme(text, &text_index, open)) return -1;
+            int32_t close = open;
+            uc_mirror_char(open, (uint32_t*)&close);
+            if (!match_grapheme(pattern, &pattern_index, close))
+                fail("Pattern's closing brace is missing: %k", &pattern);
+            int64_t depth = 1;
+            for (; depth > 0 && text_index < text.length; ++text_index) {
+                int32_t c = _next_grapheme(text, &text_state, text_index);
+                if (c == open)
+                    depth += 1;
+                else if (c == close)
+                    depth -= 1;
+            }
+            if (depth > 0) return -1;
+        } else if (EAT1(pattern, &pattern_state, pattern_index,
+                        grapheme == '{')) { // named patterns {id}, {2-3 hex}, etc.
             skip_whitespace(pattern, &pattern_index);
             int64_t min, max;
             if (uc_is_digit(_next_grapheme(pattern, &pattern_state, pattern_index))) {
@@ -1059,21 +1115,42 @@ int64_t match(Text_t text, Pattern_t pattern, int64_t text_index, int64_t patter
             }
 
             skip_whitespace(pattern, &pattern_index);
-            bool want_to_match = !match_grapheme(pattern, &pattern_index, '!');
-            const char *prop_name = get_property_name(pattern, &pattern_index);
 
-            skip_whitespace(pattern, &pattern_index);
-            if (!match_grapheme(pattern, &pattern_index, ']'))
-                fail("Missing closing ']' in pattern: %k", &pattern);
-
-            int64_t before_group = text_index;
             bool any = false;
             uc_property_t prop;
             int32_t specific_codepoint = UNINAME_INVALID;
+            bool want_to_match = !match_grapheme(pattern, &pattern_index, '!');
+            const char *prop_name;
+            if (match_str(pattern, &pattern_index, ".."))
+                prop_name = "..";
+            else
+                prop_name = get_property_name(pattern, &pattern_index);
+
+            if (!prop_name) {
+                // Literal character, e.g. {1?}
+                specific_codepoint = _next_grapheme(pattern, &pattern_state, pattern_index);
+                pattern_index += 1;
+            } else if (strlen(prop_name) == 1) {
+                // Single letter names: {1+ A}
+                specific_codepoint = prop_name[0];
+                prop_name = NULL;
+            }
 
+            skip_whitespace(pattern, &pattern_index);
+            if (!match_grapheme(pattern, &pattern_index, '}'))
+                fail("Missing closing '}' in pattern: %k", &pattern);
+
+            int64_t before_group = text_index;
 #define FAIL() ({ if (min < 1) { text_index = before_group; continue; } else { return -1; } })
             if (prop_name) {
                 switch (tolower(prop_name[0])) {
+                case '.':
+                    if (prop_name[1] == '.') {
+                        any = true;
+                        prop = UC_PROPERTY_PRIVATE_USE;
+                        break;
+                    }
+                    break;
                 case 'd':
                     if (strcasecmp(prop_name, "digit") == 0) {
                         prop = UC_PROPERTY_DECIMAL_DIGIT;
@@ -1098,13 +1175,16 @@ int64_t match(Text_t text, Pattern_t pattern, int64_t text_index, int64_t patter
                     break;
                 case 'i':
                     if (prop_name && strcasecmp(prop_name, "id") == 0) {
-                        if (!EAT1(&text_state, uc_is_property(grapheme, UC_PROPERTY_XID_START)))
+                        if (!EAT1(text, &text_state, text_index,
+                                  uc_is_property(grapheme, UC_PROPERTY_XID_START)))
                             FAIL();
-                        EAT_MANY(&text_state, uc_is_property(grapheme, UC_PROPERTY_XID_CONTINUE));
+                        EAT_MANY(text, &text_state, text_index,
+                                 uc_is_property(grapheme, UC_PROPERTY_XID_CONTINUE));
                         continue;
                     } else if (prop_name && strcasecmp(prop_name, "int") == 0) {
-                        EAT1(&text_state, grapheme == '-');
-                        int64_t n = EAT_MANY(&text_state, uc_is_property(grapheme, UC_PROPERTY_DECIMAL_DIGIT));
+                        EAT1(text, &text_state, text_index, grapheme == '-');
+                        int64_t n = EAT_MANY(text, &text_state, text_index,
+                                             uc_is_property(grapheme, UC_PROPERTY_DECIMAL_DIGIT));
                         if (n <= 0)
                             FAIL();
                         continue;
@@ -1132,10 +1212,12 @@ int64_t match(Text_t text, Pattern_t pattern, int64_t text_index, int64_t patter
                     break;
                 case 'n':
                     if (prop_name && strcasecmp(prop_name, "num") == 0) {
-                        EAT1(&text_state, grapheme == '-');
-                        int64_t pre_decimal = EAT_MANY(&text_state, uc_is_property(grapheme, UC_PROPERTY_DECIMAL_DIGIT));
-                        bool decimal = (EAT1(&text_state, grapheme == '.') == 1);
-                        int64_t post_decimal = decimal ? EAT_MANY(&text_state, uc_is_property(grapheme, UC_PROPERTY_DECIMAL_DIGIT)) : 0;
+                        EAT1(text, &text_state, text_index, grapheme == '-');
+                        int64_t pre_decimal = EAT_MANY(text, &text_state, text_index,
+                                                       uc_is_property(grapheme, UC_PROPERTY_DECIMAL_DIGIT));
+                        bool decimal = (EAT1(text, &text_state, text_index, grapheme == '.') == 1);
+                        int64_t post_decimal = decimal ? EAT_MANY(text, &text_state, text_index,
+                                                                  uc_is_property(grapheme, UC_PROPERTY_DECIMAL_DIGIT)) : 0;
                         if (pre_decimal == 0 && post_decimal == 0)
                             FAIL();
                         continue;
@@ -1178,9 +1260,6 @@ int64_t match(Text_t text, Pattern_t pattern, int64_t text_index, int64_t patter
                     if (specific_codepoint == UNINAME_INVALID)
                         fail("Not a valid property or character name: %s", prop_name);
                 }
-            } else {
-                any = true;
-                prop = UC_PROPERTY_PRIVATE_USE;
             }
       got_prop:;
 
@@ -1222,80 +1301,16 @@ int64_t match(Text_t text, Pattern_t pattern, int64_t text_index, int64_t patter
                     }
                 }
             }
-        } else if (uc_is_property(_next_grapheme(pattern, &pattern_state, pattern_index), UC_PROPERTY_QUOTATION_MARK)
-                   && (pattern_index += 1, match_grapheme(pattern, &pattern_index, '?'))) {
-            // Quotation: "?", '?', etc
-            int32_t open = _next_grapheme(pattern, &pattern_state, pattern_index-2);
-            if (!match_grapheme(text, &text_index, open)) return -1;
-            int32_t close = open;
-            uc_mirror_char(open, (uint32_t*)&close);
-            if (!match_grapheme(pattern, &pattern_index, close))
-                fail("Pattern's closing brace is missing: %k", &pattern);
-            while (text_index < text.length) {
-                int32_t c = _next_grapheme(text, &text_state, text_index);
-                if (c == close)
-                    return (text_index - start_index);
-
-                if (c == '\\' && text_index < text.length) {
-                    text_index += 2;
-                } else {
-                    text_index += 1;
-                }
-            }
-            return -1;
-        } else if (uc_is_property(_next_grapheme(pattern, &pattern_state, pattern_index), UC_PROPERTY_PAIRED_PUNCTUATION)
-                   && (pattern_index += 1, match_grapheme(pattern, &pattern_index, '?'))) {
-            // Nested punctuation: (?), [?], etc
-            int32_t open = _next_grapheme(pattern, &pattern_state, pattern_index-2);
-            if (!match_grapheme(text, &text_index, open)) return -1;
-            int32_t close = open;
-            uc_mirror_char(open, (uint32_t*)&close);
-            if (!match_grapheme(pattern, &pattern_index, close))
-                fail("Pattern's closing brace is missing: %k", &pattern);
-            int64_t depth = 1;
-            for (; depth > 0 && text_index < text.length; ++text_index) {
-                int32_t c = _next_grapheme(text, &text_state, text_index);
-                if (c == open)
-                    depth += 1;
-                else if (c == close)
-                    depth -= 1;
-            }
-            if (depth > 0) return -1;
         } else {
             // Plain character:
             pattern_index = old_pat_index;
             int32_t pat_grapheme = _next_grapheme(pattern, &pattern_state, pattern_index);
-
-            // if (pattern_index == 0 && text_index > 0) {
-            //     int32_t pat_codepoint = pat_grapheme;
-            //     if (pat_codepoint < 0)
-            //         pat_codepoint = synthetic_graphemes[-pat_codepoint-1].codepoints[0];
-
-            //     int32_t prev_codepoint = _next_grapheme(text, &text_state, text_index - 1);
-            //     if (prev_codepoint < 0)
-            //         prev_codepoint = synthetic_graphemes[-prev_codepoint-1].codepoints[0];
-            //     if (uc_is_property_alphabetic(pat_codepoint) && uc_is_property_alphabetic(prev_codepoint))
-            //         return -1;
-            // }
-
             int32_t text_grapheme = _next_grapheme(text, &text_state, text_index);
             if (pat_grapheme != text_grapheme)
                 return -1;
 
             pattern_index += 1;
             text_index += 1;
-
-            // if (pattern_index == pattern.length && text_index < text.length) {
-            //     int32_t pat_codepoint = pat_grapheme;
-            //     if (pat_codepoint < 0)
-            //         pat_codepoint = synthetic_graphemes[-pat_codepoint-1].codepoints[0];
-
-            //     int32_t next_codepoint = _next_grapheme(text, &text_state, text_index);
-            //     if (next_codepoint < 0)
-            //         next_codepoint = synthetic_graphemes[-next_codepoint-1].codepoints[0];
-            //     if (uc_is_property_alphabetic(pat_codepoint) && uc_is_property_alphabetic(next_codepoint))
-            //         return -1;
-            // }
         }
     }
     if (text_index >= text.length && pattern_index < pattern.length)
@@ -1304,6 +1319,7 @@ int64_t match(Text_t text, Pattern_t pattern, int64_t text_index, int64_t patter
 }
 
 #undef EAT1
+#undef EAT2
 #undef EAT_MANY
 
 public Int_t Text$find(Text_t text, Pattern_t pattern, Int_t from_index, int64_t *match_length)
@@ -1315,7 +1331,7 @@ public Int_t Text$find(Text_t text, Pattern_t pattern, Int_t from_index, int64_t
         return I_small(0);
 
     int32_t first_grapheme = get_grapheme(pattern, 0);
-    bool find_first = (first_grapheme != '['
+    bool find_first = (first_grapheme != '{'
                        && !uc_is_property(first_grapheme, UC_PROPERTY_QUOTATION_MARK)
                        && !uc_is_property(first_grapheme, UC_PROPERTY_PAIRED_PUNCTUATION));
 
@@ -1677,16 +1693,14 @@ public Pattern_t Pattern$escape_text(Text_t text)
         int32_t g = _next_grapheme(text, &state, i);
         uint32_t g0 = g < 0 ? synthetic_graphemes[-g-1].codepoints[0] : (uint32_t)g;
 
-        if (g == '[') {
-            add_str("[..1[]");
-        } else if (uc_is_property_quotation_mark(g0)) {
-            add_str("[..1");
-            add_char(g);
-            add_char(']');
-        } else if (uc_is_property_paired_punctuation(g0)) {
-            add_str("[..1");
+        if (g == '{') {
+            add_str("{1{}");
+        } else if (uc_is_property_quotation_mark(g0)
+                   || (uc_is_property_paired_punctuation(g0) && uc_is_property_left_of_pair(g0))) {
+            add_char('{');
+            add_char('1');
             add_char(g);
-            add_char(']');
+            add_char('}');
         } else {
             add_char(g);
         }
diff --git a/docs/text.md b/docs/text.md
index 855c3c6c..cf60f6a3 100644
--- a/docs/text.md
+++ b/docs/text.md
@@ -284,9 +284,9 @@ See [Text Functions](#Text-Functions) for the full API documentation.
 
 Patterns have three types of syntax:
 
-- `[..` followed by an optional count (`n`, `n-m`, or `n+`), followed by an
+- `{` followed by an optional count (`n`, `n-m`, or `n+`), followed by an
   optional `!` to negate the pattern, followed by an optional pattern name or
-  Unicode character name, followed by a required `]`.
+  Unicode character name, followed by a required `}`.
 
 - Any matching pair of quotes or parentheses or braces with a `?` in the middle
   (e.g. `"?"` or `(?)`).
@@ -296,10 +296,11 @@ Patterns have three types of syntax:
 ## Named Patterns
 
 Named patterns match certain pre-defined patterns that are commonly useful. To
-use a named pattern, use the syntax `[..name]`. Names are case-insensitive and
+use a named pattern, use the syntax `{name}`. Names are case-insensitive and
 mostly ignore spaces, underscores, and dashes.
 
-- ` ` - If no name is given, any character is accepted.
+- `..` - Any character (note that a single `.` would mean the literal period
+  character).
 - `digit` - A unicode digit
 - `email` - an email address
 - `emoji` - an emoji
@@ -315,8 +316,8 @@ mostly ignore spaces, underscores, and dashes.
 - `url` - a URL (URI that specifically starts with `http://`, `https://`, `ws://`, `wss://`, or `ftp://`)
 
 For non-alphabetic characters, any single character is treated as matching
-exactly that character. For example, `[..1 []` matches exactly one `[`
-character. Or, `[..1 (]` matches exactly one `(` character.
+exactly that character. For example, `{1{}` matches exactly one `{`
+character. Or, `{1.}` matches exactly one `.` character.
 
 Patterns can also use any Unicode property name. Some helpful ones are:
 
@@ -326,37 +327,37 @@ Patterns can also use any Unicode property name. Some helpful ones are:
 - `upper` - Uppercase letters
 - `whitespace` - Whitespace characters
 
-Patterns may also use exact Unicode codepoint names. For example: `[..1 latin
-small letter A]` matches `a`.
+Patterns may also use exact Unicode codepoint names. For example: `{1 latin
+small letter A}` matches `a`.
 
 ## Negating Patterns
 
 If an exclamation mark (`!`) is placed before a pattern's name, then characters
-are matched only when they _don't_ match the pattern. For example, `[..!alpha]`
+are matched only when they _don't_ match the pattern. For example, `{!alpha}`
 will match all characters _except_ alphabetic ones.
 
 ## Interpolating Text and Escaping
 
 To escape a character in a pattern (e.g. if you want to match the literal
-character `?`), you can use the syntax `[..1 ?]`. This is almost never
-necessary unless you have text that looks like a Tomo text pattern and has
-something like `[..` or `(?)` inside it.
+character `?`), you can use the syntax `{1 ?}`. This is almost never necessary
+unless you have text that looks like a Tomo text pattern and has something like
+`{` or `(?)` inside it.
 
 However, if you're trying to do an exact match of arbitrary text values, you'll
 want to have the text automatically escaped. Fortunately, Tomo's injection-safe
 DSL text interpolation supports automatic text escaping. This means that if you
 use text interpolation with the `$` sign to insert a text value, the value will
-be automatically escaped using the `[..1 ?]` rule described above:
+be automatically escaped using the `{1 ?}` rule described above:
 
 ```tomo
 # Risk of code injection (would cause an error because 'xxx' is not a valid
 # pattern name:
 >> user_input := get_user_input()
-= "[..xxx]"
+= "{xxx}"
 
 # Interpolation automatically escapes:
 >> $/$user_input/
-= $/[..1 []..xxx]/
+= $/{1{}..xxx}/
 
 # No error:
 >> some_text:find($/$user_input/)
@@ -366,8 +367,8 @@ be automatically escaped using the `[..1 ?]` rule described above:
 If you prefer, you can also use this to insert literal characters:
 
 ```tomo
->> $/literal $"[..]"/
-= $/literal [..1]]..]/
+>> $/literal $"{..}"/
+= $/literal {1{}..}/
 ```
 
 ## Repetitions
@@ -378,11 +379,11 @@ many repetitions you want by putting a number or range of numbers first using
 (`n` or more repetitions):
 
 ```
-[..4-5 alpha]
-0x[..hex]
-[..4 digit]-[..2 digit]-[..2 digit]
-[..2+ space]
-[..0-1 question mark]
+{4-5 alpha}
+0x{hex}
+{4 digit}-{2 digit}-{2 digit}
+{2+ space}
+{0-1 question mark}
 ```
 
 # Text Functions
@@ -625,17 +626,17 @@ found.
 
 **Example:**  
 ```tomo
->> " one   two  three   ":find("[..id]", start=-999)
+>> " one   two  three   ":find("{id}", start=-999)
 = 0
->> " one   two  three   ":find("[..id]", start=999)
+>> " one   two  three   ":find("{id}", start=999)
 = 0
->> " one   two  three   ":find("[..id]")
+>> " one   two  three   ":find("{id}")
 = 2
->> " one   two  three   ":find("[..id]", start=5)
+>> " one   two  three   ":find("{id}", start=5)
 = 8
 
 >> len := 0_i64
->> "   one  ":find("[..id]", length=&len)
+>> "   one  ":find("{id}", length=&len)
 = 4
 >> len
 = 3_i64
@@ -665,16 +666,16 @@ Note: if `text` or `pattern` is empty, an empty array will be returned.
 
 **Example:**  
 ```tomo
->> " one  two three   ":find_all("[..alpha]")
+>> " one  two three   ":find_all("{alpha}")
 = ["one", "two", "three"]
 
->> " one  two three   ":find_all("[..!space]")
+>> " one  two three   ":find_all("{!space}")
 = ["one", "two", "three"]
 
->> "    ":find_all("[..alpha]")
+>> "    ":find_all("{alpha}")
 = []
 
->> " foo(baz(), 1)  doop() ":find_all("[..id](?)")
+>> " foo(baz(), 1)  doop() ":find_all("{id}(?)")
 = ["foo(baz(), 1)", "doop()"]
 
 >> "":find_all("")
@@ -708,11 +709,11 @@ has(text: Text, pattern: Text) -> Bool
 ```tomo
 >> "hello world":has("wo")
 = yes
->> "hello world":has("[..alpha]")
+>> "hello world":has("{alpha}")
 = yes
->> "hello world":has("[..digit]")
+>> "hello world":has("{digit}")
 = no
->> "hello world":has("[..start]he")
+>> "hello world":has("{start}he")
 = yes
 ```
 
@@ -854,7 +855,7 @@ The text with occurrences of the pattern replaced.
 >> "Hello world":replace("world", "there")
 = "Hello there"
 
->> "Hello world":replace("[..id]", "xxx")
+>> "Hello world":replace("{id}", "xxx")
 = "xxx xxx"
 ```
 
@@ -888,7 +889,7 @@ An array of substrings resulting from the split.
 >> "abc":split()
 = ["a", "b", "c"]
 
->> "a    b  c":split("[..space]")
+>> "a    b  c":split("{space}")
 = ["a", "b", "c"]
 
 >> "a,b,c,":split(",")
diff --git a/test/text.tm b/test/text.tm
index 3049af99..52c74afd 100644
--- a/test/text.tm
+++ b/test/text.tm
@@ -45,23 +45,23 @@ func main():
 
 	>> "Hello":has($/l/)
 	= yes
-	>> "Hello":has($/l[..end]/)
+	>> "Hello":has($/l{end}/)
 	= no
-	>> "Hello":has($/[..start]l/)
+	>> "Hello":has($/{start}l/)
 	= no
 
 	>> "Hello":has($/o/)
 	= yes
-	>> "Hello":has($/o[..end]/)
+	>> "Hello":has($/o{end}/)
 	= yes
-	>> "Hello":has($/[..start]o/)
+	>> "Hello":has($/{start}o/)
 	= no
 
 	>> "Hello":has($/H/)
 	= yes
-	>> "Hello":has($/H[..end]/)
+	>> "Hello":has($/H{end}/)
 	= no
-	>> "Hello":has($/[..start]H/)
+	>> "Hello":has($/{start}H/)
 	= yes
 
 	>> "Hello":replace($/l/, "")
@@ -73,9 +73,9 @@ func main():
 	>> "One two three four five six":replace($/e /, "")
 	= "Ontwo threfour fivsix"
 
-	>> " one ":replace($/[..start][..space]/, "")
+	>> " one ":replace($/{start}{space}/, "")
 	= "one "
-	>> " one ":replace($/[..space][..end]/, "")
+	>> " one ":replace($/{space}{end}/, "")
 	= " one"
 
 	>> amelie:has($/$amelie2/)
@@ -104,9 +104,9 @@ func main():
 	>> $(one (nested) two $(1+2))
 	= "one (nested) two 3"
 
-	>> "one two three":replace($/[..alpha]/, "")
+	>> "one two three":replace($/{alpha}/, "")
 	= "  "
-	>> "one two three":replace($/[..alpha]/, "word")
+	>> "one two three":replace($/{alpha}/, "word")
 	= "word word word"
 
 	>> c := "É̩"
@@ -137,7 +137,7 @@ func main():
 	>> "one,two,three,":split($/,/)
 	= ["one", "two", "three", ""]
 
-	>> "one    two three":split($/[..space]/)
+	>> "one    two three":split($/{space}/)
 	= ["one", "two", "three"]
 
 	>> "abc":split($//)
@@ -159,16 +159,16 @@ func main():
 	= []
 
 	!! Test text:find_all()
-	>> " one  two three   ":find_all($/[..alpha]/)
+	>> " one  two three   ":find_all($/{alpha}/)
 	= ["one", "two", "three"]
 
-	>> " one  two three   ":find_all($/[..!space]/)
+	>> " one  two three   ":find_all($/{!space}/)
 	= ["one", "two", "three"]
 
-	>> "    ":find_all($/[..alpha]/)
+	>> "    ":find_all($/{alpha}/)
 	= []
 
-	>> " foo(baz(), 1)  doop() ":find_all($/[..id](?)/)
+	>> " foo(baz(), 1)  doop() ":find_all($/{id}(?)/)
 	= ["foo(baz(), 1)", "doop()"]
 
 	>> "":find_all($Pattern'')
@@ -178,17 +178,17 @@ func main():
 	= []
 
 	!! Test text:find()
-	>> " one   two  three   ":find($/[..id]/, start=-999)
+	>> " one   two  three   ":find($/{id}/, start=-999)
 	= 0
-	>> " one   two  three   ":find($/[..id]/, start=999)
+	>> " one   two  three   ":find($/{id}/, start=999)
 	= 0
-	>> " one   two  three   ":find($/[..id]/)
+	>> " one   two  three   ":find($/{id}/)
 	= 2
-	>> " one   two  three   ":find($/[..id]/, start=5)
+	>> " one   two  three   ":find($/{id}/, start=5)
 	= 8
 
 	>> len := 0_i64
-	>> "   one  ":find($/[..id]/, length=&len)
+	>> "   one  ":find($/{id}/, length=&len)
 	= 4
 	>> len
 	= 3_i64
@@ -220,8 +220,11 @@ func main():
 	>> Text.from_codepoint_names(["not a valid name here buddy"])
 	= ""
 
-	>> malicious := "[..xxx"
+	>> "one two; three four":find_all($/; {..}/)
+	= ["; three four"]
+
+	>> malicious := "{xxx}"
 	>> $/$malicious/
-	= $/[..1[]..xxx/
+	= $/{1{}xxx}/
author	Bruce Hill <bruce@bruce-hill.com>	2024-09-03 14:27:09 -0400
committer	Bruce Hill <bruce@bruce-hill.com>	2024-09-03 14:27:09 -0400
commit	91c5dc61c18d6e4c9825086b1ee96239010cad76 (patch)
tree	e599377d5d18cc9a3c96259a6b260b2698020749
parent	64143f0a131a053414e4b73c17bff994522b11c2 (diff)