diff options
| -rw-r--r-- | builtins/text.c | 54 |
| -rw-r--r-- | builtins/text.h | 4 |
| -rw-r--r-- | docs/text.md | 19 |
| -rw-r--r-- | environment.c | 4 |
| -rw-r--r-- | test/text.tm | 6 |
5 files changed, 63 insertions, 24 deletions
diff --git a/builtins/text.c b/builtins/text.c index bc12debf..11c5610e 100644 --- a/builtins/text.c +++ b/builtins/text.c @@ -1051,6 +1051,7 @@ int64_t match_uri(Text_t text, int64_t text_index) typedef struct { int64_t index, length; + bool occupied, recursive; } capture_t; int64_t match(Text_t text, Pattern_t pattern, int64_t text_index, int64_t pattern_index, capture_t *captures, int64_t capture_index) @@ -1077,8 +1078,10 @@ int64_t match(Text_t text, Pattern_t pattern, int64_t text_index, int64_t patter // Save this as a capture, including only the interior text: if (captures && capture_index < MAX_BACKREFS) { captures[capture_index++] = (capture_t){ - start_of_quoted_text, - text_index - start_of_quoted_text, + .index=start_of_quoted_text, + .length=text_index - start_of_quoted_text, + .occupied=true, + .recursive=false, }; } @@ -1116,8 +1119,10 @@ int64_t match(Text_t text, Pattern_t pattern, int64_t text_index, int64_t patter // Save this as a capture, including only the interior text: if (captures && capture_index < MAX_BACKREFS) { captures[capture_index++] = (capture_t){ - start_of_interior, - text_index - start_of_interior - 1, + .index=start_of_interior, + .length=text_index - start_of_interior - 1, + .occupied=true, + .recursive=true, }; } @@ -1171,8 +1176,10 @@ int64_t match(Text_t text, Pattern_t pattern, int64_t text_index, int64_t patter #define SUCCESS() ({ \ if (captures && capture_index < MAX_BACKREFS) { \ captures[capture_index++] = (capture_t){ \ - before_group, \ - (text_index - before_group), \ + .index=before_group, \ + .length=(text_index - before_group), \ + .occupied=true, \ + .recursive=false, \ }; \ }; continue; 0; }) if (prop_name) { @@ -1303,8 +1310,10 @@ int64_t match(Text_t text, Pattern_t pattern, int64_t text_index, int64_t patter // Save this as a capture, including only the interior text: if (captures && capture_index < MAX_BACKREFS) { captures[capture_index++] = (capture_t){ - before_group, - (text_index - before_group) + 
match_len, + .index=before_group, + .length=(text_index - before_group) + match_len, + .occupied=true, + .recursive=false, }; } return (text_index - start_index) + match_len; @@ -1340,8 +1349,10 @@ int64_t match(Text_t text, Pattern_t pattern, int64_t text_index, int64_t patter // Save this as a capture, including only the interior text: if (captures && capture_index < MAX_BACKREFS) { captures[capture_index++] = (capture_t){ - before_group, - (text_index - before_group) + match_len, + .index=before_group, + .length=(text_index - before_group) + match_len, + .occupied=true, + .recursive=false, }; } @@ -1525,7 +1536,7 @@ public array_t Text$find_all(Text_t text, Pattern_t pattern) return matches; } -static Text_t apply_backrefs(Text_t text, Text_t replacement, Pattern_t backref_pat, capture_t *captures) +static Text_t apply_backrefs(Text_t text, Pattern_t original_pattern, Text_t replacement, Pattern_t backref_pat, capture_t *captures) { if (backref_pat.length == 0) return replacement; @@ -1563,7 +1574,14 @@ static Text_t apply_backrefs(Text_t text, Text_t replacement, Pattern_t backref_ if (_next_grapheme(replacement, &state, pos + backref_len) == ';') backref_len += 1; // skip optional semicolon + if (!captures[backref].occupied) + fail("There is no capture number %ld!", backref); + Text_t backref_text = Text$slice(text, I(captures[backref].index+1), I(captures[backref].index + captures[backref].length)); + + if (captures[backref].recursive && original_pattern.length > 0) + backref_text = Text$replace(backref_text, original_pattern, replacement, backref_pat, true); + if (pos > nonmatching_pos) { Text_t before_slice = Text$slice(replacement, I(nonmatching_pos+1), I(pos)); ret = Text$concat(ret, before_slice, backref_text); @@ -1581,7 +1599,7 @@ static Text_t apply_backrefs(Text_t text, Text_t replacement, Pattern_t backref_ return ret; } -public Text_t Text$replace(Text_t text, Pattern_t pattern, Text_t replacement, Pattern_t backref_pat) +public Text_t 
Text$replace(Text_t text, Pattern_t pattern, Text_t replacement, Pattern_t backref_pat, bool recursive) { Text_t ret = {.length=0}; @@ -1602,10 +1620,12 @@ public Text_t Text$replace(Text_t text, Pattern_t pattern, Text_t replacement, P capture_t captures[MAX_BACKREFS] = {}; int64_t match_len = match(text, pattern, pos, 0, captures, 1); if (match_len < 0) continue; - captures[0].index = pos; - captures[0].length = match_len; + captures[0] = (capture_t){ + .index = pos, .length = match_len, + .occupied = true, .recursive = false, + }; - Text_t replacement_text = apply_backrefs(text, replacement, backref_pat, captures); + Text_t replacement_text = apply_backrefs(text, recursive ? pattern : Text(""), replacement, backref_pat, captures); if (pos > nonmatching_pos) { Text_t before_slice = Text$slice(text, I(nonmatching_pos+1), I(pos)); ret = Text$concat(ret, before_slice, replacement_text); @@ -1622,7 +1642,7 @@ public Text_t Text$replace(Text_t text, Pattern_t pattern, Text_t replacement, P return ret; } -public Text_t Text$replace_all(Text_t text, table_t replacements, Text_t backref_pat) +public Text_t Text$replace_all(Text_t text, table_t replacements, Text_t backref_pat, bool recursive) { if (replacements.entries.length == 0) return text; @@ -1647,7 +1667,7 @@ public Text_t Text$replace_all(Text_t text, table_t replacements, Text_t backref // Concatenate the replacement: Text_t replacement = *(Text_t*)(replacements.entries.data + i*replacements.entries.stride + sizeof(Text_t)); - Text_t replacement_text = apply_backrefs(text, replacement, backref_pat, captures); + Text_t replacement_text = apply_backrefs(text, recursive ? pattern : Text(""), replacement, backref_pat, captures); ret = concat2(ret, replacement_text); pos += len > 0 ? 
len : 1; nonmatch_pos = pos; diff --git a/builtins/text.h b/builtins/text.h index 96a7b823..b343798d 100644 --- a/builtins/text.h +++ b/builtins/text.h @@ -31,8 +31,8 @@ Text_t Text$lower(Text_t text); Text_t Text$title(Text_t text); Text_t Text$as_text(const void *text, bool colorize, const TypeInfo *info); Text_t Text$quoted(Text_t str, bool colorize); -Text_t Text$replace(Text_t str, Pattern_t pat, Text_t replacement, Pattern_t backref_pat); -Text_t Text$replace_all(Text_t text, table_t replacements, Pattern_t backref_pat); +Text_t Text$replace(Text_t str, Pattern_t pat, Text_t replacement, Pattern_t backref_pat, bool recursive); +Text_t Text$replace_all(Text_t text, table_t replacements, Pattern_t backref_pat, bool recursive); array_t Text$split(Text_t text, Pattern_t pattern); Int_t Text$find(Text_t text, Pattern_t pattern, Int_t i, int64_t *match_length); array_t Text$find_all(Text_t text, Pattern_t pattern); diff --git a/docs/text.md b/docs/text.md index 8131d255..adf12dd1 100644 --- a/docs/text.md +++ b/docs/text.md @@ -840,7 +840,7 @@ See [Patterns](#patterns) for more information about patterns. **Usage:** ```tomo -replace(text: Text, pattern: Text, replacement: Text, backref: Pattern = $/\/) -> Text +replace(text: Text, pattern: Text, replacement: Text, backref: Pattern = $/\/, recursive: Bool = yes) -> Text ``` **Parameters:** @@ -852,6 +852,9 @@ replace(text: Text, pattern: Text, replacement: Text, backref: Pattern = $/\/) - pattern followed by a number replaced with the corresponding backreference. By default, the backreference pattern is a single backslash, so backreferences look like `\0`, `\1`, etc. +- `recursive`: For backreferences of a nested capture, if recursive is set to + `yes`, then the whole replacement will be reapplied recursively to the + backreferenced text if it's used in the replacement. 
**Backreferences** If a backreference pattern is in the replacement, then that backreference is @@ -879,11 +882,18 @@ The text with occurrences of the pattern replaced. >> "Hello world":replace($/{id}/, "\0") = "(Hello) (world)" +>> "Hello world":replace($/{id}/, "(@0)", backref=$/@/) += "(Hello) (world)" + >> "Hello world":replace($/{id} {id}/, "just \2") = "just world" ->> " foo(x, fn(), y) ":replace($/foo(?)/, "baz(\1)") -= " baz(x, fn(), y) " +# Recursive is the default behavior: +>> " BAD(x, BAD(y), z) ":replace($/BAD(?)/, "good(\1)", recursive=yes) += " good(x, good(y), z) " + +>> " BAD(x, BAD(y), z) ":replace($/BAD(?)/, "good(\1)", recursive=no) += " good(x, BAD(y), z) " ``` --- @@ -911,6 +921,9 @@ replace_all(replacements:{Pattern:Text}, backref: Pattern = $/\/) -> Text pattern followed by a number replaced with the corresponding backreference. By default, the backreference pattern is a single backslash, so backreferences look like `\0`, `\1`, etc. +- `recursive`: For backreferences of a nested capture, if recursive is set to + `yes`, then the matching replacement will be reapplied recursively to the + backreferenced text if it's used in the replacement. 
**Returns:** The text with all occurrences of the patterns replaced with their corresponding diff --git a/environment.c b/environment.c index 709f6ac2..d105d2f4 100644 --- a/environment.c +++ b/environment.c @@ -248,8 +248,8 @@ env_t *new_compilation_unit(CORD *libname) {"lines", "Text$lines", "func(text:Text)->[Text]"}, {"lower", "Text$lower", "func(text:Text)->Text"}, {"quoted", "Text$quoted", "func(text:Text, color=no)->Text"}, - {"replace", "Text$replace", "func(text:Text, pattern:Pattern, replacement:Text, placeholder=$/\\/)->Text"}, - {"replace_all", "Text$replace_all", "func(text:Text, replacements:{Pattern:Text}, placeholder=$/\\/)->Text"}, + {"replace", "Text$replace", "func(text:Text, pattern:Pattern, replacement:Text, backref=$/\\/, recursive=yes)->Text"}, + {"replace_all", "Text$replace_all", "func(text:Text, replacements:{Pattern:Text}, backref=$/\\/, recursive=yes)->Text"}, {"split", "Text$split", "func(text:Text, pattern=$Pattern'')->[Text]"}, {"slice", "Text$slice", "func(text:Text, from=1, to=-1)->Text"}, {"title", "Text$title", "func(text:Text)->Text"}, diff --git a/test/text.tm b/test/text.tm index 950805a9..e3848a08 100644 --- a/test/text.tm +++ b/test/text.tm @@ -236,3 +236,9 @@ func main(): >> "<tag>":replace_all({$/</:"&lt;", $/>/:"&gt;"}) = "&lt;tag&gt;" + >> " BAD(x, fn(y), BAD(z), w) ":replace($/BAD(?)/, "good(\1)", recursive=yes) + = " good(x, fn(y), good(z), w) " + + >> " BAD(x, fn(y), BAD(z), w) ":replace($/BAD(?)/, "good(\1)", recursive=no) + = " good(x, fn(y), BAD(z), w) " + |
