aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBruce Hill <bruce@bruce-hill.com>2024-09-03 23:16:45 -0400
committerBruce Hill <bruce@bruce-hill.com>2024-09-03 23:16:45 -0400
commitb8bb4ada8b28e761f09e40618550684fae80249f (patch)
treebc79a181433206b55f367d6b1cb9558334aa5477
parent02dbcbf8b5f3b4aecec78a59eea7528e5f1f3661 (diff)
Add recursive mode to text replacement and update docs
-rw-r--r--builtins/text.c54
-rw-r--r--builtins/text.h4
-rw-r--r--docs/text.md19
-rw-r--r--environment.c4
-rw-r--r--test/text.tm6
5 files changed, 63 insertions, 24 deletions
diff --git a/builtins/text.c b/builtins/text.c
index bc12debf..11c5610e 100644
--- a/builtins/text.c
+++ b/builtins/text.c
@@ -1051,6 +1051,7 @@ int64_t match_uri(Text_t text, int64_t text_index)
typedef struct {
int64_t index, length;
+ bool occupied, recursive;
} capture_t;
int64_t match(Text_t text, Pattern_t pattern, int64_t text_index, int64_t pattern_index, capture_t *captures, int64_t capture_index)
@@ -1077,8 +1078,10 @@ int64_t match(Text_t text, Pattern_t pattern, int64_t text_index, int64_t patter
// Save this as a capture, including only the interior text:
if (captures && capture_index < MAX_BACKREFS) {
captures[capture_index++] = (capture_t){
- start_of_quoted_text,
- text_index - start_of_quoted_text,
+ .index=start_of_quoted_text,
+ .length=text_index - start_of_quoted_text,
+ .occupied=true,
+ .recursive=false,
};
}
@@ -1116,8 +1119,10 @@ int64_t match(Text_t text, Pattern_t pattern, int64_t text_index, int64_t patter
// Save this as a capture, including only the interior text:
if (captures && capture_index < MAX_BACKREFS) {
captures[capture_index++] = (capture_t){
- start_of_interior,
- text_index - start_of_interior - 1,
+ .index=start_of_interior,
+ .length=text_index - start_of_interior - 1,
+ .occupied=true,
+ .recursive=true,
};
}
@@ -1171,8 +1176,10 @@ int64_t match(Text_t text, Pattern_t pattern, int64_t text_index, int64_t patter
#define SUCCESS() ({ \
if (captures && capture_index < MAX_BACKREFS) { \
captures[capture_index++] = (capture_t){ \
- before_group, \
- (text_index - before_group), \
+ .index=before_group, \
+ .length=(text_index - before_group), \
+ .occupied=true, \
+ .recursive=false, \
}; \
}; continue; 0; })
if (prop_name) {
@@ -1303,8 +1310,10 @@ int64_t match(Text_t text, Pattern_t pattern, int64_t text_index, int64_t patter
// Save this as a capture, including only the interior text:
if (captures && capture_index < MAX_BACKREFS) {
captures[capture_index++] = (capture_t){
- before_group,
- (text_index - before_group) + match_len,
+ .index=before_group,
+ .length=(text_index - before_group) + match_len,
+ .occupied=true,
+ .recursive=false,
};
}
return (text_index - start_index) + match_len;
@@ -1340,8 +1349,10 @@ int64_t match(Text_t text, Pattern_t pattern, int64_t text_index, int64_t patter
// Save this as a capture, including only the interior text:
if (captures && capture_index < MAX_BACKREFS) {
captures[capture_index++] = (capture_t){
- before_group,
- (text_index - before_group) + match_len,
+ .index=before_group,
+ .length=(text_index - before_group) + match_len,
+ .occupied=true,
+ .recursive=false,
};
}
@@ -1525,7 +1536,7 @@ public array_t Text$find_all(Text_t text, Pattern_t pattern)
return matches;
}
-static Text_t apply_backrefs(Text_t text, Text_t replacement, Pattern_t backref_pat, capture_t *captures)
+static Text_t apply_backrefs(Text_t text, Pattern_t original_pattern, Text_t replacement, Pattern_t backref_pat, capture_t *captures)
{
if (backref_pat.length == 0)
return replacement;
@@ -1563,7 +1574,14 @@ static Text_t apply_backrefs(Text_t text, Text_t replacement, Pattern_t backref_
if (_next_grapheme(replacement, &state, pos + backref_len) == ';')
backref_len += 1; // skip optional semicolon
+ if (!captures[backref].occupied)
+ fail("There is no capture number %ld!", backref);
+
Text_t backref_text = Text$slice(text, I(captures[backref].index+1), I(captures[backref].index + captures[backref].length));
+
+ if (captures[backref].recursive && original_pattern.length > 0)
+ backref_text = Text$replace(backref_text, original_pattern, replacement, backref_pat, true);
+
if (pos > nonmatching_pos) {
Text_t before_slice = Text$slice(replacement, I(nonmatching_pos+1), I(pos));
ret = Text$concat(ret, before_slice, backref_text);
@@ -1581,7 +1599,7 @@ static Text_t apply_backrefs(Text_t text, Text_t replacement, Pattern_t backref_
return ret;
}
-public Text_t Text$replace(Text_t text, Pattern_t pattern, Text_t replacement, Pattern_t backref_pat)
+public Text_t Text$replace(Text_t text, Pattern_t pattern, Text_t replacement, Pattern_t backref_pat, bool recursive)
{
Text_t ret = {.length=0};
@@ -1602,10 +1620,12 @@ public Text_t Text$replace(Text_t text, Pattern_t pattern, Text_t replacement, P
capture_t captures[MAX_BACKREFS] = {};
int64_t match_len = match(text, pattern, pos, 0, captures, 1);
if (match_len < 0) continue;
- captures[0].index = pos;
- captures[0].length = match_len;
+ captures[0] = (capture_t){
+ .index = pos, .length = match_len,
+ .occupied = true, .recursive = false,
+ };
- Text_t replacement_text = apply_backrefs(text, replacement, backref_pat, captures);
+ Text_t replacement_text = apply_backrefs(text, recursive ? pattern : Text(""), replacement, backref_pat, captures);
if (pos > nonmatching_pos) {
Text_t before_slice = Text$slice(text, I(nonmatching_pos+1), I(pos));
ret = Text$concat(ret, before_slice, replacement_text);
@@ -1622,7 +1642,7 @@ public Text_t Text$replace(Text_t text, Pattern_t pattern, Text_t replacement, P
return ret;
}
-public Text_t Text$replace_all(Text_t text, table_t replacements, Text_t backref_pat)
+public Text_t Text$replace_all(Text_t text, table_t replacements, Text_t backref_pat, bool recursive)
{
if (replacements.entries.length == 0) return text;
@@ -1647,7 +1667,7 @@ public Text_t Text$replace_all(Text_t text, table_t replacements, Text_t backref
// Concatenate the replacement:
Text_t replacement = *(Text_t*)(replacements.entries.data + i*replacements.entries.stride + sizeof(Text_t));
- Text_t replacement_text = apply_backrefs(text, replacement, backref_pat, captures);
+ Text_t replacement_text = apply_backrefs(text, recursive ? pattern : Text(""), replacement, backref_pat, captures);
ret = concat2(ret, replacement_text);
pos += len > 0 ? len : 1;
nonmatch_pos = pos;
diff --git a/builtins/text.h b/builtins/text.h
index 96a7b823..b343798d 100644
--- a/builtins/text.h
+++ b/builtins/text.h
@@ -31,8 +31,8 @@ Text_t Text$lower(Text_t text);
Text_t Text$title(Text_t text);
Text_t Text$as_text(const void *text, bool colorize, const TypeInfo *info);
Text_t Text$quoted(Text_t str, bool colorize);
-Text_t Text$replace(Text_t str, Pattern_t pat, Text_t replacement, Pattern_t backref_pat);
-Text_t Text$replace_all(Text_t text, table_t replacements, Pattern_t backref_pat);
+Text_t Text$replace(Text_t str, Pattern_t pat, Text_t replacement, Pattern_t backref_pat, bool recursive);
+Text_t Text$replace_all(Text_t text, table_t replacements, Pattern_t backref_pat, bool recursive);
array_t Text$split(Text_t text, Pattern_t pattern);
Int_t Text$find(Text_t text, Pattern_t pattern, Int_t i, int64_t *match_length);
array_t Text$find_all(Text_t text, Pattern_t pattern);
diff --git a/docs/text.md b/docs/text.md
index 8131d255..adf12dd1 100644
--- a/docs/text.md
+++ b/docs/text.md
@@ -840,7 +840,7 @@ See [Patterns](#patterns) for more information about patterns.
**Usage:**
```tomo
-replace(text: Text, pattern: Text, replacement: Text, backref: Pattern = $/\/) -> Text
+replace(text: Text, pattern: Pattern, replacement: Text, backref: Pattern = $/\/, recursive: Bool = yes) -> Text
```
**Parameters:**
@@ -852,6 +852,9 @@ replace(text: Text, pattern: Text, replacement: Text, backref: Pattern = $/\/) -
pattern followed by a number replaced with the corresponding backreference.
By default, the backreference pattern is a single backslash, so
backreferences look like `\0`, `\1`, etc.
+- `recursive`: If `yes`, then whenever the replacement uses a backreference to
+ a nested capture (such as the `?` in `$/BAD(?)/`), the same pattern and
+ replacement are first applied recursively to the backreferenced text.
**Backreferences**
If a backreference pattern is in the replacement, then that backreference is
@@ -879,11 +882,18 @@ The text with occurrences of the pattern replaced.
>> "Hello world":replace($/{id}/, "(\0)")
= "(Hello) (world)"
+>> "Hello world":replace($/{id}/, "(@0)", backref=$/@/)
+= "(Hello) (world)"
+
>> "Hello world":replace($/{id} {id}/, "just \2")
= "just world"
->> " foo(x, fn(), y) ":replace($/foo(?)/, "baz(\1)")
-= " baz(x, fn(), y) "
+# Recursive is the default behavior:
+>> " BAD(x, BAD(y), z) ":replace($/BAD(?)/, "good(\1)", recursive=yes)
+= " good(x, good(y), z) "
+
+>> " BAD(x, BAD(y), z) ":replace($/BAD(?)/, "good(\1)", recursive=no)
+= " good(x, BAD(y), z) "
```
---
@@ -911,6 +921,9 @@ replace_all(replacements:{Pattern:Text}, backref: Pattern = $/\/) -> Text
pattern followed by a number replaced with the corresponding backreference.
By default, the backreference pattern is a single backslash, so
backreferences look like `\0`, `\1`, etc.
+- `recursive`: If `yes`, then whenever a replacement uses a backreference to a
+ nested capture, the pattern that matched and its replacement (not the whole
+ table) are first applied recursively to the backreferenced text.
**Returns:**
The text with all occurrences of the patterns replaced with their corresponding
diff --git a/environment.c b/environment.c
index 709f6ac2..d105d2f4 100644
--- a/environment.c
+++ b/environment.c
@@ -248,8 +248,8 @@ env_t *new_compilation_unit(CORD *libname)
{"lines", "Text$lines", "func(text:Text)->[Text]"},
{"lower", "Text$lower", "func(text:Text)->Text"},
{"quoted", "Text$quoted", "func(text:Text, color=no)->Text"},
- {"replace", "Text$replace", "func(text:Text, pattern:Pattern, replacement:Text, placeholder=$/\\/)->Text"},
- {"replace_all", "Text$replace_all", "func(text:Text, replacements:{Pattern:Text}, placeholder=$/\\/)->Text"},
+ {"replace", "Text$replace", "func(text:Text, pattern:Pattern, replacement:Text, backref=$/\\/, recursive=yes)->Text"},
+ {"replace_all", "Text$replace_all", "func(text:Text, replacements:{Pattern:Text}, backref=$/\\/, recursive=yes)->Text"},
{"split", "Text$split", "func(text:Text, pattern=$Pattern'')->[Text]"},
{"slice", "Text$slice", "func(text:Text, from=1, to=-1)->Text"},
{"title", "Text$title", "func(text:Text)->Text"},
diff --git a/test/text.tm b/test/text.tm
index 950805a9..e3848a08 100644
--- a/test/text.tm
+++ b/test/text.tm
@@ -236,3 +236,9 @@ func main():
>> "<tag>":replace_all({$/</:"&lt;", $/>/:"&gt;"})
= "&lt;tag&gt;"
+ >> " BAD(x, fn(y), BAD(z), w) ":replace($/BAD(?)/, "good(\1)", recursive=yes)
+ = " good(x, fn(y), good(z), w) "
+
+ >> " BAD(x, fn(y), BAD(z), w) ":replace($/BAD(?)/, "good(\1)", recursive=no)
+ = " good(x, fn(y), BAD(z), w) "
+