Add recursive mode to text replacement and update docs
This commit is contained in:
parent
02dbcbf8b5
commit
b8bb4ada8b
@ -1051,6 +1051,7 @@ int64_t match_uri(Text_t text, int64_t text_index)
|
||||
|
||||
// One captured span of text recorded by match() and consumed by apply_backrefs().
typedef struct {
|
||||
int64_t index, length; // start position of the captured text and its length
|
||||
bool occupied, recursive; // occupied: this slot holds a capture (backrefs to unoccupied slots fail); recursive: the replacement may be re-applied to this capture's text
|
||||
} capture_t;
|
||||
|
||||
int64_t match(Text_t text, Pattern_t pattern, int64_t text_index, int64_t pattern_index, capture_t *captures, int64_t capture_index)
|
||||
@ -1077,8 +1078,10 @@ int64_t match(Text_t text, Pattern_t pattern, int64_t text_index, int64_t patter
|
||||
// Save this as a capture, including only the interior text:
|
||||
if (captures && capture_index < MAX_BACKREFS) {
|
||||
captures[capture_index++] = (capture_t){
|
||||
start_of_quoted_text,
|
||||
text_index - start_of_quoted_text,
|
||||
.index=start_of_quoted_text,
|
||||
.length=text_index - start_of_quoted_text,
|
||||
.occupied=true,
|
||||
.recursive=false,
|
||||
};
|
||||
}
|
||||
|
||||
@ -1116,8 +1119,10 @@ int64_t match(Text_t text, Pattern_t pattern, int64_t text_index, int64_t patter
|
||||
// Save this as a capture, including only the interior text:
|
||||
if (captures && capture_index < MAX_BACKREFS) {
|
||||
captures[capture_index++] = (capture_t){
|
||||
start_of_interior,
|
||||
text_index - start_of_interior - 1,
|
||||
.index=start_of_interior,
|
||||
.length=text_index - start_of_interior - 1,
|
||||
.occupied=true,
|
||||
.recursive=true,
|
||||
};
|
||||
}
|
||||
|
||||
@ -1171,8 +1176,10 @@ int64_t match(Text_t text, Pattern_t pattern, int64_t text_index, int64_t patter
|
||||
#define SUCCESS() ({ \
|
||||
if (captures && capture_index < MAX_BACKREFS) { \
|
||||
captures[capture_index++] = (capture_t){ \
|
||||
before_group, \
|
||||
(text_index - before_group), \
|
||||
.index=before_group, \
|
||||
.length=(text_index - before_group), \
|
||||
.occupied=true, \
|
||||
.recursive=false, \
|
||||
}; \
|
||||
}; continue; 0; })
|
||||
if (prop_name) {
|
||||
@ -1303,8 +1310,10 @@ int64_t match(Text_t text, Pattern_t pattern, int64_t text_index, int64_t patter
|
||||
// Save this as a capture, including only the interior text:
|
||||
if (captures && capture_index < MAX_BACKREFS) {
|
||||
captures[capture_index++] = (capture_t){
|
||||
before_group,
|
||||
(text_index - before_group) + match_len,
|
||||
.index=before_group,
|
||||
.length=(text_index - before_group) + match_len,
|
||||
.occupied=true,
|
||||
.recursive=false,
|
||||
};
|
||||
}
|
||||
return (text_index - start_index) + match_len;
|
||||
@ -1340,8 +1349,10 @@ int64_t match(Text_t text, Pattern_t pattern, int64_t text_index, int64_t patter
|
||||
// Save this as a capture, including only the interior text:
|
||||
if (captures && capture_index < MAX_BACKREFS) {
|
||||
captures[capture_index++] = (capture_t){
|
||||
before_group,
|
||||
(text_index - before_group) + match_len,
|
||||
.index=before_group,
|
||||
.length=(text_index - before_group) + match_len,
|
||||
.occupied=true,
|
||||
.recursive=false,
|
||||
};
|
||||
}
|
||||
|
||||
@ -1525,7 +1536,7 @@ public array_t Text$find_all(Text_t text, Pattern_t pattern)
|
||||
return matches;
|
||||
}
|
||||
|
||||
static Text_t apply_backrefs(Text_t text, Text_t replacement, Pattern_t backref_pat, capture_t *captures)
|
||||
static Text_t apply_backrefs(Text_t text, Pattern_t original_pattern, Text_t replacement, Pattern_t backref_pat, capture_t *captures)
|
||||
{
|
||||
if (backref_pat.length == 0)
|
||||
return replacement;
|
||||
@ -1563,7 +1574,14 @@ static Text_t apply_backrefs(Text_t text, Text_t replacement, Pattern_t backref_
|
||||
if (_next_grapheme(replacement, &state, pos + backref_len) == ';')
|
||||
backref_len += 1; // skip optional semicolon
|
||||
|
||||
if (!captures[backref].occupied)
|
||||
fail("There is no capture number %ld!", backref);
|
||||
|
||||
Text_t backref_text = Text$slice(text, I(captures[backref].index+1), I(captures[backref].index + captures[backref].length));
|
||||
|
||||
if (captures[backref].recursive && original_pattern.length > 0)
|
||||
backref_text = Text$replace(backref_text, original_pattern, replacement, backref_pat, true);
|
||||
|
||||
if (pos > nonmatching_pos) {
|
||||
Text_t before_slice = Text$slice(replacement, I(nonmatching_pos+1), I(pos));
|
||||
ret = Text$concat(ret, before_slice, backref_text);
|
||||
@ -1581,7 +1599,7 @@ static Text_t apply_backrefs(Text_t text, Text_t replacement, Pattern_t backref_
|
||||
return ret;
|
||||
}
|
||||
|
||||
public Text_t Text$replace(Text_t text, Pattern_t pattern, Text_t replacement, Pattern_t backref_pat)
|
||||
public Text_t Text$replace(Text_t text, Pattern_t pattern, Text_t replacement, Pattern_t backref_pat, bool recursive)
|
||||
{
|
||||
Text_t ret = {.length=0};
|
||||
|
||||
@ -1602,10 +1620,12 @@ public Text_t Text$replace(Text_t text, Pattern_t pattern, Text_t replacement, P
|
||||
capture_t captures[MAX_BACKREFS] = {};
|
||||
int64_t match_len = match(text, pattern, pos, 0, captures, 1);
|
||||
if (match_len < 0) continue;
|
||||
captures[0].index = pos;
|
||||
captures[0].length = match_len;
|
||||
captures[0] = (capture_t){
|
||||
.index = pos, .length = match_len,
|
||||
.occupied = true, .recursive = false,
|
||||
};
|
||||
|
||||
Text_t replacement_text = apply_backrefs(text, replacement, backref_pat, captures);
|
||||
Text_t replacement_text = apply_backrefs(text, recursive ? pattern : Text(""), replacement, backref_pat, captures);
|
||||
if (pos > nonmatching_pos) {
|
||||
Text_t before_slice = Text$slice(text, I(nonmatching_pos+1), I(pos));
|
||||
ret = Text$concat(ret, before_slice, replacement_text);
|
||||
@ -1622,7 +1642,7 @@ public Text_t Text$replace(Text_t text, Pattern_t pattern, Text_t replacement, P
|
||||
return ret;
|
||||
}
|
||||
|
||||
public Text_t Text$replace_all(Text_t text, table_t replacements, Text_t backref_pat)
|
||||
public Text_t Text$replace_all(Text_t text, table_t replacements, Text_t backref_pat, bool recursive)
|
||||
{
|
||||
if (replacements.entries.length == 0) return text;
|
||||
|
||||
@ -1647,7 +1667,7 @@ public Text_t Text$replace_all(Text_t text, table_t replacements, Text_t backref
|
||||
|
||||
// Concatenate the replacement:
|
||||
Text_t replacement = *(Text_t*)(replacements.entries.data + i*replacements.entries.stride + sizeof(Text_t));
|
||||
Text_t replacement_text = apply_backrefs(text, replacement, backref_pat, captures);
|
||||
Text_t replacement_text = apply_backrefs(text, recursive ? pattern : Text(""), replacement, backref_pat, captures);
|
||||
ret = concat2(ret, replacement_text);
|
||||
pos += len > 0 ? len : 1;
|
||||
nonmatch_pos = pos;
|
||||
|
@ -31,8 +31,8 @@ Text_t Text$lower(Text_t text);
|
||||
Text_t Text$title(Text_t text);
|
||||
Text_t Text$as_text(const void *text, bool colorize, const TypeInfo *info);
|
||||
Text_t Text$quoted(Text_t str, bool colorize);
|
||||
Text_t Text$replace(Text_t str, Pattern_t pat, Text_t replacement, Pattern_t backref_pat);
|
||||
Text_t Text$replace_all(Text_t text, table_t replacements, Pattern_t backref_pat);
|
||||
Text_t Text$replace(Text_t str, Pattern_t pat, Text_t replacement, Pattern_t backref_pat, bool recursive);
|
||||
Text_t Text$replace_all(Text_t text, table_t replacements, Pattern_t backref_pat, bool recursive);
|
||||
array_t Text$split(Text_t text, Pattern_t pattern);
|
||||
Int_t Text$find(Text_t text, Pattern_t pattern, Int_t i, int64_t *match_length);
|
||||
array_t Text$find_all(Text_t text, Pattern_t pattern);
|
||||
|
19
docs/text.md
19
docs/text.md
@ -840,7 +840,7 @@ See [Patterns](#patterns) for more information about patterns.
|
||||
|
||||
**Usage:**
|
||||
```tomo
|
||||
replace(text: Text, pattern: Text, replacement: Text, backref: Pattern = $/\/) -> Text
|
||||
replace(text: Text, pattern: Pattern, replacement: Text, backref: Pattern = $/\/, recursive: Bool = yes) -> Text
|
||||
```
|
||||
|
||||
**Parameters:**
|
||||
@ -852,6 +852,9 @@ replace(text: Text, pattern: Text, replacement: Text, backref: Pattern = $/\/) -
|
||||
pattern followed by a number replaced with the corresponding backreference.
|
||||
By default, the backreference pattern is a single backslash, so
|
||||
backreferences look like `\0`, `\1`, etc.
|
||||
- `recursive`: For backreferences of a nested capture, if recursive is set to
|
||||
`yes`, then the whole replacement will be reapplied recursively to the
|
||||
backreferenced text if it's used in the replacement.
|
||||
|
||||
**Backreferences**
|
||||
If a backreference pattern is in the replacement, then that backreference is
|
||||
@ -879,11 +882,18 @@ The text with occurrences of the pattern replaced.
|
||||
>> "Hello world":replace($/{id}/, "(\0)")
|
||||
= "(Hello) (world)"
|
||||
|
||||
>> "Hello world":replace($/{id}/, "(@0)", backref=$/@/)
|
||||
= "(Hello) (world)"
|
||||
|
||||
>> "Hello world":replace($/{id} {id}/, "just \2")
|
||||
= "just world"
|
||||
|
||||
>> " foo(x, fn(), y) ":replace($/foo(?)/, "baz(\1)")
|
||||
= " baz(x, fn(), y) "
|
||||
# Recursive is the default behavior:
|
||||
>> " BAD(x, BAD(y), z) ":replace($/BAD(?)/, "good(\1)", recursive=yes)
|
||||
= " good(x, good(y), z) "
|
||||
|
||||
>> " BAD(x, BAD(y), z) ":replace($/BAD(?)/, "good(\1)", recursive=no)
|
||||
= " good(x, BAD(y), z) "
|
||||
```
|
||||
|
||||
---
|
||||
@ -911,6 +921,9 @@ replace_all(replacements:{Pattern:Text}, backref: Pattern = $/\/) -> Text
|
||||
pattern followed by a number replaced with the corresponding backreference.
|
||||
By default, the backreference pattern is a single backslash, so
|
||||
backreferences look like `\0`, `\1`, etc.
|
||||
- `recursive`: For backreferences of a nested capture, if recursive is set to
|
||||
`yes`, then the matching replacement will be reapplied recursively to the
|
||||
backreferenced text if it's used in the replacement.
|
||||
|
||||
**Returns:**
|
||||
The text with all occurrences of the patterns replaced with their corresponding
|
||||
|
@ -248,8 +248,8 @@ env_t *new_compilation_unit(CORD *libname)
|
||||
{"lines", "Text$lines", "func(text:Text)->[Text]"},
|
||||
{"lower", "Text$lower", "func(text:Text)->Text"},
|
||||
{"quoted", "Text$quoted", "func(text:Text, color=no)->Text"},
|
||||
{"replace", "Text$replace", "func(text:Text, pattern:Pattern, replacement:Text, placeholder=$/\\/)->Text"},
|
||||
{"replace_all", "Text$replace_all", "func(text:Text, replacements:{Pattern:Text}, placeholder=$/\\/)->Text"},
|
||||
{"replace", "Text$replace", "func(text:Text, pattern:Pattern, replacement:Text, backref=$/\\/, recursive=yes)->Text"},
|
||||
{"replace_all", "Text$replace_all", "func(text:Text, replacements:{Pattern:Text}, backref=$/\\/, recursive=yes)->Text"},
|
||||
{"split", "Text$split", "func(text:Text, pattern=$Pattern'')->[Text]"},
|
||||
{"slice", "Text$slice", "func(text:Text, from=1, to=-1)->Text"},
|
||||
{"title", "Text$title", "func(text:Text)->Text"},
|
||||
|
@ -236,3 +236,9 @@ func main():
|
||||
>> "<tag>":replace_all({$/</:"<", $/>/:">"})
|
||||
= "<tag>"
|
||||
|
||||
>> " BAD(x, fn(y), BAD(z), w) ":replace($/BAD(?)/, "good(\1)", recursive=yes)
|
||||
= " good(x, fn(y), good(z), w) "
|
||||
|
||||
>> " BAD(x, fn(y), BAD(z), w) ":replace($/BAD(?)/, "good(\1)", recursive=no)
|
||||
= " good(x, fn(y), BAD(z), w) "
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user