aboutsummaryrefslogtreecommitdiff
path: root/builtins
diff options
context:
space:
mode:
Diffstat (limited to 'builtins')
-rw-r--r--builtins/text.c54
-rw-r--r--builtins/text.h4
2 files changed, 39 insertions, 19 deletions
diff --git a/builtins/text.c b/builtins/text.c
index bc12debf..11c5610e 100644
--- a/builtins/text.c
+++ b/builtins/text.c
@@ -1051,6 +1051,7 @@ int64_t match_uri(Text_t text, int64_t text_index)
typedef struct {
int64_t index, length;
+ bool occupied, recursive;
} capture_t;
int64_t match(Text_t text, Pattern_t pattern, int64_t text_index, int64_t pattern_index, capture_t *captures, int64_t capture_index)
@@ -1077,8 +1078,10 @@ int64_t match(Text_t text, Pattern_t pattern, int64_t text_index, int64_t patter
// Save this as a capture, including only the interior text:
if (captures && capture_index < MAX_BACKREFS) {
captures[capture_index++] = (capture_t){
- start_of_quoted_text,
- text_index - start_of_quoted_text,
+ .index=start_of_quoted_text,
+ .length=text_index - start_of_quoted_text,
+ .occupied=true,
+ .recursive=false,
};
}
@@ -1116,8 +1119,10 @@ int64_t match(Text_t text, Pattern_t pattern, int64_t text_index, int64_t patter
// Save this as a capture, including only the interior text:
if (captures && capture_index < MAX_BACKREFS) {
captures[capture_index++] = (capture_t){
- start_of_interior,
- text_index - start_of_interior - 1,
+ .index=start_of_interior,
+ .length=text_index - start_of_interior - 1,
+ .occupied=true,
+ .recursive=true,
};
}
@@ -1171,8 +1176,10 @@ int64_t match(Text_t text, Pattern_t pattern, int64_t text_index, int64_t patter
#define SUCCESS() ({ \
if (captures && capture_index < MAX_BACKREFS) { \
captures[capture_index++] = (capture_t){ \
- before_group, \
- (text_index - before_group), \
+ .index=before_group, \
+ .length=(text_index - before_group), \
+ .occupied=true, \
+ .recursive=false, \
}; \
}; continue; 0; })
if (prop_name) {
@@ -1303,8 +1310,10 @@ int64_t match(Text_t text, Pattern_t pattern, int64_t text_index, int64_t patter
// Save this as a capture, including only the interior text:
if (captures && capture_index < MAX_BACKREFS) {
captures[capture_index++] = (capture_t){
- before_group,
- (text_index - before_group) + match_len,
+ .index=before_group,
+ .length=(text_index - before_group) + match_len,
+ .occupied=true,
+ .recursive=false,
};
}
return (text_index - start_index) + match_len;
@@ -1340,8 +1349,10 @@ int64_t match(Text_t text, Pattern_t pattern, int64_t text_index, int64_t patter
// Save this as a capture, including only the interior text:
if (captures && capture_index < MAX_BACKREFS) {
captures[capture_index++] = (capture_t){
- before_group,
- (text_index - before_group) + match_len,
+ .index=before_group,
+ .length=(text_index - before_group) + match_len,
+ .occupied=true,
+ .recursive=false,
};
}
@@ -1525,7 +1536,7 @@ public array_t Text$find_all(Text_t text, Pattern_t pattern)
return matches;
}
-static Text_t apply_backrefs(Text_t text, Text_t replacement, Pattern_t backref_pat, capture_t *captures)
+static Text_t apply_backrefs(Text_t text, Pattern_t original_pattern, Text_t replacement, Pattern_t backref_pat, capture_t *captures)
{
if (backref_pat.length == 0)
return replacement;
@@ -1563,7 +1574,14 @@ static Text_t apply_backrefs(Text_t text, Text_t replacement, Pattern_t backref_
if (_next_grapheme(replacement, &state, pos + backref_len) == ';')
backref_len += 1; // skip optional semicolon
+ if (!captures[backref].occupied)
+ fail("There is no capture number %ld!", backref);
+
Text_t backref_text = Text$slice(text, I(captures[backref].index+1), I(captures[backref].index + captures[backref].length));
+
+ if (captures[backref].recursive && original_pattern.length > 0)
+ backref_text = Text$replace(backref_text, original_pattern, replacement, backref_pat, true);
+
if (pos > nonmatching_pos) {
Text_t before_slice = Text$slice(replacement, I(nonmatching_pos+1), I(pos));
ret = Text$concat(ret, before_slice, backref_text);
@@ -1581,7 +1599,7 @@ static Text_t apply_backrefs(Text_t text, Text_t replacement, Pattern_t backref_
return ret;
}
-public Text_t Text$replace(Text_t text, Pattern_t pattern, Text_t replacement, Pattern_t backref_pat)
+public Text_t Text$replace(Text_t text, Pattern_t pattern, Text_t replacement, Pattern_t backref_pat, bool recursive)
{
Text_t ret = {.length=0};
@@ -1602,10 +1620,12 @@ public Text_t Text$replace(Text_t text, Pattern_t pattern, Text_t replacement, P
capture_t captures[MAX_BACKREFS] = {};
int64_t match_len = match(text, pattern, pos, 0, captures, 1);
if (match_len < 0) continue;
- captures[0].index = pos;
- captures[0].length = match_len;
+ captures[0] = (capture_t){
+ .index = pos, .length = match_len,
+ .occupied = true, .recursive = false,
+ };
- Text_t replacement_text = apply_backrefs(text, replacement, backref_pat, captures);
+ Text_t replacement_text = apply_backrefs(text, recursive ? pattern : Text(""), replacement, backref_pat, captures);
if (pos > nonmatching_pos) {
Text_t before_slice = Text$slice(text, I(nonmatching_pos+1), I(pos));
ret = Text$concat(ret, before_slice, replacement_text);
@@ -1622,7 +1642,7 @@ public Text_t Text$replace(Text_t text, Pattern_t pattern, Text_t replacement, P
return ret;
}
-public Text_t Text$replace_all(Text_t text, table_t replacements, Text_t backref_pat)
+public Text_t Text$replace_all(Text_t text, table_t replacements, Text_t backref_pat, bool recursive)
{
if (replacements.entries.length == 0) return text;
@@ -1647,7 +1667,7 @@ public Text_t Text$replace_all(Text_t text, table_t replacements, Text_t backref
// Concatenate the replacement:
Text_t replacement = *(Text_t*)(replacements.entries.data + i*replacements.entries.stride + sizeof(Text_t));
- Text_t replacement_text = apply_backrefs(text, replacement, backref_pat, captures);
+ Text_t replacement_text = apply_backrefs(text, recursive ? pattern : Text(""), replacement, backref_pat, captures);
ret = concat2(ret, replacement_text);
pos += len > 0 ? len : 1;
nonmatch_pos = pos;
diff --git a/builtins/text.h b/builtins/text.h
index 96a7b823..b343798d 100644
--- a/builtins/text.h
+++ b/builtins/text.h
@@ -31,8 +31,8 @@ Text_t Text$lower(Text_t text);
Text_t Text$title(Text_t text);
Text_t Text$as_text(const void *text, bool colorize, const TypeInfo *info);
Text_t Text$quoted(Text_t str, bool colorize);
-Text_t Text$replace(Text_t str, Pattern_t pat, Text_t replacement, Pattern_t backref_pat);
-Text_t Text$replace_all(Text_t text, table_t replacements, Pattern_t backref_pat);
+Text_t Text$replace(Text_t str, Pattern_t pat, Text_t replacement, Pattern_t backref_pat, bool recursive);
+Text_t Text$replace_all(Text_t text, table_t replacements, Pattern_t backref_pat, bool recursive);
array_t Text$split(Text_t text, Pattern_t pattern);
Int_t Text$find(Text_t text, Pattern_t pattern, Int_t i, int64_t *match_length);
array_t Text$find_all(Text_t text, Pattern_t pattern);