aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBruce Hill <bruce@bruce-hill.com>2024-09-04 21:02:37 -0400
committerBruce Hill <bruce@bruce-hill.com>2024-09-04 21:02:37 -0400
commit00543b4e876cf069d5be22c1f06427a4c8d5bed9 (patch)
treec77a2684fff4ce800939f833ff5d727e73607b26
parent93140c2896004f6ab5536e64b6970e48bbc92336 (diff)
Add Text.map(pat, fn)
-rw-r--r--builtins/text.c40
-rw-r--r--builtins/text.h1
-rw-r--r--docs/text.md32
-rw-r--r--environment.c1
-rw-r--r--test/text.tm3
5 files changed, 77 insertions, 0 deletions
diff --git a/builtins/text.c b/builtins/text.c
index 59e12c64..97b50a40 100644
--- a/builtins/text.c
+++ b/builtins/text.c
@@ -1673,6 +1673,46 @@ public Text_t Text$replace(Text_t text, Pattern_t pattern, Text_t replacement, P
return ret;
}
+// Replace every match of `pattern` in `text` with the result of calling `fn`
+// on the matched slice. Text that lies between matches is copied through
+// unchanged.
+//
+// Parameters:
+//   text    - the text to scan.
+//   pattern - the pattern passed to match() at each candidate position.
+//   fn      - closure whose `fn.fn` is called as Text_t (*)(Text_t, void*)
+//             with the matched slice and `fn.userdata`.
+//
+// Returns the concatenation of the unmatched pieces and the mapped matches.
+public Text_t Text$map(Text_t text, Pattern_t pattern, closure_t fn)
+{
+    Text_t ret = {.length=0};
+
+    // The skip-ahead below is only enabled when the pattern's first grapheme
+    // is not `{`, a quotation mark, or paired punctuation.
+    // NOTE(review): presumably those graphemes can begin non-literal pattern
+    // syntax -- confirm against match().
+    int32_t first_grapheme = get_grapheme(pattern, 0);
+    bool find_first = (first_grapheme != '{'
+                       && !uc_is_property(first_grapheme, UC_PROPERTY_QUOTATION_MARK)
+                       && !uc_is_property(first_grapheme, UC_PROPERTY_PAIRED_PUNCTUATION));
+
+    iteration_state_t text_state = {0, 0};
+    // 0-based offset of the first grapheme not yet copied into `ret`.
+    int64_t nonmatching_pos = 0;
+
+    Text_t (*text_mapper)(Text_t, void*) = fn.fn;
+    for (int64_t pos = 0; pos < text.length; pos++) {
+        // Optimization: quickly skip ahead to first char in pattern:
+        if (find_first) {
+            while (pos < text.length && _next_grapheme(text, &text_state, pos) != first_grapheme)
+                ++pos;
+        }
+
+        int64_t match_len = match(text, pattern, pos, 0, NULL, 0);
+        if (match_len < 0) continue;
+
+        // `pos` is 0-based; the slice bounds passed here are pos+1 through
+        // pos+match_len, covering exactly the matched graphemes.
+        Text_t replacement = text_mapper(Text$slice(text, I(pos+1), I(pos+match_len)), fn.userdata);
+        if (pos > nonmatching_pos) {
+            // Copy the unmatched run that precedes this match, then the mapped text.
+            Text_t before_slice = Text$slice(text, I(nonmatching_pos+1), I(pos));
+            ret = Text$concat(ret, before_slice, replacement);
+        } else {
+            ret = concat2(ret, replacement);
+        }
+        nonmatching_pos = pos + match_len;
+        // Jump to the last grapheme of the match; the loop's pos++ then moves
+        // past it. A zero-length match must not move `pos` backwards: `pos += -1`
+        // followed by `pos++` would retry the same position forever.
+        if (match_len > 0)
+            pos += (match_len - 1);
+    }
+    // Copy whatever trails the final match (or the whole text if nothing matched).
+    if (nonmatching_pos < text.length) {
+        Text_t last_slice = Text$slice(text, I(nonmatching_pos+1), I(text.length));
+        ret = concat2(ret, last_slice);
+    }
+    return ret;
+}
+
public Text_t Text$replace_all(Text_t text, table_t replacements, Text_t backref_pat, bool recursive)
{
if (replacements.entries.length == 0) return text;
diff --git a/builtins/text.h b/builtins/text.h
index a25a61d5..c93c6266 100644
--- a/builtins/text.h
+++ b/builtins/text.h
@@ -51,6 +51,7 @@ Text_t Text$from_codepoint_names(array_t codepoint_names);
Text_t Text$from_bytes(array_t bytes);
array_t Text$lines(Text_t text);
Text_t Text$join(Text_t glue, array_t pieces);
+Text_t Text$map(Text_t text, Pattern_t pattern, closure_t fn);
extern const TypeInfo $Text;
diff --git a/docs/text.md b/docs/text.md
index 0b02cdfa..7039043f 100644
--- a/docs/text.md
+++ b/docs/text.md
@@ -275,6 +275,7 @@ Text.has(pattern:Pattern)->Bool
Text.find(pattern:Pattern, start=1, length=!&Int64?)->Int
Text.find_all(pattern:Pattern)->[Text]
Text.matches(pattern:Pattern)->Bool
+Text.map(pattern:Pattern, fn:func(t:Text)->Text)->Text
Text.replace(pattern:Pattern, replacement:Text, placeholder:Pattern=$//)->[Text]
Text.replace_all(replacements:{Pattern:Text}, placeholder:Pattern=$//)->[Text]
Text.split(pattern:Pattern)->[Text]
@@ -837,6 +838,37 @@ matches(text: Text, pattern: Text) -> Bool
---
+## `map`
+
+**Description:**
+For each occurrence of the given pattern, replace the matched text with the
+result of calling the given function on that matched text.
+
+**Usage:**
+```tomo
+map(text: Text, pattern: Pattern, fn: func(text:Text)->Text) -> Text
+```
+
+**Parameters:**
+
+- `text`: The text to be searched.
+- `pattern`: The pattern to search for.
+- `fn`: The function to apply to each match.
+
+**Returns:**
+The text with the matching parts replaced with the result of applying the given
+function to each.
+
+**Example:**
+```tomo
+>> "hello world":map($/world/, Text.upper)
+= "hello WORLD"
+>> "Some nums: 1 2 3 4":map($/{int}/, func(i:Text): "$(Int.from_text(i) + 10)")
+= "Some nums: 11 12 13 14"
+```
+
+---
+
## `quoted`
**Description:**
diff --git a/environment.c b/environment.c
index 8f5de0de..2c1a3f70 100644
--- a/environment.c
+++ b/environment.c
@@ -256,6 +256,7 @@ env_t *new_compilation_unit(CORD *libname)
{"join", "Text$join", "func(glue:Text, pieces:[Text])->Text"},
{"lines", "Text$lines", "func(text:Text)->[Text]"},
{"lower", "Text$lower", "func(text:Text)->Text"},
+ {"map", "Text$map", "func(text:Text, pattern:Pattern, fn:func(text:Text)->Text)->Text"},
{"matches", "Text$matches", "func(text:Text, pattern:Pattern)->Bool"},
{"quoted", "Text$quoted", "func(text:Text, color=no)->Text"},
{"replace", "Text$replace", "func(text:Text, pattern:Pattern, replacement:Text, backref=$/\\/, recursive=yes)->Text"},
diff --git a/test/text.tm b/test/text.tm
index af03c3c7..74a53413 100644
--- a/test/text.tm
+++ b/test/text.tm
@@ -250,3 +250,6 @@ func main():
= no
>> "Hello...":matches($/{id}/)
= no
+
+ >> "hello world":map($/world/, Text.upper)
+ = "hello WORLD"