From 00543b4e876cf069d5be22c1f06427a4c8d5bed9 Mon Sep 17 00:00:00 2001
From: Bruce Hill
Date: Wed, 4 Sep 2024 21:02:37 -0400
Subject: [PATCH] Add Text.map(pat, fn)

---
 builtins/text.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 builtins/text.h |  1 +
 docs/text.md    | 32 ++++++++++++++++++++++++++++++++
 environment.c   |  1 +
 test/text.tm    |  3 +++
 5 files changed, 89 insertions(+)

diff --git a/builtins/text.c b/builtins/text.c
index 59e12c6..97b50a4 100644
--- a/builtins/text.c
+++ b/builtins/text.c
@@ -1673,6 +1673,58 @@ public Text_t Text$replace(Text_t text, Pattern_t pattern, Text_t replacement, P
     return ret;
 }
 
+// Replace every non-overlapping match of `pattern` in `text` with the result of
+// calling `fn` on the matched slice. Text outside of matches is copied through
+// unchanged. `fn.fn` is called as `Text_t (*)(Text_t match, void *userdata)` and
+// is passed `fn.userdata` as its second argument.
+public Text_t Text$map(Text_t text, Pattern_t pattern, closure_t fn)
+{
+    Text_t ret = {.length=0};
+
+    // Patterns that begin with '{', a quotation mark, or paired punctuation
+    // don't get the skip-ahead shortcut used in the loop below:
+    int32_t first_grapheme = get_grapheme(pattern, 0);
+    bool find_first = (first_grapheme != '{'
+                       && !uc_is_property(first_grapheme, UC_PROPERTY_QUOTATION_MARK)
+                       && !uc_is_property(first_grapheme, UC_PROPERTY_PAIRED_PUNCTUATION));
+
+    iteration_state_t text_state = {0, 0};
+    // 0-indexed start of the text that has not been appended to `ret` yet:
+    int64_t nonmatching_pos = 0;
+
+    Text_t (*text_mapper)(Text_t, void*) = fn.fn;
+    for (int64_t pos = 0; pos < text.length; pos++) {
+        // Optimization: quickly skip ahead to first char in pattern:
+        if (find_first) {
+            while (pos < text.length && _next_grapheme(text, &text_state, pos) != first_grapheme)
+                ++pos;
+        }
+
+        int64_t match_len = match(text, pattern, pos, 0, NULL, 0);
+        if (match_len < 0) continue; // Negative length: no match at `pos`
+
+        // `pos` is 0-indexed; the bounds given to Text$slice() are 1-indexed and inclusive
+        Text_t replacement = text_mapper(Text$slice(text, I(pos+1), I(pos+match_len)), fn.userdata);
+        if (pos > nonmatching_pos) {
+            Text_t before_slice = Text$slice(text, I(nonmatching_pos+1), I(pos));
+            ret = Text$concat(ret, before_slice, replacement);
+        } else {
+            ret = concat2(ret, replacement);
+        }
+        nonmatching_pos = pos + match_len;
+        // The loop header adds 1, so only skip (match_len - 1) here. A zero-length
+        // match must still move forward one position, otherwise the same empty
+        // match would be found at the same `pos` forever:
+        if (match_len > 0)
+            pos += (match_len - 1);
+    }
+    if (nonmatching_pos < text.length) {
+        Text_t last_slice = Text$slice(text, I(nonmatching_pos+1), I(text.length));
+        ret = concat2(ret, last_slice);
+    }
+    return ret;
+}
+
 public Text_t Text$replace_all(Text_t text,
table_t replacements, Text_t backref_pat, bool recursive) { if (replacements.entries.length == 0) return text; diff --git a/builtins/text.h b/builtins/text.h index a25a61d..c93c626 100644 --- a/builtins/text.h +++ b/builtins/text.h @@ -51,6 +51,7 @@ Text_t Text$from_codepoint_names(array_t codepoint_names); Text_t Text$from_bytes(array_t bytes); array_t Text$lines(Text_t text); Text_t Text$join(Text_t glue, array_t pieces); +Text_t Text$map(Text_t text, Pattern_t pattern, closure_t fn); extern const TypeInfo $Text; diff --git a/docs/text.md b/docs/text.md index 0b02cdf..7039043 100644 --- a/docs/text.md +++ b/docs/text.md @@ -275,6 +275,7 @@ Text.has(pattern:Pattern)->Bool Text.find(pattern:Pattern, start=1, length=!&Int64?)->Int Text.find_all(pattern:Pattern)->[Text] Text.matches(pattern:Pattern)->Bool +Text.map(pattern:Pattern, fn:func(t:Text)->Text)->Text Text.replace(pattern:Pattern, replacement:Text, placeholder:Pattern=$//)->[Text] Text.replace_all(replacements:{Pattern:Text}, placeholder:Pattern=$//)->[Text] Text.split(pattern:Pattern)->[Text] @@ -837,6 +838,37 @@ matches(text: Text, pattern: Text) -> Bool --- +## `map` + +**Description:** +For each occurrence of the given pattern, replace the text with the result of +calling the given function on that text. + +**Usage:** +```tomo +map(text: Text, pattern: Text, fn: func(text:Text)->Text) -> Text +``` + +**Parameters:** + +- `text`: The text to be searched. +- `pattern`: The pattern to search for. +- `fn`: The function to apply to each match. + +**Returns:** +The text with the matching parts replaced with the result of applying the given +function to each. 
+ +**Example:** +```tomo +>> "hello world":map($/world/, Text.upper) += "hello WORLD" +>> "Some nums: 1 2 3 4":map($/{int}/, func(i:Text): "$(Int.from_text(i) + 10)") += "Some nums: 11 12 13 14" +``` + +--- + ## `quoted` **Description:** diff --git a/environment.c b/environment.c index 8f5de0d..2c1a3f7 100644 --- a/environment.c +++ b/environment.c @@ -256,6 +256,7 @@ env_t *new_compilation_unit(CORD *libname) {"join", "Text$join", "func(glue:Text, pieces:[Text])->Text"}, {"lines", "Text$lines", "func(text:Text)->[Text]"}, {"lower", "Text$lower", "func(text:Text)->Text"}, + {"map", "Text$map", "func(text:Text, pattern:Pattern, fn:func(text:Text)->Text)->Text"}, {"matches", "Text$matches", "func(text:Text, pattern:Pattern)->Bool"}, {"quoted", "Text$quoted", "func(text:Text, color=no)->Text"}, {"replace", "Text$replace", "func(text:Text, pattern:Pattern, replacement:Text, backref=$/\\/, recursive=yes)->Text"}, diff --git a/test/text.tm b/test/text.tm index af03c3c..74a5341 100644 --- a/test/text.tm +++ b/test/text.tm @@ -250,3 +250,6 @@ func main(): = no >> "Hello...":matches($/{id}/) = no + + >> "hello world":map($/world/, Text.upper) + = "hello WORLD"