aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBruce Hill <bruce@bruce-hill.com>2024-09-06 00:03:28 -0400
committerBruce Hill <bruce@bruce-hill.com>2024-09-06 00:03:28 -0400
commit44892df4c5686b292a058ca19eaba1e852fe42f3 (patch)
treeb01b46ef7e9496e1971616e3b55e124dfd217cbb
parent1000423d2b351f1f5edbb3c9a08898883ba47f3e (diff)
Add Text.trim()
-rw-r--r--builtins/text.c21
-rw-r--r--builtins/text.h1
-rw-r--r--docs/text.md36
-rw-r--r--environment.c1
-rw-r--r--test/text.tm12
5 files changed, 71 insertions, 0 deletions
diff --git a/builtins/text.c b/builtins/text.c
index be304184..7316dabc 100644
--- a/builtins/text.c
+++ b/builtins/text.c
@@ -1904,6 +1904,27 @@ public Text_t Text$replace(Text_t text, Pattern_t pattern, Text_t replacement, P
return ret;
}
+public Text_t Text$trim(Text_t text, Pattern_t pattern, bool trim_left, bool trim_right) // Strip a match of `pattern` from the start and/or the end of `text`.
+{
+ int64_t first = 0, last = text.length-1; // 0-based, inclusive bounds of the region to keep
+ if (trim_left) {
+ int64_t match_len = match(text, pattern, 0, 0, NULL, 0); // try the pattern at index 0 (assumes match() returns the matched length -- TODO confirm)
+ if (match_len > 0)
+ first = match_len; // keep everything after the leading match
+ }
+
+ if (trim_right) {
+ for (int64_t i = text.length-1; i >= first; i--) { // walk candidate start positions from the end back to `first`
+ int64_t match_len = match(text, pattern, i, 0, NULL, 0);
+ if (match_len > 0 && i + match_len == text.length) // only accept a match that ends exactly at the end of the text
+ last = i-1; // keep everything before this trailing match
+ // NOTE(review): there is intentionally no early `break` here -- a position that fails to
+ // match does not rule out an earlier position matching through to the end of the text.
+ }
+ }
+ return Text$slice(text, I(first+1), I(last+1)); // +1: Text$slice appears to take 1-based inclusive indices -- confirm
+}
+
public Text_t Text$map(Text_t text, Pattern_t pattern, closure_t fn)
{
Text_t ret = {.length=0};
diff --git a/builtins/text.h b/builtins/text.h
index eff01dbe..cb8f08ba 100644
--- a/builtins/text.h
+++ b/builtins/text.h
@@ -36,6 +36,7 @@ Text_t Text$quoted(Text_t str, bool colorize);
Text_t Text$replace(Text_t str, Pattern_t pat, Text_t replacement, Pattern_t backref_pat, bool recursive);
Text_t Text$replace_all(Text_t text, Table_t replacements, Pattern_t backref_pat, bool recursive);
Array_t Text$split(Text_t text, Pattern_t pattern);
+Text_t Text$trim(Text_t text, Pattern_t pattern, bool trim_left, bool trim_right);
Int_t Text$find(Text_t text, Pattern_t pattern, Int_t i, int64_t *match_length);
Array_t Text$find_all(Text_t text, Pattern_t pattern);
bool Text$has(Text_t text, Pattern_t pattern);
diff --git a/docs/text.md b/docs/text.md
index 1d26040b..02c30912 100644
--- a/docs/text.md
+++ b/docs/text.md
@@ -279,6 +279,7 @@ Text.map(pattern:Pattern, fn:func(t:Text)->Text)->Text
Text.replace(pattern:Pattern, replacement:Text, placeholder:Pattern=$//)->[Text]
Text.replace_all(replacements:{Pattern:Text}, placeholder:Pattern=$//)->[Text]
Text.split(pattern:Pattern)->[Text]
+Text.trim(pattern:Pattern=$/{whitespace}/, trim_left:Bool=yes, trim_right:Bool=yes)->Text
```
See [Text Functions](#Text-Functions) for the full API documentation.
@@ -1098,6 +1099,41 @@ The text in title case.
---
+## `trim`
+
+**Description:**
+Trims the matching pattern from the left and/or right side of the text.
+See [Patterns](#patterns) for more information about patterns.
+
+**Usage:**
+```tomo
+trim(text: Text, pattern: Pattern = $/{whitespace}/, trim_left: Bool = yes, trim_right: Bool = yes) -> Text
+```
+
+**Parameters:**
+
+- `text`: The text to be trimmed.
+- `pattern`: The pattern that will be trimmed away.
+- `trim_left`: Whether or not to trim from the front of the text.
+- `trim_right`: Whether or not to trim from the back of the text.
+
+**Returns:**
+The text without the trim pattern at either end.
+
+**Example:**
+```tomo
+>> " x y z $(\n)":trim()
+= "x y z"
+
+>> "abc123def":trim($/{!digit}/)
+= "123"
+
+>> " xyz ":trim(trim_right=no)
+= "xyz "
+```
+
+---
+
## `upper`
**Description:**
diff --git a/environment.c b/environment.c
index 320d2573..666149cc 100644
--- a/environment.c
+++ b/environment.c
@@ -265,6 +265,7 @@ env_t *new_compilation_unit(CORD *libname)
{"split", "Text$split", "func(text:Text, pattern=$Pattern'')->[Text]"},
{"slice", "Text$slice", "func(text:Text, from=1, to=-1)->Text"},
{"title", "Text$title", "func(text:Text)->Text"},
+ {"trim", "Text$trim", "func(text:Text, pattern=$/{whitespace}/, trim_left=yes, trim_right=yes)->Text"},
{"upper", "Text$upper", "func(text:Text)->Text"},
{"utf32_codepoints", "Text$utf32_codepoints", "func(text:Text)->[Int32]"},
{"utf8_bytes", "Text$utf8_bytes", "func(text:Text)->[Int8]"},
diff --git a/test/text.tm b/test/text.tm
index 450d8ae9..c98ca1c6 100644
--- a/test/text.tm
+++ b/test/text.tm
@@ -257,6 +257,18 @@ func main():
>> "Abc":repeat(3)
= "AbcAbcAbc"
+ >> " abc def ":trim()
+ = "abc def"
+ >> " abc123def ":trim($/{!digit}/)
+ = "123"
+ >> " abc123def ":trim($/{!digit}/, trim_left=no)
+ = " abc123"
+ >> " abc123def ":trim($/{!digit}/, trim_right=no)
+ = "123def "
+ # Only trim single whole matches that bookend the text:
+ >> "AbcAbcxxxxxxxxAbcAbc":trim($/Abc/)
+ = "AbcxxxxxxxxAbc"
+
do:
!! Testing concatenation-stability:
>> ab := Text.from_codepoint_names(["LATIN SMALL LETTER E", "COMBINING VERTICAL LINE BELOW"])