Add Text.trim()

author: Bruce Hill <bruce@bruce-hill.com> 2024-09-06 00:03:28 -0400
committer: Bruce Hill <bruce@bruce-hill.com> 2024-09-06 00:03:28 -0400
commit: 44892df4c5686b292a058ca19eaba1e852fe42f3 (patch)
tree: b01b46ef7e9496e1971616e3b55e124dfd217cbb
parent: 1000423d2b351f1f5edbb3c9a08898883ba47f3e (diff)
5 files changed, 71 insertions, 0 deletions
diff --git a/builtins/text.c b/builtins/text.c
index be304184..7316dabc 100644
--- a/builtins/text.c
+++ b/builtins/text.c
@@ -1904,6 +1904,27 @@ public Text_t Text$replace(Text_t text, Pattern_t pattern, Text_t replacement, P
     return ret;
 }
 
+public Text_t Text$trim(Text_t text, Pattern_t pattern, bool trim_left, bool trim_right)
+{
+    int64_t first = 0, last = text.length-1; // 0-based inclusive bounds of the portion to keep
+    if (trim_left) {
+        int64_t match_len = match(text, pattern, 0, 0, NULL, 0); // presumably the length of a match anchored at index 0 -- confirm against match()
+        if (match_len > 0)
+            first = match_len; // keep everything after the leading match
+    }
+
+    if (trim_right) {
+        for (int64_t i = text.length-1; i >= first; i--) {
+            int64_t match_len = match(text, pattern, i, 0, NULL, 0);
+            if (match_len > 0 && i + match_len == text.length) // only accept a match starting at i that ends exactly at the end of the text
+                last = i-1;
+            // No early break: the scan continues down to `first`, so the lowest i whose
+            // match reaches the end of the text wins (i.e. the longest trailing match).
+        }
+    }
+    return Text$slice(text, I(first+1), I(last+1)); // NOTE(review): +1 presumably converts to 1-based inclusive indices for Text$slice -- confirm
+}
+
 public Text_t Text$map(Text_t text, Pattern_t pattern, closure_t fn)
 {
     Text_t ret = {.length=0};
diff --git a/builtins/text.h b/builtins/text.h
index eff01dbe..cb8f08ba 100644
--- a/builtins/text.h
+++ b/builtins/text.h
@@ -36,6 +36,7 @@ Text_t Text$quoted(Text_t str, bool colorize);
 Text_t Text$replace(Text_t str, Pattern_t pat, Text_t replacement, Pattern_t backref_pat, bool recursive);
 Text_t Text$replace_all(Text_t text, Table_t replacements, Pattern_t backref_pat, bool recursive);
 Array_t Text$split(Text_t text, Pattern_t pattern);
+Text_t Text$trim(Text_t text, Pattern_t pattern, bool trim_left, bool trim_right);
 Int_t Text$find(Text_t text, Pattern_t pattern, Int_t i, int64_t *match_length);
 Array_t Text$find_all(Text_t text, Pattern_t pattern);
 bool Text$has(Text_t text, Pattern_t pattern);
diff --git a/docs/text.md b/docs/text.md
index 1d26040b..02c30912 100644
--- a/docs/text.md
+++ b/docs/text.md
@@ -279,6 +279,7 @@ Text.map(pattern:Pattern, fn:func(t:Text)->Text)->Text
 Text.replace(pattern:Pattern, replacement:Text, placeholder:Pattern=$//)->[Text]
 Text.replace_all(replacements:{Pattern:Text}, placeholder:Pattern=$//)->[Text]
 Text.split(pattern:Pattern)->[Text]
+Text.trim(pattern=$/{whitespace}/, trim_left=yes, trim_right=yes)->Text
 ```
 
 See [Text Functions](#Text-Functions) for the full API documentation.
@@ -1098,6 +1099,41 @@ The text in title case.
 
 ---
 
+## `trim`
+
+**Description:**  
+Trims the matching pattern from the left and/or right side of the text.
+See [Patterns](#patterns) for more information about patterns.
+
+**Usage:**  
+```tomo
+trim(text: Text, pattern: Pattern = $/{whitespace}/, trim_left: Bool = yes, trim_right: Bool = yes) -> Text
+```
+
+**Parameters:**
+
+- `text`: The text to be trimmed.
+- `pattern`: The pattern that will be trimmed away.
+- `trim_left`: Whether or not to trim from the front of the text.
+- `trim_right`: Whether or not to trim from the back of the text.
+
+**Returns:**  
+The text without the trim pattern at either end.
+
+**Example:**  
+```tomo
+>> "   x y z    $(\n)":trim()
+= "x y z"
+
+>> "abc123def":trim($/{!digit}/)
+= "123"
+
+>> "   xyz   ":trim(trim_right=no)
+= "xyz   "
+```
+
+---
+
 ## `upper`
 
 **Description:**  
diff --git a/environment.c b/environment.c
index 320d2573..666149cc 100644
--- a/environment.c
+++ b/environment.c
@@ -265,6 +265,7 @@ env_t *new_compilation_unit(CORD *libname)
             {"split", "Text$split", "func(text:Text, pattern=$Pattern'')->[Text]"},
             {"slice", "Text$slice", "func(text:Text, from=1, to=-1)->Text"},
             {"title", "Text$title", "func(text:Text)->Text"},
+            {"trim", "Text$trim", "func(text:Text, pattern=$/{whitespace}/, trim_left=yes, trim_right=yes)->Text"},
             {"upper", "Text$upper", "func(text:Text)->Text"},
             {"utf32_codepoints", "Text$utf32_codepoints", "func(text:Text)->[Int32]"},
             {"utf8_bytes", "Text$utf8_bytes", "func(text:Text)->[Int8]"},
diff --git a/test/text.tm b/test/text.tm
index 450d8ae9..c98ca1c6 100644
--- a/test/text.tm
+++ b/test/text.tm
@@ -257,6 +257,18 @@ func main():
 	>> "Abc":repeat(3)
 	= "AbcAbcAbc"
 
+	>> "   abc def    ":trim()
+	= "abc def"
+	>> " abc123def ":trim($/{!digit}/)
+	= "123"
+	>> " abc123def ":trim($/{!digit}/, trim_left=no)
+	= " abc123"
+	>> " abc123def ":trim($/{!digit}/, trim_right=no)
+	= "123def "
+	# Only trim single whole matches that bookend the text:
+	>> "AbcAbcxxxxxxxxAbcAbc":trim($/Abc/)
+	= "AbcxxxxxxxxAbc"
+
 	do:
 		!! Testing concatenation-stability:
 		>> ab := Text.from_codepoint_names(["LATIN SMALL LETTER E", "COMBINING VERTICAL LINE BELOW"])
author	Bruce Hill <bruce@bruce-hill.com>	2024-09-06 00:03:28 -0400
committer	Bruce Hill <bruce@bruce-hill.com>	2024-09-06 00:03:28 -0400
commit	44892df4c5686b292a058ca19eaba1e852fe42f3 (patch)
tree	b01b46ef7e9496e1971616e3b55e124dfd217cbb
parent	1000423d2b351f1f5edbb3c9a08898883ba47f3e (diff)