From 44892df4c5686b292a058ca19eaba1e852fe42f3 Mon Sep 17 00:00:00 2001 From: Bruce Hill Date: Fri, 6 Sep 2024 00:03:28 -0400 Subject: [PATCH] Add Text.trim() --- builtins/text.c | 21 +++++++++++++++++++++ builtins/text.h | 1 + docs/text.md | 36 ++++++++++++++++++++++++++++++++++++ environment.c | 1 + test/text.tm | 12 ++++++++++++ 5 files changed, 71 insertions(+) diff --git a/builtins/text.c b/builtins/text.c index be30418..7316dab 100644 --- a/builtins/text.c +++ b/builtins/text.c @@ -1904,6 +1904,27 @@ public Text_t Text$replace(Text_t text, Pattern_t pattern, Text_t replacement, P return ret; } +public Text_t Text$trim(Text_t text, Pattern_t pattern, bool trim_left, bool trim_right) /* Trim text by pattern. When trim_left is set, a positive-length match reported by match() at index 0 is dropped from the front. When trim_right is set, the match with the lowest start index (never below the left cut) whose positive-length match ends exactly at the end of text is dropped from the back. first and last are the 0-based inclusive bounds of what is kept; the result is Text$slice(text, I(first plus 1), I(last plus 1)). NOTE(review): match() is defined outside this hunk; it is assumed to return the length matched at the given start index and a non-positive value when nothing matches -- confirm. */ +{ + int64_t first = 0, last = text.length-1; + if (trim_left) { + int64_t match_len = match(text, pattern, 0, 0, NULL, 0); + if (match_len > 0) + first = match_len; + } + + if (trim_right) { + for (int64_t i = text.length-1; i >= first; i--) { + int64_t match_len = match(text, pattern, i, 0, NULL, 0); + if (match_len > 0 && i + match_len == text.length) + last = i-1; + // No early exit when index i does not qualify (an else/break was left disabled): + // lower start indices are still tried for a match that ends at the end of the text. + } + } + return Text$slice(text, I(first+1), I(last+1)); +} + public Text_t Text$map(Text_t text, Pattern_t pattern, closure_t fn) { Text_t ret = {.length=0}; diff --git a/builtins/text.h b/builtins/text.h index eff01db..cb8f08b 100644 --- a/builtins/text.h +++ b/builtins/text.h @@ -36,6 +36,7 @@ Text_t Text$quoted(Text_t str, bool colorize); Text_t Text$replace(Text_t str, Pattern_t pat, Text_t replacement, Pattern_t backref_pat, bool recursive); Text_t Text$replace_all(Text_t text, Table_t replacements, Pattern_t backref_pat, bool recursive); Array_t Text$split(Text_t text, Pattern_t pattern); +Text_t Text$trim(Text_t text, Pattern_t pattern, bool trim_left, bool trim_right); Int_t Text$find(Text_t text, Pattern_t pattern, Int_t i, int64_t *match_length); Array_t Text$find_all(Text_t text, Pattern_t pattern); bool Text$has(Text_t text, Pattern_t pattern); diff --git a/docs/text.md 
b/docs/text.md index 1d26040..02c3091 100644 --- a/docs/text.md +++ b/docs/text.md @@ -279,6 +279,7 @@ Text.map(pattern:Pattern, fn:func(t:Text)->Text)->Text Text.replace(pattern:Pattern, replacement:Text, placeholder:Pattern=$//)->[Text] Text.replace_all(replacements:{Pattern:Text}, placeholder:Pattern=$//)->[Text] Text.split(pattern:Pattern)->[Text] +Text.trim(pattern=$/{whitespace}/, trim_left=yes, trim_right=yes)->Text ``` See [Text Functions](#Text-Functions) for the full API documentation. @@ -1098,6 +1099,41 @@ The text in title case. --- +## `trim` + +**Description:** +Trims the matching pattern from the left and/or right side of the text. +See [Patterns](#patterns) for more information about patterns. + +**Usage:** +```tomo +trim(text: Text, pattern: Pattern = $/{whitespace}/, trim_left: Bool = yes, trim_right: Bool = yes) -> Text +``` + +**Parameters:** + +- `text`: The text to be trimmed. +- `pattern`: The pattern that will be trimmed away. +- `trim_left`: Whether or not to trim from the front of the text. +- `trim_right`: Whether or not to trim from the back of the text. + +**Returns:** +The text without the trim pattern at either end. 
+ +**Example:** +```tomo +>> " x y z $(\n)":trim() += "x y z" + +>> "abc123def":trim($/{!digit}/) += "123" + +>> " xyz ":trim(trim_right=no) += "xyz " +``` + +--- + ## `upper` **Description:** diff --git a/environment.c b/environment.c index 320d257..666149c 100644 --- a/environment.c +++ b/environment.c @@ -265,6 +265,7 @@ env_t *new_compilation_unit(CORD *libname) {"split", "Text$split", "func(text:Text, pattern=$Pattern'')->[Text]"}, {"slice", "Text$slice", "func(text:Text, from=1, to=-1)->Text"}, {"title", "Text$title", "func(text:Text)->Text"}, + {"trim", "Text$trim", "func(text:Text, pattern=$/{whitespace}/, trim_left=yes, trim_right=yes)->Text"}, {"upper", "Text$upper", "func(text:Text)->Text"}, {"utf32_codepoints", "Text$utf32_codepoints", "func(text:Text)->[Int32]"}, {"utf8_bytes", "Text$utf8_bytes", "func(text:Text)->[Int8]"}, diff --git a/test/text.tm b/test/text.tm index 450d8ae..c98ca1c 100644 --- a/test/text.tm +++ b/test/text.tm @@ -257,6 +257,18 @@ func main(): >> "Abc":repeat(3) = "AbcAbcAbc" + >> " abc def ":trim() + = "abc def" + >> " abc123def ":trim($/{!digit}/) + = "123" + >> " abc123def ":trim($/{!digit}/, trim_left=no) + = " abc123" + >> " abc123def ":trim($/{!digit}/, trim_right=no) + = "123def " + # Only trim single whole matches that bookend the text: + >> "AbcAbcxxxxxxxxAbcAbc":trim($/Abc/) + = "AbcxxxxxxxxAbc" + do: !! Testing concatenation-stability: >> ab := Text.from_codepoint_names(["LATIN SMALL LETTER E", "COMBINING VERTICAL LINE BELOW"])