From e16792d5eb76326634cb1ad0b21301ec693fccb3 Mon Sep 17 00:00:00 2001
From: Bruce Hill
Date: Mon, 9 Sep 2024 05:31:25 -0400
Subject: [PATCH] Add Text.starts_with() and Text.ends_with()

---
Review notes (everything between the "---" line above and the first
"diff --git" line is ignored when the patch is applied):
 * builtins/text.c: Text$has() now checks whether `pattern` (not `text`)
   ends with the "{end}" anchor, mirroring the "{start}" check on `pattern`.
 * builtins/text.c: the suffix iterator in Text$ends_with() is named
   `suffix_state` instead of `prefix_state`.
 * docs/text.md: the `ends_with` "Returns" sentence refers to the suffix.
 * test/text.tm: the negative case in the ends_with group calls
   ends_with("cd") instead of repeating starts_with("cd").
 * All amendments are same-line substitutions, so hunk line counts and the
   diffstat below are unchanged; the `index` blob IDs predate the amendments.

 builtins/text.c | 44 ++++++++++++++++++++++++++++++---
 builtins/text.h |  2 ++
 docs/text.md    | 66 +++++++++++++++++++++++++++++++++++++++++++------
 environment.c   |  8 +++---
 test/text.tm    | 10 ++++++++
 5 files changed, 116 insertions(+), 14 deletions(-)

diff --git a/builtins/text.c b/builtins/text.c
index 6fedd82..be00667 100644
--- a/builtins/text.c
+++ b/builtins/text.c
@@ -905,6 +905,32 @@ PUREFUNC public int32_t Text$compare(const Text_t *a, const Text_t *b)
     return 0;
 }
 
+PUREFUNC public bool Text$starts_with(Text_t text, Text_t prefix)
+{
+    if (text.length < prefix.length)
+        return false;
+    text_iter_t text_state = {0, 0}, prefix_state = {0, 0};
+    for (int64_t i = 0; i < prefix.length; i++) {
+        int32_t text_i = _get_grapheme(text, &text_state, i);
+        int32_t prefix_i = _get_grapheme(prefix, &prefix_state, i);
+        if (text_i != prefix_i) return false;
+    }
+    return true;
+}
+
+PUREFUNC public bool Text$ends_with(Text_t text, Text_t suffix)
+{
+    if (text.length < suffix.length)
+        return false;
+    text_iter_t text_state = {0, 0}, suffix_state = {0, 0};
+    for (int64_t i = 0; i < suffix.length; i++) {
+        int32_t text_i = _get_grapheme(text, &text_state, text.length - suffix.length + i);
+        int32_t suffix_i = _get_grapheme(suffix, &suffix_state, i);
+        if (text_i != suffix_i) return false;
+    }
+    return true;
+}
+
 PUREFUNC public bool Text$equal_values(Text_t a, Text_t b)
 {
     if (a.length != b.length || (a.hash != 0 && b.hash != 0 && a.hash != b.hash))
@@ -1756,8 +1782,20 @@ public Int_t Text$find(Text_t text, Pattern_t pattern, Int_t from_index, int64_t
 
 PUREFUNC public bool Text$has(Text_t text, Pattern_t pattern)
 {
-    int64_t found = _find(text, pattern, 0, text.length-1, NULL);
-    return (found >= 0);
+    if (Text$starts_with(pattern, Text("{start}"))) {
+        int64_t m = match(text, 0, pattern, 0, NULL, 0);
+        return m >= 0;
+    } else if (Text$ends_with(pattern, Text("{end}"))) {
+        for (int64_t i = text.length-1; i >= 0; i--) {
+            int64_t match_len = match(text, i, pattern, 0, NULL, 0);
+            if (match_len >= 0 && i + match_len == text.length)
+                return true;
+        }
+        return false;
+    } else {
+        int64_t found = _find(text, pattern, 0, text.length-1, NULL);
+        return (found >= 0);
+    }
 }
 
 PUREFUNC public bool Text$matches(Text_t text, Pattern_t pattern)
@@ -2008,8 +2046,6 @@ public Text_t Text$trim(Text_t text, Pattern_t pattern, bool trim_left, bool tri
             int64_t match_len = match(text, i, pattern, 0, NULL, 0);
             if (match_len > 0 && i + match_len == text.length)
                 last = i-1;
-            // else
-            //     break;
         }
     }
     return Text$slice(text, I(first+1), I(last+1));
diff --git a/builtins/text.h b/builtins/text.h
index 7f4861a..2c19f2f 100644
--- a/builtins/text.h
+++ b/builtins/text.h
@@ -41,6 +41,8 @@ Text_t Text$trim(Text_t text, Pattern_t pattern, bool trim_left, bool trim_right
 Int_t Text$find(Text_t text, Pattern_t pattern, Int_t i, int64_t *match_length);
 Array_t Text$find_all(Text_t text, Pattern_t pattern);
 PUREFUNC bool Text$has(Text_t text, Pattern_t pattern);
+PUREFUNC bool Text$starts_with(Text_t text, Text_t prefix);
+PUREFUNC bool Text$ends_with(Text_t text, Text_t suffix);
 PUREFUNC bool Text$matches(Text_t text, Pattern_t pattern);
 char *Text$as_c_string(Text_t text);
 __attribute__((format(printf, 1, 2)))
diff --git a/docs/text.md b/docs/text.md
index 02c3091..8a9f641 100644
--- a/docs/text.md
+++ b/docs/text.md
@@ -494,6 +494,32 @@ An array of 32-bit integer Unicode code points (`[Int32]`).
 
 ---
 
+## `ends_with`
+
+**Description:**
+Checks if the `Text` ends with a literal suffix text.
+
+**Usage:**
+```tomo
+ends_with(text: Text, suffix: Text) -> Bool
+```
+
+**Parameters:**
+
+- `text`: The text to be searched.
+- `suffix`: The literal suffix text to check for.
+
+**Returns:**
+`yes` if the text has the given suffix, `no` otherwise.
+
+**Example:**
+```tomo
+>> "hello world":ends_with("world")
+= yes
+```
+
+---
+
 ## `from_c_string`
 
 **Description:**
@@ -614,7 +640,7 @@ See: [Patterns](#Patterns) for more information on patterns.
 
 **Usage:**
 ```tomo
-find(text: Text, pattern: Text, start: Int = 1, length: &Int64? = !&Int64) -> Int
+find(text: Text, pattern: Pattern, start: Int = 1, length: &Int64? = !&Int64) -> Int
 ```
 
 **Parameters:**
@@ -657,7 +683,7 @@ See: [Patterns](#Patterns) for more information on patterns.
 
 **Usage:**
 ```tomo
-find_all(text: Text, pattern: Text) -> [Text]
+find_all(text: Text, pattern: Pattern) -> [Text]
 ```
 
 **Parameters:**
@@ -699,7 +725,7 @@ Checks if the `Text` contains a target pattern (see: [Patterns](#Patterns)).
 
 **Usage:**
 ```tomo
-has(text: Text, pattern: Text) -> Bool
+has(text: Text, pattern: Pattern) -> Bool
 ```
 
 **Parameters:**
@@ -816,7 +842,7 @@ Checks if the `Text` matches target pattern (see: [Patterns](#Patterns)).
 
 **Usage:**
 ```tomo
-matches(text: Text, pattern: Text) -> Bool
+matches(text: Text, pattern: Pattern) -> Bool
 ```
 
 **Parameters:**
@@ -847,7 +873,7 @@ calling the given function on that text.
 
 **Usage:**
 ```tomo
-map(text: Text, pattern: Text, fn: func(text:Text)->Text) -> Text
+map(text: Text, pattern: Pattern, fn: func(text:Text)->Text) -> Text
 ```
 
 **Parameters:**
@@ -931,7 +957,7 @@ See [Patterns](#patterns) for more information about patterns.
 
 **Usage:**
 ```tomo
-replace(text: Text, pattern: Text, replacement: Text, backref: Pattern = $/\/, recursive: Bool = yes) -> Text
+replace(text: Text, pattern: Pattern, replacement: Text, backref: Pattern = $/\/, recursive: Bool = yes) -> Text
 ```
 
 **Parameters:**
@@ -1045,7 +1071,7 @@ See [Patterns](#patterns) for more information about patterns.
 
 **Usage:**
 ```tomo
-split(text: Text, pattern: Text = "") -> [Text]
+split(text: Text, pattern: Pattern = "") -> [Text]
 ```
 
 **Parameters:**
@@ -1074,6 +1100,32 @@ An array of substrings resulting from the split.
 
 ---
 
+## `starts_with`
+
+**Description:**
+Checks if the `Text` starts with a literal prefix text.
+
+**Usage:**
+```tomo
+starts_with(text: Text, prefix: Text) -> Bool
+```
+
+**Parameters:**
+
+- `text`: The text to be searched.
+- `prefix`: The literal prefix text to check for.
+
+**Returns:**
+`yes` if the text has the given prefix, `no` otherwise.
+
+**Example:**
+```tomo
+>> "hello world":starts_with("hello")
+= yes
+```
+
+---
+
 ## `title`
 
 **Description:**
diff --git a/environment.c b/environment.c
index d51d9aa..b17d12a 100644
--- a/environment.c
+++ b/environment.c
@@ -275,10 +275,11 @@ env_t *new_compilation_unit(CORD *libname)
             {"run", "Shell$run", "func(command:Shell, status=!&Int32?)->Text"},
         )},
         {"Text", TEXT_TYPE, "Text_t", "Text$info", TypedArray(ns_entry_t,
-            {"find", "Text$find", "func(text:Text, pattern:Pattern, start=1, length=!&Int64)->Int"},
-            {"find_all", "Text$find_all", "func(text:Text, pattern:Pattern)->[Text]"},
             {"as_c_string", "Text$as_c_string", "func(text:Text)->CString"},
             {"codepoint_names", "Text$codepoint_names", "func(text:Text)->[Text]"},
+            {"ends_with", "Text$ends_with", "func(text:Text, suffix:Text)->Bool"},
+            {"find", "Text$find", "func(text:Text, pattern:Pattern, start=1, length=!&Int64)->Int"},
+            {"find_all", "Text$find_all", "func(text:Text, pattern:Pattern)->[Text]"},
             {"from_bytes", "Text$from_bytes", "func(bytes:[Int8])->Text"},
             {"from_c_string", "Text$from_str", "func(str:CString)->Text"},
             {"from_codepoint_names", "Text$from_codepoint_names", "func(codepoint_names:[Text])->Text"},
@@ -293,8 +294,9 @@ env_t *new_compilation_unit(CORD *libname)
             {"repeat", "Text$repeat", "func(text:Text, count:Int)->Text"},
             {"replace", "Text$replace", "func(text:Text, pattern:Pattern, replacement:Text, backref=$/\\/, recursive=yes)->Text"},
             {"replace_all", "Text$replace_all", "func(text:Text, replacements:{Pattern:Text}, backref=$/\\/, recursive=yes)->Text"},
-            {"split", "Text$split", "func(text:Text, pattern=$Pattern'')->[Text]"},
             {"slice", "Text$slice", "func(text:Text, from=1, to=-1)->Text"},
+            {"split", "Text$split", "func(text:Text, pattern=$Pattern'')->[Text]"},
+            {"starts_with", "Text$starts_with", "func(text:Text, prefix:Text)->Bool"},
             {"title", "Text$title", "func(text:Text)->Text"},
             {"trim", "Text$trim", "func(text:Text, pattern=$/{whitespace}/, trim_left=yes, trim_right=yes)->Text"},
             {"upper", "Text$upper", "func(text:Text)->Text"},
diff --git a/test/text.tm b/test/text.tm
index bf7b77c..216e5aa 100644
--- a/test/text.tm
+++ b/test/text.tm
@@ -275,6 +275,16 @@ func main():
 	>> "A=B=C=D":replace($/{..}={..}/, "1:(\1) 2:(\2)")
 	= "1:(A) 2:(B=C=D)"
 
+	>> "abcde":starts_with("ab")
+	= yes
+	>> "abcde":starts_with("bc")
+	= no
+
+	>> "abcde":ends_with("de")
+	= yes
+	>> "abcde":ends_with("cd")
+	= no
+
 	do:
 		!! Testing concatenation-stability:
 		>> ab := Text.from_codepoint_names(["LATIN SMALL LETTER E", "COMBINING VERTICAL LINE BELOW"])