Add Text.starts_with() and Text.ends_with()

author: Bruce Hill <bruce@bruce-hill.com> 2024-09-09 05:31:25 -0400
committer: Bruce Hill <bruce@bruce-hill.com> 2024-09-09 05:31:25 -0400
commit: e16792d5eb76326634cb1ad0b21301ec693fccb3 (patch)
tree: 99e5b5c53db39e173db2b82db87cef85cd267c60
parent: 319d4306b585569d198a6e0ac268c009eb63705a (diff)
5 files changed, 116 insertions, 14 deletions
diff --git a/builtins/text.c b/builtins/text.c
index 6fedd82e..be00667c 100644
--- a/builtins/text.c
+++ b/builtins/text.c
@@ -905,6 +905,32 @@ PUREFUNC public int32_t Text$compare(const Text_t *a, const Text_t *b)
     return 0;
 }
 
+// True when the first `prefix.length` elements of `text` (as read by _get_grapheme) equal those of `prefix`.
+PUREFUNC public bool Text$starts_with(Text_t text, Text_t prefix)
+{
+    if (prefix.length > text.length) return false; // prefix is longer than the text, so it cannot match
+    text_iter_t text_iter = {0, 0}, prefix_iter = {0, 0};
+    for (int64_t pos = 0; pos < prefix.length; pos++) {
+        int32_t have = _get_grapheme(text, &text_iter, pos);
+        int32_t want = _get_grapheme(prefix, &prefix_iter, pos);
+        if (have != want) return false; // first mismatch decides the result
+    }
+    return true;
+}
+
+// True when the last `suffix.length` elements of `text` (as read by _get_grapheme) equal those of `suffix`.
+PUREFUNC public bool Text$ends_with(Text_t text, Text_t suffix)
+{
+    if (text.length < suffix.length) return false; // suffix is longer than the text, so it cannot match
+    text_iter_t text_state = {0, 0}, suffix_state = {0, 0};
+    for (int64_t i = 0; i < suffix.length; i++) {
+        // Index i of the suffix lines up with index (text.length - suffix.length + i) of the text.
+        int32_t text_i = _get_grapheme(text, &text_state, text.length - suffix.length + i);
+        int32_t suffix_i = _get_grapheme(suffix, &suffix_state, i);
+        if (text_i != suffix_i) return false;
+    }
+    return true;
+}
+
 PUREFUNC public bool Text$equal_values(Text_t a, Text_t b)
 {
     if (a.length != b.length || (a.hash != 0 && b.hash != 0 && a.hash != b.hash))
@@ -1756,8 +1782,20 @@ public Int_t Text$find(Text_t text, Pattern_t pattern, Int_t from_index, int64_t
 
 PUREFUNC public bool Text$has(Text_t text, Pattern_t pattern)
 {
-    int64_t found = _find(text, pattern, 0, text.length-1, NULL);
-    return (found >= 0);
+    if (Text$starts_with(pattern, Text("{start}"))) {
+        // Pattern begins with the literal `{start}`: only attempt a match at index 0.
+        return match(text, 0, pattern, 0, NULL, 0) >= 0;
+    } else if (Text$ends_with(pattern, Text("{end}"))) {
+        // Pattern (not the searched text) ends with `{end}`: accept only a match that reaches text.length.
+        for (int64_t i = text.length-1; i >= 0; i--) {
+            int64_t match_len = match(text, i, pattern, 0, NULL, 0);
+            if (match_len >= 0 && i + match_len == text.length)
+                return true;
+        }
+        return false;
+    } else {
+        return _find(text, pattern, 0, text.length-1, NULL) >= 0;
+    }
 }
 
 PUREFUNC public bool Text$matches(Text_t text, Pattern_t pattern)
@@ -2008,8 +2046,6 @@ public Text_t Text$trim(Text_t text, Pattern_t pattern, bool trim_left, bool tri
             int64_t match_len = match(text, i, pattern, 0, NULL, 0);
             if (match_len > 0 && i + match_len == text.length)
                 last = i-1;
-            // else
-            //     break;
         }
     }
     return Text$slice(text, I(first+1), I(last+1));
diff --git a/builtins/text.h b/builtins/text.h
index 7f4861a3..2c19f2f2 100644
--- a/builtins/text.h
+++ b/builtins/text.h
@@ -41,6 +41,8 @@ Text_t Text$trim(Text_t text, Pattern_t pattern, bool trim_left, bool trim_right
 Int_t Text$find(Text_t text, Pattern_t pattern, Int_t i, int64_t *match_length);
 Array_t Text$find_all(Text_t text, Pattern_t pattern);
 PUREFUNC bool Text$has(Text_t text, Pattern_t pattern);
+PUREFUNC bool Text$starts_with(Text_t text, Text_t prefix);
+PUREFUNC bool Text$ends_with(Text_t text, Text_t suffix);
 PUREFUNC bool Text$matches(Text_t text, Pattern_t pattern);
 char *Text$as_c_string(Text_t text);
 __attribute__((format(printf, 1, 2)))
diff --git a/docs/text.md b/docs/text.md
index 02c30912..8a9f641e 100644
--- a/docs/text.md
+++ b/docs/text.md
@@ -494,6 +494,32 @@ An array of 32-bit integer Unicode code points (`[Int32]`).
 
 ---
 
+## `ends_with`
+
+**Description:**  
+Checks if the `Text` ends with a literal suffix text.
+
+**Usage:**  
+```tomo
+ends_with(text: Text, suffix: Text) -> Bool
+```
+
+**Parameters:**
+
+- `text`: The text to be searched.
+- `suffix`: The literal suffix text to check for.
+
+**Returns:**  
+`yes` if the text has the given suffix, `no` otherwise.
+
+**Example:**  
+```tomo
+>> "hello world":ends_with("world")
+= yes
+```
+
+---
+
 ## `from_c_string`
 
 **Description:**  
@@ -614,7 +640,7 @@ See: [Patterns](#Patterns) for more information on patterns.
 
 **Usage:**  
 ```tomo
-find(text: Text, pattern: Text, start: Int = 1, length: &Int64? = !&Int64) -> Int
+find(text: Text, pattern: Pattern, start: Int = 1, length: &Int64? = !&Int64) -> Int
 ```
 
 **Parameters:**
@@ -657,7 +683,7 @@ See: [Patterns](#Patterns) for more information on patterns.
 
 **Usage:**  
 ```tomo
-find_all(text: Text, pattern: Text) -> [Text]
+find_all(text: Text, pattern: Pattern) -> [Text]
 ```
 
 **Parameters:**
@@ -699,7 +725,7 @@ Checks if the `Text` contains a target pattern (see: [Patterns](#Patterns)).
 
 **Usage:**  
 ```tomo
-has(text: Text, pattern: Text) -> Bool
+has(text: Text, pattern: Pattern) -> Bool
 ```
 
 **Parameters:**
@@ -816,7 +842,7 @@ Checks if the `Text` matches target pattern (see: [Patterns](#Patterns)).
 
 **Usage:**  
 ```tomo
-matches(text: Text, pattern: Text) -> Bool
+matches(text: Text, pattern: Pattern) -> Bool
 ```
 
 **Parameters:**
@@ -847,7 +873,7 @@ calling the given function on that text.
 
 **Usage:**  
 ```tomo
-map(text: Text, pattern: Text, fn: func(text:Text)->Text) -> Text
+map(text: Text, pattern: Pattern, fn: func(text:Text)->Text) -> Text
 ```
 
 **Parameters:**
@@ -931,7 +957,7 @@ See [Patterns](#patterns) for more information about patterns.
 
 **Usage:**  
 ```tomo
-replace(text: Text, pattern: Text, replacement: Text, backref: Pattern = $/\/, recursive: Bool = yes) -> Text
+replace(text: Text, pattern: Pattern, replacement: Text, backref: Pattern = $/\/, recursive: Bool = yes) -> Text
 ```
 
 **Parameters:**
@@ -1045,7 +1071,7 @@ See [Patterns](#patterns) for more information about patterns.
 
 **Usage:**  
 ```tomo
-split(text: Text, pattern: Text = "") -> [Text]
+split(text: Text, pattern: Pattern = "") -> [Text]
 ```
 
 **Parameters:**
@@ -1074,6 +1100,32 @@ An array of substrings resulting from the split.
 
 ---
 
+## `starts_with`
+
+**Description:**  
+Checks if the `Text` starts with a literal prefix text.
+
+**Usage:**  
+```tomo
+starts_with(text: Text, prefix: Text) -> Bool
+```
+
+**Parameters:**
+
+- `text`: The text to be searched.
+- `prefix`: The literal prefix text to check for.
+
+**Returns:**  
+`yes` if the text has the given prefix, `no` otherwise.
+
+**Example:**  
+```tomo
+>> "hello world":starts_with("hello")
+= yes
+```
+
+---
+
 ## `title`
 
 **Description:**  
diff --git a/environment.c b/environment.c
index d51d9aa1..b17d12a0 100644
--- a/environment.c
+++ b/environment.c
@@ -275,10 +275,11 @@ env_t *new_compilation_unit(CORD *libname)
             {"run", "Shell$run", "func(command:Shell, status=!&Int32?)->Text"},
         )},
         {"Text", TEXT_TYPE, "Text_t", "Text$info", TypedArray(ns_entry_t,
-            {"find", "Text$find", "func(text:Text, pattern:Pattern, start=1, length=!&Int64)->Int"},
-            {"find_all", "Text$find_all", "func(text:Text, pattern:Pattern)->[Text]"},
             {"as_c_string", "Text$as_c_string", "func(text:Text)->CString"},
             {"codepoint_names", "Text$codepoint_names", "func(text:Text)->[Text]"},
+            {"ends_with", "Text$ends_with", "func(text:Text, suffix:Text)->Bool"},
+            {"find", "Text$find", "func(text:Text, pattern:Pattern, start=1, length=!&Int64)->Int"},
+            {"find_all", "Text$find_all", "func(text:Text, pattern:Pattern)->[Text]"},
             {"from_bytes", "Text$from_bytes", "func(bytes:[Int8])->Text"},
             {"from_c_string", "Text$from_str", "func(str:CString)->Text"},
             {"from_codepoint_names", "Text$from_codepoint_names", "func(codepoint_names:[Text])->Text"},
@@ -293,8 +294,9 @@ env_t *new_compilation_unit(CORD *libname)
             {"repeat", "Text$repeat", "func(text:Text, count:Int)->Text"},
             {"replace", "Text$replace", "func(text:Text, pattern:Pattern, replacement:Text, backref=$/\\/, recursive=yes)->Text"},
             {"replace_all", "Text$replace_all", "func(text:Text, replacements:{Pattern:Text}, backref=$/\\/, recursive=yes)->Text"},
-            {"split", "Text$split", "func(text:Text, pattern=$Pattern'')->[Text]"},
             {"slice", "Text$slice", "func(text:Text, from=1, to=-1)->Text"},
+            {"split", "Text$split", "func(text:Text, pattern=$Pattern'')->[Text]"},
+            {"starts_with", "Text$starts_with", "func(text:Text, prefix:Text)->Bool"},
             {"title", "Text$title", "func(text:Text)->Text"},
             {"trim", "Text$trim", "func(text:Text, pattern=$/{whitespace}/, trim_left=yes, trim_right=yes)->Text"},
             {"upper", "Text$upper", "func(text:Text)->Text"},
diff --git a/test/text.tm b/test/text.tm
index bf7b77cd..216e5aaf 100644
--- a/test/text.tm
+++ b/test/text.tm
@@ -275,6 +275,16 @@ func main():
 	>> "A=B=C=D":replace($/{..}={..}/, "1:(\1) 2:(\2)")
 	= "1:(A) 2:(B=C=D)"
 
+	>> "abcde":starts_with("ab")
+	= yes
+	>> "abcde":starts_with("bc")
+	= no
+
+	>> "abcde":ends_with("de")
+	= yes
+	>> "abcde":ends_with("cd")
+	= no
+
 	do:
 		!! Testing concatenation-stability:
 		>> ab := Text.from_codepoint_names(["LATIN SMALL LETTER E", "COMBINING VERTICAL LINE BELOW"])
author	Bruce Hill <bruce@bruce-hill.com>	2024-09-09 05:31:25 -0400
committer	Bruce Hill <bruce@bruce-hill.com>	2024-09-09 05:31:25 -0400
commit	e16792d5eb76326634cb1ad0b21301ec693fccb3 (patch)
tree	99e5b5c53db39e173db2b82db87cef85cd267c60
parent	319d4306b585569d198a6e0ac268c009eb63705a (diff)