Add Text.starts_with() and Text.ends_with()

2024-09-09 05:31:25 -04:00 · 2024-09-09 05:31:25 -04:00 · e16792d5eb
commit e16792d5eb
parent 319d4306b5
5 changed files with 116 additions and 14 deletions
--- a/builtins/text.c
+++ b/builtins/text.c
@ -905,6 +905,32 @@ PUREFUNC public int32_t Text$compare(const Text_t *a, const Text_t *b)
    return 0;
 }

+PUREFUNC public bool Text$starts_with(Text_t text, Text_t prefix)
+{
+    // Literal (non-pattern) prefix check: compare the values that
+    // _get_grapheme() yields for both texts at each index of the prefix.
+    if (prefix.length > text.length) return false;
+    text_iter_t text_iter = {0, 0}, prefix_iter = {0, 0};
+    for (int64_t pos = 0; pos < prefix.length; pos++) {
+        if (_get_grapheme(text, &text_iter, pos) != _get_grapheme(prefix, &prefix_iter, pos))
+            return false;
+    }
+    return true;
+}
+
+PUREFUNC public bool Text$ends_with(Text_t text, Text_t suffix)
+{
+    // Walk the suffix while reading the text at the matching tail offset.
+    if (suffix.length > text.length) return false;
+    const int64_t tail_start = text.length - suffix.length;
+    text_iter_t text_iter = {0, 0}, suffix_iter = {0, 0};
+    for (int64_t k = 0; k < suffix.length; k++) {
+        if (_get_grapheme(text, &text_iter, tail_start + k) != _get_grapheme(suffix, &suffix_iter, k))
+            return false;
+    }
+    return true;
+}
+
 PUREFUNC public bool Text$equal_values(Text_t a, Text_t b)
 {
    if (a.length != b.length || (a.hash != 0 && b.hash != 0 && a.hash != b.hash))
@ -1756,9 +1782,21 @@ public Int_t Text$find(Text_t text, Pattern_t pattern, Int_t from_index, int64_t

 PUREFUNC public bool Text$has(Text_t text, Pattern_t pattern)
 {
+    if (Text$starts_with(pattern, Text("{start}"))) { // start-anchored pattern: only index 0 can match
+        int64_t m = match(text, 0, pattern, 0, NULL, 0);
+        return m >= 0;
+    } else if (Text$ends_with(pattern, Text("{end}"))) { // end-anchored: inspect the PATTERN, not the text
+        for (int64_t i = text.length-1; i >= 0; i--) { // try each start index, last one first
+            int64_t match_len = match(text, i, pattern, 0, NULL, 0);
+            if (match_len >= 0 && i + match_len == text.length) // accept only a match reaching the text's end
+                return true;
+        }
+        return false;
+    } else {
        int64_t found = _find(text, pattern, 0, text.length-1, NULL);
        return (found >= 0);
    }
+}

 PUREFUNC public bool Text$matches(Text_t text, Pattern_t pattern)
 {
@ -2008,8 +2046,6 @@ public Text_t Text$trim(Text_t text, Pattern_t pattern, bool trim_left, bool tri
            int64_t match_len = match(text, i, pattern, 0, NULL, 0);
            if (match_len > 0 && i + match_len == text.length)
                last = i-1;
-            // else
-            //     break;
        }
    }
    return Text$slice(text, I(first+1), I(last+1));
--- a/builtins/text.h
+++ b/builtins/text.h
@ -41,6 +41,8 @@ Text_t Text$trim(Text_t text, Pattern_t pattern, bool trim_left, bool trim_right
 Int_t Text$find(Text_t text, Pattern_t pattern, Int_t i, int64_t *match_length);
 Array_t Text$find_all(Text_t text, Pattern_t pattern);
 PUREFUNC bool Text$has(Text_t text, Pattern_t pattern);
+PUREFUNC bool Text$starts_with(Text_t text, Text_t prefix);
+PUREFUNC bool Text$ends_with(Text_t text, Text_t suffix);
 PUREFUNC bool Text$matches(Text_t text, Pattern_t pattern);
 char *Text$as_c_string(Text_t text);
 __attribute__((format(printf, 1, 2)))
--- a/docs/text.md
+++ b/docs/text.md
@ -494,6 +494,32 @@ An array of 32-bit integer Unicode code points (`[Int32]`).

 ---

+## `ends_with`
+
+**Description:**  
+Checks if the `Text` ends with a literal suffix text.
+
+**Usage:**  
+```tomo
+ends_with(text: Text, suffix: Text) -> Bool
+```
+
+**Parameters:**
+
+- `text`: The text to be searched.
+- `suffix`: The literal suffix text to check for.
+
+**Returns:**  
+`yes` if the text has the given suffix, `no` otherwise.
+
+**Example:**  
+```tomo
+>> "hello world":ends_with("world")
+= yes
+```
+
+---
+
 ## `from_c_string`

 **Description:**  
@ -614,7 +640,7 @@ See: [Patterns](#Patterns) for more information on patterns.

 **Usage:**  
 ```tomo
-find(text: Text, pattern: Text, start: Int = 1, length: &Int64? = !&Int64) -> Int
+find(text: Text, pattern: Pattern, start: Int = 1, length: &Int64? = !&Int64) -> Int
 ```

 **Parameters:**
@ -657,7 +683,7 @@ See: [Patterns](#Patterns) for more information on patterns.

 **Usage:**  
 ```tomo
-find_all(text: Text, pattern: Text) -> [Text]
+find_all(text: Text, pattern: Pattern) -> [Text]
 ```

 **Parameters:**
@ -699,7 +725,7 @@ Checks if the `Text` contains a target pattern (see: [Patterns](#Patterns)).

 **Usage:**  
 ```tomo
-has(text: Text, pattern: Text) -> Bool
+has(text: Text, pattern: Pattern) -> Bool
 ```

 **Parameters:**
@ -816,7 +842,7 @@ Checks if the `Text` matches target pattern (see: [Patterns](#Patterns)).

 **Usage:**  
 ```tomo
-matches(text: Text, pattern: Text) -> Bool
+matches(text: Text, pattern: Pattern) -> Bool
 ```

 **Parameters:**
@ -847,7 +873,7 @@ calling the given function on that text.

 **Usage:**  
 ```tomo
-map(text: Text, pattern: Text, fn: func(text:Text)->Text) -> Text
+map(text: Text, pattern: Pattern, fn: func(text:Text)->Text) -> Text
 ```

 **Parameters:**
@ -931,7 +957,7 @@ See [Patterns](#patterns) for more information about patterns.

 **Usage:**  
 ```tomo
-replace(text: Text, pattern: Text, replacement: Text, backref: Pattern = $/\/, recursive: Bool = yes) -> Text
+replace(text: Text, pattern: Pattern, replacement: Text, backref: Pattern = $/\/, recursive: Bool = yes) -> Text
 ```

 **Parameters:**
@ -1045,7 +1071,7 @@ See [Patterns](#patterns) for more information about patterns.

 **Usage:**  
 ```tomo
-split(text: Text, pattern: Text = "") -> [Text]
+split(text: Text, pattern: Pattern = "") -> [Text]
 ```

 **Parameters:**
@ -1074,6 +1100,32 @@ An array of substrings resulting from the split.

 ---

+## `starts_with`
+
+**Description:**  
+Checks if the `Text` starts with a literal prefix text.
+
+**Usage:**  
+```tomo
+starts_with(text: Text, prefix: Text) -> Bool
+```
+
+**Parameters:**
+
+- `text`: The text to be searched.
+- `prefix`: The literal prefix text to check for.
+
+**Returns:**  
+`yes` if the text has the given prefix, `no` otherwise.
+
+**Example:**  
+```tomo
+>> "hello world":starts_with("hello")
+= yes
+```
+
+---
+
 ## `title`

 **Description:**  
--- a/environment.c
+++ b/environment.c
@ -275,10 +275,11 @@ env_t *new_compilation_unit(CORD *libname)
            {"run", "Shell$run", "func(command:Shell, status=!&Int32?)->Text"},
        )},
        {"Text", TEXT_TYPE, "Text_t", "Text$info", TypedArray(ns_entry_t,
-            {"find", "Text$find", "func(text:Text, pattern:Pattern, start=1, length=!&Int64)->Int"},
-            {"find_all", "Text$find_all", "func(text:Text, pattern:Pattern)->[Text]"},
            {"as_c_string", "Text$as_c_string", "func(text:Text)->CString"},
            {"codepoint_names", "Text$codepoint_names", "func(text:Text)->[Text]"},
+            {"ends_with", "Text$ends_with", "func(text:Text, suffix:Text)->Bool"},
+            {"find", "Text$find", "func(text:Text, pattern:Pattern, start=1, length=!&Int64)->Int"},
+            {"find_all", "Text$find_all", "func(text:Text, pattern:Pattern)->[Text]"},
            {"from_bytes", "Text$from_bytes", "func(bytes:[Int8])->Text"},
            {"from_c_string", "Text$from_str", "func(str:CString)->Text"},
            {"from_codepoint_names", "Text$from_codepoint_names", "func(codepoint_names:[Text])->Text"},
@ -293,8 +294,9 @@ env_t *new_compilation_unit(CORD *libname)
            {"repeat", "Text$repeat", "func(text:Text, count:Int)->Text"},
            {"replace", "Text$replace", "func(text:Text, pattern:Pattern, replacement:Text, backref=$/\\/, recursive=yes)->Text"},
            {"replace_all", "Text$replace_all", "func(text:Text, replacements:{Pattern:Text}, backref=$/\\/, recursive=yes)->Text"},
-            {"split", "Text$split", "func(text:Text, pattern=$Pattern'')->[Text]"},
            {"slice", "Text$slice", "func(text:Text, from=1, to=-1)->Text"},
+            {"split", "Text$split", "func(text:Text, pattern=$Pattern'')->[Text]"},
+            {"starts_with", "Text$starts_with", "func(text:Text, prefix:Text)->Bool"},
            {"title", "Text$title", "func(text:Text)->Text"},
            {"trim", "Text$trim", "func(text:Text, pattern=$/{whitespace}/, trim_left=yes, trim_right=yes)->Text"},
            {"upper", "Text$upper", "func(text:Text)->Text"},
--- a/test/text.tm
+++ b/test/text.tm
@ -275,6 +275,16 @@ func main():
 	>> "A=B=C=D":replace($/{..}={..}/, "1:(\1) 2:(\2)")
 	= "1:(A) 2:(B=C=D)"

+	>> "abcde":starts_with("ab")
+	= yes
+	>> "abcde":starts_with("bc")
+	= no
+
+	>> "abcde":ends_with("de")
+	= yes
+	>> "abcde":ends_with("cd")
+	= no
+
 	do:
 		!! Testing concatenation-stability:
 		>> ab := Text.from_codepoint_names(["LATIN SMALL LETTER E", "COMBINING VERTICAL LINE BELOW"])