aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBruce Hill <bruce@bruce-hill.com>2024-09-09 05:31:25 -0400
committerBruce Hill <bruce@bruce-hill.com>2024-09-09 05:31:25 -0400
commite16792d5eb76326634cb1ad0b21301ec693fccb3 (patch)
tree99e5b5c53db39e173db2b82db87cef85cd267c60
parent319d4306b585569d198a6e0ac268c009eb63705a (diff)
Add Text.starts_with() and Text.ends_with()
-rw-r--r--builtins/text.c44
-rw-r--r--builtins/text.h2
-rw-r--r--docs/text.md66
-rw-r--r--environment.c8
-rw-r--r--test/text.tm10
5 files changed, 116 insertions, 14 deletions
diff --git a/builtins/text.c b/builtins/text.c
index 6fedd82e..be00667c 100644
--- a/builtins/text.c
+++ b/builtins/text.c
@@ -905,6 +905,32 @@ PUREFUNC public int32_t Text$compare(const Text_t *a, const Text_t *b)
return 0;
}
+// Report whether `text` begins with `prefix`, comparing the two texts
+// position by position with _get_grapheme(). A prefix longer than the
+// text can never match, so that case is rejected up front.
+PUREFUNC public bool Text$starts_with(Text_t text, Text_t prefix)
+{
+    if (prefix.length > text.length)
+        return false;
+
+    // Each text gets its own iterator state for _get_grapheme().
+    text_iter_t text_iter = {0, 0};
+    text_iter_t prefix_iter = {0, 0};
+    int64_t pos = 0;
+    while (pos < prefix.length) {
+        int32_t from_text = _get_grapheme(text, &text_iter, pos);
+        int32_t from_prefix = _get_grapheme(prefix, &prefix_iter, pos);
+        if (from_text != from_prefix)
+            return false;
+        pos += 1;
+    }
+    return true;
+}
+
+// Report whether `text` ends with `suffix`, comparing the last
+// `suffix.length` positions of `text` against `suffix` with _get_grapheme().
+// A suffix longer than the text can never match.
+PUREFUNC public bool Text$ends_with(Text_t text, Text_t suffix)
+{
+    if (suffix.length > text.length)
+        return false;
+
+    // Index in `text` where the candidate suffix begins.
+    int64_t offset = text.length - suffix.length;
+    text_iter_t text_iter = {0, 0};
+    text_iter_t suffix_iter = {0, 0};
+    int64_t pos = 0;
+    while (pos < suffix.length) {
+        int32_t from_text = _get_grapheme(text, &text_iter, offset + pos);
+        int32_t from_suffix = _get_grapheme(suffix, &suffix_iter, pos);
+        if (from_text != from_suffix)
+            return false;
+        pos += 1;
+    }
+    return true;
+}
+
PUREFUNC public bool Text$equal_values(Text_t a, Text_t b)
{
if (a.length != b.length || (a.hash != 0 && b.hash != 0 && a.hash != b.hash))
@@ -1756,8 +1782,20 @@ public Int_t Text$find(Text_t text, Pattern_t pattern, Int_t from_index, int64_t
PUREFUNC public bool Text$has(Text_t text, Pattern_t pattern)
{
- int64_t found = _find(text, pattern, 0, text.length-1, NULL);
- return (found >= 0);
+    // Report whether `pattern` matches anywhere in `text`. Patterns anchored
+    // with a leading "{start}" or a trailing "{end}" take a shortcut; every
+    // other pattern falls back to a full search with _find().
+    if (Text$starts_with(pattern, Text("{start}"))) {
+        // A start-anchored pattern is only tried at position 0.
+        int64_t m = match(text, 0, pattern, 0, NULL, 0);
+        return m >= 0;
+    } else if (Text$ends_with(pattern, Text("{end}"))) {
+        // The anchor is part of the *pattern*, so the pattern is what must be
+        // inspected here. Inspecting `text` instead would force end-anchored
+        // matching on any text that literally ends with "{end}".
+        // Try start positions from the back, accepting only a match that
+        // reaches the end of the text.
+        for (int64_t i = text.length-1; i >= 0; i--) {
+            int64_t match_len = match(text, i, pattern, 0, NULL, 0);
+            if (match_len >= 0 && i + match_len == text.length)
+                return true;
+        }
+        return false;
+    } else {
+        int64_t found = _find(text, pattern, 0, text.length-1, NULL);
+        return (found >= 0);
+    }
}
PUREFUNC public bool Text$matches(Text_t text, Pattern_t pattern)
@@ -2008,8 +2046,6 @@ public Text_t Text$trim(Text_t text, Pattern_t pattern, bool trim_left, bool tri
int64_t match_len = match(text, i, pattern, 0, NULL, 0);
if (match_len > 0 && i + match_len == text.length)
last = i-1;
- // else
- // break;
}
}
return Text$slice(text, I(first+1), I(last+1));
diff --git a/builtins/text.h b/builtins/text.h
index 7f4861a3..2c19f2f2 100644
--- a/builtins/text.h
+++ b/builtins/text.h
@@ -41,6 +41,8 @@ Text_t Text$trim(Text_t text, Pattern_t pattern, bool trim_left, bool trim_right
Int_t Text$find(Text_t text, Pattern_t pattern, Int_t i, int64_t *match_length);
Array_t Text$find_all(Text_t text, Pattern_t pattern);
PUREFUNC bool Text$has(Text_t text, Pattern_t pattern);
+PUREFUNC bool Text$starts_with(Text_t text, Text_t prefix);
+PUREFUNC bool Text$ends_with(Text_t text, Text_t suffix);
PUREFUNC bool Text$matches(Text_t text, Pattern_t pattern);
char *Text$as_c_string(Text_t text);
__attribute__((format(printf, 1, 2)))
diff --git a/docs/text.md b/docs/text.md
index 02c30912..8a9f641e 100644
--- a/docs/text.md
+++ b/docs/text.md
@@ -494,6 +494,32 @@ An array of 32-bit integer Unicode code points (`[Int32]`).
---
+## `ends_with`
+
+**Description:**
+Checks if the `Text` ends with a literal suffix text.
+
+**Usage:**
+```tomo
+ends_with(text: Text, suffix: Text) -> Bool
+```
+
+**Parameters:**
+
+- `text`: The text to be searched.
+- `suffix`: The literal suffix text to check for.
+
+**Returns:**
+`yes` if the text has the given suffix, `no` otherwise.
+
+**Example:**
+```tomo
+>> "hello world":ends_with("world")
+= yes
+```
+
+---
+
## `from_c_string`
**Description:**
@@ -614,7 +640,7 @@ See: [Patterns](#Patterns) for more information on patterns.
**Usage:**
```tomo
-find(text: Text, pattern: Text, start: Int = 1, length: &Int64? = !&Int64) -> Int
+find(text: Text, pattern: Pattern, start: Int = 1, length: &Int64? = !&Int64) -> Int
```
**Parameters:**
@@ -657,7 +683,7 @@ See: [Patterns](#Patterns) for more information on patterns.
**Usage:**
```tomo
-find_all(text: Text, pattern: Text) -> [Text]
+find_all(text: Text, pattern: Pattern) -> [Text]
```
**Parameters:**
@@ -699,7 +725,7 @@ Checks if the `Text` contains a target pattern (see: [Patterns](#Patterns)).
**Usage:**
```tomo
-has(text: Text, pattern: Text) -> Bool
+has(text: Text, pattern: Pattern) -> Bool
```
**Parameters:**
@@ -816,7 +842,7 @@ Checks if the `Text` matches target pattern (see: [Patterns](#Patterns)).
**Usage:**
```tomo
-matches(text: Text, pattern: Text) -> Bool
+matches(text: Text, pattern: Pattern) -> Bool
```
**Parameters:**
@@ -847,7 +873,7 @@ calling the given function on that text.
**Usage:**
```tomo
-map(text: Text, pattern: Text, fn: func(text:Text)->Text) -> Text
+map(text: Text, pattern: Pattern, fn: func(text:Text)->Text) -> Text
```
**Parameters:**
@@ -931,7 +957,7 @@ See [Patterns](#patterns) for more information about patterns.
**Usage:**
```tomo
-replace(text: Text, pattern: Text, replacement: Text, backref: Pattern = $/\/, recursive: Bool = yes) -> Text
+replace(text: Text, pattern: Pattern, replacement: Text, backref: Pattern = $/\/, recursive: Bool = yes) -> Text
```
**Parameters:**
@@ -1045,7 +1071,7 @@ See [Patterns](#patterns) for more information about patterns.
**Usage:**
```tomo
-split(text: Text, pattern: Text = "") -> [Text]
+split(text: Text, pattern: Pattern = "") -> [Text]
```
**Parameters:**
@@ -1074,6 +1100,32 @@ An array of substrings resulting from the split.
---
+## `starts_with`
+
+**Description:**
+Checks if the `Text` starts with a literal prefix text.
+
+**Usage:**
+```tomo
+starts_with(text: Text, prefix: Text) -> Bool
+```
+
+**Parameters:**
+
+- `text`: The text to be searched.
+- `prefix`: The literal prefix text to check for.
+
+**Returns:**
+`yes` if the text has the given prefix, `no` otherwise.
+
+**Example:**
+```tomo
+>> "hello world":starts_with("hello")
+= yes
+```
+
+---
+
## `title`
**Description:**
diff --git a/environment.c b/environment.c
index d51d9aa1..b17d12a0 100644
--- a/environment.c
+++ b/environment.c
@@ -275,10 +275,11 @@ env_t *new_compilation_unit(CORD *libname)
{"run", "Shell$run", "func(command:Shell, status=!&Int32?)->Text"},
)},
{"Text", TEXT_TYPE, "Text_t", "Text$info", TypedArray(ns_entry_t,
- {"find", "Text$find", "func(text:Text, pattern:Pattern, start=1, length=!&Int64)->Int"},
- {"find_all", "Text$find_all", "func(text:Text, pattern:Pattern)->[Text]"},
{"as_c_string", "Text$as_c_string", "func(text:Text)->CString"},
{"codepoint_names", "Text$codepoint_names", "func(text:Text)->[Text]"},
+ {"ends_with", "Text$ends_with", "func(text:Text, suffix:Text)->Bool"},
+ {"find", "Text$find", "func(text:Text, pattern:Pattern, start=1, length=!&Int64)->Int"},
+ {"find_all", "Text$find_all", "func(text:Text, pattern:Pattern)->[Text]"},
{"from_bytes", "Text$from_bytes", "func(bytes:[Int8])->Text"},
{"from_c_string", "Text$from_str", "func(str:CString)->Text"},
{"from_codepoint_names", "Text$from_codepoint_names", "func(codepoint_names:[Text])->Text"},
@@ -293,8 +294,9 @@ env_t *new_compilation_unit(CORD *libname)
{"repeat", "Text$repeat", "func(text:Text, count:Int)->Text"},
{"replace", "Text$replace", "func(text:Text, pattern:Pattern, replacement:Text, backref=$/\\/, recursive=yes)->Text"},
{"replace_all", "Text$replace_all", "func(text:Text, replacements:{Pattern:Text}, backref=$/\\/, recursive=yes)->Text"},
- {"split", "Text$split", "func(text:Text, pattern=$Pattern'')->[Text]"},
{"slice", "Text$slice", "func(text:Text, from=1, to=-1)->Text"},
+ {"split", "Text$split", "func(text:Text, pattern=$Pattern'')->[Text]"},
+ {"starts_with", "Text$starts_with", "func(text:Text, prefix:Text)->Bool"},
{"title", "Text$title", "func(text:Text)->Text"},
{"trim", "Text$trim", "func(text:Text, pattern=$/{whitespace}/, trim_left=yes, trim_right=yes)->Text"},
{"upper", "Text$upper", "func(text:Text)->Text"},
diff --git a/test/text.tm b/test/text.tm
index bf7b77cd..216e5aaf 100644
--- a/test/text.tm
+++ b/test/text.tm
@@ -275,6 +275,16 @@ func main():
>> "A=B=C=D":replace($/{..}={..}/, "1:(\1) 2:(\2)")
= "1:(A) 2:(B=C=D)"
+ >> "abcde":starts_with("ab")
+ = yes
+ >> "abcde":starts_with("bc")
+ = no
+
+ >> "abcde":ends_with("de")
+ = yes
+ >> "abcde":ends_with("cd")
+ = no
+
do:
!! Testing concatenation-stability:
>> ab := Text.from_codepoint_names(["LATIN SMALL LETTER E", "COMBINING VERTICAL LINE BELOW"])