aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBruce Hill <bruce@bruce-hill.com>2025-11-23 00:35:05 -0500
committerBruce Hill <bruce@bruce-hill.com>2025-11-23 00:35:05 -0500
commitcb9d3b1a2c2c59c368f6121a16a9ab928b0ff951 (patch)
treedf4c38c993ff78e2e4005058efb66ee1df6f3561
parenta453ebf215e5e3ec3b27fa5142af77d7e3ca0c92 (diff)
Added Text.find(text, target, start=1)
-rw-r--r--CHANGES.md1
-rw-r--r--api/api.md25
-rw-r--r--api/text.md25
-rw-r--r--api/text.yaml28
-rw-r--r--src/environment.c1
-rw-r--r--src/stdlib/text.c17
-rw-r--r--src/stdlib/text.h1
-rw-r--r--test/text.tm6
8 files changed, 102 insertions, 2 deletions
diff --git a/CHANGES.md b/CHANGES.md
index 396e4a84..293ff431 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -12,6 +12,7 @@
- Syntax for tables has changed to use colons (`{k: v}`) instead of equals
(`{k=v}`).
- Added `Path.lines()`
+- Added `Text.find(text, target, start=1)`
- Deprecated:
- Sets are no longer a separate type with separate methods.
- Instead of sets, use tables with a value type of `{KeyType:Empty}`.
diff --git a/api/api.md b/api/api.md
index 3af79d37..02ad054e 100644
--- a/api/api.md
+++ b/api/api.md
@@ -3905,6 +3905,31 @@ assert "hello world".ends_with("world", &remainder) == yes
assert remainder == "hello "
```
+## Text.find
+
+```tomo
+Text.find : func(text: Text, target: Text, start: Int = 1 -> Int?)
+```
+
+Find a substring within a text and return its index, if found.
+
+Argument | Type | Description | Default
+---------|------|-------------|---------
+text | `Text` | The text to be searched. | -
+target | `Text` | The target text to find. | -
+start | `Int` | The index at which to begin searching. | `1`
+
+**Return:** The index where the first occurrence of `target` appears, or `none` if it is not found.
+
+
+**Example:**
+```tomo
+assert "one two".find("one") == 1
+assert "one two".find("two") == 5
+assert "one two".find("three") == none
+assert "one two".find("o", start=2) == 7
+
+```
## Text.from
```tomo
diff --git a/api/text.md b/api/text.md
index 9bd99529..928cb6ec 100644
--- a/api/text.md
+++ b/api/text.md
@@ -205,6 +205,31 @@ assert "hello world".ends_with("world", &remainder) == yes
assert remainder == "hello "
```
+## Text.find
+
+```tomo
+Text.find : func(text: Text, target: Text, start: Int = 1 -> Int?)
+```
+
+Find a substring within a text and return its index, if found.
+
+Argument | Type | Description | Default
+---------|------|-------------|---------
+text | `Text` | The text to be searched. | -
+target | `Text` | The target text to find. | -
+start | `Int` | The index at which to begin searching. | `1`
+
+**Return:** The index where the first occurrence of `target` appears, or `none` if it is not found.
+
+
+**Example:**
+```tomo
+assert "one two".find("one") == 1
+assert "one two".find("two") == 5
+assert "one two".find("three") == none
+assert "one two".find("o", start=2) == 7
+
+```
## Text.from
```tomo
diff --git a/api/text.yaml b/api/text.yaml
index 2c21fa30..6874bfc8 100644
--- a/api/text.yaml
+++ b/api/text.yaml
@@ -225,6 +225,34 @@ Text.ends_with:
assert "hello world".ends_with("world", &remainder) == yes
assert remainder == "hello "
+Text.find:
+ short: find a substring
+ description: >
+ Find a substring within a text and return its index, if found.
+ return:
+ type: 'Int?'
+ description: >
+ The index where the first occurrence of `target` appears, or `none` if it is not found.
+ args:
+ text:
+ type: 'Text'
+ description: >
+ The text to be searched.
+ target:
+ type: 'Text'
+ description: >
+ The target text to find.
+ start:
+ type: 'Int'
+ default: '1'
+ description: >
+ The index at which to begin searching.
+ example: |
+ assert "one two".find("one") == 1
+ assert "one two".find("two") == 5
+ assert "one two".find("three") == none
+ assert "one two".find("o", start=2) == 7
+
Text.from:
short: slice from a starting index
description: >
diff --git a/src/environment.c b/src/environment.c
index 8f49f86e..3a2995f7 100644
--- a/src/environment.c
+++ b/src/environment.c
@@ -353,6 +353,7 @@ env_t *global_env(bool source_mapping) {
{"caseless_equals", "Text$equal_ignoring_case", "func(a,b:Text, language='C' -> Bool)"}, //
{"codepoint_names", "Text$codepoint_names", "func(text:Text -> [Text])"}, //
{"ends_with", "Text$ends_with", "func(text,suffix:Text, remainder:&Text? = none -> Bool)"}, //
+ {"find", "Text$find", "func(text,target:Text, start=1 -> Int?)"}, //
{"from", "Text$from", "func(text:Text, first:Int -> Text)"}, //
{"from_c_string", "Text$from_str", "func(str:CString -> Text?)"}, //
{"from_codepoint_names", "Text$from_codepoint_names", "func(codepoint_names:[Text] -> Text?)"}, //
diff --git a/src/stdlib/text.c b/src/stdlib/text.c
index febcafce..e51af49c 100644
--- a/src/stdlib/text.c
+++ b/src/stdlib/text.c
@@ -1057,8 +1057,8 @@ PUREFUNC public int32_t Text$compare(const void *va, const void *vb, const TypeI
bool _matches(TextIter_t *text_state, TextIter_t *target_state, int64_t pos) {
for (int64_t i = 0; i < (int64_t)target_state->stack[0].text.length; i++) {
int32_t text_i = Text$get_grapheme_fast(text_state, pos + i);
- int32_t prefix_i = Text$get_grapheme_fast(target_state, i);
- if (text_i != prefix_i) return false;
+ int32_t target_i = Text$get_grapheme_fast(target_state, i);
+ if (text_i != target_i) return false;
}
return true;
}
@@ -1107,6 +1107,19 @@ static bool _has_grapheme(TextIter_t *text, int32_t g) {
}
public
+// Find the first occurrence of `target` in `text`, starting the search at the
+// 1-based index `start`. Positions are compared grapheme-by-grapheme via
+// `_matches`. Returns the 1-based index of the match, or NONE_INT when no
+// match exists at or after `start`.
+OptionalInt_t Text$find(Text_t text, Text_t target, Int_t start) {
+    // A target longer than the text can never match.
+    if (text.length < target.length) return NONE_INT;
+    // 0-based offset of the first candidate position. Offsets before the start
+    // of the text lie outside it, so clamp rather than probing negative indices.
+    int64_t first = Int64$from_int(start, false) - 1;
+    if (first < 0) first = 0;
+    // Last 0-based offset at which `target` still fits inside `text`.
+    int64_t last = (int64_t)text.length - (int64_t)target.length;
+    if (first > last) return NONE_INT;
+    // The empty text matches at the first position searched; report that
+    // position (honoring `start`) instead of unconditionally returning 1.
+    if (target.length <= 0) return Int$from_int64(first + 1);
+    TextIter_t text_state = NEW_TEXT_ITER_STATE(text), target_state = NEW_TEXT_ITER_STATE(target);
+    for (int64_t i = first; i <= last; i++) {
+        if (_matches(&text_state, &target_state, i)) {
+            // Convert the 0-based offset back to a 1-based index.
+            return Int$from_int64(i + 1);
+        }
+    }
+    return NONE_INT;
+}
+
+public
Text_t Text$trim(Text_t text, Text_t to_trim, bool left, bool right) {
int64_t first = 0;
TextIter_t text_state = NEW_TEXT_ITER_STATE(text), trim_state = NEW_TEXT_ITER_STATE(to_trim);
diff --git a/src/stdlib/text.h b/src/stdlib/text.h
index 12b4bc20..fba8b08f 100644
--- a/src/stdlib/text.h
+++ b/src/stdlib/text.h
@@ -84,6 +84,7 @@ PUREFUNC bool Text$starts_with(Text_t text, Text_t prefix, Text_t *remainder);
PUREFUNC bool Text$ends_with(Text_t text, Text_t suffix, Text_t *remainder);
Text_t Text$without_prefix(Text_t text, Text_t prefix);
Text_t Text$without_suffix(Text_t text, Text_t suffix);
+OptionalInt_t Text$find(Text_t text, Text_t target, Int_t start);
Text_t Text$replace(Text_t text, Text_t target, Text_t replacement);
Text_t Text$translate(Text_t text, Table_t translations);
PUREFUNC bool Text$has(Text_t text, Text_t target);
diff --git a/test/text.tm b/test/text.tm
index 094da8f8..6c23042d 100644
--- a/test/text.tm
+++ b/test/text.tm
@@ -202,3 +202,9 @@ func main()
assert Text.from_utf32([150370]) == test
assert Text.from_utf16([-10158, -8350]) == test
assert Text.from_utf8([0xf0, 0xa4, 0xad, 0xa2]) == test
+
+
+ assert "one two".find("one") == 1
+ assert "one two".find("two") == 5
+ assert "one two".find("three") == none
+ assert "one two".find("o", start=2) == 7