diff options
| -rw-r--r-- | CHANGES.md | 1 | ||||
| -rw-r--r-- | api/api.md | 25 | ||||
| -rw-r--r-- | api/text.md | 25 | ||||
| -rw-r--r-- | api/text.yaml | 28 | ||||
| -rw-r--r-- | src/environment.c | 1 | ||||
| -rw-r--r-- | src/stdlib/text.c | 17 | ||||
| -rw-r--r-- | src/stdlib/text.h | 1 | ||||
| -rw-r--r-- | test/text.tm | 6 |
8 files changed, 102 insertions, 2 deletions
@@ -12,6 +12,7 @@ - Syntax for tables has changed to use colons (`{k: v}`) instead of equals (`{k=v}`). - Added `Path.lines()` +- Added `Text.find(text, target, start=1)` - Deprecated: - Sets are no longer a separate type with separate methods. - Instead of sets, use tables with a value type of `{KeyType:Empty}`. @@ -3905,6 +3905,31 @@ assert "hello world".ends_with("world", &remainder) == yes assert remainder == "hello " ``` +## Text.find + +```tomo +Text.find : func(text: Text, target: Text, start: Int = 1 -> Int?) +``` + +Find a substring within a text and return its index, if found. + +Argument | Type | Description | Default +---------|------|-------------|--------- +text | `Text` | The text to be searched. | - +target | `Text` | The target text to find. | - +start | `Int` | The index at which to begin searching. | `1` + +**Return:** The index where the first occurrence of `target` appears, or `none` if it is not found. + + +**Example:** +```tomo +assert "one two".find("one") == 1 +assert "one two".find("two") == 5 +assert "one two".find("three") == none +assert "one two".find("o", start=2) == 7 + +``` ## Text.from ```tomo diff --git a/api/text.md b/api/text.md index 9bd99529..928cb6ec 100644 --- a/api/text.md +++ b/api/text.md @@ -205,6 +205,31 @@ assert "hello world".ends_with("world", &remainder) == yes assert remainder == "hello " ``` +## Text.find + +```tomo +Text.find : func(text: Text, target: Text, start: Int = 1 -> Int?) +``` + +Find a substring within a text and return its index, if found. + +Argument | Type | Description | Default +---------|------|-------------|--------- +text | `Text` | The text to be searched. | - +target | `Text` | The target text to find. | - +start | `Int` | The index at which to begin searching. | `1` + +**Return:** The index where the first occurrence of `target` appears, or `none` if it is not found. 
+ + +**Example:** +```tomo +assert "one two".find("one") == 1 +assert "one two".find("two") == 5 +assert "one two".find("three") == none +assert "one two".find("o", start=2) == 7 + +``` ## Text.from ```tomo diff --git a/api/text.yaml b/api/text.yaml index 2c21fa30..6874bfc8 100644 --- a/api/text.yaml +++ b/api/text.yaml @@ -225,6 +225,34 @@ Text.ends_with: assert "hello world".ends_with("world", &remainder) == yes assert remainder == "hello " +Text.find: + short: find a substring + description: > + Find a substring within a text and return its index, if found. + return: + type: 'Int?' + description: > + The index where the first occurrence of `target` appears, or `none` if it is not found. + args: + text: + type: 'Text' + description: > + The text to be searched. + target: + type: 'Text' + description: > + The target text to find. + start: + type: 'Int' + default: '1' + description: > + The index at which to begin searching. + example: | + assert "one two".find("one") == 1 + assert "one two".find("two") == 5 + assert "one two".find("three") == none + assert "one two".find("o", start=2) == 7 + Text.from: short: slice from a starting index description: > diff --git a/src/environment.c b/src/environment.c index 8f49f86e..3a2995f7 100644 --- a/src/environment.c +++ b/src/environment.c @@ -353,6 +353,7 @@ env_t *global_env(bool source_mapping) { {"caseless_equals", "Text$equal_ignoring_case", "func(a,b:Text, language='C' -> Bool)"}, // {"codepoint_names", "Text$codepoint_names", "func(text:Text -> [Text])"}, // {"ends_with", "Text$ends_with", "func(text,suffix:Text, remainder:&Text? 
= none -> Bool)"}, // + {"find", "Text$find", "func(text,target:Text, start=1 -> Int?)"}, // {"from", "Text$from", "func(text:Text, first:Int -> Text)"}, // {"from_c_string", "Text$from_str", "func(str:CString -> Text?)"}, // {"from_codepoint_names", "Text$from_codepoint_names", "func(codepoint_names:[Text] -> Text?)"}, // diff --git a/src/stdlib/text.c b/src/stdlib/text.c index febcafce..e51af49c 100644 --- a/src/stdlib/text.c +++ b/src/stdlib/text.c @@ -1057,8 +1057,8 @@ PUREFUNC public int32_t Text$compare(const void *va, const void *vb, const TypeI bool _matches(TextIter_t *text_state, TextIter_t *target_state, int64_t pos) { for (int64_t i = 0; i < (int64_t)target_state->stack[0].text.length; i++) { int32_t text_i = Text$get_grapheme_fast(text_state, pos + i); - int32_t prefix_i = Text$get_grapheme_fast(target_state, i); - if (text_i != prefix_i) return false; + int32_t target_i = Text$get_grapheme_fast(target_state, i); + if (text_i != target_i) return false; } return true; } @@ -1107,6 +1107,19 @@ static bool _has_grapheme(TextIter_t *text, int32_t g) { } public +OptionalInt_t Text$find(Text_t text, Text_t target, Int_t start) { + if (text.length < target.length) return NONE_INT; + if (target.length <= 0) return I(1); + TextIter_t text_state = NEW_TEXT_ITER_STATE(text), target_state = NEW_TEXT_ITER_STATE(target); + for (int64_t i = Int64$from_int(start, false) - 1; i < text.length - target.length + 1; i++) { + if (_matches(&text_state, &target_state, i)) { + return Int$from_int64(i + 1); + } + } + return NONE_INT; +} + +public Text_t Text$trim(Text_t text, Text_t to_trim, bool left, bool right) { int64_t first = 0; TextIter_t text_state = NEW_TEXT_ITER_STATE(text), trim_state = NEW_TEXT_ITER_STATE(to_trim); diff --git a/src/stdlib/text.h b/src/stdlib/text.h index 12b4bc20..fba8b08f 100644 --- a/src/stdlib/text.h +++ b/src/stdlib/text.h @@ -84,6 +84,7 @@ PUREFUNC bool Text$starts_with(Text_t text, Text_t prefix, Text_t *remainder); PUREFUNC bool 
Text$ends_with(Text_t text, Text_t suffix, Text_t *remainder); Text_t Text$without_prefix(Text_t text, Text_t prefix); Text_t Text$without_suffix(Text_t text, Text_t suffix); +OptionalInt_t Text$find(Text_t text, Text_t target, Int_t start); Text_t Text$replace(Text_t text, Text_t target, Text_t replacement); Text_t Text$translate(Text_t text, Table_t translations); PUREFUNC bool Text$has(Text_t text, Text_t target); diff --git a/test/text.tm b/test/text.tm index 094da8f8..6c23042d 100644 --- a/test/text.tm +++ b/test/text.tm @@ -202,3 +202,9 @@ func main() assert Text.from_utf32([150370]) == test assert Text.from_utf16([-10158, -8350]) == test assert Text.from_utf8([0xf0, 0xa4, 0xad, 0xa2]) == test + + + assert "one two".find("one") == 1 + assert "one two".find("two") == 5 + assert "one two".find("three") == none + assert "one two".find("o", start=2) == 7 |
