diff options
| author | Bruce Hill <bruce@bruce-hill.com> | 2024-09-02 23:47:16 -0400 |
|---|---|---|
| committer | Bruce Hill <bruce@bruce-hill.com> | 2024-09-02 23:47:16 -0400 |
| commit | 5aa5a5e99b322586eed9997a14b3d616540bef07 (patch) | |
| tree | c8eede952aa9ff49cea8f981d48e0b5faad291b2 | |
| parent | 5d6fa135b1eadbceac04e5456fabb7e53feedc10 (diff) | |
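Fix Text:find() start-index handling (1-based; index 0 is an error, negative indices count from the end, out-of-range returns 0) and make find_all/replace/split advance past zero-length matches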
| -rw-r--r-- | builtins/functions.c | 2 | ||||
| -rw-r--r-- | builtins/text.c | 25 | ||||
| -rw-r--r-- | environment.c | 2 | ||||
| -rw-r--r-- | test/text.tm | 18 |
4 files changed, 36 insertions, 11 deletions
diff --git a/builtins/functions.c b/builtins/functions.c index 06636cba..4aa699a5 100644 --- a/builtins/functions.c +++ b/builtins/functions.c @@ -218,7 +218,7 @@ public void end_test(void *expr, const TypeInfo *type, const char *expected, con Text_t expr_plain = USE_COLOR ? generic_as_text(expr, false, type) : expr_text; bool success = Text$equal(&expr_plain, &expected_text); if (!success) { - Int_t colon = Text$find(expected_text, Text$from_str(":"), I_small(0), NULL); + Int_t colon = Text$find(expected_text, Text$from_str(":"), I_small(1), NULL); if (colon.small != I_small(0).small) { Text_t with_type = Text$concat(expr_plain, Text$from_str(" : "), type_name); success = Text$equal(&with_type, &expected_text); diff --git a/builtins/text.c b/builtins/text.c index 2f3fbb46..32eefd32 100644 --- a/builtins/text.c +++ b/builtins/text.c @@ -1291,16 +1291,23 @@ int64_t match(Text_t text, Text_t pattern, int64_t text_index, int64_t pattern_i public Int_t Text$find(Text_t text, Text_t pattern, Int_t from_index, int64_t *match_length) { - int32_t first = get_grapheme(pattern, 0); - bool find_first = (first != '[' - && !uc_is_property(first, UC_PROPERTY_QUOTATION_MARK) - && !uc_is_property(first, UC_PROPERTY_PAIRED_PUNCTUATION)); + int64_t first = Int_to_Int64(from_index, false); + if (first == 0) fail("Invalid index: 0"); + if (first < 0) first = text.length + first + 1; + if (first > text.length || first < 1) + return I_small(0); + + int32_t first_grapheme = get_grapheme(pattern, 0); + bool find_first = (first_grapheme != '[' + && !uc_is_property(first_grapheme, UC_PROPERTY_QUOTATION_MARK) + && !uc_is_property(first_grapheme, UC_PROPERTY_PAIRED_PUNCTUATION)); iteration_state_t text_state = {0, 0}; - for (int64_t i = Int_to_Int64(from_index, false)-1; i < text.length; i++) { + + for (int64_t i = first-1; i < text.length; i++) { // Optimization: quickly skip ahead to first char in pattern: if (find_first) { - while (i < text.length && _next_grapheme(text, &text_state, i) != 
first) + while (i < text.length && _next_grapheme(text, &text_state, i) != first_grapheme) ++i; } @@ -1416,7 +1423,7 @@ public array_t Text$find_all(Text_t text, Text_t pattern) if (I_is_zero(found)) break; Text_t match = Text$slice(text, found, Int$plus(found, Int64_to_Int(len-1))); Array$insert(&matches, &match, I_small(0), sizeof(Text_t)); - i = Int$plus(found, Int64_to_Int(len)); + i = Int$plus(found, Int64_to_Int(len <= 0 ? 1 : len)); } return matches; @@ -1437,7 +1444,7 @@ public Text_t Text$replace(Text_t text, Text_t pattern, Text_t replacement) } else { ret = concat2(ret, replacement); } - i = Int$plus(found, Int64_to_Int(len)); + i = Int$plus(found, Int64_to_Int(len <= 0 ? 1 : len)); } if (Int_to_Int64(i, false) <= text.length) { Text_t last_slice = Text$slice(text, i, Int64_to_Int(text.length)); @@ -1463,7 +1470,7 @@ public array_t Text$split(Text_t text, Text_t pattern) if (I_is_zero(found)) break; Text_t chunk = Text$slice(text, i, Int$minus(found, I_small(1))); Array$insert(&chunks, &chunk, I_small(0), sizeof(Text_t)); - i = Int$plus(found, Int64_to_Int(len)); + i = Int$plus(found, Int64_to_Int(len <= 0 ? 
1 : len)); } Text_t last_chunk = Text$slice(text, i, Int64_to_Int(text.length)); diff --git a/environment.c b/environment.c index d4ed6c8d..ee277d2a 100644 --- a/environment.c +++ b/environment.c @@ -247,7 +247,7 @@ env_t *new_compilation_unit(CORD *libname) {"by", "Range$by", "func(range:Range, step:Int)->Range"}, )}, {"Text", TEXT_TYPE, "Text_t", "$Text", TypedArray(ns_entry_t, - {"find", "Text$find", "func(text:Text, pattern:Text)->Int"}, + {"find", "Text$find", "func(text:Text, pattern:Text, start=1, length=!&Int64)->Int"}, {"find_all", "Text$find_all", "func(text:Text, pattern:Text)->[Text]"}, {"as_c_string", "CORD_to_char_star", "func(text:Text)->CString"}, {"codepoint_names", "Text$codepoint_names", "func(text:Text)->[Text]"}, diff --git a/test/text.tm b/test/text.tm index 0dd5f2ec..39e8a6e1 100644 --- a/test/text.tm +++ b/test/text.tm @@ -129,6 +129,7 @@ func main(): >> "one$(\r\n)two$(\r\n)three$(\r\n)":lines() = ["one", "two", "three"] + //! Test splitting and joining text: >> "one two three":split(" ") = ["one", "two", "three"] @@ -156,6 +157,7 @@ func main(): >> "":split() = [] + //! Test text:find_all() >> " one two three ":find_all("[..alpha]") = ["one", "two", "three"] @@ -173,3 +175,19 @@ func main(): >> "Hello":find_all("") = [] + + //! Test text:find() + >> " one two three ":find("[..id]", start=-999) + = 0 + >> " one two three ":find("[..id]", start=999) + = 0 + >> " one two three ":find("[..id]") + = 2 + >> " one two three ":find("[..id]", start=5) + = 8 + + >> len := 0_i64 + >> " one ":find("[..id]", length=&len) + = 4 + >> len + = 3_i64 |
