about summary refs log tree commit diff
diff options
context:
space:
mode:
author Bruce Hill <bruce@bruce-hill.com> 2024-09-02 23:47:16 -0400
committer Bruce Hill <bruce@bruce-hill.com> 2024-09-02 23:47:16 -0400
commit 5aa5a5e99b322586eed9997a14b3d616540bef07 (patch)
tree c8eede952aa9ff49cea8f981d48e0b5faad291b2
parent 5d6fa135b1eadbceac04e5456fabb7e53feedc10 (diff)
Fix some stuff around Text:find() and text indexing
-rw-r--r-- builtins/functions.c 2
-rw-r--r-- builtins/text.c 25
-rw-r--r-- environment.c 2
-rw-r--r-- test/text.tm 18
4 files changed, 36 insertions, 11 deletions
diff --git a/builtins/functions.c b/builtins/functions.c
index 06636cba..4aa699a5 100644
--- a/builtins/functions.c
+++ b/builtins/functions.c
@@ -218,7 +218,7 @@ public void end_test(void *expr, const TypeInfo *type, const char *expected, con
Text_t expr_plain = USE_COLOR ? generic_as_text(expr, false, type) : expr_text;
bool success = Text$equal(&expr_plain, &expected_text);
if (!success) {
- Int_t colon = Text$find(expected_text, Text$from_str(":"), I_small(0), NULL);
+ Int_t colon = Text$find(expected_text, Text$from_str(":"), I_small(1), NULL);
if (colon.small != I_small(0).small) {
Text_t with_type = Text$concat(expr_plain, Text$from_str(" : "), type_name);
success = Text$equal(&with_type, &expected_text);
diff --git a/builtins/text.c b/builtins/text.c
index 2f3fbb46..32eefd32 100644
--- a/builtins/text.c
+++ b/builtins/text.c
@@ -1291,16 +1291,23 @@ int64_t match(Text_t text, Text_t pattern, int64_t text_index, int64_t pattern_i
public Int_t Text$find(Text_t text, Text_t pattern, Int_t from_index, int64_t *match_length)
{
- int32_t first = get_grapheme(pattern, 0);
- bool find_first = (first != '['
- && !uc_is_property(first, UC_PROPERTY_QUOTATION_MARK)
- && !uc_is_property(first, UC_PROPERTY_PAIRED_PUNCTUATION));
+ int64_t first = Int_to_Int64(from_index, false);
+ if (first == 0) fail("Invalid index: 0");
+ if (first < 0) first = text.length + first + 1;
+ if (first > text.length || first < 1)
+ return I_small(0);
+
+ int32_t first_grapheme = get_grapheme(pattern, 0);
+ bool find_first = (first_grapheme != '['
+ && !uc_is_property(first_grapheme, UC_PROPERTY_QUOTATION_MARK)
+ && !uc_is_property(first_grapheme, UC_PROPERTY_PAIRED_PUNCTUATION));
iteration_state_t text_state = {0, 0};
- for (int64_t i = Int_to_Int64(from_index, false)-1; i < text.length; i++) {
+
+ for (int64_t i = first-1; i < text.length; i++) {
// Optimization: quickly skip ahead to first char in pattern:
if (find_first) {
- while (i < text.length && _next_grapheme(text, &text_state, i) != first)
+ while (i < text.length && _next_grapheme(text, &text_state, i) != first_grapheme)
++i;
}
@@ -1416,7 +1423,7 @@ public array_t Text$find_all(Text_t text, Text_t pattern)
if (I_is_zero(found)) break;
Text_t match = Text$slice(text, found, Int$plus(found, Int64_to_Int(len-1)));
Array$insert(&matches, &match, I_small(0), sizeof(Text_t));
- i = Int$plus(found, Int64_to_Int(len));
+ i = Int$plus(found, Int64_to_Int(len <= 0 ? 1 : len));
}
return matches;
@@ -1437,7 +1444,7 @@ public Text_t Text$replace(Text_t text, Text_t pattern, Text_t replacement)
} else {
ret = concat2(ret, replacement);
}
- i = Int$plus(found, Int64_to_Int(len));
+ i = Int$plus(found, Int64_to_Int(len <= 0 ? 1 : len));
}
if (Int_to_Int64(i, false) <= text.length) {
Text_t last_slice = Text$slice(text, i, Int64_to_Int(text.length));
@@ -1463,7 +1470,7 @@ public array_t Text$split(Text_t text, Text_t pattern)
if (I_is_zero(found)) break;
Text_t chunk = Text$slice(text, i, Int$minus(found, I_small(1)));
Array$insert(&chunks, &chunk, I_small(0), sizeof(Text_t));
- i = Int$plus(found, Int64_to_Int(len));
+ i = Int$plus(found, Int64_to_Int(len <= 0 ? 1 : len));
}
Text_t last_chunk = Text$slice(text, i, Int64_to_Int(text.length));
diff --git a/environment.c b/environment.c
index d4ed6c8d..ee277d2a 100644
--- a/environment.c
+++ b/environment.c
@@ -247,7 +247,7 @@ env_t *new_compilation_unit(CORD *libname)
{"by", "Range$by", "func(range:Range, step:Int)->Range"},
)},
{"Text", TEXT_TYPE, "Text_t", "$Text", TypedArray(ns_entry_t,
- {"find", "Text$find", "func(text:Text, pattern:Text)->Int"},
+ {"find", "Text$find", "func(text:Text, pattern:Text, start=1, length=!&Int64)->Int"},
{"find_all", "Text$find_all", "func(text:Text, pattern:Text)->[Text]"},
{"as_c_string", "CORD_to_char_star", "func(text:Text)->CString"},
{"codepoint_names", "Text$codepoint_names", "func(text:Text)->[Text]"},
diff --git a/test/text.tm b/test/text.tm
index 0dd5f2ec..39e8a6e1 100644
--- a/test/text.tm
+++ b/test/text.tm
@@ -129,6 +129,7 @@ func main():
>> "one$(\r\n)two$(\r\n)three$(\r\n)":lines()
= ["one", "two", "three"]
+ //! Test splitting and joining text:
>> "one two three":split(" ")
= ["one", "two", "three"]
@@ -156,6 +157,7 @@ func main():
>> "":split()
= []
+ //! Test text:find_all()
>> " one two three ":find_all("[..alpha]")
= ["one", "two", "three"]
@@ -173,3 +175,19 @@ func main():
>> "Hello":find_all("")
= []
+
+ //! Test text:find()
+ >> " one two three ":find("[..id]", start=-999)
+ = 0
+ >> " one two three ":find("[..id]", start=999)
+ = 0
+ >> " one two three ":find("[..id]")
+ = 2
+ >> " one two three ":find("[..id]", start=5)
+ = 8
+
+ >> len := 0_i64
+ >> " one ":find("[..id]", length=&len)
+ = 4
+ >> len
+ = 3_i64