From af1bd79fd91d1a1efde3cf084643f065c61d330a Mon Sep 17 00:00:00 2001 From: Bruce Hill Date: Sat, 15 Nov 2025 14:10:28 -0500 Subject: Make EMPTY_TEXT into a macro --- src/stdlib/text.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'src/stdlib/text.c') diff --git a/src/stdlib/text.c b/src/stdlib/text.c index d40e1306..f323d88d 100644 --- a/src/stdlib/text.c +++ b/src/stdlib/text.c @@ -146,13 +146,6 @@ static int32_t num_synthetic_graphemes = 0; static Text_t simple_concatenation(Text_t a, Text_t b); -public -Text_t EMPTY_TEXT = { - .length = 0, - .tag = TEXT_ASCII, - .ascii = 0, -}; - PUREFUNC static bool graphemes_equal(const void *va, const void *vb, const TypeInfo_t *info) { (void)info; ucs4_t *a = *(ucs4_t **)va; -- cgit v1.2.3 From a1884f7a85cbee5a67cf48c9e7b088fdea8b8b38 Mon Sep 17 00:00:00 2001 From: Bruce Hill Date: Sat, 15 Nov 2025 18:12:57 -0500 Subject: Fix for potential issue with codepoint names --- src/stdlib/text.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/stdlib/text.c') diff --git a/src/stdlib/text.c b/src/stdlib/text.c index f323d88d..8e800c8a 100644 --- a/src/stdlib/text.c +++ b/src/stdlib/text.c @@ -1605,6 +1605,7 @@ static INLINE const char *codepoint_name(ucs4_t c) { char *found_name = unicode_character_name(c, name); if (found_name) return found_name; const uc_block_t *block = uc_block(c); + if (!block) return "???"; assert(block); return String(block->name, "-", hex(c, .no_prefix = true, .uppercase = true)); } -- cgit v1.2.3 From e0706bc707ea6a8be86cee9fde21971cde3d7a42 Mon Sep 17 00:00:00 2001 From: Bruce Hill Date: Sat, 22 Nov 2025 16:34:41 -0500 Subject: Bugfix for infinite loop in text.replace("", ...) with empty string --- src/stdlib/text.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/stdlib/text.c') diff --git a/src/stdlib/text.c b/src/stdlib/text.c index 8e800c8a..febcafce 100644 --- a/src/stdlib/text.c +++ b/src/stdlib/text.c @@ -1135,6 +1135,7 @@ Text_t Text$translate(Text_t text, Table_t translations) { struct { Text_t target, replacement; } *entry = replacement_list.data + r * replacement_list.stride; + if (entry->target.length <= 0) continue; TextIter_t target_state = NEW_TEXT_ITER_STATE(entry->target); if (_matches(&text_state, &target_state, i)) { if (i > span_start) result = concat2(result, Text$slice(text, I(span_start + 1), I(i))); @@ -1156,6 +1157,7 @@ Text_t Text$translate(Text_t text, Table_t translations) { public Text_t Text$replace(Text_t text, Text_t target, Text_t replacement) { + if (target.length <= 0) return text; TextIter_t text_state = NEW_TEXT_ITER_STATE(text), target_state = NEW_TEXT_ITER_STATE(target); Text_t result = EMPTY_TEXT; int64_t span_start = 0; -- cgit v1.2.3 From cb9d3b1a2c2c59c368f6121a16a9ab928b0ff951 Mon Sep 17 00:00:00 2001 From: Bruce Hill Date: Sun, 23 Nov 2025 00:35:05 -0500 Subject: Added Text.find(text, target, start=1) --- src/stdlib/text.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'src/stdlib/text.c') diff --git a/src/stdlib/text.c b/src/stdlib/text.c index febcafce..e51af49c 100644 --- a/src/stdlib/text.c +++ b/src/stdlib/text.c @@ -1057,8 +1057,8 @@ PUREFUNC public int32_t Text$compare(const void *va, const void *vb, const TypeI bool _matches(TextIter_t *text_state, TextIter_t *target_state, int64_t pos) { for (int64_t i = 0; i < (int64_t)target_state->stack[0].text.length; i++) { int32_t text_i = Text$get_grapheme_fast(text_state, pos + i); - int32_t prefix_i = Text$get_grapheme_fast(target_state, i); - if (text_i != prefix_i) return false; + int32_t target_i = Text$get_grapheme_fast(target_state, i); + if (text_i != target_i) return false; } return true; } @@ -1106,6 +1106,19 @@ static bool _has_grapheme(TextIter_t *text, int32_t g) { return false; } +public +OptionalInt_t Text$find(Text_t text, Text_t target, Int_t start) { + if (text.length < target.length) return NONE_INT; + if (target.length <= 0) return I(1); + TextIter_t text_state = NEW_TEXT_ITER_STATE(text), target_state = NEW_TEXT_ITER_STATE(target); + for (int64_t i = Int64$from_int(start, false) - 1; i < text.length - target.length + 1; i++) { + if (_matches(&text_state, &target_state, i)) { + return Int$from_int64(i + 1); + } + } + return NONE_INT; +} + public Text_t Text$trim(Text_t text, Text_t to_trim, bool left, bool right) { int64_t first = 0; -- cgit v1.2.3 From d302aaec38b9d295d39c4d87b53ee610bc9e0e07 Mon Sep 17 00:00:00 2001 From: Bruce Hill Date: Sun, 30 Nov 2025 13:08:57 -0500 Subject: Handle some text method edge cases with empty text better. --- src/stdlib/text.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'src/stdlib/text.c') diff --git a/src/stdlib/text.c b/src/stdlib/text.c index e51af49c..b4b27fed 100644 --- a/src/stdlib/text.c +++ b/src/stdlib/text.c @@ -763,8 +763,10 @@ static Text_t Text$from_components(List_t graphemes, Table_t unique_clusters) { public OptionalText_t Text$from_strn(const char *str, size_t len) { int64_t ascii_span = 0; - for (size_t i = 0; i < len && isascii(str[i]); i++) + for (size_t i = 0; i < len && isascii(str[i]); i++) { ascii_span++; + if (str[i] == 0) return NONE_TEXT; + } if (ascii_span == (int64_t)len) { // All ASCII char *copy = GC_MALLOC_ATOMIC(len); @@ -786,12 +788,15 @@ OptionalText_t Text$from_strn(const char *str, size_t len) { uint32_t buf[256]; size_t u32_len = sizeof(buf) / sizeof(buf[0]); uint32_t *u32s = u8_to_u32(pos, (size_t)(next - pos), buf, &u32_len); + if (u32s == NULL) return NONE_TEXT; uint32_t buf2[256]; size_t u32_normlen = sizeof(buf2) / sizeof(buf2[0]); uint32_t *u32s_normalized = u32_normalize(UNINORM_NFC, u32s, u32_len, buf2, &u32_normlen); + if (u32s_normalized == NULL) return NONE_TEXT; int32_t g = get_synthetic_grapheme(u32s_normalized, (int64_t)u32_normlen); + if (g == 0) return NONE_TEXT; List$insert(&graphemes, &g, I(0), sizeof(int32_t)); Table$get_or_setdefault(&unique_clusters, int32_t, uint8_t, g, (uint8_t)unique_clusters.entries.length, Table$info(&Int32$info, &Byte$info)); -- cgit v1.2.3