From af1bd79fd91d1a1efde3cf084643f065c61d330a Mon Sep 17 00:00:00 2001
From: Bruce Hill <bruce@bruce-hill.com>
Date: Sat, 15 Nov 2025 14:10:28 -0500
Subject: Make EMPTY_TEXT into a macro

---
 src/stdlib/text.c | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'src/stdlib/text.c')

diff --git a/src/stdlib/text.c b/src/stdlib/text.c
index d40e1306..f323d88d 100644
--- a/src/stdlib/text.c
+++ b/src/stdlib/text.c
@@ -146,13 +146,6 @@ static int32_t num_synthetic_graphemes = 0;
 
 static Text_t simple_concatenation(Text_t a, Text_t b);
 
-public
-Text_t EMPTY_TEXT = {
-    .length = 0,
-    .tag = TEXT_ASCII,
-    .ascii = 0,
-};
-
 PUREFUNC static bool graphemes_equal(const void *va, const void *vb, const TypeInfo_t *info) {
     (void)info;
     ucs4_t *a = *(ucs4_t **)va;
-- 
cgit v1.2.3


From a1884f7a85cbee5a67cf48c9e7b088fdea8b8b38 Mon Sep 17 00:00:00 2001
From: Bruce Hill <bruce@bruce-hill.com>
Date: Sat, 15 Nov 2025 18:12:57 -0500
Subject: Fix for potential issue with codepoint names

---
 src/stdlib/text.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src/stdlib/text.c')

diff --git a/src/stdlib/text.c b/src/stdlib/text.c
index f323d88d..8e800c8a 100644
--- a/src/stdlib/text.c
+++ b/src/stdlib/text.c
@@ -1605,6 +1605,7 @@ static INLINE const char *codepoint_name(ucs4_t c) {
     char *found_name = unicode_character_name(c, name);
     if (found_name) return found_name;
     const uc_block_t *block = uc_block(c);
+    if (!block) return "???";
     assert(block);
     return String(block->name, "-", hex(c, .no_prefix = true, .uppercase = true));
 }
-- 
cgit v1.2.3


From e0706bc707ea6a8be86cee9fde21971cde3d7a42 Mon Sep 17 00:00:00 2001
From: Bruce Hill <bruce@bruce-hill.com>
Date: Sat, 22 Nov 2025 16:34:41 -0500
Subject: Bugfix for infinite loop in text.replace("", ...) with empty string

---
 src/stdlib/text.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'src/stdlib/text.c')

diff --git a/src/stdlib/text.c b/src/stdlib/text.c
index 8e800c8a..febcafce 100644
--- a/src/stdlib/text.c
+++ b/src/stdlib/text.c
@@ -1135,6 +1135,7 @@ Text_t Text$translate(Text_t text, Table_t translations) {
             struct {
                 Text_t target, replacement;
             } *entry = replacement_list.data + r * replacement_list.stride;
+            if (entry->target.length <= 0) continue;
             TextIter_t target_state = NEW_TEXT_ITER_STATE(entry->target);
             if (_matches(&text_state, &target_state, i)) {
                 if (i > span_start) result = concat2(result, Text$slice(text, I(span_start + 1), I(i)));
@@ -1156,6 +1157,7 @@ Text_t Text$translate(Text_t text, Table_t translations) {
 
 public
 Text_t Text$replace(Text_t text, Text_t target, Text_t replacement) {
+    if (target.length <= 0) return text;
     TextIter_t text_state = NEW_TEXT_ITER_STATE(text), target_state = NEW_TEXT_ITER_STATE(target);
     Text_t result = EMPTY_TEXT;
     int64_t span_start = 0;
-- 
cgit v1.2.3


From cb9d3b1a2c2c59c368f6121a16a9ab928b0ff951 Mon Sep 17 00:00:00 2001
From: Bruce Hill <bruce@bruce-hill.com>
Date: Sun, 23 Nov 2025 00:35:05 -0500
Subject: Added Text.find(text, target, start=1)

---
 src/stdlib/text.c | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

(limited to 'src/stdlib/text.c')

diff --git a/src/stdlib/text.c b/src/stdlib/text.c
index febcafce..e51af49c 100644
--- a/src/stdlib/text.c
+++ b/src/stdlib/text.c
@@ -1057,8 +1057,8 @@ PUREFUNC public int32_t Text$compare(const void *va, const void *vb, const TypeI
 bool _matches(TextIter_t *text_state, TextIter_t *target_state, int64_t pos) {
     for (int64_t i = 0; i < (int64_t)target_state->stack[0].text.length; i++) {
         int32_t text_i = Text$get_grapheme_fast(text_state, pos + i);
-        int32_t prefix_i = Text$get_grapheme_fast(target_state, i);
-        if (text_i != prefix_i) return false;
+        int32_t target_i = Text$get_grapheme_fast(target_state, i); // grapheme i of the target
+        if (text_i != target_i) return false; // first mismatch: target does not occur at `pos`
     }
     return true;
 }
@@ -1106,6 +1106,19 @@ static bool _has_grapheme(TextIter_t *text, int32_t g) {
     return false;
 }
 
+public
+OptionalInt_t Text$find(Text_t text, Text_t target, Int_t start) {
+    // NOTE(review): `start` is not range-checked here; confirm _matches tolerates a negative first offset.
+    if (target.length > text.length) return NONE_INT; // target cannot fit inside text
+    if (target.length <= 0) return I(1); // empty target: reports position 1, whatever `start` is
+    TextIter_t haystack = NEW_TEXT_ITER_STATE(text), needle = NEW_TEXT_ITER_STATE(target);
+    const int64_t last_offset = text.length - target.length; // last 0-based offset where target fits
+    for (int64_t offset = Int64$from_int(start, false) - 1; offset <= last_offset; offset++) {
+        if (_matches(&haystack, &needle, offset)) return Int$from_int64(offset + 1); // 1-based result
+    }
+    return NONE_INT;
+}
+
 public
 Text_t Text$trim(Text_t text, Text_t to_trim, bool left, bool right) {
     int64_t first = 0;
-- 
cgit v1.2.3


From d302aaec38b9d295d39c4d87b53ee610bc9e0e07 Mon Sep 17 00:00:00 2001
From: Bruce Hill <bruce@bruce-hill.com>
Date: Sun, 30 Nov 2025 13:08:57 -0500
Subject: Handle some text method edge cases with empty text better.

---
 src/stdlib/text.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'src/stdlib/text.c')

diff --git a/src/stdlib/text.c b/src/stdlib/text.c
index e51af49c..b4b27fed 100644
--- a/src/stdlib/text.c
+++ b/src/stdlib/text.c
@@ -763,8 +763,10 @@ static Text_t Text$from_components(List_t graphemes, Table_t unique_clusters) {
 public
 OptionalText_t Text$from_strn(const char *str, size_t len) {
     int64_t ascii_span = 0;
-    for (size_t i = 0; i < len && isascii(str[i]); i++)
+    for (size_t i = 0; i < len && isascii(str[i]); i++) {
         ascii_span++;
+        if (str[i] == 0) return NONE_TEXT;
+    }
 
     if (ascii_span == (int64_t)len) { // All ASCII
         char *copy = GC_MALLOC_ATOMIC(len);
@@ -786,12 +788,15 @@ OptionalText_t Text$from_strn(const char *str, size_t len) {
         uint32_t buf[256];
         size_t u32_len = sizeof(buf) / sizeof(buf[0]);
         uint32_t *u32s = u8_to_u32(pos, (size_t)(next - pos), buf, &u32_len);
+        if (u32s == NULL) return NONE_TEXT;
 
         uint32_t buf2[256];
         size_t u32_normlen = sizeof(buf2) / sizeof(buf2[0]);
         uint32_t *u32s_normalized = u32_normalize(UNINORM_NFC, u32s, u32_len, buf2, &u32_normlen);
+        if (u32s_normalized == NULL) return NONE_TEXT;
 
         int32_t g = get_synthetic_grapheme(u32s_normalized, (int64_t)u32_normlen);
+        if (g == 0) return NONE_TEXT;
         List$insert(&graphemes, &g, I(0), sizeof(int32_t));
         Table$get_or_setdefault(&unique_clusters, int32_t, uint8_t, g, (uint8_t)unique_clusters.entries.length,
                                 Table$info(&Int32$info, &Byte$info));
-- 
cgit v1.2.3