Convert to using more zero values for `none`

author: Bruce Hill <bruce@bruce-hill.com> 2025-10-01 12:43:00 -0400
committer: Bruce Hill <bruce@bruce-hill.com> 2025-10-01 12:43:00 -0400
commit: 6583fe9b389a6b4698f9364945885e6783506886 (patch)
tree: 0464456d177eab051b03f29a74218a45b301f174 /src/stdlib/text.c
parent: 0cfae753aa131f949253f3fba1e3a36c2bde6ac0 (diff)
1 files changed, 35 insertions, 39 deletions
diff --git a/src/stdlib/text.c b/src/stdlib/text.c
index cda7dd31..0f40aef9 100644
--- a/src/stdlib/text.c
+++ b/src/stdlib/text.c
@@ -127,7 +127,7 @@ typedef struct {
 } synthetic_grapheme_t;
 
 // Synthetic grapheme clusters (clusters of more than one codepoint):
-static Table_t grapheme_ids_by_codepoints = {}; // ucs4_t* length-prefixed codepoints -> int32_t ID
+static Table_t grapheme_ids_by_codepoints = EMPTY_TABLE; // ucs4_t* length-prefixed codepoints -> int32_t ID
 
 // This will hold a dynamically growing list of synthetic graphemes:
 static synthetic_grapheme_t *synthetic_graphemes = NULL;
@@ -521,8 +521,7 @@ static Text_t concat2(Text_t a, Text_t b) {
     }
 
     OptionalText_t glue =
-        Text$from_utf32((List_t){.data = normalized, .length = (int64_t)norm_length, .stride = sizeof(ucs4_t)});
-    assert(glue.length >= 0);
+        Text$from_utf32((List_t){.data = normalized, .length = (uint64_t)norm_length, .stride = sizeof(ucs4_t)});
 
     if (normalized != norm_buf) free(normalized);
 
@@ -743,14 +742,14 @@ static Text_t Text$from_components(List_t graphemes, Table_t unique_clusters) {
         void *blob = GC_MALLOC_ATOMIC(blob_size);
         int32_t *map = blob;
         uint8_t *bytes = blob + sizeof(int32_t[unique_clusters.entries.length]);
-        for (int64_t i = 0; i < unique_clusters.entries.length; i++) {
+        for (int64_t i = 0; i < (int64_t)unique_clusters.entries.length; i++) {
             struct {
                 int32_t g;
                 uint8_t b;
             } *entry = unique_clusters.entries.data + i * unique_clusters.entries.stride;
             map[entry->b] = entry->g;
         }
-        for (int64_t i = 0; i < graphemes.length; i++) {
+        for (int64_t i = 0; i < (int64_t)graphemes.length; i++) {
             int32_t g = *(int32_t *)(graphemes.data + i * graphemes.stride);
             uint8_t *byte = Table$get(unique_clusters, &g, Table$info(&Int32$info, &Byte$info));
             assert(byte);
@@ -785,8 +784,8 @@ OptionalText_t Text$from_strn(const char *str, size_t len) {
     }
     if (u8_check((uint8_t *)str, len) != NULL) return NONE_TEXT;
 
-    List_t graphemes = {};
-    Table_t unique_clusters = {};
+    List_t graphemes = EMPTY_LIST;
+    Table_t unique_clusters = EMPTY_TABLE;
     const uint8_t *pos = (const uint8_t *)str;
     const uint8_t *end = (const uint8_t *)&str[len];
     // Iterate over grapheme clusters
@@ -1138,7 +1137,7 @@ Text_t Text$translate(Text_t text, Table_t translations) {
     int64_t span_start = 0;
     List_t replacement_list = translations.entries;
     for (int64_t i = 0; i < text.length;) {
-        for (int64_t r = 0; r < replacement_list.length; r++) {
+        for (int64_t r = 0; r < (int64_t)replacement_list.length; r++) {
             struct {
                 Text_t target, replacement;
             } *entry = replacement_list.data + r * replacement_list.stride;
@@ -1194,7 +1193,7 @@ List_t Text$split(Text_t text, Text_t delimiters) {
     if (delimiters.length == 0) return Text$clusters(text);
 
     TextIter_t text_state = NEW_TEXT_ITER_STATE(text), delim_state = NEW_TEXT_ITER_STATE(delimiters);
-    List_t splits = {};
+    List_t splits = EMPTY_LIST;
     for (int64_t i = 0; i < text.length;) {
         int64_t span_len = 0;
         while (i + span_len < text.length && !_matches(&text_state, &delim_state, i + span_len)) {
@@ -1216,7 +1215,7 @@ List_t Text$split_any(Text_t text, Text_t delimiters) {
     if (delimiters.length == 0) return List(text);
 
     TextIter_t text_state = NEW_TEXT_ITER_STATE(text), delim_state = NEW_TEXT_ITER_STATE(delimiters);
-    List_t splits = {};
+    List_t splits = EMPTY_LIST;
     for (int64_t i = 0; i < text.length;) {
         int64_t span_len = 0;
         while (i + span_len < text.length
@@ -1370,8 +1369,7 @@ Text_t Text$upper(Text_t text, Text_t language) {
     uint32_t buf[out_len];
     ucs4_t *upper = u32_toupper(codepoints.data, (size_t)codepoints.length, uc_language, UNINORM_NFC, buf, &out_len);
     OptionalText_t ret =
-        Text$from_utf32((List_t){.data = upper, .length = (int64_t)out_len, .stride = sizeof(int32_t)});
-    assert(ret.length >= 0);
+        Text$from_utf32((List_t){.data = upper, .length = (uint64_t)out_len, .stride = sizeof(int32_t)});
     if (upper != buf) free(upper);
     return ret;
 }
@@ -1386,8 +1384,7 @@ Text_t Text$lower(Text_t text, Text_t language) {
     uint32_t buf[out_len];
     ucs4_t *lower = u32_tolower(codepoints.data, (size_t)codepoints.length, uc_language, UNINORM_NFC, buf, &out_len);
     OptionalText_t ret =
-        Text$from_utf32((List_t){.data = lower, .length = (int64_t)out_len, .stride = sizeof(int32_t)});
-    assert(ret.length >= 0);
+        Text$from_utf32((List_t){.data = lower, .length = (uint64_t)out_len, .stride = sizeof(int32_t)});
     if (lower != buf) free(lower);
     return ret;
 }
@@ -1402,8 +1399,7 @@ Text_t Text$title(Text_t text, Text_t language) {
     uint32_t buf[out_len];
     ucs4_t *title = u32_totitle(codepoints.data, (size_t)codepoints.length, uc_language, UNINORM_NFC, buf, &out_len);
     OptionalText_t ret =
-        Text$from_utf32((List_t){.data = title, .length = (int64_t)out_len, .stride = sizeof(int32_t)});
-    assert(ret.length >= 0);
+        Text$from_utf32((List_t){.data = title, .length = (uint64_t)out_len, .stride = sizeof(int32_t)});
     if (title != buf) free(title);
     return ret;
 }
@@ -1543,7 +1539,7 @@ Text_t Text$join(Text_t glue, List_t pieces) {
     if (pieces.length == 0) return EMPTY_TEXT;
 
     Text_t result = *(Text_t *)pieces.data;
-    for (int64_t i = 1; i < pieces.length; i++) {
+    for (int64_t i = 1; i < (int64_t)pieces.length; i++) {
         result = Text$concat(result, glue, *(Text_t *)(pieces.data + i * pieces.stride));
     }
     return result;
@@ -1551,8 +1547,8 @@ Text_t Text$join(Text_t glue, List_t pieces) {
 
 public
 List_t Text$clusters(Text_t text) {
-    List_t clusters = {};
-    for (int64_t i = 1; i <= text.length; i++) {
+    List_t clusters = EMPTY_LIST;
+    for (int64_t i = 1; i <= (int64_t)text.length; i++) {
         Text_t cluster = Text$slice(text, I(i), I(i));
         List$insert(&clusters, &cluster, I_small(0), sizeof(Text_t));
     }
@@ -1561,27 +1557,27 @@ List_t Text$clusters(Text_t text) {
 
 public
 List_t Text$utf8(Text_t text) {
-    if (text.length == 0) return (List_t){.atomic = 1};
-    int64_t capacity = text.length;
+    if (text.length == 0) return EMPTY_ATOMIC_LIST;
+    int64_t capacity = (int64_t)text.length;
     Byte_t *buf = GC_MALLOC_ATOMIC(sizeof(Byte_t[capacity]));
     int64_t i = 0;
     u8_buf_append(text, &buf, &capacity, &i);
     return (List_t){
-        .data = buf, .length = i, .stride = 1, .atomic = 1, .free = MIN(LIST_MAX_FREE_ENTRIES, capacity - i)};
+        .data = buf, .length = (uint64_t)i, .stride = 1, .atomic = 1, .free = MIN(LIST_MAX_FREE_ENTRIES, capacity - i)};
 }
 
 public
 List_t Text$utf16(Text_t text) {
-    if (text.length == 0) return (List_t){.atomic = 1};
+    if (text.length == 0) return EMPTY_ATOMIC_LIST;
     List_t utf32 = Text$utf32(text);
     List_t utf16 = {.free = MIN(LIST_MAX_FREE_ENTRIES, (uint64_t)utf32.length), .atomic = 1};
     utf16.data = GC_MALLOC_ATOMIC(sizeof(int32_t[utf16.free]));
-    for (int64_t i = 0; i < utf32.length; i++) {
+    for (int64_t i = 0; i < (int64_t)utf32.length; i++) {
         uint16_t u16_buf[4];
         size_t u16_len = sizeof(u16_buf) / sizeof(u16_buf[0]);
         uint16_t *chunk_u16 = u32_to_u16(utf32.data + utf32.stride * i, 1, u16_buf, &u16_len);
         if (chunk_u16 == NULL) fail("Invalid codepoints encountered!");
-        List$insert_all(&utf16, (List_t){.data = chunk_u16, .stride = sizeof(uint16_t), .length = (int64_t)u16_len},
+        List$insert_all(&utf16, (List_t){.data = chunk_u16, .stride = sizeof(uint16_t), .length = (uint64_t)u16_len},
                         I(0), sizeof(uint16_t));
         if (chunk_u16 != u16_buf) free(chunk_u16);
     }
@@ -1590,10 +1586,10 @@ List_t Text$utf16(Text_t text) {
 
 public
 List_t Text$utf32(Text_t text) {
-    if (text.length == 0) return (List_t){.atomic = 1};
-    List_t codepoints = {.atomic = 1};
+    if (text.length == 0) return EMPTY_ATOMIC_LIST;
+    List_t codepoints = EMPTY_ATOMIC_LIST;
     TextIter_t state = NEW_TEXT_ITER_STATE(text);
-    for (int64_t i = 0; i < text.length; i++) {
+    for (int64_t i = 0; i < (int64_t)text.length; i++) {
         int32_t grapheme = Text$get_grapheme_fast(&state, i);
         if (grapheme < 0) {
             for (int64_t c = 0; c < NUM_GRAPHEME_CODEPOINTS(grapheme); c++) {
@@ -1618,7 +1614,7 @@ static INLINE const char *codepoint_name(ucs4_t c) {
 
 public
 List_t Text$codepoint_names(Text_t text) {
-    List_t names = {};
+    List_t names = EMPTY_LIST;
     TextIter_t state = NEW_TEXT_ITER_STATE(text);
     for (int64_t i = 0; i < text.length; i++) {
         int32_t grapheme = Text$get_grapheme_fast(&state, i);
@@ -1653,7 +1649,7 @@ OptionalText_t Text$from_utf16(List_t units) {
     uint8_t buf[length];
     uint8_t *u8 = u16_to_u8(units.data, (size_t)units.length, buf, &length);
     Text_t ret =
-        Text$from_utf8((List_t){.data = u8, .length = (int64_t)length, .stride = sizeof(uint8_t), .atomic = 1});
+        Text$from_utf8((List_t){.data = u8, .length = (uint64_t)length, .stride = sizeof(uint8_t), .atomic = 1});
     if (u8 != buf) free(u8);
     return ret;
 }
@@ -1663,8 +1659,8 @@ OptionalText_t Text$from_utf32(List_t codepoints) {
     if (codepoints.length == 0) return EMPTY_TEXT;
     if (codepoints.stride != sizeof(uint32_t)) List$compact(&codepoints, sizeof(uint32_t));
 
-    List_t graphemes = {};
-    Table_t unique_clusters = {};
+    List_t graphemes = EMPTY_ATOMIC_LIST;
+    Table_t unique_clusters = EMPTY_TABLE;
     const uint32_t *pos = (const uint32_t *)codepoints.data;
     const uint32_t *end = (const uint32_t *)&pos[codepoints.length];
     // Iterate over grapheme clusters
@@ -1684,12 +1680,12 @@ OptionalText_t Text$from_utf32(List_t codepoints) {
 
         if (unique_clusters.entries.length == 256) {
             List_t remaining_codepoints = {
-                .length = (int64_t)(end - next),
+                .length = (uint64_t)(end - next),
                 .data = (void *)next,
                 .stride = sizeof(int32_t),
             };
             OptionalText_t remainder = Text$from_utf32(remaining_codepoints);
-            if (remainder.length < 0) return NONE_TEXT;
+            if (remainder.tag == TEXT_NONE) return NONE_TEXT;
             return concat2_assuming_safe(Text$from_components(graphemes, unique_clusters), remainder);
         }
     }
@@ -1698,8 +1694,8 @@ OptionalText_t Text$from_utf32(List_t codepoints) {
 
 public
 OptionalText_t Text$from_codepoint_names(List_t codepoint_names) {
-    List_t codepoints = {};
-    for (int64_t i = 0; i < codepoint_names.length; i++) {
+    List_t codepoints = EMPTY_LIST;
+    for (int64_t i = 0; i < (int64_t)codepoint_names.length; i++) {
         Text_t *name = ((Text_t *)(codepoint_names.data + i * codepoint_names.stride));
         const char *name_str = Text$as_c_string(*name);
         ucs4_t codepoint = unicode_name_character(name_str);
@@ -1711,9 +1707,9 @@ OptionalText_t Text$from_codepoint_names(List_t codepoint_names) {
 
 public
 List_t Text$lines(Text_t text) {
-    List_t lines = {};
+    List_t lines = EMPTY_LIST;
     TextIter_t state = NEW_TEXT_ITER_STATE(text);
-    for (int64_t i = 0, line_start = 0; i < text.length; i++) {
+    for (int64_t i = 0, line_start = 0; i < (int64_t)text.length; i++) {
         int32_t grapheme = Text$get_grapheme_fast(&state, i);
         if (grapheme == '\r' && Text$get_grapheme_fast(&state, i + 1) == '\n') { // CRLF
             Text_t line = Text$slice(text, I(line_start + 1), I(i));
@@ -1768,7 +1764,7 @@ Closure_t Text$by_line(Text_t text) {
 
 PUREFUNC public bool Text$is_none(const void *t, const TypeInfo_t *info) {
     (void)info;
-    return ((Text_t *)t)->length < 0;
+    return ((Text_t *)t)->tag == TEXT_NONE;
 }
 
 public
author	Bruce Hill <bruce@bruce-hill.com>	2025-10-01 12:43:00 -0400
committer	Bruce Hill <bruce@bruce-hill.com>	2025-10-01 12:43:00 -0400
commit	6583fe9b389a6b4698f9364945885e6783506886 (patch)
tree	0464456d177eab051b03f29a74218a45b301f174 /src/stdlib/text.c
parent	0cfae753aa131f949253f3fba1e3a36c2bde6ac0 (diff)