From fcda36561d668f43bac91ea31cd55cbbd605d330 Mon Sep 17 00:00:00 2001 From: Bruce Hill Date: Sat, 23 Aug 2025 19:28:08 -0400 Subject: Autoformat everything with clang-format --- src/stdlib/text.c | 880 +++++++++++++++++++++++++----------------------------- 1 file changed, 405 insertions(+), 475 deletions(-) (limited to 'src/stdlib/text.c') diff --git a/src/stdlib/text.c b/src/stdlib/text.c index d9793eb8..aad3fd76 100644 --- a/src/stdlib/text.c +++ b/src/stdlib/text.c @@ -116,8 +116,8 @@ #include "text.h" // Use inline version of the siphash code for performance: -#include "siphash.h" #include "siphash-internals.h" +#include "siphash.h" typedef struct { ucs4_t main_codepoint; @@ -133,9 +133,9 @@ static synthetic_grapheme_t *synthetic_graphemes = NULL; static int32_t synthetic_grapheme_capacity = 0; static int32_t num_synthetic_graphemes = 0; -#define NUM_GRAPHEME_CODEPOINTS(id) (synthetic_graphemes[-(id)-1].utf32_cluster[0]) -#define GRAPHEME_CODEPOINTS(id) (&synthetic_graphemes[-(id)-1].utf32_cluster[1]) -#define GRAPHEME_UTF8(id) (synthetic_graphemes[-(id)-1].utf8) +#define NUM_GRAPHEME_CODEPOINTS(id) (synthetic_graphemes[-(id) - 1].utf32_cluster[0]) +#define GRAPHEME_CODEPOINTS(id) (&synthetic_graphemes[-(id) - 1].utf32_cluster[1]) +#define GRAPHEME_UTF8(id) (synthetic_graphemes[-(id) - 1].utf8) // Somewhat arbitrarily chosen, if two short literal ASCII or grapheme chunks // are concatenated below this length threshold, we just merge them into a @@ -145,16 +145,17 @@ static int32_t num_synthetic_graphemes = 0; static Text_t simple_concatenation(Text_t a, Text_t b); -public Text_t EMPTY_TEXT = { - .length=0, - .tag=TEXT_ASCII, - .ascii=0, +public +Text_t EMPTY_TEXT = { + .length = 0, + .tag = TEXT_ASCII, + .ascii = 0, }; PUREFUNC static bool graphemes_equal(const void *va, const void *vb, const TypeInfo_t *info) { (void)info; - ucs4_t *a = *(ucs4_t**)va; - ucs4_t *b = *(ucs4_t**)vb; + ucs4_t *a = *(ucs4_t **)va; + ucs4_t *b = *(ucs4_t **)vb; if (a[0] != b[0]) return false; for (int i = 0; i < (int)a[0]; i++) if (a[i] != b[i]) return false; @@ -163,37 +164,37 @@ PUREFUNC static bool graphemes_equal(const void *va, const void *vb, const TypeI PUREFUNC static uint64_t grapheme_hash(const void *g, const TypeInfo_t *info) { (void)info; - ucs4_t *cluster = *(ucs4_t**)g; - return siphash24((void*)&cluster[1], sizeof(ucs4_t[cluster[0]])); + ucs4_t *cluster = *(ucs4_t **)g; + return siphash24((void *)&cluster[1], sizeof(ucs4_t[cluster[0]])); } static const TypeInfo_t GraphemeClusterInfo = { - .size=sizeof(ucs4_t*), - .align=__alignof__(ucs4_t*), - .metamethods={ - .equal=graphemes_equal, - .hash=grapheme_hash, - }, + .size = sizeof(ucs4_t *), + .align = __alignof__(ucs4_t *), + .metamethods = + { + .equal = graphemes_equal, + .hash = grapheme_hash, + }, }; #ifdef __GNUC__ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wstack-protector" #endif -public int32_t get_synthetic_grapheme(const ucs4_t *codepoints, int64_t utf32_len) -{ - if (utf32_len == 1) - return (int32_t)*codepoints; +public +int32_t get_synthetic_grapheme(const ucs4_t *codepoints, int64_t utf32_len) { + if (utf32_len == 1) return (int32_t)*codepoints; - ucs4_t length_prefixed[1+utf32_len]; + ucs4_t length_prefixed[1 + utf32_len]; length_prefixed[0] = (ucs4_t)utf32_len; for (int i = 0; i < utf32_len; i++) - length_prefixed[i+1] = codepoints[i]; + length_prefixed[i + 1] = codepoints[i]; ucs4_t *ptr = &length_prefixed[0]; // Optimization for common case of one frequently used synthetic grapheme: static int32_t last_grapheme = 0; - if (last_grapheme != 0 && graphemes_equal(&ptr, &synthetic_graphemes[-last_grapheme-1].utf32_cluster, NULL)) + if (last_grapheme != 0 && graphemes_equal(&ptr, &synthetic_graphemes[-last_grapheme - 1].utf32_cluster, NULL)) return last_grapheme; TypeInfo_t GraphemeIDLookupTableInfo = *Table$info(&GraphemeClusterInfo, &Int32$info); @@ -209,12 +210,12 @@ public int32_t get_synthetic_grapheme(const ucs4_t *codepoints, int64_t utf32_le synthetic_graphemes = new; } - int32_t grapheme_id = -(num_synthetic_graphemes+1); + int32_t grapheme_id = -(num_synthetic_graphemes + 1); num_synthetic_graphemes += 1; // Get UTF8 representation: uint8_t u8_buf[64]; - size_t u8_len = sizeof(u8_buf)/sizeof(u8_buf[0]); + size_t u8_len = sizeof(u8_buf) / sizeof(u8_buf[0]); uint8_t *u8 = u32_to_u8(codepoints, (size_t)utf32_len, u8_buf, &u8_len); if (u8 == NULL) fail("Invalid graphemes encountered!"); @@ -223,11 +224,10 @@ public int32_t get_synthetic_grapheme(const ucs4_t *codepoints, int64_t utf32_le // area with good cache locality: static void *arena = NULL, *arena_end = NULL; // Eat up any space needed to make arena 32-bit aligned: - if ((size_t)arena % __alignof__(ucs4_t) != 0) - arena += __alignof__(ucs4_t) - ((size_t)arena % __alignof__(ucs4_t)); + if ((size_t)arena % __alignof__(ucs4_t) != 0) arena += __alignof__(ucs4_t) - ((size_t)arena % __alignof__(ucs4_t)); // If we have filled up this arena, allocate a new one: - size_t needed_memory = sizeof(ucs4_t[1+utf32_len]) + sizeof(uint8_t[u8_len + 1]); + size_t needed_memory = sizeof(ucs4_t[1 + utf32_len]) + sizeof(uint8_t[u8_len + 1]); if (arena + needed_memory > arena_end) { // Do reasonably big chunks at a time, so most synthetic codepoints are // nearby each other in memory and cache locality is good. This is a @@ -239,28 +239,27 @@ public int32_t get_synthetic_grapheme(const ucs4_t *codepoints, int64_t utf32_le // Copy length-prefixed UTF32 codepoints into the arena and store where they live: ucs4_t *codepoint_copy = arena; - memcpy(codepoint_copy, length_prefixed, sizeof(ucs4_t[1+utf32_len])); - synthetic_graphemes[-grapheme_id-1].utf32_cluster = codepoint_copy; - arena += sizeof(ucs4_t[1+utf32_len]); + memcpy(codepoint_copy, length_prefixed, sizeof(ucs4_t[1 + utf32_len])); + synthetic_graphemes[-grapheme_id - 1].utf32_cluster = codepoint_copy; + arena += sizeof(ucs4_t[1 + utf32_len]); // Copy UTF8 bytes into the arena and store where they live: uint8_t *utf8_final = arena; memcpy(utf8_final, u8, sizeof(uint8_t[u8_len])); utf8_final[u8_len] = '\0'; // Add a terminating NUL byte - synthetic_graphemes[-grapheme_id-1].utf8 = utf8_final; + synthetic_graphemes[-grapheme_id - 1].utf8 = utf8_final; arena += sizeof(uint8_t[u8_len + 1]); // Sickos at the unicode consortium decreed that you can have grapheme clusters // that begin with *prefix* modifiers, so we gotta check for that case: - synthetic_graphemes[-grapheme_id-1].main_codepoint = length_prefixed[1]; + synthetic_graphemes[-grapheme_id - 1].main_codepoint = length_prefixed[1]; for (ucs4_t i = 0; i < utf32_len; i++) { #if _LIBUNISTRING_VERSION >= 0x010200 -// libuinstring version 1.2.0 introduced uc_is_property_prepended_concatenation_mark() -// It's not critical, but it's technically more correct to have this check: - if (unlikely(uc_is_property_prepended_concatenation_mark(length_prefixed[1+i]))) - continue; + // libuinstring version 1.2.0 introduced uc_is_property_prepended_concatenation_mark() + // It's not critical, but it's technically more correct to have this check: + if (unlikely(uc_is_property_prepended_concatenation_mark(length_prefixed[1 + i]))) continue; #endif - synthetic_graphemes[-grapheme_id-1].main_codepoint = length_prefixed[1+i]; + synthetic_graphemes[-grapheme_id - 1].main_codepoint = length_prefixed[1 + i]; break; } @@ -276,8 +275,8 @@ public int32_t get_synthetic_grapheme(const ucs4_t *codepoints, int64_t utf32_le #pragma GCC diagnostic pop #endif -public int Text$print(FILE *stream, Text_t t) -{ +public +int Text$print(FILE *stream, Text_t t) { if (t.length == 0) return 0; switch (t.tag) { @@ -290,14 +289,14 @@ public int Text$print(FILE *stream, Text_t t) if (grapheme >= 0) { uint8_t buf[8]; size_t len = sizeof(buf); - uint8_t *u8 = u32_to_u8((ucs4_t*)&grapheme, 1, buf, &len); + uint8_t *u8 = u32_to_u8((ucs4_t *)&grapheme, 1, buf, &len); if (u8 == NULL) fail("Invalid grapheme encountered: ", grapheme); written += (int)fwrite(u8, sizeof(char), len, stream); if (u8 != buf) free(u8); } else { const uint8_t *u8 = GRAPHEME_UTF8(grapheme); assert(u8); - written += (int)fwrite(u8, sizeof(uint8_t), strlen((char*)u8), stream); + written += (int)fwrite(u8, sizeof(uint8_t), strlen((char *)u8), stream); } } return written; @@ -309,14 +308,14 @@ public int Text$print(FILE *stream, Text_t t) if (grapheme >= 0) { uint8_t buf[8]; size_t len = sizeof(buf); - uint8_t *u8 = u32_to_u8((ucs4_t*)&grapheme, 1, buf, &len); + uint8_t *u8 = u32_to_u8((ucs4_t *)&grapheme, 1, buf, &len); if (u8 == NULL) fail("Invalid grapheme encountered: ", grapheme); written += (int)fwrite(u8, sizeof(char), len, stream); if (u8 != buf) free(u8); } else { const uint8_t *u8 = GRAPHEME_UTF8(grapheme); assert(u8); - written += (int)fwrite(u8, sizeof(uint8_t), strlen((char*)u8), stream); + written += (int)fwrite(u8, sizeof(uint8_t), strlen((char *)u8), stream); } } return written; @@ -332,16 +331,16 @@ public int Text$print(FILE *stream, Text_t t) static const int64_t min_len_for_depth[MAX_TEXT_DEPTH] = { // Fibonacci numbers (skipping first two) - 1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144, 233, 377, 610, 987, 1597, 2584, 4181, 6765, 10946, - 17711, 28657, 46368, 75025, 121393, 196418, 317811, 514229, 832040, 1346269, 2178309, 3524578, - 5702887, 9227465, 14930352, 24157817, 39088169, 63245986, 102334155, 165580141, 267914296, - 433494437, 701408733, 1134903170, 1836311903, 2971215073, 4807526976, 7778742049, + 1, 2, 3, 5, 8, 13, 21, 34, 55, 89, + 144, 233, 377, 610, 987, 1597, 2584, 4181, 6765, 10946, + 17711, 28657, 46368, 75025, 121393, 196418, 317811, 514229, 832040, 1346269, + 2178309, 3524578, 5702887, 9227465, 14930352, 24157817, 39088169, 63245986, 102334155, 165580141, + 267914296, 433494437, 701408733, 1134903170, 1836311903, 2971215073, 4807526976, 7778742049, }; #define IS_BALANCED_TEXT(t) ((t).length >= min_len_for_depth[(t).depth]) -static void insert_balanced_recursive(Text_t balanced_texts[MAX_TEXT_DEPTH], Text_t text) -{ +static void insert_balanced_recursive(Text_t balanced_texts[MAX_TEXT_DEPTH], Text_t text) { if (text.tag == TEXT_CONCAT && (!IS_BALANCED_TEXT(text) || text.depth >= MAX_TEXT_DEPTH)) { insert_balanced_recursive(balanced_texts, *text.left); insert_balanced_recursive(balanced_texts, *text.right); @@ -370,8 +369,7 @@ static void insert_balanced_recursive(Text_t balanced_texts[MAX_TEXT_DEPTH], Tex balanced_texts[i] = accumulator; } -static Text_t rebalanced(Text_t a, Text_t b) -{ +static Text_t rebalanced(Text_t a, Text_t b) { Text_t balanced_texts[MAX_TEXT_DEPTH]; memset(balanced_texts, 0, sizeof(balanced_texts)); insert_balanced_recursive(balanced_texts, a); @@ -379,14 +377,12 @@ static Text_t rebalanced(Text_t a, Text_t b) Text_t ret = EMPTY_TEXT; for (int i = 0; ret.length < a.length + b.length; i++) { - if (balanced_texts[i].length) - ret = simple_concatenation(balanced_texts[i], ret); + if (balanced_texts[i].length) ret = simple_concatenation(balanced_texts[i], ret); } return ret; } -Text_t simple_concatenation(Text_t a, Text_t b) -{ +Text_t simple_concatenation(Text_t a, Text_t b) { if (a.length == 0) return b; if (b.length == 0) return a; @@ -395,53 +391,53 @@ Text_t simple_concatenation(Text_t a, Text_t b) // every concatenation to yield a balanced text, since many concatenations // are ephemeral (e.g. doing a loop repeatedly concatenating without using // the intermediary values). - if (new_depth >= MAX_TEXT_DEPTH) - return rebalanced(a, b); + if (new_depth >= MAX_TEXT_DEPTH) return rebalanced(a, b); Text_t *children = GC_MALLOC(sizeof(Text_t[2])); children[0] = a; children[1] = b; return (Text_t){ - .tag=TEXT_CONCAT, - .length=a.length + b.length, - .depth=new_depth, - .left=&children[0], - .right=&children[1], + .tag = TEXT_CONCAT, + .length = a.length + b.length, + .depth = new_depth, + .left = &children[0], + .right = &children[1], }; } -static Text_t concat2_assuming_safe(Text_t a, Text_t b) -{ +static Text_t concat2_assuming_safe(Text_t a, Text_t b) { if (a.length == 0) return b; if (b.length == 0) return a; if (a.tag == TEXT_ASCII && b.tag == TEXT_ASCII && (size_t)(a.length + b.length) <= SHORT_ASCII_LENGTH) { struct Text_s ret = { - .tag=TEXT_ASCII, - .length=a.length + b.length, + .tag = TEXT_ASCII, + .length = a.length + b.length, }; ret.ascii = GC_MALLOC_ATOMIC(sizeof(char[ret.length])); - memcpy((char*)ret.ascii, a.ascii, sizeof(char[a.length])); - memcpy((char*)&ret.ascii[a.length], b.ascii, sizeof(char[b.length])); + memcpy((char *)ret.ascii, a.ascii, sizeof(char[a.length])); + memcpy((char *)&ret.ascii[a.length], b.ascii, sizeof(char[b.length])); return ret; - } else if (a.tag == TEXT_GRAPHEMES && b.tag == TEXT_GRAPHEMES && (size_t)(a.length + b.length) <= SHORT_GRAPHEMES_LENGTH) { + } else if (a.tag == TEXT_GRAPHEMES && b.tag == TEXT_GRAPHEMES + && (size_t)(a.length + b.length) <= SHORT_GRAPHEMES_LENGTH) { struct Text_s ret = { - .tag=TEXT_GRAPHEMES, - .length=a.length + b.length, + .tag = TEXT_GRAPHEMES, + .length = a.length + b.length, }; ret.graphemes = GC_MALLOC_ATOMIC(sizeof(int32_t[ret.length])); - memcpy((int32_t*)ret.graphemes, a.graphemes, sizeof(int32_t[a.length])); - memcpy((int32_t*)&ret.graphemes[a.length], b.graphemes, sizeof(int32_t[b.length])); + memcpy((int32_t *)ret.graphemes, a.graphemes, sizeof(int32_t[a.length])); + memcpy((int32_t *)&ret.graphemes[a.length], b.graphemes, sizeof(int32_t[b.length])); return ret; - } else if (a.tag != TEXT_CONCAT && b.tag != TEXT_CONCAT && (size_t)(a.length + b.length) <= SHORT_GRAPHEMES_LENGTH) { + } else if (a.tag != TEXT_CONCAT && b.tag != TEXT_CONCAT + && (size_t)(a.length + b.length) <= SHORT_GRAPHEMES_LENGTH) { // Turn a small bit of ASCII into graphemes if it helps make things smaller // Text structs come with an extra 8 bytes, so allocate enough to hold the text struct Text_s ret = { - .tag=TEXT_GRAPHEMES, - .length=a.length + b.length, + .tag = TEXT_GRAPHEMES, + .length = a.length + b.length, }; ret.graphemes = GC_MALLOC_ATOMIC(sizeof(int32_t[ret.length])); - int32_t *dest = (int32_t*)ret.graphemes; + int32_t *dest = (int32_t *)ret.graphemes; if (a.tag == TEXT_GRAPHEMES) { memcpy(dest, a.graphemes, sizeof(int32_t[a.length])); dest += a.length; @@ -474,12 +470,11 @@ static Text_t concat2_assuming_safe(Text_t a, Text_t b) return simple_concatenation(a, b); } -static Text_t concat2(Text_t a, Text_t b) -{ +static Text_t concat2(Text_t a, Text_t b) { if (a.length == 0) return b; if (b.length == 0) return a; - int32_t last_a = Text$get_grapheme(a, a.length-1); + int32_t last_a = Text$get_grapheme(a, a.length - 1); int32_t first_b = Text$get_grapheme(b, 0); // Magic number, we know that no codepoints below here trigger instability: @@ -509,60 +504,51 @@ static Text_t concat2(Text_t a, Text_t b) // Do a normalization run for these two codepoints and see if it looks different. // Normalization should not exceed 3x in the input length (but if it does, it will be // handled gracefully) - ucs4_t norm_buf[3*len]; - size_t norm_length = sizeof(norm_buf)/sizeof(norm_buf[0]); + ucs4_t norm_buf[3 * len]; + size_t norm_length = sizeof(norm_buf) / sizeof(norm_buf[0]); ucs4_t *normalized = u32_normalize(UNINORM_NFC, codepoints, len, norm_buf, &norm_length); bool stable = (norm_length == len && memcmp(codepoints, normalized, sizeof(codepoints)) == 0); if (stable) { const void *second_grapheme = u32_grapheme_next(normalized, &normalized[norm_length]); - if (second_grapheme == &normalized[norm_length]) - stable = false; + if (second_grapheme == &normalized[norm_length]) stable = false; } if likely (stable) { - if (normalized != norm_buf) - free(normalized); + if (normalized != norm_buf) free(normalized); return concat2_assuming_safe(a, b); } - Text_t glue = Text$from_codepoints((List_t){.data=norm_buf, .length=(int64_t)norm_length, .stride=sizeof(int32_t)}); + Text_t glue = + Text$from_codepoints((List_t){.data = norm_buf, .length = (int64_t)norm_length, .stride = sizeof(int32_t)}); - if (normalized != norm_buf) - free(normalized); + if (normalized != norm_buf) free(normalized); - if (a.length == 1 && b.length == 1) - return glue; - else if (a.length == 1) - return concat2_assuming_safe(glue, Text$slice(b, I(2), I(b.length))); - else if (b.length == 1) - return concat2_assuming_safe(Text$slice(a, I(1), I(a.length-1)), glue); + if (a.length == 1 && b.length == 1) return glue; + else if (a.length == 1) return concat2_assuming_safe(glue, Text$slice(b, I(2), I(b.length))); + else if (b.length == 1) return concat2_assuming_safe(Text$slice(a, I(1), I(a.length - 1)), glue); else - return concat2_assuming_safe( - concat2_assuming_safe(Text$slice(a, I(1), I(a.length-1)), glue), - Text$slice(b, I(2), I(b.length))); + return concat2_assuming_safe(concat2_assuming_safe(Text$slice(a, I(1), I(a.length - 1)), glue), + Text$slice(b, I(2), I(b.length))); } -public Text_t Text$_concat(int n, Text_t items[n]) -{ +public +Text_t Text$_concat(int n, Text_t items[n]) { if (n == 0) return EMPTY_TEXT; Text_t ret = items[0]; for (int i = 1; i < n; i++) { - if (items[i].length > 0) - ret = concat2(ret, items[i]); + if (items[i].length > 0) ret = concat2(ret, items[i]); } return ret; } -public Text_t Text$repeat(Text_t text, Int_t count) -{ - if (text.length == 0 || Int$is_negative(count)) - return EMPTY_TEXT; +public +Text_t Text$repeat(Text_t text, Int_t count) { + if (text.length == 0 || Int$is_negative(count)) return EMPTY_TEXT; Int_t result_len = Int$times(count, I(text.length)); - if (Int$compare_value(result_len, I(1l<<40)) > 0) - fail("Text repeating would produce too big of an result!"); + if (Int$compare_value(result_len, I(1l << 40)) > 0) fail("Text repeating would produce too big of an result!"); int64_t count64 = Int64$from_int(count, false); Text_t ret = text; @@ -571,19 +557,17 @@ public Text_t Text$repeat(Text_t text, Int_t count) return ret; } -public Int_t Text$width(Text_t text, Text_t language) -{ - int width = u8_strwidth((const uint8_t*)Text$as_c_string(text), Text$as_c_string(language)); +public +Int_t Text$width(Text_t text, Text_t language) { + int width = u8_strwidth((const uint8_t *)Text$as_c_string(text), Text$as_c_string(language)); return Int$from_int32(width); } -static Text_t Text$repeat_to_width(Text_t to_repeat, int64_t target_width, Text_t language) -{ - if (target_width <= 0) - return EMPTY_TEXT; +static Text_t Text$repeat_to_width(Text_t to_repeat, int64_t target_width, Text_t language) { + if (target_width <= 0) return EMPTY_TEXT; const char *lang_str = Text$as_c_string(language); - int64_t width = (int64_t)u8_strwidth((const uint8_t*)Text$as_c_string(to_repeat), lang_str); + int64_t width = (int64_t)u8_strwidth((const uint8_t *)Text$as_c_string(to_repeat), lang_str); Text_t repeated = EMPTY_TEXT; int64_t repeated_width = 0; while (repeated_width + width <= target_width) { @@ -593,8 +577,8 @@ static Text_t Text$repeat_to_width(Text_t to_repeat, int64_t target_width, Text_ if (repeated_width < target_width) { for (int64_t i = 0; repeated_width < target_width && i < to_repeat.length; i++) { - Text_t c = Text$slice(to_repeat, I_small(i+1), I_small(i+1)); - int64_t w = (int64_t)u8_strwidth((const uint8_t*)Text$as_c_string(c), lang_str); + Text_t c = Text$slice(to_repeat, I_small(i + 1), I_small(i + 1)); + int64_t w = (int64_t)u8_strwidth((const uint8_t *)Text$as_c_string(c), lang_str); if (repeated_width + w > target_width) { repeated = concat2(repeated, Text$repeat(Text(" "), I(target_width - repeated_width))); repeated_width = target_width; @@ -608,35 +592,33 @@ static Text_t Text$repeat_to_width(Text_t to_repeat, int64_t target_width, Text_ return repeated; } -public Text_t Text$left_pad(Text_t text, Int_t width, Text_t padding, Text_t language) -{ - if (padding.length == 0) - fail("Cannot pad with an empty text!"); +public +Text_t Text$left_pad(Text_t text, Int_t width, Text_t padding, Text_t language) { + if (padding.length == 0) fail("Cannot pad with an empty text!"); int64_t needed = Int64$from_int(width, false) - Int64$from_int(Text$width(text, language), false); return concat2(Text$repeat_to_width(padding, needed, language), text); } -public Text_t Text$right_pad(Text_t text, Int_t width, Text_t padding, Text_t language) -{ - if (padding.length == 0) - fail("Cannot pad with an empty text!"); +public +Text_t Text$right_pad(Text_t text, Int_t width, Text_t padding, Text_t language) { + if (padding.length == 0) fail("Cannot pad with an empty text!"); int64_t needed = Int64$from_int(width, false) - Int64$from_int(Text$width(text, language), false); return concat2(text, Text$repeat_to_width(padding, needed, language)); } -public Text_t Text$middle_pad(Text_t text, Int_t width, Text_t padding, Text_t language) -{ - if (padding.length == 0) - fail("Cannot pad with an empty text!"); +public +Text_t Text$middle_pad(Text_t text, Int_t width, Text_t padding, Text_t language) { + if (padding.length == 0) fail("Cannot pad with an empty text!"); int64_t needed = Int64$from_int(width, false) - Int64$from_int(Text$width(text, language), false); - return Texts(Text$repeat_to_width(padding, needed/2, language), text, Text$repeat_to_width(padding, (needed+1)/2, language)); + return Texts(Text$repeat_to_width(padding, needed / 2, language), text, + Text$repeat_to_width(padding, (needed + 1) / 2, language)); } -public Text_t Text$slice(Text_t text, Int_t first_int, Int_t last_int) -{ +public +Text_t Text$slice(Text_t text, Int_t first_int, Int_t last_int) { int64_t first = Int64$from_int(first_int, false); int64_t last = Int64$from_int(last_int, false); if (first == 0) fail("Invalid index: 0"); @@ -647,11 +629,9 @@ public Text_t Text$slice(Text_t text, Int_t first_int, Int_t last_int) if (last > text.length) last = text.length; - if (first > text.length || last < first) - return EMPTY_TEXT; + if (first > text.length || last < first) return EMPTY_TEXT; - if (first == 1 && last == text.length) - return text; + if (first == 1 && last == text.length) return text; while (text.tag == TEXT_CONCAT) { if (last < text.left->length) { @@ -662,31 +642,31 @@ public Text_t Text$slice(Text_t text, Int_t first_int, Int_t last_int) text = *text.right; } else { return concat2_assuming_safe(Text$slice(*text.left, I(first), I(text.length)), - Text$slice(*text.right, I(1), I(last-text.left->length))); + Text$slice(*text.right, I(1), I(last - text.left->length))); } } switch (text.tag) { case TEXT_ASCII: { return (Text_t){ - .tag=TEXT_ASCII, - .length=last - first + 1, - .ascii=text.ascii + (first-1), + .tag = TEXT_ASCII, + .length = last - first + 1, + .ascii = text.ascii + (first - 1), }; } case TEXT_GRAPHEMES: { return (Text_t){ - .tag=TEXT_GRAPHEMES, - .length=last - first + 1, - .graphemes=text.graphemes + (first-1), + .tag = TEXT_GRAPHEMES, + .length = last - first + 1, + .graphemes = text.graphemes + (first - 1), }; } case TEXT_BLOB: { Text_t ret = (Text_t){ - .tag=TEXT_BLOB, - .length=last - first + 1, - .blob.map=text.blob.map, - .blob.bytes=text.blob.bytes + (first-1), + .tag = TEXT_BLOB, + .length = last - first + 1, + .blob.map = text.blob.map, + .blob.bytes = text.blob.bytes + (first - 1), }; return ret; } @@ -695,48 +675,44 @@ public Text_t Text$slice(Text_t text, Int_t first_int, Int_t last_int) return EMPTY_TEXT; } -public Text_t Text$from(Text_t text, Int_t first) -{ - return Text$slice(text, first, I_small(-1)); -} +public +Text_t Text$from(Text_t text, Int_t first) { return Text$slice(text, first, I_small(-1)); } -public Text_t Text$to(Text_t text, Int_t last) -{ - return Text$slice(text, I_small(1), last); -} +public +Text_t Text$to(Text_t text, Int_t last) { return Text$slice(text, I_small(1), last); } -public Text_t Text$reversed(Text_t text) -{ +public +Text_t Text$reversed(Text_t text) { switch (text.tag) { case TEXT_ASCII: { struct Text_s ret = { - .tag=TEXT_ASCII, - .length=text.length, + .tag = TEXT_ASCII, + .length = text.length, }; ret.ascii = GC_MALLOC_ATOMIC(sizeof(char[ret.length])); for (int64_t i = 0; i < text.length; i++) - ((char*)ret.ascii)[text.length-1-i] = text.ascii[i]; + ((char *)ret.ascii)[text.length - 1 - i] = text.ascii[i]; return ret; } case TEXT_GRAPHEMES: { struct Text_s ret = { - .tag=TEXT_GRAPHEMES, - .length=text.length, + .tag = TEXT_GRAPHEMES, + .length = text.length, }; ret.graphemes = GC_MALLOC_ATOMIC(sizeof(int32_t[ret.length])); for (int64_t i = 0; i < text.length; i++) - ((int32_t*)ret.graphemes)[text.length-1-i] = text.graphemes[i]; + ((int32_t *)ret.graphemes)[text.length - 1 - i] = text.graphemes[i]; return ret; } case TEXT_BLOB: { struct Text_s ret = { - .tag=TEXT_BLOB, - .length=text.length, - .blob.map=text.blob.map, + .tag = TEXT_BLOB, + .length = text.length, + .blob.map = text.blob.map, }; ret.blob.bytes = GC_MALLOC_ATOMIC(sizeof(uint8_t[ret.length])); for (int64_t i = 0; i < text.length; i++) - ((uint8_t*)ret.blob.bytes)[text.length-1-i] = text.graphemes[i]; + ((uint8_t *)ret.blob.bytes)[text.length - 1 - i] = text.graphemes[i]; return ret; } case TEXT_CONCAT: { @@ -747,32 +723,30 @@ public Text_t Text$reversed(Text_t text) return EMPTY_TEXT; } -public PUREFUNC Text_t Text$cluster(Text_t text, Int_t index) -{ - return Text$slice(text, index, index); -} +public +PUREFUNC Text_t Text$cluster(Text_t text, Int_t index) { return Text$slice(text, index, index); } -static Text_t Text$from_components(List_t graphemes, Table_t unique_clusters) -{ - size_t blob_size = ( - sizeof(int32_t[unique_clusters.entries.length]) - + sizeof(uint8_t[graphemes.length])); +static Text_t Text$from_components(List_t graphemes, Table_t unique_clusters) { + size_t blob_size = (sizeof(int32_t[unique_clusters.entries.length]) + sizeof(uint8_t[graphemes.length])); // If blob optimization will save at least 200 bytes: if (unique_clusters.entries.length <= 256 && blob_size + 200 < sizeof(int32_t[graphemes.length])) { Text_t ret = { - .tag=TEXT_BLOB, - .length=graphemes.length, - .depth=0, + .tag = TEXT_BLOB, + .length = graphemes.length, + .depth = 0, }; void *blob = GC_MALLOC_ATOMIC(blob_size); int32_t *map = blob; uint8_t *bytes = blob + sizeof(int32_t[unique_clusters.entries.length]); for (int64_t i = 0; i < unique_clusters.entries.length; i++) { - struct { int32_t g; uint8_t b; } *entry = unique_clusters.entries.data + i*unique_clusters.entries.stride; + struct { + int32_t g; + uint8_t b; + } *entry = unique_clusters.entries.data + i * unique_clusters.entries.stride; map[entry->b] = entry->g; } for (int64_t i = 0; i < graphemes.length; i++) { - int32_t g = *(int32_t*)(graphemes.data + i*graphemes.stride); + int32_t g = *(int32_t *)(graphemes.data + i * graphemes.stride); uint8_t *byte = Table$get(unique_clusters, &g, Table$info(&Int32$info, &Byte$info)); assert(byte); bytes[i] = *byte; @@ -782,15 +756,15 @@ static Text_t Text$from_components(List_t graphemes, Table_t unique_clusters) return ret; } else { return (Text_t){ - .tag=TEXT_GRAPHEMES, - .length=graphemes.length, - .graphemes=graphemes.data, + .tag = TEXT_GRAPHEMES, + .length = graphemes.length, + .graphemes = graphemes.data, }; } } -public OptionalText_t Text$from_strn(const char *str, size_t len) -{ +public +OptionalText_t Text$from_strn(const char *str, size_t len) { int64_t ascii_span = 0; for (size_t i = 0; i < len && isascii(str[i]); i++) ascii_span++; @@ -799,52 +773,48 @@ public OptionalText_t Text$from_strn(const char *str, size_t len) char *copy = GC_MALLOC_ATOMIC(len); memcpy(copy, str, len); return (Text_t){ - .tag=TEXT_ASCII, - .length=ascii_span, - .ascii=copy, + .tag = TEXT_ASCII, + .length = ascii_span, + .ascii = copy, }; } - if (u8_check((uint8_t*)str, len) != NULL) - return NONE_TEXT; + if (u8_check((uint8_t *)str, len) != NULL) return NONE_TEXT; List_t graphemes = {}; Table_t unique_clusters = {}; - const uint8_t *pos = (const uint8_t*)str; - const uint8_t *end = (const uint8_t*)&str[len]; + const uint8_t *pos = (const uint8_t *)str; + const uint8_t *end = (const uint8_t *)&str[len]; // Iterate over grapheme clusters - for (const uint8_t *next; (next=u8_grapheme_next(pos, end)); pos = next) { + for (const uint8_t *next; (next = u8_grapheme_next(pos, end)); pos = next) { uint32_t buf[256]; - size_t u32_len = sizeof(buf)/sizeof(buf[0]); - uint32_t *u32s = u8_to_u32(pos, (size_t)(next-pos), buf, &u32_len); + size_t u32_len = sizeof(buf) / sizeof(buf[0]); + uint32_t *u32s = u8_to_u32(pos, (size_t)(next - pos), buf, &u32_len); uint32_t buf2[256]; - size_t u32_normlen = sizeof(buf2)/sizeof(buf2[0]); + size_t u32_normlen = sizeof(buf2) / sizeof(buf2[0]); uint32_t *u32s_normalized = u32_normalize(UNINORM_NFC, u32s, u32_len, buf2, &u32_normlen); int32_t g = get_synthetic_grapheme(u32s_normalized, (int64_t)u32_normlen); List$insert(&graphemes, &g, I(0), sizeof(int32_t)); - Table$get_or_setdefault(&unique_clusters, int32_t, uint8_t, g, (uint8_t)unique_clusters.entries.length, Table$info(&Int32$info, &Byte$info)); + Table$get_or_setdefault(&unique_clusters, int32_t, uint8_t, g, (uint8_t)unique_clusters.entries.length, + Table$info(&Int32$info, &Byte$info)); if (u32s != buf) free(u32s); if (u32s_normalized != buf2) free(u32s_normalized); if (unique_clusters.entries.length >= 256) { - return concat2_assuming_safe( - Text$from_components(graphemes, unique_clusters), - Text$from_strn((const char*)next, (size_t)(end-next))); + return concat2_assuming_safe(Text$from_components(graphemes, unique_clusters), + Text$from_strn((const char *)next, (size_t)(end - next))); } } return Text$from_components(graphemes, unique_clusters); } -public OptionalText_t Text$from_str(const char *str) -{ - return str ? Text$from_strn(str, strlen(str)) : Text(""); -} +public +OptionalText_t Text$from_str(const char *str) { return str ? Text$from_strn(str, strlen(str)) : Text(""); } -static void u8_buf_append(Text_t text, char **buf, int64_t *capacity, int64_t *i) -{ +static void u8_buf_append(Text_t text, char **buf, int64_t *capacity, int64_t *i) { switch (text.tag) { case TEXT_ASCII: { if (*i + text.length > (int64_t)*capacity) { @@ -863,7 +833,7 @@ static void u8_buf_append(Text_t text, char **buf, int64_t *capacity, int64_t *i if (graphemes[g] >= 0) { uint8_t u8_buf[64]; size_t u8_len = sizeof(u8_buf); - uint8_t *u8 = u32_to_u8((ucs4_t*)&graphemes[g], 1, u8_buf, &u8_len); + uint8_t *u8 = u32_to_u8((ucs4_t *)&graphemes[g], 1, u8_buf, &u8_len); if (u8 == NULL) fail("Invalid grapheme encountered: ", graphemes[g]); if (*i + (int64_t)u8_len > (int64_t)*capacity) { @@ -894,7 +864,7 @@ static void u8_buf_append(Text_t text, char **buf, int64_t *capacity, int64_t *i if (grapheme >= 0) { uint8_t u8_buf[64]; size_t u8_len = sizeof(u8_buf); - uint8_t *u8 = u32_to_u8((ucs4_t*)&grapheme, 1, u8_buf, &u8_len); + uint8_t *u8 = u32_to_u8((ucs4_t *)&grapheme, 1, u8_buf, &u8_len); if (u8 == NULL) fail("Invalid grapheme encountered: ", grapheme); if (*i + (int64_t)u8_len > (int64_t)*capacity) { @@ -928,8 +898,8 @@ static void u8_buf_append(Text_t text, char **buf, int64_t *capacity, int64_t *i } } -public char *Text$as_c_string(Text_t text) -{ +public +char *Text$as_c_string(Text_t text) { int64_t capacity = text.length + 1; char *buf = GC_MALLOC_ATOMIC((size_t)capacity); int64_t i = 0; @@ -943,10 +913,9 @@ public char *Text$as_c_string(Text_t text) return buf; } -PUREFUNC public uint64_t Text$hash(const void *obj, const TypeInfo_t *info) -{ +PUREFUNC public uint64_t Text$hash(const void *obj, const TypeInfo_t *info) { (void)info; - Text_t text = *(Text_t*)obj; + Text_t text = *(Text_t *)obj; siphash sh; siphashinit(&sh, sizeof(int32_t[text.length])); @@ -959,40 +928,41 @@ PUREFUNC public uint64_t Text$hash(const void *obj, const TypeInfo_t *info) const char *bytes = text.ascii; for (int64_t i = 0; i + 1 < text.length; i += 2) { tmp.chunks[0] = (int32_t)bytes[i]; - tmp.chunks[1] = (int32_t)bytes[i+1]; + tmp.chunks[1] = (int32_t)bytes[i + 1]; siphashadd64bits(&sh, tmp.whole); } - int32_t last = text.length & 0x1 ? (int32_t)bytes[text.length-1] : 0; // Odd number of graphemes + int32_t last = text.length & 0x1 ? (int32_t)bytes[text.length - 1] : 0; // Odd number of graphemes return siphashfinish_last_part(&sh, (uint64_t)last); } case TEXT_GRAPHEMES: { const int32_t *graphemes = text.graphemes; for (int64_t i = 0; i + 1 < text.length; i += 2) { tmp.chunks[0] = graphemes[i]; - tmp.chunks[1] = graphemes[i+1]; + tmp.chunks[1] = graphemes[i + 1]; siphashadd64bits(&sh, tmp.whole); } - int32_t last = text.length & 0x1 ? graphemes[text.length-1] : 0; // Odd number of graphemes + int32_t last = text.length & 0x1 ? graphemes[text.length - 1] : 0; // Odd number of graphemes return siphashfinish_last_part(&sh, (uint64_t)last); } case TEXT_BLOB: { for (int64_t i = 0; i + 1 < text.length; i += 2) { tmp.chunks[0] = text.blob.map[text.blob.bytes[i]]; - tmp.chunks[1] = text.blob.map[text.blob.bytes[i+1]]; + tmp.chunks[1] = text.blob.map[text.blob.bytes[i + 1]]; siphashadd64bits(&sh, tmp.whole); } - int32_t last = text.length & 0x1 ? text.blob.map[text.blob.bytes[text.length-1]] : 0; // Odd number of graphemes + int32_t last = + text.length & 0x1 ? text.blob.map[text.blob.bytes[text.length - 1]] : 0; // Odd number of graphemes return siphashfinish_last_part(&sh, (uint64_t)last); } case TEXT_CONCAT: { TextIter_t state = NEW_TEXT_ITER_STATE(text); for (int64_t i = 0; i + 1 < text.length; i += 2) { tmp.chunks[0] = Text$get_grapheme_fast(&state, i); - tmp.chunks[1] = Text$get_grapheme_fast(&state, i+1); + tmp.chunks[1] = Text$get_grapheme_fast(&state, i + 1); siphashadd64bits(&sh, tmp.whole); } - int32_t last = (text.length & 0x1) ? Text$get_grapheme_fast(&state, text.length-1) : 0; + int32_t last = (text.length & 0x1) ? Text$get_grapheme_fast(&state, text.length - 1) : 0; return siphashfinish_last_part(&sh, (uint64_t)last); } default: errx(1, "Invalid text"); @@ -1000,8 +970,8 @@ PUREFUNC public uint64_t Text$hash(const void *obj, const TypeInfo_t *info) return 0; } -public int32_t Text$get_grapheme_fast(TextIter_t *state, int64_t index) -{ +public +int32_t Text$get_grapheme_fast(TextIter_t *state, int64_t index) { if (index < 0) return 0; if (index >= state->stack[0].text.length) return 0; @@ -1051,18 +1021,17 @@ public int32_t Text$get_grapheme_fast(TextIter_t *state, int64_t index) return 0; } -public uint32_t Text$get_main_grapheme_fast(TextIter_t *state, int64_t index) -{ +public +uint32_t Text$get_main_grapheme_fast(TextIter_t *state, int64_t index) { int32_t g = Text$get_grapheme_fast(state, index); return (g) >= 0 ? (ucs4_t)(g) : synthetic_graphemes[-(g)-1].main_codepoint; } -PUREFUNC public int32_t Text$compare(const void *va, const void *vb, const TypeInfo_t *info) -{ +PUREFUNC public int32_t Text$compare(const void *va, const void *vb, const TypeInfo_t *info) { (void)info; if (va == vb) return 0; - const Text_t a = *(const Text_t*)va; - const Text_t b = *(const Text_t*)vb; + const Text_t a = *(const Text_t *)va; + const Text_t b = *(const Text_t *)vb; // TODO: make this smarter and more efficient int64_t len = MAX(a.length, b.length); @@ -1073,31 +1042,21 @@ PUREFUNC public int32_t Text$compare(const void *va, const void *vb, const TypeI if (ai == bi) continue; int32_t cmp; if (ai > 0 && bi > 0) { - cmp = u32_cmp((ucs4_t*)&ai, (ucs4_t*)&bi, 1); + cmp = u32_cmp((ucs4_t *)&ai, (ucs4_t *)&bi, 1); } else if (ai > 0) { - cmp = u32_cmp2( - (ucs4_t*)&ai, 1, - GRAPHEME_CODEPOINTS(bi), - NUM_GRAPHEME_CODEPOINTS(bi)); + cmp = u32_cmp2((ucs4_t *)&ai, 1, GRAPHEME_CODEPOINTS(bi), NUM_GRAPHEME_CODEPOINTS(bi)); } else if (bi > 0) { - cmp = u32_cmp2( - GRAPHEME_CODEPOINTS(ai), - NUM_GRAPHEME_CODEPOINTS(ai), - (ucs4_t*)&bi, 1); + cmp = u32_cmp2(GRAPHEME_CODEPOINTS(ai), NUM_GRAPHEME_CODEPOINTS(ai), (ucs4_t *)&bi, 1); } else { - cmp = u32_cmp2( - GRAPHEME_CODEPOINTS(ai), - NUM_GRAPHEME_CODEPOINTS(ai), - GRAPHEME_CODEPOINTS(bi), - NUM_GRAPHEME_CODEPOINTS(bi)); + cmp = u32_cmp2(GRAPHEME_CODEPOINTS(ai), NUM_GRAPHEME_CODEPOINTS(ai), GRAPHEME_CODEPOINTS(bi), + NUM_GRAPHEME_CODEPOINTS(bi)); } if (cmp != 0) return cmp; } return 0; } -bool _matches(TextIter_t *text_state, TextIter_t *target_state, int64_t pos) -{ +bool _matches(TextIter_t *text_state, TextIter_t *target_state, int64_t pos) { for (int64_t i = 0; i < target_state->stack[0].text.length; i++) { int32_t text_i = Text$get_grapheme_fast(text_state, pos + i); int32_t prefix_i = Text$get_grapheme_fast(target_state, i); @@ -1106,10 +1065,8 @@ bool _matches(TextIter_t *text_state, TextIter_t *target_state, int64_t pos) return true; } -PUREFUNC public bool Text$starts_with(Text_t text, Text_t prefix, Text_t *remainder) -{ - if (text.length < prefix.length) - return false; +PUREFUNC public bool Text$starts_with(Text_t text, Text_t prefix, Text_t *remainder) { + if (text.length < prefix.length) return false; TextIter_t text_state = NEW_TEXT_ITER_STATE(text), prefix_state = NEW_TEXT_ITER_STATE(prefix); if (_matches(&text_state, &prefix_state, 0)) { if (remainder) *remainder = Text$from(text, Int$from_int64(prefix.length + 1)); @@ -1120,10 +1077,8 @@ PUREFUNC public bool Text$starts_with(Text_t text, Text_t prefix, Text_t *remain } } -PUREFUNC public bool Text$ends_with(Text_t text, Text_t suffix, Text_t *remainder) -{ - if (text.length < suffix.length) - return false; +PUREFUNC public bool Text$ends_with(Text_t text, Text_t suffix, Text_t *remainder) { + if (text.length < suffix.length) return false; TextIter_t text_state = NEW_TEXT_ITER_STATE(text), suffix_state = NEW_TEXT_ITER_STATE(suffix); if (_matches(&text_state, &suffix_state, text.length - suffix.length)) { if (remainder) *remainder = Text$to(text, Int$from_int64(text.length - suffix.length)); @@ -1134,18 +1089,17 @@ PUREFUNC public bool Text$ends_with(Text_t text, Text_t suffix, Text_t *remainde } } -public Text_t Text$without_prefix(Text_t text, Text_t prefix) -{ +public +Text_t Text$without_prefix(Text_t text, Text_t prefix) { return Text$starts_with(text, prefix, NULL) ? Text$slice(text, I(prefix.length + 1), I(text.length)) : text; } -public Text_t Text$without_suffix(Text_t text, Text_t suffix) -{ +public +Text_t Text$without_suffix(Text_t text, Text_t suffix) { return Text$ends_with(text, suffix, NULL) ? Text$slice(text, I(1), I(text.length - suffix.length)) : text; } -static bool _has_grapheme(TextIter_t *text, int32_t g) -{ +static bool _has_grapheme(TextIter_t *text, int32_t g) { for (int64_t t = 0; t < text->stack[0].text.length; t++) { if (g == Text$get_grapheme_fast(text, t)) { return true; @@ -1154,8 +1108,8 @@ static bool _has_grapheme(TextIter_t *text, int32_t g) return false; } -public Text_t Text$trim(Text_t text, Text_t to_trim, bool left, bool right) -{ +public +Text_t Text$trim(Text_t text, Text_t to_trim, bool left, bool right) { int64_t first = 0; TextIter_t text_state = NEW_TEXT_ITER_STATE(text), trim_state = NEW_TEXT_ITER_STATE(to_trim); if (left) { @@ -1163,28 +1117,29 @@ public Text_t Text$trim(Text_t text, Text_t to_trim, bool left, bool right) first += 1; } } - int64_t last = text.length-1; + int64_t last = text.length - 1; if (right) { while (last >= first && _has_grapheme(&trim_state, Text$get_grapheme_fast(&text_state, last))) { last -= 1; } } - return (first != 0 || last != text.length-1) ? Text$slice(text, I(first+1), I(last+1)) : text; + return (first != 0 || last != text.length - 1) ? Text$slice(text, I(first + 1), I(last + 1)) : text; } -public Text_t Text$translate(Text_t text, Table_t translations) -{ +public +Text_t Text$translate(Text_t text, Table_t translations) { TextIter_t text_state = NEW_TEXT_ITER_STATE(text); Text_t result = EMPTY_TEXT; int64_t span_start = 0; List_t replacement_list = translations.entries; - for (int64_t i = 0; i < text.length; ) { + for (int64_t i = 0; i < text.length;) { for (int64_t r = 0; r < replacement_list.length; r++) { - struct { Text_t target, replacement; } *entry = replacement_list.data + r*replacement_list.stride; + struct { + Text_t target, replacement; + } *entry = replacement_list.data + r * replacement_list.stride; TextIter_t target_state = NEW_TEXT_ITER_STATE(entry->target); if (_matches(&text_state, &target_state, i)) { - if (i > span_start) - result = concat2(result, Text$slice(text, I(span_start+1), I(i))); + if (i > span_start) result = concat2(result, Text$slice(text, I(span_start + 1), I(i))); result = concat2(result, entry->replacement); i += entry->target.length; @@ -1193,22 +1148,21 @@ public Text_t Text$translate(Text_t text, Table_t translations) } } i += 1; - found_match: continue; + found_match: + continue; } - if (span_start < text.length) - result = concat2(result, Text$slice(text, I(span_start+1), I(text.length))); + if (span_start < text.length) result = concat2(result, Text$slice(text, I(span_start + 1), I(text.length))); return result; } -public Text_t Text$replace(Text_t text, Text_t target, Text_t replacement) -{ +public +Text_t Text$replace(Text_t text, Text_t target, Text_t replacement) { TextIter_t text_state = NEW_TEXT_ITER_STATE(text), target_state = NEW_TEXT_ITER_STATE(target); Text_t result = EMPTY_TEXT; int64_t span_start = 0; - for (int64_t i = 0; i < text.length; ) { + for (int64_t i = 0; i < text.length;) { if (_matches(&text_state, &target_state, i)) { - if (i > span_start) - result = concat2(result, Text$slice(text, I(span_start+1), I(i))); + if (i > span_start) result = concat2(result, Text$slice(text, I(span_start + 1), I(i))); result = concat2(result, replacement); i += target.length; @@ -1217,34 +1171,31 @@ public Text_t Text$replace(Text_t text, Text_t target, Text_t replacement) i += 1; } } - if (span_start < text.length) - result = concat2(result, Text$slice(text, I(span_start+1), I(text.length))); + if (span_start < text.length) result = concat2(result, Text$slice(text, I(span_start + 1), I(text.length))); return result; } -public PUREFUNC bool Text$has(Text_t text, Text_t target) -{ +public +PUREFUNC bool Text$has(Text_t text, Text_t target) { TextIter_t text_state = NEW_TEXT_ITER_STATE(text), target_state = NEW_TEXT_ITER_STATE(target); for (int64_t i = 0; i < text.length; i++) { - if (_matches(&text_state, &target_state, i)) - return true; + if (_matches(&text_state, &target_state, i)) return true; } return false; } -public List_t Text$split(Text_t text, Text_t delimiters) -{ - if (delimiters.length == 0) - return Text$clusters(text); +public +List_t Text$split(Text_t text, Text_t delimiters) { + if (delimiters.length == 0) return Text$clusters(text); TextIter_t text_state = NEW_TEXT_ITER_STATE(text), delim_state = NEW_TEXT_ITER_STATE(delimiters); List_t splits = {}; - for (int64_t i = 0; i < text.length; ) { + for (int64_t i = 0; i < text.length;) { int64_t span_len = 0; while (i + span_len < text.length && !_matches(&text_state, &delim_state, i + span_len)) { span_len += 1; } - Text_t slice = Text$slice(text, I(i+1), I(i+span_len)); + Text_t slice = Text$slice(text, I(i + 1), I(i + span_len)); List$insert(&splits, &slice, I(0), sizeof(slice)); i += span_len + delimiters.length; if (i == text.length) { @@ -1255,20 +1206,20 @@ public List_t Text$split(Text_t text, Text_t delimiters) return splits; } -public List_t Text$split_any(Text_t text, Text_t delimiters) -{ - if (delimiters.length == 0) - return List(text); +public +List_t Text$split_any(Text_t text, Text_t delimiters) { + if (delimiters.length == 0) return List(text); TextIter_t text_state = NEW_TEXT_ITER_STATE(text), delim_state = NEW_TEXT_ITER_STATE(delimiters); List_t splits = {}; - for (int64_t i = 0; i < text.length; ) { + for (int64_t i = 0; i < text.length;) { int64_t span_len = 0; - while (i + span_len < text.length && !_has_grapheme(&delim_state, Text$get_grapheme_fast(&text_state, i + span_len))) { + while (i + span_len < text.length + && !_has_grapheme(&delim_state, Text$get_grapheme_fast(&text_state, i + span_len))) { span_len += 1; } bool trailing_delim = i + span_len < text.length; - Text_t slice = Text$slice(text, I(i+1), I(i+span_len)); + Text_t slice = Text$slice(text, I(i + 1), I(i + span_len)); List$insert(&splits, &slice, I(0), sizeof(slice)); i += span_len + 1; while (i < text.length && _has_grapheme(&delim_state, Text$get_grapheme_fast(&text_state, i))) { @@ -1288,8 +1239,7 @@ typedef struct { Text_t delimiter; } split_iter_state_t; -static OptionalText_t next_split(split_iter_state_t *state) -{ +static OptionalText_t next_split(split_iter_state_t *state) { Text_t text = state->state.stack[0].text; if (state->i >= text.length) { if (state->delimiter.length > 0 && state->i == text.length) { // special case @@ -1310,21 +1260,20 @@ static OptionalText_t next_split(split_iter_state_t *state) while (i + span_len < text.length && !_matches(&state->state, &delim_state, i + span_len)) { span_len += 1; } - Text_t slice = Text$slice(text, I(i+1), I(i+span_len)); + Text_t slice = Text$slice(text, I(i + 1), I(i + span_len)); state->i = i + span_len + state->delimiter.length; return slice; } -public Closure_t Text$by_split(Text_t text, Text_t delimiter) -{ +public +Closure_t Text$by_split(Text_t text, Text_t delimiter) { return (Closure_t){ - .fn=(void*)next_split, - .userdata=new(split_iter_state_t, .state=NEW_TEXT_ITER_STATE(text), .i=0, .delimiter=delimiter), + .fn = (void *)next_split, + .userdata = new (split_iter_state_t, .state = NEW_TEXT_ITER_STATE(text), .i = 0, .delimiter = delimiter), }; } -static OptionalText_t next_split_any(split_iter_state_t *state) -{ +static OptionalText_t next_split_any(split_iter_state_t *state) { Text_t text = state->state.stack[0].text; if (state->i >= text.length) { if (state->delimiter.length > 0 && state->i == text.length) { // special case @@ -1335,7 +1284,7 @@ static OptionalText_t next_split_any(split_iter_state_t *state) } if (state->delimiter.length == 0) { // special case - Text_t ret = Text$cluster(text, I(state->i+1)); + Text_t ret = Text$cluster(text, I(state->i + 1)); state->i += 1; return ret; } @@ -1343,10 +1292,11 @@ static OptionalText_t next_split_any(split_iter_state_t *state) TextIter_t delim_state = NEW_TEXT_ITER_STATE(state->delimiter); int64_t i = state->i; int64_t span_len = 0; - while (i + span_len < text.length && !_has_grapheme(&delim_state, Text$get_grapheme_fast(&state->state, i + span_len))) { + while (i + span_len < text.length + && !_has_grapheme(&delim_state, Text$get_grapheme_fast(&state->state, i + span_len))) { span_len += 1; } - Text_t slice = Text$slice(text, I(i+1), I(i+span_len)); + Text_t slice = Text$slice(text, I(i + 1), I(i + span_len)); i += span_len + 1; while (i < text.length && _has_grapheme(&delim_state, Text$get_grapheme_fast(&state->state, i))) { i += 1; @@ -1355,18 +1305,16 @@ static OptionalText_t next_split_any(split_iter_state_t *state) return slice; } -public Closure_t Text$by_split_any(Text_t text, Text_t delimiters) -{ +public +Closure_t Text$by_split_any(Text_t text, Text_t delimiters) { return (Closure_t){ - .fn=(void*)next_split_any, - .userdata=new(split_iter_state_t, .state=NEW_TEXT_ITER_STATE(text), .i=0, .delimiter=delimiters), + .fn = (void *)next_split_any, + .userdata = new (split_iter_state_t, .state = NEW_TEXT_ITER_STATE(text), .i = 0, .delimiter = delimiters), }; } -PUREFUNC public bool Text$equal_values(Text_t a, Text_t b) -{ - if (a.length != b.length) - return false; +PUREFUNC public bool Text$equal_values(Text_t a, Text_t b) { + if (a.length != b.length) return false; int64_t len = a.length; TextIter_t a_state = NEW_TEXT_ITER_STATE(a), b_state = NEW_TEXT_ITER_STATE(b); // TODO: make this smarter and more efficient @@ -1378,17 +1326,14 @@ PUREFUNC public bool Text$equal_values(Text_t a, Text_t b) return true; } -PUREFUNC public bool Text$equal(const void *a, const void *b, const TypeInfo_t *info) -{ +PUREFUNC public bool Text$equal(const void *a, const void *b, const TypeInfo_t *info) { (void)info; if (a == b) return true; - return Text$equal_values(*(Text_t*)a, *(Text_t*)b); + return Text$equal_values(*(Text_t *)a, *(Text_t *)b); } -PUREFUNC public bool Text$equal_ignoring_case(Text_t a, Text_t b, Text_t language) -{ - if (a.length != b.length) - return false; +PUREFUNC public bool Text$equal_ignoring_case(Text_t a, Text_t b, Text_t language) { + if (a.length != b.length) return false; int64_t len = a.length; TextIter_t a_state = NEW_TEXT_ITER_STATE(a), b_state = NEW_TEXT_ITER_STATE(b); const char *uc_language = Text$as_c_string(language); @@ -1396,76 +1341,79 @@ PUREFUNC public bool Text$equal_ignoring_case(Text_t a, Text_t b, Text_t languag int32_t ai = Text$get_grapheme_fast(&a_state, i); int32_t bi = Text$get_grapheme_fast(&b_state, i); if (ai != bi) { - const ucs4_t *a_codepoints = ai >= 0 ? (ucs4_t*)&ai : GRAPHEME_CODEPOINTS(ai); + const ucs4_t *a_codepoints = ai >= 0 ? (ucs4_t *)&ai : GRAPHEME_CODEPOINTS(ai); int64_t a_len = ai >= 0 ? 1 : NUM_GRAPHEME_CODEPOINTS(ai); - const ucs4_t *b_codepoints = bi >= 0 ? (ucs4_t*)&bi : GRAPHEME_CODEPOINTS(bi); + const ucs4_t *b_codepoints = bi >= 0 ? (ucs4_t *)&bi : GRAPHEME_CODEPOINTS(bi); int64_t b_len = bi >= 0 ? 1 : NUM_GRAPHEME_CODEPOINTS(bi); int cmp = 0; (void)u32_casecmp(a_codepoints, (size_t)a_len, b_codepoints, (size_t)b_len, uc_language, UNINORM_NFC, &cmp); - if (cmp != 0) - return false; + if (cmp != 0) return false; } } return true; } -public Text_t Text$upper(Text_t text, Text_t language) -{ +public +Text_t Text$upper(Text_t text, Text_t language) { if (text.length == 0) return text; List_t codepoints = Text$utf32_codepoints(text); const char *uc_language = Text$as_c_string(language); size_t out_len = 0; ucs4_t *upper = u32_toupper(codepoints.data, (size_t)codepoints.length, uc_language, UNINORM_NFC, NULL, &out_len); - Text_t ret = Text$from_codepoints((List_t){.data=upper, .length=(int64_t)out_len, .stride=sizeof(int32_t)}); + Text_t ret = Text$from_codepoints((List_t){.data = upper, .length = (int64_t)out_len, .stride = sizeof(int32_t)}); return ret; } -public Text_t Text$lower(Text_t text, Text_t language) -{ +public +Text_t Text$lower(Text_t text, Text_t language) { if (text.length == 0) return text; List_t codepoints = Text$utf32_codepoints(text); const char *uc_language = Text$as_c_string(language); size_t out_len = 0; ucs4_t *lower = u32_tolower(codepoints.data, (size_t)codepoints.length, uc_language, UNINORM_NFC, NULL, &out_len); - Text_t ret = Text$from_codepoints((List_t){.data=lower, .length=(int64_t)out_len, .stride=sizeof(int32_t)}); + Text_t ret = Text$from_codepoints((List_t){.data = lower, .length = (int64_t)out_len, .stride = sizeof(int32_t)}); return ret; } -public Text_t Text$title(Text_t text, Text_t language) -{ +public +Text_t Text$title(Text_t text, Text_t language) { if (text.length == 0) return text; List_t codepoints = Text$utf32_codepoints(text); const char *uc_language = Text$as_c_string(language); size_t out_len = 0; ucs4_t *title = u32_totitle(codepoints.data, (size_t)codepoints.length, uc_language, UNINORM_NFC, NULL, &out_len); - Text_t ret = Text$from_codepoints((List_t){.data=title, .length=(int64_t)out_len, .stride=sizeof(int32_t)}); + Text_t ret = Text$from_codepoints((List_t){.data = title, .length = (int64_t)out_len, .stride = sizeof(int32_t)}); return ret; } -public Text_t Text$quoted(Text_t text, bool colorize, Text_t quotation_mark) -{ - if (quotation_mark.length != 1) - fail("Invalid quote text: ", quotation_mark, " (must have length == 1)"); +public +Text_t Text$quoted(Text_t text, bool colorize, Text_t quotation_mark) { + if (quotation_mark.length != 1) fail("Invalid quote text: ", quotation_mark, " (must have length == 1)"); Text_t ret = colorize ? Text("\x1b[35m") : EMPTY_TEXT; - if (!Text$equal_values(quotation_mark, Text("\"")) && !Text$equal_values(quotation_mark, Text("'")) && !Text$equal_values(quotation_mark, Text("`"))) + if (!Text$equal_values(quotation_mark, Text("\"")) && !Text$equal_values(quotation_mark, Text("'")) + && !Text$equal_values(quotation_mark, Text("`"))) ret = concat2_assuming_safe(ret, Text("$")); ret = concat2_assuming_safe(ret, quotation_mark); int32_t quote_char = Text$get_grapheme(quotation_mark, 0); -#define flush_unquoted() ({ \ - if (unquoted_span > 0) { \ - ret = concat2_assuming_safe(ret, Text$slice(text, I(i-unquoted_span+1), I(i))); \ - unquoted_span = 0; \ - } }) -#define add_escaped(str) ({ \ - flush_unquoted(); \ - if (colorize) ret = concat2_assuming_safe(ret, Text("\x1b[34;1m")); \ - ret = concat2_assuming_safe(ret, Text("\\" str)); \ - if (colorize) ret = concat2_assuming_safe(ret, Text("\x1b[0;35m")); }) +#define flush_unquoted() \ + ({ \ + if (unquoted_span > 0) { \ + ret = concat2_assuming_safe(ret, Text$slice(text, I(i - unquoted_span + 1), I(i))); \ + unquoted_span = 0; \ + } \ + }) +#define add_escaped(str) \ + ({ \ + flush_unquoted(); \ + if (colorize) ret = concat2_assuming_safe(ret, Text("\x1b[34;1m")); \ + ret = concat2_assuming_safe(ret, Text("\\" str)); \ + if (colorize) ret = concat2_assuming_safe(ret, Text("\x1b[0;35m")); \ + }) TextIter_t state = NEW_TEXT_ITER_STATE(text); int64_t unquoted_span = 0; int64_t i = 0; @@ -1488,8 +1436,10 @@ public Text_t Text$quoted(Text_t text, bool colorize, Text_t quotation_mark) add_escaped("$"); break; } - case '\x00' ... '\x06': case '\x0E' ... '\x1A': - case '\x1C' ... '\x1F': case '\x7F' ... '\x7F': { + case '\x00' ... '\x06': + case '\x0E' ... '\x1A': + case '\x1C' ... '\x1F': + case '\x7F' ... '\x7F': { flush_unquoted(); if (colorize) ret = concat2_assuming_safe(ret, Text("\x1b[34;1m")); ret = concat2_assuming_safe(ret, Text("\\x")); @@ -1499,8 +1449,7 @@ public Text_t Text$quoted(Text_t text, bool colorize, Text_t quotation_mark) '\0', }; ret = concat2_assuming_safe(ret, Text$from_strn(tmp, 2)); - if (colorize) - ret = concat2_assuming_safe(ret, Text("\x1b[0;35m")); + if (colorize) ret = concat2_assuming_safe(ret, Text("\x1b[0;35m")); break; } default: { @@ -1522,21 +1471,19 @@ public Text_t Text$quoted(Text_t text, bool colorize, Text_t quotation_mark) #undef flush_unquoted ret = concat2_assuming_safe(ret, quotation_mark); - if (colorize) - ret = concat2_assuming_safe(ret, Text("\x1b[m")); + if (colorize) ret = concat2_assuming_safe(ret, Text("\x1b[m")); return ret; } -public Text_t Text$as_text(const void *vtext, bool colorize, const TypeInfo_t *info) -{ +public +Text_t Text$as_text(const void *vtext, bool colorize, const TypeInfo_t *info) { (void)info; if (!vtext) return info && info->TextInfo.lang ? Text$from_str(info->TextInfo.lang) : Text("Text"); - Text_t text = *(Text_t*)vtext; + Text_t text = *(Text_t *)vtext; // Figure out the best quotation mark to use: - bool has_double_quote = false, has_backtick = false, - has_single_quote = false, needs_escapes = false; + bool has_double_quote = false, has_backtick = false, has_single_quote = false, needs_escapes = false; TextIter_t state = NEW_TEXT_ITER_STATE(text); for (int64_t i = 0; i < text.length; i++) { int32_t g = Text$get_grapheme_fast(&state, i); @@ -1554,39 +1501,33 @@ public Text_t Text$as_text(const void *vtext, bool colorize, const TypeInfo_t *i // needing to escape them by using single quotes, but only if we don't have // single quotes or need to escape anything else (because single quotes // don't have interpolation): - if (has_double_quote && !has_single_quote) - quote = Text("'"); + if (has_double_quote && !has_single_quote) quote = Text("'"); // If there is a double quote, but no backtick, we can save a bit of // escaping by using backtick instead of double quote: - else if (has_double_quote && has_single_quote && !has_backtick && !needs_escapes) - quote = Text("`"); + else if (has_double_quote && has_single_quote && !has_backtick && !needs_escapes) quote = Text("`"); // Otherwise fall back to double quotes as the default quoting style: - else - quote = Text("\""); + else quote = Text("\""); Text_t as_text = Text$quoted(text, colorize, quote); if (info && info->TextInfo.lang && info != &Text$info) - as_text = Text$concat( - colorize ? Text("\x1b[1m$") : Text("$"), - Text$from_str(info->TextInfo.lang), - colorize ? Text("\x1b[0m") : Text(""), - as_text); + as_text = Text$concat(colorize ? Text("\x1b[1m$") : Text("$"), Text$from_str(info->TextInfo.lang), + colorize ? Text("\x1b[0m") : Text(""), as_text); return as_text; } -public Text_t Text$join(Text_t glue, List_t pieces) -{ +public +Text_t Text$join(Text_t glue, List_t pieces) { if (pieces.length == 0) return EMPTY_TEXT; - Text_t result = *(Text_t*)pieces.data; + Text_t result = *(Text_t *)pieces.data; for (int64_t i = 1; i < pieces.length; i++) { - result = Text$concat(result, glue, *(Text_t*)(pieces.data + i*pieces.stride)); + result = Text$concat(result, glue, *(Text_t *)(pieces.data + i * pieces.stride)); } return result; } -public List_t Text$clusters(Text_t text) -{ +public +List_t Text$clusters(Text_t text) { List_t clusters = {}; for (int64_t i = 1; i <= text.length; i++) { Text_t cluster = Text$slice(text, I(i), I(i)); @@ -1595,9 +1536,9 @@ public List_t Text$clusters(Text_t text) return clusters; } -public List_t Text$utf32_codepoints(Text_t text) -{ - List_t codepoints = {.atomic=1}; +public +List_t Text$utf32_codepoints(Text_t text) { + List_t codepoints = {.atomic = 1}; TextIter_t state = NEW_TEXT_ITER_STATE(text); for (int64_t i = 0; i < text.length; i++) { int32_t grapheme = Text$get_grapheme_fast(&state, i); @@ -1613,24 +1554,23 @@ public List_t Text$utf32_codepoints(Text_t text) return codepoints; } -public List_t Text$utf8_bytes(Text_t text) -{ +public +List_t Text$utf8_bytes(Text_t text) { const char *str = Text$as_c_string(text); - return (List_t){.length=(int64_t)strlen(str), .stride=1, .atomic=1, .data=(void*)str}; + return (List_t){.length = (int64_t)strlen(str), .stride = 1, .atomic = 1, .data = (void *)str}; } -static INLINE const char *codepoint_name(ucs4_t c) -{ +static INLINE const char *codepoint_name(ucs4_t c) { char *name = GC_MALLOC_ATOMIC(UNINAME_MAX); char *found_name = unicode_character_name(c, name); if (found_name) return found_name; const uc_block_t *block = uc_block(c); assert(block); - return String(block->name, "-", hex(c, .no_prefix=true, .uppercase=true)); + return String(block->name, "-", hex(c, .no_prefix = true, .uppercase = true)); } -public List_t Text$codepoint_names(Text_t text) -{ +public +List_t Text$codepoint_names(Text_t text) { List_t names = {}; TextIter_t state = NEW_TEXT_ITER_STATE(text); for (int64_t i = 0; i < text.length; i++) { @@ -1650,81 +1590,78 @@ public List_t Text$codepoint_names(Text_t text) return names; } -public Text_t Text$from_codepoints(List_t codepoints) -{ - if (codepoints.stride != sizeof(uint32_t)) - List$compact(&codepoints, sizeof(uint32_t)); +public +Text_t Text$from_codepoints(List_t codepoints) { + if (codepoints.stride != sizeof(uint32_t)) List$compact(&codepoints, sizeof(uint32_t)); List_t graphemes = {}; Table_t unique_clusters = {}; - const uint32_t *pos = (const uint32_t*)codepoints.data; - const uint32_t *end = (const uint32_t*)&pos[codepoints.length]; + const uint32_t *pos = (const uint32_t *)codepoints.data; + const uint32_t *end = (const uint32_t *)&pos[codepoints.length]; // Iterate over grapheme clusters - for (const uint32_t *next; (next=u32_grapheme_next(pos, end)); pos = next) { + for (const uint32_t *next; (next = u32_grapheme_next(pos, end)); pos = next) { // Buffer for normalized cluster: uint32_t buf[256]; - size_t u32_normlen = sizeof(buf)/sizeof(buf[0]); - uint32_t *u32s_normalized = u32_normalize(UNINORM_NFC, pos, (size_t)(next-pos), buf, &u32_normlen); + size_t u32_normlen = sizeof(buf) / sizeof(buf[0]); + uint32_t *u32s_normalized = u32_normalize(UNINORM_NFC, pos, (size_t)(next - pos), buf, &u32_normlen); int32_t g = get_synthetic_grapheme(u32s_normalized, (int64_t)u32_normlen); List$insert(&graphemes, &g, I(0), sizeof(int32_t)); - Table$get_or_setdefault( - &unique_clusters, int32_t, uint8_t, g, (uint8_t)unique_clusters.entries.length, - Table$info(&Int32$info, &Byte$info)); + Table$get_or_setdefault(&unique_clusters, int32_t, uint8_t, g, (uint8_t)unique_clusters.entries.length, + Table$info(&Int32$info, &Byte$info)); if (u32s_normalized != buf) free(u32s_normalized); if (unique_clusters.entries.length == 256) { List_t remaining_codepoints = { - .length=(int64_t)(end-next), - .data=(void*)next, - .stride=sizeof(int32_t), + .length = (int64_t)(end - next), + .data = (void *)next, + .stride = sizeof(int32_t), }; - return concat2_assuming_safe(Text$from_components(graphemes, unique_clusters), Text$from_codepoints(remaining_codepoints)); + return concat2_assuming_safe(Text$from_components(graphemes, unique_clusters), + Text$from_codepoints(remaining_codepoints)); } } return Text$from_components(graphemes, unique_clusters); } -public OptionalText_t Text$from_codepoint_names(List_t codepoint_names) -{ +public +OptionalText_t Text$from_codepoint_names(List_t codepoint_names) { List_t codepoints = {}; for (int64_t i = 0; i < codepoint_names.length; i++) { - Text_t *name = ((Text_t*)(codepoint_names.data + i*codepoint_names.stride)); + Text_t *name = ((Text_t *)(codepoint_names.data + i * codepoint_names.stride)); const char *name_str = Text$as_c_string(*name); ucs4_t codepoint = unicode_name_character(name_str); - if (codepoint == UNINAME_INVALID) - return NONE_TEXT; + if (codepoint == UNINAME_INVALID) return NONE_TEXT; List$insert(&codepoints, &codepoint, I_small(0), sizeof(ucs4_t)); } return Text$from_codepoints(codepoints); } -public OptionalText_t Text$from_bytes(List_t bytes) -{ - if (bytes.stride != sizeof(int8_t)) - List$compact(&bytes, sizeof(int8_t)); +public +OptionalText_t Text$from_bytes(List_t bytes) { + if (bytes.stride != sizeof(int8_t)) List$compact(&bytes, sizeof(int8_t)); return Text$from_strn(bytes.data, (size_t)bytes.length); } -public List_t Text$lines(Text_t text) -{ +public +List_t Text$lines(Text_t text) { List_t lines = {}; TextIter_t state = NEW_TEXT_ITER_STATE(text); for (int64_t i = 0, line_start = 0; i < text.length; i++) { int32_t grapheme = Text$get_grapheme_fast(&state, i); if (grapheme == '\r' && Text$get_grapheme_fast(&state, i + 1) == '\n') { // CRLF - Text_t line = Text$slice(text, I(line_start+1), I(i)); + Text_t line = Text$slice(text, I(line_start + 1), I(i)); List$insert(&lines, &line, I_small(0), sizeof(Text_t)); i += 1; // skip one extra for CR line_start = i + 1; } else if (grapheme == '\n') { // newline - Text_t line = Text$slice(text, I(line_start+1), I(i)); + Text_t line = Text$slice(text, I(line_start + 1), I(i)); List$insert(&lines, &line, I_small(0), sizeof(Text_t)); line_start = i + 1; - } else if (i == text.length-1 && line_start != i) { // last line - Text_t line = Text$slice(text, I(line_start+1), I(i+1)); + } else if (i == text.length - 1 && line_start != i) { // last line + Text_t line = Text$slice(text, I(line_start + 1), I(i + 1)); List$insert(&lines, &line, I_small(0), sizeof(Text_t)); } } @@ -1736,21 +1673,20 @@ typedef struct { int64_t i; } line_iter_state_t; -static OptionalText_t next_line(line_iter_state_t *state) -{ +static OptionalText_t next_line(line_iter_state_t *state) { Text_t text = state->state.stack[0].text; for (int64_t i = state->i; i < text.length; i++) { int32_t grapheme = Text$get_grapheme_fast(&state->state, i); if (grapheme == '\r' && Text$get_grapheme_fast(&state->state, i + 1) == '\n') { // CRLF - Text_t line = Text$slice(text, I(state->i+1), I(i)); + Text_t line = Text$slice(text, I(state->i + 1), I(i)); state->i = i + 2; // skip one extra for CR return line; } else if (grapheme == '\n') { // newline - Text_t line = Text$slice(text, I(state->i+1), I(i)); + Text_t line = Text$slice(text, I(state->i + 1), I(i)); state->i = i + 1; return line; - } else if (i == text.length-1 && state->i != i) { // last line - Text_t line = Text$slice(text, I(state->i+1), I(i+1)); + } else if (i == text.length - 1 && state->i != i) { // last line + Text_t line = Text$slice(text, I(state->i + 1), I(i + 1)); state->i = i + 1; return line; } @@ -1758,81 +1694,75 @@ static OptionalText_t next_line(line_iter_state_t *state) return NONE_TEXT; } -public Closure_t Text$by_line(Text_t text) -{ +public +Closure_t Text$by_line(Text_t text) { return (Closure_t){ - .fn=(void*)next_line, - .userdata=new(line_iter_state_t, .state=NEW_TEXT_ITER_STATE(text), .i=0), + .fn = (void *)next_line, + .userdata = new (line_iter_state_t, .state = NEW_TEXT_ITER_STATE(text), .i = 0), }; } -PUREFUNC public bool Text$is_none(const void *t, const TypeInfo_t *info) -{ +PUREFUNC public bool Text$is_none(const void *t, const TypeInfo_t *info) { (void)info; - return ((Text_t*)t)->length < 0; + return ((Text_t *)t)->length < 0; } -public Int_t Text$memory_size(Text_t text) -{ +public +Int_t Text$memory_size(Text_t text) { switch (text.tag) { - case TEXT_ASCII: - return Int$from_int64((int64_t)sizeof(Text_t) + (int64_t)sizeof(char[text.length])); - case TEXT_GRAPHEMES: - return Int$from_int64((int64_t)sizeof(Text_t) + (int64_t)sizeof(int32_t[text.length])); + case TEXT_ASCII: return Int$from_int64((int64_t)sizeof(Text_t) + (int64_t)sizeof(char[text.length])); + case TEXT_GRAPHEMES: return Int$from_int64((int64_t)sizeof(Text_t) + (int64_t)sizeof(int32_t[text.length])); case TEXT_BLOB: - return Int$from_int64((int64_t)sizeof(Text_t) + (int64_t)((void*)text.blob.bytes - (void*)text.blob.map) + (int64_t)sizeof(uint8_t[text.length])); + return Int$from_int64((int64_t)sizeof(Text_t) + (int64_t)((void *)text.blob.bytes - (void *)text.blob.map) + + (int64_t)sizeof(uint8_t[text.length])); case TEXT_CONCAT: - return Int$plus( - Int$from_int64((int64_t)sizeof(Text_t)), - Int$plus(Text$memory_size(*text.left), Text$memory_size(*text.right))); + return Int$plus(Int$from_int64((int64_t)sizeof(Text_t)), + Int$plus(Text$memory_size(*text.left), Text$memory_size(*text.right))); default: errx(1, "Invalid text tag: %d", text.tag); } } -public Text_t Text$layout(Text_t text) -{ +public +Text_t Text$layout(Text_t text) { switch (text.tag) { - case TEXT_ASCII: - return Texts(Text("ASCII("), Int64$as_text((int64_t[1]){text.length}, false, NULL), Text(")")); + case TEXT_ASCII: return Texts(Text("ASCII("), Int64$as_text((int64_t[1]){text.length}, false, NULL), Text(")")); case TEXT_GRAPHEMES: return Texts(Text("Graphemes("), Int64$as_text((int64_t[1]){text.length}, false, NULL), Text(")")); - case TEXT_BLOB: - return Texts(Text("Blob("), Int64$as_text((int64_t[1]){text.length}, false, NULL), Text(")")); + case TEXT_BLOB: return Texts(Text("Blob("), Int64$as_text((int64_t[1]){text.length}, false, NULL), Text(")")); case TEXT_CONCAT: return Texts(Text("Concat("), Text$layout(*text.left), Text(", "), Text$layout(*text.right), Text(")")); default: errx(1, "Invalid text tag: %d", text.tag); } } -public void Text$serialize(const void *obj, FILE *out, Table_t *pointers, const TypeInfo_t *info) -{ +public +void Text$serialize(const void *obj, FILE *out, Table_t *pointers, const TypeInfo_t *info) { (void)info; - const char *str = Text$as_c_string(*(Text_t*)obj); + const char *str = Text$as_c_string(*(Text_t *)obj); int64_t len = (int64_t)strlen(str); Int64$serialize(&len, out, pointers, &Int64$info); fwrite(str, sizeof(char), (size_t)len, out); } -public void Text$deserialize(FILE *in, void *out, List_t *pointers, const TypeInfo_t *info) -{ +public +void Text$deserialize(FILE *in, void *out, List_t *pointers, const TypeInfo_t *info) { (void)info; int64_t len = 0; Int64$deserialize(in, &len, pointers, &Int64$info); - if (len < 0) - fail("Cannot deserialize text with a negative length!"); - char *buf = GC_MALLOC_ATOMIC((size_t)len+1); - if (fread(buf, sizeof(char), (size_t)len, in) != (size_t)len) - fail("Not enough data in stream to deserialize"); - buf[len+1] = '\0'; - *(Text_t*)out = Text$from_strn(buf, (size_t)len); -} - -public const TypeInfo_t Text$info = { - .size=sizeof(Text_t), - .align=__alignof__(Text_t), - .tag=TextInfo, - .TextInfo={.lang="Text"}, - .metamethods=Text$metamethods, + if (len < 0) fail("Cannot deserialize text with a negative length!"); + char *buf = GC_MALLOC_ATOMIC((size_t)len + 1); + if (fread(buf, sizeof(char), (size_t)len, in) != (size_t)len) fail("Not enough data in stream to deserialize"); + buf[len + 1] = '\0'; + *(Text_t *)out = Text$from_strn(buf, (size_t)len); +} + +public +const TypeInfo_t Text$info = { + .size = sizeof(Text_t), + .align = __alignof__(Text_t), + .tag = TextInfo, + .TextInfo = {.lang = "Text"}, + .metamethods = Text$metamethods, }; // vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 -- cgit v1.2.3