about | summary | refs | log | tree | commit | diff
path: root/src/stdlib/text.c
diff options
context:
space:
mode:
author Bruce Hill <bruce@bruce-hill.com> 2025-08-23 19:28:08 -0400
committer Bruce Hill <bruce@bruce-hill.com> 2025-08-23 19:28:08 -0400
commit fcda36561d668f43bac91ea31cd55cbbd605d330 (patch)
tree eb74c0b17df584af0fd8154422ad924e04c96cc2 /src/stdlib/text.c
parent 414b0c7472c87c5a013029aefef49e2dbc41e700 (diff)
Autoformat everything with clang-format
Diffstat (limited to 'src/stdlib/text.c')
-rw-r--r-- src/stdlib/text.c 880
1 files changed, 405 insertions, 475 deletions
diff --git a/src/stdlib/text.c b/src/stdlib/text.c
index d9793eb8..aad3fd76 100644
--- a/src/stdlib/text.c
+++ b/src/stdlib/text.c
@@ -116,8 +116,8 @@
#include "text.h"
// Use inline version of the siphash code for performance:
-#include "siphash.h"
#include "siphash-internals.h"
+#include "siphash.h"
typedef struct {
ucs4_t main_codepoint;
@@ -133,9 +133,9 @@ static synthetic_grapheme_t *synthetic_graphemes = NULL;
static int32_t synthetic_grapheme_capacity = 0;
static int32_t num_synthetic_graphemes = 0;
-#define NUM_GRAPHEME_CODEPOINTS(id) (synthetic_graphemes[-(id)-1].utf32_cluster[0])
-#define GRAPHEME_CODEPOINTS(id) (&synthetic_graphemes[-(id)-1].utf32_cluster[1])
-#define GRAPHEME_UTF8(id) (synthetic_graphemes[-(id)-1].utf8)
+#define NUM_GRAPHEME_CODEPOINTS(id) (synthetic_graphemes[-(id) - 1].utf32_cluster[0])
+#define GRAPHEME_CODEPOINTS(id) (&synthetic_graphemes[-(id) - 1].utf32_cluster[1])
+#define GRAPHEME_UTF8(id) (synthetic_graphemes[-(id) - 1].utf8)
// Somewhat arbitrarily chosen, if two short literal ASCII or grapheme chunks
// are concatenated below this length threshold, we just merge them into a
@@ -145,16 +145,17 @@ static int32_t num_synthetic_graphemes = 0;
static Text_t simple_concatenation(Text_t a, Text_t b);
-public Text_t EMPTY_TEXT = {
- .length=0,
- .tag=TEXT_ASCII,
- .ascii=0,
+public
+Text_t EMPTY_TEXT = {
+ .length = 0,
+ .tag = TEXT_ASCII,
+ .ascii = 0,
};
PUREFUNC static bool graphemes_equal(const void *va, const void *vb, const TypeInfo_t *info) {
(void)info;
- ucs4_t *a = *(ucs4_t**)va;
- ucs4_t *b = *(ucs4_t**)vb;
+ ucs4_t *a = *(ucs4_t **)va;
+ ucs4_t *b = *(ucs4_t **)vb;
if (a[0] != b[0]) return false;
for (int i = 0; i < (int)a[0]; i++)
if (a[i] != b[i]) return false;
@@ -163,37 +164,37 @@ PUREFUNC static bool graphemes_equal(const void *va, const void *vb, const TypeI
PUREFUNC static uint64_t grapheme_hash(const void *g, const TypeInfo_t *info) {
(void)info;
- ucs4_t *cluster = *(ucs4_t**)g;
- return siphash24((void*)&cluster[1], sizeof(ucs4_t[cluster[0]]));
+ ucs4_t *cluster = *(ucs4_t **)g;
+ return siphash24((void *)&cluster[1], sizeof(ucs4_t[cluster[0]]));
}
static const TypeInfo_t GraphemeClusterInfo = {
- .size=sizeof(ucs4_t*),
- .align=__alignof__(ucs4_t*),
- .metamethods={
- .equal=graphemes_equal,
- .hash=grapheme_hash,
- },
+ .size = sizeof(ucs4_t *),
+ .align = __alignof__(ucs4_t *),
+ .metamethods =
+ {
+ .equal = graphemes_equal,
+ .hash = grapheme_hash,
+ },
};
#ifdef __GNUC__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wstack-protector"
#endif
-public int32_t get_synthetic_grapheme(const ucs4_t *codepoints, int64_t utf32_len)
-{
- if (utf32_len == 1)
- return (int32_t)*codepoints;
+public
+int32_t get_synthetic_grapheme(const ucs4_t *codepoints, int64_t utf32_len) {
+ if (utf32_len == 1) return (int32_t)*codepoints;
- ucs4_t length_prefixed[1+utf32_len];
+ ucs4_t length_prefixed[1 + utf32_len];
length_prefixed[0] = (ucs4_t)utf32_len;
for (int i = 0; i < utf32_len; i++)
- length_prefixed[i+1] = codepoints[i];
+ length_prefixed[i + 1] = codepoints[i];
ucs4_t *ptr = &length_prefixed[0];
// Optimization for common case of one frequently used synthetic grapheme:
static int32_t last_grapheme = 0;
- if (last_grapheme != 0 && graphemes_equal(&ptr, &synthetic_graphemes[-last_grapheme-1].utf32_cluster, NULL))
+ if (last_grapheme != 0 && graphemes_equal(&ptr, &synthetic_graphemes[-last_grapheme - 1].utf32_cluster, NULL))
return last_grapheme;
TypeInfo_t GraphemeIDLookupTableInfo = *Table$info(&GraphemeClusterInfo, &Int32$info);
@@ -209,12 +210,12 @@ public int32_t get_synthetic_grapheme(const ucs4_t *codepoints, int64_t utf32_le
synthetic_graphemes = new;
}
- int32_t grapheme_id = -(num_synthetic_graphemes+1);
+ int32_t grapheme_id = -(num_synthetic_graphemes + 1);
num_synthetic_graphemes += 1;
// Get UTF8 representation:
uint8_t u8_buf[64];
- size_t u8_len = sizeof(u8_buf)/sizeof(u8_buf[0]);
+ size_t u8_len = sizeof(u8_buf) / sizeof(u8_buf[0]);
uint8_t *u8 = u32_to_u8(codepoints, (size_t)utf32_len, u8_buf, &u8_len);
if (u8 == NULL) fail("Invalid graphemes encountered!");
@@ -223,11 +224,10 @@ public int32_t get_synthetic_grapheme(const ucs4_t *codepoints, int64_t utf32_le
// area with good cache locality:
static void *arena = NULL, *arena_end = NULL;
// Eat up any space needed to make arena 32-bit aligned:
- if ((size_t)arena % __alignof__(ucs4_t) != 0)
- arena += __alignof__(ucs4_t) - ((size_t)arena % __alignof__(ucs4_t));
+ if ((size_t)arena % __alignof__(ucs4_t) != 0) arena += __alignof__(ucs4_t) - ((size_t)arena % __alignof__(ucs4_t));
// If we have filled up this arena, allocate a new one:
- size_t needed_memory = sizeof(ucs4_t[1+utf32_len]) + sizeof(uint8_t[u8_len + 1]);
+ size_t needed_memory = sizeof(ucs4_t[1 + utf32_len]) + sizeof(uint8_t[u8_len + 1]);
if (arena + needed_memory > arena_end) {
// Do reasonably big chunks at a time, so most synthetic codepoints are
// nearby each other in memory and cache locality is good. This is a
@@ -239,28 +239,27 @@ public int32_t get_synthetic_grapheme(const ucs4_t *codepoints, int64_t utf32_le
// Copy length-prefixed UTF32 codepoints into the arena and store where they live:
ucs4_t *codepoint_copy = arena;
- memcpy(codepoint_copy, length_prefixed, sizeof(ucs4_t[1+utf32_len]));
- synthetic_graphemes[-grapheme_id-1].utf32_cluster = codepoint_copy;
- arena += sizeof(ucs4_t[1+utf32_len]);
+ memcpy(codepoint_copy, length_prefixed, sizeof(ucs4_t[1 + utf32_len]));
+ synthetic_graphemes[-grapheme_id - 1].utf32_cluster = codepoint_copy;
+ arena += sizeof(ucs4_t[1 + utf32_len]);
// Copy UTF8 bytes into the arena and store where they live:
uint8_t *utf8_final = arena;
memcpy(utf8_final, u8, sizeof(uint8_t[u8_len]));
utf8_final[u8_len] = '\0'; // Add a terminating NUL byte
- synthetic_graphemes[-grapheme_id-1].utf8 = utf8_final;
+ synthetic_graphemes[-grapheme_id - 1].utf8 = utf8_final;
arena += sizeof(uint8_t[u8_len + 1]);
// Sickos at the unicode consortium decreed that you can have grapheme clusters
// that begin with *prefix* modifiers, so we gotta check for that case:
- synthetic_graphemes[-grapheme_id-1].main_codepoint = length_prefixed[1];
+ synthetic_graphemes[-grapheme_id - 1].main_codepoint = length_prefixed[1];
for (ucs4_t i = 0; i < utf32_len; i++) {
#if _LIBUNISTRING_VERSION >= 0x010200
-// libuinstring version 1.2.0 introduced uc_is_property_prepended_concatenation_mark()
-// It's not critical, but it's technically more correct to have this check:
- if (unlikely(uc_is_property_prepended_concatenation_mark(length_prefixed[1+i])))
- continue;
+ // libuinstring version 1.2.0 introduced uc_is_property_prepended_concatenation_mark()
+ // It's not critical, but it's technically more correct to have this check:
+ if (unlikely(uc_is_property_prepended_concatenation_mark(length_prefixed[1 + i]))) continue;
#endif
- synthetic_graphemes[-grapheme_id-1].main_codepoint = length_prefixed[1+i];
+ synthetic_graphemes[-grapheme_id - 1].main_codepoint = length_prefixed[1 + i];
break;
}
@@ -276,8 +275,8 @@ public int32_t get_synthetic_grapheme(const ucs4_t *codepoints, int64_t utf32_le
#pragma GCC diagnostic pop
#endif
-public int Text$print(FILE *stream, Text_t t)
-{
+public
+int Text$print(FILE *stream, Text_t t) {
if (t.length == 0) return 0;
switch (t.tag) {
@@ -290,14 +289,14 @@ public int Text$print(FILE *stream, Text_t t)
if (grapheme >= 0) {
uint8_t buf[8];
size_t len = sizeof(buf);
- uint8_t *u8 = u32_to_u8((ucs4_t*)&grapheme, 1, buf, &len);
+ uint8_t *u8 = u32_to_u8((ucs4_t *)&grapheme, 1, buf, &len);
if (u8 == NULL) fail("Invalid grapheme encountered: ", grapheme);
written += (int)fwrite(u8, sizeof(char), len, stream);
if (u8 != buf) free(u8);
} else {
const uint8_t *u8 = GRAPHEME_UTF8(grapheme);
assert(u8);
- written += (int)fwrite(u8, sizeof(uint8_t), strlen((char*)u8), stream);
+ written += (int)fwrite(u8, sizeof(uint8_t), strlen((char *)u8), stream);
}
}
return written;
@@ -309,14 +308,14 @@ public int Text$print(FILE *stream, Text_t t)
if (grapheme >= 0) {
uint8_t buf[8];
size_t len = sizeof(buf);
- uint8_t *u8 = u32_to_u8((ucs4_t*)&grapheme, 1, buf, &len);
+ uint8_t *u8 = u32_to_u8((ucs4_t *)&grapheme, 1, buf, &len);
if (u8 == NULL) fail("Invalid grapheme encountered: ", grapheme);
written += (int)fwrite(u8, sizeof(char), len, stream);
if (u8 != buf) free(u8);
} else {
const uint8_t *u8 = GRAPHEME_UTF8(grapheme);
assert(u8);
- written += (int)fwrite(u8, sizeof(uint8_t), strlen((char*)u8), stream);
+ written += (int)fwrite(u8, sizeof(uint8_t), strlen((char *)u8), stream);
}
}
return written;
@@ -332,16 +331,16 @@ public int Text$print(FILE *stream, Text_t t)
static const int64_t min_len_for_depth[MAX_TEXT_DEPTH] = {
// Fibonacci numbers (skipping first two)
- 1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144, 233, 377, 610, 987, 1597, 2584, 4181, 6765, 10946,
- 17711, 28657, 46368, 75025, 121393, 196418, 317811, 514229, 832040, 1346269, 2178309, 3524578,
- 5702887, 9227465, 14930352, 24157817, 39088169, 63245986, 102334155, 165580141, 267914296,
- 433494437, 701408733, 1134903170, 1836311903, 2971215073, 4807526976, 7778742049,
+ 1, 2, 3, 5, 8, 13, 21, 34, 55, 89,
+ 144, 233, 377, 610, 987, 1597, 2584, 4181, 6765, 10946,
+ 17711, 28657, 46368, 75025, 121393, 196418, 317811, 514229, 832040, 1346269,
+ 2178309, 3524578, 5702887, 9227465, 14930352, 24157817, 39088169, 63245986, 102334155, 165580141,
+ 267914296, 433494437, 701408733, 1134903170, 1836311903, 2971215073, 4807526976, 7778742049,
};
#define IS_BALANCED_TEXT(t) ((t).length >= min_len_for_depth[(t).depth])
-static void insert_balanced_recursive(Text_t balanced_texts[MAX_TEXT_DEPTH], Text_t text)
-{
+static void insert_balanced_recursive(Text_t balanced_texts[MAX_TEXT_DEPTH], Text_t text) {
if (text.tag == TEXT_CONCAT && (!IS_BALANCED_TEXT(text) || text.depth >= MAX_TEXT_DEPTH)) {
insert_balanced_recursive(balanced_texts, *text.left);
insert_balanced_recursive(balanced_texts, *text.right);
@@ -370,8 +369,7 @@ static void insert_balanced_recursive(Text_t balanced_texts[MAX_TEXT_DEPTH], Tex
balanced_texts[i] = accumulator;
}
-static Text_t rebalanced(Text_t a, Text_t b)
-{
+static Text_t rebalanced(Text_t a, Text_t b) {
Text_t balanced_texts[MAX_TEXT_DEPTH];
memset(balanced_texts, 0, sizeof(balanced_texts));
insert_balanced_recursive(balanced_texts, a);
@@ -379,14 +377,12 @@ static Text_t rebalanced(Text_t a, Text_t b)
Text_t ret = EMPTY_TEXT;
for (int i = 0; ret.length < a.length + b.length; i++) {
- if (balanced_texts[i].length)
- ret = simple_concatenation(balanced_texts[i], ret);
+ if (balanced_texts[i].length) ret = simple_concatenation(balanced_texts[i], ret);
}
return ret;
}
-Text_t simple_concatenation(Text_t a, Text_t b)
-{
+Text_t simple_concatenation(Text_t a, Text_t b) {
if (a.length == 0) return b;
if (b.length == 0) return a;
@@ -395,53 +391,53 @@ Text_t simple_concatenation(Text_t a, Text_t b)
// every concatenation to yield a balanced text, since many concatenations
// are ephemeral (e.g. doing a loop repeatedly concatenating without using
// the intermediary values).
- if (new_depth >= MAX_TEXT_DEPTH)
- return rebalanced(a, b);
+ if (new_depth >= MAX_TEXT_DEPTH) return rebalanced(a, b);
Text_t *children = GC_MALLOC(sizeof(Text_t[2]));
children[0] = a;
children[1] = b;
return (Text_t){
- .tag=TEXT_CONCAT,
- .length=a.length + b.length,
- .depth=new_depth,
- .left=&children[0],
- .right=&children[1],
+ .tag = TEXT_CONCAT,
+ .length = a.length + b.length,
+ .depth = new_depth,
+ .left = &children[0],
+ .right = &children[1],
};
}
-static Text_t concat2_assuming_safe(Text_t a, Text_t b)
-{
+static Text_t concat2_assuming_safe(Text_t a, Text_t b) {
if (a.length == 0) return b;
if (b.length == 0) return a;
if (a.tag == TEXT_ASCII && b.tag == TEXT_ASCII && (size_t)(a.length + b.length) <= SHORT_ASCII_LENGTH) {
struct Text_s ret = {
- .tag=TEXT_ASCII,
- .length=a.length + b.length,
+ .tag = TEXT_ASCII,
+ .length = a.length + b.length,
};
ret.ascii = GC_MALLOC_ATOMIC(sizeof(char[ret.length]));
- memcpy((char*)ret.ascii, a.ascii, sizeof(char[a.length]));
- memcpy((char*)&ret.ascii[a.length], b.ascii, sizeof(char[b.length]));
+ memcpy((char *)ret.ascii, a.ascii, sizeof(char[a.length]));
+ memcpy((char *)&ret.ascii[a.length], b.ascii, sizeof(char[b.length]));
return ret;
- } else if (a.tag == TEXT_GRAPHEMES && b.tag == TEXT_GRAPHEMES && (size_t)(a.length + b.length) <= SHORT_GRAPHEMES_LENGTH) {
+ } else if (a.tag == TEXT_GRAPHEMES && b.tag == TEXT_GRAPHEMES
+ && (size_t)(a.length + b.length) <= SHORT_GRAPHEMES_LENGTH) {
struct Text_s ret = {
- .tag=TEXT_GRAPHEMES,
- .length=a.length + b.length,
+ .tag = TEXT_GRAPHEMES,
+ .length = a.length + b.length,
};
ret.graphemes = GC_MALLOC_ATOMIC(sizeof(int32_t[ret.length]));
- memcpy((int32_t*)ret.graphemes, a.graphemes, sizeof(int32_t[a.length]));
- memcpy((int32_t*)&ret.graphemes[a.length], b.graphemes, sizeof(int32_t[b.length]));
+ memcpy((int32_t *)ret.graphemes, a.graphemes, sizeof(int32_t[a.length]));
+ memcpy((int32_t *)&ret.graphemes[a.length], b.graphemes, sizeof(int32_t[b.length]));
return ret;
- } else if (a.tag != TEXT_CONCAT && b.tag != TEXT_CONCAT && (size_t)(a.length + b.length) <= SHORT_GRAPHEMES_LENGTH) {
+ } else if (a.tag != TEXT_CONCAT && b.tag != TEXT_CONCAT
+ && (size_t)(a.length + b.length) <= SHORT_GRAPHEMES_LENGTH) {
// Turn a small bit of ASCII into graphemes if it helps make things smaller
// Text structs come with an extra 8 bytes, so allocate enough to hold the text
struct Text_s ret = {
- .tag=TEXT_GRAPHEMES,
- .length=a.length + b.length,
+ .tag = TEXT_GRAPHEMES,
+ .length = a.length + b.length,
};
ret.graphemes = GC_MALLOC_ATOMIC(sizeof(int32_t[ret.length]));
- int32_t *dest = (int32_t*)ret.graphemes;
+ int32_t *dest = (int32_t *)ret.graphemes;
if (a.tag == TEXT_GRAPHEMES) {
memcpy(dest, a.graphemes, sizeof(int32_t[a.length]));
dest += a.length;
@@ -474,12 +470,11 @@ static Text_t concat2_assuming_safe(Text_t a, Text_t b)
return simple_concatenation(a, b);
}
-static Text_t concat2(Text_t a, Text_t b)
-{
+static Text_t concat2(Text_t a, Text_t b) {
if (a.length == 0) return b;
if (b.length == 0) return a;
- int32_t last_a = Text$get_grapheme(a, a.length-1);
+ int32_t last_a = Text$get_grapheme(a, a.length - 1);
int32_t first_b = Text$get_grapheme(b, 0);
// Magic number, we know that no codepoints below here trigger instability:
@@ -509,60 +504,51 @@ static Text_t concat2(Text_t a, Text_t b)
// Do a normalization run for these two codepoints and see if it looks different.
// Normalization should not exceed 3x in the input length (but if it does, it will be
// handled gracefully)
- ucs4_t norm_buf[3*len];
- size_t norm_length = sizeof(norm_buf)/sizeof(norm_buf[0]);
+ ucs4_t norm_buf[3 * len];
+ size_t norm_length = sizeof(norm_buf) / sizeof(norm_buf[0]);
ucs4_t *normalized = u32_normalize(UNINORM_NFC, codepoints, len, norm_buf, &norm_length);
bool stable = (norm_length == len && memcmp(codepoints, normalized, sizeof(codepoints)) == 0);
if (stable) {
const void *second_grapheme = u32_grapheme_next(normalized, &normalized[norm_length]);
- if (second_grapheme == &normalized[norm_length])
- stable = false;
+ if (second_grapheme == &normalized[norm_length]) stable = false;
}
if likely (stable) {
- if (normalized != norm_buf)
- free(normalized);
+ if (normalized != norm_buf) free(normalized);
return concat2_assuming_safe(a, b);
}
- Text_t glue = Text$from_codepoints((List_t){.data=norm_buf, .length=(int64_t)norm_length, .stride=sizeof(int32_t)});
+ Text_t glue =
+ Text$from_codepoints((List_t){.data = norm_buf, .length = (int64_t)norm_length, .stride = sizeof(int32_t)});
- if (normalized != norm_buf)
- free(normalized);
+ if (normalized != norm_buf) free(normalized);
- if (a.length == 1 && b.length == 1)
- return glue;
- else if (a.length == 1)
- return concat2_assuming_safe(glue, Text$slice(b, I(2), I(b.length)));
- else if (b.length == 1)
- return concat2_assuming_safe(Text$slice(a, I(1), I(a.length-1)), glue);
+ if (a.length == 1 && b.length == 1) return glue;
+ else if (a.length == 1) return concat2_assuming_safe(glue, Text$slice(b, I(2), I(b.length)));
+ else if (b.length == 1) return concat2_assuming_safe(Text$slice(a, I(1), I(a.length - 1)), glue);
else
- return concat2_assuming_safe(
- concat2_assuming_safe(Text$slice(a, I(1), I(a.length-1)), glue),
- Text$slice(b, I(2), I(b.length)));
+ return concat2_assuming_safe(concat2_assuming_safe(Text$slice(a, I(1), I(a.length - 1)), glue),
+ Text$slice(b, I(2), I(b.length)));
}
-public Text_t Text$_concat(int n, Text_t items[n])
-{
+public
+Text_t Text$_concat(int n, Text_t items[n]) {
if (n == 0) return EMPTY_TEXT;
Text_t ret = items[0];
for (int i = 1; i < n; i++) {
- if (items[i].length > 0)
- ret = concat2(ret, items[i]);
+ if (items[i].length > 0) ret = concat2(ret, items[i]);
}
return ret;
}
-public Text_t Text$repeat(Text_t text, Int_t count)
-{
- if (text.length == 0 || Int$is_negative(count))
- return EMPTY_TEXT;
+public
+Text_t Text$repeat(Text_t text, Int_t count) {
+ if (text.length == 0 || Int$is_negative(count)) return EMPTY_TEXT;
Int_t result_len = Int$times(count, I(text.length));
- if (Int$compare_value(result_len, I(1l<<40)) > 0)
- fail("Text repeating would produce too big of an result!");
+ if (Int$compare_value(result_len, I(1l << 40)) > 0) fail("Text repeating would produce too big of an result!");
int64_t count64 = Int64$from_int(count, false);
Text_t ret = text;
@@ -571,19 +557,17 @@ public Text_t Text$repeat(Text_t text, Int_t count)
return ret;
}
-public Int_t Text$width(Text_t text, Text_t language)
-{
- int width = u8_strwidth((const uint8_t*)Text$as_c_string(text), Text$as_c_string(language));
+public
+Int_t Text$width(Text_t text, Text_t language) {
+ int width = u8_strwidth((const uint8_t *)Text$as_c_string(text), Text$as_c_string(language));
return Int$from_int32(width);
}
-static Text_t Text$repeat_to_width(Text_t to_repeat, int64_t target_width, Text_t language)
-{
- if (target_width <= 0)
- return EMPTY_TEXT;
+static Text_t Text$repeat_to_width(Text_t to_repeat, int64_t target_width, Text_t language) {
+ if (target_width <= 0) return EMPTY_TEXT;
const char *lang_str = Text$as_c_string(language);
- int64_t width = (int64_t)u8_strwidth((const uint8_t*)Text$as_c_string(to_repeat), lang_str);
+ int64_t width = (int64_t)u8_strwidth((const uint8_t *)Text$as_c_string(to_repeat), lang_str);
Text_t repeated = EMPTY_TEXT;
int64_t repeated_width = 0;
while (repeated_width + width <= target_width) {
@@ -593,8 +577,8 @@ static Text_t Text$repeat_to_width(Text_t to_repeat, int64_t target_width, Text_
if (repeated_width < target_width) {
for (int64_t i = 0; repeated_width < target_width && i < to_repeat.length; i++) {
- Text_t c = Text$slice(to_repeat, I_small(i+1), I_small(i+1));
- int64_t w = (int64_t)u8_strwidth((const uint8_t*)Text$as_c_string(c), lang_str);
+ Text_t c = Text$slice(to_repeat, I_small(i + 1), I_small(i + 1));
+ int64_t w = (int64_t)u8_strwidth((const uint8_t *)Text$as_c_string(c), lang_str);
if (repeated_width + w > target_width) {
repeated = concat2(repeated, Text$repeat(Text(" "), I(target_width - repeated_width)));
repeated_width = target_width;
@@ -608,35 +592,33 @@ static Text_t Text$repeat_to_width(Text_t to_repeat, int64_t target_width, Text_
return repeated;
}
-public Text_t Text$left_pad(Text_t text, Int_t width, Text_t padding, Text_t language)
-{
- if (padding.length == 0)
- fail("Cannot pad with an empty text!");
+public
+Text_t Text$left_pad(Text_t text, Int_t width, Text_t padding, Text_t language) {
+ if (padding.length == 0) fail("Cannot pad with an empty text!");
int64_t needed = Int64$from_int(width, false) - Int64$from_int(Text$width(text, language), false);
return concat2(Text$repeat_to_width(padding, needed, language), text);
}
-public Text_t Text$right_pad(Text_t text, Int_t width, Text_t padding, Text_t language)
-{
- if (padding.length == 0)
- fail("Cannot pad with an empty text!");
+public
+Text_t Text$right_pad(Text_t text, Int_t width, Text_t padding, Text_t language) {
+ if (padding.length == 0) fail("Cannot pad with an empty text!");
int64_t needed = Int64$from_int(width, false) - Int64$from_int(Text$width(text, language), false);
return concat2(text, Text$repeat_to_width(padding, needed, language));
}
-public Text_t Text$middle_pad(Text_t text, Int_t width, Text_t padding, Text_t language)
-{
- if (padding.length == 0)
- fail("Cannot pad with an empty text!");
+public
+Text_t Text$middle_pad(Text_t text, Int_t width, Text_t padding, Text_t language) {
+ if (padding.length == 0) fail("Cannot pad with an empty text!");
int64_t needed = Int64$from_int(width, false) - Int64$from_int(Text$width(text, language), false);
- return Texts(Text$repeat_to_width(padding, needed/2, language), text, Text$repeat_to_width(padding, (needed+1)/2, language));
+ return Texts(Text$repeat_to_width(padding, needed / 2, language), text,
+ Text$repeat_to_width(padding, (needed + 1) / 2, language));
}
-public Text_t Text$slice(Text_t text, Int_t first_int, Int_t last_int)
-{
+public
+Text_t Text$slice(Text_t text, Int_t first_int, Int_t last_int) {
int64_t first = Int64$from_int(first_int, false);
int64_t last = Int64$from_int(last_int, false);
if (first == 0) fail("Invalid index: 0");
@@ -647,11 +629,9 @@ public Text_t Text$slice(Text_t text, Int_t first_int, Int_t last_int)
if (last > text.length) last = text.length;
- if (first > text.length || last < first)
- return EMPTY_TEXT;
+ if (first > text.length || last < first) return EMPTY_TEXT;
- if (first == 1 && last == text.length)
- return text;
+ if (first == 1 && last == text.length) return text;
while (text.tag == TEXT_CONCAT) {
if (last < text.left->length) {
@@ -662,31 +642,31 @@ public Text_t Text$slice(Text_t text, Int_t first_int, Int_t last_int)
text = *text.right;
} else {
return concat2_assuming_safe(Text$slice(*text.left, I(first), I(text.length)),
- Text$slice(*text.right, I(1), I(last-text.left->length)));
+ Text$slice(*text.right, I(1), I(last - text.left->length)));
}
}
switch (text.tag) {
case TEXT_ASCII: {
return (Text_t){
- .tag=TEXT_ASCII,
- .length=last - first + 1,
- .ascii=text.ascii + (first-1),
+ .tag = TEXT_ASCII,
+ .length = last - first + 1,
+ .ascii = text.ascii + (first - 1),
};
}
case TEXT_GRAPHEMES: {
return (Text_t){
- .tag=TEXT_GRAPHEMES,
- .length=last - first + 1,
- .graphemes=text.graphemes + (first-1),
+ .tag = TEXT_GRAPHEMES,
+ .length = last - first + 1,
+ .graphemes = text.graphemes + (first - 1),
};
}
case TEXT_BLOB: {
Text_t ret = (Text_t){
- .tag=TEXT_BLOB,
- .length=last - first + 1,
- .blob.map=text.blob.map,
- .blob.bytes=text.blob.bytes + (first-1),
+ .tag = TEXT_BLOB,
+ .length = last - first + 1,
+ .blob.map = text.blob.map,
+ .blob.bytes = text.blob.bytes + (first - 1),
};
return ret;
}
@@ -695,48 +675,44 @@ public Text_t Text$slice(Text_t text, Int_t first_int, Int_t last_int)
return EMPTY_TEXT;
}
-public Text_t Text$from(Text_t text, Int_t first)
-{
- return Text$slice(text, first, I_small(-1));
-}
+public
+Text_t Text$from(Text_t text, Int_t first) { return Text$slice(text, first, I_small(-1)); }
-public Text_t Text$to(Text_t text, Int_t last)
-{
- return Text$slice(text, I_small(1), last);
-}
+public
+Text_t Text$to(Text_t text, Int_t last) { return Text$slice(text, I_small(1), last); }
-public Text_t Text$reversed(Text_t text)
-{
+public
+Text_t Text$reversed(Text_t text) {
switch (text.tag) {
case TEXT_ASCII: {
struct Text_s ret = {
- .tag=TEXT_ASCII,
- .length=text.length,
+ .tag = TEXT_ASCII,
+ .length = text.length,
};
ret.ascii = GC_MALLOC_ATOMIC(sizeof(char[ret.length]));
for (int64_t i = 0; i < text.length; i++)
- ((char*)ret.ascii)[text.length-1-i] = text.ascii[i];
+ ((char *)ret.ascii)[text.length - 1 - i] = text.ascii[i];
return ret;
}
case TEXT_GRAPHEMES: {
struct Text_s ret = {
- .tag=TEXT_GRAPHEMES,
- .length=text.length,
+ .tag = TEXT_GRAPHEMES,
+ .length = text.length,
};
ret.graphemes = GC_MALLOC_ATOMIC(sizeof(int32_t[ret.length]));
for (int64_t i = 0; i < text.length; i++)
- ((int32_t*)ret.graphemes)[text.length-1-i] = text.graphemes[i];
+ ((int32_t *)ret.graphemes)[text.length - 1 - i] = text.graphemes[i];
return ret;
}
case TEXT_BLOB: {
struct Text_s ret = {
- .tag=TEXT_BLOB,
- .length=text.length,
- .blob.map=text.blob.map,
+ .tag = TEXT_BLOB,
+ .length = text.length,
+ .blob.map = text.blob.map,
};
ret.blob.bytes = GC_MALLOC_ATOMIC(sizeof(uint8_t[ret.length]));
for (int64_t i = 0; i < text.length; i++)
- ((uint8_t*)ret.blob.bytes)[text.length-1-i] = text.graphemes[i];
+ ((uint8_t *)ret.blob.bytes)[text.length - 1 - i] = text.graphemes[i];
return ret;
}
case TEXT_CONCAT: {
@@ -747,32 +723,30 @@ public Text_t Text$reversed(Text_t text)
return EMPTY_TEXT;
}
-public PUREFUNC Text_t Text$cluster(Text_t text, Int_t index)
-{
- return Text$slice(text, index, index);
-}
+public
+PUREFUNC Text_t Text$cluster(Text_t text, Int_t index) { return Text$slice(text, index, index); }
-static Text_t Text$from_components(List_t graphemes, Table_t unique_clusters)
-{
- size_t blob_size = (
- sizeof(int32_t[unique_clusters.entries.length])
- + sizeof(uint8_t[graphemes.length]));
+static Text_t Text$from_components(List_t graphemes, Table_t unique_clusters) {
+ size_t blob_size = (sizeof(int32_t[unique_clusters.entries.length]) + sizeof(uint8_t[graphemes.length]));
// If blob optimization will save at least 200 bytes:
if (unique_clusters.entries.length <= 256 && blob_size + 200 < sizeof(int32_t[graphemes.length])) {
Text_t ret = {
- .tag=TEXT_BLOB,
- .length=graphemes.length,
- .depth=0,
+ .tag = TEXT_BLOB,
+ .length = graphemes.length,
+ .depth = 0,
};
void *blob = GC_MALLOC_ATOMIC(blob_size);
int32_t *map = blob;
uint8_t *bytes = blob + sizeof(int32_t[unique_clusters.entries.length]);
for (int64_t i = 0; i < unique_clusters.entries.length; i++) {
- struct { int32_t g; uint8_t b; } *entry = unique_clusters.entries.data + i*unique_clusters.entries.stride;
+ struct {
+ int32_t g;
+ uint8_t b;
+ } *entry = unique_clusters.entries.data + i * unique_clusters.entries.stride;
map[entry->b] = entry->g;
}
for (int64_t i = 0; i < graphemes.length; i++) {
- int32_t g = *(int32_t*)(graphemes.data + i*graphemes.stride);
+ int32_t g = *(int32_t *)(graphemes.data + i * graphemes.stride);
uint8_t *byte = Table$get(unique_clusters, &g, Table$info(&Int32$info, &Byte$info));
assert(byte);
bytes[i] = *byte;
@@ -782,15 +756,15 @@ static Text_t Text$from_components(List_t graphemes, Table_t unique_clusters)
return ret;
} else {
return (Text_t){
- .tag=TEXT_GRAPHEMES,
- .length=graphemes.length,
- .graphemes=graphemes.data,
+ .tag = TEXT_GRAPHEMES,
+ .length = graphemes.length,
+ .graphemes = graphemes.data,
};
}
}
-public OptionalText_t Text$from_strn(const char *str, size_t len)
-{
+public
+OptionalText_t Text$from_strn(const char *str, size_t len) {
int64_t ascii_span = 0;
for (size_t i = 0; i < len && isascii(str[i]); i++)
ascii_span++;
@@ -799,52 +773,48 @@ public OptionalText_t Text$from_strn(const char *str, size_t len)
char *copy = GC_MALLOC_ATOMIC(len);
memcpy(copy, str, len);
return (Text_t){
- .tag=TEXT_ASCII,
- .length=ascii_span,
- .ascii=copy,
+ .tag = TEXT_ASCII,
+ .length = ascii_span,
+ .ascii = copy,
};
}
- if (u8_check((uint8_t*)str, len) != NULL)
- return NONE_TEXT;
+ if (u8_check((uint8_t *)str, len) != NULL) return NONE_TEXT;
List_t graphemes = {};
Table_t unique_clusters = {};
- const uint8_t *pos = (const uint8_t*)str;
- const uint8_t *end = (const uint8_t*)&str[len];
+ const uint8_t *pos = (const uint8_t *)str;
+ const uint8_t *end = (const uint8_t *)&str[len];
// Iterate over grapheme clusters
- for (const uint8_t *next; (next=u8_grapheme_next(pos, end)); pos = next) {
+ for (const uint8_t *next; (next = u8_grapheme_next(pos, end)); pos = next) {
uint32_t buf[256];
- size_t u32_len = sizeof(buf)/sizeof(buf[0]);
- uint32_t *u32s = u8_to_u32(pos, (size_t)(next-pos), buf, &u32_len);
+ size_t u32_len = sizeof(buf) / sizeof(buf[0]);
+ uint32_t *u32s = u8_to_u32(pos, (size_t)(next - pos), buf, &u32_len);
uint32_t buf2[256];
- size_t u32_normlen = sizeof(buf2)/sizeof(buf2[0]);
+ size_t u32_normlen = sizeof(buf2) / sizeof(buf2[0]);
uint32_t *u32s_normalized = u32_normalize(UNINORM_NFC, u32s, u32_len, buf2, &u32_normlen);
int32_t g = get_synthetic_grapheme(u32s_normalized, (int64_t)u32_normlen);
List$insert(&graphemes, &g, I(0), sizeof(int32_t));
- Table$get_or_setdefault(&unique_clusters, int32_t, uint8_t, g, (uint8_t)unique_clusters.entries.length, Table$info(&Int32$info, &Byte$info));
+ Table$get_or_setdefault(&unique_clusters, int32_t, uint8_t, g, (uint8_t)unique_clusters.entries.length,
+ Table$info(&Int32$info, &Byte$info));
if (u32s != buf) free(u32s);
if (u32s_normalized != buf2) free(u32s_normalized);
if (unique_clusters.entries.length >= 256) {
- return concat2_assuming_safe(
- Text$from_components(graphemes, unique_clusters),
- Text$from_strn((const char*)next, (size_t)(end-next)));
+ return concat2_assuming_safe(Text$from_components(graphemes, unique_clusters),
+ Text$from_strn((const char *)next, (size_t)(end - next)));
}
}
return Text$from_components(graphemes, unique_clusters);
}
-public OptionalText_t Text$from_str(const char *str)
-{
- return str ? Text$from_strn(str, strlen(str)) : Text("");
-}
+public
+OptionalText_t Text$from_str(const char *str) { return str ? Text$from_strn(str, strlen(str)) : Text(""); }
-static void u8_buf_append(Text_t text, char **buf, int64_t *capacity, int64_t *i)
-{
+static void u8_buf_append(Text_t text, char **buf, int64_t *capacity, int64_t *i) {
switch (text.tag) {
case TEXT_ASCII: {
if (*i + text.length > (int64_t)*capacity) {
@@ -863,7 +833,7 @@ static void u8_buf_append(Text_t text, char **buf, int64_t *capacity, int64_t *i
if (graphemes[g] >= 0) {
uint8_t u8_buf[64];
size_t u8_len = sizeof(u8_buf);
- uint8_t *u8 = u32_to_u8((ucs4_t*)&graphemes[g], 1, u8_buf, &u8_len);
+ uint8_t *u8 = u32_to_u8((ucs4_t *)&graphemes[g], 1, u8_buf, &u8_len);
if (u8 == NULL) fail("Invalid grapheme encountered: ", graphemes[g]);
if (*i + (int64_t)u8_len > (int64_t)*capacity) {
@@ -894,7 +864,7 @@ static void u8_buf_append(Text_t text, char **buf, int64_t *capacity, int64_t *i
if (grapheme >= 0) {
uint8_t u8_buf[64];
size_t u8_len = sizeof(u8_buf);
- uint8_t *u8 = u32_to_u8((ucs4_t*)&grapheme, 1, u8_buf, &u8_len);
+ uint8_t *u8 = u32_to_u8((ucs4_t *)&grapheme, 1, u8_buf, &u8_len);
if (u8 == NULL) fail("Invalid grapheme encountered: ", grapheme);
if (*i + (int64_t)u8_len > (int64_t)*capacity) {
@@ -928,8 +898,8 @@ static void u8_buf_append(Text_t text, char **buf, int64_t *capacity, int64_t *i
}
}
-public char *Text$as_c_string(Text_t text)
-{
+public
+char *Text$as_c_string(Text_t text) {
int64_t capacity = text.length + 1;
char *buf = GC_MALLOC_ATOMIC((size_t)capacity);
int64_t i = 0;
@@ -943,10 +913,9 @@ public char *Text$as_c_string(Text_t text)
return buf;
}
-PUREFUNC public uint64_t Text$hash(const void *obj, const TypeInfo_t *info)
-{
+PUREFUNC public uint64_t Text$hash(const void *obj, const TypeInfo_t *info) {
(void)info;
- Text_t text = *(Text_t*)obj;
+ Text_t text = *(Text_t *)obj;
siphash sh;
siphashinit(&sh, sizeof(int32_t[text.length]));
@@ -959,40 +928,41 @@ PUREFUNC public uint64_t Text$hash(const void *obj, const TypeInfo_t *info)
const char *bytes = text.ascii;
for (int64_t i = 0; i + 1 < text.length; i += 2) {
tmp.chunks[0] = (int32_t)bytes[i];
- tmp.chunks[1] = (int32_t)bytes[i+1];
+ tmp.chunks[1] = (int32_t)bytes[i + 1];
siphashadd64bits(&sh, tmp.whole);
}
- int32_t last = text.length & 0x1 ? (int32_t)bytes[text.length-1] : 0; // Odd number of graphemes
+ int32_t last = text.length & 0x1 ? (int32_t)bytes[text.length - 1] : 0; // Odd number of graphemes
return siphashfinish_last_part(&sh, (uint64_t)last);
}
case TEXT_GRAPHEMES: {
const int32_t *graphemes = text.graphemes;
for (int64_t i = 0; i + 1 < text.length; i += 2) {
tmp.chunks[0] = graphemes[i];
- tmp.chunks[1] = graphemes[i+1];
+ tmp.chunks[1] = graphemes[i + 1];
siphashadd64bits(&sh, tmp.whole);
}
- int32_t last = text.length & 0x1 ? graphemes[text.length-1] : 0; // Odd number of graphemes
+ int32_t last = text.length & 0x1 ? graphemes[text.length - 1] : 0; // Odd number of graphemes
return siphashfinish_last_part(&sh, (uint64_t)last);
}
case TEXT_BLOB: {
for (int64_t i = 0; i + 1 < text.length; i += 2) {
tmp.chunks[0] = text.blob.map[text.blob.bytes[i]];
- tmp.chunks[1] = text.blob.map[text.blob.bytes[i+1]];
+ tmp.chunks[1] = text.blob.map[text.blob.bytes[i + 1]];
siphashadd64bits(&sh, tmp.whole);
}
- int32_t last = text.length & 0x1 ? text.blob.map[text.blob.bytes[text.length-1]] : 0; // Odd number of graphemes
+ int32_t last =
+ text.length & 0x1 ? text.blob.map[text.blob.bytes[text.length - 1]] : 0; // Odd number of graphemes
return siphashfinish_last_part(&sh, (uint64_t)last);
}
case TEXT_CONCAT: {
TextIter_t state = NEW_TEXT_ITER_STATE(text);
for (int64_t i = 0; i + 1 < text.length; i += 2) {
tmp.chunks[0] = Text$get_grapheme_fast(&state, i);
- tmp.chunks[1] = Text$get_grapheme_fast(&state, i+1);
+ tmp.chunks[1] = Text$get_grapheme_fast(&state, i + 1);
siphashadd64bits(&sh, tmp.whole);
}
- int32_t last = (text.length & 0x1) ? Text$get_grapheme_fast(&state, text.length-1) : 0;
+ int32_t last = (text.length & 0x1) ? Text$get_grapheme_fast(&state, text.length - 1) : 0;
return siphashfinish_last_part(&sh, (uint64_t)last);
}
default: errx(1, "Invalid text");
@@ -1000,8 +970,8 @@ PUREFUNC public uint64_t Text$hash(const void *obj, const TypeInfo_t *info)
return 0;
}
-public int32_t Text$get_grapheme_fast(TextIter_t *state, int64_t index)
-{
+public
+int32_t Text$get_grapheme_fast(TextIter_t *state, int64_t index) {
if (index < 0) return 0;
if (index >= state->stack[0].text.length) return 0;
@@ -1051,18 +1021,17 @@ public int32_t Text$get_grapheme_fast(TextIter_t *state, int64_t index)
return 0;
}
-public uint32_t Text$get_main_grapheme_fast(TextIter_t *state, int64_t index)
-{
+public
+uint32_t Text$get_main_grapheme_fast(TextIter_t *state, int64_t index) {
int32_t g = Text$get_grapheme_fast(state, index);
return (g) >= 0 ? (ucs4_t)(g) : synthetic_graphemes[-(g)-1].main_codepoint;
}
-PUREFUNC public int32_t Text$compare(const void *va, const void *vb, const TypeInfo_t *info)
-{
+PUREFUNC public int32_t Text$compare(const void *va, const void *vb, const TypeInfo_t *info) {
(void)info;
if (va == vb) return 0;
- const Text_t a = *(const Text_t*)va;
- const Text_t b = *(const Text_t*)vb;
+ const Text_t a = *(const Text_t *)va;
+ const Text_t b = *(const Text_t *)vb;
// TODO: make this smarter and more efficient
int64_t len = MAX(a.length, b.length);
@@ -1073,31 +1042,21 @@ PUREFUNC public int32_t Text$compare(const void *va, const void *vb, const TypeI
if (ai == bi) continue;
int32_t cmp;
if (ai > 0 && bi > 0) {
- cmp = u32_cmp((ucs4_t*)&ai, (ucs4_t*)&bi, 1);
+ cmp = u32_cmp((ucs4_t *)&ai, (ucs4_t *)&bi, 1);
} else if (ai > 0) {
- cmp = u32_cmp2(
- (ucs4_t*)&ai, 1,
- GRAPHEME_CODEPOINTS(bi),
- NUM_GRAPHEME_CODEPOINTS(bi));
+ cmp = u32_cmp2((ucs4_t *)&ai, 1, GRAPHEME_CODEPOINTS(bi), NUM_GRAPHEME_CODEPOINTS(bi));
} else if (bi > 0) {
- cmp = u32_cmp2(
- GRAPHEME_CODEPOINTS(ai),
- NUM_GRAPHEME_CODEPOINTS(ai),
- (ucs4_t*)&bi, 1);
+ cmp = u32_cmp2(GRAPHEME_CODEPOINTS(ai), NUM_GRAPHEME_CODEPOINTS(ai), (ucs4_t *)&bi, 1);
} else {
- cmp = u32_cmp2(
- GRAPHEME_CODEPOINTS(ai),
- NUM_GRAPHEME_CODEPOINTS(ai),
- GRAPHEME_CODEPOINTS(bi),
- NUM_GRAPHEME_CODEPOINTS(bi));
+ cmp = u32_cmp2(GRAPHEME_CODEPOINTS(ai), NUM_GRAPHEME_CODEPOINTS(ai), GRAPHEME_CODEPOINTS(bi),
+ NUM_GRAPHEME_CODEPOINTS(bi));
}
if (cmp != 0) return cmp;
}
return 0;
}
-bool _matches(TextIter_t *text_state, TextIter_t *target_state, int64_t pos)
-{
+bool _matches(TextIter_t *text_state, TextIter_t *target_state, int64_t pos) {
for (int64_t i = 0; i < target_state->stack[0].text.length; i++) {
int32_t text_i = Text$get_grapheme_fast(text_state, pos + i);
int32_t prefix_i = Text$get_grapheme_fast(target_state, i);
@@ -1106,10 +1065,8 @@ bool _matches(TextIter_t *text_state, TextIter_t *target_state, int64_t pos)
return true;
}
-PUREFUNC public bool Text$starts_with(Text_t text, Text_t prefix, Text_t *remainder)
-{
- if (text.length < prefix.length)
- return false;
+PUREFUNC public bool Text$starts_with(Text_t text, Text_t prefix, Text_t *remainder) {
+ if (text.length < prefix.length) return false;
TextIter_t text_state = NEW_TEXT_ITER_STATE(text), prefix_state = NEW_TEXT_ITER_STATE(prefix);
if (_matches(&text_state, &prefix_state, 0)) {
if (remainder) *remainder = Text$from(text, Int$from_int64(prefix.length + 1));
@@ -1120,10 +1077,8 @@ PUREFUNC public bool Text$starts_with(Text_t text, Text_t prefix, Text_t *remain
}
}
-PUREFUNC public bool Text$ends_with(Text_t text, Text_t suffix, Text_t *remainder)
-{
- if (text.length < suffix.length)
- return false;
+PUREFUNC public bool Text$ends_with(Text_t text, Text_t suffix, Text_t *remainder) {
+ if (text.length < suffix.length) return false;
TextIter_t text_state = NEW_TEXT_ITER_STATE(text), suffix_state = NEW_TEXT_ITER_STATE(suffix);
if (_matches(&text_state, &suffix_state, text.length - suffix.length)) {
if (remainder) *remainder = Text$to(text, Int$from_int64(text.length - suffix.length));
@@ -1134,18 +1089,17 @@ PUREFUNC public bool Text$ends_with(Text_t text, Text_t suffix, Text_t *remainde
}
}
-public Text_t Text$without_prefix(Text_t text, Text_t prefix)
-{
+public
+Text_t Text$without_prefix(Text_t text, Text_t prefix) {
return Text$starts_with(text, prefix, NULL) ? Text$slice(text, I(prefix.length + 1), I(text.length)) : text;
}
-public Text_t Text$without_suffix(Text_t text, Text_t suffix)
-{
+public
+Text_t Text$without_suffix(Text_t text, Text_t suffix) {
return Text$ends_with(text, suffix, NULL) ? Text$slice(text, I(1), I(text.length - suffix.length)) : text;
}
-static bool _has_grapheme(TextIter_t *text, int32_t g)
-{
+static bool _has_grapheme(TextIter_t *text, int32_t g) {
for (int64_t t = 0; t < text->stack[0].text.length; t++) {
if (g == Text$get_grapheme_fast(text, t)) {
return true;
@@ -1154,8 +1108,8 @@ static bool _has_grapheme(TextIter_t *text, int32_t g)
return false;
}
-public Text_t Text$trim(Text_t text, Text_t to_trim, bool left, bool right)
-{
+public
+Text_t Text$trim(Text_t text, Text_t to_trim, bool left, bool right) {
int64_t first = 0;
TextIter_t text_state = NEW_TEXT_ITER_STATE(text), trim_state = NEW_TEXT_ITER_STATE(to_trim);
if (left) {
@@ -1163,28 +1117,29 @@ public Text_t Text$trim(Text_t text, Text_t to_trim, bool left, bool right)
first += 1;
}
}
- int64_t last = text.length-1;
+ int64_t last = text.length - 1;
if (right) {
while (last >= first && _has_grapheme(&trim_state, Text$get_grapheme_fast(&text_state, last))) {
last -= 1;
}
}
- return (first != 0 || last != text.length-1) ? Text$slice(text, I(first+1), I(last+1)) : text;
+ return (first != 0 || last != text.length - 1) ? Text$slice(text, I(first + 1), I(last + 1)) : text;
}
-public Text_t Text$translate(Text_t text, Table_t translations)
-{
+public
+Text_t Text$translate(Text_t text, Table_t translations) {
TextIter_t text_state = NEW_TEXT_ITER_STATE(text);
Text_t result = EMPTY_TEXT;
int64_t span_start = 0;
List_t replacement_list = translations.entries;
- for (int64_t i = 0; i < text.length; ) {
+ for (int64_t i = 0; i < text.length;) {
for (int64_t r = 0; r < replacement_list.length; r++) {
- struct { Text_t target, replacement; } *entry = replacement_list.data + r*replacement_list.stride;
+ struct {
+ Text_t target, replacement;
+ } *entry = replacement_list.data + r * replacement_list.stride;
TextIter_t target_state = NEW_TEXT_ITER_STATE(entry->target);
if (_matches(&text_state, &target_state, i)) {
- if (i > span_start)
- result = concat2(result, Text$slice(text, I(span_start+1), I(i)));
+ if (i > span_start) result = concat2(result, Text$slice(text, I(span_start + 1), I(i)));
result = concat2(result, entry->replacement);
i += entry->target.length;
@@ -1193,22 +1148,21 @@ public Text_t Text$translate(Text_t text, Table_t translations)
}
}
i += 1;
- found_match: continue;
+ found_match:
+ continue;
}
- if (span_start < text.length)
- result = concat2(result, Text$slice(text, I(span_start+1), I(text.length)));
+ if (span_start < text.length) result = concat2(result, Text$slice(text, I(span_start + 1), I(text.length)));
return result;
}
-public Text_t Text$replace(Text_t text, Text_t target, Text_t replacement)
-{
+public
+Text_t Text$replace(Text_t text, Text_t target, Text_t replacement) {
TextIter_t text_state = NEW_TEXT_ITER_STATE(text), target_state = NEW_TEXT_ITER_STATE(target);
Text_t result = EMPTY_TEXT;
int64_t span_start = 0;
- for (int64_t i = 0; i < text.length; ) {
+ for (int64_t i = 0; i < text.length;) {
if (_matches(&text_state, &target_state, i)) {
- if (i > span_start)
- result = concat2(result, Text$slice(text, I(span_start+1), I(i)));
+ if (i > span_start) result = concat2(result, Text$slice(text, I(span_start + 1), I(i)));
result = concat2(result, replacement);
i += target.length;
@@ -1217,34 +1171,31 @@ public Text_t Text$replace(Text_t text, Text_t target, Text_t replacement)
i += 1;
}
}
- if (span_start < text.length)
- result = concat2(result, Text$slice(text, I(span_start+1), I(text.length)));
+ if (span_start < text.length) result = concat2(result, Text$slice(text, I(span_start + 1), I(text.length)));
return result;
}
-public PUREFUNC bool Text$has(Text_t text, Text_t target)
-{
+public
+PUREFUNC bool Text$has(Text_t text, Text_t target) {
TextIter_t text_state = NEW_TEXT_ITER_STATE(text), target_state = NEW_TEXT_ITER_STATE(target);
for (int64_t i = 0; i < text.length; i++) {
- if (_matches(&text_state, &target_state, i))
- return true;
+ if (_matches(&text_state, &target_state, i)) return true;
}
return false;
}
-public List_t Text$split(Text_t text, Text_t delimiters)
-{
- if (delimiters.length == 0)
- return Text$clusters(text);
+public
+List_t Text$split(Text_t text, Text_t delimiters) {
+ if (delimiters.length == 0) return Text$clusters(text);
TextIter_t text_state = NEW_TEXT_ITER_STATE(text), delim_state = NEW_TEXT_ITER_STATE(delimiters);
List_t splits = {};
- for (int64_t i = 0; i < text.length; ) {
+ for (int64_t i = 0; i < text.length;) {
int64_t span_len = 0;
while (i + span_len < text.length && !_matches(&text_state, &delim_state, i + span_len)) {
span_len += 1;
}
- Text_t slice = Text$slice(text, I(i+1), I(i+span_len));
+ Text_t slice = Text$slice(text, I(i + 1), I(i + span_len));
List$insert(&splits, &slice, I(0), sizeof(slice));
i += span_len + delimiters.length;
if (i == text.length) {
@@ -1255,20 +1206,20 @@ public List_t Text$split(Text_t text, Text_t delimiters)
return splits;
}
-public List_t Text$split_any(Text_t text, Text_t delimiters)
-{
- if (delimiters.length == 0)
- return List(text);
+public
+List_t Text$split_any(Text_t text, Text_t delimiters) {
+ if (delimiters.length == 0) return List(text);
TextIter_t text_state = NEW_TEXT_ITER_STATE(text), delim_state = NEW_TEXT_ITER_STATE(delimiters);
List_t splits = {};
- for (int64_t i = 0; i < text.length; ) {
+ for (int64_t i = 0; i < text.length;) {
int64_t span_len = 0;
- while (i + span_len < text.length && !_has_grapheme(&delim_state, Text$get_grapheme_fast(&text_state, i + span_len))) {
+ while (i + span_len < text.length
+ && !_has_grapheme(&delim_state, Text$get_grapheme_fast(&text_state, i + span_len))) {
span_len += 1;
}
bool trailing_delim = i + span_len < text.length;
- Text_t slice = Text$slice(text, I(i+1), I(i+span_len));
+ Text_t slice = Text$slice(text, I(i + 1), I(i + span_len));
List$insert(&splits, &slice, I(0), sizeof(slice));
i += span_len + 1;
while (i < text.length && _has_grapheme(&delim_state, Text$get_grapheme_fast(&text_state, i))) {
@@ -1288,8 +1239,7 @@ typedef struct {
Text_t delimiter;
} split_iter_state_t;
-static OptionalText_t next_split(split_iter_state_t *state)
-{
+static OptionalText_t next_split(split_iter_state_t *state) {
Text_t text = state->state.stack[0].text;
if (state->i >= text.length) {
if (state->delimiter.length > 0 && state->i == text.length) { // special case
@@ -1310,21 +1260,20 @@ static OptionalText_t next_split(split_iter_state_t *state)
while (i + span_len < text.length && !_matches(&state->state, &delim_state, i + span_len)) {
span_len += 1;
}
- Text_t slice = Text$slice(text, I(i+1), I(i+span_len));
+ Text_t slice = Text$slice(text, I(i + 1), I(i + span_len));
state->i = i + span_len + state->delimiter.length;
return slice;
}
-public Closure_t Text$by_split(Text_t text, Text_t delimiter)
-{
+public
+Closure_t Text$by_split(Text_t text, Text_t delimiter) {
return (Closure_t){
- .fn=(void*)next_split,
- .userdata=new(split_iter_state_t, .state=NEW_TEXT_ITER_STATE(text), .i=0, .delimiter=delimiter),
+ .fn = (void *)next_split,
+ .userdata = new (split_iter_state_t, .state = NEW_TEXT_ITER_STATE(text), .i = 0, .delimiter = delimiter),
};
}
-static OptionalText_t next_split_any(split_iter_state_t *state)
-{
+static OptionalText_t next_split_any(split_iter_state_t *state) {
Text_t text = state->state.stack[0].text;
if (state->i >= text.length) {
if (state->delimiter.length > 0 && state->i == text.length) { // special case
@@ -1335,7 +1284,7 @@ static OptionalText_t next_split_any(split_iter_state_t *state)
}
if (state->delimiter.length == 0) { // special case
- Text_t ret = Text$cluster(text, I(state->i+1));
+ Text_t ret = Text$cluster(text, I(state->i + 1));
state->i += 1;
return ret;
}
@@ -1343,10 +1292,11 @@ static OptionalText_t next_split_any(split_iter_state_t *state)
TextIter_t delim_state = NEW_TEXT_ITER_STATE(state->delimiter);
int64_t i = state->i;
int64_t span_len = 0;
- while (i + span_len < text.length && !_has_grapheme(&delim_state, Text$get_grapheme_fast(&state->state, i + span_len))) {
+ while (i + span_len < text.length
+ && !_has_grapheme(&delim_state, Text$get_grapheme_fast(&state->state, i + span_len))) {
span_len += 1;
}
- Text_t slice = Text$slice(text, I(i+1), I(i+span_len));
+ Text_t slice = Text$slice(text, I(i + 1), I(i + span_len));
i += span_len + 1;
while (i < text.length && _has_grapheme(&delim_state, Text$get_grapheme_fast(&state->state, i))) {
i += 1;
@@ -1355,18 +1305,16 @@ static OptionalText_t next_split_any(split_iter_state_t *state)
return slice;
}
-public Closure_t Text$by_split_any(Text_t text, Text_t delimiters)
-{
+public
+Closure_t Text$by_split_any(Text_t text, Text_t delimiters) {
return (Closure_t){
- .fn=(void*)next_split_any,
- .userdata=new(split_iter_state_t, .state=NEW_TEXT_ITER_STATE(text), .i=0, .delimiter=delimiters),
+ .fn = (void *)next_split_any,
+ .userdata = new (split_iter_state_t, .state = NEW_TEXT_ITER_STATE(text), .i = 0, .delimiter = delimiters),
};
}
-PUREFUNC public bool Text$equal_values(Text_t a, Text_t b)
-{
- if (a.length != b.length)
- return false;
+PUREFUNC public bool Text$equal_values(Text_t a, Text_t b) {
+ if (a.length != b.length) return false;
int64_t len = a.length;
TextIter_t a_state = NEW_TEXT_ITER_STATE(a), b_state = NEW_TEXT_ITER_STATE(b);
// TODO: make this smarter and more efficient
@@ -1378,17 +1326,14 @@ PUREFUNC public bool Text$equal_values(Text_t a, Text_t b)
return true;
}
-PUREFUNC public bool Text$equal(const void *a, const void *b, const TypeInfo_t *info)
-{
+PUREFUNC public bool Text$equal(const void *a, const void *b, const TypeInfo_t *info) {
(void)info;
if (a == b) return true;
- return Text$equal_values(*(Text_t*)a, *(Text_t*)b);
+ return Text$equal_values(*(Text_t *)a, *(Text_t *)b);
}
-PUREFUNC public bool Text$equal_ignoring_case(Text_t a, Text_t b, Text_t language)
-{
- if (a.length != b.length)
- return false;
+PUREFUNC public bool Text$equal_ignoring_case(Text_t a, Text_t b, Text_t language) {
+ if (a.length != b.length) return false;
int64_t len = a.length;
TextIter_t a_state = NEW_TEXT_ITER_STATE(a), b_state = NEW_TEXT_ITER_STATE(b);
const char *uc_language = Text$as_c_string(language);
@@ -1396,76 +1341,79 @@ PUREFUNC public bool Text$equal_ignoring_case(Text_t a, Text_t b, Text_t languag
int32_t ai = Text$get_grapheme_fast(&a_state, i);
int32_t bi = Text$get_grapheme_fast(&b_state, i);
if (ai != bi) {
- const ucs4_t *a_codepoints = ai >= 0 ? (ucs4_t*)&ai : GRAPHEME_CODEPOINTS(ai);
+ const ucs4_t *a_codepoints = ai >= 0 ? (ucs4_t *)&ai : GRAPHEME_CODEPOINTS(ai);
int64_t a_len = ai >= 0 ? 1 : NUM_GRAPHEME_CODEPOINTS(ai);
- const ucs4_t *b_codepoints = bi >= 0 ? (ucs4_t*)&bi : GRAPHEME_CODEPOINTS(bi);
+ const ucs4_t *b_codepoints = bi >= 0 ? (ucs4_t *)&bi : GRAPHEME_CODEPOINTS(bi);
int64_t b_len = bi >= 0 ? 1 : NUM_GRAPHEME_CODEPOINTS(bi);
int cmp = 0;
(void)u32_casecmp(a_codepoints, (size_t)a_len, b_codepoints, (size_t)b_len, uc_language, UNINORM_NFC, &cmp);
- if (cmp != 0)
- return false;
+ if (cmp != 0) return false;
}
}
return true;
}
-public Text_t Text$upper(Text_t text, Text_t language)
-{
+public
+Text_t Text$upper(Text_t text, Text_t language) {
if (text.length == 0) return text;
List_t codepoints = Text$utf32_codepoints(text);
const char *uc_language = Text$as_c_string(language);
size_t out_len = 0;
ucs4_t *upper = u32_toupper(codepoints.data, (size_t)codepoints.length, uc_language, UNINORM_NFC, NULL, &out_len);
- Text_t ret = Text$from_codepoints((List_t){.data=upper, .length=(int64_t)out_len, .stride=sizeof(int32_t)});
+ Text_t ret = Text$from_codepoints((List_t){.data = upper, .length = (int64_t)out_len, .stride = sizeof(int32_t)});
return ret;
}
-public Text_t Text$lower(Text_t text, Text_t language)
-{
+public
+Text_t Text$lower(Text_t text, Text_t language) {
if (text.length == 0) return text;
List_t codepoints = Text$utf32_codepoints(text);
const char *uc_language = Text$as_c_string(language);
size_t out_len = 0;
ucs4_t *lower = u32_tolower(codepoints.data, (size_t)codepoints.length, uc_language, UNINORM_NFC, NULL, &out_len);
- Text_t ret = Text$from_codepoints((List_t){.data=lower, .length=(int64_t)out_len, .stride=sizeof(int32_t)});
+ Text_t ret = Text$from_codepoints((List_t){.data = lower, .length = (int64_t)out_len, .stride = sizeof(int32_t)});
return ret;
}
-public Text_t Text$title(Text_t text, Text_t language)
-{
+public
+Text_t Text$title(Text_t text, Text_t language) {
if (text.length == 0) return text;
List_t codepoints = Text$utf32_codepoints(text);
const char *uc_language = Text$as_c_string(language);
size_t out_len = 0;
ucs4_t *title = u32_totitle(codepoints.data, (size_t)codepoints.length, uc_language, UNINORM_NFC, NULL, &out_len);
- Text_t ret = Text$from_codepoints((List_t){.data=title, .length=(int64_t)out_len, .stride=sizeof(int32_t)});
+ Text_t ret = Text$from_codepoints((List_t){.data = title, .length = (int64_t)out_len, .stride = sizeof(int32_t)});
return ret;
}
-public Text_t Text$quoted(Text_t text, bool colorize, Text_t quotation_mark)
-{
- if (quotation_mark.length != 1)
- fail("Invalid quote text: ", quotation_mark, " (must have length == 1)");
+public
+Text_t Text$quoted(Text_t text, bool colorize, Text_t quotation_mark) {
+ if (quotation_mark.length != 1) fail("Invalid quote text: ", quotation_mark, " (must have length == 1)");
Text_t ret = colorize ? Text("\x1b[35m") : EMPTY_TEXT;
- if (!Text$equal_values(quotation_mark, Text("\"")) && !Text$equal_values(quotation_mark, Text("'")) && !Text$equal_values(quotation_mark, Text("`")))
+ if (!Text$equal_values(quotation_mark, Text("\"")) && !Text$equal_values(quotation_mark, Text("'"))
+ && !Text$equal_values(quotation_mark, Text("`")))
ret = concat2_assuming_safe(ret, Text("$"));
ret = concat2_assuming_safe(ret, quotation_mark);
int32_t quote_char = Text$get_grapheme(quotation_mark, 0);
-#define flush_unquoted() ({ \
- if (unquoted_span > 0) { \
- ret = concat2_assuming_safe(ret, Text$slice(text, I(i-unquoted_span+1), I(i))); \
- unquoted_span = 0; \
- } })
-#define add_escaped(str) ({ \
- flush_unquoted(); \
- if (colorize) ret = concat2_assuming_safe(ret, Text("\x1b[34;1m")); \
- ret = concat2_assuming_safe(ret, Text("\\" str)); \
- if (colorize) ret = concat2_assuming_safe(ret, Text("\x1b[0;35m")); })
+#define flush_unquoted() \
+ ({ \
+ if (unquoted_span > 0) { \
+ ret = concat2_assuming_safe(ret, Text$slice(text, I(i - unquoted_span + 1), I(i))); \
+ unquoted_span = 0; \
+ } \
+ })
+#define add_escaped(str) \
+ ({ \
+ flush_unquoted(); \
+ if (colorize) ret = concat2_assuming_safe(ret, Text("\x1b[34;1m")); \
+ ret = concat2_assuming_safe(ret, Text("\\" str)); \
+ if (colorize) ret = concat2_assuming_safe(ret, Text("\x1b[0;35m")); \
+ })
TextIter_t state = NEW_TEXT_ITER_STATE(text);
int64_t unquoted_span = 0;
int64_t i = 0;
@@ -1488,8 +1436,10 @@ public Text_t Text$quoted(Text_t text, bool colorize, Text_t quotation_mark)
add_escaped("$");
break;
}
- case '\x00' ... '\x06': case '\x0E' ... '\x1A':
- case '\x1C' ... '\x1F': case '\x7F' ... '\x7F': {
+ case '\x00' ... '\x06':
+ case '\x0E' ... '\x1A':
+ case '\x1C' ... '\x1F':
+ case '\x7F' ... '\x7F': {
flush_unquoted();
if (colorize) ret = concat2_assuming_safe(ret, Text("\x1b[34;1m"));
ret = concat2_assuming_safe(ret, Text("\\x"));
@@ -1499,8 +1449,7 @@ public Text_t Text$quoted(Text_t text, bool colorize, Text_t quotation_mark)
'\0',
};
ret = concat2_assuming_safe(ret, Text$from_strn(tmp, 2));
- if (colorize)
- ret = concat2_assuming_safe(ret, Text("\x1b[0;35m"));
+ if (colorize) ret = concat2_assuming_safe(ret, Text("\x1b[0;35m"));
break;
}
default: {
@@ -1522,21 +1471,19 @@ public Text_t Text$quoted(Text_t text, bool colorize, Text_t quotation_mark)
#undef flush_unquoted
ret = concat2_assuming_safe(ret, quotation_mark);
- if (colorize)
- ret = concat2_assuming_safe(ret, Text("\x1b[m"));
+ if (colorize) ret = concat2_assuming_safe(ret, Text("\x1b[m"));
return ret;
}
-public Text_t Text$as_text(const void *vtext, bool colorize, const TypeInfo_t *info)
-{
+public
+Text_t Text$as_text(const void *vtext, bool colorize, const TypeInfo_t *info) {
(void)info;
if (!vtext) return info && info->TextInfo.lang ? Text$from_str(info->TextInfo.lang) : Text("Text");
- Text_t text = *(Text_t*)vtext;
+ Text_t text = *(Text_t *)vtext;
// Figure out the best quotation mark to use:
- bool has_double_quote = false, has_backtick = false,
- has_single_quote = false, needs_escapes = false;
+ bool has_double_quote = false, has_backtick = false, has_single_quote = false, needs_escapes = false;
TextIter_t state = NEW_TEXT_ITER_STATE(text);
for (int64_t i = 0; i < text.length; i++) {
int32_t g = Text$get_grapheme_fast(&state, i);
@@ -1554,39 +1501,33 @@ public Text_t Text$as_text(const void *vtext, bool colorize, const TypeInfo_t *i
// needing to escape them by using single quotes, but only if we don't have
// single quotes or need to escape anything else (because single quotes
// don't have interpolation):
- if (has_double_quote && !has_single_quote)
- quote = Text("'");
+ if (has_double_quote && !has_single_quote) quote = Text("'");
// If there is a double quote, but no backtick, we can save a bit of
// escaping by using backtick instead of double quote:
- else if (has_double_quote && has_single_quote && !has_backtick && !needs_escapes)
- quote = Text("`");
+ else if (has_double_quote && has_single_quote && !has_backtick && !needs_escapes) quote = Text("`");
// Otherwise fall back to double quotes as the default quoting style:
- else
- quote = Text("\"");
+ else quote = Text("\"");
Text_t as_text = Text$quoted(text, colorize, quote);
if (info && info->TextInfo.lang && info != &Text$info)
- as_text = Text$concat(
- colorize ? Text("\x1b[1m$") : Text("$"),
- Text$from_str(info->TextInfo.lang),
- colorize ? Text("\x1b[0m") : Text(""),
- as_text);
+ as_text = Text$concat(colorize ? Text("\x1b[1m$") : Text("$"), Text$from_str(info->TextInfo.lang),
+ colorize ? Text("\x1b[0m") : Text(""), as_text);
return as_text;
}
-public Text_t Text$join(Text_t glue, List_t pieces)
-{
+public
+Text_t Text$join(Text_t glue, List_t pieces) {
if (pieces.length == 0) return EMPTY_TEXT;
- Text_t result = *(Text_t*)pieces.data;
+ Text_t result = *(Text_t *)pieces.data;
for (int64_t i = 1; i < pieces.length; i++) {
- result = Text$concat(result, glue, *(Text_t*)(pieces.data + i*pieces.stride));
+ result = Text$concat(result, glue, *(Text_t *)(pieces.data + i * pieces.stride));
}
return result;
}
-public List_t Text$clusters(Text_t text)
-{
+public
+List_t Text$clusters(Text_t text) {
List_t clusters = {};
for (int64_t i = 1; i <= text.length; i++) {
Text_t cluster = Text$slice(text, I(i), I(i));
@@ -1595,9 +1536,9 @@ public List_t Text$clusters(Text_t text)
return clusters;
}
-public List_t Text$utf32_codepoints(Text_t text)
-{
- List_t codepoints = {.atomic=1};
+public
+List_t Text$utf32_codepoints(Text_t text) {
+ List_t codepoints = {.atomic = 1};
TextIter_t state = NEW_TEXT_ITER_STATE(text);
for (int64_t i = 0; i < text.length; i++) {
int32_t grapheme = Text$get_grapheme_fast(&state, i);
@@ -1613,24 +1554,23 @@ public List_t Text$utf32_codepoints(Text_t text)
return codepoints;
}
-public List_t Text$utf8_bytes(Text_t text)
-{
+public
+List_t Text$utf8_bytes(Text_t text) {
const char *str = Text$as_c_string(text);
- return (List_t){.length=(int64_t)strlen(str), .stride=1, .atomic=1, .data=(void*)str};
+ return (List_t){.length = (int64_t)strlen(str), .stride = 1, .atomic = 1, .data = (void *)str};
}
-static INLINE const char *codepoint_name(ucs4_t c)
-{
+static INLINE const char *codepoint_name(ucs4_t c) {
char *name = GC_MALLOC_ATOMIC(UNINAME_MAX);
char *found_name = unicode_character_name(c, name);
if (found_name) return found_name;
const uc_block_t *block = uc_block(c);
assert(block);
- return String(block->name, "-", hex(c, .no_prefix=true, .uppercase=true));
+ return String(block->name, "-", hex(c, .no_prefix = true, .uppercase = true));
}
-public List_t Text$codepoint_names(Text_t text)
-{
+public
+List_t Text$codepoint_names(Text_t text) {
List_t names = {};
TextIter_t state = NEW_TEXT_ITER_STATE(text);
for (int64_t i = 0; i < text.length; i++) {
@@ -1650,81 +1590,78 @@ public List_t Text$codepoint_names(Text_t text)
return names;
}
-public Text_t Text$from_codepoints(List_t codepoints)
-{
- if (codepoints.stride != sizeof(uint32_t))
- List$compact(&codepoints, sizeof(uint32_t));
+public
+Text_t Text$from_codepoints(List_t codepoints) {
+ if (codepoints.stride != sizeof(uint32_t)) List$compact(&codepoints, sizeof(uint32_t));
List_t graphemes = {};
Table_t unique_clusters = {};
- const uint32_t *pos = (const uint32_t*)codepoints.data;
- const uint32_t *end = (const uint32_t*)&pos[codepoints.length];
+ const uint32_t *pos = (const uint32_t *)codepoints.data;
+ const uint32_t *end = (const uint32_t *)&pos[codepoints.length];
// Iterate over grapheme clusters
- for (const uint32_t *next; (next=u32_grapheme_next(pos, end)); pos = next) {
+ for (const uint32_t *next; (next = u32_grapheme_next(pos, end)); pos = next) {
// Buffer for normalized cluster:
uint32_t buf[256];
- size_t u32_normlen = sizeof(buf)/sizeof(buf[0]);
- uint32_t *u32s_normalized = u32_normalize(UNINORM_NFC, pos, (size_t)(next-pos), buf, &u32_normlen);
+ size_t u32_normlen = sizeof(buf) / sizeof(buf[0]);
+ uint32_t *u32s_normalized = u32_normalize(UNINORM_NFC, pos, (size_t)(next - pos), buf, &u32_normlen);
int32_t g = get_synthetic_grapheme(u32s_normalized, (int64_t)u32_normlen);
List$insert(&graphemes, &g, I(0), sizeof(int32_t));
- Table$get_or_setdefault(
- &unique_clusters, int32_t, uint8_t, g, (uint8_t)unique_clusters.entries.length,
- Table$info(&Int32$info, &Byte$info));
+ Table$get_or_setdefault(&unique_clusters, int32_t, uint8_t, g, (uint8_t)unique_clusters.entries.length,
+ Table$info(&Int32$info, &Byte$info));
if (u32s_normalized != buf) free(u32s_normalized);
if (unique_clusters.entries.length == 256) {
List_t remaining_codepoints = {
- .length=(int64_t)(end-next),
- .data=(void*)next,
- .stride=sizeof(int32_t),
+ .length = (int64_t)(end - next),
+ .data = (void *)next,
+ .stride = sizeof(int32_t),
};
- return concat2_assuming_safe(Text$from_components(graphemes, unique_clusters), Text$from_codepoints(remaining_codepoints));
+ return concat2_assuming_safe(Text$from_components(graphemes, unique_clusters),
+ Text$from_codepoints(remaining_codepoints));
}
}
return Text$from_components(graphemes, unique_clusters);
}
-public OptionalText_t Text$from_codepoint_names(List_t codepoint_names)
-{
+public
+OptionalText_t Text$from_codepoint_names(List_t codepoint_names) {
List_t codepoints = {};
for (int64_t i = 0; i < codepoint_names.length; i++) {
- Text_t *name = ((Text_t*)(codepoint_names.data + i*codepoint_names.stride));
+ Text_t *name = ((Text_t *)(codepoint_names.data + i * codepoint_names.stride));
const char *name_str = Text$as_c_string(*name);
ucs4_t codepoint = unicode_name_character(name_str);
- if (codepoint == UNINAME_INVALID)
- return NONE_TEXT;
+ if (codepoint == UNINAME_INVALID) return NONE_TEXT;
List$insert(&codepoints, &codepoint, I_small(0), sizeof(ucs4_t));
}
return Text$from_codepoints(codepoints);
}
-public OptionalText_t Text$from_bytes(List_t bytes)
-{
- if (bytes.stride != sizeof(int8_t))
- List$compact(&bytes, sizeof(int8_t));
+public
+OptionalText_t Text$from_bytes(List_t bytes) {
+ if (bytes.stride != sizeof(int8_t)) List$compact(&bytes, sizeof(int8_t));
return Text$from_strn(bytes.data, (size_t)bytes.length);
}
-public List_t Text$lines(Text_t text)
-{
+public
+List_t Text$lines(Text_t text) {
List_t lines = {};
TextIter_t state = NEW_TEXT_ITER_STATE(text);
for (int64_t i = 0, line_start = 0; i < text.length; i++) {
int32_t grapheme = Text$get_grapheme_fast(&state, i);
if (grapheme == '\r' && Text$get_grapheme_fast(&state, i + 1) == '\n') { // CRLF
- Text_t line = Text$slice(text, I(line_start+1), I(i));
+ Text_t line = Text$slice(text, I(line_start + 1), I(i));
List$insert(&lines, &line, I_small(0), sizeof(Text_t));
i += 1; // skip one extra for CR
line_start = i + 1;
} else if (grapheme == '\n') { // newline
- Text_t line = Text$slice(text, I(line_start+1), I(i));
+ Text_t line = Text$slice(text, I(line_start + 1), I(i));
List$insert(&lines, &line, I_small(0), sizeof(Text_t));
line_start = i + 1;
- } else if (i == text.length-1 && line_start != i) { // last line
- Text_t line = Text$slice(text, I(line_start+1), I(i+1));
+ } else if (i == text.length - 1 && line_start != i) { // last line
+ Text_t line = Text$slice(text, I(line_start + 1), I(i + 1));
List$insert(&lines, &line, I_small(0), sizeof(Text_t));
}
}
@@ -1736,21 +1673,20 @@ typedef struct {
int64_t i;
} line_iter_state_t;
-static OptionalText_t next_line(line_iter_state_t *state)
-{
+static OptionalText_t next_line(line_iter_state_t *state) {
Text_t text = state->state.stack[0].text;
for (int64_t i = state->i; i < text.length; i++) {
int32_t grapheme = Text$get_grapheme_fast(&state->state, i);
if (grapheme == '\r' && Text$get_grapheme_fast(&state->state, i + 1) == '\n') { // CRLF
- Text_t line = Text$slice(text, I(state->i+1), I(i));
+ Text_t line = Text$slice(text, I(state->i + 1), I(i));
state->i = i + 2; // skip one extra for CR
return line;
} else if (grapheme == '\n') { // newline
- Text_t line = Text$slice(text, I(state->i+1), I(i));
+ Text_t line = Text$slice(text, I(state->i + 1), I(i));
state->i = i + 1;
return line;
- } else if (i == text.length-1 && state->i != i) { // last line
- Text_t line = Text$slice(text, I(state->i+1), I(i+1));
+ } else if (i == text.length - 1 && state->i != i) { // last line
+ Text_t line = Text$slice(text, I(state->i + 1), I(i + 1));
state->i = i + 1;
return line;
}
@@ -1758,81 +1694,75 @@ static OptionalText_t next_line(line_iter_state_t *state)
return NONE_TEXT;
}
-public Closure_t Text$by_line(Text_t text)
-{
+public
+Closure_t Text$by_line(Text_t text) {
return (Closure_t){
- .fn=(void*)next_line,
- .userdata=new(line_iter_state_t, .state=NEW_TEXT_ITER_STATE(text), .i=0),
+ .fn = (void *)next_line,
+ .userdata = new (line_iter_state_t, .state = NEW_TEXT_ITER_STATE(text), .i = 0),
};
}
-PUREFUNC public bool Text$is_none(const void *t, const TypeInfo_t *info)
-{
+PUREFUNC public bool Text$is_none(const void *t, const TypeInfo_t *info) {
(void)info;
- return ((Text_t*)t)->length < 0;
+ return ((Text_t *)t)->length < 0;
}
-public Int_t Text$memory_size(Text_t text)
-{
+public
+Int_t Text$memory_size(Text_t text) {
switch (text.tag) {
- case TEXT_ASCII:
- return Int$from_int64((int64_t)sizeof(Text_t) + (int64_t)sizeof(char[text.length]));
- case TEXT_GRAPHEMES:
- return Int$from_int64((int64_t)sizeof(Text_t) + (int64_t)sizeof(int32_t[text.length]));
+ case TEXT_ASCII: return Int$from_int64((int64_t)sizeof(Text_t) + (int64_t)sizeof(char[text.length]));
+ case TEXT_GRAPHEMES: return Int$from_int64((int64_t)sizeof(Text_t) + (int64_t)sizeof(int32_t[text.length]));
case TEXT_BLOB:
- return Int$from_int64((int64_t)sizeof(Text_t) + (int64_t)((void*)text.blob.bytes - (void*)text.blob.map) + (int64_t)sizeof(uint8_t[text.length]));
+ return Int$from_int64((int64_t)sizeof(Text_t) + (int64_t)((void *)text.blob.bytes - (void *)text.blob.map)
+ + (int64_t)sizeof(uint8_t[text.length]));
case TEXT_CONCAT:
- return Int$plus(
- Int$from_int64((int64_t)sizeof(Text_t)),
- Int$plus(Text$memory_size(*text.left), Text$memory_size(*text.right)));
+ return Int$plus(Int$from_int64((int64_t)sizeof(Text_t)),
+ Int$plus(Text$memory_size(*text.left), Text$memory_size(*text.right)));
default: errx(1, "Invalid text tag: %d", text.tag);
}
}
-public Text_t Text$layout(Text_t text)
-{
+public
+Text_t Text$layout(Text_t text) {
switch (text.tag) {
- case TEXT_ASCII:
- return Texts(Text("ASCII("), Int64$as_text((int64_t[1]){text.length}, false, NULL), Text(")"));
+ case TEXT_ASCII: return Texts(Text("ASCII("), Int64$as_text((int64_t[1]){text.length}, false, NULL), Text(")"));
case TEXT_GRAPHEMES:
return Texts(Text("Graphemes("), Int64$as_text((int64_t[1]){text.length}, false, NULL), Text(")"));
- case TEXT_BLOB:
- return Texts(Text("Blob("), Int64$as_text((int64_t[1]){text.length}, false, NULL), Text(")"));
+ case TEXT_BLOB: return Texts(Text("Blob("), Int64$as_text((int64_t[1]){text.length}, false, NULL), Text(")"));
case TEXT_CONCAT:
return Texts(Text("Concat("), Text$layout(*text.left), Text(", "), Text$layout(*text.right), Text(")"));
default: errx(1, "Invalid text tag: %d", text.tag);
}
}
-public void Text$serialize(const void *obj, FILE *out, Table_t *pointers, const TypeInfo_t *info)
-{
+public
+void Text$serialize(const void *obj, FILE *out, Table_t *pointers, const TypeInfo_t *info) {
(void)info;
- const char *str = Text$as_c_string(*(Text_t*)obj);
+ const char *str = Text$as_c_string(*(Text_t *)obj);
int64_t len = (int64_t)strlen(str);
Int64$serialize(&len, out, pointers, &Int64$info);
fwrite(str, sizeof(char), (size_t)len, out);
}
-public void Text$deserialize(FILE *in, void *out, List_t *pointers, const TypeInfo_t *info)
-{
+public
+void Text$deserialize(FILE *in, void *out, List_t *pointers, const TypeInfo_t *info) {
(void)info;
int64_t len = 0;
Int64$deserialize(in, &len, pointers, &Int64$info);
- if (len < 0)
- fail("Cannot deserialize text with a negative length!");
- char *buf = GC_MALLOC_ATOMIC((size_t)len+1);
- if (fread(buf, sizeof(char), (size_t)len, in) != (size_t)len)
- fail("Not enough data in stream to deserialize");
- buf[len+1] = '\0';
- *(Text_t*)out = Text$from_strn(buf, (size_t)len);
-}
-
-public const TypeInfo_t Text$info = {
- .size=sizeof(Text_t),
- .align=__alignof__(Text_t),
- .tag=TextInfo,
- .TextInfo={.lang="Text"},
- .metamethods=Text$metamethods,
+ if (len < 0) fail("Cannot deserialize text with a negative length!");
+ char *buf = GC_MALLOC_ATOMIC((size_t)len + 1);
+ if (fread(buf, sizeof(char), (size_t)len, in) != (size_t)len) fail("Not enough data in stream to deserialize");
+ buf[len + 1] = '\0';
+ *(Text_t *)out = Text$from_strn(buf, (size_t)len);
+}
+
+public
+const TypeInfo_t Text$info = {
+ .size = sizeof(Text_t),
+ .align = __alignof__(Text_t),
+ .tag = TextInfo,
+ .TextInfo = {.lang = "Text"},
+ .metamethods = Text$metamethods,
};
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0