tomo/builtins/text.c

1467 lines
50 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// Type info and methods for Text datatype, which uses the Boehm "cord" library
// and libunistring
#include <assert.h>
#include <ctype.h>
#include <err.h>
#include <gc.h>
#include <gmp.h>
#include <limits.h>
#include <printf.h>
#include <readline/history.h>
#include <readline/readline.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <sys/param.h>
#include <unicase.h>
#include <unictype.h>
#include <unigbrk.h>
#include <uniname.h>
#include <uninorm.h>
#include <unistd.h>
#include <unistdio.h>
#include <unistr.h>
#include "array.h"
#include "functions.h"
#include "integers.h"
#include "text.h"
#include "types.h"
// Table of "synthetic" graphemes. Each entry stores the codepoint sequence of
// one grapheme cluster that is made of more than one codepoint. Text data
// refers to an entry through the negative id -(index+1); readers recover the
// entry with synthetic_graphemes[-id-1].
static struct {
size_t num_codepoints;
const uint32_t *codepoints;
} synthetic_graphemes[1024] = {};
// Number of entries of synthetic_graphemes that are currently in use.
static int32_t num_synthetic_graphemes = 0;
// Forward declaration; the definition appears later in this file.
static int32_t get_grapheme(Text_t text, int64_t index);
// Cursor used by _next_grapheme() to remember a position inside a TEXT_SUBTEXT
// text: `subtext` is an index into text.subtexts and `sum_of_previous_subtexts`
// is the total length of the subtexts that come before it.
typedef struct {
int64_t subtext, sum_of_previous_subtexts;
} iteration_state_t;
// Forward declaration; the definition appears later in this file.
static int32_t _next_grapheme(Text_t text, iteration_state_t *state, int64_t index);
// Look up a codepoint sequence in the synthetic grapheme table.
//
// Returns the index of the matching entry in `synthetic_graphemes`, or
// `num_synthetic_graphemes` (the slot where a new entry would be appended) if
// the sequence has not been registered yet.
//
// The table is append-only and searched linearly. Ids handed out by
// get_synthetic_grapheme() are stored inside text data and later used directly
// as table indices, so an entry must never move once it has been created.
// (Keeping the table sorted by inserting in the middle would renumber every
// entry after the insertion point.)
int32_t find_synthetic_grapheme(const uint32_t *codepoints, size_t len)
{
    for (int32_t i = 0; i < num_synthetic_graphemes; i++) {
        if (synthetic_graphemes[i].num_codepoints != len)
            continue;
        // Compare whole codepoints: the byte count is len * sizeof(uint32_t)
        if (memcmp(synthetic_graphemes[i].codepoints, codepoints, len * sizeof(uint32_t)) == 0)
            return i;
    }
    return num_synthetic_graphemes;
}

// Get the id of the synthetic grapheme for a codepoint sequence, registering
// the sequence if it has not been seen before.
//
// Returns a negative id, -(index+1), where `index` is the entry's position in
// `synthetic_graphemes`. The codepoints are copied, so the caller keeps
// ownership of `codepoints`. Exits with an error if the table is full.
int32_t get_synthetic_grapheme(const uint32_t *codepoints, size_t len)
{
    int32_t index = find_synthetic_grapheme(codepoints, len);
    if (index < num_synthetic_graphemes)
        return -(index+1);

    const int32_t capacity = (int32_t)(sizeof(synthetic_graphemes)/sizeof(synthetic_graphemes[0]));
    if (num_synthetic_graphemes >= capacity)
        errx(1, "Too many synthetic graphemes (limit: %d)", (int)capacity);

    uint32_t *buf = GC_MALLOC_ATOMIC(len * sizeof(uint32_t));
    memcpy(buf, codepoints, len * sizeof(uint32_t));
    synthetic_graphemes[index].codepoints = buf;
    synthetic_graphemes[index].num_codepoints = len;
    ++num_synthetic_graphemes;
    return -(index+1);
}
// Count how many entries of `t.subtexts` are needed to account for `t.length`
// graphemes. A text that is not a TEXT_SUBTEXT counts as a single piece.
static inline size_t num_subtexts(Text_t t)
{
    if (t.tag != TEXT_SUBTEXT)
        return 1;

    size_t count = 0;
    // Walk the pieces, subtracting each piece's length until nothing remains
    for (size_t remaining = t.length; remaining > 0; ++count)
        remaining -= t.subtexts[count].length;
    return count;
}
// Debugging aid: write a tagged, XML-like dump of the internal structure of `t`
// to `stream`. TEXT_SUBTEXT texts are dumped recursively, one piece per line.
// Returns the sum of the values returned by the underlying print calls.
int text_visualize(FILE *stream, Text_t t)
{
    switch (t.tag) {
    // "%ld" needs a long and the '*' precision needs an int, so the length is
    // converted explicitly instead of being passed as-is through varargs.
    case TEXT_SHORT_ASCII:
        return fprintf(stream, "<ascii length=%ld>%.*s</ascii>", (long)t.length, (int)t.length, t.short_ascii);
    case TEXT_ASCII:
        return fprintf(stream, "<ascii length=%ld>%.*s</ascii>", (long)t.length, (int)t.length, t.ascii);
    case TEXT_GRAPHEMES: case TEXT_SHORT_GRAPHEMES: {
        int printed = fprintf(stream, "<graphemes length=%ld>", (long)t.length);
        printed += Text$print(stream, t);
        printed += fprintf(stream, "</graphemes>");
        return printed;
    }
    case TEXT_SUBTEXT: {
        int printed = fprintf(stream, "<text length=%ld>", (long)t.length);
        size_t to_print = t.length;
        for (int i = 0; to_print > 0; ++i) {
            printed += fprintf(stream, "\n ");
            printed += text_visualize(stream, t.subtexts[i]);
            to_print -= t.subtexts[i].length;
            // Stop at an empty piece rather than walking past the end of the array
            if (t.subtexts[i].length == 0) break;
        }
        printed += fprintf(stream, "\n</text>");
        return printed;
    }
    default: return 0;
    }
}
// Write the contents of `t` to `stream`.
// ASCII texts are written as raw bytes; grapheme texts are written one
// grapheme at a time with ulc_fprintf(); TEXT_SUBTEXT texts are written piece
// by piece. Returns the sum of the values returned by the underlying calls.
public int Text$print(FILE *stream, Text_t t)
{
    switch (t.tag) {
    case TEXT_SHORT_ASCII: return (int)fwrite(t.short_ascii, sizeof(char), (size_t)t.length, stream);
    case TEXT_ASCII: return (int)fwrite(t.ascii, sizeof(char), (size_t)t.length, stream);
    case TEXT_GRAPHEMES: case TEXT_SHORT_GRAPHEMES: {
        int32_t *graphemes = t.tag == TEXT_SHORT_GRAPHEMES ? t.short_graphemes : t.graphemes;
        int written = 0;
        for (int64_t i = 0; i < t.length; i++) {
            int32_t grapheme = graphemes[i];
            if (grapheme >= 0) {
                // A non-negative grapheme is a single codepoint
                written += ulc_fprintf(stream, "%.*llU", 1, (const uint32_t*)&grapheme);
            } else {
                // A negative grapheme is an id into the synthetic grapheme table.
                // The '*' precision must be an int, not a size_t.
                written += ulc_fprintf(
                    stream, "%.*llU",
                    (int)synthetic_graphemes[-grapheme-1].num_codepoints,
                    synthetic_graphemes[-grapheme-1].codepoints);
            }
        }
        return written;
    }
    case TEXT_SUBTEXT: {
        int written = 0;
        int i = 0;
        for (size_t to_print = t.length; to_print > 0; to_print -= t.subtexts[i].length, ++i)
            written += Text$print(stream, t.subtexts[i]);
        return written;
    }
    default: return 0;
    }
}
// Join two texts into one TEXT_SUBTEXT text.
// If either side is empty the other side is returned unchanged. Otherwise the
// pieces of both sides are laid out in one flat array: a TEXT_SUBTEXT operand
// contributes its own pieces, any other operand contributes itself as a piece.
static Text_t concat2(Text_t a, Text_t b)
{
    if (a.length == 0) return b;
    if (b.length == 0) return a;

    const bool a_is_flat_list = (a.tag == TEXT_SUBTEXT);
    const bool b_is_flat_list = (b.tag == TEXT_SUBTEXT);
    // num_subtexts() reports 1 for anything that is not a TEXT_SUBTEXT
    const size_t a_pieces = num_subtexts(a);
    const size_t b_pieces = num_subtexts(b);

    Text_t joined = {
        .length=a.length + b.length,
        .tag=TEXT_SUBTEXT,
        .subtexts=GC_MALLOC(sizeof(Text_t[a_pieces + b_pieces])),
    };

    if (a_is_flat_list)
        memcpy(&joined.subtexts[0], a.subtexts, sizeof(Text_t[a_pieces]));
    else
        joined.subtexts[0] = a;

    if (b_is_flat_list)
        memcpy(&joined.subtexts[a_pieces], b.subtexts, sizeof(Text_t[b_pieces]));
    else
        joined.subtexts[a_pieces] = b;

    return joined;
}
// Concatenate `n` texts into a single text.
// Zero, one and two items are handled directly; for three or more items the
// pieces of every non-empty item are flattened into one TEXT_SUBTEXT array.
public Text_t Text$_concat(int n, Text_t items[n])
{
    if (n == 0) return (Text_t){.length=0};
    if (n == 1) return items[0];
    if (n == 2) return concat2(items[0], items[1]);

    int64_t len = 0, subtexts = 0;
    for (int i = 0; i < n; i++) {
        if (items[i].length == 0) continue; // Empty items contribute nothing
        len += items[i].length;
        subtexts += num_subtexts(items[i]);
    }
    if (len == 0) return (Text_t){.length=0};

    Text_t ret = {
        .length=len,
        .tag=TEXT_SUBTEXT,
        // Sized by the number of pieces, not by the number of graphemes
        .subtexts=GC_MALLOC(sizeof(Text_t[subtexts])),
    };
    int64_t sub_i = 0;
    for (int i = 0; i < n; i++) {
        if (items[i].length == 0) continue;
        if (items[i].tag == TEXT_SUBTEXT) {
            // Copy the item's pieces until its whole length is accounted for
            for (int64_t j = 0, remainder = items[i].length; remainder > 0; j++) {
                ret.subtexts[sub_i++] = items[i].subtexts[j];
                remainder -= items[i].subtexts[j].length;
            }
        } else {
            ret.subtexts[sub_i++] = items[i];
        }
    }
    return ret;
}
// Return the part of `text` from grapheme `first_int` to grapheme `last_int`
// (1-based, inclusive). Negative indices count back from the end of the text
// (-1 is the last grapheme). An index of 0 for `first_int` is an error; a
// `last_int` of 0, or an empty range, yields an empty text. Out-of-range
// bounds are clamped to the text. Where possible the result points into the
// same storage as `text` instead of copying it.
public Text_t Text$slice(Text_t text, Int_t first_int, Int_t last_int)
{
    int64_t first = Int_to_Int64(first_int, false);
    int64_t last = Int_to_Int64(last_int, false);
    if (first == 0) errx(1, "Invalid index: 0");
    if (last == 0) return (Text_t){.length=0};

    if (first < 0) first = text.length + first + 1;
    if (last < 0) last = text.length + last + 1;

    // Clamp both ends to the text. Without the lower clamp, a negative index
    // that reaches before the start would produce a negative offset below.
    if (first < 1) first = 1;
    if (last > text.length) last = text.length;

    if (first > text.length || last < first)
        return (Text_t){.length=0};

    if (first == 1 && last == text.length)
        return text;

    switch (text.tag) {
    case TEXT_SHORT_ASCII: {
        Text_t ret = text;
        ret.length = last - first + 1;
        if (first > 1)
            memcpy(ret.short_ascii, text.short_ascii + (first-1), ret.length);
        return ret;
    }
    case TEXT_ASCII: {
        Text_t ret = {
            .tag=TEXT_ASCII,
            .length=last - first + 1,
            .ascii=text.ascii + (first-1),
        };
        return ret;
    }
    case TEXT_SHORT_GRAPHEMES: {
        // The full-range case returned above, so only one grapheme can remain
        assert((first == 1 && last == 1) || (first == 2 && last == 2));
        Text_t ret = {
            .tag=TEXT_SHORT_GRAPHEMES,
            .length=1,
            .short_graphemes={text.short_graphemes[first-1]},
        };
        return ret;
    }
    case TEXT_GRAPHEMES: {
        Text_t ret = {
            .tag=TEXT_GRAPHEMES,
            .length=last - first + 1,
            .graphemes=text.graphemes + (first-1),
        };
        return ret;
    }
    case TEXT_SUBTEXT: {
        // Skip pieces that lie entirely before the slice, making `first` and
        // `last` 1-based indices relative to subtexts[0]
        Text_t *subtexts = text.subtexts;
        while (first > subtexts[0].length) {
            first -= subtexts[0].length;
            last -= subtexts[0].length;
            ++subtexts;
        }

        // Count how many pieces the slice touches
        int64_t needed_len = (last - first) + 1;
        int64_t num_subtexts = 0;
        for (int64_t included = 0; included < needed_len; ) {
            if (included == 0)
                included += subtexts[num_subtexts].length - first + 1;
            else
                included += subtexts[num_subtexts].length;
            num_subtexts += 1;
        }

        // `first` and `last` are already 1-based, so they are passed through
        // unchanged to the recursive calls
        if (num_subtexts == 1)
            return Text$slice(subtexts[0], Int64_to_Int(first), Int64_to_Int(last));

        Text_t ret = {
            .length=needed_len,
            .tag=TEXT_SUBTEXT,
            .subtexts=GC_MALLOC(sizeof(Text_t[num_subtexts])),
        };
        for (int64_t i = 0; i < num_subtexts; i++) {
            // The callee clamps `last` to the end of the piece
            ret.subtexts[i] = Text$slice(subtexts[i], Int64_to_Int(first), Int64_to_Int(last));
            first = 1;
            needed_len -= ret.subtexts[i].length;
            last = first + needed_len - 1;
        }
        return ret;
    }
    default: errx(1, "Invalid tag");
    }
}
// Build a Text_t from an array of UTF-32 codepoints.
// If `normalize` is true, the codepoints are first converted to NFC. The input
// is then split into grapheme clusters: a cluster of one codepoint is stored as
// that codepoint, a longer cluster is stored as a synthetic grapheme id.
// Results of up to two graphemes use the inline TEXT_SHORT_GRAPHEMES storage;
// longer results get a GC-allocated array. The input array is not retained.
Text_t text_from_u32(uint32_t *codepoints, size_t num_codepoints, bool normalize)
{
    uint32_t norm_buf[128];
    if (normalize) {
        size_t norm_length = sizeof(norm_buf)/sizeof(norm_buf[0]);
        uint32_t *normalized = u32_normalize(UNINORM_NFC, codepoints, num_codepoints, norm_buf, &norm_length);
        if (!normalized)
            errx(1, "Failed to normalize text");
        codepoints = normalized;
        num_codepoints = norm_length;
    }

    Text_t ret = {
        .length=0,
        .tag=TEXT_SHORT_GRAPHEMES,
    };
    const uint32_t *src = codepoints;
    const uint32_t *end = &codepoints[num_codepoints];
    int32_t *dest = &ret.short_graphemes[0];
    while (src != end) {
        ++ret.length;

        // The third grapheme no longer fits inline: switch to heap storage
        if (ret.tag == TEXT_SHORT_GRAPHEMES && ret.length > 2) {
            int32_t *graphemes = GC_MALLOC_ATOMIC(sizeof(int32_t[num_codepoints])); // May be a slight overallocation
            graphemes[0] = ret.short_graphemes[0];
            graphemes[1] = ret.short_graphemes[1];
            ret.tag = TEXT_GRAPHEMES;
            ret.graphemes = graphemes;
            dest = &graphemes[2];
        }

        const uint32_t *next = u32_grapheme_next(src, end);
        if (next == &src[1]) {
            *dest = (int32_t)*src;
        } else {
            // Synthetic grapheme
            *dest = get_synthetic_grapheme(src, (size_t)(next-src));
        }
        ++dest;
        src = next;
    }
    // u32_normalize() returns malloc'd memory when norm_buf was too small
    if (normalize && codepoints != norm_buf) free(codepoints);
    return ret;
}
// Build a Text_t from a NUL-terminated UTF-8 C string.
// An all-ASCII string of up to 8 bytes is stored inline; a longer all-ASCII
// string is referenced in place (the `str` pointer is stored, not copied).
// Any other string is decoded to UTF-32 and passed to text_from_u32() with
// normalization enabled. A NULL `str` yields an empty text; a string that is
// not valid UTF-8 is a fatal error.
public Text_t Text$from_str(const char *str)
{
    if (!str) return (Text_t){.length=0};

    size_t ascii_span = 0;
    while (str[ascii_span] && isascii(str[ascii_span]))
        ascii_span++;

    if (str[ascii_span] == '\0') { // All ASCII
        Text_t ret = {.length=ascii_span};
        if (ascii_span <= 8) {
            ret.tag = TEXT_SHORT_ASCII;
            for (size_t i = 0; i < ascii_span; i++)
                ret.short_ascii[i] = str[i];
        } else {
            ret.tag = TEXT_ASCII;
            ret.ascii = str;
        }
        return ret;
    } else {
        uint32_t buf[128];
        size_t length = sizeof(buf)/sizeof(buf[0]);
        uint32_t *codepoints = u8_to_u32((uint8_t*)str, ascii_span + strlen(str + ascii_span), buf, &length);
        // u8_to_u32() returns NULL when the input cannot be decoded
        if (!codepoints)
            errx(1, "Invalid UTF-8 text");
        Text_t ret = text_from_u32(codepoints, length, true);
        if (codepoints != buf) free(codepoints);
        return ret;
    }
}
// Append the UTF-8 encoding of `text` to a growable byte buffer.
//   buf      - in/out: the buffer (reallocated with GC_REALLOC when it is too small)
//   capacity - in/out: the number of bytes available in *buf
//   i        - in/out: the number of bytes already used in *buf
// No NUL terminator is written.
static void u8_buf_append(Text_t text, char **buf, size_t *capacity, int64_t *i)
{
    switch (text.tag) {
    case TEXT_ASCII: case TEXT_SHORT_ASCII: {
        if (*i + text.length > (int64_t)*capacity) {
            *capacity = *i + text.length;
            *buf = GC_REALLOC(*buf, *capacity);
        }

        const char *bytes = text.tag == TEXT_ASCII ? text.ascii : text.short_ascii;
        memcpy(*buf + *i, bytes, text.length);
        *i += text.length;
        break;
    }
    case TEXT_GRAPHEMES: case TEXT_SHORT_GRAPHEMES: {
        const int32_t *graphemes = text.tag == TEXT_GRAPHEMES ? text.graphemes : text.short_graphemes;
        // Every grapheme is encoded, including the last one
        for (int64_t g = 0; g < text.length; g++) {
            // A negative grapheme is an id into the synthetic grapheme table
            const uint32_t *codepoints = graphemes[g] < 0 ? synthetic_graphemes[-graphemes[g]-1].codepoints : (const uint32_t*)&graphemes[g];
            size_t num_codepoints = graphemes[g] < 0 ? synthetic_graphemes[-graphemes[g]-1].num_codepoints : 1;
            uint8_t u8_buf[64];
            size_t u8_len = sizeof(u8_buf);
            uint8_t *u8 = u32_to_u8(codepoints, num_codepoints, u8_buf, &u8_len);
            if (!u8)
                errx(1, "Failed to encode text as UTF-8");

            if (*i + (int64_t)u8_len > (int64_t)*capacity) {
                *capacity = *i + u8_len;
                *buf = GC_REALLOC(*buf, *capacity);
            }

            memcpy(*buf + *i, u8, u8_len);
            *i += u8_len;
            // u32_to_u8() returns malloc'd memory when u8_buf was too small
            if (u8 != u8_buf) free(u8);
        }
        break;
    }
    case TEXT_SUBTEXT: {
        for (int64_t s = 0, remaining = text.length; remaining > 0; s++) {
            u8_buf_append(text.subtexts[s], buf, capacity, i);
            remaining -= text.subtexts[s].length;
        }
        break;
    }
    default: break;
    }
}
// Encode `text` as a NUL-terminated UTF-8 string in GC-allocated memory.
// The initial buffer holds one byte per grapheme plus the terminator and is
// grown by u8_buf_append() (and once more here, if needed) as bytes are added.
public const char *Text$as_c_string(Text_t text)
{
    size_t room = text.length + 1;
    char *bytes = GC_MALLOC_ATOMIC(room);
    int64_t used = 0;
    u8_buf_append(text, &bytes, &room, &used);

    // Make sure there is space for the terminating NUL
    if (used + 1 > (int64_t)room) {
        room = used + 1;
        bytes = GC_REALLOC(bytes, room);
    }
    bytes[used] = '\0';
    return bytes;
}
// Expand `text` into a GC-allocated array of UTF-32 codepoints.
// The number of codepoints is stored in `*length` (which must not be NULL).
// Synthetic graphemes are expanded to the codepoints stored in the table.
uint32_t *text_to_u32(Text_t text, size_t *length)
{
    // First pass: work out how many codepoints the result needs
    size_t total = 0;
    if (text.tag == TEXT_ASCII) {
        total = text.length;
    } else {
        iteration_state_t counting = {0, 0};
        for (int64_t pos = 0; pos < text.length; pos++) {
            int32_t g = _next_grapheme(text, &counting, pos);
            total += (g < 0) ? synthetic_graphemes[-g-1].num_codepoints : 1;
        }
    }
    assert(length);
    *length = total;

    // Second pass: write the codepoints, one grapheme cluster at a time
    uint32_t *result = GC_MALLOC_ATOMIC(sizeof(uint32_t[total]));
    uint32_t *out = result;
    iteration_state_t copying = {0, 0};
    for (int64_t pos = 0; pos < text.length; pos++) {
        int32_t g = _next_grapheme(text, &copying, pos);
        if (g >= 0) {
            *out++ = (uint32_t)g;
            continue;
        }
        const uint32_t *cluster = synthetic_graphemes[-g-1].codepoints;
        const uint32_t *cluster_end = cluster + synthetic_graphemes[-g-1].num_codepoints;
        while (cluster != cluster_end)
            *out++ = *cluster++;
    }
    return result;
}
#include "siphash.c"
// Compute (and cache in `text->hash`) the siphash of a text.
//
// Graphemes are fed to the hash two at a time as one 64-bit word; a trailing
// odd grapheme is passed as the final part. The graphemes are read through
// _next_grapheme(), so the hash depends only on the grapheme sequence and not
// on how the text happens to be stored (inline, array, or list of subtexts).
// Text$equal() relies on that: it treats differing cached hashes as proof
// that two texts differ.
//
// A stored hash of 0 means "not computed yet", so a computed hash of 0 is
// replaced with 1.
public uint64_t Text$hash(Text_t *text)
{
    if (text->hash != 0) return text->hash;

    const int64_t length = text->length;
    siphash sh;
    siphashinit(&sh, (size_t)length * sizeof(int32_t), (uint64_t*)TOMO_HASH_KEY);

    union {
        int32_t chunks[2];
        uint64_t whole;
    } tmp;

    iteration_state_t state = {0, 0};
    int64_t i = 0;
    for (; i + 1 < length; i += 2) {
        tmp.chunks[0] = _next_grapheme(*text, &state, i);
        tmp.chunks[1] = _next_grapheme(*text, &state, i + 1);
        siphashadd64bits(&sh, tmp.whole);
    }

    // Odd number of graphemes: the last one is hashed on its own
    tmp.chunks[0] = (i < length) ? _next_grapheme(*text, &state, i) : 0;
    tmp.chunks[1] = 0;
    text->hash = siphashfinish_last_part(&sh, tmp.whole);

    if (text->hash == 0)
        text->hash = 1;
    return text->hash;
}
// Return the grapheme at 0-based position `index` of `text`, or 0 if `index`
// is out of range. Negative return values are synthetic grapheme ids.
//
// For TEXT_SUBTEXT texts, `state` remembers which piece the previous lookup
// landed in, so that sequential lookups do not rescan from the first piece.
// `state` may be NULL, in which case the search starts from the first piece.
int32_t _next_grapheme(Text_t text, iteration_state_t *state, int64_t index)
{
    const bool in_range = (index >= 0 && index < text.length);
    switch (text.tag) {
    case TEXT_ASCII: return in_range ? (int32_t)text.ascii[index] : 0;
    case TEXT_SHORT_ASCII: return in_range ? (int32_t)text.short_ascii[index] : 0;
    case TEXT_GRAPHEMES: return in_range ? text.graphemes[index] : 0;
    case TEXT_SHORT_GRAPHEMES: return in_range ? text.short_graphemes[index] : 0;
    case TEXT_SUBTEXT: {
        iteration_state_t backup_state = {0, 0};
        if (!state) state = &backup_state;

        if (!in_range)
            return 0;

        // Step backwards while the target lies before the current piece. The
        // running total drops by the length of the piece being stepped *into*,
        // i.e. the one just before the current piece.
        while (index < state->sum_of_previous_subtexts && state->subtext > 0) {
            state->subtext -= 1;
            state->sum_of_previous_subtexts -= text.subtexts[state->subtext].length;
        }
        // Step forwards until the target falls inside the current piece
        for (;;) {
            if (index < state->sum_of_previous_subtexts + text.subtexts[state->subtext].length)
                return _next_grapheme(text.subtexts[state->subtext], NULL, index - state->sum_of_previous_subtexts);
            state->sum_of_previous_subtexts += text.subtexts[state->subtext].length;
            state->subtext += 1;
        }
        return 0;
    }
    default: errx(1, "Invalid text");
    }
    return 0;
}
// One-off lookup of the grapheme at 0-based position `index`, using a fresh
// iteration cursor for the search.
int32_t get_grapheme(Text_t text, int64_t index)
{
    iteration_state_t cursor = {0, 0};
    int32_t found = _next_grapheme(text, &cursor, index);
    return found;
}
// Three-way comparison of two texts.
// Graphemes are compared position by position by their codepoint sequences
// (a plain grapheme is a one-codepoint sequence, a synthetic grapheme is the
// sequence stored in the table). If one text is a prefix of the other, the
// shorter text sorts first. Returns a negative, zero or positive value.
public int32_t Text$compare(const Text_t *a, const Text_t *b)
{
    const int64_t a_len = a->length, b_len = b->length;
    const int64_t len = MIN(a_len, b_len);
    iteration_state_t a_state = {0, 0}, b_state = {0, 0};
    for (int64_t i = 0; i < len; i++) {
        int32_t ai = _next_grapheme(*a, &a_state, i);
        int32_t bi = _next_grapheme(*b, &b_state, i);
        if (ai == bi) continue;

        // Only negative values are synthetic ids; 0 is an ordinary codepoint
        // and must never be used as a table index
        const uint32_t *a_codepoints = ai >= 0 ? (const uint32_t*)&ai : synthetic_graphemes[-ai-1].codepoints;
        size_t a_count = ai >= 0 ? 1 : synthetic_graphemes[-ai-1].num_codepoints;
        const uint32_t *b_codepoints = bi >= 0 ? (const uint32_t*)&bi : synthetic_graphemes[-bi-1].codepoints;
        size_t b_count = bi >= 0 ? 1 : synthetic_graphemes[-bi-1].num_codepoints;

        int32_t cmp = u32_cmp2(a_codepoints, a_count, b_codepoints, b_count);
        if (cmp != 0) return cmp;
    }
    return (a_len > b_len) - (a_len < b_len);
}
// Check whether two texts contain the same sequence of graphemes.
// Texts of different lengths, or whose cached hashes are both set and differ,
// are rejected without looking at their contents.
public bool Text$equal(const Text_t *a, const Text_t *b)
{
    if (a->length != b->length)
        return false;
    if (a->hash != 0 && b->hash != 0 && a->hash != b->hash)
        return false;

    iteration_state_t a_cursor = {0, 0}, b_cursor = {0, 0};
    const int64_t count = a->length;
    int64_t pos = 0;
    while (pos < count) {
        int32_t from_a = _next_grapheme(*a, &a_cursor, pos);
        int32_t from_b = _next_grapheme(*b, &b_cursor, pos);
        if (from_a != from_b)
            return false;
        ++pos;
    }
    return true;
}
// Check whether two texts are equal when letter case is ignored.
// The texts must have the same number of graphemes; graphemes that differ are
// compared with u32_casecmp() using the current locale's language.
public bool Text$equal_ignoring_case(Text_t a, Text_t b)
{
    if (a.length != b.length)
        return false;
    int64_t len = a.length;
    iteration_state_t a_state = {0, 0}, b_state = {0, 0};
    const char *language = uc_locale_language();
    for (int64_t i = 0; i < len; i++) {
        int32_t ai = _next_grapheme(a, &a_state, i);
        int32_t bi = _next_grapheme(b, &b_state, i);
        if (ai != bi) {
            const uint32_t *a_codepoints = ai >= 0 ? (uint32_t*)&ai : synthetic_graphemes[-ai-1].codepoints;
            size_t a_len = ai >= 0 ? 1 : synthetic_graphemes[-ai-1].num_codepoints;

            const uint32_t *b_codepoints = bi >= 0 ? (uint32_t*)&bi : synthetic_graphemes[-bi-1].codepoints;
            size_t b_len = bi >= 0 ? 1 : synthetic_graphemes[-bi-1].num_codepoints;

            // `cmp` is only written when the call succeeds, so a failed
            // comparison is treated as "not equal" instead of reading it
            int cmp = 0;
            if (u32_casecmp(a_codepoints, a_len, b_codepoints, b_len, language, UNINORM_NFC, &cmp) != 0)
                return false;
            if (cmp != 0)
                return false;
        }
    }
    return true;
}
// Return an upper-case copy of `text`, using the current locale's language
// and producing NFC-normalized output.
public Text_t Text$upper(Text_t text)
{
    size_t length;
    uint32_t *codepoints = text_to_u32(text, &length);
    const char *language = uc_locale_language();
    uint32_t buf[128];
    // On input, out_len must hold the capacity of buf; on return it holds the
    // length of the result
    size_t out_len = sizeof(buf)/sizeof(buf[0]);
    uint32_t *upper = u32_toupper(codepoints, length, language, UNINORM_NFC, buf, &out_len);
    if (!upper)
        errx(1, "Failed to convert text to upper case");
    Text_t ret = text_from_u32(upper, out_len, false);
    // The result is malloc'd only when it did not fit in buf
    if (upper != buf) free(upper);
    return ret;
}
// Return a lower-case copy of `text`, using the current locale's language
// and producing NFC-normalized output.
public Text_t Text$lower(Text_t text)
{
    size_t length;
    uint32_t *codepoints = text_to_u32(text, &length);
    const char *language = uc_locale_language();
    uint32_t buf[128];
    // On input, out_len must hold the capacity of buf; on return it holds the
    // length of the result
    size_t out_len = sizeof(buf)/sizeof(buf[0]);
    uint32_t *lower = u32_tolower(codepoints, length, language, UNINORM_NFC, buf, &out_len);
    if (!lower)
        errx(1, "Failed to convert text to lower case");
    Text_t ret = text_from_u32(lower, out_len, false);
    // The result is malloc'd only when it did not fit in buf; buf itself lives
    // on the stack and must never be passed to free()
    if (lower != buf) free(lower);
    return ret;
}
// Return a title-case copy of `text`, using the current locale's language
// and producing NFC-normalized output.
public Text_t Text$title(Text_t text)
{
    size_t length;
    uint32_t *codepoints = text_to_u32(text, &length);
    const char *language = uc_locale_language();
    uint32_t buf[128];
    // On input, out_len must hold the capacity of buf; on return it holds the
    // length of the result
    size_t out_len = sizeof(buf)/sizeof(buf[0]);
    uint32_t *title = u32_totitle(codepoints, length, language, UNINORM_NFC, buf, &out_len);
    if (!title)
        errx(1, "Failed to convert text to title case");
    Text_t ret = text_from_u32(title, out_len, false);
    // The result is malloc'd only when it did not fit in buf; buf itself lives
    // on the stack and must never be passed to free()
    if (title != buf) free(title);
    return ret;
}
// Advance `*i` past any whitespace graphemes starting at position `*i`.
// A synthetic grapheme is classified by its first codepoint, the same
// convention used by match_property() and parse_int(), so a multi-codepoint
// cluster is only skipped when it actually starts with whitespace.
static inline void skip_whitespace(Text_t text, int64_t *i)
{
    iteration_state_t state = {0, 0};
    while (*i < text.length) {
        int32_t grapheme = _next_grapheme(text, &state, *i);
        if (grapheme < 0)
            grapheme = synthetic_graphemes[-grapheme-1].codepoints[0];
        if (!uc_is_property_white_space(grapheme))
            return;
        *i += 1;
    }
}
// If the grapheme at position `*i` equals `grapheme`, step `*i` past it and
// return true; otherwise leave `*i` alone and return false.
static inline bool match_grapheme(Text_t text, int64_t *i, int32_t grapheme)
{
    if (*i >= text.length)
        return false;
    if (get_grapheme(text, *i) != grapheme)
        return false;
    *i += 1;
    return true;
}
// If the graphemes starting at position `*i` spell out the C string `str`,
// step `*i` past them and return true; otherwise leave `*i` alone and return
// false.
static inline bool match_str(Text_t text, int64_t *i, const char *str)
{
    iteration_state_t cursor = {0, 0};
    int64_t n = 0;
    for (; str[n] != '\0'; ++n) {
        const int64_t pos = *i + n;
        if (pos >= text.length)
            return false;
        if (_next_grapheme(text, &cursor, pos) != str[n])
            return false;
    }
    *i += n;
    return true;
}
// If the grapheme at position `*i` has the Unicode property `prop`, step `*i`
// past it and return true; otherwise leave `*i` alone and return false.
// A synthetic grapheme is judged by its first codepoint only.
static inline bool match_property(Text_t text, int64_t *i, uc_property_t prop)
{
    if (*i >= text.length)
        return false;

    int32_t codepoint = get_grapheme(text, *i);
    if (codepoint < 0) // TODO: check every codepoint in the cluster?
        codepoint = synthetic_graphemes[-codepoint-1].codepoints[0];

    if (!uc_is_property(codepoint, prop))
        return false;
    *i += 1;
    return true;
}
// Read a run of decimal digits starting at position `*i` and return its value,
// leaving `*i` on the first grapheme that was not consumed. Reading stops at
// the first non-digit, or once the accumulated value reaches INT64_MAX/10.
// A synthetic grapheme is judged by its first codepoint only.
static int64_t parse_int(Text_t text, int64_t *i)
{
    iteration_state_t cursor = {0, 0};
    int64_t total = 0;
    while (true) {
        int32_t codepoint = _next_grapheme(text, &cursor, *i);
        if (codepoint < 0)
            codepoint = synthetic_graphemes[-codepoint-1].codepoints[0];

        const int digit = uc_digit_value(codepoint);
        if (digit < 0 || total >= INT64_MAX/10)
            break;

        total = 10*total + digit;
        *i += 1;
    }
    return total;
}
// Read a property or character name from a pattern, starting at position `*i`
// (after skipping leading whitespace).
// A name is a run of graphemes that fit in one byte and are alphanumeric, ' ',
// '_' or '-'. If the very first grapheme is some other non-synthetic grapheme
// (and not ']'), it is consumed and the result of unicode_character_name() for
// it is returned instead. Returns NULL if no name was found. The returned
// string lives in GC-allocated memory.
const char *get_property_name(Text_t text, int64_t *i)
{
    skip_whitespace(text, i);
    char *name = GC_MALLOC_ATOMIC(UNINAME_MAX);
    size_t used = 0;
    iteration_state_t cursor = {0, 0};
    for (; *i < text.length; *i += 1) {
        int32_t g = _next_grapheme(text, &cursor, *i);
        const bool fits_in_byte = (g & ~0xFF) == 0;
        if (fits_in_byte && (isalnum(g) || g == ' ' || g == '_' || g == '-')) {
            name[used++] = (char)g;
            // Stop once only the terminator's slot is left
            if (used >= UNINAME_MAX - 1)
                break;
            continue;
        }
        if (used == 0 && g >= 0 && g != ']') {
            // Literal character escape: [..[] --> "LEFT SQUARE BRACKET"
            name = unicode_character_name(g, name);
            *i += 1;
            return name;
        }
        break;
    }
    if (used == 0) return NULL;
    name[used] = '\0';
    return name;
}
// EAT1(state, cond): fetch the grapheme at `text_index` into a local named
// `grapheme`, evaluate `cond` (which may refer to `grapheme`), and advance
// `text_index` by one if the condition holds. Evaluates to whether the
// condition held. Uses the variables `text` and `text_index` of the
// surrounding function, and relies on the statement-expression extension.
#define EAT1(state, cond) ({\
int32_t grapheme = _next_grapheme(text, state, text_index); \
bool success = (cond); \
if (success) text_index += 1; \
success; })
// EAT_MANY(state, cond): apply EAT1 repeatedly until it fails; evaluates to the
// number of graphemes consumed.
#define EAT_MANY(state, cond) ({ int64_t n = 0; while (EAT1(state, cond)) { n += 1; } n; })
// Try to match an email address starting at position `text_index`.
// Returns the number of graphemes matched, or -1 if there is no match.
//
//   email     = local "@" domain
//   local     = 1-64 ([a-zA-Z0-9!#$%&*+/=?^_`.{|}~] | non-ascii)
//   domain    = dns-label ("." dns-label)*
//   dns-label = 1-63 ([a-zA-Z0-9-] | non-ascii)
//
// The match is rejected if the preceding grapheme is alphabetic.
int64_t match_email(Text_t text, int64_t text_index)
{
    iteration_state_t state = {0, 0};
    if (text_index > 0) {
        int32_t before = _next_grapheme(text, &state, text_index - 1);
        if (before < 0)
            before = synthetic_graphemes[-before-1].codepoints[0];
        if (uc_is_property_alphabetic(before))
            return -1;
    }

    const int64_t start_index = text_index;

    // Local part:
    static const char *allowed_local = "!#$%&*+/=?^_`.{|}~";
    for (int64_t local_len = 1;
         EAT1(&state, (grapheme & ~0x7F) || isalnum((char)grapheme) || strchr(allowed_local, (char)grapheme));
         ++local_len) {
        if (local_len > 64) return -1;
    }

    if (!EAT1(&state, grapheme == '@'))
        return -1;

    // Host: one or more labels separated by dots
    int64_t host_len = 0;
    for (;;) {
        int64_t label_len = 0;
        while (EAT1(&state, (grapheme & ~0x7F) || isalnum((char)grapheme) || grapheme == '-')) {
            label_len += 1;
            if (label_len > 63) return -1;
        }

        if (label_len == 0)
            return -1;

        host_len += label_len;
        if (host_len > 255)
            return -1;
        host_len += 1; // Count the separating dot as well

        if (!EAT1(&state, grapheme == '.'))
            break;
    }

    return text_index - start_index;
}
// Try to match an IPv6 address starting at position `text_index`: up to eight
// groups of at most four hex digits separated by ':', with "::" allowed once.
// Returns the number of graphemes matched, or -1 if there is no match.
// The match is rejected if the preceding grapheme is an ASCII hex digit or ':'.
int64_t match_ipv6(Text_t text, int64_t text_index)
{
    iteration_state_t state = {0, 0};
    if (text_index > 0) {
        int32_t prev_codepoint = _next_grapheme(text, &state, text_index - 1);
        // Only ASCII values may be handed to <ctype.h>; synthetic (negative)
        // and non-ASCII graphemes fail the mask test and are never digits
        if (!(prev_codepoint & ~0x7F) && (isxdigit(prev_codepoint) || prev_codepoint == ':'))
            return -1;
    }
    int64_t start_index = text_index;
    const int NUM_CLUSTERS = 8;
    bool double_colon_used = false;
    for (int cluster = 0; cluster < NUM_CLUSTERS; cluster++) {
        for (int digits = 0; digits < 4; digits++) {
            if (!EAT1(&state, !(grapheme & ~0x7F) && isxdigit(grapheme)))
                break;
        }
        if (EAT1(&state, !(grapheme & ~0x7F) && isxdigit(grapheme)))
            return -1; // Too many digits

        if (cluster == NUM_CLUSTERS-1) {
            break;
        } else if (!EAT1(&state, grapheme == ':')) {
            if (double_colon_used)
                break;
            return -1;
        }

        if (EAT1(&state, grapheme == ':')) {
            if (double_colon_used)
                return -1;
            double_colon_used = true;
        }
    }
    return text_index - start_index;
}
// Try to match an IPv4 address starting at position `text_index`: four groups
// of one to three decimal digits separated by '.'.
// Returns the number of graphemes matched, or -1 if there is no match.
// The match is rejected if the preceding grapheme is an ASCII digit or '.'.
static int64_t match_ipv4(Text_t text, int64_t text_index)
{
    iteration_state_t state = {0, 0};
    if (text_index > 0) {
        int32_t prev_codepoint = _next_grapheme(text, &state, text_index - 1);
        // Only ASCII values may be handed to <ctype.h>; synthetic (negative)
        // and non-ASCII graphemes fail the mask test and are never digits
        if (!(prev_codepoint & ~0x7F) && (isdigit(prev_codepoint) || prev_codepoint == '.'))
            return -1;
    }
    int64_t start_index = text_index;

    const int NUM_CLUSTERS = 4;
    for (int cluster = 0; cluster < NUM_CLUSTERS; cluster++) {
        for (int digits = 0; digits < 3; digits++) {
            if (!EAT1(&state, !(grapheme & ~0x7F) && isdigit(grapheme))) {
                if (digits == 0) return -1;
                break;
            }
        }

        if (EAT1(&state, !(grapheme & ~0x7F) && isdigit(grapheme)))
            return -1; // Too many digits

        if (cluster == NUM_CLUSTERS-1)
            break;
        else if (!EAT1(&state, grapheme == '.'))
            return -1;
    }
    return (text_index - start_index);
}
// Try to match a URI starting at position `text_index`.
// Returns the number of graphemes matched, or -1 if there is no match.
//
//   URI = scheme ":" ["//" authority] path ["?" query] ["#" fragment]
//   scheme = [a-zA-Z] [a-zA-Z0-9+.-]
//   authority = [userinfo "@"] host [":" port]
//
// The match is rejected if the preceding grapheme is alphabetic.
int64_t match_uri(Text_t text, int64_t text_index)
{
    iteration_state_t state = {0, 0};
    if (text_index > 0) {
        int32_t prev_codepoint = _next_grapheme(text, &state, text_index - 1);
        if (prev_codepoint < 0)
            prev_codepoint = synthetic_graphemes[-prev_codepoint-1].codepoints[0];
        if (uc_is_property_alphabetic(prev_codepoint))
            return -1;
    }

    int64_t start_index = text_index;

    // Scheme:
    // (Only ASCII values may be handed to <ctype.h>, so the mask test comes first)
    if (!EAT1(&state, !(grapheme & ~0x7F) && isalpha(grapheme)))
        return -1;

    EAT_MANY(&state, !(grapheme & ~0x7F) && (isalnum(grapheme) || grapheme == '+' || grapheme == '.' || grapheme == '-'));

    if (!match_grapheme(text, &text_index, ':'))
        return -1;

    // Authority:
    if (match_str(text, &text_index, "//")) {
        int64_t authority_start = text_index;
        // Username or host:
        static const char *forbidden = "#?:@ \t\r\n<>[]{}\\^|\"`/";
        if (EAT_MANY(&state, (grapheme & ~0x7F) || !strchr(forbidden, (char)grapheme)) == 0)
            return -1;

        if (EAT1(&state, grapheme == '@')) {
            // Found a username, now get a host:
            if (EAT_MANY(&state, (grapheme & ~0x7F) || !strchr(forbidden, (char)grapheme)) == 0)
                return -1;
        } else {
            // NOTE(review): this branch currently has no effect on the result.
            // An IPv4 host has already been consumed by the generic host rule
            // above (and `ip` is not written back in that case), while a
            // bracketed IPv6 host starts with '[', which is in `forbidden`, so
            // the function has already returned -1 before reaching this point.
            // Supporting "[ipv6]" hosts needs the bracket check to run before
            // the generic host rule -- confirm the intended behavior.
            int64_t ip = authority_start;
            int64_t ipv4_len = match_ipv4(text, ip);
            if (ipv4_len > 0) {
                ip += ipv4_len;
            } else if (match_grapheme(text, &ip, '[')) {
                ip += match_ipv6(text, ip);
                if (ip > authority_start + 1 && match_grapheme(text, &ip, ']'))
                    text_index = ip;
            }
        }

        // Port:
        if (EAT1(&state, grapheme == ':')) {
            if (EAT_MANY(&state, !(grapheme & ~0x7F) && isdigit(grapheme)) == 0)
                return -1;
        }
        if (!EAT1(&state, grapheme == '/'))
            return (text_index - start_index); // No path
    } else {
        // Optional path root:
        EAT1(&state, grapheme == '/');
    }

    // Path:
    static const char *non_path = " \"#?<>[]{}\\^`|";
    EAT_MANY(&state, (grapheme & ~0x7F) || !strchr(non_path, (char)grapheme));

    if (EAT1(&state, grapheme == '?')) { // Query
        static const char *non_query = " \"#<>[]{}\\^`|";
        EAT_MANY(&state, (grapheme & ~0x7F) || !strchr(non_query, (char)grapheme));
    }

    if (EAT1(&state, grapheme == '#')) { // Fragment
        static const char *non_fragment = " \"#<>[]{}\\^`|";
        EAT_MANY(&state, (grapheme & ~0x7F) || !strchr(non_fragment, (char)grapheme));
    }
    return text_index - start_index;
}
int64_t match(Text_t text, Text_t pattern, int64_t text_index, int64_t pattern_index)
{
if (pattern_index >= pattern.length) return 0;
int64_t start_index = text_index;
iteration_state_t pattern_state = {0, 0}, text_state = {0, 0};
while (pattern_index < pattern.length) {
int64_t old_pat_index = pattern_index;
if (match_str(pattern, &pattern_index, "[..")) {
skip_whitespace(pattern, &pattern_index);
int64_t min, max;
if (uc_is_digit(_next_grapheme(pattern, &pattern_state, pattern_index))) {
min = parse_int(pattern, &pattern_index);
skip_whitespace(pattern, &pattern_index);
if (match_grapheme(pattern, &pattern_index, '+')) {
max = INT64_MAX;
} else if (match_grapheme(pattern, &pattern_index, '-')) {
max = parse_int(pattern, &pattern_index);
} else {
max = min;
}
} else {
min = 1, max = INT64_MAX;
}
skip_whitespace(pattern, &pattern_index);
bool want_to_match = !match_grapheme(pattern, &pattern_index, '!');
const char *prop_name = get_property_name(pattern, &pattern_index);
skip_whitespace(pattern, &pattern_index);
if (!match_grapheme(pattern, &pattern_index, ']'))
errx(1, "Missing closing ']' in pattern: \"%T\"", &pattern);
int64_t before_group = text_index;
bool any = false;
uc_property_t prop;
int32_t specific_codepoint = UNINAME_INVALID;
#define FAIL() ({ if (min < 1) { text_index = before_group; continue; } else { return -1; } })
if (prop_name) {
switch (tolower(prop_name[0])) {
case 'd':
if (strcasecmp(prop_name, "digit") == 0) {
prop = UC_PROPERTY_DECIMAL_DIGIT;
goto got_prop;
}
break;
case 'e':
if (strcasecmp(prop_name, "end") == 0) {
if (text_index != text.length)
FAIL();
continue;
} else if (prop_name && strcasecmp(prop_name, "email") == 0) {
int64_t len = match_email(text, text_index);
if (len < 0)
FAIL();
text_index += len;
continue;
} else if (prop_name && strcasecmp(prop_name, "emoji") == 0) {
prop = UC_PROPERTY_EMOJI;
goto got_prop;
}
break;
case 'i':
if (prop_name && strcasecmp(prop_name, "id") == 0) {
if (!EAT1(&text_state, uc_is_property(grapheme, UC_PROPERTY_XID_START)))
FAIL();
EAT_MANY(&text_state, uc_is_property(grapheme, UC_PROPERTY_XID_CONTINUE));
continue;
} else if (prop_name && strcasecmp(prop_name, "ipv4") == 0) {
int64_t len = match_ipv4(text, text_index);
if (len < 0)
FAIL();
text_index += len;
continue;
} else if (prop_name && strcasecmp(prop_name, "ipv6") == 0) {
int64_t len = match_ipv6(text, text_index);
if (len < 0)
FAIL();
text_index += len;
continue;
} else if (prop_name && strcasecmp(prop_name, "ip") == 0) {
int64_t len = match_ipv6(text, text_index);
if (len < 0)
len = match_ipv4(text, text_index);
if (len < 0)
FAIL();
text_index += len;
continue;
}
break;
case 's':
if (strcasecmp(prop_name, "start") == 0) {
if (text_index != 0) return -1;
continue;
}
break;
case 'u':
if (prop_name && strcasecmp(prop_name, "uri") == 0) {
int64_t len = match_uri(text, text_index);
if (len < 0)
FAIL();
text_index += len;
continue;
} else if (prop_name && strcasecmp(prop_name, "url") == 0) {
int64_t lookahead = text_index;
if (!(match_str(text, &lookahead, "https:")
|| match_str(text, &lookahead, "http:")
|| match_str(text, &lookahead, "ftp:")
|| match_str(text, &lookahead, "wss:")
|| match_str(text, &lookahead, "ws:")))
FAIL();
int64_t len = match_uri(text, text_index);
if (len < 0)
FAIL();
text_index += len;
continue;
}
break;
}
prop = uc_property_byname(prop_name);
if (!uc_property_is_valid(prop)) {
specific_codepoint = unicode_name_character(prop_name);
if (specific_codepoint == UNINAME_INVALID)
errx(1, "Not a valid property or character name: %s", prop_name);
}
} else {
any = true;
prop = UC_PROPERTY_PRIVATE_USE;
}
got_prop:;
if (min == 0 && pattern_index < pattern.length) {
int64_t match_len = match(text, pattern, text_index, pattern_index);
if (match_len >= 0)
return (text_index - start_index) + match_len;
}
for (int64_t count = 0; count < max; ) {
int32_t grapheme = _next_grapheme(text, &text_state, text_index);
if (grapheme < 0)
grapheme = synthetic_graphemes[-grapheme-1].codepoints[0];
bool success;
if (any)
success = true;
else if (specific_codepoint != UNINAME_INVALID)
success = (grapheme == specific_codepoint);
else
success = uc_is_property(grapheme, prop);
if (success != want_to_match) {
if (count < min) return -1;
else break;
}
text_index += 1;
count += 1;
if (count >= min) {
if (pattern_index < pattern.length) {
int64_t match_len = match(text, pattern, text_index, pattern_index);
if (match_len >= 0) {
return (text_index - start_index) + match_len;
}
} else if (text_index >= text.length) {
break;
}
}
}
} else if (uc_is_property(_next_grapheme(pattern, &pattern_state, pattern_index), UC_PROPERTY_QUOTATION_MARK)
&& (pattern_index += 1, match_grapheme(pattern, &pattern_index, '?'))) {
// Quotation: "?", '?', etc
int32_t open = _next_grapheme(pattern, &pattern_state, pattern_index-2);
if (!match_grapheme(text, &text_index, open)) return -1;
int32_t close = open;
uc_mirror_char(open, (uint32_t*)&close);
if (!match_grapheme(pattern, &pattern_index, close))
errx(1, "I expected a closing brace");
while (text_index < text.length) {
int32_t c = _next_grapheme(text, &text_state, text_index);
if (c == close)
return (text_index - start_index);
if (c == '\\' && text_index < text.length) {
text_index += 2;
} else {
text_index += 1;
}
}
return -1;
} else if (uc_is_property(_next_grapheme(pattern, &pattern_state, pattern_index), UC_PROPERTY_PAIRED_PUNCTUATION)
&& (pattern_index += 1, match_grapheme(pattern, &pattern_index, '?'))) {
// Nested punctuation: (?), [?], etc
int32_t open = _next_grapheme(pattern, &pattern_state, pattern_index-2);
if (!match_grapheme(text, &text_index, open)) return -1;
int32_t close = open;
uc_mirror_char(open, (uint32_t*)&close);
if (!match_grapheme(pattern, &pattern_index, close))
errx(1, "I expected a closing brace");
int64_t depth = 1;
for (; depth > 0 && text_index < text.length; ++text_index) {
int32_t c = _next_grapheme(text, &text_state, text_index);
if (c == open)
depth += 1;
else if (c == close)
depth -= 1;
}
if (depth > 0) return -1;
} else {
// Plain character:
pattern_index = old_pat_index;
int32_t pat_grapheme = _next_grapheme(pattern, &pattern_state, pattern_index);
// if (pattern_index == 0 && text_index > 0) {
// int32_t pat_codepoint = pat_grapheme;
// if (pat_codepoint < 0)
// pat_codepoint = synthetic_graphemes[-pat_codepoint-1].codepoints[0];
// int32_t prev_codepoint = _next_grapheme(text, &text_state, text_index - 1);
// if (prev_codepoint < 0)
// prev_codepoint = synthetic_graphemes[-prev_codepoint-1].codepoints[0];
// if (uc_is_property_alphabetic(pat_codepoint) && uc_is_property_alphabetic(prev_codepoint))
// return -1;
// }
int32_t text_grapheme = _next_grapheme(text, &text_state, text_index);
if (pat_grapheme != text_grapheme)
return -1;
pattern_index += 1;
text_index += 1;
// if (pattern_index == pattern.length && text_index < text.length) {
// int32_t pat_codepoint = pat_grapheme;
// if (pat_codepoint < 0)
// pat_codepoint = synthetic_graphemes[-pat_codepoint-1].codepoints[0];
// int32_t next_codepoint = _next_grapheme(text, &text_state, text_index);
// if (next_codepoint < 0)
// next_codepoint = synthetic_graphemes[-next_codepoint-1].codepoints[0];
// if (uc_is_property_alphabetic(pat_codepoint) && uc_is_property_alphabetic(next_codepoint))
// return -1;
// }
}
}
if (text_index >= text.length && pattern_index < pattern.length)
return -1;
return (text_index - start_index);
}
#undef EAT1
#undef EAT_MANY
// Search `text` for the first position at or after `from_index` (1-based)
// where `pattern` matches.
//
// Returns the 1-based index of the match, or I(0) when nothing matches.
// If `match_length` is non-NULL it receives the length reported by match()
// for the hit, or -1 when there is no match.
public Int_t Text$find(Text_t text, Text_t pattern, Int_t from_index, int64_t *match_length)
{
    int32_t first = get_grapheme(pattern, 0);
    // The skip-ahead optimization below is only valid when the pattern begins
    // with a literal grapheme. match() gives special meaning to a leading
    // quotation mark or paired punctuation (and presumably to '['), so those
    // patterns have to be tried at every position.
    bool find_first = (first != '['
                       && !uc_is_property(first, UC_PROPERTY_QUOTATION_MARK)
                       && !uc_is_property(first, UC_PROPERTY_PAIRED_PUNCTUATION));

    iteration_state_t text_state = {0, 0};
    for (int64_t i = Int_to_Int64(from_index, false)-1; i < text.length; i++) {
        if (find_first) {
            // Optimization: quickly skip ahead to first char in pattern:
            while (i < text.length && _next_grapheme(text, &text_state, i) != first)
                ++i;
            // The leading literal does not occur in the rest of the text, so
            // no match is possible. Stop here instead of asking match() to
            // read a grapheme at index text.length.
            if (i >= text.length)
                break;
        }

        int64_t m = match(text, pattern, i, 0);
        if (m >= 0) {
            if (match_length)
                *match_length = m;
            return I(i+1);
        }
    }

    if (match_length)
        *match_length = -1;
    return I(0);
}
// Argument-info callback paired with printf_text() (its signature has the
// shape of a glibc custom-printf "arginfo" function).
//
// Declares that the conversion takes a single argument: a pointer, sized as
// a `Text_t*`. Returns the argument count (1), or -1 when the caller gave no
// room in `argtypes`/`sizes` (n == 0).
public int printf_text_size(const struct printf_info *info, size_t n, int argtypes[n], int sizes[n])
{
    (void)info;  // the conversion's flags/width are not consulted

    if (n == 0)
        return -1;

    sizes[0] = sizeof(Text_t*);
    argtypes[0] = PA_POINTER;
    return 1;
}
public int printf_text(FILE *stream, const struct printf_info *info, const void *const args[])
{
(void)info;
Text_t t = **(Text_t**)args[0];
return Text$print(stream, t);
}
// Produce the display form of a Text value (or of the Text type itself).
//
// - `text == NULL`: returns the type's name, i.e. `info->TextInfo.lang` when
//   available and "Text" otherwise.
// - otherwise: returns the quoted form of `*text` (see Text$quoted). When
//   `info` names a language and is not the plain `$Text` type, the result is
//   prefixed with "$" plus the language name (preceded by an ANSI bold
//   escape when `colorize` is set).
public Text_t Text$as_text(const void *text, bool colorize, const TypeInfo *info)
{
    bool has_lang = (info != NULL) && (info->TextInfo.lang != NULL);

    if (text == NULL)
        return Text$from_str(has_lang ? info->TextInfo.lang : "Text");

    Text_t quoted = Text$quoted(*(Text_t*)text, colorize);
    if (!has_lang || info == &$Text)
        return quoted;

    // Custom text language: tag the literal with its "$lang" prefix.
    return Text$concat(Text$from_str(colorize ? "\x1b[1m$" : "$"), Text$from_str(info->TextInfo.lang), quoted);
}
// Build a double-quoted, escaped rendering of `text`.
//
// Control characters with a conventional escape become \a \b \e \f \n \r \t
// \v, the quote and backslash become \" and \\, and every other grapheme
// below 0x20 (plus 0x7F) becomes \xHH. All other graphemes are copied
// unchanged. With `colorize`, ANSI escape sequences are emitted around the
// string and around each escape. The result is a TEXT_GRAPHEMES text.
public Text_t Text$quoted(Text_t text, bool colorize)
{
    // TODO: optimize for ASCII and short strings
    array_t graphemes = {.atomic=1};

    // Every insertion below uses index I_small(0) (presumably "append"; the
    // output is built front to back).
    #define add_char(c) Array$insert_value(&graphemes, (uint32_t)c, I_small(0), sizeof(uint32_t))
    #define add_str(s) ({ for (const char *_c = s; *_c; ++_c) Array$insert_value(&graphemes, (uint32_t)*_c, I_small(0), sizeof(uint32_t)); })
    // Backslash + `str`, wrapped in the escape highlight when colorizing:
    #define add_escaped(str) ({ if (colorize) add_str("\x1b[34;1m"); add_char('\\'); add_str(str); if (colorize) add_str("\x1b[0;35m"); })

    // Opening quote:
    if (colorize)
        add_str("\x1b[35m\"");
    else
        add_char('"');

    iteration_state_t state = {0, 0};
    for (int64_t i = 0; i < text.length; i++) {
        int32_t g = _next_grapheme(text, &state, i);

        // Graphemes that have a one-letter escape:
        const char *escape = NULL;
        switch (g) {
        case '\a':   escape = "a";  break;
        case '\b':   escape = "b";  break;
        case '\x1b': escape = "e";  break;
        case '\f':   escape = "f";  break;
        case '\n':   escape = "n";  break;
        case '\r':   escape = "r";  break;
        case '\t':   escape = "t";  break;
        case '\v':   escape = "v";  break;
        case '"':    escape = "\""; break;
        case '\\':   escape = "\\"; break;
        default: break;
        }

        if (escape) {
            add_escaped(escape);
        } else if ((g >= 0x00 && g <= 0x1F) || g == 0x7F) {
            // Remaining control characters are written as \xHH.
            char hex[4];
            snprintf(hex, sizeof(hex), "x%02X", (unsigned)g);
            add_escaped(hex);
        } else {
            add_char(g);
        }
    }

    // Closing quote:
    if (colorize)
        add_str("\"\x1b[m");
    else
        add_char('"');

    return (Text_t){.length=graphemes.length, .tag=TEXT_GRAPHEMES, .graphemes=graphemes.data};
    #undef add_str
    #undef add_char
    #undef add_escaped
}
// Return a copy of `text` with every match of `pattern` replaced by
// `replacement`.
//
// Matches are located left to right with Text$find() and never overlap.
// Indices are 1-based and slice bounds are inclusive. A zero-length match
// (for example a pattern that only asserts a position) inserts `replacement`
// at that position and the search then resumes one grapheme later, so the
// loop always makes progress.
public Text_t Text$replace(Text_t text, Text_t pattern, Text_t replacement)
{
    Text_t ret = {.length=0};
    int64_t copy_from = 1;    // first grapheme of `text` not yet copied into `ret`
    int64_t search_from = 1;  // where the next Text$find() starts looking

    for (;;) {
        int64_t len = 0;
        Int_t found = Text$find(text, pattern, Int64_to_Int(search_from), &len);
        if (I_is_zero(found)) break;

        int64_t pos = Int_to_Int64(found, false);
        if (pos > copy_from) {
            // Keep the unmatched text that precedes this match.
            ret = Text$concat(
                ret,
                Text$slice(text, Int64_to_Int(copy_from), Int64_to_Int(pos - 1)),
                replacement
            );
        } else {
            ret = concat2(ret, replacement);
        }

        copy_from = pos + len;
        // An empty match consumes nothing; searching from the same index
        // again would find it forever, so step past one grapheme. That
        // grapheme is still copied later because `copy_from` did not move.
        search_from = (len > 0) ? copy_from : copy_from + 1;
    }

    // Whatever follows the last match is copied verbatim.
    if (copy_from <= text.length) {
        ret = concat2(ret, Text$slice(text, Int64_to_Int(copy_from), Int64_to_Int(text.length)));
    }
    return ret;
}
// Build a Text from a printf-style format string and arguments.
//
// The output is first formatted into a small stack buffer. If it fits and is
// pure ASCII it is stored inline as a TEXT_SHORT_ASCII text; otherwise it is
// formatted again into a GC-allocated buffer and converted with
// Text$from_str(). If formatting fails (vsnprintf returns a negative value)
// an empty text is returned.
public Text_t Text$format(const char *fmt, ...)
{
    va_list args, args_again;
    va_start(args, fmt);
    // vsnprintf() leaves its va_list in an indeterminate state, so the
    // possible second formatting pass needs its own copy.
    va_copy(args_again, args);

    char buf[9];
    int len = vsnprintf(buf, sizeof(buf), fmt, args);
    va_end(args);

    Text_t ret;
    if (len < 0) {
        // Formatting error: don't build a text with a negative length.
        ret = (Text_t){.length=0, .tag=TEXT_SHORT_ASCII};
    } else {
        // The inline representation is tagged as ASCII, so it is only used
        // when the whole result fits in `buf` and has no byte >= 0x80.
        bool fits_inline = (len < (int)sizeof(buf));
        for (int i = 0; fits_inline && i < len; i++)
            fits_inline = ((unsigned char)buf[i] < 0x80);

        if (fits_inline) {
            ret = (Text_t){
                .length=len,
                .tag=TEXT_SHORT_ASCII,
            };
            for (int i = 0; i < len; i++)
                ret.short_ascii[i] = buf[i];
        } else {
            char *str = GC_MALLOC_ATOMIC((size_t)len + 1);
            vsnprintf(str, (size_t)len + 1, fmt, args_again);
            ret = Text$from_str(str);
        }
    }

    va_end(args_again);
    return ret;
}
// Type descriptor for the plain Text type: records the size and alignment of
// Text_t, tags the descriptor as TextInfo, and names the language "Text".
// Text$as_text() compares against this object's address to decide whether a
// "$lang" prefix should be printed.
public const TypeInfo $Text = {
.size=sizeof(Text_t),
.align=__alignof__(Text_t),
.tag=TextInfo,
.TextInfo={.lang="Text"},
};
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0