aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ast.c6
-rw-r--r--builtins/array.c67
-rw-r--r--builtins/array.h8
-rw-r--r--builtins/bool.c23
-rw-r--r--builtins/bool.h4
-rw-r--r--builtins/c_string.c8
-rw-r--r--builtins/c_string.h4
-rw-r--r--builtins/channel.c17
-rw-r--r--builtins/channel.h4
-rw-r--r--builtins/datatypes.h15
-rw-r--r--builtins/functions.c94
-rw-r--r--builtins/functions.h18
-rw-r--r--builtins/integers.c152
-rw-r--r--builtins/integers.h22
-rw-r--r--builtins/memory.c12
-rw-r--r--builtins/memory.h2
-rw-r--r--builtins/nums.c55
-rw-r--r--builtins/nums.h20
-rw-r--r--builtins/pointer.c59
-rw-r--r--builtins/pointer.h3
-rw-r--r--builtins/range.c16
-rw-r--r--builtins/table.c46
-rw-r--r--builtins/table.h4
-rw-r--r--builtins/text.c1597
-rw-r--r--builtins/text.h77
-rw-r--r--builtins/thread.c8
-rw-r--r--builtins/thread.h2
-rw-r--r--builtins/types.c25
-rw-r--r--builtins/types.h9
-rw-r--r--builtins/util.c51
-rw-r--r--builtins/util.h2
-rw-r--r--builtins/where.c16
-rw-r--r--compile.c78
-rw-r--r--parse.c3
-rw-r--r--repl.c18
-rw-r--r--structs.c2
-rw-r--r--tomo.c2
-rw-r--r--typecheck.c5
38 files changed, 1857 insertions, 697 deletions
diff --git a/ast.c b/ast.c
index 6f1d20c9..b380a04f 100644
--- a/ast.c
+++ b/ast.c
@@ -35,9 +35,9 @@ static CORD optional_tagged_type(const char *tag, type_ast_t *ast);
CORD xml_escape(CORD text)
{
- text = Text$replace(text, "&", "&", I(-1));
- text = Text$replace(text, "<", "&lt;", I(-1));
- text = Text$replace(text, ">", "&gt;", I(-1));
+ text = CORD_replace(text, "&", "&amp;");
+ text = CORD_replace(text, "<", "&lt;");
+ text = CORD_replace(text, ">", "&gt;");
return text;
}
diff --git a/builtins/array.c b/builtins/array.c
index bf1fe4d8..63539559 100644
--- a/builtins/array.c
+++ b/builtins/array.c
@@ -12,12 +12,14 @@
#include "array.h"
#include "functions.h"
-#include "halfsiphash.h"
#include "integers.h"
#include "table.h"
+#include "text.h"
#include "types.h"
#include "util.h"
+#include "siphash.c"
+
static inline int64_t get_padded_item_size(const TypeInfo *info)
{
int64_t size = info->ArrayInfo.item->size;
@@ -532,67 +534,38 @@ public bool Array$equal(const array_t *x, const array_t *y, const TypeInfo *type
return (Array$compare(x, y, type) == 0);
}
-public CORD Array$as_text(const array_t *arr, bool colorize, const TypeInfo *type)
+public Text_t Array$as_text(const array_t *arr, bool colorize, const TypeInfo *type)
{
if (!arr)
- return CORD_all("[", generic_as_text(NULL, false, type->ArrayInfo.item), "]");
+ return Text$concat(Text$from_str("["), generic_as_text(NULL, false, type->ArrayInfo.item), Text$from_str("]"));
const TypeInfo *item_type = type->ArrayInfo.item;
- CORD c = "[";
+ Text_t text = Text$from_str("[");
for (int64_t i = 0; i < arr->length; i++) {
if (i > 0)
- c = CORD_cat(c, ", ");
- CORD item_cord = generic_as_text(arr->data + i*arr->stride, colorize, item_type);
- c = CORD_cat(c, item_cord);
+ text = Text$concat(text, Text$from_str(", "));
+ Text_t item_text = generic_as_text(arr->data + i*arr->stride, colorize, item_type);
+ text = Text$concat(text, item_text);
}
- c = CORD_cat(c, "]");
- return c;
+ text = Text$concat(text, Text$from_str("]"));
+ return text;
}
-public uint32_t Array$hash(const array_t *arr, const TypeInfo *type)
+public uint64_t Array$hash(const array_t *arr, const TypeInfo *type)
{
- // Array hash is calculated as a rolling, compacting hash of the length of the array, followed by
- // the hashes of its items (or the items themselves if they're small plain data)
- // In other words, it reads in a chunk of items or item hashes, then when it fills up the chunk,
- // hashes it down to a single item to start the next chunk. This repeats until the end, when it
- // hashes the last chunk down to a uint32_t.
const TypeInfo *item = type->ArrayInfo.item;
- if (item->tag == PointerInfo || (item->tag == CustomInfo && item->CustomInfo.hash == NULL)) { // Raw data hash
- uint8_t hash_batch[4 + 8*item->size];
- memset(hash_batch, 0, sizeof(hash_batch));
- uint8_t *p = hash_batch, *end = hash_batch + sizeof(hash_batch);
- int64_t length = arr->length;
- *p = (uint32_t)length;
- p += sizeof(uint32_t);
- for (int64_t i = 0; i < arr->length; i++) {
- if (p >= end) {
- uint32_t chunk_hash;
- halfsiphash(&hash_batch, sizeof(hash_batch), TOMO_HASH_KEY, (uint8_t*)&chunk_hash, sizeof(chunk_hash));
- p = hash_batch;
- *(uint32_t*)p = chunk_hash;
- p += sizeof(uint32_t);
- }
- memcpy((p += item->size), arr->data + i*arr->stride, item->size);
- }
- uint32_t hash;
- halfsiphash(&hash_batch, ((int64_t)p) - ((int64_t)hash_batch), TOMO_HASH_KEY, (uint8_t*)&hash, sizeof(hash));
- return hash;
+ siphash sh;
+ siphashinit(&sh, sizeof(uint64_t[arr->length]), (uint64_t*)TOMO_HASH_KEY);
+ if (item->tag == PointerInfo || (item->tag == CustomInfo && item->CustomInfo.hash == NULL && item->size == sizeof(void*))) { // Raw data hash
+ for (int64_t i = 0; i < arr->length; i++)
+ siphashadd64bits(&sh, (uint64_t)(arr->data + i*arr->stride));
} else {
- uint32_t hash_batch[16] = {(uint32_t)arr->length};
- uint32_t *p = &hash_batch[1], *end = hash_batch + sizeof(hash_batch)/sizeof(hash_batch[0]);
for (int64_t i = 0; i < arr->length; i++) {
- if (p >= end) {
- uint64_t chunk_hash;
- halfsiphash(&hash_batch, sizeof(hash_batch), TOMO_HASH_KEY, (uint8_t*)&chunk_hash, sizeof(chunk_hash));
- p = hash_batch;
- *(p++) = chunk_hash;
- }
- *(p++) = generic_hash(arr->data + i*arr->stride, item);
+ uint64_t item_hash = generic_hash(arr->data + i*arr->stride, item);
+ siphashadd64bits(&sh, item_hash);
}
- uint32_t hash;
- halfsiphash(&hash_batch, ((int64_t)p) - ((int64_t)hash_batch), TOMO_HASH_KEY, (uint8_t*)&hash, sizeof(hash));
- return hash;
}
+ return siphashfinish_last_part(&sh, 0);
}
static void siftdown(array_t *heap, int64_t startpos, int64_t pos, closure_t comparison, int64_t padded_item_size)
diff --git a/builtins/array.h b/builtins/array.h
index 47d10fd1..9dcdca6f 100644
--- a/builtins/array.h
+++ b/builtins/array.h
@@ -16,7 +16,7 @@
const array_t arr = arr_expr; int64_t index = index_expr; \
int64_t off = index + (index < 0) * (arr.length + 1) - 1; \
if (__builtin_expect(off < 0 || off >= arr.length, 0)) \
- fail_source(filename, start, end, "Invalid array index: %r (array has length %ld)\n", Int64$as_text(&index, no, NULL), arr.length); \
+ fail_source(filename, start, end, "Invalid array index: %s (array has length %ld)\n", Text$as_c_string(Int64$as_text(&index, no, NULL)), arr.length); \
(item_type*)(arr.data + arr.stride * off);})
#define Array_get_unchecked(type, x, i) *({ const array_t arr = x; int64_t index = i; \
int64_t off = index + (index < 0) * (arr.length + 1) - 1; \
@@ -25,7 +25,7 @@
array_t *arr = arr_expr; int64_t index = index_expr; \
int64_t off = index + (index < 0) * (arr->length + 1) - 1; \
if (__builtin_expect(off < 0 || off >= arr->length, 0)) \
- fail_source(filename, start, end, "Invalid array index: %r (array has length %ld)\n", Int64$as_text(&index, no, NULL), arr->length); \
+ fail_source(filename, start, end, "Invalid array index: %s (array has length %ld)\n", Text$as_c_string(Int64$as_text(&index, no, NULL)), arr->length); \
if (arr->data_refcount > 0) \
Array$compact(arr, padded_item_size); \
(item_type*)(arr->data + arr->stride * off); })
@@ -87,10 +87,10 @@ array_t Array$to(array_t array, Int_t last);
array_t Array$by(array_t array, Int_t stride, int64_t padded_item_size);
array_t Array$reversed(array_t array, int64_t padded_item_size);
array_t Array$concat(array_t x, array_t y, int64_t padded_item_size);
-uint32_t Array$hash(const array_t *arr, const TypeInfo *type);
+uint64_t Array$hash(const array_t *arr, const TypeInfo *type);
int32_t Array$compare(const array_t *x, const array_t *y, const TypeInfo *type);
bool Array$equal(const array_t *x, const array_t *y, const TypeInfo *type);
-CORD Array$as_text(const array_t *arr, bool colorize, const TypeInfo *type);
+Text_t Array$as_text(const array_t *arr, bool colorize, const TypeInfo *type);
void Array$heapify(array_t *heap, closure_t comparison, int64_t padded_item_size);
void Array$heap_push(array_t *heap, const void *item, closure_t comparison, int64_t padded_item_size);
#define Array$heap_push_value(heap, _value, comparison, padded_item_size) ({ __typeof(_value) value = _value; Array$heap_push(heap, &value, comparison, padded_item_size); })
diff --git a/builtins/bool.c b/builtins/bool.c
index af2f0ac7..488c6ddc 100644
--- a/builtins/bool.c
+++ b/builtins/bool.c
@@ -13,25 +13,28 @@
#include "types.h"
#include "util.h"
-public CORD Bool$as_text(const bool *b, bool colorize, const TypeInfo *type)
+public Text_t Bool$as_text(const bool *b, bool colorize, const TypeInfo *type)
{
(void)type;
- if (!b) return "Bool";
+ if (!b) return Text$from_str("Bool");
if (colorize)
- return *b ? "\x1b[35myes\x1b[m" : "\x1b[35mno\x1b[m";
+ return *b ? Text$from_str("\x1b[35myes\x1b[m") : Text$from_str("\x1b[35mno\x1b[m");
else
- return *b ? "yes" : "no";
+ return *b ? Text$from_str("yes") : Text$from_str("no");
}
-public Bool_t Bool$from_text(CORD text, bool *success)
+public Bool_t Bool$from_text(Text_t text, bool *success)
{
- CORD lower = Text$lower(text);
- if (CORD_cmp(lower, "yes") == 0 || CORD_cmp(lower, "on") == 0
- || CORD_cmp(lower, "true") == 0 || CORD_cmp(lower, "1") == 0) {
+ if (Text$equal_ignoring_case(text, Text$from_str("yes"))
+ || Text$equal_ignoring_case(text, Text$from_str("on"))
+ || Text$equal_ignoring_case(text, Text$from_str("true"))
+ || Text$equal_ignoring_case(text, Text$from_str("1"))) {
if (success) *success = yes;
return yes;
- } else if (CORD_cmp(lower, "no") == 0 || CORD_cmp(lower, "off") == 0
- || CORD_cmp(lower, "false") == 0 || CORD_cmp(lower, "0") == 0) {
+ } else if (Text$equal_ignoring_case(text, Text$from_str("no"))
+ || Text$equal_ignoring_case(text, Text$from_str("off"))
+ || Text$equal_ignoring_case(text, Text$from_str("false"))
+ || Text$equal_ignoring_case(text, Text$from_str("0"))) {
if (success) *success = yes;
return no;
} else {
diff --git a/builtins/bool.h b/builtins/bool.h
index 716ddd5b..578cad6c 100644
--- a/builtins/bool.h
+++ b/builtins/bool.h
@@ -12,8 +12,8 @@
#define yes (Bool_t)true
#define no (Bool_t)false
-CORD Bool$as_text(const bool *b, bool colorize, const TypeInfo *type);
-bool Bool$from_text(CORD text, bool *success);
+Text_t Bool$as_text(const bool *b, bool colorize, const TypeInfo *type);
+bool Bool$from_text(Text_t text, bool *success);
Bool_t Bool$random(double p);
extern const TypeInfo $Bool;
diff --git a/builtins/c_string.c b/builtins/c_string.c
index 3b258aad..8abb2b9f 100644
--- a/builtins/c_string.c
+++ b/builtins/c_string.c
@@ -13,12 +13,12 @@
#include "types.h"
#include "util.h"
-public CORD CString$as_text(const void *c_string, bool colorize, const TypeInfo *info)
+public Text_t CString$as_text(const void *c_string, bool colorize, const TypeInfo *info)
{
(void)info;
- if (!c_string) return "CString";
- CORD text = CORD_from_char_star(*(char**)c_string);
- return CORD_all(colorize ? "\x1b[34mCString\x1b[m(" : "CString(", Text$quoted(text, colorize), ")");
+ if (!c_string) return Text$from_str("CString");
+ Text_t text = Text$from_str(*(char**)c_string);
+ return Text$concat(Text$from_str(colorize ? "\x1b[34mCString\x1b[m(" : "CString("), Text$quoted(text, colorize), Text$from_str(")"));
}
public int CString$compare(const char **x, const char **y)
diff --git a/builtins/c_string.h b/builtins/c_string.h
index 6b4b0aad..d909083d 100644
--- a/builtins/c_string.h
+++ b/builtins/c_string.h
@@ -8,10 +8,10 @@
#include "types.h"
-CORD CString$as_text(const void *str, bool colorize, const TypeInfo *info);
+Text_t CString$as_text(const void *str, bool colorize, const TypeInfo *info);
int CString$compare(const char **x, const char **y);
bool CString$equal(const char **x, const char **y);
-uint32_t CString$hash(const char **str);
+uint64_t CString$hash(const char **str);
extern const TypeInfo $CString;
diff --git a/builtins/channel.c b/builtins/channel.c
index c2e2cf82..a0a0ddc5 100644
--- a/builtins/channel.c
+++ b/builtins/channel.c
@@ -15,6 +15,7 @@
#include "functions.h"
#include "halfsiphash.h"
#include "integers.h"
+#include "text.h"
#include "types.h"
#include "util.h"
#include "where.h"
@@ -120,15 +121,21 @@ bool Channel$equal(const channel_t **x, const channel_t **y, const TypeInfo *typ
return (*x == *y);
}
-CORD Channel$as_text(const channel_t **channel, bool colorize, const TypeInfo *type)
+Text_t Channel$as_text(const channel_t **channel, bool colorize, const TypeInfo *type)
{
const TypeInfo *item_type = type->ChannelInfo.item;
if (!channel) {
- CORD typename = generic_as_text(NULL, false, item_type);
- return colorize ? CORD_asprintf("\x1b[34;1m|:%s|\x1b[m", typename) : CORD_all("|:", typename, "|");
+ Text_t typename = generic_as_text(NULL, false, item_type);
+ return Text$concat(Text$from_str(colorize ? "\x1b[34;1m|:" : "|:"), typename, Text$from_str(colorize ? "|\x1b[m" : "|"));
}
- CORD typename = generic_as_text(NULL, false, item_type);
- return CORD_asprintf(colorize ? "\x1b[34;1m|:%s|<%p>\x1b[m" : "|:%s|<%p>", typename, *channel);
+ Text_t typename = generic_as_text(NULL, false, item_type);
+ return Text$concat(
+ Text$from_str(colorize ? "\x1b[34;1m|:" : "|:"),
+ typename,
+ Text$from_str("|<"),
+ Int64$hex((int64_t)(void*)*channel, I_small(0), true, true),
+ Text$from_str(colorize ? ">\x1b[m" : ">")
+ );
}
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/builtins/channel.h b/builtins/channel.h
index 241e7ec9..bf24f806 100644
--- a/builtins/channel.h
+++ b/builtins/channel.h
@@ -22,9 +22,9 @@ void Channel$peek(channel_t *channel, void *out, Where_t where, int64_t item_siz
#define Channel$peek_value(channel, where, t) ({ t _val; Channel$peek(channel, &_val, where, sizeof(t)); _val; })
void Channel$clear(channel_t *channel);
array_t Channel$view(channel_t *channel);
-uint32_t Channel$hash(const channel_t **channel, const TypeInfo *type);
+uint64_t Channel$hash(const channel_t **channel, const TypeInfo *type);
int32_t Channel$compare(const channel_t **x, const channel_t **y, const TypeInfo *type);
bool Channel$equal(const channel_t **x, const channel_t **y, const TypeInfo *type);
-CORD Channel$as_text(const channel_t **channel, bool colorize, const TypeInfo *type);
+Text_t Channel$as_text(const channel_t **channel, bool colorize, const TypeInfo *type);
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/builtins/datatypes.h b/builtins/datatypes.h
index 699c40e0..433e1dd9 100644
--- a/builtins/datatypes.h
+++ b/builtins/datatypes.h
@@ -71,4 +71,19 @@ typedef struct {
int64_t max_size;
} channel_t;
+enum text_type { TEXT_SHORT_ASCII, TEXT_ASCII, TEXT_SHORT_GRAPHEMES, TEXT_GRAPHEMES, TEXT_SUBTEXT };
+
+typedef struct Text_s {
+ int64_t length; // Number of grapheme clusters
+ uint64_t hash:61;
+ uint8_t tag:3;
+ union {
+ char short_ascii[8];
+ const char *ascii;
+ int32_t short_graphemes[2];
+ int32_t *graphemes;
+ struct Text_s *subtexts;
+ };
+} Text_t;
+
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/builtins/functions.c b/builtins/functions.c
index 3eea3c89..06636cba 100644
--- a/builtins/functions.c
+++ b/builtins/functions.c
@@ -2,7 +2,6 @@
#include <errno.h>
#include <execinfo.h>
#include <gc.h>
-#include <gc/cord.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
@@ -16,7 +15,6 @@
#include "channel.h"
#include "files.h"
#include "functions.h"
-#include "halfsiphash.h"
#include "integers.h"
#include "pointer.h"
#include "string.h"
@@ -25,7 +23,9 @@
#include "types.h"
#include "util.h"
-public uint8_t TOMO_HASH_KEY[8] = {0};
+#include "siphash.c"
+
+public uint8_t TOMO_HASH_KEY[16] = {0};
public void tomo_init(void)
{
@@ -37,6 +37,9 @@ public void tomo_init(void)
srand(seed);
srand48(seed);
Int$init_random(seed);
+
+ if (register_printf_specifier('k', printf_text, printf_text_size))
+ errx(1, "Couldn't set printf specifier");
}
static void print_stack_trace(FILE *out)
@@ -60,13 +63,13 @@ static void print_stack_trace(FILE *out)
fprintf(out, "\x1b[m");
}
-public void fail(CORD fmt, ...)
+public void fail(const char *fmt, ...)
{
if (USE_COLOR) fputs("\x1b[31;7m ==================== ERROR ==================== \n\n\x1b[0;1m", stderr);
else fputs("==================== ERROR ====================\n\n", stderr);
va_list args;
va_start(args, fmt);
- CORD_vfprintf(stderr, fmt, args);
+ vfprintf(stderr, fmt, args);
if (USE_COLOR) fputs("\x1b[m", stderr);
fputs("\n\n", stderr);
va_end(args);
@@ -75,14 +78,14 @@ public void fail(CORD fmt, ...)
raise(SIGABRT);
}
-public void fail_source(const char *filename, int64_t start, int64_t end, CORD fmt, ...)
+public void fail_source(const char *filename, int64_t start, int64_t end, const char *fmt, ...)
{
if (USE_COLOR) fputs("\n\x1b[31;7m ==================== ERROR ==================== \n\n\x1b[0;1m", stderr);
else fputs("\n==================== ERROR ====================\n\n", stderr);
va_list args;
va_start(args, fmt);
- CORD_vfprintf(stderr, fmt, args);
+ vfprintf(stderr, fmt, args);
va_end(args);
file_t *file = filename ? load_file(filename) : NULL;
@@ -98,11 +101,10 @@ public void fail_source(const char *filename, int64_t start, int64_t end, CORD f
raise(SIGABRT);
}
-public uint32_t generic_hash(const void *obj, const TypeInfo *type)
+public uint64_t generic_hash(const void *obj, const TypeInfo *type)
{
switch (type->tag) {
- case PointerInfo: case FunctionInfo: return Pointer$hash(obj, type);
- case TextInfo: return Text$hash(obj);
+ case TextInfo: return Text$hash((void*)obj);
case ArrayInfo: return Array$hash(obj, type);
case ChannelInfo: return Channel$hash((const channel_t**)obj, type);
case TableInfo: return Table$hash(obj, type);
@@ -113,9 +115,7 @@ public uint32_t generic_hash(const void *obj, const TypeInfo *type)
return type->CustomInfo.hash(obj, type);
default: {
hash_data:;
- uint32_t hash;
- halfsiphash((void*)obj, type->size, TOMO_HASH_KEY, (uint8_t*)&hash, sizeof(hash));
- return hash;
+ return siphash24((void*)obj, type->size, (uint64_t*)TOMO_HASH_KEY);
}
}
}
@@ -158,7 +158,7 @@ public bool generic_equal(const void *x, const void *y, const TypeInfo *type)
}
}
-public CORD generic_as_text(const void *obj, bool colorize, const TypeInfo *type)
+public Text_t generic_as_text(const void *obj, bool colorize, const TypeInfo *type)
{
switch (type->tag) {
case PointerInfo: return Pointer$as_text(obj, colorize, type);
@@ -168,19 +168,21 @@ public CORD generic_as_text(const void *obj, bool colorize, const TypeInfo *type
case ChannelInfo: return Channel$as_text((const channel_t**)obj, colorize, type);
case TableInfo: return Table$as_text(obj, colorize, type);
case TypeInfoInfo: return Type$as_text(obj, colorize, type);
- case EmptyStruct: return colorize ? CORD_all("\x1b[0;1m", type->EmptyStruct.name, "\x1b[m()") : CORD_all(type->EmptyStruct.name, "()");
+ case EmptyStruct: return colorize ?
+ Text$concat(Text$from_str("\x1b[0;1m"), Text$from_str(type->EmptyStruct.name), Text$from_str("\x1b[m()"))
+ : Text$concat(Text$from_str(type->EmptyStruct.name), Text$from_str("()"));
case CustomInfo:
if (!type->CustomInfo.as_text)
- fail("No cord function provided for type!\n");
+ fail("No text function provided for type!\n");
return type->CustomInfo.as_text(obj, colorize, type);
default: errx(1, "Invalid type tag: %d", type->tag);
}
}
-public CORD builtin_last_err()
+public Text_t builtin_last_err()
{
- return CORD_from_char_star(strerror(errno));
+ return Text$from_str(strerror(errno));
}
static int TEST_DEPTH = 0;
@@ -193,12 +195,12 @@ public void start_test(const char *filename, int64_t start, int64_t end)
if (filename && file) {
for (int i = 0; i < 3*TEST_DEPTH; i++) fputc(' ', stderr);
- CORD_fprintf(stderr, USE_COLOR ? "\x1b[33;1m>> \x1b[0m%.*s\x1b[m\n" : ">> %.*s\n", (end - start), file->text + start);
+ fprintf(stderr, USE_COLOR ? "\x1b[33;1m>> \x1b[0m%.*s\x1b[m\n" : ">> %.*s\n", (end - start), file->text + start);
}
++TEST_DEPTH;
}
-public void end_test(void *expr, const TypeInfo *type, CORD expected, const char *filename, int64_t start, int64_t end)
+public void end_test(void *expr, const TypeInfo *type, const char *expected, const char *filename, int64_t start, int64_t end)
{
(void)filename;
(void)start;
@@ -206,25 +208,29 @@ public void end_test(void *expr, const TypeInfo *type, CORD expected, const char
--TEST_DEPTH;
if (!expr) return;
- CORD expr_cord = generic_as_text(expr, USE_COLOR, type);
- CORD type_name = generic_as_text(NULL, false, type);
+ Text_t expr_text = generic_as_text(expr, USE_COLOR, type);
+ Text_t type_name = generic_as_text(NULL, false, type);
for (int i = 0; i < 3*TEST_DEPTH; i++) fputc(' ', stderr);
- CORD_fprintf(stderr, USE_COLOR ? "\x1b[2m=\x1b[0m %r \x1b[2m: %r\x1b[m\n" : "= %r : %r\n", expr_cord, type_name);
- if (expected) {
- CORD expr_plain = USE_COLOR ? generic_as_text(expr, false, type) : expr_cord;
- bool success = Text$equal(&expr_plain, &expected);
- if (!success && CORD_chr(expected, 0, ':')) {
- CORD with_type = CORD_catn(3, expr_plain, " : ", type_name);
- success = Text$equal(&with_type, &expected);
+ fprintf(stderr, USE_COLOR ? "\x1b[2m=\x1b[0m %k \x1b[2m: %k\x1b[m\n" : "= %k : %k\n", &expr_text, &type_name);
+ if (expected && expected[0]) {
+ Text_t expected_text = Text$from_str(expected);
+ Text_t expr_plain = USE_COLOR ? generic_as_text(expr, false, type) : expr_text;
+ bool success = Text$equal(&expr_plain, &expected_text);
+ if (!success) {
+ Int_t colon = Text$find(expected_text, Text$from_str(":"), I_small(0), NULL);
+ if (colon.small != I_small(0).small) {
+ Text_t with_type = Text$concat(expr_plain, Text$from_str(" : "), type_name);
+ success = Text$equal(&with_type, &expected_text);
+ }
}
if (!success) {
fprintf(stderr,
USE_COLOR
- ? "\n\x1b[31;7m ==================== TEST FAILED ==================== \x1b[0;1m\n\nExpected: \x1b[1;32m%s\x1b[0m\n\x1b[1m But got:\x1b[m %s\n\n"
- : "\n==================== TEST FAILED ====================\nExpected: %s\n\n But got: %s\n\n",
- CORD_to_const_char_star(expected), CORD_to_const_char_star(expr_cord));
+ ? "\n\x1b[31;7m ==================== TEST FAILED ==================== \x1b[0;1m\n\nExpected: \x1b[1;32m%s\x1b[0m\n\x1b[1m But got:\x1b[m %k\n\n"
+ : "\n==================== TEST FAILED ====================\nExpected: %s\n\n But got: %k\n\n",
+ expected, &expr_text);
print_stack_trace(stderr);
fflush(stderr);
@@ -233,37 +239,29 @@ public void end_test(void *expr, const TypeInfo *type, CORD expected, const char
}
}
-public void say(CORD text, bool newline)
+public void say(Text_t text, bool newline)
{
- uint8_t buf[512] = {0};
- size_t buf_len = sizeof(buf)-1;
- const char *str = CORD_to_const_char_star(text);
- uint8_t *normalized = u8_normalize(UNINORM_NFD, (uint8_t*)str, strlen(str), buf, &buf_len);
- if (normalized) {
- write(STDOUT_FILENO, normalized, buf_len);
- if (newline)
- write(STDOUT_FILENO, "\n", 1);
- if (normalized != buf)
- free(normalized);
- }
+ Text$print(stdout, text);
+ if (newline)
+ fputc('\n', stdout);
}
-public bool pop_flag(char **argv, int *i, const char *flag, CORD *result)
+public bool pop_flag(char **argv, int *i, const char *flag, Text_t *result)
{
if (argv[*i][0] != '-' || argv[*i][1] != '-') {
return false;
} else if (streq(argv[*i] + 2, flag)) {
- *result = CORD_EMPTY;
+ *result = (Text_t){.length=0};
argv[*i] = NULL;
*i += 1;
return true;
} else if (strncmp(argv[*i] + 2, "no-", 3) == 0 && streq(argv[*i] + 5, flag)) {
- *result = "no";
+ *result = Text$from_str("no");
argv[*i] = NULL;
*i += 1;
return true;
} else if (strncmp(argv[*i] + 2, flag, strlen(flag)) == 0 && argv[*i][2 + strlen(flag)] == '=') {
- *result = CORD_from_char_star(argv[*i] + 2 + strlen(flag) + 1);
+ *result = Text$from_str(argv[*i] + 2 + strlen(flag) + 1);
argv[*i] = NULL;
*i += 1;
return true;
diff --git a/builtins/functions.h b/builtins/functions.h
index 70266ba6..96837249 100644
--- a/builtins/functions.h
+++ b/builtins/functions.h
@@ -9,25 +9,25 @@
#include "datatypes.h"
#include "types.h"
-extern uint8_t TOMO_HASH_KEY[8];
+extern uint8_t TOMO_HASH_KEY[16];
void tomo_init(void);
-void fail(CORD fmt, ...);
-void fail_source(const char *filename, int64_t start, int64_t end, CORD fmt, ...);
-CORD builtin_last_err();
+void fail(const char *fmt, ...);
+void fail_source(const char *filename, int64_t start, int64_t end, const char *fmt, ...);
+Text_t builtin_last_err();
void start_test(const char *filename, int64_t start, int64_t end);
-void end_test(void *expr, const TypeInfo *type, CORD expected, const char *filename, int64_t start, int64_t end);
+void end_test(void *expr, const TypeInfo *type, const char *expected, const char *filename, int64_t start, int64_t end);
#define test(expr, type, expected, filename, start, end) {\
start_test(filename, start, end); \
end_test(expr, type, expected, filename, start, end); }
-void say(CORD text, bool newline);
+void say(Text_t text, bool newline);
-uint32_t generic_hash(const void *obj, const TypeInfo *type);
+uint64_t generic_hash(const void *obj, const TypeInfo *type);
int32_t generic_compare(const void *x, const void *y, const TypeInfo *type);
bool generic_equal(const void *x, const void *y, const TypeInfo *type);
-CORD generic_as_text(const void *obj, bool colorize, const TypeInfo *type);
+Text_t generic_as_text(const void *obj, bool colorize, const TypeInfo *type);
closure_t spawn(closure_t fn);
-bool pop_flag(char **argv, int *i, const char *flag, CORD *result);
+bool pop_flag(char **argv, int *i, const char *flag, Text_t *result);
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/builtins/integers.c b/builtins/integers.c
index 4a7b5c3a..45db160d 100644
--- a/builtins/integers.c
+++ b/builtins/integers.c
@@ -1,4 +1,5 @@
// Integer type infos and methods
+#include <ctype.h>
#include <gc.h>
#include <gc/cord.h>
#include <gmp.h>
@@ -11,7 +12,8 @@
#include "integers.h"
#include "text.h"
#include "types.h"
-#include "SipHash/halfsiphash.h"
+
+#include "siphash.c"
static gmp_randstate_t Int_rng = {};
@@ -21,15 +23,17 @@ public void Int$init_random(long seed)
gmp_randseed_ui(Int_rng, (unsigned long)seed);
}
-public CORD Int$as_text(const Int_t *i, bool colorize, const TypeInfo *type) {
+public Text_t Int$as_text(const Int_t *i, bool colorize, const TypeInfo *type) {
(void)type;
- if (!i) return "Int";
+ if (!i) return Text$from_str("Int");
if (__builtin_expect(i->small & 1, 1)) {
- return CORD_asprintf(colorize ? "\x1b[35m%ld\x1b[33;2m\x1b[m" : "%ld", (i->small)>>2);
+ return Text$format(colorize ? "\x1b[35m%ld\x1b[m" : "%ld", (i->small)>>2);
} else {
char *str = mpz_get_str(NULL, 10, *i->big);
- return CORD_asprintf(colorize ? "\x1b[35m%s\x1b[33;2m\x1b[m" : "%s", str);
+ Text_t text = Text$from_str(str);
+ if (colorize) text = Text$concat(Text$from_str("\x1b[35m"), text, Text$from_str("\x1b[m"));
+ return text;
}
}
@@ -55,62 +59,86 @@ public bool Int$equal_value(const Int_t x, const Int_t y) {
return x.small == y.small || (__builtin_expect(((x.small | y.small) & 1) == 0, 0) && mpz_cmp(*x.big, *y.big) == 0);
}
-public uint32_t Int$hash(const Int_t *x, const TypeInfo *type) {
+public uint64_t Int$hash(const Int_t *x, const TypeInfo *type) {
(void)type;
- uint32_t hash;
if (__builtin_expect(x->small & 1, 1)) {
- halfsiphash(&x->small, sizeof(x->small), TOMO_HASH_KEY, (uint8_t*)&hash, sizeof(hash));
+ int64_t i = (x->small>>2);
+ return siphash24((void*)&i, sizeof(i), (uint64_t*)TOMO_HASH_KEY);
} else {
char *str = mpz_get_str(NULL, 16, *x->big);
- halfsiphash(str, strlen(str), TOMO_HASH_KEY, (uint8_t*)&hash, sizeof(hash));
+ return siphash24((void*)str, strlen(str), (uint64_t*)TOMO_HASH_KEY);
}
- return hash;
}
-public CORD Int$format(Int_t i, Int_t digits_int)
+public Text_t Int$format(Int_t i, Int_t digits_int)
{
int64_t digits = Int_to_Int64(digits_int, false);
if (__builtin_expect(i.small & 1, 1)) {
- return CORD_asprintf("%0.*ld", digits, (i.small)>>2);
+ return Text$format("%0.*ld", digits, (i.small)>>2);
} else {
- CORD str = mpz_get_str(NULL, 10, *i.big);
+ char *str = mpz_get_str(NULL, 10, *i.big);
bool negative = (str[0] == '-');
- if (digits > (int64_t)CORD_len(str)) {
- if (negative)
- str = CORD_all("-", CORD_chars('0', digits - CORD_len(str)), CORD_substr(str, 1, ~0));
- else
- str = CORD_all(CORD_chars('0', digits - CORD_len(str)), str);
- }
- return str;
+ int64_t needed_zeroes = digits - (int64_t)strlen(str);
+ if (needed_zeroes <= 0)
+ return Text$from_str(str);
+
+ char *zeroes = GC_MALLOC_ATOMIC(needed_zeroes);
+ memset(zeroes, '0', needed_zeroes);
+ if (negative)
+ return Text$concat(Text$from_str("-"), Text$from_str(zeroes), Text$from_str(str + 1));
+ else
+ return Text$concat(Text$from_str(zeroes), Text$from_str(str));
}
}
-public CORD Int$hex(Int_t i, Int_t digits_int, bool uppercase, bool prefix) {
+public Text_t Int$hex(Int_t i, Int_t digits_int, bool uppercase, bool prefix) {
+ if (Int$compare(&i, (Int_t[1]){I_small(0)}, &$Int) < 0)
+ return Text$concat(Text$from_str("-"), Int$hex(Int$negative(i), digits_int, uppercase, prefix));
+
int64_t digits = Int_to_Int64(digits_int, false);
- const char *hex_fmt = uppercase ? (prefix ? "0x%0.*lX" : "%0.*lX") : (prefix ? "0x%0.*lx" : "%0.*lx");
if (__builtin_expect(i.small & 1, 1)) {
- return CORD_asprintf(hex_fmt, digits, (i.small)>>2);
+ const char *hex_fmt = uppercase ? (prefix ? "0x%0.*lX" : "%0.*lX") : (prefix ? "0x%0.*lx" : "%0.*lx");
+ return Text$format(hex_fmt, digits, (i.small)>>2);
} else {
- CORD str = mpz_get_str(NULL, 16, *i.big);
- if (uppercase) str = Text$upper(str);
- if (digits > (int64_t)CORD_len(str))
- str = CORD_cat(CORD_chars('0', digits - CORD_len(str)), str);
- if (prefix) str = CORD_cat("0x", str);
- return str;
+ char *str = mpz_get_str(NULL, 16, *i.big);
+ if (uppercase) {
+ for (char *c = str; *c; c++)
+ *c = (char)toupper(*c);
+ }
+ int64_t needed_zeroes = digits - (int64_t)strlen(str);
+ if (needed_zeroes <= 0)
+ return prefix ? Text$concat(Text$from_str("0x"), Text$from_str(str)) : Text$from_str(str);
+
+ char *zeroes = GC_MALLOC_ATOMIC(needed_zeroes);
+ memset(zeroes, '0', needed_zeroes);
+ if (prefix)
+ return Text$concat(Text$from_str("0x"), Text$from_str(zeroes), Text$from_str(str));
+ else
+ return Text$concat(Text$from_str(zeroes), Text$from_str(str));
}
}
-public CORD Int$octal(Int_t i, Int_t digits_int, bool prefix) {
+public Text_t Int$octal(Int_t i, Int_t digits_int, bool prefix) {
+ Int_t zero = I_small(0);
+ if (Int$compare(&i, &zero, &$Int) < 0)
+ return Text$concat(Text$from_str("-"), Int$octal(Int$negative(i), digits_int, prefix));
+
int64_t digits = Int_to_Int64(digits_int, false);
- const char *octal_fmt = prefix ? "0o%0.*lo" : "%0.*lo";
if (__builtin_expect(i.small & 1, 1)) {
- return CORD_asprintf(octal_fmt, (int)digits, (uint64_t)(i.small >> 2));
+ const char *octal_fmt = prefix ? "0o%0.*lo" : "%0.*lo";
+ return Text$format(octal_fmt, digits, (i.small)>>2);
} else {
- CORD str = mpz_get_str(NULL, 8, *i.big);
- if (digits > (int64_t)CORD_len(str))
- str = CORD_cat(CORD_chars('0', digits - CORD_len(str)), str);
- if (prefix) str = CORD_cat("0o", str);
- return str;
+ char *str = mpz_get_str(NULL, 8, *i.big);
+ int64_t needed_zeroes = digits - (int64_t)strlen(str);
+ if (needed_zeroes <= 0)
+ return prefix ? Text$concat(Text$from_str("0o"), Text$from_str(str)) : Text$from_str(str);
+
+ char *zeroes = GC_MALLOC_ATOMIC(needed_zeroes);
+ memset(zeroes, '0', needed_zeroes);
+ if (prefix)
+ return Text$concat(Text$from_str("0o"), Text$from_str(zeroes), Text$from_str(str));
+ else
+ return Text$concat(Text$from_str(zeroes), Text$from_str(str));
}
}
@@ -290,9 +318,11 @@ public Int_t Int$sqrt(Int_t i)
public Int_t Int$random(Int_t min, Int_t max) {
int32_t cmp = Int$compare(&min, &max, &$Int);
- if (cmp > 0)
- fail("Random minimum value (%r) is larger than the maximum value (%r)",
- Int$as_text(&min, false, &$Int), Int$as_text(&max, false, &$Int));
+ if (cmp > 0) {
+ Text_t min_text = Int$as_text(&min, false, &$Int), max_text = Int$as_text(&max, false, &$Int);
+ fail("Random minimum value (%k) is larger than the maximum value (%k)",
+ &min_text, &max_text);
+ }
if (cmp == 0) return min;
mpz_t range_size;
@@ -315,8 +345,8 @@ public Range_t Int$to(Int_t from, Int_t to) {
return (Range_t){from, to, Int$compare(&to, &from, &$Int) >= 0 ? (Int_t){.small=(1<<2)|1} : (Int_t){.small=(-1>>2)|1}};
}
-public Int_t Int$from_text(CORD text, bool *success) {
- const char *str = CORD_to_const_char_star(text);
+public Int_t Int$from_text(Text_t text, bool *success) {
+ const char *str = Text$as_c_string(text);
mpz_t i;
int result;
if (strncmp(str, "0x", 2) == 0) {
@@ -355,7 +385,7 @@ public Int_t Int$prev_prime(Int_t x)
mpz_t p;
mpz_init_set_int(p, x);
if (mpz_prevprime(p, p) == 0)
- fail("There is no prime number before %r", Int$as_text(&x, false, &$Int));
+ fail("There is no prime number before %k", (Text_t[1]){Int$as_text(&x, false, &$Int)});
return Int$from_mpz(p);
}
@@ -373,13 +403,11 @@ public const TypeInfo $Int = {
#define DEFINE_INT_TYPE(c_type, KindOfInt, fmt, min_val, max_val)\
- public CORD KindOfInt ## $as_text(const c_type *i, bool colorize, const TypeInfo *type) { \
+ public Text_t KindOfInt ## $as_text(const c_type *i, bool colorize, const TypeInfo *type) { \
(void)type; \
- if (!i) return #KindOfInt; \
- CORD c; \
- if (colorize) CORD_sprintf(&c, "\x1b[35m%"fmt"\x1b[33;2m\x1b[m", *i); \
- else CORD_sprintf(&c, "%"fmt, *i); \
- return c; \
+ if (!i) return Text$from_str(#KindOfInt); \
+ Int_t as_int = KindOfInt##_to_Int(*i); \
+ return Int$as_text(&as_int, colorize, type); \
} \
public int32_t KindOfInt ## $compare(const c_type *x, const c_type *y, const TypeInfo *type) { \
(void)type; \
@@ -389,19 +417,17 @@ public const TypeInfo $Int = {
(void)type; \
return *x == *y; \
} \
- public CORD KindOfInt ## $format(c_type i, Int_t digits_int) { \
- int64_t digits = Int_to_Int64(digits_int, false); \
- return CORD_asprintf("%0*ld", (int)digits, (int64_t)i); \
+ public Text_t KindOfInt ## $format(c_type i, Int_t digits_int) { \
+ Int_t as_int = KindOfInt##_to_Int(i); \
+ return Int$format(as_int, digits_int); \
} \
- public CORD KindOfInt ## $hex(c_type i, Int_t digits_int, bool uppercase, bool prefix) { \
- int64_t digits = Int_to_Int64(digits_int, false); \
- const char *hex_fmt = uppercase ? (prefix ? "0x%0.*lX" : "%0.*lX") : (prefix ? "0x%0.*lx" : "%0.*lx"); \
- return CORD_asprintf(hex_fmt, (int)digits, (uint64_t)i); \
+ public Text_t KindOfInt ## $hex(c_type i, Int_t digits_int, bool uppercase, bool prefix) { \
+ Int_t as_int = KindOfInt##_to_Int(i); \
+ return Int$hex(as_int, digits_int, uppercase, prefix); \
} \
- public CORD KindOfInt ## $octal(c_type i, Int_t digits_int, bool prefix) { \
- int64_t digits = Int_to_Int64(digits_int, false); \
- const char *octal_fmt = prefix ? "0o%0.*lo" : "%0.*lo"; \
- return CORD_asprintf(octal_fmt, (int)digits, (uint64_t)i); \
+ public Text_t KindOfInt ## $octal(c_type i, Int_t digits_int, bool prefix) { \
+ Int_t as_int = KindOfInt##_to_Int(i); \
+ return Int$octal(as_int, digits_int, prefix); \
} \
public array_t KindOfInt ## $bits(c_type x) { \
array_t bit_array = (array_t){.data=GC_MALLOC_ATOMIC(sizeof(bool[8*sizeof(c_type)])), .atomic=1, .stride=sizeof(bool), .length=8*sizeof(c_type)}; \
@@ -432,8 +458,8 @@ public const TypeInfo $Int = {
public Range_t KindOfInt ## $to(c_type from, c_type to) { \
- return (Range_t){Int64_to_Int(from), Int64_to_Int(to), to >= from ? (Int_t){.small=(1<<2)&1} : (Int_t){.small=(1<<2)&1}}; \
+ /* Step is +1 when counting up and -1 when counting down, encoded exactly as Int$to does; `(1<<2)&1` evaluated to 0 for both directions. */ \
+ return (Range_t){Int64_to_Int(from), Int64_to_Int(to), to >= from ? (Int_t){.small=(1<<2)|1} : (Int_t){.small=(-1>>2)|1}}; \
} \
- public c_type KindOfInt ## $from_text(CORD text, CORD *the_rest) { \
- const char *str = CORD_to_const_char_star(text); \
+ public c_type KindOfInt ## $from_text(Text_t text, Text_t *the_rest) { \
+ const char *str = Text$as_c_string(text); \
long i; \
char *end_ptr = NULL; \
if (strncmp(str, "0x", 2) == 0) { \
@@ -445,7 +471,7 @@ public const TypeInfo $Int = {
} else { \
i = strtol(str, &end_ptr, 10); \
} \
- if (the_rest) *the_rest = CORD_from_char_star(end_ptr); \
+ if (the_rest) *the_rest = Text$from_str(end_ptr); \
if (i < min_val) i = min_val; \
else if (i > max_val) i = min_val; \
return (c_type)i; \
diff --git a/builtins/integers.h b/builtins/integers.h
index e5a662cc..359b1d57 100644
--- a/builtins/integers.h
+++ b/builtins/integers.h
@@ -24,16 +24,16 @@
#define I8(x) ((int8_t)x)
#define DEFINE_INT_TYPE(c_type, type_name) \
- CORD type_name ## $as_text(const c_type *i, bool colorize, const TypeInfo *type); \
+ Text_t type_name ## $as_text(const c_type *i, bool colorize, const TypeInfo *type); \
int32_t type_name ## $compare(const c_type *x, const c_type *y, const TypeInfo *type); \
bool type_name ## $equal(const c_type *x, const c_type *y, const TypeInfo *type); \
- CORD type_name ## $format(c_type i, Int_t digits); \
- CORD type_name ## $hex(c_type i, Int_t digits, bool uppercase, bool prefix); \
- CORD type_name ## $octal(c_type i, Int_t digits, bool prefix); \
+ Text_t type_name ## $format(c_type i, Int_t digits); \
+ Text_t type_name ## $hex(c_type i, Int_t digits, bool uppercase, bool prefix); \
+ Text_t type_name ## $octal(c_type i, Int_t digits, bool prefix); \
array_t type_name ## $bits(c_type x); \
c_type type_name ## $random(c_type min, c_type max); \
Range_t type_name ## $to(c_type from, c_type to); \
- c_type type_name ## $from_text(CORD text, CORD *the_rest); \
+ c_type type_name ## $from_text(Text_t text, Text_t *the_rest); \
static inline c_type type_name ## $clamped(c_type x, c_type min, c_type max) { \
return x < min ? min : (x > max ? max : x); \
} \
@@ -70,19 +70,19 @@ DEFINE_INT_TYPE(int8_t, Int8);
#define Int16$abs(...) I16(abs(__VA_ARGS__))
#define Int8$abs(...) I8(abs(__VA_ARGS__))
-CORD Int$as_text(const Int_t *i, bool colorize, const TypeInfo *type);
-uint32_t Int$hash(const Int_t *x, const TypeInfo *type);
+Text_t Int$as_text(const Int_t *i, bool colorize, const TypeInfo *type);
+uint64_t Int$hash(const Int_t *x, const TypeInfo *type);
int32_t Int$compare(const Int_t *x, const Int_t *y, const TypeInfo *type);
int32_t Int$compare_value(const Int_t x, const Int_t y);
bool Int$equal(const Int_t *x, const Int_t *y, const TypeInfo *type);
bool Int$equal_value(const Int_t x, const Int_t y);
-CORD Int$format(Int_t i, Int_t digits);
-CORD Int$hex(Int_t i, Int_t digits, bool uppercase, bool prefix);
-CORD Int$octal(Int_t i, Int_t digits, bool prefix);
+Text_t Int$format(Int_t i, Int_t digits);
+Text_t Int$hex(Int_t i, Int_t digits, bool uppercase, bool prefix);
+Text_t Int$octal(Int_t i, Int_t digits, bool prefix);
void Int$init_random(long seed);
Int_t Int$random(Int_t min, Int_t max);
Range_t Int$to(Int_t from, Int_t to);
-Int_t Int$from_text(CORD text, bool *success);
+Int_t Int$from_text(Text_t text, bool *success);
Int_t Int$abs(Int_t x);
Int_t Int$power(Int_t base, Int_t exponent);
Int_t Int$sqrt(Int_t i);
diff --git a/builtins/memory.c b/builtins/memory.c
index 5b9f39ad..4e8e4c50 100644
--- a/builtins/memory.c
+++ b/builtins/memory.c
@@ -1,6 +1,5 @@
// Type info and methods for "Memory" opaque type
#include <gc.h>
-#include <gc/cord.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
@@ -8,17 +7,16 @@
#include <sys/param.h>
#include <err.h>
-#include "util.h"
#include "halfsiphash.h"
#include "memory.h"
+#include "text.h"
#include "types.h"
+#include "util.h"
-public CORD Memory__as_text(const void *p, bool colorize, const TypeInfo *type) {
+public Text_t Memory__as_text(const void *p, bool colorize, const TypeInfo *type) {
(void)type;
- if (!p) return "Memory";
- CORD cord;
- CORD_sprintf(&cord, colorize ? "\x1b[0;34;1mMemory<%p>\x1b[m" : "Memory<%p>", p);
- return cord;
+ if (!p) return Text$from_str("Memory");
+ return Text$format(colorize ? "\x1b[0;34;1mMemory<%p>\x1b[m" : "Memory<%p>", p);
}
public const TypeInfo $Memory = {
diff --git a/builtins/memory.h b/builtins/memory.h
index 48a2dafd..e3cb2983 100644
--- a/builtins/memory.h
+++ b/builtins/memory.h
@@ -9,6 +9,6 @@
#include "types.h"
extern const TypeInfo $Memory;
-CORD Memory$as_text(const void *p, bool colorize, const TypeInfo *type);
+Text_t Memory$as_text(const void *p, bool colorize, const TypeInfo *type);
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/builtins/nums.c b/builtins/nums.c
index 6b4f6a8a..5848a589 100644
--- a/builtins/nums.c
+++ b/builtins/nums.c
@@ -11,15 +11,13 @@
#include "array.h"
#include "nums.h"
#include "string.h"
+#include "text.h"
#include "types.h"
-public CORD Num$as_text(const double *f, bool colorize, const TypeInfo *type) {
+public Text_t Num$as_text(const double *f, bool colorize, const TypeInfo *type) {
(void)type;
- if (!f) return "Num";
- CORD c;
- if (colorize) CORD_sprintf(&c, "\x1b[35m%.16g\x1b[33;2m\x1b[m", *f);
- else CORD_sprintf(&c, "%.16g", *f);
- return c;
+ if (!f) return Text$from_str("Num");
+ return Text$format(colorize ? "\x1b[35m%.16g\x1b[33;2m\x1b[m" : "%.16g", *f);
}
public int32_t Num$compare(const double *x, const double *y, const TypeInfo *type) {
@@ -47,12 +45,12 @@ public bool Num$near(double a, double b, double ratio, double absolute) {
return (diff < epsilon);
}
-public CORD Num$format(double f, Int_t precision) {
- return CORD_asprintf("%.*f", (int)Int_to_Int64(precision, false), f);
+public Text_t Num$format(double f, Int_t precision) {
+ return Text$format("%.*f", (int)Int_to_Int64(precision, false), f);
}
-public CORD Num$scientific(double f, Int_t precision) {
- return CORD_asprintf("%.*e", (int)Int_to_Int64(precision, false), f);
+public Text_t Num$scientific(double f, Int_t precision) {
+ return Text$format("%.*e", (int)Int_to_Int64(precision, false), f);
}
public double Num$mod(double num, double modulus) {
@@ -68,16 +66,16 @@ public double Num$mix(double amount, double x, double y) {
return (1.0-amount)*x + amount*y;
}
-public double Num$from_text(CORD text, CORD *the_rest) {
- const char *str = CORD_to_const_char_star(text);
+public double Num$from_text(Text_t text, Text_t *the_rest) {
+ const char *str = Text$as_c_string(text);
char *end = NULL;
double d = strtod(str, &end);
- if (the_rest) *the_rest = CORD_from_char_star(end);
+ if (the_rest) *the_rest = Text$from_str(end);
return d;
}
-public double Num$nan(CORD tag) {
- return nan(CORD_to_const_char_star(tag));
+public double Num$nan(Text_t tag) {
+ return nan(Text$as_c_string(tag));
}
public bool Num$isinf(double n) { return !!isinf(n); }
@@ -95,13 +93,10 @@ public const TypeInfo $Num = {
},
};
-public CORD Num32$as_text(const float *f, bool colorize, const TypeInfo *type) {
+public Text_t Num32$as_text(const float *f, bool colorize, const TypeInfo *type) {
(void)type;
- if (!f) return "Num32";
- CORD c;
- if (colorize) CORD_sprintf(&c, "\x1b[35m%.8g_f32\x1b[m", *f);
- else CORD_sprintf(&c, "%.8g_f32", *f);
- return c;
+ if (!f) return Text$from_str("Num32");
+ return Text$format(colorize ? "\x1b[35m%.8g_f32\x1b[33;2m\x1b[m" : "%.8g_f32", *f);
}
public int32_t Num32$compare(const float *x, const float *y, const TypeInfo *type) {
@@ -129,12 +124,12 @@ public bool Num32$near(float a, float b, float ratio, float absolute) {
return (diff < epsilon);
}
-public CORD Num32$format(float f, Int_t precision) {
- return CORD_asprintf("%.*f", (int)Int_to_Int64(precision, false), f);
+public Text_t Num32$format(float f, Int_t precision) {
+ return Text$format("%.*f", (int)Int_to_Int64(precision, false), f);
}
-public CORD Num32$scientific(float f, Int_t precision) {
- return CORD_asprintf("%.*e", (int)Int_to_Int64(precision, false), f);
+public Text_t Num32$scientific(float f, Int_t precision) {
+ return Text$format("%.*e", (int)Int_to_Int64(precision, false), f);
}
public float Num32$mod(float num, float modulus) {
@@ -150,16 +145,16 @@ public float Num32$mix(float amount, float x, float y) {
return (1.0-amount)*x + amount*y;
}
-public float Num32$from_text(CORD text, CORD *the_rest) {
- const char *str = CORD_to_const_char_star(text);
+public float Num32$from_text(Text_t text, Text_t *the_rest) {
+ const char *str = Text$as_c_string(text);
char *end = NULL;
double d = strtod(str, &end);
- if (the_rest) *the_rest = CORD_from_char_star(end);
+ if (the_rest) *the_rest = Text$from_str(end);
return (float)d;
}
-public float Num32$nan(CORD tag) {
- return nanf(CORD_to_const_char_star(tag));
+public float Num32$nan(Text_t tag) {
+ return nanf(Text$as_c_string(tag));
}
public bool Num32$isinf(float n) { return isinf(n); }
diff --git a/builtins/nums.h b/builtins/nums.h
index 94b11055..c5562f0a 100644
--- a/builtins/nums.h
+++ b/builtins/nums.h
@@ -14,39 +14,39 @@
#define N32(n) ((float)n)
#define N64(n) ((double)n)
-CORD Num$as_text(const double *f, bool colorize, const TypeInfo *type);
+Text_t Num$as_text(const double *f, bool colorize, const TypeInfo *type);
int32_t Num$compare(const double *x, const double *y, const TypeInfo *type);
bool Num$equal(const double *x, const double *y, const TypeInfo *type);
bool Num$near(double a, double b, double ratio, double absolute);
-CORD Num$format(double f, Int_t precision);
-CORD Num$scientific(double f, Int_t precision);
+Text_t Num$format(double f, Int_t precision);
+Text_t Num$scientific(double f, Int_t precision);
double Num$mod(double num, double modulus);
bool Num$isinf(double n);
bool Num$finite(double n);
bool Num$isnan(double n);
-double Num$nan(CORD tag);
+double Num$nan(Text_t tag);
double Num$random(void);
double Num$mix(double amount, double x, double y);
-double Num$from_text(CORD text, CORD *the_rest);
+double Num$from_text(Text_t text, Text_t *the_rest);
static inline double Num$clamped(double x, double low, double high) {
return (x <= low) ? low : (x >= high ? high : x);
}
extern const TypeInfo $Num;
-CORD Num32$as_text(const float *f, bool colorize, const TypeInfo *type);
+Text_t Num32$as_text(const float *f, bool colorize, const TypeInfo *type);
int32_t Num32$compare(const float *x, const float *y, const TypeInfo *type);
bool Num32$equal(const float *x, const float *y, const TypeInfo *type);
bool Num32$near(float a, float b, float ratio, float absolute);
-CORD Num32$format(float f, Int_t precision);
-CORD Num32$scientific(float f, Int_t precision);
+Text_t Num32$format(float f, Int_t precision);
+Text_t Num32$scientific(float f, Int_t precision);
float Num32$mod(float num, float modulus);
bool Num32$isinf(float n);
bool Num32$finite(float n);
bool Num32$isnan(float n);
float Num32$random(void);
float Num32$mix(float amount, float x, float y);
-float Num32$from_text(CORD text, CORD *the_rest);
-float Num32$nan(CORD tag);
+float Num32$from_text(Text_t text, Text_t *the_rest);
+float Num32$nan(Text_t tag);
static inline float Num32$clamped(float x, float low, float high) {
return (x <= low) ? low : (x >= high ? high : x);
}
diff --git a/builtins/pointer.c b/builtins/pointer.c
index 73bd41be..41f4a2a1 100644
--- a/builtins/pointer.c
+++ b/builtins/pointer.c
@@ -8,27 +8,39 @@
#include <stdlib.h>
#include <sys/param.h>
-#include "util.h"
#include "functions.h"
#include "halfsiphash.h"
+#include "text.h"
#include "types.h"
+#include "util.h"
typedef struct recursion_s {
const void *ptr;
struct recursion_s *next;
} recursion_t;
-public CORD Pointer$as_text(const void *x, bool colorize, const TypeInfo *type) {
+public Text_t Pointer$as_text(const void *x, bool colorize, const TypeInfo *type) {
auto ptr_info = type->PointerInfo;
if (!x) {
- CORD typename = generic_as_text(NULL, false, ptr_info.pointed);
- CORD c = colorize ? CORD_asprintf("\x1b[34;1m%s%s\x1b[m", ptr_info.sigil, typename) : CORD_cat(ptr_info.sigil, typename);
- return ptr_info.is_optional ? CORD_cat(c, "?") : c;
+ Text_t typename = generic_as_text(NULL, false, ptr_info.pointed);
+ Text_t text;
+ if (colorize)
+ text = Text$concat(Text$from_str("\x1b[34;1m"), Text$from_str(ptr_info.sigil), typename, Text$from_str("\x1b[m"));
+ else
+ text = Text$concat(Text$from_str(ptr_info.sigil), typename);
+
+ if (ptr_info.is_optional)
+ text = Text$concat(text, Text$from_str("?"));
+
+ return text;
}
const void *ptr = *(const void**)x;
if (!ptr) {
- CORD typename = generic_as_text(NULL, false, ptr_info.pointed);
- return colorize ? CORD_asprintf("\x1b[34;1m!%s\x1b[m", typename) : CORD_cat("!", typename);
+ Text_t typename = generic_as_text(NULL, false, ptr_info.pointed);
+ if (colorize)
+ return Text$concat(Text$from_str("\x1b[34;1m!"), typename, Text$from_str("\x1b[m"));
+ else
+ return Text$concat(Text$from_str("!"), typename);
}
// Check for recursive references, so if `x.foo = x`, then it prints as
@@ -38,22 +50,34 @@ public CORD Pointer$as_text(const void *x, bool colorize, const TypeInfo *type)
for (recursion_t *r = recursion; r; r = r->next) {
++depth;
if (r->ptr == ptr) {
- CORD c = CORD_asprintf(colorize ? "\x1b[34;1m%s..%d\x1b[m" : "%s..%d", ptr_info.sigil, depth);
- if (ptr_info.is_optional) c = CORD_cat(c, colorize ? "\x1b[34;1m?\x1b[m" : "?");
- return c;
+ Text_t text = Text$concat(
+ Text$from_str(colorize ? "\x1b[34;1m" : ""),
+ Text$from_str(ptr_info.sigil),
+ Text$from_str(".."),
+ Int32$as_text(&depth, false, &$Int32),
+ Text$from_str(colorize ? "\x1b[m" : ""));
+ if (ptr_info.is_optional)
+ text = Text$concat(text, Text$from_str(colorize ? "\x1b[34;1m?\x1b[m" : "?"));
+ return text;
}
}
- CORD pointed;
+ Text_t pointed;
{ // Stringify with this pointer flagged as a recursive one:
recursion_t my_recursion = {.ptr=ptr, .next=recursion};
recursion = &my_recursion;
pointed = generic_as_text(ptr, colorize, ptr_info.pointed);
recursion = recursion->next;
}
- CORD c = colorize ? CORD_asprintf("\x1b[34;1m%s\x1b[m%r", ptr_info.sigil, pointed) : CORD_cat(ptr_info.sigil, pointed);
- if (ptr_info.is_optional) c = CORD_cat(c, colorize ? "\x1b[34;1m?\x1b[m" : "?");
- return c;
+ Text_t text;
+ if (colorize)
+ text = Text$concat(Text$from_str("\x1b[34;1m"), Text$from_str(ptr_info.sigil), Text$from_str("\x1b[m"), pointed);
+ else
+ text = Text$concat(Text$from_str(ptr_info.sigil), pointed);
+
+ if (ptr_info.is_optional)
+ text = Text$concat(text, Text$from_str("?"));
+ return text;
}
public int32_t Pointer$compare(const void *x, const void *y, const TypeInfo *type) {
@@ -68,11 +92,4 @@ public bool Pointer$equal(const void *x, const void *y, const TypeInfo *type) {
return xp == yp;
}
-public uint32_t Pointer$hash(const void *x, const TypeInfo *type) {
- (void)type;
- uint32_t hash;
- halfsiphash(x, sizeof(void*), TOMO_HASH_KEY, (uint8_t*)&hash, sizeof(hash));
- return hash;
-}
-
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/builtins/pointer.h b/builtins/pointer.h
index 538960b3..7748da4b 100644
--- a/builtins/pointer.h
+++ b/builtins/pointer.h
@@ -8,10 +8,9 @@
#include "types.h"
-CORD Pointer$as_text(const void *x, bool colorize, const TypeInfo *type);
+Text_t Pointer$as_text(const void *x, bool colorize, const TypeInfo *type);
int32_t Pointer$compare(const void *x, const void *y, const TypeInfo *type);
bool Pointer$equal(const void *x, const void *y, const TypeInfo *type);
-uint32_t Pointer$hash(const void *x, const TypeInfo *type);
#define Null(t) (t*)NULL
#define POINTER_TYPE(_sigil, _pointed) (&(TypeInfo){\
diff --git a/builtins/range.c b/builtins/range.c
index 840397b9..9b5af8cd 100644
--- a/builtins/range.c
+++ b/builtins/range.c
@@ -4,15 +4,15 @@
#include <err.h>
#include <gmp.h>
#include <gc.h>
-#include <gc/cord.h>
#include <math.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <sys/param.h>
-#include "types.h"
#include "integers.h"
+#include "text.h"
+#include "types.h"
#include "util.h"
@@ -32,15 +32,15 @@ static bool Range$equal(const Range_t *x, const Range_t *y, const TypeInfo *type
return Int$equal(&x->first, &y->first, &$Int) && Int$equal(&x->last, &y->last, &$Int) && Int$equal(&x->step, &y->step, &$Int);
}
-static CORD Range$as_text(const Range_t *r, bool use_color, const TypeInfo *type)
+// Render a range as `Range(first=..., last=..., step=...)`.
+// A NULL `r` yields the type name "Range"; `use_color` adds ANSI styling.
+static Text_t Range$as_text(const Range_t *r, bool use_color, const TypeInfo *type)
{
(void)type;
- if (!r) return "Range";
+ if (!r) return Text$from_str("Range");
- return CORD_asprintf(use_color ? "\x1b[0;1mRange\x1b[m(first=%r, last=%r, step=%r)"
- : "Range(first=%r, last=%r, step=%r)",
- Int$as_text(&r->first, use_color, &$Int), Int$as_text(&r->last, use_color, &$Int),
- Int$as_text(&r->step, use_color, &$Int));
+ // `%r` was the CORD conversion and cannot take Text_t values, so the pieces are
+ // joined with Text$concat() instead of going through a format string.
+ return Text$concat(
+ Text$from_str(use_color ? "\x1b[0;1mRange\x1b[m(first=" : "Range(first="),
+ Int$as_text(&r->first, use_color, &$Int),
+ Text$from_str(", last="),
+ Int$as_text(&r->last, use_color, &$Int),
+ Text$from_str(", step="),
+ Int$as_text(&r->step, use_color, &$Int),
+ Text$from_str(")"));
}
public Range_t Range$reversed(Range_t r)
diff --git a/builtins/table.c b/builtins/table.c
index 8de6532c..9bc3ded1 100644
--- a/builtins/table.c
+++ b/builtins/table.c
@@ -16,14 +16,15 @@
#include <string.h>
#include <sys/param.h>
-#include "util.h"
#include "array.h"
+#include "c_string.h"
#include "datatypes.h"
#include "halfsiphash.h"
#include "memory.h"
#include "table.h"
#include "text.h"
#include "types.h"
+#include "util.h"
// #define DEBUG_TABLES
@@ -51,11 +52,11 @@ static const TypeInfo MemoryPointer = {
},
};
-const TypeInfo StrToVoidStarTable = {
+const TypeInfo CStrToVoidStarTable = {
.size=sizeof(table_t),
.align=__alignof__(table_t),
.tag=TableInfo,
- .TableInfo={.key=&$Text, .value=&MemoryPointer},
+ .TableInfo={.key=&$CString, .value=&MemoryPointer},
};
static inline size_t entry_size(const TypeInfo *info)
@@ -450,36 +451,43 @@ public uint32_t Table$hash(const table_t *t, const TypeInfo *type)
return hash;
}
-public CORD Table$as_text(const table_t *t, bool colorize, const TypeInfo *type)
+public Text_t Table$as_text(const table_t *t, bool colorize, const TypeInfo *type)
{
assert(type->tag == TableInfo);
auto table = type->TableInfo;
if (!t) {
if (table.value != &$Void)
- return CORD_all("{", generic_as_text(NULL, false, table.key), ":", generic_as_text(NULL, false, table.value), "}");
+ return Text$concat(
+ Text$from_str("{"),
+ generic_as_text(NULL, false, table.key),
+ Text$from_str(":"),
+ generic_as_text(NULL, false, table.value),
+ Text$from_str("}"));
else
- return CORD_all("{", generic_as_text(NULL, false, table.key), "}");
+ return Text$concat(
+ Text$from_str("{"),
+ generic_as_text(NULL, false, table.key),
+ Text$from_str("}"));
}
int64_t val_off = value_offset(type);
- CORD c = "{";
+ Text_t text = Text$from_str("{");
for (int64_t i = 0, length = Table$length(*t); i < length; i++) {
if (i > 0)
- c = CORD_cat(c, ", ");
+ text = Text$concat(text, Text$from_str(", "));
void *entry = GET_ENTRY(*t, i);
- c = CORD_cat(c, generic_as_text(entry, colorize, table.key));
+ text = Text$concat(text, generic_as_text(entry, colorize, table.key));
if (table.value != &$Void)
- c = CORD_all(c, ":", generic_as_text(entry + val_off, colorize, table.value));
+ text = Text$concat(text, Text$from_str(":"), generic_as_text(entry + val_off, colorize, table.value));
}
if (t->fallback) {
- c = CORD_cat(c, "; fallback=");
- c = CORD_cat(c, Table$as_text(t->fallback, colorize, type));
+ text = Text$concat(text, Text$from_str("; fallback="), Table$as_text(t->fallback, colorize, type));
}
- c = CORD_cat(c, "}");
- return c;
+ text = Text$concat(text, Text$from_str("}"));
+ return text;
}
public table_t Table$from_entries(array_t entries, const TypeInfo *type)
@@ -592,29 +600,29 @@ public bool Table$is_superset_of(table_t a, table_t b, bool strict, const TypeIn
public void *Table$str_get(table_t t, const char *key)
{
- void **ret = Table$get(t, &key, &StrToVoidStarTable);
+ void **ret = Table$get(t, &key, &CStrToVoidStarTable);
return ret ? *ret : NULL;
}
public void *Table$str_get_raw(table_t t, const char *key)
{
- void **ret = Table$get_raw(t, &key, &StrToVoidStarTable);
+ void **ret = Table$get_raw(t, &key, &CStrToVoidStarTable);
return ret ? *ret : NULL;
}
public void *Table$str_reserve(table_t *t, const char *key, const void *value)
{
- return Table$reserve(t, &key, &value, &StrToVoidStarTable);
+ return Table$reserve(t, &key, &value, &CStrToVoidStarTable);
}
public void Table$str_set(table_t *t, const char *key, const void *value)
{
- Table$set(t, &key, &value, &StrToVoidStarTable);
+ Table$set(t, &key, &value, &CStrToVoidStarTable);
}
public void Table$str_remove(table_t *t, const char *key)
{
- return Table$remove(t, &key, &StrToVoidStarTable);
+ return Table$remove(t, &key, &CStrToVoidStarTable);
}
public void *Table$str_entry(table_t t, int64_t n)
diff --git a/builtins/table.h b/builtins/table.h
index 0ff4cb91..da60b3be 100644
--- a/builtins/table.h
+++ b/builtins/table.h
@@ -74,7 +74,7 @@ void Table$mark_copy_on_write(table_t *t);
int32_t Table$compare(const table_t *x, const table_t *y, const TypeInfo *type);
bool Table$equal(const table_t *x, const table_t *y, const TypeInfo *type);
uint32_t Table$hash(const table_t *t, const TypeInfo *type);
-CORD Table$as_text(const table_t *t, bool colorize, const TypeInfo *type);
+Text_t Table$as_text(const table_t *t, bool colorize, const TypeInfo *type);
void *Table$str_entry(table_t t, int64_t n);
void *Table$str_get(table_t t, const char *key);
@@ -85,6 +85,6 @@ void Table$str_remove(table_t *t, const char *key);
#define Table$length(t) ((t).entries.length)
-extern const TypeInfo StrToVoidStarTable;
+extern const TypeInfo CStrToVoidStarTable;
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
diff --git a/builtins/text.c b/builtins/text.c
index 966018f1..ff709e02 100644
--- a/builtins/text.c
+++ b/builtins/text.c
@@ -1,417 +1,1454 @@
// Type info and methods for Text datatype, which uses the Boehm "cord" library
// and libunistr
+
#include <assert.h>
#include <ctype.h>
#include <err.h>
#include <gc.h>
-#include <gc/cord.h>
#include <gmp.h>
#include <limits.h>
+#include <printf.h>
#include <readline/history.h>
#include <readline/readline.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <sys/param.h>
+
#include <unicase.h>
+#include <unictype.h>
#include <unigbrk.h>
#include <uniname.h>
#include <uninorm.h>
+#include <unistd.h>
+#include <unistdio.h>
#include <unistr.h>
#include "array.h"
#include "functions.h"
-#include "halfsiphash.h"
#include "integers.h"
#include "text.h"
#include "types.h"
-#define CLAMP(x, lo, hi) MIN(hi, MAX(x,lo))
+static struct {
+ size_t num_codepoints;
+ const uint32_t *codepoints;
+} synthetic_graphemes[1024] = {};
+
+static int32_t num_synthetic_graphemes = 0;
+
+static int32_t get_grapheme(Text_t text, int64_t index);
+
+typedef struct {
+ int64_t subtext, sum_of_previous_subtexts;
+} iteration_state_t;
-static inline uint8_t *_normalize(CORD str, uint8_t *buf, size_t *len)
+static int32_t _next_grapheme(Text_t text, iteration_state_t *state, int64_t index);
+
+// Look up a multi-codepoint grapheme cluster in the synthetic grapheme table.
+// Returns the index of the matching entry, or `num_synthetic_graphemes` if the
+// cluster has not been registered yet.
+int32_t find_synthetic_grapheme(const uint32_t *codepoints, size_t len)
{
- const uint8_t *str_u8 = (const uint8_t*)CORD_to_const_char_star(str);
- uint8_t *normalized = u8_normalize(UNINORM_NFD, str_u8, strlen((char*)str_u8)+1, buf, len);
- if (!normalized) errx(1, "Unicode normalization error!");
- return normalized;
+ // The table is kept in insertion order (see get_synthetic_grapheme), so this is
+ // a linear scan over the registered entries only.
+ for (int32_t i = 0; i < num_synthetic_graphemes; i++) {
+ if (synthetic_graphemes[i].num_codepoints == len
+ && memcmp(synthetic_graphemes[i].codepoints, codepoints, sizeof(uint32_t[len])) == 0)
+ return i;
+ }
+ return num_synthetic_graphemes;
}
-public CORD Text$as_text(const void *text, bool colorize, const TypeInfo *info)
+// Return the (negative) grapheme id for a multi-codepoint cluster, registering the
+// cluster on first use. Entry `n` of the table is identified by the id `-(n+1)`.
+int32_t get_synthetic_grapheme(const uint32_t *codepoints, size_t len)
{
- if (!text) return info->TextInfo.lang;
- CORD ret = Text$quoted(*(CORD*)text, colorize);
- if (!streq(info->TextInfo.lang, "Text"))
- ret = colorize ? CORD_all("\x1b[1m$", info->TextInfo.lang, "\x1b[m", ret) : CORD_all("$", info->TextInfo.lang, ret);
- return ret;
+ int32_t index = find_synthetic_grapheme(codepoints, len);
+ if (index < num_synthetic_graphemes)
+ return -(index+1);
+
+ // New entries are only ever appended: ids are derived from the table index and
+ // are stored inside texts, so an existing entry must never change position.
+ if ((size_t)num_synthetic_graphemes >= sizeof(synthetic_graphemes)/sizeof(synthetic_graphemes[0]))
+ errx(1, "Too many synthetic graphemes!");
+
+ uint32_t *buf = GC_MALLOC_ATOMIC(sizeof(uint32_t[len]));
+ memcpy(buf, codepoints, sizeof(uint32_t[len]));
+ synthetic_graphemes[index].codepoints = buf;
+ synthetic_graphemes[index].num_codepoints = len;
+
+ ++num_synthetic_graphemes;
+ return -(index+1);
}
-public CORD Text$quoted(CORD str, bool colorize)
-{
- // Note: it's important to have unicode strings not get broken up with
- // escapes, otherwise they won't print right.
- if (colorize) {
- CORD quoted = "\x1b[35m\"";
- CORD_pos i;
- CORD_FOR(i, str) {
- char c = CORD_pos_fetch(i);
- switch (c) {
-#define BACKSLASHED(esc) "\x1b[34m\\\x1b[1m" esc "\x1b[0;35m"
- case '\a': quoted = CORD_cat(quoted, BACKSLASHED("a")); break;
- case '\b': quoted = CORD_cat(quoted, BACKSLASHED("b")); break;
- case '\x1b': quoted = CORD_cat(quoted, BACKSLASHED("e")); break;
- case '\f': quoted = CORD_cat(quoted, BACKSLASHED("f")); break;
- case '\n': quoted = CORD_cat(quoted, BACKSLASHED("n")); break;
- case '\r': quoted = CORD_cat(quoted, BACKSLASHED("r")); break;
- case '\t': quoted = CORD_cat(quoted, BACKSLASHED("t")); break;
- case '\v': quoted = CORD_cat(quoted, BACKSLASHED("v")); break;
- case '"': quoted = CORD_cat(quoted, BACKSLASHED("\"")); break;
- case '\\': quoted = CORD_cat(quoted, BACKSLASHED("\\")); break;
- case '\x00' ... '\x06': case '\x0E' ... '\x1A':
- case '\x1C' ... '\x1F': case '\x7F' ... '\x7F':
- CORD_sprintf(&quoted, "%r" BACKSLASHED("x%02X"), quoted, c);
- break;
- default: quoted = CORD_cat_char(quoted, c); break;
-#undef BACKSLASHED
+static inline size_t num_subtexts(Text_t t)
+{
+ if (t.tag != TEXT_SUBTEXT) return 1;
+ size_t len = t.length;
+ size_t n = 0;
+ while (len > 0) {
+ len -= t.subtexts[n].length;
+ ++n;
+ }
+ return n;
+}
+
+// Debugging aid: write an XML-like dump of how `t` is represented internally
+// (ascii / graphemes / nested subtexts) to `stream`.
+// Returns the sum of the counts reported by the individual print calls.
+int text_visualize(FILE *stream, Text_t t)
+{
+ switch (t.tag) {
+ // The `*` precision of `%.*s` consumes an `int`, while the length is printed as a
+ // long via `%ld`, so the precision argument is narrowed explicitly.
+ case TEXT_SHORT_ASCII: return fprintf(stream, "<ascii length=%ld>%.*s</ascii>", t.length, (int)t.length, t.short_ascii);
+ case TEXT_ASCII: return fprintf(stream, "<ascii length=%ld>%.*s</ascii>", t.length, (int)t.length, t.ascii);
+ case TEXT_GRAPHEMES: case TEXT_SHORT_GRAPHEMES: {
+ int printed = fprintf(stream, "<graphemes length=%ld>", t.length);
+ printed += Text$print(stream, t);
+ printed += fprintf(stream, "</graphemes>");
+ return printed;
+ }
+ case TEXT_SUBTEXT: {
+ int printed = fprintf(stream, "<text length=%ld>", t.length);
+ // Walk the subtexts until their lengths add up to the parent's length.
+ size_t to_print = t.length;
+ for (int i = 0; to_print > 0; ++i) {
+ printed += fprintf(stream, "\n ");
+ printed += text_visualize(stream, t.subtexts[i]);
+ to_print -= t.subtexts[i].length;
+ // An empty subtext makes no progress; stop rather than keep walking.
+ if (t.subtexts[i].length == 0) break;
+ }
+ printed += fprintf(stream, "\n</text>");
+ return printed;
+ }
+ default: return 0;
+ }
+}
+
+// Write the contents of `t` to `stream`.
+// Returns the sum of the counts reported by the underlying write calls.
+public int Text$print(FILE *stream, Text_t t)
+{
+ switch (t.tag) {
+ case TEXT_SHORT_ASCII: return fwrite(t.short_ascii, sizeof(char), t.length, stream);
+ case TEXT_ASCII: return fwrite(t.ascii, sizeof(char), t.length, stream);
+ case TEXT_GRAPHEMES: case TEXT_SHORT_GRAPHEMES: {
+ int32_t *graphemes = t.tag == TEXT_SHORT_GRAPHEMES ? t.short_graphemes : t.graphemes;
+ int written = 0;
+ for (int64_t i = 0; i < t.length; i++) {
+ int32_t grapheme = graphemes[i];
+ if (grapheme >= 0) {
+ // Non-negative values are printed directly as a single codepoint.
+ written += ulc_fprintf(stream, "%.*llU", 1, &grapheme);
+ } else {
+ // Negative values index the synthetic grapheme table as -(index+1).
+ // `num_codepoints` is a size_t, but a `*` precision consumes an `int`.
+ written += ulc_fprintf(
+ stream, "%.*llU",
+ (int)synthetic_graphemes[-grapheme-1].num_codepoints,
+ synthetic_graphemes[-grapheme-1].codepoints);
}
}
- quoted = CORD_cat(quoted, "\"\x1b[m");
- return quoted;
+ return written;
+ }
+ case TEXT_SUBTEXT: {
+ int written = 0;
+ int i = 0;
+ // Print subtexts in order until their lengths add up to the parent's length.
+ for (size_t to_print = t.length; to_print > 0; to_print -= t.subtexts[i].length, ++i)
+ written += Text$print(stream, t.subtexts[i]);
+ return written;
+ }
+ default: return 0;
+ }
+}
+
+// Concatenate two texts. If either side is empty, the other is returned
+// unchanged. Otherwise a new TEXT_SUBTEXT is allocated whose subtext array
+// holds the subtexts of `a` followed by those of `b`; a TEXT_SUBTEXT operand
+// contributes its own subtext entries (copied with memcpy) rather than being
+// nested as a single entry.
+static Text_t concat2(Text_t a, Text_t b)
+{
+ if (a.length == 0) return b;
+ if (b.length == 0) return a;
+
+ if (a.tag == TEXT_SUBTEXT && b.tag == TEXT_SUBTEXT) {
+ // Both sides are subtext lists: splice the two arrays together
+ size_t na = num_subtexts(a);
+ size_t nb = num_subtexts(b);
+ Text_t ret = {
+ .length=a.length + b.length,
+ .tag=TEXT_SUBTEXT,
+ .subtexts=GC_MALLOC(sizeof(Text_t[na + nb])),
+ };
+ memcpy(&ret.subtexts[0], a.subtexts, sizeof(Text_t[na]));
+ memcpy(&ret.subtexts[na], b.subtexts, sizeof(Text_t[nb]));
+ return ret;
+ } else if (a.tag == TEXT_SUBTEXT) {
+ // Append `b` as one extra entry after a's subtexts
+ size_t n = num_subtexts(a);
+ Text_t ret = {
+ .length=a.length + b.length,
+ .tag=TEXT_SUBTEXT,
+ .subtexts=GC_MALLOC(sizeof(Text_t[n + 1])),
+ };
+ memcpy(ret.subtexts, a.subtexts, sizeof(Text_t[n]));
+ ret.subtexts[n] = b;
+ return ret;
+ } else if (b.tag == TEXT_SUBTEXT) {
+ // Prepend `a` as one extra entry before b's subtexts
+ size_t n = num_subtexts(b);
+ Text_t ret = {
+ .length=a.length + b.length,
+ .tag=TEXT_SUBTEXT,
+ .subtexts=GC_MALLOC(sizeof(Text_t[n + 1])),
+ };
+ ret.subtexts[0] = a;
+ memcpy(&ret.subtexts[1], b.subtexts, sizeof(Text_t[n]));
+ return ret;
} else {
- CORD quoted = "\"";
- CORD_pos i;
- CORD_FOR(i, str) {
- char c = CORD_pos_fetch(i);
- switch (c) {
- case '\a': quoted = CORD_cat(quoted, "\\a"); break;
- case '\b': quoted = CORD_cat(quoted, "\\b"); break;
- case '\x1b': quoted = CORD_cat(quoted, "\\e"); break;
- case '\f': quoted = CORD_cat(quoted, "\\f"); break;
- case '\n': quoted = CORD_cat(quoted, "\\n"); break;
- case '\r': quoted = CORD_cat(quoted, "\\r"); break;
- case '\t': quoted = CORD_cat(quoted, "\\t"); break;
- case '\v': quoted = CORD_cat(quoted, "\\v"); break;
- case '"': quoted = CORD_cat(quoted, "\\\""); break;
- case '\\': quoted = CORD_cat(quoted, "\\\\"); break;
- case '\x00' ... '\x06': case '\x0E' ... '\x1A':
- case '\x1C' ... '\x1F': case '\x7F' ... '\x7F':
- CORD_sprintf(&quoted, "%r\\x%02X", quoted, c);
- break;
- default: quoted = CORD_cat_char(quoted, c); break;
+ // Neither side is a subtext list: build a two-entry list
+ Text_t ret = {
+ .length=a.length + b.length,
+ .tag=TEXT_SUBTEXT,
+ .subtexts=GC_MALLOC(sizeof(Text_t[2])),
+ };
+ ret.subtexts[0] = a;
+ ret.subtexts[1] = b;
+ return ret;
+ }
+}
+
+// Concatenate `n` texts into a single text.
+// 0 items yields an empty text, 1 item is returned as-is, 2 items are handled
+// by concat2(). For 3 or more items a flat TEXT_SUBTEXT is built: items that
+// are themselves TEXT_SUBTEXT contribute their individual subtexts, and every
+// other item contributes one entry.
+public Text_t Text$_concat(int n, Text_t items[n])
+{
+    if (n == 0) return (Text_t){.length=0};
+    if (n == 1) return items[0];
+    if (n == 2) return concat2(items[0], items[1]);
+
+    int64_t len = 0, subtexts = 0;
+    for (int i = 0; i < n; i++) {
+        len += items[i].length;
+        // Count exactly the entries the copy loop below will write:
+        subtexts += items[i].tag == TEXT_SUBTEXT ? (int64_t)num_subtexts(items[i]) : 1;
+    }
+
+    Text_t ret = {
+        .length=len,
+        .tag=TEXT_SUBTEXT,
+        // Sized by the number of subtext entries, not by the grapheme count
+        // (the two differ, e.g. when an item has zero length):
+        .subtexts=GC_MALLOC(sizeof(Text_t[subtexts])),
+    };
+    int64_t sub_i = 0;
+    for (int i = 0; i < n; i++) {
+        if (items[i].tag == TEXT_SUBTEXT) {
+            // Flatten: copy subtexts until the item's full length is covered
+            for (int64_t j = 0, remainder = items[i].length; remainder > 0; j++) {
+                ret.subtexts[sub_i++] = items[i].subtexts[j];
+                remainder -= items[i].subtexts[j].length;
}
+        } else {
+            ret.subtexts[sub_i++] = items[i];
}
- quoted = CORD_cat_char(quoted, '"');
- return quoted;
}
+    return ret;
}
-public int Text$compare(const CORD *x, const CORD *y)
+// Return the graphemes of `text` from index `first_int` through `last_int`
+// (1-based, inclusive). Negative indices count back from the end (-1 is the
+// last grapheme). A `first` of 0 is a fatal error; a `last` of 0, or a range
+// that selects nothing, yields an empty text. `last` is clamped to the text
+// length. ASCII and grapheme slices point into the original storage.
+public Text_t Text$slice(Text_t text, Int_t first_int, Int_t last_int)
{
- uint8_t *xx = (uint8_t*)CORD_to_const_char_star(*x);
- uint8_t *yy = (uint8_t*)CORD_to_const_char_star(*y);
- int result = 0;
- if (u8_normcmp(xx, strlen((char*)xx), yy, strlen((char*)yy), UNINORM_NFD, &result))
- fail("Something went wrong while comparing text");
- return result;
+    // Indices stay 1-based throughout this function (no -1 adjustment), so the
+    // zero checks below test the caller's actual arguments:
+    int64_t first = Int_to_Int64(first_int, false);
+    int64_t last = Int_to_Int64(last_int, false);
+    if (first == 0) errx(1, "Invalid index: 0");
+    if (last == 0) return (Text_t){.length=0};
+
+    if (first < 0) first = text.length + first + 1;
+    if (last < 0) last = text.length + last + 1;
+
+    if (last > text.length) last = text.length;
+
+    if (first > text.length || last < first)
+        return (Text_t){.length=0};
+
+    if (first == 1 && last == text.length)
+        return text;
+
+    switch (text.tag) {
+    case TEXT_SHORT_ASCII: {
+        Text_t ret = text;
+        ret.length = last - first + 1;
+        // `ret` is a separate copy, so source and destination do not overlap
+        if (first > 1)
+            memcpy(ret.short_ascii, text.short_ascii + (first-1), ret.length);
+        return ret;
+    }
+    case TEXT_ASCII: {
+        Text_t ret = {
+            .tag=TEXT_ASCII,
+            .length=last - first + 1,
+            .ascii=text.ascii + (first-1),
+        };
+        return ret;
+    }
+    case TEXT_SHORT_GRAPHEMES: {
+        // The whole-text case returned above, so only one grapheme can remain
+        assert((first == 1 && last == 1) || (first == 2 && last == 2));
+        Text_t ret = {
+            .tag=TEXT_SHORT_GRAPHEMES,
+            .length=1,
+            .short_graphemes={text.short_graphemes[first-1]},
+        };
+        return ret;
+    }
+    case TEXT_GRAPHEMES: {
+        Text_t ret = {
+            .tag=TEXT_GRAPHEMES,
+            .length=last - first + 1,
+            .graphemes=text.graphemes + (first-1),
+        };
+        return ret;
+    }
+    case TEXT_SUBTEXT: {
+        // Skip whole subtexts that end before `first`, rebasing the indices
+        Text_t *subtexts = text.subtexts;
+        while (first > subtexts[0].length) {
+            first -= subtexts[0].length;
+            last -= subtexts[0].length;
+            ++subtexts;
+        }
+
+        // Count how many subtexts the requested range touches
+        int64_t needed_len = (last - first) + 1;
+        int64_t num_subtexts = 0;
+        for (int64_t included = 0; included < needed_len; ) {
+            if (included == 0)
+                included += subtexts[num_subtexts].length - first + 1;
+            else
+                included += subtexts[num_subtexts].length;
+            num_subtexts += 1;
+        }
+        if (num_subtexts == 1)
+            return Text$slice(subtexts[0], Int64_to_Int(first), Int64_to_Int(last));
+
+        Text_t ret = {
+            .length=needed_len,
+            .tag=TEXT_SUBTEXT,
+            .subtexts=GC_MALLOC(sizeof(Text_t[num_subtexts])),
+        };
+        for (int64_t i = 0; i < num_subtexts; i++) {
+            // `last` may exceed this subtext's length; the recursive call clamps it
+            ret.subtexts[i] = Text$slice(subtexts[i], Int64_to_Int(first), Int64_to_Int(last));
+            first = 1;
+            needed_len -= ret.subtexts[i].length;
+            last = first + needed_len - 1;
+        }
+        return ret;
+    }
+    default: errx(1, "Invalid tag");
+    }
}
-public bool Text$equal(const CORD *x, const CORD *y)
+// Build a text from an array of UTF-32 codepoints.
+// When `normalize` is true, the codepoints are first NFC-normalized with
+// u32_normalize(). The codepoints are then split into grapheme clusters:
+// single-codepoint clusters are stored directly, multi-codepoint clusters are
+// stored as the id returned by get_synthetic_grapheme(). Up to two graphemes
+// are stored inline (TEXT_SHORT_GRAPHEMES); longer texts switch to a
+// GC-allocated array (TEXT_GRAPHEMES).
+Text_t text_from_u32(uint32_t *codepoints, size_t num_codepoints, bool normalize)
{
- return Text$compare(x, y) == 0;
+    uint32_t norm_buf[128];
+    if (normalize) {
+        size_t norm_length = sizeof(norm_buf)/sizeof(norm_buf[0]);
+        uint32_t *normalized = u32_normalize(UNINORM_NFC, codepoints, num_codepoints, norm_buf, &norm_length);
+        // Fail loudly instead of dereferencing NULL below:
+        if (!normalized) errx(1, "Failed to normalize text");
+        codepoints = normalized;
+        num_codepoints = norm_length;
+    }
+
+    Text_t ret = {
+        .length=0,
+        .tag=TEXT_SHORT_GRAPHEMES,
+    };
+    const uint32_t *src = codepoints;
+    int32_t *dest = &ret.short_graphemes[0];
+    while (src != &codepoints[num_codepoints]) {
+        ++ret.length;
+
+        // Third grapheme: move from inline storage to a heap array
+        if (ret.tag == TEXT_SHORT_GRAPHEMES && ret.length > 2) {
+            int32_t *graphemes = GC_MALLOC_ATOMIC(sizeof(int32_t[num_codepoints])); // May be a slight overallocation
+            graphemes[0] = ret.short_graphemes[0];
+            graphemes[1] = ret.short_graphemes[1];
+            ret.tag = TEXT_GRAPHEMES;
+            ret.graphemes = graphemes;
+            dest = &graphemes[2];
+        }
+
+        const uint32_t *next = u32_grapheme_next(src, &codepoints[num_codepoints]);
+        if (next == &src[1]) {
+            *dest = (int32_t)*src;
+        } else {
+            // Synthetic grapheme
+            *dest = get_synthetic_grapheme(src, next-src);
+        }
+        ++dest;
+        src = next;
+    }
+    // u32_normalize() allocates when the result did not fit in norm_buf
+    if (normalize && codepoints != norm_buf) free(codepoints);
+    return ret;
}
-public uint32_t Text$hash(const CORD *cord)
+// Build a text from a NUL-terminated C string.
+// All-ASCII strings of at most 8 bytes are copied inline (TEXT_SHORT_ASCII);
+// longer all-ASCII strings keep a pointer to `str` itself (TEXT_ASCII), with
+// no copy made here. Any other string is converted to UTF-32 with
+// u8_to_u32() and passed to text_from_u32() with normalization enabled.
+public Text_t Text$from_str(const char *str)
{
- if (!*cord) return 0;
+ // Measure the leading run of ASCII bytes
+ size_t ascii_span = 0;
+ while (str[ascii_span] && isascii(str[ascii_span]))
+ ascii_span++;
- uint8_t buf[128] = {0}; size_t norm_len = sizeof(buf);
- uint8_t *normalized = _normalize(*cord, buf, &norm_len);
+ if (str[ascii_span] == '\0') { // All ASCII
+ Text_t ret = {.length=ascii_span};
+ if (ascii_span <= 8) {
+ ret.tag = TEXT_SHORT_ASCII;
+ for (size_t i = 0; i < ascii_span; i++)
+ ret.short_ascii[i] = str[i];
+ } else {
+ ret.tag = TEXT_ASCII;
+ ret.ascii = str;
+ }
+ return ret;
+ } else {
+ uint32_t buf[128];
+ size_t length = sizeof(buf)/sizeof(buf[0]);
+ // Total byte length = ASCII prefix already scanned + the remainder
+ uint32_t *codepoints = u8_to_u32((uint8_t*)str, ascii_span + strlen(str + ascii_span), buf, &length);
+ Text_t ret = text_from_u32(codepoints, length, true);
+ // Free only if u8_to_u32() returned something other than the stack buffer
+ if (codepoints != buf) free(codepoints);
+ return ret;
+ }
+}
+
+// Append the UTF-8 encoding of `text` to the growable buffer `*buf`.
+//   buf:      in/out pointer to the buffer (reallocated with GC_REALLOC as needed)
+//   capacity: in/out size of `*buf` in bytes
+//   i:        in/out write offset into `*buf`
+// No NUL terminator is written here.
+static void u8_buf_append(Text_t text, char **buf, size_t *capacity, int64_t *i)
+{
+    switch (text.tag) {
+    case TEXT_ASCII: case TEXT_SHORT_ASCII: {
+        if (*i + text.length > (int64_t)*capacity) {
+            *capacity = *i + text.length;
+            *buf = GC_REALLOC(*buf, *capacity);
+        }
+
+        const char *bytes = text.tag == TEXT_ASCII ? text.ascii : text.short_ascii;
+        memcpy(*buf + *i, bytes, text.length);
+        *i += text.length;
+        break;
+    }
+    case TEXT_GRAPHEMES: case TEXT_SHORT_GRAPHEMES: {
+        const int32_t *graphemes = text.tag == TEXT_GRAPHEMES ? text.graphemes : text.short_graphemes;
+        // Visit every grapheme, including the last one:
+        for (int64_t g = 0; g < text.length; g++) {
+            // Negative ids refer to multi-codepoint clusters in synthetic_graphemes
+            const uint32_t *codepoints = graphemes[g] < 0 ? synthetic_graphemes[-graphemes[g]-1].codepoints : (uint32_t*)&graphemes[g];
+            size_t num_codepoints = graphemes[g] < 0 ? synthetic_graphemes[-graphemes[g]-1].num_codepoints : 1;
+            uint8_t u8_buf[64];
+            size_t u8_len = sizeof(u8_buf);
+            uint8_t *u8 = u32_to_u8(codepoints, num_codepoints, u8_buf, &u8_len);
+
+            if (*i + (int64_t)u8_len > (int64_t)*capacity) {
+                *capacity = *i + u8_len;
+                *buf = GC_REALLOC(*buf, *capacity);
+            }
- uint32_t hash;
- halfsiphash(normalized, norm_len, TOMO_HASH_KEY, (uint8_t*)&hash, sizeof(hash));
- if (normalized != buf) free(normalized);
- return hash;
+            memcpy(*buf + *i, u8, u8_len);
+            *i += u8_len;
+            // u32_to_u8() allocates when the result did not fit in u8_buf
+            if (u8 != u8_buf) free(u8);
+        }
+        break;
+    }
+    case TEXT_SUBTEXT: {
+        // Recurse over subtexts until text.length graphemes are accounted for
+        for (int64_t s = 0, remaining = text.length; remaining > 0; s++) {
+            u8_buf_append(text.subtexts[s], buf, capacity, i);
+            remaining -= text.subtexts[s].length;
+        }
+        break;
+    }
+    default: break;
+    }
}
-public CORD Text$upper(CORD str)
+// Return the text encoded as a NUL-terminated UTF-8 string in GC-allocated
+// memory.
+public const char *Text$as_c_string(Text_t text)
{
- if (!str) return str;
- size_t len = strlen(str) + 1;
- uint8_t *dest = GC_MALLOC_ATOMIC(len);
- dest[len-1] = 0;
- return (CORD)u8_toupper((const uint8_t*)str, len-1, uc_locale_language(), NULL, dest, &len);
+    // Initial guess of one byte per grapheme, plus room for the terminator
+    size_t capacity = text.length + 1;
+    char *buf = GC_MALLOC_ATOMIC(capacity);
+    int64_t i = 0;
+    u8_buf_append(text, &buf, &capacity, &i);
+
+    // u8_buf_append() grows the buffer to exactly fit the data, so make sure
+    // there is still room for the terminating NUL before writing it:
+    if (i + 1 > (int64_t)capacity) {
+        capacity = i + 1;
+        buf = GC_REALLOC(buf, capacity);
+    }
+    buf[i] = '\0';
+    return buf;
}
-public CORD Text$lower(CORD str)
+// Expand a text into a GC-allocated array of UTF-32 codepoints.
+// The number of codepoints is stored in `*length` (which must be non-NULL).
+// Synthetic (negative) graphemes are expanded into the codepoints recorded
+// for them in synthetic_graphemes.
+uint32_t *text_to_u32(Text_t text, size_t *length)
{
- if (!str) return str;
- size_t len = strlen(str) + 1;
- uint8_t *dest = GC_MALLOC_ATOMIC(len);
- dest[len-1] = 0;
- return (CORD)u8_tolower((const uint8_t*)str, len-1, uc_locale_language(), NULL, dest, &len);
+ // Precalculate size:
+ size_t len = 0;
+ if (text.tag == TEXT_ASCII) {
+ // Shortcut: the length is used directly as the codepoint count
+ len = text.length;
+ } else {
+ iteration_state_t state = {0, 0};
+ for (int64_t i = 0; i < text.length; i++) {
+ int32_t grapheme = _next_grapheme(text, &state, i);
+ if (grapheme < 0)
+ len += synthetic_graphemes[-grapheme-1].num_codepoints;
+ else
+ len += 1;
+ }
+ }
+ assert(length);
+ *length = len;
+
+ // Copy over codepoints one grapheme cluster at a time:
+ uint32_t *ret = GC_MALLOC_ATOMIC(sizeof(uint32_t[len]));
+ uint32_t *dest = ret;
+ iteration_state_t state = {0, 0};
+ for (int64_t i = 0; i < text.length; i++) {
+ int32_t grapheme = _next_grapheme(text, &state, i);
+ if (grapheme < 0) {
+ const uint32_t *codepoints = synthetic_graphemes[-grapheme-1].codepoints;
+ size_t num_codepoints = synthetic_graphemes[-grapheme-1].num_codepoints;
+ for (size_t j = 0; j < num_codepoints; j++)
+ *(dest++) = codepoints[j];
+ } else {
+ *(dest++) = (uint32_t)grapheme;
+ }
+ }
+ return ret;
}
-public CORD Text$title(CORD str)
+#include "siphash.c"
+
+// Compute a 64-bit SipHash of the text's graphemes and cache it in
+// text->hash. A cached value of 0 means "not computed yet", so a computed
+// hash of 0 is stored as 1.
+//
+// Graphemes are fed to the hash two at a time, packed into one 64-bit word;
+// an unpaired final grapheme (or 0 if there is none) is passed to
+// siphashfinish_last_part(). Every representation follows the same pairing
+// scheme, so the same grapheme sequence hashes identically whether it is
+// stored flat or as subtexts.
+public uint64_t Text$hash(Text_t *text)
{
- if (!str) return str;
- size_t len = strlen(str) + 1;
- uint8_t *dest = GC_MALLOC_ATOMIC(len);
- dest[len-1] = 0;
- return (CORD)u8_totitle((const uint8_t*)str, len-1, uc_locale_language(), NULL, dest, &len);
+    if (text->hash != 0) return text->hash;
+    siphash sh;
+    siphashinit(&sh, sizeof(int32_t[text->length]), (uint64_t*)TOMO_HASH_KEY);
+
+    union {
+        int32_t chunks[2];
+        uint64_t whole;
+    } tmp;
+    switch (text->tag) {
+    case TEXT_ASCII: case TEXT_SHORT_ASCII: {
+        const char *bytes = text->tag == TEXT_ASCII ? text->ascii : text->short_ascii;
+        // Step by 2 so that each grapheme is hashed exactly once:
+        for (int64_t i = 0; i + 1 < text->length; i += 2) {
+            tmp.chunks[0] = (int32_t)bytes[i];
+            tmp.chunks[1] = (int32_t)bytes[i+1];
+            siphashadd64bits(&sh, tmp.whole);
+        }
+        int32_t last = text->length & 0x1 ? (int32_t)bytes[text->length-1] : 0; // Odd number of graphemes
+        text->hash = siphashfinish_last_part(&sh, (uint64_t)last);
+        break;
+    }
+    case TEXT_GRAPHEMES: case TEXT_SHORT_GRAPHEMES: {
+        // Short (inline) and heap grapheme arrays are hashed the same way
+        const int32_t *graphemes = text->tag == TEXT_GRAPHEMES ? text->graphemes : text->short_graphemes;
+        for (int64_t i = 0; i + 1 < text->length; i += 2) {
+            tmp.chunks[0] = graphemes[i];
+            tmp.chunks[1] = graphemes[i+1];
+            siphashadd64bits(&sh, tmp.whole);
+        }
+        int32_t last = text->length & 0x1 ? graphemes[text->length-1] : 0; // Odd number of graphemes
+        text->hash = siphashfinish_last_part(&sh, (uint64_t)last);
+        break;
+    }
+    case TEXT_SUBTEXT: {
+        // `leftover` carries an unpaired grapheme from one subtext to the next.
+        // NOTE(review): 0 doubles as "no leftover", so a NUL grapheme at a pair
+        // boundary is not distinguished from an empty slot; nested TEXT_SUBTEXT
+        // entries are not hashed here -- presumably subtext lists are always flat.
+        int32_t leftover = 0;
+        for (int64_t sub_i = 0, to_hash = text->length; to_hash > 0; ) {
+            Text_t subtext = text->subtexts[sub_i];
+            if (subtext.tag == TEXT_ASCII || subtext.tag == TEXT_SHORT_ASCII) {
+                const char *bytes = subtext.tag == TEXT_ASCII ? subtext.ascii : subtext.short_ascii;
+                int64_t grapheme = 0;
+                if (leftover) {
+                    tmp.chunks[0] = leftover;
+                    tmp.chunks[1] = (int32_t)bytes[0];
+                    siphashadd64bits(&sh, tmp.whole);
+                    grapheme += 1;
+                }
+                for (; grapheme + 1 < subtext.length; grapheme += 2) {
+                    tmp.chunks[0] = (int32_t)bytes[grapheme];
+                    tmp.chunks[1] = (int32_t)bytes[grapheme+1];
+                    siphashadd64bits(&sh, tmp.whole);
+                }
+                leftover = grapheme < subtext.length ? (int32_t)bytes[grapheme] : 0;
+            } else if (subtext.tag == TEXT_SHORT_GRAPHEMES) {
+                if (leftover) {
+                    tmp.chunks[0] = leftover;
+                    tmp.chunks[1] = subtext.short_graphemes[0];
+                    siphashadd64bits(&sh, tmp.whole);
+                    leftover = subtext.length > 1 ? subtext.short_graphemes[1] : 0;
+                } else if (subtext.length == 1) {
+                    leftover = subtext.short_graphemes[0];
+                } else {
+                    tmp.chunks[0] = subtext.short_graphemes[0];
+                    tmp.chunks[1] = subtext.short_graphemes[1];
+                    siphashadd64bits(&sh, tmp.whole);
+                }
+            } else if (subtext.tag == TEXT_GRAPHEMES) {
+                int32_t *graphemes = subtext.graphemes;
+                int64_t grapheme = 0;
+                if (leftover) {
+                    tmp.chunks[0] = leftover;
+                    tmp.chunks[1] = graphemes[0];
+                    siphashadd64bits(&sh, tmp.whole);
+                    grapheme += 1;
+                }
+                for (; grapheme + 1 < subtext.length; grapheme += 2) {
+                    tmp.chunks[0] = graphemes[grapheme];
+                    tmp.chunks[1] = graphemes[grapheme+1];
+                    siphashadd64bits(&sh, tmp.whole);
+                }
+                leftover = grapheme < subtext.length ? graphemes[grapheme] : 0;
+            }
+
+            to_hash -= text->subtexts[sub_i].length;
+
+            ++sub_i;
+        }
+
+        text->hash = siphashfinish_last_part(&sh, leftover);
+        break;
+    }
+    default: errx(1, "Invalid text");
+    }
+
+    if (text->hash == 0)
+        text->hash = 1;
+
+    return text->hash;
}
-public bool Text$has(CORD str, CORD target, Where_t where)
+// Return the grapheme at 0-based `index` in `text`, or 0 if `index` is out of
+// range.
+// For TEXT_SUBTEXT texts, `state` remembers which subtext the previous lookup
+// landed in (state->subtext) and how many graphemes precede that subtext
+// (state->sum_of_previous_subtexts), so sequential lookups do not rescan from
+// the start. `state` may be NULL, in which case a fresh state is used.
+int32_t _next_grapheme(Text_t text, iteration_state_t *state, int64_t index)
{
- if (!target) return true;
- if (!str) return false;
+    switch (text.tag) {
+    case TEXT_ASCII: return (index >= 0 && index < text.length) ? (int32_t)text.ascii[index] : 0;
+    case TEXT_SHORT_ASCII: return (index >= 0 && index < text.length) ? (int32_t)text.short_ascii[index] : 0;
+    case TEXT_GRAPHEMES: return (index >= 0 && index < text.length) ? text.graphemes[index] : 0;
+    case TEXT_SHORT_GRAPHEMES: return (index >= 0 && index < text.length) ? text.short_graphemes[index] : 0;
+    case TEXT_SUBTEXT: {
+        iteration_state_t backup_state = {0, 0};
+        if (!state) state = &backup_state;
- uint8_t str_buf[128] = {0}; size_t str_norm_len = sizeof(str_buf);
- uint8_t *str_normalized = _normalize(str, str_buf, &str_norm_len);
+        if (index < 0 || index >= text.length)
+            return 0;
- uint8_t target_buf[128] = {0}; size_t target_norm_len = sizeof(target_buf);
- uint8_t *target_normalized = _normalize(target, target_buf, &target_norm_len);
+        // Walk backwards if the index lies before the current subtext. Step to
+        // the previous subtext first, then remove *its* length from the running
+        // sum (the sum only ever covers subtexts before state->subtext):
+        while (index < state->sum_of_previous_subtexts && state->subtext > 0) {
+            state->subtext -= 1;
+            state->sum_of_previous_subtexts -= text.subtexts[state->subtext].length;
+        }
+        // Walk forwards until the subtext containing `index` is found
+        for (;;) {
+            if (index < state->sum_of_previous_subtexts + text.subtexts[state->subtext].length)
+                // Look up using the index relative to the start of that subtext:
+                return _next_grapheme(text.subtexts[state->subtext], NULL, index - state->sum_of_previous_subtexts);
+            state->sum_of_previous_subtexts += text.subtexts[state->subtext].length;
+            state->subtext += 1;
+        }
+        return 0;
+    }
+    default: errx(1, "Invalid text");
+    }
+    return 0;
+}
- if (target_norm_len > str_norm_len) return false;
+// One-off grapheme lookup: fetch the grapheme at `index` using a fresh
+// iteration state rather than one shared across calls.
+int32_t get_grapheme(Text_t text, int64_t index)
+{
+    iteration_state_t fresh_state = {0, 0};
+    int32_t grapheme = _next_grapheme(text, &fresh_state, index);
+    return grapheme;
+}
- bool ret;
- if (where.tag == $tag$Where$Start) {
- ret = (u8_strncmp(str_normalized, target_normalized, target_norm_len-1) == 0);
- } else if (where.tag == $tag$Where$End) {
- ret = (u8_strcmp(str_normalized + str_norm_len - target_norm_len, target_normalized) == 0);
- } else {
- assert(where.tag == $tag$Where$Anywhere);
- ret = (u8_strstr(str_normalized, target_normalized) != NULL);
+// Three-way comparison of two texts, grapheme by grapheme.
+// Returns 0 if no differing grapheme is found, otherwise the result of
+// comparing the codepoints of the first pair of graphemes that differ.
+// Positions past the end of the shorter text read as grapheme 0 (that is what
+// _next_grapheme() returns out of range).
+int32_t Text$compare(const Text_t *a, const Text_t *b)
+{
+    int64_t len = MAX(a->length, b->length);
+    iteration_state_t a_state = {0, 0}, b_state = {0, 0};
+    for (int64_t i = 0; i < len; i++) {
+        int32_t ai = _next_grapheme(*a, &a_state, i);
+        int32_t bi = _next_grapheme(*b, &b_state, i);
+        if (ai == bi) continue;
+        int32_t cmp;
+        // Only negative values are synthetic grapheme ids; 0 is a plain
+        // codepoint and must not be used to index synthetic_graphemes.
+        if (ai >= 0 && bi >= 0) {
+            cmp = u32_cmp((uint32_t*)&ai, (uint32_t*)&bi, 1);
+        } else if (ai >= 0) {
+            cmp = u32_cmp2(
+                (uint32_t*)&ai, 1,
+                synthetic_graphemes[-bi-1].codepoints,
+                synthetic_graphemes[-bi-1].num_codepoints);
+        } else if (bi >= 0) {
+            cmp = u32_cmp2(
+                synthetic_graphemes[-ai-1].codepoints,
+                synthetic_graphemes[-ai-1].num_codepoints,
+                (uint32_t*)&bi, 1);
+        } else {
+            cmp = u32_cmp2(
+                synthetic_graphemes[-ai-1].codepoints,
+                synthetic_graphemes[-ai-1].num_codepoints,
+                synthetic_graphemes[-bi-1].codepoints,
+                synthetic_graphemes[-bi-1].num_codepoints);
+        }
+        if (cmp != 0) return cmp;
}
+    return 0;
+}
- if (str_normalized != str_buf) free(str_normalized);
- if (target_normalized != target_buf) free(target_normalized);
- return ret;
+// Report whether two texts contain the same sequence of graphemes.
+// Cheap rejections come first: differing lengths, or differing hashes when
+// both texts already have a cached (non-zero) hash.
+public bool Text$equal(const Text_t *a, const Text_t *b)
+{
+    if (a->length != b->length)
+        return false;
+    if (a->hash != 0 && b->hash != 0 && a->hash != b->hash)
+        return false;
+
+    iteration_state_t state_a = {0, 0}, state_b = {0, 0};
+    for (int64_t pos = 0, count = a->length; pos < count; pos++) {
+        int32_t from_a = _next_grapheme(*a, &state_a, pos);
+        int32_t from_b = _next_grapheme(*b, &state_b, pos);
+        if (from_a != from_b)
+            return false;
+    }
+    return true;
}
-public CORD Text$without(CORD str, CORD target, Where_t where)
+// Report whether two texts are equal when case is ignored.
+// Texts of different lengths are never equal. Graphemes that are identical
+// are accepted directly; differing graphemes are compared with u32_casecmp()
+// using the locale language from uc_locale_language() and NFC normalization.
+public bool Text$equal_ignoring_case(Text_t a, Text_t b)
{
- if (!str || !target) return str;
+ if (a.length != b.length)
+ return false;
+ int64_t len = a.length;
+ iteration_state_t a_state = {0, 0}, b_state = {0, 0};
+ const char *language = uc_locale_language();
+ for (int64_t i = 0; i < len; i++) {
+ int32_t ai = _next_grapheme(a, &a_state, i);
+ int32_t bi = _next_grapheme(b, &b_state, i);
+ if (ai != bi) {
+ // Non-negative graphemes are a single codepoint; negative ones are
+ // looked up in synthetic_graphemes
+ const uint32_t *a_codepoints = ai >= 0 ? (uint32_t*)&ai : synthetic_graphemes[-ai-1].codepoints;
+ size_t a_len = ai >= 0 ? 1 : synthetic_graphemes[-ai-1].num_codepoints;
- size_t target_len = CORD_len(target);
- size_t str_len = CORD_len(str);
- if (where.tag == $tag$Where$Start) {
- if (CORD_ncmp(str, 0, target, 0, target_len) == 0)
- return CORD_substr(str, target_len, str_len - target_len);
- return str;
- } else if (where.tag == $tag$Where$End) {
- if (CORD_ncmp(str, str_len-target_len, target, 0, target_len) == 0)
- return CORD_substr(str, 0, str_len - target_len);
- return str;
- } else {
- CORD ret = CORD_EMPTY;
- size_t i = 0;
- for (;;) {
- size_t match = CORD_str(str, i, target);
- if (match == CORD_NOT_FOUND) {
- if (i == 0) return str; // No matches!
- ret = CORD_cat(ret, CORD_substr(str, i, str_len));
- break;
- }
- ret = CORD_cat(ret, CORD_substr(str, i, (match-i)));
- i = match + target_len;
+ const uint32_t *b_codepoints = bi >= 0 ? (uint32_t*)&bi : synthetic_graphemes[-bi-1].codepoints;
+ size_t b_len = bi >= 0 ? 1 : synthetic_graphemes[-bi-1].num_codepoints;
+
+ // NOTE(review): the return value of u32_casecmp() is discarded and `cmp`
+ // has no initializer, so `cmp` is only meaningful if the call stores a result
+ int cmp;
+ (void)u32_casecmp(a_codepoints, a_len, b_codepoints, b_len, language, UNINORM_NFC, &cmp);
+ if (cmp != 0)
+ return false;
}
- return ret;
}
+ return true;
}
-public CORD Text$trimmed(CORD str, CORD skip, Where_t where)
+// Return an uppercased copy of `text`, using the locale language from
+// uc_locale_language() and NFC normalization.
+public Text_t Text$upper(Text_t text)
{
- if (!str || !skip) return str;
- const uint8_t *ustr = (const uint8_t*)CORD_to_const_char_star(str);
- const uint8_t *uskip = (const uint8_t*)CORD_to_const_char_star(skip);
- // TODO: implement proper reverse iteration with u8_prev()
- if (where.tag == $tag$Where$Start) {
- size_t span = u8_strspn(ustr, uskip);
- return (CORD)ustr + span;
- } else if (where.tag == $tag$Where$End) {
- size_t len = u8_strlen(ustr);
- const uint8_t *back = ustr + len;
- size_t back_span = 0;
- while (back - back_span > ustr && u8_strspn(back-back_span-1, uskip) > back_span)
- ++back_span;
- return CORD_substr((CORD)ustr, 0, len - back_span);
- } else {
- size_t span = u8_strspn(ustr, uskip);
- size_t len = u8_strlen(ustr);
- const uint8_t *back = ustr + len;
- size_t back_span = 0;
- while (back - back_span > ustr + span && u8_strspn(back-back_span-1, uskip) > back_span)
- ++back_span;
- return CORD_substr((CORD)(ustr + span), 0, len - span - back_span);
+    size_t length;
+    uint32_t *codepoints = text_to_u32(text, &length);
+    const char *language = uc_locale_language();
+    uint32_t buf[128];
+    // On input, the length argument must hold the capacity of `buf`:
+    size_t out_len = sizeof(buf)/sizeof(buf[0]);
+    uint32_t *upper = u32_toupper(codepoints, length, language, UNINORM_NFC, buf, &out_len);
+    Text_t ret = text_from_u32(upper, out_len, false);
+    // The result is heap-allocated only when it did not fit in `buf`
+    if (upper != buf) free(upper);
+    return ret;
+}
+
+// Return a lowercased copy of `text`, using the locale language from
+// uc_locale_language() and NFC normalization.
+public Text_t Text$lower(Text_t text)
+{
+    size_t length;
+    uint32_t *codepoints = text_to_u32(text, &length);
+    const char *language = uc_locale_language();
+    uint32_t buf[128];
+    // On input, the length argument must hold the capacity of `buf`:
+    size_t out_len = sizeof(buf)/sizeof(buf[0]);
+    uint32_t *lower = u32_tolower(codepoints, length, language, UNINORM_NFC, buf, &out_len);
+    Text_t ret = text_from_u32(lower, out_len, false);
+    // The result is either `buf` (stack memory, must not be freed) or a fresh
+    // heap allocation, so compare against `buf`:
+    if (lower != buf) free(lower);
+    return ret;
+}
+
+// Return a title-cased copy of `text`, using the locale language from
+// uc_locale_language() and NFC normalization.
+public Text_t Text$title(Text_t text)
+{
+    size_t length;
+    uint32_t *codepoints = text_to_u32(text, &length);
+    const char *language = uc_locale_language();
+    uint32_t buf[128];
+    // On input, the length argument must hold the capacity of `buf`:
+    size_t out_len = sizeof(buf)/sizeof(buf[0]);
+    uint32_t *title = u32_totitle(codepoints, length, language, UNINORM_NFC, buf, &out_len);
+    Text_t ret = text_from_u32(title, out_len, false);
+    // The result is either `buf` (stack memory, must not be freed) or a fresh
+    // heap allocation, so compare against `buf`:
+    if (title != buf) free(title);
+    return ret;
+}
+
+// Advance `*i` past any whitespace graphemes starting at position `*i`.
+// Stops at the first non-whitespace grapheme or at the end of the text.
+static inline void skip_whitespace(Text_t text, int64_t *i)
+{
+    iteration_state_t state = {0, 0};
+    while (*i < text.length) {
+        int32_t grapheme = _next_grapheme(text, &state, *i);
+        // Negative ids are multi-codepoint clusters, which are never treated as
+        // whitespace here; everything else is tested by its codepoint.
+        if (grapheme < 0 || !uc_is_property_white_space(grapheme))
+            return;
+        *i += 1;
}
}
-public find_result_t Text$find(CORD str, CORD pat)
+// If the grapheme at position `*i` equals `grapheme`, advance `*i` by one and
+// return true; otherwise leave `*i` unchanged and return false.
+static inline bool match_grapheme(Text_t text, int64_t *i, int32_t grapheme)
{
- if (!pat) return (find_result_t){.status=FIND_SUCCESS, .index=1};
- size_t pos = CORD_str(str, 0, pat);
- return (pos == CORD_NOT_FOUND) ? (find_result_t){.status=FIND_FAILURE} : (find_result_t){.status=FIND_SUCCESS, .index=(int32_t)pos};
+ if (*i < text.length && get_grapheme(text, *i) == grapheme) {
+ *i += 1;
+ return true;
+ }
+ return false;
}
-public CORD Text$replace(CORD text, CORD pat, CORD replacement, Int_t int_limit)
+// If the graphemes starting at position `*i` equal the chars of the
+// NUL-terminated string `str` one for one, advance `*i` past them and return
+// true; otherwise leave `*i` unchanged and return false.
+static inline bool match_str(Text_t text, int64_t *i, const char *str)
{
- if (!text || !pat) return text;
- CORD ret = CORD_EMPTY;
- size_t pos = 0, pat_len = CORD_len(pat);
- int64_t limit = Int_to_Int64(int_limit, false);
- for (size_t found; limit != 0 && (found=CORD_str(text, pos, pat)) != CORD_NOT_FOUND; --limit) {
- ret = CORD_all(ret, CORD_substr(text, pos, found - pos), replacement);
- pos = found + pat_len;
+ iteration_state_t state = {0, 0};
+ int64_t matched = 0;
+ // `matched[str]` is the same element as `str[matched]`
+ while (matched[str]) {
+ if (*i + matched >= text.length || _next_grapheme(text, &state, *i + matched) != str[matched])
+ return false;
+ matched += 1;
}
- size_t str_len = CORD_len(text);
- return CORD_cat(ret, CORD_substr(text, pos, str_len - pos));
+ *i += matched;
+ return true;
}
-public array_t Text$split(CORD str, CORD split)
+// If the grapheme at position `*i` has the Unicode property `prop`, advance
+// `*i` by one and return true; otherwise return false without advancing.
+// For synthetic (negative) graphemes only the first codepoint is tested.
+static inline bool match_property(Text_t text, int64_t *i, uc_property_t prop)
{
- if (!str) return (array_t){.data=GC_MALLOC(sizeof(CORD)), .atomic=1, .length=1, .stride=sizeof(CORD)};
- array_t strings = {.stride=sizeof(CORD), .atomic=1};
+ if (*i >= text.length) return false;
+ int32_t grapheme = get_grapheme(text, *i);
+ if (grapheme < 0) // TODO: check every codepoint in the cluster?
+ grapheme = synthetic_graphemes[-grapheme-1].codepoints[0];
- const uint8_t *ustr = (uint8_t*)CORD_to_const_char_star(str);
- const uint8_t *usplit = (uint8_t*)CORD_to_const_char_star(split);
- for (int64_t i = 0; ; ) {
- size_t non_split = u8_strcspn(ustr + i, usplit);
- CORD chunk = CORD_substr((CORD)ustr, i, non_split);
- Array$insert(&strings, &chunk, I(0), sizeof(CORD));
+ if (uc_is_property(grapheme, prop)) {
+ *i += 1;
+ return true;
+ }
+ return false;
+}
- i += non_split;
+// Read a run of decimal digits starting at position `*i` and return its
+// value, advancing `*i` past each digit consumed. Digits are recognized with
+// uc_digit_value(); for a synthetic grapheme only its first codepoint is
+// examined. Reading stops at the first non-digit, or once the accumulated
+// value reaches INT64_MAX/10.
+static int64_t parse_int(Text_t text, int64_t *i)
+{
+    iteration_state_t state = {0, 0};
+    int64_t value = 0;
+    for (;;) {
+        int32_t codepoint = _next_grapheme(text, &state, *i);
+        if (codepoint < 0)
+            codepoint = synthetic_graphemes[-codepoint-1].codepoints[0];
+
+        int digit = uc_digit_value(codepoint);
+        if (digit < 0 || value >= INT64_MAX/10)
+            return value;
+
+        value = 10*value + digit;
+        *i += 1;
+    }
+}
- size_t split_span = u8_strspn(ustr + i, usplit);
- if (split_span == 0) break;
- i += split_span;
+// Read a name from `text` starting at position `*i`, after skipping leading
+// whitespace. The name is collected into a GC-allocated buffer of UNINAME_MAX
+// bytes and may contain alphanumerics, spaces, underscores and hyphens.
+// If the very first grapheme is some other non-negative grapheme (except
+// ']'), it is treated as a literal character and the result of
+// unicode_character_name() for it is returned instead.
+// Returns NULL if no name characters were read.
+const char *get_property_name(Text_t text, int64_t *i)
+{
+ skip_whitespace(text, i);
+ char *name = GC_MALLOC_ATOMIC(UNINAME_MAX);
+ char *dest = name;
+ iteration_state_t state = {0, 0};
+ while (*i < text.length) {
+ int32_t grapheme = _next_grapheme(text, &state, *i);
+ // Only values in 0..255 are passed to isalnum()
+ if (!(grapheme & ~0xFF) && (isalnum(grapheme) || grapheme == ' ' || grapheme == '_' || grapheme == '-')) {
+ *dest = (char)grapheme;
+ ++dest;
+ // Stop while there is still room for the terminating NUL
+ if (dest >= name + UNINAME_MAX - 1)
+ break;
+ } else if (dest == name && grapheme >= 0 && grapheme != ']') {
+ // Literal character escape: [..[] --> "LEFT SQUARE BRACKET"
+ name = unicode_character_name(grapheme, name);
+ *i += 1;
+ return name;
+ } else {
+ break;
+ }
+ *i += 1;
}
- return strings;
+ if (dest == name) return NULL;
+ *dest = '\0';
+ return name;
}
-public CORD Text$join(CORD glue, array_t pieces)
+// EAT1(state, cond): fetch the grapheme at `text_index` of `text` (both taken
+// from the enclosing scope), evaluate `cond` with that value visible as
+// `grapheme`, and advance `text_index` by one if `cond` holds. Evaluates to
+// whether `cond` held. Written as a GNU statement expression.
+#define EAT1(state, cond) ({\
+ int32_t grapheme = _next_grapheme(text, state, text_index); \
+ bool success = (cond); \
+ if (success) text_index += 1; \
+ success; })
+
+// EAT_MANY(state, cond): apply EAT1 repeatedly until it fails; evaluates to
+// the number of graphemes consumed.
+#define EAT_MANY(state, cond) ({ int64_t n = 0; while (EAT1(state, cond)) { n += 1; } n; })
+
+// Try to match an email address in `text` starting at `text_index`.
+// Returns the number of graphemes matched, or -1 if there is no match.
+// A match is rejected when the preceding grapheme is alphabetic, so matching
+// cannot start in the middle of a word.
+int64_t match_email(Text_t text, int64_t text_index)
{
- if (pieces.length == 0) return CORD_EMPTY;
+    // email = local "@" domain
+    // local = 1-64 ([a-zA-Z0-9!#$%&'*+-/=?^_`.{|}~] | non-ascii)
+    // domain = dns-label ("." dns-label)*
+    // dns-label = 1-63 ([a-zA-Z0-9-] | non-ascii)
- CORD ret = CORD_EMPTY;
- for (int64_t i = 0; i < pieces.length; i++) {
- if (i > 0) ret = CORD_cat(ret, glue);
- ret = CORD_cat(ret, *(CORD*)((void*)pieces.data + i*pieces.stride));
+    iteration_state_t state = {0, 0};
+    if (text_index > 0) {
+        int32_t prev_codepoint = _next_grapheme(text, &state, text_index - 1);
+        if (prev_codepoint < 0)
+            prev_codepoint = synthetic_graphemes[-prev_codepoint-1].codepoints[0];
+        if (uc_is_property_alphabetic(prev_codepoint))
+            return -1;
}
- return ret;
+
+    int64_t start_index = text_index;
+
+    // Local part:
+    int64_t local_len = 0;
+    // Plain ASCII apostrophe and hyphen (the typographic look-alikes are
+    // multi-byte in UTF-8 and could never match a single ASCII grapheme):
+    static const char *allowed_local = "!#$%&'*+-/=?^_`.{|}~";
+    // `grapheme != 0` matters because strchr() would otherwise match the
+    // string's own terminator for the 0 that is read past the end of the text.
+    while (EAT1(&state, (grapheme & ~0x7F) || isalnum((char)grapheme)
+                || (grapheme != 0 && strchr(allowed_local, (char)grapheme)))) {
+        local_len += 1;
+        if (local_len > 64) return -1;
+    }
+
+    // The local part must not be empty:
+    if (local_len == 0)
+        return -1;
+
+    if (!EAT1(&state, grapheme == '@'))
+        return -1;
+
+    // Host
+    int64_t host_len = 0;
+    do {
+        int64_t label_len = 0;
+        while (EAT1(&state, (grapheme & ~0x7F) || isalnum((char)grapheme) || grapheme == '-')) {
+            label_len += 1;
+            if (label_len > 63) return -1;
+        }
+
+        if (label_len == 0)
+            return -1;
+
+        host_len += label_len;
+        if (host_len > 255)
+            return -1;
+        host_len += 1; // Account for the "." separator
+    } while (EAT1(&state, grapheme == '.'));
+
+    return text_index - start_index;
}
-public array_t Text$clusters(CORD text)
+// Try to match an IPv6 address in `text` starting at `text_index`: up to 8
+// groups of at most 4 hex digits separated by ':', with "::" allowed once.
+// Returns the number of graphemes matched, or -1 if there is no match.
+// A match is rejected when the preceding grapheme is a hex digit or ':'.
+int64_t match_ipv6(Text_t text, int64_t text_index)
{
- array_t clusters = {.atomic=1};
- uint8_t buf[128] = {0}; size_t norm_len = sizeof(buf);
- uint8_t *normalized = _normalize(text, buf, &norm_len);
- const uint8_t *end = normalized + strlen((char*)normalized);
- for (const uint8_t *pos = normalized; pos != end; ) {
- const uint8_t *next = u8_grapheme_next(pos, end);
- size_t len = (size_t)(next - pos);
- char cluster_buf[len+1];
- strlcpy(cluster_buf, (char*)pos, len+1);
- CORD cluster = CORD_from_char_star(cluster_buf);
- Array$insert(&clusters, &cluster, I(0), sizeof(CORD));
- pos = next;
+    iteration_state_t state = {0, 0};
+    if (text_index > 0) {
+        int32_t prev_codepoint = _next_grapheme(text, &state, text_index - 1);
+        // Only ASCII values may be passed to isxdigit():
+        if (!(prev_codepoint & ~0x7F) && (isxdigit(prev_codepoint) || prev_codepoint == ':'))
+            return -1;
}
+    int64_t start_index = text_index;
+    const int NUM_CLUSTERS = 8;
+    bool double_colon_used = false;
+    for (int cluster = 0; cluster < NUM_CLUSTERS; cluster++) {
+        for (int digits = 0; digits < 4; digits++) {
+            // Logical NOT: the grapheme must be ASCII *and* a hex digit
+            if (!EAT1(&state, !(grapheme & ~0x7F) && isxdigit((char)grapheme)))
+                break;
+        }
+        if (EAT1(&state, !(grapheme & ~0x7F) && isxdigit((char)grapheme)))
+            return -1; // Too many digits
+
+        if (cluster == NUM_CLUSTERS-1) {
+            break;
+        } else if (!EAT1(&state, grapheme == ':')) {
+            // Fewer than 8 groups is only acceptable if "::" was seen
+            if (double_colon_used)
+                break;
+            return -1;
+        }
- if (normalized != buf) free(normalized);
- return clusters;
+        if (EAT1(&state, grapheme == ':')) {
+            if (double_colon_used)
+                return -1;
+            double_colon_used = true;
+        }
+    }
+    return text_index - start_index;
}
-public array_t Text$codepoints(CORD text)
+// Try to match a dotted-quad IPv4 address in `text` starting at
+// `text_index`: 4 groups of 1-3 decimal digits separated by '.'.
+// Returns the number of graphemes matched, or -1 if there is no match.
+// A match is rejected when the preceding grapheme is a digit or '.'.
+static int64_t match_ipv4(Text_t text, int64_t text_index)
{
- uint8_t norm_buf[128] = {0}; size_t norm_len = sizeof(norm_buf);
- uint8_t *normalized = _normalize(text, norm_buf, &norm_len);
+    iteration_state_t state = {0, 0};
+    if (text_index > 0) {
+        int32_t prev_codepoint = _next_grapheme(text, &state, text_index - 1);
+        // Only ASCII values may be passed to isdigit():
+        if (!(prev_codepoint & ~0x7F) && (isdigit(prev_codepoint) || prev_codepoint == '.'))
+            return -1;
+    }
+    int64_t start_index = text_index;
- uint32_t codepoint_buf[128] = {0};
- size_t codepoint_len = sizeof(codepoint_buf);
- uint32_t *codepoints = u8_to_u32(normalized, norm_len-1, codepoint_buf, &codepoint_len);
- array_t ret = {
- .length=codepoint_len,
- .data=memcpy(GC_MALLOC_ATOMIC(sizeof(int32_t[codepoint_len])), codepoints, sizeof(int32_t[codepoint_len])),
- .stride=sizeof(int32_t),
- .atomic=1,
- };
+    const int NUM_CLUSTERS = 4;
+    for (int cluster = 0; cluster < NUM_CLUSTERS; cluster++) {
+        for (int digits = 0; digits < 3; digits++) {
+            // Logical NOT: the grapheme must be ASCII *and* a decimal digit
+            if (!EAT1(&state, !(grapheme & ~0x7F) && isdigit((char)grapheme))) {
+                if (digits == 0) return -1;
+                break;
+            }
+        }
- if (normalized != norm_buf) free(normalized);
- if (codepoints != codepoint_buf) free(codepoints);
- return ret;
+        if (EAT1(&state, !(grapheme & ~0x7F) && isdigit((char)grapheme)))
+            return -1; // Too many digits
+
+        if (cluster == NUM_CLUSTERS-1)
+            break;
+        else if (!EAT1(&state, grapheme == '.'))
+            return -1;
+    }
+    return (text_index - start_index);
}
-public array_t Text$bytes(CORD text)
+// Try to match a URI in `text` starting at `text_index`.
+// Returns the number of graphemes matched, or -1 if there is no match.
+// A match is rejected when the preceding grapheme is alphabetic, so matching
+// cannot start in the middle of a word.
+int64_t match_uri(Text_t text, int64_t text_index)
{
- uint8_t norm_buf[128] = {0}; size_t norm_len = sizeof(norm_buf);
- uint8_t *normalized = _normalize(text, norm_buf, &norm_len);
+    // URI = scheme ":" ["//" authority] path ["?" query] ["#" fragment]
+    // scheme = [a-zA-Z] [a-zA-Z0-9+.-]
+    // authority = [userinfo "@"] host [":" port]
- --norm_len; // NUL byte
- array_t ret = {
- .length=norm_len,
- .data=memcpy(GC_MALLOC_ATOMIC(sizeof(uint8_t[norm_len])), normalized, sizeof(uint8_t[norm_len])),
- .stride=sizeof(uint8_t),
- .atomic=1,
- };
+    iteration_state_t state = {0, 0};
+    if (text_index > 0) {
+        int32_t prev_codepoint = _next_grapheme(text, &state, text_index - 1);
+        if (prev_codepoint < 0)
+            prev_codepoint = synthetic_graphemes[-prev_codepoint-1].codepoints[0];
+        if (uc_is_property_alphabetic(prev_codepoint))
+            return -1;
+    }
- if (normalized != norm_buf) free(normalized);
- return ret;
+    int64_t start_index = text_index;
+
+    // Scheme:
+    // Only ASCII values may be passed to isalpha(); graphemes can be negative
+    // (synthetic) or larger than 255.
+    if (!EAT1(&state, !(grapheme & ~0x7F) && isalpha(grapheme)))
+        return -1;
+
+    EAT_MANY(&state, !(grapheme & ~0x7F) && (isalnum(grapheme) || grapheme == '+' || grapheme == '.' || grapheme == '-'));
+
+    if (text_index == start_index)
+        return -1;
+
+    if (!match_grapheme(text, &text_index, ':'))
+        return -1;
+
+    // Authority:
+    if (match_str(text, &text_index, "//")) {
+        int64_t authority_start = text_index;
+        // Username or host:
+        static const char *forbidden = "#?:@ \t\r\n<>[]{}\\^|\"`/";
+        if (EAT_MANY(&state, (grapheme & ~0x7F) || !strchr(forbidden, (char)grapheme)) == 0)
+            return -1;
+
+        if (EAT1(&state, grapheme == '@')) {
+            // Found a username, now get a host:
+            if (EAT_MANY(&state, (grapheme & ~0x7F) || !strchr(forbidden, (char)grapheme)) == 0)
+                return -1;
+        } else {
+            // NOTE(review): '[' is in `forbidden`, so a bracketed IPv6 host makes
+            // the EAT_MANY above consume nothing and return -1 before this point;
+            // the IPv6 branch below looks unreachable as written -- confirm intent.
+            int64_t ip = authority_start;
+            int64_t ipv4_len = match_ipv4(text, ip);
+            if (ipv4_len > 0) {
+                ip += ipv4_len;
+            } else if (match_grapheme(text, &ip, '[')) {
+                ip += match_ipv6(text, ip);
+                if (ip > authority_start + 1 && match_grapheme(text, &ip, ']'))
+                    text_index = ip;
+            }
+        }
+
+        // Port:
+        if (EAT1(&state, grapheme == ':')) {
+            if (EAT_MANY(&state, !(grapheme & ~0x7F) && isdigit(grapheme)) == 0)
+                return -1;
+        }
+        if (!EAT1(&state, grapheme == '/'))
+            return (text_index - start_index); // No path
+    } else {
+        // Optional path root:
+        EAT1(&state, grapheme == '/');
+    }
+
+    // Path:
+    static const char *non_path = " \"#?<>[]{}\\^`|";
+    EAT_MANY(&state, (grapheme & ~0x7F) || !strchr(non_path, (char)grapheme));
+
+    if (EAT1(&state, grapheme == '?')) { // Query
+        static const char *non_query = " \"#<>[]{}\\^`|";
+        EAT_MANY(&state, (grapheme & ~0x7F) || !strchr(non_query, (char)grapheme));
+    }
+
+    if (EAT1(&state, grapheme == '#')) { // Fragment
+        static const char *non_fragment = " \"#<>[]{}\\^`|";
+        EAT_MANY(&state, (grapheme & ~0x7F) || !strchr(non_fragment, (char)grapheme));
+    }
+    return text_index - start_index;
}
-public Int_t Text$num_clusters(CORD text)
+int64_t match(Text_t text, Text_t pattern, int64_t text_index, int64_t pattern_index) // Match `pattern` (from pattern_index) against `text` (from text_index); returns the number of text positions consumed, or -1 if there is no match
{
- const uint8_t *ustr = (const uint8_t*)CORD_to_const_char_star(text);
- int64_t num_clusters = 0;
- const uint8_t *end = ustr + u8_strlen(ustr);
- for (const uint8_t *pos = ustr; pos != end; ) {
- const uint8_t *next = u8_grapheme_next(pos, end);
- ++num_clusters;
- pos = next;
+ if (pattern_index >= pattern.length) return 0; // Nothing left in the pattern: matches zero positions
+ int64_t start_index = text_index;
+ iteration_state_t pattern_state = {0, 0}, text_state = {0, 0};
+ while (pattern_index < pattern.length) {
+ int64_t old_pat_index = pattern_index;
+ if (match_str(pattern, &pattern_index, "[..")) { // Bracketed element: "[.." [count] ["!"] [name] "]"
+ skip_whitespace(pattern, &pattern_index);
+ int64_t min, max;
+ if (uc_is_digit(_next_grapheme(pattern, &pattern_state, pattern_index))) {
+ min = parse_int(pattern, &pattern_index);
+ skip_whitespace(pattern, &pattern_index);
+ if (match_grapheme(pattern, &pattern_index, '+')) {
+ max = INT64_MAX; // "N+": at least N repetitions
+ } else if (match_grapheme(pattern, &pattern_index, '-')) {
+ max = parse_int(pattern, &pattern_index); // "N-M": between N and M repetitions
+ } else {
+ max = min; // "N": exactly N repetitions
+ }
+ } else {
+ min = 1, max = INT64_MAX; // No count given: one or more repetitions
+ }
+
+ skip_whitespace(pattern, &pattern_index);
+ bool want_to_match = !match_grapheme(pattern, &pattern_index, '!'); // A leading '!' inverts the per-grapheme test
+ const char *prop_name = get_property_name(pattern, &pattern_index);
+
+ skip_whitespace(pattern, &pattern_index);
+ if (!match_grapheme(pattern, &pattern_index, ']'))
+ errx(1, "Missing closing ']' in pattern: \"%T\"", &pattern);
+
+ int64_t before_group = text_index;
+ bool any = false;
+ uc_property_t prop;
+ int32_t specific_codepoint = UNINAME_INVALID;
+
+#define FAIL() ({ if (min < 1) { text_index = before_group; continue; } else { return -1; } }) // Optional element (min < 1): rewind and move on to the next pattern element; otherwise the whole match fails
+ if (prop_name) {
+ switch (tolower(prop_name[0])) { // Dispatch on the first letter to the names handled specially here
+ case 'd':
+ if (strcasecmp(prop_name, "digit") == 0) {
+ prop = UC_PROPERTY_DECIMAL_DIGIT;
+ goto got_prop;
+ }
+ break;
+ case 'e':
+ if (strcasecmp(prop_name, "end") == 0) {
+ if (text_index != text.length) // "end" consumes nothing; it only succeeds at the end of the text
+ FAIL();
+ continue;
+ } else if (prop_name && strcasecmp(prop_name, "email") == 0) {
+ int64_t len = match_email(text, text_index);
+ if (len < 0)
+ FAIL();
+ text_index += len;
+ continue;
+ } else if (prop_name && strcasecmp(prop_name, "emoji") == 0) {
+ prop = UC_PROPERTY_EMOJI;
+ goto got_prop;
+ }
+ break;
+ case 'i':
+ if (prop_name && strcasecmp(prop_name, "id") == 0) { // Identifier: one XID_Start grapheme followed by XID_Continue graphemes
+ if (!EAT1(&text_state, uc_is_property(grapheme, UC_PROPERTY_XID_START)))
+ FAIL();
+ EAT_MANY(&text_state, uc_is_property(grapheme, UC_PROPERTY_XID_CONTINUE));
+ continue;
+ } else if (prop_name && strcasecmp(prop_name, "ipv4") == 0) {
+ int64_t len = match_ipv4(text, text_index);
+ if (len < 0)
+ FAIL();
+ text_index += len;
+ continue;
+ } else if (prop_name && strcasecmp(prop_name, "ipv6") == 0) {
+ int64_t len = match_ipv6(text, text_index);
+ if (len < 0)
+ FAIL();
+ text_index += len;
+ continue;
+ } else if (prop_name && strcasecmp(prop_name, "ip") == 0) { // "ip": try IPv6 first, then fall back to IPv4
+ int64_t len = match_ipv6(text, text_index);
+ if (len < 0)
+ len = match_ipv4(text, text_index);
+ if (len < 0)
+ FAIL();
+ text_index += len;
+ continue;
+ }
+ break;
+ case 's':
+ if (strcasecmp(prop_name, "start") == 0) {
+ if (text_index != 0) return -1; // NOTE(review): fails outright instead of using FAIL() like the "end" case -- confirm intended
+ continue;
+ }
+ break;
+ case 'u':
+ if (prop_name && strcasecmp(prop_name, "uri") == 0) {
+ int64_t len = match_uri(text, text_index);
+ if (len < 0)
+ FAIL();
+ text_index += len;
+ continue;
+ } else if (prop_name && strcasecmp(prop_name, "url") == 0) { // "url": a URI restricted to the schemes listed below
+ int64_t lookahead = text_index;
+ if (!(match_str(text, &lookahead, "https:")
+ || match_str(text, &lookahead, "http:")
+ || match_str(text, &lookahead, "ftp:")
+ || match_str(text, &lookahead, "wss:")
+ || match_str(text, &lookahead, "ws:")))
+ FAIL();
+
+ int64_t len = match_uri(text, text_index);
+ if (len < 0)
+ FAIL();
+ text_index += len;
+ continue;
+ }
+ break;
+ }
+
+ prop = uc_property_byname(prop_name); // Not a special name: look it up as a Unicode property, then as a character name
+ if (!uc_property_is_valid(prop)) {
+ specific_codepoint = unicode_name_character(prop_name);
+ if (specific_codepoint == UNINAME_INVALID)
+ errx(1, "Not a valid property or character name: %s", prop_name);
+ }
+ } else {
+ any = true; // No name given: every grapheme passes the test
+ prop = UC_PROPERTY_PRIVATE_USE; // Placeholder value; not consulted while `any` is set
+ }
+ got_prop:;
+
+ if (min == 0 && pattern_index < pattern.length) { // Zero repetitions allowed: first try the rest of the pattern without consuming anything
+ int64_t match_len = match(text, pattern, text_index, pattern_index);
+ if (match_len >= 0)
+ return (text_index - start_index) + match_len;
+ }
+
+ for (int64_t count = 0; count < max; ) {
+ int32_t grapheme = _next_grapheme(text, &text_state, text_index);
+ if (grapheme < 0) // Negative values index synthetic_graphemes; test that entry's first codepoint
+ grapheme = synthetic_graphemes[-grapheme-1].codepoints[0];
+
+ bool success;
+ if (any)
+ success = true;
+ else if (specific_codepoint != UNINAME_INVALID)
+ success = (grapheme == specific_codepoint);
+ else
+ success = uc_is_property(grapheme, prop);
+
+ if (success != want_to_match) {
+ if (count < min) return -1;
+ else break;
+ }
+
+ text_index += 1;
+ count += 1;
+
+ if (count >= min) { // Minimum reached: try the rest of the pattern here before consuming more, so the first success wins
+ if (pattern_index < pattern.length) {
+ int64_t match_len = match(text, pattern, text_index, pattern_index);
+ if (match_len >= 0) {
+ return (text_index - start_index) + match_len;
+ }
+ } else if (text_index >= text.length) {
+ break;
+ }
+ }
+ }
+ } else if (uc_is_property(_next_grapheme(pattern, &pattern_state, pattern_index), UC_PROPERTY_QUOTATION_MARK)
+ && (pattern_index += 1, match_grapheme(pattern, &pattern_index, '?'))) {
+ // Quotation: "?", '?', etc
+ int32_t open = _next_grapheme(pattern, &pattern_state, pattern_index-2); // pattern_index is now past the '?', so the opening mark is two positions back
+ if (!match_grapheme(text, &text_index, open)) return -1;
+ int32_t close = open;
+ uc_mirror_char(open, (uint32_t*)&close); // Ask libunistring for the mirrored counterpart; `close` starts out equal to `open`
+ if (!match_grapheme(pattern, &pattern_index, close))
+ errx(1, "I expected a closing brace");
+ while (text_index < text.length) {
+ int32_t c = _next_grapheme(text, &text_state, text_index);
+ if (c == close)
+ return (text_index - start_index); // NOTE(review): returns at the closing mark without consuming it or matching the rest of the pattern -- confirm intended
+
+ if (c == '\\' && text_index < text.length) { // A backslash skips the grapheme after it
+ text_index += 2;
+ } else {
+ text_index += 1;
+ }
+ }
+ return -1;
+ } else if (uc_is_property(_next_grapheme(pattern, &pattern_state, pattern_index), UC_PROPERTY_PAIRED_PUNCTUATION)
+ && (pattern_index += 1, match_grapheme(pattern, &pattern_index, '?'))) {
+ // Nested punctuation: (?), [?], etc
+ int32_t open = _next_grapheme(pattern, &pattern_state, pattern_index-2);
+ if (!match_grapheme(text, &text_index, open)) return -1;
+ int32_t close = open;
+ uc_mirror_char(open, (uint32_t*)&close);
+ if (!match_grapheme(pattern, &pattern_index, close))
+ errx(1, "I expected a closing brace");
+ int64_t depth = 1; // Track nesting so inner open/close pairs are skipped as a unit
+ for (; depth > 0 && text_index < text.length; ++text_index) {
+ int32_t c = _next_grapheme(text, &text_state, text_index);
+ if (c == open)
+ depth += 1;
+ else if (c == close)
+ depth -= 1;
+ }
+ if (depth > 0) return -1; // Ran out of text before the punctuation was balanced
+ } else {
+ // Plain character:
+ pattern_index = old_pat_index; // Undo any advance made by the failed tests above
+ int32_t pat_grapheme = _next_grapheme(pattern, &pattern_state, pattern_index);
+
+ if (pattern_index == 0 && text_index > 0) { // First pattern grapheme: an alphabetic match may not begin right after an alphabetic character
+ int32_t pat_codepoint = pat_grapheme;
+ if (pat_codepoint < 0)
+ pat_codepoint = synthetic_graphemes[-pat_codepoint-1].codepoints[0];
+
+ int32_t prev_codepoint = _next_grapheme(text, &text_state, text_index - 1);
+ if (prev_codepoint < 0)
+ prev_codepoint = synthetic_graphemes[-prev_codepoint-1].codepoints[0];
+ if (uc_is_property_alphabetic(pat_codepoint) && uc_is_property_alphabetic(prev_codepoint))
+ return -1;
+ }
+
+ int32_t text_grapheme = _next_grapheme(text, &text_state, text_index);
+ if (pat_grapheme != text_grapheme)
+ return -1;
+
+ pattern_index += 1;
+ text_index += 1;
+
+ if (pattern_index == pattern.length && text_index < text.length) { // Last pattern grapheme: an alphabetic match may not end right before an alphabetic character
+ int32_t pat_codepoint = pat_grapheme;
+ if (pat_codepoint < 0)
+ pat_codepoint = synthetic_graphemes[-pat_codepoint-1].codepoints[0];
+
+ int32_t next_codepoint = _next_grapheme(text, &text_state, text_index);
+ if (next_codepoint < 0)
+ next_codepoint = synthetic_graphemes[-next_codepoint-1].codepoints[0];
+ if (uc_is_property_alphabetic(pat_codepoint) && uc_is_property_alphabetic(next_codepoint))
+ return -1;
+ }
+ }
+ }
+ if (text_index >= text.length && pattern_index < pattern.length) // Text is exhausted but pattern elements remain
+ return -1;
+ return (text_index - start_index);
+}
+
+#undef EAT1
+#undef EAT_MANY
+
+// Find the first place at or after `from_index` (1-based) where `pattern`
+// matches `text`.
+// Returns the 1-based index of the match, or 0 if there is none. If
+// `match_length` is non-NULL it receives the length of the match, or -1 when
+// nothing was found.
+public Int_t Text$find(Text_t text, Text_t pattern, Int_t from_index, int64_t *match_length)
+{
+    // Only look at the first grapheme when the pattern actually has one:
+    int32_t first = pattern.length > 0 ? get_grapheme(pattern, 0) : 0;
+    bool find_first = (pattern.length > 0
+                       && first != '['
+                       && !uc_is_property(first, UC_PROPERTY_QUOTATION_MARK)
+                       && !uc_is_property(first, UC_PROPERTY_PAIRED_PUNCTUATION));
+
+    // Convert to a 0-based index, treating anything before the first
+    // grapheme as "start at the beginning" instead of indexing before the text:
+    int64_t start = Int_to_Int64(from_index, false) - 1;
+    if (start < 0) start = 0;
+
+    iteration_state_t text_state = {0, 0};
+    for (int64_t i = start; i < text.length; i++) {
+        // Optimization: quickly skip ahead to first char in pattern:
+        if (find_first) {
+            while (i < text.length && _next_grapheme(text, &text_state, i) != first)
+                ++i;
+            // The first grapheme never occurs again, so don't try to match
+            // past the end of the text:
+            if (i >= text.length)
+                break;
+        }
+
+        int64_t m = match(text, pattern, i, 0);
+        if (m >= 0) {
+            if (match_length)
+                *match_length = m;
+            return I(i+1);
+        }
}
- return I(num_clusters);
+    if (match_length)
+        *match_length = -1;
+    return I(0);
+}
+
+// Argument-info callback for the custom printf text specifier: declares that
+// the specifier takes exactly one argument, a pointer to a Text_t.
+// Returns the number of arguments consumed (1), or -1 if there is no room to
+// describe any.
+public int printf_text_size(const struct printf_info *info, size_t n, int argtypes[n], int sizes[n])
+{
+    (void)info;
+    if (n == 0)
+        return -1;
+
+    sizes[0] = sizeof(Text_t*);
+    argtypes[0] = PA_POINTER;
+    return 1;
+}
+
+// Output callback for the custom printf text specifier: args[0] points at the
+// Text_t* argument; the text it refers to is written to `stream`.
+// Returns whatever Text$print() returns.
+public int printf_text(FILE *stream, const struct printf_info *info, const void *const args[])
+{
+    (void)info;
+    Text_t *const *text_arg = (Text_t *const *)args[0];
+    return Text$print(stream, **text_arg);
}
-public Int_t Text$num_codepoints(CORD text)
+// Text representation of a Text value: the quoted form of `*text`, or the
+// type name "Text" when `text` is NULL.
+public Text_t Text$as_text(const void *text, bool colorize, const TypeInfo *info)
{
- uint8_t buf[128] = {0}; size_t norm_len = sizeof(buf);
- uint8_t *normalized = _normalize(text, buf, &norm_len);
- int64_t num_codepoints = u8_mbsnlen(normalized, norm_len-1);
- if (normalized != buf) free(normalized);
- return I(num_codepoints);
+    (void)info;
+    return text ? Text$quoted(*(const Text_t*)text, colorize)
+                : Text$from_str("Text");
}
-public Int_t Text$num_bytes(CORD text)
+// Build a double-quoted, escaped rendering of `text`. Control characters,
+// '"' and '\\' become backslash escapes; when `colorize` is set, ANSI color
+// codes are wrapped around the whole literal and around each escape.
+public Text_t Text$quoted(Text_t text, bool colorize)
{
- uint8_t norm_buf[128] = {0}; size_t norm_len = sizeof(norm_buf);
- uint8_t *normalized = _normalize(text, norm_buf, &norm_len);
- --norm_len; // NUL byte
- if (!normalized) errx(1, "Unicode normalization error!");
- if (normalized != norm_buf) free(normalized);
- return I(norm_len);
+    // TODO: optimize for ASCII and short strings
+    array_t graphemes = {.atomic=1};
+    // Helpers that append one grapheme / every byte of an ASCII C string:
+#define add_char(c) Array$insert_value(&graphemes, (uint32_t)(c), I_small(0), sizeof(uint32_t))
+#define add_str(s) ({ for (const char *_c = (s); *_c; ++_c) add_char((unsigned char)*_c); })
+    if (colorize)
+        add_str("\x1b[35m\"");
+    else
+        add_char('"');
+
+#define add_escaped(str) ({ if (colorize) add_str("\x1b[34;1m"); add_char('\\'); add_str(str); if (colorize) add_str("\x1b[0;35m"); })
+    iteration_state_t state = {0, 0};
+    for (int64_t i = 0; i < text.length; i++) {
+        int32_t g = _next_grapheme(text, &state, i);
+        switch (g) {
+        case '\a': add_escaped("a"); break;
+        case '\b': add_escaped("b"); break;
+        case '\x1b': add_escaped("e"); break;
+        case '\f': add_escaped("f"); break;
+        case '\n': add_escaped("n"); break;
+        case '\r': add_escaped("r"); break;
+        case '\t': add_escaped("t"); break;
+        case '\v': add_escaped("v"); break;
+        case '"': add_escaped("\""); break;
+        case '\\': add_escaped("\\"); break;
+        case '\x00' ... '\x06': case '\x0E' ... '\x1A':
+        case '\x1C' ... '\x1F': case '\x7F' ... '\x7F': {
+            // Remaining control characters are written as \xNN
+            if (colorize) add_str("\x1b[34;1m");
+            add_char('\\');
+            add_char('x');
+            char tmp[4];
+            snprintf(tmp, sizeof(tmp), "%02X", (unsigned)g);
+            add_str(tmp);
+            if (colorize)
+                add_str("\x1b[0;35m");
+            break;
+        }
+        default: add_char(g); break;
+        }
+    }
+
+    if (colorize)
+        add_str("\"\x1b[m");
+    else
+        add_char('"');
+
+    return (Text_t){.length=graphemes.length, .tag=TEXT_GRAPHEMES, .graphemes=graphemes.data};
+#undef add_str
+#undef add_char
+#undef add_escaped
}
-public array_t Text$character_names(CORD text)
+// Return a copy of `text` in which every match of `pattern` is replaced by
+// `replacement`. Matches are found left to right and do not overlap.
+public Text_t Text$replace(Text_t text, Text_t pattern, Text_t replacement)
{
- array_t codepoints = Text$codepoints(text);
- array_t ret = {.length=codepoints.length, .stride=sizeof(CORD), .data=GC_MALLOC(sizeof(CORD[codepoints.length]))};
- for (int64_t i = 0; i < codepoints.length; i++) {
- char buf[UNINAME_MAX];
- unicode_character_name(*(ucs4_t*)(codepoints.data + codepoints.stride*i), buf);
- *(CORD*)(ret.data + ret.stride*i) = CORD_from_char_star(buf);
+    Text_t ret = {.length=0};
+
+    // 1-based index of the first grapheme that has not been copied to `ret` yet
+    int64_t next = 1;
+    while (next <= text.length) {
+        int64_t len;
+        Int_t found = Text$find(text, pattern, Int64_to_Int(next), &len);
+        if (found.small == I_small(0).small) break;
+
+        int64_t found_index = Int_to_Int64(found, false);
+        // Copy the unmatched text in front of the match, then the replacement:
+        if (found_index > next)
+            ret = concat2(ret, Text$slice(text, Int64_to_Int(next), Int64_to_Int(found_index - 1)));
+        ret = concat2(ret, replacement);
+
+        if (len > 0) {
+            // Resume searching right after the matched text
+            next = found_index + len;
+        } else {
+            // Zero-length match: keep the grapheme at that position and step
+            // past it, otherwise the search would never make progress
+            ret = concat2(ret, Text$slice(text, found, found));
+            next = found_index + 1;
+        }
+    }
+    // Whatever follows the last match is copied through unchanged:
+    if (next <= text.length) {
+        ret = concat2(ret, Text$slice(text, Int64_to_Int(next), Int64_to_Int(text.length)));
}
return ret;
}
-public CORD Text$read_line(CORD prompt)
+// printf-style constructor for Text values.
+// Short, pure-ASCII results are stored inline (TEXT_SHORT_ASCII); anything
+// else is formatted into a GC-allocated buffer and converted with
+// Text$from_str(). A formatting error yields empty text.
+public Text_t Text$format(const char *fmt, ...)
{
- char *line = readline(CORD_to_const_char_star(prompt));
- if (!line) return CORD_EMPTY;
- CORD ret = CORD_from_char_star(line);
- free(line);
+    va_list args;
+    va_start(args, fmt);
+    // vsnprintf() leaves its va_list indeterminate, so a second formatting
+    // pass needs its own copy:
+    va_list args_copy;
+    va_copy(args_copy, args);
+
+    char buf[8];
+    int len = vsnprintf(buf, sizeof(buf), fmt, args);
+    va_end(args);
+
+    // The inline form is only valid when the whole output (plus its NUL)
+    // fit in `buf` and every byte is ASCII:
+    bool fits_inline = (len >= 0 && len < (int)sizeof(buf));
+    for (int i = 0; fits_inline && i < len; i++) {
+        if ((unsigned char)buf[i] & 0x80)
+            fits_inline = false;
+    }
+
+    Text_t ret;
+    if (len < 0) {
+        ret = (Text_t){.length=0};
+    } else if (fits_inline) {
+        ret = (Text_t){
+            .length=len,
+            .tag = TEXT_SHORT_ASCII,
+        };
+        for (int i = 0; i < len; i++)
+            ret.short_ascii[i] = buf[i];
+    } else {
+        // `len` excludes the terminating NUL, so reserve one extra byte
+        size_t size = (size_t)len + 1;
+        char *str = GC_MALLOC_ATOMIC(size);
+        vsnprintf(str, size, fmt, args_copy);
+        ret = Text$from_str(str);
+    }
+    va_end(args_copy);
return ret;
}
public const TypeInfo $Text = {
- .size=sizeof(CORD),
- .align=__alignof__(CORD),
+ .size=sizeof(Text_t),
+ .align=__alignof__(Text_t),
.tag=TextInfo,
.TextInfo={.lang="Text"},
};
diff --git a/builtins/text.h b/builtins/text.h
index 017a2804..1e671695 100644
--- a/builtins/text.h
+++ b/builtins/text.h
@@ -1,10 +1,10 @@
#pragma once
-// Type info and methods for Text datatype, which uses the Boehm "cord" library
-// and libunistr
+// Type info and methods for Text datatype, which uses a struct inspired by
+// Raku's string representation and libunistr
-#include <gc/cord.h>
#include <stdbool.h>
+#include <printf.h>
#include <stdint.h>
#include "datatypes.h"
@@ -12,36 +12,57 @@
#include "types.h"
#include "where.h"
-#define Text_t CORD
-
typedef struct {
enum { FIND_FAILURE, FIND_SUCCESS } status;
int32_t index;
} find_result_t;
-CORD Text$as_text(const void *str, bool colorize, const TypeInfo *info);
-CORD Text$quoted(CORD str, bool colorize);
-int Text$compare(const CORD *x, const CORD *y);
-bool Text$equal(const CORD *x, const CORD *y);
-uint32_t Text$hash(const CORD *cord);
-CORD Text$upper(CORD str);
-CORD Text$lower(CORD str);
-CORD Text$title(CORD str);
-bool Text$has(CORD str, CORD target, Where_t where);
-CORD Text$without(CORD str, CORD target, Where_t where);
-CORD Text$trimmed(CORD str, CORD skip, Where_t where);
-find_result_t Text$find(CORD str, CORD pat);
-CORD Text$replace(CORD text, CORD pat, CORD replacement, Int_t limit);
-array_t Text$split(CORD str, CORD split);
-CORD Text$join(CORD glue, array_t pieces);
-array_t Text$clusters(CORD text);
-array_t Text$codepoints(CORD text);
-array_t Text$bytes(CORD text);
-Int_t Text$num_clusters(CORD text);
-Int_t Text$num_codepoints(CORD text);
-Int_t Text$num_bytes(CORD text);
-array_t Text$character_names(CORD text);
-CORD Text$read_line(CORD prompt);
+// CORD Text$as_text(const void *str, bool colorize, const TypeInfo *info);
+// CORD Text$quoted(CORD str, bool colorize);
+// // int Text$compare(const CORD *x, const CORD *y);
+// // bool Text$equal(const CORD *x, const CORD *y);
+// // uint32_t Text$hash(const CORD *cord);
+// // CORD Text$upper(CORD str);
+// // CORD Text$lower(CORD str);
+// // CORD Text$title(CORD str);
+// bool Text$has(CORD str, CORD target, Where_t where);
+// CORD Text$without(CORD str, CORD target, Where_t where);
+// CORD Text$trimmed(CORD str, CORD skip, Where_t where);
+// find_result_t Text$find(CORD str, CORD pat);
+// CORD Text$replace(CORD text, CORD pat, CORD replacement, Int_t limit);
+// array_t Text$split(CORD str, CORD split);
+// CORD Text$join(CORD glue, array_t pieces);
+// array_t Text$clusters(CORD text);
+// array_t Text$codepoints(CORD text);
+// array_t Text$bytes(CORD text);
+// Int_t Text$num_clusters(CORD text);
+// Int_t Text$num_codepoints(CORD text);
+// Int_t Text$num_bytes(CORD text);
+// array_t Text$character_names(CORD text);
+// CORD Text$read_line(CORD prompt);
+
+int printf_text(FILE *stream, const struct printf_info *info, const void *const args[]);
+int printf_text_size(const struct printf_info *info, size_t n, int argtypes[n], int sizes[n]);
+
+int Text$print(FILE *stream, Text_t t);
+void Text$visualize(Text_t t);
+Text_t Text$_concat(int n, Text_t items[n]);
+#define Text$concat(...) Text$_concat(sizeof((Text_t[]){__VA_ARGS__})/sizeof(Text_t), (Text_t[]){__VA_ARGS__})
+Text_t Text$slice(Text_t text, Int_t first_int, Int_t last_int);
+Text_t Text$from_str(const char *str);
+uint64_t Text$hash(Text_t *text);
+int32_t Text$compare(const Text_t *a, const Text_t *b);
+bool Text$equal(const Text_t *a, const Text_t *b);
+bool Text$equal_ignoring_case(Text_t a, Text_t b);
+Text_t Text$upper(Text_t text);
+Text_t Text$lower(Text_t text);
+Text_t Text$title(Text_t text);
+Text_t Text$as_text(const void *text, bool colorize, const TypeInfo *info);
+Text_t Text$quoted(Text_t str, bool colorize);
+Text_t Text$replace(Text_t str, Text_t pat, Text_t replacement);
+Int_t Text$find(Text_t text, Text_t pattern, Int_t i, int64_t *match_length);
+const char *Text$as_c_string(Text_t text);
+public Text_t Text$format(const char *fmt, ...);
extern const TypeInfo $Text;
diff --git a/builtins/thread.c b/builtins/thread.c
index b9586917..793a0101 100644
--- a/builtins/thread.c
+++ b/builtins/thread.c
@@ -3,7 +3,6 @@
#include <ctype.h>
#include <err.h>
#include <gc.h>
-#include <gc/cord.h>
#include <math.h>
#include <stdbool.h>
#include <stdint.h>
@@ -14,6 +13,7 @@
#include "array.h"
#include "functions.h"
#include "halfsiphash.h"
+#include "text.h"
#include "types.h"
#include "util.h"
@@ -39,13 +39,13 @@ public void Thread$detach(pthread_t *thread)
pthread_detach(*thread);
}
-CORD Thread$as_text(const pthread_t **thread, bool colorize, const TypeInfo *type)
+Text_t Thread$as_text(const pthread_t **thread, bool colorize, const TypeInfo *type) // Text form of a thread handle: "Thread(<pointer>)", or the type name "Thread" when `thread` is NULL; `colorize` wraps it in ANSI color codes
{
(void)type;
if (!thread) {
- return colorize ? "\x1b[34;1mThread\x1b[m" : "Thread";
+ return Text$from_str(colorize ? "\x1b[34;1mThread\x1b[m" : "Thread"); // NULL means "describe the type itself"
}
- return CORD_asprintf(colorize ? "\x1b[34;1mThread(%p)\x1b[m" : "Thread(%p)", *thread);
+ return Text$format(colorize ? "\x1b[34;1mThread(%p)\x1b[m" : "Thread(%p)", *thread); // Formats the pthread_t pointer stored in *thread with %p
}
public const TypeInfo Thread = {
diff --git a/builtins/thread.h b/builtins/thread.h
index efccae33..2956dda6 100644
--- a/builtins/thread.h
+++ b/builtins/thread.h
@@ -14,7 +14,7 @@ pthread_t *Thread$new(closure_t fn);
void Thread$cancel(pthread_t *thread);
void Thread$join(pthread_t *thread);
void Thread$detach(pthread_t *thread);
-CORD Thread$as_text(const pthread_t **thread, bool colorize, const TypeInfo *type);
+Text_t Thread$as_text(const pthread_t **thread, bool colorize, const TypeInfo *type);
extern TypeInfo Thread;
diff --git a/builtins/types.c b/builtins/types.c
index 4fb2c523..ab1b8013 100644
--- a/builtins/types.c
+++ b/builtins/types.c
@@ -9,17 +9,20 @@
#include "array.h"
#include "pointer.h"
#include "table.h"
+#include "text.h"
#include "types.h"
-public CORD Type$as_text(const void *typeinfo, bool colorize, const TypeInfo *type)
+// Text form of a TypeInfo value: the type's name string (in color when
+// `colorize` is set), or "TypeInfo" when `typeinfo` is NULL.
+public Text_t Type$as_text(const void *typeinfo, bool colorize, const TypeInfo *type)
{
- if (!typeinfo) return "TypeInfo";
+    if (!typeinfo)
+        return Text$from_str("TypeInfo");
- if (!colorize)
- return type->TypeInfoInfo.type_str;
- CORD c;
- CORD_sprintf(&c, "\x1b[36;1m%s\x1b[m", type->TypeInfoInfo.type_str);
- return c;
+    Text_t type_name = Text$from_str(type->TypeInfoInfo.type_str);
+    if (!colorize)
+        return type_name;
+
+    // Wrap the name in bold-cyan ANSI escapes
+    return Text$concat(Text$from_str("\x1b[36;1m"), type_name, Text$from_str("\x1b[m"));
}
public const TypeInfo $TypeInfo = {
@@ -32,13 +35,13 @@ public const TypeInfo $TypeInfo = {
public const TypeInfo $Void = {.size=0, .align=0, .tag=EmptyStruct};
public const TypeInfo $Abort = {.size=0, .align=0, .tag=EmptyStruct};
-public CORD Func$as_text(const void *fn, bool colorize, const TypeInfo *type)
+public Text_t Func$as_text(const void *fn, bool colorize, const TypeInfo *type) // Text form of a function value: its type string taken from `type`, colored only when `fn` is non-NULL and `colorize` is set
{
(void)fn;
- CORD c = type->FunctionInfo.type_str;
+ Text_t text = Text$from_str(type->FunctionInfo.type_str); // The function's type string is the whole representation
if (fn && colorize)
- CORD_sprintf(&c, "\x1b[32;1m%r\x1b[m", c);
- return c;
+ text = Text$concat(Text$from_str("\x1b[32;1m"), text, Text$from_str("\x1b[m")); // Wrap in ANSI escape codes
+ return text;
}
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/builtins/types.h b/builtins/types.h
index 70f8dc00..2647ef92 100644
--- a/builtins/types.h
+++ b/builtins/types.h
@@ -2,7 +2,6 @@
// Type information and methods for TypeInfos (i.e. runtime representations of types)
-#include <gc/cord.h>
#include <stdbool.h>
#include <stdint.h>
@@ -13,7 +12,7 @@ struct TypeInfo;
typedef uint32_t (*hash_fn_t)(const void*, const struct TypeInfo*);
typedef int32_t (*compare_fn_t)(const void*, const void*, const struct TypeInfo*);
typedef bool (*equal_fn_t)(const void*, const void*, const struct TypeInfo*);
-typedef CORD (*str_fn_t)(const void*, bool, const struct TypeInfo*);
+typedef Text_t (*text_fn_t)(const void*, bool, const struct TypeInfo*);
typedef struct TypeInfo {
int64_t size, align;
@@ -24,7 +23,7 @@ typedef struct TypeInfo {
equal_fn_t equal;
compare_fn_t compare;
hash_fn_t hash;
- str_fn_t as_text;
+ text_fn_t as_text;
} CustomInfo;
struct {
const char *sigil;
@@ -76,7 +75,7 @@ extern const TypeInfo $Void;
extern const TypeInfo $Abort;
#define Void_t void
-CORD Type$as_text(const void *typeinfo, bool colorize, const TypeInfo *type);
-CORD Func$as_text(const void *fn, bool colorize, const TypeInfo *type);
+Text_t Type$as_text(const void *typeinfo, bool colorize, const TypeInfo *type);
+Text_t Func$as_text(const void *fn, bool colorize, const TypeInfo *type);
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/builtins/util.c b/builtins/util.c
index d4f3cd31..7fca15e3 100644
--- a/builtins/util.c
+++ b/builtins/util.c
@@ -7,6 +7,7 @@
#include <stdlib.h>
#include <string.h>
+#include "text.h"
#include "util.h"
public bool USE_COLOR;
@@ -67,4 +68,54 @@ public CORD CORD_asprintf(CORD fmt, ...)
return c;
}
+// Return `str` wrapped in double quotes, with control characters, '"' and
+// '\\' replaced by backslash escape sequences.
+public CORD CORD_quoted(CORD str)
+{
+    CORD result = "\"";
+    CORD_pos pos;
+    CORD_FOR(pos, str) {
+        char ch = CORD_pos_fetch(pos);
+        const char *escape = "";
+        switch (ch) {
+        case '\a': escape = "\\a"; break;
+        case '\b': escape = "\\b"; break;
+        case '\x1b': escape = "\\e"; break;
+        case '\f': escape = "\\f"; break;
+        case '\n': escape = "\\n"; break;
+        case '\r': escape = "\\r"; break;
+        case '\t': escape = "\\t"; break;
+        case '\v': escape = "\\v"; break;
+        case '"': escape = "\\\""; break;
+        case '\\': escape = "\\\\"; break;
+        case '\x00' ... '\x06': case '\x0E' ... '\x1A':
+        case '\x1C' ... '\x1F': case '\x7F' ... '\x7F':
+            // Other control characters are written as \xNN
+            CORD_sprintf(&result, "%r\\x%02X", result, ch);
+            continue;
+        default:
+            // Everything else is copied through unchanged
+            result = CORD_cat_char(result, ch);
+            continue;
+        }
+        // Only the named escapes above reach this point
+        result = CORD_cat(result, escape);
+    }
+    return CORD_cat_char(result, '"');
+}
+
+// Return a copy of `c` with every occurrence of `to_replace` replaced by
+// `replacement`, scanning left to right. An empty `to_replace` matches
+// nothing, so `c` is returned unchanged.
+public CORD CORD_replace(CORD c, CORD to_replace, CORD replacement)
+{
+    size_t len = CORD_len(c);
+    size_t replaced_len = CORD_len(to_replace);
+    // An empty search string would be "found" at every position without
+    // ever advancing `pos`:
+    if (replaced_len == 0)
+        return c;
+
+    size_t pos = 0;
+    CORD ret = CORD_EMPTY;
+    while (pos < len) {
+        size_t found = CORD_str(c, pos, to_replace);
+        if (found == CORD_NOT_FOUND) {
+            // Keep everything after the last match, including a single
+            // trailing character:
+            ret = CORD_cat(ret, CORD_substr(c, pos, len - pos));
+            break;
+        }
+        if (found > pos)
+            ret = CORD_cat(ret, CORD_substr(c, pos, found-pos));
+        ret = CORD_cat(ret, replacement);
+        pos = found + replaced_len;
+    }
+    return ret;
+}
+
+
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/builtins/util.h b/builtins/util.h
index 271403ff..a3f5f2b4 100644
--- a/builtins/util.h
+++ b/builtins/util.h
@@ -31,6 +31,8 @@ extern bool USE_COLOR;
char *heap_strf(const char *fmt, ...);
CORD CORD_asprintf(CORD fmt, ...);
+CORD CORD_quoted(CORD str);
+CORD CORD_replace(CORD c, CORD to_replace, CORD replacement);
#define CORD_appendf(cord, fmt, ...) CORD_sprintf(cord, "%r" fmt, *(cord) __VA_OPT__(,) __VA_ARGS__)
#define CORD_all(...) CORD_catn(sizeof((CORD[]){__VA_ARGS__})/sizeof(CORD), __VA_ARGS__)
diff --git a/builtins/where.c b/builtins/where.c
index f7db2db3..d57f532e 100644
--- a/builtins/where.c
+++ b/builtins/where.c
@@ -1,27 +1,27 @@
// A type called "Where" that is an enum for "Anywhere", "Start", or "End"
// Mainly used for text methods
-#include <gc/cord.h>
#include <stdbool.h>
#include <stdint.h>
+#include "text.h"
#include "types.h"
-#include "where.h"
#include "util.h"
+#include "where.h"
-static CORD Where$as_text(Where_t *obj, bool use_color)
+static Text_t Where$as_text(Where_t *obj, bool use_color) // Text form of a Where value ("Where.Anywhere", "Where.Start" or "Where.End"), or the type name "Where" when `obj` is NULL; `use_color` adds ANSI color codes
{
if (!obj)
- return "Where";
+ return Text$from_str("Where"); // NULL means "describe the type itself"
switch (obj->tag) {
case $tag$Where$Anywhere:
- return use_color ? "\x1b[36;1mWhere.Anywhere\x1b[m" : "Where.Anywhere";
+ return Text$from_str(use_color ? "\x1b[36;1mWhere.Anywhere\x1b[m" : "Where.Anywhere");
case $tag$Where$Start:
- return use_color ? "\x1b[36;1mWhere.Start\x1b[m" : "Where.Start";
+ return Text$from_str(use_color ? "\x1b[36;1mWhere.Start\x1b[m" : "Where.Start");
case $tag$Where$End:
- return use_color ? "\x1b[36;1mWhere.End\x1b[m" : "Where.End";
+ return Text$from_str(use_color ? "\x1b[36;1mWhere.End\x1b[m" : "Where.End");
default:
- return CORD_EMPTY;
+ return (Text_t){.length=0}; // Unrecognized tag: empty text
}
}
diff --git a/compile.c b/compile.c
index 1dfe54d9..0bc09c74 100644
--- a/compile.c
+++ b/compile.c
@@ -224,7 +224,7 @@ static CORD compile_lvalue(env_t *env, ast_t *ast)
return CORD_all("Array_lvalue(", compile_type(item_type), ", ", target_code, ", ",
compile_int_to_type(env, index->index, Type(IntType, .bits=TYPE_IBITS64)),
", ", CORD_asprintf("%ld", padded_type_size(item_type)),
- ", ", Text$quoted(ast->file->filename, false), ", ", heap_strf("%ld", ast->start - ast->file->text),
+ ", ", CORD_quoted(ast->file->filename), ", ", heap_strf("%ld", ast->start - ast->file->text),
", ", heap_strf("%ld", ast->end - ast->file->text), ")");
}
} else {
@@ -320,7 +320,7 @@ CORD compile_statement(env_t *env, ast_t *ast)
if (!expr_t)
code_err(test->expr, "I couldn't figure out the type of this expression");
- CORD output = NULL;
+ CORD output = CORD_EMPTY;
if (test->output) {
const uint8_t *raw = (const uint8_t*)CORD_to_const_char_star(test->output);
uint8_t buf[128] = {0};
@@ -328,6 +328,7 @@ CORD compile_statement(env_t *env, ast_t *ast)
uint8_t *norm = u8_normalize(UNINORM_NFD, (uint8_t*)raw, strlen((char*)raw)+1, buf, &norm_len);
assert(norm[norm_len-1] == 0);
output = CORD_from_char_star((char*)norm);
+ CORD_printf("OUTPUT: %r\n", output);
if (norm && norm != buf) free(norm);
}
@@ -337,8 +338,8 @@ CORD compile_statement(env_t *env, ast_t *ast)
assert(compile_statement(env, test->expr) == CORD_EMPTY);
return CORD_asprintf(
"test(NULL, NULL, %r, %r, %ld, %ld);",
- compile(env, WrapAST(test->expr, TextLiteral, .cord=output)),
- compile(env, WrapAST(test->expr, TextLiteral, .cord=test->expr->file->filename)),
+ CORD_quoted(output),
+ CORD_quoted(test->expr->file->filename),
(int64_t)(test->expr->start - test->expr->file->text),
(int64_t)(test->expr->end - test->expr->file->text));
} else {
@@ -355,8 +356,8 @@ CORD compile_statement(env_t *env, ast_t *ast)
compile_declaration(t, var),
var, val_code, var,
compile_type_info(env, get_type(env, decl->value)),
- compile(env, WrapAST(test->expr, TextLiteral, .cord=output)),
- compile(env, WrapAST(test->expr, TextLiteral, .cord=test->expr->file->filename)),
+ CORD_quoted(output),
+ CORD_quoted(test->expr->file->filename),
(int64_t)(test->expr->start - test->expr->file->text),
(int64_t)(test->expr->end - test->expr->file->text));
}
@@ -382,8 +383,8 @@ CORD compile_statement(env_t *env, ast_t *ast)
compile_assignment(env, assign->targets->ast, value),
compile(env, assign->targets->ast),
compile_type_info(env, lhs_t),
- compile(env, WrapAST(test->expr, TextLiteral, .cord=test->output)),
- compile(env, WrapAST(test->expr, TextLiteral, .cord=test->expr->file->filename)),
+ CORD_quoted(test->output),
+ CORD_quoted(test->expr->file->filename),
(int64_t)(test->expr->start - test->expr->file->text),
(int64_t)(test->expr->end - test->expr->file->text));
} else {
@@ -415,8 +416,8 @@ CORD compile_statement(env_t *env, ast_t *ast)
CORD_appendf(&code, "&$1; }), %r, %r, %r, %ld, %ld);",
compile_type_info(env, get_type(env, assign->targets->ast)),
- compile(env, WrapAST(test->expr, TextLiteral, .cord=test->output)),
- compile(env, WrapAST(test->expr, TextLiteral, .cord=test->expr->file->filename)),
+ CORD_quoted(test->output),
+ CORD_quoted(test->expr->file->filename),
(int64_t)(test->expr->start - test->expr->file->text),
(int64_t)(test->expr->end - test->expr->file->text));
return code;
@@ -427,25 +428,25 @@ CORD compile_statement(env_t *env, ast_t *ast)
compile_statement(env, test->expr),
compile_lvalue(env, Match(test->expr, UpdateAssign)->lhs),
compile_type_info(env, get_type(env, Match(test->expr, UpdateAssign)->lhs)),
- compile(env, WrapAST(test->expr, TextLiteral, .cord=test->output)),
- compile(env, WrapAST(test->expr, TextLiteral, .cord=test->expr->file->filename)),
+ CORD_quoted(test->output),
+ CORD_quoted(test->expr->file->filename),
(int64_t)(test->expr->start - test->expr->file->text),
(int64_t)(test->expr->end - test->expr->file->text));
} else if (expr_t->tag == VoidType || expr_t->tag == AbortType || expr_t->tag == ReturnType) {
return CORD_asprintf(
"test(({ %r; NULL; }), NULL, NULL, %r, %ld, %ld);",
compile_statement(env, test->expr),
- compile(env, WrapAST(test->expr, TextLiteral, .cord=test->expr->file->filename)),
+ CORD_quoted(test->expr->file->filename),
(int64_t)(test->expr->start - test->expr->file->text),
(int64_t)(test->expr->end - test->expr->file->text));
} else {
return CORD_asprintf(
"test(%r, %r, %r, %r, %ld, %ld);",
test->expr->tag == Var ? CORD_all("&", compile(env, test->expr))
- : CORD_all("(", compile_type(expr_t), "[1]){", compile(env, test->expr), "}"),
+ : CORD_all("(", compile_type(expr_t), "[1]){", compile(env, test->expr), "}"),
compile_type_info(env, expr_t),
- compile(env, WrapAST(test->expr, TextLiteral, .cord=output)),
- compile(env, WrapAST(test->expr, TextLiteral, .cord=test->expr->file->filename)),
+ CORD_quoted(output),
+ CORD_quoted(test->expr->file->filename),
(int64_t)(test->expr->start - test->expr->file->text),
(int64_t)(test->expr->end - test->expr->file->text));
}
@@ -629,7 +630,7 @@ CORD compile_statement(env_t *env, ast_t *ast)
auto def = Match(ast, LangDef);
CORD_appendf(&env->code->typeinfos, "public const TypeInfo %r%s = {%zu, %zu, {.tag=TextInfo, .TextInfo={%r}}};\n",
namespace_prefix(env->libname, env->namespace), def->name, sizeof(CORD), __alignof__(CORD),
- Text$quoted(def->name, false));
+ CORD_quoted(def->name));
compile_namespace(env, def->name, def->namespace);
return CORD_EMPTY;
}
@@ -703,7 +704,7 @@ CORD compile_statement(env_t *env, ast_t *ast)
"}\n");
env->code->funcs = CORD_cat(env->code->funcs, wrapper);
} else if (fndef->cache && fndef->cache->tag == Int) {
- int64_t cache_size = Int64$from_text(Match(fndef->cache, Int)->str, NULL);
+ int64_t cache_size = Int64$from_text(Text$from_str(Match(fndef->cache, Int)->str), NULL);
const char *arg_type_name = heap_strf("%s$args", Match(fndef->name, Var)->name);
ast_t *args_def = FakeAST(StructDef, .name=arg_type_name, .fields=fndef->args);
prebind_statement(env, args_def);
@@ -1314,7 +1315,7 @@ CORD compile_int_to_type(env_t *env, ast_t *ast, type_t *target)
}
int64_t target_bits = (int64_t)Match(target, IntType)->bits;
- Int_t int_val = Int$from_text(Match(ast, Int)->str, NULL);
+ Int_t int_val = Int$from_text(Text$from_str(Match(ast, Int)->str), NULL);
mpz_t i;
mpz_init_set_int(i, int_val);
@@ -1354,7 +1355,7 @@ CORD compile_arguments(env_t *env, ast_t *call_ast, arg_t *spec_args, arg_ast_t
if (spec_arg->type->tag == IntType && call_arg->value->tag == Int) {
value = compile_int_to_type(env, call_arg->value, spec_arg->type);
} else if (spec_arg->type->tag == NumType && call_arg->value->tag == Int) {
- Int_t int_val = Int$from_text(Match(call_arg->value, Int)->str, NULL);
+ Int_t int_val = Int$from_text(Text$from_str(Match(call_arg->value, Int)->str), NULL);
double n = Int_to_Num(int_val);
value = CORD_asprintf(Match(spec_arg->type, NumType)->bits == TYPE_NBITS64
? "N64(%.20g)" : "N32(%.10g)", n);
@@ -1382,7 +1383,7 @@ CORD compile_arguments(env_t *env, ast_t *call_ast, arg_t *spec_args, arg_ast_t
if (spec_arg->type->tag == IntType && call_arg->value->tag == Int) {
value = compile_int_to_type(env, call_arg->value, spec_arg->type);
} else if (spec_arg->type->tag == NumType && call_arg->value->tag == Int) {
- Int_t int_val = Int$from_text(Match(call_arg->value, Int)->str, NULL);
+ Int_t int_val = Int$from_text(Text$from_str(Match(call_arg->value, Int)->str), NULL);
double n = Int_to_Num(int_val);
value = CORD_asprintf(Match(spec_arg->type, NumType)->bits == TYPE_NBITS64
? "N64(%.20g)" : "N32(%.10g)", n);
@@ -1513,7 +1514,7 @@ CORD compile(env_t *env, ast_t *ast)
}
case Int: {
const char *str = Match(ast, Int)->str;
- Int_t int_val = Int$from_text(str, NULL);
+ Int_t int_val = Int$from_text(Text$from_str(str), NULL);
mpz_t i;
mpz_init_set_int(i, int_val);
@@ -1780,8 +1781,8 @@ CORD compile(env_t *env, ast_t *ast)
case TextLiteral: {
CORD literal = Match(ast, TextLiteral)->cord;
if (literal == CORD_EMPTY)
- return "(CORD)CORD_EMPTY";
- CORD code = "(CORD)\"";
+ return "((Text_t){.length=0})";
+ CORD code = "Text$from_str(\"";
CORD_pos i;
CORD_FOR(i, literal) {
char c = CORD_pos_fetch(i);
@@ -1803,7 +1804,7 @@ CORD compile(env_t *env, ast_t *ast)
}
}
}
- return CORD_cat_char(code, '"');
+ return CORD_cat(code, "\")");
}
case TextJoin: {
const char *lang = Match(ast, TextJoin)->lang;
@@ -1812,7 +1813,7 @@ CORD compile(env_t *env, ast_t *ast)
code_err(ast, "%s is not a valid text language name", lang);
ast_list_t *chunks = Match(ast, TextJoin)->children;
if (!chunks) {
- return "(CORD)CORD_EMPTY";
+ return "((Text_t){.length=0})";
} else if (!chunks->next && chunks->ast->tag == TextLiteral) {
return compile(env, chunks->ast);
} else {
@@ -1839,7 +1840,7 @@ CORD compile(env_t *env, ast_t *ast)
if (chunk->next) code = CORD_cat(code, ", ");
}
if (chunks->next)
- return CORD_all("CORD_all(", code, ")");
+ return CORD_all("Text$concat(", code, ")");
else
return code;
}
@@ -2447,7 +2448,8 @@ CORD compile(env_t *env, ast_t *ast)
file_t *f = ast->file;
return CORD_all("Table$get_value_or_fail(", self, ", ", compile_type(table->key_type), ", ", compile_type(table->value_type), ", ",
compile_arguments(env, ast, arg_spec, call->args), ", ", compile_type_info(env, self_value_t), ", ",
- Text$quoted(f->filename, false), ", ", CORD_asprintf("%ld", (int64_t)(ast->start - f->text)), ", ",
+ CORD_quoted(f->filename), ", ",
+ CORD_asprintf("%ld", (int64_t)(ast->start - f->text)), ", ",
CORD_asprintf("%ld", (int64_t)(ast->end - f->text)),
")");
}
@@ -2630,8 +2632,9 @@ CORD compile(env_t *env, ast_t *ast)
} else {
empty = FakeAST(
InlineCCode,
- CORD_asprintf("fail_source(%r, %ld, %ld, \"This collection was empty!\");\n",
- Text$quoted(ast->file->filename, false), (long)(reduction->iter->start - reduction->iter->file->text),
+ CORD_asprintf("fail_source(%s, %ld, %ld, \"This collection was empty!\");\n",
+ CORD_quoted(ast->file->filename),
+ (long)(reduction->iter->start - reduction->iter->file->text),
(long)(reduction->iter->end - reduction->iter->file->text)));
}
ast_t *item = FakeAST(Var, "$iter_value");
@@ -2785,7 +2788,8 @@ CORD compile(env_t *env, ast_t *ast)
else
return CORD_all("Array_get(", compile_type(item_type), ", ", arr, ", ",
compile_int_to_type(env, indexing->index, Type(IntType, .bits=TYPE_IBITS64)), ", ",
- Text$quoted(f->filename, false), ", ", CORD_asprintf("%ld", (int64_t)(indexing->index->start - f->text)), ", ",
+ CORD_quoted(f->filename), ", ",
+ CORD_asprintf("%ld", (int64_t)(indexing->index->start - f->text)), ", ",
CORD_asprintf("%ld", (int64_t)(indexing->index->end - f->text)),
")");
} else {
@@ -2935,15 +2939,15 @@ CORD compile_type_info(env_t *env, type_t *t)
CORD sigil = ptr->is_stack ? "&" : "@";
if (ptr->is_readonly) sigil = CORD_cat(sigil, "%");
return CORD_asprintf("$PointerInfo(%r, %r, %s)",
- Text$quoted(sigil, false),
+ CORD_quoted(sigil),
compile_type_info(env, ptr->pointed),
ptr->is_optional ? "yes" : "no");
}
case FunctionType: {
- return CORD_asprintf("$FunctionInfo(%r)", Text$quoted(type_to_cord(t), false));
+ return CORD_asprintf("$FunctionInfo(%r)", CORD_quoted(type_to_cord(t)));
}
case ClosureType: {
- return CORD_asprintf("$ClosureInfo(%r)", Text$quoted(type_to_cord(t), false));
+ return CORD_asprintf("$ClosureInfo(%r)", CORD_quoted(type_to_cord(t)));
}
case TypeInfoType: return "&$TypeInfo";
case MemoryType: return "&$Memory";
@@ -2968,7 +2972,7 @@ CORD compile_cli_arg_call(env_t *env, CORD fn_name, type_t *fn_type)
for (arg_t *arg = fn_info->args; arg; arg = arg->next) {
usage = CORD_cat(usage, " ");
type_t *t = get_arg_type(main_env, arg);
- CORD flag = Text$replace(arg->name, "_", "-", I(-1));
+ CORD flag = CORD_replace(arg->name, "_", "-");
if (arg->default_val) {
if (t->tag == BoolType)
usage = CORD_all(usage, "[--", flag, "]");
@@ -2983,7 +2987,7 @@ CORD compile_cli_arg_call(env_t *env, CORD fn_name, type_t *fn_type)
usage = CORD_all(usage, "<", flag, ">");
}
}
- CORD code = CORD_all("CORD usage = CORD_all(\"Usage: \", argv[0], ", usage ? Text$quoted(usage, false) : "CORD_EMPTY", ");\n",
+ CORD code = CORD_all("CORD usage = CORD_all(\"Usage: \", argv[0], ", usage ? CORD_quoted(usage) : "CORD_EMPTY", ");\n",
"#define USAGE_ERR(...) errx(1, CORD_to_const_char_star(CORD_all(__VA_ARGS__)))\n"
"#define IS_FLAG(str, flag) (strncmp(str, flag, strlen(flag) == 0 && (str[strlen(flag)] == 0 || str[strlen(flag)] == '=')) == 0)\n");
@@ -3006,7 +3010,7 @@ CORD compile_cli_arg_call(env_t *env, CORD fn_name, type_t *fn_type)
"if (strncmp(argv[i], \"--\", 2) != 0) {\n++i;\ncontinue;\n}\n");
for (arg_t *arg = fn_info->args; arg; arg = arg->next) {
type_t *t = get_arg_type(main_env, arg);
- CORD flag = Text$replace(arg->name, "_", "-", I(-1));
+ CORD flag = CORD_replace(arg->name, "_", "-");
switch (t->tag) {
case BoolType: {
code = CORD_all(code, "else if (pop_flag(argv, &i, \"", flag, "\", &flag)) {\n"
diff --git a/parse.c b/parse.c
index af011877..8ab3688e 100644
--- a/parse.c
+++ b/parse.c
@@ -15,6 +15,7 @@
#include "ast.h"
#include "builtins/integers.h"
+#include "builtins/text.h"
#include "builtins/table.h"
#include "builtins/util.h"
@@ -1894,7 +1895,7 @@ ast_t *parse_enum_def(parse_ctx_t *ctx, const char *pos) {
spaces(&pos);
if (match(&pos, "=")) {
ast_t *val = expect(ctx, tag_start, &pos, parse_int, "I expected an integer literal after this '='");
- Int_t i = Int$from_text(Match(val, Int)->str, NULL);
+ Int_t i = Int$from_text(Text$from_str(Match(val, Int)->str), NULL);
// TODO check for overflow
next_value = (i.small >> 2);
}
diff --git a/repl.c b/repl.c
index a2b77a81..85c1ec89 100644
--- a/repl.c
+++ b/repl.c
@@ -208,7 +208,7 @@ static double ast_to_num(env_t *env, ast_t *ast)
}
}
-static CORD obj_to_text(type_t *t, const void *obj, bool use_color)
+static Text_t obj_to_text(type_t *t, const void *obj, bool use_color)
{
const TypeInfo *info = type_to_type_info(t);
return generic_as_text(obj, use_color, info);
@@ -272,8 +272,8 @@ void run(env_t *env, ast_t *ast)
} else {
void *value = GC_MALLOC(size);
eval(env, doctest->expr, value);
- CORD c = obj_to_text(t, value, true);
- printf("= %s \x1b[2m: %T\x1b[m\n", CORD_to_const_char_star(c), t);
+ Text_t text = obj_to_text(t, value, true);
+ printf("= %k \x1b[2m: %T\x1b[m\n", &text, t);
fflush(stdout);
}
break;
@@ -353,11 +353,11 @@ void eval(env_t *env, ast_t *ast, void *dest)
case Int: {
if (!dest) return;
switch (Match(ast, Int)->bits) {
- case 0: *(Int_t*)dest = Int$from_text(Match(ast, Int)->str, NULL); break;
- case 64: *(int64_t*)dest = Int64$from_text(Match(ast, Int)->str, NULL); break;
- case 32: *(int32_t*)dest = Int32$from_text(Match(ast, Int)->str, NULL); break;
- case 16: *(int16_t*)dest = Int16$from_text(Match(ast, Int)->str, NULL); break;
- case 8: *(int8_t*)dest = Int8$from_text(Match(ast, Int)->str, NULL); break;
+ case 0: *(Int_t*)dest = Int$from_text(Text$from_str(Match(ast, Int)->str), NULL); break;
+ case 64: *(int64_t*)dest = Int64$from_text(Text$from_str(Match(ast, Int)->str), NULL); break;
+ case 32: *(int32_t*)dest = Int32$from_text(Text$from_str(Match(ast, Int)->str), NULL); break;
+ case 16: *(int16_t*)dest = Int16$from_text(Text$from_str(Match(ast, Int)->str), NULL); break;
+ case 8: *(int8_t*)dest = Int8$from_text(Text$from_str(Match(ast, Int)->str), NULL); break;
default: errx(1, "Invalid int bits: %ld", Match(ast, Int)->bits);
}
break;
@@ -386,7 +386,7 @@ void eval(env_t *env, ast_t *ast, void *dest)
size_t chunk_size = type_size(chunk_t);
char buf[chunk_size];
eval(env, chunk->ast, buf);
- ret = CORD_cat(ret, obj_to_text(chunk_t, buf, false));
+ ret = CORD_cat(ret, Text$as_c_string(obj_to_text(chunk_t, buf, false)));
}
}
if (dest) *(CORD*)dest = ret;
diff --git a/structs.c b/structs.c
index 8099012e..66ca3ff1 100644
--- a/structs.c
+++ b/structs.c
@@ -166,7 +166,7 @@ void compile_struct_def(env_t *env, ast_t *ast)
} else {
// If there are no fields, we can use an EmptyStruct typeinfo, which generates less code:
CORD typeinfo = CORD_asprintf("public const TypeInfo %r = {%zu, %zu, {.tag=EmptyStruct, .EmptyStruct.name=%r}};\n",
- full_name, type_size(t), type_align(t), Text$quoted(def->name, false));
+ full_name, type_size(t), type_align(t), Text$quoted(Text$from_str(def->name), false));
env->code->typeinfos = CORD_all(env->code->typeinfos, typeinfo);
}
diff --git a/tomo.c b/tomo.c
index 8b69f1f4..01bdf5ae 100644
--- a/tomo.c
+++ b/tomo.c
@@ -84,6 +84,8 @@ int main(int argc, char *argv[])
errx(1, "Couldn't set printf specifier");
if (register_printf_specifier('W', printf_ast, printf_pointer_size))
errx(1, "Couldn't set printf specifier");
+ if (register_printf_specifier('k', printf_text, printf_text_size))
+ errx(1, "Couldn't set printf specifier");
setenv("TOMO_IMPORT_PATH", "~/.local/src/tomo:.", 0);
setenv("TOMO_LIB_PATH", "~/.local/lib/tomo:.", 0);
diff --git a/typecheck.c b/typecheck.c
index bb40666f..4d4c080e 100644
--- a/typecheck.c
+++ b/typecheck.c
@@ -9,11 +9,12 @@
#include <sys/stat.h>
#include "ast.h"
+#include "builtins/text.h"
+#include "builtins/util.h"
#include "environment.h"
#include "parse.h"
#include "typecheck.h"
#include "types.h"
-#include "builtins/util.h"
type_t *parse_type_ast(env_t *env, type_ast_t *ast)
{
@@ -1367,7 +1368,7 @@ bool is_constant(env_t *env, ast_t *ast)
case Int: {
auto info = Match(ast, Int);
if (info->bits == IBITS_UNSPECIFIED) {
- Int_t int_val = Int$from_text(info->str, NULL);
+ Int_t int_val = Int$from_text(Text$from_str(info->str), NULL);
mpz_t i;
mpz_init_set_int(i, int_val);
return (mpz_cmpabs_ui(i, BIGGEST_SMALL_INT) <= 0);