aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBruce Hill <bruce@bruce-hill.com>2024-02-16 13:29:02 -0500
committerBruce Hill <bruce@bruce-hill.com>2024-02-16 13:29:02 -0500
commitf156f915f480b46f112e4137f42d55b88c297436 (patch)
treecb9d30e28ea35e0a7dfe60f51ba019da2f27bc32
parent317b8f53193e5ec14c92e229c89bc84ca32b7810 (diff)
Moving builtin strings over to cords
-rw-r--r--builtins/functions.h7
-rw-r--r--builtins/string.c652
-rw-r--r--builtins/string.h11
-rw-r--r--compile.c2
-rw-r--r--metamethods/cord.c208
5 files changed, 308 insertions, 572 deletions
diff --git a/builtins/functions.h b/builtins/functions.h
index c79f7fba..5d49da73 100644
--- a/builtins/functions.h
+++ b/builtins/functions.h
@@ -6,10 +6,9 @@
#include "string.h"
-void builtin_say(Str_t str, Str_t end);
-void builtin_fail(const char *fmt, ...);
-void builtin_fail_array(Str_t fmt, ...);
-Str_t builtin_last_err();
+void builtin_say(CORD str, CORD end);
+void builtin_fail(CORD fmt, ...);
+CORD builtin_last_err();
void builtin_doctest(const char *label, CORD expr, const char *type, bool use_color, const char *expected, const char *filename, int start, int end);
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/builtins/string.c b/builtins/string.c
index 92741903..ce641762 100644
--- a/builtins/string.c
+++ b/builtins/string.c
@@ -1,13 +1,16 @@
#include <assert.h>
+#include <ctype.h>
+#include <err.h>
#include <gc.h>
#include <gc/cord.h>
+#include <limits.h>
#include <stdalign.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
-#include <ctype.h>
#include <sys/param.h>
-#include <err.h>
+#include <unistr.h>
+#include <unicase.h>
#include "../SipHash/halfsiphash.h"
#include "types.h"
@@ -19,479 +22,271 @@
extern const void *SSS_HASH_VECTOR;
typedef struct {
- uint8_t success;
+ enum { FIND_FAILURE, FIND_SUCCESS } status;
int32_t index;
} find_result_t;
-static Str_t compacted(const Str_t str)
-{
- if (str.stride == 1) return str;
- char *buf = GC_MALLOC_ATOMIC(str.length + 1);
- for (int64_t i = 0; i < str.length; i++)
- buf[i] = str.data[i*str.stride];
- return (Str_t){.data=buf, .length=str.length, .stride=1};
-}
-
-public CORD Str__cord(const Str_t *s, bool colorize, const TypeInfo *type)
+public CORD Str__quoted(CORD str, bool colorize)
{
- (void)type;
- char *data = s->data;
// Note: it's important to have unicode strings not get broken up with
// escapes, otherwise they won't print right.
if (colorize) {
- CORD cord = "\x1b[35m\"";
- for (int32_t i = 0; i < s->length; i++) {
- char c = data[i*s->stride];
+ CORD quoted = "\x1b[35m\"";
+ CORD_pos i;
+ CORD_FOR(i, str) {
+ char c = CORD_pos_fetch(i);
switch (c) {
#define BACKSLASHED(esc) "\x1b[34m\\\x1b[1m" esc "\x1b[0;35m"
- case '\a': cord = CORD_cat(cord, BACKSLASHED("a")); break;
- case '\b': cord = CORD_cat(cord, BACKSLASHED("b")); break;
- case '\x1b': cord = CORD_cat(cord, BACKSLASHED("e")); break;
- case '\f': cord = CORD_cat(cord, BACKSLASHED("f")); break;
- case '\n': cord = CORD_cat(cord, BACKSLASHED("n")); break;
- case '\r': cord = CORD_cat(cord, BACKSLASHED("r")); break;
- case '\t': cord = CORD_cat(cord, BACKSLASHED("t")); break;
- case '\v': cord = CORD_cat(cord, BACKSLASHED("v")); break;
- case '"': cord = CORD_cat(cord, BACKSLASHED("\"")); break;
- case '\\': cord = CORD_cat(cord, BACKSLASHED("\\")); break;
+ case '\a': quoted = CORD_cat(quoted, BACKSLASHED("a")); break;
+ case '\b': quoted = CORD_cat(quoted, BACKSLASHED("b")); break;
+ case '\x1b': quoted = CORD_cat(quoted, BACKSLASHED("e")); break;
+ case '\f': quoted = CORD_cat(quoted, BACKSLASHED("f")); break;
+ case '\n': quoted = CORD_cat(quoted, BACKSLASHED("n")); break;
+ case '\r': quoted = CORD_cat(quoted, BACKSLASHED("r")); break;
+ case '\t': quoted = CORD_cat(quoted, BACKSLASHED("t")); break;
+ case '\v': quoted = CORD_cat(quoted, BACKSLASHED("v")); break;
+ case '"': quoted = CORD_cat(quoted, BACKSLASHED("\"")); break;
+ case '\\': quoted = CORD_cat(quoted, BACKSLASHED("\\")); break;
case '\x00' ... '\x06': case '\x0E' ... '\x1A':
case '\x1C' ... '\x1F': case '\x7F' ... '\x7F':
- CORD_sprintf(&cord, "%r" BACKSLASHED("x%02X"), cord, c);
+ CORD_sprintf(&quoted, "%r" BACKSLASHED("x%02X"), quoted, c);
break;
- default: cord = CORD_cat_char(cord, c); break;
+ default: quoted = CORD_cat_char(quoted, c); break;
#undef BACKSLASHED
}
}
- cord = CORD_cat(cord, "\"\x1b[m");
- if (strcmp(type->name, "Str") != 0)
- CORD_sprintf(&cord, "\x1b[0;1m%s::\x1b[m%r", type->name, cord);
- return cord;
+ quoted = CORD_cat(quoted, "\"\x1b[m");
+ return quoted;
} else {
- CORD cord = "\"";
- for (int32_t i = 0; i < s->length; i++) {
- char c = data[i*s->stride];
+ CORD quoted = "\"";
+ CORD_pos i;
+ CORD_FOR(i, str) {
+ char c = CORD_pos_fetch(i);
switch (c) {
- case '\a': cord = CORD_cat(cord, "\\a"); break;
- case '\b': cord = CORD_cat(cord, "\\b"); break;
- case '\x1b': cord = CORD_cat(cord, "\\e"); break;
- case '\f': cord = CORD_cat(cord, "\\f"); break;
- case '\n': cord = CORD_cat(cord, "\\n"); break;
- case '\r': cord = CORD_cat(cord, "\\r"); break;
- case '\t': cord = CORD_cat(cord, "\\t"); break;
- case '\v': cord = CORD_cat(cord, "\\v"); break;
- case '"': cord = CORD_cat(cord, "\\\""); break;
- case '\\': cord = CORD_cat(cord, "\\\\"); break;
+ case '\a': quoted = CORD_cat(quoted, "\\a"); break;
+ case '\b': quoted = CORD_cat(quoted, "\\b"); break;
+ case '\x1b': quoted = CORD_cat(quoted, "\\e"); break;
+ case '\f': quoted = CORD_cat(quoted, "\\f"); break;
+ case '\n': quoted = CORD_cat(quoted, "\\n"); break;
+ case '\r': quoted = CORD_cat(quoted, "\\r"); break;
+ case '\t': quoted = CORD_cat(quoted, "\\t"); break;
+ case '\v': quoted = CORD_cat(quoted, "\\v"); break;
+ case '"': quoted = CORD_cat(quoted, "\\\""); break;
+ case '\\': quoted = CORD_cat(quoted, "\\\\"); break;
case '\x00' ... '\x06': case '\x0E' ... '\x1A':
case '\x1C' ... '\x1F': case '\x7F' ... '\x7F':
- CORD_sprintf(&cord, "%r\\x%02X", cord, c);
+ CORD_sprintf(&quoted, "%r\\x%02X", quoted, c);
break;
- default: cord = CORD_cat_char(cord, c); break;
+ default: quoted = CORD_cat_char(quoted, c); break;
}
}
- cord = CORD_cat_char(cord, '"');
- if (strcmp(type->name, "Str") != 0)
- CORD_sprintf(&cord, "%s::%r", type->name, cord);
- return cord;
+ quoted = CORD_cat_char(quoted, '"');
+ return quoted;
}
}
-public int32_t Str__compare(const Str_t *x, const Str_t *y)
+public int Str__compare(CORD *x, CORD *y)
{
- int64_t length = x->length < y->length ? x->length : y->length;
- for (int64_t i = 0; i < length; i++) {
- char xc = x->data[i*x->stride], yc = y->data[i*y->stride];
- if (xc != yc)
- return (xc > yc) ? 1 : -1;
- }
- return (x->length > y->length) - (x->length < y->length);
+ return CORD_cmp(*x, *y);
}
-public bool Str__equal(const Str_t *x, const Str_t *y)
+public bool Str__equal(CORD *x, CORD *y)
{
- return (Str__compare(x, y) == 0);
+ return CORD_cmp(*x, *y) == 0;
}
-public int Str__hash(const Str_t *s, const TypeInfo *type)
+public uint32_t Str__hash(CORD *cord)
{
- (void)type;
- if (s->length == 0 || !s->data) return 0;
+ if (!*cord) return 0;
- const char *data;
- if (s->stride == 1) {
- data = s->data;
- } else {
- char *buf = alloca(s->length);
- for (int64_t i = 0; i < s->length; i++)
- buf[i] = s->data[i*s->stride];
- data = buf;
- }
+ const char *str = CORD_to_const_char_star(*cord);
+ *cord = str;
uint32_t hash;
- halfsiphash(data, s->length, SSS_HASH_VECTOR, (uint8_t*)&hash, sizeof(hash));
+ halfsiphash(str, strlen(str)+1, SSS_HASH_VECTOR, (uint8_t*)&hash, sizeof(hash));
return hash;
}
-public Str_t Str__uppercased(const Str_t s)
-{
- char *s2 = GC_MALLOC_ATOMIC(s.length + 1);
- for (int64_t i = 0; i < s.length; i++)
- s2[i] = toupper(s.data[i*s.stride]);
- return (Str_t){.data=s2, .length=s.length, .stride=1};
-}
-
-public Str_t Str__lowercased(const Str_t s)
+public CORD Str__uppercased(CORD str)
{
- char *s2 = GC_MALLOC_ATOMIC(s.length + 1);
- for (int64_t i = 0; i < s.length; i++)
- s2[i] = tolower(s.data[i*s.stride]);
- return (Str_t){.data=s2, .length=s.length, .stride=1};
+ if (!str) return str;
+ size_t len = strlen(str) + 1;
+ uint8_t *dest = GC_MALLOC_ATOMIC(len);
+ return (CORD)u8_toupper((const uint8_t*)str, len-1, uc_locale_language(), NULL, dest, &len);
}
-public Str_t Str__capitalized(const Str_t s)
+public CORD Str__lowercased(CORD str)
{
- char *s2 = GC_MALLOC_ATOMIC(s.length + 1);
- int64_t i;
- for (i = 0; i < s.length; i++) {
- if (isalpha(s.data[i*s.stride])) {
- s2[i] = toupper(s.data[i*s.stride]);
- ++i;
- break;
- } else {
- s2[i] = s.data[i*s.stride];
- }
- }
- for (; i < s.length; i++)
- s2[i] = s.data[i*s.stride];
- return (Str_t){.data=s2, .length=s.length, .stride=1};
+ if (!str) return str;
+ size_t len = strlen(str) + 1;
+ uint8_t *dest = GC_MALLOC_ATOMIC(len);
+ return (CORD)u8_tolower((const uint8_t*)str, len-1, uc_locale_language(), NULL, dest, &len);
}
-public Str_t Str__titlecased(const Str_t s)
+public CORD Str__titlecased(CORD str)
{
- char *s2 = GC_MALLOC_ATOMIC(s.length + 1);
- bool should_uppercase = true;
- for (int64_t i = 0; i < s.length; i++) {
- if (isalpha(s.data[i*s.stride])) {
- if (should_uppercase) {
- s2[i] = toupper(s.data[i*s.stride]);
- should_uppercase = false;
- } else {
- s2[i] = tolower(s.data[i*s.stride]);
- }
- } else {
- should_uppercase = true;
- s2[i] = s.data[i*s.stride];
- }
- }
- return (Str_t){.data=s2, .length=s.length, .stride=1};
-}
-
-public bool Str__starts_with(const Str_t s, const Str_t prefix)
-{
- if (s.length < prefix.length) return false;
- for (int64_t i = 0; i < prefix.length; i++) {
- if (s.data[i*s.stride] != prefix.data[i*prefix.stride])
- return false;
- }
- return true;
+ if (!str) return str;
+ size_t len = strlen(str) + 1;
+ uint8_t *dest = GC_MALLOC_ATOMIC(len);
+ return (CORD)u8_totitle((const uint8_t*)str, len-1, uc_locale_language(), NULL, dest, &len);
}
-public bool Str__ends_with(const Str_t s, const Str_t suffix)
-{
- if (s.length < suffix.length) return false;
- for (int64_t i = 0; i < suffix.length; i++) {
- if (s.data[(s.length-suffix.length+i)*s.stride] != suffix.data[i*suffix.stride])
- return false;
- }
- return true;
-}
+typedef enum { WHERE_ANYWHERE, WHERE_START, WHERE_END } where_e;
-public Str_t Str__without_prefix(const Str_t s, const Str_t prefix)
+public bool Str__has(CORD str, CORD target, where_e where)
{
- if (s.length < prefix.length) return s;
- for (int64_t i = 0; i < prefix.length; i++) {
- if (s.data[i*s.stride] != prefix.data[i*prefix.stride])
- return s;
+ if (!target) return true;
+ if (!str) return false;
+
+ if (where == WHERE_START) {
+ return (CORD_ncmp(str, 0, target, 0, CORD_len(target)) == 0);
+ } else if (where == WHERE_END) {
+ size_t str_len = CORD_len(str);
+ size_t target_len = CORD_len(target);
+ return (str_len >= target_len && CORD_ncmp(str, str_len-target_len, target, 0, target_len) == 0);
+ } else {
+ size_t pos = CORD_str(str, 0, target);
+ return (pos != CORD_NOT_FOUND);
}
- return (Str_t){
- .data=s.data + prefix.length*s.stride,
- .length=s.length - prefix.length,
- .stride=s.stride,
- .cow=0, .atomic=1,
- };
}
-public Str_t Str__without_suffix(const Str_t s, const Str_t suffix)
+public CORD Str__without(CORD str, CORD target, where_e where)
{
- if (s.length < suffix.length) return s;
- for (int64_t i = 0; i < suffix.length; i++) {
- if (s.data[(s.length - suffix.length + i)*s.stride] != suffix.data[i*suffix.stride])
- return s;
+ if (!str || !target) return str;
+
+ size_t target_len = CORD_len(target);
+ if (where == WHERE_START) {
+ if (CORD_ncmp(str, 0, target, 0, target_len) == 0)
+ return CORD_substr(str, target_len, SIZE_MAX);
+ return str;
+ } else if (where == WHERE_END) {
+ size_t str_len = CORD_len(str);
+ if (CORD_ncmp(str, str_len-target_len, target, 0, target_len) == 0)
+ return CORD_substr(str, 0, str_len - target_len);
+ return str;
+ } else {
+ errx(1, "Not implemented");
}
- return (Str_t){
- .data=s.data,
- .length=s.length - suffix.length,
- .stride=s.stride,
- .cow=0, .atomic=1,
- };
}
-public Str_t Str__trimmed(const Str_t s, const Str_t trim_chars, bool trim_left, bool trim_right)
+public CORD Str__trimmed(CORD str, CORD skip, where_e where)
{
- int64_t length = s.length;
- int64_t start = 0;
- if (trim_left) {
- for (; start < s.length; start++) {
- for (int64_t t = 0; t < trim_chars.length; t++) {
- if (s.data[start*s.stride] == trim_chars.data[t*trim_chars.stride])
- goto found_ltrim;
- }
- goto done_trimming_left;
- found_ltrim:
- --length;
- }
- }
- done_trimming_left:;
- if (trim_right) {
- while (length > 0) {
- for (int64_t t = 0; t < trim_chars.length; t++) {
- if (s.data[(start+length-1)*s.stride] == trim_chars.data[t*trim_chars.stride])
- goto found_rtrim;
- }
- goto done_trimming_right;
- found_rtrim:
- --length;
- }
+ if (!str || !skip) return str;
+ const uint8_t *ustr = (const uint8_t*)CORD_to_const_char_star(str);
+ const uint8_t *uskip = (const uint8_t*)CORD_to_const_char_star(skip);
+ if (where == WHERE_START) {
+ size_t span = u8_strspn(ustr, uskip);
+ return (CORD)ustr + span;
+ } else if (where == WHERE_END) {
+ size_t len = u8_strlen(ustr);
+ const uint8_t *back = ustr + len;
+ size_t back_span = 0;
+ while (back - back_span > ustr && u8_strspn(back-back_span-1, uskip) > back_span)
+ ++back_span;
+ return CORD_substr((CORD)ustr, 0, len - back_span);
+ } else {
+ size_t span = u8_strspn(ustr, uskip);
+ size_t len = u8_strlen(ustr);
+ const uint8_t *back = ustr + len;
+ size_t back_span = 0;
+ while (back - back_span > ustr + span && u8_strspn(back-back_span-1, uskip) > back_span)
+ ++back_span;
+ return CORD_substr((CORD)(ustr + span), 0, len - span - back_span);
}
- done_trimming_right:;
- return (Str_t){.data=s.data+start*s.stride, .length=length, .stride=s.stride};
-}
-
-public const char *Str__c_string(const Str_t str)
-{
- if (str.length == 0)
- return "";
- else if (str.stride == 1
- // Verify that the '\0' would be on the same page, so unlikely to segfault:
- && (((uint64_t)str.data + str.length + 1) & 0xFFF) != 0
- // Check for nul-termination
- && str.data[str.length+1] == '\0')
- return str.data;
-
- char *buf = GC_MALLOC_ATOMIC(str.length + 1);
- for (int64_t i = 0; i < str.length; i++)
- buf[i] = str.data[i*str.stride];
- buf[str.length] = '\0';
- return buf;
}
-public Str_t Str__from_c_string(const char *str)
+public CORD Str__slice(CORD str, int64_t first, int64_t stride, int64_t length)
{
- size_t length = str ? strlen(str) : 0;
- if (length == 0) return (Str_t){.length=0, .stride=0};
- char *buf = GC_MALLOC_ATOMIC(length + 1);
- memcpy(buf, str, length+1);
- buf[length+1] = '\0';
- return (Str_t){.data=buf, .length=(int64_t)length, .stride=1};
+ if (stride != 1) errx(1, "Slicing with non-1 stride is not supported");
+ return CORD_substr(str, first-1, length);
}
-public find_result_t Str__find(const Str_t str, const Str_t pat)
+public find_result_t Str__find(CORD str, CORD pat)
{
- if (str.length < pat.length) return (find_result_t){.success=0};
- if (pat.length == 0) return (find_result_t){.success=1, .index=1};
-
- // For short strings, do naive approach:
- // if (str.length*pat.length < UCHAR_MAX) {
- for (int64_t s = 0; s < str.length; s++) {
- for (int64_t p = 0; p < pat.length; p++) {
- if (str.data[s*str.stride] != pat.data[p*pat.stride])
- goto not_a_match;
- }
- return (find_result_t){.success=1, .index=s+1};
- not_a_match:;
- }
- return (find_result_t){.success=0};
- // }
-
- // // Boyer-Moore algorithm:
- // static int skip[UCHAR_MAX];
- // for (int32_t i = 0; i <= UCHAR_MAX; ++i)
- // skip[i] = str.length;
- // for (int32_t i = 0; i < pat.length; ++i)
- // skip[(unsigned char)pat.data[i*pat.stride]] = pat.length - i - 1;
- // char lastpatchar = pat.data[(pat.length - 1)*pat.stride];
- // int32_t min_skip = pat.length;
- // for (int32_t p = 0; p < pat.length - 1; ++p) {
- // if (pat.data[p*pat.stride] == lastpatchar)
- // min_skip = pat.length - p - 1;
- // }
-
- // for (int32_t i = pat.length - 1; i < str.length; ) {
- // // Use skip table:
- // int32_t can_skip = skip[(unsigned char)str.data[i*str.stride]];
- // if (can_skip != 0) {
- // i += can_skip;
- // continue;
- // }
- // // Check for exact match:
- // for (int32_t j = 0; j < pat.length; j++) {
- // if (str.data[(i-pat.length+j)*str.stride] != pat.data[j*pat.stride]) {
- // // Mismatch:
- // i += min_skip;
- // goto keep_going;
- // }
- // }
- // return i - pat.length + 1;
- // keep_going: continue;
- // }
- // return 0;
+ if (!pat) return (find_result_t){.status=FIND_SUCCESS, .index=1};
+ size_t pos = CORD_str(str, 0, pat);
+ return (pos == CORD_NOT_FOUND) ? (find_result_t){.status=FIND_FAILURE} : (find_result_t){.status=FIND_SUCCESS, .index=(int32_t)pos};
}
-public Str_t Str__replace(Str_t text, Str_t pat, Str_t replacement, int64_t limit)
+public CORD Str__replace(CORD text, CORD pat, CORD replacement, int64_t limit)
{
- text = compacted(text);
- pat = compacted(pat);
- replacement = compacted(replacement);
- char *buf;
- size_t size;
- FILE *mem = open_memstream(&buf, &size);
- for (const char *pos = text.data; ; --limit) {
- const char *match = limit == 0 ? NULL : memmem(pos, (int64_t)text.length - (int64_t)(pos - text.data), pat.data, pat.length);
- if (match) {
- fwrite(pos, 1, (size_t)(match - pos), mem);
- fwrite(replacement.data, 1, replacement.length, mem);
- pos = match + pat.length;
- } else {
- fwrite(pos, 1, (size_t)text.length - (size_t)(pos - text.data), mem);
- break;
- }
+ if (!text || !pat) return text;
+ CORD ret = NULL;
+ size_t pos = 0, pat_len = CORD_len(pat);
+ for (size_t found; limit > 0 && (found=CORD_str(text, pos, pat)) != CORD_NOT_FOUND; --limit) {
+ ret = CORD_cat(ret, CORD_substr(text, pos, found));
+ ret = CORD_cat(ret, replacement);
+ pos = found + pat_len;
}
- fflush(mem);
- char *str = GC_MALLOC_ATOMIC(size + 1);
- memcpy(str, buf, size+1);
- fclose(mem);
- free(buf);
- return (Str_t){.data=str, .length=size, .stride=1};
+ return CORD_cat(ret, CORD_substr(text, pos, SIZE_MAX));
}
-public Str_t Str__quoted(const Str_t text, const char *dsl, bool colorize)
+public Str_Array_t Str__split(CORD str, CORD split)
{
- char *buf;
- size_t size;
- FILE *mem = open_memstream(&buf, &size);
- if (colorize) fputs("\x1b[35m", mem);
- if (dsl && dsl[0]) fprintf(mem, "$%s", dsl);
- const char *escape_color = colorize ? "\x1b[1;34m" : "";
- const char *reset_color = colorize ? "\x1b[0;35m" : "";
- fputc('"', mem);
- for (int64_t i = 0; i < text.length; i++) {
- char c = text.data[i*text.stride];
- switch (c) {
- case '\\': fprintf(mem, "%s\\\\%s", escape_color, reset_color); break;
- case '"': fprintf(mem, "%s\\\"%s", escape_color, reset_color); break;
- case '\n': fprintf(mem, "%s\\n%s", escape_color, reset_color); break;
- case '\t': fprintf(mem, "%s\\t%s", escape_color, reset_color); break;
- case '\r': fprintf(mem, "%s\\r%s", escape_color, reset_color); break;
- case '\a': fprintf(mem, "%s\\a%s", escape_color, reset_color); break;
- case '\b': fprintf(mem, "%s\\b%s", escape_color, reset_color); break;
- case '\v': fprintf(mem, "%s\\v%s", escape_color, reset_color); break;
- default: {
- if (isprint(c))
- fputc(c, mem);
- else
- fprintf(mem, "%s\\x%02X%s", escape_color, (int)c, reset_color);
- }
- }
- }
- fputc('"', mem);
- if (colorize) fputs("\x1b[m", mem);
- fflush(mem);
- char *str = GC_MALLOC_ATOMIC(size + 1);
- memcpy(str, buf, size+1);
- fclose(mem);
- free(buf);
- return (Str_t){.data=str, .length=size, .stride=1};
-}
-
-public Str_Array_t Str__split(const Str_t str, const Str_t split_chars)
-{
- if (str.length == 0) return (Str_Array_t){.stride=sizeof(Str_t)};
- Str_Array_t strings = {.stride=sizeof(Str_t)};
+ if (!str) return (Str_Array_t){.stride=sizeof(CORD)};
+ Str_Array_t strings = {.stride=sizeof(CORD)};
int64_t capacity = 0;
- bool separators[256] = {0};
- for (int64_t i = 0; i < split_chars.length; i++)
- separators[(int)split_chars.data[split_chars.stride*i]] = true;
- for (int64_t i = 0; i < str.length; i++) {
- if (separators[(int)str.data[str.stride*i]]) continue;
- int64_t length = 0;
- while (i < str.length && !separators[(int)str.data[str.stride*i]]) {
- ++length;
- ++i;
+ const uint8_t *ustr = (uint8_t*)CORD_to_const_char_star(str);
+ const uint8_t *usplit = (uint8_t*)CORD_to_const_char_star(split);
+ for (int64_t i = 0; ; ) {
+ i += u8_strcspn(ustr + i, usplit);
+ size_t span = u8_strspn(ustr + i, usplit);
+ if (span > 0) {
+ CORD chunk = CORD_substr((CORD)ustr, i, i+span);
+ if (capacity <= 0)
+ strings.data = GC_REALLOC(strings.data, sizeof(CORD)*(capacity += 10));
+ strings.data[strings.length++] = chunk;
+ i += span;
+ } else {
+ break;
}
- strings.data = GC_REALLOC(strings.data, sizeof(Str_t)*(capacity += 1));
- strings.data[strings.length++] = (Str_t){
- .data=&str.data[str.stride*(i-length)],
- .length=length,
- .stride=str.stride,
- };
}
return strings;
}
-public Str_t Str__join(Str_t glue, Str_Array_t pieces)
+public CORD Str__join(CORD glue, Str_Array_t pieces)
{
- if (pieces.length == 0) return (Str_t){.stride=1};
+ if (pieces.length == 0) return CORD_EMPTY;
- int64_t length = 0;
+ CORD ret = CORD_EMPTY;
for (int64_t i = 0; i < pieces.length; i++) {
- if (i > 0) length += glue.length;
- length += ((Str_t*)((void*)pieces.data + i*pieces.stride))->length;
+ if (i > 0) ret = CORD_cat(ret, glue);
+ ret = CORD_cat(ret, *(CORD*)((void*)pieces.data + i*pieces.stride));
}
- char *data = GC_MALLOC_ATOMIC((size_t)length+1);
- char *ptr = data;
- for (int64_t i = 0; i < pieces.length; i++) {
- if (i > 0) {
- for (int64_t j = 0; j < glue.length; j++)
- *(ptr++) = glue.data[j*glue.stride];
- }
- Str_t piece = *(Str_t*)((void*)pieces.data + i*pieces.stride);
- for (int64_t j = 0; j < piece.length; j++)
- *(ptr++) = piece.data[j*piece.stride];
- }
- return (Str_t){.data=data, .length=length, .stride=1};
+ return ret;
}
public struct {
TypeInfo type;
- Str_t (*uppercased)(const Str_t s);
- Str_t (*lowercased)(const Str_t s);
- Str_t (*capitalized)(const Str_t s);
- Str_t (*titlecased)(const Str_t s);
- bool (*starts_with)(const Str_t s, const Str_t prefix);
- bool (*ends_with)(const Str_t s, const Str_t suffix);
- Str_t (*without_prefix)(const Str_t s, const Str_t prefix);
- Str_t (*without_suffix)(const Str_t s, const Str_t suffix);
- Str_t (*trimmed)(const Str_t s, const Str_t trim_chars, bool trim_left, bool trim_right);
- Str_t (*slice)(Str_t *s, int64_t first, int64_t stride, int64_t length, bool readonly, const TypeInfo *type);
- const char *(*c_string)(const Str_t str);
- Str_t (*from_c_string)(const char *str);
- find_result_t (*find)(const Str_t str, const Str_t pat);
- Str_t (*replace)(Str_t text, Str_t pat, Str_t replacement, int64_t limit);
- Str_t (*quoted)(const Str_t text, const char *dsl, bool colorize);
- Str_Array_t (*split)(const Str_t str, const Str_t split_chars);
- Str_t (*join)(Str_t glue, Str_Array_t pieces);
- bool (*equal)(const Str_t *x, const Str_t *y);
- int32_t (*compare)(const Str_t *x, const Str_t *y);
- int (*hash)(const Str_t *s, const TypeInfo *type);
- CORD (*cord)(const Str_t *s, bool colorize, const TypeInfo *type);
+ CORD (*uppercased)(CORD s);
+ CORD (*lowercased)(CORD s);
+ CORD (*titlecased)(CORD s);
+ bool (*has)(CORD s, CORD prefix, where_e where);
+ bool (*ends_with)(CORD s, CORD suffix);
+ CORD (*without)(CORD s, CORD target, where_e where);
+ CORD (*without_suffix)(CORD s, CORD suffix);
+ CORD (*trimmed)(CORD s, CORD trim_chars, where_e where);
+ CORD (*slice)(CORD s, int64_t first, int64_t stride, int64_t length);
+ char *(*c_string)(CORD str);
+ CORD (*from_c_string)(char *str);
+ find_result_t (*find)(CORD str, CORD pat);
+ CORD (*replace)(CORD text, CORD pat, CORD replacement, int64_t limit);
+ CORD (*quoted)(CORD text, bool colorize);
+ Str_Array_t (*split)(CORD str, CORD split_chars);
+ CORD (*join)(CORD glue, Str_Array_t pieces);
+ bool (*equal)(CORD *x, CORD *y);
+ int32_t (*compare)(CORD *x, CORD *y);
+ int (*hash)(CORD *s, TypeInfo *type);
+ CORD (*cord)(CORD *s, bool colorize, TypeInfo *type);
} Str_type = {
.type={
.name="Str",
- .size=sizeof(Str_t),
- .align=alignof(Str_t),
+ .size=sizeof(CORD),
+ .align=alignof(CORD),
.tag=CustomInfo,
.CustomInfo={
- .cord=(void*)Str__cord,
+ .cord=(void*)Str__quoted,
.compare=(void*)Str__compare,
.equal=(void*)Str__equal,
.hash=(void*)Str__hash,
@@ -499,16 +294,11 @@ public struct {
},
.uppercased=Str__uppercased,
.lowercased=Str__lowercased,
- .capitalized=Str__capitalized,
.titlecased=Str__titlecased,
- .starts_with=Str__starts_with,
- .ends_with=Str__ends_with,
- .without_prefix=Str__without_prefix,
- .without_suffix=Str__without_suffix,
+ .has=Str__has,
+ .without=Str__without,
.trimmed=Str__trimmed,
- .slice=(void*)Array_slice,
- .c_string=Str__c_string,
- .from_c_string=Str__from_c_string,
+ .slice=Str__slice,
.find=Str__find,
.replace=Str__replace,
.quoted=Str__quoted,
@@ -516,82 +306,4 @@ public struct {
.join=Str__join,
};
-public CORD CString_cord(const char **s, bool colorize, const TypeInfo *type)
-{
- Str_t str = {.data=(char*)*s, .length=*s ? strlen(*s) : 0, .stride=1};
- return Str__cord(&str, colorize, type);
-}
-
-public uint32_t CString_hash(const char **s, const TypeInfo *type)
-{
- (void)type;
- if (!*s) return 0;
- uint32_t hash;
- halfsiphash(*s, strlen(*s)+1, SSS_HASH_VECTOR, (uint8_t*)&hash, sizeof(hash));
- assert(strlen(*s) > 0);
- return hash;
-}
-
-public uint32_t CString_compare(const char **x, const char **y, const TypeInfo *type)
-{
- (void)type;
- if (!*x || !*y)
- return (!*x) - (!*y);
- return strcmp(*x, *y);
-}
-
-public struct {
- TypeInfo type;
- const char *(*from_str)(const Str_t str);
- Str_t (*as_str)(const char *str);
-} CString_type = {
- .type={
- .name="CString",
- .size=sizeof(char*),
- .align=alignof(char*),
- .tag=CustomInfo,
- .CustomInfo={
- .cord=(void*)CString_cord,
- .hash=(void*)CString_hash,
- .compare=(void*)CString_compare,
- },
- },
- .from_str=Str__c_string,
- .as_str=Str__from_c_string,
-};
-
-static CORD Cord_cord(const CORD *c, bool colorize, const TypeInfo *type)
-{
- const char *str = CORD_to_const_char_star(*c);
- return CString_cord(&str, colorize, type);
-}
-
-static uint32_t Cord_hash(const CORD *c, const TypeInfo *type)
-{
- const char *str = CORD_to_const_char_star(*c);
- return CString_hash(&str, type);
-}
-
-static uint32_t Cord_compare(const char **x, const char **y, const TypeInfo *type)
-{
- (void)type;
- return CORD_cmp(*x, *y);
-}
-
-public struct {
- TypeInfo type;
-} Cord_type = {
- .type={
- .name="Cord",
- .size=sizeof(CORD),
- .align=alignof(CORD),
- .tag=CustomInfo,
- .CustomInfo={
- .cord=(void*)Cord_cord,
- .hash=(void*)Cord_hash,
- .compare=(void*)Cord_compare,
- },
- },
-};
-
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/builtins/string.h b/builtins/string.h
index 1d6a91a4..c58c4468 100644
--- a/builtins/string.h
+++ b/builtins/string.h
@@ -4,16 +4,7 @@
#include <stdint.h>
typedef struct {
- char *data;
- int64_t length:42;
- uint8_t free:4, cow:1, atomic:1;
- int16_t stride:16;
-} Str_t;
-
-#define STRING(s) ((Str_t){.data=s, .length=(int32_t)(sizeof(s)), .stride=1, .free=0})
-
-typedef struct {
- Str_t *data;
+ CORD *data;
unsigned long int length:42;
unsigned short int free:4, cow:1, atomic:1;
short int stride:16;
diff --git a/compile.c b/compile.c
index 01aeba63..e23c894c 100644
--- a/compile.c
+++ b/compile.c
@@ -115,7 +115,7 @@ CORD compile(env_t *env, ast_t *ast)
CORD code = "\"";
CORD_pos i;
CORD_FOR(i, literal) {
- int c = CORD_pos_fetch(i);
+ char c = CORD_pos_fetch(i);
switch (c) {
case '\\': code = CORD_cat(code, "\\\\"); break;
case '"': code = CORD_cat(code, "\\\""); break;
diff --git a/metamethods/cord.c b/metamethods/cord.c
index 4fd00cd3..e927ea7c 100644
--- a/metamethods/cord.c
+++ b/metamethods/cord.c
@@ -5,99 +5,133 @@
#include <stdint.h>
#include "../util.h"
+#include "../builtins/datatypes.h"
-typedef CORD (*custom_cord_func)(void *x, bool use_color);
+typedef struct cord_info_s cord_info_t;
+struct cord_info_s {
+ union {
+ struct {
+ char sigil;
+ CORD (*func)(void *x, bool use_color, cord_info_t *info);
+ cord_info_t *info;
+ } pointer;
+ int8_t precision;
+ struct {
+ CORD (*func)(void *x, bool use_color, cord_info_t *info);
+ cord_info_t *info;
+ } array;
+ struct {
+ CORD (*key_func)(void *x, bool use_color, cord_info_t *info);
+ cord_info_t *key_info;
+ size_t value_offset;
+ CORD (*value_func)(void *x, bool use_color, cord_info_t *info);
+ cord_info_t *value_info;
+ } table;
+ };
+};
+#define INFO(type, ...) (&(cord_info_t){.type={__VA_ARGS__}})
-typedef struct {
- void *data;
- int64_t length:42;
- uint8_t free:4;
- bool copy_on_write:1, atomic:1;
- int16_t stride:16;
-} generic_array_t;
+// x := @[Str]
+// Pointer__as_str(&x, color, INFO(pointer, '@', Array__as_str, INFO(array, Str__quoted, NULL)))
-static CORD vas_cord(void *x, bool use_color, const char **fmt, va_list args)
+// x := {Int=>Bool}
+// Table__as_str(&x, color, INFO(table, Int64__as_str, NULL, Bool__as_str, NULL))
+
+// x := [[Foo]]
+// Array__as_str(&x, color, INFO(array, Array__as_str, INFO(array, Foo__as_str, NULL)))
+
+CORD Bool__as_str(void *x, bool color, cord_info_t *info) {
+ if (!x) return "Bool";
+ return *(bool*)x ? "yes" : "no";
+}
+
+CORD Int__as_str(void *x, bool color, cord_info_t *info) {
+ if (!x) return "Int";
+ return CORD_asprintf("%ld", *(Int_t)x);
+}
+
+CORD Int32__as_str(void *x, bool color, cord_info_t *info) {
+ if (!x) return "Int32";
+ return CORD_asprintf("%d", *(Int32_t)x);
+}
+
+CORD Int16__as_str(void *x, bool color, cord_info_t *info) {
+ if (!x) return "Int16";
+ return CORD_asprintf("%d", *(Int16_t)x);
+}
+
+CORD Int8__as_str(void *x, bool color, cord_info_t *info) {
+ if (!x) return "Int8";
+ return CORD_asprintf("%d", *(Int8_t)x);
+}
+
+CORD Num__as_str(void *x, bool color, cord_info_t *info) {
+ if (!x) return "Num";
+ return CORD_asprintf("%g", *(Num_t)x);
+}
+
+CORD Num32__as_str(void *x, bool color, cord_info_t *info) {
+ if (!x) return "Num32";
+ return CORD_asprintf("%g", *(Num32_t)x);
+}
+
+CORD Str__as_str(void *x, bool color, cord_info_t *info) {
+ if (!x) return "Str";
+ return Str__quoted(*(CORD*)x, color, NULL);
+}
+
+CORD Pointer__as_str(void *x, bool color, cord_info_t *info)
{
-#define CLR(color, str) (use_color ? "\x1b["color"m" str "\x1b[m" : str)
- char c = **fmt;
- ++(*fmt);
- switch (c) {
- case '@': case '?': case '&': {
- if (!x)
- return CORD_asprintf(use_color ? "\x1b[34;1m!\x1b[m%r" : "!%r", vas_cord(NULL, use_color, fmt, args));
- void *ptr = *(void**)x;
- char sigil = ptr ? c : '!';
- return CORD_asprintf(use_color ? "\x1b[34;1m%c\x1b[m%r" : "%c%r", sigil, vas_cord(ptr, use_color, fmt, args));
- }
- case 'B': {
- if (!x) return "Bool";
- return *(bool*)x ? CLR("35", "yes") : CLR("35", "no");
- }
- case 'I': {
- size_t bits = va_arg(args, size_t);
- switch (bits) {
- case 64:
- return x ? CORD_asprintf(CLR("35", "%ld"), *(int64_t*)x) : "Int64";
- case 32:
- return x ? CORD_asprintf(CLR("35", "%d"), *(int32_t*)x) : "Int32";
- case 16:
- return x ? CORD_asprintf(CLR("35", "%d"), *(int16_t*)x) : "Int16";
- case 8:
- return x ? CORD_asprintf(CLR("35", "%d"), *(int8_t*)x) : "Int8";
- default: errx(1, "Unsupported Int precision: %ld", bits);
- }
- }
- case 'N': {
- size_t bits = va_arg(args, size_t);
- switch (bits) {
- case 64:
- return x ? CORD_asprintf(CLR("35", "%g"), *(double*)x) : "Num64";
- case 32:
- return x ? CORD_asprintf(CLR("35", "%g"), *(double*)x) : "Num32";
- default: errx(1, "Unsupported Num precision: %ld", bits);
- }
- }
- case 'S': {
- return x ? *(CORD*)x : "Str";
- }
- case '[': {
- if (!x) {
- CORD cord = CORD_asprintf("[%r]", vas_cord(NULL, use_color, fmt, args));
- if (**fmt == ']')
- ++(*fmt);
- return cord;
- }
- CORD cord = "[";
- generic_array_t *arr = x;
- for (int64_t i = 0; i < arr->length; i++) {
- const char *item_fmt = *fmt;
- va_list args_copy;
- va_copy(args_copy, args);
- if (i > 0) cord = CORD_cat(cord, ", ");
- CORD item_cord = vas_cord(arr->data + i*arr->stride, use_color, &item_fmt, args_copy);
- cord = CORD_cat(cord, item_cord);
- va_end(args_copy);
- }
- (void)vas_cord(NULL, use_color, fmt, args);
- if (**fmt == ']') ++(*fmt);
- return CORD_cat(cord, "]");
- }
- case '_': {
- custom_cord_func fn = va_arg(args, custom_cord_func);
- return fn(x, use_color);
- }
- case ' ': return "?";
- default: errx(1, "Unsupported format specifier: '%c'", c);
+ static const sigils[] = {['@']="@", ['?']="?", ['&']="&"};
+ if (!x)
+ return CORD_cat(sigils[info->pointer.sigil], info->pointer.func(NULL, color, info->pointer.info));
+ void *ptr = *(void**)x;
+ CORD pointed = info->pointer.func(ptr, color, info->pointer.info);
+ return CORD_cat(ptr ? sigils[info->pointer.sigil] : "!", pointed);
+}
+
+CORD Array__as_str(void *x, bool color, cord_info_t *info)
+{
+ if (!x)
+ return CORD_asprintf("[%r]", info->array.func(NULL, color, info->array.info));
+ CORD cord = "[";
+ array_t *arr = x;
+ for (int64_t i = 0; i < arr->length; i++) {
+ if (i > 0) cord = CORD_cat(cord, ", ");
+ cord = CORD_cat(cord, info->array.func(arr->data + i*arr->stride, color, info->array.info));
}
- errx(1, "Unreachable");
-#undef CLR
+ return CORD_cat(cord, "]");
}
-public CORD as_cord(void *x, bool use_color, const char *fmt, ...)
+CORD Table__as_str(void *x, bool color, cord_info_t *info)
{
- va_list args;
- va_start(args, fmt);
- CORD ret = vas_cord(x, use_color, &fmt, args);
- va_end(args);
- return ret;
+ if (!x) {
+ return CORD_asprintf(
+ "{%r=>%r}",
+ info->table.key_func(NULL, color, info->table.key_info),
+ info->table.value_func(NULL, color, info->table.value_info));
+ }
+ CORD cord = "{";
+ table_t *table = x;
+ array_t entries = table->entries;
+ for (int64_t i = 0; i < entries.length; i++) {
+ if (i > 0) cord = CORD_cat(cord, ", ");
+ void *key = entries.data + i*entries.stride;
+ cord = CORD_cat(cord, info->table.key_func(key, color, info->table.key_info));
+ cord = CORD_cat(cord, "=");
+ cord = CORD_cat(cord, info->table.value_func(key + info->table.value_offset, color, info->table.value_info));
+ }
+ if (table->fallback) {
+ cord = CORD_cat(cord, "; fallback=");
+ cord = CORD_cat(cord, Table__as_str(table->fallback, color, info));
+ if (table->default_value) cord = CORD_cat(cord, ", ");
+ } else if (table->default_value) {
+ if (table->default_value) cord = CORD_cat(cord, "; ");
+ }
+ if (table->default_value) {
+ cord = CORD_cat(cord, "default=");
+ cord = CORD_cat(cord, info->table.value_func(table->default_value, color, info->table.value_info));
+ }
+ return CORD_cat(cord, "}");
}
+