diff options
| -rw-r--r-- | builtins/functions.h | 7 | ||||
| -rw-r--r-- | builtins/string.c | 652 | ||||
| -rw-r--r-- | builtins/string.h | 11 | ||||
| -rw-r--r-- | compile.c | 2 | ||||
| -rw-r--r-- | metamethods/cord.c | 208 |
5 files changed, 308 insertions, 572 deletions
diff --git a/builtins/functions.h b/builtins/functions.h index c79f7fba..5d49da73 100644 --- a/builtins/functions.h +++ b/builtins/functions.h @@ -6,10 +6,9 @@ #include "string.h" -void builtin_say(Str_t str, Str_t end); -void builtin_fail(const char *fmt, ...); -void builtin_fail_array(Str_t fmt, ...); -Str_t builtin_last_err(); +void builtin_say(CORD str, CORD end); +void builtin_fail(CORD fmt, ...); +CORD builtin_last_err(); void builtin_doctest(const char *label, CORD expr, const char *type, bool use_color, const char *expected, const char *filename, int start, int end); // vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 diff --git a/builtins/string.c b/builtins/string.c index 92741903..ce641762 100644 --- a/builtins/string.c +++ b/builtins/string.c @@ -1,13 +1,16 @@ #include <assert.h> +#include <ctype.h> +#include <err.h> #include <gc.h> #include <gc/cord.h> +#include <limits.h> #include <stdalign.h> #include <stdbool.h> #include <stdint.h> #include <stdlib.h> -#include <ctype.h> #include <sys/param.h> -#include <err.h> +#include <unistr.h> +#include <unicase.h> #include "../SipHash/halfsiphash.h" #include "types.h" @@ -19,479 +22,271 @@ extern const void *SSS_HASH_VECTOR; typedef struct { - uint8_t success; + enum { FIND_FAILURE, FIND_SUCCESS } status; int32_t index; } find_result_t; -static Str_t compacted(const Str_t str) -{ - if (str.stride == 1) return str; - char *buf = GC_MALLOC_ATOMIC(str.length + 1); - for (int64_t i = 0; i < str.length; i++) - buf[i] = str.data[i*str.stride]; - return (Str_t){.data=buf, .length=str.length, .stride=1}; -} - -public CORD Str__cord(const Str_t *s, bool colorize, const TypeInfo *type) +public CORD Str__quoted(CORD str, bool colorize) { - (void)type; - char *data = s->data; // Note: it's important to have unicode strings not get broken up with // escapes, otherwise they won't print right. if (colorize) { - CORD cord = "\x1b[35m\""; - for (int32_t i = 0; i < s->length; i++) { - char c = data[i*s->stride]; + CORD quoted = "\x1b[35m\""; + CORD_pos i; + CORD_FOR(i, str) { + char c = CORD_pos_fetch(i); switch (c) { #define BACKSLASHED(esc) "\x1b[34m\\\x1b[1m" esc "\x1b[0;35m" - case '\a': cord = CORD_cat(cord, BACKSLASHED("a")); break; - case '\b': cord = CORD_cat(cord, BACKSLASHED("b")); break; - case '\x1b': cord = CORD_cat(cord, BACKSLASHED("e")); break; - case '\f': cord = CORD_cat(cord, BACKSLASHED("f")); break; - case '\n': cord = CORD_cat(cord, BACKSLASHED("n")); break; - case '\r': cord = CORD_cat(cord, BACKSLASHED("r")); break; - case '\t': cord = CORD_cat(cord, BACKSLASHED("t")); break; - case '\v': cord = CORD_cat(cord, BACKSLASHED("v")); break; - case '"': cord = CORD_cat(cord, BACKSLASHED("\"")); break; - case '\\': cord = CORD_cat(cord, BACKSLASHED("\\")); break; + case '\a': quoted = CORD_cat(quoted, BACKSLASHED("a")); break; + case '\b': quoted = CORD_cat(quoted, BACKSLASHED("b")); break; + case '\x1b': quoted = CORD_cat(quoted, BACKSLASHED("e")); break; + case '\f': quoted = CORD_cat(quoted, BACKSLASHED("f")); break; + case '\n': quoted = CORD_cat(quoted, BACKSLASHED("n")); break; + case '\r': quoted = CORD_cat(quoted, BACKSLASHED("r")); break; + case '\t': quoted = CORD_cat(quoted, BACKSLASHED("t")); break; + case '\v': quoted = CORD_cat(quoted, BACKSLASHED("v")); break; + case '"': quoted = CORD_cat(quoted, BACKSLASHED("\"")); break; + case '\\': quoted = CORD_cat(quoted, BACKSLASHED("\\")); break; case '\x00' ... '\x06': case '\x0E' ... '\x1A': case '\x1C' ... '\x1F': case '\x7F' ... '\x7F': - CORD_sprintf(&cord, "%r" BACKSLASHED("x%02X"), cord, c); + CORD_sprintf("ed, "%r" BACKSLASHED("x%02X"), quoted, c); break; - default: cord = CORD_cat_char(cord, c); break; + default: quoted = CORD_cat_char(quoted, c); break; #undef BACKSLASHED } } - cord = CORD_cat(cord, "\"\x1b[m"); - if (strcmp(type->name, "Str") != 0) - CORD_sprintf(&cord, "\x1b[0;1m%s::\x1b[m%r", type->name, cord); - return cord; + quoted = CORD_cat(quoted, "\"\x1b[m"); + return quoted; } else { - CORD cord = "\""; - for (int32_t i = 0; i < s->length; i++) { - char c = data[i*s->stride]; + CORD quoted = "\""; + CORD_pos i; + CORD_FOR(i, str) { + char c = CORD_pos_fetch(i); switch (c) { - case '\a': cord = CORD_cat(cord, "\\a"); break; - case '\b': cord = CORD_cat(cord, "\\b"); break; - case '\x1b': cord = CORD_cat(cord, "\\e"); break; - case '\f': cord = CORD_cat(cord, "\\f"); break; - case '\n': cord = CORD_cat(cord, "\\n"); break; - case '\r': cord = CORD_cat(cord, "\\r"); break; - case '\t': cord = CORD_cat(cord, "\\t"); break; - case '\v': cord = CORD_cat(cord, "\\v"); break; - case '"': cord = CORD_cat(cord, "\\\""); break; - case '\\': cord = CORD_cat(cord, "\\\\"); break; + case '\a': quoted = CORD_cat(quoted, "\\a"); break; + case '\b': quoted = CORD_cat(quoted, "\\b"); break; + case '\x1b': quoted = CORD_cat(quoted, "\\e"); break; + case '\f': quoted = CORD_cat(quoted, "\\f"); break; + case '\n': quoted = CORD_cat(quoted, "\\n"); break; + case '\r': quoted = CORD_cat(quoted, "\\r"); break; + case '\t': quoted = CORD_cat(quoted, "\\t"); break; + case '\v': quoted = CORD_cat(quoted, "\\v"); break; + case '"': quoted = CORD_cat(quoted, "\\\""); break; + case '\\': quoted = CORD_cat(quoted, "\\\\"); break; case '\x00' ... '\x06': case '\x0E' ... '\x1A': case '\x1C' ... '\x1F': case '\x7F' ... '\x7F': - CORD_sprintf(&cord, "%r\\x%02X", cord, c); + CORD_sprintf("ed, "%r\\x%02X", quoted, c); break; - default: cord = CORD_cat_char(cord, c); break; + default: quoted = CORD_cat_char(quoted, c); break; } } - cord = CORD_cat_char(cord, '"'); - if (strcmp(type->name, "Str") != 0) - CORD_sprintf(&cord, "%s::%r", type->name, cord); - return cord; + quoted = CORD_cat_char(quoted, '"'); + return quoted; } } -public int32_t Str__compare(const Str_t *x, const Str_t *y) +public int Str__compare(CORD *x, CORD *y) { - int64_t length = x->length < y->length ? x->length : y->length; - for (int64_t i = 0; i < length; i++) { - char xc = x->data[i*x->stride], yc = y->data[i*y->stride]; - if (xc != yc) - return (xc > yc) ? 1 : -1; - } - return (x->length > y->length) - (x->length < y->length); + return CORD_cmp(*x, *y); } -public bool Str__equal(const Str_t *x, const Str_t *y) +public bool Str__equal(CORD *x, CORD *y) { - return (Str__compare(x, y) == 0); + return CORD_cmp(*x, *y) == 0; } -public int Str__hash(const Str_t *s, const TypeInfo *type) +public uint32_t Str__hash(CORD *cord) { - (void)type; - if (s->length == 0 || !s->data) return 0; + if (!*cord) return 0; - const char *data; - if (s->stride == 1) { - data = s->data; - } else { - char *buf = alloca(s->length); - for (int64_t i = 0; i < s->length; i++) - buf[i] = s->data[i*s->stride]; - data = buf; - } + const char *str = CORD_to_const_char_star(*cord); + *cord = str; uint32_t hash; - halfsiphash(data, s->length, SSS_HASH_VECTOR, (uint8_t*)&hash, sizeof(hash)); + halfsiphash(str, strlen(str)+1, SSS_HASH_VECTOR, (uint8_t*)&hash, sizeof(hash)); return hash; } -public Str_t Str__uppercased(const Str_t s) -{ - char *s2 = GC_MALLOC_ATOMIC(s.length + 1); - for (int64_t i = 0; i < s.length; i++) - s2[i] = toupper(s.data[i*s.stride]); - return (Str_t){.data=s2, .length=s.length, .stride=1}; -} - -public Str_t Str__lowercased(const Str_t s) +public CORD Str__uppercased(CORD str) { - char *s2 = GC_MALLOC_ATOMIC(s.length + 1); - for (int64_t i = 0; i < s.length; i++) - s2[i] = tolower(s.data[i*s.stride]); - return (Str_t){.data=s2, .length=s.length, .stride=1}; + if (!str) return str; + size_t len = strlen(str) + 1; + uint8_t *dest = GC_MALLOC_ATOMIC(len); + return (CORD)u8_toupper((const uint8_t*)str, len-1, uc_locale_language(), NULL, dest, &len); } -public Str_t Str__capitalized(const Str_t s) +public CORD Str__lowercased(CORD str) { - char *s2 = GC_MALLOC_ATOMIC(s.length + 1); - int64_t i; - for (i = 0; i < s.length; i++) { - if (isalpha(s.data[i*s.stride])) { - s2[i] = toupper(s.data[i*s.stride]); - ++i; - break; - } else { - s2[i] = s.data[i*s.stride]; - } - } - for (; i < s.length; i++) - s2[i] = s.data[i*s.stride]; - return (Str_t){.data=s2, .length=s.length, .stride=1}; + if (!str) return str; + size_t len = strlen(str) + 1; + uint8_t *dest = GC_MALLOC_ATOMIC(len); + return (CORD)u8_tolower((const uint8_t*)str, len-1, uc_locale_language(), NULL, dest, &len); } -public Str_t Str__titlecased(const Str_t s) +public CORD Str__titlecased(CORD str) { - char *s2 = GC_MALLOC_ATOMIC(s.length + 1); - bool should_uppercase = true; - for (int64_t i = 0; i < s.length; i++) { - if (isalpha(s.data[i*s.stride])) { - if (should_uppercase) { - s2[i] = toupper(s.data[i*s.stride]); - should_uppercase = false; - } else { - s2[i] = tolower(s.data[i*s.stride]); - } - } else { - should_uppercase = true; - s2[i] = s.data[i*s.stride]; - } - } - return (Str_t){.data=s2, .length=s.length, .stride=1}; -} - -public bool Str__starts_with(const Str_t s, const Str_t prefix) -{ - if (s.length < prefix.length) return false; - for (int64_t i = 0; i < prefix.length; i++) { - if (s.data[i*s.stride] != prefix.data[i*prefix.stride]) - return false; - } - return true; + if (!str) return str; + size_t len = strlen(str) + 1; + uint8_t *dest = GC_MALLOC_ATOMIC(len); + return (CORD)u8_totitle((const uint8_t*)str, len-1, uc_locale_language(), NULL, dest, &len); } -public bool Str__ends_with(const Str_t s, const Str_t suffix) -{ - if (s.length < suffix.length) return false; - for (int64_t i = 0; i < suffix.length; i++) { - if (s.data[(s.length-suffix.length+i)*s.stride] != suffix.data[i*suffix.stride]) - return false; - } - return true; -} +typedef enum { WHERE_ANYWHERE, WHERE_START, WHERE_END } where_e; -public Str_t Str__without_prefix(const Str_t s, const Str_t prefix) +public bool Str__has(CORD str, CORD target, where_e where) { - if (s.length < prefix.length) return s; - for (int64_t i = 0; i < prefix.length; i++) { - if (s.data[i*s.stride] != prefix.data[i*prefix.stride]) - return s; + if (!target) return true; + if (!str) return false; + + if (where == WHERE_START) { + return (CORD_ncmp(str, 0, target, 0, CORD_len(target)) == 0); + } else if (where == WHERE_END) { + size_t str_len = CORD_len(str); + size_t target_len = CORD_len(target); + return (str_len >= target_len && CORD_ncmp(str, str_len-target_len, target, 0, target_len) == 0); + } else { + size_t pos = CORD_str(str, 0, target); + return (pos != CORD_NOT_FOUND); } - return (Str_t){ - .data=s.data + prefix.length*s.stride, - .length=s.length - prefix.length, - .stride=s.stride, - .cow=0, .atomic=1, - }; } -public Str_t Str__without_suffix(const Str_t s, const Str_t suffix) +public CORD Str__without(CORD str, CORD target, where_e where) { - if (s.length < suffix.length) return s; - for (int64_t i = 0; i < suffix.length; i++) { - if (s.data[(s.length - suffix.length + i)*s.stride] != suffix.data[i*suffix.stride]) - return s; + if (!str || !target) return str; + + size_t target_len = CORD_len(target); + if (where == WHERE_START) { + if (CORD_ncmp(str, 0, target, 0, target_len) == 0) + return CORD_substr(str, target_len, SIZE_MAX); + return str; + } else if (where == WHERE_END) { + size_t str_len = CORD_len(str); + if (CORD_ncmp(str, str_len-target_len, target, 0, target_len) == 0) + return CORD_substr(str, 0, str_len - target_len); + return str; + } else { + errx(1, "Not implemented"); } - return (Str_t){ - .data=s.data, - .length=s.length - suffix.length, - .stride=s.stride, - .cow=0, .atomic=1, - }; } -public Str_t Str__trimmed(const Str_t s, const Str_t trim_chars, bool trim_left, bool trim_right) +public CORD Str__trimmed(CORD str, CORD skip, where_e where) { - int64_t length = s.length; - int64_t start = 0; - if (trim_left) { - for (; start < s.length; start++) { - for (int64_t t = 0; t < trim_chars.length; t++) { - if (s.data[start*s.stride] == trim_chars.data[t*trim_chars.stride]) - goto found_ltrim; - } - goto done_trimming_left; - found_ltrim: - --length; - } - } - done_trimming_left:; - if (trim_right) { - while (length > 0) { - for (int64_t t = 0; t < trim_chars.length; t++) { - if (s.data[(start+length-1)*s.stride] == trim_chars.data[t*trim_chars.stride]) - goto found_rtrim; - } - goto done_trimming_right; - found_rtrim: - --length; - } + if (!str || !skip) return str; + const uint8_t *ustr = (const uint8_t*)CORD_to_const_char_star(str); + const uint8_t *uskip = (const uint8_t*)CORD_to_const_char_star(skip); + if (where == WHERE_START) { + size_t span = u8_strspn(ustr, uskip); + return (CORD)ustr + span; + } else if (where == WHERE_END) { + size_t len = u8_strlen(ustr); + const uint8_t *back = ustr + len; + size_t back_span = 0; + while (back - back_span > ustr && u8_strspn(back-back_span-1, uskip) > back_span) + ++back_span; + return CORD_substr((CORD)ustr, 0, len - back_span); + } else { + size_t span = u8_strspn(ustr, uskip); + size_t len = u8_strlen(ustr); + const uint8_t *back = ustr + len; + size_t back_span = 0; + while (back - back_span > ustr + span && u8_strspn(back-back_span-1, uskip) > back_span) + ++back_span; + return CORD_substr((CORD)(ustr + span), 0, len - span - back_span); } - done_trimming_right:; - return (Str_t){.data=s.data+start*s.stride, .length=length, .stride=s.stride}; -} - -public const char *Str__c_string(const Str_t str) -{ - if (str.length == 0) - return ""; - else if (str.stride == 1 - // Verify that the '\0' would be on the same page, so unlikely to segfault: - && (((uint64_t)str.data + str.length + 1) & 0xFFF) != 0 - // Check for nul-termination - && str.data[str.length+1] == '\0') - return str.data; - - char *buf = GC_MALLOC_ATOMIC(str.length + 1); - for (int64_t i = 0; i < str.length; i++) - buf[i] = str.data[i*str.stride]; - buf[str.length] = '\0'; - return buf; } -public Str_t Str__from_c_string(const char *str) +public CORD Str__slice(CORD str, int64_t first, int64_t stride, int64_t length) { - size_t length = str ? strlen(str) : 0; - if (length == 0) return (Str_t){.length=0, .stride=0}; - char *buf = GC_MALLOC_ATOMIC(length + 1); - memcpy(buf, str, length+1); - buf[length+1] = '\0'; - return (Str_t){.data=buf, .length=(int64_t)length, .stride=1}; + if (stride != 1) errx(1, "Slicing with non-1 stride is not supported"); + return CORD_substr(str, first-1, length); } -public find_result_t Str__find(const Str_t str, const Str_t pat) +public find_result_t Str__find(CORD str, CORD pat) { - if (str.length < pat.length) return (find_result_t){.success=0}; - if (pat.length == 0) return (find_result_t){.success=1, .index=1}; - - // For short strings, do naive approach: - // if (str.length*pat.length < UCHAR_MAX) { - for (int64_t s = 0; s < str.length; s++) { - for (int64_t p = 0; p < pat.length; p++) { - if (str.data[s*str.stride] != pat.data[p*pat.stride]) - goto not_a_match; - } - return (find_result_t){.success=1, .index=s+1}; - not_a_match:; - } - return (find_result_t){.success=0}; - // } - - // // Boyer-Moore algorithm: - // static int skip[UCHAR_MAX]; - // for (int32_t i = 0; i <= UCHAR_MAX; ++i) - // skip[i] = str.length; - // for (int32_t i = 0; i < pat.length; ++i) - // skip[(unsigned char)pat.data[i*pat.stride]] = pat.length - i - 1; - // char lastpatchar = pat.data[(pat.length - 1)*pat.stride]; - // int32_t min_skip = pat.length; - // for (int32_t p = 0; p < pat.length - 1; ++p) { - // if (pat.data[p*pat.stride] == lastpatchar) - // min_skip = pat.length - p - 1; - // } - - // for (int32_t i = pat.length - 1; i < str.length; ) { - // // Use skip table: - // int32_t can_skip = skip[(unsigned char)str.data[i*str.stride]]; - // if (can_skip != 0) { - // i += can_skip; - // continue; - // } - // // Check for exact match: - // for (int32_t j = 0; j < pat.length; j++) { - // if (str.data[(i-pat.length+j)*str.stride] != pat.data[j*pat.stride]) { - // // Mismatch: - // i += min_skip; - // goto keep_going; - // } - // } - // return i - pat.length + 1; - // keep_going: continue; - // } - // return 0; + if (!pat) return (find_result_t){.status=FIND_SUCCESS, .index=1}; + size_t pos = CORD_str(str, 0, pat); + return (pos == CORD_NOT_FOUND) ? (find_result_t){.status=FIND_FAILURE} : (find_result_t){.status=FIND_SUCCESS, .index=(int32_t)pos}; } -public Str_t Str__replace(Str_t text, Str_t pat, Str_t replacement, int64_t limit) +public CORD Str__replace(CORD text, CORD pat, CORD replacement, int64_t limit) { - text = compacted(text); - pat = compacted(pat); - replacement = compacted(replacement); - char *buf; - size_t size; - FILE *mem = open_memstream(&buf, &size); - for (const char *pos = text.data; ; --limit) { - const char *match = limit == 0 ? NULL : memmem(pos, (int64_t)text.length - (int64_t)(pos - text.data), pat.data, pat.length); - if (match) { - fwrite(pos, 1, (size_t)(match - pos), mem); - fwrite(replacement.data, 1, replacement.length, mem); - pos = match + pat.length; - } else { - fwrite(pos, 1, (size_t)text.length - (size_t)(pos - text.data), mem); - break; - } + if (!text || !pat) return text; + CORD ret = NULL; + size_t pos = 0, pat_len = CORD_len(pat); + for (size_t found; limit > 0 && (found=CORD_str(text, pos, pat)) != CORD_NOT_FOUND; --limit) { + ret = CORD_cat(ret, CORD_substr(text, pos, found)); + ret = CORD_cat(ret, replacement); + pos = found + pat_len; } - fflush(mem); - char *str = GC_MALLOC_ATOMIC(size + 1); - memcpy(str, buf, size+1); - fclose(mem); - free(buf); - return (Str_t){.data=str, .length=size, .stride=1}; + return CORD_cat(ret, CORD_substr(text, pos, SIZE_MAX)); } -public Str_t Str__quoted(const Str_t text, const char *dsl, bool colorize) +public Str_Array_t Str__split(CORD str, CORD split) { - char *buf; - size_t size; - FILE *mem = open_memstream(&buf, &size); - if (colorize) fputs("\x1b[35m", mem); - if (dsl && dsl[0]) fprintf(mem, "$%s", dsl); - const char *escape_color = colorize ? "\x1b[1;34m" : ""; - const char *reset_color = colorize ? "\x1b[0;35m" : ""; - fputc('"', mem); - for (int64_t i = 0; i < text.length; i++) { - char c = text.data[i*text.stride]; - switch (c) { - case '\\': fprintf(mem, "%s\\\\%s", escape_color, reset_color); break; - case '"': fprintf(mem, "%s\\\"%s", escape_color, reset_color); break; - case '\n': fprintf(mem, "%s\\n%s", escape_color, reset_color); break; - case '\t': fprintf(mem, "%s\\t%s", escape_color, reset_color); break; - case '\r': fprintf(mem, "%s\\r%s", escape_color, reset_color); break; - case '\a': fprintf(mem, "%s\\a%s", escape_color, reset_color); break; - case '\b': fprintf(mem, "%s\\b%s", escape_color, reset_color); break; - case '\v': fprintf(mem, "%s\\v%s", escape_color, reset_color); break; - default: { - if (isprint(c)) - fputc(c, mem); - else - fprintf(mem, "%s\\x%02X%s", escape_color, (int)c, reset_color); - } - } - } - fputc('"', mem); - if (colorize) fputs("\x1b[m", mem); - fflush(mem); - char *str = GC_MALLOC_ATOMIC(size + 1); - memcpy(str, buf, size+1); - fclose(mem); - free(buf); - return (Str_t){.data=str, .length=size, .stride=1}; -} - -public Str_Array_t Str__split(const Str_t str, const Str_t split_chars) -{ - if (str.length == 0) return (Str_Array_t){.stride=sizeof(Str_t)}; - Str_Array_t strings = {.stride=sizeof(Str_t)}; + if (!str) return (Str_Array_t){.stride=sizeof(CORD)}; + Str_Array_t strings = {.stride=sizeof(CORD)}; int64_t capacity = 0; - bool separators[256] = {0}; - for (int64_t i = 0; i < split_chars.length; i++) - separators[(int)split_chars.data[split_chars.stride*i]] = true; - for (int64_t i = 0; i < str.length; i++) { - if (separators[(int)str.data[str.stride*i]]) continue; - int64_t length = 0; - while (i < str.length && !separators[(int)str.data[str.stride*i]]) { - ++length; - ++i; + const uint8_t *ustr = (uint8_t*)CORD_to_const_char_star(str); + const uint8_t *usplit = (uint8_t*)CORD_to_const_char_star(split); + for (int64_t i = 0; ; ) { + i += u8_strcspn(ustr + i, usplit); + size_t span = u8_strspn(ustr + i, usplit); + if (span > 0) { + CORD chunk = CORD_substr((CORD)ustr, i, i+span); + if (capacity <= 0) + strings.data = GC_REALLOC(strings.data, sizeof(CORD)*(capacity += 10)); + strings.data[strings.length++] = chunk; + i += span; + } else { + break; } - strings.data = GC_REALLOC(strings.data, sizeof(Str_t)*(capacity += 1)); - strings.data[strings.length++] = (Str_t){ - .data=&str.data[str.stride*(i-length)], - .length=length, - .stride=str.stride, - }; } return strings; } -public Str_t Str__join(Str_t glue, Str_Array_t pieces) +public CORD Str__join(CORD glue, Str_Array_t pieces) { - if (pieces.length == 0) return (Str_t){.stride=1}; + if (pieces.length == 0) return CORD_EMPTY; - int64_t length = 0; + CORD ret = CORD_EMPTY; for (int64_t i = 0; i < pieces.length; i++) { - if (i > 0) length += glue.length; - length += ((Str_t*)((void*)pieces.data + i*pieces.stride))->length; + if (i > 0) ret = CORD_cat(ret, glue); + ret = CORD_cat(ret, *(CORD*)((void*)pieces.data + i*pieces.stride)); } - char *data = GC_MALLOC_ATOMIC((size_t)length+1); - char *ptr = data; - for (int64_t i = 0; i < pieces.length; i++) { - if (i > 0) { - for (int64_t j = 0; j < glue.length; j++) - *(ptr++) = glue.data[j*glue.stride]; - } - Str_t piece = *(Str_t*)((void*)pieces.data + i*pieces.stride); - for (int64_t j = 0; j < piece.length; j++) - *(ptr++) = piece.data[j*piece.stride]; - } - return (Str_t){.data=data, .length=length, .stride=1}; + return ret; } public struct { TypeInfo type; - Str_t (*uppercased)(const Str_t s); - Str_t (*lowercased)(const Str_t s); - Str_t (*capitalized)(const Str_t s); - Str_t (*titlecased)(const Str_t s); - bool (*starts_with)(const Str_t s, const Str_t prefix); - bool (*ends_with)(const Str_t s, const Str_t suffix); - Str_t (*without_prefix)(const Str_t s, const Str_t prefix); - Str_t (*without_suffix)(const Str_t s, const Str_t suffix); - Str_t (*trimmed)(const Str_t s, const Str_t trim_chars, bool trim_left, bool trim_right); - Str_t (*slice)(Str_t *s, int64_t first, int64_t stride, int64_t length, bool readonly, const TypeInfo *type); - const char *(*c_string)(const Str_t str); - Str_t (*from_c_string)(const char *str); - find_result_t (*find)(const Str_t str, const Str_t pat); - Str_t (*replace)(Str_t text, Str_t pat, Str_t replacement, int64_t limit); - Str_t (*quoted)(const Str_t text, const char *dsl, bool colorize); - Str_Array_t (*split)(const Str_t str, const Str_t split_chars); - Str_t (*join)(Str_t glue, Str_Array_t pieces); - bool (*equal)(const Str_t *x, const Str_t *y); - int32_t (*compare)(const Str_t *x, const Str_t *y); - int (*hash)(const Str_t *s, const TypeInfo *type); - CORD (*cord)(const Str_t *s, bool colorize, const TypeInfo *type); + CORD (*uppercased)(CORD s); + CORD (*lowercased)(CORD s); + CORD (*titlecased)(CORD s); + bool (*has)(CORD s, CORD prefix, where_e where); + bool (*ends_with)(CORD s, CORD suffix); + CORD (*without)(CORD s, CORD target, where_e where); + CORD (*without_suffix)(CORD s, CORD suffix); + CORD (*trimmed)(CORD s, CORD trim_chars, where_e where); + CORD (*slice)(CORD s, int64_t first, int64_t stride, int64_t length); + char *(*c_string)(CORD str); + CORD (*from_c_string)(char *str); + find_result_t (*find)(CORD str, CORD pat); + CORD (*replace)(CORD text, CORD pat, CORD replacement, int64_t limit); + CORD (*quoted)(CORD text, bool colorize); + Str_Array_t (*split)(CORD str, CORD split_chars); + CORD (*join)(CORD glue, Str_Array_t pieces); + bool (*equal)(CORD *x, CORD *y); + int32_t (*compare)(CORD *x, CORD *y); + int (*hash)(CORD *s, TypeInfo *type); + CORD (*cord)(CORD *s, bool colorize, TypeInfo *type); } Str_type = { .type={ .name="Str", - .size=sizeof(Str_t), - .align=alignof(Str_t), + .size=sizeof(CORD), + .align=alignof(CORD), .tag=CustomInfo, .CustomInfo={ - .cord=(void*)Str__cord, + .cord=(void*)Str__quoted, .compare=(void*)Str__compare, .equal=(void*)Str__equal, .hash=(void*)Str__hash, @@ -499,16 +294,11 @@ public struct { }, .uppercased=Str__uppercased, .lowercased=Str__lowercased, - .capitalized=Str__capitalized, .titlecased=Str__titlecased, - .starts_with=Str__starts_with, - .ends_with=Str__ends_with, - .without_prefix=Str__without_prefix, - .without_suffix=Str__without_suffix, + .has=Str__has, + .without=Str__without, .trimmed=Str__trimmed, - .slice=(void*)Array_slice, - .c_string=Str__c_string, - .from_c_string=Str__from_c_string, + .slice=Str__slice, .find=Str__find, .replace=Str__replace, .quoted=Str__quoted, @@ -516,82 +306,4 @@ public struct { .join=Str__join, }; -public CORD CString_cord(const char **s, bool colorize, const TypeInfo *type) -{ - Str_t str = {.data=(char*)*s, .length=*s ? strlen(*s) : 0, .stride=1}; - return Str__cord(&str, colorize, type); -} - -public uint32_t CString_hash(const char **s, const TypeInfo *type) -{ - (void)type; - if (!*s) return 0; - uint32_t hash; - halfsiphash(*s, strlen(*s)+1, SSS_HASH_VECTOR, (uint8_t*)&hash, sizeof(hash)); - assert(strlen(*s) > 0); - return hash; -} - -public uint32_t CString_compare(const char **x, const char **y, const TypeInfo *type) -{ - (void)type; - if (!*x || !*y) - return (!*x) - (!*y); - return strcmp(*x, *y); -} - -public struct { - TypeInfo type; - const char *(*from_str)(const Str_t str); - Str_t (*as_str)(const char *str); -} CString_type = { - .type={ - .name="CString", - .size=sizeof(char*), - .align=alignof(char*), - .tag=CustomInfo, - .CustomInfo={ - .cord=(void*)CString_cord, - .hash=(void*)CString_hash, - .compare=(void*)CString_compare, - }, - }, - .from_str=Str__c_string, - .as_str=Str__from_c_string, -}; - -static CORD Cord_cord(const CORD *c, bool colorize, const TypeInfo *type) -{ - const char *str = CORD_to_const_char_star(*c); - return CString_cord(&str, colorize, type); -} - -static uint32_t Cord_hash(const CORD *c, const TypeInfo *type) -{ - const char *str = CORD_to_const_char_star(*c); - return CString_hash(&str, type); -} - -static uint32_t Cord_compare(const char **x, const char **y, const TypeInfo *type) -{ - (void)type; - return CORD_cmp(*x, *y); -} - -public struct { - TypeInfo type; -} Cord_type = { - .type={ - .name="Cord", - .size=sizeof(CORD), - .align=alignof(CORD), - .tag=CustomInfo, - .CustomInfo={ - .cord=(void*)Cord_cord, - .hash=(void*)Cord_hash, - .compare=(void*)Cord_compare, - }, - }, -}; - // vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 diff --git a/builtins/string.h b/builtins/string.h index 1d6a91a4..c58c4468 100644 --- a/builtins/string.h +++ b/builtins/string.h @@ -4,16 +4,7 @@ #include <stdint.h> typedef struct { - char *data; - int64_t length:42; - uint8_t free:4, cow:1, atomic:1; - int16_t stride:16; -} Str_t; - -#define STRING(s) ((Str_t){.data=s, .length=(int32_t)(sizeof(s)), .stride=1, .free=0}) - -typedef struct { - Str_t *data; + CORD *data; unsigned long int length:42; unsigned short int free:4, cow:1, atomic:1; short int stride:16; @@ -115,7 +115,7 @@ CORD compile(env_t *env, ast_t *ast) CORD code = "\""; CORD_pos i; CORD_FOR(i, literal) { - int c = CORD_pos_fetch(i); + char c = CORD_pos_fetch(i); switch (c) { case '\\': code = CORD_cat(code, "\\\\"); break; case '"': code = CORD_cat(code, "\\\""); break; diff --git a/metamethods/cord.c b/metamethods/cord.c index 4fd00cd3..e927ea7c 100644 --- a/metamethods/cord.c +++ b/metamethods/cord.c @@ -5,99 +5,133 @@ #include <stdint.h> #include "../util.h" +#include "../builtins/datatypes.h" -typedef CORD (*custom_cord_func)(void *x, bool use_color); +typedef struct cord_info_s cord_info_t; +struct cord_info_s { + union { + struct { + char sigil; + CORD (*func)(void *x, bool use_color, cord_info_t *info); + cord_info_t *info; + } pointer; + int8_t precision; + struct { + CORD (*func)(void *x, bool use_color, cord_info_t *info); + cord_info_t *info; + } array; + struct { + CORD (*key_func)(void *x, bool use_color, cord_info_t *info); + cord_info_t *key_info; + size_t value_offset; + CORD (*value_func)(void *x, bool use_color, cord_info_t *info); + cord_info_t *value_info; + } table; + }; +}; +#define INFO(type, ...) (&(cord_info_t){.type={__VA_ARGS__}}) -typedef struct { - void *data; - int64_t length:42; - uint8_t free:4; - bool copy_on_write:1, atomic:1; - int16_t stride:16; -} generic_array_t; +// x := @[Str] +// Pointer__as_str(&x, color, INFO(pointer, '@', Array__as_str, INFO(array, Str__quoted, NULL))) -static CORD vas_cord(void *x, bool use_color, const char **fmt, va_list args) +// x := {Int=>Bool} +// Table__as_str(&x, color, INFO(table, Int64__as_str, NULL, Bool__as_str, NULL)) + +// x := [[Foo]] +// Array__as_str(&x, color, INFO(array, Array__as_str, INFO(array, Foo__as_str, NULL))) + +CORD Bool__as_str(void *x, bool color, cord_info_t *info) { + if (!x) return "Bool"; + return *(bool*)x ? "yes" : "no"; +} + +CORD Int__as_str(void *x, bool color, cord_info_t *info) { + if (!x) return "Int"; + return CORD_asprintf("%ld", *(Int_t)x); +} + +CORD Int32__as_str(void *x, bool color, cord_info_t *info) { + if (!x) return "Int32"; + return CORD_asprintf("%d", *(Int32_t)x); +} + +CORD Int16__as_str(void *x, bool color, cord_info_t *info) { + if (!x) return "Int16"; + return CORD_asprintf("%d", *(Int16_t)x); +} + +CORD Int8__as_str(void *x, bool color, cord_info_t *info) { + if (!x) return "Int8"; + return CORD_asprintf("%d", *(Int8_t)x); +} + +CORD Num__as_str(void *x, bool color, cord_info_t *info) { + if (!x) return "Num"; + return CORD_asprintf("%g", *(Num_t)x); +} + +CORD Num32__as_str(void *x, bool color, cord_info_t *info) { + if (!x) return "Num32"; + return CORD_asprintf("%g", *(Num32_t)x); +} + +CORD Str__as_str(void *x, bool color, cord_info_t *info) { + if (!x) return "Str"; + return Str__quoted(*(CORD*)x, color, NULL); +} + +CORD Pointer__as_str(void *x, bool color, cord_info_t *info) { -#define CLR(color, str) (use_color ? "\x1b["color"m" str "\x1b[m" : str) - char c = **fmt; - ++(*fmt); - switch (c) { - case '@': case '?': case '&': { - if (!x) - return CORD_asprintf(use_color ? "\x1b[34;1m!\x1b[m%r" : "!%r", vas_cord(NULL, use_color, fmt, args)); - void *ptr = *(void**)x; - char sigil = ptr ? c : '!'; - return CORD_asprintf(use_color ? "\x1b[34;1m%c\x1b[m%r" : "%c%r", sigil, vas_cord(ptr, use_color, fmt, args)); - } - case 'B': { - if (!x) return "Bool"; - return *(bool*)x ? CLR("35", "yes") : CLR("35", "no"); - } - case 'I': { - size_t bits = va_arg(args, size_t); - switch (bits) { - case 64: - return x ? CORD_asprintf(CLR("35", "%ld"), *(int64_t*)x) : "Int64"; - case 32: - return x ? CORD_asprintf(CLR("35", "%d"), *(int32_t*)x) : "Int32"; - case 16: - return x ? CORD_asprintf(CLR("35", "%d"), *(int16_t*)x) : "Int16"; - case 8: - return x ? CORD_asprintf(CLR("35", "%d"), *(int8_t*)x) : "Int8"; - default: errx(1, "Unsupported Int precision: %ld", bits); - } - } - case 'N': { - size_t bits = va_arg(args, size_t); - switch (bits) { - case 64: - return x ? CORD_asprintf(CLR("35", "%g"), *(double*)x) : "Num64"; - case 32: - return x ? CORD_asprintf(CLR("35", "%g"), *(double*)x) : "Num32"; - default: errx(1, "Unsupported Num precision: %ld", bits); - } - } - case 'S': { - return x ? *(CORD*)x : "Str"; - } - case '[': { - if (!x) { - CORD cord = CORD_asprintf("[%r]", vas_cord(NULL, use_color, fmt, args)); - if (**fmt == ']') - ++(*fmt); - return cord; - } - CORD cord = "["; - generic_array_t *arr = x; - for (int64_t i = 0; i < arr->length; i++) { - const char *item_fmt = *fmt; - va_list args_copy; - va_copy(args_copy, args); - if (i > 0) cord = CORD_cat(cord, ", "); - CORD item_cord = vas_cord(arr->data + i*arr->stride, use_color, &item_fmt, args_copy); - cord = CORD_cat(cord, item_cord); - va_end(args_copy); - } - (void)vas_cord(NULL, use_color, fmt, args); - if (**fmt == ']') ++(*fmt); - return CORD_cat(cord, "]"); - } - case '_': { - custom_cord_func fn = va_arg(args, custom_cord_func); - return fn(x, use_color); - } - case ' ': return "?"; - default: errx(1, "Unsupported format specifier: '%c'", c); + static const sigils[] = {['@']="@", ['?']="?", ['&']="&"}; + if (!x) + return CORD_cat(sigils[info->pointer.sigil], info->pointer.func(NULL, color, info->pointer.info)); + void *ptr = *(void**)x; + CORD pointed = info->pointer.func(ptr, color, info->pointer.info); + return CORD_cat(ptr ? sigils[info->pointer.sigil] : "!", pointed); +} + +CORD Array__as_str(void *x, bool color, cord_info_t *info) +{ + if (!x) + return CORD_asprintf("[%r]", info->array.func(NULL, color, info->array.info)); + CORD cord = "["; + array_t *arr = x; + for (int64_t i = 0; i < arr->length; i++) { + if (i > 0) cord = CORD_cat(cord, ", "); + cord = CORD_cat(cord, info->array.func(arr->data + i*arr->stride, color, info->array.info)); } - errx(1, "Unreachable"); -#undef CLR + return CORD_cat(cord, "]"); } -public CORD as_cord(void *x, bool use_color, const char *fmt, ...) +CORD Table__as_str(void *x, bool color, cord_info_t *info) { - va_list args; - va_start(args, fmt); - CORD ret = vas_cord(x, use_color, &fmt, args); - va_end(args); - return ret; + if (!x) { + return CORD_asprintf( + "{%r=>%r}", + info->table.key_func(NULL, color, info->table.key_info), + info->table.value_func(NULL, color, info->table.value_info)); + } + CORD cord = "{"; + table_t *table = x; + array_t entries = table->entries; + for (int64_t i = 0; i < entries.length; i++) { + if (i > 0) cord = CORD_cat(cord, ", "); + void *key = entries.data + i*entries.stride; + cord = CORD_cat(cord, info->table.key_func(key, color, info->table.key_info)); + cord = CORD_cat(cord, "="); + cord = CORD_cat(cord, info->table.value_func(key + info->table.value_offset, color, info->table.value_info)); + } + if (table->fallback) { + cord = CORD_cat(cord, "; fallback="); + cord = CORD_cat(cord, Table__as_str(table->fallback, color, info)); + if (table->default_value) cord = CORD_cat(cord, ", "); + } else if (table->default_value) { + if (table->default_value) cord = CORD_cat(cord, "; "); + } + if (table->default_value) { + cord = CORD_cat(cord, "default="); + cord = CORD_cat(cord, info->table.value_func(table->default_value, color, info->table.value_info)); + } + return CORD_cat(cord, "}"); } + |
