From 8fab88c56f95c03ffcb4be178f5dbb21b239d95e Mon Sep 17 00:00:00 2001 From: Bruce Hill Date: Sun, 3 Mar 2024 18:15:45 -0500 Subject: Rename Str -> Text --- Makefile | 2 +- ast.c | 14 +-- ast.h | 8 +- builtins/string.c | 263 ---------------------------------------------------- builtins/string.h | 36 ------- builtins/table.c | 4 +- builtins/text.c | 263 ++++++++++++++++++++++++++++++++++++++++++++++++++++ builtins/text.h | 35 +++++++ compile.c | 58 ++++++------ docs/metamethods.md | 14 +-- docs/strings.md | 2 +- enums.c | 2 +- environment.c | 66 ++++++------- parse.c | 8 +- structs.c | 6 +- test/structs.tm | 2 +- test/tables.tm | 2 +- tomo.c | 2 +- tomo.h | 2 +- typecheck.c | 8 +- types.c | 6 +- types.h | 4 +- 22 files changed, 403 insertions(+), 404 deletions(-) delete mode 100644 builtins/string.c delete mode 100644 builtins/string.h create mode 100644 builtins/text.c create mode 100644 builtins/text.h diff --git a/Makefile b/Makefile index 51ec92e3..ebe3bc17 100644 --- a/Makefile +++ b/Makefile @@ -25,7 +25,7 @@ O=-Og CFLAGS=$(CCONFIG) $(EXTRA) $(CWARN) $(G) $(O) $(OSFLAGS) LDLIBS=-lgc -lgccjit -lcord -lm -lunistring BUILTIN_OBJS=builtins/array.o builtins/bool.o builtins/color.o builtins/nums.o builtins/functions.o builtins/integers.o \ - builtins/pointer.o builtins/memory.o builtins/string.o builtins/table.o builtins/types.o + builtins/pointer.o builtins/memory.o builtins/text.o builtins/table.o builtins/types.o all: libtomo.so tomo diff --git a/ast.c b/ast.c index 3e054cbb..71317e93 100644 --- a/ast.c +++ b/ast.c @@ -5,7 +5,7 @@ #include #include "ast.h" -#include "builtins/string.h" +#include "builtins/text.h" static const char *OP_NAMES[] = { [BINOP_UNKNOWN]="unknown", @@ -97,8 +97,8 @@ CORD ast_to_cord(ast_t *ast) T(Var, "(\x1b[36;1m%s\x1b[m)", data.name) T(Int, "(\x1b[35m%ld\x1b[m, bits=\x1b[35m%ld\x1b[m)", data.i, data.bits) T(Num, "(\x1b[35m%ld\x1b[m, bits=\x1b[35m%ld\x1b[m)", data.n, data.bits) - T(StringLiteral, "%r", Str__quoted(data.cord, true)) - 
T(StringJoin, "(%r)", ast_list_to_cord(data.children)) + T(TextLiteral, "%r", Text__quoted(data.cord, true)) + T(TextJoin, "(%r)", ast_list_to_cord(data.children)) T(Declare, "(var=%s, value=%r)", ast_to_cord(data.var), ast_to_cord(data.value)) T(Assign, "(targets=%r, values=%r)", ast_list_to_cord(data.targets), ast_list_to_cord(data.values)) T(BinaryOp, "(%r, %s, %r)", ast_to_cord(data.lhs), OP_NAMES[data.op], ast_to_cord(data.rhs)) @@ -137,10 +137,10 @@ CORD ast_to_cord(ast_t *ast) T(EnumDef, "(%s, tags=%r, namespace=%r)", data.name, tags_to_cord(data.tags), ast_to_cord(data.namespace)) T(Index, "(indexed=%r, index=%r)", ast_to_cord(data.indexed), ast_to_cord(data.index)) T(FieldAccess, "(fielded=%r, field=%s)", ast_to_cord(data.fielded), data.field) - T(DocTest, "(expr=%r, output=%r)", ast_to_cord(data.expr), Str__quoted(data.output, true)) - T(Use, "(%r)", Str__quoted(data.path, true)) - T(LinkerDirective, "(%r)", Str__quoted(data.directive, true)) - T(InlineCCode, "(%r)", Str__quoted(data.code, true)) + T(DocTest, "(expr=%r, output=%r)", ast_to_cord(data.expr), Text__quoted(data.output, true)) + T(Use, "(%r)", Text__quoted(data.path, true)) + T(LinkerDirective, "(%r)", Text__quoted(data.directive, true)) + T(InlineCCode, "(%r)", Text__quoted(data.code, true)) #undef T } return "???"; diff --git a/ast.h b/ast.h index 3cc93f43..b80aeb61 100644 --- a/ast.h +++ b/ast.h @@ -13,7 +13,7 @@ .tag=ast_tag, .__data.ast_tag={__VA_ARGS__})) #define FakeAST(ast_tag, ...) (new(ast_t, .tag=ast_tag, .__data.ast_tag={__VA_ARGS__})) #define WrapAST(ast, ast_tag, ...) 
(new(ast_t, .file=(ast)->file, .start=(ast)->start, .end=(ast)->end, .tag=ast_tag, .__data.ast_tag={__VA_ARGS__})) -#define StringAST(ast, _str) WrapAST(ast, StringLiteral, .str=heap_str(_str)) +#define TextAST(ast, _str) WrapAST(ast, TextLiteral, .str=heap_str(_str)) struct binding_s; typedef struct type_ast_s type_ast_t; @@ -91,7 +91,7 @@ typedef enum { Unknown = 0, Nil, Bool, Var, Int, Num, - StringLiteral, StringJoin, + TextLiteral, TextJoin, Declare, Assign, BinaryOp, UpdateAssign, Length, Not, Negative, HeapAllocate, StackReference, @@ -139,10 +139,10 @@ struct ast_s { } Num; struct { CORD cord; - } StringLiteral; + } TextLiteral; struct { ast_list_t *children; - } StringJoin; + } TextJoin; struct { ast_t *var; ast_t *value; diff --git a/builtins/string.c b/builtins/string.c deleted file mode 100644 index c28aa510..00000000 --- a/builtins/string.c +++ /dev/null @@ -1,263 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "../SipHash/halfsiphash.h" -#include "types.h" -#include "array.h" -#include "string.h" - -#define CLAMP(x, lo, hi) MIN(hi, MAX(x,lo)) - -public CORD Str__as_str(const void *str, bool colorize, const TypeInfo *info) -{ - (void)info; - if (!str) return "Str"; - return Str__quoted(*(CORD*)str, colorize); -} - -public CORD Str__quoted(CORD str, bool colorize) -{ - // Note: it's important to have unicode strings not get broken up with - // escapes, otherwise they won't print right. 
- if (colorize) { - CORD quoted = "\x1b[35m\""; - CORD_pos i; - CORD_FOR(i, str) { - char c = CORD_pos_fetch(i); - switch (c) { -#define BACKSLASHED(esc) "\x1b[34m\\\x1b[1m" esc "\x1b[0;35m" - case '\a': quoted = CORD_cat(quoted, BACKSLASHED("a")); break; - case '\b': quoted = CORD_cat(quoted, BACKSLASHED("b")); break; - case '\x1b': quoted = CORD_cat(quoted, BACKSLASHED("e")); break; - case '\f': quoted = CORD_cat(quoted, BACKSLASHED("f")); break; - case '\n': quoted = CORD_cat(quoted, BACKSLASHED("n")); break; - case '\r': quoted = CORD_cat(quoted, BACKSLASHED("r")); break; - case '\t': quoted = CORD_cat(quoted, BACKSLASHED("t")); break; - case '\v': quoted = CORD_cat(quoted, BACKSLASHED("v")); break; - case '"': quoted = CORD_cat(quoted, BACKSLASHED("\"")); break; - case '\\': quoted = CORD_cat(quoted, BACKSLASHED("\\")); break; - case '\x00' ... '\x06': case '\x0E' ... '\x1A': - case '\x1C' ... '\x1F': case '\x7F' ... '\x7F': - CORD_sprintf(&quoted, "%r" BACKSLASHED("x%02X"), quoted, c); - break; - default: quoted = CORD_cat_char(quoted, c); break; -#undef BACKSLASHED - } - } - quoted = CORD_cat(quoted, "\"\x1b[m"); - return quoted; - } else { - CORD quoted = "\""; - CORD_pos i; - CORD_FOR(i, str) { - char c = CORD_pos_fetch(i); - switch (c) { - case '\a': quoted = CORD_cat(quoted, "\\a"); break; - case '\b': quoted = CORD_cat(quoted, "\\b"); break; - case '\x1b': quoted = CORD_cat(quoted, "\\e"); break; - case '\f': quoted = CORD_cat(quoted, "\\f"); break; - case '\n': quoted = CORD_cat(quoted, "\\n"); break; - case '\r': quoted = CORD_cat(quoted, "\\r"); break; - case '\t': quoted = CORD_cat(quoted, "\\t"); break; - case '\v': quoted = CORD_cat(quoted, "\\v"); break; - case '"': quoted = CORD_cat(quoted, "\\\""); break; - case '\\': quoted = CORD_cat(quoted, "\\\\"); break; - case '\x00' ... '\x06': case '\x0E' ... '\x1A': - case '\x1C' ... '\x1F': case '\x7F' ... 
'\x7F': - CORD_sprintf(&quoted, "%r\\x%02X", quoted, c); - break; - default: quoted = CORD_cat_char(quoted, c); break; - } - } - quoted = CORD_cat_char(quoted, '"'); - return quoted; - } -} - -public int Str__compare(CORD *x, CORD *y) -{ - return CORD_cmp(*x, *y); -} - -public bool Str__equal(CORD *x, CORD *y) -{ - return CORD_cmp(*x, *y) == 0; -} - -public uint32_t Str__hash(CORD *cord) -{ - if (!*cord) return 0; - - const char *str = CORD_to_const_char_star(*cord); - *cord = str; - - uint32_t hash; - halfsiphash(str, strlen(str)+1, SSS_HASH_VECTOR, (uint8_t*)&hash, sizeof(hash)); - return hash; -} - -public CORD Str__upper(CORD str) -{ - if (!str) return str; - size_t len = strlen(str) + 1; - uint8_t *dest = GC_MALLOC_ATOMIC(len); - return (CORD)u8_toupper((const uint8_t*)str, len-1, uc_locale_language(), NULL, dest, &len); -} - -public CORD Str__lower(CORD str) -{ - if (!str) return str; - size_t len = strlen(str) + 1; - uint8_t *dest = GC_MALLOC_ATOMIC(len); - return (CORD)u8_tolower((const uint8_t*)str, len-1, uc_locale_language(), NULL, dest, &len); -} - -public CORD Str__title(CORD str) -{ - if (!str) return str; - size_t len = strlen(str) + 1; - uint8_t *dest = GC_MALLOC_ATOMIC(len); - return (CORD)u8_totitle((const uint8_t*)str, len-1, uc_locale_language(), NULL, dest, &len); -} - -public bool Str__has(CORD str, CORD target, where_e where) -{ - if (!target) return true; - if (!str) return false; - - if (where == WHERE_START) { - return (CORD_ncmp(str, 0, target, 0, CORD_len(target)) == 0); - } else if (where == WHERE_END) { - size_t str_len = CORD_len(str); - size_t target_len = CORD_len(target); - return (str_len >= target_len && CORD_ncmp(str, str_len-target_len, target, 0, target_len) == 0); - } else { - size_t pos = CORD_str(str, 0, target); - return (pos != CORD_NOT_FOUND); - } -} - -public CORD Str__without(CORD str, CORD target, where_e where) -{ - if (!str || !target) return str; - - size_t target_len = CORD_len(target); - if (where == WHERE_START) { - 
if (CORD_ncmp(str, 0, target, 0, target_len) == 0) - return CORD_substr(str, target_len, SIZE_MAX); - return str; - } else if (where == WHERE_END) { - size_t str_len = CORD_len(str); - if (CORD_ncmp(str, str_len-target_len, target, 0, target_len) == 0) - return CORD_substr(str, 0, str_len - target_len); - return str; - } else { - errx(1, "Not implemented"); - } -} - -public CORD Str__trimmed(CORD str, CORD skip, where_e where) -{ - if (!str || !skip) return str; - const uint8_t *ustr = (const uint8_t*)CORD_to_const_char_star(str); - const uint8_t *uskip = (const uint8_t*)CORD_to_const_char_star(skip); - if (where == WHERE_START) { - size_t span = u8_strspn(ustr, uskip); - return (CORD)ustr + span; - } else if (where == WHERE_END) { - size_t len = u8_strlen(ustr); - const uint8_t *back = ustr + len; - size_t back_span = 0; - while (back - back_span > ustr && u8_strspn(back-back_span-1, uskip) > back_span) - ++back_span; - return CORD_substr((CORD)ustr, 0, len - back_span); - } else { - size_t span = u8_strspn(ustr, uskip); - size_t len = u8_strlen(ustr); - const uint8_t *back = ustr + len; - size_t back_span = 0; - while (back - back_span > ustr + span && u8_strspn(back-back_span-1, uskip) > back_span) - ++back_span; - return CORD_substr((CORD)(ustr + span), 0, len - span - back_span); - } -} - -public find_result_t Str__find(CORD str, CORD pat) -{ - if (!pat) return (find_result_t){.status=FIND_SUCCESS, .index=1}; - size_t pos = CORD_str(str, 0, pat); - return (pos == CORD_NOT_FOUND) ? 
(find_result_t){.status=FIND_FAILURE} : (find_result_t){.status=FIND_SUCCESS, .index=(int32_t)pos}; -} - -public CORD Str__replace(CORD text, CORD pat, CORD replacement, int64_t limit) -{ - if (!text || !pat) return text; - CORD ret = NULL; - size_t pos = 0, pat_len = CORD_len(pat); - for (size_t found; limit > 0 && (found=CORD_str(text, pos, pat)) != CORD_NOT_FOUND; --limit) { - ret = CORD_cat(ret, CORD_substr(text, pos, found)); - ret = CORD_cat(ret, replacement); - pos = found + pat_len; - } - return CORD_cat(ret, CORD_substr(text, pos, SIZE_MAX)); -} - -public array_t Str__split(CORD str, CORD split) -{ - if (!str) return (array_t){.data=GC_MALLOC(sizeof(CORD)), .atomic=1, .length=1, .stride=sizeof(CORD)}; - array_t strings = {.stride=sizeof(CORD), .atomic=1}; - int64_t capacity = 0; - - const uint8_t *ustr = (uint8_t*)CORD_to_const_char_star(str); - const uint8_t *usplit = (uint8_t*)CORD_to_const_char_star(split); - for (int64_t i = 0; ; ) { - size_t non_split = u8_strcspn(ustr + i, usplit); - CORD chunk = CORD_substr((CORD)ustr, i, non_split); - if (capacity <= 0) - strings.data = GC_REALLOC(strings.data, sizeof(CORD)*(capacity += 10)); - ((CORD*)strings.data)[strings.length++] = chunk; - - i += non_split; - - size_t split = u8_strspn(ustr + i, usplit); - if (split == 0) break; - i += split; - } - return strings; -} - -public CORD Str__join(CORD glue, array_t pieces) -{ - if (pieces.length == 0) return CORD_EMPTY; - - CORD ret = CORD_EMPTY; - for (int64_t i = 0; i < pieces.length; i++) { - if (i > 0) ret = CORD_cat(ret, glue); - ret = CORD_cat(ret, *(CORD*)((void*)pieces.data + i*pieces.stride)); - } - return ret; -} - -public const TypeInfo Str = { - .size=sizeof(CORD), - .align=__alignof__(CORD), - .tag=CustomInfo, - .CustomInfo={ - .as_str=(void*)Str__as_str, - .compare=(void*)Str__compare, - .equal=(void*)Str__equal, - .hash=(void*)Str__hash, - }, -}; - -// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 diff --git a/builtins/string.h b/builtins/string.h 
deleted file mode 100644 index c24de72d..00000000 --- a/builtins/string.h +++ /dev/null @@ -1,36 +0,0 @@ -#pragma once -#include -#include -#include - -#include "types.h" - -#define String_t CORD -#define Str_t CORD - -typedef enum { WHERE_ANYWHERE, WHERE_START, WHERE_END } where_e; - -typedef struct { - enum { FIND_FAILURE, FIND_SUCCESS } status; - int32_t index; -} find_result_t; - -CORD Str__as_str(const void *str, bool colorize, const TypeInfo *info); -CORD Str__quoted(CORD str, bool colorize); -int Str__compare(CORD *x, CORD *y); -bool Str__equal(CORD *x, CORD *y); -uint32_t Str__hash(CORD *cord); -CORD Str__upper(CORD str); -CORD Str__lower(CORD str); -CORD Str__title(CORD str); -bool Str__has(CORD str, CORD target, where_e where); -CORD Str__without(CORD str, CORD target, where_e where); -CORD Str__trimmed(CORD str, CORD skip, where_e where); -find_result_t Str__find(CORD str, CORD pat); -CORD Str__replace(CORD text, CORD pat, CORD replacement, int64_t limit); -array_t Str__split(CORD str, CORD split); -CORD Str__join(CORD glue, array_t pieces); - -extern const TypeInfo Str; - -// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 diff --git a/builtins/table.c b/builtins/table.c index fcb64e40..523aae70 100644 --- a/builtins/table.c +++ b/builtins/table.c @@ -21,7 +21,7 @@ #include "../util.h" #include "array.h" #include "datatypes.h" -#include "string.h" +#include "text.h" #include "table.h" #include "types.h" @@ -55,7 +55,7 @@ TypeInfo StrToVoidStarTable = { .size=sizeof(table_t), .align=__alignof__(table_t), .tag=TableInfo, - .TableInfo={.key=&Str, .value=&MemoryPointer}, + .TableInfo={.key=&Text, .value=&MemoryPointer}, }; static inline size_t entry_size(const TypeInfo *info) diff --git a/builtins/text.c b/builtins/text.c new file mode 100644 index 00000000..d85c03f0 --- /dev/null +++ b/builtins/text.c @@ -0,0 +1,263 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include 
"../SipHash/halfsiphash.h" +#include "types.h" +#include "array.h" +#include "text.h" + +#define CLAMP(x, lo, hi) MIN(hi, MAX(x,lo)) + +public CORD Text__as_str(const void *str, bool colorize, const TypeInfo *info) +{ + (void)info; + if (!str) return "Text"; + return Text__quoted(*(CORD*)str, colorize); +} + +public CORD Text__quoted(CORD str, bool colorize) +{ + // Note: it's important to have unicode strings not get broken up with + // escapes, otherwise they won't print right. + if (colorize) { + CORD quoted = "\x1b[35m\""; + CORD_pos i; + CORD_FOR(i, str) { + char c = CORD_pos_fetch(i); + switch (c) { +#define BACKSLASHED(esc) "\x1b[34m\\\x1b[1m" esc "\x1b[0;35m" + case '\a': quoted = CORD_cat(quoted, BACKSLASHED("a")); break; + case '\b': quoted = CORD_cat(quoted, BACKSLASHED("b")); break; + case '\x1b': quoted = CORD_cat(quoted, BACKSLASHED("e")); break; + case '\f': quoted = CORD_cat(quoted, BACKSLASHED("f")); break; + case '\n': quoted = CORD_cat(quoted, BACKSLASHED("n")); break; + case '\r': quoted = CORD_cat(quoted, BACKSLASHED("r")); break; + case '\t': quoted = CORD_cat(quoted, BACKSLASHED("t")); break; + case '\v': quoted = CORD_cat(quoted, BACKSLASHED("v")); break; + case '"': quoted = CORD_cat(quoted, BACKSLASHED("\"")); break; + case '\\': quoted = CORD_cat(quoted, BACKSLASHED("\\")); break; + case '\x00' ... '\x06': case '\x0E' ... '\x1A': + case '\x1C' ... '\x1F': case '\x7F' ... 
'\x7F': + CORD_sprintf(&quoted, "%r" BACKSLASHED("x%02X"), quoted, c); + break; + default: quoted = CORD_cat_char(quoted, c); break; +#undef BACKSLASHED + } + } + quoted = CORD_cat(quoted, "\"\x1b[m"); + return quoted; + } else { + CORD quoted = "\""; + CORD_pos i; + CORD_FOR(i, str) { + char c = CORD_pos_fetch(i); + switch (c) { + case '\a': quoted = CORD_cat(quoted, "\\a"); break; + case '\b': quoted = CORD_cat(quoted, "\\b"); break; + case '\x1b': quoted = CORD_cat(quoted, "\\e"); break; + case '\f': quoted = CORD_cat(quoted, "\\f"); break; + case '\n': quoted = CORD_cat(quoted, "\\n"); break; + case '\r': quoted = CORD_cat(quoted, "\\r"); break; + case '\t': quoted = CORD_cat(quoted, "\\t"); break; + case '\v': quoted = CORD_cat(quoted, "\\v"); break; + case '"': quoted = CORD_cat(quoted, "\\\""); break; + case '\\': quoted = CORD_cat(quoted, "\\\\"); break; + case '\x00' ... '\x06': case '\x0E' ... '\x1A': + case '\x1C' ... '\x1F': case '\x7F' ... '\x7F': + CORD_sprintf(&quoted, "%r\\x%02X", quoted, c); + break; + default: quoted = CORD_cat_char(quoted, c); break; + } + } + quoted = CORD_cat_char(quoted, '"'); + return quoted; + } +} + +public int Text__compare(CORD *x, CORD *y) +{ + return CORD_cmp(*x, *y); +} + +public bool Text__equal(CORD *x, CORD *y) +{ + return CORD_cmp(*x, *y) == 0; +} + +public uint32_t Text__hash(CORD *cord) +{ + if (!*cord) return 0; + + const char *str = CORD_to_const_char_star(*cord); + *cord = str; + + uint32_t hash; + halfsiphash(str, strlen(str)+1, SSS_HASH_VECTOR, (uint8_t*)&hash, sizeof(hash)); + return hash; +} + +public CORD Text__upper(CORD str) +{ + if (!str) return str; + size_t len = strlen(str) + 1; + uint8_t *dest = GC_MALLOC_ATOMIC(len); + return (CORD)u8_toupper((const uint8_t*)str, len-1, uc_locale_language(), NULL, dest, &len); +} + +public CORD Text__lower(CORD str) +{ + if (!str) return str; + size_t len = strlen(str) + 1; + uint8_t *dest = GC_MALLOC_ATOMIC(len); + return (CORD)u8_tolower((const uint8_t*)str, len-1, 
uc_locale_language(), NULL, dest, &len); +} + +public CORD Text__title(CORD str) +{ + if (!str) return str; + size_t len = strlen(str) + 1; + uint8_t *dest = GC_MALLOC_ATOMIC(len); + return (CORD)u8_totitle((const uint8_t*)str, len-1, uc_locale_language(), NULL, dest, &len); +} + +public bool Text__has(CORD str, CORD target, where_e where) +{ + if (!target) return true; + if (!str) return false; + + if (where == WHERE_START) { + return (CORD_ncmp(str, 0, target, 0, CORD_len(target)) == 0); + } else if (where == WHERE_END) { + size_t str_len = CORD_len(str); + size_t target_len = CORD_len(target); + return (str_len >= target_len && CORD_ncmp(str, str_len-target_len, target, 0, target_len) == 0); + } else { + size_t pos = CORD_str(str, 0, target); + return (pos != CORD_NOT_FOUND); + } +} + +public CORD Text__without(CORD str, CORD target, where_e where) +{ + if (!str || !target) return str; + + size_t target_len = CORD_len(target); + if (where == WHERE_START) { + if (CORD_ncmp(str, 0, target, 0, target_len) == 0) + return CORD_substr(str, target_len, SIZE_MAX); + return str; + } else if (where == WHERE_END) { + size_t str_len = CORD_len(str); + if (CORD_ncmp(str, str_len-target_len, target, 0, target_len) == 0) + return CORD_substr(str, 0, str_len - target_len); + return str; + } else { + errx(1, "Not implemented"); + } +} + +public CORD Text__trimmed(CORD str, CORD skip, where_e where) +{ + if (!str || !skip) return str; + const uint8_t *ustr = (const uint8_t*)CORD_to_const_char_star(str); + const uint8_t *uskip = (const uint8_t*)CORD_to_const_char_star(skip); + if (where == WHERE_START) { + size_t span = u8_strspn(ustr, uskip); + return (CORD)ustr + span; + } else if (where == WHERE_END) { + size_t len = u8_strlen(ustr); + const uint8_t *back = ustr + len; + size_t back_span = 0; + while (back - back_span > ustr && u8_strspn(back-back_span-1, uskip) > back_span) + ++back_span; + return CORD_substr((CORD)ustr, 0, len - back_span); + } else { + size_t span = 
u8_strspn(ustr, uskip); + size_t len = u8_strlen(ustr); + const uint8_t *back = ustr + len; + size_t back_span = 0; + while (back - back_span > ustr + span && u8_strspn(back-back_span-1, uskip) > back_span) + ++back_span; + return CORD_substr((CORD)(ustr + span), 0, len - span - back_span); + } +} + +public find_result_t Text__find(CORD str, CORD pat) +{ + if (!pat) return (find_result_t){.status=FIND_SUCCESS, .index=1}; + size_t pos = CORD_str(str, 0, pat); + return (pos == CORD_NOT_FOUND) ? (find_result_t){.status=FIND_FAILURE} : (find_result_t){.status=FIND_SUCCESS, .index=(int32_t)pos}; +} + +public CORD Text__replace(CORD text, CORD pat, CORD replacement, int64_t limit) +{ + if (!text || !pat) return text; + CORD ret = NULL; + size_t pos = 0, pat_len = CORD_len(pat); + for (size_t found; limit > 0 && (found=CORD_str(text, pos, pat)) != CORD_NOT_FOUND; --limit) { + ret = CORD_cat(ret, CORD_substr(text, pos, found)); + ret = CORD_cat(ret, replacement); + pos = found + pat_len; + } + return CORD_cat(ret, CORD_substr(text, pos, SIZE_MAX)); +} + +public array_t Text__split(CORD str, CORD split) +{ + if (!str) return (array_t){.data=GC_MALLOC(sizeof(CORD)), .atomic=1, .length=1, .stride=sizeof(CORD)}; + array_t strings = {.stride=sizeof(CORD), .atomic=1}; + int64_t capacity = 0; + + const uint8_t *ustr = (uint8_t*)CORD_to_const_char_star(str); + const uint8_t *usplit = (uint8_t*)CORD_to_const_char_star(split); + for (int64_t i = 0; ; ) { + size_t non_split = u8_strcspn(ustr + i, usplit); + CORD chunk = CORD_substr((CORD)ustr, i, non_split); + if (capacity <= 0) + strings.data = GC_REALLOC(strings.data, sizeof(CORD)*(capacity += 10)); + ((CORD*)strings.data)[strings.length++] = chunk; + + i += non_split; + + size_t split = u8_strspn(ustr + i, usplit); + if (split == 0) break; + i += split; + } + return strings; +} + +public CORD Text__join(CORD glue, array_t pieces) +{ + if (pieces.length == 0) return CORD_EMPTY; + + CORD ret = CORD_EMPTY; + for (int64_t i = 0; i < 
pieces.length; i++) { + if (i > 0) ret = CORD_cat(ret, glue); + ret = CORD_cat(ret, *(CORD*)((void*)pieces.data + i*pieces.stride)); + } + return ret; +} + +public const TypeInfo Text = { + .size=sizeof(CORD), + .align=__alignof__(CORD), + .tag=CustomInfo, + .CustomInfo={ + .as_str=(void*)Text__as_str, + .compare=(void*)Text__compare, + .equal=(void*)Text__equal, + .hash=(void*)Text__hash, + }, +}; + +// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 diff --git a/builtins/text.h b/builtins/text.h new file mode 100644 index 00000000..a8782d24 --- /dev/null +++ b/builtins/text.h @@ -0,0 +1,35 @@ +#pragma once +#include +#include +#include + +#include "types.h" + +#define Text_t CORD + +typedef enum { WHERE_ANYWHERE, WHERE_START, WHERE_END } where_e; + +typedef struct { + enum { FIND_FAILURE, FIND_SUCCESS } status; + int32_t index; +} find_result_t; + +CORD Text__as_str(const void *str, bool colorize, const TypeInfo *info); +CORD Text__quoted(CORD str, bool colorize); +int Text__compare(CORD *x, CORD *y); +bool Text__equal(CORD *x, CORD *y); +uint32_t Text__hash(CORD *cord); +CORD Text__upper(CORD str); +CORD Text__lower(CORD str); +CORD Text__title(CORD str); +bool Text__has(CORD str, CORD target, where_e where); +CORD Text__without(CORD str, CORD target, where_e where); +CORD Text__trimmed(CORD str, CORD skip, where_e where); +find_result_t Text__find(CORD str, CORD pat); +CORD Text__replace(CORD text, CORD pat, CORD replacement, int64_t limit); +array_t Text__split(CORD str, CORD split); +CORD Text__join(CORD glue, array_t pieces); + +extern const TypeInfo Text; + +// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 diff --git a/compile.c b/compile.c index 3ef93325..a4df2ba9 100644 --- a/compile.c +++ b/compile.c @@ -5,7 +5,7 @@ #include #include "ast.h" -#include "builtins/string.h" +#include "builtins/text.h" #include "compile.h" #include "enums.h" #include "structs.h" @@ -34,9 +34,9 @@ CORD compile_type(type_t *t) case BoolType: return "Bool_t"; case IntType: return 
Match(t, IntType)->bits == 64 ? "Int_t" : CORD_asprintf("Int%ld_t", Match(t, IntType)->bits); case NumType: return Match(t, NumType)->bits == 64 ? "Num_t" : CORD_asprintf("Num%ld_t", Match(t, NumType)->bits); - case StringType: { - const char *dsl = Match(t, StringType)->dsl; - return dsl ? CORD_cat(dsl, "_t") : "Str_t"; + case TextType: { + const char *dsl = Match(t, TextType)->dsl; + return dsl ? CORD_cat(dsl, "_t") : "Text_t"; } case ArrayType: return "array_t"; case TableType: return "table_t"; @@ -80,7 +80,7 @@ CORD expr_as_string(env_t *env, CORD expr, type_t *t, CORD color) CORD name = type_to_cord(t); return CORD_asprintf("%r__as_str($stack(%r), %r, &Num%r)", name, expr, color, name); } - case StringType: return CORD_asprintf("Str__as_str($stack(%r), %r, &Str)", expr, color); + case TextType: return CORD_asprintf("Text__as_str($stack(%r), %r, &Text)", expr, color); case ArrayType: return CORD_asprintf("Array__as_str($stack(%r), %r, %r)", expr, color, compile_type_info(env, t)); case TableType: return CORD_asprintf("Table_as_str($stack(%r), %r, %r)", expr, color, compile_type_info(env, t)); case FunctionType: return CORD_asprintf("Func__as_str($stack(%r), %r, %r)", expr, color, compile_type_info(env, t)); @@ -170,7 +170,7 @@ CORD compile(env_t *env, ast_t *ast) ast_t *expr = Match(ast, Length)->value; type_t *t = get_type(env, expr); switch (value_type(t)->tag) { - case StringType: { + case TextType: { CORD str = compile_to_pointer_depth(env, expr, 0, false); return CORD_all("CORD_len(", str, ")"); } @@ -341,7 +341,7 @@ CORD compile(env_t *env, ast_t *ast) } case BINOP_CONCAT: { switch (operand_t->tag) { - case StringType: { + case TextType: { return CORD_all("CORD_cat(", lhs, ", ", rhs, ")"); } case ArrayType: { @@ -408,7 +408,7 @@ CORD compile(env_t *env, ast_t *ast) } case BINOP_XOR: return CORD_asprintf("%r ^= %r;", lhs, rhs); case BINOP_CONCAT: { - if (operand_t->tag == StringType) { + if (operand_t->tag == TextType) { return CORD_asprintf("%r = 
CORD_cat(%r, %r);", lhs, lhs, rhs); } else if (operand_t->tag == ArrayType) { if (can_promote(rhs_t, Match(lhs_t, ArrayType)->item_type)) { @@ -431,8 +431,8 @@ CORD compile(env_t *env, ast_t *ast) default: code_err(ast, "Update assignments are not implemented for this operation"); } } - case StringLiteral: { - CORD literal = Match(ast, StringLiteral)->cord; + case TextLiteral: { + CORD literal = Match(ast, TextLiteral)->cord; if (literal == CORD_EMPTY) return "(CORD)CORD_EMPTY"; CORD code = "(CORD)\""; @@ -459,20 +459,20 @@ CORD compile(env_t *env, ast_t *ast) } return CORD_cat_char(code, '"'); } - case StringJoin: { - ast_list_t *chunks = Match(ast, StringJoin)->children; + case TextJoin: { + ast_list_t *chunks = Match(ast, TextJoin)->children; if (!chunks) { return "(CORD)CORD_EMPTY"; } else if (!chunks->next) { type_t *t = get_type(env, chunks->ast); - if (t->tag == StringType) + if (t->tag == TextType) return compile(env, chunks->ast); return compile_string(env, chunks->ast, "no"); } else { CORD code = "CORD_all("; for (ast_list_t *chunk = chunks; chunk; chunk = chunk->next) { type_t *chunk_t = get_type(env, chunk->ast); - CORD chunk_str = (chunk_t->tag == StringType) ? + CORD chunk_str = (chunk_t->tag == TextType) ? 
compile(env, chunk->ast) : compile_string(env, chunk->ast, "no"); code = CORD_cat(code, chunk_str); if (chunk->next) code = CORD_cat(code, ", "); @@ -812,7 +812,7 @@ CORD compile(env_t *env, ast_t *ast) empty = FakeAST( InlineCCode, CORD_asprintf("fail_source(%s, %ld, %ld, \"This collection was empty!\");\n", - Str__quoted(ast->file->filename, false), (long)(reduction->iter->start - reduction->iter->file->text), + Text__quoted(ast->file->filename, false), (long)(reduction->iter->start - reduction->iter->file->text), (long)(reduction->iter->end - reduction->iter->file->text))); } ast_t *i = FakeAST(Var, "$i"); @@ -863,8 +863,8 @@ CORD compile(env_t *env, ast_t *ast) compile(env, test->expr), compile(env, decl->var), compile_type_info(env, get_type(env, decl->value)), - compile(env, WrapAST(test->expr, StringLiteral, .cord=test->output)), - compile(env, WrapAST(test->expr, StringLiteral, .cord=test->expr->file->filename)), + compile(env, WrapAST(test->expr, TextLiteral, .cord=test->output)), + compile(env, WrapAST(test->expr, TextLiteral, .cord=test->expr->file->filename)), (int64_t)(test->expr->start - test->expr->file->text), (int64_t)(test->expr->end - test->expr->file->text)); } else if (test->expr->tag == Assign) { @@ -888,16 +888,16 @@ CORD compile(env_t *env, ast_t *ast) expr_cord = CORD_cat(expr_cord, ")"); CORD_appendf(&code, "$test(%r, %r, %r);", - compile(env, WrapAST(test->expr, StringLiteral, .cord=src)), + compile(env, WrapAST(test->expr, TextLiteral, .cord=src)), expr_cord, - compile(env, WrapAST(test->expr, StringLiteral, .cord=test->output))); + compile(env, WrapAST(test->expr, TextLiteral, .cord=test->output))); return CORD_cat(code, "\n}"); } else if (expr_t->tag == VoidType || expr_t->tag == AbortType) { return CORD_asprintf( "%r;\n" "__doctest(NULL, NULL, NULL, %r, %ld, %ld);", compile(env, test->expr), - compile(env, WrapAST(test->expr, StringLiteral, .cord=test->expr->file->filename)), + compile(env, WrapAST(test->expr, TextLiteral, 
.cord=test->expr->file->filename)), (int64_t)(test->expr->start - test->expr->file->text), (int64_t)(test->expr->end - test->expr->file->text)); } else { @@ -908,8 +908,8 @@ CORD compile(env_t *env, ast_t *ast) compile_type(expr_t), compile(env, test->expr), compile_type_info(env, expr_t), - compile(env, WrapAST(test->expr, StringLiteral, .cord=test->output)), - compile(env, WrapAST(test->expr, StringLiteral, .cord=test->expr->file->filename)), + compile(env, WrapAST(test->expr, TextLiteral, .cord=test->output)), + compile(env, WrapAST(test->expr, TextLiteral, .cord=test->expr->file->filename)), (int64_t)(test->expr->start - test->expr->file->text), (int64_t)(test->expr->end - test->expr->file->text)); } @@ -1007,7 +1007,7 @@ CORD compile(env_t *env, ast_t *ast) return CORD_all("$Array_get_unchecked", compile_type(item_type), ", ", arr, ", ", index, ")"); else return CORD_all("$Array_get(", compile_type(item_type), ", ", arr, ", ", index, ", ", - Str__quoted(f->filename, false), ", ", CORD_asprintf("%ld", (int64_t)(indexing->index->start - f->text)), ", ", + Text__quoted(f->filename, false), ", ", CORD_asprintf("%ld", (int64_t)(indexing->index->start - f->text)), ", ", CORD_asprintf("%ld", (int64_t)(indexing->index->end - f->text)), ")"); } @@ -1021,7 +1021,7 @@ CORD compile(env_t *env, ast_t *ast) file_t *f = indexing->index->file; return CORD_all("$Table_get(", table, ", ", compile_type(key_t), ", ", compile_type(value_t), ", ", key, ", ", compile_type_info(env, container_t), ", ", - Str__quoted(f->filename, false), ", ", CORD_asprintf("%ld", (int64_t)(indexing->index->start - f->text)), ", ", + Text__quoted(f->filename, false), ", ", CORD_asprintf("%ld", (int64_t)(indexing->index->start - f->text)), ", ", CORD_asprintf("%ld", (int64_t)(indexing->index->end - f->text)), ")"); } @@ -1046,7 +1046,7 @@ CORD compile_type_info(env_t *env, type_t *t) { switch (t->tag) { case BoolType: case IntType: case NumType: return CORD_asprintf("&%r", type_to_cord(t)); - case 
StringType: return CORD_all("(&", Match(t, StringType)->dsl ? Match(t, StringType)->dsl : "Str", ")"); + case TextType: return CORD_all("(&", Match(t, TextType)->dsl ? Match(t, TextType)->dsl : "Text", ")"); case StructType: return CORD_all("(&", Match(t, StructType)->name, ")"); case EnumType: return CORD_all("(&", Match(t, EnumType)->name, ")"); case ArrayType: { @@ -1062,10 +1062,10 @@ CORD compile_type_info(env_t *env, type_t *t) auto ptr = Match(t, PointerType); CORD sigil = ptr->is_stack ? "&" : (ptr->is_optional ? "?" : "@"); if (ptr->is_readonly) sigil = CORD_cat(sigil, "(readonly)"); - return CORD_asprintf("$PointerInfo(%r, %r)", Str__quoted(sigil, false), compile_type_info(env, ptr->pointed)); + return CORD_asprintf("$PointerInfo(%r, %r)", Text__quoted(sigil, false), compile_type_info(env, ptr->pointed)); } case FunctionType: { - return CORD_asprintf("$FunctionInfo(%r)", Str__quoted(type_to_cord(t), false)); + return CORD_asprintf("$FunctionInfo(%r)", Text__quoted(type_to_cord(t), false)); } case ClosureType: { errx(1, "No typeinfo for closures yet"); @@ -1100,7 +1100,7 @@ module_code_t compile_file(ast_t *ast) return (module_code_t){ .module_name=module_name, .header=CORD_all( - // CORD_asprintf("#line 0 %r\n", Str__quoted(ast->file->filename, false)), + // CORD_asprintf("#line 0 %r\n", Text__quoted(ast->file->filename, false)), env->code->imports, "\n", env->code->typedefs, "\n", env->code->typecode, "\n", @@ -1108,7 +1108,7 @@ module_code_t compile_file(ast_t *ast) "void use$", module_name, "(void);\n" ), .c_file=CORD_all( - // CORD_asprintf("#line 0 %r\n", Str__quoted(ast->file->filename, false)), + // CORD_asprintf("#line 0 %r\n", Text__quoted(ast->file->filename, false)), env->code->staticdefs, "\n", env->code->funcs, "\n", env->code->typeinfos, "\n", diff --git a/docs/metamethods.md b/docs/metamethods.md index c3231883..1a1eb0e2 100644 --- a/docs/metamethods.md +++ b/docs/metamethods.md @@ -3,7 +3,7 @@ This language relies on a small set of 
"metamethods" which define special behavior that is required for all types: -- `as_str(obj:&(optional)T, colorize=no)->Str`: a method to convert the type to a +- `as_text(obj:&(optional)T, colorize=no)->Text`: a method to convert the type to a string. If `colorize` is `yes`, then the method should include ANSI escape codes for syntax highlighting. If the `obj` pointer is `NULL`, a string representation of the type will be returned instead. @@ -32,12 +32,12 @@ _every_ type had its own set of metamethods. To reduce the amount of generated code, we use generic metamethods, which are general-purpose functions that take an automatically compiled format string and variable number of arguments that describe how to run a metamethod for that type. As a simple example, if `foo` -is an array of type `Foo`, which has a defined `as_str()` method, then -rather than define a separate `Foo_Array_as_str()` function that would be -99% identical to a `Baz_Array_as_str()` function, we instead insert a call -to `as_str(&foo, colorize, "[_]", Foo__as_str)` to convert a `[Foo]` -array to a string, and you call `as_str(&baz, colorize, "[_]", -Baz__as_str)` to convert a `[Baz]` array to a string. The generic metamethod +is an array of type `Foo`, which has a defined `as_text()` method, then +rather than define a separate `Foo_Array_as_text()` function that would be +99% identical to a `Baz_Array_as_text()` function, we instead insert a call +to `as_text(&foo, colorize, "[_]", Foo__as_text)` to convert a `[Foo]` +array to a string, and you call `as_text(&baz, colorize, "[_]", +Baz__as_text)` to convert a `[Baz]` array to a string. The generic metamethod handles all the reusable logic like "an array's string form starts with a '[', then iterates over the items, getting the item's string form (whatever that is) and putting commas between them". 
diff --git a/docs/strings.md b/docs/strings.md index a981be2a..d2e886bf 100644 --- a/docs/strings.md +++ b/docs/strings.md @@ -276,7 +276,7 @@ To compare normalized forms of strings, use: ### Patterns - `string.has($/pattern/, at=Anywhere:enum(Anywhere, Start, End))` Check whether a pattern can be found -- `string.next($/pattern/)` Returns an `enum(NotFound, Found(match:Str, rest:Str))` +- `string.next($/pattern/)` Returns an `enum(NotFound, Found(match:Text, rest:Text))` - `string.matches($/pattern/)` Returns a list of matching strings - `string.replace($/pattern/, "replacement")` Returns a copy of the string with replacements - `string.without($/pattern/, at=Anywhere:enum(Anywhere, Start, End))` diff --git a/enums.c b/enums.c index c356fbd1..46f13d4d 100644 --- a/enums.c +++ b/enums.c @@ -5,7 +5,7 @@ #include #include "ast.h" -#include "builtins/string.h" +#include "builtins/text.h" #include "compile.h" #include "structs.h" #include "environment.h" diff --git a/environment.c b/environment.c index f5b9c625..5dd92beb 100644 --- a/environment.c +++ b/environment.c @@ -4,7 +4,7 @@ #include "environment.h" #include "builtins/table.h" -#include "builtins/string.h" +#include "builtins/text.h" #include "typecheck.h" #include "util.h" @@ -37,8 +37,8 @@ env_t *new_compilation_unit(void) const char *name; binding_t binding; } global_vars[] = { - {"say", {.code="say", .type=Type(FunctionType, .args=new(arg_t, .name="text", .type=Type(StringType)), .ret=Type(VoidType))}}, - {"fail", {.code="fail", .type=Type(FunctionType, .args=new(arg_t, .name="message", .type=Type(StringType)), .ret=Type(AbortType))}}, + {"say", {.code="say", .type=Type(FunctionType, .args=new(arg_t, .name="text", .type=Type(TextType)), .ret=Type(VoidType))}}, + {"fail", {.code="fail", .type=Type(FunctionType, .args=new(arg_t, .name="message", .type=Type(TextType)), .ret=Type(AbortType))}}, {"USE_COLOR", {.code="USE_COLOR", .type=Type(BoolType)}}, }; @@ -61,9 +61,9 @@ env_t *new_compilation_unit(void) } 
global_types[] = { {"Bool", Type(BoolType), "Bool_t", "Bool", {}}, {"Int", Type(IntType, .bits=64), "Int_t", "Int", $TypedArray(ns_entry_t, - {"format", "Int__format", "func(i:Int, digits=0)->Str"}, - {"hex", "Int__hex", "func(i:Int, digits=0, uppercase=yes, prefix=yes)->Str"}, - {"octal", "Int__octal", "func(i:Int, digits=0, prefix=yes)->Str"}, + {"format", "Int__format", "func(i:Int, digits=0)->Text"}, + {"hex", "Int__hex", "func(i:Int, digits=0, uppercase=yes, prefix=yes)->Text"}, + {"octal", "Int__octal", "func(i:Int, digits=0, prefix=yes)->Text"}, {"random", "Int__random", "func(min=0, max=0xffffffff)->Int"}, {"bits", "Int__bits", "func(x:Int)->[Bool]"}, {"abs", "Int__abs", "func(i:Int)->Int"}, @@ -71,9 +71,9 @@ env_t *new_compilation_unit(void) {"max", "Int__max", "Int"}, )}, {"Int32", Type(IntType, .bits=32), "Int32_t", "Int32", $TypedArray(ns_entry_t, - {"format", "Int32__format", "func(i:Int32, digits=0)->Str"}, - {"hex", "Int32__hex", "func(i:Int32, digits=0, uppercase=yes, prefix=yes)->Str"}, - {"octal", "Int32__octal", "func(i:Int32, digits=0, prefix=yes)->Str"}, + {"format", "Int32__format", "func(i:Int32, digits=0)->Text"}, + {"hex", "Int32__hex", "func(i:Int32, digits=0, uppercase=yes, prefix=yes)->Text"}, + {"octal", "Int32__octal", "func(i:Int32, digits=0, prefix=yes)->Text"}, {"random", "Int32__random", "func(min=0, max=0xffffffff)->Int32"}, {"bits", "Int32__bits", "func(x:Int32)->[Bool]"}, {"abs", "Int32__abs", "func(i:Int32)->Int32"}, @@ -81,9 +81,9 @@ env_t *new_compilation_unit(void) {"max", "Int32__max", "Int32"}, )}, {"Int16", Type(IntType, .bits=16), "Int16_t", "Int16", $TypedArray(ns_entry_t, - {"format", "Int16__format", "func(i:Int16, digits=0)->Str"}, - {"hex", "Int16__hex", "func(i:Int16, digits=0, uppercase=yes, prefix=yes)->Str"}, - {"octal", "Int16__octal", "func(i:Int16, digits=0, prefix=yes)->Str"}, + {"format", "Int16__format", "func(i:Int16, digits=0)->Text"}, + {"hex", "Int16__hex", "func(i:Int16, digits=0, uppercase=yes, 
prefix=yes)->Text"}, + {"octal", "Int16__octal", "func(i:Int16, digits=0, prefix=yes)->Text"}, {"random", "Int16__random", "func(min=0, max=0xffffffff)->Int16"}, {"bits", "Int16__bits", "func(x:Int16)->[Bool]"}, {"abs", "Int16__abs", "func(i:Int16)->Int16"}, @@ -91,9 +91,9 @@ env_t *new_compilation_unit(void) {"max", "Int16__max", "Int16"}, )}, {"Int8", Type(IntType, .bits=8), "Int8_t", "Int8", $TypedArray(ns_entry_t, - {"format", "Int8__format", "func(i:Int8, digits=0)->Str"}, - {"hex", "Int8__hex", "func(i:Int8, digits=0, uppercase=yes, prefix=yes)->Str"}, - {"octal", "Int8__octal", "func(i:Int8, digits=0, prefix=yes)->Str"}, + {"format", "Int8__format", "func(i:Int8, digits=0)->Text"}, + {"hex", "Int8__hex", "func(i:Int8, digits=0, uppercase=yes, prefix=yes)->Text"}, + {"octal", "Int8__octal", "func(i:Int8, digits=0, prefix=yes)->Text"}, {"random", "Int8__random", "func(min=0, max=0xffffffff)->Int8"}, {"bits", "Int8__bits", "func(x:Int8)->[Bool]"}, {"abs", "Int8__abs", "func(i:Int8)->Int8"}, @@ -105,8 +105,8 @@ env_t *new_compilation_unit(void) #define F2(name) {#name, "Num__"#name, "func(x:Num, y:Num)->Num"} {"Num", Type(NumType, .bits=64), "Num_t", "Num", $TypedArray(ns_entry_t, {"near", "Num__near", "func(x:Num, y:Num, ratio=1e-9, min_epsilon=1e-9)->Bool"}, - {"format", "Num__format", "func(n:Num, precision=0)->Str"}, - {"scientific", "Num__scientific", "func(n:Num, precision=0)->Str"}, + {"format", "Num__format", "func(n:Num, precision=0)->Text"}, + {"scientific", "Num__scientific", "func(n:Num, precision=0)->Text"}, {"nan", "Num__nan", "func(tag=\"\")->Num"}, {"isinf", "Num__isinf", "func(n:Num)->Bool"}, {"isfinite", "Num__isfinite", "func(n:Num)->Bool"}, @@ -128,8 +128,8 @@ env_t *new_compilation_unit(void) #define F2(name) {#name, "Num32__"#name, "func(x:Num32, y:Num32)->Num32"} {"Num32", Type(NumType, .bits=32), "Num32_t", "Num32", $TypedArray(ns_entry_t, {"near", "Num32__near", "func(x:Num32, y:Num32, ratio=1e-9f32, min_epsilon=1e-9f32)->Bool"}, - 
{"format", "Num32__format", "func(n:Num32, precision=0)->Str"}, - {"scientific", "Num32__scientific", "func(n:Num32, precision=0)->Str"}, + {"format", "Num32__format", "func(n:Num32, precision=0)->Text"}, + {"scientific", "Num32__scientific", "func(n:Num32, precision=0)->Text"}, {"nan", "Num32__nan", "func(tag=\"\")->Num32"}, {"isinf", "Num32__isinf", "func(n:Num32)->Bool"}, {"isfinite", "Num32__isfinite", "func(n:Num32)->Bool"}, @@ -146,19 +146,19 @@ env_t *new_compilation_unit(void) #undef F2 #undef F #undef C - {"Str", Type(StringType), "Str_t", "Str", $TypedArray(ns_entry_t, - {"quoted", "Str__quoted", "func(s:Str, color=no)->Str"}, - {"upper", "Str__upper", "func(s:Str)->Str"}, - {"lower", "Str__lower", "func(s:Str)->Str"}, - {"title", "Str__title", "func(s:Str)->Str"}, - // {"has", "Str__has", "func(s:Str, target:Str, where=ANYWHERE)->Bool"}, - // {"without", "Str__without", "func(s:Str, target:Str, where=ANYWHERE)->Str"}, - // {"trimmed", "Str__without", "func(s:Str, skip:Str, where=ANYWHERE)->Str"}, - {"title", "Str__title", "func(s:Str)->Str"}, - // {"find", "Str__find", "func(s:Str, pattern:Str)->FindResult"}, - {"replace", "Str__replace", "func(s:Str, pattern:Str, replacement:Str, limit=Int.max)->Str"}, - {"split", "Str__split", "func(s:Str, split:Str)->[Str]"}, - {"join", "Str__join", "func(glue:Str, pieces:[Str])->Str"}, + {"Text", Type(TextType), "Text_t", "Text", $TypedArray(ns_entry_t, + {"quoted", "Text__quoted", "func(s:Text, color=no)->Text"}, + {"upper", "Text__upper", "func(s:Text)->Text"}, + {"lower", "Text__lower", "func(s:Text)->Text"}, + {"title", "Text__title", "func(s:Text)->Text"}, + // {"has", "Text__has", "func(s:Text, target:Text, where=ANYWHERE)->Bool"}, + // {"without", "Text__without", "func(s:Text, target:Text, where=ANYWHERE)->Text"}, + // {"trimmed", "Text__without", "func(s:Text, skip:Text, where=ANYWHERE)->Text"}, + {"title", "Text__title", "func(s:Text)->Text"}, + // {"find", "Text__find", "func(s:Text, 
pattern:Text)->FindResult"}, + {"replace", "Text__replace", "func(s:Text, pattern:Text, replacement:Text, limit=Int.max)->Text"}, + {"split", "Text__split", "func(s:Text, split:Text)->[Text]"}, + {"join", "Text__join", "func(glue:Text, pieces:[Text])->Text"}, )}, }; @@ -208,7 +208,7 @@ binding_t *get_namespace_binding(env_t *env, ast_t *self, const char *name) case TableType: { errx(1, "Table methods not implemented"); } - case BoolType: case IntType: case NumType: case StringType: { + case BoolType: case IntType: case NumType: case TextType: { table_t *ns = Table_str_get(env->type_namespaces, CORD_to_const_char_star(type_to_cord(cls_type))); if (!ns) { code_err(self, "No namespace found for this type!"); diff --git a/parse.c b/parse.c index 44e3d33e..0b1ebd6d 100644 --- a/parse.c +++ b/parse.c @@ -953,7 +953,7 @@ PARSER(parse_string) { cord = CORD_cat(cord, c); // cord = CORD_cat_char(cord, c); } while (*pos == '\\'); - return NewAST(ctx->file, start, pos, StringLiteral, .cord=cord); + return NewAST(ctx->file, start, pos, TextLiteral, .cord=cord); } char open_quote, close_quote, open_interp = '\x03', close_interp = '\x02'; @@ -997,7 +997,7 @@ PARSER(parse_string) { if (*pos == open_interp) { // Interpolation const char *interp_start = pos; if (chunk) { - ast_t *literal = NewAST(ctx->file, chunk_start, pos, StringLiteral, .cord=chunk); + ast_t *literal = NewAST(ctx->file, chunk_start, pos, TextLiteral, .cord=chunk); chunks = new(ast_list_t, .ast=literal, .next=chunks); chunk = NULL; } @@ -1051,14 +1051,14 @@ PARSER(parse_string) { } if (chunk) { - ast_t *literal = NewAST(ctx->file, chunk_start, pos, StringLiteral, .cord=chunk); + ast_t *literal = NewAST(ctx->file, chunk_start, pos, TextLiteral, .cord=chunk); chunks = new(ast_list_t, .ast=literal, .next=chunks); chunk = NULL; } REVERSE_LIST(chunks); expect_closing(ctx, &pos, (char[]){close_quote, 0}, "I was expecting a '%c' to finish this string", close_quote); - return NewAST(ctx->file, start, pos, StringJoin, 
.children=chunks); + return NewAST(ctx->file, start, pos, TextJoin, .children=chunks); } PARSER(parse_skip) { diff --git a/structs.c b/structs.c index d13e6db7..cabd83d2 100644 --- a/structs.c +++ b/structs.c @@ -5,7 +5,7 @@ #include #include "ast.h" -#include "builtins/string.h" +#include "builtins/text.h" #include "compile.h" #include "environment.h" #include "typecheck.h" @@ -69,7 +69,7 @@ static CORD compile_compare_method(env_t *env, ast_t *ast) case BoolType: case IntType: case NumType: case PointerType: case FunctionType: cmp_func = CORD_all(cmp_func, "diff = (x->", field->name, " > y->", field->name, ") - (x->", field->name, " < y->", field->name, ");"); break; - case StringType: + case TextType: cmp_func = CORD_all(cmp_func, "diff = CORD_cmp(x->", field->name, ", y->", field->name, ");"); break; default: @@ -95,7 +95,7 @@ static CORD compile_equals_method(env_t *env, ast_t *ast) case BoolType: case IntType: case NumType: case PointerType: case FunctionType: eq_func = CORD_all(eq_func, "if (x->", field->name, " != y->", field->name, ") return no;\n"); break; - case StringType: + case TextType: eq_func = CORD_all(eq_func, "if (CORD_cmp(x->", field->name, ", y->", field->name, ") != 0) return no;\n"); break; default: diff --git a/test/structs.tm b/test/structs.tm index a9bb545f..c86f9102 100644 --- a/test/structs.tm +++ b/test/structs.tm @@ -1,6 +1,6 @@ struct Pair(x,y:Int) -struct Mixed(x:Int, str:Str) +struct Mixed(x:Int, text:Text) func test_literals() >> x := Pair(10, 20) diff --git a/test/tables.tm b/test/tables.tm index 7c614466..53b720c9 100644 --- a/test/tables.tm +++ b/test/tables.tm @@ -20,7 +20,7 @@ for k,v in t >> t.default = ?(readonly)999 >> t.fallback -= !{Str=>Int} += !{Text=>Int} >> t.keys = ["one", "two"] diff --git a/tomo.c b/tomo.c index 13ac7b7f..96c7d436 100644 --- a/tomo.c +++ b/tomo.c @@ -5,7 +5,7 @@ #include #include "ast.h" -#include "builtins/string.h" +#include "builtins/text.h" #include "compile.h" #include "parse.h" #include 
"typecheck.h" diff --git a/tomo.h b/tomo.h index 113e5820..a3bde24a 100644 --- a/tomo.h +++ b/tomo.h @@ -22,8 +22,8 @@ #include "builtins/memory.h" #include "builtins/nums.h" #include "builtins/pointer.h" -#include "builtins/string.h" #include "builtins/table.h" +#include "builtins/text.h" #include "builtins/types.h" #define Void_t void diff --git a/typecheck.c b/typecheck.c index aba9ec2b..75417e63 100644 --- a/typecheck.c +++ b/typecheck.c @@ -257,8 +257,8 @@ type_t *get_type(env_t *env, ast_t *ast) code_err(ast, "'&' stack references can only be used on variables or fields of variables"); } - case StringJoin: case StringLiteral: { - return Type(StringType); + case TextJoin: case TextLiteral: { + return Type(TextType); } case Var: { auto var = Match(ast, Var); @@ -510,10 +510,10 @@ type_t *get_type(env_t *env, ast_t *ast) if (!type_eq(lhs_t, rhs_t)) code_err(ast, "The type on the left side of this concatenation doesn't match the right side: %T vs. %T", lhs_t, rhs_t); - if (lhs_t->tag == ArrayType || lhs_t->tag == StringType) + if (lhs_t->tag == ArrayType || lhs_t->tag == TextType) return lhs_t; - code_err(ast, "Only array/string value types support concatenation, not %T", lhs_t); + code_err(ast, "Only array/text value types support concatenation, not %T", lhs_t); } case BINOP_EQ: case BINOP_NE: case BINOP_LT: case BINOP_LE: case BINOP_GT: case BINOP_GE: { if (!can_promote(lhs_t, rhs_t) && !can_promote(rhs_t, lhs_t)) diff --git a/types.c b/types.c index 59896b28..30f648b5 100644 --- a/types.c +++ b/types.c @@ -16,7 +16,7 @@ CORD type_to_cord(type_t *t) { case VoidType: return "Void"; case MemoryType: return "Memory"; case BoolType: return "Bool"; - case StringType: return "Str"; + case TextType: return "Text"; case IntType: return Match(t, IntType)->bits == 64 ? "Int" : CORD_asprintf("Int%ld", Match(t, IntType)->bits); case NumType: return Match(t, NumType)->bits == 64 ? 
"Num" : CORD_asprintf("Num%ld", Match(t, NumType)->bits); case ArrayType: { @@ -428,7 +428,7 @@ size_t type_size(type_t *t) case BoolType: return sizeof(bool); case IntType: return Match(t, IntType)->bits/8; case NumType: return Match(t, NumType)->bits/8; - case StringType: return sizeof(CORD); + case TextType: return sizeof(CORD); case ArrayType: return sizeof(array_t); case TableType: return sizeof(table_t); case FunctionType: return sizeof(void*); @@ -477,7 +477,7 @@ size_t type_align(type_t *t) case BoolType: return __alignof__(bool); case IntType: return Match(t, IntType)->bits/8; case NumType: return Match(t, NumType)->bits/8; - case StringType: return __alignof__(CORD); + case TextType: return __alignof__(CORD); case ArrayType: return __alignof__(array_t); case TableType: return __alignof__(table_t); case FunctionType: return __alignof__(void*); diff --git a/types.h b/types.h index 073bbe3b..1a0e8edb 100644 --- a/types.h +++ b/types.h @@ -37,7 +37,7 @@ struct type_s { BoolType, IntType, NumType, - StringType, + TextType, ArrayType, TableType, FunctionType, @@ -59,7 +59,7 @@ struct type_s { } NumType; struct { const char *dsl; - } StringType; + } TextType; struct { type_t *item_type; } ArrayType; -- cgit v1.2.3