diff --git a/builtins/integers.h b/builtins/integers.h index 6e2a1fe..9f342f2 100644 --- a/builtins/integers.h +++ b/builtins/integers.h @@ -106,6 +106,7 @@ Int_t Int$sqrt(Int_t i); #define I(i) ((int64_t)(i) == (int32_t)(i) ? ((Int_t){.small=((uint64_t)(i)<<2)|1}) : Int64_to_Int(i)) #define I_small(i) ((Int_t){.small=((uint64_t)(i)<<2)|1}) +#define I_is_zero(i) ((i).small == 1) Int_t Int$slow_plus(Int_t x, Int_t y); Int_t Int$slow_minus(Int_t x, Int_t y); diff --git a/builtins/text.c b/builtins/text.c index 32e9609..cff3a7a 100644 --- a/builtins/text.c +++ b/builtins/text.c @@ -237,8 +237,8 @@ public Text_t Text$_concat(int n, Text_t items[n]) public Text_t Text$slice(Text_t text, Int_t first_int, Int_t last_int) { - int64_t first = Int_to_Int64(first_int, false)-1; - int64_t last = Int_to_Int64(last_int, false)-1; + int64_t first = Int_to_Int64(first_int, false); + int64_t last = Int_to_Int64(last_int, false); if (first == 0) errx(1, "Invalid index: 0"); if (last == 0) return (Text_t){.length=0}; @@ -1260,17 +1260,17 @@ int64_t match(Text_t text, Text_t pattern, int64_t text_index, int64_t pattern_i pattern_index = old_pat_index; int32_t pat_grapheme = _next_grapheme(pattern, &pattern_state, pattern_index); - if (pattern_index == 0 && text_index > 0) { - int32_t pat_codepoint = pat_grapheme; - if (pat_codepoint < 0) - pat_codepoint = synthetic_graphemes[-pat_codepoint-1].codepoints[0]; + // if (pattern_index == 0 && text_index > 0) { + // int32_t pat_codepoint = pat_grapheme; + // if (pat_codepoint < 0) + // pat_codepoint = synthetic_graphemes[-pat_codepoint-1].codepoints[0]; - int32_t prev_codepoint = _next_grapheme(text, &text_state, text_index - 1); - if (prev_codepoint < 0) - prev_codepoint = synthetic_graphemes[-prev_codepoint-1].codepoints[0]; - if (uc_is_property_alphabetic(pat_codepoint) && uc_is_property_alphabetic(prev_codepoint)) - return -1; - } + // int32_t prev_codepoint = _next_grapheme(text, &text_state, text_index - 1); + // if (prev_codepoint < 0) + // prev_codepoint = synthetic_graphemes[-prev_codepoint-1].codepoints[0]; + // if (uc_is_property_alphabetic(pat_codepoint) && uc_is_property_alphabetic(prev_codepoint)) + // return -1; + // } int32_t text_grapheme = _next_grapheme(text, &text_state, text_index); if (pat_grapheme != text_grapheme) @@ -1279,17 +1279,17 @@ int64_t match(Text_t text, Text_t pattern, int64_t text_index, int64_t pattern_i pattern_index += 1; text_index += 1; - if (pattern_index == pattern.length && text_index < text.length) { - int32_t pat_codepoint = pat_grapheme; - if (pat_codepoint < 0) - pat_codepoint = synthetic_graphemes[-pat_codepoint-1].codepoints[0]; + // if (pattern_index == pattern.length && text_index < text.length) { + // int32_t pat_codepoint = pat_grapheme; + // if (pat_codepoint < 0) + // pat_codepoint = synthetic_graphemes[-pat_codepoint-1].codepoints[0]; - int32_t next_codepoint = _next_grapheme(text, &text_state, text_index); - if (next_codepoint < 0) - next_codepoint = synthetic_graphemes[-next_codepoint-1].codepoints[0]; - if (uc_is_property_alphabetic(pat_codepoint) && uc_is_property_alphabetic(next_codepoint)) - return -1; - } + // int32_t next_codepoint = _next_grapheme(text, &text_state, text_index); + // if (next_codepoint < 0) + // next_codepoint = synthetic_graphemes[-next_codepoint-1].codepoints[0]; + // if (uc_is_property_alphabetic(pat_codepoint) && uc_is_property_alphabetic(next_codepoint)) + // return -1; + // } } } if (text_index >= text.length && pattern_index < pattern.length) @@ -1346,8 +1346,11 @@ public int printf_text(FILE *stream, const struct printf_info *info, const void public Text_t Text$as_text(const void *text, bool colorize, const TypeInfo *info) { (void)info; - if (!text) return Text$from_str("Text"); - return Text$quoted(*(Text_t*)text, colorize); + if (!text) return info && info->TextInfo.lang ? Text$from_str(info->TextInfo.lang) : Text$from_str("Text"); + Text_t as_text = Text$quoted(*(Text_t*)text, colorize); + if (info && info->TextInfo.lang && info != &$Text) + as_text = Text$concat(Text$from_str(colorize ? "\x1b[1m$" : "$"), Text$from_str(info->TextInfo.lang), as_text); + return as_text; } public Text_t Text$quoted(Text_t text, bool colorize) @@ -1407,11 +1410,11 @@ public Text_t Text$replace(Text_t text, Text_t pattern, Text_t replacement) { Text_t ret = {.length=0}; - Int_t i = I_small(0); + Int_t i = I_small(1); for (;;) { int64_t len; Int_t found = Text$find(text, pattern, i, &len); - if (found.small == I_small(0).small) break; + if (I_is_zero(found)) break; if (Int$compare(&found, &i, &$Text) > 0) { ret = Text$concat( ret, @@ -1421,6 +1424,7 @@ public Text_t Text$replace(Text_t text, Text_t pattern, Text_t replacement) } else { ret = concat2(ret, replacement); } + i = Int$plus(found, Int64_to_Int(len)); } if (Int_to_Int64(i, false) <= text.length) { ret = concat2(ret, Text$slice(text, i, Int64_to_Int(text.length))); diff --git a/compile.c b/compile.c index 10a345c..8296306 100644 --- a/compile.c +++ b/compile.c @@ -3256,7 +3256,7 @@ CORD compile_statement_typedefs(env_t *env, ast_t *ast) } case LangDef: { auto def = Match(ast, LangDef); - return CORD_all("typedef CORD ", namespace_prefix(env->libname, env->namespace), def->name, "_t;\n"); + return CORD_all("typedef Text_t ", namespace_prefix(env->libname, env->namespace), def->name, "_t;\n"); } case Lambda: { auto lambda = Match(ast, Lambda); diff --git a/environment.c b/environment.c index b650004..01c3094 100644 --- a/environment.c +++ b/environment.c @@ -262,7 +262,7 @@ env_t *new_compilation_unit(CORD *libname) {"num_codepoints", "Text$num_codepoints", "func(text:Text)->Int"}, {"quoted", "Text$quoted", "func(text:Text, color=no)->Text"}, {"read_line", "Text$read_line", "func(prompt='')->Text"}, - {"replace", "Text$replace", "func(text:Text, pattern:Text, replacement:Text, limit=-1)->Text"}, + {"replace", "Text$replace", "func(text:Text, pattern:Text, replacement:Text)->Text"}, {"split", "Text$split", "func(text:Text, split:Text)->[Text]"}, {"title", "Text$title", "func(text:Text)->Text"}, {"trimmed", "Text$trimmed", "func(text:Text, trim=\" {\\n\\r\\t}\", where=Where.Anywhere)->Text"}, diff --git a/typecheck.c b/typecheck.c index 4d4c080..27e20b0 100644 --- a/typecheck.c +++ b/typecheck.c @@ -1377,7 +1377,8 @@ bool is_constant(env_t *env, ast_t *ast) } case TextJoin: { auto text = Match(ast, TextJoin); - return !text->children || !text->children->next; + // TODO: support short literal strings + return !text->children; } case Not: return is_constant(env, Match(ast, Not)->value); case Negative: return is_constant(env, Match(ast, Negative)->value);