about summary refs log tree commit diff
diff options
context:
space:
mode:
author Bruce Hill <bruce@bruce-hill.com> 2024-09-02 19:49:52 -0400
committer Bruce Hill <bruce@bruce-hill.com> 2024-09-02 19:49:52 -0400
commit 6d7a359f8f7757b1e23fa7c0feaf535541dcc84a (patch)
tree 4dfc03e0c2b05306195c3fd645e81796714f30e5
parent 80a09e6dba7042271cba5372e31c2e5e86e58215 (diff)
Bugfix some text replacement things
-rw-r--r-- builtins/integers.h 1
-rw-r--r-- builtins/text.c 58
-rw-r--r-- compile.c 2
-rw-r--r-- environment.c 2
-rw-r--r-- typecheck.c 3
5 files changed, 36 insertions, 30 deletions
diff --git a/builtins/integers.h b/builtins/integers.h
index 6e2a1fe6..9f342f2f 100644
--- a/builtins/integers.h
+++ b/builtins/integers.h
@@ -106,6 +106,7 @@ Int_t Int$sqrt(Int_t i);
#define I(i) ((int64_t)(i) == (int32_t)(i) ? ((Int_t){.small=((uint64_t)(i)<<2)|1}) : Int64_to_Int(i))
#define I_small(i) ((Int_t){.small=((uint64_t)(i)<<2)|1})
+#define I_is_zero(i) ((i).small == 1)
Int_t Int$slow_plus(Int_t x, Int_t y);
Int_t Int$slow_minus(Int_t x, Int_t y);
diff --git a/builtins/text.c b/builtins/text.c
index 32e9609b..cff3a7a8 100644
--- a/builtins/text.c
+++ b/builtins/text.c
@@ -237,8 +237,8 @@ public Text_t Text$_concat(int n, Text_t items[n])
public Text_t Text$slice(Text_t text, Int_t first_int, Int_t last_int)
{
- int64_t first = Int_to_Int64(first_int, false)-1;
- int64_t last = Int_to_Int64(last_int, false)-1;
+ int64_t first = Int_to_Int64(first_int, false);
+ int64_t last = Int_to_Int64(last_int, false);
if (first == 0) errx(1, "Invalid index: 0");
if (last == 0) return (Text_t){.length=0};
@@ -1260,17 +1260,17 @@ int64_t match(Text_t text, Text_t pattern, int64_t text_index, int64_t pattern_i
pattern_index = old_pat_index;
int32_t pat_grapheme = _next_grapheme(pattern, &pattern_state, pattern_index);
- if (pattern_index == 0 && text_index > 0) {
- int32_t pat_codepoint = pat_grapheme;
- if (pat_codepoint < 0)
- pat_codepoint = synthetic_graphemes[-pat_codepoint-1].codepoints[0];
+ // if (pattern_index == 0 && text_index > 0) {
+ // int32_t pat_codepoint = pat_grapheme;
+ // if (pat_codepoint < 0)
+ // pat_codepoint = synthetic_graphemes[-pat_codepoint-1].codepoints[0];
- int32_t prev_codepoint = _next_grapheme(text, &text_state, text_index - 1);
- if (prev_codepoint < 0)
- prev_codepoint = synthetic_graphemes[-prev_codepoint-1].codepoints[0];
- if (uc_is_property_alphabetic(pat_codepoint) && uc_is_property_alphabetic(prev_codepoint))
- return -1;
- }
+ // int32_t prev_codepoint = _next_grapheme(text, &text_state, text_index - 1);
+ // if (prev_codepoint < 0)
+ // prev_codepoint = synthetic_graphemes[-prev_codepoint-1].codepoints[0];
+ // if (uc_is_property_alphabetic(pat_codepoint) && uc_is_property_alphabetic(prev_codepoint))
+ // return -1;
+ // }
int32_t text_grapheme = _next_grapheme(text, &text_state, text_index);
if (pat_grapheme != text_grapheme)
@@ -1279,17 +1279,17 @@ int64_t match(Text_t text, Text_t pattern, int64_t text_index, int64_t pattern_i
pattern_index += 1;
text_index += 1;
- if (pattern_index == pattern.length && text_index < text.length) {
- int32_t pat_codepoint = pat_grapheme;
- if (pat_codepoint < 0)
- pat_codepoint = synthetic_graphemes[-pat_codepoint-1].codepoints[0];
-
- int32_t next_codepoint = _next_grapheme(text, &text_state, text_index);
- if (next_codepoint < 0)
- next_codepoint = synthetic_graphemes[-next_codepoint-1].codepoints[0];
- if (uc_is_property_alphabetic(pat_codepoint) && uc_is_property_alphabetic(next_codepoint))
- return -1;
- }
+ // if (pattern_index == pattern.length && text_index < text.length) {
+ // int32_t pat_codepoint = pat_grapheme;
+ // if (pat_codepoint < 0)
+ // pat_codepoint = synthetic_graphemes[-pat_codepoint-1].codepoints[0];
+
+ // int32_t next_codepoint = _next_grapheme(text, &text_state, text_index);
+ // if (next_codepoint < 0)
+ // next_codepoint = synthetic_graphemes[-next_codepoint-1].codepoints[0];
+ // if (uc_is_property_alphabetic(pat_codepoint) && uc_is_property_alphabetic(next_codepoint))
+ // return -1;
+ // }
}
}
if (text_index >= text.length && pattern_index < pattern.length)
@@ -1346,8 +1346,11 @@ public int printf_text(FILE *stream, const struct printf_info *info, const void
public Text_t Text$as_text(const void *text, bool colorize, const TypeInfo *info)
{
(void)info;
- if (!text) return Text$from_str("Text");
- return Text$quoted(*(Text_t*)text, colorize);
+ if (!text) return info && info->TextInfo.lang ? Text$from_str(info->TextInfo.lang) : Text$from_str("Text");
+ Text_t as_text = Text$quoted(*(Text_t*)text, colorize);
+ if (info && info->TextInfo.lang && info != &$Text)
+ as_text = Text$concat(Text$from_str(colorize ? "\x1b[1m$" : "$"), Text$from_str(info->TextInfo.lang), as_text);
+ return as_text;
}
public Text_t Text$quoted(Text_t text, bool colorize)
@@ -1407,11 +1410,11 @@ public Text_t Text$replace(Text_t text, Text_t pattern, Text_t replacement)
{
Text_t ret = {.length=0};
- Int_t i = I_small(0);
+ Int_t i = I_small(1);
for (;;) {
int64_t len;
Int_t found = Text$find(text, pattern, i, &len);
- if (found.small == I_small(0).small) break;
+ if (I_is_zero(found)) break;
if (Int$compare(&found, &i, &$Text) > 0) {
ret = Text$concat(
ret,
@@ -1421,6 +1424,7 @@ public Text_t Text$replace(Text_t text, Text_t pattern, Text_t replacement)
} else {
ret = concat2(ret, replacement);
}
+ i = Int$plus(found, Int64_to_Int(len));
}
if (Int_to_Int64(i, false) <= text.length) {
ret = concat2(ret, Text$slice(text, i, Int64_to_Int(text.length)));
diff --git a/compile.c b/compile.c
index 10a345ce..82963066 100644
--- a/compile.c
+++ b/compile.c
@@ -3256,7 +3256,7 @@ CORD compile_statement_typedefs(env_t *env, ast_t *ast)
}
case LangDef: {
auto def = Match(ast, LangDef);
- return CORD_all("typedef CORD ", namespace_prefix(env->libname, env->namespace), def->name, "_t;\n");
+ return CORD_all("typedef Text_t ", namespace_prefix(env->libname, env->namespace), def->name, "_t;\n");
}
case Lambda: {
auto lambda = Match(ast, Lambda);
diff --git a/environment.c b/environment.c
index b650004c..01c30946 100644
--- a/environment.c
+++ b/environment.c
@@ -262,7 +262,7 @@ env_t *new_compilation_unit(CORD *libname)
{"num_codepoints", "Text$num_codepoints", "func(text:Text)->Int"},
{"quoted", "Text$quoted", "func(text:Text, color=no)->Text"},
{"read_line", "Text$read_line", "func(prompt='')->Text"},
- {"replace", "Text$replace", "func(text:Text, pattern:Text, replacement:Text, limit=-1)->Text"},
+ {"replace", "Text$replace", "func(text:Text, pattern:Text, replacement:Text)->Text"},
{"split", "Text$split", "func(text:Text, split:Text)->[Text]"},
{"title", "Text$title", "func(text:Text)->Text"},
{"trimmed", "Text$trimmed", "func(text:Text, trim=\" {\\n\\r\\t}\", where=Where.Anywhere)->Text"},
diff --git a/typecheck.c b/typecheck.c
index 4d4c080e..27e20b04 100644
--- a/typecheck.c
+++ b/typecheck.c
@@ -1377,7 +1377,8 @@ bool is_constant(env_t *env, ast_t *ast)
}
case TextJoin: {
auto text = Match(ast, TextJoin);
- return !text->children || !text->children->next;
+ // TODO: support short literal strings
+ return !text->children;
}
case Not: return is_constant(env, Match(ast, Not)->value);
case Negative: return is_constant(env, Match(ast, Negative)->value);