From 6d7e09bf1801c2fe183df17cc67017a6d3d8513b Mon Sep 17 00:00:00 2001
From: Bruce Hill
Date: Mon, 2 Sep 2024 23:07:08 -0400
Subject: [PATCH] Add Text:split() and use that with an empty pattern instead of Text:clusters()

---
 builtins/text.c | 32 ++++++++++++++++++++++++++++++++
 builtins/text.h |  1 +
 environment.c   |  3 +--
 test/text.tm    | 16 ++++++++++++++--
 4 files changed, 48 insertions(+), 4 deletions(-)

diff --git a/builtins/text.c b/builtins/text.c
index ff1f5ba..a1a98a0 100644
--- a/builtins/text.c
+++ b/builtins/text.c
@@ -1426,6 +1426,38 @@ public Text_t Text$replace(Text_t text, Text_t pattern, Text_t replacement)
     return ret;
 }
 
+// Split `text` into the chunks that lie between successive matches of
+// `pattern`, in the order they occur. An empty pattern is handed off to
+// Text$clusters(). Whatever follows the final match is always added as the
+// last chunk, so text ending in a separator yields a trailing empty chunk.
+public array_t Text$split(Text_t text, Text_t pattern)
+{
+    if (pattern.length == 0) // special case
+        return Text$clusters(text);
+
+    array_t chunks = {};
+
+    Int_t i = I_small(1); // position where the current chunk begins
+    for (;;) {
+        int64_t len = 0; // match length, filled in by Text$find()
+        Int_t found = Text$find(text, pattern, i, &len);
+        if (I_is_zero(found)) break; // a zero result ends the search
+        // A match that consumes nothing can never move `i` past itself, so
+        // the loop would spin forever; stop and keep the rest as one chunk.
+        // NOTE(review): confirm whether Text$find() can report such a match.
+        if (len <= 0) break;
+        Text_t chunk = Text$slice(text, i, Int$minus(found, I_small(1)));
+        // Index 0 is presumably "append"; the tests expect text order.
+        Array$insert(&chunks, &chunk, I_small(0), sizeof(Text_t));
+        i = Int$plus(found, Int64_to_Int(len)); // resume just past the match
+    }
+
+    Text_t last_chunk = Text$slice(text, i, Int64_to_Int(text.length));
+    Array$insert(&chunks, &last_chunk, I_small(0), sizeof(Text_t));
+
+    return chunks;
+}
+
 public Text_t Text$format(const char *fmt, ...)
{ va_list args; diff --git a/builtins/text.h b/builtins/text.h index a469750..cca18a2 100644 --- a/builtins/text.h +++ b/builtins/text.h @@ -60,6 +60,7 @@ Text_t Text$title(Text_t text); Text_t Text$as_text(const void *text, bool colorize, const TypeInfo *info); Text_t Text$quoted(Text_t str, bool colorize); Text_t Text$replace(Text_t str, Text_t pat, Text_t replacement); +array_t Text$split(Text_t text, Text_t pattern); Int_t Text$find(Text_t text, Text_t pattern, Int_t i, int64_t *match_length); bool Text$has(Text_t text, Text_t pattern); const char *Text$as_c_string(Text_t text); diff --git a/environment.c b/environment.c index 2385455..09935f1 100644 --- a/environment.c +++ b/environment.c @@ -249,7 +249,6 @@ env_t *new_compilation_unit(CORD *libname) {"Text", TEXT_TYPE, "Text_t", "$Text", TypedArray(ns_entry_t, // {"find", "Text$find", "func(text:Text, pattern:Text)->FindResult"}, {"as_c_string", "CORD_to_char_star", "func(text:Text)->CString"}, - {"clusters", "Text$clusters", "func(text:Text)->[Text]"}, {"codepoint_names", "Text$codepoint_names", "func(text:Text)->[Text]"}, {"from_bytes", "Text$from_bytes", "func(bytes:[Int8])->Text"}, {"from_c_string", "Text$from_str", "func(str:CString)->Text"}, @@ -261,7 +260,7 @@ env_t *new_compilation_unit(CORD *libname) {"lower", "Text$lower", "func(text:Text)->Text"}, {"quoted", "Text$quoted", "func(text:Text, color=no)->Text"}, {"replace", "Text$replace", "func(text:Text, pattern:Text, replacement:Text)->Text"}, - {"split", "Text$split", "func(text:Text, split:Text)->[Text]"}, + {"split", "Text$split", "func(text:Text, pattern='')->[Text]"}, {"title", "Text$title", "func(text:Text)->Text"}, {"trimmed", "Text$trimmed", "func(text:Text, trim=\" {\\n\\r\\t}\", where=Where.Anywhere)->Text"}, {"upper", "Text$upper", "func(text:Text)->Text"}, diff --git a/test/text.tm b/test/text.tm index dc34265..0955ffd 100644 --- a/test/text.tm +++ b/test/text.tm @@ -20,7 +20,7 @@ func main(): = yes >> amelie := "Am$(\UE9)lie" - >> 
amelie:clusters() + >> amelie:split() = ["A", "m", "é", "l", "i", "e"] : [Text] >> amelie:utf32_codepoints() = [65_i32, 109_i32, 233_i32, 108_i32, 105_i32, 101_i32] : [Int32] @@ -28,7 +28,7 @@ func main(): = [65_i8, 109_i8, -61_i8, -87_i8, 108_i8, 105_i8, 101_i8] : [Int8] >> amelie2 := "Am$(\U65\U301)lie" - >> amelie2:clusters() + >> amelie2:split() = ["A", "m", "é", "l", "i", "e"] : [Text] >> amelie2:utf32_codepoints() = [65_i32, 109_i32, 233_i32, 108_i32, 105_i32, 101_i32] : [Int32] @@ -128,3 +128,15 @@ func main(): = ["one", "two", "three", ""] >> "one$(\r\n)two$(\r\n)three$(\r\n)":lines() = ["one", "two", "three"] + + >> "one two three":split(" ") + = ["one", "two", "three"] + + >> "one,two,three,":split(",") + = ["one", "two", "three", ""] + + >> "one two three":split("[..space]") + = ["one", "two", "three"] + + >> "abc":split("") + = ["a", "b", "c"]