aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBruce Hill <bruce@bruce-hill.com>2024-09-02 22:57:49 -0400
committerBruce Hill <bruce@bruce-hill.com>2024-09-02 22:57:49 -0400
commit9214e621bf7fdaec29b872a5b0e757806fa61b40 (patch)
tree78fbdb799582500aeadcaa5643e6a54455e9038c
parent5c093e6a4ae7ede43ddd29611c8e524960cbd2d0 (diff)
Add Text:lines()
-rw-r--r--builtins/text.c23
-rw-r--r--builtins/text.h1
-rw-r--r--environment.c1
-rw-r--r--test/text.tm9
4 files changed, 34 insertions, 0 deletions
diff --git a/builtins/text.c b/builtins/text.c
index 1145fb08..ff1f5ba2 100644
--- a/builtins/text.c
+++ b/builtins/text.c
@@ -1535,6 +1535,29 @@ public Text_t Text$from_bytes(array_t bytes)
return Text$from_str(bytes.data);
}
+// Split `text` into an array of Text_t lines. A line ends at "\n" or at "\r\n";
+// the terminator itself is not included in the line. A final line that has no
+// terminator is still returned, while a terminator at the very end of the text
+// does not add an extra empty line after it.
+public array_t Text$lines(Text_t text)
+{
+    array_t lines = {};
+    iteration_state_t state = {0, 0};
+    for (int64_t i = 0, line_start = 0; i < text.length; i++) {
+        int32_t grapheme = _next_grapheme(text, &state, i);
+
+        // Number of graphemes taken up by a line terminator starting at i (0 = none).
+        int64_t terminator_len = 0;
+        if (grapheme == '\n') {
+            terminator_len = 1;
+        } else if (grapheme == '\r' && i + 1 < text.length
+                   && _next_grapheme(text, &state, i + 1) == '\n') {
+            // Only peek at i+1 when it is in bounds: a lone '\r' at the very end
+            // of the text is ordinary content, not half of a CRLF.
+            terminator_len = 2;
+        }
+
+        if (terminator_len > 0) {
+            // Line covers 0-based graphemes [line_start, i); the slice bounds used
+            // here look 1-based and inclusive (NOTE(review): confirm against Text$slice).
+            Text_t line = Text$slice(text, Int64_to_Int(line_start+1), Int64_to_Int(i));
+            // NOTE(review): index 0 presumably means "append at the end" -- confirm against Array$insert.
+            Array$insert(&lines, &line, I_small(0), sizeof(Text_t));
+            i += terminator_len - 1; // for CRLF, also step over the '\n'
+            line_start = i + 1;
+        } else if (i == text.length-1) {
+            // Unterminated last line. The grapheme at i is not a newline, so this
+            // line is never empty -- it must be emitted even when it is a single
+            // grapheme long (line_start == i).
+            Text_t line = Text$slice(text, Int64_to_Int(line_start+1), Int64_to_Int(i+1));
+            Array$insert(&lines, &line, I_small(0), sizeof(Text_t));
+        }
+    }
+    return lines;
+}
+
public const TypeInfo $Text = {
.size=sizeof(Text_t),
.align=__alignof__(Text_t),
diff --git a/builtins/text.h b/builtins/text.h
index 7cef834d..a4697507 100644
--- a/builtins/text.h
+++ b/builtins/text.h
@@ -71,6 +71,7 @@ array_t Text$codepoint_names(Text_t text);
Text_t Text$from_codepoints(array_t codepoints);
Text_t Text$from_codepoint_names(array_t codepoint_names);
Text_t Text$from_bytes(array_t bytes);
+array_t Text$lines(Text_t text);
extern const TypeInfo $Text;
diff --git a/environment.c b/environment.c
index 100cfcc2..2385455a 100644
--- a/environment.c
+++ b/environment.c
@@ -257,6 +257,7 @@ env_t *new_compilation_unit(CORD *libname)
{"from_codepoints", "Text$from_codepoints", "func(codepoints:[Int32])->Text"},
{"has", "Text$has", "func(text:Text, pattern:Text)->Bool"},
{"join", "Text$join", "func(glue:Text, pieces:[Text])->Text"},
+ {"lines", "Text$lines", "func(text:Text)->[Text]"},
{"lower", "Text$lower", "func(text:Text)->Text"},
{"quoted", "Text$quoted", "func(text:Text, color=no)->Text"},
{"replace", "Text$replace", "func(text:Text, pattern:Text, replacement:Text)->Text"},
diff --git a/test/text.tm b/test/text.tm
index 2d386048..dc342659 100644
--- a/test/text.tm
+++ b/test/text.tm
@@ -119,3 +119,12 @@ func main():
>> c == Text.from_bytes(c:utf8_bytes())
= yes
+
+ >> "one$(\n)two$(\n)three":lines()
+ = ["one", "two", "three"]
+ >> "one$(\n)two$(\n)three$(\n)":lines()
+ = ["one", "two", "three"]
+ >> "one$(\n)two$(\n)three$(\n\n)":lines()
+ = ["one", "two", "three", ""]
+ >> "one$(\r\n)two$(\r\n)three$(\r\n)":lines()
+ = ["one", "two", "three"]