diff options
| author | Bruce Hill <bruce@bruce-hill.com> | 2024-09-02 22:57:49 -0400 |
|---|---|---|
| committer | Bruce Hill <bruce@bruce-hill.com> | 2024-09-02 22:57:49 -0400 |
| commit | 9214e621bf7fdaec29b872a5b0e757806fa61b40 (patch) | |
| tree | 78fbdb799582500aeadcaa5643e6a54455e9038c | |
| parent | 5c093e6a4ae7ede43ddd29611c8e524960cbd2d0 (diff) | |
Add Text:lines()
| -rw-r--r-- | builtins/text.c | 23 | ||||
| -rw-r--r-- | builtins/text.h | 1 | ||||
| -rw-r--r-- | environment.c | 1 | ||||
| -rw-r--r-- | test/text.tm | 9 |
4 files changed, 34 insertions, 0 deletions
diff --git a/builtins/text.c b/builtins/text.c
index 1145fb08..ff1f5ba2 100644
--- a/builtins/text.c
+++ b/builtins/text.c
@@ -1535,6 +1535,29 @@ public Text_t Text$from_bytes(array_t bytes)
     return Text$from_str(bytes.data);
 }
 
+public array_t Text$lines(Text_t text) // Split `text` into an array of lines at LF or CRLF; terminators are not included in the lines
+{
+    array_t lines = {};
+    iteration_state_t state = {0, 0};
+    for (int64_t i = 0, line_start = 0; i < text.length; i++) { // i and line_start are 0-based, hence the +1 on the first Text$slice() bound
+        int32_t grapheme = _next_grapheme(text, &state, i);
+        if (grapheme == '\r' && i + 1 < text.length && _next_grapheme(text, &state, i + 1) == '\n') { // CRLF (only look ahead when a next grapheme exists)
+            Text_t line = Text$slice(text, Int64_to_Int(line_start+1), Int64_to_Int(i));
+            Array$insert(&lines, &line, I_small(0), sizeof(Text_t));
+            i += 1; // skip the LF that follows the CR
+            line_start = i + 1;
+        } else if (grapheme == '\n') { // newline
+            Text_t line = Text$slice(text, Int64_to_Int(line_start+1), Int64_to_Int(i));
+            Array$insert(&lines, &line, I_small(0), sizeof(Text_t));
+            line_start = i + 1;
+        } else if (i == text.length-1) { // unterminated last line, kept even when it is a single grapheme (line_start == i)
+            Text_t line = Text$slice(text, Int64_to_Int(line_start+1), Int64_to_Int(i+1));
+            Array$insert(&lines, &line, I_small(0), sizeof(Text_t));
+        }
+    }
+    return lines;
+}
+
 public const TypeInfo $Text = {
     .size=sizeof(Text_t),
     .align=__alignof__(Text_t),
diff --git a/builtins/text.h b/builtins/text.h
index 7cef834d..a4697507 100644
--- a/builtins/text.h
+++ b/builtins/text.h
@@ -71,6 +71,7 @@ array_t Text$codepoint_names(Text_t text);
 Text_t Text$from_codepoints(array_t codepoints);
 Text_t Text$from_codepoint_names(array_t codepoint_names);
 Text_t Text$from_bytes(array_t bytes);
+array_t Text$lines(Text_t text);
 
 extern const TypeInfo $Text;
 
diff --git a/environment.c b/environment.c
index 100cfcc2..2385455a 100644
--- a/environment.c
+++ b/environment.c
@@ -257,6 +257,7 @@ env_t *new_compilation_unit(CORD *libname)
             {"from_codepoints", "Text$from_codepoints", "func(codepoints:[Int32])->Text"},
             {"has", "Text$has", "func(text:Text, pattern:Text)->Bool"},
             {"join", "Text$join", "func(glue:Text, pieces:[Text])->Text"},
+            {"lines", "Text$lines", "func(text:Text)->[Text]"},
             {"lower", "Text$lower", "func(text:Text)->Text"},
             {"quoted", "Text$quoted", "func(text:Text, color=no)->Text"},
             {"replace", "Text$replace", "func(text:Text, pattern:Text, replacement:Text)->Text"},
diff --git a/test/text.tm b/test/text.tm
index 2d386048..dc342659 100644
--- a/test/text.tm
+++ b/test/text.tm
@@ -119,3 +119,12 @@ func main():
 
     >> c == Text.from_bytes(c:utf8_bytes())
     = yes
+
+    >> "one$(\n)two$(\n)three":lines()
+    = ["one", "two", "three"]
+    >> "one$(\n)two$(\n)three$(\n)":lines()
+    = ["one", "two", "three"]
+    >> "one$(\n)two$(\n)three$(\n\n)":lines()
+    = ["one", "two", "three", ""]
+    >> "one$(\r\n)two$(\r\n)three$(\r\n)":lines()
+    = ["one", "two", "three"]
