From 5d6fa135b1eadbceac04e5456fabb7e53feedc10 Mon Sep 17 00:00:00 2001
From: Bruce Hill
Date: Mon, 2 Sep 2024 23:26:55 -0400
Subject: Add Text:find_all()

---
 builtins/text.c | 23 +++++++++++++++++++++++
 builtins/text.h |  6 +-----
 environment.c   |  3 ++-
 test/text.tm    | 18 ++++++++++++++++++
 4 files changed, 44 insertions(+), 6 deletions(-)

diff --git a/builtins/text.c b/builtins/text.c
index 4ec3d835..2f3fbb46 100644
--- a/builtins/text.c
+++ b/builtins/text.c
@@ -1402,6 +1402,29 @@ public Text_t Text$quoted(Text_t text, bool colorize)
 #undef add_escaped
 }
 
+// Collect every non-overlapping match of `pattern` in `text`, searching from
+// the first position onward. An empty pattern yields an empty array.
+public array_t Text$find_all(Text_t text, Text_t pattern)
+{
+    if (pattern.length == 0) // special case
+        return (array_t){.length=0};
+
+    array_t matches = {};
+
+    Int_t i = I_small(1);
+    for (;;) {
+        int64_t len = 0;
+        Int_t found = Text$find(text, pattern, i, &len);
+        if (I_is_zero(found)) break;
+        Text_t match = Text$slice(text, found, Int$plus(found, Int64_to_Int(len-1)));
+        Array$insert(&matches, &match, I_small(0), sizeof(Text_t));
+        // Always step forward at least one position so a zero-length match cannot stall the loop:
+        i = Int$plus(found, Int64_to_Int(len > 0 ? len : 1));
+    }
+
+    return matches;
+}
+
 public Text_t Text$replace(Text_t text, Text_t pattern, Text_t replacement)
 {
     Text_t ret = {.length=0};
diff --git a/builtins/text.h b/builtins/text.h
index 9f187a37..20ea9a25 100644
--- a/builtins/text.h
+++ b/builtins/text.h
@@ -12,11 +12,6 @@
 #include "types.h"
 #include "where.h"
 
-typedef struct {
-    enum { FIND_FAILURE, FIND_SUCCESS } status;
-    int32_t index;
-} find_result_t;
-
 int printf_text(FILE *stream, const struct printf_info *info, const void *const args[]);
 int printf_text_size(const struct printf_info *info, size_t n, int argtypes[n], int sizes[n]);
 
@@ -38,6 +33,7 @@ Text_t Text$quoted(Text_t str, bool colorize);
 Text_t Text$replace(Text_t str, Text_t pat, Text_t replacement);
 array_t Text$split(Text_t text, Text_t pattern);
 Int_t Text$find(Text_t text, Text_t pattern, Int_t i, int64_t *match_length);
+array_t Text$find_all(Text_t text, Text_t pattern);
 bool Text$has(Text_t text, Text_t pattern);
 const char *Text$as_c_string(Text_t text);
 public Text_t Text$format(const char *fmt, ...);
diff --git a/environment.c b/environment.c
index 09935f11..d4ed6c8d 100644
--- a/environment.c
+++ b/environment.c
@@ -247,7 +247,8 @@ env_t *new_compilation_unit(CORD *libname)
             {"by", "Range$by", "func(range:Range, step:Int)->Range"},
         )},
         {"Text", TEXT_TYPE, "Text_t", "$Text", TypedArray(ns_entry_t,
-            // {"find", "Text$find", "func(text:Text, pattern:Text)->FindResult"},
+            {"find", "Text$find", "func(text:Text, pattern:Text)->Int"},
+            {"find_all", "Text$find_all", "func(text:Text, pattern:Text)->[Text]"},
             {"as_c_string", "CORD_to_char_star", "func(text:Text)->CString"},
             {"codepoint_names", "Text$codepoint_names", "func(text:Text)->[Text]"},
             {"from_bytes", "Text$from_bytes", "func(bytes:[Int8])->Text"},
diff --git a/test/text.tm b/test/text.tm
index cf084f8a..0dd5f2ec 100644
--- a/test/text.tm
+++ b/test/text.tm
@@ -155,3 +155,21 @@ func main():
 
 	>> "":split()
 	= []
+
+	>> " one two three ":find_all("[..alpha]")
+	= ["one", "two", "three"]
+
+	>> " one two three ":find_all("[..!space]")
+	= ["one", "two", "three"]
+
+	>> " ":find_all("[..alpha]")
+	= []
+
+	>> " foo(baz(), 1) doop() ":find_all("[..id](?)")
+	= ["foo(baz(), 1)", "doop()"]
+
+	>> "":find_all("")
+	= []
+
+	>> "Hello":find_all("")
+	= []
-- 
cgit v1.2.3