aboutsummaryrefslogtreecommitdiff
path: root/stdlib
diff options
context:
space:
mode:
Diffstat (limited to 'stdlib')
-rw-r--r-- stdlib/patterns.c 78
-rw-r--r-- stdlib/patterns.h 2
-rw-r--r-- stdlib/text.c 35
-rw-r--r-- stdlib/text.h 1
4 files changed, 115 insertions, 1 deletions
diff --git a/stdlib/patterns.c b/stdlib/patterns.c
index 66855d2d..48f43aed 100644
--- a/stdlib/patterns.c
+++ b/stdlib/patterns.c
@@ -873,6 +873,33 @@ public Array_t Text$find_all(Text_t text, Pattern_t pattern)
return matches;
}
+typedef struct { // Cursor stored as the userdata of the closure built by Text$by_match()
+ TextIter_t state; // holds the text being searched; next_match() only reads state.text
+ Int_t i; // position handed to Text$find() as the start of the next search; initialized to 1 by Text$by_match()
+ Pattern_t pattern; // pattern passed to Text$find() on every step
+} match_iter_state_t;
+
+static OptionalMatch_t next_match(match_iter_state_t *state) // One iterator step: returns what Text$find() reports when searching from state->i, or NONE_MATCH once the cursor is past the end of the text
+{
+ if (Int_to_Int64(state->i, false) > state->state.text.length) // cursor already past the text: iteration is finished
+ return NONE_MATCH;
+
+ OptionalMatch_t m = Text$find(state->state.text, state->pattern, state->i);
+ if (m.index.small == 0) // No match
+ state->i = I(state->state.text.length + 1); // park the cursor past the end so later calls return NONE_MATCH immediately
+ else
+ state->i = Int$plus(m.index, I(MAX(1, m.text.length))); // resume after this match; MAX(1, ...) advances at least one position so an empty match cannot repeat forever
+ return m; // on the no-match step this is the result from Text$find(), returned as-is
+}
+
+public Closure_t Text$by_match(Text_t text, Pattern_t pattern) // Builds a closure whose fn is next_match() and whose userdata is a new match_iter_state_t for (text, pattern)
+{
+ return (Closure_t){
+ .fn=(void*)next_match, // step function, called with the userdata below
+ .userdata=new(match_iter_state_t, .state={text, 0, 0}, .i=I_small(1), .pattern=pattern), // search cursor starts at position 1
+ };
+}
+
static Text_t apply_backrefs(Text_t text, Pattern_t original_pattern, Text_t replacement, Pattern_t backref_pat, capture_t *captures)
{
if (backref_pat.length == 0)
@@ -1145,10 +1172,12 @@ public Array_t Text$split(Text_t text, Pattern_t pattern)
for (;;) {
int64_t len = 0;
int64_t found = _find(text, pattern, i, text.length-1, &len, NULL);
+ if (found == i && len == 0)
+ found = _find(text, pattern, i + 1, text.length-1, &len, NULL);
if (found < 0) break;
Text_t chunk = Text$slice(text, I(i+1), I(found));
Array$insert(&chunks, &chunk, I_small(0), sizeof(Text_t));
- i = found + MAX(len, 1);
+ i = MAX(found + len, i + 1);
}
Text_t last_chunk = Text$slice(text, I(i+1), I(text.length));
@@ -1157,6 +1186,53 @@ public Array_t Text$split(Text_t text, Pattern_t pattern)
return chunks;
}
+typedef struct { // Cursor stored as the userdata of the closure built by Text$by_split()
+ TextIter_t state; // holds the text being split; next_split() reads state.text
+ int64_t i; // offset where the next chunk starts; starts at 0 and is set to text.length + 1 after the final chunk of a non-empty pattern
+ Pattern_t pattern; // separator pattern; a zero-length pattern makes next_split() return one Text$cluster() result per step
+} split_iter_state_t;
+
+static OptionalText_t next_split(split_iter_state_t *state) // One iterator step: returns the next chunk of the text between pattern matches, or NONE_TEXT when finished
+{
+ Text_t text = state->state.text;
+ if (state->i >= text.length) { // cursor is at or past the end of the text
+ if (state->pattern.length > 0 && state->i == text.length) { // special case: cursor sits exactly at the end (e.g. empty text, or the previous match ended the text), so yield one empty chunk
+ state->i = text.length + 1; // move past the end so the empty chunk is yielded only once
+ return (Text_t){.length=0};
+ }
+ return NONE_TEXT;
+ }
+
+ if (state->pattern.length == 0) { // special case: an empty pattern yields Text$cluster() at each successive position
+ Text_t ret = Text$cluster(text, I(state->i+1));
+ state->i += 1;
+ return ret;
+ }
+
+ int64_t start = state->i;
+ int64_t len = 0;
+ int64_t found = _find(text, state->pattern, start, text.length-1, &len, NULL);
+
+ if (found == start && len == 0) // zero-length match right at the cursor: search again one position later
+ found = _find(text, state->pattern, start + 1, text.length-1, &len, NULL);
+
+ if (found >= 0) {
+ state->i = MAX(found + len, state->i + 1); // continue after the match, always advancing by at least one
+ return Text$slice(text, I(start+1), I(found)); // chunk from the cursor up to where the match begins
+ } else {
+ state->i = state->state.text.length + 1; // no further match: mark the iterator finished
+ return Text$slice(text, I(start+1), I(text.length)); // the remainder of the text is the last chunk
+ }
+}
+
+public Closure_t Text$by_split(Text_t text, Pattern_t pattern) // Builds a closure whose fn is next_split() and whose userdata is a new split_iter_state_t for (text, pattern)
+{
+ return (Closure_t){
+ .fn=(void*)next_split, // step function, called with the userdata below
+ .userdata=new(split_iter_state_t, .state={text, 0, 0}, .i=0, .pattern=pattern), // chunk cursor starts at offset 0
+ };
+}
+
public const TypeInfo_t Pattern$info = {
.size=sizeof(Pattern_t),
.align=__alignof__(Pattern_t),
diff --git a/stdlib/patterns.h b/stdlib/patterns.h
index 641009a7..494c8338 100644
--- a/stdlib/patterns.h
+++ b/stdlib/patterns.h
@@ -27,9 +27,11 @@ Text_t Text$replace(Text_t str, Pattern_t pat, Text_t replacement, Pattern_t bac
Pattern_t Pattern$escape_text(Text_t text);
Text_t Text$replace_all(Text_t text, Table_t replacements, Pattern_t backref_pat, bool recursive);
Array_t Text$split(Text_t text, Pattern_t pattern);
+Closure_t Text$by_split(Text_t text, Pattern_t pattern); // Returns an iterator closure that yields, one per call, the chunks of text separated by matches of pattern
Text_t Text$trim(Text_t text, Pattern_t pattern, bool trim_left, bool trim_right);
OptionalMatch_t Text$find(Text_t text, Pattern_t pattern, Int_t i);
Array_t Text$find_all(Text_t text, Pattern_t pattern);
+Closure_t Text$by_match(Text_t text, Pattern_t pattern); // Returns an iterator closure that yields, one per call, successive Text$find() results for pattern in text
PUREFUNC bool Text$has(Text_t text, Pattern_t pattern);
OptionalArray_t Text$matches(Text_t text, Pattern_t pattern);
Text_t Text$map(Text_t text, Pattern_t pattern, Closure_t fn);
diff --git a/stdlib/text.c b/stdlib/text.c
index 57a98e8d..65d4150a 100644
--- a/stdlib/text.c
+++ b/stdlib/text.c
@@ -1422,6 +1422,41 @@ public Array_t Text$lines(Text_t text)
return lines;
}
+typedef struct { // Cursor stored as the userdata of the closure built by Text$by_line()
+ TextIter_t state; // holds the text (state.text) and is passed to Text$get_grapheme_fast() on each lookup
+ int64_t i; // offset where the next line starts; initialized to 0 by Text$by_line()
+} line_iter_state_t;
+
+static OptionalText_t next_line(line_iter_state_t *state) // One iterator step: returns the next line without its "\n" or "\r\n" terminator, or NONE_TEXT when no text is left
+{
+ Text_t text = state->state.text;
+ for (int64_t i = state->i; i < text.length; i++) { // scan forward from the start of the current line
+ int32_t grapheme = Text$get_grapheme_fast(&state->state, i);
+ if (grapheme == '\r' && i + 1 < text.length && Text$get_grapheme_fast(&state->state, i + 1) == '\n') { // CRLF; the bounds check keeps a trailing lone CR from reading index text.length
+ Text_t line = Text$slice(text, I(state->i+1), I(i));
+ state->i = i + 2; // skip one extra for CR
+ return line;
+ } else if (grapheme == '\n') { // newline
+ Text_t line = Text$slice(text, I(state->i+1), I(i));
+ state->i = i + 1;
+ return line;
+ } else if (i == text.length-1) { // last line with no terminator; must also fire when state->i == i, otherwise a one-grapheme final line is dropped
+ Text_t line = Text$slice(text, I(state->i+1), I(i+1)); // NOTE(review): Text$lines() is not fully visible here; confirm it treats a one-grapheme final line the same way
+ state->i = i + 1;
+ return line;
+ }
+ }
+ return NONE_TEXT;
+}
+
+public Closure_t Text$by_line(Text_t text) // Builds a closure whose fn is next_line() and whose userdata is a new line_iter_state_t for text
+{
+ return (Closure_t){
+ .fn=(void*)next_line, // step function, called with the userdata below
+ .userdata=new(line_iter_state_t, .state={text, 0, 0}, .i=0), // line cursor starts at offset 0
+ };
+}
+
PUREFUNC public bool Text$is_none(const void *t, const TypeInfo_t*)
{
return ((Text_t*)t)->length < 0;
diff --git a/stdlib/text.h b/stdlib/text.h
index f7785ddb..79c094af 100644
--- a/stdlib/text.h
+++ b/stdlib/text.h
@@ -59,6 +59,7 @@ Text_t Text$from_codepoints(Array_t codepoints);
OptionalText_t Text$from_codepoint_names(Array_t codepoint_names);
OptionalText_t Text$from_bytes(Array_t bytes);
Array_t Text$lines(Text_t text);
+Closure_t Text$by_line(Text_t text); // Returns an iterator closure that yields the lines of text one per call, splitting on "\n" and "\r\n"
Text_t Text$join(Text_t glue, Array_t pieces);
Text_t Text$repeat(Text_t text, Int_t count);
int32_t Text$get_grapheme_fast(TextIter_t *state, int64_t index);