diff options
| author | Bruce Hill <bruce@bruce-hill.com> | 2024-10-28 13:53:15 -0400 |
|---|---|---|
| committer | Bruce Hill <bruce@bruce-hill.com> | 2024-10-28 13:53:15 -0400 |
| commit | 9c302fdc34403f46572d9524309617888ba816bb (patch) | |
| tree | 58ea7faf390536503de114cf2889ed85ba60df7b /stdlib/patterns.c | |
| parent | c632a72486d347e7ef30c0b7890e2045ed42b903 (diff) | |
| parent | ce2aebe91085f987aab31bd2a49820fb605cf386 (diff) | |
Merge branch 'main' into internal-texts
Diffstat (limited to 'stdlib/patterns.c')
| -rw-r--r-- | stdlib/patterns.c | 30 |
1 file changed, 15 insertions, 15 deletions
diff --git a/stdlib/patterns.c b/stdlib/patterns.c index 701aff9c..6acb58a2 100644 --- a/stdlib/patterns.c +++ b/stdlib/patterns.c @@ -67,7 +67,7 @@ static inline bool match_str(TextIter_t *state, int64_t *i, const char *str) static inline bool match_property(TextIter_t *state, int64_t *i, uc_property_t prop) { if (*i >= state->text.length) return false; - ucs4_t grapheme = Text$get_main_grapheme_fast(state, *i); + uint32_t grapheme = Text$get_main_grapheme_fast(state, *i); // TODO: check every codepoint in the cluster? if (uc_is_property(grapheme, prop)) { *i += 1; @@ -80,8 +80,8 @@ static int64_t parse_int(TextIter_t *state, int64_t *i) { int64_t value = 0; for (;; *i += 1) { - ucs4_t grapheme = Text$get_main_grapheme_fast(state, *i); - int digit = uc_digit_value((ucs4_t)grapheme); + uint32_t grapheme = Text$get_main_grapheme_fast(state, *i); + int digit = uc_digit_value(grapheme); if (digit < 0) break; if (value >= INT64_MAX/10) break; value = 10*value + digit; @@ -143,8 +143,8 @@ static int64_t match_email(TextIter_t *state, int64_t index) // dns-label = 1-63 ([a-zA-Z0-9-] | non-ascii) if (index > 0) { - ucs4_t prev_codepoint = Text$get_main_grapheme_fast(state, index - 1); - if (uc_is_property_alphabetic((ucs4_t)prev_codepoint)) + uint32_t prev_codepoint = Text$get_main_grapheme_fast(state, index - 1); + if (uc_is_property_alphabetic(prev_codepoint)) return -1; } @@ -310,7 +310,7 @@ static int64_t match_uri(TextIter_t *state, int64_t index) if (index > 0) { // Don't match if we're not at a word edge: - ucs4_t prev_codepoint = Text$get_main_grapheme_fast(state, index - 1); + uint32_t prev_codepoint = Text$get_main_grapheme_fast(state, index - 1); if (uc_is_property_alphabetic(prev_codepoint)) return -1; } @@ -407,7 +407,7 @@ static int64_t match_newline(TextIter_t *state, int64_t index) if (index >= state->text.length) return -1; - ucs4_t grapheme = index >= state->text.length ? 
0 : Text$get_main_grapheme_fast(state, index); + uint32_t grapheme = index >= state->text.length ? 0 : Text$get_main_grapheme_fast(state, index); if (grapheme == '\n') return 1; if (grapheme == '\r' && Text$get_grapheme_fast(state, index + 1) == '\n') @@ -796,14 +796,14 @@ static int64_t _find(Text_t text, Pattern_t pattern, int64_t first, int64_t last return -1; } -public Int_t Text$find(Text_t text, Pattern_t pattern, Int_t from_index, int64_t *match_length) +public Int_t Text$find(Text_t text, Pattern_t pattern, Int_t from_index) { int64_t first = Int_to_Int64(from_index, false); if (first == 0) fail("Invalid index: 0"); if (first < 0) first = text.length + first + 1; if (first > text.length || first < 1) return I(0); - int64_t found = _find(text, pattern, first-1, text.length-1, match_length); + int64_t found = _find(text, pattern, first-1, text.length-1, NULL); return I(found+1); } @@ -1081,17 +1081,17 @@ public Array_t Text$split(Text_t text, Pattern_t pattern) Array_t chunks = {}; - Int_t i = I_small(1); + int64_t i = 0; for (;;) { int64_t len = 0; - Int_t found = Text$find(text, pattern, i, &len); - if (I_is_zero(found)) break; - Text_t chunk = Text$slice(text, i, Int$minus(found, I_small(1))); + int64_t found = _find(text, pattern, i, text.length-1, &len); + if (found < 0) break; + Text_t chunk = Text$slice(text, I(i+1), I(found)); Array$insert(&chunks, &chunk, I_small(0), sizeof(Text_t)); - i = Int$plus(found, I(MAX(len, 1))); + i = found + MAX(len, 1); } - Text_t last_chunk = Text$slice(text, i, I(text.length)); + Text_t last_chunk = Text$slice(text, I(i+1), I(text.length)); Array$insert(&chunks, &last_chunk, I_small(0), sizeof(Text_t)); return chunks; |
