diff options
| author | Bruce Hill <bruce@bruce-hill.com> | 2025-08-16 17:21:01 -0400 |
|---|---|---|
| committer | Bruce Hill <bruce@bruce-hill.com> | 2025-08-16 17:21:01 -0400 |
| commit | c72b0406a32ffc3f04324f7b6c321486762fca41 (patch) | |
| tree | 244e51c858890ea2ffb8c74a2c33c81b79de376e /lib/patterns | |
| parent | 849fd423a759edf1b58b548a6148c177a6f8cd71 (diff) | |
Improved parsing and prefix/suffix matching using a `remainder` parameter
Diffstat (limited to 'lib/patterns')
| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | lib/patterns/CHANGES.md | 3 |
| -rw-r--r-- | lib/patterns/patterns.c | 25 |
| -rw-r--r-- | lib/patterns/patterns.tm | 19 |
3 files changed, 45 insertions, 2 deletions
diff --git a/lib/patterns/CHANGES.md b/lib/patterns/CHANGES.md index 42ae752c..cf6254cb 100644 --- a/lib/patterns/CHANGES.md +++ b/lib/patterns/CHANGES.md @@ -1,5 +1,8 @@ # Version History +## v1.1 +- Added `Text.matching_pattern(text:Text, pattern:Pattern, pos:Int = 1 -> PatternMatch?)` + ## v1.0 Initial version diff --git a/lib/patterns/patterns.c b/lib/patterns/patterns.c index 74d542b8..224a00a0 100644 --- a/lib/patterns/patterns.c +++ b/lib/patterns/patterns.c @@ -663,6 +663,8 @@ static pat_t parse_next_pat(TextIter_t *state, int64_t *index) case 'w': if (strcasecmp(prop_name, "word") == 0) { return PAT(PAT_FUNCTION, .fn=match_id); + } else if (strcasecmp(prop_name, "ws") == 0 || strcasecmp(prop_name, "whitespace") == 0) { + return PAT(PAT_PROPERTY, .property=UC_PROPERTY_WHITE_SPACE); } break; default: break; @@ -837,10 +839,10 @@ static OptionalPatternMatch find(Text_t text, Text_t pattern, Int_t from_index) PUREFUNC static bool Pattern$has(Text_t text, Text_t pattern) { - if (Text$starts_with(pattern, Text("{start}"))) { + if (Text$starts_with(pattern, Text("{start}"), &pattern)) { int64_t m = match(text, 0, pattern, 0, NULL, 0); return m >= 0; - } else if (Text$ends_with(text, Text("{end}"))) { + } else if (Text$ends_with(text, Text("{end}"), NULL)) { for (int64_t i = text.length-1; i >= 0; i--) { int64_t match_len = match(text, i, pattern, 0, NULL, 0); if (match_len >= 0 && i + match_len == text.length) @@ -860,6 +862,25 @@ static bool Pattern$matches(Text_t text, Text_t pattern) return (match_len == text.length); } +static bool Pattern$match_at(Text_t text, Text_t pattern, Int_t pos, PatternMatch *dest) +{ + int64_t start = Int64$from_int(pos, false) - 1; + capture_t captures[MAX_BACKREFS] = {}; + int64_t match_len = match(text, start, pattern, 0, captures, 0); + if (match_len < 0) + return false; + + List_t capture_list = {}; + for (int i = 0; captures[i].occupied; i++) { + Text_t capture = Text$slice(text, I(captures[i].index+1), 
I(captures[i].index+captures[i].length)); + List$insert(&capture_list, &capture, I(0), sizeof(Text_t)); + } + dest->text = Text$slice(text, I(start+1), I(start+match_len)); + dest->index = I(start+1); + dest->captures = capture_list; + return true; +} + static OptionalList_t Pattern$captures(Text_t text, Text_t pattern) { capture_t captures[MAX_BACKREFS] = {}; diff --git a/lib/patterns/patterns.tm b/lib/patterns/patterns.tm index bab0c3dc..c5444b86 100644 --- a/lib/patterns/patterns.tm +++ b/lib/patterns/patterns.tm @@ -10,6 +10,12 @@ lang Pat return Pat.from_text("$n") extend Text + func matching_pattern(text:Text, pattern:Pat, pos:Int = 1 -> PatternMatch?) + result : PatternMatch + if C_code:Bool(Pattern$match_at(@text, @pattern, @pos, (void*)&@result)) + return result + return none + func matches_pattern(text:Text, pattern:Pat -> Bool) return C_code:Bool(Pattern$matches(@text, @pattern)) @@ -45,3 +51,16 @@ extend Text func trim_pattern(text:Text, pattern=$Pat"{space}", left=yes, right=yes -> Text) return C_code:Text(Pattern$trim(@text, @pattern, @left, @right)) + +func main() + >> "Hello world".matching_pattern($Pat'{id}') + >> "...Hello world".matching_pattern($Pat'{id}') +# func main(pattern:Pat, input=(/dev/stdin)) +# for line in input.by_line()! +# skip if not line.has_pattern(pattern) +# pos := 1 +# for match in line.by_pattern(pattern) +# say(line.slice(pos, match.index-1), newline=no) +# say("\033[34;1m$(match.text)\033[m", newline=no) +# pos = match.index + match.text.length +# say(line.from(pos), newline=yes) |
