aboutsummaryrefslogtreecommitdiff
path: root/lib/patterns
diff options
context:
space:
mode:
authorBruce Hill <bruce@bruce-hill.com>2025-08-16 17:21:01 -0400
committerBruce Hill <bruce@bruce-hill.com>2025-08-16 17:21:01 -0400
commitc72b0406a32ffc3f04324f7b6c321486762fca41 (patch)
tree244e51c858890ea2ffb8c74a2c33c81b79de376e /lib/patterns
parent849fd423a759edf1b58b548a6148c177a6f8cd71 (diff)
Improved parsing and prefix/suffix matching using a `remainder`
parameter
Diffstat (limited to 'lib/patterns')
-rw-r--r--lib/patterns/CHANGES.md3
-rw-r--r--lib/patterns/patterns.c25
-rw-r--r--lib/patterns/patterns.tm19
3 files changed, 45 insertions, 2 deletions
diff --git a/lib/patterns/CHANGES.md b/lib/patterns/CHANGES.md
index 42ae752c..cf6254cb 100644
--- a/lib/patterns/CHANGES.md
+++ b/lib/patterns/CHANGES.md
@@ -1,5 +1,8 @@
# Version History
+## v1.1
+- Added `Text.matching_pattern(text:Text, pattern:Pat, pos:Int = 1 -> PatternMatch?)`
+
## v1.0
Initial version
diff --git a/lib/patterns/patterns.c b/lib/patterns/patterns.c
index 74d542b8..224a00a0 100644
--- a/lib/patterns/patterns.c
+++ b/lib/patterns/patterns.c
@@ -663,6 +663,8 @@ static pat_t parse_next_pat(TextIter_t *state, int64_t *index)
case 'w':
if (strcasecmp(prop_name, "word") == 0) {
return PAT(PAT_FUNCTION, .fn=match_id);
+ } else if (strcasecmp(prop_name, "ws") == 0 || strcasecmp(prop_name, "whitespace") == 0) {
+ return PAT(PAT_PROPERTY, .property=UC_PROPERTY_WHITE_SPACE);
}
break;
default: break;
@@ -837,10 +839,10 @@ static OptionalPatternMatch find(Text_t text, Text_t pattern, Int_t from_index)
PUREFUNC static bool Pattern$has(Text_t text, Text_t pattern)
{
- if (Text$starts_with(pattern, Text("{start}"))) {
+ if (Text$starts_with(pattern, Text("{start}"), &pattern)) {
int64_t m = match(text, 0, pattern, 0, NULL, 0);
return m >= 0;
- } else if (Text$ends_with(text, Text("{end}"))) {
+ } else if (Text$ends_with(text, Text("{end}"), NULL)) {
for (int64_t i = text.length-1; i >= 0; i--) {
int64_t match_len = match(text, i, pattern, 0, NULL, 0);
if (match_len >= 0 && i + match_len == text.length)
@@ -860,6 +862,25 @@ static bool Pattern$matches(Text_t text, Text_t pattern)
return (match_len == text.length);
}
+// Try to match `pattern` against `text` starting exactly at the 1-based
+// position `pos`.
+//
+// On success, fills `*dest` with the matched text, its 1-based index and the
+// list of captured sub-texts, and returns true. Returns false (leaving `*dest`
+// untouched) when the pattern does not match there or when `pos` lies outside
+// the text.
+static bool Pattern$match_at(Text_t text, Text_t pattern, Int_t pos, PatternMatch *dest)
+{
+    // Convert the caller's 1-based position into the 0-based offset match() uses.
+    int64_t start = Int64$from_int(pos, false) - 1;
+    // Reject offsets before the text or past its end rather than handing an
+    // out-of-range index to match(). `start == text.length` is still allowed so
+    // that patterns able to match the empty string can succeed at the very end.
+    if (start < 0 || start > text.length)
+        return false;
+
+    capture_t captures[MAX_BACKREFS] = {};
+    int64_t match_len = match(text, start, pattern, 0, captures, 0);
+    if (match_len < 0)
+        return false;
+
+    List_t capture_list = {};
+    // Bound the scan by the array size: when every slot is occupied there is no
+    // unoccupied entry to stop on, and the loop would read past the array.
+    for (int i = 0; i < MAX_BACKREFS && captures[i].occupied; i++) {
+        // Capture offsets are 0-based; Text$slice takes 1-based inclusive bounds.
+        Text_t capture = Text$slice(text, I(captures[i].index+1), I(captures[i].index+captures[i].length));
+        List$insert(&capture_list, &capture, I(0), sizeof(Text_t));
+    }
+    dest->text = Text$slice(text, I(start+1), I(start+match_len));
+    dest->index = I(start+1);
+    dest->captures = capture_list;
+    return true;
+}
+
static OptionalList_t Pattern$captures(Text_t text, Text_t pattern)
{
capture_t captures[MAX_BACKREFS] = {};
diff --git a/lib/patterns/patterns.tm b/lib/patterns/patterns.tm
index bab0c3dc..c5444b86 100644
--- a/lib/patterns/patterns.tm
+++ b/lib/patterns/patterns.tm
@@ -10,6 +10,12 @@ lang Pat
return Pat.from_text("$n")
extend Text
+ # Try to match `pattern` against `text` at the 1-based position `pos`
+ # (default: the start of the text). Returns the match when the C helper
+ # Pattern$match_at reports success, otherwise `none`.
+ func matching_pattern(text:Text, pattern:Pat, pos:Int = 1 -> PatternMatch?)
+ # Out-parameter: Pattern$match_at fills this in only when it returns true.
+ result : PatternMatch
+ if C_code:Bool(Pattern$match_at(@text, @pattern, @pos, (void*)&@result))
+ return result
+ return none
+
# Report whether `pattern` matches `text`. Delegates to the C helper
# Pattern$matches, which returns true only when the match length equals the
# length of the text.
func matches_pattern(text:Text, pattern:Pat -> Bool)
return C_code:Bool(Pattern$matches(@text, @pattern))
@@ -45,3 +51,16 @@ extend Text
# Trim text matching `pattern` (default: `{space}`) from `text` by delegating
# to the C helper Pattern$trim. `left` and `right` are forwarded unchanged;
# presumably they select which ends get trimmed (confirm against Pattern$trim).
func trim_pattern(text:Text, pattern=$Pat"{space}", left=yes, right=yes -> Text)
return C_code:Text(Pattern$trim(@text, @pattern, @left, @right))
+
+# Ad-hoc demo entry point: evaluates matching_pattern with the `{id}` pattern
+# at the default position on two sample strings. The second sample starts with
+# "..." and so presumably demonstrates the no-match case (confirm by running).
+func main()
+ >> "Hello world".matching_pattern($Pat'{id}')
+ >> "...Hello world".matching_pattern($Pat'{id}')
+# func main(pattern:Pat, input=(/dev/stdin))
+# for line in input.by_line()!
+# skip if not line.has_pattern(pattern)
+# pos := 1
+# for match in line.by_pattern(pattern)
+# say(line.slice(pos, match.index-1), newline=no)
+# say("\033[34;1m$(match.text)\033[m", newline=no)
+# pos = match.index + match.text.length
+# say(line.from(pos), newline=yes)