diff options
| author | Bruce Hill <bruce@bruce-hill.com> | 2025-04-01 20:08:36 -0400 |
|---|---|---|
| committer | Bruce Hill <bruce@bruce-hill.com> | 2025-04-01 20:08:36 -0400 |
| commit | dd4c25e6b768e39fbcd437e44f80dc958e1f5869 (patch) | |
| tree | 013c42119866c574980fae7a3323d9ace3eafc0a /examples/patterns | |
| parent | 57ff9fdfc6b557211052aa275bfcca3e2316a0c6 (diff) | |
Update patterns API and docs
Diffstat (limited to 'examples/patterns')
| -rw-r--r-- | examples/patterns/README.md | 253 | ||||
| -rw-r--r-- | examples/patterns/patterns.c | 9 | ||||
| -rw-r--r-- | examples/patterns/patterns.tm | 15 |
3 files changed, 200 insertions, 77 deletions
diff --git a/examples/patterns/README.md b/examples/patterns/README.md index 1d280da6..2fb17391 100644 --- a/examples/patterns/README.md +++ b/examples/patterns/README.md @@ -1,6 +1,6 @@ # Text Pattern Matching -As an alternative to full regular expressions, Tomo provides a limited string +As an alternative to full regular expressions, Tomo provides a limited text matching pattern syntax that is intended to solve 80% of use cases in under 1% of the code size (PCRE's codebase is roughly 150k lines of code, and Tomo's pattern matching code is a bit under 1k lines of code). Tomo's pattern matching @@ -16,16 +16,17 @@ like a `Text`, but it has a distinct type. Patterns are used in a small, but very powerful API that handles many text functions that would normally be handled by a more extensive API: -- [`matches_pattern(text:Text, pattern:Pat -> [Text]?)`](#matches_pattern) -- [`replace_pattern(text:Text, pattern:Pat, replacement:Text, backref="@", recursive=yes -> Text)`](#replace_pattern) -- [`translate_patterns(text:Text, replacements:{Pat,Text}, backref="@", recursive=yes -> Text)`](#translate_patterns) -- [`has_pattern(text:Text, pattern:Pat -> Bool)`](#has_pattern) -- [`find_patterns(text:Text, pattern:Pat -> [PatternMatch])`](#find_patterns) - [`by_pattern(text:Text, pattern:Pat -> func(->PatternMatch?))`](#by_pattern) +- [`by_pattern_split(text:Text, pattern:Pat -> func(->Text?))`](#by_pattern_split) - [`each_pattern(text:Text, pattern:Pat, fn:func(m:PatternMatch), recursive=yes)`](#each_pattern) +- [`find_patterns(text:Text, pattern:Pat -> [PatternMatch])`](#find_patterns) +- [`has_pattern(text:Text, pattern:Pat -> Bool)`](#has_pattern) - [`map_pattern(text:Text, pattern:Pat, fn:func(m:PatternMatch -> Text), recursive=yes -> Text)`](#map_pattern) +- [`matches_pattern(text:Text, pattern:Pat -> Bool)`](#matches_pattern) +- [`pattern_captures(text:Text, pattern:Pat -> [Text]?)`](#pattern_captures) +- [`replace_pattern(text:Text, pattern:Pat, replacement:Text, 
backref="@", recursive=yes -> Text)`](#replace_pattern) - [`split_pattern(text:Text, pattern:Pat -> [Text])`](#split_pattern) -- [`by_pattern_split(text:Text, pattern:Pat -> func(->Text?))`](#by_pattern_split) +- [`translate_patterns(text:Text, replacements:{Pat,Text}, backref="@", recursive=yes -> Text)`](#translate_patterns) - [`trim_pattern(text:Text, pattern=$Pat"{space}", left=yes, right=yes -> Text)`](#trim_pattern) ## Matches @@ -151,53 +152,91 @@ many repetitions you want by putting a number or range of numbers first using # Methods -### `matches_pattern` -Returns an array of text segments that match the given pattern. +### `by_pattern` +Returns an iterator function that yields `PatternMatch` objects for each occurrence. ```tomo -func matches_pattern(text:Text, pattern:Pat -> [Text]?) +func by_pattern(text:Text, pattern:Pat -> func(->PatternMatch?)) ``` -- `text`: The text to search within. +- `text`: The text to search. - `pattern`: The pattern to match. -**Returns:** -An optional array of matched text segments. Returns `none` if no matches are found. +**Returns:** +An iterator function that yields `PatternMatch` objects one at a time. + +**Example:** +```tomo +text := "one, two, three" +for word in text:by_pattern($Pat"{id}"): + say(word.text) +``` --- -### `replace_pattern` -Replaces occurrences of a pattern with a replacement string, supporting backreferences. +### `by_pattern_split` +Returns an iterator function that yields text segments split by a pattern. ```tomo -func replace_pattern(text:Text, pattern:Pat, replacement:Text, backref="@", recursive=yes -> Text) +func by_pattern_split(text:Text, pattern:Pat -> func(->Text?)) ``` -- `text`: The text to modify. +- `text`: The text to split. +- `pattern`: The pattern to use as a separator. + +**Returns:** +An iterator function that yields text segments. 
+ +**Example:** +```tomo +text := "one two three" +for word in text:by_pattern_split($Pat"{whitespace}"): + say(word) +``` + +--- + +### `each_pattern` +Applies a function to each occurrence of a pattern in the text. + +```tomo +func each_pattern(text:Text, pattern:Pat, fn:func(m:PatternMatch), recursive=yes) +``` + +- `text`: The text to search. - `pattern`: The pattern to match. -- `replacement`: The text to replace matches with. -- `backref`: The symbol for backreferences in the replacement. -- `recursive`: If `yes`, applies replacements recursively. +- `fn`: The function to apply to each match. +- `recursive`: If `yes`, applies the function recursively on modified text. -**Returns:** -A new text with replacements applied. +**Example:** +```tomo +text := "one two three" +text:each_pattern($Pat"{id}", func(m:PatternMatch): + say(m.text) +) +``` --- -### `translate_patterns` -Replaces multiple patterns using a mapping of patterns to replacement texts. +### `find_patterns` +Finds all occurrences of a pattern in a text and returns them as `PatternMatch` objects. ```tomo -func translate_patterns(text:Text, replacements:{Pat,Text}, backref="@", recursive=yes -> Text) +func find_patterns(text:Text, pattern:Pat -> [PatternMatch]) ``` -- `text`: The text to modify. -- `replacements`: A table mapping patterns to their replacements. -- `backref`: The symbol for backreferences in replacements. -- `recursive`: If `yes`, applies replacements recursively. +- `text`: The text to search. +- `pattern`: The pattern to match. -**Returns:** -A new text with all specified replacements applied. +**Returns:** +An array of `PatternMatch` objects. + +**Example:** +```tomo +text := "one! two three!" +>> text:find_patterns($Pat"{id}!") += [PatternMatch(text="one!", index=1, captures=["one"]), PatternMatch(text="three!", index=10, captures=["three"])] +``` --- @@ -211,69 +250,126 @@ func has_pattern(text:Text, pattern:Pat -> Bool) - `text`: The text to search. 
- `pattern`: The pattern to check for. -**Returns:** +**Returns:** `yes` if a match is found, otherwise `no`. +**Example:** +```tomo +text := "...okay..." +>> text:has_pattern($Pat"{id}") += yes +``` + --- -### `find_patterns` -Finds all occurrences of a pattern in a text and returns them as `PatternMatch` objects. +### `map_pattern` +Transforms matches of a pattern using a mapping function. ```tomo -func find_patterns(text:Text, pattern:Pat -> [PatternMatch]) +func map_pattern(text:Text, pattern:Pat, fn:func(m:PatternMatch -> Text), recursive=yes -> Text) ``` -- `text`: The text to search. +- `text`: The text to modify. - `pattern`: The pattern to match. +- `fn`: A function that transforms matches. +- `recursive`: If `yes`, applies transformations recursively. -**Returns:** -An array of `PatternMatch` objects. +**Returns:** +A new text with the transformed matches. + +**Example:** +```tomo +text := "I have #apples and #oranges and #plums" +fruits := {"apples"=4, "oranges"=5} +>> text:map_pattern($Pat'#{id}', func(match:PatternMatch): + fruit := match.captures[1] + "$(fruits[fruit] or 0) $fruit" +) += "I have 4 apples and 5 oranges and 0 plums" +``` --- -### `by_pattern` -Returns an iterator function that yields `PatternMatch` objects for each occurrence. +### `matches_pattern` +Returns whether or not text matches a pattern completely. ```tomo -func by_pattern(text:Text, pattern:Pat -> func(->PatternMatch?)) +func matches_pattern(text:Text, pattern:Pat -> Bool) ``` -- `text`: The text to search. +- `text`: The text to match against. - `pattern`: The pattern to match. -**Returns:** -An iterator function that yields `PatternMatch` objects one at a time. +**Returns:** +`yes` if the whole text matches the pattern, otherwise `no`. + +**Example:** +```tomo +>> "Hello!!!":matches_pattern($Pat"{id}") += no +>> "Hello":matches_pattern($Pat"{id}") += yes +``` --- -### `each_pattern` -Applies a function to each occurrence of a pattern in the text. 
+### `pattern_captures` +Returns an array of pattern captures for the given pattern. ```tomo -func each_pattern(text:Text, pattern:Pat, fn:func(m:PatternMatch), recursive=yes) +func pattern_captures(text:Text, pattern:Pat -> [Text]?) ``` -- `text`: The text to search. +- `text`: The text to match against. - `pattern`: The pattern to match. -- `fn`: The function to apply to each match. -- `recursive`: If `yes`, applies the function recursively on modified text. + +**Returns:** +An optional array of matched pattern captures. Returns `none` if the text does +not match the pattern. + +**Example:** +```tomo +>> "123 boxes":pattern_captures($Pat"{int} {id}") += ["123", "boxes"]? +>> "xxx":pattern_captures($Pat"{int} {id}") += none:[Text] +``` --- -### `map_pattern` -Transforms matches of a pattern using a mapping function. +### `replace_pattern` +Replaces occurrences of a pattern with a replacement text, supporting backreferences. ```tomo -func map_pattern(text:Text, pattern:Pat, fn:func(m:PatternMatch -> Text), recursive=yes -> Text) +func replace_pattern(text:Text, pattern:Pat, replacement:Text, backref="@", recursive=yes -> Text) ``` - `text`: The text to modify. - `pattern`: The pattern to match. -- `fn`: A function that transforms matches. -- `recursive`: If `yes`, applies transformations recursively. +- `replacement`: The text to replace matches with. +- `backref`: The symbol for backreferences in the replacement. +- `recursive`: If `yes`, applies replacements recursively. -**Returns:** -A new text with the transformed matches. +**Returns:** +A new text with replacements applied. 
+ +**Example:** +```tomo +>> "I have 123 apples and 456 oranges":replace_pattern($Pat"{int}", "some") += "I have some apples and some oranges" + +>> "I have 123 apples and 456 oranges":replace_pattern($Pat"{int}", "(@1)") += "I have (123) apples and (456) oranges" + +>> "I have 123 apples and 456 oranges":replace_pattern($Pat"{int}", "(?1)", backref="?") += "I have (123) apples and (456) oranges" + +>> "bad(fn(), bad(notbad))":replace_pattern($Pat"bad(?)", "good(@1)") += "good(fn(), good(notbad))" + +>> "bad(fn(), bad(notbad))":replace_pattern($Pat"bad(?)", "good(@1)", recursive=no) += "good(fn(), bad(notbad))" +``` --- @@ -287,23 +383,42 @@ func split_pattern(text:Text, pattern:Pat -> [Text]) - `text`: The text to split. - `pattern`: The pattern to use as a separator. -**Returns:** +**Returns:** An array of text segments. +**Example:** +```tomo +>> "one two three":split_pattern($Pat"{whitespace}") += ["one", "two", "three"] +``` + --- -### `by_pattern_split` -Returns an iterator function that yields text segments split by a pattern. +### `translate_patterns` +Replaces multiple patterns using a mapping of patterns to replacement texts. ```tomo -func by_pattern_split(text:Text, pattern:Pat -> func(->Text?)) +func translate_patterns(text:Text, replacements:{Pat,Text}, backref="@", recursive=yes -> Text) ``` -- `text`: The text to split. -- `pattern`: The pattern to use as a separator. +- `text`: The text to modify. +- `replacements`: A table mapping patterns to their replacements. +- `backref`: The symbol for backreferences in replacements. +- `recursive`: If `yes`, applies replacements recursively. -**Returns:** -An iterator function that yields text segments. +**Returns:** +A new text with all specified replacements applied. 
+ +**Example:** +```tomo +>> text := "foo(x, baz(1))" +>> text:translate_patterns({ + $Pat"{id}(?)"="call(fn('@1'), @2)", + $Pat"{id}"="var('@1')", + $Pat"{int}"="int(@1)", +}) += "call(fn('foo'), var('x'), call(fn('baz'), int(1)))" +``` --- @@ -319,5 +434,11 @@ func trim_pattern(text:Text, pattern=$Pat"{space}", left=yes, right=yes -> Text) - `left`: If `yes`, trims from the beginning. - `right`: If `yes`, trims from the end. -**Returns:** +**Returns:** The trimmed text. + +**Example:** +```tomo +>> "123abc456":trim_pattern($Pat"{digit}") += "abc" +``` diff --git a/examples/patterns/patterns.c b/examples/patterns/patterns.c index ade68e04..180ab431 100644 --- a/examples/patterns/patterns.c +++ b/examples/patterns/patterns.c @@ -851,7 +851,14 @@ PUREFUNC static bool Pattern$has(Text_t text, Text_t pattern) } } -static OptionalArray_t Pattern$matches(Text_t text, Text_t pattern) +static bool Pattern$matches(Text_t text, Text_t pattern) +{ + capture_t captures[MAX_BACKREFS] = {}; + int64_t match_len = match(text, 0, pattern, 0, NULL, 0); + return (match_len == text.length); +} + +static OptionalArray_t Pattern$captures(Text_t text, Text_t pattern) { capture_t captures[MAX_BACKREFS] = {}; int64_t match_len = match(text, 0, pattern, 0, captures, 0); diff --git a/examples/patterns/patterns.tm b/examples/patterns/patterns.tm index 8ca5faa2..6afcdc25 100644 --- a/examples/patterns/patterns.tm +++ b/examples/patterns/patterns.tm @@ -10,8 +10,11 @@ lang Pat: return Pat.from_text("$n") extend Text: - func matches_pattern(text:Text, pattern:Pat -> [Text]?): - return inline C : [Text]? { Pattern$matches(_$text, _$pattern); } + func matches_pattern(text:Text, pattern:Pat -> Bool): + return inline C : Bool { Pattern$matches(_$text, _$pattern); } + + func pattern_captures(text:Text, pattern:Pat -> [Text]?): + return inline C : [Text]? 
{ Pattern$captures(_$text, _$pattern); } func replace_pattern(text:Text, pattern:Pat, replacement:Text, backref="@", recursive=yes -> Text): return inline C : Text { Pattern$replace(_$text, _$pattern, _$replacement, _$backref, _$recursive); } @@ -42,11 +45,3 @@ extend Text: func trim_pattern(text:Text, pattern=$Pat"{space}", left=yes, right=yes -> Text): return inline C : Text { Pattern$trim(_$text, _$pattern, _$left, _$right); } - -func main(): - >> "hello world":replace_pattern($Pat/{id}/, "XXX") - >> "hello world":find_patterns($Pat/l/) - - for m in "hello one two three":by_pattern($Pat/{id}/): - >> m - |
