diff --git a/examples/patterns/README.md b/examples/patterns/README.md index 1d280da..2fb1739 100644 --- a/examples/patterns/README.md +++ b/examples/patterns/README.md @@ -1,6 +1,6 @@ # Text Pattern Matching -As an alternative to full regular expressions, Tomo provides a limited string +As an alternative to full regular expressions, Tomo provides a limited text matching pattern syntax that is intended to solve 80% of use cases in under 1% of the code size (PCRE's codebase is roughly 150k lines of code, and Tomo's pattern matching code is a bit under 1k lines of code). Tomo's pattern matching @@ -16,16 +16,17 @@ like a `Text`, but it has a distinct type. Patterns are used in a small, but very powerful API that handles many text functions that would normally be handled by a more extensive API: -- [`matches_pattern(text:Text, pattern:Pat -> [Text]?)`](#matches_pattern) -- [`replace_pattern(text:Text, pattern:Pat, replacement:Text, backref="@", recursive=yes -> Text)`](#replace_pattern) -- [`translate_patterns(text:Text, replacements:{Pat,Text}, backref="@", recursive=yes -> Text)`](#translate_patterns) -- [`has_pattern(text:Text, pattern:Pat -> Bool)`](#has_pattern) -- [`find_patterns(text:Text, pattern:Pat -> [PatternMatch])`](#find_patterns) - [`by_pattern(text:Text, pattern:Pat -> func(->PatternMatch?))`](#by_pattern) -- [`each_pattern(text:Text, pattern:Pat, fn:func(m:PatternMatch), recursive=yes)`](#each_pattern) -- [`map_pattern(text:Text, pattern:Pat, fn:func(m:PatternMatch -> Text), recursive=yes -> Text)`](#map_pattern) -- [`split_pattern(text:Text, pattern:Pat -> [Text])`](#split_pattern) - [`by_pattern_split(text:Text, pattern:Pat -> func(->Text?))`](#by_pattern_split) +- [`each_pattern(text:Text, pattern:Pat, fn:func(m:PatternMatch), recursive=yes)`](#each_pattern) +- [`find_patterns(text:Text, pattern:Pat -> [PatternMatch])`](#find_patterns) +- [`has_pattern(text:Text, pattern:Pat -> Bool)`](#has_pattern) +- [`map_pattern(text:Text, pattern:Pat, 
fn:func(m:PatternMatch -> Text), recursive=yes -> Text)`](#map_pattern) +- [`matches_pattern(text:Text, pattern:Pat -> Bool)`](#matches_pattern) +- [`pattern_captures(text:Text, pattern:Pat -> [Text]?)`](#pattern_captures) +- [`replace_pattern(text:Text, pattern:Pat, replacement:Text, backref="@", recursive=yes -> Text)`](#replace_pattern) +- [`split_pattern(text:Text, pattern:Pat -> [Text])`](#split_pattern) +- [`translate_patterns(text:Text, replacements:{Pat,Text}, backref="@", recursive=yes -> Text)`](#translate_patterns) - [`trim_pattern(text:Text, pattern=$Pat"{space}", left=yes, right=yes -> Text)`](#trim_pattern) ## Matches @@ -151,86 +152,6 @@ many repetitions you want by putting a number or range of numbers first using # Methods -### `matches_pattern` -Returns an array of text segments that match the given pattern. - -```tomo -func matches_pattern(text:Text, pattern:Pat -> [Text]?) -``` - -- `text`: The text to search within. -- `pattern`: The pattern to match. - -**Returns:** -An optional array of matched text segments. Returns `none` if no matches are found. - ---- - -### `replace_pattern` -Replaces occurrences of a pattern with a replacement string, supporting backreferences. - -```tomo -func replace_pattern(text:Text, pattern:Pat, replacement:Text, backref="@", recursive=yes -> Text) -``` - -- `text`: The text to modify. -- `pattern`: The pattern to match. -- `replacement`: The text to replace matches with. -- `backref`: The symbol for backreferences in the replacement. -- `recursive`: If `yes`, applies replacements recursively. - -**Returns:** -A new text with replacements applied. - ---- - -### `translate_patterns` -Replaces multiple patterns using a mapping of patterns to replacement texts. - -```tomo -func translate_patterns(text:Text, replacements:{Pat,Text}, backref="@", recursive=yes -> Text) -``` - -- `text`: The text to modify. -- `replacements`: A table mapping patterns to their replacements. 
-- `backref`: The symbol for backreferences in replacements. -- `recursive`: If `yes`, applies replacements recursively. - -**Returns:** -A new text with all specified replacements applied. - ---- - -### `has_pattern` -Checks whether a given pattern appears in the text. - -```tomo -func has_pattern(text:Text, pattern:Pat -> Bool) -``` - -- `text`: The text to search. -- `pattern`: The pattern to check for. - -**Returns:** -`yes` if a match is found, otherwise `no`. - ---- - -### `find_patterns` -Finds all occurrences of a pattern in a text and returns them as `PatternMatch` objects. - -```tomo -func find_patterns(text:Text, pattern:Pat -> [PatternMatch]) -``` - -- `text`: The text to search. -- `pattern`: The pattern to match. - -**Returns:** -An array of `PatternMatch` objects. - ---- - ### `by_pattern` Returns an iterator function that yields `PatternMatch` objects for each occurrence. @@ -241,9 +162,38 @@ func by_pattern(text:Text, pattern:Pat -> func(->PatternMatch?)) - `text`: The text to search. - `pattern`: The pattern to match. -**Returns:** +**Returns:** An iterator function that yields `PatternMatch` objects one at a time. +**Example:** +```tomo +text := "one, two, three" +for word in text:by_pattern($Pat"{id}"): + say(word.text) +``` + +--- + +### `by_pattern_split` +Returns an iterator function that yields text segments split by a pattern. + +```tomo +func by_pattern_split(text:Text, pattern:Pat -> func(->Text?)) +``` + +- `text`: The text to split. +- `pattern`: The pattern to use as a separator. + +**Returns:** +An iterator function that yields text segments. + +**Example:** +```tomo +text := "one two three" +for word in text:by_pattern_split($Pat"{whitespace}"): + say(word) +``` + --- ### `each_pattern` @@ -258,6 +208,58 @@ func each_pattern(text:Text, pattern:Pat, fn:func(m:PatternMatch), recursive=yes - `fn`: The function to apply to each match. - `recursive`: If `yes`, applies the function recursively on modified text. 
+**Example:** +```tomo +text := "one two three" +text:each_pattern($Pat"{id}", func(m:PatternMatch): + say(m.text) +) +``` + +--- + +### `find_patterns` +Finds all occurrences of a pattern in a text and returns them as `PatternMatch` objects. + +```tomo +func find_patterns(text:Text, pattern:Pat -> [PatternMatch]) +``` + +- `text`: The text to search. +- `pattern`: The pattern to match. + +**Returns:** +An array of `PatternMatch` objects. + +**Example:** +```tomo +text := "one! two three!" +>> text:find_patterns($Pat"{id}!") += [PatternMatch(text="one!", index=1, captures=["one"]), PatternMatch(text="three!", index=10, captures=["three"])] +``` + +--- + +### `has_pattern` +Checks whether a given pattern appears in the text. + +```tomo +func has_pattern(text:Text, pattern:Pat -> Bool) +``` + +- `text`: The text to search. +- `pattern`: The pattern to check for. + +**Returns:** +`yes` if a match is found, otherwise `no`. + +**Example:** +```tomo +text := "...okay..." +>> text:has_pattern($Pat"{id}") += yes +``` + --- ### `map_pattern` @@ -272,9 +274,103 @@ func map_pattern(text:Text, pattern:Pat, fn:func(m:PatternMatch -> Text), recurs - `fn`: A function that transforms matches. - `recursive`: If `yes`, applies transformations recursively. -**Returns:** +**Returns:** A new text with the transformed matches. +**Example:** +```tomo +text := "I have #apples and #oranges and #plums" +fruits := {"apples"=4, "oranges"=5} +>> text:map_pattern($Pat'#{id}', func(match:PatternMatch): + fruit := match.captures[1] + "$(fruits[fruit] or 0) $fruit" +) += "I have 4 apples and 5 oranges and 0 plums" +``` + +--- + +### `matches_pattern` +Returns whether or not text matches a pattern completely. + +```tomo +func matches_pattern(text:Text, pattern:Pat -> Bool) +``` + +- `text`: The text to match against. +- `pattern`: The pattern to match. + +**Returns:** +`yes` if the whole text matches the pattern, otherwise `no`. 
+ +**Example:** +```tomo +>> "Hello!!!":matches_pattern($Pat"{id}") += no +>> "Hello":matches_pattern($Pat"{id}") += yes +``` + +--- + +### `pattern_captures` +Returns an array of pattern captures for the given pattern. + +```tomo +func pattern_captures(text:Text, pattern:Pat -> [Text]?) +``` + +- `text`: The text to match against. +- `pattern`: The pattern to match. + +**Returns:** +An optional array of matched pattern captures. Returns `none` if the text does +not match the pattern. + +**Example:** +```tomo +>> "123 boxes":pattern_captures($Pat"{int} {id}") += ["123", "boxes"]? +>> "xxx":pattern_captures($Pat"{int} {id}") += none:[Text] +``` + +--- + +### `replace_pattern` +Replaces occurrences of a pattern with a replacement text, supporting backreferences. + +```tomo +func replace_pattern(text:Text, pattern:Pat, replacement:Text, backref="@", recursive=yes -> Text) +``` + +- `text`: The text to modify. +- `pattern`: The pattern to match. +- `replacement`: The text to replace matches with. +- `backref`: The symbol for backreferences in the replacement. +- `recursive`: If `yes`, applies replacements recursively. + +**Returns:** +A new text with replacements applied. + +**Example:** +```tomo +>> "I have 123 apples and 456 oranges":replace_pattern($Pat"{int}", "some") += "I have some apples and some oranges" + +>> "I have 123 apples and 456 oranges":replace_pattern($Pat"{int}", "(@1)") += "I have (123) apples and (456) oranges" + +>> "I have 123 apples and 456 oranges":replace_pattern($Pat"{int}", "(?1)", backref="?") += "I have (123) apples and (456) oranges" + +>> "bad(fn(), bad(notbad))":replace_pattern($Pat"bad(?)", "good(@1)") += "good(fn(), good(notbad))" + +>> "bad(fn(), bad(notbad))":replace_pattern($Pat"bad(?)", "good(@1)", recursive=no) += "good(fn(), bad(notbad))" +``` + --- ### `split_pattern` @@ -287,23 +383,42 @@ func split_pattern(text:Text, pattern:Pat -> [Text]) - `text`: The text to split. - `pattern`: The pattern to use as a separator. 
-**Returns:** +**Returns:** An array of text segments. +**Example:** +```tomo +>> "one two three":split_pattern($Pat"{whitespace}") += ["one", "two", "three"] +``` + --- -### `by_pattern_split` -Returns an iterator function that yields text segments split by a pattern. +### `translate_patterns` +Replaces multiple patterns using a mapping of patterns to replacement texts. ```tomo -func by_pattern_split(text:Text, pattern:Pat -> func(->Text?)) +func translate_patterns(text:Text, replacements:{Pat,Text}, backref="@", recursive=yes -> Text) ``` -- `text`: The text to split. -- `pattern`: The pattern to use as a separator. +- `text`: The text to modify. +- `replacements`: A table mapping patterns to their replacements. +- `backref`: The symbol for backreferences in replacements. +- `recursive`: If `yes`, applies replacements recursively. -**Returns:** -An iterator function that yields text segments. +**Returns:** +A new text with all specified replacements applied. + +**Example:** +```tomo +>> text := "foo(x, baz(1))" +>> text:translate_patterns({ + $Pat"{id}(?)"="call(fn('@1'), @2)", + $Pat"{id}"="var('@1')", + $Pat"{int}"="int(@1)", +}) += "call(fn('foo'), var('x'), call(fn('baz'), int(1)))" +``` --- @@ -319,5 +434,11 @@ func trim_pattern(text:Text, pattern=$Pat"{space}", left=yes, right=yes -> Text) - `left`: If `yes`, trims from the beginning. - `right`: If `yes`, trims from the end. -**Returns:** +**Returns:** The trimmed text. 
+ +**Example:** +```tomo +>> "123abc456":trim_pattern($Pat"{digit}") += "abc" +``` diff --git a/examples/patterns/patterns.c b/examples/patterns/patterns.c index ade68e0..180ab43 100644 --- a/examples/patterns/patterns.c +++ b/examples/patterns/patterns.c @@ -851,7 +851,14 @@ PUREFUNC static bool Pattern$has(Text_t text, Text_t pattern) } } -static OptionalArray_t Pattern$matches(Text_t text, Text_t pattern) +static bool Pattern$matches(Text_t text, Text_t pattern) +{ + // Only the match length is compared against the text length, so no capture buffer is passed (NULL). + int64_t match_len = match(text, 0, pattern, 0, NULL, 0); + return (match_len == text.length); +} + +static OptionalArray_t Pattern$captures(Text_t text, Text_t pattern) { capture_t captures[MAX_BACKREFS] = {}; int64_t match_len = match(text, 0, pattern, 0, captures, 0); diff --git a/examples/patterns/patterns.tm b/examples/patterns/patterns.tm index 8ca5faa..6afcdc2 100644 --- a/examples/patterns/patterns.tm +++ b/examples/patterns/patterns.tm @@ -10,8 +10,11 @@ lang Pat: return Pat.from_text("$n") extend Text: - func matches_pattern(text:Text, pattern:Pat -> [Text]?): - return inline C : [Text]? { Pattern$matches(_$text, _$pattern); } + func matches_pattern(text:Text, pattern:Pat -> Bool): + return inline C : Bool { Pattern$matches(_$text, _$pattern); } + + func pattern_captures(text:Text, pattern:Pat -> [Text]?): + return inline C : [Text]? { Pattern$captures(_$text, _$pattern); } func replace_pattern(text:Text, pattern:Pat, replacement:Text, backref="@", recursive=yes -> Text): return inline C : Text { Pattern$replace(_$text, _$pattern, _$replacement, _$backref, _$recursive); } @@ -42,11 +45,3 @@ extend Text: func trim_pattern(text:Text, pattern=$Pat"{space}", left=yes, right=yes -> Text): return inline C : Text { Pattern$trim(_$text, _$pattern, _$left, _$right); } - -func main(): - >> "hello world":replace_pattern($Pat/{id}/, "XXX") - >> "hello world":find_patterns($Pat/l/) - - for m in "hello one two three":by_pattern($Pat/{id}/): - >> m -