aboutsummaryrefslogtreecommitdiff
path: root/examples/patterns
diff options
context:
space:
mode:
authorBruce Hill <bruce@bruce-hill.com>2025-04-01 20:08:36 -0400
committerBruce Hill <bruce@bruce-hill.com>2025-04-01 20:08:36 -0400
commitdd4c25e6b768e39fbcd437e44f80dc958e1f5869 (patch)
tree013c42119866c574980fae7a3323d9ace3eafc0a /examples/patterns
parent57ff9fdfc6b557211052aa275bfcca3e2316a0c6 (diff)
Update patterns API and docs
Diffstat (limited to 'examples/patterns')
-rw-r--r--examples/patterns/README.md253
-rw-r--r--examples/patterns/patterns.c9
-rw-r--r--examples/patterns/patterns.tm15
3 files changed, 200 insertions, 77 deletions
diff --git a/examples/patterns/README.md b/examples/patterns/README.md
index 1d280da6..2fb17391 100644
--- a/examples/patterns/README.md
+++ b/examples/patterns/README.md
@@ -1,6 +1,6 @@
# Text Pattern Matching
-As an alternative to full regular expressions, Tomo provides a limited string
+As an alternative to full regular expressions, Tomo provides a limited text
matching pattern syntax that is intended to solve 80% of use cases in under 1%
of the code size (PCRE's codebase is roughly 150k lines of code, and Tomo's
pattern matching code is a bit under 1k lines of code). Tomo's pattern matching
@@ -16,16 +16,17 @@ like a `Text`, but it has a distinct type.
Patterns are used in a small, but very powerful API that handles many text
functions that would normally be handled by a more extensive API:
-- [`matches_pattern(text:Text, pattern:Pat -> [Text]?)`](#matches_pattern)
-- [`replace_pattern(text:Text, pattern:Pat, replacement:Text, backref="@", recursive=yes -> Text)`](#replace_pattern)
-- [`translate_patterns(text:Text, replacements:{Pat,Text}, backref="@", recursive=yes -> Text)`](#translate_patterns)
-- [`has_pattern(text:Text, pattern:Pat -> Bool)`](#has_pattern)
-- [`find_patterns(text:Text, pattern:Pat -> [PatternMatch])`](#find_patterns)
- [`by_pattern(text:Text, pattern:Pat -> func(->PatternMatch?))`](#by_pattern)
+- [`by_pattern_split(text:Text, pattern:Pat -> func(->Text?))`](#by_pattern_split)
- [`each_pattern(text:Text, pattern:Pat, fn:func(m:PatternMatch), recursive=yes)`](#each_pattern)
+- [`find_patterns(text:Text, pattern:Pat -> [PatternMatch])`](#find_patterns)
+- [`has_pattern(text:Text, pattern:Pat -> Bool)`](#has_pattern)
- [`map_pattern(text:Text, pattern:Pat, fn:func(m:PatternMatch -> Text), recursive=yes -> Text)`](#map_pattern)
+- [`matches_pattern(text:Text, pattern:Pat -> Bool)`](#matches_pattern)
+- [`pattern_captures(text:Text, pattern:Pat -> [Text]?)`](#pattern_captures)
+- [`replace_pattern(text:Text, pattern:Pat, replacement:Text, backref="@", recursive=yes -> Text)`](#replace_pattern)
- [`split_pattern(text:Text, pattern:Pat -> [Text])`](#split_pattern)
-- [`by_pattern_split(text:Text, pattern:Pat -> func(->Text?))`](#by_pattern_split)
+- [`translate_patterns(text:Text, replacements:{Pat,Text}, backref="@", recursive=yes -> Text)`](#translate_patterns)
- [`trim_pattern(text:Text, pattern=$Pat"{space}", left=yes, right=yes -> Text)`](#trim_pattern)
## Matches
@@ -151,53 +152,91 @@ many repetitions you want by putting a number or range of numbers first using
# Methods
-### `matches_pattern`
-Returns an array of text segments that match the given pattern.
+### `by_pattern`
+Returns an iterator function that yields `PatternMatch` objects for each occurrence.
```tomo
-func matches_pattern(text:Text, pattern:Pat -> [Text]?)
+func by_pattern(text:Text, pattern:Pat -> func(->PatternMatch?))
```
-- `text`: The text to search within.
+- `text`: The text to search.
- `pattern`: The pattern to match.
-**Returns:**
-An optional array of matched text segments. Returns `none` if no matches are found.
+**Returns:**
+An iterator function that yields `PatternMatch` objects one at a time.
+
+**Example:**
+```tomo
+text := "one, two, three"
+for word in text:by_pattern($Pat"{id}"):
+ say(word.text)
+```
---
-### `replace_pattern`
-Replaces occurrences of a pattern with a replacement string, supporting backreferences.
+### `by_pattern_split`
+Returns an iterator function that yields text segments split by a pattern.
```tomo
-func replace_pattern(text:Text, pattern:Pat, replacement:Text, backref="@", recursive=yes -> Text)
+func by_pattern_split(text:Text, pattern:Pat -> func(->Text?))
```
-- `text`: The text to modify.
+- `text`: The text to split.
+- `pattern`: The pattern to use as a separator.
+
+**Returns:**
+An iterator function that yields text segments.
+
+**Example:**
+```tomo
+text := "one two three"
+for word in text:by_pattern_split($Pat"{whitespace}"):
 say(word)
+```
+
+---
+
+### `each_pattern`
+Applies a function to each occurrence of a pattern in the text.
+
+```tomo
+func each_pattern(text:Text, pattern:Pat, fn:func(m:PatternMatch), recursive=yes)
+```
+
+- `text`: The text to search.
- `pattern`: The pattern to match.
-- `replacement`: The text to replace matches with.
-- `backref`: The symbol for backreferences in the replacement.
-- `recursive`: If `yes`, applies replacements recursively.
+- `fn`: The function to apply to each match.
+- `recursive`: If `yes`, applies the function recursively on modified text.
-**Returns:**
-A new text with replacements applied.
+**Example:**
+```tomo
+text := "one two three"
+text:each_pattern($Pat"{id}", func(m:PatternMatch):
 say(m.text)
+)
+```
---
-### `translate_patterns`
-Replaces multiple patterns using a mapping of patterns to replacement texts.
+### `find_patterns`
+Finds all occurrences of a pattern in a text and returns them as `PatternMatch` objects.
```tomo
-func translate_patterns(text:Text, replacements:{Pat,Text}, backref="@", recursive=yes -> Text)
+func find_patterns(text:Text, pattern:Pat -> [PatternMatch])
```
-- `text`: The text to modify.
-- `replacements`: A table mapping patterns to their replacements.
-- `backref`: The symbol for backreferences in replacements.
-- `recursive`: If `yes`, applies replacements recursively.
+- `text`: The text to search.
+- `pattern`: The pattern to match.
-**Returns:**
-A new text with all specified replacements applied.
+**Returns:**
+An array of `PatternMatch` objects.
+
+**Example:**
+```tomo
+text := "one! two three!"
+>> text:find_patterns($Pat"{id}!")
+= [PatternMatch(text="one!", index=1, captures=["one"]), PatternMatch(text="three!", index=10, captures=["three"])]
+```
---
@@ -211,69 +250,126 @@ func has_pattern(text:Text, pattern:Pat -> Bool)
- `text`: The text to search.
- `pattern`: The pattern to check for.
-**Returns:**
+**Returns:**
`yes` if a match is found, otherwise `no`.
+**Example:**
+```tomo
+text := "...okay..."
+>> text:has_pattern($Pat"{id}")
+= yes
+```
+
---
-### `find_patterns`
-Finds all occurrences of a pattern in a text and returns them as `PatternMatch` objects.
+### `map_pattern`
+Transforms matches of a pattern using a mapping function.
```tomo
-func find_patterns(text:Text, pattern:Pat -> [PatternMatch])
+func map_pattern(text:Text, pattern:Pat, fn:func(m:PatternMatch -> Text), recursive=yes -> Text)
```
-- `text`: The text to search.
+- `text`: The text to modify.
- `pattern`: The pattern to match.
+- `fn`: A function that transforms matches.
+- `recursive`: If `yes`, applies transformations recursively.
-**Returns:**
-An array of `PatternMatch` objects.
+**Returns:**
+A new text with the transformed matches.
+
+**Example:**
+```tomo
+text := "I have #apples and #oranges and #plums"
+fruits := {"apples"=4, "oranges"=5}
+>> text:map_pattern($Pat'#{id}', func(match:PatternMatch):
+ fruit := match.captures[1]
+ "$(fruits[fruit] or 0) $fruit"
+)
+= "I have 4 apples and 5 oranges and 0 plums"
+```
---
-### `by_pattern`
-Returns an iterator function that yields `PatternMatch` objects for each occurrence.
+### `matches_pattern`
+Returns whether or not text matches a pattern completely.
```tomo
-func by_pattern(text:Text, pattern:Pat -> func(->PatternMatch?))
+func matches_pattern(text:Text, pattern:Pat -> Bool)
```
-- `text`: The text to search.
+- `text`: The text to match against.
- `pattern`: The pattern to match.
-**Returns:**
-An iterator function that yields `PatternMatch` objects one at a time.
+**Returns:**
+`yes` if the whole text matches the pattern, otherwise `no`.
+
+**Example:**
+```tomo
+>> "Hello!!!":matches_pattern($Pat"{id}")
+= no
+>> "Hello":matches_pattern($Pat"{id}")
+= yes
+```
---
-### `each_pattern`
-Applies a function to each occurrence of a pattern in the text.
+### `pattern_captures`
+Returns an array of pattern captures for the given pattern.
```tomo
-func each_pattern(text:Text, pattern:Pat, fn:func(m:PatternMatch), recursive=yes)
+func pattern_captures(text:Text, pattern:Pat -> [Text]?)
```
-- `text`: The text to search.
+- `text`: The text to match against.
- `pattern`: The pattern to match.
-- `fn`: The function to apply to each match.
-- `recursive`: If `yes`, applies the function recursively on modified text.
+
+**Returns:**
+An optional array of matched pattern captures. Returns `none` if the text does
+not match the pattern.
+
+**Example:**
+```tomo
+>> "123 boxes":pattern_captures($Pat"{int} {id}")
+= ["123", "boxes"]?
+>> "xxx":pattern_captures($Pat"{int} {id}")
+= none:[Text]
+```
---
-### `map_pattern`
-Transforms matches of a pattern using a mapping function.
+### `replace_pattern`
+Replaces occurrences of a pattern with a replacement text, supporting backreferences.
```tomo
-func map_pattern(text:Text, pattern:Pat, fn:func(m:PatternMatch -> Text), recursive=yes -> Text)
+func replace_pattern(text:Text, pattern:Pat, replacement:Text, backref="@", recursive=yes -> Text)
```
- `text`: The text to modify.
- `pattern`: The pattern to match.
-- `fn`: A function that transforms matches.
-- `recursive`: If `yes`, applies transformations recursively.
+- `replacement`: The text to replace matches with.
+- `backref`: The symbol for backreferences in the replacement.
+- `recursive`: If `yes`, applies replacements recursively.
-**Returns:**
-A new text with the transformed matches.
+**Returns:**
+A new text with replacements applied.
+
+**Example:**
+```tomo
+>> "I have 123 apples and 456 oranges":replace_pattern($Pat"{int}", "some")
+= "I have some apples and some oranges"
+
+>> "I have 123 apples and 456 oranges":replace_pattern($Pat"{int}", "(@1)")
+= "I have (123) apples and (456) oranges"
+
+>> "I have 123 apples and 456 oranges":replace_pattern($Pat"{int}", "(?1)", backref="?")
+= "I have (123) apples and (456) oranges"
+
+>> "bad(fn(), bad(notbad))":replace_pattern($Pat"bad(?)", "good(@1)")
+= "good(fn(), good(notbad))"
+
+>> "bad(fn(), bad(notbad))":replace_pattern($Pat"bad(?)", "good(@1)", recursive=no)
+= "good(fn(), bad(notbad))"
+```
---
@@ -287,23 +383,42 @@ func split_pattern(text:Text, pattern:Pat -> [Text])
- `text`: The text to split.
- `pattern`: The pattern to use as a separator.
-**Returns:**
+**Returns:**
An array of text segments.
+**Example:**
+```tomo
+>> "one two three":split_pattern($Pat"{whitespace}")
+= ["one", "two", "three"]
+```
+
---
-### `by_pattern_split`
-Returns an iterator function that yields text segments split by a pattern.
+### `translate_patterns`
+Replaces multiple patterns using a mapping of patterns to replacement texts.
```tomo
-func by_pattern_split(text:Text, pattern:Pat -> func(->Text?))
+func translate_patterns(text:Text, replacements:{Pat,Text}, backref="@", recursive=yes -> Text)
```
-- `text`: The text to split.
-- `pattern`: The pattern to use as a separator.
+- `text`: The text to modify.
+- `replacements`: A table mapping patterns to their replacements.
+- `backref`: The symbol for backreferences in replacements.
+- `recursive`: If `yes`, applies replacements recursively.
-**Returns:**
-An iterator function that yields text segments.
+**Returns:**
+A new text with all specified replacements applied.
+
+**Example:**
+```tomo
+>> text := "foo(x, baz(1))"
+>> text:translate_patterns({
+ $Pat"{id}(?)"="call(fn('@1'), @2)",
+ $Pat"{id}"="var('@1')",
+ $Pat"{int}"="int(@1)",
+})
+= "call(fn('foo'), var('x'), call(fn('baz'), int(1)))"
+```
---
@@ -319,5 +434,11 @@ func trim_pattern(text:Text, pattern=$Pat"{space}", left=yes, right=yes -> Text)
- `left`: If `yes`, trims from the beginning.
- `right`: If `yes`, trims from the end.
-**Returns:**
+**Returns:**
The trimmed text.
+
+**Example:**
+```tomo
+>> "123abc456":trim_pattern($Pat"{digit}")
+= "abc"
+```
diff --git a/examples/patterns/patterns.c b/examples/patterns/patterns.c
index ade68e04..180ab431 100644
--- a/examples/patterns/patterns.c
+++ b/examples/patterns/patterns.c
@@ -851,7 +851,14 @@ PUREFUNC static bool Pattern$has(Text_t text, Text_t pattern)
}
}
-static OptionalArray_t Pattern$matches(Text_t text, Text_t pattern)
+static bool Pattern$matches(Text_t text, Text_t pattern)
+{
+    // Whole-text check: only the match length is compared, so no capture buffer is allocated and NULL is passed.
+    int64_t match_len = match(text, 0, pattern, 0, NULL, 0);
+    return (match_len == text.length);
+}
+
+static OptionalArray_t Pattern$captures(Text_t text, Text_t pattern)
{
capture_t captures[MAX_BACKREFS] = {};
int64_t match_len = match(text, 0, pattern, 0, captures, 0);
diff --git a/examples/patterns/patterns.tm b/examples/patterns/patterns.tm
index 8ca5faa2..6afcdc25 100644
--- a/examples/patterns/patterns.tm
+++ b/examples/patterns/patterns.tm
@@ -10,8 +10,11 @@ lang Pat:
return Pat.from_text("$n")
extend Text:
- func matches_pattern(text:Text, pattern:Pat -> [Text]?):
- return inline C : [Text]? { Pattern$matches(_$text, _$pattern); }
+ func matches_pattern(text:Text, pattern:Pat -> Bool):
+ return inline C : Bool { Pattern$matches(_$text, _$pattern); }
+
+ func pattern_captures(text:Text, pattern:Pat -> [Text]?):
+ return inline C : [Text]? { Pattern$captures(_$text, _$pattern); }
func replace_pattern(text:Text, pattern:Pat, replacement:Text, backref="@", recursive=yes -> Text):
return inline C : Text { Pattern$replace(_$text, _$pattern, _$replacement, _$backref, _$recursive); }
@@ -42,11 +45,3 @@ extend Text:
func trim_pattern(text:Text, pattern=$Pat"{space}", left=yes, right=yes -> Text):
return inline C : Text { Pattern$trim(_$text, _$pattern, _$left, _$right); }
-
-func main():
- >> "hello world":replace_pattern($Pat/{id}/, "XXX")
- >> "hello world":find_patterns($Pat/l/)
-
- for m in "hello one two three":by_pattern($Pat/{id}/):
- >> m
-