Update patterns API and docs

This commit is contained in:
Bruce Hill 2025-04-01 20:08:36 -04:00
parent 57ff9fdfc6
commit dd4c25e6b7
3 changed files with 234 additions and 111 deletions

View File

@ -1,6 +1,6 @@
# Text Pattern Matching # Text Pattern Matching
As an alternative to full regular expressions, Tomo provides a limited string As an alternative to full regular expressions, Tomo provides a limited text
matching pattern syntax that is intended to solve 80% of use cases in under 1% matching pattern syntax that is intended to solve 80% of use cases in under 1%
of the code size (PCRE's codebase is roughly 150k lines of code, and Tomo's of the code size (PCRE's codebase is roughly 150k lines of code, and Tomo's
pattern matching code is a bit under 1k lines of code). Tomo's pattern matching pattern matching code is a bit under 1k lines of code). Tomo's pattern matching
@ -16,16 +16,17 @@ like a `Text`, but it has a distinct type.
Patterns are used in a small, but very powerful API that handles many text Patterns are used in a small, but very powerful API that handles many text
functions that would normally be handled by a more extensive API: functions that would normally be handled by a more extensive API:
- [`matches_pattern(text:Text, pattern:Pat -> [Text]?)`](#matches_pattern)
- [`replace_pattern(text:Text, pattern:Pat, replacement:Text, backref="@", recursive=yes -> Text)`](#replace_pattern)
- [`translate_patterns(text:Text, replacements:{Pat,Text}, backref="@", recursive=yes -> Text)`](#translate_patterns)
- [`has_pattern(text:Text, pattern:Pat -> Bool)`](#has_pattern)
- [`find_patterns(text:Text, pattern:Pat -> [PatternMatch])`](#find_patterns)
- [`by_pattern(text:Text, pattern:Pat -> func(->PatternMatch?))`](#by_pattern) - [`by_pattern(text:Text, pattern:Pat -> func(->PatternMatch?))`](#by_pattern)
- [`each_pattern(text:Text, pattern:Pat, fn:func(m:PatternMatch), recursive=yes)`](#each_pattern)
- [`map_pattern(text:Text, pattern:Pat, fn:func(m:PatternMatch -> Text), recursive=yes -> Text)`](#map_pattern)
- [`split_pattern(text:Text, pattern:Pat -> [Text])`](#split_pattern)
- [`by_pattern_split(text:Text, pattern:Pat -> func(->Text?))`](#by_pattern_split) - [`by_pattern_split(text:Text, pattern:Pat -> func(->Text?))`](#by_pattern_split)
- [`each_pattern(text:Text, pattern:Pat, fn:func(m:PatternMatch), recursive=yes)`](#each_pattern)
- [`find_patterns(text:Text, pattern:Pat -> [PatternMatch])`](#find_patterns)
- [`has_pattern(text:Text, pattern:Pat -> Bool)`](#has_pattern)
- [`map_pattern(text:Text, pattern:Pat, fn:func(m:PatternMatch -> Text), recursive=yes -> Text)`](#map_pattern)
- [`matches_pattern(text:Text, pattern:Pat -> Bool)`](#matches_pattern)
- [`pattern_captures(text:Text, pattern:Pat -> [Text]?)`](#pattern_captures)
- [`replace_pattern(text:Text, pattern:Pat, replacement:Text, backref="@", recursive=yes -> Text)`](#replace_pattern)
- [`split_pattern(text:Text, pattern:Pat -> [Text])`](#split_pattern)
- [`translate_patterns(text:Text, replacements:{Pat,Text}, backref="@", recursive=yes -> Text)`](#translate_patterns)
- [`trim_pattern(text:Text, pattern=$Pat"{space}", left=yes, right=yes -> Text)`](#trim_pattern) - [`trim_pattern(text:Text, pattern=$Pat"{space}", left=yes, right=yes -> Text)`](#trim_pattern)
## Matches ## Matches
@ -151,68 +152,69 @@ many repetitions you want by putting a number or range of numbers first using
# Methods # Methods
### `matches_pattern` ### `by_pattern`
Returns an array of text segments that match the given pattern. Returns an iterator function that yields `PatternMatch` objects for each occurrence.
```tomo ```tomo
func matches_pattern(text:Text, pattern:Pat -> [Text]?) func by_pattern(text:Text, pattern:Pat -> func(->PatternMatch?))
```
- `text`: The text to search within.
- `pattern`: The pattern to match.
**Returns:**
An optional array of matched text segments. Returns `none` if no matches are found.
---
### `replace_pattern`
Replaces occurrences of a pattern with a replacement string, supporting backreferences.
```tomo
func replace_pattern(text:Text, pattern:Pat, replacement:Text, backref="@", recursive=yes -> Text)
```
- `text`: The text to modify.
- `pattern`: The pattern to match.
- `replacement`: The text to replace matches with.
- `backref`: The symbol for backreferences in the replacement.
- `recursive`: If `yes`, applies replacements recursively.
**Returns:**
A new text with replacements applied.
---
### `translate_patterns`
Replaces multiple patterns using a mapping of patterns to replacement texts.
```tomo
func translate_patterns(text:Text, replacements:{Pat,Text}, backref="@", recursive=yes -> Text)
```
- `text`: The text to modify.
- `replacements`: A table mapping patterns to their replacements.
- `backref`: The symbol for backreferences in replacements.
- `recursive`: If `yes`, applies replacements recursively.
**Returns:**
A new text with all specified replacements applied.
---
### `has_pattern`
Checks whether a given pattern appears in the text.
```tomo
func has_pattern(text:Text, pattern:Pat -> Bool)
``` ```
- `text`: The text to search. - `text`: The text to search.
- `pattern`: The pattern to check for. - `pattern`: The pattern to match.
**Returns:** **Returns:**
`yes` if a match is found, otherwise `no`. An iterator function that yields `PatternMatch` objects one at a time.
**Example:**
```tomo
text := "one, two, three"
for word in text:by_pattern($Pat"{id}"):
say(word.text)
```
---
### `by_pattern_split`
Returns an iterator function that yields text segments split by a pattern.
```tomo
func by_pattern_split(text:Text, pattern:Pat -> func(->Text?))
```
- `text`: The text to split.
- `pattern`: The pattern to use as a separator.
**Returns:**
An iterator function that yields text segments.
**Example:**
```tomo
text := "one two three"
for word in text:by_pattern_split($Pat"{whitespace}"):
say(word)
```
---
### `each_pattern`
Applies a function to each occurrence of a pattern in the text.
```tomo
func each_pattern(text:Text, pattern:Pat, fn:func(m:PatternMatch), recursive=yes)
```
- `text`: The text to search.
- `pattern`: The pattern to match.
- `fn`: The function to apply to each match.
- `recursive`: If `yes`, the function is also applied recursively to matches found inside each match.
**Example:**
```tomo
text := "one two three"
text:each_pattern($Pat"{id}", func(m:PatternMatch):
say(m.text)
)
```
--- ---
@ -229,35 +231,35 @@ func find_patterns(text:Text, pattern:Pat -> [PatternMatch])
**Returns:** **Returns:**
An array of `PatternMatch` objects. An array of `PatternMatch` objects.
**Example:**
```tomo
text := "one! two three!"
>> text:find_patterns($Pat"{id}!")
= [PatternMatch(text="one!", index=1, captures=["one"]), PatternMatch(text="three!", index=10, captures=["three"])]
```
--- ---
### `by_pattern` ### `has_pattern`
Returns an iterator function that yields `PatternMatch` objects for each occurrence. Checks whether a given pattern appears in the text.
```tomo ```tomo
func by_pattern(text:Text, pattern:Pat -> func(->PatternMatch?)) func has_pattern(text:Text, pattern:Pat -> Bool)
``` ```
- `text`: The text to search. - `text`: The text to search.
- `pattern`: The pattern to match. - `pattern`: The pattern to check for.
**Returns:** **Returns:**
An iterator function that yields `PatternMatch` objects one at a time. `yes` if a match is found, otherwise `no`.
---
### `each_pattern`
Applies a function to each occurrence of a pattern in the text.
**Example:**
```tomo ```tomo
func each_pattern(text:Text, pattern:Pat, fn:func(m:PatternMatch), recursive=yes) text := "...okay..."
>> text:has_pattern($Pat"{id}")
= yes
``` ```
- `text`: The text to search.
- `pattern`: The pattern to match.
- `fn`: The function to apply to each match.
- `recursive`: If `yes`, applies the function recursively on modified text.
--- ---
### `map_pattern` ### `map_pattern`
@ -275,6 +277,100 @@ func map_pattern(text:Text, pattern:Pat, fn:func(m:PatternMatch -> Text), recurs
**Returns:** **Returns:**
A new text with the transformed matches. A new text with the transformed matches.
**Example:**
```tomo
text := "I have #apples and #oranges and #plums"
fruits := {"apples"=4, "oranges"=5}
>> text:map_pattern($Pat'#{id}', func(match:PatternMatch):
fruit := match.captures[1]
"$(fruits[fruit] or 0) $fruit"
)
= "I have 4 apples and 5 oranges and 0 plums"
```
---
### `matches_pattern`
Returns whether or not text matches a pattern completely.
```tomo
func matches_pattern(text:Text, pattern:Pat -> Bool)
```
- `text`: The text to match against.
- `pattern`: The pattern to match.
**Returns:**
`yes` if the whole text matches the pattern, otherwise `no`.
**Example:**
```tomo
>> "Hello!!!":matches_pattern($Pat"{id}")
= no
>> "Hello":matches_pattern($Pat"{id}")
= yes
```
---
### `pattern_captures`
Returns an array of pattern captures for the given pattern.
```tomo
func pattern_captures(text:Text, pattern:Pat -> [Text]?)
```
- `text`: The text to match against.
- `pattern`: The pattern to match.
**Returns:**
An optional array of matched pattern captures. Returns `none` if the text does
not match the pattern.
**Example:**
```tomo
>> "123 boxes":pattern_captures($Pat"{int} {id}")
= ["123", "boxes"]?
>> "xxx":pattern_captures($Pat"{int} {id}")
= none:[Text]
```
---
### `replace_pattern`
Replaces occurrences of a pattern with a replacement text, supporting backreferences.
```tomo
func replace_pattern(text:Text, pattern:Pat, replacement:Text, backref="@", recursive=yes -> Text)
```
- `text`: The text to modify.
- `pattern`: The pattern to match.
- `replacement`: The text to replace matches with.
- `backref`: The symbol for backreferences in the replacement.
- `recursive`: If `yes`, applies replacements recursively.
**Returns:**
A new text with replacements applied.
**Example:**
```tomo
>> "I have 123 apples and 456 oranges":replace_pattern($Pat"{int}", "some")
= "I have some apples and some oranges"
>> "I have 123 apples and 456 oranges":replace_pattern($Pat"{int}", "(@1)")
= "I have (123) apples and (456) oranges"
>> "I have 123 apples and 456 oranges":replace_pattern($Pat"{int}", "(?1)", backref="?")
= "I have (123) apples and (456) oranges"
>> "bad(fn(), bad(notbad))":replace_pattern($Pat"bad(?)", "good(@1)")
= "good(fn(), good(notbad))"
>> "bad(fn(), bad(notbad))":replace_pattern($Pat"bad(?)", "good(@1)", recursive=no)
= "good(fn(), bad(notbad))"
```
--- ---
### `split_pattern` ### `split_pattern`
@ -290,20 +386,39 @@ func split_pattern(text:Text, pattern:Pat -> [Text])
**Returns:** **Returns:**
An array of text segments. An array of text segments.
--- **Example:**
### `by_pattern_split`
Returns an iterator function that yields text segments split by a pattern.
```tomo ```tomo
func by_pattern_split(text:Text, pattern:Pat -> func(->Text?)) >> "one two three":split_pattern($Pat"{whitespace}")
= ["one", "two", "three"]
``` ```
- `text`: The text to split. ---
- `pattern`: The pattern to use as a separator.
### `translate_patterns`
Replaces multiple patterns using a mapping of patterns to replacement texts.
```tomo
func translate_patterns(text:Text, replacements:{Pat,Text}, backref="@", recursive=yes -> Text)
```
- `text`: The text to modify.
- `replacements`: A table mapping patterns to their replacements.
- `backref`: The symbol for backreferences in replacements.
- `recursive`: If `yes`, applies replacements recursively.
**Returns:** **Returns:**
An iterator function that yields text segments. A new text with all specified replacements applied.
**Example:**
```tomo
>> text := "foo(x, baz(1))"
>> text:translate_patterns({
$Pat"{id}(?)"="call(fn('@1'), @2)",
$Pat"{id}"="var('@1')",
$Pat"{int}"="int(@1)",
})
= "call(fn('foo'), var('x'), call(fn('baz'), int(1)))"
```
--- ---
@ -321,3 +436,9 @@ func trim_pattern(text:Text, pattern=$Pat"{space}", left=yes, right=yes -> Text)
**Returns:** **Returns:**
The trimmed text. The trimmed text.
**Example:**
```tomo
>> "123abc456":trim_pattern($Pat"{digit}")
= "abc"
```

View File

@ -851,7 +851,14 @@ PUREFUNC static bool Pattern$has(Text_t text, Text_t pattern)
} }
} }
static OptionalArray_t Pattern$matches(Text_t text, Text_t pattern) static bool Pattern$matches(Text_t text, Text_t pattern)
{
capture_t captures[MAX_BACKREFS] = {};
int64_t match_len = match(text, 0, pattern, 0, NULL, 0);
return (match_len == text.length);
}
static OptionalArray_t Pattern$captures(Text_t text, Text_t pattern)
{ {
capture_t captures[MAX_BACKREFS] = {}; capture_t captures[MAX_BACKREFS] = {};
int64_t match_len = match(text, 0, pattern, 0, captures, 0); int64_t match_len = match(text, 0, pattern, 0, captures, 0);

View File

@ -10,8 +10,11 @@ lang Pat:
return Pat.from_text("$n") return Pat.from_text("$n")
extend Text: extend Text:
func matches_pattern(text:Text, pattern:Pat -> [Text]?): func matches_pattern(text:Text, pattern:Pat -> Bool):
return inline C : [Text]? { Pattern$matches(_$text, _$pattern); } return inline C : Bool { Pattern$matches(_$text, _$pattern); }
func pattern_captures(text:Text, pattern:Pat -> [Text]?):
return inline C : [Text]? { Pattern$captures(_$text, _$pattern); }
func replace_pattern(text:Text, pattern:Pat, replacement:Text, backref="@", recursive=yes -> Text): func replace_pattern(text:Text, pattern:Pat, replacement:Text, backref="@", recursive=yes -> Text):
return inline C : Text { Pattern$replace(_$text, _$pattern, _$replacement, _$backref, _$recursive); } return inline C : Text { Pattern$replace(_$text, _$pattern, _$replacement, _$backref, _$recursive); }
@ -42,11 +45,3 @@ extend Text:
func trim_pattern(text:Text, pattern=$Pat"{space}", left=yes, right=yes -> Text): func trim_pattern(text:Text, pattern=$Pat"{space}", left=yes, right=yes -> Text):
return inline C : Text { Pattern$trim(_$text, _$pattern, _$left, _$right); } return inline C : Text { Pattern$trim(_$text, _$pattern, _$left, _$right); }
func main():
>> "hello world":replace_pattern($Pat/{id}/, "XXX")
>> "hello world":find_patterns($Pat/l/)
for m in "hello one two three":by_pattern($Pat/{id}/):
>> m