Update patterns API and docs

This commit is contained in:
Bruce Hill 2025-04-01 20:08:36 -04:00
parent 57ff9fdfc6
commit dd4c25e6b7
3 changed files with 234 additions and 111 deletions

View File

@ -1,6 +1,6 @@
# Text Pattern Matching
As an alternative to full regular expressions, Tomo provides a limited string
As an alternative to full regular expressions, Tomo provides a limited text
matching pattern syntax that is intended to solve 80% of use cases in under 1%
of the code size (PCRE's codebase is roughly 150k lines of code, and Tomo's
pattern matching code is a bit under 1k lines of code). Tomo's pattern matching
@ -16,16 +16,17 @@ like a `Text`, but it has a distinct type.
Patterns are used in a small but very powerful API that covers many text
functions that would normally be handled by a more extensive API:
- [`matches_pattern(text:Text, pattern:Pat -> [Text]?)`](#matches_pattern)
- [`replace_pattern(text:Text, pattern:Pat, replacement:Text, backref="@", recursive=yes -> Text)`](#replace_pattern)
- [`translate_patterns(text:Text, replacements:{Pat,Text}, backref="@", recursive=yes -> Text)`](#translate_patterns)
- [`has_pattern(text:Text, pattern:Pat -> Bool)`](#has_pattern)
- [`find_patterns(text:Text, pattern:Pat -> [PatternMatch])`](#find_patterns)
- [`by_pattern(text:Text, pattern:Pat -> func(->PatternMatch?))`](#by_pattern)
- [`each_pattern(text:Text, pattern:Pat, fn:func(m:PatternMatch), recursive=yes)`](#each_pattern)
- [`map_pattern(text:Text, pattern:Pat, fn:func(m:PatternMatch -> Text), recursive=yes -> Text)`](#map_pattern)
- [`split_pattern(text:Text, pattern:Pat -> [Text])`](#split_pattern)
- [`by_pattern_split(text:Text, pattern:Pat -> func(->Text?))`](#by_pattern_split)
- [`each_pattern(text:Text, pattern:Pat, fn:func(m:PatternMatch), recursive=yes)`](#each_pattern)
- [`find_patterns(text:Text, pattern:Pat -> [PatternMatch])`](#find_patterns)
- [`has_pattern(text:Text, pattern:Pat -> Bool)`](#has_pattern)
- [`map_pattern(text:Text, pattern:Pat, fn:func(m:PatternMatch -> Text), recursive=yes -> Text)`](#map_pattern)
- [`matches_pattern(text:Text, pattern:Pat -> Bool)`](#matches_pattern)
- [`pattern_captures(text:Text, pattern:Pat -> [Text]?)`](#pattern_captures)
- [`replace_pattern(text:Text, pattern:Pat, replacement:Text, backref="@", recursive=yes -> Text)`](#replace_pattern)
- [`split_pattern(text:Text, pattern:Pat -> [Text])`](#split_pattern)
- [`translate_patterns(text:Text, replacements:{Pat,Text}, backref="@", recursive=yes -> Text)`](#translate_patterns)
- [`trim_pattern(text:Text, pattern=$Pat"{space}", left=yes, right=yes -> Text)`](#trim_pattern)
## Matches
@ -151,86 +152,6 @@ many repetitions you want by putting a number or range of numbers first using
# Methods
### `matches_pattern`
Returns an array of text segments that match the given pattern.
```tomo
func matches_pattern(text:Text, pattern:Pat -> [Text]?)
```
- `text`: The text to search within.
- `pattern`: The pattern to match.
**Returns:**
An optional array of matched text segments. Returns `none` if no matches are found.
---
### `replace_pattern`
Replaces occurrences of a pattern with a replacement string, supporting backreferences.
```tomo
func replace_pattern(text:Text, pattern:Pat, replacement:Text, backref="@", recursive=yes -> Text)
```
- `text`: The text to modify.
- `pattern`: The pattern to match.
- `replacement`: The text to replace matches with.
- `backref`: The symbol for backreferences in the replacement.
- `recursive`: If `yes`, applies replacements recursively.
**Returns:**
A new text with replacements applied.
---
### `translate_patterns`
Replaces multiple patterns using a mapping of patterns to replacement texts.
```tomo
func translate_patterns(text:Text, replacements:{Pat,Text}, backref="@", recursive=yes -> Text)
```
- `text`: The text to modify.
- `replacements`: A table mapping patterns to their replacements.
- `backref`: The symbol for backreferences in replacements.
- `recursive`: If `yes`, applies replacements recursively.
**Returns:**
A new text with all specified replacements applied.
---
### `has_pattern`
Checks whether a given pattern appears in the text.
```tomo
func has_pattern(text:Text, pattern:Pat -> Bool)
```
- `text`: The text to search.
- `pattern`: The pattern to check for.
**Returns:**
`yes` if a match is found, otherwise `no`.
---
### `find_patterns`
Finds all occurrences of a pattern in a text and returns them as `PatternMatch` objects.
```tomo
func find_patterns(text:Text, pattern:Pat -> [PatternMatch])
```
- `text`: The text to search.
- `pattern`: The pattern to match.
**Returns:**
An array of `PatternMatch` objects.
---
### `by_pattern`
Returns an iterator function that yields `PatternMatch` objects for each occurrence.
@ -241,9 +162,38 @@ func by_pattern(text:Text, pattern:Pat -> func(->PatternMatch?))
- `text`: The text to search.
- `pattern`: The pattern to match.
**Returns:**
**Returns:**
An iterator function that yields `PatternMatch` objects one at a time.
**Example:**
```tomo
text := "one, two, three"
for word in text:by_pattern($Pat"{id}"):
say(word.text)
```
---
### `by_pattern_split`
Returns an iterator function that yields text segments split by a pattern.
```tomo
func by_pattern_split(text:Text, pattern:Pat -> func(->Text?))
```
- `text`: The text to split.
- `pattern`: The pattern to use as a separator.
**Returns:**
An iterator function that yields text segments.
**Example:**
```tomo
text := "one two three"
for word in text:by_pattern_split($Pat"{whitespace}"):
say(word)
```
---
### `each_pattern`
@ -258,6 +208,58 @@ func each_pattern(text:Text, pattern:Pat, fn:func(m:PatternMatch), recursive=yes
- `fn`: The function to apply to each match.
- `recursive`: If `yes`, applies the function recursively on modified text.
**Example:**
```tomo
text := "one two three"
text:each_pattern($Pat"{id}", func(m:PatternMatch):
say(m.text)
)
```
---
### `find_patterns`
Finds all occurrences of a pattern in a text and returns them as `PatternMatch` objects.
```tomo
func find_patterns(text:Text, pattern:Pat -> [PatternMatch])
```
- `text`: The text to search.
- `pattern`: The pattern to match.
**Returns:**
An array of `PatternMatch` objects.
**Example:**
```tomo
text := "one! two three!"
>> text:find_patterns($Pat"{id}!")
= [PatternMatch(text="one!", index=1, captures=["one"]), PatternMatch(text="three!", index=10, captures=["three"])]
```
---
### `has_pattern`
Checks whether a given pattern appears in the text.
```tomo
func has_pattern(text:Text, pattern:Pat -> Bool)
```
- `text`: The text to search.
- `pattern`: The pattern to check for.
**Returns:**
`yes` if a match is found, otherwise `no`.
**Example:**
```tomo
text := "...okay..."
>> text:has_pattern($Pat"{id}")
= yes
```
---
### `map_pattern`
@ -272,9 +274,103 @@ func map_pattern(text:Text, pattern:Pat, fn:func(m:PatternMatch -> Text), recurs
- `fn`: A function that transforms matches.
- `recursive`: If `yes`, applies transformations recursively.
**Returns:**
**Returns:**
A new text with the transformed matches.
**Example:**
```tomo
text := "I have #apples and #oranges and #plums"
fruits := {"apples"=4, "oranges"=5}
>> text:map_pattern($Pat'#{id}', func(match:PatternMatch):
fruit := match.captures[1]
"$(fruits[fruit] or 0) $fruit"
)
= "I have 4 apples and 5 oranges and 0 plums"
```
---
### `matches_pattern`
Returns whether or not text matches a pattern completely.
```tomo
func matches_pattern(text:Text, pattern:Pat -> Bool)
```
- `text`: The text to match against.
- `pattern`: The pattern to match.
**Returns:**
`yes` if the whole text matches the pattern, otherwise `no`.
**Example:**
```tomo
>> "Hello!!!":matches_pattern($Pat"{id}")
= no
>> "Hello":matches_pattern($Pat"{id}")
= yes
```
---
### `pattern_captures`
Returns an array of pattern captures for the given pattern.
```tomo
func pattern_captures(text:Text, pattern:Pat -> [Text]?)
```
- `text`: The text to match against.
- `pattern`: The pattern to match.
**Returns:**
An optional array of matched pattern captures. Returns `none` if the text does
not match the pattern.
**Example:**
```tomo
>> "123 boxes":pattern_captures($Pat"{int} {id}")
= ["123", "boxes"]?
>> "xxx":pattern_captures($Pat"{int} {id}")
= none:[Text]
```
---
### `replace_pattern`
Replaces occurrences of a pattern with a replacement text, supporting backreferences.
```tomo
func replace_pattern(text:Text, pattern:Pat, replacement:Text, backref="@", recursive=yes -> Text)
```
- `text`: The text to modify.
- `pattern`: The pattern to match.
- `replacement`: The text to replace matches with.
- `backref`: The symbol for backreferences in the replacement.
- `recursive`: If `yes`, applies replacements recursively.
**Returns:**
A new text with replacements applied.
**Example:**
```tomo
>> "I have 123 apples and 456 oranges":replace_pattern($Pat"{int}", "some")
= "I have some apples and some oranges"
>> "I have 123 apples and 456 oranges":replace_pattern($Pat"{int}", "(@1)")
= "I have (123) apples and (456) oranges"
>> "I have 123 apples and 456 oranges":replace_pattern($Pat"{int}", "(?1)", backref="?")
= "I have (123) apples and (456) oranges"
>> "bad(fn(), bad(notbad))":replace_pattern($Pat"bad(?)", "good(@1)")
= "good(fn(), good(notbad))"
>> "bad(fn(), bad(notbad))":replace_pattern($Pat"bad(?)", "good(@1)", recursive=no)
= "good(fn(), bad(notbad))"
```
---
### `split_pattern`
@ -287,23 +383,42 @@ func split_pattern(text:Text, pattern:Pat -> [Text])
- `text`: The text to split.
- `pattern`: The pattern to use as a separator.
**Returns:**
**Returns:**
An array of text segments.
**Example:**
```tomo
>> "one two three":split_pattern($Pat"{whitespace}")
= ["one", "two", "three"]
```
---
### `by_pattern_split`
Returns an iterator function that yields text segments split by a pattern.
### `translate_patterns`
Replaces multiple patterns using a mapping of patterns to replacement texts.
```tomo
func by_pattern_split(text:Text, pattern:Pat -> func(->Text?))
func translate_patterns(text:Text, replacements:{Pat,Text}, backref="@", recursive=yes -> Text)
```
- `text`: The text to split.
- `pattern`: The pattern to use as a separator.
- `text`: The text to modify.
- `replacements`: A table mapping patterns to their replacements.
- `backref`: The symbol for backreferences in replacements.
- `recursive`: If `yes`, applies replacements recursively.
**Returns:**
An iterator function that yields text segments.
**Returns:**
A new text with all specified replacements applied.
**Example:**
```tomo
>> text := "foo(x, baz(1))"
>> text:translate_patterns({
$Pat"{id}(?)"="call(fn('@1'), @2)",
$Pat"{id}"="var('@1')",
$Pat"{int}"="int(@1)",
})
= "call(fn('foo'), var('x'), call(fn('baz'), int(1)))"
```
---
@ -319,5 +434,11 @@ func trim_pattern(text:Text, pattern=$Pat"{space}", left=yes, right=yes -> Text)
- `left`: If `yes`, trims from the beginning.
- `right`: If `yes`, trims from the end.
**Returns:**
**Returns:**
The trimmed text.
**Example:**
```tomo
>> "123abc456":trim_pattern($Pat"{digit}")
= "abc"
```

View File

@ -851,7 +851,14 @@ PUREFUNC static bool Pattern$has(Text_t text, Text_t pattern)
}
}
static OptionalArray_t Pattern$matches(Text_t text, Text_t pattern)
// Report whether `pattern` matches `text` in its entirety.
//
// Returns true only when the length reported by match() equals text.length;
// a match that covers only part of the text yields false.
static bool Pattern$matches(Text_t text, Text_t pattern)
{
    // The captured substrings are not needed for a yes/no answer, so NULL is
    // passed as the capture buffer and no local capture array is allocated.
    // NOTE(review): the two zero offsets presumably mean "start of text" and
    // "start of pattern" -- confirm against match()'s declaration.
    int64_t match_len = match(text, 0, pattern, 0, NULL, 0);
    return (match_len == text.length);
}
static OptionalArray_t Pattern$captures(Text_t text, Text_t pattern)
{
capture_t captures[MAX_BACKREFS] = {};
int64_t match_len = match(text, 0, pattern, 0, captures, 0);

View File

@ -10,8 +10,11 @@ lang Pat:
return Pat.from_text("$n")
extend Text:
func matches_pattern(text:Text, pattern:Pat -> [Text]?):
return inline C : [Text]? { Pattern$matches(_$text, _$pattern); }
# Return `yes` if the whole of `text` matches `pattern`, otherwise `no`.
# Thin wrapper: forwards both arguments to the C function `Pattern$matches`.
func matches_pattern(text:Text, pattern:Pat -> Bool):
return inline C : Bool { Pattern$matches(_$text, _$pattern); }
# Return the pattern's captures as an optional array of texts.
# Thin wrapper: forwards both arguments to the C function `Pattern$captures`.
# Per the API docs, the result is `none` when the text does not match.
func pattern_captures(text:Text, pattern:Pat -> [Text]?):
return inline C : [Text]? { Pattern$captures(_$text, _$pattern); }
# Return a new text with occurrences of `pattern` replaced by `replacement`.
# `backref` (default "@") is the symbol that introduces backreferences in the
# replacement; `recursive` (default `yes`) is passed through unchanged.
# Thin wrapper: forwards all five arguments to the C function `Pattern$replace`.
func replace_pattern(text:Text, pattern:Pat, replacement:Text, backref="@", recursive=yes -> Text):
return inline C : Text { Pattern$replace(_$text, _$pattern, _$replacement, _$backref, _$recursive); }
@ -42,11 +45,3 @@ extend Text:
# Return `text` with matches of `pattern` (default `{space}`) trimmed away.
# `left` and `right` (both default `yes`) select which ends are trimmed.
# Thin wrapper: forwards all four arguments to the C function `Pattern$trim`.
func trim_pattern(text:Text, pattern=$Pat"{space}", left=yes, right=yes -> Text):
return inline C : Text { Pattern$trim(_$text, _$pattern, _$left, _$right); }
func main():
>> "hello world":replace_pattern($Pat/{id}/, "XXX")
>> "hello world":find_patterns($Pat/l/)
for m in "hello one two three":by_pattern($Pat/{id}/):
>> m