Update patterns API and docs
This commit is contained in:
parent
57ff9fdfc6
commit
dd4c25e6b7
@ -1,6 +1,6 @@
|
||||
# Text Pattern Matching
|
||||
|
||||
As an alternative to full regular expressions, Tomo provides a limited string
|
||||
As an alternative to full regular expressions, Tomo provides a limited text
|
||||
matching pattern syntax that is intended to solve 80% of use cases in under 1%
|
||||
of the code size (PCRE's codebase is roughly 150k lines of code, and Tomo's
|
||||
pattern matching code is a bit under 1k lines of code). Tomo's pattern matching
|
||||
@ -16,16 +16,17 @@ like a `Text`, but it has a distinct type.
|
||||
Patterns are used in a small, but very powerful API that handles many text
|
||||
functions that would normally be handled by a more extensive API:
|
||||
|
||||
- [`matches_pattern(text:Text, pattern:Pat -> [Text]?)`](#matches_pattern)
|
||||
- [`replace_pattern(text:Text, pattern:Pat, replacement:Text, backref="@", recursive=yes -> Text)`](#replace_pattern)
|
||||
- [`translate_patterns(text:Text, replacements:{Pat,Text}, backref="@", recursive=yes -> Text)`](#translate_patterns)
|
||||
- [`has_pattern(text:Text, pattern:Pat -> Bool)`](#has_pattern)
|
||||
- [`find_patterns(text:Text, pattern:Pat -> [PatternMatch])`](#find_patterns)
|
||||
- [`by_pattern(text:Text, pattern:Pat -> func(->PatternMatch?))`](#by_pattern)
|
||||
- [`each_pattern(text:Text, pattern:Pat, fn:func(m:PatternMatch), recursive=yes)`](#each_pattern)
|
||||
- [`map_pattern(text:Text, pattern:Pat, fn:func(m:PatternMatch -> Text), recursive=yes -> Text)`](#map_pattern)
|
||||
- [`split_pattern(text:Text, pattern:Pat -> [Text])`](#split_pattern)
|
||||
- [`by_pattern_split(text:Text, pattern:Pat -> func(->Text?))`](#by_pattern_split)
|
||||
- [`each_pattern(text:Text, pattern:Pat, fn:func(m:PatternMatch), recursive=yes)`](#each_pattern)
|
||||
- [`find_patterns(text:Text, pattern:Pat -> [PatternMatch])`](#find_patterns)
|
||||
- [`has_pattern(text:Text, pattern:Pat -> Bool)`](#has_pattern)
|
||||
- [`map_pattern(text:Text, pattern:Pat, fn:func(m:PatternMatch -> Text), recursive=yes -> Text)`](#map_pattern)
|
||||
- [`matches_pattern(text:Text, pattern:Pat -> Bool)`](#matches_pattern)
|
||||
- [`pattern_captures(text:Text, pattern:Pat -> [Text]?)`](#pattern_captures)
|
||||
- [`replace_pattern(text:Text, pattern:Pat, replacement:Text, backref="@", recursive=yes -> Text)`](#replace_pattern)
|
||||
- [`split_pattern(text:Text, pattern:Pat -> [Text])`](#split_pattern)
|
||||
- [`translate_patterns(text:Text, replacements:{Pat,Text}, backref="@", recursive=yes -> Text)`](#translate_patterns)
|
||||
- [`trim_pattern(text:Text, pattern=$Pat"{space}", left=yes, right=yes -> Text)`](#trim_pattern)
|
||||
|
||||
## Matches
|
||||
@ -151,86 +152,6 @@ many repetitions you want by putting a number or range of numbers first using
|
||||
|
||||
# Methods
|
||||
|
||||
### `matches_pattern`
|
||||
Returns an array of text segments that match the given pattern.
|
||||
|
||||
```tomo
|
||||
func matches_pattern(text:Text, pattern:Pat -> [Text]?)
|
||||
```
|
||||
|
||||
- `text`: The text to search within.
|
||||
- `pattern`: The pattern to match.
|
||||
|
||||
**Returns:**
|
||||
An optional array of matched text segments. Returns `none` if no matches are found.
|
||||
|
||||
---
|
||||
|
||||
### `replace_pattern`
|
||||
Replaces occurrences of a pattern with a replacement string, supporting backreferences.
|
||||
|
||||
```tomo
|
||||
func replace_pattern(text:Text, pattern:Pat, replacement:Text, backref="@", recursive=yes -> Text)
|
||||
```
|
||||
|
||||
- `text`: The text to modify.
|
||||
- `pattern`: The pattern to match.
|
||||
- `replacement`: The text to replace matches with.
|
||||
- `backref`: The symbol for backreferences in the replacement.
|
||||
- `recursive`: If `yes`, applies replacements recursively.
|
||||
|
||||
**Returns:**
|
||||
A new text with replacements applied.
|
||||
|
||||
---
|
||||
|
||||
### `translate_patterns`
|
||||
Replaces multiple patterns using a mapping of patterns to replacement texts.
|
||||
|
||||
```tomo
|
||||
func translate_patterns(text:Text, replacements:{Pat,Text}, backref="@", recursive=yes -> Text)
|
||||
```
|
||||
|
||||
- `text`: The text to modify.
|
||||
- `replacements`: A table mapping patterns to their replacements.
|
||||
- `backref`: The symbol for backreferences in replacements.
|
||||
- `recursive`: If `yes`, applies replacements recursively.
|
||||
|
||||
**Returns:**
|
||||
A new text with all specified replacements applied.
|
||||
|
||||
---
|
||||
|
||||
### `has_pattern`
|
||||
Checks whether a given pattern appears in the text.
|
||||
|
||||
```tomo
|
||||
func has_pattern(text:Text, pattern:Pat -> Bool)
|
||||
```
|
||||
|
||||
- `text`: The text to search.
|
||||
- `pattern`: The pattern to check for.
|
||||
|
||||
**Returns:**
|
||||
`yes` if a match is found, otherwise `no`.
|
||||
|
||||
---
|
||||
|
||||
### `find_patterns`
|
||||
Finds all occurrences of a pattern in a text and returns them as `PatternMatch` objects.
|
||||
|
||||
```tomo
|
||||
func find_patterns(text:Text, pattern:Pat -> [PatternMatch])
|
||||
```
|
||||
|
||||
- `text`: The text to search.
|
||||
- `pattern`: The pattern to match.
|
||||
|
||||
**Returns:**
|
||||
An array of `PatternMatch` objects.
|
||||
|
||||
---
|
||||
|
||||
### `by_pattern`
|
||||
Returns an iterator function that yields `PatternMatch` objects for each occurrence.
|
||||
|
||||
@ -241,9 +162,38 @@ func by_pattern(text:Text, pattern:Pat -> func(->PatternMatch?))
|
||||
- `text`: The text to search.
|
||||
- `pattern`: The pattern to match.
|
||||
|
||||
**Returns:**
|
||||
**Returns:**
|
||||
An iterator function that yields `PatternMatch` objects one at a time.
|
||||
|
||||
**Example:**
|
||||
```tomo
|
||||
text := "one, two, three"
|
||||
for word in text:by_pattern($Pat"{id}"):
|
||||
say(word.text)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### `by_pattern_split`
|
||||
Returns an iterator function that yields text segments split by a pattern.
|
||||
|
||||
```tomo
|
||||
func by_pattern_split(text:Text, pattern:Pat -> func(->Text?))
|
||||
```
|
||||
|
||||
- `text`: The text to split.
|
||||
- `pattern`: The pattern to use as a separator.
|
||||
|
||||
**Returns:**
|
||||
An iterator function that yields text segments.
|
||||
|
||||
**Example:**
|
||||
```tomo
|
||||
text := "one two three"
|
||||
for word in text:by_pattern_split($Pat"{whitespace}"):
|
||||
    say(word)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### `each_pattern`
|
||||
@ -258,6 +208,58 @@ func each_pattern(text:Text, pattern:Pat, fn:func(m:PatternMatch), recursive=yes
|
||||
- `fn`: The function to apply to each match.
|
||||
- `recursive`: If `yes`, applies the function recursively on modified text.
|
||||
|
||||
**Example:**
|
||||
```tomo
|
||||
text := "one two three"
|
||||
text:each_pattern($Pat"{id}", func(m:PatternMatch):
|
||||
    say(m.text)
|
||||
)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### `find_patterns`
|
||||
Finds all occurrences of a pattern in a text and returns them as `PatternMatch` objects.
|
||||
|
||||
```tomo
|
||||
func find_patterns(text:Text, pattern:Pat -> [PatternMatch])
|
||||
```
|
||||
|
||||
- `text`: The text to search.
|
||||
- `pattern`: The pattern to match.
|
||||
|
||||
**Returns:**
|
||||
An array of `PatternMatch` objects.
|
||||
|
||||
**Example:**
|
||||
```tomo
|
||||
text := "one! two three!"
|
||||
>> text:find_patterns($Pat"{id}!")
|
||||
= [PatternMatch(text="one!", index=1, captures=["one"]), PatternMatch(text="three!", index=10, captures=["three"])]
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### `has_pattern`
|
||||
Checks whether a given pattern appears in the text.
|
||||
|
||||
```tomo
|
||||
func has_pattern(text:Text, pattern:Pat -> Bool)
|
||||
```
|
||||
|
||||
- `text`: The text to search.
|
||||
- `pattern`: The pattern to check for.
|
||||
|
||||
**Returns:**
|
||||
`yes` if a match is found, otherwise `no`.
|
||||
|
||||
**Example:**
|
||||
```tomo
|
||||
text := "...okay..."
|
||||
>> text:has_pattern($Pat"{id}")
|
||||
= yes
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### `map_pattern`
|
||||
@ -272,9 +274,103 @@ func map_pattern(text:Text, pattern:Pat, fn:func(m:PatternMatch -> Text), recurs
|
||||
- `fn`: A function that transforms matches.
|
||||
- `recursive`: If `yes`, applies transformations recursively.
|
||||
|
||||
**Returns:**
|
||||
**Returns:**
|
||||
A new text with the transformed matches.
|
||||
|
||||
**Example:**
|
||||
```tomo
|
||||
text := "I have #apples and #oranges and #plums"
|
||||
fruits := {"apples"=4, "oranges"=5}
|
||||
>> text:map_pattern($Pat'#{id}', func(match:PatternMatch):
|
||||
fruit := match.captures[1]
|
||||
"$(fruits[fruit] or 0) $fruit"
|
||||
)
|
||||
= "I have 4 apples and 5 oranges and 0 plums"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### `matches_pattern`
|
||||
Returns whether or not text matches a pattern completely.
|
||||
|
||||
```tomo
|
||||
func matches_pattern(text:Text, pattern:Pat -> Bool)
|
||||
```
|
||||
|
||||
- `text`: The text to match against.
|
||||
- `pattern`: The pattern to match.
|
||||
|
||||
**Returns:**
|
||||
`yes` if the whole text matches the pattern, otherwise `no`.
|
||||
|
||||
**Example:**
|
||||
```tomo
|
||||
>> "Hello!!!":matches_pattern($Pat"{id}")
|
||||
= no
|
||||
>> "Hello":matches_pattern($Pat"{id}")
|
||||
= yes
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### `pattern_captures`
|
||||
Returns an array of pattern captures for the given pattern.
|
||||
|
||||
```tomo
|
||||
func pattern_captures(text:Text, pattern:Pat -> [Text]?)
|
||||
```
|
||||
|
||||
- `text`: The text to match against.
|
||||
- `pattern`: The pattern to match.
|
||||
|
||||
**Returns:**
|
||||
An optional array of matched pattern captures. Returns `none` if the text does
|
||||
not match the pattern.
|
||||
|
||||
**Example:**
|
||||
```tomo
|
||||
>> "123 boxes":pattern_captures($Pat"{int} {id}")
|
||||
= ["123", "boxes"]?
|
||||
>> "xxx":pattern_captures($Pat"{int} {id}")
|
||||
= none:[Text]
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### `replace_pattern`
|
||||
Replaces occurrences of a pattern with a replacement text, supporting backreferences.
|
||||
|
||||
```tomo
|
||||
func replace_pattern(text:Text, pattern:Pat, replacement:Text, backref="@", recursive=yes -> Text)
|
||||
```
|
||||
|
||||
- `text`: The text to modify.
|
||||
- `pattern`: The pattern to match.
|
||||
- `replacement`: The text to replace matches with.
|
||||
- `backref`: The symbol for backreferences in the replacement.
|
||||
- `recursive`: If `yes`, applies replacements recursively.
|
||||
|
||||
**Returns:**
|
||||
A new text with replacements applied.
|
||||
|
||||
**Example:**
|
||||
```tomo
|
||||
>> "I have 123 apples and 456 oranges":replace_pattern($Pat"{int}", "some")
|
||||
= "I have some apples and some oranges"
|
||||
|
||||
>> "I have 123 apples and 456 oranges":replace_pattern($Pat"{int}", "(@1)")
|
||||
= "I have (123) apples and (456) oranges"
|
||||
|
||||
>> "I have 123 apples and 456 oranges":replace_pattern($Pat"{int}", "(?1)", backref="?")
|
||||
= "I have (123) apples and (456) oranges"
|
||||
|
||||
>> "bad(fn(), bad(notbad))":replace_pattern($Pat"bad(?)", "good(@1)")
|
||||
= "good(fn(), good(notbad))"
|
||||
|
||||
>> "bad(fn(), bad(notbad))":replace_pattern($Pat"bad(?)", "good(@1)", recursive=no)
|
||||
= "good(fn(), bad(notbad))"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### `split_pattern`
|
||||
@ -287,23 +383,42 @@ func split_pattern(text:Text, pattern:Pat -> [Text])
|
||||
- `text`: The text to split.
|
||||
- `pattern`: The pattern to use as a separator.
|
||||
|
||||
**Returns:**
|
||||
**Returns:**
|
||||
An array of text segments.
|
||||
|
||||
**Example:**
|
||||
```tomo
|
||||
>> "one two three":split_pattern($Pat"{whitespace}")
|
||||
= ["one", "two", "three"]
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### `by_pattern_split`
|
||||
Returns an iterator function that yields text segments split by a pattern.
|
||||
### `translate_patterns`
|
||||
Replaces multiple patterns using a mapping of patterns to replacement texts.
|
||||
|
||||
```tomo
|
||||
func by_pattern_split(text:Text, pattern:Pat -> func(->Text?))
|
||||
func translate_patterns(text:Text, replacements:{Pat,Text}, backref="@", recursive=yes -> Text)
|
||||
```
|
||||
|
||||
- `text`: The text to split.
|
||||
- `pattern`: The pattern to use as a separator.
|
||||
- `text`: The text to modify.
|
||||
- `replacements`: A table mapping patterns to their replacements.
|
||||
- `backref`: The symbol for backreferences in replacements.
|
||||
- `recursive`: If `yes`, applies replacements recursively.
|
||||
|
||||
**Returns:**
|
||||
An iterator function that yields text segments.
|
||||
**Returns:**
|
||||
A new text with all specified replacements applied.
|
||||
|
||||
**Example:**
|
||||
```tomo
|
||||
>> text := "foo(x, baz(1))"
|
||||
>> text:translate_patterns({
|
||||
$Pat"{id}(?)"="call(fn('@1'), @2)",
|
||||
$Pat"{id}"="var('@1')",
|
||||
$Pat"{int}"="int(@1)",
|
||||
})
|
||||
= "call(fn('foo'), var('x'), call(fn('baz'), int(1)))"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
@ -319,5 +434,11 @@ func trim_pattern(text:Text, pattern=$Pat"{space}", left=yes, right=yes -> Text)
|
||||
- `left`: If `yes`, trims from the beginning.
|
||||
- `right`: If `yes`, trims from the end.
|
||||
|
||||
**Returns:**
|
||||
**Returns:**
|
||||
The trimmed text.
|
||||
|
||||
**Example:**
|
||||
```tomo
|
||||
>> "123abc456":trim_pattern($Pat"{digit}")
|
||||
= "abc"
|
||||
```
|
||||
|
@ -851,7 +851,14 @@ PUREFUNC static bool Pattern$has(Text_t text, Text_t pattern)
|
||||
}
|
||||
}
|
||||
|
||||
static OptionalArray_t Pattern$matches(Text_t text, Text_t pattern)
|
||||
// Report whether `pattern` matches `text` in its entirety.
//
// Calls match() starting at index 0 of both the text and the pattern, passing
// NULL for the capture buffer, and compares the returned match length with
// text.length.
//
// Parameters:
//   text    - the text to test.
//   pattern - the pattern to match against the text.
//
// Returns:
//   true only when the length reported by match() equals text.length;
//   false otherwise.
static bool Pattern$matches(Text_t text, Text_t pattern)
{
    // No capture buffer is declared here: captures are not requested
    // (NULL is passed), so a local capture array would be dead storage.
    int64_t match_len = match(text, 0, pattern, 0, NULL, 0);
    return (match_len == text.length);
}
|
||||
|
||||
static OptionalArray_t Pattern$captures(Text_t text, Text_t pattern)
|
||||
{
|
||||
capture_t captures[MAX_BACKREFS] = {};
|
||||
int64_t match_len = match(text, 0, pattern, 0, captures, 0);
|
||||
|
@ -10,8 +10,11 @@ lang Pat:
|
||||
return Pat.from_text("$n")
|
||||
|
||||
extend Text:
|
||||
func matches_pattern(text:Text, pattern:Pat -> [Text]?):
|
||||
return inline C : [Text]? { Pattern$matches(_$text, _$pattern); }
|
||||
func matches_pattern(text:Text, pattern:Pat -> Bool):
|
||||
return inline C : Bool { Pattern$matches(_$text, _$pattern); }
|
||||
|
||||
func pattern_captures(text:Text, pattern:Pat -> [Text]?):
|
||||
return inline C : [Text]? { Pattern$captures(_$text, _$pattern); }
|
||||
|
||||
func replace_pattern(text:Text, pattern:Pat, replacement:Text, backref="@", recursive=yes -> Text):
|
||||
return inline C : Text { Pattern$replace(_$text, _$pattern, _$replacement, _$backref, _$recursive); }
|
||||
@ -42,11 +45,3 @@ extend Text:
|
||||
|
||||
func trim_pattern(text:Text, pattern=$Pat"{space}", left=yes, right=yes -> Text):
|
||||
return inline C : Text { Pattern$trim(_$text, _$pattern, _$left, _$right); }
|
||||
|
||||
func main():
|
||||
>> "hello world":replace_pattern($Pat/{id}/, "XXX")
|
||||
>> "hello world":find_patterns($Pat/l/)
|
||||
|
||||
for m in "hello one two three":by_pattern($Pat/{id}/):
|
||||
>> m
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user