Move patterns into a module
This commit is contained in:
parent
7a2c99de74
commit
4d59fc2987
@ -11,12 +11,12 @@ where a different type of string is needed.
|
||||
```tomo
|
||||
lang HTML:
|
||||
convert(t:Text -> HTML):
|
||||
t = t:replace_all({
|
||||
$/&/ = "&",
|
||||
$/</ = "<",
|
||||
$/>/ = ">",
|
||||
$/"/ = """,
|
||||
$/'/ = "'",
|
||||
t = t:translate({
|
||||
"&" = "&amp;",
|
||||
"<" = "&lt;",
|
||||
">" = "&gt;",
|
||||
'"' = "&quot;",
|
||||
"'" = "&#39;",
|
||||
})
|
||||
return HTML.from_text(t)
|
||||
|
||||
@ -75,7 +75,7 @@ instead of building a global function called `execute()` that takes a
|
||||
```tomo
|
||||
lang Sh:
|
||||
convert(text:Text -> Sh):
|
||||
return Sh.from_text("'" ++ text:replace($/'/, "''") ++ "'")
|
||||
return Sh.from_text("'" ++ text:replace("'", "''") ++ "'")
|
||||
|
||||
func execute(sh:Sh -> Text):
|
||||
...
|
||||
@ -94,7 +94,7 @@ another type's block or at the top level.
|
||||
```tomo
|
||||
lang Sh:
|
||||
convert(text:Text -> Sh):
|
||||
return Sh.from_text("'" ++ text:replace($/'/, "''") ++ "'")
|
||||
return Sh.from_text("'" ++ text:replace("'", "''") ++ "'")
|
||||
|
||||
struct Foo(x,y:Int):
|
||||
convert(f:Foo -> Sh):
|
||||
|
394
docs/text.md
394
docs/text.md
@ -273,43 +273,43 @@ pattern documentation](patterns.md) for more details.
|
||||
- [`func as_c_string(text: Text -> CString)`](#as_c_string)
|
||||
- [`func at(text: Text, index: Int -> Text)`](#at)
|
||||
- [`func by_line(text: Text -> func(->Text?))`](#by_line)
|
||||
- [`func by_match(text: Text, pattern: Pattern -> func(->Match?))`](#by_match)
|
||||
- [`func by_split(text: Text, pattern: Pattern = $// -> func(->Text?))`](#by_split)
|
||||
- [`func by_split(text: Text, delimiter: Text = "" -> func(->Text?))`](#by_split)
|
||||
- [`func by_split_any(text: Text, delimiters: Text = " $\t\r\n" -> func(->Text?))`](#by_split_any)
|
||||
- [`func bytes(text: Text -> [Byte])`](#bytes)
|
||||
- [`func caseless_equals(a: Text, b:Text, language:Text = "C" -> Bool)`](#caseless_equals)
|
||||
- [`func codepoint_names(text: Text -> [Text])`](#codepoint_names)
|
||||
- [`func each(text: Text, pattern: Pattern, fn: func(m: Match), recursive: Bool = yes -> Int?)`](#each)
|
||||
- [`func ends_with(text: Text, suffix: Text -> Bool)`](#ends_with)
|
||||
- [`func find(text: Text, pattern: Pattern, start: Int = 1 -> Int?)`](#find)
|
||||
- [`func find_all(text: Text, pattern: Pattern -> [Match])`](#find_all)
|
||||
- [`func from(text: Text, first: Int -> Text)`](#from)
|
||||
- [`func from_codepoint_names(codepoints: [Int32] -> [Text])`](#from_bytes)
|
||||
- [`func from_bytes(bytes: [Byte] -> [Text])`](#from_bytes)
|
||||
- [`func from_c_string(str: CString -> Text)`](#from_c_string)
|
||||
- [`func from_codepoint_names(codepoint_names: [Text] -> [Text])`](#from_codepoint_names)
|
||||
- [`func from_codepoint_names(codepoints: [Int32] -> [Text])`](#from_codepoints)
|
||||
- [`func has(text: Text, pattern: Pattern -> Bool)`](#has)
|
||||
- [`func from_codepoints(codepoints: [Int32] -> [Text])`](#from_codepoints)
|
||||
- [`func has(text: Text, target: Text -> Bool)`](#has)
|
||||
- [`func join(glue: Text, pieces: [Text] -> Text)`](#join)
|
||||
- [`func split(text: Text -> [Text])`](#lines)
|
||||
- [`func split(text: Text, delimiter: Text = "" -> [Text])`](#split)
|
||||
- [`func split_any(text: Text, delimiters: Text = " $\t\r\n" -> [Text])`](#split_any)
|
||||
- [`func middle_pad(text: Text, width: Int, pad: Text = " ", language: Text = "C" -> Text)`](#middle_pad)
|
||||
- [`func left_pad(text: Text, width: Int, pad: Text = " ", language: Text = "C" -> Text)`](#left_pad)
|
||||
- [`func lines(text: Text, pattern: Pattern = "" -> [Text])`](#lines)
|
||||
- [`func lines(text: Text -> [Text])`](#lines)
|
||||
- [`func lower(text: Text, language: Text = "C" -> Text)`](#lower)
|
||||
- [`func map(text: Text, pattern: Pattern, fn: func(text:Match)->Text -> Text, recursive: Bool = yes)`](#map)
|
||||
- [`func matches(text: Text, pattern: Pattern -> [Text])`](#matches)
|
||||
- [`func quoted(text: Text, color: Bool = no, quotation_mark: Text = '"' -> Text)`](#quoted)
|
||||
- [`func repeat(text: Text, count:Int -> Text)`](#repeat)
|
||||
- [`func replace(text: Text, pattern: Pattern, replacement: Text, backref: Pattern = $/\/, recursive: Bool = yes -> Text)`](#replace)
|
||||
- [`func replace_all(replacements:{Pattern,Text}, backref: Pattern = $/\/, recursive: Bool = yes -> Text)`](#replace_all)
|
||||
- [`func replace(text: Text, target: Text, replacement: Text -> Text)`](#replace)
|
||||
- [`func reversed(text: Text -> Text)`](#reversed)
|
||||
- [`func right_pad(text: Text, width: Int, pad: Text = " ", language: Text = "C" -> Text)`](#right_pad)
|
||||
- [`func slice(text: Text, from: Int = 1, to: Int = -1 -> Text)`](#slice)
|
||||
- [`func starts_with(text: Text, prefix: Text -> Bool)`](#starts_with)
|
||||
- [`func title(text: Text, language: Text = "C" -> Text)`](#title)
|
||||
- [`func to(text: Text, last: Int -> Text)`](#to)
|
||||
- [`func trim(text: Text, pattern: Pattern = $/{whitespace/, trim_left: Bool = yes, trim_right: Bool = yes -> Text)`](#trim)
|
||||
- [`func translate(translations:{Text,Text} -> Text)`](#translate)
|
||||
- [`func trim(text: Text, to_trim: Text = " $\t\r\n", left: Bool = yes, right: Bool = yes -> Text)`](#trim)
|
||||
- [`func upper(text: Text, language: Text = "C" -> Text)`](#upper)
|
||||
- [`func utf32_codepoints(text: Text -> [Int32])`](#utf32_codepoints)
|
||||
- [`func width(text: Text -> Int)`](#width)
|
||||
- [`func without_prefix(text: Text, prefix: Text -> Text)`](#without_prefix)
|
||||
- [`func without_suffix(text: Text, suffix: Text -> Text)`](#without_suffix)
|
||||
|
||||
----------------
|
||||
|
||||
### `as_c_string`
|
||||
Converts a `Text` value to a C-style string.
|
||||
@ -411,24 +411,53 @@ for match in text:by_match($/{alpha}/):
|
||||
|
||||
### `by_split`
|
||||
Returns an iterator function that can be used to iterate over text separated by
|
||||
a pattern.
|
||||
a delimiter.
|
||||
**Note:** to split based on a set of delimiters, use [`by_split_any()`](#by_split_any).
|
||||
|
||||
```tomo
|
||||
func by_split(text: Text, pattern: Pattern = $// -> func(->Text?))
|
||||
func by_split(text: Text, delimiter: Text = "" -> func(->Text?))
|
||||
```
|
||||
|
||||
- `text`: The text to be iterated over in pattern-delimited chunks.
|
||||
- `pattern`: The [pattern](patterns.md) to split the text on.
|
||||
- `delimiter`: An exact delimiter to use for splitting the text. If an empty text
|
||||
is given, then each split will be a single grapheme cluster of the text.
|
||||
|
||||
**Returns:**
|
||||
An iterator function that returns one chunk of text at a time, separated by the
|
||||
given pattern, until it runs out and returns `none`. **Note:** using an empty
|
||||
pattern (the default) will iterate over single grapheme clusters in the text.
|
||||
given delimiter, until it runs out and returns `none`. **Note:** using an empty
|
||||
delimiter (the default) will iterate over single grapheme clusters in the text.
|
||||
|
||||
**Example:**
|
||||
```tomo
|
||||
text := "one,two,three"
|
||||
for chunk in text:by_split($/,/):
|
||||
for chunk in text:by_split(","):
|
||||
# Prints: "one" then "two" then "three":
|
||||
say(chunk)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### `by_split_any`
|
||||
Returns an iterator function that can be used to iterate over text separated by
|
||||
one or more characters (grapheme clusters) from a given text of delimiters.
|
||||
**Note:** to split based on an exact delimiter, use [`by_split()`](#by_split).
|
||||
|
||||
```tomo
|
||||
func by_split_any(text: Text, delimiters: Text = " $\t\r\n" -> func(->Text?))
|
||||
```
|
||||
|
||||
- `text`: The text to be iterated over in delimiter-separated chunks.
|
||||
- `delimiters`: A text containing multiple delimiter characters (grapheme clusters)
|
||||
to use for splitting the text.
|
||||
|
||||
**Returns:**
|
||||
An iterator function that returns one chunk of text at a time, separated by the
|
||||
given delimiter characters, until it runs out and returns `none`.
|
||||
|
||||
**Example:**
|
||||
```tomo
|
||||
text := "one,two,;,three"
|
||||
for chunk in text:by_split_any(",;"):
|
||||
# Prints: "one" then "two" then "three":
|
||||
say(chunk)
|
||||
```
|
||||
@ -628,7 +657,7 @@ func from(text: Text, first: Int -> Text)
|
||||
The text from the given grapheme cluster to the end of the text. Note: a
|
||||
negative index counts backwards from the end of the text, so `-1` refers to the
|
||||
last cluster, `-2` the second-to-last, etc. Slice ranges will be truncated to
|
||||
the length of the string.
|
||||
the length of the text.
|
||||
|
||||
**Example:**
|
||||
```tomo
|
||||
@ -647,10 +676,10 @@ text will be normalized, so the resulting text's UTF8 bytes may not exactly
|
||||
match the input.
|
||||
|
||||
```tomo
|
||||
func from_codepoint_names(codepoints: [Int32] -> [Text])
|
||||
func from_bytes(bytes: [Byte] -> [Text])
|
||||
```
|
||||
|
||||
- `codepoints`: The UTF32 codepoints in the desired text.
|
||||
- `bytes`: The UTF-8 bytes of the desired text.
|
||||
|
||||
**Returns:**
|
||||
A new text based on the input UTF8 bytes after normalization has been applied.
|
||||
@ -717,7 +746,7 @@ the text will be normalized, so the resulting text's codepoints may not exactly
|
||||
match the input codepoints.
|
||||
|
||||
```tomo
|
||||
func from_codepoint_names(codepoints: [Int32] -> [Text])
|
||||
func from_codepoints(codepoints: [Int32] -> [Text])
|
||||
```
|
||||
|
||||
- `codepoints`: The UTF32 codepoints in the desired text.
|
||||
@ -734,28 +763,24 @@ A new text with the specified codepoints after normalization has been applied.
|
||||
---
|
||||
|
||||
### `has`
|
||||
Checks if the `Text` contains a target [pattern](patterns.md).
|
||||
Checks if the `Text` contains some target text.
|
||||
|
||||
```tomo
|
||||
func has(text: Text, pattern: Pattern -> Bool)
|
||||
func has(text: Text, target: Text -> Bool)
|
||||
```
|
||||
|
||||
- `text`: The text to be searched.
|
||||
- `pattern`: The [pattern](patterns.md) to search for.
|
||||
- `target`: The text to search for.
|
||||
|
||||
**Returns:**
|
||||
`yes` if the target pattern is found, `no` otherwise.
|
||||
`yes` if the target text is found, `no` otherwise.
|
||||
|
||||
**Example:**
|
||||
```tomo
|
||||
>> "hello world":has($/wo/)
|
||||
>> "hello world":has("wo")
|
||||
= yes
|
||||
>> "hello world":has($/{alpha}/)
|
||||
= yes
|
||||
>> "hello world":has($/{digit}/)
|
||||
>> "hello world":has("xxx")
|
||||
= no
|
||||
>> "hello world":has($/{start}he/)
|
||||
= yes
|
||||
```
|
||||
|
||||
---
|
||||
@ -888,63 +913,8 @@ The lowercase version of the text.
|
||||
|
||||
---
|
||||
|
||||
### `map`
|
||||
For each occurrence of the given [pattern](patterns.md), replace the text with
|
||||
the result of calling the given function on that match.
|
||||
|
||||
```tomo
|
||||
func map(text: Text, pattern: Pattern, fn: func(text:Match)->Text -> Text, recursive: Bool = yes)
|
||||
```
|
||||
|
||||
- `text`: The text to be searched.
|
||||
- `pattern`: The [pattern](patterns.md) to search for.
|
||||
- `fn`: The function to apply to each match.
|
||||
- `recursive`: Whether to recursively map `fn` to each of the captures of the
|
||||
pattern before handing them to `fn`.
|
||||
|
||||
**Returns:**
|
||||
The text with the matching parts replaced with the result of applying the given
|
||||
function to each.
|
||||
|
||||
**Example:**
|
||||
```tomo
|
||||
>> "hello world":map($/world/, func(m:Match): m.text:upper())
|
||||
= "hello WORLD"
|
||||
>> "Some nums: 1 2 3 4":map($/{int}/, func(m:Match): "$(Int.parse(m.text)! + 10)")
|
||||
= "Some nums: 11 12 13 14"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### `matches`
|
||||
Checks if the `Text` matches target [pattern](patterns.md) and returns an array
|
||||
of the matching text captures or a null value if the entire text doesn't match
|
||||
the pattern.
|
||||
|
||||
```tomo
|
||||
func matches(text: Text, pattern: Pattern -> [Text])
|
||||
```
|
||||
|
||||
- `text`: The text to be searched.
|
||||
- `pattern`: The [pattern](patterns.md) to search for.
|
||||
|
||||
**Returns:**
|
||||
An array of the matching text captures if the entire text matches the pattern,
|
||||
or a null value otherwise.
|
||||
|
||||
**Example:**
|
||||
```tomo
|
||||
>> "hello world":matches($/{id}/)
|
||||
= none : [Text]?
|
||||
|
||||
>> "hello world":matches($/{id} {id}/)
|
||||
= ["hello", "world"] : [Text]?
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### `quoted`
|
||||
Formats the text as a quoted string.
|
||||
Formats the text with quotation marks and escapes.
|
||||
|
||||
```tomo
|
||||
func quoted(text: Text, color: Bool = no, quotation_mark: Text = '"' -> Text)
|
||||
@ -955,7 +925,7 @@ func quoted(text: Text, color: Bool = no, quotation_mark: Text = '"' -> Text)
|
||||
- `quotation_mark`: The quotation mark to use (default is `"`).
|
||||
|
||||
**Returns:**
|
||||
The text formatted as a quoted string.
|
||||
The text formatted as a quoted text.
|
||||
|
||||
**Example:**
|
||||
```tomo
|
||||
@ -987,106 +957,23 @@ The text repeated the given number of times.
|
||||
---
|
||||
|
||||
### `replace`
|
||||
Replaces occurrences of a [pattern](patterns.md) in the text with a replacement
|
||||
string.
|
||||
Replaces occurrences of a target text with a replacement text.
|
||||
|
||||
```tomo
|
||||
func replace(text: Text, pattern: Pattern, replacement: Text, backref: Pattern = $/\/, recursive: Bool = yes -> Text)
|
||||
func replace(text: Text, target: Text, replacement: Text -> Text)
|
||||
```
|
||||
|
||||
- `text`: The text in which to perform replacements.
|
||||
- `pattern`: The [pattern](patterns.md) to be replaced.
|
||||
- `replacement`: The text to replace the pattern with.
|
||||
- `backref`: If non-empty, the replacement text will have occurrences of this
|
||||
pattern followed by a number replaced with the corresponding backreference.
|
||||
By default, the backreference pattern is a single backslash, so
|
||||
backreferences look like `\0`, `\1`, etc.
|
||||
- `recursive`: For backreferences of a nested capture, if recursive is set to
|
||||
`yes`, then the whole replacement will be reapplied recursively to the
|
||||
backreferenced text if it's used in the replacement.
|
||||
|
||||
**Backreferences**
|
||||
If a backreference pattern is in the replacement, then that backreference is
|
||||
replaced with the corresponding group from the matching text. Backreference
|
||||
`0` is the entire matching text, backreference `1` is the first matched group,
|
||||
and so on. Literal text is not captured for backreferences, only named group
|
||||
captures (`{foo}`), quoted captures (`"?"`), and nested group captures (`(?)`).
|
||||
For quoted and nested group captures, the backreference refers to the *inside*
|
||||
of the capture without the enclosing punctuation.
|
||||
|
||||
If you need to insert a digit immediately after a backreference, you can use an
|
||||
optional semicolon: `\1;2` (backref 1, followed by the replacement text`"2"`).
|
||||
- `target`: The target text to be replaced.
|
||||
- `replacement`: The text to replace the target with.
|
||||
|
||||
**Returns:**
|
||||
The text with occurrences of the pattern replaced.
|
||||
The text with occurrences of the target replaced.
|
||||
|
||||
**Example:**
|
||||
```tomo
|
||||
>> "Hello world":replace($/world/, "there")
|
||||
>> "Hello world":replace("world", "there")
|
||||
= "Hello there"
|
||||
|
||||
>> "Hello world":replace($/{id}/, "xxx")
|
||||
= "xxx xxx"
|
||||
|
||||
>> "Hello world":replace($/{id}/, "\0")
|
||||
= "(Hello) (world)"
|
||||
|
||||
>> "Hello world":replace($/{id}/, "(@0)", backref=$/@/)
|
||||
= "(Hello) (world)"
|
||||
|
||||
>> "Hello world":replace($/{id} {id}/, "just \2")
|
||||
= "just world"
|
||||
|
||||
# Recursive is the default behavior:
|
||||
>> " BAD(x, BAD(y), z) ":replace($/BAD(?)/, "good(\1)", recursive=yes)
|
||||
= " good(x, good(y), z) "
|
||||
|
||||
>> " BAD(x, BAD(y), z) ":replace($/BAD(?)/, "good(\1)", recursive=no)
|
||||
= " good(x, BAD(y), z) "
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### `replace_all`
|
||||
Takes a table mapping [patterns](patterns.md) to replacement texts and performs
|
||||
all the replacements in the table on the whole text. At each position, the
|
||||
first matching pattern's replacement is applied and the pattern matching moves
|
||||
on to *after* the replacement text, so replacement text is not recursively
|
||||
modified. See [`replace()`](#replace) for more information about replacement
|
||||
behavior.
|
||||
|
||||
```tomo
|
||||
func replace_all(replacements:{Pattern,Text}, backref: Pattern = $/\/, recursive: Bool = yes -> Text)
|
||||
```
|
||||
|
||||
- `text`: The text in which to perform replacements.
|
||||
- `replacements`: A table mapping from [pattern](patterns.md) to the
|
||||
replacement text associated with that pattern.
|
||||
- `backref`: If non-empty, the replacement text will have occurrences of this
|
||||
pattern followed by a number replaced with the corresponding backreference.
|
||||
By default, the backreference pattern is a single backslash, so
|
||||
backreferences look like `\0`, `\1`, etc.
|
||||
- `recursive`: For backreferences of a nested capture, if recursive is set to
|
||||
`yes`, then the matching replacement will be reapplied recursively to the
|
||||
backreferenced text if it's used in the replacement.
|
||||
|
||||
**Returns:**
|
||||
The text with all occurrences of the patterns replaced with their corresponding
|
||||
replacement text.
|
||||
|
||||
**Example:**
|
||||
```tomo
|
||||
>> "A <tag> & an amperand":replace_all({
|
||||
$/&/ = "&",
|
||||
$/</ = "<",
|
||||
$/>/ = ">",
|
||||
$/"/ = """,
|
||||
$/'/ = "'",
|
||||
}
|
||||
= "A <tag> & an ampersand"
|
||||
|
||||
>> "Hello":replace_all({$/{lower}/="[\0]", $/{upper}/="{\0}"})
|
||||
= "{H}[ello]"
|
||||
```
|
||||
|
||||
---
|
||||
@ -1153,7 +1040,7 @@ func slice(text: Text, from: Int = 1, to: Int = -1 -> Text)
|
||||
The text that spans the given grapheme cluster indices. Note: a negative index
|
||||
counts backwards from the end of the text, so `-1` refers to the last cluster,
|
||||
`-2` the second-to-last, etc. Slice ranges will be truncated to the length of
|
||||
the string.
|
||||
the text.
|
||||
|
||||
**Example:**
|
||||
```tomo
|
||||
@ -1170,32 +1057,51 @@ the string.
|
||||
---
|
||||
|
||||
### `split`
|
||||
Splits the text into an array of substrings based on a [pattern](patterns.md).
|
||||
Splits the text into an array of substrings based on exact matches of a delimiter.
|
||||
**Note:** to split based on a set of delimiter characters, use [`split_any()`](#split_any).
|
||||
|
||||
```tomo
|
||||
func split(text: Text, pattern: Pattern = "" -> [Text])
|
||||
func split(text: Text, delimiter: Text = "" -> [Text])
|
||||
```
|
||||
|
||||
- `text`: The text to be split.
|
||||
- `pattern`: The [pattern](patterns.md) used to split the text. If the pattern
|
||||
is the empty string, the text will be split into individual grapheme clusters.
|
||||
- `delimiter`: The delimiter used to split the text. If the delimiter is the
|
||||
empty text, the text will be split into individual grapheme clusters.
|
||||
|
||||
**Returns:**
|
||||
An array of substrings resulting from the split.
|
||||
An array of subtexts resulting from the split.
|
||||
|
||||
**Example:**
|
||||
```tomo
|
||||
>> "one,two,three":split($/,/)
|
||||
= ["one", "two", "three"]
|
||||
>> "one,two,,three":split(",")
|
||||
= ["one", "two", "", "three"]
|
||||
|
||||
>> "abc":split()
|
||||
= ["a", "b", "c"]
|
||||
```
|
||||
|
||||
>> "a b c":split($/{space}/)
|
||||
= ["a", "b", "c"]
|
||||
---
|
||||
|
||||
>> "a,b,c,":split($/,/)
|
||||
= ["a", "b", "c", ""]
|
||||
### `split_any`
|
||||
Splits the text into an array of substrings at one or more occurrences of a set
|
||||
of delimiter characters (grapheme clusters).
|
||||
**Note:** to split based on an exact delimiter, use [`split()`](#split).
|
||||
|
||||
```tomo
|
||||
func split_any(text: Text, delimiters: Text = " $\t\r\n" -> [Text])
|
||||
```
|
||||
|
||||
- `text`: The text to be split.
|
||||
- `delimiters`: A text containing multiple delimiters to be used for
|
||||
splitting the text into chunks.
|
||||
|
||||
**Returns:**
|
||||
An array of subtexts resulting from the split.
|
||||
|
||||
**Example:**
|
||||
```tomo
|
||||
>> "one, two,,three":split_any(", ")
|
||||
= ["one", "two", "three"]
|
||||
```
|
||||
|
||||
---
|
||||
@ -1260,7 +1166,7 @@ func to(text: Text, last: Int -> Text)
|
||||
The text up to and including the given grapheme cluster. Note: a negative index
|
||||
counts backwards from the end of the text, so `-1` refers to the last cluster,
|
||||
`-2` the second-to-last, etc. Slice ranges will be truncated to the length of
|
||||
the string.
|
||||
the text.
|
||||
|
||||
**Example:**
|
||||
```tomo
|
||||
@ -1273,30 +1179,62 @@ the string.
|
||||
|
||||
---
|
||||
|
||||
### `trim`
|
||||
Trims the matching [pattern](patterns.md) from the left and/or right side of the text.
|
||||
### `translate`
|
||||
Takes a table mapping target texts to their replacements and performs all the
|
||||
replacements in the table on the whole text. At each position, the first
|
||||
matching replacement is applied and the matching moves on to *after* the
|
||||
replacement text, so replacement text is not recursively modified. See
|
||||
[`replace()`](#replace) for more information about replacement behavior.
|
||||
|
||||
```tomo
|
||||
func trim(text: Text, pattern: Pattern = $/{whitespace/, trim_left: Bool = yes, trim_right: Bool = yes -> Text)
|
||||
func translate(text: Text, translations:{Text,Text} -> Text)
|
||||
```
|
||||
|
||||
- `text`: The text in which to perform replacements.
|
||||
- `translations`: A table mapping from target text to its replacement.
|
||||
|
||||
**Returns:**
|
||||
The text with all occurrences of the target texts replaced with their corresponding
|
||||
replacement text.
|
||||
|
||||
**Example:**
|
||||
```tomo
|
||||
>> "A <tag> & an ampersand":translate({
|
||||
"&" = "&amp;",
|
||||
"<" = "&lt;",
|
||||
">" = "&gt;",
|
||||
'"' = "&quot;",
|
||||
"'" = "&#39;",
|
||||
})
|
||||
= "A &lt;tag&gt; &amp; an ampersand"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### `trim`
|
||||
Trims the given characters (grapheme clusters) from the left and/or right side of the text.
|
||||
|
||||
```tomo
|
||||
func trim(text: Text, to_trim: Text = " $\t\r\n", left: Bool = yes, right: Bool = yes -> Text)
|
||||
```
|
||||
|
||||
- `text`: The text to be trimmed.
|
||||
- `pattern`: The [pattern](patterns.md) that will be trimmed away.
|
||||
- `trim_left`: Whether or not to trim from the front of the text.
|
||||
- `trim_right`: Whether or not to trim from the back of the text.
|
||||
- `to_trim`: The characters to remove from the left/right of the text.
|
||||
- `left`: Whether or not to trim from the front of the text.
|
||||
- `right`: Whether or not to trim from the back of the text.
|
||||
|
||||
**Returns:**
|
||||
The text without the trim pattern at either end.
|
||||
The text without the trim characters at either end.
|
||||
|
||||
**Example:**
|
||||
```tomo
|
||||
>> " x y z $(\n)":trim()
|
||||
= "x y z"
|
||||
|
||||
>> "abc123def":trim($/{!digit}/)
|
||||
= "123"
|
||||
>> "one,":trim(",")
|
||||
= "one"
|
||||
|
||||
>> " xyz ":trim(trim_right=no)
|
||||
>> " xyz ":trim(right=no)
|
||||
= "xyz "
|
||||
```
|
||||
|
||||
@ -1371,3 +1309,51 @@ An integer representing the display width of the text.
|
||||
>> "🤠":width()
|
||||
= 2
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### `without_prefix`
|
||||
Returns the text with a given prefix removed (if present).
|
||||
|
||||
```tomo
|
||||
func without_prefix(text: Text, prefix: Text -> Text)
|
||||
```
|
||||
|
||||
- `text`: The text to remove the prefix from.
|
||||
- `prefix`: The prefix to remove.
|
||||
|
||||
**Returns:**
|
||||
A text without the given prefix (if present) or the unmodified text if the
|
||||
prefix is not present.
|
||||
|
||||
**Example:**
|
||||
```tomo
|
||||
>> "foo:baz":without_prefix("foo:")
|
||||
= "baz"
|
||||
>> "qux":without_prefix("foo:")
|
||||
= "qux"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### `without_suffix`
|
||||
Returns the text with a given suffix removed (if present).
|
||||
|
||||
```tomo
|
||||
func without_suffix(text: Text, suffix: Text -> Text)
|
||||
```
|
||||
|
||||
- `text`: The text to remove the suffix from.
|
||||
- `suffix`: The suffix to remove.
|
||||
|
||||
**Returns:**
|
||||
A text without the given suffix (if present) or the unmodified text if the
|
||||
suffix is not present.
|
||||
|
||||
**Example:**
|
||||
```tomo
|
||||
>> "baz.foo":without_suffix(".foo")
|
||||
= "baz"
|
||||
>> "qux":without_suffix(".foo")
|
||||
= "qux"
|
||||
```
|
||||
|
@ -62,32 +62,3 @@ $Colorful"
|
||||
We have @(green,bold:colors)!
|
||||
":print()
|
||||
```
|
||||
|
||||
You can very easily introduce your own syntax highlighting for a custom DSL:
|
||||
|
||||
```tomo
|
||||
lang Markdown:
|
||||
func Colorful(md:Markdown -> Colorful):
|
||||
text := md.text:replace_all({
|
||||
$/@/="@(at)",
|
||||
$/(/="@(lparen)",
|
||||
$/)/="@(rparen)",
|
||||
$/**{..}**/="@(b:\1)",
|
||||
$/*{..}*/="@(i:\1)",
|
||||
$/[?](?)/="@(blue,underline:\1)",
|
||||
})
|
||||
return Colorful.from_text(text)
|
||||
|
||||
func colorful(md:Markdown -> Colorful):
|
||||
return $Colorful"$md"
|
||||
...
|
||||
|
||||
md := $Markdown"
|
||||
This is [a link with **bold** inside](example.com)!
|
||||
"
|
||||
>> colorful := md:colorful()
|
||||
= $Colorful"This is @(blue,underline:a link with @(b:bold) inside)!"
|
||||
>> colorful:for_terminal()
|
||||
= "$\e[mThis is $\e[4;34ma link with $\e[1mbold$\e[22m inside$\e[24;39m!"
|
||||
colorful:print()
|
||||
```
|
||||
|
@ -8,7 +8,7 @@ CSI := "$\033["
|
||||
|
||||
lang Colorful:
|
||||
convert(text:Text -> Colorful):
|
||||
text = text:replace_all({$/@/="@(at)", $/(/="@(lparen)", $/)/="@(rparen)"})
|
||||
text = text:translate({"@"="@(at)", "("="@(lparen)", ")"="@(rparen)"})
|
||||
return Colorful.from_text(text)
|
||||
|
||||
convert(i:Int -> Colorful): return Colorful.from_text("$i")
|
||||
|
@ -68,8 +68,8 @@ struct World(player:@Player, goal:@Box, boxes:@[@Box], dt_accum=Num32(0.0), won=
|
||||
DrawText(CString("WINNER"), GetScreenWidth()/Int32(2)-Int32(48*3), GetScreenHeight()/Int32(2)-Int32(24), 48, Color(0,0,0))
|
||||
|
||||
func load_map(w:@World, map:Text):
|
||||
if map:has($/[]/):
|
||||
map = map:replace_all({$/[]/="#", $/@{1..}/="@", $/ /=" "})
|
||||
if map:has("[]"):
|
||||
map = map:translate({"[]"="#", "@ "="@", " "=" "})
|
||||
w.boxes = @[:@Box]
|
||||
box_size := Vector2(50., 50.)
|
||||
for y,line in map:lines():
|
||||
|
8
examples/patterns/match_type.h
Normal file
8
examples/patterns/match_type.h
Normal file
@ -0,0 +1,8 @@
|
||||
#pragma once
|
||||
|
||||
typedef struct {
|
||||
Text_t text;
|
||||
Int_t index;
|
||||
Array_t captures;
|
||||
} XMatch;
|
||||
|
@ -2,21 +2,28 @@
|
||||
|
||||
#include <ctype.h>
|
||||
#include <sys/param.h>
|
||||
#include <tomo/tomo.h>
|
||||
#include <unictype.h>
|
||||
#include <uniname.h>
|
||||
#include <unistring/version.h>
|
||||
|
||||
#include "arrays.h"
|
||||
#include "integers.h"
|
||||
#include "optionals.h"
|
||||
#include "patterns.h"
|
||||
#include "structs.h"
|
||||
#include "tables.h"
|
||||
#include "text.h"
|
||||
#include "types.h"
|
||||
|
||||
#define MAX_BACKREFS 100
|
||||
|
||||
typedef struct {
|
||||
Text_t text;
|
||||
Int_t index;
|
||||
Array_t captures;
|
||||
} PatternMatch;
|
||||
|
||||
typedef struct {
|
||||
Text_t text;
|
||||
Int_t index;
|
||||
Array_t captures;
|
||||
bool is_none:1;
|
||||
} OptionalPatternMatch;
|
||||
|
||||
#define NONE_MATCH ((OptionalPatternMatch){.is_none=true})
|
||||
|
||||
typedef struct {
|
||||
int64_t index, length;
|
||||
bool occupied, recursive;
|
||||
@ -35,7 +42,7 @@ typedef struct {
|
||||
};
|
||||
} pat_t;
|
||||
|
||||
static Text_t Text$replace_array(Text_t text, Array_t replacements, Text_t backref_pat, bool recursive);
|
||||
static Text_t replace_array(Text_t text, Array_t replacements, Text_t backref_pat, bool recursive);
|
||||
|
||||
static INLINE void skip_whitespace(TextIter_t *state, int64_t *i)
|
||||
{
|
||||
@ -673,7 +680,7 @@ static pat_t parse_next_pat(TextIter_t *state, int64_t *index)
|
||||
}
|
||||
}
|
||||
|
||||
static int64_t match(Text_t text, int64_t text_index, Pattern_t pattern, int64_t pattern_index, capture_t *captures, int64_t capture_index)
|
||||
static int64_t match(Text_t text, int64_t text_index, Text_t pattern, int64_t pattern_index, capture_t *captures, int64_t capture_index)
|
||||
{
|
||||
if (pattern_index >= pattern.length) // End of the pattern
|
||||
return 0;
|
||||
@ -773,7 +780,7 @@ static int64_t match(Text_t text, int64_t text_index, Pattern_t pattern, int64_t
|
||||
#undef EAT2
|
||||
#undef EAT_MANY
|
||||
|
||||
static int64_t _find(Text_t text, Pattern_t pattern, int64_t first, int64_t last, int64_t *match_length, capture_t *captures)
|
||||
static int64_t _find(Text_t text, Text_t pattern, int64_t first, int64_t last, int64_t *match_length, capture_t *captures)
|
||||
{
|
||||
int32_t first_grapheme = Text$get_grapheme(pattern, 0);
|
||||
bool find_first = (first_grapheme != '{'
|
||||
@ -800,7 +807,7 @@ static int64_t _find(Text_t text, Pattern_t pattern, int64_t first, int64_t last
|
||||
return -1;
|
||||
}
|
||||
|
||||
public OptionalMatch_t Text$find(Text_t text, Pattern_t pattern, Int_t from_index)
|
||||
static OptionalPatternMatch find(Text_t text, Text_t pattern, Int_t from_index)
|
||||
{
|
||||
int64_t first = Int64$from_int(from_index, false);
|
||||
if (first == 0) fail("Invalid index: 0");
|
||||
@ -819,14 +826,14 @@ public OptionalMatch_t Text$find(Text_t text, Pattern_t pattern, Int_t from_inde
|
||||
Text_t capture = Text$slice(text, I(captures[i].index+1), I(captures[i].index+captures[i].length));
|
||||
Array$insert(&capture_array, &capture, I(0), sizeof(Text_t));
|
||||
}
|
||||
return (OptionalMatch_t){
|
||||
return (OptionalPatternMatch){
|
||||
.text=Text$slice(text, I(found+1), I(found+len)),
|
||||
.index=I(found+1),
|
||||
.captures=capture_array,
|
||||
};
|
||||
}
|
||||
|
||||
PUREFUNC public bool Text$has(Text_t text, Pattern_t pattern)
|
||||
PUREFUNC static bool Pattern$has(Text_t text, Text_t pattern)
|
||||
{
|
||||
if (Text$starts_with(pattern, Text("{start}"))) {
|
||||
int64_t m = match(text, 0, pattern, 0, NULL, 0);
|
||||
@ -844,7 +851,7 @@ PUREFUNC public bool Text$has(Text_t text, Pattern_t pattern)
|
||||
}
|
||||
}
|
||||
|
||||
public OptionalArray_t Text$matches(Text_t text, Pattern_t pattern)
|
||||
static OptionalArray_t Pattern$matches(Text_t text, Text_t pattern)
|
||||
{
|
||||
capture_t captures[MAX_BACKREFS] = {};
|
||||
int64_t match_len = match(text, 0, pattern, 0, captures, 0);
|
||||
@ -859,18 +866,18 @@ public OptionalArray_t Text$matches(Text_t text, Pattern_t pattern)
|
||||
return capture_array;
|
||||
}
|
||||
|
||||
public Array_t Text$find_all(Text_t text, Pattern_t pattern)
|
||||
static Array_t Pattern$find_all(Text_t text, Text_t pattern)
|
||||
{
|
||||
if (pattern.length == 0) // special case
|
||||
return (Array_t){.length=0};
|
||||
|
||||
Array_t matches = {};
|
||||
for (int64_t i = 1; ; ) {
|
||||
OptionalMatch_t m = Text$find(text, pattern, I(i));
|
||||
if (!m.index.small)
|
||||
OptionalPatternMatch m = find(text, pattern, I(i));
|
||||
if (m.is_none)
|
||||
break;
|
||||
i = Int64$from_int(m.index, false) + m.text.length;
|
||||
Array$insert(&matches, &m, I_small(0), sizeof(Match_t));
|
||||
Array$insert(&matches, &m, I_small(0), sizeof(PatternMatch));
|
||||
}
|
||||
return matches;
|
||||
}
|
||||
@ -878,23 +885,23 @@ public Array_t Text$find_all(Text_t text, Pattern_t pattern)
|
||||
typedef struct {
|
||||
TextIter_t state;
|
||||
Int_t i;
|
||||
Pattern_t pattern;
|
||||
Text_t pattern;
|
||||
} match_iter_state_t;
|
||||
|
||||
static OptionalMatch_t next_match(match_iter_state_t *state)
|
||||
static OptionalPatternMatch next_match(match_iter_state_t *state)
|
||||
{
|
||||
if (Int64$from_int(state->i, false) > state->state.stack[0].text.length)
|
||||
return NONE_MATCH;
|
||||
|
||||
OptionalMatch_t m = Text$find(state->state.stack[0].text, state->pattern, state->i);
|
||||
if (m.index.small == 0) // No match
|
||||
OptionalPatternMatch m = find(state->state.stack[0].text, state->pattern, state->i);
|
||||
if (m.is_none) // No match
|
||||
state->i = I(state->state.stack[0].text.length + 1);
|
||||
else
|
||||
state->i = Int$plus(m.index, I(MAX(1, m.text.length)));
|
||||
return m;
|
||||
}
|
||||
|
||||
public Closure_t Text$by_match(Text_t text, Pattern_t pattern)
|
||||
static Closure_t Pattern$by_match(Text_t text, Text_t pattern)
|
||||
{
|
||||
return (Closure_t){
|
||||
.fn=(void*)next_match,
|
||||
@ -902,7 +909,7 @@ public Closure_t Text$by_match(Text_t text, Pattern_t pattern)
|
||||
};
|
||||
}
|
||||
|
||||
static Text_t apply_backrefs(Text_t text, Array_t recursive_replacements, Text_t replacement, Pattern_t backref_pat, capture_t *captures)
|
||||
static Text_t apply_backrefs(Text_t text, Array_t recursive_replacements, Text_t replacement, Text_t backref_pat, capture_t *captures)
|
||||
{
|
||||
if (backref_pat.length == 0)
|
||||
return replacement;
|
||||
@ -946,7 +953,7 @@ static Text_t apply_backrefs(Text_t text, Array_t recursive_replacements, Text_t
|
||||
Text_t backref_text = Text$slice(text, I(captures[backref].index+1), I(captures[backref].index + captures[backref].length));
|
||||
|
||||
if (captures[backref].recursive && recursive_replacements.length > 0)
|
||||
backref_text = Text$replace_array(backref_text, recursive_replacements, backref_pat, true);
|
||||
backref_text = replace_array(backref_text, recursive_replacements, backref_pat, true);
|
||||
|
||||
if (pos > nonmatching_pos) {
|
||||
Text_t before_slice = Text$slice(replacement, I(nonmatching_pos+1), I(pos));
|
||||
@ -965,7 +972,7 @@ static Text_t apply_backrefs(Text_t text, Array_t recursive_replacements, Text_t
|
||||
return ret;
|
||||
}
|
||||
|
||||
public Text_t Text$replace(Text_t text, Pattern_t pattern, Text_t replacement, Pattern_t backref_pat, bool recursive)
|
||||
static Text_t Pattern$replace(Text_t text, Text_t pattern, Text_t replacement, Text_t backref_pat, bool recursive)
|
||||
{
|
||||
Text_t ret = EMPTY_TEXT;
|
||||
|
||||
@ -1018,7 +1025,7 @@ public Text_t Text$replace(Text_t text, Pattern_t pattern, Text_t replacement, P
|
||||
return ret;
|
||||
}
|
||||
|
||||
public Text_t Text$trim(Text_t text, Pattern_t pattern, bool trim_left, bool trim_right)
|
||||
static Text_t Pattern$trim(Text_t text, Text_t pattern, bool trim_left, bool trim_right)
|
||||
{
|
||||
int64_t first = 0, last = text.length-1;
|
||||
if (trim_left) {
|
||||
@ -1037,7 +1044,7 @@ public Text_t Text$trim(Text_t text, Pattern_t pattern, bool trim_left, bool tri
|
||||
return Text$slice(text, I(first+1), I(last+1));
|
||||
}
|
||||
|
||||
public Text_t Text$map(Text_t text, Pattern_t pattern, Closure_t fn, bool recursive)
|
||||
static Text_t Pattern$map(Text_t text, Text_t pattern, Closure_t fn, bool recursive)
|
||||
{
|
||||
Text_t ret = EMPTY_TEXT;
|
||||
|
||||
@ -1049,7 +1056,7 @@ public Text_t Text$map(Text_t text, Pattern_t pattern, Closure_t fn, bool recurs
|
||||
TextIter_t text_state = NEW_TEXT_ITER_STATE(text);
|
||||
int64_t nonmatching_pos = 0;
|
||||
|
||||
Text_t (*text_mapper)(Match_t, void*) = fn.fn;
|
||||
Text_t (*text_mapper)(PatternMatch, void*) = fn.fn;
|
||||
for (int64_t pos = 0; pos < text.length; pos++) {
|
||||
// Optimization: quickly skip ahead to first char in pattern:
|
||||
if (find_first) {
|
||||
@ -1061,7 +1068,7 @@ public Text_t Text$map(Text_t text, Pattern_t pattern, Closure_t fn, bool recurs
|
||||
int64_t match_len = match(text, pos, pattern, 0, captures, 0);
|
||||
if (match_len < 0) continue;
|
||||
|
||||
Match_t m = {
|
||||
PatternMatch m = {
|
||||
.text=Text$slice(text, I(pos+1), I(pos+match_len)),
|
||||
.index=I(pos+1),
|
||||
.captures={},
|
||||
@ -1069,7 +1076,7 @@ public Text_t Text$map(Text_t text, Pattern_t pattern, Closure_t fn, bool recurs
|
||||
for (int i = 0; captures[i].occupied; i++) {
|
||||
Text_t capture = Text$slice(text, I(captures[i].index+1), I(captures[i].index+captures[i].length));
|
||||
if (recursive)
|
||||
capture = Text$map(capture, pattern, fn, recursive);
|
||||
capture = Pattern$map(capture, pattern, fn, recursive);
|
||||
Array$insert(&m.captures, &capture, I(0), sizeof(Text_t));
|
||||
}
|
||||
|
||||
@ -1090,7 +1097,7 @@ public Text_t Text$map(Text_t text, Pattern_t pattern, Closure_t fn, bool recurs
|
||||
return ret;
|
||||
}
|
||||
|
||||
public void Text$each(Text_t text, Pattern_t pattern, Closure_t fn, bool recursive)
|
||||
static void Pattern$each(Text_t text, Text_t pattern, Closure_t fn, bool recursive)
|
||||
{
|
||||
int32_t first_grapheme = Text$get_grapheme(pattern, 0);
|
||||
bool find_first = (first_grapheme != '{'
|
||||
@ -1098,7 +1105,7 @@ public void Text$each(Text_t text, Pattern_t pattern, Closure_t fn, bool recursi
|
||||
&& !uc_is_property((ucs4_t)first_grapheme, UC_PROPERTY_PAIRED_PUNCTUATION));
|
||||
|
||||
TextIter_t text_state = NEW_TEXT_ITER_STATE(text);
|
||||
void (*action)(Match_t, void*) = fn.fn;
|
||||
void (*action)(PatternMatch, void*) = fn.fn;
|
||||
for (int64_t pos = 0; pos < text.length; pos++) {
|
||||
// Optimization: quickly skip ahead to first char in pattern:
|
||||
if (find_first) {
|
||||
@ -1110,7 +1117,7 @@ public void Text$each(Text_t text, Pattern_t pattern, Closure_t fn, bool recursi
|
||||
int64_t match_len = match(text, pos, pattern, 0, captures, 0);
|
||||
if (match_len < 0) continue;
|
||||
|
||||
Match_t m = {
|
||||
PatternMatch m = {
|
||||
.text=Text$slice(text, I(pos+1), I(pos+match_len)),
|
||||
.index=I(pos+1),
|
||||
.captures={},
|
||||
@ -1118,7 +1125,7 @@ public void Text$each(Text_t text, Pattern_t pattern, Closure_t fn, bool recursi
|
||||
for (int i = 0; captures[i].occupied; i++) {
|
||||
Text_t capture = Text$slice(text, I(captures[i].index+1), I(captures[i].index+captures[i].length));
|
||||
if (recursive)
|
||||
Text$each(capture, pattern, fn, recursive);
|
||||
Pattern$each(capture, pattern, fn, recursive);
|
||||
Array$insert(&m.captures, &capture, I(0), sizeof(Text_t));
|
||||
}
|
||||
|
||||
@ -1127,7 +1134,7 @@ public void Text$each(Text_t text, Pattern_t pattern, Closure_t fn, bool recursi
|
||||
}
|
||||
}
|
||||
|
||||
Text_t Text$replace_array(Text_t text, Array_t replacements, Text_t backref_pat, bool recursive)
|
||||
Text_t replace_array(Text_t text, Array_t replacements, Text_t backref_pat, bool recursive)
|
||||
{
|
||||
if (replacements.length == 0) return text;
|
||||
|
||||
@ -1137,7 +1144,7 @@ Text_t Text$replace_array(Text_t text, Array_t replacements, Text_t backref_pat,
|
||||
for (int64_t pos = 0; pos < text.length; ) {
|
||||
// Find the first matching pattern at this position:
|
||||
for (int64_t i = 0; i < replacements.length; i++) {
|
||||
Pattern_t pattern = *(Pattern_t*)(replacements.data + i*replacements.stride);
|
||||
Text_t pattern = *(Text_t*)(replacements.data + i*replacements.stride);
|
||||
capture_t captures[MAX_BACKREFS] = {};
|
||||
int64_t len = match(text, pos, pattern, 0, captures, 1);
|
||||
if (len < 0) continue;
|
||||
@ -1171,12 +1178,12 @@ Text_t Text$replace_array(Text_t text, Array_t replacements, Text_t backref_pat,
|
||||
return ret;
|
||||
}
|
||||
|
||||
public Text_t Text$replace_all(Text_t text, Table_t replacements, Text_t backref_pat, bool recursive)
|
||||
static Text_t Pattern$replace_all(Text_t text, Table_t replacements, Text_t backref_pat, bool recursive)
|
||||
{
|
||||
return Text$replace_array(text, replacements.entries, backref_pat, recursive);
|
||||
return replace_array(text, replacements.entries, backref_pat, recursive);
|
||||
}
|
||||
|
||||
public Array_t Text$split(Text_t text, Pattern_t pattern)
|
||||
static Array_t Pattern$split(Text_t text, Text_t pattern)
|
||||
{
|
||||
if (text.length == 0) // special case
|
||||
return (Array_t){.length=0};
|
||||
@ -1207,7 +1214,7 @@ public Array_t Text$split(Text_t text, Pattern_t pattern)
|
||||
typedef struct {
|
||||
TextIter_t state;
|
||||
int64_t i;
|
||||
Pattern_t pattern;
|
||||
Text_t pattern;
|
||||
} split_iter_state_t;
|
||||
|
||||
static OptionalText_t next_split(split_iter_state_t *state)
|
||||
@ -1243,7 +1250,7 @@ static OptionalText_t next_split(split_iter_state_t *state)
|
||||
}
|
||||
}
|
||||
|
||||
public Closure_t Text$by_split(Text_t text, Pattern_t pattern)
|
||||
static Closure_t Pattern$by_split(Text_t text, Text_t pattern)
|
||||
{
|
||||
return (Closure_t){
|
||||
.fn=(void*)next_split,
|
||||
@ -1251,7 +1258,7 @@ public Closure_t Text$by_split(Text_t text, Pattern_t pattern)
|
||||
};
|
||||
}
|
||||
|
||||
public Pattern_t Pattern$escape_text(Text_t text)
|
||||
static Text_t Pattern$escape_text(Text_t text)
|
||||
{
|
||||
// TODO: optimize for spans of non-escaped text
|
||||
Text_t ret = EMPTY_TEXT;
|
||||
@ -1276,62 +1283,9 @@ static Text_t Pattern$as_text(const void *obj, bool colorize, const TypeInfo_t *
|
||||
(void)info;
|
||||
if (!obj) return Text("Pattern");
|
||||
|
||||
Pattern_t pat = *(Pattern_t*)obj;
|
||||
Text_t quote = Text$has(pat, Pattern("/")) && !Text$has(pat, Pattern("|")) ? Text("|") : Text("/");
|
||||
return Text$concat( colorize ? Text("\x1b[1m$\033[m") : Text("$"), Text$quoted(pat, colorize, quote));
|
||||
Text_t pat = *(Text_t*)obj;
|
||||
Text_t quote = Pattern$has(pat, Text("/")) && !Pattern$has(pat, Text("|")) ? Text("|") : Text("/");
|
||||
return Text$concat(colorize ? Text("\x1b[1m$\033[m") : Text("$"), Text$quoted(pat, colorize, quote));
|
||||
}
|
||||
|
||||
public const TypeInfo_t Pattern$info = {
|
||||
.size=sizeof(Pattern_t),
|
||||
.align=__alignof__(Pattern_t),
|
||||
.tag=TextInfo,
|
||||
.TextInfo={.lang="Pattern"},
|
||||
.metamethods={
|
||||
.as_text=Pattern$as_text,
|
||||
.hash=Text$hash,
|
||||
.compare=Text$compare,
|
||||
.equal=Text$equal,
|
||||
.is_none=Text$is_none,
|
||||
.serialize=Text$serialize,
|
||||
.deserialize=Text$deserialize,
|
||||
},
|
||||
};
|
||||
|
||||
static const TypeInfo_t _text_array = {
|
||||
.size=sizeof(Array_t),
|
||||
.align=__alignof__(Array_t),
|
||||
.tag=ArrayInfo,
|
||||
.ArrayInfo.item=&Text$info,
|
||||
.metamethods=Array$metamethods,
|
||||
};
|
||||
|
||||
static NamedType_t _match_fields[3] = {
|
||||
{"text", &Text$info},
|
||||
{"index", &Int$info},
|
||||
{"captures", &_text_array},
|
||||
};
|
||||
|
||||
static bool Match$is_none(const void *m, const TypeInfo_t*)
|
||||
{
|
||||
return ((OptionalMatch_t*)m)->index.small == 0;
|
||||
}
|
||||
|
||||
public const TypeInfo_t Match$info = {
|
||||
.size=sizeof(Match_t),
|
||||
.align=__alignof__(Match_t),
|
||||
.tag=StructInfo,
|
||||
.StructInfo={
|
||||
.name="Match",
|
||||
.num_fields=3,
|
||||
.fields=_match_fields,
|
||||
},
|
||||
.metamethods={
|
||||
.as_text=Struct$as_text,
|
||||
.hash=Struct$hash,
|
||||
.compare=Struct$compare,
|
||||
.equal=Struct$equal,
|
||||
.is_none=Match$is_none,
|
||||
},
|
||||
};
|
||||
|
||||
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
|
58
examples/patterns/patterns.tm
Normal file
58
examples/patterns/patterns.tm
Normal file
@ -0,0 +1,58 @@
|
||||
use ./patterns.c
|
||||
|
||||
struct PatternMatch(text:Text, index:Int, captures:[Text])
|
||||
|
||||
lang P:
|
||||
convert(text:Text -> P):
|
||||
return inline C : P { Pattern$escape_text(_$text); }
|
||||
|
||||
convert(n:Int -> P):
|
||||
return P.from_text("$n")
|
||||
|
||||
extend Text:
|
||||
func matches(text:Text, pattern:P -> [Text]?):
|
||||
return inline C : [Text]? { Pattern$matches(_$text, _$pattern); }
|
||||
|
||||
func pat_replace(text:Text, pattern:P, replacement:Text, backref="@", recursive=yes -> Text):
|
||||
return inline C : Text { Pattern$replace(_$text, _$pattern, _$replacement, _$backref, _$recursive); }
|
||||
|
||||
func pat_replace_all(text:Text, replacements:{P,Text}, backref="@", recursive=yes -> Text):
|
||||
return inline C : Text { Pattern$replace_all(_$text, _$replacements, _$backref, _$recursive); }
|
||||
|
||||
func has(text:Text, pattern:P -> Bool):
|
||||
return inline C : Bool { Pattern$has(_$text, _$pattern); }
|
||||
|
||||
func find_all(text:Text, pattern:P -> [PatternMatch]):
|
||||
return inline C : [PatternMatch] { Pattern$find_all(_$text, _$pattern); }
|
||||
|
||||
func by_match(text:Text, pattern:P -> func(->PatternMatch?)):
|
||||
return inline C : func(->PatternMatch?) { Pattern$by_match(_$text, _$pattern); }
|
||||
|
||||
func each(text:Text, pattern:P, fn:func(m:PatternMatch), recursive=yes):
|
||||
inline C { Pattern$each(_$text, _$pattern, _$fn, _$recursive); }
|
||||
|
||||
func map(text:Text, pattern:P, fn:func(m:PatternMatch -> Text), recursive=yes -> Text):
|
||||
return inline C : Text { Pattern$map(_$text, _$pattern, _$fn, _$recursive); }
|
||||
|
||||
func split(text:Text, pattern:P -> [Text]):
|
||||
return inline C : [Text] { Pattern$split(_$text, _$pattern); }
|
||||
|
||||
func by_split(text:Text, pattern:P -> func(->Text?)):
|
||||
return inline C : func(->Text?) { Pattern$by_split(_$text, _$pattern); }
|
||||
|
||||
func trim(text:Text, pattern:P, trim_left=yes, trim_right=yes -> Text):
|
||||
return inline C : Text { Pattern$trim(_$text, _$pattern, _$trim_left, _$trim_right); }
|
||||
|
||||
func trim_left(text:Text, pattern:P -> Text):
|
||||
return text:trim(pattern, trim_left=yes, trim_right=no)
|
||||
|
||||
func trim_right(text:Text, pattern:P -> Text):
|
||||
return text:trim(pattern, trim_left=no, trim_right=yes)
|
||||
|
||||
func main():
|
||||
>> "hello world":pat_replace($P/{id}/, "XXX")
|
||||
>> "hello world":find_all($P/l/)
|
||||
|
||||
for m in "hello one two three":by_match($P/{id}/):
|
||||
>> m
|
||||
|
@ -165,6 +165,7 @@ CORD ast_to_xml(ast_t *ast)
|
||||
T(Use, "<Use>%r%r</Use>", optional_tagged("var", data.var), xml_escape(data.path))
|
||||
T(InlineCCode, "<InlineCode>%r</InlineCode>", xml_escape(data.code))
|
||||
T(Deserialize, "<Deserialize><type>%r</type>%r</Deserialize>", type_ast_to_xml(data.type), ast_to_xml(data.value))
|
||||
T(Extend, "<Extend name=\"%s\">%r</Extend>", data.name, ast_to_xml(data.body))
|
||||
default: return "???";
|
||||
#undef T
|
||||
}
|
||||
|
@ -143,6 +143,7 @@ typedef enum {
|
||||
Use,
|
||||
InlineCCode,
|
||||
Deserialize,
|
||||
Extend,
|
||||
} ast_e;
|
||||
|
||||
struct ast_s {
|
||||
@ -331,6 +332,10 @@ struct ast_s {
|
||||
ast_t *value;
|
||||
type_ast_t *type;
|
||||
} Deserialize;
|
||||
struct {
|
||||
const char *name;
|
||||
ast_t *body;
|
||||
} Extend;
|
||||
} __data;
|
||||
};
|
||||
|
||||
|
@ -15,7 +15,6 @@
|
||||
#include "stdlib/integers.h"
|
||||
#include "stdlib/nums.h"
|
||||
#include "stdlib/paths.h"
|
||||
#include "stdlib/patterns.h"
|
||||
#include "stdlib/text.h"
|
||||
#include "stdlib/util.h"
|
||||
#include "structs.h"
|
||||
@ -39,7 +38,7 @@ static CORD compile_string_literal(CORD literal);
|
||||
|
||||
CORD promote_to_optional(type_t *t, CORD code)
|
||||
{
|
||||
if (t == PATH_TYPE || t == PATH_TYPE_TYPE || t == MATCH_TYPE) {
|
||||
if (t == PATH_TYPE || t == PATH_TYPE_TYPE) {
|
||||
return code;
|
||||
} else if (t->tag == IntType) {
|
||||
switch (Match(t, IntType)->bits) {
|
||||
@ -442,7 +441,7 @@ static void add_closed_vars(Table_t *closed_vars, env_t *enclosing_scope, env_t
|
||||
add_closed_vars(closed_vars, enclosing_scope, env, Match(ast, Deserialize)->value);
|
||||
break;
|
||||
}
|
||||
case Use: case FunctionDef: case ConvertDef: case StructDef: case EnumDef: case LangDef: {
|
||||
case Use: case FunctionDef: case ConvertDef: case StructDef: case EnumDef: case LangDef: case Extend: {
|
||||
errx(1, "Definitions should not be reachable in a closure.");
|
||||
}
|
||||
default:
|
||||
@ -497,7 +496,6 @@ PUREFUNC CORD compile_unsigned_type(type_t *t)
|
||||
CORD compile_type(type_t *t)
|
||||
{
|
||||
if (t == RNG_TYPE) return "RNG_t";
|
||||
else if (t == MATCH_TYPE) return "Match_t";
|
||||
else if (t == PATH_TYPE) return "Path_t";
|
||||
else if (t == PATH_TYPE_TYPE) return "PathType_t";
|
||||
|
||||
@ -516,8 +514,6 @@ CORD compile_type(type_t *t)
|
||||
auto text = Match(t, TextType);
|
||||
if (!text->lang || streq(text->lang, "Text"))
|
||||
return "Text_t";
|
||||
else if (streq(text->lang, "Pattern"))
|
||||
return "Pattern_t";
|
||||
else
|
||||
return CORD_all(namespace_prefix(text->env, text->env->namespace->parent), text->lang, "$$type");
|
||||
}
|
||||
@ -558,8 +554,6 @@ CORD compile_type(type_t *t)
|
||||
case ArrayType: case TableType: case SetType:
|
||||
return CORD_all("Optional", compile_type(nonnull));
|
||||
case StructType: {
|
||||
if (nonnull == MATCH_TYPE)
|
||||
return "OptionalMatch_t";
|
||||
if (nonnull == PATH_TYPE)
|
||||
return "OptionalPath_t";
|
||||
if (nonnull == PATH_TYPE_TYPE)
|
||||
@ -680,7 +674,7 @@ CORD optional_into_nonnone(type_t *t, CORD value)
|
||||
case IntType:
|
||||
return CORD_all(value, ".value");
|
||||
case StructType:
|
||||
if (t == MATCH_TYPE || t == PATH_TYPE || t == PATH_TYPE_TYPE)
|
||||
if (t == PATH_TYPE || t == PATH_TYPE_TYPE)
|
||||
return value;
|
||||
return CORD_all(value, ".value");
|
||||
default:
|
||||
@ -695,8 +689,6 @@ CORD check_none(type_t *t, CORD value)
|
||||
// complain about excessive parens around equality comparisons
|
||||
if (t->tag == PointerType || t->tag == FunctionType || t->tag == CStringType)
|
||||
return CORD_all("({", value, " == NULL;})");
|
||||
else if (t == MATCH_TYPE)
|
||||
return CORD_all("({(", value, ").index.small == 0;})");
|
||||
else if (t == PATH_TYPE)
|
||||
return CORD_all("({(", value, ").type.$tag == PATH_NONE;})");
|
||||
else if (t == PATH_TYPE_TYPE)
|
||||
@ -1168,7 +1160,7 @@ static CORD _compile_statement(env_t *env, ast_t *ast)
|
||||
default: code_err(ast, "Update assignments are not implemented for this operation");
|
||||
}
|
||||
}
|
||||
case StructDef: case EnumDef: case LangDef: case FunctionDef: case ConvertDef: {
|
||||
case StructDef: case EnumDef: case LangDef: case Extend: case FunctionDef: case ConvertDef: {
|
||||
return CORD_EMPTY;
|
||||
}
|
||||
case Skip: {
|
||||
@ -1730,8 +1722,13 @@ static CORD _compile_statement(env_t *env, ast_t *ast)
|
||||
code_err(ast, "Could not find library");
|
||||
|
||||
CORD initialization = CORD_EMPTY;
|
||||
const char *lib_id = Text$as_c_string(
|
||||
Text$replace(Text$from_str(use->path), Pattern("{1+ !alphanumeric}"), Text("_"), Pattern(""), false));
|
||||
|
||||
char *lib_id = String(use->path);
|
||||
for (char *p = lib_id; *p; p++) {
|
||||
if (!isalnum(*p) && *p != '_')
|
||||
*p = '_';
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < tm_files.gl_pathc; i++) {
|
||||
const char *filename = tm_files.gl_pathv[i];
|
||||
initialization = CORD_all(
|
||||
@ -2165,7 +2162,6 @@ CORD compile_none(type_t *t)
|
||||
|
||||
if (t == PATH_TYPE) return "NONE_PATH";
|
||||
else if (t == PATH_TYPE_TYPE) return "((OptionalPathType_t){})";
|
||||
else if (t == MATCH_TYPE) return "NONE_MATCH";
|
||||
|
||||
switch (t->tag) {
|
||||
case BigIntType: return "NONE_INT";
|
||||
@ -2597,8 +2593,6 @@ CORD compile(env_t *env, ast_t *ast)
|
||||
CORD lang_constructor;
|
||||
if (!lang || streq(lang, "Text"))
|
||||
lang_constructor = "Text";
|
||||
else if (streq(lang, "Pattern"))
|
||||
lang_constructor = lang;
|
||||
else
|
||||
lang_constructor = CORD_all(namespace_prefix(Match(text_t, TextType)->env, Match(text_t, TextType)->env->namespace->parent), lang);
|
||||
|
||||
@ -3752,7 +3746,7 @@ CORD compile(env_t *env, ast_t *ast)
|
||||
case Defer: code_err(ast, "Compiling 'defer' as expression!");
|
||||
case Extern: code_err(ast, "Externs are not supported as expressions");
|
||||
case TableEntry: code_err(ast, "Table entries should not be compiled directly");
|
||||
case Declare: case Assign: case UpdateAssign: case For: case While: case Repeat: case StructDef: case LangDef:
|
||||
case Declare: case Assign: case UpdateAssign: case For: case While: case Repeat: case StructDef: case LangDef: case Extend:
|
||||
case EnumDef: case FunctionDef: case ConvertDef: case Skip: case Stop: case Pass: case Return: case DocTest: case PrintStatement:
|
||||
code_err(ast, "This is not a valid expression");
|
||||
default: case Unknown: code_err(ast, "Unknown AST");
|
||||
@ -3762,7 +3756,6 @@ CORD compile(env_t *env, ast_t *ast)
|
||||
CORD compile_type_info(type_t *t)
|
||||
{
|
||||
if (t == RNG_TYPE) return "&RNG$info";
|
||||
else if (t == MATCH_TYPE) return "&Match$info";
|
||||
else if (t == PATH_TYPE) return "&Path$info";
|
||||
else if (t == PATH_TYPE_TYPE) return "&PathType$info";
|
||||
|
||||
@ -3773,8 +3766,6 @@ CORD compile_type_info(type_t *t)
|
||||
auto text = Match(t, TextType);
|
||||
if (!text->lang || streq(text->lang, "Text"))
|
||||
return "&Text$info";
|
||||
else if (streq(text->lang, "Pattern"))
|
||||
return "&Pattern$info";
|
||||
return CORD_all("(&", namespace_prefix(text->env, text->env->namespace->parent), text->lang, "$$info)");
|
||||
}
|
||||
case StructType: {
|
||||
@ -4206,6 +4197,12 @@ CORD compile_top_level_code(env_t *env, ast_t *ast)
|
||||
env_t *ns_env = namespace_env(env, def->name);
|
||||
return CORD_all(code, def->namespace ? compile_top_level_code(ns_env, def->namespace) : CORD_EMPTY);
|
||||
}
|
||||
case Extend: {
|
||||
auto extend = Match(ast, Extend);
|
||||
env_t *ns_env = namespace_env(env, extend->name);
|
||||
ns_env->libname = env->libname;
|
||||
return compile_top_level_code(ns_env, extend->body);
|
||||
}
|
||||
case Extern: return CORD_EMPTY;
|
||||
case Block: {
|
||||
CORD code = CORD_EMPTY;
|
||||
@ -4258,6 +4255,9 @@ static void initialize_vars_and_statics(env_t *env, ast_t *ast)
|
||||
} else if (stmt->ast->tag == LangDef) {
|
||||
initialize_vars_and_statics(namespace_env(env, Match(stmt->ast, LangDef)->name),
|
||||
Match(stmt->ast, LangDef)->namespace);
|
||||
} else if (stmt->ast->tag == Extend) {
|
||||
initialize_vars_and_statics(namespace_env(env, Match(stmt->ast, Extend)->name),
|
||||
Match(stmt->ast, Extend)->body);
|
||||
} else if (stmt->ast->tag == Use) {
|
||||
continue;
|
||||
} else {
|
||||
@ -4348,6 +4348,9 @@ CORD compile_statement_type_header(env_t *env, Path_t header_path, ast_t *ast)
|
||||
"extern const TypeInfo_t ", full_name, ";\n"
|
||||
);
|
||||
}
|
||||
case Extend: {
|
||||
return CORD_EMPTY;
|
||||
}
|
||||
default:
|
||||
return CORD_EMPTY;
|
||||
}
|
||||
@ -4364,6 +4367,12 @@ CORD compile_statement_namespace_header(env_t *env, Path_t header_path, ast_t *a
|
||||
block = def->namespace;
|
||||
break;
|
||||
}
|
||||
case Extend: {
|
||||
auto extend = Match(ast, Extend);
|
||||
ns_name = extend->name;
|
||||
block = extend->body;
|
||||
break;
|
||||
}
|
||||
case StructDef: {
|
||||
auto def = Match(ast, StructDef);
|
||||
ns_name = def->name;
|
||||
|
@ -13,7 +13,6 @@
|
||||
#include "typecheck.h"
|
||||
|
||||
type_t *TEXT_TYPE = NULL;
|
||||
type_t *MATCH_TYPE = NULL;
|
||||
type_t *RNG_TYPE = NULL;
|
||||
public type_t *PATH_TYPE = NULL;
|
||||
public type_t *PATH_TYPE_TYPE = NULL;
|
||||
@ -67,7 +66,6 @@ env_t *global_env(void)
|
||||
(void)bind_type(env, "Int32", Type(IntType, .bits=TYPE_IBITS32));
|
||||
(void)bind_type(env, "Memory", Type(MemoryType));
|
||||
PATH_TYPE_TYPE = declare_type(env, "enum PathType(Relative, Absolute, Home)");
|
||||
MATCH_TYPE = declare_type(env, "struct Match(text:Text, index:Int, captures:[Text])");
|
||||
PATH_TYPE = declare_type(env, "struct Path(type:PathType, components:[Text])");
|
||||
RNG_TYPE = declare_type(env, "struct RNG(state:@Memory)");
|
||||
|
||||
@ -279,13 +277,6 @@ env_t *global_env(void)
|
||||
#undef F_opt
|
||||
#undef F
|
||||
#undef C
|
||||
{"Match", MATCH_TYPE, "Match_t", "Match", TypedArray(ns_entry_t,
|
||||
// No methods
|
||||
)},
|
||||
{"Pattern", Type(TextType, .lang="Pattern", .env=namespace_env(env, "Pattern")), "Pattern_t", "Pattern$info", TypedArray(ns_entry_t,
|
||||
{"escape_int", "Int$value_as_text", "func(i:Int -> Pattern)"},
|
||||
{"escape_text", "Pattern$escape_text", "func(text:Text -> Pattern)"},
|
||||
)},
|
||||
{"PathType", PATH_TYPE_TYPE, "PathType_t", "PathType$info", TypedArray(ns_entry_t,
|
||||
{"Relative", "((PathType_t){.$tag=PATH_RELATIVE})", "PathType"},
|
||||
{"Absolute", "((PathType_t){.$tag=PATH_ABSOLUTE})", "PathType"},
|
||||
@ -353,44 +344,42 @@ env_t *global_env(void)
|
||||
{"as_c_string", "Text$as_c_string", "func(text:Text -> CString)"},
|
||||
{"at", "Text$cluster", "func(text:Text, index:Int -> Text)"},
|
||||
{"by_line", "Text$by_line", "func(text:Text -> func(->Text?))"},
|
||||
{"by_match", "Text$by_match", "func(text:Text, pattern:Pattern -> func(->Match?))"},
|
||||
{"by_split", "Text$by_split", "func(text:Text, pattern=$Pattern'' -> func(->Text?))"},
|
||||
{"by_split", "Text$by_split", "func(text:Text, delimiter='' -> func(->Text?))"},
|
||||
{"by_split_any", "Text$by_split_any", "func(text:Text, delimiters=\" $\\t\\r\\n\" -> func(->Text?))"},
|
||||
{"bytes", "Text$utf8_bytes", "func(text:Text -> [Byte])"},
|
||||
{"caseless_equals", "Text$equal_ignoring_case", "func(a,b:Text, language='C' -> Bool)"},
|
||||
{"codepoint_names", "Text$codepoint_names", "func(text:Text -> [Text])"},
|
||||
{"ends_with", "Text$ends_with", "func(text,suffix:Text -> Bool)"},
|
||||
{"each", "Text$each", "func(text:Text, pattern:Pattern, fn:func(match:Match), recursive=yes)"},
|
||||
{"find", "Text$find", "func(text:Text, pattern:Pattern, start=1 -> Match?)"},
|
||||
{"find_all", "Text$find_all", "func(text:Text, pattern:Pattern -> [Match])"},
|
||||
{"from", "Text$from", "func(text:Text, first:Int -> Text)"},
|
||||
{"from_bytes", "Text$from_bytes", "func(bytes:[Byte] -> Text?)"},
|
||||
{"from_c_string", "Text$from_str", "func(str:CString -> Text?)"},
|
||||
{"from_codepoint_names", "Text$from_codepoint_names", "func(codepoint_names:[Text] -> Text?)"},
|
||||
{"from_codepoints", "Text$from_codepoints", "func(codepoints:[Int32] -> Text)"},
|
||||
{"from_text", "Path$from_text", "func(text:Text -> Path)"},
|
||||
{"has", "Text$has", "func(text:Text, pattern:Pattern -> Bool)"},
|
||||
{"has", "Text$has", "func(text:Text, target:Text -> Bool)"},
|
||||
{"join", "Text$join", "func(glue:Text, pieces:[Text] -> Text)"},
|
||||
{"left_pad", "Text$left_pad", "func(text:Text, count:Int, pad=' ', language='C' -> Text)"},
|
||||
{"lines", "Text$lines", "func(text:Text -> [Text])"},
|
||||
{"lower", "Text$lower", "func(text:Text, language='C' -> Text)"},
|
||||
{"map", "Text$map", "func(text:Text, pattern:Pattern, fn:func(match:Match -> Text), recursive=yes -> Text)"},
|
||||
{"matches", "Text$matches", "func(text:Text, pattern:Pattern -> [Text]?)"},
|
||||
{"middle_pad", "Text$middle_pad", "func(text:Text, count:Int, pad=' ', language='C' -> Text)"},
|
||||
{"quoted", "Text$quoted", "func(text:Text, color=no, quotation_mark='\"' -> Text)"},
|
||||
{"repeat", "Text$repeat", "func(text:Text, count:Int -> Text)"},
|
||||
{"replace", "Text$replace", "func(text:Text, pattern:Pattern, replacement:Text, backref=$/\\/, recursive=yes -> Text)"},
|
||||
{"replace_all", "Text$replace_all", "func(text:Text, replacements:{Pattern,Text}, backref=$/\\/, recursive=yes -> Text)"},
|
||||
{"replace", "Text$replace", "func(text:Text, target:Text, replacement:Text -> Text)"},
|
||||
{"reversed", "Text$reversed", "func(text:Text -> Text)"},
|
||||
{"right_pad", "Text$right_pad", "func(text:Text, count:Int, pad=' ', language='C' -> Text)"},
|
||||
{"slice", "Text$slice", "func(text:Text, from=1, to=-1 -> Text)"},
|
||||
{"split", "Text$split", "func(text:Text, pattern=$Pattern'' -> [Text])"},
|
||||
{"split", "Text$split", "func(text:Text, delimiter='' -> [Text])"},
|
||||
{"split_any", "Text$split_any", "func(text:Text, delimiters=\" $\\t\\r\\n\" -> [Text])"},
|
||||
{"starts_with", "Text$starts_with", "func(text,prefix:Text -> Bool)"},
|
||||
{"title", "Text$title", "func(text:Text, language='C' -> Text)"},
|
||||
{"to", "Text$to", "func(text:Text, last:Int -> Text)"},
|
||||
{"trim", "Text$trim", "func(text:Text, pattern=$/{whitespace}/, trim_left=yes, trim_right=yes -> Text)"},
|
||||
{"translate", "Text$translate", "func(text:Text, translations:{Text,Text} -> Text)"},
|
||||
{"trim", "Text$trim", "func(text:Text, to_trim=\" \t\r\n\", left=yes, right=yes -> Text)"},
|
||||
{"upper", "Text$upper", "func(text:Text, language='C' -> Text)"},
|
||||
{"utf32_codepoints", "Text$utf32_codepoints", "func(text:Text -> [Int32])"},
|
||||
{"width", "Text$width", "func(text:Text, language='C' -> Int)"},
|
||||
{"without_prefix", "Text$without_prefix", "func(text,prefix:Text -> Text)"},
|
||||
{"without_suffix", "Text$without_suffix", "func(text,suffix:Text -> Text)"},
|
||||
)},
|
||||
};
|
||||
|
||||
@ -518,9 +507,6 @@ env_t *global_env(void)
|
||||
{"Num32$from_int64", "func(i:Int64, truncate=no -> Num32)"},
|
||||
{"Num32$from_int", "func(i:Int, truncate=no -> Num32)"},
|
||||
{"Num32$from_num", "func(n:Num -> Num32)"});
|
||||
ADD_CONSTRUCTORS("Pattern",
|
||||
{"Pattern$escape_text", "func(text:Text -> Pattern)"},
|
||||
{"Int$value_as_text", "func(i:Int -> Pattern)"});
|
||||
ADD_CONSTRUCTORS("Path",
|
||||
{"Path$escape_text", "func(text:Text -> Path)"},
|
||||
{"Path$escape_path", "func(path:Path -> Path)"},
|
||||
@ -534,11 +520,6 @@ env_t *global_env(void)
|
||||
.ret=PATH_TYPE),
|
||||
"Path$from_text");
|
||||
|
||||
set_binding(namespace_env(env, "Pattern"), "from_text",
|
||||
Type(FunctionType, .args=new(arg_t, .name="text", .type=TEXT_TYPE),
|
||||
.ret=Type(TextType, .lang="Pattern", .env=namespace_env(env, "Pattern"))),
|
||||
"(Pattern_t)");
|
||||
|
||||
struct {
|
||||
const char *name, *code, *type_str;
|
||||
} global_vars[] = {
|
||||
|
@ -89,7 +89,6 @@ void set_binding(env_t *env, const char *name, type_t *type, CORD code);
|
||||
binding_t *get_namespace_binding(env_t *env, ast_t *self, const char *name);
|
||||
#define code_err(ast, ...) compiler_err((ast)->file, (ast)->start, (ast)->end, __VA_ARGS__)
|
||||
extern type_t *TEXT_TYPE;
|
||||
extern type_t *MATCH_TYPE;
|
||||
extern type_t *RNG_TYPE;
|
||||
extern type_t *PATH_TYPE;
|
||||
extern type_t *PATH_TYPE_TYPE;
|
||||
|
53
src/parse.c
53
src/parse.c
@ -22,7 +22,6 @@
|
||||
#include "ast.h"
|
||||
#include "cordhelpers.h"
|
||||
#include "stdlib/integers.h"
|
||||
#include "stdlib/patterns.h"
|
||||
#include "stdlib/paths.h"
|
||||
#include "stdlib/print.h"
|
||||
#include "stdlib/stdlib.h"
|
||||
@ -64,7 +63,7 @@ int op_tightness[] = {
|
||||
static const char *keywords[] = {
|
||||
"yes", "xor", "while", "when", "use", "unless", "struct", "stop", "skip", "return",
|
||||
"or", "not", "none", "no", "mod1", "mod", "pass", "lang", "inline", "in", "if",
|
||||
"func", "for", "extern", "enum", "else", "do", "deserialize", "defer", "and",
|
||||
"func", "for", "extern", "extend", "enum", "else", "do", "deserialize", "defer", "and",
|
||||
"_min_", "_max_", NULL,
|
||||
};
|
||||
|
||||
@ -120,6 +119,7 @@ static PARSER(parse_inline_c);
|
||||
static PARSER(parse_int);
|
||||
static PARSER(parse_lambda);
|
||||
static PARSER(parse_lang_def);
|
||||
static PARSER(parse_extend);
|
||||
static PARSER(parse_namespace);
|
||||
static PARSER(parse_negative);
|
||||
static PARSER(parse_not);
|
||||
@ -1241,9 +1241,6 @@ PARSER(parse_text) {
|
||||
open_quote = *pos;
|
||||
++pos;
|
||||
close_quote = closing[(int)open_quote] ? closing[(int)open_quote] : open_quote;
|
||||
|
||||
if (!lang && (open_quote == '/' || open_quote == '|'))
|
||||
lang = "Pattern";
|
||||
} else {
|
||||
return NULL;
|
||||
}
|
||||
@ -1904,9 +1901,10 @@ PARSER(parse_namespace) {
|
||||
if (get_indent(ctx, next) != indent) break;
|
||||
ast_t *stmt;
|
||||
if ((stmt=optional(ctx, &pos, parse_struct_def))
|
||||
||(stmt=optional(ctx, &pos, parse_func_def))
|
||||
||(stmt=optional(ctx, &pos, parse_enum_def))
|
||||
||(stmt=optional(ctx, &pos, parse_lang_def))
|
||||
||(stmt=optional(ctx, &pos, parse_func_def))
|
||||
||(stmt=optional(ctx, &pos, parse_extend))
|
||||
||(stmt=optional(ctx, &pos, parse_convert_def))
|
||||
||(stmt=optional(ctx, &pos, parse_use))
|
||||
||(stmt=optional(ctx, &pos, parse_extern))
|
||||
@ -1940,9 +1938,10 @@ PARSER(parse_file_body) {
|
||||
if (get_indent(ctx, next) != 0) break;
|
||||
ast_t *stmt;
|
||||
if ((stmt=optional(ctx, &pos, parse_struct_def))
|
||||
||(stmt=optional(ctx, &pos, parse_func_def))
|
||||
||(stmt=optional(ctx, &pos, parse_enum_def))
|
||||
||(stmt=optional(ctx, &pos, parse_lang_def))
|
||||
||(stmt=optional(ctx, &pos, parse_func_def))
|
||||
||(stmt=optional(ctx, &pos, parse_extend))
|
||||
||(stmt=optional(ctx, &pos, parse_convert_def))
|
||||
||(stmt=optional(ctx, &pos, parse_use))
|
||||
||(stmt=optional(ctx, &pos, parse_extern))
|
||||
@ -2112,6 +2111,32 @@ PARSER(parse_lang_def) {
|
||||
return NewAST(ctx->file, start, pos, LangDef, .name=name, .namespace=namespace);
|
||||
}
|
||||
|
||||
PARSER(parse_extend) {
|
||||
const char *start = pos;
|
||||
// extend Name: body...
|
||||
if (!match_word(&pos, "extend")) return NULL;
|
||||
int64_t starting_indent = get_indent(ctx, pos);
|
||||
spaces(&pos);
|
||||
const char *name = get_id(&pos);
|
||||
if (!name)
|
||||
parser_err(ctx, start, pos, "I expected a name for this lang");
|
||||
|
||||
ast_t *body = NULL;
|
||||
if (match(&pos, ":")) {
|
||||
const char *ns_pos = pos;
|
||||
whitespace(&ns_pos);
|
||||
int64_t ns_indent = get_indent(ctx, ns_pos);
|
||||
if (ns_indent > starting_indent) {
|
||||
pos = ns_pos;
|
||||
body = optional(ctx, &pos, parse_namespace);
|
||||
}
|
||||
}
|
||||
if (!body)
|
||||
body = NewAST(ctx->file, pos, pos, Block, .statements=NULL);
|
||||
|
||||
return NewAST(ctx->file, start, pos, Extend, .name=name, .body=body);
|
||||
}
|
||||
|
||||
arg_ast_t *parse_args(parse_ctx_t *ctx, const char **pos)
|
||||
{
|
||||
arg_ast_t *args = NULL;
|
||||
@ -2373,20 +2398,6 @@ PARSER(parse_use) {
|
||||
what = USE_LOCAL;
|
||||
} else {
|
||||
what = USE_MODULE;
|
||||
|
||||
// When `use`ing a URL, convert it to a hash:
|
||||
Text_t text = Text$from_str(name);
|
||||
Array_t m = Text$matches(text, Pattern("{url}"));
|
||||
if (m.length >= 0) {
|
||||
text = Text$trim(text, Pattern("http{0-1 s}://"), true, false);
|
||||
FILE *shasum = popen(String("echo -n '", text, "' | sha256sum"), "r");
|
||||
const size_t HASH_LEN = 32;
|
||||
char *hash = GC_MALLOC_ATOMIC(HASH_LEN + 1);
|
||||
size_t just_read = fread(hash, sizeof(char), HASH_LEN, shasum);
|
||||
if (just_read < HASH_LEN)
|
||||
print_err("Failed to get SHA sum for 'use': ", name);
|
||||
name = hash;
|
||||
}
|
||||
}
|
||||
return NewAST(ctx->file, start, pos, Use, .var=var, .path=name, .what=what);
|
||||
}
|
||||
|
@ -27,7 +27,6 @@ some common functionality.
|
||||
- Nums: [nums.h](nums.h), [nums.c](nums.c)
|
||||
- Optionals: [optionals.h](optionals.h), [optionals.c](optionals.c)
|
||||
- Paths: [paths.h](paths.h), [paths.c](paths.c)
|
||||
- Patterns: [patterns.h](patterns.h), [patterns.c](patterns.c)
|
||||
- Pointers: [pointers.h](pointers.h), [pointers.c](pointers.c)
|
||||
- Tables: [tables.h](tables.h), [tables.c](tables.c)
|
||||
- Text: [text.h](text.h), [text.c](text.c)
|
||||
|
@ -94,9 +94,6 @@ typedef struct Text_s {
|
||||
};
|
||||
} Text_t;
|
||||
|
||||
#define Pattern_t Text_t
|
||||
#define OptionalPattern_t Text_t
|
||||
|
||||
typedef struct {
|
||||
enum { PATH_NONE, PATH_RELATIVE, PATH_ABSOLUTE, PATH_HOME } $tag;
|
||||
} PathType_t;
|
||||
|
@ -6,7 +6,6 @@
|
||||
#include "integers.h"
|
||||
#include "metamethods.h"
|
||||
#include "nums.h"
|
||||
#include "patterns.h"
|
||||
#include "text.h"
|
||||
#include "util.h"
|
||||
|
||||
|
@ -24,7 +24,6 @@
|
||||
#include "integers.h"
|
||||
#include "optionals.h"
|
||||
#include "paths.h"
|
||||
#include "patterns.h"
|
||||
#include "structs.h"
|
||||
#include "text.h"
|
||||
#include "types.h"
|
||||
@ -599,15 +598,10 @@ public PUREFUNC Text_t Path$base_name(Path_t path)
|
||||
|
||||
// Return the extension of the path's base name, without its leading dot.
//
// When `full` is true the extension starts after the first '.' (so several
// dotted suffixes are returned together); otherwise it starts after the last
// '.'. The first character of the base name is never treated as the separator,
// so a base name that merely begins with a dot has no extension. Returns empty
// text when no separator is found.
public Text_t Path$extension(Path_t path, bool full)
{
    const char *base = Text$as_c_string(Path$base_name(path));
    // Skip the first character when searching, but never step past the
    // terminator of an empty base name.
    const char *search_from = (*base != '\0') ? base + 1 : base;
    const char *dot = full ? strchr(search_from, '.') : strrchr(search_from, '.');
    const char *extension = dot ? dot + 1 : "";
    return Text$from_str(extension);
}
|
||||
|
||||
public Path_t Path$with_component(Path_t path, Text_t component)
|
||||
@ -635,10 +629,10 @@ public Path_t Path$with_extension(Path_t path, Text_t extension, bool replace)
|
||||
Text_t last = *(Text_t*)(path.components.data + path.components.stride*(path.components.length-1));
|
||||
Array$remove_at(&result.components, I(-1), I(1), sizeof(Text_t));
|
||||
if (replace) {
|
||||
if (Text$starts_with(last, Text(".")))
|
||||
last = Text$replace(last, Pattern(".{!.}.{..}"), Text(".@1"), Pattern("@"), false);
|
||||
else
|
||||
last = Text$replace(last, Pattern("{!.}.{..}"), Text("@1"), Pattern("@"), false);
|
||||
const char *base = Text$as_c_string(last);
|
||||
const char *dot = strchr(base + 1, '.');
|
||||
if (dot)
|
||||
last = Text$from_strn(base, (size_t)(dot - base));
|
||||
}
|
||||
|
||||
last = Text$concat(last, extension);
|
||||
|
@ -1,46 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
// The type representing text patterns for pattern matching.
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "datatypes.h"
|
||||
#include "integers.h"
|
||||
#include "optionals.h"
|
||||
#include "types.h"
|
||||
|
||||
#define Pattern(text) ((Pattern_t)Text(text))
|
||||
#define Patterns(...) ((Pattern_t)Texts(__VA_ARGS__))
|
||||
|
||||
typedef struct {
|
||||
Text_t text;
|
||||
Int_t index;
|
||||
Array_t captures;
|
||||
} Match_t;
|
||||
|
||||
typedef Match_t OptionalMatch_t;
|
||||
#define NONE_MATCH ((OptionalMatch_t){.index=NONE_INT})
|
||||
|
||||
Text_t Text$replace(Text_t str, Pattern_t pat, Text_t replacement, Pattern_t backref_pat, bool recursive);
|
||||
Pattern_t Pattern$escape_text(Text_t text);
|
||||
Text_t Text$replace_all(Text_t text, Table_t replacements, Pattern_t backref_pat, bool recursive);
|
||||
Array_t Text$split(Text_t text, Pattern_t pattern);
|
||||
Closure_t Text$by_split(Text_t text, Pattern_t pattern);
|
||||
Text_t Text$trim(Text_t text, Pattern_t pattern, bool trim_left, bool trim_right);
|
||||
OptionalMatch_t Text$find(Text_t text, Pattern_t pattern, Int_t i);
|
||||
Array_t Text$find_all(Text_t text, Pattern_t pattern);
|
||||
Closure_t Text$by_match(Text_t text, Pattern_t pattern);
|
||||
PUREFUNC bool Text$has(Text_t text, Pattern_t pattern);
|
||||
OptionalArray_t Text$matches(Text_t text, Pattern_t pattern);
|
||||
Text_t Text$map(Text_t text, Pattern_t pattern, Closure_t fn, bool recursive);
|
||||
void Text$each(Text_t text, Pattern_t pattern, Closure_t fn, bool recursive);
|
||||
|
||||
#define Pattern$hash Text$hash
|
||||
#define Pattern$compare Text$compare
|
||||
#define Pattern$equal Text$equal
|
||||
|
||||
extern const TypeInfo_t Match$info;
|
||||
extern const TypeInfo_t Pattern$info;
|
||||
|
||||
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
|
@ -20,7 +20,6 @@
|
||||
#include "optionals.h"
|
||||
#include "metamethods.h"
|
||||
#include "nums.h"
|
||||
#include "patterns.h"
|
||||
#include "paths.h"
|
||||
#include "rng.h"
|
||||
#include "siphash.h"
|
||||
|
@ -998,17 +998,22 @@ PUREFUNC public int32_t Text$compare(const void *va, const void *vb, const TypeI
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Report whether the whole target text occurs in the iterated text starting
// at 0-based position `pos`.
//
// A target that would start before the text or run past its end can never
// match, so that case is rejected up front instead of comparing positions
// that lie outside the text. An empty target matches at any in-range position.
bool _matches(TextIter_t *text_state, TextIter_t *target_state, int64_t pos)
{
    int64_t text_length = text_state->stack[0].text.length;
    int64_t target_length = target_state->stack[0].text.length;
    if (pos < 0 || target_length > text_length - pos)
        return false;

    for (int64_t i = 0; i < target_length; i++) {
        int32_t text_i = Text$get_grapheme_fast(text_state, pos + i);
        int32_t target_i = Text$get_grapheme_fast(target_state, i);
        if (text_i != target_i) return false;
    }
    return true;
}
|
||||
|
||||
// Report whether `text` begins with `prefix`.
// A prefix longer than the text never matches; an empty prefix always does.
PUREFUNC public bool Text$starts_with(Text_t text, Text_t prefix)
{
    if (text.length < prefix.length)
        return false;
    TextIter_t text_state = NEW_TEXT_ITER_STATE(text), prefix_state = NEW_TEXT_ITER_STATE(prefix);
    // Delegate the grapheme-by-grapheme comparison to the shared helper.
    return _matches(&text_state, &prefix_state, 0);
}
|
||||
|
||||
PUREFUNC public bool Text$ends_with(Text_t text, Text_t suffix)
|
||||
@ -1016,12 +1021,236 @@ PUREFUNC public bool Text$ends_with(Text_t text, Text_t suffix)
|
||||
if (text.length < suffix.length)
|
||||
return false;
|
||||
TextIter_t text_state = NEW_TEXT_ITER_STATE(text), suffix_state = NEW_TEXT_ITER_STATE(suffix);
|
||||
for (int64_t i = 0; i < suffix.length; i++) {
|
||||
int32_t text_i = Text$get_grapheme_fast(&text_state, text.length - suffix.length + i);
|
||||
int32_t suffix_i = Text$get_grapheme_fast(&suffix_state, i);
|
||||
if (text_i != suffix_i) return false;
|
||||
return _matches(&text_state, &suffix_state, text.length - suffix.length);
|
||||
}
|
||||
|
||||
// Return `text` with `prefix` removed from its start, or `text` unchanged
// when it does not start with `prefix`.
public Text_t Text$without_prefix(Text_t text, Text_t prefix)
{
    if (!Text$starts_with(text, prefix))
        return text;
    // Slice bounds are 1-based and inclusive: keep everything after the prefix.
    return Text$slice(text, I(prefix.length + 1), I(text.length));
}
|
||||
|
||||
// Return `text` with `suffix` removed from its end, or `text` unchanged
// when it does not end with `suffix`.
public Text_t Text$without_suffix(Text_t text, Text_t suffix)
{
    if (!Text$ends_with(text, suffix))
        return text;
    // Slice bounds are 1-based and inclusive: keep everything before the suffix.
    return Text$slice(text, I(1), I(text.length - suffix.length));
}
|
||||
|
||||
// Report whether grapheme `g` occurs anywhere in the iterated text.
// Performs a linear scan over every position of the text.
static bool _has_grapheme(TextIter_t *text, int32_t g)
{
    for (int64_t t = 0; t < text->stack[0].text.length; t++) {
        if (g == Text$get_grapheme_fast(text, t)) {
            return true;
        }
    }
    // No position held `g`.
    return false;
}
|
||||
|
||||
// Strip graphemes from the ends of `text`.
//
// `to_trim` is used as a set of graphemes: leading graphemes found in it are
// dropped when `left` is true, trailing ones when `right` is true. The
// original text is returned as-is when nothing was stripped.
public Text_t Text$trim(Text_t text, Text_t to_trim, bool left, bool right)
{
    TextIter_t text_state = NEW_TEXT_ITER_STATE(text), trim_state = NEW_TEXT_ITER_STATE(to_trim);

    // 0-based index of the first grapheme to keep.
    int64_t first = 0;
    while (left && first < text.length) {
        if (!_has_grapheme(&trim_state, Text$get_grapheme_fast(&text_state, first)))
            break;
        first++;
    }

    // 0-based index of the last grapheme to keep; ends below `first` when
    // everything was trimmed away.
    int64_t last = text.length - 1;
    while (right && last >= first) {
        if (!_has_grapheme(&trim_state, Text$get_grapheme_fast(&text_state, last)))
            break;
        last--;
    }

    bool untouched = (first == 0 && last == text.length - 1);
    if (untouched)
        return text;
    return Text$slice(text, I(first+1), I(last+1));
}
|
||||
|
||||
// Replace every occurrence of each table key in `text` with its value.
//
// The text is scanned left to right. At each position the table entries are
// tried in table order and the first key that matches there wins; scanning
// then resumes after the matched key, so replacement output is not rescanned.
// Entries with an empty key are ignored: an empty key matches everywhere
// without consuming any input, which would otherwise never terminate.
public Text_t Text$translate(Text_t text, Table_t translations)
{
    TextIter_t text_state = NEW_TEXT_ITER_STATE(text);
    Text_t result = EMPTY_TEXT;
    int64_t span_start = 0; // start of the pending unmatched run
    Array_t replacement_array = translations.entries;
    for (int64_t i = 0; i < text.length; ) {
        for (int64_t r = 0; r < replacement_array.length; r++) {
            // Each table entry is read as a (key, value) pair of texts.
            struct { Text_t target, replacement; } *entry = replacement_array.data + r*replacement_array.stride;
            // Skip keys that are empty or too long to fit in what remains.
            if (entry->target.length <= 0 || entry->target.length > text.length - i)
                continue;
            TextIter_t target_state = NEW_TEXT_ITER_STATE(entry->target);
            if (_matches(&text_state, &target_state, i)) {
                // Flush the unmatched run that precedes this match.
                if (i > span_start)
                    result = concat2(result, Text$slice(text, I(span_start+1), I(i)));

                result = concat2(result, entry->replacement);
                i += entry->target.length;
                span_start = i;
                goto found_match;
            }
        }
        i += 1;
      found_match: continue;
    }
    // Flush whatever follows the final match.
    if (span_start < text.length)
        result = concat2(result, Text$slice(text, I(span_start+1), I(text.length)));
    return result;
}
|
||||
|
||||
// Replace every non-overlapping occurrence of `target` in `text` with
// `replacement`, scanning left to right. Replacement output is not rescanned.
//
// An empty `target` leaves the text unchanged: it would match at every
// position without consuming input, so the scan could never advance.
public Text_t Text$replace(Text_t text, Text_t target, Text_t replacement)
{
    if (target.length == 0)
        return text;

    TextIter_t text_state = NEW_TEXT_ITER_STATE(text), target_state = NEW_TEXT_ITER_STATE(target);
    Text_t result = EMPTY_TEXT;
    int64_t span_start = 0; // start of the pending unmatched run
    for (int64_t i = 0; i < text.length; ) {
        // Only compare where the whole target still fits in the text.
        if (target.length <= text.length - i && _matches(&text_state, &target_state, i)) {
            // Flush the unmatched run that precedes this match.
            if (i > span_start)
                result = concat2(result, Text$slice(text, I(span_start+1), I(i)));

            result = concat2(result, replacement);
            i += target.length;
            span_start = i;
        } else {
            i += 1;
        }
    }
    // Flush whatever follows the final match.
    if (span_start < text.length)
        result = concat2(result, Text$slice(text, I(span_start+1), I(text.length)));
    return result;
}
|
||||
|
||||
// Report whether `target` occurs anywhere in `text`, trying each start
// position of `text` in turn.
public bool Text$has(Text_t text, Text_t target)
{
    TextIter_t haystack = NEW_TEXT_ITER_STATE(text), needle = NEW_TEXT_ITER_STATE(target);
    int64_t start = 0;
    while (start < text.length) {
        if (_matches(&haystack, &needle, start))
            return true;
        start++;
    }
    return false;
}
|
||||
|
||||
// Split `text` on every occurrence of the whole text `delimiters`.
//
// An empty delimiter splits the text into its clusters. Adjacent delimiters
// produce empty pieces, and a delimiter at the very end of the text produces
// a final empty piece.
public Array_t Text$split(Text_t text, Text_t delimiters)
{
    if (delimiters.length == 0)
        return Text$clusters(text);

    TextIter_t text_state = NEW_TEXT_ITER_STATE(text), delim_state = NEW_TEXT_ITER_STATE(delimiters);
    Array_t pieces = {};
    int64_t start = 0; // 0-based start of the current piece
    while (start < text.length) {
        // Advance `end` to the next delimiter occurrence (or the end of text).
        int64_t end = start;
        while (end < text.length && !_matches(&text_state, &delim_state, end))
            end++;

        Text_t piece = Text$slice(text, I(start+1), I(end));
        Array$insert(&pieces, &piece, I(0), sizeof(piece));

        start = end + delimiters.length;
        // Landing exactly on the end means the text finished with a
        // delimiter, so an empty final piece is emitted.
        if (start == text.length) {
            Text_t empty = Text("");
            Array$insert(&pieces, &empty, I(0), sizeof(empty));
        }
    }
    return pieces;
}
|
||||
|
||||
// Split `text` wherever any grapheme of `delimiters` occurs.
//
// A run of consecutive delimiter graphemes counts as a single separator.
// With an empty delimiter set the result is a one-element array holding the
// text. A delimiter run at the very end of the text produces a final empty
// piece.
public Array_t Text$split_any(Text_t text, Text_t delimiters)
{
    if (delimiters.length == 0)
        return Array(text);

    TextIter_t text_state = NEW_TEXT_ITER_STATE(text), delim_state = NEW_TEXT_ITER_STATE(delimiters);
    Array_t pieces = {};
    int64_t start = 0; // 0-based start of the current piece
    while (start < text.length) {
        // Advance `end` to the next delimiter grapheme (or the end of text).
        int64_t end = start;
        while (end < text.length && !_has_grapheme(&delim_state, Text$get_grapheme_fast(&text_state, end)))
            end++;

        bool trailing_delim = end < text.length;
        Text_t piece = Text$slice(text, I(start+1), I(end));
        Array$insert(&pieces, &piece, I(0), sizeof(piece));

        // Step over the delimiter that ended the piece and any that follow it.
        start = end + 1;
        while (start < text.length && _has_grapheme(&delim_state, Text$get_grapheme_fast(&text_state, start)))
            start++;

        if (start >= text.length && trailing_delim) {
            Text_t empty = Text("");
            Array$insert(&pieces, &empty, I(0), sizeof(empty));
        }
    }
    return pieces;
}
|
||||
|
||||
// State carried between calls of the split iterators (next_split() and
// next_split_any()); one instance is allocated per closure by
// Text$by_split() and Text$by_split_any().
typedef struct {
|
||||
TextIter_t state; // iterator over the text being split
|
||||
int64_t i; // 0-based position in the text where the next piece starts
|
||||
Text_t delimiter; // delimiter text (by_split) or set of delimiter graphemes (by_split_any)
|
||||
} split_iter_state_t;
|
||||
|
||||
// Iterator step for Text$by_split(): return the next piece of the text, or
// NONE_TEXT once the pieces are exhausted.
//
// Pieces are separated by whole occurrences of the delimiter text. An empty
// delimiter yields one cluster per call, matching what Text$split() returns
// for an empty delimiter.
static OptionalText_t next_split(split_iter_state_t *state)
{
    Text_t text = state->state.stack[0].text;
    if (state->i >= text.length) {
        // Landing exactly on the end with a non-empty delimiter means the
        // previous piece was followed by a delimiter: emit one final empty
        // piece, then move past the end so the next call reports exhaustion.
        if (state->delimiter.length > 0 && state->i == text.length) { // special case
            state->i = text.length + 1;
            return EMPTY_TEXT;
        }
        return NONE_TEXT;
    }

    if (state->delimiter.length == 0) { // special case: split into clusters
        Text_t ret = Text$cluster(text, I(state->i+1));
        state->i += 1;
        return ret;
    }

    TextIter_t delim_state = NEW_TEXT_ITER_STATE(state->delimiter);
    int64_t i = state->i;
    int64_t span_len = 0;
    // Grow the piece until the delimiter matches or the text ends.
    while (i + span_len < text.length && !_matches(&state->state, &delim_state, i + span_len)) {
        span_len += 1;
    }
    Text_t slice = Text$slice(text, I(i+1), I(i+span_len));
    // Resume after the delimiter that ended this piece.
    state->i = i + span_len + state->delimiter.length;
    return slice;
}
|
||||
|
||||
// Build an iterator closure that yields the pieces of `text` separated by
// `delimiter`, one piece per call, starting from the beginning of the text.
public Closure_t Text$by_split(Text_t text, Text_t delimiter)
{
    split_iter_state_t *iter_state = new(split_iter_state_t, .state=NEW_TEXT_ITER_STATE(text), .i=0, .delimiter=delimiter);
    Closure_t iterator = {
        .fn=(void*)next_split,
        .userdata=iter_state,
    };
    return iterator;
}
|
||||
|
||||
// Iterator step for Text$by_split_any(): return the next piece of the text,
// or NONE_TEXT once the pieces are exhausted.
//
// Pieces are separated by runs of graphemes found in the delimiter set. An
// empty delimiter set yields the whole text as a single piece, matching what
// Text$split_any() returns for an empty delimiter set.
static OptionalText_t next_split_any(split_iter_state_t *state)
{
    Text_t text = state->state.stack[0].text;
    if (state->i >= text.length) {
        // Landing exactly on the end with a non-empty delimiter set means the
        // text finished with a delimiter run: emit one final empty piece,
        // then move past the end so the next call reports exhaustion.
        if (state->delimiter.length > 0 && state->i == text.length) { // special case
            state->i = text.length + 1;
            return EMPTY_TEXT;
        }
        return NONE_TEXT;
    }

    if (state->delimiter.length == 0) { // special case: nothing to split on
        state->i = text.length + 1;
        return text;
    }

    TextIter_t delim_state = NEW_TEXT_ITER_STATE(state->delimiter);
    int64_t i = state->i;
    int64_t span_len = 0;
    // Grow the piece until a delimiter grapheme is found or the text ends.
    while (i + span_len < text.length && !_has_grapheme(&delim_state, Text$get_grapheme_fast(&state->state, i + span_len))) {
        span_len += 1;
    }
    Text_t slice = Text$slice(text, I(i+1), I(i+span_len));
    // Step over the delimiter that ended the piece and any that follow it.
    i += span_len + 1;
    while (i < text.length && _has_grapheme(&delim_state, Text$get_grapheme_fast(&state->state, i))) {
        i += 1;
    }
    state->i = i;
    return slice;
}
|
||||
|
||||
// Build an iterator closure that yields the pieces of `text` separated by
// any grapheme in `delimiters`, one piece per call, starting from the
// beginning of the text.
public Closure_t Text$by_split_any(Text_t text, Text_t delimiters)
{
    split_iter_state_t *iter_state = new(split_iter_state_t, .state=NEW_TEXT_ITER_STATE(text), .i=0, .delimiter=delimiters);
    Closure_t iterator = {
        .fn=(void*)next_split_any,
        .userdata=iter_state,
    };
    return iterator;
}
|
||||
|
||||
PUREFUNC public bool Text$equal_values(Text_t a, Text_t b)
|
||||
|
@ -50,6 +50,16 @@ Text_t Text$as_text(const void *text, bool colorize, const TypeInfo_t *info);
|
||||
Text_t Text$quoted(Text_t str, bool colorize, Text_t quotation_mark);
|
||||
PUREFUNC bool Text$starts_with(Text_t text, Text_t prefix);
|
||||
PUREFUNC bool Text$ends_with(Text_t text, Text_t suffix);
|
||||
Text_t Text$without_prefix(Text_t text, Text_t prefix);
|
||||
Text_t Text$without_suffix(Text_t text, Text_t suffix);
|
||||
Text_t Text$replace(Text_t text, Text_t target, Text_t replacement);
|
||||
Text_t Text$translate(Text_t text, Table_t translations);
|
||||
bool Text$has(Text_t text, Text_t target);
|
||||
Array_t Text$split(Text_t text, Text_t delimiter);
|
||||
Array_t Text$split_any(Text_t text, Text_t delimiters);
|
||||
Closure_t Text$by_split(Text_t text, Text_t delimiter);
|
||||
Closure_t Text$by_split_any(Text_t text, Text_t delimiters);
|
||||
Text_t Text$trim(Text_t text, Text_t to_trim, bool left, bool right);
|
||||
char *Text$as_c_string(Text_t text);
|
||||
__attribute__((format(printf, 1, 2)))
|
||||
public Text_t Text$format(const char *fmt, ...);
|
||||
|
@ -20,7 +20,6 @@
|
||||
#include "nums.h"
|
||||
#include "optionals.h"
|
||||
#include "paths.h"
|
||||
#include "patterns.h"
|
||||
#include "pointers.h"
|
||||
#include "print.h"
|
||||
#include "rng.h"
|
||||
|
@ -21,7 +21,6 @@
|
||||
#include "stdlib/datatypes.h"
|
||||
#include "stdlib/integers.h"
|
||||
#include "stdlib/optionals.h"
|
||||
#include "stdlib/patterns.h"
|
||||
#include "stdlib/paths.h"
|
||||
#include "stdlib/print.h"
|
||||
#include "stdlib/text.h"
|
||||
@ -294,7 +293,12 @@ int main(int argc, char *argv[])
|
||||
|
||||
// Turn a library name into an identifier-safe text: every byte that is
// neither alphanumeric nor an underscore is replaced by an underscore.
Text_t escape_lib_name(Text_t lib_name)
{
    char *libname_id = String(lib_name);
    for (char *p = libname_id; *p; p++) {
        // <ctype.h> classifiers need a value representable as unsigned char,
        // so cast before the call to keep bytes >= 0x80 well-defined.
        if (!isalnum((unsigned char)*p) && *p != '_')
            *p = '_';
    }
    return Text$from_str(libname_id);
}
|
||||
|
||||
Path_t build_file(Path_t path, const char *extension)
|
||||
|
@ -12,7 +12,6 @@
|
||||
#include "cordhelpers.h"
|
||||
#include "environment.h"
|
||||
#include "parse.h"
|
||||
#include "stdlib/patterns.h"
|
||||
#include "stdlib/paths.h"
|
||||
#include "stdlib/tables.h"
|
||||
#include "stdlib/text.h"
|
||||
@ -195,8 +194,11 @@ static env_t *load_module(env_t *env, ast_t *module_ast)
|
||||
|
||||
env_t *module_env = fresh_scope(env);
|
||||
Table$str_set(env->imports, use->path, module_env);
|
||||
char *libname_id = Text$as_c_string(
|
||||
Text$replace(Text$from_str(use->path), Pattern("{1+ !alphanumeric}"), Text("_"), Pattern(""), false));
|
||||
char *libname_id = String(use->path);
|
||||
for (char *p = libname_id; *p; p++) {
|
||||
if (!isalnum(*p) && *p != '_')
|
||||
*p = '_';
|
||||
}
|
||||
module_env->libname = libname_id;
|
||||
for (size_t i = 0; i < tm_files.gl_pathc; i++) {
|
||||
const char *filename = tm_files.gl_pathv[i];
|
||||
@ -269,6 +271,14 @@ void prebind_statement(env_t *env, ast_t *statement)
|
||||
prebind_statement(ns_env, stmt->ast);
|
||||
break;
|
||||
}
|
||||
case Extend: {
|
||||
auto extend = Match(statement, Extend);
|
||||
env_t *ns_env = namespace_env(env, extend->name);
|
||||
ns_env->libname = env->libname;
|
||||
for (ast_list_t *stmt = extend->body ? Match(extend->body, Block)->statements : NULL; stmt; stmt = stmt->next)
|
||||
prebind_statement(ns_env, stmt->ast);
|
||||
break;
|
||||
}
|
||||
default: break;
|
||||
}
|
||||
}
|
||||
@ -435,6 +445,14 @@ void bind_statement(env_t *env, ast_t *statement)
|
||||
bind_statement(ns_env, stmt->ast);
|
||||
break;
|
||||
}
|
||||
case Extend: {
|
||||
auto extend = Match(statement, Extend);
|
||||
env_t *ns_env = namespace_env(env, extend->name);
|
||||
ns_env->libname = env->libname;
|
||||
for (ast_list_t *stmt = extend->body ? Match(extend->body, Block)->statements : NULL; stmt; stmt = stmt->next)
|
||||
bind_statement(ns_env, stmt->ast);
|
||||
break;
|
||||
}
|
||||
case Use: {
|
||||
env_t *module_env = load_module(env, statement);
|
||||
if (!module_env) break;
|
||||
@ -940,7 +958,7 @@ type_t *get_type(env_t *env, ast_t *ast)
|
||||
|
||||
// Early out if the type is knowable without any context from the block:
|
||||
switch (last->ast->tag) {
|
||||
case UpdateAssign: case Assign: case Declare: case FunctionDef: case ConvertDef: case StructDef: case EnumDef: case LangDef:
|
||||
case UpdateAssign: case Assign: case Declare: case FunctionDef: case ConvertDef: case StructDef: case EnumDef: case LangDef: case Extend:
|
||||
return Type(VoidType);
|
||||
default: break;
|
||||
}
|
||||
@ -1240,7 +1258,7 @@ type_t *get_type(env_t *env, ast_t *ast)
|
||||
return Type(ClosureType, Type(FunctionType, .args=args, .ret=ret));
|
||||
}
|
||||
|
||||
case FunctionDef: case ConvertDef: case StructDef: case EnumDef: case LangDef: {
|
||||
case FunctionDef: case ConvertDef: case StructDef: case EnumDef: case LangDef: case Extend: {
|
||||
return Type(VoidType);
|
||||
}
|
||||
|
||||
@ -1399,7 +1417,7 @@ PUREFUNC bool is_discardable(env_t *env, ast_t *ast)
|
||||
{
|
||||
switch (ast->tag) {
|
||||
case UpdateAssign: case Assign: case Declare: case FunctionDef: case ConvertDef: case StructDef: case EnumDef:
|
||||
case LangDef: case Use:
|
||||
case LangDef: case Use: case Extend:
|
||||
return true;
|
||||
default: break;
|
||||
}
|
||||
|
12
test/lang.tm
12
test/lang.tm
@ -1,12 +1,12 @@
|
||||
lang HTML:
|
||||
HEADER := $HTML"<!DOCTYPE HTML>"
|
||||
convert(t:Text->HTML):
|
||||
t = t:replace_all({
|
||||
$/&/="&",
|
||||
$/</="<",
|
||||
$/>/=">",
|
||||
$/"/=""",
|
||||
$/'/="'",
|
||||
t = t:translate({
|
||||
"&"="&",
|
||||
"<"="<",
|
||||
">"=">",
|
||||
'"'=""",
|
||||
"'"="'",
|
||||
})
|
||||
|
||||
return HTML.from_text(t)
|
||||
|
171
test/text.tm
171
test/text.tm
@ -74,45 +74,24 @@ func main():
|
||||
>> amelie2:codepoint_names()
|
||||
= ["LATIN CAPITAL LETTER A", "LATIN SMALL LETTER M", "LATIN SMALL LETTER E WITH ACUTE", "LATIN SMALL LETTER L", "LATIN SMALL LETTER I", "LATIN SMALL LETTER E"]
|
||||
|
||||
>> "Hello":replace($/e/, "X")
|
||||
>> "Hello":replace("e", "X")
|
||||
= "HXllo"
|
||||
|
||||
>> "Hello":has($/l/)
|
||||
>> "Hello":has("l")
|
||||
= yes
|
||||
>> "Hello":has($/l{end}/)
|
||||
= no
|
||||
>> "Hello":has($/{start}l/)
|
||||
>> "Hello":has("x")
|
||||
= no
|
||||
|
||||
>> "Hello":has($/o/)
|
||||
= yes
|
||||
>> "Hello":has($/o{end}/)
|
||||
= yes
|
||||
>> "Hello":has($/{start}o/)
|
||||
= no
|
||||
|
||||
>> "Hello":has($/H/)
|
||||
= yes
|
||||
>> "Hello":has($/H{end}/)
|
||||
= no
|
||||
>> "Hello":has($/{start}H/)
|
||||
= yes
|
||||
|
||||
>> "Hello":replace($/l/, "")
|
||||
>> "Hello":replace("l", "")
|
||||
= "Heo"
|
||||
>> "xxxx":replace($/x/, "")
|
||||
>> "xxxx":replace("x", "")
|
||||
= ""
|
||||
>> "xxxx":replace($/y/, "")
|
||||
>> "xxxx":replace("y", "")
|
||||
= "xxxx"
|
||||
>> "One two three four five six":replace($/e /, "")
|
||||
>> "One two three four five six":replace("e ", "")
|
||||
= "Ontwo threfour fivsix"
|
||||
|
||||
>> " one ":replace($/{start}{space}/, "")
|
||||
= "one "
|
||||
>> " one ":replace($/{space}{end}/, "")
|
||||
= " one"
|
||||
|
||||
>> amelie:has($/$amelie2/)
|
||||
>> amelie:has(amelie2)
|
||||
= yes
|
||||
|
||||
>> multiline := "
|
||||
@ -138,11 +117,6 @@ func main():
|
||||
>> ${one {nested} two $(1+2)}
|
||||
= "one {nested} two 3"
|
||||
|
||||
>> "one two three":replace($/{alpha}/, "")
|
||||
= " "
|
||||
>> "one two three":replace($/{alpha}/, "word")
|
||||
= "word word word"
|
||||
|
||||
c := "É̩"
|
||||
>> c:codepoint_names()
|
||||
= ["LATIN CAPITAL LETTER E WITH ACUTE", "COMBINING VERTICAL LINE BELOW"]
|
||||
@ -165,18 +139,29 @@ func main():
|
||||
= [:Text]
|
||||
|
||||
!! Test splitting and joining text:
|
||||
>> "one two three":split($/ /)
|
||||
>> "one,, two,three":split(",")
|
||||
= ["one", "", " two", "three"]
|
||||
>> [t for t in "one,, two,three":by_split(",")]
|
||||
= ["one", "", " two", "three"]
|
||||
>> "one,, two,three":split_any(", ")
|
||||
= ["one", "two", "three"]
|
||||
|
||||
>> "one,two,three,":split($/,/)
|
||||
= ["one", "two", "three", ""]
|
||||
|
||||
>> "one two three":split($/{space}/)
|
||||
>> [t for t in "one,, two,three":by_split_any(", ")]
|
||||
= ["one", "two", "three"]
|
||||
>> ",one,, two,three,":split(",")
|
||||
= ["", "one", "", " two", "three", ""]
|
||||
>> [t for t in ",one,, two,three,":by_split(",")]
|
||||
= ["", "one", "", " two", "three", ""]
|
||||
>> ",one,, two,three,":split_any(", ")
|
||||
= ["", "one", "two", "three", ""]
|
||||
>> [t for t in ",one,, two,three,":by_split_any(", ")]
|
||||
= ["", "one", "two", "three", ""]
|
||||
|
||||
>> "abc":split($//)
|
||||
>> "abc":split()
|
||||
= ["a", "b", "c"]
|
||||
|
||||
>> "one two three":split_any()
|
||||
= ["one", "two", "three"]
|
||||
|
||||
>> ", ":join(["one", "two", "three"])
|
||||
= "one, two, three"
|
||||
|
||||
@ -192,35 +177,6 @@ func main():
|
||||
>> "":split()
|
||||
= [:Text]
|
||||
|
||||
!! Test text:find_all()
|
||||
>> " #one #two #three ":find_all($/#{alpha}/)
|
||||
= [Match(text="#one", index=2, captures=["one"]), Match(text="#two", index=8, captures=["two"]), Match(text="#three", index=13, captures=["three"])]
|
||||
|
||||
>> " #one #two #three ":find_all($/#{!space}/)
|
||||
= [Match(text="#one", index=2, captures=["one"]), Match(text="#two", index=8, captures=["two"]), Match(text="#three", index=13, captures=["three"])]
|
||||
|
||||
>> " ":find_all($/{alpha}/)
|
||||
= [:Match]
|
||||
|
||||
>> " foo(baz(), 1) doop() ":find_all($/{id}(?)/)
|
||||
= [Match(text="foo(baz(), 1)", index=2, captures=["foo", "baz(), 1"]), Match(text="doop()", index=17, captures=["doop", ""])]
|
||||
|
||||
>> "":find_all($Pattern'')
|
||||
= [:Match]
|
||||
|
||||
>> "Hello":find_all($Pattern'')
|
||||
= [:Match]
|
||||
|
||||
!! Test text:find()
|
||||
>> " one two three ":find($/{id}/, start=-999)
|
||||
= none : Match
|
||||
>> " one two three ":find($/{id}/, start=999)
|
||||
= none : Match
|
||||
>> " one two three ":find($/{id}/)
|
||||
= Match(text="one", index=2, captures=["one"])?
|
||||
>> " one two three ":find($/{id}/, start=5)
|
||||
= Match(text="two", index=8, captures=["two"])?
|
||||
|
||||
!! Test text slicing:
|
||||
>> "abcdef":slice()
|
||||
= "abcdef"
|
||||
@ -248,64 +204,15 @@ func main():
|
||||
>> Text.from_codepoint_names(["not a valid name here buddy"])
|
||||
= none : Text
|
||||
|
||||
>> "one two; three four":find_all($/; {..}/)
|
||||
= [Match(text="; three four", index=8, captures=["three four"])]
|
||||
>> "Hello":replace("ello", "i")
|
||||
= "Hi"
|
||||
|
||||
malicious := "{xxx}"
|
||||
>> $/$malicious/
|
||||
= $/{1{}xxx}/
|
||||
|
||||
>> "Hello":replace($/{lower}/, "(\0)")
|
||||
= "H(ello)"
|
||||
|
||||
>> " foo(xyz) foo(yyy) foo(z()) ":replace($/foo(?)/, "baz(\1)")
|
||||
= " baz(xyz) baz(yyy) baz(z()) "
|
||||
|
||||
>> "<tag>":replace_all({$/</="<", $/>/=">"})
|
||||
>> "<tag>":translate({"<"="<", ">"=">"})
|
||||
= "<tag>"
|
||||
|
||||
>> " BAD(x, fn(y), BAD(z), w) ":replace($/BAD(?)/, "good(\1)", recursive=yes)
|
||||
= " good(x, fn(y), good(z), w) "
|
||||
|
||||
>> " BAD(x, fn(y), BAD(z), w) ":replace($/BAD(?)/, "good(\1)", recursive=no)
|
||||
= " good(x, fn(y), BAD(z), w) "
|
||||
|
||||
>> "Hello":matches($/{id}/)
|
||||
= ["Hello"]?
|
||||
>> "Hello":matches($/{lower}/)
|
||||
= none : [Text]
|
||||
>> "Hello":matches($/{upper}/)
|
||||
= none : [Text]
|
||||
>> "Hello...":matches($/{id}/)
|
||||
= none : [Text]
|
||||
|
||||
if matches := "hello world":matches($/{id} {id}/):
|
||||
>> matches
|
||||
= ["hello", "world"]
|
||||
else:
|
||||
fail("Failed to match")
|
||||
|
||||
>> "hello world":map($/world/, func(m:Match): m.text:upper())
|
||||
= "hello WORLD"
|
||||
|
||||
>> "Abc":repeat(3)
|
||||
= "AbcAbcAbc"
|
||||
|
||||
>> " abc def ":trim()
|
||||
= "abc def"
|
||||
>> " abc123def ":trim($/{!digit}/)
|
||||
= "123"
|
||||
>> " abc123def ":trim($/{!digit}/, trim_left=no)
|
||||
= " abc123"
|
||||
>> " abc123def ":trim($/{!digit}/, trim_right=no)
|
||||
= "123def "
|
||||
# Only trim single whole matches that bookend the text:
|
||||
>> "AbcAbcxxxxxxxxAbcAbc":trim($/Abc/)
|
||||
= "AbcxxxxxxxxAbc"
|
||||
|
||||
>> "A=B=C=D":replace($/{..}={..}/, "1:(\1) 2:(\2)")
|
||||
= "1:(A) 2:(B=C=D)"
|
||||
|
||||
>> "abcde":starts_with("ab")
|
||||
= yes
|
||||
>> "abcde":starts_with("bc")
|
||||
@ -316,6 +223,16 @@ func main():
|
||||
>> "abcde":starts_with("cd")
|
||||
= no
|
||||
|
||||
>> "abcde":without_prefix("ab")
|
||||
= "cde"
|
||||
>> "abcde":without_suffix("ab")
|
||||
= "abcde"
|
||||
|
||||
>> "abcde":without_prefix("de")
|
||||
= "abcde"
|
||||
>> "abcde":without_suffix("de")
|
||||
= "abc"
|
||||
|
||||
>> ("hello" ++ " " ++ "Amélie"):reversed()
|
||||
= "eilémA olleh"
|
||||
|
||||
@ -387,3 +304,13 @@ func main():
|
||||
>> cowboy:middle_pad(4)
|
||||
= " 🤠 "
|
||||
|
||||
>> " one, ":trim(" ,")
|
||||
= "one"
|
||||
>> " one, ":trim(" ,", left=no)
|
||||
= " one"
|
||||
>> " one, ":trim(" ,", right=no)
|
||||
= "one, "
|
||||
>> " ":trim(" ,")
|
||||
= ""
|
||||
>> " ":trim(" ,", left=no)
|
||||
= ""
|
||||
|
Loading…
Reference in New Issue
Block a user