Move patterns into a module

This commit is contained in:
Bruce Hill 2025-04-01 14:05:10 -04:00
parent 7a2c99de74
commit 4d59fc2987
28 changed files with 743 additions and 631 deletions

View File

@ -11,12 +11,12 @@ where a different type of string is needed.
```tomo
lang HTML:
convert(t:Text -> HTML):
t = t:replace_all({
$/&/ = "&",
$/</ = "&lt;",
$/>/ = "&gt;",
$/"/ = "&quot",
$/'/ = "&#39;",
t = t:translate({
"&" = "&amp;",
"<" = "&lt;",
">" = "&gt;",
'"' = "&quot;",
"'" = "&#39;",
})
return HTML.from_text(t)
@ -75,7 +75,7 @@ instead of building a global function called `execute()` that takes a
```tomo
lang Sh:
convert(text:Text -> Sh):
return Sh.from_text("'" ++ text:replace($/'/, "''") ++ "'")
return Sh.from_text("'" ++ text:replace("'", "''") ++ "'")
func execute(sh:Sh -> Text):
...
@ -94,7 +94,7 @@ another type's block or at the top level.
```tomo
lang Sh:
convert(text:Text -> Sh):
return Sh.from_text("'" ++ text:replace($/'/, "''") ++ "'")
return Sh.from_text("'" ++ text:replace("'", "''") ++ "'")
struct Foo(x,y:Int):
convert(f:Foo -> Sh):

View File

@ -273,43 +273,43 @@ pattern documentation](patterns.md) for more details.
- [`func as_c_string(text: Text -> CString)`](#as_c_string)
- [`func at(text: Text, index: Int -> Text)`](#at)
- [`func by_line(text: Text -> func(->Text?))`](#by_line)
- [`func by_match(text: Text, pattern: Pattern -> func(->Match?))`](#by_match)
- [`func by_split(text: Text, pattern: Pattern = $// -> func(->Text?))`](#by_split)
- [`func by_split(text: Text, delimiter: Text = "" -> func(->Text?))`](#by_split)
- [`func by_split_any(text: Text, delimiters: Text = " $\t\r\n" -> func(->Text?))`](#by_split_any)
- [`func bytes(text: Text -> [Byte])`](#bytes)
- [`func caseless_equals(a: Text, b:Text, language:Text = "C" -> Bool)`](#caseless_equals)
- [`func codepoint_names(text: Text -> [Text])`](#codepoint_names)
- [`func each(text: Text, pattern: Pattern, fn: func(m: Match), recursive: Bool = yes -> Int?)`](#each)
- [`func ends_with(text: Text, suffix: Text -> Bool)`](#ends_with)
- [`func find(text: Text, pattern: Pattern, start: Int = 1 -> Int?)`](#find)
- [`func find_all(text: Text, pattern: Pattern -> [Match])`](#find_all)
- [`func from(text: Text, first: Int -> Text)`](#from)
- [`func from_codepoint_names(codepoints: [Int32] -> [Text])`](#from_bytes)
- [`func from_bytes(bytes: [Byte] -> [Text])`](#from_bytes)
- [`func from_c_string(str: CString -> Text)`](#from_c_string)
- [`func from_codepoint_names(codepoint_names: [Text] -> [Text])`](#from_codepoint_names)
- [`func from_codepoint_names(codepoints: [Int32] -> [Text])`](#from_codepoints)
- [`func has(text: Text, pattern: Pattern -> Bool)`](#has)
- [`func from_codepoints(codepoints: [Int32] -> [Text])`](#from_codepoints)
- [`func has(text: Text, target: Text -> Bool)`](#has)
- [`func join(glue: Text, pieces: [Text] -> Text)`](#join)
- [`func split(text: Text -> [Text])`](#lines)
- [`func split(text: Text, delimiter: Text = "" -> [Text])`](#split)
- [`func split_any(text: Text, delimiters: Text = " $\t\r\n" -> [Text])`](#split_any)
- [`func middle_pad(text: Text, width: Int, pad: Text = " ", language: Text = "C" -> Text)`](#middle_pad)
- [`func left_pad(text: Text, width: Int, pad: Text = " ", language: Text = "C" -> Text)`](#left_pad)
- [`func lines(text: Text, pattern: Pattern = "" -> [Text])`](#lines)
- [`func lines(text: Text -> [Text])`](#lines)
- [`func lower(text: Text, language: Text = "C" -> Text)`](#lower)
- [`func map(text: Text, pattern: Pattern, fn: func(text:Match)->Text -> Text, recursive: Bool = yes)`](#map)
- [`func matches(text: Text, pattern: Pattern -> [Text])`](#matches)
- [`func quoted(text: Text, color: Bool = no, quotation_mark: Text = '"' -> Text)`](#quoted)
- [`func repeat(text: Text, count:Int -> Text)`](#repeat)
- [`func replace(text: Text, pattern: Pattern, replacement: Text, backref: Pattern = $/\/, recursive: Bool = yes -> Text)`](#replace)
- [`func replace_all(replacements:{Pattern,Text}, backref: Pattern = $/\/, recursive: Bool = yes -> Text)`](#replace_all)
- [`func replace(text: Text, target: Text, replacement: Text -> Text)`](#replace)
- [`func reversed(text: Text -> Text)`](#reversed)
- [`func right_pad(text: Text, width: Int, pad: Text = " ", language: Text = "C" -> Text)`](#right_pad)
- [`func slice(text: Text, from: Int = 1, to: Int = -1 -> Text)`](#slice)
- [`func starts_with(text: Text, prefix: Text -> Bool)`](#starts_with)
- [`func title(text: Text, language: Text = "C" -> Text)`](#title)
- [`func to(text: Text, last: Int -> Text)`](#to)
- [`func trim(text: Text, pattern: Pattern = $/{whitespace/, trim_left: Bool = yes, trim_right: Bool = yes -> Text)`](#trim)
- [`func translate(text: Text, translations: {Text,Text} -> Text)`](#translate)
- [`func trim(text: Text, to_trim: Text = " $\t\r\n", left: Bool = yes, right: Bool = yes -> Text)`](#trim)
- [`func upper(text: Text, language: Text = "C" -> Text)`](#upper)
- [`func utf32_codepoints(text: Text -> [Int32])`](#utf32_codepoints)
- [`func width(text: Text -> Int)`](#width)
- [`func without_prefix(text: Text, prefix: Text -> Text)`](#without_prefix)
- [`func without_suffix(text: Text, suffix: Text -> Text)`](#without_suffix)
----------------
### `as_c_string`
Converts a `Text` value to a C-style string.
@ -411,24 +411,53 @@ for match in text:by_match($/{alpha}/):
### `by_split`
Returns an iterator function that can be used to iterate over text separated by
a pattern.
a delimiter.
**Note:** to split based on a set of delimiters, use [`by_split_any()`](#by_split_any).
```tomo
func by_split(text: Text, pattern: Pattern = $// -> func(->Text?))
func by_split(text: Text, delimiter: Text = "" -> func(->Text?))
```
- `text`: The text to be iterated over in delimiter-separated chunks.
- `pattern`: The [pattern](patterns.md) to split the text on.
- `delimiter`: An exact delimiter to use for splitting the text. If an empty text
is given, then each split will be a single grapheme cluster of the text.
**Returns:**
An iterator function that returns one chunk of text at a time, separated by the
given pattern, until it runs out and returns `none`. **Note:** using an empty
pattern (the default) will iterate over single grapheme clusters in the text.
given delimiter, until it runs out and returns `none`. **Note:** using an empty
delimiter (the default) will iterate over single grapheme clusters in the text.
**Example:**
```tomo
text := "one,two,three"
for chunk in text:by_split($/,/):
for chunk in text:by_split(","):
# Prints: "one" then "two" then "three":
say(chunk)
```
---
### `by_split_any`
Returns an iterator function that can be used to iterate over text separated by
one or more characters (grapheme clusters) from a given text of delimiters.
**Note:** to split based on an exact delimiter, use [`by_split()`](#by_split).
```tomo
func by_split_any(text: Text, delimiters: Text = " $\t\r\n" -> func(->Text?))
```
- `text`: The text to be iterated over in delimiter-separated chunks.
- `delimiters`: A text containing multiple delimiter characters (grapheme clusters)
to use for splitting the text.
**Returns:**
An iterator function that returns one chunk of text at a time, separated by the
given delimiter characters, until it runs out and returns `none`.
**Example:**
```tomo
text := "one,two,;,three"
for chunk in text:by_split_any(",;"):
# Prints: "one" then "two" then "three":
say(chunk)
```
@ -628,7 +657,7 @@ func from(text: Text, first: Int -> Text)
The text from the given grapheme cluster to the end of the text. Note: a
negative index counts backwards from the end of the text, so `-1` refers to the
last cluster, `-2` the second-to-last, etc. Slice ranges will be truncated to
the length of the string.
the length of the text.
**Example:**
```tomo
@ -647,10 +676,10 @@ text will be normalized, so the resulting text's UTF8 bytes may not exactly
match the input.
```tomo
func from_codepoint_names(codepoints: [Int32] -> [Text])
func from_bytes(bytes: [Byte] -> [Text])
```
- `codepoints`: The UTF32 codepoints in the desired text.
- `bytes`: The UTF-8 bytes of the desired text.
**Returns:**
A new text based on the input UTF8 bytes after normalization has been applied.
@ -717,7 +746,7 @@ the text will be normalized, so the resulting text's codepoints may not exactly
match the input codepoints.
```tomo
func from_codepoint_names(codepoints: [Int32] -> [Text])
func from_codepoints(codepoints: [Int32] -> [Text])
```
- `codepoints`: The UTF32 codepoints in the desired text.
@ -734,28 +763,24 @@ A new text with the specified codepoints after normalization has been applied.
---
### `has`
Checks if the `Text` contains a target [pattern](patterns.md).
Checks if the `Text` contains some target text.
```tomo
func has(text: Text, pattern: Pattern -> Bool)
func has(text: Text, target: Text -> Bool)
```
- `text`: The text to be searched.
- `pattern`: The [pattern](patterns.md) to search for.
- `target`: The text to search for.
**Returns:**
`yes` if the target pattern is found, `no` otherwise.
`yes` if the target text is found, `no` otherwise.
**Example:**
```tomo
>> "hello world":has($/wo/)
>> "hello world":has("wo")
= yes
>> "hello world":has($/{alpha}/)
= yes
>> "hello world":has($/{digit}/)
>> "hello world":has("xxx")
= no
>> "hello world":has($/{start}he/)
= yes
```
---
@ -888,63 +913,8 @@ The lowercase version of the text.
---
### `map`
For each occurrence of the given [pattern](patterns.md), replace the text with
the result of calling the given function on that match.
```tomo
func map(text: Text, pattern: Pattern, fn: func(text:Match)->Text -> Text, recursive: Bool = yes)
```
- `text`: The text to be searched.
- `pattern`: The [pattern](patterns.md) to search for.
- `fn`: The function to apply to each match.
- `recursive`: Whether to recursively map `fn` to each of the captures of the
pattern before handing them to `fn`.
**Returns:**
The text with the matching parts replaced with the result of applying the given
function to each.
**Example:**
```tomo
>> "hello world":map($/world/, func(m:Match): m.text:upper())
= "hello WORLD"
>> "Some nums: 1 2 3 4":map($/{int}/, func(m:Match): "$(Int.parse(m.text)! + 10)")
= "Some nums: 11 12 13 14"
```
---
### `matches`
Checks if the `Text` matches target [pattern](patterns.md) and returns an array
of the matching text captures or a null value if the entire text doesn't match
the pattern.
```tomo
func matches(text: Text, pattern: Pattern -> [Text])
```
- `text`: The text to be searched.
- `pattern`: The [pattern](patterns.md) to search for.
**Returns:**
An array of the matching text captures if the entire text matches the pattern,
or a null value otherwise.
**Example:**
```tomo
>> "hello world":matches($/{id}/)
= none : [Text]?
>> "hello world":matches($/{id} {id}/)
= ["hello", "world"] : [Text]?
```
---
### `quoted`
Formats the text as a quoted string.
Formats the text with quotation marks and escapes.
```tomo
func quoted(text: Text, color: Bool = no, quotation_mark: Text = '"' -> Text)
@ -955,7 +925,7 @@ func quoted(text: Text, color: Bool = no, quotation_mark: Text = '"' -> Text)
- `quotation_mark`: The quotation mark to use (default is `"`).
**Returns:**
The text formatted as a quoted string.
The text formatted as a quoted text.
**Example:**
```tomo
@ -987,106 +957,23 @@ The text repeated the given number of times.
---
### `replace`
Replaces occurrences of a [pattern](patterns.md) in the text with a replacement
string.
Replaces occurrences of a target text with a replacement text.
```tomo
func replace(text: Text, pattern: Pattern, replacement: Text, backref: Pattern = $/\/, recursive: Bool = yes -> Text)
func replace(text: Text, target: Text, replacement: Text -> Text)
```
- `text`: The text in which to perform replacements.
- `pattern`: The [pattern](patterns.md) to be replaced.
- `replacement`: The text to replace the pattern with.
- `backref`: If non-empty, the replacement text will have occurrences of this
pattern followed by a number replaced with the corresponding backreference.
By default, the backreference pattern is a single backslash, so
backreferences look like `\0`, `\1`, etc.
- `recursive`: For backreferences of a nested capture, if recursive is set to
`yes`, then the whole replacement will be reapplied recursively to the
backreferenced text if it's used in the replacement.
**Backreferences**
If a backreference pattern is in the replacement, then that backreference is
replaced with the corresponding group from the matching text. Backreference
`0` is the entire matching text, backreference `1` is the first matched group,
and so on. Literal text is not captured for backreferences, only named group
captures (`{foo}`), quoted captures (`"?"`), and nested group captures (`(?)`).
For quoted and nested group captures, the backreference refers to the *inside*
of the capture without the enclosing punctuation.
If you need to insert a digit immediately after a backreference, you can use an
optional semicolon: `\1;2` (backref 1, followed by the replacement text`"2"`).
- `target`: The target text to be replaced.
- `replacement`: The text to replace the target with.
**Returns:**
The text with occurrences of the pattern replaced.
The text with occurrences of the target replaced.
**Example:**
```tomo
>> "Hello world":replace($/world/, "there")
>> "Hello world":replace("world", "there")
= "Hello there"
>> "Hello world":replace($/{id}/, "xxx")
= "xxx xxx"
>> "Hello world":replace($/{id}/, "\0")
= "(Hello) (world)"
>> "Hello world":replace($/{id}/, "(@0)", backref=$/@/)
= "(Hello) (world)"
>> "Hello world":replace($/{id} {id}/, "just \2")
= "just world"
# Recursive is the default behavior:
>> " BAD(x, BAD(y), z) ":replace($/BAD(?)/, "good(\1)", recursive=yes)
= " good(x, good(y), z) "
>> " BAD(x, BAD(y), z) ":replace($/BAD(?)/, "good(\1)", recursive=no)
= " good(x, BAD(y), z) "
```
---
### `replace_all`
Takes a table mapping [patterns](patterns.md) to replacement texts and performs
all the replacements in the table on the whole text. At each position, the
first matching pattern's replacement is applied and the pattern matching moves
on to *after* the replacement text, so replacement text is not recursively
modified. See [`replace()`](#replace) for more information about replacement
behavior.
```tomo
func replace_all(replacements:{Pattern,Text}, backref: Pattern = $/\/, recursive: Bool = yes -> Text)
```
- `text`: The text in which to perform replacements.
- `replacements`: A table mapping from [pattern](patterns.md) to the
replacement text associated with that pattern.
- `backref`: If non-empty, the replacement text will have occurrences of this
pattern followed by a number replaced with the corresponding backreference.
By default, the backreference pattern is a single backslash, so
backreferences look like `\0`, `\1`, etc.
- `recursive`: For backreferences of a nested capture, if recursive is set to
`yes`, then the matching replacement will be reapplied recursively to the
backreferenced text if it's used in the replacement.
**Returns:**
The text with all occurrences of the patterns replaced with their corresponding
replacement text.
**Example:**
```tomo
>> "A <tag> & an amperand":replace_all({
$/&/ = "&amp;",
$/</ = "&lt;",
$/>/ = "&gt;",
$/"/ = "&quot",
$/'/ = "&#39;",
}
= "A &lt;tag&gt; &amp; an ampersand"
>> "Hello":replace_all({$/{lower}/="[\0]", $/{upper}/="{\0}"})
= "{H}[ello]"
```
---
@ -1153,7 +1040,7 @@ func slice(text: Text, from: Int = 1, to: Int = -1 -> Text)
The text that spans the given grapheme cluster indices. Note: a negative index
counts backwards from the end of the text, so `-1` refers to the last cluster,
`-2` the second-to-last, etc. Slice ranges will be truncated to the length of
the string.
the text.
**Example:**
```tomo
@ -1170,32 +1057,51 @@ the string.
---
### `split`
Splits the text into an array of substrings based on a [pattern](patterns.md).
Splits the text into an array of substrings based on exact matches of a delimiter.
**Note:** to split based on a set of delimiter characters, use [`split_any()`](#split_any).
```tomo
func split(text: Text, pattern: Pattern = "" -> [Text])
func split(text: Text, delimiter: Text = "" -> [Text])
```
- `text`: The text to be split.
- `pattern`: The [pattern](patterns.md) used to split the text. If the pattern
is the empty string, the text will be split into individual grapheme clusters.
- `delimiter`: The delimiter used to split the text. If the delimiter is the
empty text, the text will be split into individual grapheme clusters.
**Returns:**
An array of substrings resulting from the split.
An array of subtexts resulting from the split.
**Example:**
```tomo
>> "one,two,three":split($/,/)
= ["one", "two", "three"]
>> "one,two,,three":split(",")
= ["one", "two", "", "three"]
>> "abc":split()
= ["a", "b", "c"]
```
>> "a b c":split($/{space}/)
= ["a", "b", "c"]
---
>> "a,b,c,":split($/,/)
= ["a", "b", "c", ""]
### `split_any`
Splits the text into an array of substrings at one or more occurrences of a set
of delimiter characters (grapheme clusters).
**Note:** to split based on an exact delimiter, use [`split()`](#split).
```tomo
func split_any(text: Text, delimiters: Text = " $\t\r\n" -> [Text])
```
- `text`: The text to be split.
- `delimiters`: A text containing multiple delimiters to be used for
splitting the text into chunks.
**Returns:**
An array of subtexts resulting from the split.
**Example:**
```tomo
>> "one, two,,three":split_any(", ")
= ["one", "two", "three"]
```
---
@ -1260,7 +1166,7 @@ func to(text: Text, last: Int -> Text)
The text up to and including the given grapheme cluster. Note: a negative index
counts backwards from the end of the text, so `-1` refers to the last cluster,
`-2` the second-to-last, etc. Slice ranges will be truncated to the length of
the string.
the text.
**Example:**
```tomo
@ -1273,30 +1179,62 @@ the string.
---
### `trim`
Trims the matching [pattern](patterns.md) from the left and/or right side of the text.
### `translate`
Takes a table mapping target texts to their replacements and performs all the
replacements in the table on the whole text. At each position, the first
matching replacement is applied and the matching moves on to *after* the
replacement text, so replacement text is not recursively modified. See
[`replace()`](#replace) for more information about replacement behavior.
```tomo
func trim(text: Text, pattern: Pattern = $/{whitespace/, trim_left: Bool = yes, trim_right: Bool = yes -> Text)
func translate(text: Text, translations: {Text,Text} -> Text)
```
- `text`: The text in which to perform replacements.
- `translations`: A table mapping from target text to its replacement.
**Returns:**
The text with all occurrences of the target texts replaced with their corresponding
replacement text.
**Example:**
```tomo
>> "A <tag> & an ampersand":translate({
"&" = "&amp;",
"<" = "&lt;",
">" = "&gt;",
'"' = "&quot;",
"'" = "&#39;",
})
= "A &lt;tag&gt; &amp; an ampersand"
```
---
### `trim`
Trims the given characters (grapheme clusters) from the left and/or right side of the text.
```tomo
func trim(text: Text, to_trim: Text = " $\t\r\n", left: Bool = yes, right: Bool = yes -> Text)
```
- `text`: The text to be trimmed.
- `pattern`: The [pattern](patterns.md) that will be trimmed away.
- `trim_left`: Whether or not to trim from the front of the text.
- `trim_right`: Whether or not to trim from the back of the text.
- `to_trim`: The characters to remove from the left/right of the text.
- `left`: Whether or not to trim from the front of the text.
- `right`: Whether or not to trim from the back of the text.
**Returns:**
The text without the trim pattern at either end.
The text without the trim characters at either end.
**Example:**
```tomo
>> " x y z $(\n)":trim()
= "x y z"
>> "abc123def":trim($/{!digit}/)
= "123"
>> "one,":trim(",")
= "one"
>> " xyz ":trim(trim_right=no)
>> " xyz ":trim(right=no)
= "xyz "
```
@ -1371,3 +1309,51 @@ An integer representing the display width of the text.
>> "🤠":width()
= 2
```
---
### `without_prefix`
Returns the text with a given prefix removed (if present).
```tomo
func without_prefix(text: Text, prefix: Text -> Text)
```
- `text`: The text to remove the prefix from.
- `prefix`: The prefix to remove.
**Returns:**
A text without the given prefix (if present) or the unmodified text if the
prefix is not present.
**Example:**
```tomo
>> "foo:baz":without_prefix("foo:")
= "baz"
>> "qux":without_prefix("foo:")
= "qux"
```
---
### `without_suffix`
Returns the text with a given suffix removed (if present).
```tomo
func without_suffix(text: Text, suffix: Text -> Text)
```
- `text`: The text to remove the suffix from.
- `suffix`: The suffix to remove.
**Returns:**
A text without the given suffix (if present) or the unmodified text if the
suffix is not present.
**Example:**
```tomo
>> "baz.foo":without_suffix(".foo")
= "baz"
>> "qux":without_suffix(".foo")
= "qux"
```

View File

@ -62,32 +62,3 @@ $Colorful"
We have @(green,bold:colors)!
":print()
```
You can very easily introduce your own syntax highlighting for a custom DSL:
```tomo
lang Markdown:
func Colorful(md:Markdown -> Colorful):
text := md.text:replace_all({
$/@/="@(at)",
$/(/="@(lparen)",
$/)/="@(rparen)",
$/**{..}**/="@(b:\1)",
$/*{..}*/="@(i:\1)",
$/[?](?)/="@(blue,underline:\1)",
})
return Colorful.from_text(text)
func colorful(md:Markdown -> Colorful):
return $Colorful"$md"
...
md := $Markdown"
This is [a link with **bold** inside](example.com)!
"
>> colorful := md:colorful()
= $Colorful"This is @(blue,underline:a link with @(b:bold) inside)!"
>> colorful:for_terminal()
= "$\e[mThis is $\e[4;34ma link with $\e[1mbold$\e[22m inside$\e[24;39m!"
colorful:print()
```

View File

@ -8,7 +8,7 @@ CSI := "$\033["
lang Colorful:
convert(text:Text -> Colorful):
text = text:replace_all({$/@/="@(at)", $/(/="@(lparen)", $/)/="@(rparen)"})
text = text:translate({"@"="@(at)", "("="@(lparen)", ")"="@(rparen)"})
return Colorful.from_text(text)
convert(i:Int -> Colorful): return Colorful.from_text("$i")

View File

@ -68,8 +68,8 @@ struct World(player:@Player, goal:@Box, boxes:@[@Box], dt_accum=Num32(0.0), won=
DrawText(CString("WINNER"), GetScreenWidth()/Int32(2)-Int32(48*3), GetScreenHeight()/Int32(2)-Int32(24), 48, Color(0,0,0))
func load_map(w:@World, map:Text):
if map:has($/[]/):
map = map:replace_all({$/[]/="#", $/@{1..}/="@", $/ /=" "})
if map:has("[]"):
map = map:translate({"[]"="#", "@ "="@", " "=" "})
w.boxes = @[:@Box]
box_size := Vector2(50., 50.)
for y,line in map:lines():

View File

@ -0,0 +1,8 @@
#pragma once
// Match record with three fields: a text, an integer index, and an array of
// captures.
// NOTE(review): the field list and order are the same as the PatternMatch
// struct declared in the patterns implementation; presumably the two layouts
// must stay in sync -- confirm against the code that uses XMatch.
typedef struct {
Text_t text;
Int_t index;
Array_t captures;
} XMatch;

View File

@ -2,21 +2,28 @@
#include <ctype.h>
#include <sys/param.h>
#include <tomo/tomo.h>
#include <unictype.h>
#include <uniname.h>
#include <unistring/version.h>
#include "arrays.h"
#include "integers.h"
#include "optionals.h"
#include "patterns.h"
#include "structs.h"
#include "tables.h"
#include "text.h"
#include "types.h"
#define MAX_BACKREFS 100
// The result of a successful pattern match.
// As populated by find(): `text` is the slice of the searched text that
// matched, `index` is the position of the start of the match (stored as
// found+1), and `captures` is an array of Text_t values, one per capture.
typedef struct {
Text_t text;
Int_t index;
Array_t captures;
} PatternMatch;
// A PatternMatch that may be absent: `is_none` is set when there is no match.
// The leading fields are declared in the same order as PatternMatch. Keep it
// that way: Pattern$find_all() inserts an OptionalPatternMatch into its result
// array using sizeof(PatternMatch), so only the shared leading fields are
// copied.
typedef struct {
Text_t text;
Int_t index;
Array_t captures;
bool is_none:1;
} OptionalPatternMatch;
// The "no match" value: only `is_none` is set, every other field is zeroed.
#define NONE_MATCH ((OptionalPatternMatch){.is_none=true})
typedef struct {
int64_t index, length;
bool occupied, recursive;
@ -35,7 +42,7 @@ typedef struct {
};
} pat_t;
static Text_t Text$replace_array(Text_t text, Array_t replacements, Text_t backref_pat, bool recursive);
static Text_t replace_array(Text_t text, Array_t replacements, Text_t backref_pat, bool recursive);
static INLINE void skip_whitespace(TextIter_t *state, int64_t *i)
{
@ -673,7 +680,7 @@ static pat_t parse_next_pat(TextIter_t *state, int64_t *index)
}
}
static int64_t match(Text_t text, int64_t text_index, Pattern_t pattern, int64_t pattern_index, capture_t *captures, int64_t capture_index)
static int64_t match(Text_t text, int64_t text_index, Text_t pattern, int64_t pattern_index, capture_t *captures, int64_t capture_index)
{
if (pattern_index >= pattern.length) // End of the pattern
return 0;
@ -773,7 +780,7 @@ static int64_t match(Text_t text, int64_t text_index, Pattern_t pattern, int64_t
#undef EAT2
#undef EAT_MANY
static int64_t _find(Text_t text, Pattern_t pattern, int64_t first, int64_t last, int64_t *match_length, capture_t *captures)
static int64_t _find(Text_t text, Text_t pattern, int64_t first, int64_t last, int64_t *match_length, capture_t *captures)
{
int32_t first_grapheme = Text$get_grapheme(pattern, 0);
bool find_first = (first_grapheme != '{'
@ -800,7 +807,7 @@ static int64_t _find(Text_t text, Pattern_t pattern, int64_t first, int64_t last
return -1;
}
public OptionalMatch_t Text$find(Text_t text, Pattern_t pattern, Int_t from_index)
static OptionalPatternMatch find(Text_t text, Text_t pattern, Int_t from_index)
{
int64_t first = Int64$from_int(from_index, false);
if (first == 0) fail("Invalid index: 0");
@ -819,14 +826,14 @@ public OptionalMatch_t Text$find(Text_t text, Pattern_t pattern, Int_t from_inde
Text_t capture = Text$slice(text, I(captures[i].index+1), I(captures[i].index+captures[i].length));
Array$insert(&capture_array, &capture, I(0), sizeof(Text_t));
}
return (OptionalMatch_t){
return (OptionalPatternMatch){
.text=Text$slice(text, I(found+1), I(found+len)),
.index=I(found+1),
.captures=capture_array,
};
}
PUREFUNC public bool Text$has(Text_t text, Pattern_t pattern)
PUREFUNC static bool Pattern$has(Text_t text, Text_t pattern)
{
if (Text$starts_with(pattern, Text("{start}"))) {
int64_t m = match(text, 0, pattern, 0, NULL, 0);
@ -844,7 +851,7 @@ PUREFUNC public bool Text$has(Text_t text, Pattern_t pattern)
}
}
public OptionalArray_t Text$matches(Text_t text, Pattern_t pattern)
static OptionalArray_t Pattern$matches(Text_t text, Text_t pattern)
{
capture_t captures[MAX_BACKREFS] = {};
int64_t match_len = match(text, 0, pattern, 0, captures, 0);
@ -859,18 +866,18 @@ public OptionalArray_t Text$matches(Text_t text, Pattern_t pattern)
return capture_array;
}
public Array_t Text$find_all(Text_t text, Pattern_t pattern)
static Array_t Pattern$find_all(Text_t text, Text_t pattern)
{
if (pattern.length == 0) // special case
return (Array_t){.length=0};
Array_t matches = {};
for (int64_t i = 1; ; ) {
OptionalMatch_t m = Text$find(text, pattern, I(i));
if (!m.index.small)
OptionalPatternMatch m = find(text, pattern, I(i));
if (m.is_none)
break;
i = Int64$from_int(m.index, false) + m.text.length;
Array$insert(&matches, &m, I_small(0), sizeof(Match_t));
Array$insert(&matches, &m, I_small(0), sizeof(PatternMatch));
}
return matches;
}
@ -878,23 +885,23 @@ public Array_t Text$find_all(Text_t text, Pattern_t pattern)
typedef struct {
TextIter_t state;
Int_t i;
Pattern_t pattern;
Text_t pattern;
} match_iter_state_t;
static OptionalMatch_t next_match(match_iter_state_t *state)
static OptionalPatternMatch next_match(match_iter_state_t *state)
{
if (Int64$from_int(state->i, false) > state->state.stack[0].text.length)
return NONE_MATCH;
OptionalMatch_t m = Text$find(state->state.stack[0].text, state->pattern, state->i);
if (m.index.small == 0) // No match
OptionalPatternMatch m = find(state->state.stack[0].text, state->pattern, state->i);
if (m.is_none) // No match
state->i = I(state->state.stack[0].text.length + 1);
else
state->i = Int$plus(m.index, I(MAX(1, m.text.length)));
return m;
}
public Closure_t Text$by_match(Text_t text, Pattern_t pattern)
static Closure_t Pattern$by_match(Text_t text, Text_t pattern)
{
return (Closure_t){
.fn=(void*)next_match,
@ -902,7 +909,7 @@ public Closure_t Text$by_match(Text_t text, Pattern_t pattern)
};
}
static Text_t apply_backrefs(Text_t text, Array_t recursive_replacements, Text_t replacement, Pattern_t backref_pat, capture_t *captures)
static Text_t apply_backrefs(Text_t text, Array_t recursive_replacements, Text_t replacement, Text_t backref_pat, capture_t *captures)
{
if (backref_pat.length == 0)
return replacement;
@ -946,7 +953,7 @@ static Text_t apply_backrefs(Text_t text, Array_t recursive_replacements, Text_t
Text_t backref_text = Text$slice(text, I(captures[backref].index+1), I(captures[backref].index + captures[backref].length));
if (captures[backref].recursive && recursive_replacements.length > 0)
backref_text = Text$replace_array(backref_text, recursive_replacements, backref_pat, true);
backref_text = replace_array(backref_text, recursive_replacements, backref_pat, true);
if (pos > nonmatching_pos) {
Text_t before_slice = Text$slice(replacement, I(nonmatching_pos+1), I(pos));
@ -965,7 +972,7 @@ static Text_t apply_backrefs(Text_t text, Array_t recursive_replacements, Text_t
return ret;
}
public Text_t Text$replace(Text_t text, Pattern_t pattern, Text_t replacement, Pattern_t backref_pat, bool recursive)
static Text_t Pattern$replace(Text_t text, Text_t pattern, Text_t replacement, Text_t backref_pat, bool recursive)
{
Text_t ret = EMPTY_TEXT;
@ -1018,7 +1025,7 @@ public Text_t Text$replace(Text_t text, Pattern_t pattern, Text_t replacement, P
return ret;
}
public Text_t Text$trim(Text_t text, Pattern_t pattern, bool trim_left, bool trim_right)
static Text_t Pattern$trim(Text_t text, Text_t pattern, bool trim_left, bool trim_right)
{
int64_t first = 0, last = text.length-1;
if (trim_left) {
@ -1037,7 +1044,7 @@ public Text_t Text$trim(Text_t text, Pattern_t pattern, bool trim_left, bool tri
return Text$slice(text, I(first+1), I(last+1));
}
public Text_t Text$map(Text_t text, Pattern_t pattern, Closure_t fn, bool recursive)
static Text_t Pattern$map(Text_t text, Text_t pattern, Closure_t fn, bool recursive)
{
Text_t ret = EMPTY_TEXT;
@ -1049,7 +1056,7 @@ public Text_t Text$map(Text_t text, Pattern_t pattern, Closure_t fn, bool recurs
TextIter_t text_state = NEW_TEXT_ITER_STATE(text);
int64_t nonmatching_pos = 0;
Text_t (*text_mapper)(Match_t, void*) = fn.fn;
Text_t (*text_mapper)(PatternMatch, void*) = fn.fn;
for (int64_t pos = 0; pos < text.length; pos++) {
// Optimization: quickly skip ahead to first char in pattern:
if (find_first) {
@ -1061,7 +1068,7 @@ public Text_t Text$map(Text_t text, Pattern_t pattern, Closure_t fn, bool recurs
int64_t match_len = match(text, pos, pattern, 0, captures, 0);
if (match_len < 0) continue;
Match_t m = {
PatternMatch m = {
.text=Text$slice(text, I(pos+1), I(pos+match_len)),
.index=I(pos+1),
.captures={},
@ -1069,7 +1076,7 @@ public Text_t Text$map(Text_t text, Pattern_t pattern, Closure_t fn, bool recurs
for (int i = 0; captures[i].occupied; i++) {
Text_t capture = Text$slice(text, I(captures[i].index+1), I(captures[i].index+captures[i].length));
if (recursive)
capture = Text$map(capture, pattern, fn, recursive);
capture = Pattern$map(capture, pattern, fn, recursive);
Array$insert(&m.captures, &capture, I(0), sizeof(Text_t));
}
@ -1090,7 +1097,7 @@ public Text_t Text$map(Text_t text, Pattern_t pattern, Closure_t fn, bool recurs
return ret;
}
public void Text$each(Text_t text, Pattern_t pattern, Closure_t fn, bool recursive)
static void Pattern$each(Text_t text, Text_t pattern, Closure_t fn, bool recursive)
{
int32_t first_grapheme = Text$get_grapheme(pattern, 0);
bool find_first = (first_grapheme != '{'
@ -1098,7 +1105,7 @@ public void Text$each(Text_t text, Pattern_t pattern, Closure_t fn, bool recursi
&& !uc_is_property((ucs4_t)first_grapheme, UC_PROPERTY_PAIRED_PUNCTUATION));
TextIter_t text_state = NEW_TEXT_ITER_STATE(text);
void (*action)(Match_t, void*) = fn.fn;
void (*action)(PatternMatch, void*) = fn.fn;
for (int64_t pos = 0; pos < text.length; pos++) {
// Optimization: quickly skip ahead to first char in pattern:
if (find_first) {
@ -1110,7 +1117,7 @@ public void Text$each(Text_t text, Pattern_t pattern, Closure_t fn, bool recursi
int64_t match_len = match(text, pos, pattern, 0, captures, 0);
if (match_len < 0) continue;
Match_t m = {
PatternMatch m = {
.text=Text$slice(text, I(pos+1), I(pos+match_len)),
.index=I(pos+1),
.captures={},
@ -1118,7 +1125,7 @@ public void Text$each(Text_t text, Pattern_t pattern, Closure_t fn, bool recursi
for (int i = 0; captures[i].occupied; i++) {
Text_t capture = Text$slice(text, I(captures[i].index+1), I(captures[i].index+captures[i].length));
if (recursive)
Text$each(capture, pattern, fn, recursive);
Pattern$each(capture, pattern, fn, recursive);
Array$insert(&m.captures, &capture, I(0), sizeof(Text_t));
}
@ -1127,7 +1134,7 @@ public void Text$each(Text_t text, Pattern_t pattern, Closure_t fn, bool recursi
}
}
Text_t Text$replace_array(Text_t text, Array_t replacements, Text_t backref_pat, bool recursive)
Text_t replace_array(Text_t text, Array_t replacements, Text_t backref_pat, bool recursive)
{
if (replacements.length == 0) return text;
@ -1137,7 +1144,7 @@ Text_t Text$replace_array(Text_t text, Array_t replacements, Text_t backref_pat,
for (int64_t pos = 0; pos < text.length; ) {
// Find the first matching pattern at this position:
for (int64_t i = 0; i < replacements.length; i++) {
Pattern_t pattern = *(Pattern_t*)(replacements.data + i*replacements.stride);
Text_t pattern = *(Text_t*)(replacements.data + i*replacements.stride);
capture_t captures[MAX_BACKREFS] = {};
int64_t len = match(text, pos, pattern, 0, captures, 1);
if (len < 0) continue;
@ -1171,12 +1178,12 @@ Text_t Text$replace_array(Text_t text, Array_t replacements, Text_t backref_pat,
return ret;
}
public Text_t Text$replace_all(Text_t text, Table_t replacements, Text_t backref_pat, bool recursive)
static Text_t Pattern$replace_all(Text_t text, Table_t replacements, Text_t backref_pat, bool recursive)
{
return Text$replace_array(text, replacements.entries, backref_pat, recursive);
return replace_array(text, replacements.entries, backref_pat, recursive);
}
public Array_t Text$split(Text_t text, Pattern_t pattern)
static Array_t Pattern$split(Text_t text, Text_t pattern)
{
if (text.length == 0) // special case
return (Array_t){.length=0};
@ -1207,7 +1214,7 @@ public Array_t Text$split(Text_t text, Pattern_t pattern)
typedef struct {
TextIter_t state;
int64_t i;
Pattern_t pattern;
Text_t pattern;
} split_iter_state_t;
static OptionalText_t next_split(split_iter_state_t *state)
@ -1243,7 +1250,7 @@ static OptionalText_t next_split(split_iter_state_t *state)
}
}
public Closure_t Text$by_split(Text_t text, Pattern_t pattern)
static Closure_t Pattern$by_split(Text_t text, Text_t pattern)
{
return (Closure_t){
.fn=(void*)next_split,
@ -1251,7 +1258,7 @@ public Closure_t Text$by_split(Text_t text, Pattern_t pattern)
};
}
public Pattern_t Pattern$escape_text(Text_t text)
static Text_t Pattern$escape_text(Text_t text)
{
// TODO: optimize for spans of non-escaped text
Text_t ret = EMPTY_TEXT;
@ -1276,62 +1283,9 @@ static Text_t Pattern$as_text(const void *obj, bool colorize, const TypeInfo_t *
(void)info;
if (!obj) return Text("Pattern");
Pattern_t pat = *(Pattern_t*)obj;
Text_t quote = Text$has(pat, Pattern("/")) && !Text$has(pat, Pattern("|")) ? Text("|") : Text("/");
return Text$concat( colorize ? Text("\x1b[1m$\033[m") : Text("$"), Text$quoted(pat, colorize, quote));
Text_t pat = *(Text_t*)obj;
Text_t quote = Pattern$has(pat, Text("/")) && !Pattern$has(pat, Text("|")) ? Text("|") : Text("/");
return Text$concat(colorize ? Text("\x1b[1m$\033[m") : Text("$"), Text$quoted(pat, colorize, quote));
}
public const TypeInfo_t Pattern$info = {
.size=sizeof(Pattern_t),
.align=__alignof__(Pattern_t),
.tag=TextInfo,
.TextInfo={.lang="Pattern"},
.metamethods={
.as_text=Pattern$as_text,
.hash=Text$hash,
.compare=Text$compare,
.equal=Text$equal,
.is_none=Text$is_none,
.serialize=Text$serialize,
.deserialize=Text$deserialize,
},
};
static const TypeInfo_t _text_array = {
.size=sizeof(Array_t),
.align=__alignof__(Array_t),
.tag=ArrayInfo,
.ArrayInfo.item=&Text$info,
.metamethods=Array$metamethods,
};
static NamedType_t _match_fields[3] = {
{"text", &Text$info},
{"index", &Int$info},
{"captures", &_text_array},
};
static bool Match$is_none(const void *m, const TypeInfo_t*)
{
return ((OptionalMatch_t*)m)->index.small == 0;
}
public const TypeInfo_t Match$info = {
.size=sizeof(Match_t),
.align=__alignof__(Match_t),
.tag=StructInfo,
.StructInfo={
.name="Match",
.num_fields=3,
.fields=_match_fields,
},
.metamethods={
.as_text=Struct$as_text,
.hash=Struct$hash,
.compare=Struct$compare,
.equal=Struct$equal,
.is_none=Match$is_none,
},
};
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0

View File

@ -0,0 +1,58 @@
use ./patterns.c
# A single pattern match: the matched text, the index in the searched text
# where the match starts, and the texts captured by the pattern.
struct PatternMatch(text:Text, index:Int, captures:[Text])
# `P` is the text-based language type used for patterns (written `$P/.../`).
lang P:
# Plain Text is converted by passing it through Pattern$escape_text() from patterns.c.
# NOTE(review): presumably this escapes pattern syntax so the text matches literally -- confirm in patterns.c.
convert(text:Text -> P):
return inline C : P { Pattern$escape_text(_$text); }
# Integers are interpolated as their text form via P.from_text(), with no escaping step.
convert(n:Int -> P):
return P.from_text("$n")
# Pattern-based methods added to the Text type. Except for trim_left/trim_right,
# every method is a thin wrapper that forwards its arguments unchanged to the
# Pattern$... C function of the same name pulled in by `use ./patterns.c`.
extend Text:
# Forwards to Pattern$matches(); the result is optional ([Text]?).
func matches(text:Text, pattern:P -> [Text]?):
return inline C : [Text]? { Pattern$matches(_$text, _$pattern); }
# Forwards to Pattern$replace(). `backref` defaults to "@" and `recursive` to yes.
func pat_replace(text:Text, pattern:P, replacement:Text, backref="@", recursive=yes -> Text):
return inline C : Text { Pattern$replace(_$text, _$pattern, _$replacement, _$backref, _$recursive); }
# Forwards to Pattern$replace_all() with a table of pattern -> replacement entries.
func pat_replace_all(text:Text, replacements:{P,Text}, backref="@", recursive=yes -> Text):
return inline C : Text { Pattern$replace_all(_$text, _$replacements, _$backref, _$recursive); }
# Forwards to Pattern$has().
func has(text:Text, pattern:P -> Bool):
return inline C : Bool { Pattern$has(_$text, _$pattern); }
# Forwards to Pattern$find_all(); returns an array of PatternMatch values.
func find_all(text:Text, pattern:P -> [PatternMatch]):
return inline C : [PatternMatch] { Pattern$find_all(_$text, _$pattern); }
# Forwards to Pattern$by_match(); returns an iterator function yielding PatternMatch? values.
func by_match(text:Text, pattern:P -> func(->PatternMatch?)):
return inline C : func(->PatternMatch?) { Pattern$by_match(_$text, _$pattern); }
# Forwards to Pattern$each(), passing the callback `fn`; returns nothing.
func each(text:Text, pattern:P, fn:func(m:PatternMatch), recursive=yes):
inline C { Pattern$each(_$text, _$pattern, _$fn, _$recursive); }
# Forwards to Pattern$map(); `fn` turns each PatternMatch into Text.
func map(text:Text, pattern:P, fn:func(m:PatternMatch -> Text), recursive=yes -> Text):
return inline C : Text { Pattern$map(_$text, _$pattern, _$fn, _$recursive); }
# Forwards to Pattern$split().
func split(text:Text, pattern:P -> [Text]):
return inline C : [Text] { Pattern$split(_$text, _$pattern); }
# Forwards to Pattern$by_split(); returns an iterator function yielding Text? values.
func by_split(text:Text, pattern:P -> func(->Text?)):
return inline C : func(->Text?) { Pattern$by_split(_$text, _$pattern); }
# Forwards to Pattern$trim(); both sides are trimmed by default.
func trim(text:Text, pattern:P, trim_left=yes, trim_right=yes -> Text):
return inline C : Text { Pattern$trim(_$text, _$pattern, _$trim_left, _$trim_right); }
# Convenience wrapper: trim() with only the left side enabled.
func trim_left(text:Text, pattern:P -> Text):
return text:trim(pattern, trim_left=yes, trim_right=no)
# Convenience wrapper: trim() with only the right side enabled.
func trim_right(text:Text, pattern:P -> Text):
return text:trim(pattern, trim_left=no, trim_right=yes)
# Small demo of the module: evaluates pat_replace and find_all on sample text,
# then iterates over every match produced by by_match.
# NOTE(review): `>>` lines look like doctest-style expressions that display their value -- confirm.
func main():
>> "hello world":pat_replace($P/{id}/, "XXX")
>> "hello world":find_all($P/l/)
for m in "hello one two three":by_match($P/{id}/):
>> m

View File

@ -165,6 +165,7 @@ CORD ast_to_xml(ast_t *ast)
T(Use, "<Use>%r%r</Use>", optional_tagged("var", data.var), xml_escape(data.path))
T(InlineCCode, "<InlineCode>%r</InlineCode>", xml_escape(data.code))
T(Deserialize, "<Deserialize><type>%r</type>%r</Deserialize>", type_ast_to_xml(data.type), ast_to_xml(data.value))
T(Extend, "<Extend name=\"%s\">%r</Extend>", data.name, ast_to_xml(data.body))
default: return "???";
#undef T
}

View File

@ -143,6 +143,7 @@ typedef enum {
Use,
InlineCCode,
Deserialize,
Extend,
} ast_e;
struct ast_s {
@ -331,6 +332,10 @@ struct ast_s {
ast_t *value;
type_ast_t *type;
} Deserialize;
struct {
const char *name;
ast_t *body;
} Extend;
} __data;
};

View File

@ -15,7 +15,6 @@
#include "stdlib/integers.h"
#include "stdlib/nums.h"
#include "stdlib/paths.h"
#include "stdlib/patterns.h"
#include "stdlib/text.h"
#include "stdlib/util.h"
#include "structs.h"
@ -39,7 +38,7 @@ static CORD compile_string_literal(CORD literal);
CORD promote_to_optional(type_t *t, CORD code)
{
if (t == PATH_TYPE || t == PATH_TYPE_TYPE || t == MATCH_TYPE) {
if (t == PATH_TYPE || t == PATH_TYPE_TYPE) {
return code;
} else if (t->tag == IntType) {
switch (Match(t, IntType)->bits) {
@ -442,7 +441,7 @@ static void add_closed_vars(Table_t *closed_vars, env_t *enclosing_scope, env_t
add_closed_vars(closed_vars, enclosing_scope, env, Match(ast, Deserialize)->value);
break;
}
case Use: case FunctionDef: case ConvertDef: case StructDef: case EnumDef: case LangDef: {
case Use: case FunctionDef: case ConvertDef: case StructDef: case EnumDef: case LangDef: case Extend: {
errx(1, "Definitions should not be reachable in a closure.");
}
default:
@ -497,7 +496,6 @@ PUREFUNC CORD compile_unsigned_type(type_t *t)
CORD compile_type(type_t *t)
{
if (t == RNG_TYPE) return "RNG_t";
else if (t == MATCH_TYPE) return "Match_t";
else if (t == PATH_TYPE) return "Path_t";
else if (t == PATH_TYPE_TYPE) return "PathType_t";
@ -516,8 +514,6 @@ CORD compile_type(type_t *t)
auto text = Match(t, TextType);
if (!text->lang || streq(text->lang, "Text"))
return "Text_t";
else if (streq(text->lang, "Pattern"))
return "Pattern_t";
else
return CORD_all(namespace_prefix(text->env, text->env->namespace->parent), text->lang, "$$type");
}
@ -558,8 +554,6 @@ CORD compile_type(type_t *t)
case ArrayType: case TableType: case SetType:
return CORD_all("Optional", compile_type(nonnull));
case StructType: {
if (nonnull == MATCH_TYPE)
return "OptionalMatch_t";
if (nonnull == PATH_TYPE)
return "OptionalPath_t";
if (nonnull == PATH_TYPE_TYPE)
@ -680,7 +674,7 @@ CORD optional_into_nonnone(type_t *t, CORD value)
case IntType:
return CORD_all(value, ".value");
case StructType:
if (t == MATCH_TYPE || t == PATH_TYPE || t == PATH_TYPE_TYPE)
if (t == PATH_TYPE || t == PATH_TYPE_TYPE)
return value;
return CORD_all(value, ".value");
default:
@ -695,8 +689,6 @@ CORD check_none(type_t *t, CORD value)
// complain about excessive parens around equality comparisons
if (t->tag == PointerType || t->tag == FunctionType || t->tag == CStringType)
return CORD_all("({", value, " == NULL;})");
else if (t == MATCH_TYPE)
return CORD_all("({(", value, ").index.small == 0;})");
else if (t == PATH_TYPE)
return CORD_all("({(", value, ").type.$tag == PATH_NONE;})");
else if (t == PATH_TYPE_TYPE)
@ -1168,7 +1160,7 @@ static CORD _compile_statement(env_t *env, ast_t *ast)
default: code_err(ast, "Update assignments are not implemented for this operation");
}
}
case StructDef: case EnumDef: case LangDef: case FunctionDef: case ConvertDef: {
case StructDef: case EnumDef: case LangDef: case Extend: case FunctionDef: case ConvertDef: {
return CORD_EMPTY;
}
case Skip: {
@ -1730,8 +1722,13 @@ static CORD _compile_statement(env_t *env, ast_t *ast)
code_err(ast, "Could not find library");
CORD initialization = CORD_EMPTY;
const char *lib_id = Text$as_c_string(
Text$replace(Text$from_str(use->path), Pattern("{1+ !alphanumeric}"), Text("_"), Pattern(""), false));
// Build the library id from the `use` path: every byte that is neither
// alphanumeric nor '_' is replaced with '_'.
char *lib_id = String(use->path);
for (char *p = lib_id; *p; p++) {
    // The <ctype.h> classifiers require a value representable as unsigned
    // char (or EOF); a plain `char` holding a non-ASCII byte may be negative,
    // which is undefined behavior, so cast before calling isalnum().
    if (!isalnum((unsigned char)*p) && *p != '_')
        *p = '_';
}
for (size_t i = 0; i < tm_files.gl_pathc; i++) {
const char *filename = tm_files.gl_pathv[i];
initialization = CORD_all(
@ -2165,7 +2162,6 @@ CORD compile_none(type_t *t)
if (t == PATH_TYPE) return "NONE_PATH";
else if (t == PATH_TYPE_TYPE) return "((OptionalPathType_t){})";
else if (t == MATCH_TYPE) return "NONE_MATCH";
switch (t->tag) {
case BigIntType: return "NONE_INT";
@ -2597,8 +2593,6 @@ CORD compile(env_t *env, ast_t *ast)
CORD lang_constructor;
if (!lang || streq(lang, "Text"))
lang_constructor = "Text";
else if (streq(lang, "Pattern"))
lang_constructor = lang;
else
lang_constructor = CORD_all(namespace_prefix(Match(text_t, TextType)->env, Match(text_t, TextType)->env->namespace->parent), lang);
@ -3752,7 +3746,7 @@ CORD compile(env_t *env, ast_t *ast)
case Defer: code_err(ast, "Compiling 'defer' as expression!");
case Extern: code_err(ast, "Externs are not supported as expressions");
case TableEntry: code_err(ast, "Table entries should not be compiled directly");
case Declare: case Assign: case UpdateAssign: case For: case While: case Repeat: case StructDef: case LangDef:
case Declare: case Assign: case UpdateAssign: case For: case While: case Repeat: case StructDef: case LangDef: case Extend:
case EnumDef: case FunctionDef: case ConvertDef: case Skip: case Stop: case Pass: case Return: case DocTest: case PrintStatement:
code_err(ast, "This is not a valid expression");
default: case Unknown: code_err(ast, "Unknown AST");
@ -3762,7 +3756,6 @@ CORD compile(env_t *env, ast_t *ast)
CORD compile_type_info(type_t *t)
{
if (t == RNG_TYPE) return "&RNG$info";
else if (t == MATCH_TYPE) return "&Match$info";
else if (t == PATH_TYPE) return "&Path$info";
else if (t == PATH_TYPE_TYPE) return "&PathType$info";
@ -3773,8 +3766,6 @@ CORD compile_type_info(type_t *t)
auto text = Match(t, TextType);
if (!text->lang || streq(text->lang, "Text"))
return "&Text$info";
else if (streq(text->lang, "Pattern"))
return "&Pattern$info";
return CORD_all("(&", namespace_prefix(text->env, text->env->namespace->parent), text->lang, "$$info)");
}
case StructType: {
@ -4206,6 +4197,12 @@ CORD compile_top_level_code(env_t *env, ast_t *ast)
env_t *ns_env = namespace_env(env, def->name);
return CORD_all(code, def->namespace ? compile_top_level_code(ns_env, def->namespace) : CORD_EMPTY);
}
case Extend: {
auto extend = Match(ast, Extend);
env_t *ns_env = namespace_env(env, extend->name);
ns_env->libname = env->libname;
return compile_top_level_code(ns_env, extend->body);
}
case Extern: return CORD_EMPTY;
case Block: {
CORD code = CORD_EMPTY;
@ -4258,6 +4255,9 @@ static void initialize_vars_and_statics(env_t *env, ast_t *ast)
} else if (stmt->ast->tag == LangDef) {
initialize_vars_and_statics(namespace_env(env, Match(stmt->ast, LangDef)->name),
Match(stmt->ast, LangDef)->namespace);
} else if (stmt->ast->tag == Extend) {
initialize_vars_and_statics(namespace_env(env, Match(stmt->ast, Extend)->name),
Match(stmt->ast, Extend)->body);
} else if (stmt->ast->tag == Use) {
continue;
} else {
@ -4348,6 +4348,9 @@ CORD compile_statement_type_header(env_t *env, Path_t header_path, ast_t *ast)
"extern const TypeInfo_t ", full_name, ";\n"
);
}
case Extend: {
return CORD_EMPTY;
}
default:
return CORD_EMPTY;
}
@ -4364,6 +4367,12 @@ CORD compile_statement_namespace_header(env_t *env, Path_t header_path, ast_t *a
block = def->namespace;
break;
}
case Extend: {
auto extend = Match(ast, Extend);
ns_name = extend->name;
block = extend->body;
break;
}
case StructDef: {
auto def = Match(ast, StructDef);
ns_name = def->name;

View File

@ -13,7 +13,6 @@
#include "typecheck.h"
type_t *TEXT_TYPE = NULL;
type_t *MATCH_TYPE = NULL;
type_t *RNG_TYPE = NULL;
public type_t *PATH_TYPE = NULL;
public type_t *PATH_TYPE_TYPE = NULL;
@ -67,7 +66,6 @@ env_t *global_env(void)
(void)bind_type(env, "Int32", Type(IntType, .bits=TYPE_IBITS32));
(void)bind_type(env, "Memory", Type(MemoryType));
PATH_TYPE_TYPE = declare_type(env, "enum PathType(Relative, Absolute, Home)");
MATCH_TYPE = declare_type(env, "struct Match(text:Text, index:Int, captures:[Text])");
PATH_TYPE = declare_type(env, "struct Path(type:PathType, components:[Text])");
RNG_TYPE = declare_type(env, "struct RNG(state:@Memory)");
@ -279,13 +277,6 @@ env_t *global_env(void)
#undef F_opt
#undef F
#undef C
{"Match", MATCH_TYPE, "Match_t", "Match", TypedArray(ns_entry_t,
// No methods
)},
{"Pattern", Type(TextType, .lang="Pattern", .env=namespace_env(env, "Pattern")), "Pattern_t", "Pattern$info", TypedArray(ns_entry_t,
{"escape_int", "Int$value_as_text", "func(i:Int -> Pattern)"},
{"escape_text", "Pattern$escape_text", "func(text:Text -> Pattern)"},
)},
{"PathType", PATH_TYPE_TYPE, "PathType_t", "PathType$info", TypedArray(ns_entry_t,
{"Relative", "((PathType_t){.$tag=PATH_RELATIVE})", "PathType"},
{"Absolute", "((PathType_t){.$tag=PATH_ABSOLUTE})", "PathType"},
@ -353,44 +344,42 @@ env_t *global_env(void)
{"as_c_string", "Text$as_c_string", "func(text:Text -> CString)"},
{"at", "Text$cluster", "func(text:Text, index:Int -> Text)"},
{"by_line", "Text$by_line", "func(text:Text -> func(->Text?))"},
{"by_match", "Text$by_match", "func(text:Text, pattern:Pattern -> func(->Match?))"},
{"by_split", "Text$by_split", "func(text:Text, pattern=$Pattern'' -> func(->Text?))"},
{"by_split", "Text$by_split", "func(text:Text, delimiter='' -> func(->Text?))"},
{"by_split_any", "Text$by_split_any", "func(text:Text, delimiters=\" $\\t\\r\\n\" -> func(->Text?))"},
{"bytes", "Text$utf8_bytes", "func(text:Text -> [Byte])"},
{"caseless_equals", "Text$equal_ignoring_case", "func(a,b:Text, language='C' -> Bool)"},
{"codepoint_names", "Text$codepoint_names", "func(text:Text -> [Text])"},
{"ends_with", "Text$ends_with", "func(text,suffix:Text -> Bool)"},
{"each", "Text$each", "func(text:Text, pattern:Pattern, fn:func(match:Match), recursive=yes)"},
{"find", "Text$find", "func(text:Text, pattern:Pattern, start=1 -> Match?)"},
{"find_all", "Text$find_all", "func(text:Text, pattern:Pattern -> [Match])"},
{"from", "Text$from", "func(text:Text, first:Int -> Text)"},
{"from_bytes", "Text$from_bytes", "func(bytes:[Byte] -> Text?)"},
{"from_c_string", "Text$from_str", "func(str:CString -> Text?)"},
{"from_codepoint_names", "Text$from_codepoint_names", "func(codepoint_names:[Text] -> Text?)"},
{"from_codepoints", "Text$from_codepoints", "func(codepoints:[Int32] -> Text)"},
{"from_text", "Path$from_text", "func(text:Text -> Path)"},
{"has", "Text$has", "func(text:Text, pattern:Pattern -> Bool)"},
{"has", "Text$has", "func(text:Text, target:Text -> Bool)"},
{"join", "Text$join", "func(glue:Text, pieces:[Text] -> Text)"},
{"left_pad", "Text$left_pad", "func(text:Text, count:Int, pad=' ', language='C' -> Text)"},
{"lines", "Text$lines", "func(text:Text -> [Text])"},
{"lower", "Text$lower", "func(text:Text, language='C' -> Text)"},
{"map", "Text$map", "func(text:Text, pattern:Pattern, fn:func(match:Match -> Text), recursive=yes -> Text)"},
{"matches", "Text$matches", "func(text:Text, pattern:Pattern -> [Text]?)"},
{"middle_pad", "Text$middle_pad", "func(text:Text, count:Int, pad=' ', language='C' -> Text)"},
{"quoted", "Text$quoted", "func(text:Text, color=no, quotation_mark='\"' -> Text)"},
{"repeat", "Text$repeat", "func(text:Text, count:Int -> Text)"},
{"replace", "Text$replace", "func(text:Text, pattern:Pattern, replacement:Text, backref=$/\\/, recursive=yes -> Text)"},
{"replace_all", "Text$replace_all", "func(text:Text, replacements:{Pattern,Text}, backref=$/\\/, recursive=yes -> Text)"},
{"replace", "Text$replace", "func(text:Text, target:Text, replacement:Text -> Text)"},
{"reversed", "Text$reversed", "func(text:Text -> Text)"},
{"right_pad", "Text$right_pad", "func(text:Text, count:Int, pad=' ', language='C' -> Text)"},
{"slice", "Text$slice", "func(text:Text, from=1, to=-1 -> Text)"},
{"split", "Text$split", "func(text:Text, pattern=$Pattern'' -> [Text])"},
{"split", "Text$split", "func(text:Text, delimiter='' -> [Text])"},
{"split_any", "Text$split_any", "func(text:Text, delimiters=\" $\\t\\r\\n\" -> [Text])"},
{"starts_with", "Text$starts_with", "func(text,prefix:Text -> Bool)"},
{"title", "Text$title", "func(text:Text, language='C' -> Text)"},
{"to", "Text$to", "func(text:Text, last:Int -> Text)"},
{"trim", "Text$trim", "func(text:Text, pattern=$/{whitespace}/, trim_left=yes, trim_right=yes -> Text)"},
{"translate", "Text$translate", "func(text:Text, translations:{Text,Text} -> Text)"},
{"trim", "Text$trim", "func(text:Text, to_trim=\" \t\r\n\", left=yes, right=yes -> Text)"},
{"upper", "Text$upper", "func(text:Text, language='C' -> Text)"},
{"utf32_codepoints", "Text$utf32_codepoints", "func(text:Text -> [Int32])"},
{"width", "Text$width", "func(text:Text, language='C' -> Int)"},
{"without_prefix", "Text$without_prefix", "func(text,prefix:Text -> Text)"},
{"without_suffix", "Text$without_suffix", "func(text,suffix:Text -> Text)"},
)},
};
@ -518,9 +507,6 @@ env_t *global_env(void)
{"Num32$from_int64", "func(i:Int64, truncate=no -> Num32)"},
{"Num32$from_int", "func(i:Int, truncate=no -> Num32)"},
{"Num32$from_num", "func(n:Num -> Num32)"});
ADD_CONSTRUCTORS("Pattern",
{"Pattern$escape_text", "func(text:Text -> Pattern)"},
{"Int$value_as_text", "func(i:Int -> Pattern)"});
ADD_CONSTRUCTORS("Path",
{"Path$escape_text", "func(text:Text -> Path)"},
{"Path$escape_path", "func(path:Path -> Path)"},
@ -534,11 +520,6 @@ env_t *global_env(void)
.ret=PATH_TYPE),
"Path$from_text");
set_binding(namespace_env(env, "Pattern"), "from_text",
Type(FunctionType, .args=new(arg_t, .name="text", .type=TEXT_TYPE),
.ret=Type(TextType, .lang="Pattern", .env=namespace_env(env, "Pattern"))),
"(Pattern_t)");
struct {
const char *name, *code, *type_str;
} global_vars[] = {

View File

@ -89,7 +89,6 @@ void set_binding(env_t *env, const char *name, type_t *type, CORD code);
binding_t *get_namespace_binding(env_t *env, ast_t *self, const char *name);
#define code_err(ast, ...) compiler_err((ast)->file, (ast)->start, (ast)->end, __VA_ARGS__)
extern type_t *TEXT_TYPE;
extern type_t *MATCH_TYPE;
extern type_t *RNG_TYPE;
extern type_t *PATH_TYPE;
extern type_t *PATH_TYPE_TYPE;

View File

@ -22,7 +22,6 @@
#include "ast.h"
#include "cordhelpers.h"
#include "stdlib/integers.h"
#include "stdlib/patterns.h"
#include "stdlib/paths.h"
#include "stdlib/print.h"
#include "stdlib/stdlib.h"
@ -64,7 +63,7 @@ int op_tightness[] = {
static const char *keywords[] = {
"yes", "xor", "while", "when", "use", "unless", "struct", "stop", "skip", "return",
"or", "not", "none", "no", "mod1", "mod", "pass", "lang", "inline", "in", "if",
"func", "for", "extern", "enum", "else", "do", "deserialize", "defer", "and",
"func", "for", "extern", "extend", "enum", "else", "do", "deserialize", "defer", "and",
"_min_", "_max_", NULL,
};
@ -120,6 +119,7 @@ static PARSER(parse_inline_c);
static PARSER(parse_int);
static PARSER(parse_lambda);
static PARSER(parse_lang_def);
static PARSER(parse_extend);
static PARSER(parse_namespace);
static PARSER(parse_negative);
static PARSER(parse_not);
@ -1241,9 +1241,6 @@ PARSER(parse_text) {
open_quote = *pos;
++pos;
close_quote = closing[(int)open_quote] ? closing[(int)open_quote] : open_quote;
if (!lang && (open_quote == '/' || open_quote == '|'))
lang = "Pattern";
} else {
return NULL;
}
@ -1904,9 +1901,10 @@ PARSER(parse_namespace) {
if (get_indent(ctx, next) != indent) break;
ast_t *stmt;
if ((stmt=optional(ctx, &pos, parse_struct_def))
||(stmt=optional(ctx, &pos, parse_func_def))
||(stmt=optional(ctx, &pos, parse_enum_def))
||(stmt=optional(ctx, &pos, parse_lang_def))
||(stmt=optional(ctx, &pos, parse_func_def))
||(stmt=optional(ctx, &pos, parse_extend))
||(stmt=optional(ctx, &pos, parse_convert_def))
||(stmt=optional(ctx, &pos, parse_use))
||(stmt=optional(ctx, &pos, parse_extern))
@ -1940,9 +1938,10 @@ PARSER(parse_file_body) {
if (get_indent(ctx, next) != 0) break;
ast_t *stmt;
if ((stmt=optional(ctx, &pos, parse_struct_def))
||(stmt=optional(ctx, &pos, parse_func_def))
||(stmt=optional(ctx, &pos, parse_enum_def))
||(stmt=optional(ctx, &pos, parse_lang_def))
||(stmt=optional(ctx, &pos, parse_func_def))
||(stmt=optional(ctx, &pos, parse_extend))
||(stmt=optional(ctx, &pos, parse_convert_def))
||(stmt=optional(ctx, &pos, parse_use))
||(stmt=optional(ctx, &pos, parse_extern))
@ -2112,6 +2111,32 @@ PARSER(parse_lang_def) {
return NewAST(ctx->file, start, pos, LangDef, .name=name, .namespace=namespace);
}
// Parse an `extend Name: body...` statement.
// Returns NULL when the input does not start with the `extend` keyword.
// Otherwise returns an Extend AST node holding the name and the parsed body;
// when there is no `:` or no more-indented body, the body is an empty Block.
PARSER(parse_extend) {
    const char *start = pos;
    // extend Name: body...
    if (!match_word(&pos, "extend")) return NULL;
    int64_t starting_indent = get_indent(ctx, pos);
    spaces(&pos);
    const char *name = get_id(&pos);
    if (!name)
        parser_err(ctx, start, pos, "I expected the name of a type to extend");

    ast_t *body = NULL;
    if (match(&pos, ":")) {
        // Look ahead past whitespace without committing: the body only counts
        // if it is indented deeper than the `extend` line itself.
        const char *ns_pos = pos;
        whitespace(&ns_pos);
        int64_t ns_indent = get_indent(ctx, ns_pos);
        if (ns_indent > starting_indent) {
            pos = ns_pos;
            body = optional(ctx, &pos, parse_namespace);
        }
    }
    // No body given (or it failed to parse): use an empty block.
    if (!body)
        body = NewAST(ctx->file, pos, pos, Block, .statements=NULL);
    return NewAST(ctx->file, start, pos, Extend, .name=name, .body=body);
}
arg_ast_t *parse_args(parse_ctx_t *ctx, const char **pos)
{
arg_ast_t *args = NULL;
@ -2373,20 +2398,6 @@ PARSER(parse_use) {
what = USE_LOCAL;
} else {
what = USE_MODULE;
// When `use`ing a URL, convert it to a hash:
Text_t text = Text$from_str(name);
Array_t m = Text$matches(text, Pattern("{url}"));
if (m.length >= 0) {
text = Text$trim(text, Pattern("http{0-1 s}://"), true, false);
FILE *shasum = popen(String("echo -n '", text, "' | sha256sum"), "r");
const size_t HASH_LEN = 32;
char *hash = GC_MALLOC_ATOMIC(HASH_LEN + 1);
size_t just_read = fread(hash, sizeof(char), HASH_LEN, shasum);
if (just_read < HASH_LEN)
print_err("Failed to get SHA sum for 'use': ", name);
name = hash;
}
}
return NewAST(ctx->file, start, pos, Use, .var=var, .path=name, .what=what);
}

View File

@ -27,7 +27,6 @@ some common functionality.
- Nums: [nums.h](nums.h), [nums.c](nums.c)
- Optionals: [optionals.h](optionals.h), [optionals.c](optionals.c)
- Paths: [paths.h](paths.h), [paths.c](paths.c)
- Patterns: [patterns.h](patterns.h), [patterns.c](patterns.c)
- Pointers: [pointers.h](pointers.h), [pointers.c](pointers.c)
- Tables: [tables.h](tables.h), [tables.c](tables.c)
- Text: [text.h](text.h), [text.c](text.c)

View File

@ -94,9 +94,6 @@ typedef struct Text_s {
};
} Text_t;
#define Pattern_t Text_t
#define OptionalPattern_t Text_t
typedef struct {
enum { PATH_NONE, PATH_RELATIVE, PATH_ABSOLUTE, PATH_HOME } $tag;
} PathType_t;

View File

@ -6,7 +6,6 @@
#include "integers.h"
#include "metamethods.h"
#include "nums.h"
#include "patterns.h"
#include "text.h"
#include "util.h"

View File

@ -24,7 +24,6 @@
#include "integers.h"
#include "optionals.h"
#include "paths.h"
#include "patterns.h"
#include "structs.h"
#include "text.h"
#include "types.h"
@ -599,15 +598,10 @@ public PUREFUNC Text_t Path$base_name(Path_t path)
public Text_t Path$extension(Path_t path, bool full)
{
Text_t base = Path$base_name(path);
Array_t results = Text$matches(base, full ? Pattern(".{!.}.{..}") : Pattern(".{..}.{!.}{end}"));
if (results.length > 0)
return *((Text_t*)(results.data + results.stride*1));
results = Text$matches(base, full ? Pattern("{!.}.{..}") : Pattern("{..}.{!.}{end}"));
if (results.length > 0)
return *((Text_t*)(results.data + results.stride*1));
else
return Text("");
const char *base = Text$as_c_string(Path$base_name(path));
const char *dot = full ? strchr(base + 1, '.') : strrchr(base + 1, '.');
const char *extension = dot ? dot + 1 : "";
return Text$from_str(extension);
}
public Path_t Path$with_component(Path_t path, Text_t component)
@ -635,10 +629,10 @@ public Path_t Path$with_extension(Path_t path, Text_t extension, bool replace)
Text_t last = *(Text_t*)(path.components.data + path.components.stride*(path.components.length-1));
Array$remove_at(&result.components, I(-1), I(1), sizeof(Text_t));
if (replace) {
if (Text$starts_with(last, Text(".")))
last = Text$replace(last, Pattern(".{!.}.{..}"), Text(".@1"), Pattern("@"), false);
else
last = Text$replace(last, Pattern("{!.}.{..}"), Text("@1"), Pattern("@"), false);
const char *base = Text$as_c_string(last);
// Find the first dot after the first character (so a leading dot is kept)
// and cut the component there. An empty component is guarded so that
// `base + 1` never points past the terminator.
const char *dot = (base[0] != '\0') ? strchr(base + 1, '.') : NULL;
if (dot)
    last = Text$from_strn(base, (size_t)(dot - base));
}
last = Text$concat(last, extension);

View File

@ -1,46 +0,0 @@
#pragma once
// The type representing text patterns for pattern matching.
#include <stdbool.h>
#include <stdint.h>
#include "datatypes.h"
#include "integers.h"
#include "optionals.h"
#include "types.h"
#define Pattern(text) ((Pattern_t)Text(text))
#define Patterns(...) ((Pattern_t)Texts(__VA_ARGS__))
typedef struct {
Text_t text;
Int_t index;
Array_t captures;
} Match_t;
typedef Match_t OptionalMatch_t;
#define NONE_MATCH ((OptionalMatch_t){.index=NONE_INT})
Text_t Text$replace(Text_t str, Pattern_t pat, Text_t replacement, Pattern_t backref_pat, bool recursive);
Pattern_t Pattern$escape_text(Text_t text);
Text_t Text$replace_all(Text_t text, Table_t replacements, Pattern_t backref_pat, bool recursive);
Array_t Text$split(Text_t text, Pattern_t pattern);
Closure_t Text$by_split(Text_t text, Pattern_t pattern);
Text_t Text$trim(Text_t text, Pattern_t pattern, bool trim_left, bool trim_right);
OptionalMatch_t Text$find(Text_t text, Pattern_t pattern, Int_t i);
Array_t Text$find_all(Text_t text, Pattern_t pattern);
Closure_t Text$by_match(Text_t text, Pattern_t pattern);
PUREFUNC bool Text$has(Text_t text, Pattern_t pattern);
OptionalArray_t Text$matches(Text_t text, Pattern_t pattern);
Text_t Text$map(Text_t text, Pattern_t pattern, Closure_t fn, bool recursive);
void Text$each(Text_t text, Pattern_t pattern, Closure_t fn, bool recursive);
#define Pattern$hash Text$hash
#define Pattern$compare Text$compare
#define Pattern$equal Text$equal
extern const TypeInfo_t Match$info;
extern const TypeInfo_t Pattern$info;
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0

View File

@ -20,7 +20,6 @@
#include "optionals.h"
#include "metamethods.h"
#include "nums.h"
#include "patterns.h"
#include "paths.h"
#include "rng.h"
#include "siphash.h"

View File

@ -998,17 +998,22 @@ PUREFUNC public int32_t Text$compare(const void *va, const void *vb, const TypeI
return 0;
}
// Return whether every grapheme of the target text occurs in the searched
// text starting at 0-based grapheme offset `pos`.
// An empty target matches at any in-range position. A target that would
// extend past the end of the text (or a negative `pos`) never matches, so
// callers that probe every position do not read graphemes beyond the text.
bool _matches(TextIter_t *text_state, TextIter_t *target_state, int64_t pos)
{
    int64_t text_len = text_state->stack[0].text.length;
    int64_t target_len = target_state->stack[0].text.length;
    if (pos < 0 || target_len > text_len - pos)
        return false;

    for (int64_t i = 0; i < target_len; i++) {
        int32_t text_g = Text$get_grapheme_fast(text_state, pos + i);
        int32_t target_g = Text$get_grapheme_fast(target_state, i);
        if (text_g != target_g) return false;
    }
    return true;
}
PUREFUNC public bool Text$starts_with(Text_t text, Text_t prefix)
{
if (text.length < prefix.length)
return false;
TextIter_t text_state = NEW_TEXT_ITER_STATE(text), prefix_state = NEW_TEXT_ITER_STATE(prefix);
for (int64_t i = 0; i < prefix.length; i++) {
int32_t text_i = Text$get_grapheme_fast(&text_state, i);
int32_t prefix_i = Text$get_grapheme_fast(&prefix_state, i);
if (text_i != prefix_i) return false;
}
return true;
return _matches(&text_state, &prefix_state, 0);
}
PUREFUNC public bool Text$ends_with(Text_t text, Text_t suffix)
@ -1016,12 +1021,236 @@ PUREFUNC public bool Text$ends_with(Text_t text, Text_t suffix)
if (text.length < suffix.length)
return false;
TextIter_t text_state = NEW_TEXT_ITER_STATE(text), suffix_state = NEW_TEXT_ITER_STATE(suffix);
for (int64_t i = 0; i < suffix.length; i++) {
int32_t text_i = Text$get_grapheme_fast(&text_state, text.length - suffix.length + i);
int32_t suffix_i = Text$get_grapheme_fast(&suffix_state, i);
if (text_i != suffix_i) return false;
return _matches(&text_state, &suffix_state, text.length - suffix.length);
}
// Return `text` with `prefix` removed from its start; if `text` does not
// start with `prefix`, it is returned unchanged.
public Text_t Text$without_prefix(Text_t text, Text_t prefix)
{
    if (!Text$starts_with(text, prefix))
        return text;
    // Slice bounds are 1-based and inclusive: keep everything after the prefix.
    return Text$slice(text, I(prefix.length + 1), I(text.length));
}
// Return `text` with `suffix` removed from its end; if `text` does not end
// with `suffix`, it is returned unchanged.
public Text_t Text$without_suffix(Text_t text, Text_t suffix)
{
    if (!Text$ends_with(text, suffix))
        return text;
    // Slice bounds are 1-based and inclusive: keep everything before the suffix.
    return Text$slice(text, I(1), I(text.length - suffix.length));
}
static bool _has_grapheme(TextIter_t *text, int32_t g)
{
for (int64_t t = 0; t < text->stack[0].text.length; t++) {
if (g == Text$get_grapheme_fast(text, t)) {
return true;
}
}
return true;
return false;
}
// Strip graphemes that appear in `to_trim` from the start of `text` (when
// `left` is set) and/or from its end (when `right` is set). When nothing is
// stripped, the original text value is returned as-is.
public Text_t Text$trim(Text_t text, Text_t to_trim, bool left, bool right)
{
    TextIter_t text_iter = NEW_TEXT_ITER_STATE(text), trim_iter = NEW_TEXT_ITER_STATE(to_trim);

    // 0-based index of the first grapheme to keep:
    int64_t start = 0;
    if (left) {
        for (; start < text.length; start++) {
            if (!_has_grapheme(&trim_iter, Text$get_grapheme_fast(&text_iter, start)))
                break;
        }
    }

    // 0-based index of the last grapheme to keep (never scans below `start`):
    int64_t end = text.length - 1;
    if (right) {
        for (; end >= start; end--) {
            if (!_has_grapheme(&trim_iter, Text$get_grapheme_fast(&text_iter, end)))
                break;
        }
    }

    if (start == 0 && end == text.length - 1)
        return text;
    // Slice bounds are 1-based and inclusive.
    return Text$slice(text, I(start + 1), I(end + 1));
}
// Replace every occurrence of any key of `translations` in `text` with the
// corresponding value, scanning left to right. At each position the entries
// are tried in table order and the first one that matches wins; matching
// resumes after the replaced span, so replacements are not re-scanned.
// Entries with an empty key are ignored: an empty target would match at
// every position without consuming any input, so the scan would never end.
public Text_t Text$translate(Text_t text, Table_t translations)
{
    TextIter_t text_state = NEW_TEXT_ITER_STATE(text);
    Text_t result = EMPTY_TEXT;
    int64_t span_start = 0; // start of the pending run of untranslated text
    Array_t replacement_array = translations.entries;
    for (int64_t i = 0; i < text.length; ) {
        for (int64_t r = 0; r < replacement_array.length; r++) {
            struct { Text_t target, replacement; } *entry = replacement_array.data + r*replacement_array.stride;
            // Skip targets that are empty or longer than the remaining text,
            // so the matcher is never asked to read past the end of `text`.
            if (entry->target.length == 0 || entry->target.length > text.length - i)
                continue;
            TextIter_t target_state = NEW_TEXT_ITER_STATE(entry->target);
            if (_matches(&text_state, &target_state, i)) {
                // Flush the untranslated run before this match, then the replacement:
                if (i > span_start)
                    result = concat2(result, Text$slice(text, I(span_start+1), I(i)));
                result = concat2(result, entry->replacement);
                i += entry->target.length;
                span_start = i;
                goto found_match;
            }
        }
        i += 1;
      found_match: continue;
    }
    // Flush whatever untranslated text remains after the last match:
    if (span_start < text.length)
        result = concat2(result, Text$slice(text, I(span_start+1), I(text.length)));
    return result;
}
// Replace every non-overlapping occurrence of `target` in `text` with
// `replacement`, scanning left to right. Replacement text is never re-scanned.
// An empty `target` leaves the text unchanged.
public Text_t Text$replace(Text_t text, Text_t target, Text_t replacement)
{
    // An empty target consumes no graphemes: if it matched, the scan below
    // would add `replacement` forever without advancing `i`.
    if (target.length <= 0)
        return text;

    TextIter_t text_state = NEW_TEXT_ITER_STATE(text), target_state = NEW_TEXT_ITER_STATE(target);
    Text_t result = EMPTY_TEXT;
    int64_t span_start = 0; // start of the pending run of unreplaced text
    for (int64_t i = 0; i < text.length; ) {
        if (_matches(&text_state, &target_state, i)) {
            // Flush the unreplaced text that precedes this match.
            if (i > span_start)
                result = concat2(result, Text$slice(text, I(span_start+1), I(i)));
            result = concat2(result, replacement);
            i += target.length;
            span_start = i;
        } else {
            i += 1;
        }
    }
    // Flush whatever follows the last match.
    if (span_start < text.length)
        result = concat2(result, Text$slice(text, I(span_start+1), I(text.length)));
    return result;
}
// Return whether `target` occurs in `text`, trying each start position in
// turn. Empty `text` never matches, because no positions are tried.
public bool Text$has(Text_t text, Text_t target)
{
    TextIter_t haystack = NEW_TEXT_ITER_STATE(text), needle = NEW_TEXT_ITER_STATE(target);
    int64_t pos = 0;
    while (pos < text.length) {
        if (_matches(&haystack, &needle, pos))
            return true;
        pos += 1;
    }
    return false;
}
// Split `text` on each occurrence of the whole text `delimiters`, returning
// the pieces in order.
//   - An empty delimiter splits the text into its grapheme clusters.
//   - Adjacent delimiters produce empty pieces.
//   - A delimiter at the very end produces a trailing empty piece.
//   - Empty text produces an empty array.
public Array_t Text$split(Text_t text, Text_t delimiters)
{
    if (delimiters.length == 0)
        return Text$clusters(text);

    TextIter_t text_state = NEW_TEXT_ITER_STATE(text), delim_state = NEW_TEXT_ITER_STATE(delimiters);
    Array_t pieces = {};
    int64_t pos = 0;
    while (pos < text.length) {
        // Advance `end` to the next delimiter, or to the end of the text.
        int64_t end = pos;
        while (end < text.length && !_matches(&text_state, &delim_state, end))
            end += 1;

        // NOTE(review): insertion index 0 appears to mean "append" -- confirm against Array$insert.
        Text_t piece = Text$slice(text, I(pos+1), I(end));
        Array$insert(&pieces, &piece, I(0), sizeof(piece));

        pos = end + delimiters.length;
        // Landing exactly on the end means the text ended with a delimiter.
        if (pos == text.length) {
            Text_t empty = Text("");
            Array$insert(&pieces, &empty, I(0), sizeof(empty));
        }
    }
    return pieces;
}
// Split `text` wherever any grapheme of `delimiters` occurs, returning the
// pieces in order.
//   - A run of consecutive delimiter graphemes counts as a single separator.
//   - Delimiters at the very end produce one trailing empty piece.
//   - With no delimiters, the result is a one-element array holding `text`.
public Array_t Text$split_any(Text_t text, Text_t delimiters)
{
    if (delimiters.length == 0)
        return Array(text);

    TextIter_t text_state = NEW_TEXT_ITER_STATE(text), delim_state = NEW_TEXT_ITER_STATE(delimiters);
    Array_t pieces = {};
    int64_t pos = 0;
    while (pos < text.length) {
        // Advance `end` to the next delimiter grapheme, or to the end of the text.
        int64_t end = pos;
        while (end < text.length && !_has_grapheme(&delim_state, Text$get_grapheme_fast(&text_state, end)))
            end += 1;
        bool stopped_at_delim = end < text.length;

        // NOTE(review): insertion index 0 appears to mean "append" -- confirm against Array$insert.
        Text_t piece = Text$slice(text, I(pos+1), I(end));
        Array$insert(&pieces, &piece, I(0), sizeof(piece));

        // Step over the delimiter and any further delimiter graphemes that follow it.
        pos = end + 1;
        while (pos < text.length && _has_grapheme(&delim_state, Text$get_grapheme_fast(&text_state, pos)))
            pos += 1;

        // The text ended inside a delimiter run: emit the trailing empty piece.
        if (pos >= text.length && stopped_at_delim) {
            Text_t empty = Text("");
            Array$insert(&pieces, &empty, I(0), sizeof(empty));
        }
    }
    return pieces;
}
// Iteration state shared by the lazy splitters (next_split / next_split_any),
// stored as the closure userdata created by Text$by_split / Text$by_split_any.
typedef struct {
// Iterator over the text being split.
TextIter_t state;
// 0-based offset where the next piece starts; initialized to 0.
int64_t i;
// For next_split: the delimiter text. For next_split_any: the set of delimiter graphemes.
Text_t delimiter;
} split_iter_state_t;
// Iterator step for Text$by_split(): return the next piece of the text, or
// NONE_TEXT once the pieces are exhausted. The sequence produced is the same
// as the array returned by Text$split() for the same arguments:
//   - An empty delimiter yields one grapheme cluster per call.
//   - Adjacent delimiters yield empty pieces.
//   - A delimiter at the very end yields a trailing empty piece.
//   - Empty text yields nothing.
static OptionalText_t next_split(split_iter_state_t *state)
{
    Text_t text = state->state.stack[0].text;
    if (state->i >= text.length) {
        // `i` lands exactly on the end only when the previous piece was closed
        // by a delimiter that ends the text; emit the trailing empty piece
        // once. Empty text is excluded: no delimiter was consumed, and
        // Text$split() returns no pieces for it.
        if (state->delimiter.length > 0 && text.length > 0 && state->i == text.length) {
            state->i = text.length + 1;
            return EMPTY_TEXT;
        }
        return NONE_TEXT;
    }

    // Empty delimiter: iterate over grapheme clusters, as Text$split() does.
    if (state->delimiter.length == 0) {
        Text_t cluster = Text$cluster(text, I(state->i+1));
        state->i += 1;
        return cluster;
    }

    TextIter_t delim_state = NEW_TEXT_ITER_STATE(state->delimiter);
    int64_t i = state->i;
    int64_t span_len = 0;
    // Extend the piece up to the next delimiter, or to the end of the text.
    while (i + span_len < text.length && !_matches(&state->state, &delim_state, i + span_len)) {
        span_len += 1;
    }

    Text_t slice = Text$slice(text, I(i+1), I(i+span_len));
    // Resume after the delimiter; this overshoots text.length if no delimiter was found.
    state->i = i + span_len + state->delimiter.length;
    return slice;
}
// Build a closure that lazily yields the pieces of `text` separated by
// `delimiter`. Each call of the closure runs next_split() on the shared state.
public Closure_t Text$by_split(Text_t text, Text_t delimiter)
{
    split_iter_state_t *iter = new(split_iter_state_t, .state=NEW_TEXT_ITER_STATE(text), .i=0, .delimiter=delimiter);
    Closure_t closure = {
        .fn=(void*)next_split,
        .userdata=iter,
    };
    return closure;
}
// Iterator step for Text$by_split_any(): return the next piece of the text, or
// NONE_TEXT once the pieces are exhausted. The sequence produced is the same
// as the array returned by Text$split_any() for the same arguments:
//   - A run of consecutive delimiter graphemes counts as a single separator.
//   - Delimiters at the very end yield one trailing empty piece.
//   - With no delimiters, the whole text is yielded as a single piece.
//   - Empty text yields nothing.
static OptionalText_t next_split_any(split_iter_state_t *state)
{
    Text_t text = state->state.stack[0].text;
    if (state->i >= text.length) {
        // `i` lands exactly on the end only when the text ended inside a
        // delimiter run; emit the trailing empty piece once. Empty text is
        // excluded: no delimiter was consumed, and Text$split_any() returns no
        // pieces for it.
        if (state->delimiter.length > 0 && text.length > 0 && state->i == text.length) {
            state->i = text.length + 1;
            return EMPTY_TEXT;
        }
        return NONE_TEXT;
    }

    // No delimiters: nothing can split the text, so yield it whole, exactly once.
    if (state->delimiter.length == 0) {
        state->i = text.length + 1;
        return text;
    }

    TextIter_t delim_state = NEW_TEXT_ITER_STATE(state->delimiter);
    int64_t i = state->i;
    int64_t span_len = 0;
    // Extend the piece up to the next delimiter grapheme, or to the end of the text.
    while (i + span_len < text.length && !_has_grapheme(&delim_state, Text$get_grapheme_fast(&state->state, i + span_len))) {
        span_len += 1;
    }

    Text_t slice = Text$slice(text, I(i+1), I(i+span_len));

    // Step over the delimiter and any further delimiter graphemes that follow it.
    i += span_len + 1;
    while (i < text.length && _has_grapheme(&delim_state, Text$get_grapheme_fast(&state->state, i))) {
        i += 1;
    }
    state->i = i;
    return slice;
}
// Build a closure that lazily yields the pieces of `text` separated by any
// grapheme found in `delimiters`. Each call of the closure runs
// next_split_any() on the shared state.
public Closure_t Text$by_split_any(Text_t text, Text_t delimiters)
{
    split_iter_state_t *iter = new(split_iter_state_t, .state=NEW_TEXT_ITER_STATE(text), .i=0, .delimiter=delimiters);
    Closure_t closure = {
        .fn=(void*)next_split_any,
        .userdata=iter,
    };
    return closure;
}
PUREFUNC public bool Text$equal_values(Text_t a, Text_t b)

View File

@ -50,6 +50,16 @@ Text_t Text$as_text(const void *text, bool colorize, const TypeInfo_t *info);
Text_t Text$quoted(Text_t str, bool colorize, Text_t quotation_mark);
PUREFUNC bool Text$starts_with(Text_t text, Text_t prefix);
PUREFUNC bool Text$ends_with(Text_t text, Text_t suffix);
Text_t Text$without_prefix(Text_t text, Text_t prefix);
Text_t Text$without_suffix(Text_t text, Text_t suffix);
Text_t Text$replace(Text_t text, Text_t target, Text_t replacement);
Text_t Text$translate(Text_t text, Table_t translations);
bool Text$has(Text_t text, Text_t target);
Array_t Text$split(Text_t text, Text_t delimiter);
Array_t Text$split_any(Text_t text, Text_t delimiters);
Closure_t Text$by_split(Text_t text, Text_t delimiter);
Closure_t Text$by_split_any(Text_t text, Text_t delimiters);
Text_t Text$trim(Text_t text, Text_t to_trim, bool left, bool right);
char *Text$as_c_string(Text_t text);
__attribute__((format(printf, 1, 2)))
public Text_t Text$format(const char *fmt, ...);

View File

@ -20,7 +20,6 @@
#include "nums.h"
#include "optionals.h"
#include "paths.h"
#include "patterns.h"
#include "pointers.h"
#include "print.h"
#include "rng.h"

View File

@ -21,7 +21,6 @@
#include "stdlib/datatypes.h"
#include "stdlib/integers.h"
#include "stdlib/optionals.h"
#include "stdlib/patterns.h"
#include "stdlib/paths.h"
#include "stdlib/print.h"
#include "stdlib/text.h"
@ -294,7 +293,12 @@ int main(int argc, char *argv[])
// Turn a library name into an identifier-safe text: every byte that is not an
// ASCII letter, digit, or underscore is replaced with '_'.
Text_t escape_lib_name(Text_t lib_name)
{
    char *libname_id = String(lib_name);
    for (char *p = libname_id; *p; p++) {
        // isalnum() requires a value representable as unsigned char (or EOF);
        // passing a negative plain `char` (e.g. a UTF-8 byte) is undefined behavior.
        if (!isalnum((unsigned char)*p) && *p != '_')
            *p = '_';
    }
    return Text$from_str(libname_id);
}
Path_t build_file(Path_t path, const char *extension)

View File

@ -12,7 +12,6 @@
#include "cordhelpers.h"
#include "environment.h"
#include "parse.h"
#include "stdlib/patterns.h"
#include "stdlib/paths.h"
#include "stdlib/tables.h"
#include "stdlib/text.h"
@ -195,8 +194,11 @@ static env_t *load_module(env_t *env, ast_t *module_ast)
env_t *module_env = fresh_scope(env);
Table$str_set(env->imports, use->path, module_env);
char *libname_id = Text$as_c_string(
Text$replace(Text$from_str(use->path), Pattern("{1+ !alphanumeric}"), Text("_"), Pattern(""), false));
char *libname_id = String(use->path);
for (char *p = libname_id; *p; p++) {
if (!isalnum(*p) && *p != '_')
*p = '_';
}
module_env->libname = libname_id;
for (size_t i = 0; i < tm_files.gl_pathc; i++) {
const char *filename = tm_files.gl_pathv[i];
@ -269,6 +271,14 @@ void prebind_statement(env_t *env, ast_t *statement)
prebind_statement(ns_env, stmt->ast);
break;
}
case Extend: {
auto extend = Match(statement, Extend);
env_t *ns_env = namespace_env(env, extend->name);
ns_env->libname = env->libname;
for (ast_list_t *stmt = extend->body ? Match(extend->body, Block)->statements : NULL; stmt; stmt = stmt->next)
prebind_statement(ns_env, stmt->ast);
break;
}
default: break;
}
}
@ -435,6 +445,14 @@ void bind_statement(env_t *env, ast_t *statement)
bind_statement(ns_env, stmt->ast);
break;
}
case Extend: {
auto extend = Match(statement, Extend);
env_t *ns_env = namespace_env(env, extend->name);
ns_env->libname = env->libname;
for (ast_list_t *stmt = extend->body ? Match(extend->body, Block)->statements : NULL; stmt; stmt = stmt->next)
bind_statement(ns_env, stmt->ast);
break;
}
case Use: {
env_t *module_env = load_module(env, statement);
if (!module_env) break;
@ -940,7 +958,7 @@ type_t *get_type(env_t *env, ast_t *ast)
// Early out if the type is knowable without any context from the block:
switch (last->ast->tag) {
case UpdateAssign: case Assign: case Declare: case FunctionDef: case ConvertDef: case StructDef: case EnumDef: case LangDef:
case UpdateAssign: case Assign: case Declare: case FunctionDef: case ConvertDef: case StructDef: case EnumDef: case LangDef: case Extend:
return Type(VoidType);
default: break;
}
@ -1240,7 +1258,7 @@ type_t *get_type(env_t *env, ast_t *ast)
return Type(ClosureType, Type(FunctionType, .args=args, .ret=ret));
}
case FunctionDef: case ConvertDef: case StructDef: case EnumDef: case LangDef: {
case FunctionDef: case ConvertDef: case StructDef: case EnumDef: case LangDef: case Extend: {
return Type(VoidType);
}
@ -1399,7 +1417,7 @@ PUREFUNC bool is_discardable(env_t *env, ast_t *ast)
{
switch (ast->tag) {
case UpdateAssign: case Assign: case Declare: case FunctionDef: case ConvertDef: case StructDef: case EnumDef:
case LangDef: case Use:
case LangDef: case Use: case Extend:
return true;
default: break;
}

View File

@ -1,12 +1,12 @@
lang HTML:
HEADER := $HTML"<!DOCTYPE HTML>"
convert(t:Text->HTML):
t = t:replace_all({
$/&/="&amp;",
$/</="&lt;",
$/>/="&gt;",
$/"/="&quot",
$/'/="&#39;",
t = t:translate({
"&"="&amp;",
"<"="&lt;",
">"="&gt;",
'"'="&quot",
"'"="&#39;",
})
return HTML.from_text(t)

View File

@ -74,45 +74,24 @@ func main():
>> amelie2:codepoint_names()
= ["LATIN CAPITAL LETTER A", "LATIN SMALL LETTER M", "LATIN SMALL LETTER E WITH ACUTE", "LATIN SMALL LETTER L", "LATIN SMALL LETTER I", "LATIN SMALL LETTER E"]
>> "Hello":replace($/e/, "X")
>> "Hello":replace("e", "X")
= "HXllo"
>> "Hello":has($/l/)
>> "Hello":has("l")
= yes
>> "Hello":has($/l{end}/)
= no
>> "Hello":has($/{start}l/)
>> "Hello":has("x")
= no
>> "Hello":has($/o/)
= yes
>> "Hello":has($/o{end}/)
= yes
>> "Hello":has($/{start}o/)
= no
>> "Hello":has($/H/)
= yes
>> "Hello":has($/H{end}/)
= no
>> "Hello":has($/{start}H/)
= yes
>> "Hello":replace($/l/, "")
>> "Hello":replace("l", "")
= "Heo"
>> "xxxx":replace($/x/, "")
>> "xxxx":replace("x", "")
= ""
>> "xxxx":replace($/y/, "")
>> "xxxx":replace("y", "")
= "xxxx"
>> "One two three four five six":replace($/e /, "")
>> "One two three four five six":replace("e ", "")
= "Ontwo threfour fivsix"
>> " one ":replace($/{start}{space}/, "")
= "one "
>> " one ":replace($/{space}{end}/, "")
= " one"
>> amelie:has($/$amelie2/)
>> amelie:has(amelie2)
= yes
>> multiline := "
@ -138,11 +117,6 @@ func main():
>> ${one {nested} two $(1+2)}
= "one {nested} two 3"
>> "one two three":replace($/{alpha}/, "")
= " "
>> "one two three":replace($/{alpha}/, "word")
= "word word word"
c := "É̩"
>> c:codepoint_names()
= ["LATIN CAPITAL LETTER E WITH ACUTE", "COMBINING VERTICAL LINE BELOW"]
@ -165,18 +139,29 @@ func main():
= [:Text]
!! Test splitting and joining text:
>> "one two three":split($/ /)
>> "one,, two,three":split(",")
= ["one", "", " two", "three"]
>> [t for t in "one,, two,three":by_split(",")]
= ["one", "", " two", "three"]
>> "one,, two,three":split_any(", ")
= ["one", "two", "three"]
>> "one,two,three,":split($/,/)
= ["one", "two", "three", ""]
>> "one two three":split($/{space}/)
>> [t for t in "one,, two,three":by_split_any(", ")]
= ["one", "two", "three"]
>> ",one,, two,three,":split(",")
= ["", "one", "", " two", "three", ""]
>> [t for t in ",one,, two,three,":by_split(",")]
= ["", "one", "", " two", "three", ""]
>> ",one,, two,three,":split_any(", ")
= ["", "one", "two", "three", ""]
>> [t for t in ",one,, two,three,":by_split_any(", ")]
= ["", "one", "two", "three", ""]
>> "abc":split($//)
>> "abc":split()
= ["a", "b", "c"]
>> "one two three":split_any()
= ["one", "two", "three"]
>> ", ":join(["one", "two", "three"])
= "one, two, three"
@ -192,35 +177,6 @@ func main():
>> "":split()
= [:Text]
!! Test text:find_all()
>> " #one #two #three ":find_all($/#{alpha}/)
= [Match(text="#one", index=2, captures=["one"]), Match(text="#two", index=8, captures=["two"]), Match(text="#three", index=13, captures=["three"])]
>> " #one #two #three ":find_all($/#{!space}/)
= [Match(text="#one", index=2, captures=["one"]), Match(text="#two", index=8, captures=["two"]), Match(text="#three", index=13, captures=["three"])]
>> " ":find_all($/{alpha}/)
= [:Match]
>> " foo(baz(), 1) doop() ":find_all($/{id}(?)/)
= [Match(text="foo(baz(), 1)", index=2, captures=["foo", "baz(), 1"]), Match(text="doop()", index=17, captures=["doop", ""])]
>> "":find_all($Pattern'')
= [:Match]
>> "Hello":find_all($Pattern'')
= [:Match]
!! Test text:find()
>> " one two three ":find($/{id}/, start=-999)
= none : Match
>> " one two three ":find($/{id}/, start=999)
= none : Match
>> " one two three ":find($/{id}/)
= Match(text="one", index=2, captures=["one"])?
>> " one two three ":find($/{id}/, start=5)
= Match(text="two", index=8, captures=["two"])?
!! Test text slicing:
>> "abcdef":slice()
= "abcdef"
@ -248,64 +204,15 @@ func main():
>> Text.from_codepoint_names(["not a valid name here buddy"])
= none : Text
>> "one two; three four":find_all($/; {..}/)
= [Match(text="; three four", index=8, captures=["three four"])]
>> "Hello":replace("ello", "i")
= "Hi"
malicious := "{xxx}"
>> $/$malicious/
= $/{1{}xxx}/
>> "Hello":replace($/{lower}/, "(\0)")
= "H(ello)"
>> " foo(xyz) foo(yyy) foo(z()) ":replace($/foo(?)/, "baz(\1)")
= " baz(xyz) baz(yyy) baz(z()) "
>> "<tag>":replace_all({$/</="&lt;", $/>/="&gt;"})
>> "<tag>":translate({"<"="&lt;", ">"="&gt;"})
= "&lt;tag&gt;"
>> " BAD(x, fn(y), BAD(z), w) ":replace($/BAD(?)/, "good(\1)", recursive=yes)
= " good(x, fn(y), good(z), w) "
>> " BAD(x, fn(y), BAD(z), w) ":replace($/BAD(?)/, "good(\1)", recursive=no)
= " good(x, fn(y), BAD(z), w) "
>> "Hello":matches($/{id}/)
= ["Hello"]?
>> "Hello":matches($/{lower}/)
= none : [Text]
>> "Hello":matches($/{upper}/)
= none : [Text]
>> "Hello...":matches($/{id}/)
= none : [Text]
if matches := "hello world":matches($/{id} {id}/):
>> matches
= ["hello", "world"]
else:
fail("Failed to match")
>> "hello world":map($/world/, func(m:Match): m.text:upper())
= "hello WORLD"
>> "Abc":repeat(3)
= "AbcAbcAbc"
>> " abc def ":trim()
= "abc def"
>> " abc123def ":trim($/{!digit}/)
= "123"
>> " abc123def ":trim($/{!digit}/, trim_left=no)
= " abc123"
>> " abc123def ":trim($/{!digit}/, trim_right=no)
= "123def "
# Only trim single whole matches that bookend the text:
>> "AbcAbcxxxxxxxxAbcAbc":trim($/Abc/)
= "AbcxxxxxxxxAbc"
>> "A=B=C=D":replace($/{..}={..}/, "1:(\1) 2:(\2)")
= "1:(A) 2:(B=C=D)"
>> "abcde":starts_with("ab")
= yes
>> "abcde":starts_with("bc")
@ -316,6 +223,16 @@ func main():
>> "abcde":starts_with("cd")
= no
>> "abcde":without_prefix("ab")
= "cde"
>> "abcde":without_suffix("ab")
= "abcde"
>> "abcde":without_prefix("de")
= "abcde"
>> "abcde":without_suffix("de")
= "abc"
>> ("hello" ++ " " ++ "Amélie"):reversed()
= "eilémA olleh"
@ -387,3 +304,13 @@ func main():
>> cowboy:middle_pad(4)
= " 🤠 "
>> " one, ":trim(" ,")
= "one"
>> " one, ":trim(" ,", left=no)
= " one"
>> " one, ":trim(" ,", right=no)
= "one, "
>> " ":trim(" ,")
= ""
>> " ":trim(" ,", left=no)
= ""