diff options
| author | Bruce Hill <bruce@bruce-hill.com> | 2025-08-16 17:21:01 -0400 |
|---|---|---|
| committer | Bruce Hill <bruce@bruce-hill.com> | 2025-08-16 17:21:01 -0400 |
| commit | c72b0406a32ffc3f04324f7b6c321486762fca41 (patch) | |
| tree | 244e51c858890ea2ffb8c74a2c33c81b79de376e | |
| parent | 849fd423a759edf1b58b548a6148c177a6f8cd71 (diff) | |
Improved parsing and prefix/suffix matching using a `remainder`
parameter
37 files changed, 528 insertions, 178 deletions
@@ -10,10 +10,17 @@ number and exit. - Added `tomo --prefix` to print the Tomo install prefix. - Sets now support infix operations for `and`, `or`, `xor`, and `-`. -- Added Path.sibling(). -- Added Path.has_extension(). -- Added Table.with_fallback(). -- Added Int*.get_bit() and Byte.get_bit(). +- Added `Path.sibling()`. +- Added `Path.has_extension()`. +- Added `Table.with_fallback()`. +- Added `Int*.get_bit()` and `Byte.get_bit()`. +- Added `Byte.parse()` to parse bytes from text. +- Added optional `remainder` parameter to `parse()` methods, which (if + non-none) receives the remaining text after the match. If `none`, the match + will fail unless it consumes the whole text. +- Added optional `remainder` parameter to `Text.starts_with()` and + `Text.ends_with()` to allow you to get the rest of the text without two + function calls. - Improved space efficiency of Text that contains non-ASCII codepoints. - Doctests now use equality checking instead of converting to text. - Fixed the following bugs: @@ -186,7 +186,7 @@ sleep(1.5) ## Bool.parse ```tomo -Bool.parse : func(text: Text -> Bool?) +Bool.parse : func(text: Text, remainder: &Text? = none -> Bool?) ``` Converts a text representation of a boolean value into a boolean. Acceptable boolean values are case-insensitive variations of `yes`/`no`, `y`/`n`, `true`/`false`, `on`/`off`. @@ -194,6 +194,7 @@ Converts a text representation of a boolean value into a boolean. Acceptable boo Argument | Type | Description | Default ---------|------|-------------|--------- text | `Text` | The string containing the boolean value. | - +remainder | `&Text?` | If non-none, this argument will be set to the remainder of the text after the matching part. If none, parsing will only succeed if the entire text matches. | `none` **Return:** `yes` if the string matches a recognized truthy boolean value; otherwise return `no`. @@ -207,6 +208,14 @@ text | `Text` | The string containing the boolean value. | - >> Bool.parse("???") = none : Bool? 
+>> Bool.parse("yesJUNK") += none : Bool? +remainder : Text +>> Bool.parse("yesJUNK", &remainder) += yes : Bool? +>> remainder += "JUNK" + ``` # Byte @@ -293,7 +302,7 @@ high | `Byte` | The upper bound to check (inclusive). | - ## Byte.parse ```tomo -Byte.parse : func(text: Text -> Byte?) +Byte.parse : func(text: Text, remainder: &Text? = none -> Byte?) ``` Parse a byte literal from text. @@ -301,6 +310,7 @@ Parse a byte literal from text. Argument | Type | Description | Default ---------|------|-------------|--------- text | `Text` | The text to parse. | - +remainder | `&Text?` | If non-none, this argument will be set to the remainder of the text after the matching part. If none, parsing will only succeed if the entire text matches. | `none` **Return:** The byte parsed from the text, if successful, otherwise `none`. @@ -308,9 +318,17 @@ text | `Text` | The text to parse. | - **Example:** ```tomo >> Byte.parse("5") -= Byte(5)? += Byte(5) : Byte? >> Byte.parse("asdf") -= none += none : Byte? + +>> Byte.parse("123xyz") += none : Byte? +remainder : Text +>> Byte.parse("123xyz", &remainder) += Byte(123) : Byte? +>> remainder += "xyz" ``` ## Byte.to @@ -614,7 +632,7 @@ stop if i == 10 ## Int.parse ```tomo -Int.parse : func(text: Text -> Int?) +Int.parse : func(text: Text, remainder: &Text? = none -> Int?) ``` Converts a text representation of an integer into an integer. @@ -622,6 +640,7 @@ Converts a text representation of an integer into an integer. Argument | Type | Description | Default ---------|------|-------------|--------- text | `Text` | The text containing the integer. | - +remainder | `&Text?` | If non-none, this argument will be set to the remainder of the text after the matching part. If none, parsing will only succeed if the entire text matches. | `none` **Return:** The integer represented by the text. If the given text contains a value outside of the representable range or if the entire text can't be parsed as an integer, `none` will be returned. 
@@ -633,6 +652,14 @@ text | `Text` | The text containing the integer. | - >> Int.parse("0xFF") = 255 : Int? +>> Int.parse("123xyz") += none +remainder : Text +>> Int.parse("123xyz", &remainder) += 123 : Int? +>> remainder += "xyz" + # Can't parse: >> Int.parse("asdf") = none : Int? @@ -2250,7 +2277,7 @@ y | `Num` | The direction towards which to find the next representable value. | ## Num.parse ```tomo -Num.parse : func(text: Text -> Num?) +Num.parse : func(text: Text, remainder: &Text? = none -> Num?) ``` Converts a text representation of a number into a floating-point number. @@ -2258,6 +2285,7 @@ Converts a text representation of a number into a floating-point number. Argument | Type | Description | Default ---------|------|-------------|--------- text | `Text` | The text containing the number. | - +remainder | `&Text?` | If non-none, this argument will be set to the remainder of the text after the matching part. If none, parsing will only succeed if the entire text matches. | `none` **Return:** The number represented by the text or `none` if the entire text can't be parsed as a number. @@ -2265,9 +2293,17 @@ text | `Text` | The text containing the number. | - **Example:** ```tomo >> Num.parse("3.14") -= 3.14 += 3.14 : Num? >> Num.parse("1e3") -= 1000 += 1000 : Num? + +>> Num.parse("1.5junk") += none : Num? +remainder : Text +>> Num.parse("1.5junk", &remainder) += 1.5 : Num? +>> remainder += "junk" ``` ## Num.percent @@ -4218,7 +4254,7 @@ text | `Text` | The text from which to extract codepoint names. | - ## Text.ends_with ```tomo -Text.ends_with : func(text: Text, suffix: Text -> Bool) +Text.ends_with : func(text: Text, suffix: Text, remainder: &Text? = none -> Bool) ``` Checks if the `Text` ends with a literal suffix text. @@ -4227,6 +4263,7 @@ Argument | Type | Description | Default ---------|------|-------------|--------- text | `Text` | The text to be searched. | - suffix | `Text` | The literal suffix text to check for. 
| - +remainder | `&Text?` | If non-none, this value will be set to the rest of the text up to the trailing suffix. If the suffix is not found, this value will be set to the original text. | `none` **Return:** `yes` if the text has the target, `no` otherwise. @@ -4235,6 +4272,11 @@ suffix | `Text` | The literal suffix text to check for. | - ```tomo >> "hello world".ends_with("world") = yes +remainder : Text +>> "hello world".ends_with("world", &remainder) += yes +>> remainder += "hello " ``` ## Text.from @@ -4712,7 +4754,7 @@ delimiters | `Text` | A text containing delimiters to use for splitting the text ## Text.starts_with ```tomo -Text.starts_with : func(text: Text, prefix: Text -> Bool) +Text.starts_with : func(text: Text, prefix: Text, remainder: &Text? = none -> Bool) ``` Checks if the `Text` starts with a literal prefix text. @@ -4721,6 +4763,7 @@ Argument | Type | Description | Default ---------|------|-------------|--------- text | `Text` | The text to be searched. | - prefix | `Text` | The literal prefix text to check for. | - +remainder | `&Text?` | If non-none, this value will be set to the rest of the text after the prefix. If the prefix is not found, this value will be set to the original text. | `none` **Return:** `yes` if the text has the given prefix, `no` otherwise. @@ -4729,6 +4772,11 @@ prefix | `Text` | The literal prefix text to check for. | - ```tomo >> "hello world".starts_with("hello") = yes +remainder : Text +>> "hello world".starts_with("hello", &remainder) += yes +>> remainder += " world" ``` ## Text.title diff --git a/api/booleans.md b/api/booleans.md index d45be505..ab08909d 100644 --- a/api/booleans.md +++ b/api/booleans.md @@ -6,7 +6,7 @@ ## Bool.parse ```tomo -Bool.parse : func(text: Text -> Bool?) +Bool.parse : func(text: Text, remainder: &Text? = none -> Bool?) ``` Converts a text representation of a boolean value into a boolean. 
Acceptable boolean values are case-insensitive variations of `yes`/`no`, `y`/`n`, `true`/`false`, `on`/`off`. @@ -14,6 +14,7 @@ Converts a text representation of a boolean value into a boolean. Acceptable boo Argument | Type | Description | Default ---------|------|-------------|--------- text | `Text` | The string containing the boolean value. | - +remainder | `&Text?` | If non-none, this argument will be set to the remainder of the text after the matching part. If none, parsing will only succeed if the entire text matches. | `none` **Return:** `yes` if the string matches a recognized truthy boolean value; otherwise return `no`. @@ -27,4 +28,12 @@ text | `Text` | The string containing the boolean value. | - >> Bool.parse("???") = none : Bool? +>> Bool.parse("yesJUNK") += none : Bool? +remainder : Text +>> Bool.parse("yesJUNK", &remainder) += yes : Bool? +>> remainder += "JUNK" + ``` diff --git a/api/booleans.yaml b/api/booleans.yaml index 0831ac18..1d9d68fd 100644 --- a/api/booleans.yaml +++ b/api/booleans.yaml @@ -13,6 +13,12 @@ Bool.parse: type: 'Text' description: > The string containing the boolean value. + remainder: + type: '&Text?' + default: 'none' + description: > + If non-none, this argument will be set to the remainder of the text after the matching part. + If none, parsing will only succeed if the entire text matches. example: | >> Bool.parse("yes") = yes : Bool? @@ -20,4 +26,11 @@ Bool.parse: = no : Bool? >> Bool.parse("???") = none : Bool? - + + >> Bool.parse("yesJUNK") + = none : Bool? + remainder : Text + >> Bool.parse("yesJUNK", &remainder) + = yes : Bool? + >> remainder + = "JUNK" diff --git a/api/bytes.md b/api/bytes.md index 908d78e2..fe8dddb1 100644 --- a/api/bytes.md +++ b/api/bytes.md @@ -86,7 +86,7 @@ high | `Byte` | The upper bound to check (inclusive). | - ## Byte.parse ```tomo -Byte.parse : func(text: Text -> Byte?) +Byte.parse : func(text: Text, remainder: &Text? = none -> Byte?) ``` Parse a byte literal from text. 
@@ -94,6 +94,7 @@ Parse a byte literal from text. Argument | Type | Description | Default ---------|------|-------------|--------- text | `Text` | The text to parse. | - +remainder | `&Text?` | If non-none, this argument will be set to the remainder of the text after the matching part. If none, parsing will only succeed if the entire text matches. | `none` **Return:** The byte parsed from the text, if successful, otherwise `none`. @@ -101,9 +102,17 @@ text | `Text` | The text to parse. | - **Example:** ```tomo >> Byte.parse("5") -= Byte(5)? += Byte(5) : Byte? >> Byte.parse("asdf") -= none += none : Byte? + +>> Byte.parse("123xyz") += none : Byte? +remainder : Text +>> Byte.parse("123xyz", &remainder) += Byte(123) : Byte? +>> remainder += "xyz" ``` ## Byte.to diff --git a/api/bytes.yaml b/api/bytes.yaml index 2785513d..f7b8cb5d 100644 --- a/api/bytes.yaml +++ b/api/bytes.yaml @@ -54,7 +54,7 @@ Byte.hex: example: | >> Byte(18).hex() = "0x12" - + Byte.is_between: short: test if inside a range description: > @@ -83,7 +83,7 @@ Byte.is_between: = no >> Byte(7).is_between(1, 7) = yes - + Byte.parse: short: convert text to a byte description: > @@ -97,12 +97,26 @@ Byte.parse: type: 'Text' description: > The text to parse. + remainder: + type: '&Text?' + default: 'none' + description: > + If non-none, this argument will be set to the remainder of the text after the matching part. + If none, parsing will only succeed if the entire text matches. example: | >> Byte.parse("5") - = Byte(5)? + = Byte(5) : Byte? >> Byte.parse("asdf") - = none - + = none : Byte? + + >> Byte.parse("123xyz") + = none : Byte? + remainder : Text + >> Byte.parse("123xyz", &remainder) + = Byte(123) : Byte? 
+ >> remainder + = "xyz" + Byte.to: short: iterate over a range of bytes description: > @@ -132,7 +146,7 @@ Byte.to: = [Byte(2), Byte(3), Byte(4), Byte(5)] >> [x for x in Byte(5).to(2)] = [Byte(5), Byte(4), Byte(3), Byte(2)] - + >> [x for x in Byte(2).to(5, step=2)] = [Byte(2), Byte(4)] - + diff --git a/api/integers.md b/api/integers.md index efb891bf..6468589e 100644 --- a/api/integers.md +++ b/api/integers.md @@ -272,7 +272,7 @@ stop if i == 10 ## Int.parse ```tomo -Int.parse : func(text: Text -> Int?) +Int.parse : func(text: Text, remainder: &Text? = none -> Int?) ``` Converts a text representation of an integer into an integer. @@ -280,6 +280,7 @@ Converts a text representation of an integer into an integer. Argument | Type | Description | Default ---------|------|-------------|--------- text | `Text` | The text containing the integer. | - +remainder | `&Text?` | If non-none, this argument will be set to the remainder of the text after the matching part. If none, parsing will only succeed if the entire text matches. | `none` **Return:** The integer represented by the text. If the given text contains a value outside of the representable range or if the entire text can't be parsed as an integer, `none` will be returned. @@ -291,6 +292,14 @@ text | `Text` | The text containing the integer. | - >> Int.parse("0xFF") = 255 : Int? +>> Int.parse("123xyz") += none +remainder : Text +>> Int.parse("123xyz", &remainder) += 123 : Int? +>> remainder += "xyz" + # Can't parse: >> Int.parse("asdf") = none : Int? 
diff --git a/api/integers.yaml b/api/integers.yaml index a91a21ce..4d7e423f 100644 --- a/api/integers.yaml +++ b/api/integers.yaml @@ -14,7 +14,7 @@ Int.abs: example: | >> (-10).abs() = 10 - + Int.choose: short: binomial coefficient description: > @@ -38,7 +38,7 @@ Int.choose: example: | >> (4).choose(2) = 6 - + Int.clamped: short: clamp an integer description: > @@ -64,7 +64,7 @@ Int.clamped: example: | >> (2).clamped(5, 10) = 5 - + Int.factorial: short: factorial description: > @@ -147,7 +147,7 @@ Int.hex: example: | >> (255).hex(digits=4, uppercase=yes, prefix=yes) = "0x00FF" - + Int.is_between: short: test if an int is in a range description: > @@ -176,7 +176,7 @@ Int.is_between: = no >> (7).is_between(1, 7) = yes - + Int.is_prime: short: check if an integer is prime description: > @@ -205,7 +205,7 @@ Int.is_prime: = yes >> (6).is_prime() = no - + Int.next_prime: short: get the next prime description: > @@ -227,7 +227,7 @@ Int.next_prime: example: | >> (11).next_prime() = 13 - + Int.octal: short: convert to octal description: > @@ -254,7 +254,7 @@ Int.octal: example: | >> (64).octal(digits=4, prefix=yes) = "0o0100" - + Int.onward: short: iterate from a number onward description: > @@ -281,7 +281,7 @@ Int.onward: stop if i == 10 >> nums[] = [5, 6, 7, 8, 9, 10] - + Int.parse: short: convert text to integer description: > @@ -297,20 +297,34 @@ Int.parse: type: 'Text' description: > The text containing the integer. + remainder: + type: '&Text?' + default: 'none' + description: > + If non-none, this argument will be set to the remainder of the text after the matching part. + If none, parsing will only succeed if the entire text matches. example: | >> Int.parse("123") = 123 : Int? >> Int.parse("0xFF") = 255 : Int? - + + >> Int.parse("123xyz") + = none + remainder : Text + >> Int.parse("123xyz", &remainder) + = 123 : Int? + >> remainder + = "xyz" + # Can't parse: >> Int.parse("asdf") = none : Int? - + # Outside valid range: >> Int8.parse("9999999") = none : Int8? 
- + Int.prev_prime: short: get the previous prime description: > @@ -334,7 +348,7 @@ Int.prev_prime: example: | >> (11).prev_prime() = 7 - + Int.sqrt: short: square root description: > @@ -353,7 +367,7 @@ Int.sqrt: = 4 >> (17).sqrt() = 4 - + Int.to: short: iterate a range of integers description: > @@ -383,7 +397,7 @@ Int.to: = [2, 3, 4, 5] >> [x for x in (5).to(2)] = [5, 4, 3, 2] - + >> [x for x in (2).to(5, step=2)] = [2, 4] - + diff --git a/api/nums.md b/api/nums.md index 471a7739..ef771171 100644 --- a/api/nums.md +++ b/api/nums.md @@ -885,7 +885,7 @@ y | `Num` | The direction towards which to find the next representable value. | ## Num.parse ```tomo -Num.parse : func(text: Text -> Num?) +Num.parse : func(text: Text, remainder: &Text? = none -> Num?) ``` Converts a text representation of a number into a floating-point number. @@ -893,6 +893,7 @@ Converts a text representation of a number into a floating-point number. Argument | Type | Description | Default ---------|------|-------------|--------- text | `Text` | The text containing the number. | - +remainder | `&Text?` | If non-none, this argument will be set to the remainder of the text after the matching part. If none, parsing will only succeed if the entire text matches. | `none` **Return:** The number represented by the text or `none` if the entire text can't be parsed as a number. @@ -900,9 +901,17 @@ text | `Text` | The text containing the number. | - **Example:** ```tomo >> Num.parse("3.14") -= 3.14 += 3.14 : Num? >> Num.parse("1e3") -= 1000 += 1000 : Num? + +>> Num.parse("1.5junk") += none : Num? +remainder : Text +>> Num.parse("1.5junk", &remainder) += 1.5 : Num? +>> remainder += "junk" ``` ## Num.percent diff --git a/api/nums.yaml b/api/nums.yaml index e666a0d6..2c18fac5 100644 --- a/api/nums.yaml +++ b/api/nums.yaml @@ -686,11 +686,25 @@ Num.parse: type: 'Text' description: > The text containing the number. + remainder: + type: '&Text?' 
+ default: 'none' + description: > + If non-none, this argument will be set to the remainder of the text after the matching part. + If none, parsing will only succeed if the entire text matches. example: | >> Num.parse("3.14") - = 3.14 + = 3.14 : Num? >> Num.parse("1e3") - = 1000 + = 1000 : Num? + + >> Num.parse("1.5junk") + = none : Num? + remainder : Text + >> Num.parse("1.5junk", &remainder) + = 1.5 : Num? + >> remainder + = "junk" Num.percent: short: format as a percentage diff --git a/api/text.md b/api/text.md index e500e7bf..bdff6841 100644 --- a/api/text.md +++ b/api/text.md @@ -202,7 +202,7 @@ text | `Text` | The text from which to extract codepoint names. | - ## Text.ends_with ```tomo -Text.ends_with : func(text: Text, suffix: Text -> Bool) +Text.ends_with : func(text: Text, suffix: Text, remainder: &Text? = none -> Bool) ``` Checks if the `Text` ends with a literal suffix text. @@ -211,6 +211,7 @@ Argument | Type | Description | Default ---------|------|-------------|--------- text | `Text` | The text to be searched. | - suffix | `Text` | The literal suffix text to check for. | - +remainder | `&Text?` | If non-none, this value will be set to the rest of the text up to the trailing suffix. If the suffix is not found, this value will be set to the original text. | `none` **Return:** `yes` if the text has the target, `no` otherwise. @@ -219,6 +220,11 @@ suffix | `Text` | The literal suffix text to check for. | - ```tomo >> "hello world".ends_with("world") = yes +remainder : Text +>> "hello world".ends_with("world", &remainder) += yes +>> remainder += "hello " ``` ## Text.from @@ -696,7 +702,7 @@ delimiters | `Text` | A text containing delimiters to use for splitting the text ## Text.starts_with ```tomo -Text.starts_with : func(text: Text, prefix: Text -> Bool) +Text.starts_with : func(text: Text, prefix: Text, remainder: &Text? = none -> Bool) ``` Checks if the `Text` starts with a literal prefix text. 
@@ -705,6 +711,7 @@ Argument | Type | Description | Default ---------|------|-------------|--------- text | `Text` | The text to be searched. | - prefix | `Text` | The literal prefix text to check for. | - +remainder | `&Text?` | If non-none, this value will be set to the rest of the text after the prefix. If the prefix is not found, this value will be set to the original text. | `none` **Return:** `yes` if the text has the given prefix, `no` otherwise. @@ -713,6 +720,11 @@ prefix | `Text` | The literal prefix text to check for. | - ```tomo >> "hello world".starts_with("hello") = yes +remainder : Text +>> "hello world".starts_with("hello", &remainder) += yes +>> remainder += " world" ``` ## Text.title diff --git a/api/text.yaml b/api/text.yaml index eb9dc286..c8d70f0b 100644 --- a/api/text.yaml +++ b/api/text.yaml @@ -14,7 +14,7 @@ Text.as_c_string: example: | >> "Hello".as_c_string() = CString("Hello") - + Text.at: short: get a letter description: > @@ -39,7 +39,7 @@ Text.at: example: | >> "Amélie".at(3) = "é" - + Text.by_line: short: iterate by line description: > @@ -66,7 +66,7 @@ Text.by_line: for line in text.by_line() # Prints: "line one" then "line two": say(line) - + Text.by_split: short: iterate by a spliting text description: > @@ -97,7 +97,7 @@ Text.by_split: for chunk in text.by_split(",") # Prints: "one" then "two" then "three": say(chunk) - + Text.by_split_any: short: iterate by one of many splitting characters description: > @@ -128,7 +128,7 @@ Text.by_split_any: for chunk in text.by_split_any(",;") # Prints: "one" then "two" then "three": say(chunk) - + Text.bytes: short: get UTF8 bytes description: > @@ -146,7 +146,7 @@ Text.bytes: example: | >> "Amélie".bytes() = [65, 109, 195, 169, 108, 105, 101] - + Text.caseless_equals: short: case-insensitive comparison description: > @@ -173,11 +173,11 @@ Text.caseless_equals: example: | >> "A".caseless_equals("a") = yes - + # Turkish lowercase "I" is "ı" (dotless I), not "i" >> "I".caseless_equals("i", 
language="tr_TR") = no - + Text.codepoint_names: short: get unicode codepoint names description: > @@ -194,7 +194,7 @@ Text.codepoint_names: example: | >> "Amélie".codepoint_names() = ["LATIN CAPITAL LETTER A", "LATIN SMALL LETTER M", "LATIN SMALL LETTER E WITH ACUTE", "LATIN SMALL LETTER L", "LATIN SMALL LETTER I", "LATIN SMALL LETTER E"] - + Text.ends_with: short: check suffix description: > @@ -212,10 +212,21 @@ Text.ends_with: type: 'Text' description: > The literal suffix text to check for. + remainder: + type: '&Text?' + default: 'none' + description: > + If non-none, this value will be set to the rest of the text up to the trailing suffix. + If the suffix is not found, this value will be set to the original text. example: | >> "hello world".ends_with("world") = yes - + remainder : Text + >> "hello world".ends_with("world", &remainder) + = yes + >> remainder + = "hello " + Text.from: short: slice from a starting index description: > @@ -240,10 +251,10 @@ Text.from: example: | >> "hello".from(2) = "ello" - + >> "hello".from(-2) = "lo" - + Text.from_bytes: short: convert UTF8 byte list to text description: > @@ -263,7 +274,7 @@ Text.from_bytes: example: | >> Text.from_bytes([195, 133, 107, 101]) = "Åke" - + Text.from_c_string: short: convert C-style string to text description: > @@ -280,7 +291,7 @@ Text.from_c_string: example: | >> Text.from_c_string(CString("Hello")) = "Hello" - + Text.from_codepoint_names: short: convert list of unicode codepoint names to text description: > @@ -306,7 +317,7 @@ Text.from_codepoint_names: "LATIN SMALL LETTER E", ] = "Åke" - + Text.from_codepoints: short: convert UTF32 codepoints to text description: > @@ -326,7 +337,7 @@ Text.from_codepoints: example: | >> Text.from_codepoints([197, 107, 101]) = "Åke" - + Text.has: short: check for substring description: > @@ -349,7 +360,7 @@ Text.has: = yes >> "hello world".has("xxx") = no - + Text.join: short: concatenate with separator description: > @@ -370,7 +381,7 @@ Text.join: example: 
| >> ", ".join(["one", "two", "three"]) = "one, two, three" - + Text.middle_pad: short: pad text, centered description: > @@ -405,7 +416,7 @@ Text.middle_pad: = " x " >> "x".middle_pad(10, "ABC") = "ABCAxABCAB" - + Text.left_pad: short: left-pad text description: > @@ -440,7 +451,7 @@ Text.left_pad: = " x" >> "x".left_pad(5, "ABC") = "ABCAx" - + Text.lines: short: get list of lines description: > @@ -466,7 +477,7 @@ Text.lines: = ["one", "two", "three"] >> "".lines() = [] - + Text.lower: short: convert to lowercase description: > @@ -488,10 +499,10 @@ Text.lower: example: | >> "AMÉLIE".lower() = "amélie" - + >> "I".lower(language="tr_TR") >> "ı" - + Text.quoted: short: add quotation marks and escapes description: > @@ -518,7 +529,7 @@ Text.quoted: example: | >> "one\ntwo".quoted() = "\"one\\ntwo\"" - + Text.repeat: short: repeat text description: > @@ -539,7 +550,7 @@ Text.repeat: example: | >> "Abc".repeat(3) = "AbcAbcAbc" - + Text.replace: short: replace a substring description: > @@ -564,7 +575,7 @@ Text.replace: example: | >> "Hello world".replace("world", "there") = "Hello there" - + Text.reversed: short: get a reversed copy description: > @@ -581,7 +592,7 @@ Text.reversed: example: | >> "Abc".reversed() = "cbA" - + Text.right_pad: short: right-pad text description: > @@ -616,7 +627,7 @@ Text.right_pad: = "x " >> "x".right_pad(5, "ABC") = "xABCA" - + Text.slice: short: get a slice of a text description: > @@ -647,13 +658,13 @@ Text.slice: example: | >> "hello".slice(2, 3) = "el" - + >> "hello".slice(to=-2) = "hell" - + >> "hello".slice(from=2) = "ello" - + Text.split: short: split a text by a delimiter description: > @@ -681,10 +692,10 @@ Text.split: example: | >> "one,two,,three".split(",") = ["one", "two", "", "three"] - + >> "abc".split() = ["a", "b", "c"] - + Text.split_any: short: split a text by multiple delimiters description: > @@ -712,7 +723,7 @@ Text.split_any: example: | >> "one, two,,three".split_any(", ") = ["one", "two", "three"] - + 
Text.starts_with: short: check prefix description: > @@ -730,10 +741,21 @@ Text.starts_with: type: 'Text' description: > The literal prefix text to check for. + remainder: + type: '&Text?' + default: 'none' + description: > + If non-none, this value will be set to the rest of the text after the prefix. + If the prefix is not found, this value will be set to the original text. example: | >> "hello world".starts_with("hello") = yes - + remainder : Text + >> "hello world".starts_with("hello", &remainder) + = yes + >> remainder + = " world" + Text.title: short: titlecase description: > @@ -755,11 +777,11 @@ Text.title: example: | >> "amélie".title() = "Amélie" - + # In Turkish, uppercase "i" is "İ" >> "i".title(language="tr_TR") = "İ" - + Text.to: short: slice to an end index description: > @@ -784,10 +806,10 @@ Text.to: example: | >> "goodbye".to(3) = "goo" - + >> "goodbye".to(-2) = "goodby" - + Text.translate: short: perform multiple replacements description: > @@ -819,7 +841,7 @@ Text.translate: "'" = "'", }) = "A <tag> & an ampersand" - + Text.trim: short: trim characters description: > @@ -851,13 +873,13 @@ Text.trim: example: | >> " x y z \n".trim() = "x y z" - + >> "one,".trim(",") = "one" - + >> " xyz ".trim(right=no) = "xyz " - + Text.upper: short: uppercase description: > @@ -879,11 +901,11 @@ Text.upper: example: | >> "amélie".upper() = "AMÉLIE" - + # In Turkish, uppercase "i" is "İ" >> "i".upper(language="tr_TR") = "İ" - + Text.utf32_codepoints: short: get UTF32 codepoints description: > @@ -900,7 +922,7 @@ Text.utf32_codepoints: example: | >> "Amélie".utf32_codepoints() = [65, 109, 233, 108, 105, 101] - + Text.width: short: get display width description: > @@ -924,7 +946,7 @@ Text.width: = 6 >> "🤠".width() = 2 - + Text.without_prefix: short: remove prefix description: > @@ -948,7 +970,7 @@ Text.without_prefix: = "baz" >> "qux".without_prefix("foo:") = "qux" - + Text.without_suffix: short: remove suffix description: > @@ -972,4 +994,4 @@ 
Text.without_suffix: = "baz" >> "qux".without_suffix(".foo") = "qux" - + diff --git a/lib/patterns/CHANGES.md b/lib/patterns/CHANGES.md index 42ae752c..cf6254cb 100644 --- a/lib/patterns/CHANGES.md +++ b/lib/patterns/CHANGES.md @@ -1,5 +1,8 @@ # Version History +## v1.1 +- Added `Text.matching_pattern(text:Text, pattern:Pat, pos:Int = 1 -> PatternMatch?)` + ## v1.0 Initial version diff --git a/lib/patterns/patterns.c b/lib/patterns/patterns.c index 74d542b8..224a00a0 100644 --- a/lib/patterns/patterns.c +++ b/lib/patterns/patterns.c @@ -663,6 +663,8 @@ static pat_t parse_next_pat(TextIter_t *state, int64_t *index) case 'w': if (strcasecmp(prop_name, "word") == 0) { return PAT(PAT_FUNCTION, .fn=match_id); + } else if (strcasecmp(prop_name, "ws") == 0 || strcasecmp(prop_name, "whitespace") == 0) { + return PAT(PAT_PROPERTY, .property=UC_PROPERTY_WHITE_SPACE); } break; default: break; @@ -837,10 +839,10 @@ static OptionalPatternMatch find(Text_t text, Text_t pattern, Int_t from_index) PUREFUNC static bool Pattern$has(Text_t text, Text_t pattern) { - if (Text$starts_with(pattern, Text("{start}"))) { + if (Text$starts_with(pattern, Text("{start}"), &pattern)) { int64_t m = match(text, 0, pattern, 0, NULL, 0); return m >= 0; - } else if (Text$ends_with(text, Text("{end}"))) { + } else if (Text$ends_with(text, Text("{end}"), NULL)) { for (int64_t i = text.length-1; i >= 0; i--) { int64_t match_len = match(text, i, pattern, 0, NULL, 0); if (match_len >= 0 && i + match_len == text.length) @@ -860,6 +862,25 @@ static bool Pattern$matches(Text_t text, Text_t pattern) return (match_len == text.length); } +static bool Pattern$match_at(Text_t text, Text_t pattern, Int_t pos, PatternMatch *dest) +{ + int64_t start = Int64$from_int(pos, false) - 1; + capture_t captures[MAX_BACKREFS] = {}; + int64_t match_len = match(text, start, pattern, 0, captures, 0); + if (match_len < 0) + return false; + + List_t capture_list = {}; + for (int i = 0; captures[i].occupied; i++) { + Text_t
capture = Text$slice(text, I(captures[i].index+1), I(captures[i].index+captures[i].length)); + List$insert(&capture_list, &capture, I(0), sizeof(Text_t)); + } + dest->text = Text$slice(text, I(start+1), I(start+match_len)); + dest->index = I(start+1); + dest->captures = capture_list; + return true; +} + static OptionalList_t Pattern$captures(Text_t text, Text_t pattern) { capture_t captures[MAX_BACKREFS] = {}; diff --git a/lib/patterns/patterns.tm b/lib/patterns/patterns.tm index bab0c3dc..c5444b86 100644 --- a/lib/patterns/patterns.tm +++ b/lib/patterns/patterns.tm @@ -10,6 +10,12 @@ lang Pat return Pat.from_text("$n") extend Text + func matching_pattern(text:Text, pattern:Pat, pos:Int = 1 -> PatternMatch?) + result : PatternMatch + if C_code:Bool(Pattern$match_at(@text, @pattern, @pos, (void*)&@result)) + return result + return none + func matches_pattern(text:Text, pattern:Pat -> Bool) return C_code:Bool(Pattern$matches(@text, @pattern)) @@ -45,3 +51,16 @@ extend Text func trim_pattern(text:Text, pattern=$Pat"{space}", left=yes, right=yes -> Text) return C_code:Text(Pattern$trim(@text, @pattern, @left, @right)) + +func main() + >> "Hello world".matching_pattern($Pat'{id}') + >> "...Hello world".matching_pattern($Pat'{id}') +# func main(pattern:Pat, input=(/dev/stdin)) +# for line in input.by_line()! +# skip if not line.has_pattern(pattern) +# pos := 1 +# for match in line.by_pattern(pattern) +# say(line.slice(pos, match.index-1), newline=no) +# say("\033[34;1m$(match.text)\033[m", newline=no) +# pos = match.index + match.text.length +# say(line.from(pos), newline=yes) diff --git a/man/man3/tomo-Bool.parse.3 b/man/man3/tomo-Bool.parse.3 index 81830fb3..e1d5f3b7 100644 --- a/man/man3/tomo-Bool.parse.3 +++ b/man/man3/tomo-Bool.parse.3 @@ -2,14 +2,14 @@ .\" Copyright (c) 2025 Bruce Hill .\" All rights reserved. 
.\" -.TH Bool.parse 3 2025-04-30 "Tomo man-pages" +.TH Bool.parse 3 2025-08-16 "Tomo man-pages" .SH NAME Bool.parse \- parse into boolean .SH LIBRARY Tomo Standard Library .SH SYNOPSIS .nf -.BI Bool.parse\ :\ func(text:\ Text\ ->\ Bool?) +.BI Bool.parse\ :\ func(text:\ Text,\ remainder:\ &Text?\ =\ none\ ->\ Bool?) .fi .SH DESCRIPTION Converts a text representation of a boolean value into a boolean. Acceptable boolean values are case-insensitive variations of `yes`/`no`, `y`/`n`, `true`/`false`, `on`/`off`. @@ -23,6 +23,7 @@ lb lb lbx lb l l l l. Name Type Description Default text Text The string containing the boolean value. - +remainder &Text? If non-none, this argument will be set to the remainder of the text after the matching part. If none, parsing will only succeed if the entire text matches. none .TE .SH RETURN `yes` if the string matches a recognized truthy boolean value; otherwise return `no`. @@ -35,4 +36,12 @@ text Text The string containing the boolean value. - = no : Bool? >> Bool.parse("???") = none : Bool? + +>> Bool.parse("yesJUNK") += none : Bool? +remainder : Text +>> Bool.parse("yesJUNK", &remainder) += yes : Bool? +>> remainder += "JUNK" .EE diff --git a/man/man3/tomo-Byte.parse.3 b/man/man3/tomo-Byte.parse.3 index 6e4d3bb6..1beeb3a4 100644 --- a/man/man3/tomo-Byte.parse.3 +++ b/man/man3/tomo-Byte.parse.3 @@ -2,14 +2,14 @@ .\" Copyright (c) 2025 Bruce Hill .\" All rights reserved. .\" -.TH Byte.parse 3 2025-04-30 "Tomo man-pages" +.TH Byte.parse 3 2025-08-16 "Tomo man-pages" .SH NAME Byte.parse \- convert text to a byte .SH LIBRARY Tomo Standard Library .SH SYNOPSIS .nf -.BI Byte.parse\ :\ func(text:\ Text\ ->\ Byte?) +.BI Byte.parse\ :\ func(text:\ Text,\ remainder:\ &Text?\ =\ none\ ->\ Byte?) .fi .SH DESCRIPTION Parse a byte literal from text. @@ -23,6 +23,7 @@ lb lb lbx lb l l l l. Name Type Description Default text Text The text to parse. - +remainder &Text? 
If non-none, this argument will be set to the remainder of the text after the matching part. If none, parsing will only succeed if the entire text matches. none .TE .SH RETURN The byte parsed from the text, if successful, otherwise `none`. @@ -30,7 +31,15 @@ The byte parsed from the text, if successful, otherwise `none`. .SH EXAMPLES .EX >> Byte.parse("5") -= Byte(5)? += Byte(5) : Byte? >> Byte.parse("asdf") -= none += none : Byte? + +>> Byte.parse("123xyz") += none : Byte? +remainder : Text +>> Byte.parse("123xyz", &remainder) += Byte(123) : Byte? +>> remainder += "xyz" .EE diff --git a/man/man3/tomo-Int.parse.3 b/man/man3/tomo-Int.parse.3 index 8facec2a..07a00e85 100644 --- a/man/man3/tomo-Int.parse.3 +++ b/man/man3/tomo-Int.parse.3 @@ -2,14 +2,14 @@ .\" Copyright (c) 2025 Bruce Hill .\" All rights reserved. .\" -.TH Int.parse 3 2025-04-30 "Tomo man-pages" +.TH Int.parse 3 2025-08-16 "Tomo man-pages" .SH NAME Int.parse \- convert text to integer .SH LIBRARY Tomo Standard Library .SH SYNOPSIS .nf -.BI Int.parse\ :\ func(text:\ Text\ ->\ Int?) +.BI Int.parse\ :\ func(text:\ Text,\ remainder:\ &Text?\ =\ none\ ->\ Int?) .fi .SH DESCRIPTION Converts a text representation of an integer into an integer. @@ -23,6 +23,7 @@ lb lb lbx lb l l l l. Name Type Description Default text Text The text containing the integer. - +remainder &Text? If non-none, this argument will be set to the remainder of the text after the matching part. If none, parsing will only succeed if the entire text matches. none .TE .SH RETURN The integer represented by the text. If the given text contains a value outside of the representable range or if the entire text can't be parsed as an integer, `none` will be returned. @@ -34,6 +35,14 @@ The integer represented by the text. If the given text contains a value outside >> Int.parse("0xFF") = 255 : Int? +>> Int.parse("123xyz") += none +remainder : Text +>> Int.parse("123xyz", &remainder) += 123 : Int? 
+>> remainder += "xyz" + # Can't parse: >> Int.parse("asdf") = none : Int? diff --git a/man/man3/tomo-Num.parse.3 b/man/man3/tomo-Num.parse.3 index 48ab90c8..63165e59 100644 --- a/man/man3/tomo-Num.parse.3 +++ b/man/man3/tomo-Num.parse.3 @@ -2,14 +2,14 @@ .\" Copyright (c) 2025 Bruce Hill .\" All rights reserved. .\" -.TH Num.parse 3 2025-04-30 "Tomo man-pages" +.TH Num.parse 3 2025-08-16 "Tomo man-pages" .SH NAME Num.parse \- convert text to number .SH LIBRARY Tomo Standard Library .SH SYNOPSIS .nf -.BI Num.parse\ :\ func(text:\ Text\ ->\ Num?) +.BI Num.parse\ :\ func(text:\ Text,\ remainder:\ &Text?\ =\ none\ ->\ Num?) .fi .SH DESCRIPTION Converts a text representation of a number into a floating-point number. @@ -23,6 +23,7 @@ lb lb lbx lb l l l l. Name Type Description Default text Text The text containing the number. - +remainder &Text? If non-none, this argument will be set to the remainder of the text after the matching part. If none, parsing will only succeed if the entire text matches. none .TE .SH RETURN The number represented by the text or `none` if the entire text can't be parsed as a number. @@ -30,7 +31,15 @@ The number represented by the text or `none` if the entire text can't be parsed .SH EXAMPLES .EX >> Num.parse("3.14") -= 3.14 += 3.14 : Num? >> Num.parse("1e3") -= 1000 += 1000 : Num? + +>> Num.parse("1.5junk") += none : Num? +remainder : Text +>> Num.parse("1.5junk", &remainder) += 1.5 : Num? +>> remainder += "junk" .EE diff --git a/man/man3/tomo-Text.ends_with.3 b/man/man3/tomo-Text.ends_with.3 index 7d19109b..38fa4c0b 100644 --- a/man/man3/tomo-Text.ends_with.3 +++ b/man/man3/tomo-Text.ends_with.3 @@ -2,14 +2,14 @@ .\" Copyright (c) 2025 Bruce Hill .\" All rights reserved. 
.\" -.TH Text.ends_with 3 2025-04-30 "Tomo man-pages" +.TH Text.ends_with 3 2025-08-16 "Tomo man-pages" .SH NAME Text.ends_with \- check suffix .SH LIBRARY Tomo Standard Library .SH SYNOPSIS .nf -.BI Text.ends_with\ :\ func(text:\ Text,\ suffix:\ Text\ ->\ Bool) +.BI Text.ends_with\ :\ func(text:\ Text,\ suffix:\ Text,\ remainder:\ &Text?\ =\ none\ ->\ Bool) .fi .SH DESCRIPTION Checks if the `Text` ends with a literal suffix text. @@ -24,6 +24,7 @@ l l l l. Name Type Description Default text Text The text to be searched. - suffix Text The literal suffix text to check for. - +remainder &Text? If non-none, this value will be set to the rest of the text up to the trailing suffix. If the suffix is not found, this value will be set to the original text. none .TE .SH RETURN `yes` if the text has the target, `no` otherwise. @@ -32,4 +33,9 @@ suffix Text The literal suffix text to check for. - .EX >> "hello world".ends_with("world") = yes +remainder : Text +>> "hello world".ends_with("world", &remainder) += yes +>> remainder += "hello " .EE diff --git a/man/man3/tomo-Text.starts_with.3 b/man/man3/tomo-Text.starts_with.3 index 0894ec74..fafa2a55 100644 --- a/man/man3/tomo-Text.starts_with.3 +++ b/man/man3/tomo-Text.starts_with.3 @@ -2,14 +2,14 @@ .\" Copyright (c) 2025 Bruce Hill .\" All rights reserved. .\" -.TH Text.starts_with 3 2025-04-30 "Tomo man-pages" +.TH Text.starts_with 3 2025-08-16 "Tomo man-pages" .SH NAME Text.starts_with \- check prefix .SH LIBRARY Tomo Standard Library .SH SYNOPSIS .nf -.BI Text.starts_with\ :\ func(text:\ Text,\ prefix:\ Text\ ->\ Bool) +.BI Text.starts_with\ :\ func(text:\ Text,\ prefix:\ Text,\ remainder:\ &Text?\ =\ none\ ->\ Bool) .fi .SH DESCRIPTION Checks if the `Text` starts with a literal prefix text. @@ -24,6 +24,7 @@ l l l l. Name Type Description Default text Text The text to be searched. - prefix Text The literal prefix text to check for. - +remainder &Text? 
If non-none, this value will be set to the rest of the text after the prefix. If the prefix is not found, this value will be set to the original text. none .TE .SH RETURN `yes` if the text has the given prefix, `no` otherwise. @@ -32,4 +33,9 @@ prefix Text The literal prefix text to check for. - .EX >> "hello world".starts_with("hello") = yes +remainder : Text +>> "hello world".starts_with("hello", &remainder) += yes +>> remainder += " world" .EE diff --git a/src/compile.c b/src/compile.c index 1795bb38..eb250d74 100644 --- a/src/compile.c +++ b/src/compile.c @@ -1513,7 +1513,7 @@ static Text_t _compile_statement(env_t *env, ast_t *ast) struct { const char *name; binding_t *b; } *entry = closed_vars.entries.data + closed_vars.entries.stride*i; if (entry->b->type->tag == ModuleType) continue; - if (Text$starts_with(entry->b->code, Text("userdata->"))) { + if (Text$starts_with(entry->b->code, Text("userdata->"), NULL)) { Table$str_set(defer_env->locals, entry->name, entry->b); } else { Text_t defer_name = Texts("defer$", String(++defer_id), "$", entry->name); @@ -4254,7 +4254,7 @@ Text_t compile_function(env_t *env, Text_t name_code, ast_t *ast, Text_t *static definition = Texts(definition, wrapper); } else if (cache && cache->tag == Int) { assert(args); - OptionalInt64_t cache_size = Int64$parse(Text$from_str(Match(cache, Int)->str)); + OptionalInt64_t cache_size = Int64$parse(Text$from_str(Match(cache, Int)->str), NULL); Text_t pop_code = EMPTY_TEXT; if (cache->tag == Int && !cache_size.is_none && cache_size.value > 0) { // FIXME: this currently just deletes the first entry, but this should be more like a diff --git a/src/environment.c b/src/environment.c index 93b5a16c..3faad24d 100644 --- a/src/environment.c +++ b/src/environment.c @@ -85,14 +85,15 @@ env_t *global_env(bool source_mapping) {"Abort", Type(AbortType), Text("void"), Text("Abort$info"), {}}, {"Memory", Type(MemoryType), Text("Memory_t"), Text("Memory$info"), {}}, {"Bool", Type(BoolType), 
Text("Bool_t"), Text("Bool$info"), TypedList(ns_entry_t, - {"parse", "Bool$parse", "func(text:Text -> Bool?)"}, + {"parse", "Bool$parse", "func(text:Text, remainder:&Text? = none -> Bool?)"}, )}, {"Byte", Type(ByteType), Text("Byte_t"), Text("Byte$info"), TypedList(ns_entry_t, - {"max", "Byte$max", "Byte"}, {"get_bit", "Byte$get_bit", "func(x:Byte, bit_index:Int -> Bool)"}, {"hex", "Byte$hex", "func(byte:Byte, uppercase=yes, prefix=no -> Text)"}, {"is_between", "Byte$is_between", "func(x:Byte, low:Byte, high:Byte -> Bool)"}, + {"max", "Byte$max", "Byte"}, {"min", "Byte$min", "Byte"}, + {"parse", "Byte$parse", "func(text:Text, remainder:&Text? = none -> Byte?)"}, {"to", "Byte$to", "func(first:Byte, last:Byte, step:Int8?=none -> func(->Byte?))"}, )}, {"Int", Type(BigIntType), Text("Int_t"), Text("Int$info"), TypedList(ns_entry_t, @@ -118,7 +119,7 @@ env_t *global_env(bool source_mapping) {"next_prime", "Int$next_prime", "func(x:Int -> Int)"}, {"octal", "Int$octal", "func(i:Int, digits=0, prefix=yes -> Text)"}, {"onward", "Int$onward", "func(first:Int,step=1 -> func(->Int?))"}, - {"parse", "Int$parse", "func(text:Text -> Int?)"}, + {"parse", "Int$parse", "func(text:Text, remainder:&Text? = none -> Int?)"}, {"plus", "Int$plus", "func(x,y:Int -> Int)"}, {"power", "Int$power", "func(base:Int,exponent:Int -> Int)"}, #if __GNU_MP_VERSION >= 6 @@ -137,7 +138,7 @@ env_t *global_env(bool source_mapping) {"clamped", "Int64$clamped", "func(x,low,high:Int64 -> Int64)"}, {"divided_by", "Int64$divided_by", "func(x,y:Int64 -> Int64)"}, {"gcd", "Int64$gcd", "func(x,y:Int64 -> Int64)"}, - {"parse", "Int64$parse", "func(text:Text -> Int64?)"}, + {"parse", "Int64$parse", "func(text:Text, remainder:&Text? 
= none -> Int64?)"}, {"get_bit", "Int64$get_bit", "func(x:Int64, bit_index:Int -> Bool)"}, {"hex", "Int64$hex", "func(i:Int64, digits=0, uppercase=yes, prefix=yes -> Text)"}, {"is_between", "Int64$is_between", "func(x:Int64,low:Int64,high:Int64 -> Bool)"}, @@ -159,7 +160,7 @@ env_t *global_env(bool source_mapping) {"clamped", "Int32$clamped", "func(x,low,high:Int32 -> Int32)"}, {"divided_by", "Int32$divided_by", "func(x,y:Int32 -> Int32)"}, {"gcd", "Int32$gcd", "func(x,y:Int32 -> Int32)"}, - {"parse", "Int32$parse", "func(text:Text -> Int32?)"}, + {"parse", "Int32$parse", "func(text:Text, remainder:&Text? = none -> Int32?)"}, {"get_bit", "Int32$get_bit", "func(x:Int32, bit_index:Int -> Bool)"}, {"hex", "Int32$hex", "func(i:Int32, digits=0, uppercase=yes, prefix=yes -> Text)"}, {"is_between", "Int32$is_between", "func(x:Int32,low:Int32,high:Int32 -> Bool)"}, @@ -181,7 +182,7 @@ env_t *global_env(bool source_mapping) {"clamped", "Int16$clamped", "func(x,low,high:Int16 -> Int16)"}, {"divided_by", "Int16$divided_by", "func(x,y:Int16 -> Int16)"}, {"gcd", "Int16$gcd", "func(x,y:Int16 -> Int16)"}, - {"parse", "Int16$parse", "func(text:Text -> Int16?)"}, + {"parse", "Int16$parse", "func(text:Text, remainder:&Text? = none -> Int16?)"}, {"get_bit", "Int16$get_bit", "func(x:Int16, bit_index:Int -> Bool)"}, {"hex", "Int16$hex", "func(i:Int16, digits=0, uppercase=yes, prefix=yes -> Text)"}, {"is_between", "Int16$is_between", "func(x:Int16,low:Int16,high:Int16 -> Bool)"}, @@ -203,7 +204,7 @@ env_t *global_env(bool source_mapping) {"clamped", "Int8$clamped", "func(x,low,high:Int8 -> Int8)"}, {"divided_by", "Int8$divided_by", "func(x,y:Int8 -> Int8)"}, {"gcd", "Int8$gcd", "func(x,y:Int8 -> Int8)"}, - {"parse", "Int8$parse", "func(text:Text -> Int8?)"}, + {"parse", "Int8$parse", "func(text:Text, remainder:&Text? 
= none -> Int8?)"}, {"get_bit", "Int8$get_bit", "func(x:Int8, bit_index:Int -> Bool)"}, {"hex", "Int8$hex", "func(i:Int8, digits=0, uppercase=yes, prefix=yes -> Text)"}, {"is_between", "Int8$is_between", "func(x:Int8,low:Int8,high:Int8 -> Bool)"}, @@ -238,7 +239,7 @@ env_t *global_env(bool source_mapping) {"INF", "(Num_t)(INFINITY)", "Num"}, {"TAU", "(Num_t)(2.*M_PI)", "Num"}, {"mix", "Num$mix", "func(amount,x,y:Num -> Num)"}, - {"parse", "Num$parse", "func(text:Text -> Num?)"}, + {"parse", "Num$parse", "func(text:Text, remainder:&Text? = none -> Num?)"}, {"abs", "fabs", "func(n:Num -> Num)"}, F_opt(acos), F_opt(acosh), F_opt(asin), F(asinh), F(atan), F_opt(atanh), F(cbrt), F(ceil), F_opt(cos), F(cosh), F(erf), F(erfc), @@ -268,7 +269,7 @@ env_t *global_env(bool source_mapping) {"INF", "(Num32_t)(INFINITY)", "Num32"}, {"TAU", "(Num32_t)(2.f*M_PI)", "Num32"}, {"mix", "Num32$mix", "func(amount,x,y:Num32 -> Num32)"}, - {"parse", "Num32$parse", "func(text:Text -> Num32?)"}, + {"parse", "Num32$parse", "func(text:Text, remainder:&Text? = none -> Num32?)"}, {"abs", "fabsf", "func(n:Num32 -> Num32)"}, {"modulo", "Num32$mod", "func(x,y:Num32 -> Num32)"}, {"modulo1", "Num32$mod1", "func(x,y:Num32 -> Num32)"}, @@ -345,7 +346,7 @@ env_t *global_env(bool source_mapping) {"bytes", "Text$utf8_bytes", "func(text:Text -> [Byte])"}, {"caseless_equals", "Text$equal_ignoring_case", "func(a,b:Text, language='C' -> Bool)"}, {"codepoint_names", "Text$codepoint_names", "func(text:Text -> [Text])"}, - {"ends_with", "Text$ends_with", "func(text,suffix:Text -> Bool)"}, + {"ends_with", "Text$ends_with", "func(text,suffix:Text, remainder:&Text? 
= none -> Bool)"}, {"from", "Text$from", "func(text:Text, first:Int -> Text)"}, {"from_bytes", "Text$from_bytes", "func(bytes:[Byte] -> Text?)"}, {"from_c_string", "Text$from_str", "func(str:CString -> Text?)"}, @@ -368,7 +369,7 @@ env_t *global_env(bool source_mapping) {"slice", "Text$slice", "func(text:Text, from=1, to=-1 -> Text)"}, {"split", "Text$split", "func(text:Text, delimiter='' -> [Text])"}, {"split_any", "Text$split_any", "func(text:Text, delimiters=' \\t\\r\\n' -> [Text])"}, - {"starts_with", "Text$starts_with", "func(text,prefix:Text -> Bool)"}, + {"starts_with", "Text$starts_with", "func(text,prefix:Text, remainder:&Text? = none -> Bool)"}, {"title", "Text$title", "func(text:Text, language='C' -> Text)"}, {"to", "Text$to", "func(text:Text, last:Int -> Text)"}, {"translate", "Text$translate", "func(text:Text, translations:{Text=Text} -> Text)"}, diff --git a/src/stdlib/bools.c b/src/stdlib/bools.c index 66b7e209..85de0621 100644 --- a/src/stdlib/bools.c +++ b/src/stdlib/bools.c @@ -22,24 +22,37 @@ PUREFUNC public Text_t Bool$as_text(const void *b, bool colorize, const TypeInfo return *(Bool_t*)b ? 
Text("yes") : Text("no"); } -PUREFUNC public OptionalBool_t Bool$parse(Text_t text) +static bool try_parse(Text_t text, Text_t target, bool target_value, Text_t *remainder, bool *result) { - Text_t lang = Text("C"); - if (Text$equal_ignoring_case(text, Text("yes"), lang) - || Text$equal_ignoring_case(text, Text("on"), lang) - || Text$equal_ignoring_case(text, Text("true"), lang) - || Text$equal_ignoring_case(text, Text("1"), lang)) { - return yes; - } else if (Text$equal_ignoring_case(text, Text("no"), lang) - || Text$equal_ignoring_case(text, Text("off"), lang) - || Text$equal_ignoring_case(text, Text("false"), lang) - || Text$equal_ignoring_case(text, Text("0"), lang)) { - return no; + static const Text_t lang = Text("C"); + if (text.length < target.length) return false; + Text_t prefix = Text$to(text, Int$from_int64(target.length)); + if (Text$equal_ignoring_case(prefix, target, lang)) { + if (remainder) *remainder = Text$from(text, Int$from_int64(target.length + 1)); + else if (text.length > target.length) return false; + *result = target_value; + return true; } else { - return NONE_BOOL; + return false; } } +PUREFUNC public OptionalBool_t Bool$parse(Text_t text, Text_t *remainder) +{ + bool result; + if (try_parse(text, Text("yes"), true, remainder, &result) + || try_parse(text, Text("true"), true, remainder, &result) + || try_parse(text, Text("on"), true, remainder, &result) + || try_parse(text, Text("1"), true, remainder, &result) + || try_parse(text, Text("no"), false, remainder, &result) + || try_parse(text, Text("false"), false, remainder, &result) + || try_parse(text, Text("off"), false, remainder, &result) + || try_parse(text, Text("0"), false, remainder, &result)) + return result; + else + return NONE_BOOL; +} + static bool Bool$is_none(const void *b, const TypeInfo_t *info) { (void)info; diff --git a/src/stdlib/bools.h b/src/stdlib/bools.h index 6d0300d5..ae6c5feb 100644 --- a/src/stdlib/bools.h +++ b/src/stdlib/bools.h @@ -13,7 +13,7 @@ #define no 
(Bool_t)false PUREFUNC Text_t Bool$as_text(const void *b, bool colorize, const TypeInfo_t *type); -OptionalBool_t Bool$parse(Text_t text); +OptionalBool_t Bool$parse(Text_t text, Text_t *remainder); MACROLIKE Bool_t Bool$from_int(Int_t i) { return (i.small != 0); } MACROLIKE Bool_t Bool$from_int64(Int64_t i) { return (i != 0); } MACROLIKE Bool_t Bool$from_int32(Int32_t i) { return (i != 0); } diff --git a/src/stdlib/bytes.c b/src/stdlib/bytes.c index 48c8b93b..130a645f 100644 --- a/src/stdlib/bytes.c +++ b/src/stdlib/bytes.c @@ -3,6 +3,7 @@ #include <stdint.h> #include "bytes.h" +#include "integers.h" #include "stdlib.h" #include "text.h" #include "util.h" @@ -29,6 +30,18 @@ public CONSTFUNC bool Byte$is_between(const Byte_t x, const Byte_t low, const By return low <= x && x <= high; } +public OptionalByte_t Byte$parse(Text_t text, Text_t *remainder) +{ + OptionalInt_t full_int = Int$parse(text, remainder); + if (full_int.small != 0L + && Int$compare_value(full_int, I(0)) >= 0 + && Int$compare_value(full_int, I(255)) <= 0) { + return (OptionalByte_t){.value=Byte$from_int(full_int, true)}; + } else { + return NONE_BYTE; + } +} + public Text_t Byte$hex(Byte_t byte, bool uppercase, bool prefix) { struct Text_s text = {.tag=TEXT_ASCII}; text.ascii = GC_MALLOC_ATOMIC(8); diff --git a/src/stdlib/bytes.h b/src/stdlib/bytes.h index e733c274..ab88b5bc 100644 --- a/src/stdlib/bytes.h +++ b/src/stdlib/bytes.h @@ -19,6 +19,7 @@ Byte_t Byte$from_int(Int_t i, bool truncate); Byte_t Byte$from_int64(int64_t i, bool truncate); Byte_t Byte$from_int32(int32_t i, bool truncate); Byte_t Byte$from_int16(int16_t i, bool truncate); +OptionalByte_t Byte$parse(Text_t text, Text_t *remainder); Closure_t Byte$to(Byte_t first, Byte_t last, OptionalInt8_t step); MACROLIKE Byte_t Byte$from_int8(int8_t i) { return (Byte_t)i; } MACROLIKE Byte_t Byte$from_bool(bool b) { return (Byte_t)b; } diff --git a/src/stdlib/integers.c b/src/stdlib/integers.c index 018798ec..86be790d 100644 --- 
a/src/stdlib/integers.c +++ b/src/stdlib/integers.c @@ -424,8 +424,36 @@ public Int_t Int$from_str(const char *str) { return Int$from_mpz(i); } -public OptionalInt_t Int$parse(Text_t text) { - return Int$from_str(Text$as_c_string(text)); +public OptionalInt_t Int$parse(Text_t text, Text_t *remainder) { + const char *str = Text$as_c_string(text); + mpz_t i; + int result; + if (strncmp(str, "0x", 2) == 0) { + const char *end = str + 2 + strcspn(str + 2, "0123456789abcdefABCDEF"); + if (remainder) *remainder = Text$from_str(end); + else if (*end != '\0') return NONE_INT; + result = mpz_init_set_str(i, str + 2, 16); + } else if (strncmp(str, "0o", 2) == 0) { + const char *end = str + 2 + strcspn(str + 2, "01234567"); + if (remainder) *remainder = Text$from_str(end); + else if (*end != '\0') return NONE_INT; + result = mpz_init_set_str(i, str + 2, 8); + } else if (strncmp(str, "0b", 2) == 0) { + const char *end = str + 2 + strcspn(str + 2, "01"); + if (remainder) *remainder = Text$from_str(end); + else if (*end != '\0') return NONE_INT; + result = mpz_init_set_str(i, str + 2, 2); + } else { + const char *end = str + 2 + strcspn(str + 2, "0123456789"); + if (remainder) *remainder = Text$from_str(end); + else if (*end != '\0') return NONE_INT; + result = mpz_init_set_str(i, str, 10); + } + if (result != 0) { + if (remainder) *remainder = text; + return NONE_INT; + } + return Int$from_mpz(i); } public bool Int$is_prime(Int_t x, Int_t reps) @@ -670,8 +698,8 @@ public void Int32$deserialize(FILE *in, void *outval, List_t *pointers, const Ty range->step = step; \ return (Closure_t){.fn=_next_##KindOfInt, .userdata=range}; \ } \ - public PUREFUNC Optional ## KindOfInt ## _t KindOfInt ## $parse(Text_t text) { \ - OptionalInt_t full_int = Int$parse(text); \ + public PUREFUNC Optional ## KindOfInt ## _t KindOfInt ## $parse(Text_t text, Text_t *remainder) { \ + OptionalInt_t full_int = Int$parse(text, remainder); \ if (full_int.small == 0L) return (Optional ## KindOfInt ## 
_t){.is_none=true}; \ if (Int$compare_value(full_int, I(min_val)) < 0) { \ return (Optional ## KindOfInt ## _t){.is_none=true}; \ diff --git a/src/stdlib/integers.h b/src/stdlib/integers.h index beb26bd6..50ca485f 100644 --- a/src/stdlib/integers.h +++ b/src/stdlib/integers.h @@ -32,7 +32,7 @@ bool type_name ## $get_bit(c_type x, Int_t bit_index); \ Closure_t type_name ## $to(c_type first, c_type last, Optional ## type_name ## _t step); \ Closure_t type_name ## $onward(c_type first, c_type step); \ - PUREFUNC Optional ## type_name ## _t type_name ## $parse(Text_t text); \ + PUREFUNC Optional ## type_name ## _t type_name ## $parse(Text_t text, Text_t *remainder); \ CONSTFUNC bool type_name ## $is_between(const c_type x, const c_type low, const c_type high); \ CONSTFUNC c_type type_name ## $clamped(c_type x, c_type min, c_type max); \ MACROLIKE CONSTFUNC c_type type_name ## $from_byte(Byte_t b) { return (c_type)b; } \ @@ -101,7 +101,7 @@ Text_t Int$octal(Int_t i, Int_t digits, bool prefix); PUREFUNC Closure_t Int$to(Int_t first, Int_t last, OptionalInt_t step); PUREFUNC Closure_t Int$onward(Int_t first, Int_t step); OptionalInt_t Int$from_str(const char *str); -OptionalInt_t Int$parse(Text_t text); +OptionalInt_t Int$parse(Text_t text, Text_t *remainder); Int_t Int$abs(Int_t x); Int_t Int$power(Int_t base, Int_t exponent); Int_t Int$gcd(Int_t x, Int_t y); diff --git a/src/stdlib/nums.c b/src/stdlib/nums.c index 34fbb162..3775c8f4 100644 --- a/src/stdlib/nums.c +++ b/src/stdlib/nums.c @@ -98,14 +98,20 @@ public CONSTFUNC double Num$clamped(double x, double low, double high) { return (x <= low) ? low : (x >= high ? 
high : x); } -public OptionalNum_t Num$parse(Text_t text) { +public OptionalNum_t Num$parse(Text_t text, Text_t *remainder) { const char *str = Text$as_c_string(text); char *end = NULL; double d = strtod(str, &end); - if (end > str && end[0] == '\0') + if (end > str) { + if (remainder) + *remainder = Text$from_str(end); + else if (*end != '\0') + return nan("none"); return d; - else + } else { + if (remainder) *remainder = text; return nan("none"); + } } static bool Num$is_none(const void *n, const TypeInfo_t *info) @@ -203,14 +209,19 @@ public CONSTFUNC float Num32$clamped(float x, float low, float high) { return (x <= low) ? low : (x >= high ? high : x); } -public OptionalNum32_t Num32$parse(Text_t text) { +public OptionalNum32_t Num32$parse(Text_t text, Text_t *remainder) { const char *str = Text$as_c_string(text); char *end = NULL; double d = strtod(str, &end); - if (end > str && end[0] == '\0') + if (end > str && end[0] == '\0') { + if (remainder) *remainder = Text$from_str(end); + else if (*end != '\0') + return nan("none"); return d; - else + } else { + if (remainder) *remainder = text; return nan("none"); + } } static bool Num32$is_none(const void *n, const TypeInfo_t *info) diff --git a/src/stdlib/nums.h b/src/stdlib/nums.h index fdd9e227..fe76d1c3 100644 --- a/src/stdlib/nums.h +++ b/src/stdlib/nums.h @@ -30,7 +30,7 @@ CONSTFUNC bool Num$finite(double n); CONSTFUNC bool Num$isnan(double n); double Num$nan(Text_t tag); CONSTFUNC double Num$mix(double amount, double x, double y); -OptionalNum_t Num$parse(Text_t text); +OptionalNum_t Num$parse(Text_t text, Text_t *remainder); CONSTFUNC bool Num$is_between(const double x, const double low, const double high); CONSTFUNC double Num$clamped(double x, double low, double high); MACROLIKE CONSTFUNC double Num$from_num32(Num32_t n) { return (double)n; } @@ -83,7 +83,7 @@ CONSTFUNC bool Num32$isinf(float n); CONSTFUNC bool Num32$finite(float n); CONSTFUNC bool Num32$isnan(float n); CONSTFUNC float Num32$mix(float 
amount, float x, float y); -OptionalNum32_t Num32$parse(Text_t text); +OptionalNum32_t Num32$parse(Text_t text, Text_t *remainder); float Num32$nan(Text_t tag); CONSTFUNC bool Num32$is_between(const float x, const float low, const float high); CONSTFUNC float Num32$clamped(float x, float low, float high); diff --git a/src/stdlib/paths.c b/src/stdlib/paths.c index 94baf995..58702ec7 100644 --- a/src/stdlib/paths.c +++ b/src/stdlib/paths.c @@ -619,10 +619,10 @@ public bool Path$has_extension(Path_t path, Text_t extension) if (extension.length == 0) return !Text$has(Text$from(last, I(2)), Text(".")) || Text$equal_values(last, Text("..")); - if (!Text$starts_with(extension, Text("."))) + if (!Text$starts_with(extension, Text("."), NULL)) extension = Texts(Text("."), extension); - return Text$ends_with(Text$from(last, I(2)), extension); + return Text$ends_with(Text$from(last, I(2)), extension, NULL); } public Path_t Path$child(Path_t path, Text_t name) diff --git a/src/stdlib/stdlib.c b/src/stdlib/stdlib.c index 2b4bd99c..02ccd710 100644 --- a/src/stdlib/stdlib.c +++ b/src/stdlib/stdlib.c @@ -90,37 +90,37 @@ static bool parse_single_arg(const TypeInfo_t *info, char *arg, void *dest) *(OptionalInt_t*)dest = parsed; return parsed.small != 0; } else if (info == &Int64$info) { - OptionalInt64_t parsed = Int64$parse(Text$from_str(arg)); + OptionalInt64_t parsed = Int64$parse(Text$from_str(arg), NULL); if (!parsed.is_none) *(OptionalInt64_t*)dest = parsed; return !parsed.is_none; } else if (info == &Int32$info) { - OptionalInt32_t parsed = Int32$parse(Text$from_str(arg)); + OptionalInt32_t parsed = Int32$parse(Text$from_str(arg), NULL); if (!parsed.is_none) *(OptionalInt32_t*)dest = parsed; return !parsed.is_none; } else if (info == &Int16$info) { - OptionalInt16_t parsed = Int16$parse(Text$from_str(arg)); + OptionalInt16_t parsed = Int16$parse(Text$from_str(arg), NULL); if (!parsed.is_none) *(OptionalInt16_t*)dest = parsed; return !parsed.is_none; } else if (info == 
&Int8$info) { - OptionalInt8_t parsed = Int8$parse(Text$from_str(arg)); + OptionalInt8_t parsed = Int8$parse(Text$from_str(arg), NULL); if (!parsed.is_none) *(OptionalInt8_t*)dest = parsed; return !parsed.is_none; } else if (info == &Bool$info) { - OptionalBool_t parsed = Bool$parse(Text$from_str(arg)); + OptionalBool_t parsed = Bool$parse(Text$from_str(arg), NULL); if (parsed != NONE_BOOL) *(OptionalBool_t*)dest = parsed; return parsed != NONE_BOOL; } else if (info == &Num$info) { - OptionalNum_t parsed = Num$parse(Text$from_str(arg)); + OptionalNum_t parsed = Num$parse(Text$from_str(arg), NULL); if (!isnan(parsed)) *(OptionalNum_t*)dest = parsed; return !isnan(parsed); } else if (info == &Num32$info) { - OptionalNum32_t parsed = Num32$parse(Text$from_str(arg)); + OptionalNum32_t parsed = Num32$parse(Text$from_str(arg), NULL); if (!isnan(parsed)) *(OptionalNum32_t*)dest = parsed; return !isnan(parsed); diff --git a/src/stdlib/text.c b/src/stdlib/text.c index 80c267ed..8ef0874e 100644 --- a/src/stdlib/text.c +++ b/src/stdlib/text.c @@ -1102,30 +1102,42 @@ bool _matches(TextIter_t *text_state, TextIter_t *target_state, int64_t pos) return true; } -PUREFUNC public bool Text$starts_with(Text_t text, Text_t prefix) +PUREFUNC public bool Text$starts_with(Text_t text, Text_t prefix, Text_t *remainder) { if (text.length < prefix.length) return false; TextIter_t text_state = NEW_TEXT_ITER_STATE(text), prefix_state = NEW_TEXT_ITER_STATE(prefix); - return _matches(&text_state, &prefix_state, 0); + if (_matches(&text_state, &prefix_state, 0)) { + if (remainder) *remainder = Text$from(text, Int$from_int64(prefix.length + 1)); + return true; + } else { + if (remainder) *remainder = text; + return false; + } } -PUREFUNC public bool Text$ends_with(Text_t text, Text_t suffix) +PUREFUNC public bool Text$ends_with(Text_t text, Text_t suffix, Text_t *remainder) { if (text.length < suffix.length) return false; TextIter_t text_state = NEW_TEXT_ITER_STATE(text), suffix_state = 
NEW_TEXT_ITER_STATE(suffix); - return _matches(&text_state, &suffix_state, text.length - suffix.length); + if (_matches(&text_state, &suffix_state, text.length - suffix.length)) { + if (remainder) *remainder = Text$to(text, Int$from_int64(text.length - suffix.length)); + return true; + } else { + if (remainder) *remainder = text; + return false; + } } public Text_t Text$without_prefix(Text_t text, Text_t prefix) { - return Text$starts_with(text, prefix) ? Text$slice(text, I(prefix.length + 1), I(text.length)) : text; + return Text$starts_with(text, prefix, false) ? Text$slice(text, I(prefix.length + 1), I(text.length)) : text; } public Text_t Text$without_suffix(Text_t text, Text_t suffix) { - return Text$ends_with(text, suffix) ? Text$slice(text, I(1), I(text.length - suffix.length)) : text; + return Text$ends_with(text, suffix, false) ? Text$slice(text, I(1), I(text.length - suffix.length)) : text; } static bool _has_grapheme(TextIter_t *text, int32_t g) diff --git a/src/stdlib/text.h b/src/stdlib/text.h index 642a74b6..637a3db7 100644 --- a/src/stdlib/text.h +++ b/src/stdlib/text.h @@ -59,8 +59,8 @@ Text_t Text$lower(Text_t text, Text_t language); Text_t Text$title(Text_t text, Text_t language); Text_t Text$as_text(const void *text, bool colorize, const TypeInfo_t *info); Text_t Text$quoted(Text_t str, bool colorize, Text_t quotation_mark); -PUREFUNC bool Text$starts_with(Text_t text, Text_t prefix); -PUREFUNC bool Text$ends_with(Text_t text, Text_t suffix); +PUREFUNC bool Text$starts_with(Text_t text, Text_t prefix, Text_t *remainder); +PUREFUNC bool Text$ends_with(Text_t text, Text_t suffix, Text_t *remainder); Text_t Text$without_prefix(Text_t text, Text_t prefix); Text_t Text$without_suffix(Text_t text, Text_t suffix); Text_t Text$replace(Text_t text, Text_t target, Text_t replacement); @@ -709,11 +709,11 @@ time_t latest_included_modification_time(Path_t path) bool allow_dot_include = Path$has_extension(path, Text("s")) || Path$has_extension(path, 
Text("S")); for (Text_t line; (line=next_line(by_line.userdata)).length >= 0; ) { line = Text$trim(line, Text(" \t"), true, false); - if (!Text$starts_with(line, Text("#include")) && !(allow_dot_include && Text$starts_with(line, Text(".include")))) + if (!Text$starts_with(line, Text("#include"), NULL) && !(allow_dot_include && Text$starts_with(line, Text(".include"), NULL))) continue; // Check for `"` after `#include` or `.include` and some spaces: - if (!Text$starts_with(Text$trim(Text$from(line, I(9)), Text(" \t"), true, false), Text("\""))) + if (!Text$starts_with(Text$trim(Text$from(line, I(9)), Text(" \t"), true, false), Text("\""), NULL)) continue; List_t chunks = Text$split(line, Text("\"")); diff --git a/src/typecheck.c b/src/typecheck.c index 494c73d8..0ed2328f 100644 --- a/src/typecheck.c +++ b/src/typecheck.c @@ -1686,7 +1686,7 @@ PUREFUNC bool is_constant(env_t *env, ast_t *ast) case Bool: case Num: case None: return true; case Int: { DeclareMatch(info, ast, Int); - Int_t int_val = Int$parse(Text$from_str(info->str)); + Int_t int_val = Int$parse(Text$from_str(info->str), NULL); if (int_val.small == 0) return false; // Failed to parse return (Int$compare_value(int_val, I(BIGGEST_SMALL_INT)) <= 0); } |
