diff options
| author | Bruce Hill <bruce@bruce-hill.com> | 2025-09-09 20:10:39 -0400 |
|---|---|---|
| committer | Bruce Hill <bruce@bruce-hill.com> | 2025-09-09 20:10:39 -0400 |
| commit | 25d85a501d0e5fb8bd9a76376e3868713dd56fdb (patch) | |
| tree | 9ba81bb51b41d19284ba02ec1197ae81f31b263c | |
| parent | fb216e955f04a803f11953be27e76bd4d2c9e76d (diff) | |
| parent | d64dcab138a34d5f5105e08f0a840f7cb5a1d159 (diff) | |
Merge branch 'main' into table-colons
| -rw-r--r-- | CHANGES.md | 4 | ||||
| -rw-r--r-- | api/api.md | 148 | ||||
| -rw-r--r-- | api/text.md | 148 | ||||
| -rw-r--r-- | api/text.yaml | 57 | ||||
| -rw-r--r-- | man/man3/tomo-Text.from_utf16.3 | 39 | ||||
| -rw-r--r-- | man/man3/tomo-Text.from_utf32.3 (renamed from man/man3/tomo-Text.from_codepoints.3) | 8 | ||||
| -rw-r--r-- | man/man3/tomo-Text.from_utf8.3 (renamed from man/man3/tomo-Text.from_bytes.3) | 8 | ||||
| -rw-r--r-- | man/man3/tomo-Text.utf16.3 | 36 | ||||
| -rw-r--r-- | man/man3/tomo-Text.utf32.3 (renamed from man/man3/tomo-Text.utf32_codepoints.3) | 8 | ||||
| -rw-r--r-- | man/man3/tomo-Text.utf8.3 (renamed from man/man3/tomo-Text.bytes.3) | 8 | ||||
| -rw-r--r-- | src/compile/headers.c | 2 | ||||
| -rw-r--r-- | src/compile/indexing.c | 4 | ||||
| -rw-r--r-- | src/compile/statements.c | 2 | ||||
| -rw-r--r-- | src/environment.c | 10 | ||||
| -rw-r--r-- | src/formatter/formatter.c | 2 | ||||
| -rw-r--r-- | src/modules.c | 26 | ||||
| -rw-r--r-- | src/modules.h | 4 | ||||
| -rw-r--r-- | src/stdlib/paths.c | 8 | ||||
| -rw-r--r-- | src/stdlib/text.c | 107 | ||||
| -rw-r--r-- | src/stdlib/text.h | 10 | ||||
| -rw-r--r-- | src/tomo.c | 62 | ||||
| -rw-r--r-- | src/typecheck.c | 12 | ||||
| -rw-r--r-- | test/text.tm | 38 |
23 files changed, 516 insertions, 235 deletions
@@ -8,6 +8,10 @@ - Library installation has been cleaned up a bit. - Added a `--format` flag to the `tomo` binary that autoformats your code (currently unstable, do not rely on it just yet). +- Standardized text methods for Unicode encodings: + - `Text.from_utf8()`/`Text.utf8()` + - `Text.from_utf16()`/`Text.utf16()` + - `Text.from_utf32()`/`Text.utf32()` - Fixed bugs: - `Int.parse()` had a memory bug. @@ -4182,27 +4182,6 @@ for chunk in text.by_split_any(",;") say(chunk) ``` -## Text.bytes - -```tomo -Text.bytes : func(text: Text -> [Byte]) -``` - -Converts a `Text` value to a list of bytes representing a UTF8 encoding of the text. - -Argument | Type | Description | Default ----------|------|-------------|--------- -text | `Text` | The text to be converted to UTF8 bytes. | - - -**Return:** A list of bytes (`[Byte]`) representing the text in UTF8 encoding. - - -**Example:** -```tomo ->> "Amélie".bytes() -= [65, 109, 195, 169, 108, 105, 101] - -``` ## Text.caseless_equals ```tomo @@ -4306,29 +4285,6 @@ first | `Int` | The index to begin the slice. | - = "lo" ``` -## Text.from_bytes - -```tomo -Text.from_bytes : func(bytes: [Byte] -> [Text]) -``` - -Returns text that has been constructed from the given UTF8 bytes. - -The text will be normalized, so the resulting text's UTF8 bytes may not exactly match the input. - -Argument | Type | Description | Default ----------|------|-------------|--------- -bytes | `[Byte]` | The UTF-8 bytes of the desired text. | - - -**Return:** A new text based on the input UTF8 bytes after normalization has been applied. - - -**Example:** -```tomo ->> Text.from_bytes([195, 133, 107, 101]) -= "Åke" - -``` ## Text.from_c_string ```tomo @@ -4377,10 +4333,35 @@ codepoint_names | `[Text]` | The names of each codepoint in the desired text (ca = "Åke" ``` -## Text.from_codepoints +## Text.from_utf16 + +```tomo +Text.from_utf16 : func(bytes: [Int16] -> [Text]) +``` + +Returns text that has been constructed from the given UTF16 sequence. 
+ +The text will be normalized, so the resulting text's UTF16 sequence may not exactly match the input. + +Argument | Type | Description | Default +---------|------|-------------|--------- +bytes | `[Int16]` | The UTF-16 integers of the desired text. | - + +**Return:** A new text based on the input UTF16 sequence after normalization has been applied. + + +**Example:** +```tomo +>> Text.from_utf16([197, 107, 101]) += "Åke" +>> Text.from_utf16([12371, 12435, 12395, 12385, 12399, 19990, 30028]) += "こんにちは世界".utf16() + +``` +## Text.from_utf32 ```tomo -Text.from_codepoints : func(codepoints: [Int32] -> [Text]) +Text.from_utf32 : func(codepoints: [Int32] -> [Text]) ``` Returns text that has been constructed from the given UTF32 codepoints. @@ -4396,7 +4377,30 @@ codepoints | `[Int32]` | The UTF32 codepoints in the desired text. | - **Example:** ```tomo ->> Text.from_codepoints([197, 107, 101]) +>> Text.from_utf32([197, 107, 101]) += "Åke" + +``` +## Text.from_utf8 + +```tomo +Text.from_utf8 : func(bytes: [Byte] -> [Text]) +``` + +Returns text that has been constructed from the given UTF8 bytes. + +The text will be normalized, so the resulting text's UTF8 bytes may not exactly match the input. + +Argument | Type | Description | Default +---------|------|-------------|--------- +bytes | `[Byte]` | The UTF-8 bytes of the desired text. | - + +**Return:** A new text based on the input UTF8 bytes after normalization has been applied. + + +**Example:** +```tomo +>> Text.from_utf8([195, 133, 107, 101]) = "Åke" ``` @@ -4916,10 +4920,33 @@ language | `Text` | The ISO 639 language code for which casing rules to use. | = "İ" ``` -## Text.utf32_codepoints +## Text.utf16 ```tomo -Text.utf32_codepoints : func(text: Text -> [Int32]) +Text.utf16 : func(text: Text -> [Int16]) +``` + +Returns a list of Unicode code points for UTF16 encoding of the text. 
+ +Argument | Type | Description | Default +---------|------|-------------|--------- +text | `Text` | The text from which to extract Unicode code points. | - + +**Return:** A list of 16-bit integer Unicode code points (`[Int16]`). + + +**Example:** +```tomo +>> "Åke".utf16() += [197, 107, 101] +>> "こんにちは世界".utf16() += [12371, 12435, 12395, 12385, 12399, 19990, 30028] + +``` +## Text.utf32 + +```tomo +Text.utf32 : func(text: Text -> [Int32]) ``` Returns a list of Unicode code points for UTF32 encoding of the text. @@ -4933,10 +4960,31 @@ text | `Text` | The text from which to extract Unicode code points. | - **Example:** ```tomo ->> "Amélie".utf32_codepoints() +>> "Amélie".utf32() = [65, 109, 233, 108, 105, 101] ``` +## Text.utf8 + +```tomo +Text.utf8 : func(text: Text -> [Byte]) +``` + +Converts a `Text` value to a list of bytes representing a UTF8 encoding of the text. + +Argument | Type | Description | Default +---------|------|-------------|--------- +text | `Text` | The text to be converted to UTF8 bytes. | - + +**Return:** A list of bytes (`[Byte]`) representing the text in UTF8 encoding. + + +**Example:** +```tomo +>> "Amélie".utf8() += [65, 109, 195, 169, 108, 105, 101] + +``` ## Text.width ```tomo diff --git a/api/text.md b/api/text.md index fbecb6fc..22f08242 100644 --- a/api/text.md +++ b/api/text.md @@ -130,27 +130,6 @@ for chunk in text.by_split_any(",;") say(chunk) ``` -## Text.bytes - -```tomo -Text.bytes : func(text: Text -> [Byte]) -``` - -Converts a `Text` value to a list of bytes representing a UTF8 encoding of the text. - -Argument | Type | Description | Default ----------|------|-------------|--------- -text | `Text` | The text to be converted to UTF8 bytes. | - - -**Return:** A list of bytes (`[Byte]`) representing the text in UTF8 encoding. - - -**Example:** -```tomo ->> "Amélie".bytes() -= [65, 109, 195, 169, 108, 105, 101] - -``` ## Text.caseless_equals ```tomo @@ -254,29 +233,6 @@ first | `Int` | The index to begin the slice. 
| - = "lo" ``` -## Text.from_bytes - -```tomo -Text.from_bytes : func(bytes: [Byte] -> [Text]) -``` - -Returns text that has been constructed from the given UTF8 bytes. - -The text will be normalized, so the resulting text's UTF8 bytes may not exactly match the input. - -Argument | Type | Description | Default ----------|------|-------------|--------- -bytes | `[Byte]` | The UTF-8 bytes of the desired text. | - - -**Return:** A new text based on the input UTF8 bytes after normalization has been applied. - - -**Example:** -```tomo ->> Text.from_bytes([195, 133, 107, 101]) -= "Åke" - -``` ## Text.from_c_string ```tomo @@ -325,10 +281,35 @@ codepoint_names | `[Text]` | The names of each codepoint in the desired text (ca = "Åke" ``` -## Text.from_codepoints +## Text.from_utf16 + +```tomo +Text.from_utf16 : func(bytes: [Int16] -> [Text]) +``` + +Returns text that has been constructed from the given UTF16 sequence. + +The text will be normalized, so the resulting text's UTF16 sequence may not exactly match the input. + +Argument | Type | Description | Default +---------|------|-------------|--------- +bytes | `[Int16]` | The UTF-16 integers of the desired text. | - + +**Return:** A new text based on the input UTF16 sequence after normalization has been applied. + + +**Example:** +```tomo +>> Text.from_utf16([197, 107, 101]) += "Åke" +>> Text.from_utf16([12371, 12435, 12395, 12385, 12399, 19990, 30028]) += "こんにちは世界".utf16() + +``` +## Text.from_utf32 ```tomo -Text.from_codepoints : func(codepoints: [Int32] -> [Text]) +Text.from_utf32 : func(codepoints: [Int32] -> [Text]) ``` Returns text that has been constructed from the given UTF32 codepoints. @@ -344,7 +325,30 @@ codepoints | `[Int32]` | The UTF32 codepoints in the desired text. 
| - **Example:** ```tomo ->> Text.from_codepoints([197, 107, 101]) +>> Text.from_utf32([197, 107, 101]) += "Åke" + +``` +## Text.from_utf8 + +```tomo +Text.from_utf8 : func(bytes: [Byte] -> [Text]) +``` + +Returns text that has been constructed from the given UTF8 bytes. + +The text will be normalized, so the resulting text's UTF8 bytes may not exactly match the input. + +Argument | Type | Description | Default +---------|------|-------------|--------- +bytes | `[Byte]` | The UTF-8 bytes of the desired text. | - + +**Return:** A new text based on the input UTF8 bytes after normalization has been applied. + + +**Example:** +```tomo +>> Text.from_utf8([195, 133, 107, 101]) = "Åke" ``` @@ -864,10 +868,33 @@ language | `Text` | The ISO 639 language code for which casing rules to use. | = "İ" ``` -## Text.utf32_codepoints +## Text.utf16 ```tomo -Text.utf32_codepoints : func(text: Text -> [Int32]) +Text.utf16 : func(text: Text -> [Int16]) +``` + +Returns a list of Unicode code points for UTF16 encoding of the text. + +Argument | Type | Description | Default +---------|------|-------------|--------- +text | `Text` | The text from which to extract Unicode code points. | - + +**Return:** A list of 16-bit integer Unicode code points (`[Int16]`). + + +**Example:** +```tomo +>> "Åke".utf16() += [197, 107, 101] +>> "こんにちは世界".utf16() += [12371, 12435, 12395, 12385, 12399, 19990, 30028] + +``` +## Text.utf32 + +```tomo +Text.utf32 : func(text: Text -> [Int32]) ``` Returns a list of Unicode code points for UTF32 encoding of the text. @@ -881,10 +908,31 @@ text | `Text` | The text from which to extract Unicode code points. | - **Example:** ```tomo ->> "Amélie".utf32_codepoints() +>> "Amélie".utf32() = [65, 109, 233, 108, 105, 101] ``` +## Text.utf8 + +```tomo +Text.utf8 : func(text: Text -> [Byte]) +``` + +Converts a `Text` value to a list of bytes representing a UTF8 encoding of the text. 
+ +Argument | Type | Description | Default +---------|------|-------------|--------- +text | `Text` | The text to be converted to UTF8 bytes. | - + +**Return:** A list of bytes (`[Byte]`) representing the text in UTF8 encoding. + + +**Example:** +```tomo +>> "Amélie".utf8() += [65, 109, 195, 169, 108, 105, 101] + +``` ## Text.width ```tomo diff --git a/api/text.yaml b/api/text.yaml index 6c6767fd..dcdcfb67 100644 --- a/api/text.yaml +++ b/api/text.yaml @@ -129,7 +129,7 @@ Text.by_split_any: # Prints: "one" then "two" then "three": say(chunk) -Text.bytes: +Text.utf8: short: get UTF8 bytes description: > Converts a `Text` value to a list of bytes representing a UTF8 encoding of @@ -144,7 +144,7 @@ Text.bytes: description: > The text to be converted to UTF8 bytes. example: | - >> "Amélie".bytes() + >> "Amélie".utf8() = [65, 109, 195, 169, 108, 105, 101] Text.caseless_equals: @@ -255,7 +255,7 @@ Text.from: >> "hello".from(-2) = "lo" -Text.from_bytes: +Text.from_utf8: short: convert UTF8 byte list to text description: > Returns text that has been constructed from the given UTF8 bytes. @@ -272,9 +272,31 @@ Text.from_bytes: description: > The UTF-8 bytes of the desired text. example: | - >> Text.from_bytes([195, 133, 107, 101]) + >> Text.from_utf8([195, 133, 107, 101]) = "Åke" +Text.from_utf16: + short: convert UTF16 list to text + description: > + Returns text that has been constructed from the given UTF16 sequence. + note: > + The text will be normalized, so the resulting text's UTF16 sequence may not + exactly match the input. + return: + type: '[Text]' + description: > + A new text based on the input UTF16 sequence after normalization has been applied. + args: + bytes: + type: '[Int16]' + description: > + The UTF-16 integers of the desired text. 
+ example: | + >> Text.from_utf16([197, 107, 101]) + = "Åke" + >> Text.from_utf16([12371, 12435, 12395, 12385, 12399, 19990, 30028]) + = "こんにちは世界".utf16() + Text.from_c_string: short: convert C-style string to text description: > @@ -318,7 +340,7 @@ Text.from_codepoint_names: ] = "Åke" -Text.from_codepoints: +Text.from_utf32: short: convert UTF32 codepoints to text description: > Returns text that has been constructed from the given UTF32 codepoints. @@ -335,7 +357,7 @@ Text.from_codepoints: description: > The UTF32 codepoints in the desired text. example: | - >> Text.from_codepoints([197, 107, 101]) + >> Text.from_utf32([197, 107, 101]) = "Åke" Text.has: @@ -906,7 +928,26 @@ Text.upper: >> "i".upper(language="tr_TR") = "İ" -Text.utf32_codepoints: +Text.utf16: + short: get UTF16 codepoints + description: > + Returns a list of Unicode code points for UTF16 encoding of the text. + return: + type: '[Int16]' + description: > + A list of 16-bit integer Unicode code points (`[Int16]`). + args: + text: + type: 'Text' + description: > + The text from which to extract Unicode code points. + example: | + >> "Åke".utf16() + = [197, 107, 101] + >> "こんにちは世界".utf16() + = [12371, 12435, 12395, 12385, 12399, 19990, 30028] + +Text.utf32: short: get UTF32 codepoints description: > Returns a list of Unicode code points for UTF32 encoding of the text. @@ -920,7 +961,7 @@ Text.utf32_codepoints: description: > The text from which to extract Unicode code points. example: | - >> "Amélie".utf32_codepoints() + >> "Amélie".utf32() = [65, 109, 233, 108, 105, 101] Text.width: diff --git a/man/man3/tomo-Text.from_utf16.3 b/man/man3/tomo-Text.from_utf16.3 new file mode 100644 index 00000000..d4eaea02 --- /dev/null +++ b/man/man3/tomo-Text.from_utf16.3 @@ -0,0 +1,39 @@ +'\" t +.\" Copyright (c) 2025 Bruce Hill +.\" All rights reserved. 
+.\" +.TH Text.from_utf16 3 2025-09-09 "Tomo man-pages" +.SH NAME +Text.from_utf16 \- convert UTF16 list to text +.SH LIBRARY +Tomo Standard Library +.SH SYNOPSIS +.nf +.BI Text.from_utf16\ :\ func(bytes:\ [Int16]\ ->\ [Text]) +.fi +.SH DESCRIPTION +Returns text that has been constructed from the given UTF16 sequence. + + +.SH ARGUMENTS + +.TS +allbox; +lb lb lbx lb +l l l l. +Name Type Description Default +bytes [Int16] The UTF-16 integers of the desired text. - +.TE +.SH RETURN +A new text based on the input UTF16 sequence after normalization has been applied. + +.SH NOTES +The text will be normalized, so the resulting text's UTF16 sequence may not exactly match the input. + +.SH EXAMPLES +.EX +>> Text.from_utf16([197, 107, 101]) += "Åke" +>> Text.from_utf16([12371, 12435, 12395, 12385, 12399, 19990, 30028]) += "こんにちは世界".utf16() +.EE diff --git a/man/man3/tomo-Text.from_codepoints.3 b/man/man3/tomo-Text.from_utf32.3 index cfc3133b..31fc344f 100644 --- a/man/man3/tomo-Text.from_codepoints.3 +++ b/man/man3/tomo-Text.from_utf32.3 @@ -2,14 +2,14 @@ .\" Copyright (c) 2025 Bruce Hill .\" All rights reserved. .\" -.TH Text.from_codepoints 3 2025-09-06 "Tomo man-pages" +.TH Text.from_utf32 3 2025-09-09 "Tomo man-pages" .SH NAME -Text.from_codepoints \- convert UTF32 codepoints to text +Text.from_utf32 \- convert UTF32 codepoints to text .SH LIBRARY Tomo Standard Library .SH SYNOPSIS .nf -.BI Text.from_codepoints\ :\ func(codepoints:\ [Int32]\ ->\ [Text]) +.BI Text.from_utf32\ :\ func(codepoints:\ [Int32]\ ->\ [Text]) .fi .SH DESCRIPTION Returns text that has been constructed from the given UTF32 codepoints. 
@@ -32,6 +32,6 @@ The text will be normalized, so the resulting text's codepoints may not exactly .SH EXAMPLES .EX ->> Text.from_codepoints([197, 107, 101]) +>> Text.from_utf32([197, 107, 101]) = "Åke" .EE diff --git a/man/man3/tomo-Text.from_bytes.3 b/man/man3/tomo-Text.from_utf8.3 index a21bb169..ead65dc6 100644 --- a/man/man3/tomo-Text.from_bytes.3 +++ b/man/man3/tomo-Text.from_utf8.3 @@ -2,14 +2,14 @@ .\" Copyright (c) 2025 Bruce Hill .\" All rights reserved. .\" -.TH Text.from_bytes 3 2025-09-06 "Tomo man-pages" +.TH Text.from_utf8 3 2025-09-09 "Tomo man-pages" .SH NAME -Text.from_bytes \- convert UTF8 byte list to text +Text.from_utf8 \- convert UTF8 byte list to text .SH LIBRARY Tomo Standard Library .SH SYNOPSIS .nf -.BI Text.from_bytes\ :\ func(bytes:\ [Byte]\ ->\ [Text]) +.BI Text.from_utf8\ :\ func(bytes:\ [Byte]\ ->\ [Text]) .fi .SH DESCRIPTION Returns text that has been constructed from the given UTF8 bytes. @@ -32,6 +32,6 @@ The text will be normalized, so the resulting text's UTF8 bytes may not exactly .SH EXAMPLES .EX ->> Text.from_bytes([195, 133, 107, 101]) +>> Text.from_utf8([195, 133, 107, 101]) = "Åke" .EE diff --git a/man/man3/tomo-Text.utf16.3 b/man/man3/tomo-Text.utf16.3 new file mode 100644 index 00000000..2b3da2b1 --- /dev/null +++ b/man/man3/tomo-Text.utf16.3 @@ -0,0 +1,36 @@ +'\" t +.\" Copyright (c) 2025 Bruce Hill +.\" All rights reserved. +.\" +.TH Text.utf16 3 2025-09-09 "Tomo man-pages" +.SH NAME +Text.utf16 \- get UTF16 codepoints +.SH LIBRARY +Tomo Standard Library +.SH SYNOPSIS +.nf +.BI Text.utf16\ :\ func(text:\ Text\ ->\ [Int16]) +.fi +.SH DESCRIPTION +Returns a list of Unicode code points for UTF16 encoding of the text. + + +.SH ARGUMENTS + +.TS +allbox; +lb lb lbx lb +l l l l. +Name Type Description Default +text Text The text from which to extract Unicode code points. - +.TE +.SH RETURN +A list of 16-bit integer Unicode code points (`[Int16]`). 
+ +.SH EXAMPLES +.EX +>> "Åke".utf16() += [197, 107, 101] +>> "こんにちは世界".utf16() += [12371, 12435, 12395, 12385, 12399, 19990, 30028] +.EE diff --git a/man/man3/tomo-Text.utf32_codepoints.3 b/man/man3/tomo-Text.utf32.3 index d5b218d6..ff37ba9c 100644 --- a/man/man3/tomo-Text.utf32_codepoints.3 +++ b/man/man3/tomo-Text.utf32.3 @@ -2,14 +2,14 @@ .\" Copyright (c) 2025 Bruce Hill .\" All rights reserved. .\" -.TH Text.utf32_codepoints 3 2025-09-06 "Tomo man-pages" +.TH Text.utf32 3 2025-09-09 "Tomo man-pages" .SH NAME -Text.utf32_codepoints \- get UTF32 codepoints +Text.utf32 \- get UTF32 codepoints .SH LIBRARY Tomo Standard Library .SH SYNOPSIS .nf -.BI Text.utf32_codepoints\ :\ func(text:\ Text\ ->\ [Int32]) +.BI Text.utf32\ :\ func(text:\ Text\ ->\ [Int32]) .fi .SH DESCRIPTION Returns a list of Unicode code points for UTF32 encoding of the text. @@ -29,6 +29,6 @@ A list of 32-bit integer Unicode code points (`[Int32]`). .SH EXAMPLES .EX ->> "Amélie".utf32_codepoints() +>> "Amélie".utf32() = [65, 109, 233, 108, 105, 101] .EE diff --git a/man/man3/tomo-Text.bytes.3 b/man/man3/tomo-Text.utf8.3 index b97d8632..80a91fb9 100644 --- a/man/man3/tomo-Text.bytes.3 +++ b/man/man3/tomo-Text.utf8.3 @@ -2,14 +2,14 @@ .\" Copyright (c) 2025 Bruce Hill .\" All rights reserved. .\" -.TH Text.bytes 3 2025-09-06 "Tomo man-pages" +.TH Text.utf8 3 2025-09-09 "Tomo man-pages" .SH NAME -Text.bytes \- get UTF8 bytes +Text.utf8 \- get UTF8 bytes .SH LIBRARY Tomo Standard Library .SH SYNOPSIS .nf -.BI Text.bytes\ :\ func(text:\ Text\ ->\ [Byte]) +.BI Text.utf8\ :\ func(text:\ Text\ ->\ [Byte]) .fi .SH DESCRIPTION Converts a `Text` value to a list of bytes representing a UTF8 encoding of the text. @@ -29,6 +29,6 @@ A list of bytes (`[Byte]`) representing the text in UTF8 encoding. 
.SH EXAMPLES .EX ->> "Amélie".bytes() +>> "Amélie".utf8() = [65, 109, 195, 169, 108, 105, 101] .EE diff --git a/src/compile/headers.c b/src/compile/headers.c index 6dc69f03..33a979cf 100644 --- a/src/compile/headers.c +++ b/src/compile/headers.c @@ -171,7 +171,7 @@ Text_t compile_statement_type_header(env_t *env, Path_t header_path, ast_t *ast) Path_t build_dir = Path$resolved(Path$parent(header_path), Path$current_dir()); switch (use->what) { case USE_MODULE: { - module_info_t mod = get_module_info(ast); + module_info_t mod = get_used_module_info(ast); glob_t tm_files; const char *folder = mod.version ? String(mod.name, "_", mod.version) : mod.name; if (glob(String(TOMO_PATH, "/lib/tomo_" TOMO_VERSION "/", folder, "/[!._0-9]*.tm"), GLOB_TILDE, NULL, diff --git a/src/compile/indexing.c b/src/compile/indexing.c index 39af1160..1510e924 100644 --- a/src/compile/indexing.c +++ b/src/compile/indexing.c @@ -52,13 +52,13 @@ Text_t compile_indexing(env_t *env, ast_t *ast) { if (table_type->default_value) { return Texts("Table$get_or_default(", compile_to_pointer_depth(env, indexing->indexed, 0, false), ", ", compile_type(table_type->key_type), ", ", compile_type(table_type->value_type), ", ", - compile(env, indexing->index), ", ", + compile_to_type(env, indexing->index, table_type->key_type), ", ", compile_to_type(env, table_type->default_value, table_type->value_type), ", ", compile_type_info(container_t), ")"); } else { return Texts("Table$get_optional(", compile_to_pointer_depth(env, indexing->indexed, 0, false), ", ", compile_type(table_type->key_type), ", ", compile_type(table_type->value_type), ", ", - compile(env, indexing->index), + compile_to_type(env, indexing->index, table_type->key_type), ", " "_, ", promote_to_optional(table_type->value_type, Text("(*_)")), ", ", diff --git a/src/compile/statements.c b/src/compile/statements.c index bde9ae36..3fc44ac4 100644 --- a/src/compile/statements.c +++ b/src/compile/statements.c @@ -188,7 +188,7 @@ static Text_t 
_compile_statement(env_t *env, ast_t *ast) { Text_t suffix = get_id_suffix(Path$as_c_string(path)); return with_source_info(env, ast, Texts("$initialize", suffix, "();\n")); } else if (use->what == USE_MODULE) { - module_info_t mod = get_module_info(ast); + module_info_t mod = get_used_module_info(ast); glob_t tm_files; const char *folder = mod.version ? String(mod.name, "_", mod.version) : mod.name; if (glob(String(TOMO_PATH, "/lib/tomo_" TOMO_VERSION "/", folder, "/[!._0-9]*.tm"), GLOB_TILDE, NULL, diff --git a/src/environment.c b/src/environment.c index 421f993e..35cc8650 100644 --- a/src/environment.c +++ b/src/environment.c @@ -343,16 +343,16 @@ env_t *global_env(bool source_mapping) { {"by_line", "Text$by_line", "func(text:Text -> func(->Text?))"}, // {"by_split", "Text$by_split", "func(text:Text, delimiter='' -> func(->Text?))"}, // {"by_split_any", "Text$by_split_any", "func(text:Text, delimiters=' \\t\\r\\n' -> func(->Text?))"}, // - {"bytes", "Text$utf8_bytes", "func(text:Text -> [Byte])"}, // {"caseless_equals", "Text$equal_ignoring_case", "func(a,b:Text, language='C' -> Bool)"}, // {"codepoint_names", "Text$codepoint_names", "func(text:Text -> [Text])"}, // {"ends_with", "Text$ends_with", "func(text,suffix:Text, remainder:&Text? 
= none -> Bool)"}, // {"from", "Text$from", "func(text:Text, first:Int -> Text)"}, // - {"from_bytes", "Text$from_bytes", "func(bytes:[Byte] -> Text?)"}, // {"from_c_string", "Text$from_str", "func(str:CString -> Text?)"}, // {"from_codepoint_names", "Text$from_codepoint_names", "func(codepoint_names:[Text] -> Text?)"}, // - {"from_codepoints", "Text$from_codepoints", "func(codepoints:[Int32] -> Text)"}, // {"from_text", "Path$from_text", "func(text:Text -> Path)"}, // + {"from_utf8", "Text$from_utf8", "func(bytes:[Byte] -> Text?)"}, // + {"from_utf16", "Text$from_utf16", "func(codepoints:[Int16] -> Text?)"}, // + {"from_utf32", "Text$from_utf32", "func(codepoints:[Int32] -> Text?)"}, // {"has", "Text$has", "func(text:Text, target:Text -> Bool)"}, // {"join", "Text$join", "func(glue:Text, pieces:[Text] -> Text)"}, // {"layout", "Text$layout", "func(text:Text -> Text)"}, // @@ -375,7 +375,9 @@ env_t *global_env(bool source_mapping) { {"translate", "Text$translate", "func(text:Text, translations:{Text:Text} -> Text)"}, // {"trim", "Text$trim", "func(text:Text, to_trim=\" \t\r\n\", left=yes, right=yes -> Text)"}, // {"upper", "Text$upper", "func(text:Text, language='C' -> Text)"}, // - {"utf32_codepoints", "Text$utf32_codepoints", "func(text:Text -> [Int32])"}, // + {"utf8", "Text$utf8", "func(text:Text -> [Byte])"}, // + {"utf16", "Text$utf16", "func(text:Text -> [Int16])"}, // + {"utf32", "Text$utf32", "func(text:Text -> [Int32])"}, // {"width", "Text$width", "func(text:Text, language='C' -> Int)"}, // {"without_prefix", "Text$without_prefix", "func(text,prefix:Text -> Text)"}, // {"without_suffix", "Text$without_suffix", "func(text,suffix:Text -> Text)"}), diff --git a/src/formatter/formatter.c b/src/formatter/formatter.c index b68b3874..2c52aa63 100644 --- a/src/formatter/formatter.c +++ b/src/formatter/formatter.c @@ -54,7 +54,7 @@ PUREFUNC text_opts_t choose_text_options(ast_list_t *chunks) { } static bool starts_with_id(Text_t text) { - List_t codepoints = 
Text$utf32_codepoints(Text$slice(text, I_small(1), I_small(1))); + List_t codepoints = Text$utf32(Text$slice(text, I_small(1), I_small(1))); return uc_is_property_xid_continue(*(ucs4_t *)codepoints.data); } diff --git a/src/modules.c b/src/modules.c index fafbbf86..9c562387 100644 --- a/src/modules.c +++ b/src/modules.c @@ -24,6 +24,28 @@ errx(1, "Failed to run command: %s", String(__VA_ARGS__)); \ }) +const char *get_library_version(Path_t lib_dir) { + Path_t changes_file = Path$child(lib_dir, Text("CHANGES.md")); + OptionalText_t changes = Path$read(changes_file); + if (changes.length <= 0) { + return "v0.0"; + } + const char *changes_str = Text$as_c_string(Texts(Text("\n"), changes)); + const char *version_line = strstr(changes_str, "\n## "); + if (version_line == NULL) + print_err("CHANGES.md in ", lib_dir, " does not have any valid versions starting with '## '"); + return String(string_slice(version_line + 4, strcspn(version_line + 4, "\r\n"))); +} + +Text_t get_library_name(Path_t lib_dir) { + Text_t name = Path$base_name(lib_dir); + name = Text$without_prefix(name, Text("tomo-")); + name = Text$without_suffix(name, Text("-tomo")); + Text_t suffix = Texts(Text("_"), Text$from_str(get_library_version(lib_dir))); + if (!Text$ends_with(name, suffix, NULL)) name = Texts(name, suffix); + return name; +} + bool install_from_modules_ini(Path_t ini_file, bool ask_confirmation) { OptionalClosure_t by_line = Path$by_line(ini_file); if (by_line.fn == NULL) return false; @@ -72,7 +94,7 @@ find_section:; } } -module_info_t get_module_info(ast_t *use) { +module_info_t get_used_module_info(ast_t *use) { static Table_t cache = {}; TypeInfo_t *cache_type = Table$info(Pointer$info("@", &Memory$info), Pointer$info("@", &Memory$info)); module_info_t **cached = Table$get(cache, &use, cache_type); @@ -90,6 +112,8 @@ bool try_install_module(module_info_t mod, bool ask_confirmation) { "_", Text$from_str(mod.version))); if (Path$exists(dest)) return true; + print("No such path: ", 
dest); + if (mod.git) { if (ask_confirmation) { OptionalText_t answer = diff --git a/src/modules.h b/src/modules.h index c36d96dd..98911ec9 100644 --- a/src/modules.h +++ b/src/modules.h @@ -10,6 +10,8 @@ typedef struct { const char *name, *version, *url, *git, *revision, *path; } module_info_t; -module_info_t get_module_info(ast_t *use); +Text_t get_library_name(Path_t lib_dir); +const char *get_library_version(Path_t lib_dir); +module_info_t get_used_module_info(ast_t *use); bool install_from_modules_ini(Path_t ini_file, bool ask_confirmation); bool try_install_module(module_info_t mod, bool ask_confirmation); diff --git a/src/stdlib/paths.c b/src/stdlib/paths.c index 3de329a9..385f3bdf 100644 --- a/src/stdlib/paths.c +++ b/src/stdlib/paths.c @@ -290,7 +290,7 @@ static void _write(Path_t path, List_t bytes, int mode, int permissions) { public void Path$write(Path_t path, Text_t text, int permissions) { - List_t bytes = Text$utf8_bytes(text); + List_t bytes = Text$utf8(text); _write(path, bytes, O_WRONLY | O_CREAT | O_TRUNC, permissions); } @@ -301,7 +301,7 @@ void Path$write_bytes(Path_t path, List_t bytes, int permissions) { public void Path$append(Path_t path, Text_t text, int permissions) { - List_t bytes = Text$utf8_bytes(text); + List_t bytes = Text$utf8(text); _write(path, bytes, O_WRONLY | O_APPEND | O_CREAT, permissions); } @@ -367,7 +367,7 @@ public OptionalText_t Path$read(Path_t path) { List_t bytes = Path$read_bytes(path, NONE_INT); if (bytes.length < 0) return NONE_TEXT; - return Text$from_bytes(bytes); + return Text$from_utf8(bytes); } public @@ -537,7 +537,7 @@ Path_t Path$write_unique_bytes(Path_t path, List_t bytes) { } public -Path_t Path$write_unique(Path_t path, Text_t text) { return Path$write_unique_bytes(path, Text$utf8_bytes(text)); } +Path_t Path$write_unique(Path_t path, Text_t text) { return Path$write_unique_bytes(path, Text$utf8(text)); } public Path_t Path$parent(Path_t path) { diff --git a/src/stdlib/text.c b/src/stdlib/text.c index 
d3757a0d..b6e43f5a 100644 --- a/src/stdlib/text.c +++ b/src/stdlib/text.c @@ -47,8 +47,8 @@ // (U+1F680) followed by THUMBS UP (U+1F44D), it will render on your screen as // two things: a female astronaut and a thumbs up, and this is how most people // will think about the text. If you wish to operate on the raw codepoints that -// comprise the message, you are free to do so with the `.utf32_codepoints()` -// method and `Text.from_codepoints()`, but this is not the default behavior. +// comprise the message, you are free to do so with the `.utf32()` +// method and `Text.from_utf32()`, but this is not the default behavior. // The behavior for the given example is that `text.length == 2`, `text[1]` is // the grapheme cluster representing a female astronaut emoji, and `text[2]` is // the grapheme cluster representing the thumbs up emoji. @@ -244,7 +244,7 @@ int32_t get_synthetic_grapheme(const ucs4_t *codepoints, int64_t utf32_len) { synthetic_graphemes[-grapheme_id - 1].utf32_cluster = codepoint_copy; arena += sizeof(ucs4_t[1 + utf32_len]); - // Copy UTF8 bytes into the arena and store where they live: + // Copy UTF8 units into the arena and store where they live: uint8_t *utf8_final = arena; memcpy(utf8_final, u8, sizeof(uint8_t[u8_len])); utf8_final[u8_len] = '\0'; // Add a terminating NUL byte @@ -520,8 +520,9 @@ static Text_t concat2(Text_t a, Text_t b) { return concat2_assuming_safe(a, b); } - Text_t glue = - Text$from_codepoints((List_t){.data = norm_buf, .length = (int64_t)norm_length, .stride = sizeof(int32_t)}); + OptionalText_t glue = + Text$from_utf32((List_t){.data = norm_buf, .length = (int64_t)norm_length, .stride = sizeof(int32_t)}); + assert(glue.length >= 0); if (normalized != norm_buf) free(normalized); @@ -815,7 +816,7 @@ OptionalText_t Text$from_strn(const char *str, size_t len) { public OptionalText_t Text$from_str(const char *str) { return str ? 
Text$from_strn(str, strlen(str)) : Text(""); } -static void u8_buf_append(Text_t text, char **buf, int64_t *capacity, int64_t *i) { +static void u8_buf_append(Text_t text, Byte_t **buf, int64_t *capacity, int64_t *i) { switch (text.tag) { case TEXT_ASCII: { if (*i + text.length > (int64_t)*capacity) { @@ -902,7 +903,7 @@ static void u8_buf_append(Text_t text, char **buf, int64_t *capacity, int64_t *i public char *Text$as_c_string(Text_t text) { int64_t capacity = text.length + 1; - char *buf = GC_MALLOC_ATOMIC((size_t)capacity); + Byte_t *buf = GC_MALLOC_ATOMIC((size_t)capacity); int64_t i = 0; u8_buf_append(text, &buf, &capacity, &i); @@ -911,7 +912,7 @@ char *Text$as_c_string(Text_t text) { buf = GC_REALLOC(buf, (size_t)capacity); } buf[i] = '\0'; - return buf; + return (char *)buf; } PUREFUNC public uint64_t Text$hash(const void *obj, const TypeInfo_t *info) { @@ -1359,33 +1360,39 @@ PUREFUNC public bool Text$equal_ignoring_case(Text_t a, Text_t b, Text_t languag public Text_t Text$upper(Text_t text, Text_t language) { if (text.length == 0) return text; - List_t codepoints = Text$utf32_codepoints(text); + List_t codepoints = Text$utf32(text); const char *uc_language = Text$as_c_string(language); size_t out_len = 0; ucs4_t *upper = u32_toupper(codepoints.data, (size_t)codepoints.length, uc_language, UNINORM_NFC, NULL, &out_len); - Text_t ret = Text$from_codepoints((List_t){.data = upper, .length = (int64_t)out_len, .stride = sizeof(int32_t)}); + OptionalText_t ret = + Text$from_utf32((List_t){.data = upper, .length = (int64_t)out_len, .stride = sizeof(int32_t)}); + assert(ret.length >= 0); return ret; } public Text_t Text$lower(Text_t text, Text_t language) { if (text.length == 0) return text; - List_t codepoints = Text$utf32_codepoints(text); + List_t codepoints = Text$utf32(text); const char *uc_language = Text$as_c_string(language); size_t out_len = 0; ucs4_t *lower = u32_tolower(codepoints.data, (size_t)codepoints.length, uc_language, UNINORM_NFC, NULL, 
&out_len); - Text_t ret = Text$from_codepoints((List_t){.data = lower, .length = (int64_t)out_len, .stride = sizeof(int32_t)}); + OptionalText_t ret = + Text$from_utf32((List_t){.data = lower, .length = (int64_t)out_len, .stride = sizeof(int32_t)}); + assert(ret.length >= 0); return ret; } public Text_t Text$title(Text_t text, Text_t language) { if (text.length == 0) return text; - List_t codepoints = Text$utf32_codepoints(text); + List_t codepoints = Text$utf32(text); const char *uc_language = Text$as_c_string(language); size_t out_len = 0; ucs4_t *title = u32_totitle(codepoints.data, (size_t)codepoints.length, uc_language, UNINORM_NFC, NULL, &out_len); - Text_t ret = Text$from_codepoints((List_t){.data = title, .length = (int64_t)out_len, .stride = sizeof(int32_t)}); + OptionalText_t ret = + Text$from_utf32((List_t){.data = title, .length = (int64_t)out_len, .stride = sizeof(int32_t)}); + assert(ret.length >= 0); return ret; } @@ -1541,7 +1548,33 @@ List_t Text$clusters(Text_t text) { } public -List_t Text$utf32_codepoints(Text_t text) { +List_t Text$utf8(Text_t text) { + int64_t capacity = text.length + 1; + Byte_t *buf = GC_MALLOC_ATOMIC((size_t)capacity); + int64_t i = 0; + u8_buf_append(text, &buf, &capacity, &i); + return (List_t){.data = buf, .length = i, .stride = 1, .atomic = 1}; +} + +public +List_t Text$utf16(Text_t text) { + if (text.length == 0) return (List_t){}; + List_t utf32 = Text$utf32(text); + List_t utf16 = {.free = MIN(LIST_MAX_FREE_ENTRIES, (uint64_t)utf32.length), .atomic = 1}; + utf16.data = GC_MALLOC_ATOMIC(sizeof(int32_t[utf16.free])); + for (int64_t i = 0; i < utf32.length; i++) { + uint16_t u16_buf[4]; + size_t u16_len = sizeof(u16_buf) / sizeof(u16_buf[0]); + uint16_t *chunk_u16 = u32_to_u16(utf32.data + utf32.stride * i, 1, u16_buf, &u16_len); + if (chunk_u16 == NULL) fail("Invalid codepoints encountered!"); + List$insert_all(&utf16, (List_t){.data = u16_buf, .stride = sizeof(uint16_t), .length = (int64_t)u16_len}, I(0), + 
sizeof(uint16_t)); + } + return utf16; +} + +public +List_t Text$utf32(Text_t text) { List_t codepoints = {.atomic = 1}; TextIter_t state = NEW_TEXT_ITER_STATE(text); for (int64_t i = 0; i < text.length; i++) { @@ -1558,12 +1591,6 @@ List_t Text$utf32_codepoints(Text_t text) { return codepoints; } -public -List_t Text$utf8_bytes(Text_t text) { - const char *str = Text$as_c_string(text); - return (List_t){.length = (int64_t)strlen(str), .stride = 1, .atomic = 1, .data = (void *)str}; -} - static INLINE const char *codepoint_name(ucs4_t c) { char *name = GC_MALLOC_ATOMIC(UNINAME_MAX); char *found_name = unicode_character_name(c, name); @@ -1595,7 +1622,28 @@ List_t Text$codepoint_names(Text_t text) { } public -Text_t Text$from_codepoints(List_t codepoints) { +OptionalText_t Text$from_utf8(List_t units) { + if (units.stride != sizeof(int8_t)) List$compact(&units, sizeof(int8_t)); + return Text$from_strn(units.data, (size_t)units.length); +} + +public +OptionalText_t Text$from_utf16(List_t units) { + if (units.length == 0) return EMPTY_TEXT; + if (units.stride != sizeof(int16_t)) List$compact(&units, sizeof(int16_t)); + + size_t length = 256; + uint8_t buf[length]; + uint8_t *u8 = u16_to_u8(units.data, (size_t)units.length, buf, &length); + Text_t ret = + Text$from_utf8((List_t){.data = u8, .length = (int64_t)length, .stride = sizeof(uint8_t), .atomic = 1}); + if (u8 != buf) free(u8); + return ret; +} + +public +OptionalText_t Text$from_utf32(List_t codepoints) { + if (codepoints.length == 0) return EMPTY_TEXT; if (codepoints.stride != sizeof(uint32_t)) List$compact(&codepoints, sizeof(uint32_t)); List_t graphemes = {}; @@ -1607,6 +1655,7 @@ Text_t Text$from_codepoints(List_t codepoints) { // Buffer for normalized cluster: uint32_t buf[256]; size_t u32_normlen = sizeof(buf) / sizeof(buf[0]); + if (u32_check(pos, (size_t)(next - pos)) != NULL) return NONE_TEXT; uint32_t *u32s_normalized = u32_normalize(UNINORM_NFC, pos, (size_t)(next - pos), buf, &u32_normlen); int32_t 
g = get_synthetic_grapheme(u32s_normalized, (int64_t)u32_normlen); @@ -1622,8 +1671,9 @@ Text_t Text$from_codepoints(List_t codepoints) { .data = (void *)next, .stride = sizeof(int32_t), }; - return concat2_assuming_safe(Text$from_components(graphemes, unique_clusters), - Text$from_codepoints(remaining_codepoints)); + OptionalText_t remainder = Text$from_utf32(remaining_codepoints); + if (remainder.length < 0) return NONE_TEXT; + return concat2_assuming_safe(Text$from_components(graphemes, unique_clusters), remainder); } } return Text$from_components(graphemes, unique_clusters); @@ -1639,14 +1689,7 @@ OptionalText_t Text$from_codepoint_names(List_t codepoint_names) { if (codepoint == UNINAME_INVALID) return NONE_TEXT; List$insert(&codepoints, &codepoint, I_small(0), sizeof(ucs4_t)); } - return Text$from_codepoints(codepoints); -} - -public -OptionalText_t Text$from_bytes(List_t bytes) { - if (bytes.stride != sizeof(int8_t)) List$compact(&bytes, sizeof(int8_t)); - - return Text$from_strn(bytes.data, (size_t)bytes.length); + return Text$from_utf32(codepoints); } public diff --git a/src/stdlib/text.h b/src/stdlib/text.h index 7f7fc2c6..4d2f16b8 100644 --- a/src/stdlib/text.h +++ b/src/stdlib/text.h @@ -82,12 +82,14 @@ Closure_t Text$by_split_any(Text_t text, Text_t delimiters); Text_t Text$trim(Text_t text, Text_t to_trim, bool left, bool right); char *Text$as_c_string(Text_t text); List_t Text$clusters(Text_t text); -List_t Text$utf32_codepoints(Text_t text); -List_t Text$utf8_bytes(Text_t text); +List_t Text$utf8(Text_t text); +List_t Text$utf16(Text_t text); +List_t Text$utf32(Text_t text); List_t Text$codepoint_names(Text_t text); -Text_t Text$from_codepoints(List_t codepoints); +OptionalText_t Text$from_utf8(List_t units); +OptionalText_t Text$from_utf16(List_t units); +OptionalText_t Text$from_utf32(List_t codepoints); OptionalText_t Text$from_codepoint_names(List_t codepoint_names); -OptionalText_t Text$from_bytes(List_t bytes); List_t Text$lines(Text_t text); 
Closure_t Text$by_line(Text_t text); Text_t Text$join(Text_t glue, List_t pieces); @@ -88,10 +88,9 @@ static OptionalText_t show_codegen = NONE_TEXT, #if defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__NetBSD__) || defined(__APPLE__) " -D_BSD_SOURCE" #endif - " -DGC_THREADS" - " -I/usr/local/include"), - ldlibs = Text("-lgc -lm -lgmp -lunistring -ltomo_" TOMO_VERSION), - ldflags = Text(" -L/usr/local/lib"), optimization = Text("2"), cc = Text(DEFAULT_C_COMPILER); + " -DGC_THREADS"), + ldlibs = Text("-lgc -lm -lgmp -lunistring -ltomo_" TOMO_VERSION), ldflags = Text(""), + optimization = Text("2"), cc = Text(DEFAULT_C_COMPILER); static Text_t config_summary, // This will be either "" or "sudo -u <user>" or "doas -u <user>" @@ -265,7 +264,7 @@ int main(int argc, char *argv[]) { cflags = Texts(cflags, Text(" -Wno-parentheses-equality")); } - ldflags = Texts("-Wl,-rpath,'", TOMO_PATH, "/lib',-rpath,/usr/local/lib ", ldflags); + ldflags = Texts("-Wl,-rpath,'", TOMO_PATH, "/lib' ", ldflags); #ifdef __APPLE__ cflags = Texts(cflags, Text(" -I/opt/homebrew/include")); @@ -275,7 +274,8 @@ int main(int argc, char *argv[]) { if (show_codegen.length > 0 && Text$equal_values(show_codegen, Text("pretty"))) show_codegen = Text("{ sed '/^#line/d;/^$/d' | clang-format | bat -l c -P; }"); - config_summary = Text$from_str(String(cc, " ", cflags, " -O", optimization)); + config_summary = Texts("TOMO_VERSION=", TOMO_VERSION, "\n", "COMPILER=", cc, " ", cflags, " -O", optimization, "\n", + "SOURCE_MAPPING=", source_mapping ? 
Text("yes") : Text("no"), "\n"); Text_t owner = Path$owner(Path$from_str(TOMO_PATH), true); Text_t user = Text$from_str(getenv("USER")); @@ -411,26 +411,6 @@ Path_t build_file(Path_t path, const char *extension) { return Path$child(build_dir, Texts(Path$base_name(path), Text$from_str(extension))); } -static const char *get_version(Path_t lib_dir) { - Path_t changes_file = Path$child(lib_dir, Text("CHANGES.md")); - OptionalText_t changes = Path$read(changes_file); - if (changes.length <= 0) { - return "v0.0"; - } - const char *changes_str = Text$as_c_string(Texts(Text("\n"), changes)); - const char *version_line = strstr(changes_str, "\n## "); - if (version_line == NULL) - print_err("CHANGES.md in ", lib_dir, " does not have any valid versions starting with '## '"); - return String(string_slice(version_line + 4, strcspn(version_line + 4, "\r\n"))); -} - -static Path_t with_version_suffix(Path_t lib_dir) { - Text_t suffix = Texts(Text("_"), Text$from_str(get_version(lib_dir))); - return Text$ends_with(Path$base_name(lib_dir), suffix, NULL) - ? 
lib_dir - : Path$sibling(lib_dir, Texts(Path$base_name(lib_dir), suffix)); -} - void build_library(Path_t lib_dir) { lib_dir = Path$resolved(lib_dir, Path$current_dir()); if (!Path$is_directory(lib_dir, true)) print_err("Not a valid directory: ", lib_dir); @@ -441,8 +421,8 @@ void build_library(Path_t lib_dir) { compile_files(env, tm_files, &object_files, &extra_ldlibs); - Text_t versioned_dir = Path$base_name(with_version_suffix(lib_dir)); - Path_t shared_lib = Path$child(lib_dir, Texts(Text("lib"), versioned_dir, Text(SHARED_SUFFIX))); + Text_t lib_name = get_library_name(lib_dir); + Path_t shared_lib = Path$child(lib_dir, Texts(Text("lib"), lib_name, Text(SHARED_SUFFIX))); if (!is_stale_for_any(shared_lib, object_files, false)) { if (verbose) whisper("Unchanged: ", shared_lib); return; @@ -453,7 +433,7 @@ void build_library(Path_t lib_dir) { " -Wl,-install_name,@rpath/'lib", Path$base_name(lib_dir), version_suffix, SHARED_SUFFIX, "'" #else - " -Wl,-soname,'lib", versioned_dir, SHARED_SUFFIX, + " -Wl,-soname,'lib", lib_name, SHARED_SUFFIX, "'" #endif " -shared ", @@ -467,12 +447,11 @@ void build_library(Path_t lib_dir) { } void install_library(Path_t lib_dir) { - Text_t lib_dir_name = Path$base_name(lib_dir); - Text_t versioned_dir = Path$base_name(with_version_suffix(lib_dir)); - Path_t dest = Path$child(Path$from_str(String(TOMO_PATH, "/lib/tomo_" TOMO_VERSION)), versioned_dir); + Text_t lib_name = get_library_name(lib_dir); + Path_t dest = Path$child(Path$from_str(String(TOMO_PATH, "/lib/tomo_" TOMO_VERSION)), lib_name); print("Installing ", lib_dir, " into ", dest); if (!Path$equal_values(lib_dir, dest)) { - if (verbose) whisper("Clearing out any pre-existing version of ", lib_dir_name); + if (verbose) whisper("Clearing out any pre-existing version of ", lib_name); xsystem(as_owner, "rm -rf '", dest, "'"); if (verbose) whisper("Moving files to ", dest); xsystem(as_owner, "mkdir -p '", dest, "'"); @@ -481,15 +460,15 @@ void install_library(Path_t lib_dir) { } 
// If we have `debugedit` on this system, use it to remap the debugging source information // to point to the installed version of the source file. Otherwise, fail silently. - if (verbose) whisper("Updating debug symbols for ", dest, "/lib", lib_dir_name, SHARED_SUFFIX); + if (verbose) whisper("Updating debug symbols for ", dest, "/lib", lib_name, SHARED_SUFFIX); int result = system(String(as_owner, "debugedit -b ", lib_dir, " -d '", dest, "'" " '", - dest, "/lib", versioned_dir, SHARED_SUFFIX, + dest, "/lib", lib_name, SHARED_SUFFIX, "' " ">/dev/null 2>/dev/null")); (void)result; - print("Installed \033[1m", lib_dir_name, "\033[m to ", TOMO_PATH, "/lib/tomo_" TOMO_VERSION "/", versioned_dir); + print("Installed \033[1m", lib_dir, "\033[m to ", TOMO_PATH, "/lib/tomo_" TOMO_VERSION "/", lib_name); } void compile_files(env_t *env, List_t to_compile, List_t *object_files, List_t *extra_ldlibs) { @@ -647,7 +626,7 @@ void build_file_dependency_graph(Path_t path, Table_t *to_compile, Table_t *to_l break; } case USE_MODULE: { - module_info_t mod = get_module_info(stmt_ast); + module_info_t mod = get_used_module_info(stmt_ast); const char *full_name = mod.version ? 
String(mod.name, "_", mod.version) : mod.name; Text_t lib = Texts("-Wl,-rpath,'", TOMO_PATH, "/lib/tomo_" TOMO_VERSION "/", Text$from_str(full_name), "' '", TOMO_PATH, "/lib/tomo_" TOMO_VERSION "/", Text$from_str(full_name), "/lib", @@ -802,7 +781,7 @@ void transpile_code(env_t *base_env, Path_t path) { Text$print(c_file, c_code); - const char *version = get_version(Path$parent(path)); + const char *version = get_library_version(Path$parent(path)); binding_t *main_binding = get_binding(module_env, "main"); if (main_binding && main_binding->type->tag == FunctionType) { type_t *ret = Match(main_binding->type, FunctionType)->ret; @@ -857,8 +836,6 @@ Path_t compile_executable(env_t *base_env, Path_t path, Path_t exe_path, List_t return exe_path; } - FILE *runner = run_cmd(cc, " ", cflags, " -O", optimization, " ", ldflags, " ", ldlibs, " ", - list_text(extra_ldlibs), " ", paths_str(object_files), " -x c - -o ", exe_path); Text_t program = Texts("extern int parse_and_run$$", main_binding->code, "(int argc, char *argv[]);\n" "__attribute__ ((noinline))\n" @@ -867,6 +844,11 @@ Path_t compile_executable(env_t *base_env, Path_t path, Path_t exe_path, List_t main_binding->code, "(argc, argv);\n" "}\n"); + Path_t runner_file = build_file(path, ".runner.c"); + Path$write(runner_file, program, 0644); + + FILE *runner = run_cmd(cc, " ", cflags, " -O", optimization, " ", ldflags, " ", ldlibs, " ", + list_text(extra_ldlibs), " ", paths_str(object_files), " ", runner_file, " -o ", exe_path); if (show_codegen.length > 0) { FILE *out = run_cmd(show_codegen); diff --git a/src/typecheck.c b/src/typecheck.c index 07f8aac4..89a21fc3 100644 --- a/src/typecheck.c +++ b/src/typecheck.c @@ -165,15 +165,15 @@ PUREFUNC type_t *get_math_type(env_t *env, ast_t *ast, type_t *lhs_t, type_t *rh return NULL; } -static env_t *load_module(env_t *env, ast_t *module_ast) { - DeclareMatch(use, module_ast, Use); +static env_t *load_module(env_t *env, ast_t *use_ast) { + DeclareMatch(use, use_ast, Use); 
switch (use->what) { case USE_LOCAL: { - Path_t source_path = Path$from_str(module_ast->file->filename); + Path_t source_path = Path$from_str(use_ast->file->filename); Path_t source_dir = Path$parent(source_path); Path_t used_path = Path$resolved(Path$from_str(use->path), source_dir); - if (!Path$exists(used_path)) code_err(module_ast, "No such file exists: ", quoted(use->path)); + if (!Path$exists(used_path)) code_err(use_ast, "No such file exists: ", quoted(use->path)); env_t *module_env = Table$str_get(*env->imports, String(used_path)); if (module_env) return module_env; @@ -183,12 +183,12 @@ static env_t *load_module(env_t *env, ast_t *module_ast) { return load_module_env(env, ast); } case USE_MODULE: { - module_info_t mod = get_module_info(module_ast); + module_info_t mod = get_used_module_info(use_ast); glob_t tm_files; const char *folder = mod.version ? String(mod.name, "_", mod.version) : mod.name; if (glob(String(TOMO_PATH, "/lib/tomo_" TOMO_VERSION "/", folder, "/[!._0-9]*.tm"), GLOB_TILDE, NULL, &tm_files) != 0) { - if (!try_install_module(mod, true)) code_err(module_ast, "Couldn't find or install library: ", folder); + if (!try_install_module(mod, true)) code_err(use_ast, "Couldn't find or install library: ", folder); } env_t *module_env = fresh_scope(env); diff --git a/test/text.tm b/test/text.tm index 93a1f2a8..4ffea7b6 100644 --- a/test/text.tm +++ b/test/text.tm @@ -51,21 +51,21 @@ func main() amelie := "Am\{UE9}lie" >> amelie.split() = ["A", "m", "é", "l", "i", "e"] - >> amelie.utf32_codepoints() + >> amelie.utf32() = [65, 109, 233, 108, 105, 101] - >> amelie.bytes() + >> amelie.utf8() = [0x41, 0x6D, 0xC3, 0xA9, 0x6C, 0x69, 0x65] - >> Text.from_bytes([0x41, 0x6D, 0xC3, 0xA9, 0x6C, 0x69, 0x65])! + >> Text.from_utf8([0x41, 0x6D, 0xC3, 0xA9, 0x6C, 0x69, 0x65])! 
= "Amélie" - >> Text.from_bytes([Byte(0xFF)]) + >> Text.from_utf8([Byte(0xFF)]) = none amelie2 := "Am\{U65}\{U301}lie" >> amelie2.split() = ["A", "m", "é", "l", "i", "e"] - >> amelie2.utf32_codepoints() + >> amelie2.utf32() = [65, 109, 233, 108, 105, 101] - >> amelie2.bytes() + >> amelie2.utf8() = [0x41, 0x6D, 0xC3, 0xA9, 0x6C, 0x69, 0x65] >> amelie.codepoint_names() @@ -120,8 +120,8 @@ func main() >> c.codepoint_names() = ["LATIN CAPITAL LETTER E WITH ACUTE", "COMBINING VERTICAL LINE BELOW"] assert c == Text.from_codepoint_names(c.codepoint_names())! - assert c == Text.from_codepoints(c.utf32_codepoints()) - assert c == Text.from_bytes(c.bytes())! + assert c == Text.from_utf32(c.utf32())! + assert c == Text.from_utf8(c.utf8())! >> "one\ntwo\nthree".lines() = ["one", "two", "three"] @@ -191,7 +191,7 @@ func main() = 1 >> house.codepoint_names() = ["CJK Unified Ideographs-5BB6"] - >> house.utf32_codepoints() + >> house.utf32() = [23478] >> "🐧".codepoint_names() @@ -250,24 +250,24 @@ func main() do - concat := "e" ++ Text.from_codepoints([Int32(0x300)]) + concat := "e" ++ Text.from_utf32([Int32(0x300)])! >> concat.length = 1 - concat2 := concat ++ Text.from_codepoints([Int32(0x302)]) + concat2 := concat ++ Text.from_utf32([Int32(0x302)])! >> concat2.length = 1 - concat3 := concat2 ++ Text.from_codepoints([Int32(0x303)]) + concat3 := concat2 ++ Text.from_utf32([Int32(0x303)])! >> concat3.length = 1 - final := Text.from_codepoints([Int32(0x65), Int32(0x300), Int32(0x302), Int32(0x303)]) + final := Text.from_utf32([Int32(0x65), Int32(0x300), Int32(0x302), Int32(0x303)])! >> final.length = 1 assert concat3 == final - concat4 := Text.from_codepoints([Int32(0x65), Int32(0x300)]) ++ Text.from_codepoints([Int32(0x302), Int32(0x303)]) + concat4 := Text.from_utf32([Int32(0x65), Int32(0x300)])! ++ Text.from_utf32([Int32(0x302), Int32(0x303)])! 
>> concat4.length = 1 assert concat4 == final @@ -309,3 +309,13 @@ func main() = "" >> " ".trim(" ,", left=no) = "" + + do + test := "𤭢" + assert test.utf32() == [150370] + assert test.utf16() == [-10158, -8350] + assert test.utf8() == [0xf0, 0xa4, 0xad, 0xa2] + + assert Text.from_utf32([150370]) == test + assert Text.from_utf16([-10158, -8350]) == test + assert Text.from_utf8([0xf0, 0xa4, 0xad, 0xa2]) == test |
