Merge branch 'main' into simplified-quotes

author: Bruce Hill <bruce@bruce-hill.com> 2025-09-09 20:09:22 -0400
committer: Bruce Hill <bruce@bruce-hill.com> 2025-09-09 20:09:22 -0400
commit: ba6d1b12291398c804f5cdf653265a0322ca4a01 (patch)
tree: 7e8c863817a64ec5ba5b1e457d05581bddf00852 /api
parent: ca9eabb810af3aca14aad5c8e0266ddc9286dd99 (diff)
parent: d64dcab138a34d5f5105e08f0a840f7cb5a1d159 (diff)
3 files changed, 245 insertions, 108 deletions
diff --git a/api/api.md b/api/api.md
index 09d24e73..2eeecd89 100644
--- a/api/api.md
+++ b/api/api.md
@@ -4182,27 +4182,6 @@ for chunk in text.by_split_any(",;")
 say(chunk)
 
 ```
-## Text.bytes
-
-```tomo
-Text.bytes : func(text: Text -> [Byte])
-```
-
-Converts a `Text` value to a list of bytes representing a UTF8 encoding of the text.
-
-Argument | Type | Description | Default
----------|------|-------------|---------
-text | `Text` | The text to be converted to UTF8 bytes.  | -
-
-**Return:** A list of bytes (`[Byte]`) representing the text in UTF8 encoding.
-
-
-**Example:**
-```tomo
->> "Amélie".bytes()
-= [65, 109, 195, 169, 108, 105, 101]
-
-```
 ## Text.caseless_equals
 
 ```tomo
@@ -4306,29 +4285,6 @@ first | `Int` | The index to begin the slice.  | -
 = "lo"
 
 ```
-## Text.from_bytes
-
-```tomo
-Text.from_bytes : func(bytes: [Byte] -> [Text])
-```
-
-Returns text that has been constructed from the given UTF8 bytes.
-
-The text will be normalized, so the resulting text's UTF8 bytes may not exactly match the input.
-
-Argument | Type | Description | Default
----------|------|-------------|---------
-bytes | `[Byte]` | The UTF-8 bytes of the desired text.  | -
-
-**Return:** A new text based on the input UTF8 bytes after normalization has been applied.
-
-
-**Example:**
-```tomo
->> Text.from_bytes([195, 133, 107, 101])
-= "Åke"
-
-```
 ## Text.from_c_string
 
 ```tomo
@@ -4377,10 +4333,35 @@ codepoint_names | `[Text]` | The names of each codepoint in the desired text (ca
 = "Åke"
 
 ```
-## Text.from_codepoints
+## Text.from_utf16
+
+```tomo
+Text.from_utf16 : func(bytes: [Int16] -> [Text])
+```
+
+Returns text that has been constructed from the given UTF16 sequence.
+
+The text will be normalized, so the resulting text's UTF16 sequence may not exactly match the input.
+
+Argument | Type | Description | Default
+---------|------|-------------|---------
+bytes | `[Int16]` | The UTF-16 integers of the desired text.  | -
+
+**Return:** A new text based on the input UTF16 sequence after normalization has been applied.
+
+
+**Example:**
+```tomo
+>> Text.from_utf16([197, 107, 101])
+= "Åke"
+>> Text.from_utf16([12371, 12435, 12395, 12385, 12399, 19990, 30028])
+= "こんにちは世界"
+
+```
+## Text.from_utf32
 
 ```tomo
-Text.from_codepoints : func(codepoints: [Int32] -> [Text])
+Text.from_utf32 : func(codepoints: [Int32] -> [Text])
 ```
 
 Returns text that has been constructed from the given UTF32 codepoints.
@@ -4396,7 +4377,30 @@ codepoints | `[Int32]` | The UTF32 codepoints in the desired text.  | -
 
 **Example:**
 ```tomo
->> Text.from_codepoints([197, 107, 101])
+>> Text.from_utf32([197, 107, 101])
+= "Åke"
+
+```
+## Text.from_utf8
+
+```tomo
+Text.from_utf8 : func(bytes: [Byte] -> [Text])
+```
+
+Returns text that has been constructed from the given UTF8 bytes.
+
+The text will be normalized, so the resulting text's UTF8 bytes may not exactly match the input.
+
+Argument | Type | Description | Default
+---------|------|-------------|---------
+bytes | `[Byte]` | The UTF-8 bytes of the desired text.  | -
+
+**Return:** A new text based on the input UTF8 bytes after normalization has been applied.
+
+
+**Example:**
+```tomo
+>> Text.from_utf8([195, 133, 107, 101])
 = "Åke"
 
 ```
@@ -4916,10 +4920,33 @@ language | `Text` | The ISO 639 language code for which casing rules to use.  |
 = "İ"
 
 ```
-## Text.utf32_codepoints
+## Text.utf16
 
 ```tomo
-Text.utf32_codepoints : func(text: Text -> [Int32])
+Text.utf16 : func(text: Text -> [Int16])
+```
+
+Returns a list of UTF-16 code units for the UTF16 encoding of the text.
+
+Argument | Type | Description | Default
+---------|------|-------------|---------
+text | `Text` | The text from which to extract UTF-16 code units.  | -
+
+**Return:** A list of 16-bit integer UTF-16 code units (`[Int16]`).
+
+
+**Example:**
+```tomo
+>> "Åke".utf16()
+= [197, 107, 101]
+>> "こんにちは世界".utf16()
+= [12371, 12435, 12395, 12385, 12399, 19990, 30028]
+
+```
+## Text.utf32
+
+```tomo
+Text.utf32 : func(text: Text -> [Int32])
 ```
 
 Returns a list of Unicode code points for UTF32 encoding of the text.
@@ -4933,10 +4960,31 @@ text | `Text` | The text from which to extract Unicode code points.  | -
 
 **Example:**
 ```tomo
->> "Amélie".utf32_codepoints()
+>> "Amélie".utf32()
 = [65, 109, 233, 108, 105, 101]
 
 ```
+## Text.utf8
+
+```tomo
+Text.utf8 : func(text: Text -> [Byte])
+```
+
+Converts a `Text` value to a list of bytes representing a UTF8 encoding of the text.
+
+Argument | Type | Description | Default
+---------|------|-------------|---------
+text | `Text` | The text to be converted to UTF8 bytes.  | -
+
+**Return:** A list of bytes (`[Byte]`) representing the text in UTF8 encoding.
+
+
+**Example:**
+```tomo
+>> "Amélie".utf8()
+= [65, 109, 195, 169, 108, 105, 101]
+
+```
 ## Text.width
 
 ```tomo
diff --git a/api/text.md b/api/text.md
index bdff6841..0d50ee24 100644
--- a/api/text.md
+++ b/api/text.md
@@ -130,27 +130,6 @@ for chunk in text.by_split_any(",;")
 say(chunk)
 
 ```
-## Text.bytes
-
-```tomo
-Text.bytes : func(text: Text -> [Byte])
-```
-
-Converts a `Text` value to a list of bytes representing a UTF8 encoding of the text.
-
-Argument | Type | Description | Default
----------|------|-------------|---------
-text | `Text` | The text to be converted to UTF8 bytes.  | -
-
-**Return:** A list of bytes (`[Byte]`) representing the text in UTF8 encoding.
-
-
-**Example:**
-```tomo
->> "Amélie".bytes()
-= [65, 109, 195, 169, 108, 105, 101]
-
-```
 ## Text.caseless_equals
 
 ```tomo
@@ -254,29 +233,6 @@ first | `Int` | The index to begin the slice.  | -
 = "lo"
 
 ```
-## Text.from_bytes
-
-```tomo
-Text.from_bytes : func(bytes: [Byte] -> [Text])
-```
-
-Returns text that has been constructed from the given UTF8 bytes.
-
-The text will be normalized, so the resulting text's UTF8 bytes may not exactly match the input.
-
-Argument | Type | Description | Default
----------|------|-------------|---------
-bytes | `[Byte]` | The UTF-8 bytes of the desired text.  | -
-
-**Return:** A new text based on the input UTF8 bytes after normalization has been applied.
-
-
-**Example:**
-```tomo
->> Text.from_bytes([195, 133, 107, 101])
-= "Åke"
-
-```
 ## Text.from_c_string
 
 ```tomo
@@ -325,10 +281,35 @@ codepoint_names | `[Text]` | The names of each codepoint in the desired text (ca
 = "Åke"
 
 ```
-## Text.from_codepoints
+## Text.from_utf16
+
+```tomo
+Text.from_utf16 : func(bytes: [Int16] -> [Text])
+```
+
+Returns text that has been constructed from the given UTF16 sequence.
+
+The text will be normalized, so the resulting text's UTF16 sequence may not exactly match the input.
+
+Argument | Type | Description | Default
+---------|------|-------------|---------
+bytes | `[Int16]` | The UTF-16 integers of the desired text.  | -
+
+**Return:** A new text based on the input UTF16 sequence after normalization has been applied.
+
+
+**Example:**
+```tomo
+>> Text.from_utf16([197, 107, 101])
+= "Åke"
+>> Text.from_utf16([12371, 12435, 12395, 12385, 12399, 19990, 30028])
+= "こんにちは世界"
+
+```
+## Text.from_utf32
 
 ```tomo
-Text.from_codepoints : func(codepoints: [Int32] -> [Text])
+Text.from_utf32 : func(codepoints: [Int32] -> [Text])
 ```
 
 Returns text that has been constructed from the given UTF32 codepoints.
@@ -344,7 +325,30 @@ codepoints | `[Int32]` | The UTF32 codepoints in the desired text.  | -
 
 **Example:**
 ```tomo
->> Text.from_codepoints([197, 107, 101])
+>> Text.from_utf32([197, 107, 101])
+= "Åke"
+
+```
+## Text.from_utf8
+
+```tomo
+Text.from_utf8 : func(bytes: [Byte] -> [Text])
+```
+
+Returns text that has been constructed from the given UTF8 bytes.
+
+The text will be normalized, so the resulting text's UTF8 bytes may not exactly match the input.
+
+Argument | Type | Description | Default
+---------|------|-------------|---------
+bytes | `[Byte]` | The UTF-8 bytes of the desired text.  | -
+
+**Return:** A new text based on the input UTF8 bytes after normalization has been applied.
+
+
+**Example:**
+```tomo
+>> Text.from_utf8([195, 133, 107, 101])
 = "Åke"
 
 ```
@@ -864,10 +868,33 @@ language | `Text` | The ISO 639 language code for which casing rules to use.  |
 = "İ"
 
 ```
-## Text.utf32_codepoints
+## Text.utf16
 
 ```tomo
-Text.utf32_codepoints : func(text: Text -> [Int32])
+Text.utf16 : func(text: Text -> [Int16])
+```
+
+Returns a list of UTF-16 code units for the UTF16 encoding of the text.
+
+Argument | Type | Description | Default
+---------|------|-------------|---------
+text | `Text` | The text from which to extract UTF-16 code units.  | -
+
+**Return:** A list of 16-bit integer UTF-16 code units (`[Int16]`).
+
+
+**Example:**
+```tomo
+>> "Åke".utf16()
+= [197, 107, 101]
+>> "こんにちは世界".utf16()
+= [12371, 12435, 12395, 12385, 12399, 19990, 30028]
+
+```
+## Text.utf32
+
+```tomo
+Text.utf32 : func(text: Text -> [Int32])
 ```
 
 Returns a list of Unicode code points for UTF32 encoding of the text.
@@ -881,10 +908,31 @@ text | `Text` | The text from which to extract Unicode code points.  | -
 
 **Example:**
 ```tomo
->> "Amélie".utf32_codepoints()
+>> "Amélie".utf32()
 = [65, 109, 233, 108, 105, 101]
 
 ```
+## Text.utf8
+
+```tomo
+Text.utf8 : func(text: Text -> [Byte])
+```
+
+Converts a `Text` value to a list of bytes representing a UTF8 encoding of the text.
+
+Argument | Type | Description | Default
+---------|------|-------------|---------
+text | `Text` | The text to be converted to UTF8 bytes.  | -
+
+**Return:** A list of bytes (`[Byte]`) representing the text in UTF8 encoding.
+
+
+**Example:**
+```tomo
+>> "Amélie".utf8()
+= [65, 109, 195, 169, 108, 105, 101]
+
+```
 ## Text.width
 
 ```tomo
diff --git a/api/text.yaml b/api/text.yaml
index c8d70f0b..d209f4b3 100644
--- a/api/text.yaml
+++ b/api/text.yaml
@@ -129,7 +129,7 @@ Text.by_split_any:
     # Prints: "one" then "two" then "three":
     say(chunk)
 
-Text.bytes:
+Text.utf8:
   short: get UTF8 bytes
   description: >
     Converts a `Text` value to a list of bytes representing a UTF8 encoding of
@@ -144,7 +144,7 @@ Text.bytes:
       description: >
         The text to be converted to UTF8 bytes.
   example: |
-    >> "Amélie".bytes()
+    >> "Amélie".utf8()
     = [65, 109, 195, 169, 108, 105, 101]
 
 Text.caseless_equals:
@@ -255,7 +255,7 @@ Text.from:
     >> "hello".from(-2)
     = "lo"
 
-Text.from_bytes:
+Text.from_utf8:
   short: convert UTF8 byte list to text
   description: >
     Returns text that has been constructed from the given UTF8 bytes.
@@ -272,9 +272,31 @@ Text.from_bytes:
       description: >
         The UTF-8 bytes of the desired text.
   example: |
-    >> Text.from_bytes([195, 133, 107, 101])
+    >> Text.from_utf8([195, 133, 107, 101])
     = "Åke"
 
+Text.from_utf16:
+  short: convert UTF16 list to text
+  description: >
+    Returns text that has been constructed from the given UTF16 sequence.
+  note: >
+    The text will be normalized, so the resulting text's UTF16 sequence may not
+    exactly match the input.
+  return:
+    type: '[Text]'
+    description: >
+      A new text based on the input UTF16 sequence after normalization has been applied.
+  args:
+    bytes:
+      type: '[Int16]'
+      description: >
+        The UTF-16 integers of the desired text.
+  example: |
+    >> Text.from_utf16([197, 107, 101])
+    = "Åke"
+    >> Text.from_utf16([12371, 12435, 12395, 12385, 12399, 19990, 30028])
+    = "こんにちは世界"
+
 Text.from_c_string:
   short: convert C-style string to text
   description: >
@@ -318,7 +340,7 @@ Text.from_codepoint_names:
     ]
     = "Åke"
 
-Text.from_codepoints:
+Text.from_utf32:
   short: convert UTF32 codepoints to text
   description: >
     Returns text that has been constructed from the given UTF32 codepoints.
@@ -335,7 +357,7 @@ Text.from_codepoints:
       description: >
         The UTF32 codepoints in the desired text.
   example: |
-    >> Text.from_codepoints([197, 107, 101])
+    >> Text.from_utf32([197, 107, 101])
     = "Åke"
 
 Text.has:
@@ -906,7 +928,26 @@ Text.upper:
     >> "i".upper(language="tr_TR")
     = "İ"
 
-Text.utf32_codepoints:
+Text.utf16:
+  short: get UTF16 code units
+  description: >
+    Returns a list of UTF-16 code units for the UTF16 encoding of the text.
+  return:
+    type: '[Int16]'
+    description: >
+      A list of 16-bit integer UTF-16 code units (`[Int16]`).
+  args:
+    text:
+      type: 'Text'
+      description: >
+        The text from which to extract UTF-16 code units.
+  example: |
+    >> "Åke".utf16()
+    = [197, 107, 101]
+    >> "こんにちは世界".utf16()
+    = [12371, 12435, 12395, 12385, 12399, 19990, 30028]
+
+Text.utf32:
   short: get UTF32 codepoints
   description: >
     Returns a list of Unicode code points for UTF32 encoding of the text.
@@ -920,7 +961,7 @@ Text.utf32_codepoints:
       description: >
         The text from which to extract Unicode code points.
   example: |
-    >> "Amélie".utf32_codepoints()
+    >> "Amélie".utf32()
     = [65, 109, 233, 108, 105, 101]
 
 Text.width:
author	Bruce Hill <bruce@bruce-hill.com>	2025-09-09 20:09:22 -0400
committer	Bruce Hill <bruce@bruce-hill.com>	2025-09-09 20:09:22 -0400
commit	ba6d1b12291398c804f5cdf653265a0322ca4a01 (patch)
tree	7e8c863817a64ec5ba5b1e457d05581bddf00852 /api
parent	ca9eabb810af3aca14aad5c8e0266ddc9286dd99 (diff)
parent	d64dcab138a34d5f5105e08f0a840f7cb5a1d159 (diff)