From d64dcab138a34d5f5105e08f0a840f7cb5a1d159 Mon Sep 17 00:00:00 2001 From: Bruce Hill Date: Tue, 9 Sep 2025 19:58:07 -0400 Subject: Overhaul to the unicode encoding/decoding methods for text --- man/man3/tomo-Text.bytes.3 | 34 ------------------------------ man/man3/tomo-Text.from_bytes.3 | 37 --------------------------------- man/man3/tomo-Text.from_codepoints.3 | 37 --------------------------------- man/man3/tomo-Text.from_utf16.3 | 39 +++++++++++++++++++++++++++++++++++ man/man3/tomo-Text.from_utf32.3 | 37 +++++++++++++++++++++++++++++++++ man/man3/tomo-Text.from_utf8.3 | 37 +++++++++++++++++++++++++++++++++ man/man3/tomo-Text.utf16.3 | 36 ++++++++++++++++++++++++++++++++ man/man3/tomo-Text.utf32.3 | 34 ++++++++++++++++++++++++++++++ man/man3/tomo-Text.utf32_codepoints.3 | 34 ------------------------------ man/man3/tomo-Text.utf8.3 | 34 ++++++++++++++++++++++++++++++ 10 files changed, 217 insertions(+), 142 deletions(-) delete mode 100644 man/man3/tomo-Text.bytes.3 delete mode 100644 man/man3/tomo-Text.from_bytes.3 delete mode 100644 man/man3/tomo-Text.from_codepoints.3 create mode 100644 man/man3/tomo-Text.from_utf16.3 create mode 100644 man/man3/tomo-Text.from_utf32.3 create mode 100644 man/man3/tomo-Text.from_utf8.3 create mode 100644 man/man3/tomo-Text.utf16.3 create mode 100644 man/man3/tomo-Text.utf32.3 delete mode 100644 man/man3/tomo-Text.utf32_codepoints.3 create mode 100644 man/man3/tomo-Text.utf8.3 (limited to 'man') diff --git a/man/man3/tomo-Text.bytes.3 b/man/man3/tomo-Text.bytes.3 deleted file mode 100644 index f9203ef3..00000000 --- a/man/man3/tomo-Text.bytes.3 +++ /dev/null @@ -1,34 +0,0 @@ -'\" t -.\" Copyright (c) 2025 Bruce Hill -.\" All rights reserved. 
-.\" -.TH Text.bytes 3 2025-04-30 "Tomo man-pages" -.SH NAME -Text.bytes \- get UTF8 bytes -.SH LIBRARY -Tomo Standard Library -.SH SYNOPSIS -.nf -.BI Text.bytes\ :\ func(text:\ Text\ ->\ [Byte]) -.fi -.SH DESCRIPTION -Converts a `Text` value to a list of bytes representing a UTF8 encoding of the text. - - -.SH ARGUMENTS - -.TS -allbox; -lb lb lbx lb -l l l l. -Name Type Description Default -text Text The text to be converted to UTF8 bytes. - -.TE -.SH RETURN -A list of bytes (`[Byte]`) representing the text in UTF8 encoding. - -.SH EXAMPLES -.EX ->> "Amélie".bytes() -= [65, 109, 195, 169, 108, 105, 101] -.EE diff --git a/man/man3/tomo-Text.from_bytes.3 b/man/man3/tomo-Text.from_bytes.3 deleted file mode 100644 index eec3843a..00000000 --- a/man/man3/tomo-Text.from_bytes.3 +++ /dev/null @@ -1,37 +0,0 @@ -'\" t -.\" Copyright (c) 2025 Bruce Hill -.\" All rights reserved. -.\" -.TH Text.from_bytes 3 2025-04-30 "Tomo man-pages" -.SH NAME -Text.from_bytes \- convert UTF8 byte list to text -.SH LIBRARY -Tomo Standard Library -.SH SYNOPSIS -.nf -.BI Text.from_bytes\ :\ func(bytes:\ [Byte]\ ->\ [Text]) -.fi -.SH DESCRIPTION -Returns text that has been constructed from the given UTF8 bytes. - - -.SH ARGUMENTS - -.TS -allbox; -lb lb lbx lb -l l l l. -Name Type Description Default -bytes [Byte] The UTF-8 bytes of the desired text. - -.TE -.SH RETURN -A new text based on the input UTF8 bytes after normalization has been applied. - -.SH NOTES -The text will be normalized, so the resulting text's UTF8 bytes may not exactly match the input. - -.SH EXAMPLES -.EX ->> Text.from_bytes([195, 133, 107, 101]) -= "Åke" -.EE diff --git a/man/man3/tomo-Text.from_codepoints.3 b/man/man3/tomo-Text.from_codepoints.3 deleted file mode 100644 index d64abd98..00000000 --- a/man/man3/tomo-Text.from_codepoints.3 +++ /dev/null @@ -1,37 +0,0 @@ -'\" t -.\" Copyright (c) 2025 Bruce Hill -.\" All rights reserved. 
-.\" -.TH Text.from_codepoints 3 2025-04-30 "Tomo man-pages" -.SH NAME -Text.from_codepoints \- convert UTF32 codepoints to text -.SH LIBRARY -Tomo Standard Library -.SH SYNOPSIS -.nf -.BI Text.from_codepoints\ :\ func(codepoints:\ [Int32]\ ->\ [Text]) -.fi -.SH DESCRIPTION -Returns text that has been constructed from the given UTF32 codepoints. - - -.SH ARGUMENTS - -.TS -allbox; -lb lb lbx lb -l l l l. -Name Type Description Default -codepoints [Int32] The UTF32 codepoints in the desired text. - -.TE -.SH RETURN -A new text with the specified codepoints after normalization has been applied. - -.SH NOTES -The text will be normalized, so the resulting text's codepoints may not exactly match the input codepoints. - -.SH EXAMPLES -.EX ->> Text.from_codepoints([197, 107, 101]) -= "Åke" -.EE diff --git a/man/man3/tomo-Text.from_utf16.3 b/man/man3/tomo-Text.from_utf16.3 new file mode 100644 index 00000000..d4eaea02 --- /dev/null +++ b/man/man3/tomo-Text.from_utf16.3 @@ -0,0 +1,39 @@ +'\" t +.\" Copyright (c) 2025 Bruce Hill +.\" All rights reserved. +.\" +.TH Text.from_utf16 3 2025-09-09 "Tomo man-pages" +.SH NAME +Text.from_utf16 \- convert UTF16 list to text +.SH LIBRARY +Tomo Standard Library +.SH SYNOPSIS +.nf +.BI Text.from_utf16\ :\ func(bytes:\ [Int16]\ ->\ [Text]) +.fi +.SH DESCRIPTION +Returns text that has been constructed from the given UTF16 sequence. + + +.SH ARGUMENTS + +.TS +allbox; +lb lb lbx lb +l l l l. +Name Type Description Default +bytes [Int16] The UTF-16 integers of the desired text. - +.TE +.SH RETURN +A new text based on the input UTF16 sequence after normalization has been applied. + +.SH NOTES +The text will be normalized, so the resulting text's UTF16 sequence may not exactly match the input. 
+ +.SH EXAMPLES +.EX +>> Text.from_utf16([197, 107, 101]) += "Åke" +>> Text.from_utf16([12371, 12435, 12395, 12385, 12399, 19990, 30028]) += "こんにちは世界" +.EE diff --git a/man/man3/tomo-Text.from_utf32.3 b/man/man3/tomo-Text.from_utf32.3 new file mode 100644 index 00000000..31fc344f --- /dev/null +++ b/man/man3/tomo-Text.from_utf32.3 @@ -0,0 +1,37 @@ +'\" t +.\" Copyright (c) 2025 Bruce Hill +.\" All rights reserved. +.\" +.TH Text.from_utf32 3 2025-09-09 "Tomo man-pages" +.SH NAME +Text.from_utf32 \- convert UTF32 codepoints to text +.SH LIBRARY +Tomo Standard Library +.SH SYNOPSIS +.nf +.BI Text.from_utf32\ :\ func(codepoints:\ [Int32]\ ->\ [Text]) +.fi +.SH DESCRIPTION +Returns text that has been constructed from the given UTF32 codepoints. + + +.SH ARGUMENTS + +.TS +allbox; +lb lb lbx lb +l l l l. +Name Type Description Default +codepoints [Int32] The UTF32 codepoints in the desired text. - +.TE +.SH RETURN +A new text with the specified codepoints after normalization has been applied. + +.SH NOTES +The text will be normalized, so the resulting text's codepoints may not exactly match the input codepoints. + +.SH EXAMPLES +.EX +>> Text.from_utf32([197, 107, 101]) += "Åke" +.EE diff --git a/man/man3/tomo-Text.from_utf8.3 b/man/man3/tomo-Text.from_utf8.3 new file mode 100644 index 00000000..ead65dc6 --- /dev/null +++ b/man/man3/tomo-Text.from_utf8.3 @@ -0,0 +1,37 @@ +'\" t +.\" Copyright (c) 2025 Bruce Hill +.\" All rights reserved. +.\" +.TH Text.from_utf8 3 2025-09-09 "Tomo man-pages" +.SH NAME +Text.from_utf8 \- convert UTF8 byte list to text +.SH LIBRARY +Tomo Standard Library +.SH SYNOPSIS +.nf +.BI Text.from_utf8\ :\ func(bytes:\ [Byte]\ ->\ [Text]) +.fi +.SH DESCRIPTION +Returns text that has been constructed from the given UTF8 bytes. + + +.SH ARGUMENTS + +.TS +allbox; +lb lb lbx lb +l l l l. +Name Type Description Default +bytes [Byte] The UTF-8 bytes of the desired text. 
- +.TE +.SH RETURN +A new text based on the input UTF8 bytes after normalization has been applied. + +.SH NOTES +The text will be normalized, so the resulting text's UTF8 bytes may not exactly match the input. + +.SH EXAMPLES +.EX +>> Text.from_utf8([195, 133, 107, 101]) += "Åke" +.EE diff --git a/man/man3/tomo-Text.utf16.3 b/man/man3/tomo-Text.utf16.3 new file mode 100644 index 00000000..2b3da2b1 --- /dev/null +++ b/man/man3/tomo-Text.utf16.3 @@ -0,0 +1,36 @@ +'\" t +.\" Copyright (c) 2025 Bruce Hill +.\" All rights reserved. +.\" +.TH Text.utf16 3 2025-09-09 "Tomo man-pages" +.SH NAME +Text.utf16 \- get UTF16 code units +.SH LIBRARY +Tomo Standard Library +.SH SYNOPSIS +.nf +.BI Text.utf16\ :\ func(text:\ Text\ ->\ [Int16]) +.fi +.SH DESCRIPTION +Returns a list of 16-bit code units for the UTF16 encoding of the text. + + +.SH ARGUMENTS + +.TS +allbox; +lb lb lbx lb +l l l l. +Name Type Description Default +text Text The text from which to extract UTF16 code units. - +.TE +.SH RETURN +A list of 16-bit integer UTF16 code units (`[Int16]`). + +.SH EXAMPLES +.EX +>> "Åke".utf16() += [197, 107, 101] +>> "こんにちは世界".utf16() += [12371, 12435, 12395, 12385, 12399, 19990, 30028] +.EE diff --git a/man/man3/tomo-Text.utf32.3 b/man/man3/tomo-Text.utf32.3 new file mode 100644 index 00000000..ff37ba9c --- /dev/null +++ b/man/man3/tomo-Text.utf32.3 @@ -0,0 +1,34 @@ +'\" t +.\" Copyright (c) 2025 Bruce Hill +.\" All rights reserved. +.\" +.TH Text.utf32 3 2025-09-09 "Tomo man-pages" +.SH NAME +Text.utf32 \- get UTF32 codepoints +.SH LIBRARY +Tomo Standard Library +.SH SYNOPSIS +.nf +.BI Text.utf32\ :\ func(text:\ Text\ ->\ [Int32]) +.fi +.SH DESCRIPTION +Returns a list of Unicode code points for UTF32 encoding of the text. + + +.SH ARGUMENTS + +.TS +allbox; +lb lb lbx lb +l l l l. +Name Type Description Default +text Text The text from which to extract Unicode code points. - +.TE +.SH RETURN +A list of 32-bit integer Unicode code points (`[Int32]`). 
+ +.SH EXAMPLES +.EX +>> "Amélie".utf32() += [65, 109, 233, 108, 105, 101] +.EE diff --git a/man/man3/tomo-Text.utf32_codepoints.3 b/man/man3/tomo-Text.utf32_codepoints.3 deleted file mode 100644 index 0ada8954..00000000 --- a/man/man3/tomo-Text.utf32_codepoints.3 +++ /dev/null @@ -1,34 +0,0 @@ -'\" t -.\" Copyright (c) 2025 Bruce Hill -.\" All rights reserved. -.\" -.TH Text.utf32_codepoints 3 2025-04-30 "Tomo man-pages" -.SH NAME -Text.utf32_codepoints \- get UTF32 codepoints -.SH LIBRARY -Tomo Standard Library -.SH SYNOPSIS -.nf -.BI Text.utf32_codepoints\ :\ func(text:\ Text\ ->\ [Int32]) -.fi -.SH DESCRIPTION -Returns a list of Unicode code points for UTF32 encoding of the text. - - -.SH ARGUMENTS - -.TS -allbox; -lb lb lbx lb -l l l l. -Name Type Description Default -text Text The text from which to extract Unicode code points. - -.TE -.SH RETURN -A list of 32-bit integer Unicode code points (`[Int32]`). - -.SH EXAMPLES -.EX ->> "Amélie".utf32_codepoints() -= [65, 109, 233, 108, 105, 101] -.EE diff --git a/man/man3/tomo-Text.utf8.3 b/man/man3/tomo-Text.utf8.3 new file mode 100644 index 00000000..80a91fb9 --- /dev/null +++ b/man/man3/tomo-Text.utf8.3 @@ -0,0 +1,34 @@ +'\" t +.\" Copyright (c) 2025 Bruce Hill +.\" All rights reserved. +.\" +.TH Text.utf8 3 2025-09-09 "Tomo man-pages" +.SH NAME +Text.utf8 \- get UTF8 bytes +.SH LIBRARY +Tomo Standard Library +.SH SYNOPSIS +.nf +.BI Text.utf8\ :\ func(text:\ Text\ ->\ [Byte]) +.fi +.SH DESCRIPTION +Converts a `Text` value to a list of bytes representing a UTF8 encoding of the text. + + +.SH ARGUMENTS + +.TS +allbox; +lb lb lbx lb +l l l l. +Name Type Description Default +text Text The text to be converted to UTF8 bytes. - +.TE +.SH RETURN +A list of bytes (`[Byte]`) representing the text in UTF8 encoding. + +.SH EXAMPLES +.EX +>> "Amélie".utf8() += [65, 109, 195, 169, 108, 105, 101] +.EE -- cgit v1.2.3