diff options
| author | Bruce Hill <bruce@bruce-hill.com> | 2024-11-04 01:17:47 -0500 |
|---|---|---|
| committer | Bruce Hill <bruce@bruce-hill.com> | 2024-11-04 01:17:47 -0500 |
| commit | aabc0a3cff685e31f2492c977c6562d8e0ef8ebc (patch) | |
| tree | 518e70f81407c02978c69f80be6f9872dabaa295 | |
| parent | b69d14b89492919dc5c1669d2c569ee3baf1bbb0 (diff) | |
Update text API to use optional returns when applicable
| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | environment.c | 6 |
| -rw-r--r-- | stdlib/text.c | 15 |
| -rw-r--r-- | stdlib/text.h | 9 |
| -rw-r--r-- | test/text.tm | 16 |
4 files changed, 25 insertions, 21 deletions
diff --git a/environment.c b/environment.c index 39e50b02..47e30dba 100644 --- a/environment.c +++ b/environment.c @@ -374,9 +374,9 @@ env_t *new_compilation_unit(CORD libname) {"ends_with", "Text$ends_with", "func(text,suffix:Text -> Bool)"}, {"find", "Text$find", "func(text:Text, pattern:Pattern, start=1 -> Int?)"}, {"find_all", "Text$find_all", "func(text:Text, pattern:Pattern -> [Text])"}, - {"from_bytes", "Text$from_bytes", "func(bytes:[Byte] -> Text)"}, - {"from_c_string", "Text$from_str", "func(str:CString -> Text)"}, - {"from_codepoint_names", "Text$from_codepoint_names", "func(codepoint_names:[Text] -> Text)"}, + {"from_bytes", "Text$from_bytes", "func(bytes:[Byte] -> Text?)"}, + {"from_c_string", "Text$from_str", "func(str:CString -> Text?)"}, + {"from_codepoint_names", "Text$from_codepoint_names", "func(codepoint_names:[Text] -> Text?)"}, {"from_codepoints", "Text$from_codepoints", "func(codepoints:[Int32] -> Text)"}, {"without_escaping", "Path$cleanup", "func(text:Text -> Path)"}, {"has", "Text$has", "func(text:Text, pattern:Pattern -> Bool)"}, diff --git a/stdlib/text.c b/stdlib/text.c index e8ecc786..92c5df48 100644 --- a/stdlib/text.c +++ b/stdlib/text.c @@ -607,7 +607,7 @@ Text_t text_from_u32(ucs4_t *codepoints, int64_t num_codepoints, bool normalize) return ret; } -public Text_t Text$from_strn(const char *str, size_t len) +public OptionalText_t Text$from_strn(const char *str, size_t len) { int64_t ascii_span = 0; for (size_t i = 0; i < len && isascii(str[i]); i++) @@ -626,7 +626,7 @@ public Text_t Text$from_strn(const char *str, size_t len) return ret; } else { if (u8_check((uint8_t*)str, len) != NULL) - return Text(""); + return NULL_TEXT; ucs4_t buf[128]; size_t length = sizeof(buf)/sizeof(buf[0]); @@ -638,7 +638,7 @@ public Text_t Text$from_strn(const char *str, size_t len) } } -public Text_t Text$from_str(const char *str) +public OptionalText_t Text$from_str(const char *str) { return str ? 
Text$from_strn(str, strlen(str)) : Text(""); } @@ -1270,20 +1270,21 @@ public Text_t Text$from_codepoints(Array_t codepoints) return text_from_u32(codepoints.data, codepoints.length, true); } -public Text_t Text$from_codepoint_names(Array_t codepoint_names) +public OptionalText_t Text$from_codepoint_names(Array_t codepoint_names) { Array_t codepoints = {}; for (int64_t i = 0; i < codepoint_names.length; i++) { Text_t *name = ((Text_t*)(codepoint_names.data + i*codepoint_names.stride)); const char *name_str = Text$as_c_string(*name); ucs4_t codepoint = unicode_name_character(name_str); - if (codepoint != UNINAME_INVALID) - Array$insert(&codepoints, &codepoint, I_small(0), sizeof(ucs4_t)); + if (codepoint == UNINAME_INVALID) + return NULL_TEXT; + Array$insert(&codepoints, &codepoint, I_small(0), sizeof(ucs4_t)); } return Text$from_codepoints(codepoints); } -public Text_t Text$from_bytes(Array_t bytes) +public OptionalText_t Text$from_bytes(Array_t bytes) { if (bytes.stride != sizeof(int8_t)) Array$compact(&bytes, sizeof(int8_t)); diff --git a/stdlib/text.h b/stdlib/text.h index bcdccfeb..45aa00ca 100644 --- a/stdlib/text.h +++ b/stdlib/text.h @@ -9,6 +9,7 @@ #include "datatypes.h" #include "integers.h" +#include "optionals.h" #include "util.h" typedef struct { @@ -27,8 +28,8 @@ Text_t Text$_concat(int n, Text_t items[n]); #define Text$concat(...) Text$_concat(sizeof((Text_t[]){__VA_ARGS__})/sizeof(Text_t), (Text_t[]){__VA_ARGS__}) #define Texts(...) 
Text$concat(__VA_ARGS__) Text_t Text$slice(Text_t text, Int_t first_int, Int_t last_int); -Text_t Text$from_str(const char *str); -Text_t Text$from_strn(const char *str, size_t len); +OptionalText_t Text$from_str(const char *str); +OptionalText_t Text$from_strn(const char *str, size_t len); PUREFUNC uint64_t Text$hash(Text_t *text); PUREFUNC int32_t Text$compare(const Text_t *a, const Text_t *b); PUREFUNC bool Text$equal(const Text_t *a, const Text_t *b); @@ -49,8 +50,8 @@ Array_t Text$utf32_codepoints(Text_t text); Array_t Text$utf8_bytes(Text_t text); Array_t Text$codepoint_names(Text_t text); Text_t Text$from_codepoints(Array_t codepoints); -Text_t Text$from_codepoint_names(Array_t codepoint_names); -Text_t Text$from_bytes(Array_t bytes); +OptionalText_t Text$from_codepoint_names(Array_t codepoint_names); +OptionalText_t Text$from_bytes(Array_t bytes); Array_t Text$lines(Text_t text); Text_t Text$join(Text_t glue, Array_t pieces); Text_t Text$repeat(Text_t text, Int_t count); diff --git a/test/text.tm b/test/text.tm index 1c8988ba..73b9f952 100644 --- a/test/text.tm +++ b/test/text.tm @@ -32,8 +32,10 @@ func main(): = [65[32], 109[32], 233[32], 108[32], 105[32], 101[32]] : [Int32] >> amelie:utf8_bytes() = [65[B], 109[B], 195[B], 169[B], 108[B], 105[B], 101[B]] : [Byte] - >> Text.from_bytes([65[B], 109[B], 195[B], 169[B], 108[B], 105[B], 101[B]]) + >> Text.from_bytes([65[B], 109[B], 195[B], 169[B], 108[B], 105[B], 101[B]])! = "Amélie" + >> Text.from_bytes([255[B]]) + = !Text >> amelie2 := "Am$(\U65\U301)lie" >> amelie2:split() @@ -120,11 +122,11 @@ func main(): >> c := "É̩" >> c:codepoint_names() = ["LATIN CAPITAL LETTER E WITH ACUTE", "COMBINING VERTICAL LINE BELOW"] - >> c == Text.from_codepoint_names(c:codepoint_names()) + >> c == Text.from_codepoint_names(c:codepoint_names())! = yes >> c == Text.from_codepoints(c:utf32_codepoints()) = yes - >> c == Text.from_bytes(c:utf8_bytes()) + >> c == Text.from_bytes(c:utf8_bytes())! 
= yes >> "one$(\n)two$(\n)three":lines() @@ -220,7 +222,7 @@ func main(): = ["PENGUIN"] >> Text.from_codepoint_names(["not a valid name here buddy"]) - = "" + = !Text >> "one two; three four":find_all($/; {..}/) = ["; three four"] @@ -292,14 +294,14 @@ func main(): do: !! Testing concatenation-stability: - >> ab := Text.from_codepoint_names(["LATIN SMALL LETTER E", "COMBINING VERTICAL LINE BELOW"]) + >> ab := Text.from_codepoint_names(["LATIN SMALL LETTER E", "COMBINING VERTICAL LINE BELOW"])! >> ab:codepoint_names() = ["LATIN SMALL LETTER E", "COMBINING VERTICAL LINE BELOW"] >> ab.length = 1 - >> a := Text.from_codepoint_names(["LATIN SMALL LETTER E"]) - >> b := Text.from_codepoint_names(["COMBINING VERTICAL LINE BELOW"]) + >> a := Text.from_codepoint_names(["LATIN SMALL LETTER E"])! + >> b := Text.from_codepoint_names(["COMBINING VERTICAL LINE BELOW"])! >> (a++b):codepoint_names() = ["LATIN SMALL LETTER E", "COMBINING VERTICAL LINE BELOW"] >> (a++b) == ab |
