Update text API to return optional `Text?` from constructors that can fail (from_bytes, from_c_string, from_codepoint_names)

author: Bruce Hill <bruce@bruce-hill.com> 2024-11-04 01:17:47 -0500
committer: Bruce Hill <bruce@bruce-hill.com> 2024-11-04 01:17:47 -0500
commit: aabc0a3cff685e31f2492c977c6562d8e0ef8ebc (patch)
tree: 518e70f81407c02978c69f80be6f9872dabaa295
parent: b69d14b89492919dc5c1669d2c569ee3baf1bbb0 (diff)
4 files changed, 25 insertions, 21 deletions
diff --git a/environment.c b/environment.c
index 39e50b02..47e30dba 100644
--- a/environment.c
+++ b/environment.c
@@ -374,9 +374,9 @@ env_t *new_compilation_unit(CORD libname)
             {"ends_with", "Text$ends_with", "func(text,suffix:Text -> Bool)"},
             {"find", "Text$find", "func(text:Text, pattern:Pattern, start=1 -> Int?)"},
             {"find_all", "Text$find_all", "func(text:Text, pattern:Pattern -> [Text])"},
-            {"from_bytes", "Text$from_bytes", "func(bytes:[Byte] -> Text)"},
-            {"from_c_string", "Text$from_str", "func(str:CString -> Text)"},
-            {"from_codepoint_names", "Text$from_codepoint_names", "func(codepoint_names:[Text] -> Text)"},
+            {"from_bytes", "Text$from_bytes", "func(bytes:[Byte] -> Text?)"},
+            {"from_c_string", "Text$from_str", "func(str:CString -> Text?)"},
+            {"from_codepoint_names", "Text$from_codepoint_names", "func(codepoint_names:[Text] -> Text?)"},
             {"from_codepoints", "Text$from_codepoints", "func(codepoints:[Int32] -> Text)"},
             {"without_escaping", "Path$cleanup", "func(text:Text -> Path)"},
             {"has", "Text$has", "func(text:Text, pattern:Pattern -> Bool)"},
diff --git a/stdlib/text.c b/stdlib/text.c
index e8ecc786..92c5df48 100644
--- a/stdlib/text.c
+++ b/stdlib/text.c
@@ -607,7 +607,7 @@ Text_t text_from_u32(ucs4_t *codepoints, int64_t num_codepoints, bool normalize)
     return ret;
 }
 
-public Text_t Text$from_strn(const char *str, size_t len)
+public OptionalText_t Text$from_strn(const char *str, size_t len)
 {
     int64_t ascii_span = 0;
     for (size_t i = 0; i < len && isascii(str[i]); i++)
@@ -626,7 +626,7 @@ public Text_t Text$from_strn(const char *str, size_t len)
         return ret;
     } else {
         if (u8_check((uint8_t*)str, len) != NULL)
-            return Text("");
+            return NULL_TEXT;
 
         ucs4_t buf[128];
         size_t length = sizeof(buf)/sizeof(buf[0]);
@@ -638,7 +638,7 @@ public Text_t Text$from_strn(const char *str, size_t len)
     }
 }
 
-public Text_t Text$from_str(const char *str)
+public OptionalText_t Text$from_str(const char *str)
 {
     return str ? Text$from_strn(str, strlen(str)) : Text("");
 }
@@ -1270,20 +1270,21 @@ public Text_t Text$from_codepoints(Array_t codepoints)
     return text_from_u32(codepoints.data, codepoints.length, true);
 }
 
-public Text_t Text$from_codepoint_names(Array_t codepoint_names)
+public OptionalText_t Text$from_codepoint_names(Array_t codepoint_names)
 {
     Array_t codepoints = {};
     for (int64_t i = 0; i < codepoint_names.length; i++) {
         Text_t *name = ((Text_t*)(codepoint_names.data + i*codepoint_names.stride));
         const char *name_str = Text$as_c_string(*name);
         ucs4_t codepoint = unicode_name_character(name_str);
-        if (codepoint != UNINAME_INVALID)
-            Array$insert(&codepoints, &codepoint, I_small(0), sizeof(ucs4_t));
+        if (codepoint == UNINAME_INVALID)
+            return NULL_TEXT;
+        Array$insert(&codepoints, &codepoint, I_small(0), sizeof(ucs4_t));
     }
     return Text$from_codepoints(codepoints);
 }
 
-public Text_t Text$from_bytes(Array_t bytes)
+public OptionalText_t Text$from_bytes(Array_t bytes)
 {
     if (bytes.stride != sizeof(int8_t))
         Array$compact(&bytes, sizeof(int8_t));
diff --git a/stdlib/text.h b/stdlib/text.h
index bcdccfeb..45aa00ca 100644
--- a/stdlib/text.h
+++ b/stdlib/text.h
@@ -9,6 +9,7 @@
 
 #include "datatypes.h"
 #include "integers.h"
+#include "optionals.h"
 #include "util.h"
 
 typedef struct {
@@ -27,8 +28,8 @@ Text_t Text$_concat(int n, Text_t items[n]);
 #define Text$concat(...) Text$_concat(sizeof((Text_t[]){__VA_ARGS__})/sizeof(Text_t), (Text_t[]){__VA_ARGS__})
 #define Texts(...) Text$concat(__VA_ARGS__)
 Text_t Text$slice(Text_t text, Int_t first_int, Int_t last_int);
-Text_t Text$from_str(const char *str);
-Text_t Text$from_strn(const char *str, size_t len);
+OptionalText_t Text$from_str(const char *str);
+OptionalText_t Text$from_strn(const char *str, size_t len);
 PUREFUNC uint64_t Text$hash(Text_t *text);
 PUREFUNC int32_t Text$compare(const Text_t *a, const Text_t *b);
 PUREFUNC bool Text$equal(const Text_t *a, const Text_t *b);
@@ -49,8 +50,8 @@ Array_t Text$utf32_codepoints(Text_t text);
 Array_t Text$utf8_bytes(Text_t text);
 Array_t Text$codepoint_names(Text_t text);
 Text_t Text$from_codepoints(Array_t codepoints);
-Text_t Text$from_codepoint_names(Array_t codepoint_names);
-Text_t Text$from_bytes(Array_t bytes);
+OptionalText_t Text$from_codepoint_names(Array_t codepoint_names);
+OptionalText_t Text$from_bytes(Array_t bytes);
 Array_t Text$lines(Text_t text);
 Text_t Text$join(Text_t glue, Array_t pieces);
 Text_t Text$repeat(Text_t text, Int_t count);
diff --git a/test/text.tm b/test/text.tm
index 1c8988ba..73b9f952 100644
--- a/test/text.tm
+++ b/test/text.tm
@@ -32,8 +32,10 @@ func main():
 	= [65[32], 109[32], 233[32], 108[32], 105[32], 101[32]] : [Int32]
 	>> amelie:utf8_bytes()
 	= [65[B], 109[B], 195[B], 169[B], 108[B], 105[B], 101[B]] : [Byte]
-	>> Text.from_bytes([65[B], 109[B], 195[B], 169[B], 108[B], 105[B], 101[B]])
+	>> Text.from_bytes([65[B], 109[B], 195[B], 169[B], 108[B], 105[B], 101[B]])!
 	= "Amélie"
+	>> Text.from_bytes([255[B]])
+	= !Text
 
 	>> amelie2 := "Am$(\U65\U301)lie"
 	>> amelie2:split()
@@ -120,11 +122,11 @@ func main():
 	>> c := "É̩"
 	>> c:codepoint_names()
 	= ["LATIN CAPITAL LETTER E WITH ACUTE", "COMBINING VERTICAL LINE BELOW"]
-	>> c == Text.from_codepoint_names(c:codepoint_names())
+	>> c == Text.from_codepoint_names(c:codepoint_names())!
 	= yes
 	>> c == Text.from_codepoints(c:utf32_codepoints())
 	= yes
-	>> c == Text.from_bytes(c:utf8_bytes())
+	>> c == Text.from_bytes(c:utf8_bytes())!
 	= yes
 
 	>> "one$(\n)two$(\n)three":lines()
@@ -220,7 +222,7 @@ func main():
 	= ["PENGUIN"]
 
 	>> Text.from_codepoint_names(["not a valid name here buddy"])
-	= ""
+	= !Text
 
 	>> "one two; three four":find_all($/; {..}/)
 	= ["; three four"]
@@ -292,14 +294,14 @@ func main():
 
 	do:
 		!! Testing concatenation-stability:
-		>> ab := Text.from_codepoint_names(["LATIN SMALL LETTER E", "COMBINING VERTICAL LINE BELOW"])
+		>> ab := Text.from_codepoint_names(["LATIN SMALL LETTER E", "COMBINING VERTICAL LINE BELOW"])!
 		>> ab:codepoint_names()
 		= ["LATIN SMALL LETTER E", "COMBINING VERTICAL LINE BELOW"]
 		>> ab.length
 		= 1
 
-		>> a := Text.from_codepoint_names(["LATIN SMALL LETTER E"])
-		>> b := Text.from_codepoint_names(["COMBINING VERTICAL LINE BELOW"])
+		>> a := Text.from_codepoint_names(["LATIN SMALL LETTER E"])!
+		>> b := Text.from_codepoint_names(["COMBINING VERTICAL LINE BELOW"])!
 		>> (a++b):codepoint_names()
 		= ["LATIN SMALL LETTER E", "COMBINING VERTICAL LINE BELOW"]
 		>> (a++b) == ab
author	Bruce Hill <bruce@bruce-hill.com>	2024-11-04 01:17:47 -0500
committer	Bruce Hill <bruce@bruce-hill.com>	2024-11-04 01:17:47 -0500
commit	aabc0a3cff685e31f2492c977c6562d8e0ef8ebc (patch)
tree	518e70f81407c02978c69f80be6f9872dabaa295
parent	b69d14b89492919dc5c1669d2c569ee3baf1bbb0 (diff)