Update text API to use optional returns when applicable

2024-11-04 01:17:47 -05:00 · 2024-11-04 01:17:47 -05:00 · aabc0a3cff
commit aabc0a3cff
parent b69d14b894
4 changed files with 25 additions and 21 deletions
--- a/environment.c
+++ b/environment.c
@@ -374,9 +374,9 @@ env_t *new_compilation_unit(CORD libname)
            {"ends_with", "Text$ends_with", "func(text,suffix:Text -> Bool)"},
            {"find", "Text$find", "func(text:Text, pattern:Pattern, start=1 -> Int?)"},
            {"find_all", "Text$find_all", "func(text:Text, pattern:Pattern -> [Text])"},
-            {"from_bytes", "Text$from_bytes", "func(bytes:[Byte] -> Text)"},
-            {"from_c_string", "Text$from_str", "func(str:CString -> Text)"},
-            {"from_codepoint_names", "Text$from_codepoint_names", "func(codepoint_names:[Text] -> Text)"},
+            {"from_bytes", "Text$from_bytes", "func(bytes:[Byte] -> Text?)"},
+            {"from_c_string", "Text$from_str", "func(str:CString -> Text?)"},
+            {"from_codepoint_names", "Text$from_codepoint_names", "func(codepoint_names:[Text] -> Text?)"},
            {"from_codepoints", "Text$from_codepoints", "func(codepoints:[Int32] -> Text)"},
            {"without_escaping", "Path$cleanup", "func(text:Text -> Path)"},
            {"has", "Text$has", "func(text:Text, pattern:Pattern -> Bool)"},
--- a/stdlib/text.c
+++ b/stdlib/text.c
@@ -607,7 +607,7 @@ Text_t text_from_u32(ucs4_t *codepoints, int64_t num_codepoints, bool normalize)
    return ret;
 }

-public Text_t Text$from_strn(const char *str, size_t len)
+public OptionalText_t Text$from_strn(const char *str, size_t len)
 {
    int64_t ascii_span = 0;
    for (size_t i = 0; i < len && isascii(str[i]); i++)
@@ -626,7 +626,7 @@ public Text_t Text$from_strn(const char *str, size_t len)
        return ret;
    } else {
        if (u8_check((uint8_t*)str, len) != NULL)
-            return Text("");
+            return NULL_TEXT;

        ucs4_t buf[128];
        size_t length = sizeof(buf)/sizeof(buf[0]);
@@ -638,7 +638,7 @@ public Text_t Text$from_strn(const char *str, size_t len)
    }
 }

-public Text_t Text$from_str(const char *str)
+public OptionalText_t Text$from_str(const char *str)
 {
    return str ? Text$from_strn(str, strlen(str)) : Text("");
 }
@@ -1270,20 +1270,21 @@ public Text_t Text$from_codepoints(Array_t codepoints)
    return text_from_u32(codepoints.data, codepoints.length, true);
 }

-public Text_t Text$from_codepoint_names(Array_t codepoint_names)
+public OptionalText_t Text$from_codepoint_names(Array_t codepoint_names)
 {
    Array_t codepoints = {};
    for (int64_t i = 0; i < codepoint_names.length; i++) {
        Text_t *name = ((Text_t*)(codepoint_names.data + i*codepoint_names.stride));
        const char *name_str = Text$as_c_string(*name);
        ucs4_t codepoint = unicode_name_character(name_str);
-        if (codepoint != UNINAME_INVALID)
-            Array$insert(&codepoints, &codepoint, I_small(0), sizeof(ucs4_t));
+        if (codepoint == UNINAME_INVALID)
+            return NULL_TEXT;
+        Array$insert(&codepoints, &codepoint, I_small(0), sizeof(ucs4_t));
    }
    return Text$from_codepoints(codepoints);
 }

-public Text_t Text$from_bytes(Array_t bytes)
+public OptionalText_t Text$from_bytes(Array_t bytes)
 {
    if (bytes.stride != sizeof(int8_t))
        Array$compact(&bytes, sizeof(int8_t));
--- a/stdlib/text.h
+++ b/stdlib/text.h
@@ -9,6 +9,7 @@

 #include "datatypes.h"
 #include "integers.h"
+#include "optionals.h"
 #include "util.h"

 typedef struct {
@@ -27,8 +28,8 @@ Text_t Text$_concat(int n, Text_t items[n]);
 #define Text$concat(...) Text$_concat(sizeof((Text_t[]){__VA_ARGS__})/sizeof(Text_t), (Text_t[]){__VA_ARGS__})
 #define Texts(...) Text$concat(__VA_ARGS__)
 Text_t Text$slice(Text_t text, Int_t first_int, Int_t last_int);
-Text_t Text$from_str(const char *str);
-Text_t Text$from_strn(const char *str, size_t len);
+OptionalText_t Text$from_str(const char *str);
+OptionalText_t Text$from_strn(const char *str, size_t len);
 PUREFUNC uint64_t Text$hash(Text_t *text);
 PUREFUNC int32_t Text$compare(const Text_t *a, const Text_t *b);
 PUREFUNC bool Text$equal(const Text_t *a, const Text_t *b);
@@ -49,8 +50,8 @@ Array_t Text$utf32_codepoints(Text_t text);
 Array_t Text$utf8_bytes(Text_t text);
 Array_t Text$codepoint_names(Text_t text);
 Text_t Text$from_codepoints(Array_t codepoints);
-Text_t Text$from_codepoint_names(Array_t codepoint_names);
-Text_t Text$from_bytes(Array_t bytes);
+OptionalText_t Text$from_codepoint_names(Array_t codepoint_names);
+OptionalText_t Text$from_bytes(Array_t bytes);
 Array_t Text$lines(Text_t text);
 Text_t Text$join(Text_t glue, Array_t pieces);
 Text_t Text$repeat(Text_t text, Int_t count);
--- a/test/text.tm
+++ b/test/text.tm
@@ -32,8 +32,10 @@ func main():
 	= [65[32], 109[32], 233[32], 108[32], 105[32], 101[32]] : [Int32]
 	>> amelie:utf8_bytes()
 	= [65[B], 109[B], 195[B], 169[B], 108[B], 105[B], 101[B]] : [Byte]
-	>> Text.from_bytes([65[B], 109[B], 195[B], 169[B], 108[B], 105[B], 101[B]])
+	>> Text.from_bytes([65[B], 109[B], 195[B], 169[B], 108[B], 105[B], 101[B]])!
 	= "Amélie"
+	>> Text.from_bytes([255[B]])
+	= !Text

 	>> amelie2 := "Am$(\U65\U301)lie"
 	>> amelie2:split()
@@ -120,11 +122,11 @@ func main():
 	>> c := "É̩"
 	>> c:codepoint_names()
 	= ["LATIN CAPITAL LETTER E WITH ACUTE", "COMBINING VERTICAL LINE BELOW"]
-	>> c == Text.from_codepoint_names(c:codepoint_names())
+	>> c == Text.from_codepoint_names(c:codepoint_names())!
 	= yes
 	>> c == Text.from_codepoints(c:utf32_codepoints())
 	= yes
-	>> c == Text.from_bytes(c:utf8_bytes())
+	>> c == Text.from_bytes(c:utf8_bytes())!
 	= yes

 	>> "one$(\n)two$(\n)three":lines()
@@ -220,7 +222,7 @@ func main():
 	= ["PENGUIN"]

 	>> Text.from_codepoint_names(["not a valid name here buddy"])
-	= ""
+	= !Text

 	>> "one two; three four":find_all($/; {..}/)
 	= ["; three four"]
@@ -292,14 +294,14 @@ func main():

 	do:
 		!! Testing concatenation-stability:
-		>> ab := Text.from_codepoint_names(["LATIN SMALL LETTER E", "COMBINING VERTICAL LINE BELOW"])
+		>> ab := Text.from_codepoint_names(["LATIN SMALL LETTER E", "COMBINING VERTICAL LINE BELOW"])!
 		>> ab:codepoint_names()
 		= ["LATIN SMALL LETTER E", "COMBINING VERTICAL LINE BELOW"]
 		>> ab.length
 		= 1

-		>> a := Text.from_codepoint_names(["LATIN SMALL LETTER E"])
-		>> b := Text.from_codepoint_names(["COMBINING VERTICAL LINE BELOW"])
+		>> a := Text.from_codepoint_names(["LATIN SMALL LETTER E"])!
+		>> b := Text.from_codepoint_names(["COMBINING VERTICAL LINE BELOW"])!
 		>> (a++b):codepoint_names()
 		= ["LATIN SMALL LETTER E", "COMBINING VERTICAL LINE BELOW"]
 		>> (a++b) == ab