about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Bruce Hill <bruce@bruce-hill.com> 2024-11-04 01:17:47 -0500
committer: Bruce Hill <bruce@bruce-hill.com> 2024-11-04 01:17:47 -0500
commit: aabc0a3cff685e31f2492c977c6562d8e0ef8ebc (patch)
tree: 518e70f81407c02978c69f80be6f9872dabaa295
parent: b69d14b89492919dc5c1669d2c569ee3baf1bbb0 (diff)
Update text API to use optional returns when applicable
-rw-r--r-- environment.c 6
-rw-r--r-- stdlib/text.c 15
-rw-r--r-- stdlib/text.h 9
-rw-r--r-- test/text.tm 16
4 files changed, 25 insertions, 21 deletions
diff --git a/environment.c b/environment.c
index 39e50b02..47e30dba 100644
--- a/environment.c
+++ b/environment.c
@@ -374,9 +374,9 @@ env_t *new_compilation_unit(CORD libname)
{"ends_with", "Text$ends_with", "func(text,suffix:Text -> Bool)"},
{"find", "Text$find", "func(text:Text, pattern:Pattern, start=1 -> Int?)"},
{"find_all", "Text$find_all", "func(text:Text, pattern:Pattern -> [Text])"},
- {"from_bytes", "Text$from_bytes", "func(bytes:[Byte] -> Text)"},
- {"from_c_string", "Text$from_str", "func(str:CString -> Text)"},
- {"from_codepoint_names", "Text$from_codepoint_names", "func(codepoint_names:[Text] -> Text)"},
+ {"from_bytes", "Text$from_bytes", "func(bytes:[Byte] -> Text?)"},
+ {"from_c_string", "Text$from_str", "func(str:CString -> Text?)"},
+ {"from_codepoint_names", "Text$from_codepoint_names", "func(codepoint_names:[Text] -> Text?)"},
{"from_codepoints", "Text$from_codepoints", "func(codepoints:[Int32] -> Text)"},
{"without_escaping", "Path$cleanup", "func(text:Text -> Path)"},
{"has", "Text$has", "func(text:Text, pattern:Pattern -> Bool)"},
diff --git a/stdlib/text.c b/stdlib/text.c
index e8ecc786..92c5df48 100644
--- a/stdlib/text.c
+++ b/stdlib/text.c
@@ -607,7 +607,7 @@ Text_t text_from_u32(ucs4_t *codepoints, int64_t num_codepoints, bool normalize)
return ret;
}
-public Text_t Text$from_strn(const char *str, size_t len)
+public OptionalText_t Text$from_strn(const char *str, size_t len)
{
int64_t ascii_span = 0;
for (size_t i = 0; i < len && isascii(str[i]); i++)
@@ -626,7 +626,7 @@ public Text_t Text$from_strn(const char *str, size_t len)
return ret;
} else {
if (u8_check((uint8_t*)str, len) != NULL)
- return Text("");
+ return NULL_TEXT;
ucs4_t buf[128];
size_t length = sizeof(buf)/sizeof(buf[0]);
@@ -638,7 +638,7 @@ public Text_t Text$from_strn(const char *str, size_t len)
}
}
-public Text_t Text$from_str(const char *str)
+public OptionalText_t Text$from_str(const char *str)
{
return str ? Text$from_strn(str, strlen(str)) : Text("");
}
@@ -1270,20 +1270,21 @@ public Text_t Text$from_codepoints(Array_t codepoints)
return text_from_u32(codepoints.data, codepoints.length, true);
}
-public Text_t Text$from_codepoint_names(Array_t codepoint_names)
+public OptionalText_t Text$from_codepoint_names(Array_t codepoint_names)
{
Array_t codepoints = {};
for (int64_t i = 0; i < codepoint_names.length; i++) {
Text_t *name = ((Text_t*)(codepoint_names.data + i*codepoint_names.stride));
const char *name_str = Text$as_c_string(*name);
ucs4_t codepoint = unicode_name_character(name_str);
- if (codepoint != UNINAME_INVALID)
- Array$insert(&codepoints, &codepoint, I_small(0), sizeof(ucs4_t));
+ if (codepoint == UNINAME_INVALID)
+ return NULL_TEXT;
+ Array$insert(&codepoints, &codepoint, I_small(0), sizeof(ucs4_t));
}
return Text$from_codepoints(codepoints);
}
-public Text_t Text$from_bytes(Array_t bytes)
+public OptionalText_t Text$from_bytes(Array_t bytes)
{
if (bytes.stride != sizeof(int8_t))
Array$compact(&bytes, sizeof(int8_t));
diff --git a/stdlib/text.h b/stdlib/text.h
index bcdccfeb..45aa00ca 100644
--- a/stdlib/text.h
+++ b/stdlib/text.h
@@ -9,6 +9,7 @@
#include "datatypes.h"
#include "integers.h"
+#include "optionals.h"
#include "util.h"
typedef struct {
@@ -27,8 +28,8 @@ Text_t Text$_concat(int n, Text_t items[n]);
#define Text$concat(...) Text$_concat(sizeof((Text_t[]){__VA_ARGS__})/sizeof(Text_t), (Text_t[]){__VA_ARGS__})
#define Texts(...) Text$concat(__VA_ARGS__)
Text_t Text$slice(Text_t text, Int_t first_int, Int_t last_int);
-Text_t Text$from_str(const char *str);
-Text_t Text$from_strn(const char *str, size_t len);
+OptionalText_t Text$from_str(const char *str);
+OptionalText_t Text$from_strn(const char *str, size_t len);
PUREFUNC uint64_t Text$hash(Text_t *text);
PUREFUNC int32_t Text$compare(const Text_t *a, const Text_t *b);
PUREFUNC bool Text$equal(const Text_t *a, const Text_t *b);
@@ -49,8 +50,8 @@ Array_t Text$utf32_codepoints(Text_t text);
Array_t Text$utf8_bytes(Text_t text);
Array_t Text$codepoint_names(Text_t text);
Text_t Text$from_codepoints(Array_t codepoints);
-Text_t Text$from_codepoint_names(Array_t codepoint_names);
-Text_t Text$from_bytes(Array_t bytes);
+OptionalText_t Text$from_codepoint_names(Array_t codepoint_names);
+OptionalText_t Text$from_bytes(Array_t bytes);
Array_t Text$lines(Text_t text);
Text_t Text$join(Text_t glue, Array_t pieces);
Text_t Text$repeat(Text_t text, Int_t count);
diff --git a/test/text.tm b/test/text.tm
index 1c8988ba..73b9f952 100644
--- a/test/text.tm
+++ b/test/text.tm
@@ -32,8 +32,10 @@ func main():
= [65[32], 109[32], 233[32], 108[32], 105[32], 101[32]] : [Int32]
>> amelie:utf8_bytes()
= [65[B], 109[B], 195[B], 169[B], 108[B], 105[B], 101[B]] : [Byte]
- >> Text.from_bytes([65[B], 109[B], 195[B], 169[B], 108[B], 105[B], 101[B]])
+ >> Text.from_bytes([65[B], 109[B], 195[B], 169[B], 108[B], 105[B], 101[B]])!
= "Amélie"
+ >> Text.from_bytes([255[B]])
+ = !Text
>> amelie2 := "Am$(\U65\U301)lie"
>> amelie2:split()
@@ -120,11 +122,11 @@ func main():
>> c := "É̩"
>> c:codepoint_names()
= ["LATIN CAPITAL LETTER E WITH ACUTE", "COMBINING VERTICAL LINE BELOW"]
- >> c == Text.from_codepoint_names(c:codepoint_names())
+ >> c == Text.from_codepoint_names(c:codepoint_names())!
= yes
>> c == Text.from_codepoints(c:utf32_codepoints())
= yes
- >> c == Text.from_bytes(c:utf8_bytes())
+ >> c == Text.from_bytes(c:utf8_bytes())!
= yes
>> "one$(\n)two$(\n)three":lines()
@@ -220,7 +222,7 @@ func main():
= ["PENGUIN"]
>> Text.from_codepoint_names(["not a valid name here buddy"])
- = ""
+ = !Text
>> "one two; three four":find_all($/; {..}/)
= ["; three four"]
@@ -292,14 +294,14 @@ func main():
do:
!! Testing concatenation-stability:
- >> ab := Text.from_codepoint_names(["LATIN SMALL LETTER E", "COMBINING VERTICAL LINE BELOW"])
+ >> ab := Text.from_codepoint_names(["LATIN SMALL LETTER E", "COMBINING VERTICAL LINE BELOW"])!
>> ab:codepoint_names()
= ["LATIN SMALL LETTER E", "COMBINING VERTICAL LINE BELOW"]
>> ab.length
= 1
- >> a := Text.from_codepoint_names(["LATIN SMALL LETTER E"])
- >> b := Text.from_codepoint_names(["COMBINING VERTICAL LINE BELOW"])
+ >> a := Text.from_codepoint_names(["LATIN SMALL LETTER E"])!
+ >> b := Text.from_codepoint_names(["COMBINING VERTICAL LINE BELOW"])!
>> (a++b):codepoint_names()
= ["LATIN SMALL LETTER E", "COMBINING VERTICAL LINE BELOW"]
>> (a++b) == ab