Update text API to use optional returns when applicable

This commit is contained in:
Bruce Hill 2024-11-04 01:17:47 -05:00
parent b69d14b894
commit aabc0a3cff
4 changed files with 25 additions and 21 deletions

View File

@ -374,9 +374,9 @@ env_t *new_compilation_unit(CORD libname)
{"ends_with", "Text$ends_with", "func(text,suffix:Text -> Bool)"},
{"find", "Text$find", "func(text:Text, pattern:Pattern, start=1 -> Int?)"},
{"find_all", "Text$find_all", "func(text:Text, pattern:Pattern -> [Text])"},
{"from_bytes", "Text$from_bytes", "func(bytes:[Byte] -> Text)"},
{"from_c_string", "Text$from_str", "func(str:CString -> Text)"},
{"from_codepoint_names", "Text$from_codepoint_names", "func(codepoint_names:[Text] -> Text)"},
{"from_bytes", "Text$from_bytes", "func(bytes:[Byte] -> Text?)"},
{"from_c_string", "Text$from_str", "func(str:CString -> Text?)"},
{"from_codepoint_names", "Text$from_codepoint_names", "func(codepoint_names:[Text] -> Text?)"},
{"from_codepoints", "Text$from_codepoints", "func(codepoints:[Int32] -> Text)"},
{"without_escaping", "Path$cleanup", "func(text:Text -> Path)"},
{"has", "Text$has", "func(text:Text, pattern:Pattern -> Bool)"},

View File

@ -607,7 +607,7 @@ Text_t text_from_u32(ucs4_t *codepoints, int64_t num_codepoints, bool normalize)
return ret;
}
public Text_t Text$from_strn(const char *str, size_t len)
public OptionalText_t Text$from_strn(const char *str, size_t len)
{
int64_t ascii_span = 0;
for (size_t i = 0; i < len && isascii(str[i]); i++)
@ -626,7 +626,7 @@ public Text_t Text$from_strn(const char *str, size_t len)
return ret;
} else {
if (u8_check((uint8_t*)str, len) != NULL)
return Text("");
return NULL_TEXT;
ucs4_t buf[128];
size_t length = sizeof(buf)/sizeof(buf[0]);
@ -638,7 +638,7 @@ public Text_t Text$from_strn(const char *str, size_t len)
}
}
// Convert a C string into a text value.
// Diff view: the first signature line below is the pre-change declaration
// (returns Text_t); the second is its replacement (returns OptionalText_t).
// A non-NULL `str` is measured with strlen() and forwarded to
// Text$from_strn(), whose result is returned as-is.
public Text_t Text$from_str(const char *str)
public OptionalText_t Text$from_str(const char *str)
{
// NOTE(review): a NULL `str` still produces the empty text Text(""), not
// NULL_TEXT, even under the optional return type -- confirm this is intended.
return str ? Text$from_strn(str, strlen(str)) : Text("");
}
@ -1270,20 +1270,21 @@ public Text_t Text$from_codepoints(Array_t codepoints)
return text_from_u32(codepoints.data, codepoints.length, true);
}
// Build a text value from an array of Unicode character names.
// Diff view: old and new lines are interleaved below. The first signature
// line (returning Text_t) is the pre-change version; the second (returning
// OptionalText_t) replaces it.
// Each name is converted to a C string, looked up with
// unicode_name_character(), and the resulting codepoint is collected; the
// collected codepoints are then passed to Text$from_codepoints().
public Text_t Text$from_codepoint_names(Array_t codepoint_names)
public OptionalText_t Text$from_codepoint_names(Array_t codepoint_names)
{
Array_t codepoints = {};
for (int64_t i = 0; i < codepoint_names.length; i++) {
// Index by byte offset using the array's stride rather than assuming
// the Text_t items are tightly packed.
Text_t *name = ((Text_t*)(codepoint_names.data + i*codepoint_names.stride));
const char *name_str = Text$as_c_string(*name);
ucs4_t codepoint = unicode_name_character(name_str);
// Pre-change behavior (next two lines): a name that failed lookup was
// skipped and the remaining names were still used.
if (codepoint != UNINAME_INVALID)
Array$insert(&codepoints, &codepoint, I_small(0), sizeof(ucs4_t));
// Post-change behavior (next three lines): the first name that fails
// lookup aborts the conversion and NULL_TEXT is returned.
if (codepoint == UNINAME_INVALID)
return NULL_TEXT;
// NOTE(review): presumably index I_small(0) means "append at the end";
// the test expectations in this diff show name order is preserved -- confirm.
Array$insert(&codepoints, &codepoint, I_small(0), sizeof(ucs4_t));
}
return Text$from_codepoints(codepoints);
}
public Text_t Text$from_bytes(Array_t bytes)
public OptionalText_t Text$from_bytes(Array_t bytes)
{
if (bytes.stride != sizeof(int8_t))
Array$compact(&bytes, sizeof(int8_t));

View File

@ -9,6 +9,7 @@
#include "datatypes.h"
#include "integers.h"
#include "optionals.h"
#include "util.h"
typedef struct {
@ -27,8 +28,8 @@ Text_t Text$_concat(int n, Text_t items[n]);
#define Text$concat(...) Text$_concat(sizeof((Text_t[]){__VA_ARGS__})/sizeof(Text_t), (Text_t[]){__VA_ARGS__})
#define Texts(...) Text$concat(__VA_ARGS__)
Text_t Text$slice(Text_t text, Int_t first_int, Int_t last_int);
Text_t Text$from_str(const char *str);
Text_t Text$from_strn(const char *str, size_t len);
OptionalText_t Text$from_str(const char *str);
OptionalText_t Text$from_strn(const char *str, size_t len);
PUREFUNC uint64_t Text$hash(Text_t *text);
PUREFUNC int32_t Text$compare(const Text_t *a, const Text_t *b);
PUREFUNC bool Text$equal(const Text_t *a, const Text_t *b);
@ -49,8 +50,8 @@ Array_t Text$utf32_codepoints(Text_t text);
Array_t Text$utf8_bytes(Text_t text);
Array_t Text$codepoint_names(Text_t text);
Text_t Text$from_codepoints(Array_t codepoints);
Text_t Text$from_codepoint_names(Array_t codepoint_names);
Text_t Text$from_bytes(Array_t bytes);
OptionalText_t Text$from_codepoint_names(Array_t codepoint_names);
OptionalText_t Text$from_bytes(Array_t bytes);
Array_t Text$lines(Text_t text);
Text_t Text$join(Text_t glue, Array_t pieces);
Text_t Text$repeat(Text_t text, Int_t count);

View File

@ -32,8 +32,10 @@ func main():
= [65[32], 109[32], 233[32], 108[32], 105[32], 101[32]] : [Int32]
>> amelie:utf8_bytes()
= [65[B], 109[B], 195[B], 169[B], 108[B], 105[B], 101[B]] : [Byte]
>> Text.from_bytes([65[B], 109[B], 195[B], 169[B], 108[B], 105[B], 101[B]])
>> Text.from_bytes([65[B], 109[B], 195[B], 169[B], 108[B], 105[B], 101[B]])!
= "Amélie"
>> Text.from_bytes([255[B]])
= !Text
>> amelie2 := "Am$(\U65\U301)lie"
>> amelie2:split()
@ -120,11 +122,11 @@ func main():
>> c := "É̩"
>> c:codepoint_names()
= ["LATIN CAPITAL LETTER E WITH ACUTE", "COMBINING VERTICAL LINE BELOW"]
>> c == Text.from_codepoint_names(c:codepoint_names())
>> c == Text.from_codepoint_names(c:codepoint_names())!
= yes
>> c == Text.from_codepoints(c:utf32_codepoints())
= yes
>> c == Text.from_bytes(c:utf8_bytes())
>> c == Text.from_bytes(c:utf8_bytes())!
= yes
>> "one$(\n)two$(\n)three":lines()
@ -220,7 +222,7 @@ func main():
= ["PENGUIN"]
>> Text.from_codepoint_names(["not a valid name here buddy"])
= ""
= !Text
>> "one two; three four":find_all($/; {..}/)
= ["; three four"]
@ -292,14 +294,14 @@ func main():
do:
!! Testing concatenation-stability:
>> ab := Text.from_codepoint_names(["LATIN SMALL LETTER E", "COMBINING VERTICAL LINE BELOW"])
>> ab := Text.from_codepoint_names(["LATIN SMALL LETTER E", "COMBINING VERTICAL LINE BELOW"])!
>> ab:codepoint_names()
= ["LATIN SMALL LETTER E", "COMBINING VERTICAL LINE BELOW"]
>> ab.length
= 1
>> a := Text.from_codepoint_names(["LATIN SMALL LETTER E"])
>> b := Text.from_codepoint_names(["COMBINING VERTICAL LINE BELOW"])
>> a := Text.from_codepoint_names(["LATIN SMALL LETTER E"])!
>> b := Text.from_codepoint_names(["COMBINING VERTICAL LINE BELOW"])!
>> (a++b):codepoint_names()
= ["LATIN SMALL LETTER E", "COMBINING VERTICAL LINE BELOW"]
>> (a++b) == ab