aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--builtins/text.c19
-rw-r--r--test/text.tm6
2 files changed, 18 insertions, 7 deletions
diff --git a/builtins/text.c b/builtins/text.c
index 66622f5f..a9a246ee 100644
--- a/builtins/text.c
+++ b/builtins/text.c
@@ -1563,6 +1563,17 @@ public array_t Text$utf8_bytes(Text_t text)
return (array_t){.length=strlen(str), .stride=1, .atomic=1, .data=(void*)str};
}
+static inline const char *codepoint_name(uint32_t c) // Unicode name of c, or "<block name>-<HEX>" when c has no name
+{
+ char *name = GC_MALLOC_ATOMIC(UNINAME_MAX); // collector-managed buffer: the returned pointer is stored by the caller
+ char *found_name = unicode_character_name(c, name);
+ if (found_name) return found_name;
+ const uc_block_t *block = uc_block(c); // may be NULL if c is not covered by any Unicode block
+ // Don't assert on the block: it aborts (or, with NDEBUG, dereferences NULL) for block-less codepoints
+ snprintf(name, UNINAME_MAX, "%s-%X", block ? block->name : "Unknown", (unsigned)c);
+ return name;
+}
+
public array_t Text$codepoint_names(Text_t text)
{
array_t names = {};
@@ -1571,16 +1582,12 @@ public array_t Text$codepoint_names(Text_t text)
int32_t grapheme = _next_grapheme(text, &state, i);
if (grapheme < 0) {
for (int64_t c = 0; c < synthetic_graphemes[-grapheme-1].num_codepoints; c++) {
- char *name = GC_MALLOC_ATOMIC(UNINAME_MAX);
- name = unicode_character_name(synthetic_graphemes[-grapheme-1].codepoints[c], name);
- if (!name) name = "???";
+ const char *name = codepoint_name(synthetic_graphemes[-grapheme-1].codepoints[c]);
Text_t name_text = (Text_t){.tag=TEXT_ASCII, .length=strlen(name), .ascii=name};
Array$insert(&names, &name_text, I_small(0), sizeof(Text_t));
}
} else {
- char *name = GC_MALLOC_ATOMIC(UNINAME_MAX);
- name = unicode_character_name(grapheme, name);
- if (!name) name = "???";
+ const char *name = codepoint_name(grapheme);
Text_t name_text = (Text_t){.tag=TEXT_ASCII, .length=strlen(name), .ascii=name};
Array$insert(&names, &name_text, I_small(0), sizeof(Text_t));
}
diff --git a/test/text.tm b/test/text.tm
index 4dd431ca..f0f289fe 100644
--- a/test/text.tm
+++ b/test/text.tm
@@ -210,6 +210,10 @@ func main():
>> house.length
= 1
>> house:codepoint_names()
- = ["???"]
+ = ["CJK Unified Ideographs-5BB6"]
>> house:utf32_codepoints()
= [23478_i32]
+
+ >> "🐧":codepoint_names()
+ = ["PENGUIN"]
+