diff options
| author | Bruce Hill <bruce@bruce-hill.com> | 2024-09-03 01:30:07 -0400 |
|---|---|---|
| committer | Bruce Hill <bruce@bruce-hill.com> | 2024-09-03 01:30:07 -0400 |
| commit | b517f3b2872ad25a9f2bb9114da2bcf178f041db (patch) | |
| tree | 58fd947fc12905a325b86d2c70855d9ea7d0eb6b | |
| parent | e98e77e1d3fbc2a5fd4d26408d2b13ddbef07110 (diff) | |
Fix codepoint names by falling back to block names
| -rw-r--r-- | builtins/text.c | 19 | ||||
| -rw-r--r-- | test/text.tm | 6 |
2 files changed, 18 insertions, 7 deletions
```diff
diff --git a/builtins/text.c b/builtins/text.c
index 66622f5f..a9a246ee 100644
--- a/builtins/text.c
+++ b/builtins/text.c
@@ -1563,6 +1563,17 @@ public array_t Text$utf8_bytes(Text_t text)
     return (array_t){.length=strlen(str), .stride=1, .atomic=1, .data=(void*)str};
 }
 
+static inline const char *codepoint_name(uint32_t c)
+{
+    char *name = GC_MALLOC_ATOMIC(UNINAME_MAX);
+    char *found_name = unicode_character_name(c, name);
+    if (found_name) return found_name;
+    const uc_block_t *block = uc_block(c);
+    assert(block);
+    snprintf(name, UNINAME_MAX, "%s-%X", block->name, c);
+    return name;
+}
+
 public array_t Text$codepoint_names(Text_t text)
 {
     array_t names = {};
@@ -1571,16 +1582,12 @@ public array_t Text$codepoint_names(Text_t text)
         int32_t grapheme = _next_grapheme(text, &state, i);
         if (grapheme < 0) {
             for (int64_t c = 0; c < synthetic_graphemes[-grapheme-1].num_codepoints; c++) {
-                char *name = GC_MALLOC_ATOMIC(UNINAME_MAX);
-                name = unicode_character_name(synthetic_graphemes[-grapheme-1].codepoints[c], name);
-                if (!name) name = "???";
+                const char *name = codepoint_name(synthetic_graphemes[-grapheme-1].codepoints[c]);
                 Text_t name_text = (Text_t){.tag=TEXT_ASCII, .length=strlen(name), .ascii=name};
                 Array$insert(&names, &name_text, I_small(0), sizeof(Text_t));
             }
         } else {
-            char *name = GC_MALLOC_ATOMIC(UNINAME_MAX);
-            name = unicode_character_name(grapheme, name);
-            if (!name) name = "???";
+            const char *name = codepoint_name(grapheme);
             Text_t name_text = (Text_t){.tag=TEXT_ASCII, .length=strlen(name), .ascii=name};
             Array$insert(&names, &name_text, I_small(0), sizeof(Text_t));
         }
diff --git a/test/text.tm b/test/text.tm
index 4dd431ca..f0f289fe 100644
--- a/test/text.tm
+++ b/test/text.tm
@@ -210,6 +210,10 @@ func main():
     >> house.length
     = 1
     >> house:codepoint_names()
-    = ["???"]
+    = ["CJK Unified Ideographs-5BB6"]
     >> house:utf32_codepoints()
     = [23478_i32]
+
+    >> "🐧":codepoint_names()
+    = ["PENGUIN"]
+
```
