diff options
| author | Bruce Hill <bruce@bruce-hill.com> | 2024-03-03 19:12:53 -0500 |
|---|---|---|
| committer | Bruce Hill <bruce@bruce-hill.com> | 2024-03-03 19:12:53 -0500 |
| commit | dc04286e3a75d8f94a69c204cb4fbb7b22e2d6a9 (patch) | |
| tree | a7e82524f7f29e1b0ce95e4629dc121bc3baf4f2 /builtins/functions.c | |
| parent | d8a533cdf09a9dc7c302913cad514fbeee357b18 (diff) | |
Unicode normalization for equality, hashing, tests, and printing
Diffstat (limited to 'builtins/functions.c')
| -rw-r--r-- | builtins/functions.c | 37 |
1 files changed, 31 insertions, 6 deletions
diff --git a/builtins/functions.c b/builtins/functions.c index 69fe5934..c5514ffe 100644 --- a/builtins/functions.c +++ b/builtins/functions.c @@ -5,6 +5,7 @@ #include <errno.h> #include <stdlib.h> #include <sys/param.h> +#include <uninorm.h> #include "../SipHash/halfsiphash.h" #include "../files.h" @@ -12,6 +13,7 @@ #include "functions.h" #include "array.h" #include "table.h" +#include "text.h" #include "pointer.h" #include "string.h" #include "types.h" @@ -158,25 +160,48 @@ public void __doctest(void *expr, const TypeInfo *type, CORD expected, const cha CORD_fprintf(stderr, USE_COLOR ? "\x1b[33;1m>> \x1b[0m%.*s\x1b[m\n" : ">> %.*s\n", (end - start), file->text + start); if (expr) { - CORD expr_str = generic_as_text(expr, USE_COLOR, type); + CORD expr_cord = generic_as_text(expr, USE_COLOR, type); CORD type_name = generic_as_text(NULL, false, type); - CORD_fprintf(stderr, USE_COLOR ? "\x1b[2m=\x1b[0m %r \x1b[2m: %r\x1b[m\n" : "= %r : %r\n", expr_str, type_name); + uint8_t buf[512] = {0}; + size_t buf_len = sizeof(buf)-1; + const char *expr_str = CORD_to_const_char_star(expr_cord); + uint8_t *normalized_str = u8_normalize(UNINORM_NFD, (uint8_t*)expr_str, strlen(expr_str), buf, &buf_len); + if (!normalized_str) errx(1, "Couldn't normalize unicode string!"); + CORD expr_normalized = CORD_from_char_star((char*)buf); + if (normalized_str != buf) + free(normalized_str); + + CORD_fprintf(stderr, USE_COLOR ? "\x1b[2m=\x1b[0m %r \x1b[2m: %r\x1b[m\n" : "= %r : %r\n", expr_normalized, type_name); if (expected) { - CORD expr_plain = USE_COLOR ? generic_as_text(expr, false, type) : expr_str; - bool success = (CORD_cmp(expr_plain, expected) == 0); + CORD expr_plain = USE_COLOR ? generic_as_text(expr, false, type) : expr_normalized; + bool success = Text__equal(&expr_plain, &expected); if (!success && CORD_chr(expected, 0, ':')) { - success = (CORD_cmp(CORD_catn(3, expr_plain, " : ", type_name), expected) == 0); + CORD with_type = CORD_catn(3, expr_plain, " : ", type_name); + success = Text__equal(&with_type, &expected); } if (!success) { fail_source(filename, start, end, USE_COLOR ? "\x1b[31;1mDoctest failure:\nExpected: \x1b[32;1m%s\x1b[0m\n\x1b[31;1m But got: \x1b[31;7m%s\x1b[0m\n" : "Doctest failure:\nExpected: %s\n But got: %s\n", - CORD_to_const_char_star(expected), CORD_to_const_char_star(expr_str)); + CORD_to_const_char_star(expected), CORD_to_const_char_star(expr_normalized)); } } } } +public void say(CORD text) +{ + uint8_t buf[512] = {0}; + size_t buf_len = sizeof(buf)-1; + const char *str = CORD_to_const_char_star(text); + uint8_t *normalized = u8_normalize(UNINORM_NFD, (uint8_t*)str, strlen(str), buf, &buf_len); + if (normalized) { + puts((char*)normalized); + if (normalized != buf) + free(normalized); + } +} + // vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 |
