From dc04286e3a75d8f94a69c204cb4fbb7b22e2d6a9 Mon Sep 17 00:00:00 2001 From: Bruce Hill Date: Sun, 3 Mar 2024 19:12:53 -0500 Subject: Unicode normalization for equality, hashing, tests, and printing --- compile.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) (limited to 'compile.c') diff --git a/compile.c b/compile.c index ae1a66bf..79a92e0d 100644 --- a/compile.c +++ b/compile.c @@ -3,6 +3,7 @@ #include #include #include +#include #include "ast.h" #include "builtins/text.h" @@ -855,6 +856,16 @@ CORD compile(env_t *env, ast_t *ast) if (!expr_t) code_err(test->expr, "I couldn't figure out the type of this expression"); + CORD output = NULL; + if (test->output) { + const uint8_t *raw = (const uint8_t*)CORD_to_const_char_star(test->output); + uint8_t buf[128] = {0}; + size_t norm_len = sizeof(buf)-1; + uint8_t *norm = u8_normalize(UNINORM_NFD, (uint8_t*)raw, strlen((char*)raw), buf, &norm_len); + output = CORD_from_char_star((char*)norm); + if (norm && norm != buf) free(norm); + } + if (test->expr->tag == Declare) { auto decl = Match(test->expr, Declare); return CORD_asprintf( @@ -863,7 +874,7 @@ CORD compile(env_t *env, ast_t *ast) compile(env, test->expr), compile(env, decl->var), compile_type_info(env, get_type(env, decl->value)), - compile(env, WrapAST(test->expr, TextLiteral, .cord=test->output)), + compile(env, WrapAST(test->expr, TextLiteral, .cord=output)), compile(env, WrapAST(test->expr, TextLiteral, .cord=test->expr->file->filename)), (int64_t)(test->expr->start - test->expr->file->text), (int64_t)(test->expr->end - test->expr->file->text)); @@ -890,7 +901,7 @@ CORD compile(env_t *env, ast_t *ast) CORD_appendf(&code, "$test(%r, %r, %r);", compile(env, WrapAST(test->expr, TextLiteral, .cord=src)), expr_cord, - compile(env, WrapAST(test->expr, TextLiteral, .cord=test->output))); + compile(env, WrapAST(test->expr, TextLiteral, .cord=output))); return CORD_cat(code, "\n}"); } else if (expr_t->tag == VoidType || expr_t->tag == AbortType) { return CORD_asprintf( @@ -908,7 +919,7 @@ CORD compile(env_t *env, ast_t *ast) compile_type(expr_t), compile(env, test->expr), compile_type_info(env, expr_t), - compile(env, WrapAST(test->expr, TextLiteral, .cord=test->output)), + compile(env, WrapAST(test->expr, TextLiteral, .cord=output)), compile(env, WrapAST(test->expr, TextLiteral, .cord=test->expr->file->filename)), (int64_t)(test->expr->start - test->expr->file->text), (int64_t)(test->expr->end - test->expr->file->text)); -- cgit v1.2.3