From 0fe255a7c1ea7f8c324bfa58cdfb2c77f06bd823 Mon Sep 17 00:00:00 2001 From: Bruce Hill Date: Sun, 3 Mar 2024 17:49:40 -0500 Subject: Fix up unicode escapes --- compile.c | 2 +- parse.c | 14 ++++++++++++-- test/strings.tm | 3 +++ 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/compile.c b/compile.c index 20e16ad1..3ef93325 100644 --- a/compile.c +++ b/compile.c @@ -452,7 +452,7 @@ CORD compile(env_t *env, ast_t *ast) if (isprint(c)) code = CORD_cat_char(code, c); else - CORD_sprintf(&code, "%r\\x%02X", code, c); + CORD_sprintf(&code, "%r\\x%02X", code, (uint8_t)c); break; } } diff --git a/parse.c b/parse.c index 19a3a288..b67ea418 100644 --- a/parse.c +++ b/parse.c @@ -140,6 +140,15 @@ const char *unescape(const char **out) { if (unescapes[(int)escape[1]]) { *endpos = escape + 2; return heap_str(unescapes[(int)escape[1]]); + } else if (escape[1] == 'U' && escape[2]) { + char *endptr = NULL; + long codepoint = strtol(escape+2, &endptr, 16); + uint32_t ustr[2] = {codepoint, 0}; + size_t bufsize = 8; + uint8_t buf[bufsize]; + (void)u32_to_u8(ustr, bufsize, buf, &bufsize); + *endpos = endptr; + return heap_strn((char*)buf, bufsize); } else if (escape[1] == 'x' && escape[2] && escape[3]) { char *endptr = (char*)&escape[3+1]; char c = (char)strtol(escape+2, &endptr, 16); @@ -940,8 +949,9 @@ PARSER(parse_string) { if (*pos == '\\') { CORD cord = CORD_EMPTY; do { - char c = unescape(&pos)[0]; - cord = CORD_cat_char(cord, c); + const char *c = unescape(&pos); + cord = CORD_cat(cord, c); + // cord = CORD_cat_char(cord, c); } while (*pos == '\\'); return NewAST(ctx->file, start, pos, StringLiteral, .cord=cord); } diff --git a/test/strings.tm b/test/strings.tm index 6ef139ac..a7631824 100644 --- a/test/strings.tm +++ b/test/strings.tm @@ -5,3 +5,6 @@ = "hello amélie!" >> str:lower():title() = "Hello Amélie!" + +>> \UE9 += "é" -- cgit v1.2.3