From 55afc5a5fd93f4e45aeab7f08ed8faee86377d12 Mon Sep 17 00:00:00 2001 From: Bruce Hill Date: Sat, 12 Oct 2024 13:29:23 -0400 Subject: Further progress and bugfixes --- parse.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'parse.c') diff --git a/parse.c b/parse.c index 5e127037..d889ba6b 100644 --- a/parse.c +++ b/parse.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -1404,9 +1405,13 @@ PARSER(parse_text) { } else { parser_err(ctx, pos, strchrnul(pos, '\n'), "This multi-line string should be either indented or have '..' at the front"); } - } else { // Plain character - chunk = Texts(chunk, Text$format("%c", *pos)); - ++pos; + } else { // Regular grapheme cluster (no escapes etc.) + char *next = (char*)u8_grapheme_next((const uint8_t*)pos, (const uint8_t*)ctx->file->text + ctx->file->len); + while (next < ctx->file->text + ctx->file->len && !isascii(*next)) + next = (char*)u8_grapheme_next((const uint8_t*)next, (const uint8_t*)ctx->file->text + ctx->file->len); + Text_t cluster = Text$from_strn(pos, (size_t)(next-pos)); + chunk = Texts(chunk, cluster); + pos = next; } } -- cgit v1.2.3