aboutsummaryrefslogtreecommitdiff
path: root/parse.c
diff options
context:
space:
mode:
author Bruce Hill <bruce@bruce-hill.com> 2024-10-12 13:29:23 -0400
committer Bruce Hill <bruce@bruce-hill.com> 2024-10-12 13:29:23 -0400
commit 55afc5a5fd93f4e45aeab7f08ed8faee86377d12 (patch)
tree 9387c550f0e6f5748c77376656c53abdf579e2c1 /parse.c
parent a92021f40b2489e0a6d915f02b6d6ad912fd3d77 (diff)
Further progress and bugfixes
Diffstat (limited to 'parse.c')
-rw-r--r-- parse.c 11
1 files changed, 8 insertions, 3 deletions
diff --git a/parse.c b/parse.c
index 5e127037..d889ba6b 100644
--- a/parse.c
+++ b/parse.c
@@ -7,6 +7,7 @@
#include <stdbool.h>
#include <string.h>
#include <unistr.h>
+#include <unigbrk.h>
#include <unictype.h>
#include <uniname.h>
#include <signal.h>
@@ -1404,9 +1405,13 @@ PARSER(parse_text) {
} else {
parser_err(ctx, pos, strchrnul(pos, '\n'), "This multi-line string should be either indented or have '..' at the front");
}
- } else { // Plain character
- chunk = Texts(chunk, Text$format("%c", *pos));
- ++pos;
+ } else { // Regular grapheme cluster (no escapes etc.)
+ char *next = (char*)u8_grapheme_next((const uint8_t*)pos, (const uint8_t*)ctx->file->text + ctx->file->len);
+ while (next < ctx->file->text + ctx->file->len && !isascii(*next))
+ next = (char*)u8_grapheme_next((const uint8_t*)next, (const uint8_t*)ctx->file->text + ctx->file->len);
+ Text_t cluster = Text$from_strn(pos, (size_t)(next-pos));
+ chunk = Texts(chunk, cluster);
+ pos = next;
}
}