diff options
Diffstat (limited to 'parse.c')
| -rw-r--r-- | parse.c | 74 |
1 file changed, 38 insertions, 36 deletions
@@ -7,6 +7,7 @@ #include <stdbool.h> #include <string.h> #include <unistr.h> +#include <unigbrk.h> #include <unictype.h> #include <uniname.h> #include <signal.h> @@ -1283,13 +1284,12 @@ PARSER(parse_text) { // Escape sequence, e.g. \r\n if (*pos == '\\') { - CORD cord = CORD_EMPTY; + Text_t text = Text(""); do { const char *c = unescape(ctx, &pos); - cord = CORD_cat(cord, c); - // cord = CORD_cat_char(cord, c); + text = Texts(text, Text$from_str(c)); } while (*pos == '\\'); - return NewAST(ctx->file, start, pos, TextLiteral, .cord=cord); + return NewAST(ctx->file, start, pos, TextLiteral, .text=text); } char open_quote, close_quote, open_interp = '$'; @@ -1330,17 +1330,17 @@ PARSER(parse_text) { int64_t string_indent = starting_indent + 1; ast_list_t *chunks = NULL; - CORD chunk = CORD_EMPTY; + Text_t chunk = Text(""); const char *chunk_start = pos; int depth = 1; bool leading_newline = false; for (; pos < ctx->file->text + ctx->file->len && depth > 0; ) { if (*pos == open_interp) { // Interpolation const char *interp_start = pos; - if (chunk) { - ast_t *literal = NewAST(ctx->file, chunk_start, pos, TextLiteral, .cord=chunk); + if (chunk.length > 0) { + ast_t *literal = NewAST(ctx->file, chunk_start, pos, TextLiteral, .text=Text$format("%k", &chunk)); // Collapse text chunks = new(ast_list_t, .ast=literal, .next=chunks); - chunk = NULL; + chunk = Text(""); } ++pos; ast_t *interp; @@ -1353,7 +1353,7 @@ PARSER(parse_text) { if (get_indent(ctx, pos) == starting_indent) { ++depth; } - chunk = CORD_cat_char(chunk, *pos); + chunk = Texts(chunk, Text$format("%c", *pos)); ++pos; } else if (!leading_newline && *pos == close_quote) { // Nested pair end if (get_indent(ctx, pos) == starting_indent) { @@ -1361,13 +1361,13 @@ PARSER(parse_text) { if (depth == 0) break; } - chunk = CORD_cat_char(chunk, *pos); + chunk = Texts(chunk, Text$format("%c", *pos)); ++pos; } else if (newline_with_indentation(&pos, string_indent)) { // Newline - if (!leading_newline && !(chunk || chunks)) 
{ + if (!leading_newline && !(chunk.length > 0 || chunks)) { leading_newline = true; } else { - chunk = CORD_cat_char(chunk, '\n'); + chunk = Texts(chunk, Text("\n")); } } else if (newline_with_indentation(&pos, starting_indent)) { // Line continuation (..) if (*pos == close_quote) { @@ -1378,16 +1378,19 @@ PARSER(parse_text) { } else { parser_err(ctx, pos, strchrnul(pos, '\n'), "This multi-line string should be either indented or have '..' at the front"); } - } else { // Plain character - chunk = CORD_cat_char(chunk, *pos); - ++pos; + } else { // Regular grapheme cluster (no escapes etc.) + char *next = (char*)u8_grapheme_next((const uint8_t*)pos, (const uint8_t*)ctx->file->text + ctx->file->len); + while (next < ctx->file->text + ctx->file->len && !isascii(*next)) + next = (char*)u8_grapheme_next((const uint8_t*)next, (const uint8_t*)ctx->file->text + ctx->file->len); + Text_t cluster = Text$from_strn(pos, (size_t)(next-pos)); + chunk = Texts(chunk, cluster); + pos = next; } } - if (chunk) { - ast_t *literal = NewAST(ctx->file, chunk_start, pos, TextLiteral, .cord=chunk); + if (chunk.length > 0) { + ast_t *literal = NewAST(ctx->file, chunk_start, pos, TextLiteral, .text=Text$format("%k", &chunk)); // Collapse text chunks = new(ast_list_t, .ast=literal, .next=chunks); - chunk = NULL; } REVERSE_LIST(chunks); @@ -1407,13 +1410,13 @@ PARSER(parse_path) { const char *chunk_start = start + 1; ast_list_t *chunks = NULL; - CORD chunk_text = CORD_EMPTY; + Text_t chunk_text = Text(""); int paren_depth = 1; while (pos < ctx->file->text + ctx->file->len) { switch (*pos) { case '\\': { ++pos; - chunk_text = CORD_asprintf("%r%.*s%c", chunk_text, (size_t)(pos - chunk_start), chunk_start, *pos); + chunk_text = Text$format("%k%.*s%c", &chunk_text, (size_t)(pos - chunk_start), chunk_start, *pos); ++pos; chunk_start = pos; continue; @@ -1422,12 +1425,12 @@ PARSER(parse_path) { const char *interp_start = pos; if (pos > chunk_start) - chunk_text = CORD_asprintf("%r%.*s", chunk_text, 
(size_t)(pos - chunk_start), chunk_start); + chunk_text = Text$format("%k%.*s", &chunk_text, (size_t)(pos - chunk_start), chunk_start); - if (chunk_text) { - ast_t *literal = NewAST(ctx->file, chunk_start, pos, TextLiteral, .cord=chunk_text); + if (chunk_text.length > 0) { + ast_t *literal = NewAST(ctx->file, chunk_start, pos, TextLiteral, .text=chunk_text); chunks = new(ast_list_t, .ast=literal, .next=chunks); - chunk_text = CORD_EMPTY; + chunk_text = Text(""); } ++pos; if (*pos == ' ' || *pos == '\t') @@ -1455,10 +1458,10 @@ PARSER(parse_path) { end_of_path:; if (pos > chunk_start) - chunk_text = CORD_asprintf("%r%.*s", chunk_text, (size_t)(pos - chunk_start), chunk_start); + chunk_text = Text$format("%k%.*s", &chunk_text, (size_t)(pos - chunk_start), chunk_start); - if (chunk_text != CORD_EMPTY) { - ast_t *literal = NewAST(ctx->file, chunk_start, pos, TextLiteral, .cord=chunk_text); + if (chunk_text.length > 0) { + ast_t *literal = NewAST(ctx->file, chunk_start, pos, TextLiteral, .text=chunk_text); chunks = new(ast_list_t, .ast=literal, .next=chunks); } @@ -2283,7 +2286,7 @@ PARSER(parse_inline_c) { if (depth != 0) parser_err(ctx, start, start+1, "I couldn't find the closing '}' for this inline C code"); - CORD c_code = GC_strndup(c_code_start, (size_t)((pos-1) - c_code_start)); + Text_t c_code = Text$format("%.*s", (size_t)((pos-1) - c_code_start), c_code_start); return NewAST(ctx->file, start, pos, InlineCCode, .code=c_code, .type_ast=type); } @@ -2322,16 +2325,16 @@ PARSER(parse_say) { spaces(&pos); ast_list_t *chunks = NULL; - CORD chunk = CORD_EMPTY; + Text_t chunk = Text(""); const char *chunk_start = pos; const char open_interp = '$'; while (pos < ctx->file->text + ctx->file->len) { if (*pos == open_interp) { // Interpolation const char *interp_start = pos; - if (chunk) { - ast_t *literal = NewAST(ctx->file, chunk_start, pos, TextLiteral, .cord=chunk); + if (chunk.length > 0) { + ast_t *literal = NewAST(ctx->file, chunk_start, pos, TextLiteral, 
.text=Text$format("%k", &chunk)); chunks = new(ast_list_t, .ast=literal, .next=chunks); - chunk = NULL; + chunk = Text(""); } ++pos; ast_t *interp; @@ -2343,15 +2346,14 @@ PARSER(parse_say) { } else if (*pos == '\r' || *pos == '\n') { // Newline break; } else { // Plain character - chunk = CORD_cat_char(chunk, *pos); + chunk = Texts(chunk, Text$format("%c", *pos)); ++pos; } } - if (chunk) { - ast_t *literal = NewAST(ctx->file, chunk_start, pos, TextLiteral, .cord=chunk); + if (chunk.length > 0) { + ast_t *literal = NewAST(ctx->file, chunk_start, pos, TextLiteral, .text=Text$format("%k", &chunk)); chunks = new(ast_list_t, .ast=literal, .next=chunks); - chunk = NULL; } REVERSE_LIST(chunks); |
