From 1e8cdfa920b93747d4e0f37b268846638b8597c1 Mon Sep 17 00:00:00 2001 From: Bruce Hill Date: Sun, 11 Feb 2024 19:30:02 -0500 Subject: [PATCH] Changes to string parsing --- compile.c | 4 +-- compile.h | 1 + files.c | 4 +-- nextlang.c | 4 +-- parse.c | 74 +++++++++++++++++++++++------------------------------- 5 files changed, 39 insertions(+), 48 deletions(-) diff --git a/compile.c b/compile.c index 4444a13..96431eb 100644 --- a/compile.c +++ b/compile.c @@ -16,7 +16,7 @@ CORD compile_type(type_ast_t *t) } } -static inline CORD compile_statement(ast_t *ast) +CORD compile_statement(ast_t *ast) { switch (ast->tag) { case If: case For: case While: case FunctionDef: case Return: case StructDef: case EnumDef: @@ -164,7 +164,7 @@ CORD compile(ast_t *ast) } case Declare: { auto decl = Match(ast, Declare); - return CORD_asprintf("__declare(%r, %r)", compile(decl->var), compile(decl->value)); + return CORD_asprintf("__declare(%r, %r);", compile(decl->var), compile(decl->value)); } case Assign: { auto assign = Match(ast, Assign); diff --git a/compile.h b/compile.h index 2b56a1a..d8b9878 100644 --- a/compile.h +++ b/compile.h @@ -8,5 +8,6 @@ CORD compile_type(type_ast_t *t); CORD compile(ast_t *ast); +CORD compile_statement(ast_t *ast); // vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 diff --git a/files.c b/files.c index 54b784c..9fe9b91 100644 --- a/files.c +++ b/files.c @@ -77,8 +77,8 @@ static file_t *_load_file(const char* filename, FILE *file) while ((line_len = getline(&line_buf, &line_cap, file)) >= 0) { file_line_t line_info = {.offset=file_size, .indent=0, .is_empty=false}; char *p; - for (p = line_buf; *p == ' ' || *p == '\t'; ++p) - line_info.indent += *p == ' ' ? 1 : 4; + for (p = line_buf; *p == '\t'; ++p) + line_info.indent += 1; line_info.is_empty = *p != '\r' && *p != '\n'; if (ret->line_capacity <= ret->num_lines) { ret->lines = GC_REALLOC(ret->lines, sizeof(file_line_t)*(ret->line_capacity += 32)); diff --git a/nextlang.c b/nextlang.c index 7ac69f6..aa0a42b 100644 --- a/nextlang.c +++ b/nextlang.c @@ -98,8 +98,8 @@ int main(int argc, char *argv[]) switch (stmt->ast->tag) { case FunctionDef: case StructDef: case EnumDef: break; default: { - program = CORD_cat(program, compile(stmt->ast)); - program = CORD_cat(program, ";\n"); + program = CORD_cat(program, compile_statement(stmt->ast)); + program = CORD_cat(program, "\n"); break; } } diff --git a/parse.c b/parse.c index bd2e3f4..7c01661 100644 --- a/parse.c +++ b/parse.c @@ -322,28 +322,33 @@ bool indent(parse_ctx_t *ctx, const char **out) { const char *pos = *out; int64_t starting_indent = get_indent(ctx->file, pos); whitespace(&pos); - if (get_line_number(ctx->file, pos) == get_line_number(ctx->file, *out)) + const char *start_of_line = get_line(ctx->file, get_line_number(ctx->file, pos)); + if (start_of_line <= *out) return false; - if (get_indent(ctx->file, pos) > starting_indent) { + if ((int64_t)strspn(start_of_line, "\t") < starting_indent) + return false; + + *out = start_of_line + starting_indent + 1; + return true; +} + +bool newline_with_indentation(const char **out, int64_t target) { + const char *pos = *out; + if (*pos == '\r') ++pos; + if (*pos != '\n') return false; + ++pos; + if (*pos == '\r' || *pos == '\n' || *pos == '\0') { + // Empty line *out = pos; return true; } - return false; -} - -bool match_indentation(const char **out, int64_t target) { - const char *pos = *out; - for (int64_t indentation = 0; indentation < target; ) { - switch (*pos) { - case ' ': indentation += 1; ++pos; break; - case '\t': indentation += 4; ++pos; break; - default: return false; - } + if ((int64_t)strspn(pos, "\t") >= target) { + *out = pos + target; + return true; } - *out = pos; - return true; + return false; } /////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -899,16 +904,7 @@ PARSER(parse_string) { // printf("Parsing string: '%c' .. '%c' interp: '%c%c'\n", *start, close_quote, open_interp, close_interp); int64_t starting_indent = get_indent(ctx->file, pos); - int64_t string_indent; - if (*pos == '\r' || *pos == '\n') { - const char *first_line = pos; - whitespace(&first_line); - string_indent = get_indent(ctx->file, first_line); - if (string_indent <= starting_indent) - parser_err(ctx, start, first_line, "Multi-line strings must be indented on their first line"); - } else { - string_indent = starting_indent + 4; - } + int64_t string_indent = starting_indent + 1; ast_list_t *chunks = NULL; CORD chunk = CORD_EMPTY; @@ -946,26 +942,20 @@ PARSER(parse_string) { break; } chunk = CORD_cat_char(chunk, *pos); - } else if (*pos == '\r' || *pos == '\n') { - // Newline handling - match(&pos, "\r"); - match(&pos, "\n"); - if (match_indentation(&pos, string_indent)) { - if (chunk || chunks) - chunk = CORD_cat_char(chunk, '\n'); + } else if (newline_with_indentation(&pos, string_indent)) { + if (chunk || chunks) + chunk = CORD_cat_char(chunk, '\n'); + --pos; + } else if (newline_with_indentation(&pos, starting_indent)) { + if (*pos == close_quote) { + break; + } else if (some_of(&pos, ".") >= 2) { + // Multi-line split --pos; continue; + } else { + parser_err(ctx, pos, strchrnul(pos, '\n'), "This multi-line string should be either indented or have '..' at the front"); } - if (get_indent(ctx->file, pos) == starting_indent) { - if (*pos == close_quote) { - break; - } else if (some_of(&pos, ".") >= 2) { - // Multi-line split - --pos; - continue; - } - } - parser_err(ctx, pos, strchrnul(pos, '\n'), "This string line isn't correctly indented"); } else { chunk = CORD_cat_char(chunk, *pos); }