// Parsing logic for parsing files #include #include #include #include #include #include "../ast.h" #include "../stdlib/datatypes.h" #include "../stdlib/paths.h" #include "../stdlib/stdlib.h" #include "../stdlib/tables.h" #include "../stdlib/text.h" #include "../util.h" #include "context.h" #include "errors.h" #include "expressions.h" #include "files.h" #include "functions.h" #include "statements.h" #include "text.h" #include "typedefs.h" #include "utils.h" // The cache of {filename -> parsed AST} will hold at most this many entries: #ifndef PARSE_CACHE_SIZE #define PARSE_CACHE_SIZE 100 #endif static ast_t *parse_top_declaration(parse_ctx_t *ctx, const char *pos) { ast_t *declaration = parse_declaration(ctx, pos); if (declaration) declaration->__data.Declare.top_level = true; return declaration; } static ast_t *parse_metadata(parse_ctx_t *ctx, const char *pos) { const char *start = pos; const char *key = get_id(&pos); if (!key) return NULL; spaces(&pos); if (!match(&pos, ":")) return NULL; spaces(&pos); ast_t *value = parse_text(ctx, pos, false); Text_t value_text = EMPTY_TEXT; if (value) { for (ast_list_t *child = Match(value, TextJoin)->children; child; child = child->next) { if (child->ast->tag != TextLiteral) parser_err(ctx, child->ast->start, child->ast->end, "Text interpolations are not allowed in metadata"); value_text = Texts(value_text, Match(child->ast, TextLiteral)->text); } } else { value = parse_path(ctx, pos); if (!value) return NULL; Path_t path = Path$from_str(Match(value, Path)->path); path = Path$resolved(path, Path$parent(Path$from_str(ctx->file->filename))); OptionalText_t contents = Path$read(path); if (contents.tag == TEXT_NONE) parser_err(ctx, value->start, value->end, "File not found: ", path); value_text = Text$trim(contents, Text("\r\n\t "), true, true); } pos = value->end; return NewAST(ctx->file, start, pos, Metadata, .key = Text$from_str(key), .value = value_text); } ast_t *parse_file_body(parse_ctx_t *ctx, const char *pos) { const char *start = pos; whitespace(ctx, &pos); ast_list_t *statements = NULL; for (;;) { const char *next = pos; whitespace(ctx, &next); if (get_indent(ctx, next) != 0) break; ast_t *stmt; if ((stmt = optional(ctx, &pos, parse_metadata)) || (stmt = optional(ctx, &pos, parse_struct_def)) || (stmt = optional(ctx, &pos, parse_func_def)) || (stmt = optional(ctx, &pos, parse_enum_def)) || (stmt = optional(ctx, &pos, parse_lang_def)) || (stmt = optional(ctx, &pos, parse_convert_def)) || (stmt = optional(ctx, &pos, parse_use)) || (stmt = optional(ctx, &pos, parse_inline_c)) || (stmt = optional(ctx, &pos, parse_top_declaration))) { statements = new (ast_list_t, .ast = stmt, .next = statements); pos = stmt->end; whitespace(ctx, &pos); // TODO: check for newline } else { break; } } whitespace(ctx, &pos); if (pos < ctx->file->text + ctx->file->len && *pos != '\0') { parser_err(ctx, pos, eol(pos), "I expect all top-level statements to be declarations of some kind"); } REVERSE_LIST(statements); return NewAST(ctx->file, start, pos, Block, .statements = statements); } public ast_t *parse_file(const char *path, jmp_buf *on_err) { if (path[0] != '<' && path[0] != '/') fail("Path is not fully resolved: ", path); // NOTE: this cache leaks a bounded amount of memory. The cache will never // hold more than PARSE_CACHE_SIZE entries (see below), but each entry's // AST holds onto a reference to the file it came from, so they could // potentially be somewhat large. static Table_t cached = EMPTY_TABLE; ast_t *ast = Table$str_get(cached, path); if (ast) return ast; file_t *file; if (path[0] == '<') { const char *endbracket = strchr(path, '>'); if (!endbracket) return NULL; file = spoof_file(GC_strndup(path, (size_t)(endbracket + 1 - path)), endbracket + 1); } else { file = load_file(path); if (!file) return NULL; } parse_ctx_t ctx = { .file = file, .on_err = on_err, }; const char *pos = file->text; if (match(&pos, "#!")) // shebang some_not(&pos, "\r\n"); whitespace(&ctx, &pos); ast = parse_file_body(&ctx, pos); pos = ast->end; whitespace(&ctx, &pos); if (pos < file->text + file->len && *pos != '\0') { parser_err(&ctx, pos, pos + strlen(pos), "I couldn't parse this part of the file"); } // If cache is getting too big, evict a random entry: if (cached.entries.length > PARSE_CACHE_SIZE) { // FIXME: this currently evicts the first entry, but it should be more like // an LRU cache struct { const char *path; ast_t *ast; } *to_remove = Table$entry(cached, 1); assert(to_remove); Table$str_remove(&cached, to_remove->path); } // Save the AST in the cache: Table$str_set(&cached, path, ast); return ast; } ast_t *parse_use(parse_ctx_t *ctx, const char *pos) { const char *start = pos; ast_t *var = parse_var(ctx, pos); if (var) { pos = var->end; spaces(&pos); if (!match(&pos, ":=")) return NULL; spaces(&pos); } if (!match_word(&pos, "use")) return NULL; spaces(&pos); size_t name_len = strcspn(pos, " \t\r\n;"); if (name_len < 1) parser_err(ctx, start, pos, "There is no module name here to use"); char *name = GC_strndup(pos, name_len); pos += name_len; while (match(&pos, ";")) continue; int what; if (name[0] == '<' || ends_with(name, ".h")) { what = USE_HEADER; } else if (starts_with(name, "-l")) { what = USE_SHARED_OBJECT; } else if (ends_with(name, ".c")) { what = USE_C_CODE; } else if (ends_with(name, ".S") || ends_with(name, ".s")) { what = USE_ASM; } else if (starts_with(name, "./") || starts_with(name, "/") || starts_with(name, "../") || starts_with(name, "~/")) { what = USE_LOCAL; } else { what = USE_MODULE; } return NewAST(ctx->file, start, pos, Use, .var = var, .path = name, .what = what); } public ast_t *parse_file_str(const char *str) { file_t *file = spoof_file("", str); parse_ctx_t ctx = { .file = file, .on_err = NULL, }; const char *pos = file->text; whitespace(&ctx, &pos); ast_t *ast = parse_file_body(&ctx, pos); pos = ast->end; whitespace(&ctx, &pos); if (pos < file->text + file->len && *pos != '\0') parser_err(&ctx, pos, pos + strlen(pos), "I couldn't parse this part of the string"); return ast; }