From a86dc05d366c0733b645763dd5c3e7396041bd7b Mon Sep 17 00:00:00 2001 From: Bruce Hill Date: Fri, 5 Jul 2024 15:51:23 -0400 Subject: Cache AST parsings so we don't have to re-parse files --- parse.c | 38 ++++++++++++++++++++++++++++++++++++-- parse.h | 2 +- repl.c | 3 +-- tomo.c | 30 ++++++++---------------------- typecheck.c | 18 +++++------------- 5 files changed, 51 insertions(+), 40 deletions(-) diff --git a/parse.c b/parse.c index 10d42df8..f549c257 100644 --- a/parse.c +++ b/parse.c @@ -12,8 +12,14 @@ #include #include "ast.h" +#include "builtins/table.h" #include "builtins/util.h" +// The cache of {filename -> parsed AST} will hold at most this many entries: +#ifndef PARSE_CACHE_SIZE +#define PARSE_CACHE_SIZE 100 +#endif + static const char closing[128] = {['(']=')', ['[']=']', ['<']='>', ['{']='}'}; typedef struct { @@ -2118,7 +2124,25 @@ PARSER(parse_linker) { return NewAST(ctx->file, start, pos, LinkerDirective, .directive=directive); } -ast_t *parse_file(file_t *file, jmp_buf *on_err) { +ast_t *parse_file(const char *path, jmp_buf *on_err) { + // NOTE: this cache leaks a bounded amount of memory. The cache will never + // hold more than PARSE_CACHE_SIZE entries (see below), but each entry's + // AST holds onto a reference to the file it came from, so they could + // potentially be somewhat large. + static table_t cached = {}; + ast_t *ast = Table$str_get(cached, path); + if (ast) return ast; + + file_t *file; + if (path[0] == '<') { + const char *endbracket = strchr(path, '>'); + if (!endbracket) return NULL; + file = spoof_file(heap_strn(path, (size_t)(endbracket + 1 - path)), endbracket + 1); + } else { + file = load_file(path); + if (!file) return NULL; + } + parse_ctx_t ctx = { .file=file, .on_err=on_err, @@ -2129,12 +2153,22 @@ ast_t *parse_file(file_t *file, jmp_buf *on_err) { some_not(&pos, "\r\n"); whitespace(&pos); - ast_t *ast = parse_file_body(&ctx, pos); + ast = parse_file_body(&ctx, pos); pos = ast->end; whitespace(&pos); if (pos < file->text + file->len && *pos != '\0') { parser_err(&ctx, pos, pos + strlen(pos), "I couldn't parse this part of the file"); } + + // If cache is getting too big, evict a random entry: + if (cached.entries.length > PARSE_CACHE_SIZE) { + uint32_t i = arc4random_uniform(cached.entries.length); + struct {const char *path; ast_t *ast; } *to_remove = Table$entry(cached, i+1); + Table$str_remove(&cached, to_remove->path); + } + + // Save the AST in the cache: + Table$str_set(&cached, path, ast); return ast; } diff --git a/parse.h b/parse.h index 9dae2c74..ebd49d5f 100644 --- a/parse.h +++ b/parse.h @@ -8,6 +8,6 @@ type_ast_t *parse_type_str(const char *str); ast_t *parse_expression_str(const char *str); -ast_t *parse_file(file_t *file, jmp_buf *on_err); +ast_t *parse_file(const char *path, jmp_buf *on_err); // vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 diff --git a/repl.c b/repl.c index d3d433dc..f4051451 100644 --- a/repl.c +++ b/repl.c @@ -60,8 +60,7 @@ void repl(void) } else { code = heap_strf("func main(): >> %s", code); } - file_t *f = spoof_file("", code); - ast_t *ast = parse_file(f, &on_err); + ast_t *ast = parse_file(heap_strf("%s", code), &on_err); ast_t *doctest = Match(Match(Match(ast, Block)->statements->ast, FunctionDef)->body, Block)->statements->ast; if (doctest->tag == DocTest) doctest->__data.DocTest.skip_source = 1; run(env, doctest); diff --git a/tomo.c b/tomo.c index a0ce47c0..a0052bac 100644 --- a/tomo.c +++ b/tomo.c @@ -196,10 +196,8 @@ int main(int argc, char *argv[]) fputs("#pragma once\n", header_prog); for (int i = after_flags; i < argc; i++) { const char *filename = argv[i]; - file_t *f = load_file(filename); - if (!f) errx(1, "No such file: %s", filename); - ast_t *ast = parse_file(f, NULL); - if (!ast) errx(1, "Could not parse %s", f); + ast_t *ast = parse_file(filename, NULL); + if (!ast) errx(1, "Could not parse file %s", filename); env->namespace = new(namespace_t, .name=file_base_name(filename)); for (ast_list_t *stmt = Match(ast, Block)->statements; stmt; stmt = stmt->next) { if (stmt->ast->tag == Import || (stmt->ast->tag == Declare && Match(stmt->ast, Declare)->value->tag == Import)) @@ -296,13 +294,9 @@ void build_file_dependency_graph(const char *filename, table_t *to_compile, tabl size_t len = strlen(filename); assert(strncmp(filename + len - 3, ".tm", 3) == 0); - file_t *f = load_file(filename); - if (!f) - errx(1, "No such file: %s", filename); - - ast_t *ast = parse_file(f, NULL); + ast_t *ast = parse_file(filename, NULL); if (!ast) - errx(1, "Could not parse %s", f); + errx(1, "Could not parse file %s", filename); char *file_dir = realpath(filename, NULL); dirname(file_dir); @@ -352,13 +346,9 @@ int transpile_header(env_t *base_env, const char *filename, bool force_retranspi return 0; } - file_t *f = load_file(filename); - if (!f) - errx(1, "No such file: %s", filename); - - ast_t *ast = parse_file(f, NULL); + ast_t *ast = parse_file(filename, NULL); if (!ast) - errx(1, "Could not parse %s", f); + errx(1, "Could not parse file %s", filename); env_t *module_env = load_module_env(base_env, ast); @@ -396,13 +386,9 @@ int transpile_code(env_t *base_env, const char *filename, bool force_retranspile return 0; } - file_t *f = load_file(filename); - if (!f) - errx(1, "No such file: %s", filename); - - ast_t *ast = parse_file(f, NULL); + ast_t *ast = parse_file(filename, NULL); if (!ast) - errx(1, "Could not parse %s", f); + errx(1, "Could not parse file %s", filename); env_t *module_env = load_module_env(base_env, ast); diff --git a/typecheck.c b/typecheck.c index 53946dc8..9a93211d 100644 --- a/typecheck.c +++ b/typecheck.c @@ -116,11 +116,8 @@ static env_t *load_module(env_t *env, ast_t *module_ast) if (!resolved_path) code_err(module_ast, "No such file exists: \"%s\"", path); - file_t *f = load_file(resolved_path); - if (!f) errx(1, "No such file: %s", resolved_path); - - ast_t *ast = parse_file(f, NULL); - if (!ast) errx(1, "Could not compile!"); + ast_t *ast = parse_file(resolved_path, NULL); + if (!ast) errx(1, "Could not compile file %s", resolved_path); return load_module_env(env, ast); } else if (module_ast->tag == Use) { const char *libname = Match(module_ast, Use)->name; @@ -147,11 +144,8 @@ static env_t *load_module(env_t *env, ast_t *module_ast) const char *tm_path = resolve_path(line, resolved_path, "."); if (!tm_path) errx(1, "Couldn't find library %s dependency: %s", libname, line); - file_t *tm_f = load_file(tm_path); - if (!tm_f) errx(1, "No such file: %s", tm_path); - - ast_t *ast = parse_file(tm_f, NULL); - if (!ast) errx(1, "Could not compile!"); + ast_t *ast = parse_file(tm_path, NULL); + if (!ast) errx(1, "Could not compile file %s", tm_path); env_t *module_file_env = fresh_scope(module_env); char *file_prefix = heap_str(file_base_name(line)); for (char *p = file_prefix; *p; p++) { @@ -1101,9 +1095,7 @@ bool is_discardable(env_t *env, ast_t *ast) type_t *get_file_type(env_t *env, const char *path) { - // auto info = get_file_info(env, path); - file_t *f = load_file(path); - ast_t *ast = parse_file(f, NULL); + ast_t *ast = parse_file(path, NULL); if (!ast) compiler_err(NULL, NULL, NULL, "Couldn't parse file: %s", path); arg_t *ns_fields = NULL; -- cgit v1.2.3