src/parse/files.c


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112


#include <gc.h>
#include <setjmp.h>
#include <stdarg.h>
#include <stdbool.h>
#include <string.h>

#include "../ast.h"
#include "../stdlib/stdlib.h"
#include "../stdlib/tables.h"
#include "../stdlib/util.h"
#include "context.h"
#include "errors.h"
#include "files.h"
#include "functions.h"
#include "parse.h"
#include "utils.h"

// Size threshold for the {filename -> parsed AST} cache kept by parse_file():
// once the cache has grown to this threshold, parse_file() evicts an existing
// entry before storing a newly parsed file.
// The #ifndef guard lets the build override the value (for example by
// defining PARSE_CACHE_SIZE on the compiler command line).
#ifndef PARSE_CACHE_SIZE
#define PARSE_CACHE_SIZE 100
#endif

// Parse a declaration at `pos` via parse_declaration() and, on success, flag
// it as a top-level declaration. Returns NULL when parse_declaration() does.
static ast_t *parse_top_declaration(parse_ctx_t *ctx, const char *pos) {
    ast_t *decl = parse_declaration(ctx, pos);
    if (decl == NULL) return NULL;

    decl->__data.Declare.top_level = true;
    return decl;
}

// Parse a sequence of top-level statements starting at `pos` and return them
// wrapped in a Block AST that spans from `pos` to where parsing stopped.
//
// Each statement is tried against the top-level parsers in a fixed order
// (struct, func, enum, lang, extend, convert, use, extern, inline C, and
// finally a plain declaration). The loop ends when get_indent() reports a
// non-zero indent for the next non-whitespace position, or when none of the
// parsers accepts the input. Anything left over before the end of the file's
// text is reported through parser_err().
public
ast_t *parse_file_body(parse_ctx_t *ctx, const char *pos) {
    const char *body_start = pos;
    whitespace(&pos);

    ast_list_t *statements = NULL;
    for (;;) {
        // Look past any whitespace to check the indentation of what comes
        // next, without consuming it from `pos` yet.
        const char *lookahead = pos;
        whitespace(&lookahead);
        if (get_indent(ctx, lookahead) != 0) break;

        // Try each kind of top-level statement in turn; the first parser
        // that succeeds wins.
        ast_t *stmt = optional(ctx, &pos, parse_struct_def);
        if (!stmt) stmt = optional(ctx, &pos, parse_func_def);
        if (!stmt) stmt = optional(ctx, &pos, parse_enum_def);
        if (!stmt) stmt = optional(ctx, &pos, parse_lang_def);
        if (!stmt) stmt = optional(ctx, &pos, parse_extend);
        if (!stmt) stmt = optional(ctx, &pos, parse_convert_def);
        if (!stmt) stmt = optional(ctx, &pos, parse_use);
        if (!stmt) stmt = optional(ctx, &pos, parse_extern);
        if (!stmt) stmt = optional(ctx, &pos, parse_inline_c);
        if (!stmt) stmt = optional(ctx, &pos, parse_top_declaration);
        if (!stmt) break;

        // Statements are prepended here and put back in source order below.
        statements = new (ast_list_t, .ast = stmt, .next = statements);
        pos = stmt->end;
        whitespace(&pos); // TODO: check for newline
    }

    whitespace(&pos);
    const char *text_end = ctx->file->text + ctx->file->len;
    if (pos < text_end && *pos != '\0') {
        parser_err(ctx, pos, eol(pos), "I expect all top-level statements to be declarations of some kind");
    }

    REVERSE_LIST(statements);
    return NewAST(ctx->file, body_start, pos, Block, .statements = statements);
}

// Parse the file identified by `path` and return its AST, reusing a cached
// AST when the same path has been parsed before.
//
// `path` must begin with either '/' or '<'; anything else is rejected via
// fail(). A path beginning with '<' is not read from disk: the text up to and
// including the first '>' and the text after it are handed to spoof_file()
// (presumably as the file's name and its contents -- confirm against
// spoof_file()).
//
// `on_err` is stored in the parse context for the parser's error reporting
// (presumably the longjmp target used by parser_err() -- confirm in errors.h).
//
// Returns NULL if a '<' path has no closing '>' or if load_file() fails.
public
ast_t *parse_file(const char *path, jmp_buf *on_err) {
    if (path[0] != '<' && path[0] != '/') fail("Path is not fully resolved: ", path);
    // NOTE: this cache leaks a bounded amount of memory. The cache will never
    // hold more than PARSE_CACHE_SIZE entries (see below), but each entry's
    // AST holds onto a reference to the file it came from, so they could
    // potentially be somewhat large.
    static Table_t cached = {};
    ast_t *ast = Table$str_get(cached, path);
    if (ast) return ast;

    file_t *file;
    if (path[0] == '<') {
        const char *endbracket = strchr(path, '>');
        if (!endbracket) return NULL;
        // Split "<name>text" into the "<name>" prefix and the trailing text.
        file = spoof_file(GC_strndup(path, (size_t)(endbracket + 1 - path)), endbracket + 1);
    } else {
        file = load_file(path);
        if (!file) return NULL;
    }

    parse_ctx_t ctx = {
        .file = file,
        .on_err = on_err,
    };

    const char *pos = file->text;
    if (match(&pos, "#!")) // shebang
        some_not(&pos, "\r\n");

    whitespace(&pos);
    ast = parse_file_body(&ctx, pos);
    pos = ast->end;
    whitespace(&pos);
    if (pos < file->text + file->len && *pos != '\0') {
        parser_err(&ctx, pos, pos + strlen(pos), "I couldn't parse this part of the file");
    }

    // If the cache is full, evict one entry to make room for the new one.
    // The comparison is `>=` (not `>`) so that, after the insertion below, the
    // cache never exceeds PARSE_CACHE_SIZE entries. The non-empty check keeps
    // us from evicting out of an empty table if PARSE_CACHE_SIZE is
    // overridden to 0.
    if (cached.entries.length > 0 && cached.entries.length >= PARSE_CACHE_SIZE) {
        // FIXME: this currently evicts the first entry, but it should be more like
        // an LRU cache
        struct {
            const char *path;
            ast_t *ast;
        } *to_remove = Table$entry(cached, 1);
        Table$str_remove(&cached, to_remove->path);
    }

    // Save the AST in the cache:
    Table$str_set(&cached, path, ast);
    return ast;
}