1 // Logic for parsing whole files (and file-like strings) into ASTs
10 #include "../stdlib/datatypes.h"
11 #include "../stdlib/paths.h"
12 #include "../stdlib/stdlib.h"
13 #include "../stdlib/tables.h"
14 #include "../stdlib/text.h"
15 #include "../stdlib/util.h"
18 #include "expressions.h"
20 #include "functions.h"
22 #include "statements.h"
27 // The cache of {filename -> parsed AST} will hold at most this many entries:
// The #ifndef guard means a build can predefine PARSE_CACHE_SIZE to override
// the default of 100.
28 #ifndef PARSE_CACHE_SIZE
29 #define PARSE_CACHE_SIZE 100
// Parse a declaration at `pos` with parse_declaration() and, when one was
// parsed, flag it as a top-level declaration.
// NOTE(review): the return statement and closing brace are not visible in this
// excerpt; presumably the (possibly NULL) declaration is returned -- confirm.
32 static ast_t *parse_top_declaration(parse_ctx_t *ctx, const char *pos) {
33 ast_t *declaration = parse_declaration(ctx, pos);
// Only touch the node when parse_declaration() returned a non-NULL AST.
34 if (declaration) declaration->__data.Declare.top_level = true;
// Parse a metadata entry of the form `key: value` and build a Metadata AST
// node holding the key and the value as text.
// Two kinds of value are handled in the visible code:
//   - a text literal, which must not contain interpolations;
//   - a path, in which case the contents of that file (trimmed) become the value.
// Returns NULL when no identifier is found, when no ":" follows it, or when
// parse_path() yields nothing.
// NOTE(review): the lines that choose between the text case and the path case
// are not visible in this excerpt; presumably the path case is the fallback
// when parse_text() returned NULL -- confirm.
38 static ast_t *parse_metadata(parse_ctx_t *ctx, const char *pos) {
39 const char *start = pos;
// The key must be an identifier.
40 const char *key = get_id(&pos);
41 if (!key) return NULL;
43 if (!match(&pos, ":")) return NULL;
45 ast_t *value = parse_text(ctx, pos, false);
46 Text_t value_text = EMPTY_TEXT;
// Concatenate the literal chunks of the text; any chunk that is not a
// TextLiteral (i.e. an interpolation) is reported as a parse error.
48 for (ast_list_t *child = Match(value, TextJoin)->children; child; child = child->next) {
49 if (child->ast->tag != TextLiteral)
50 parser_err(ctx, child->ast->start, child->ast->end, "Text interpolations are not allowed in metadata");
51 value_text = Texts(value_text, Match(child->ast, TextLiteral)->text);
54 value = parse_path(ctx, pos);
55 if (!value) return NULL;
56 Path_t path = Path$from_str(Match(value, Path)->path);
// Resolve the path against the parent of the path of the file being parsed.
57 path = Path$resolved(path, Path$parent(Path$from_str(ctx->file->filename)));
58 OptionalText_t contents = Path$read(path);
59 if (contents.tag == TEXT_NONE) parser_err(ctx, value->start, value->end, "File not found: ", path);
// Trim the characters "\r\n\t " from the file contents (the two `true` flags
// presumably select both the left and right ends -- confirm against Text$trim).
60 value_text = Text$trim(contents, Text("\r\n\t "), true, true);
64 return NewAST(ctx->file, start, pos, Metadata, .key = Text$from_str(key), .value = value_text);
// Parse the top-level statements of a file, starting at `pos`, and return
// them as a Block AST spanning from the original `pos` to where parsing
// stopped.
// Each statement is tried, in this order, as: metadata, struct definition,
// function definition, enum definition, lang definition, convert definition,
// `use`, inline C, and finally a top-level declaration.
// If anything other than whitespace remains afterwards, it is reported via
// parser_err().
// NOTE(review): the loop header and some closing lines are not visible in
// this excerpt.
67 ast_t *parse_file_body(parse_ctx_t *ctx, const char *pos) {
68 const char *start = pos;
69 whitespace(ctx, &pos);
70 ast_list_t *statements = NULL;
// Look ahead past whitespace using a copy of `pos`, so nothing is consumed.
72 const char *next = pos;
73 whitespace(ctx, &next);
// Stop at the first statement that is not at indentation 0.
74 if (get_indent(ctx, next) != 0) break;
// Try each statement parser in turn; the `||` chain stops at the first one
// that yields an AST.
76 if ((stmt = optional(ctx, &pos, parse_metadata)) || (stmt = optional(ctx, &pos, parse_struct_def))
77 || (stmt = optional(ctx, &pos, parse_func_def)) || (stmt = optional(ctx, &pos, parse_enum_def))
78 || (stmt = optional(ctx, &pos, parse_lang_def)) || (stmt = optional(ctx, &pos, parse_convert_def))
79 || (stmt = optional(ctx, &pos, parse_use)) || (stmt = optional(ctx, &pos, parse_inline_c))
80 || (stmt = optional(ctx, &pos, parse_top_declaration))) {
// Prepend to the list; REVERSE_LIST below puts the statements back in source order.
81 statements = new (ast_list_t, .ast = stmt, .next = statements);
83 whitespace(ctx, &pos); // TODO: check for newline
88 whitespace(ctx, &pos);
// Non-NUL input left before the end of the file text was not recognized as
// any kind of top-level statement.
89 if (pos < ctx->file->text + ctx->file->len && *pos != '\0') {
90 parser_err(ctx, pos, eol(pos), "I expect all top-level statements to be declarations of some kind");
92 REVERSE_LIST(statements);
93 return NewAST(ctx->file, start, pos, Block, .statements = statements);
// Parse the file named by `path` into an AST, consulting and updating a
// function-local static table that maps paths to parsed ASTs.
//   path:   must start with '/' or with '<'; anything else calls fail().
//           A path starting with '<' is treated as "<name>text": the part up
//           to and including '>' becomes the name of a spoofed file and the
//           remainder becomes its text.
//   on_err: not referenced in the lines visible here; presumably installed on
//           the parse context as the error jump target -- confirm.
// Returns NULL when a '<' path has no closing '>' or when load_file() fails.
// NOTE(review): several lines of this function (cache-hit handling, context
// setup, the final return) are not visible in this excerpt.
97 ast_t *parse_file(const char *path, jmp_buf *on_err) {
98 if (path[0] != '<' && path[0] != '/') fail("Path is not fully resolved: ", path);
99 // NOTE: this cache leaks a bounded amount of memory. The cache will never
100 // hold more than PARSE_CACHE_SIZE entries (see below), but each entry's
101 // AST holds onto a reference to the file it came from, so they could
102 // potentially be somewhat large.
103 static Table_t cached = EMPTY_TABLE;
104 ast_t *ast = Table$str_get(cached, path);
108 if (path[0] == '<') {
// Spoofed source: split the argument at the first '>' into name and text.
109 const char *endbracket = strchr(path, '>');
110 if (!endbracket) return NULL;
111 file = spoof_file(GC_strndup(path, (size_t)(endbracket + 1 - path)), endbracket + 1);
113 file = load_file(path);
114 if (!file) return NULL;
122 const char *pos = file->text;
123 if (match(&pos, "#!")) // shebang
124 some_not(&pos, "\r\n");
126 whitespace(&ctx, &pos);
127 ast = parse_file_body(&ctx, pos);
129 whitespace(&ctx, &pos);
// Non-NUL input left before the end of the file text is reported as unparseable.
130 if (pos < file->text + file->len && *pos != '\0') {
131 parser_err(&ctx, pos, pos + strlen(pos), "I couldn't parse this part of the file");
134 // If cache is getting too big, evict an entry (currently the first one, see FIXME):
// NOTE(review): this check uses `>` and runs before the insertion below, so
// the table can reach PARSE_CACHE_SIZE + 1 entries, whereas the comments above
// promise "at most" PARSE_CACHE_SIZE -- consider `>=`.
135 if (cached.entries.length > PARSE_CACHE_SIZE) {
136 // FIXME: this currently evicts the first entry, but it should be more like
141 } *to_remove = Table$entry(cached, 1);
143 Table$str_remove(&cached, to_remove->path);
146 // Save the AST in the cache:
147 Table$str_set(&cached, path, ast);
// Parse a `use` statement, optionally written as `var := use name`, and build
// a Use AST node carrying the variable (if any), the module name and a
// classification of what is being used.
// The module name is the run of characters up to the next space, tab, CR, LF
// or ';'. An empty name is reported via parser_err().
// Returns NULL when the `use` keyword is not found, or when ":=" is expected
// but missing.
// NOTE(review): several lines are not visible in this excerpt, including the
// condition guarding the ":=" check (presumably only applied when a variable
// was parsed), the advance of `pos` past the name, the body of the ';' loop,
// and most of the `what = ...` assignments -- confirm against the full file.
151 ast_t *parse_use(parse_ctx_t *ctx, const char *pos) {
152 const char *start = pos;
154 ast_t *var = parse_var(ctx, pos);
158 if (!match(&pos, ":=")) return NULL;
162 if (!match_word(&pos, "use")) return NULL;
// Measure the module name: everything before the first delimiter character.
164 size_t name_len = strcspn(pos, " \t\r\n;");
165 if (name_len < 1) parser_err(ctx, start, pos, "There is no module name here to use");
166 char *name = GC_strndup(pos, name_len);
// Match any ';' characters that follow the name.
168 while (match(&pos, ";"))
// Classify the name by its shape. The order of these tests matters because a
// name could satisfy more than one of them (e.g. "./foo.h" is caught by the
// ".h" test first).
171 if (name[0] == '<' || ends_with(name, ".h")) {
173 } else if (starts_with(name, "-l")) {
174 what = USE_SHARED_OBJECT;
175 } else if (ends_with(name, ".c")) {
177 } else if (ends_with(name, ".S") || ends_with(name, ".s")) {
179 } else if (starts_with(name, "./") || starts_with(name, "/") || starts_with(name, "../")
180 || starts_with(name, "~/")) {
185 return NewAST(ctx->file, start, pos, Use, .var = var, .path = name, .what = what);
189 ast_t *parse_file_str(const char *str) {
190 file_t *file = spoof_file("<string>", str);
196 const char *pos = file->text;
197 whitespace(&ctx, &pos);
198 ast_t *ast = parse_file_body(&ctx, pos);
200 whitespace(&ctx, &pos);
201 if (pos < file->text + file->len && *pos != '\0')
202 parser_err(&ctx, pos, pos + strlen(pos), "I couldn't parse this part of the string");