// Parsing logic for whole source files: top-level statements, metadata entries, and `use` statements

#include <gc.h>
#include <setjmp.h>
#include <stdarg.h>
#include <stdbool.h>
#include <string.h>

#include "../ast.h"
#include "../stdlib/datatypes.h"
#include "../stdlib/paths.h"
#include "../stdlib/stdlib.h"
#include "../stdlib/tables.h"
#include "../stdlib/text.h"
#include "../util.h"
#include "context.h"
#include "errors.h"
#include "expressions.h"
#include "files.h"
#include "functions.h"
#include "statements.h"
#include "text.h"
#include "typedefs.h"
#include "utils.h"

// Size limit for the cache of {filename -> parsed AST} entries kept by
// parse_file(), which evicts an existing entry to keep the cache bounded by
// (approximately) this many entries. The #ifndef guard lets the build
// override the value by defining PARSE_CACHE_SIZE before this point.
#ifndef PARSE_CACHE_SIZE
#define PARSE_CACHE_SIZE 100
#endif

// Parse a declaration via parse_declaration() and, on success, set its
// `top_level` flag. Returns NULL when no declaration was parsed.
static ast_t *parse_top_declaration(parse_ctx_t *ctx, const char *pos) {
    ast_t *decl = parse_declaration(ctx, pos);
    if (decl == NULL) return NULL;
    decl->__data.Declare.top_level = true;
    return decl;
}

// Parse a metadata entry of the form `key: value`. The value is either a text
// literal (interpolations are rejected with a parse error) or a path, in which
// case the contents of that file, trimmed of "\r\n\t " characters, become the
// value. Returns NULL if the input is not a metadata entry.
static ast_t *parse_metadata(parse_ctx_t *ctx, const char *pos) {
    const char *start = pos;
    const char *key = get_id(&pos);
    if (key == NULL) return NULL;
    spaces(&pos);
    if (!match(&pos, ":")) return NULL;
    spaces(&pos);

    Text_t value_text = EMPTY_TEXT;
    ast_t *value = parse_text(ctx, pos, false);
    if (value == NULL) {
        // Not a text value, so fall back to a path whose file holds the text
        value = parse_path(ctx, pos);
        if (value == NULL) return NULL;
        Path_t path = Path$from_str(Match(value, Path)->path);
        // The path is resolved against the parent of the file being parsed
        path = Path$resolved(path, Path$parent(Path$from_str(ctx->file->filename)));
        OptionalText_t contents = Path$read(path);
        if (contents.tag == TEXT_NONE) parser_err(ctx, value->start, value->end, "File not found: ", path);
        value_text = Text$trim(contents, Text("\r\n\t "), true, true);
    } else {
        // Concatenate the literal chunks; any non-literal chunk is an error
        ast_list_t *chunk = Match(value, TextJoin)->children;
        while (chunk != NULL) {
            ast_t *piece = chunk->ast;
            if (piece->tag != TextLiteral)
                parser_err(ctx, piece->start, piece->end, "Text interpolations are not allowed in metadata");
            value_text = Texts(value_text, Match(piece, TextLiteral)->text);
            chunk = chunk->next;
        }
    }
    pos = value->end;

    return NewAST(ctx->file, start, pos, Metadata, .key = Text$from_str(key), .value = value_text);
}

// Parse the body of a file as a sequence of top-level statements that start
// at indentation level zero. Each statement is tried against a fixed, ordered
// set of parsers. If unparsed text remains after the last recognized
// statement, a parse error is reported. Returns a Block AST of the statements.
ast_t *parse_file_body(parse_ctx_t *ctx, const char *pos) {
    // Parsers for each kind of top-level statement, tried in this order:
    static ast_t *(*const statement_parsers[])(parse_ctx_t *, const char *) = {
        parse_metadata, parse_struct_def,  parse_func_def, parse_enum_def, parse_lang_def,
        parse_convert_def, parse_use,      parse_inline_c, parse_top_declaration,
    };
    const size_t num_parsers = sizeof(statement_parsers) / sizeof(statement_parsers[0]);

    const char *start = pos;
    whitespace(ctx, &pos);
    ast_list_t *statements = NULL;
    while (true) {
        // Stop as soon as the next statement is not at indentation level zero
        const char *lookahead = pos;
        whitespace(ctx, &lookahead);
        if (get_indent(ctx, lookahead) != 0) break;

        // First parser that succeeds wins
        ast_t *stmt = NULL;
        for (size_t i = 0; i < num_parsers && stmt == NULL; i++)
            stmt = optional(ctx, &pos, statement_parsers[i]);
        if (stmt == NULL) break;

        // Prepend for now; the list is reversed once parsing is finished
        statements = new (ast_list_t, .ast = stmt, .next = statements);
        pos = stmt->end;
        whitespace(ctx, &pos); // TODO: check for newline
    }
    whitespace(ctx, &pos);

    const char *text_end = ctx->file->text + ctx->file->len;
    if (pos < text_end && *pos != '\0') {
        parser_err(ctx, pos, eol(pos), "I expect all top-level statements to be declarations of some kind");
    }
    REVERSE_LIST(statements);
    return NewAST(ctx->file, start, pos, Block, .statements = statements);
}

// Parse the file at `path` and return its AST (the result of parse_file_body()).
//
// `path` must start with '/' or with '<'; anything else is rejected via fail().
// A path of the form "<name>text" is treated as a spoofed file: "<name>" is
// used as the filename and everything after the closing '>' is the source text.
//
// `on_err` is stored in the parse context that is handed to the parsing and
// error-reporting functions.
//
// Returns NULL if a '<' path has no closing '>' or if load_file() fails.
// Parsed ASTs are cached by path, so a later call with a path that is still
// in the cache returns the cached AST without re-parsing.
public
ast_t *parse_file(const char *path, jmp_buf *on_err) {
    if (path[0] != '<' && path[0] != '/') fail("Path is not fully resolved: ", path);
    // NOTE: this cache leaks a bounded amount of memory. The cache will never
    // hold more than PARSE_CACHE_SIZE entries (see below), but each entry's
    // AST holds onto a reference to the file it came from, so they could
    // potentially be somewhat large.
    static Table_t cached = EMPTY_TABLE;
    ast_t *ast = Table$str_get(cached, path);
    if (ast) return ast;

    file_t *file;
    if (path[0] == '<') {
        const char *endbracket = strchr(path, '>');
        if (!endbracket) return NULL;
        // Filename is the "<...>" prefix; the source text is whatever follows it
        file = spoof_file(GC_strndup(path, (size_t)(endbracket + 1 - path)), endbracket + 1);
    } else {
        file = load_file(path);
        if (!file) return NULL;
    }

    parse_ctx_t ctx = {
        .file = file,
        .on_err = on_err,
    };

    const char *pos = file->text;
    if (match(&pos, "#!")) // shebang
        some_not(&pos, "\r\n");

    whitespace(&ctx, &pos);
    ast = parse_file_body(&ctx, pos);
    pos = ast->end;
    whitespace(&ctx, &pos);
    if (pos < file->text + file->len && *pos != '\0') {
        parser_err(&ctx, pos, pos + strlen(pos), "I couldn't parse this part of the file");
    }

    // Make room *before* inserting, so that the cache never ends up holding
    // more than PARSE_CACHE_SIZE entries once the new AST has been added.
    // (The non-empty check keeps the assert below from firing if
    // PARSE_CACHE_SIZE is configured as 0.)
    if (cached.entries.length > 0 && cached.entries.length >= PARSE_CACHE_SIZE) {
        // FIXME: this currently evicts the first entry, but it should be more like
        // an LRU cache
        struct {
            const char *path;
            ast_t *ast;
        } *to_remove = Table$entry(cached, 1);
        assert(to_remove);
        Table$str_remove(&cached, to_remove->path);
    }

    // Save the AST in the cache:
    Table$str_set(&cached, path, ast);
    return ast;
}

// Parse a `use` statement, optionally prefixed by `var :=`. The name that
// follows `use` runs up to the next space, tab, newline, carriage return, or
// semicolon, and is classified (header, shared object, C code, assembly,
// local path, or module) from its prefix/suffix. Returns NULL if this is not
// a `use` statement.
ast_t *parse_use(parse_ctx_t *ctx, const char *pos) {
    const char *start = pos;

    // Optional `var :=` prefix; a variable without `:=` is not a `use`
    ast_t *var = parse_var(ctx, pos);
    if (var != NULL) {
        pos = var->end;
        spaces(&pos);
        if (!match(&pos, ":=")) return NULL;
        spaces(&pos);
    }

    if (!match_word(&pos, "use")) return NULL;
    spaces(&pos);

    size_t name_len = strcspn(pos, " \t\r\n;");
    if (name_len == 0) parser_err(ctx, start, pos, "There is no module name here to use");
    char *name = GC_strndup(pos, name_len);
    pos += name_len;

    // Consume any run of trailing semicolons
    while (match(&pos, ";")) {
    }

    // Earlier checks take priority over later ones; anything unmatched is a module
    int what = USE_MODULE;
    if (name[0] == '<' || ends_with(name, ".h")) what = USE_HEADER;
    else if (starts_with(name, "-l")) what = USE_SHARED_OBJECT;
    else if (ends_with(name, ".c")) what = USE_C_CODE;
    else if (ends_with(name, ".S") || ends_with(name, ".s")) what = USE_ASM;
    else if (starts_with(name, "./") || starts_with(name, "/") || starts_with(name, "../")
             || starts_with(name, "~/"))
        what = USE_LOCAL;

    return NewAST(ctx->file, start, pos, Use, .var = var, .path = name, .what = what);
}

// Parse source code held in a string, using a spoofed file named "<string>".
// The parse context's `on_err` is NULL. If unparsed text remains after the
// file body, a parse error is reported. Returns the AST from parse_file_body().
public
ast_t *parse_file_str(const char *str) {
    parse_ctx_t ctx = {
        .file = spoof_file("<string>", str),
        .on_err = NULL,
    };

    const char *cursor = ctx.file->text;
    whitespace(&ctx, &cursor);
    ast_t *body = parse_file_body(&ctx, cursor);
    cursor = body->end;
    whitespace(&ctx, &cursor);

    const char *text_end = ctx.file->text + ctx.file->len;
    bool has_leftovers = cursor < text_end && *cursor != '\0';
    if (has_leftovers)
        parser_err(&ctx, cursor, cursor + strlen(cursor), "I couldn't parse this part of the string");
    return body;
}