Support space indents
This commit is contained in:
parent
64baf34ea0
commit
a20f522fd6
@ -93,15 +93,9 @@ static file_t *_load_file(const char* filename, FILE *file)
|
||||
FILE *mem = open_memstream(&file_buf, &file_size);
|
||||
int64_t line_len = 0;
|
||||
while ((line_len = getline(&line_buf, &line_cap, file)) >= 0) {
|
||||
file_line_t line_info = {.offset=file_size, .indent=0, .is_empty=false};
|
||||
char *p;
|
||||
for (p = line_buf; *p == '\t'; ++p)
|
||||
line_info.indent += 1;
|
||||
line_info.is_empty = *p != '\r' && *p != '\n';
|
||||
if (ret->line_capacity <= ret->num_lines) {
|
||||
ret->lines = GC_REALLOC(ret->lines, sizeof(file_line_t)*(ret->line_capacity += 32));
|
||||
}
|
||||
ret->lines[ret->num_lines++] = line_info;
|
||||
if (ret->line_capacity <= ret->num_lines)
|
||||
ret->line_offsets = GC_REALLOC(ret->line_offsets, sizeof(int64_t)*(ret->line_capacity += 32));
|
||||
ret->line_offsets[ret->num_lines++] = file_size;
|
||||
fwrite(line_buf, sizeof(char), line_len, mem);
|
||||
fflush(mem);
|
||||
}
|
||||
@ -157,12 +151,12 @@ public int64_t get_line_number(file_t *f, const char *p)
|
||||
int64_t offset = (int64_t)(p - f->text);
|
||||
while (lo <= hi) {
|
||||
int64_t mid = (lo + hi) / 2;
|
||||
file_line_t *line = &f->lines[mid];
|
||||
if (line->offset == offset)
|
||||
int64_t line_offset = f->line_offsets[mid];
|
||||
if (line_offset == offset)
|
||||
return mid + 1;
|
||||
else if (line->offset < offset)
|
||||
else if (line_offset < offset)
|
||||
lo = mid + 1;
|
||||
else if (line->offset > offset)
|
||||
else if (line_offset > offset)
|
||||
hi = mid - 1;
|
||||
}
|
||||
return lo; // Return the line number whose line starts closest before p
|
||||
@ -174,18 +168,8 @@ public int64_t get_line_number(file_t *f, const char *p)
|
||||
public int64_t get_line_column(file_t *f, const char *p)
|
||||
{
|
||||
int64_t line_no = get_line_number(f, p);
|
||||
file_line_t *line = &f->lines[line_no-1];
|
||||
return 1 + (int64_t)(p - (f->text + line->offset));
|
||||
}
|
||||
|
||||
//
|
||||
// Given a pointer, get the indentation of the line it's on.
|
||||
//
|
||||
public int64_t get_indent(file_t *f, const char *p)
|
||||
{
|
||||
int64_t line_no = get_line_number(f, p);
|
||||
file_line_t *line = &f->lines[line_no-1];
|
||||
return line->indent;
|
||||
int64_t line_offset = f->line_offsets[line_no-1];
|
||||
return 1 + (int64_t)(p - (f->text + line_offset));
|
||||
}
|
||||
|
||||
//
|
||||
@ -194,8 +178,8 @@ public int64_t get_indent(file_t *f, const char *p)
|
||||
public const char *get_line(file_t *f, int64_t line_number)
|
||||
{
|
||||
if (line_number == 0 || line_number > (int64_t)f->num_lines) return NULL;
|
||||
file_line_t *line = &f->lines[line_number-1];
|
||||
return f->text + line->offset;
|
||||
int64_t line_offset = f->line_offsets[line_number-1];
|
||||
return f->text + line_offset;
|
||||
}
|
||||
|
||||
//
|
||||
|
@ -8,18 +8,12 @@
|
||||
#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
|
||||
typedef struct {
|
||||
int64_t offset;
|
||||
int64_t indent:63;
|
||||
bool is_empty:1;
|
||||
} file_line_t;
|
||||
|
||||
typedef struct {
|
||||
const char *filename, *relative_filename;
|
||||
const char *text;
|
||||
int64_t len;
|
||||
int64_t num_lines, line_capacity;
|
||||
file_line_t *lines;
|
||||
int64_t *line_offsets;
|
||||
} file_t;
|
||||
|
||||
char *resolve_path(const char *path, const char *relative_to, const char *system_path);
|
||||
@ -34,8 +28,6 @@ int64_t get_line_number(file_t *f, const char *p);
|
||||
__attribute__((pure, nonnull))
|
||||
int64_t get_line_column(file_t *f, const char *p);
|
||||
__attribute__((pure, nonnull))
|
||||
int64_t get_indent(file_t *f, const char *p);
|
||||
__attribute__((pure, nonnull))
|
||||
const char *get_line(file_t *f, int64_t line_number);
|
||||
__attribute__((pure, nonnull))
|
||||
const char *get_file_pos(file_t *f, const char *p);
|
||||
|
92
parse.c
92
parse.c
@ -25,6 +25,8 @@ typedef ast_t* (parser_t)(parse_ctx_t*,const char*);
|
||||
|
||||
extern void builtin_fail(const char *fmt, ...);
|
||||
|
||||
#define SPACES_PER_INDENT 4
|
||||
|
||||
#define PARSER(name) ast_t *name(parse_ctx_t *ctx, const char *pos)
|
||||
|
||||
#define STUB_PARSER(name) PARSER(name) { (void)ctx; (void)pos; return NULL; }
|
||||
@ -171,6 +173,29 @@ const char *unescape(const char **out) {
|
||||
}
|
||||
}
|
||||
|
||||
static inline int64_t get_indent(parse_ctx_t *ctx, const char *pos)
|
||||
{
|
||||
int64_t line_num = get_line_number(ctx->file, pos);
|
||||
const char *line = get_line(ctx->file, line_num);
|
||||
if (*line == ' ') {
|
||||
int64_t spaces = strspn(line, " ");
|
||||
if (spaces % SPACES_PER_INDENT != 0)
|
||||
parser_err(ctx, line + spaces - (spaces % SPACES_PER_INDENT), line + spaces,
|
||||
"Indentation must be a multiple of 4 spaces, not %ld", spaces);
|
||||
int64_t indent = spaces / SPACES_PER_INDENT;
|
||||
if (line[indent] == '\t')
|
||||
parser_err(ctx, line + indent, line + indent + 1, "This is a tab following spaces, and you can't mix tabs and spaces");
|
||||
return indent;
|
||||
} else if (*line == '\t') {
|
||||
int64_t indent = strspn(line, "\t");
|
||||
if (line[indent] == ' ')
|
||||
parser_err(ctx, line + indent, line + indent + 1, "This is a space following tabs, and you can't mix tabs and spaces");
|
||||
return indent;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
///////////////////////////// Text-based parsing primitives ///////////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
@ -345,16 +370,16 @@ bool comment(const char **pos) {
|
||||
|
||||
bool indent(parse_ctx_t *ctx, const char **out) {
|
||||
const char *pos = *out;
|
||||
int64_t starting_indent = get_indent(ctx->file, pos);
|
||||
int64_t starting_indent = get_indent(ctx, pos);
|
||||
whitespace(&pos);
|
||||
const char *start_of_line = get_line(ctx->file, get_line_number(ctx->file, pos));
|
||||
if (start_of_line <= *out)
|
||||
const char *next_line = get_line(ctx->file, get_line_number(ctx->file, pos));
|
||||
if (next_line <= *out)
|
||||
return false;
|
||||
|
||||
if ((int64_t)strspn(start_of_line, "\t") < starting_indent)
|
||||
if (get_indent(ctx, next_line) != starting_indent + 1)
|
||||
return false;
|
||||
|
||||
*out = start_of_line + starting_indent + 1;
|
||||
*out = next_line + strspn(next_line, " \t");
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -369,7 +394,12 @@ bool newline_with_indentation(const char **out, int64_t target) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if ((int64_t)strspn(pos, "\t") >= target) {
|
||||
if (*pos == ' ') {
|
||||
if ((int64_t)strspn(pos, " ") >= SPACES_PER_INDENT * target) {
|
||||
*out = pos + SPACES_PER_INDENT * target;
|
||||
return true;
|
||||
}
|
||||
} else if ((int64_t)strspn(pos, "\t") >= target) {
|
||||
*out = pos + target;
|
||||
return true;
|
||||
}
|
||||
@ -840,7 +870,7 @@ ast_t *parse_comprehension_suffix(parse_ctx_t *ctx, ast_t *expr) {
|
||||
PARSER(parse_if) {
|
||||
// if <condition> [then] <body> [else <body>]
|
||||
const char *start = pos;
|
||||
int64_t starting_indent = get_indent(ctx->file, pos);
|
||||
int64_t starting_indent = get_indent(ctx, pos);
|
||||
|
||||
if (!match_word(&pos, "if"))
|
||||
return NULL;
|
||||
@ -857,7 +887,7 @@ PARSER(parse_if) {
|
||||
whitespace(&tmp);
|
||||
ast_t *else_body = NULL;
|
||||
const char *else_start = pos;
|
||||
if (get_indent(ctx->file, tmp) == starting_indent && match_word(&tmp, "else")) {
|
||||
if (get_indent(ctx, tmp) == starting_indent && match_word(&tmp, "else")) {
|
||||
pos = tmp;
|
||||
expect_str(ctx, start, &pos, ":", "I expected a ':' here");
|
||||
else_body = expect(ctx, else_start, &pos, parse_opt_indented_block, "I expected a body for this 'else'");
|
||||
@ -868,7 +898,7 @@ PARSER(parse_if) {
|
||||
PARSER(parse_when) {
|
||||
// when <expr> (is var : Tag [then] <body>)* [else <body>]
|
||||
const char *start = pos;
|
||||
int64_t starting_indent = get_indent(ctx->file, pos);
|
||||
int64_t starting_indent = get_indent(ctx, pos);
|
||||
|
||||
if (!match_word(&pos, "when"))
|
||||
return NULL;
|
||||
@ -880,7 +910,7 @@ PARSER(parse_when) {
|
||||
when_clause_t *clauses = NULL;
|
||||
const char *tmp = pos;
|
||||
whitespace(&tmp);
|
||||
while (get_indent(ctx->file, tmp) == starting_indent && match_word(&tmp, "is")) {
|
||||
while (get_indent(ctx, tmp) == starting_indent && match_word(&tmp, "is")) {
|
||||
pos = tmp;
|
||||
spaces(&pos);
|
||||
ast_t *tag_name = expect(ctx, start, &pos, parse_var, "I expected a tag name here");
|
||||
@ -911,7 +941,7 @@ PARSER(parse_when) {
|
||||
|
||||
ast_t *else_body = NULL;
|
||||
const char *else_start = pos;
|
||||
if (get_indent(ctx->file, tmp) == starting_indent && match_word(&tmp, "else")) {
|
||||
if (get_indent(ctx, tmp) == starting_indent && match_word(&tmp, "else")) {
|
||||
pos = tmp;
|
||||
expect_str(ctx, start, &pos, ":", "I expected a ':' here");
|
||||
else_body = expect(ctx, else_start, &pos, parse_opt_indented_block, "I expected a body for this 'else'");
|
||||
@ -923,7 +953,7 @@ PARSER(parse_for) {
|
||||
// for [k,] v in iter [<indent>] body
|
||||
const char *start = pos;
|
||||
if (!match_word(&pos, "for")) return NULL;
|
||||
int64_t starting_indent = get_indent(ctx->file, pos);
|
||||
int64_t starting_indent = get_indent(ctx, pos);
|
||||
ast_t *index = expect(ctx, start, &pos, parse_var, "I expected an iteration variable for this 'for'");
|
||||
spaces(&pos);
|
||||
ast_t *value = NULL;
|
||||
@ -939,7 +969,7 @@ PARSER(parse_for) {
|
||||
const char *else_start = pos;
|
||||
whitespace(&else_start);
|
||||
ast_t *empty = NULL;
|
||||
if (match_word(&else_start, "else") && get_indent(ctx->file, else_start) == starting_indent) {
|
||||
if (match_word(&else_start, "else") && get_indent(ctx, else_start) == starting_indent) {
|
||||
pos = else_start;
|
||||
expect_str(ctx, start, &pos, ":", "I expected a ':' here");
|
||||
empty = expect(ctx, pos, &pos, parse_opt_indented_block, "I expected a body for this 'else'");
|
||||
@ -1057,7 +1087,7 @@ PARSER(parse_text) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
int64_t starting_indent = get_indent(ctx->file, pos);
|
||||
int64_t starting_indent = get_indent(ctx, pos);
|
||||
int64_t string_indent = starting_indent + 1;
|
||||
|
||||
ast_list_t *chunks = NULL;
|
||||
@ -1088,13 +1118,13 @@ PARSER(parse_text) {
|
||||
chunks = new(ast_list_t, .ast=interp, .next=chunks);
|
||||
chunk_start = pos;
|
||||
} else if (!leading_newline && *pos == open_quote && closing[(int)open_quote]) { // Nested pair begin
|
||||
if (get_indent(ctx->file, pos) == starting_indent) {
|
||||
if (get_indent(ctx, pos) == starting_indent) {
|
||||
++depth;
|
||||
}
|
||||
chunk = CORD_cat_char(chunk, *pos);
|
||||
++pos;
|
||||
} else if (!leading_newline && *pos == close_quote) { // Nested pair end
|
||||
if (get_indent(ctx->file, pos) == starting_indent) {
|
||||
if (get_indent(ctx, pos) == starting_indent) {
|
||||
--depth;
|
||||
if (depth == 0)
|
||||
break;
|
||||
@ -1517,17 +1547,11 @@ PARSER(parse_extended_expr) {
|
||||
}
|
||||
|
||||
PARSER(parse_block) {
|
||||
int64_t block_indent = get_indent(ctx->file, pos);
|
||||
int64_t block_indent = get_indent(ctx, pos);
|
||||
const char *start = pos;
|
||||
whitespace(&pos);
|
||||
ast_list_t *statements = NULL;
|
||||
while (*pos) {
|
||||
if (pos > start && pos[-1] == ' ') {
|
||||
const char *space_start = pos-1;
|
||||
while (*space_start == ' ')
|
||||
--space_start;
|
||||
parser_err(ctx, space_start, pos, "Spaces are not allowed for indentation, only tabs!");
|
||||
}
|
||||
ast_t *stmt = optional(ctx, &pos, parse_statement);
|
||||
if (!stmt) {
|
||||
const char *line_start = pos;
|
||||
@ -1547,7 +1571,7 @@ PARSER(parse_block) {
|
||||
}
|
||||
statements = new(ast_list_t, .ast=stmt, .next=statements);
|
||||
whitespace(&pos); // TODO: check for newline
|
||||
if (get_indent(ctx->file, pos) != block_indent) {
|
||||
if (get_indent(ctx, pos) != block_indent) {
|
||||
pos = stmt->end; // backtrack
|
||||
break;
|
||||
}
|
||||
@ -1563,12 +1587,12 @@ PARSER(parse_opt_indented_block) {
|
||||
PARSER(parse_namespace) {
|
||||
const char *start = pos;
|
||||
whitespace(&pos);
|
||||
int64_t indent = get_indent(ctx->file, pos);
|
||||
int64_t indent = get_indent(ctx, pos);
|
||||
ast_list_t *statements = NULL;
|
||||
for (;;) {
|
||||
const char *next = pos;
|
||||
whitespace(&next);
|
||||
if (get_indent(ctx->file, next) != indent) break;
|
||||
if (get_indent(ctx, next) != indent) break;
|
||||
ast_t *stmt;
|
||||
if ((stmt=optional(ctx, &pos, parse_struct_def))
|
||||
||(stmt=optional(ctx, &pos, parse_enum_def))
|
||||
@ -1587,7 +1611,7 @@ PARSER(parse_namespace) {
|
||||
// break;
|
||||
// }
|
||||
} else {
|
||||
if (get_indent(ctx->file, next) > indent && next < strchrnul(next, '\n'))
|
||||
if (get_indent(ctx, next) > indent && next < strchrnul(next, '\n'))
|
||||
parser_err(ctx, next, strchrnul(next, '\n'), "I couldn't parse this namespace declaration");
|
||||
break;
|
||||
}
|
||||
@ -1603,7 +1627,7 @@ PARSER(parse_file_body) {
|
||||
for (;;) {
|
||||
const char *next = pos;
|
||||
whitespace(&next);
|
||||
if (get_indent(ctx->file, next) != 0) break;
|
||||
if (get_indent(ctx, next) != 0) break;
|
||||
ast_t *stmt;
|
||||
if ((stmt=optional(ctx, &pos, parse_struct_def))
|
||||
||(stmt=optional(ctx, &pos, parse_enum_def))
|
||||
@ -1634,7 +1658,7 @@ PARSER(parse_struct_def) {
|
||||
const char *start = pos;
|
||||
if (!match_word(&pos, "struct")) return NULL;
|
||||
|
||||
int64_t starting_indent = get_indent(ctx->file, pos);
|
||||
int64_t starting_indent = get_indent(ctx, pos);
|
||||
|
||||
spaces(&pos);
|
||||
const char *name = get_id(&pos);
|
||||
@ -1667,7 +1691,7 @@ PARSER(parse_struct_def) {
|
||||
if (match(&pos, ":")) {
|
||||
const char *ns_pos = pos;
|
||||
whitespace(&ns_pos);
|
||||
int64_t ns_indent = get_indent(ctx->file, ns_pos);
|
||||
int64_t ns_indent = get_indent(ctx, ns_pos);
|
||||
if (ns_indent > starting_indent) {
|
||||
pos = ns_pos;
|
||||
namespace = optional(ctx, &pos, parse_namespace);
|
||||
@ -1682,7 +1706,7 @@ ast_t *parse_enum_def(parse_ctx_t *ctx, const char *pos) {
|
||||
// tagged union: enum Foo(a, b(x:Int,y:Int)=5, ...) [: \n namespace]
|
||||
const char *start = pos;
|
||||
if (!match_word(&pos, "enum")) return NULL;
|
||||
int64_t starting_indent = get_indent(ctx->file, pos);
|
||||
int64_t starting_indent = get_indent(ctx, pos);
|
||||
spaces(&pos);
|
||||
const char *name = get_id(&pos);
|
||||
if (!name)
|
||||
@ -1746,7 +1770,7 @@ ast_t *parse_enum_def(parse_ctx_t *ctx, const char *pos) {
|
||||
if (match(&pos, ":")) {
|
||||
const char *ns_pos = pos;
|
||||
whitespace(&ns_pos);
|
||||
int64_t ns_indent = get_indent(ctx->file, ns_pos);
|
||||
int64_t ns_indent = get_indent(ctx, ns_pos);
|
||||
if (ns_indent > starting_indent) {
|
||||
pos = ns_pos;
|
||||
namespace = optional(ctx, &pos, parse_namespace);
|
||||
@ -1762,7 +1786,7 @@ PARSER(parse_lang_def) {
|
||||
const char *start = pos;
|
||||
// lang Name: [namespace...]
|
||||
if (!match_word(&pos, "lang")) return NULL;
|
||||
int64_t starting_indent = get_indent(ctx->file, pos);
|
||||
int64_t starting_indent = get_indent(ctx, pos);
|
||||
spaces(&pos);
|
||||
const char *name = get_id(&pos);
|
||||
if (!name)
|
||||
@ -1773,7 +1797,7 @@ PARSER(parse_lang_def) {
|
||||
if (match(&pos, ":")) {
|
||||
const char *ns_pos = pos;
|
||||
whitespace(&ns_pos);
|
||||
int64_t ns_indent = get_indent(ctx->file, ns_pos);
|
||||
int64_t ns_indent = get_indent(ctx, ns_pos);
|
||||
if (ns_indent > starting_indent) {
|
||||
pos = ns_pos;
|
||||
namespace = optional(ctx, &pos, parse_namespace);
|
||||
|
2
tomo.c
2
tomo.c
@ -173,7 +173,7 @@ static void build_file_dependency_graph(const char *filename, table_t *dependenc
|
||||
file_t *f = load_file(to_scan[s]);
|
||||
if (!f) errx(1, "Couldn't find file: %s", to_scan[s]);
|
||||
for (int64_t i = 0; i < f->num_lines; i++) {
|
||||
const char *line = f->text + f->lines[i].offset;
|
||||
const char *line = f->text + f->line_offsets[i];
|
||||
const char *prefix = "#include \"";
|
||||
if (strncmp(line, prefix, strlen(prefix)) == 0) {
|
||||
char *tmp = realpath(heap_strf("%s/%.*s", file_dir, strcspn(line + strlen(prefix), "\"") - 2, line + strlen(prefix)), NULL);
|
||||
|
Loading…
Reference in New Issue
Block a user