diff options
| author | Bruce Hill <bruce@bruce-hill.com> | 2025-08-25 01:03:31 -0400 |
|---|---|---|
| committer | Bruce Hill <bruce@bruce-hill.com> | 2025-08-25 01:03:31 -0400 |
| commit | 47d38fb5ac183180edd297830261cedd8afee2d3 (patch) | |
| tree | 2ad93b15686a9c7d94e163d9a5f46350374f4571 /src/parse | |
| parent | 3cf0d5f0bee3787a3d46126d5c0e1310e35a7cb9 (diff) | |
Split out binops
Diffstat (limited to 'src/parse')
| -rw-r--r-- | src/parse/binops.c | 137 | ||||
| -rw-r--r-- | src/parse/binops.h | 8 | ||||
| -rw-r--r-- | src/parse/numbers.c | 1 | ||||
| -rw-r--r-- | src/parse/parse.c | 161 | ||||
| -rw-r--r-- | src/parse/parse.h | 2 | ||||
| -rw-r--r-- | src/parse/text.c | 42 | ||||
| -rw-r--r-- | src/parse/text.h | 1 |
7 files changed, 190 insertions, 162 deletions
diff --git a/src/parse/binops.c b/src/parse/binops.c new file mode 100644 index 00000000..f8d0854d --- /dev/null +++ b/src/parse/binops.c @@ -0,0 +1,137 @@ +#include <stdbool.h> +#include <string.h> + +#include "../ast.h" +#include "../stdlib/print.h" +#include "../stdlib/util.h" +#include "containers.h" +#include "context.h" +#include "errors.h" +#include "files.h" +#include "functions.h" +#include "numbers.h" +#include "parse.h" +#include "text.h" +#include "types.h" +#include "utils.h" + +int op_tightness[] = { + [Power] = 9, + [Multiply] = 8, + [Divide] = 8, + [Mod] = 8, + [Mod1] = 8, + [Plus] = 7, + [Minus] = 7, + [Concat] = 6, + [LeftShift] = 5, + [RightShift] = 5, + [UnsignedLeftShift] = 5, + [UnsignedRightShift] = 5, + [Min] = 4, + [Max] = 4, + [Equals] = 3, + [NotEquals] = 3, + [LessThan] = 2, + [LessThanOrEquals] = 2, + [GreaterThan] = 2, + [GreaterThanOrEquals] = 2, + [Compare] = 2, + [And] = 1, + [Or] = 1, + [Xor] = 1, +}; + +public +ast_e match_binary_operator(const char **pos) { + switch (**pos) { + case '+': { + *pos += 1; + return match(pos, "+") ? 
Concat : Plus; + } + case '-': { + *pos += 1; + if ((*pos)[0] != ' ' && (*pos)[-2] == ' ') // looks like `fn -5` + return Unknown; + return Minus; + } + case '*': *pos += 1; return Multiply; + case '/': *pos += 1; return Divide; + case '^': *pos += 1; return Power; + case '<': { + *pos += 1; + if (match(pos, "=")) return LessThanOrEquals; // "<=" + else if (match(pos, ">")) return Compare; // "<>" + else if (match(pos, "<")) { + if (match(pos, "<")) return UnsignedLeftShift; // "<<<" + return LeftShift; // "<<" + } else return LessThan; + } + case '>': { + *pos += 1; + if (match(pos, "=")) return GreaterThanOrEquals; // ">=" + if (match(pos, ">")) { + if (match(pos, ">")) return UnsignedRightShift; // ">>>" + return RightShift; // ">>" + } + return GreaterThan; + } + default: { + if (match(pos, "!=")) return NotEquals; + else if (match(pos, "==") && **pos != '=') return Equals; + else if (match_word(pos, "and")) return And; + else if (match_word(pos, "or")) return Or; + else if (match_word(pos, "xor")) return Xor; + else if (match_word(pos, "mod1")) return Mod1; + else if (match_word(pos, "mod")) return Mod; + else if (match_word(pos, "_min_")) return Min; + else if (match_word(pos, "_max_")) return Max; + else return Unknown; + } + } +} + +public +ast_t *parse_infix_expr(parse_ctx_t *ctx, const char *pos, int min_tightness) { + ast_t *lhs = optional(ctx, &pos, parse_term); + if (!lhs) return NULL; + + int64_t starting_line = get_line_number(ctx->file, pos); + int64_t starting_indent = get_indent(ctx, pos); + spaces(&pos); + for (ast_e op; (op = match_binary_operator(&pos)) != Unknown && op_tightness[op] >= min_tightness; spaces(&pos)) { + ast_t *key = NULL; + if (op == Min || op == Max) { + key = NewAST(ctx->file, pos, pos, Var, .name = "$"); + for (bool progress = true; progress;) { + ast_t *new_term; + progress = + (false || (new_term = parse_index_suffix(ctx, key)) + || (new_term = parse_method_call_suffix(ctx, key)) || (new_term = parse_field_suffix(ctx, key)) 
+ || (new_term = parse_fncall_suffix(ctx, key)) || (new_term = parse_optional_suffix(ctx, key)) + || (new_term = parse_non_optional_suffix(ctx, key))); + if (progress) key = new_term; + } + if (key && key->tag == Var) key = NULL; + else if (key) pos = key->end; + } + + whitespace(&pos); + if (get_line_number(ctx->file, pos) != starting_line && get_indent(ctx, pos) < starting_indent) + parser_err(ctx, pos, eol(pos), "I expected this line to be at least as indented than the line above it"); + + ast_t *rhs = parse_infix_expr(ctx, pos, op_tightness[op] + 1); + if (!rhs) break; + pos = rhs->end; + + if (op == Min) { + return NewAST(ctx->file, lhs->start, rhs->end, Min, .lhs = lhs, .rhs = rhs, .key = key); + } else if (op == Max) { + return NewAST(ctx->file, lhs->start, rhs->end, Max, .lhs = lhs, .rhs = rhs, .key = key); + } else { + lhs = new (ast_t, .file = ctx->file, .start = lhs->start, .end = rhs->end, .tag = op, + .__data.Plus.lhs = lhs, .__data.Plus.rhs = rhs); + } + } + return lhs; +} diff --git a/src/parse/binops.h b/src/parse/binops.h new file mode 100644 index 00000000..95785619 --- /dev/null +++ b/src/parse/binops.h @@ -0,0 +1,8 @@ +// Logic for parsing binary operators +#pragma once + +#include "../ast.h" +#include "context.h" + +ast_e match_binary_operator(const char **pos); +ast_t *parse_infix_expr(parse_ctx_t *ctx, const char *pos, int min_tightness); diff --git a/src/parse/numbers.c b/src/parse/numbers.c index a0db1273..39851f2a 100644 --- a/src/parse/numbers.c +++ b/src/parse/numbers.c @@ -1,3 +1,4 @@ +// Logic for parsing numbers #include <ctype.h> #include <gc.h> diff --git a/src/parse/parse.c b/src/parse/parse.c index d25f0817..77461525 100644 --- a/src/parse/parse.c +++ b/src/parse/parse.c @@ -7,6 +7,7 @@ #include "../ast.h" #include "../stdlib/print.h" #include "../stdlib/util.h" +#include "binops.h" #include "containers.h" #include "context.h" #include "errors.h" @@ -18,33 +19,6 @@ #include "types.h" #include "utils.h" -int op_tightness[] = { - [Power] = 
9, - [Multiply] = 8, - [Divide] = 8, - [Mod] = 8, - [Mod1] = 8, - [Plus] = 7, - [Minus] = 7, - [Concat] = 6, - [LeftShift] = 5, - [RightShift] = 5, - [UnsignedLeftShift] = 5, - [UnsignedRightShift] = 5, - [Min] = 4, - [Max] = 4, - [Equals] = 3, - [NotEquals] = 3, - [LessThan] = 2, - [LessThanOrEquals] = 2, - [GreaterThan] = 2, - [GreaterThanOrEquals] = 2, - [Compare] = 2, - [And] = 1, - [Or] = 1, - [Xor] = 1, -}; - ast_t *parse_parens(parse_ctx_t *ctx, const char *pos) { const char *start = pos; spaces(&pos); @@ -416,47 +390,6 @@ ast_t *parse_bool(parse_ctx_t *ctx, const char *pos) { else return NULL; } -ast_t *parse_path(parse_ctx_t *ctx, const char *pos) { - // "(" ("~/" / "./" / "../" / "/") ... ")" - const char *start = pos; - - if (!match(&pos, "(")) return NULL; - - if (!(*pos == '~' || *pos == '.' || *pos == '/')) return NULL; - - const char *path_start = pos; - size_t len = 1; - int paren_depth = 1; - while (pos + len < ctx->file->text + ctx->file->len - 1) { - if (pos[len] == '\\') { - len += 2; - continue; - } else if (pos[len] == '(') { - paren_depth += 1; - } else if (pos[len] == ')') { - paren_depth -= 1; - if (paren_depth <= 0) break; - } else if (pos[len] == '\r' || pos[len] == '\n') { - parser_err(ctx, path_start, &pos[len - 1], "This path was not closed"); - } - len += 1; - } - pos += len + 1; - char *path = String(string_slice(path_start, .length = len)); - for (char *src = path, *dest = path;;) { - if (src[0] == '\\') { - *(dest++) = src[1]; - src += 2; - } else if (*src) { - *(dest++) = *(src++); - } else { - *(dest++) = '\0'; - break; - } - } - return NewAST(ctx->file, start, pos, Path, .path = path); -} - ast_t *parse_pass(parse_ctx_t *ctx, const char *pos) { const char *start = pos; return match_word(&pos, "pass") ? 
NewAST(ctx->file, start, pos, Pass) : NULL; @@ -645,98 +578,6 @@ ast_t *parse_fncall_suffix(parse_ctx_t *ctx, ast_t *fn) { return NewAST(ctx->file, start, pos, FunctionCall, .fn = fn, .args = args); } -ast_e match_binary_operator(const char **pos) { - switch (**pos) { - case '+': { - *pos += 1; - return match(pos, "+") ? Concat : Plus; - } - case '-': { - *pos += 1; - if ((*pos)[0] != ' ' && (*pos)[-2] == ' ') // looks like `fn -5` - return Unknown; - return Minus; - } - case '*': *pos += 1; return Multiply; - case '/': *pos += 1; return Divide; - case '^': *pos += 1; return Power; - case '<': { - *pos += 1; - if (match(pos, "=")) return LessThanOrEquals; // "<=" - else if (match(pos, ">")) return Compare; // "<>" - else if (match(pos, "<")) { - if (match(pos, "<")) return UnsignedLeftShift; // "<<<" - return LeftShift; // "<<" - } else return LessThan; - } - case '>': { - *pos += 1; - if (match(pos, "=")) return GreaterThanOrEquals; // ">=" - if (match(pos, ">")) { - if (match(pos, ">")) return UnsignedRightShift; // ">>>" - return RightShift; // ">>" - } - return GreaterThan; - } - default: { - if (match(pos, "!=")) return NotEquals; - else if (match(pos, "==") && **pos != '=') return Equals; - else if (match_word(pos, "and")) return And; - else if (match_word(pos, "or")) return Or; - else if (match_word(pos, "xor")) return Xor; - else if (match_word(pos, "mod1")) return Mod1; - else if (match_word(pos, "mod")) return Mod; - else if (match_word(pos, "_min_")) return Min; - else if (match_word(pos, "_max_")) return Max; - else return Unknown; - } - } -} - -static ast_t *parse_infix_expr(parse_ctx_t *ctx, const char *pos, int min_tightness) { - ast_t *lhs = optional(ctx, &pos, parse_term); - if (!lhs) return NULL; - - int64_t starting_line = get_line_number(ctx->file, pos); - int64_t starting_indent = get_indent(ctx, pos); - spaces(&pos); - for (ast_e op; (op = match_binary_operator(&pos)) != Unknown && op_tightness[op] >= min_tightness; spaces(&pos)) { - ast_t 
*key = NULL; - if (op == Min || op == Max) { - key = NewAST(ctx->file, pos, pos, Var, .name = "$"); - for (bool progress = true; progress;) { - ast_t *new_term; - progress = - (false || (new_term = parse_index_suffix(ctx, key)) - || (new_term = parse_method_call_suffix(ctx, key)) || (new_term = parse_field_suffix(ctx, key)) - || (new_term = parse_fncall_suffix(ctx, key)) || (new_term = parse_optional_suffix(ctx, key)) - || (new_term = parse_non_optional_suffix(ctx, key))); - if (progress) key = new_term; - } - if (key && key->tag == Var) key = NULL; - else if (key) pos = key->end; - } - - whitespace(&pos); - if (get_line_number(ctx->file, pos) != starting_line && get_indent(ctx, pos) < starting_indent) - parser_err(ctx, pos, eol(pos), "I expected this line to be at least as indented than the line above it"); - - ast_t *rhs = parse_infix_expr(ctx, pos, op_tightness[op] + 1); - if (!rhs) break; - pos = rhs->end; - - if (op == Min) { - return NewAST(ctx->file, lhs->start, rhs->end, Min, .lhs = lhs, .rhs = rhs, .key = key); - } else if (op == Max) { - return NewAST(ctx->file, lhs->start, rhs->end, Max, .lhs = lhs, .rhs = rhs, .key = key); - } else { - lhs = new (ast_t, .file = ctx->file, .start = lhs->start, .end = rhs->end, .tag = op, - .__data.Plus.lhs = lhs, .__data.Plus.rhs = rhs); - } - } - return lhs; -} - ast_t *parse_expr(parse_ctx_t *ctx, const char *pos) { return parse_infix_expr(ctx, pos, 0); } ast_t *parse_declaration(parse_ctx_t *ctx, const char *pos) { diff --git a/src/parse/parse.h b/src/parse/parse.h index b3a36f72..8af16e1b 100644 --- a/src/parse/parse.h +++ b/src/parse/parse.h @@ -8,7 +8,6 @@ ast_t *parse(const char *str); ast_t *parse_expression(const char *str); -ast_e match_binary_operator(const char **pos); ast_t *parse_comprehension_suffix(parse_ctx_t *ctx, ast_t *expr); ast_t *parse_field_suffix(parse_ctx_t *ctx, ast_t *lhs); ast_t *parse_fncall_suffix(parse_ctx_t *ctx, ast_t *fn); @@ -40,7 +39,6 @@ ast_t *parse_not(parse_ctx_t *ctx, const char 
*pos); ast_t *parse_none(parse_ctx_t *ctx, const char *pos); ast_t *parse_parens(parse_ctx_t *ctx, const char *pos); ast_t *parse_pass(parse_ctx_t *ctx, const char *pos); -ast_t *parse_path(parse_ctx_t *ctx, const char *pos); ast_t *parse_reduction(parse_ctx_t *ctx, const char *pos); ast_t *parse_repeat(parse_ctx_t *ctx, const char *pos); ast_t *parse_return(parse_ctx_t *ctx, const char *pos); diff --git a/src/parse/text.c b/src/parse/text.c index b9827644..5fe58da4 100644 --- a/src/parse/text.c +++ b/src/parse/text.c @@ -183,3 +183,45 @@ ast_t *parse_inline_c(parse_ctx_t *ctx, const char *pos) { return NewAST(ctx->file, start, pos, InlineCCode, .chunks = chunks, .type_ast = type); } + +public +ast_t *parse_path(parse_ctx_t *ctx, const char *pos) { + // "(" ("~/" / "./" / "../" / "/") ... ")" + const char *start = pos; + + if (!match(&pos, "(")) return NULL; + + if (!(*pos == '~' || *pos == '.' || *pos == '/')) return NULL; + + const char *path_start = pos; + size_t len = 1; + int paren_depth = 1; + while (pos + len < ctx->file->text + ctx->file->len - 1) { + if (pos[len] == '\\') { + len += 2; + continue; + } else if (pos[len] == '(') { + paren_depth += 1; + } else if (pos[len] == ')') { + paren_depth -= 1; + if (paren_depth <= 0) break; + } else if (pos[len] == '\r' || pos[len] == '\n') { + parser_err(ctx, path_start, &pos[len - 1], "This path was not closed"); + } + len += 1; + } + pos += len + 1; + char *path = String(string_slice(path_start, .length = len)); + for (char *src = path, *dest = path;;) { + if (src[0] == '\\') { + *(dest++) = src[1]; + src += 2; + } else if (*src) { + *(dest++) = *(src++); + } else { + *(dest++) = '\0'; + break; + } + } + return NewAST(ctx->file, start, pos, Path, .path = path); +} diff --git a/src/parse/text.h b/src/parse/text.h index cd07e0e1..6ab3cab2 100644 --- a/src/parse/text.h +++ b/src/parse/text.h @@ -6,3 +6,4 @@ ast_t *parse_text(parse_ctx_t *ctx, const char *pos); ast_t *parse_inline_c(parse_ctx_t *ctx, const char *pos); 
+ast_t *parse_path(parse_ctx_t *ctx, const char *pos); |
