aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBruce Hill <bruce@bruce-hill.com>2024-02-10 15:23:06 -0500
committerBruce Hill <bruce@bruce-hill.com>2024-02-10 15:23:06 -0500
commit4e545c67985299dabc2a061160e126068d43541e (patch)
treef3b57ff6abd221894f01a4b6b85c068197b83254
parent930c09f46d1e249fc889e8f1179046a48c1eaf32 (diff)
Better string parsing.
-rw-r--r--ast.c3
-rw-r--r--ast.h6
-rw-r--r--compile.c3
-rw-r--r--nextlang.h3
-rw-r--r--parse.c217
5 files changed, 89 insertions, 143 deletions
diff --git a/ast.c b/ast.c
index bb151aa6..2623a46b 100644
--- a/ast.c
+++ b/ast.c
@@ -92,8 +92,7 @@ CORD ast_to_cord(ast_t *ast)
T(Char, "(\x1b[35m'%c'\x1b[m)", data.c)
T(StringLiteral, "\x1b[35m\"%s\"\x1b[m", data.str)
T(StringJoin, "(%r)", ast_list_to_cord(data.children))
- T(Interp, "(%r)", ast_to_cord(data.value))
- T(Declare, "(var=%s, value=%s)", ast_to_cord(data.var), ast_to_cord(data.value))
+ T(Declare, "(var=%s, value=%r)", ast_to_cord(data.var), ast_to_cord(data.value))
T(Assign, "(targets=%r, values=%r)", ast_list_to_cord(data.targets), ast_list_to_cord(data.values))
T(BinaryOp, "(%r, %s, %r)", ast_to_cord(data.lhs), OP_NAMES[data.op], ast_to_cord(data.rhs))
T(UpdateAssign, "(%r, %s, %r)", ast_to_cord(data.lhs), OP_NAMES[data.op], ast_to_cord(data.rhs))
diff --git a/ast.h b/ast.h
index d431a988..157d7af9 100644
--- a/ast.h
+++ b/ast.h
@@ -92,7 +92,7 @@ typedef enum {
Unknown = 0,
Nil, Bool, Var,
Int, Num, Char,
- StringLiteral, StringJoin, Interp,
+ StringLiteral, StringJoin,
Declare, Assign,
BinaryOp, UnaryOp, UpdateAssign,
Min, Max,
@@ -146,10 +146,6 @@ struct ast_s {
ast_list_t *children;
} StringJoin;
struct {
- ast_t *value;
- bool labelled:1, colorize:1, quote_string:1;
- } Interp;
- struct {
ast_t *var;
ast_t *value;
} Declare;
diff --git a/compile.c b/compile.c
index 7193b5ae..97a7d21c 100644
--- a/compile.c
+++ b/compile.c
@@ -130,9 +130,6 @@ CORD compile(ast_t *ast)
}
return code;
}
- case Interp: {
- return CORD_asprintf("__cord(%r)", compile(Match(ast, Interp)->value));
- }
case Block: {
ast_list_t *stmts = Match(ast, Block)->statements;
if (stmts && !stmts->next)
diff --git a/nextlang.h b/nextlang.h
index 2cbcac0b..2e55b1cf 100644
--- a/nextlang.h
+++ b/nextlang.h
@@ -35,6 +35,7 @@
int32_t: CORD_asprintf("%d", x), int64_t: CORD_asprintf("%ld", x), \
double: CORD_asprintf("%g", x), float: CORD_asprintf("%g", x), \
CORD: x, \
+ char*: x, \
default: "???")
#define __heap(x) (__typeof(x)*)memcpy(GC_MALLOC(sizeof(x)), (__typeof(x)[1]){x}, sizeof(x))
#define __stack(x) (&(__typeof(x)){x})
@@ -56,3 +57,5 @@
#define mod1(x, n) (((x) % (n)) + (__typeof(x))1)
#define say(str) puts(CORD_to_const_char_star(str))
+
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/parse.c b/parse.c
index 2e3fa51c..75a0d1e2 100644
--- a/parse.c
+++ b/parse.c
@@ -14,6 +14,7 @@
#include "ast.h"
#include "util.h"
+static const char closing[128] = {['(']=')', ['[']=']', ['<']='>', ['{']='}', ['|']='|', ['/']='/'};
typedef struct {
sss_file_t *file;
@@ -80,6 +81,7 @@ static PARSER(parse_opt_indented_block);
static PARSER(parse_var);
static PARSER(parse_enum_def);
static PARSER(parse_struct_def);
+static PARSER(parse_string);
static PARSER(parse_func_def);
static PARSER(parse_extern);
static PARSER(parse_declaration);
@@ -166,7 +168,10 @@ size_t some_not(const char **pos, const char *forbid) {
return len;
}
-size_t spaces(const char **pos) { return some_of(pos, " \t"); }
+size_t spaces(const char **pos) {
+ return some_of(pos, " \t");
+}
+
size_t whitespace(const char **pos) {
const char *p0 = *pos;
while (some_of(pos, " \t\r\n") || comment(pos))
@@ -222,7 +227,7 @@ static void expect_closing(
const char *eol = strchr(*pos, '\n');
const char *next = strstr(*pos, closing);
-
+
const char *end = eol < next ? eol : next;
if (isatty(STDERR_FILENO) && !getenv("NO_COLOR"))
@@ -868,154 +873,100 @@ PARSER(parse_char) {
}
}
-PARSER(parse_interpolation) {
- const char *start = pos;
- ++pos; // ignore the initial character, typically a '$', but might be other stuff like '@' in different contexts
- bool labelled = match(&pos, ":");
- ast_t *value = optional(ctx, &pos, parse_parens);
- if (!value) value = optional(ctx, &pos, parse_term);
- if (!value) {
- match_group(&pos, '(');
- parser_err(ctx, start, pos, "This interpolation didn't parse");
- }
- return NewAST(ctx->file, start, pos, Interp, .value=value, .labelled=labelled);
-}
-
PARSER(parse_string) {
- static const char closing[128] = {['(']=')', ['[']=']', ['<']='>', ['{']='}'};
- static const bool escapes[128] = {['\'']='\x1B', ['(']='\x1B', ['>']='\x1B', ['/']='\x1B'};
- static const char interps[128] = {['>']='@', ['/']='@', ['\'']='\x1A', ['(']='\x1A'};
-
- const char *string_start = pos;
- char open, close;
- if (match(&pos, "$")) {
- open = *pos;
- close = closing[(int)open] ? closing[(int)open] : open;
- ++pos;
+ // ["$" [interp-char [closing-interp-char]]] ('"' ... '"' / "'" ... "'")
+ const char *start = pos;
+ char close_quote, start_interp = '\x03', close_interp = '\x02';
+ if (match(&pos, "\"")) {
+ close_quote = '"', start_interp = '{', close_interp = '}';
+ } else if (match(&pos, "'")) {
+ close_quote = '\'';
+ } else if (match(&pos, "$")) {
+ if (*pos == '"' || *pos == '\'' || *pos == '/' || *pos == ';') {
+ close_quote = *pos;
+ } else {
+ start_interp = *(pos++);
+ close_interp = closing[(int)start_interp];
+ if (*pos == close_interp) ++pos;
+ close_quote = closing[(int)*pos] ? closing[(int)*pos] : *pos;
+ ++pos;
+ }
} else {
- if (*pos != '\'' && *pos != '"')
- return NULL;
- open = *pos;
- close = *pos;
- ++pos;
+ return NULL;
}
- char interp_char = interps[(int)open] ? interps[(int)open] : '$';
- char escape_char = escapes[(int)open] ? escapes[(int)open] : '\\';
+ // printf("Parsing string: '%c' .. '%c' interp: '%c%c'\n", *start, close_quote, start_interp, close_interp);
- if (open == ':' || open == '>')
- spaces(&pos);
+ int64_t starting_indent = sss_get_indent(ctx->file, pos);
+ int64_t string_indent;
+ if (*pos == '\r' || *pos == '\n') {
+ const char *first_line = pos;
+ whitespace(&first_line);
+ string_indent = sss_get_indent(ctx->file, first_line);
+ if (string_indent <= starting_indent)
+ parser_err(ctx, start, first_line, "Multi-line strings must be indented on their first line");
+ } else {
+ string_indent = starting_indent + 4;
+ }
ast_list_t *chunks = NULL;
- if (*pos == '\r' || *pos == '\n') { // Multiline string
- char special[] = {'\n','\r',interp_char,escape_char,'\0'};
- int64_t starting_indent = sss_get_indent(ctx->file, pos);
- // indentation-delimited string
- match(&pos, "\r");
- match(&pos, "\n");
- int64_t first_line = sss_get_line_number(ctx->file, pos);
- int64_t indented = sss_get_indent(ctx->file, pos);
- pos = sss_get_line(ctx->file, first_line);
- while (pos < ctx->file->text + ctx->file->len) {
- const char *eol = strchrnul(pos, '\n');
- if (eol == pos + strspn(pos, " \t\r")) { // Empty line
- ast_t *ast = NewAST(ctx->file, pos, eol, StringLiteral, .str="\n");
- chunks = new(ast_list_t, .ast=ast, .next=chunks);
- pos = eol + 1;
- continue;
+ CORD chunk = NULL;
+ const char *chunk_start = pos;
+ for (; pos < ctx->file->text + ctx->file->len && *pos != close_quote; ++pos) {
+ if (*pos == start_interp) {
+ if (chunk) {
+ ast_t *literal = NewAST(ctx->file, chunk_start, pos, StringLiteral, .str=CORD_to_const_char_star(chunk));
+ chunks = new(ast_list_t, .ast=literal, .next=chunks);
+ chunk = NULL;
}
- if (!match_indentation(&pos, starting_indent))
- parser_err(ctx, pos, strchrnul(pos, '\n'), "This isn't a valid indentation level for this unterminated string");
-
- if (*pos == close) {
- ++pos;
- goto finished;
+ ++pos;
+ spaces(&pos);
+ for (ast_t *interp; (interp=optional(ctx, &pos, parse_expr)); spaces(&pos)) {
+ interp = WrapAST(interp, FunctionCall, .fn=WrapAST(interp, Var, .name="__cord"), .args=new(ast_list_t, .ast=interp));
+ chunks = new(ast_list_t, .ast=interp, .next=chunks);
+ chunk_start = pos;
}
-
- if (!match_indentation(&pos, (indented - starting_indent)))
- parser_err(ctx, pos, strchrnul(pos, '\n'), "I was expecting this to have %lu extra indentation beyond %lu",
- (indented - starting_indent), starting_indent);
-
- while (pos < eol+1) {
- size_t len = strcspn(pos, special);
- if (pos[len] == '\r') ++len;
- if (pos[len] == '\n') ++len;
-
- if (len > 0) {
- ast_t *chunk = NewAST(ctx->file, pos, pos+len-1, StringLiteral, .str=heap_strn(pos, len));
- chunks = new(ast_list_t, .ast=chunk, .next=chunks);
- }
-
- pos += len;
-
- if (*pos == escape_char) {
- const char *start = pos;
- const char* unescaped = unescape(&pos);
- ast_t *chunk = NewAST(ctx->file, start, pos, StringLiteral, .str=unescaped);
- chunks = new(ast_list_t, .ast=chunk, .next=chunks);
- ++pos;
- } else if (*pos == interp_char) {
- ast_t *chunk = parse_interpolation(ctx, pos);
- chunks = new(ast_list_t, .ast=chunk, .next=chunks);
- pos = chunk->end;
+ if (close_interp) {
+ const char *closing = pos;
+ spaces(&closing);
+ if (*closing == close_interp) {
+ pos = closing;
}
}
- }
- finished:;
- // Strip trailing newline:
- if (chunks) {
- ast_t *last_chunk = chunks->ast;
- if (last_chunk->tag == StringLiteral) {
- auto str = Match(last_chunk, StringLiteral);
- const char* trimmed = heap_strn(str->str, strlen(str->str)-1);
- chunks->ast = NewAST(ctx->file, last_chunk->start, last_chunk->end-1, StringLiteral, .str=trimmed);
- }
- }
- } else { // Inline string
- char special[] = {'\n','\r',open,close,interp_char,escape_char,'\0'};
- int depth = 1;
- while (depth > 0 && *pos) {
- size_t len = strcspn(pos, special);
- if (len > 0) {
- ast_t *chunk = NewAST(ctx->file, pos, pos+len-1, StringLiteral, .str=heap_strn(pos, len));
- chunks = new(ast_list_t, .ast=chunk, .next=chunks);
- pos += len;
+ } else if (*pos == '\r' || *pos == '\n') {
+ // Newline handling
+ match(&pos, "\r");
+ match(&pos, "\n");
+ if (match_indentation(&pos, string_indent)) {
+ if (chunk || chunks)
+ chunk = CORD_cat_char(chunk, '\n');
+ --pos;
+ continue;
}
-
- if (*pos == interp_char) {
- ast_t *chunk = parse_interpolation(ctx, pos);
- chunks = new(ast_list_t, .ast=chunk, .next=chunks);
- pos = chunk->end;
- } else if (*pos == escape_char) {
- const char *start = pos;
- const char* unescaped = unescape(&pos);
- ast_t *chunk = NewAST(ctx->file, start, pos, StringLiteral, .str=unescaped);
- chunks = new(ast_list_t, .ast=chunk, .next=chunks);
- } else if (*pos == '\r' || *pos == '\n') {
- if (open == ' ' || open == ':' || open == '>') goto string_finished;
- parser_err(ctx, string_start, pos, "This line ended without closing the string");
- } else if (*pos == close) { // if open == close, then don't do nesting (i.e. check 'close' first)
- --depth;
- if (depth > 0) {
- ast_t *chunk = NewAST(ctx->file, pos, pos+1, StringLiteral, .str=heap_strn(pos, 1));
- chunks = new(ast_list_t, .ast=chunk, .next=chunks);
+ if (sss_get_indent(ctx->file, pos) == starting_indent) {
+ if (*pos == close_quote) {
+ break;
+ } else if (some_of(&pos, ".") >= 2) {
+ // Multi-line split
+ --pos;
+ continue;
}
- ++pos;
- } else if (*pos == open) {
- ++depth;
- ast_t *chunk = NewAST(ctx->file, pos, pos+1, StringLiteral, .str=heap_strn(pos, 1));
- chunks = new(ast_list_t, .ast=chunk, .next=chunks);
- ++pos;
- } else {
- ast_t *chunk = NewAST(ctx->file, pos, pos+1, StringLiteral, .str=heap_strn(pos, 1));
- ++pos;
- chunks = new(ast_list_t, .ast=chunk, .next=chunks);
}
+ parser_err(ctx, pos, strchrnul(pos, '\n'), "This string line isn't correctly indented");
+ } else {
+ chunk = CORD_cat_char(chunk, *pos);
}
}
- string_finished:;
+
+ if (chunk) {
+ ast_t *literal = NewAST(ctx->file, chunk_start, pos, StringLiteral, .str=CORD_to_const_char_star(chunk));
+ chunks = new(ast_list_t, .ast=literal, .next=chunks);
+ chunk = NULL;
+ }
+
REVERSE_LIST(chunks);
- return NewAST(ctx->file, string_start, pos, StringJoin, .children=chunks);
+ expect_closing(ctx, &pos, (char[]){close_quote, 0}, "I was expecting a '%c' to finish this string", close_quote);
+ return NewAST(ctx->file, start, pos, StringJoin, .children=chunks);
}
PARSER(parse_skip) {