aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Bruce Hill <bruce@bruce-hill.com> 2025-08-29 13:32:17 -0400
committer Bruce Hill <bruce@bruce-hill.com> 2025-08-29 13:32:17 -0400
commit 43105107b9d1e985e9c182b904f2ac79b17fb460 (patch)
tree c2dbb1c0312d410a387d8da26a2475d862b5ddcf
parent d8116c27f406ac4915aaa7cdae97fb73c9847c8a (diff)
Improvements to text and inline C code formatting/parsing
-rw-r--r-- src/ast.c 2
-rw-r--r-- src/compile/expressions.c 3
-rw-r--r-- src/formatter/formatter.c 152
-rw-r--r-- src/parse/text.c 20
-rw-r--r-- src/stdlib/text.c 43
-rw-r--r-- src/stdlib/text.h 1
6 files changed, 112 insertions, 109 deletions
diff --git a/src/ast.c b/src/ast.c
index 03843438..486e0262 100644
--- a/src/ast.c
+++ b/src/ast.c
@@ -291,7 +291,7 @@ Text_t ast_to_sexp(ast_t *ast) {
")");
T(When, "(When ", ast_to_sexp(data.subject), when_clauses_to_sexp(data.clauses),
optional_sexp("else", data.else_body), ")");
- T(Reduction, "(Reduction ", quoted_text(binop_method_name(data.op)), " ", ast_to_sexp(data.key), " ",
+ T(Reduction, "(Reduction ", quoted_text(binop_operator(data.op)), " ", ast_to_sexp(data.key), " ",
ast_to_sexp(data.iter), ")");
T(Skip, "(Skip ", quoted_text(data.target), ")");
T(Stop, "(Stop ", quoted_text(data.target), ")");
diff --git a/src/compile/expressions.c b/src/compile/expressions.c
index 58288c50..544b7723 100644
--- a/src/compile/expressions.c
+++ b/src/compile/expressions.c
@@ -237,7 +237,8 @@ Text_t compile(env_t *env, ast_t *ast) {
case Index: return compile_indexing(env, ast);
case InlineCCode: {
type_t *t = get_type(env, ast);
- if (t->tag == VoidType) return Texts("{\n", compile_statement(env, ast), "\n}");
+ if (Match(ast, InlineCCode)->type_ast != NULL) return Texts("({", compile_statement(env, ast), "; })");
+ else if (t->tag == VoidType) return Texts("{\n", compile_statement(env, ast), "\n}");
else return compile_statement(env, ast);
}
case Use: code_err(ast, "Compiling 'use' as expression!");
diff --git a/src/formatter/formatter.c b/src/formatter/formatter.c
index 49670e0d..2d88b486 100644
--- a/src/formatter/formatter.c
+++ b/src/formatter/formatter.c
@@ -10,7 +10,6 @@
#include "../parse/files.h"
#include "../parse/utils.h"
#include "../stdlib/datatypes.h"
-#include "../stdlib/integers.h"
#include "../stdlib/optionals.h"
#include "../stdlib/stdlib.h"
#include "../stdlib/text.h"
@@ -28,6 +27,66 @@ Text_t format_namespace(ast_t *namespace, Table_t comments, Text_t indent) {
return Texts("\n", indent, single_indent, fmt(namespace, comments, Texts(indent, single_indent)));
}
+typedef struct { // Delimiters used when a list of text chunks is rendered back to source code.
+ Text_t quote, unquote, interp; // quote opens the literal, unquote closes it, interp precedes each (...) interpolation
+} text_opts_t;
+// Chooses delimiters for a text literal: the first of double quote, single quote or backtick that no TextLiteral chunk contains.
+text_opts_t choose_text_options(ast_list_t *chunks) {
+ int double_quotes = 0, single_quotes = 0, backticks = 0; // number of literal chunks containing each mark
+ for (ast_list_t *chunk = chunks; chunk; chunk = chunk->next) {
+ if (chunk->ast->tag == TextLiteral) { // chunks with any other tag are not inspected
+ Text_t literal = Match(chunk->ast, TextLiteral)->text;
+ if (Text$has(literal, Text("\""))) double_quotes += 1;
+ if (Text$has(literal, Text("'"))) single_quotes += 1;
+ if (Text$has(literal, Text("`"))) backticks += 1;
+ }
+ }
+ Text_t quote;
+ if (double_quotes == 0) quote = Text("\""); // preference order: double quote, then single quote, then backtick
+ else if (single_quotes == 0) quote = Text("'");
+ else if (backticks == 0) quote = Text("`");
+ else quote = Text("\""); // all three marks occur somewhere: fall back to the double quote
+
+ text_opts_t opts = {.quote = quote, .unquote = quote, .interp = Text("$")}; // same mark opens and closes; interpolation marker is always $
+ return opts;
+}
+// Renders chunks as a single-line literal: opts.quote, then every chunk in list order, then opts.unquote.
+static OptionalText_t format_inline_text(text_opts_t opts, ast_list_t *chunks, Table_t comments) {
+ Text_t code = opts.quote;
+ for (ast_list_t *chunk = chunks; chunk; chunk = chunk->next) {
+ if (chunk->ast->tag == TextLiteral) {
+ Text_t literal = Match(chunk->ast, TextLiteral)->text;
+ Text_t segment = Text$escaped(literal, false, Texts(opts.unquote, opts.interp)); // closing mark and interpolation marker are passed as extra escapes
+ code = Texts(code, segment);
+ } else {
+ code = Texts(code, opts.interp, "(", fmt_inline(chunk->ast, comments), ")"); // any other chunk becomes interp + parenthesized fmt_inline output
+ }
+ }
+ return Texts(code, opts.unquote); // NOTE(review): no path visible in this function returns a none value, although the return type is optional
+}
+// Renders chunks as a multi-line literal: quote, newline, the body at indent + single_indent, newline, indent, unquote.
+static Text_t format_text(text_opts_t opts, ast_list_t *chunks, Table_t comments, Text_t indent) {
+ Text_t code = EMPTY_TEXT; // body lines accumulated through add_line
+ Text_t current_line = EMPTY_TEXT; // line being built; it can span several chunks
+ for (ast_list_t *chunk = chunks; chunk; chunk = chunk->next) {
+ if (chunk->ast->tag == TextLiteral) {
+ Text_t literal = Match(chunk->ast, TextLiteral)->text;
+ List_t lines = Text$lines(literal);
+ if (lines.length == 0) continue; // a literal that yields no lines contributes nothing
+ current_line = Texts(current_line, Text$escaped(*(Text_t *)lines.data, false, opts.interp)); // first line extends the pending line; only the interpolation marker is an extra escape
+ for (int64_t i = 1; i < lines.length; i += 1) {
+ add_line(&code, current_line, Texts(indent, single_indent)); // hand the finished line to add_line together with indent + single_indent
+ current_line = Text$escaped(*(Text_t *)(lines.data + i * lines.stride), false, opts.interp); // element i is located with the list stride
+ }
+ } else {
+ current_line = Texts(current_line, opts.interp, "(", fmt(chunk->ast, comments, indent), ")"); // other chunks: interp + parenthesized fmt output, appended to the pending line
+ }
+ }
+ add_line(&code, current_line, Texts(indent, single_indent)); // emit the last pending line
+ code = Texts(opts.quote, "\n", indent, single_indent, code, "\n", indent, opts.unquote);
+ return code;
+}
+
OptionalText_t format_inline_code(ast_t *ast, Table_t comments) {
if (range_has_comment(ast->start, ast->end, comments)) return NONE_TEXT;
switch (ast->tag) {
@@ -211,37 +270,21 @@ OptionalText_t format_inline_code(ast_t *ast, Table_t comments) {
else return Texts(indexed, "[]");
}
/*inline*/ case TextJoin: {
- // TODO: choose quotation mark more smartly
- Text_t source = Text$from_strn(ast->start, (int64_t)(ast->end - ast->start));
- Text_t quote = Text$to(source, I(1));
+ text_opts_t opts = choose_text_options(Match(ast, TextJoin)->children);
+ Text_t ret = must(format_inline_text(opts, Match(ast, TextJoin)->children, comments));
const char *lang = Match(ast, TextJoin)->lang;
- Text_t code = lang ? Texts("$", Text$from_str(lang), quote) : quote;
- for (ast_list_t *chunk = Match(ast, TextJoin)->children; chunk; chunk = chunk->next) {
- if (chunk->ast->tag == TextLiteral) {
- Text_t literal = Match(chunk->ast, TextLiteral)->text;
- code = Texts(code, Text$slice(Text$quoted(literal, false, quote), I(2), I(-2)));
- } else {
- code = Texts(code, "$(", fmt_inline(chunk->ast, comments), ")");
- }
- }
- return Texts(code, quote);
+ return lang ? Texts("$", Text$from_str(lang), ret) : ret;
}
/*inline*/ case InlineCCode: {
DeclareMatch(c_code, ast, InlineCCode);
- Text_t code = c_code->type_ast ? Texts("C_code:", format_type(c_code->type_ast), "(") : Text("C_code{");
- for (ast_list_t *chunk = c_code->chunks; chunk; chunk = chunk->next) {
- if (chunk->ast->tag == TextLiteral) {
- Text_t literal = Match(chunk->ast, TextLiteral)->text;
- if (Text$has(literal, Text("\n"))) return NONE_TEXT;
- code = Texts(code, Text$slice(Text$quoted(literal, false, Text("`")), I(2), I(-2)));
- } else {
- code = Texts(code, "@(", fmt_inline(chunk->ast, comments), ")");
- }
- }
- return Texts(code, c_code->type_ast ? Text(")") : Text("}"));
+ Text_t code = c_code->type_ast ? Texts("C_code:", format_type(c_code->type_ast)) : Text("C_code");
+ text_opts_t opts = {.quote = Text("`"), .unquote = Text("`"), .interp = Text("@")};
+ return Texts(code, must(format_inline_text(opts, Match(ast, InlineCCode)->chunks, comments)));
}
/*inline*/ case TextLiteral: { fail("Something went wrong, we shouldn't be formatting text literals directly"); }
- /*inline*/ case Path: { return Texts("(", Text$from_str(Match(ast, Path)->path), ")"); }
+ /*inline*/ case Path: {
+ return Texts("(", Text$escaped(Text$from_str(Match(ast, Path)->path), false, Text("()")), ")");
+ }
/*inline*/ case Stop: {
const char *target = Match(ast, Stop)->target;
return target ? Texts("stop ", Text$from_str(target)) : Text("stop");
@@ -609,59 +652,18 @@ Text_t format_code(ast_t *ast, Table_t comments, Text_t indent) {
}
/*multiline*/ case TextJoin: {
if (inlined_fits) return inlined;
- // TODO: choose quotation mark more smartly
- Text_t source = Text$from_strn(ast->start, (int64_t)(ast->end - ast->start));
- Text_t quote = Text$to(source, I(1));
+
+ text_opts_t opts = choose_text_options(Match(ast, TextJoin)->children);
+ Text_t ret = format_text(opts, Match(ast, TextJoin)->children, comments, indent);
const char *lang = Match(ast, TextJoin)->lang;
- Text_t code = EMPTY_TEXT;
- Text_t current_line = EMPTY_TEXT;
- for (ast_list_t *chunk = Match(ast, TextJoin)->children; chunk; chunk = chunk->next) {
- if (chunk->ast->tag == TextLiteral) {
- Text_t literal = Match(chunk->ast, TextLiteral)->text;
- List_t lines = Text$lines(literal);
- if (lines.length == 0) continue;
- current_line = Texts(current_line, *(Text_t *)lines.data);
- for (int64_t i = 1; i < lines.length; i += 1) {
- add_line(&code, current_line, Texts(indent, single_indent));
- current_line = *(Text_t *)(lines.data + i * lines.stride);
- }
- } else {
- current_line = Texts(current_line, "$(", fmt_inline(chunk->ast, comments), ")");
- }
- }
- add_line(&code, current_line, Texts(indent, single_indent));
- code = Texts(quote, "\n", indent, single_indent, code, "\n", indent, quote);
- if (lang) code = Texts("$", Text$from_str(lang), code);
- return code;
+ return lang ? Texts("$", Text$from_str(lang), ret) : ret;
}
/*multiline*/ case InlineCCode: {
DeclareMatch(c_code, ast, InlineCCode);
- if (inlined_fits && c_code->type_ast) return inlined;
-
- Text_t code = EMPTY_TEXT;
- Text_t current_line = EMPTY_TEXT;
- for (ast_list_t *chunk = c_code->chunks; chunk; chunk = chunk->next) {
- if (chunk->ast->tag == TextLiteral) {
- Text_t literal = Match(chunk->ast, TextLiteral)->text;
- List_t lines = Text$lines(literal);
- if (lines.length == 0) continue;
- current_line = Texts(current_line, *(Text_t *)lines.data);
- for (int64_t i = 1; i < lines.length; i += 1) {
- add_line(&code, current_line, Texts(indent, single_indent));
- current_line = *(Text_t *)(lines.data + i * lines.stride);
- }
- } else {
- current_line = Texts(current_line, "@(", fmt_inline(chunk->ast, comments), ")");
- }
- }
- add_line(&code, current_line, Texts(indent, single_indent));
-
- if (c_code->type_ast) {
- return Texts("C_code:", format_type(c_code->type_ast), "(\n", indent, single_indent, code, "\n", indent,
- ")");
- } else {
- return Texts("C_code{\n", indent, single_indent, code, "\n", indent, "}");
- }
+ if (inlined_fits && c_code->type != NULL) return inlined;
+ Text_t code = c_code->type_ast ? Texts("C_code:", format_type(c_code->type_ast)) : Text("C_code");
+ text_opts_t opts = {.quote = Text("`"), .unquote = Text("`"), .interp = Text("@")};
+ return Texts(code, format_text(opts, Match(ast, InlineCCode)->chunks, comments, indent));
}
/*multiline*/ case TextLiteral: { fail("Something went wrong, we shouldn't be formatting text literals directly"); }
/*multiline*/ case Path: {
diff --git a/src/parse/text.c b/src/parse/text.c
index c554273f..6e7201bb 100644
--- a/src/parse/text.c
+++ b/src/parse/text.c
@@ -162,23 +162,19 @@ ast_t *parse_inline_c(parse_ctx_t *ctx, const char *pos) {
spaces(&pos);
type_ast_t *type = NULL;
- ast_list_t *chunks;
if (match(&pos, ":")) {
type = expect(ctx, start, &pos, parse_type, "I couldn't parse the type for this C_code code");
spaces(&pos);
- if (!match(&pos, "(")) parser_err(ctx, start, pos, "I expected a '(' here");
- chunks = new (ast_list_t, .ast = NewAST(ctx->file, pos, pos, TextLiteral, Text("({")),
- .next = _parse_text_helper(ctx, &pos, '(', ')', '@', false));
- if (type) {
- REVERSE_LIST(chunks);
- chunks = new (ast_list_t, .ast = NewAST(ctx->file, pos, pos, TextLiteral, Text("; })")), .next = chunks);
- REVERSE_LIST(chunks);
- }
- } else {
- if (!match(&pos, "{")) parser_err(ctx, start, pos, "I expected a '{' here");
- chunks = _parse_text_helper(ctx, &pos, '{', '}', '@', false);
}
+ static const char *quote_chars = "\"'`|/;([{<";
+ if (!strchr(quote_chars, *pos))
+ parser_err(ctx, pos, pos + 1,
+ "This is not a valid string quotation character. Valid characters are: \"'`|/;([{<");
+
+ char quote = *(pos++);
+ char unquote = closing[(int)quote] ? closing[(int)quote] : quote;
+ ast_list_t *chunks = _parse_text_helper(ctx, &pos, quote, unquote, '@', false);
return NewAST(ctx->file, start, pos, InlineCCode, .chunks = chunks, .type_ast = type);
}
diff --git a/src/stdlib/text.c b/src/stdlib/text.c
index ed4023a4..57465034 100644
--- a/src/stdlib/text.c
+++ b/src/stdlib/text.c
@@ -1390,17 +1390,8 @@ Text_t Text$title(Text_t text, Text_t language) {
}
public
-Text_t Text$quoted(Text_t text, bool colorize, Text_t quotation_mark) {
- if (quotation_mark.length != 1) fail("Invalid quote text: ", quotation_mark, " (must have length == 1)");
-
+Text_t Text$escaped(Text_t text, bool colorize, Text_t extra_escapes) {
Text_t ret = colorize ? Text("\x1b[35m") : EMPTY_TEXT;
- if (!Text$equal_values(quotation_mark, Text("\"")) && !Text$equal_values(quotation_mark, Text("'"))
- && !Text$equal_values(quotation_mark, Text("`")))
- ret = concat2_assuming_safe(ret, Text("$"));
-
- ret = concat2_assuming_safe(ret, quotation_mark);
- int32_t quote_char = Text$get_grapheme(quotation_mark, 0);
-
#define flush_unquoted() \
({ \
if (unquoted_span > 0) { \
@@ -1454,15 +1445,18 @@ Text_t Text$quoted(Text_t text, bool colorize, Text_t quotation_mark) {
break;
}
default: {
- if (g == quote_char) {
- flush_unquoted();
- if (colorize) ret = concat2_assuming_safe(ret, Text("\x1b[34;1m"));
- ret = concat2_assuming_safe(ret, Text("\\"));
- ret = concat2_assuming_safe(ret, quotation_mark);
- if (colorize) ret = concat2_assuming_safe(ret, Text("\x1b[0;35m"));
- } else {
- unquoted_span += 1;
+ TextIter_t esc_state = NEW_TEXT_ITER_STATE(extra_escapes);
+ for (int64_t j = 0; j < extra_escapes.length; j++) {
+ int32_t esc = Text$get_grapheme_fast(&esc_state, j);
+ if (g == esc) {
+ flush_unquoted();
+ if (colorize) ret = concat2_assuming_safe(ret, Text("\x1b[34;1m"));
+ ret = concat2_assuming_safe(ret, Text("\\"));
+ if (colorize) ret = concat2_assuming_safe(ret, Text("\x1b[0;35m"));
+ break;
+ }
}
+ unquoted_span += 1;
break;
}
}
@@ -1470,10 +1464,19 @@ Text_t Text$quoted(Text_t text, bool colorize, Text_t quotation_mark) {
flush_unquoted();
#undef add_escaped
#undef flush_unquoted
-
- ret = concat2_assuming_safe(ret, quotation_mark);
if (colorize) ret = concat2_assuming_safe(ret, Text("\x1b[m"));
+ return ret;
+}
+// Escapes text and wraps it in quotation_mark; marks other than double quote, single quote or backtick also get a $ prefix.
+public
+Text_t Text$quoted(Text_t text, bool colorize, Text_t quotation_mark) {
+ if (quotation_mark.length != 1) fail("Invalid quote text: ", quotation_mark, " (must have length == 1)"); // the mark must have length exactly 1
+ Text_t ret = Text$escaped(text, colorize, quotation_mark); // the mark itself is passed as the extra character to escape
+ if (!(Text$equal_values(quotation_mark, Text("\"")) || Text$equal_values(quotation_mark, Text("'"))
+ || Text$equal_values(quotation_mark, Text("`"))))
+ ret = Texts("$", quotation_mark, ret, quotation_mark); // custom mark: $ + mark + escaped text + mark
+ else ret = Texts(quotation_mark, ret, quotation_mark); // standard mark: mark + escaped text + mark
+ return ret; // NOTE(review): with colorize set, the color codes come from Text$escaped, so the marks added here sit outside them - confirm intended
+}
diff --git a/src/stdlib/text.h b/src/stdlib/text.h
index 5fa95675..d118cffd 100644
--- a/src/stdlib/text.h
+++ b/src/stdlib/text.h
@@ -54,6 +54,7 @@ Text_t Text$upper(Text_t text, Text_t language);
Text_t Text$lower(Text_t text, Text_t language);
Text_t Text$title(Text_t text, Text_t language);
Text_t Text$as_text(const void *text, bool colorize, const TypeInfo_t *info);
+Text_t Text$escaped(Text_t text, bool colorize, Text_t extra_escapes);
Text_t Text$quoted(Text_t str, bool colorize, Text_t quotation_mark);
PUREFUNC bool Text$starts_with(Text_t text, Text_t prefix, Text_t *remainder);
PUREFUNC bool Text$ends_with(Text_t text, Text_t suffix, Text_t *remainder);