diff options
| author | Bruce Hill <bruce@bruce-hill.com> | 2025-08-29 13:32:17 -0400 |
|---|---|---|
| committer | Bruce Hill <bruce@bruce-hill.com> | 2025-08-29 13:32:17 -0400 |
| commit | 43105107b9d1e985e9c182b904f2ac79b17fb460 (patch) | |
| tree | c2dbb1c0312d410a387d8da26a2475d862b5ddcf /src | |
| parent | d8116c27f406ac4915aaa7cdae97fb73c9847c8a (diff) | |
Improvements to text and inline C code formatting/parsing
Diffstat (limited to 'src')
| -rw-r--r-- | src/ast.c | 2 | ||||
| -rw-r--r-- | src/compile/expressions.c | 3 | ||||
| -rw-r--r-- | src/formatter/formatter.c | 152 | ||||
| -rw-r--r-- | src/parse/text.c | 20 | ||||
| -rw-r--r-- | src/stdlib/text.c | 43 | ||||
| -rw-r--r-- | src/stdlib/text.h | 1 |
6 files changed, 112 insertions, 109 deletions
@@ -291,7 +291,7 @@ Text_t ast_to_sexp(ast_t *ast) { ")"); T(When, "(When ", ast_to_sexp(data.subject), when_clauses_to_sexp(data.clauses), optional_sexp("else", data.else_body), ")"); - T(Reduction, "(Reduction ", quoted_text(binop_method_name(data.op)), " ", ast_to_sexp(data.key), " ", + T(Reduction, "(Reduction ", quoted_text(binop_operator(data.op)), " ", ast_to_sexp(data.key), " ", ast_to_sexp(data.iter), ")"); T(Skip, "(Skip ", quoted_text(data.target), ")"); T(Stop, "(Stop ", quoted_text(data.target), ")"); diff --git a/src/compile/expressions.c b/src/compile/expressions.c index 58288c50..544b7723 100644 --- a/src/compile/expressions.c +++ b/src/compile/expressions.c @@ -237,7 +237,8 @@ Text_t compile(env_t *env, ast_t *ast) { case Index: return compile_indexing(env, ast); case InlineCCode: { type_t *t = get_type(env, ast); - if (t->tag == VoidType) return Texts("{\n", compile_statement(env, ast), "\n}"); + if (Match(ast, InlineCCode)->type_ast != NULL) return Texts("({", compile_statement(env, ast), "; })"); + else if (t->tag == VoidType) return Texts("{\n", compile_statement(env, ast), "\n}"); else return compile_statement(env, ast); } case Use: code_err(ast, "Compiling 'use' as expression!"); diff --git a/src/formatter/formatter.c b/src/formatter/formatter.c index 49670e0d..2d88b486 100644 --- a/src/formatter/formatter.c +++ b/src/formatter/formatter.c @@ -10,7 +10,6 @@ #include "../parse/files.h" #include "../parse/utils.h" #include "../stdlib/datatypes.h" -#include "../stdlib/integers.h" #include "../stdlib/optionals.h" #include "../stdlib/stdlib.h" #include "../stdlib/text.h" @@ -28,6 +27,66 @@ Text_t format_namespace(ast_t *namespace, Table_t comments, Text_t indent) { return Texts("\n", indent, single_indent, fmt(namespace, comments, Texts(indent, single_indent))); } +typedef struct { + Text_t quote, unquote, interp; +} text_opts_t; + +text_opts_t choose_text_options(ast_list_t *chunks) { + int double_quotes = 0, single_quotes = 0, backticks = 0; + for (ast_list_t *chunk = chunks; chunk; chunk = chunk->next) { + if (chunk->ast->tag == TextLiteral) { + Text_t literal = Match(chunk->ast, TextLiteral)->text; + if (Text$has(literal, Text("\""))) double_quotes += 1; + if (Text$has(literal, Text("'"))) single_quotes += 1; + if (Text$has(literal, Text("`"))) backticks += 1; + } + } + Text_t quote; + if (double_quotes == 0) quote = Text("\""); + else if (single_quotes == 0) quote = Text("'"); + else if (backticks == 0) quote = Text("`"); + else quote = Text("\""); + + text_opts_t opts = {.quote = quote, .unquote = quote, .interp = Text("$")}; + return opts; +} + +static OptionalText_t format_inline_text(text_opts_t opts, ast_list_t *chunks, Table_t comments) { + Text_t code = opts.quote; + for (ast_list_t *chunk = chunks; chunk; chunk = chunk->next) { + if (chunk->ast->tag == TextLiteral) { + Text_t literal = Match(chunk->ast, TextLiteral)->text; + Text_t segment = Text$escaped(literal, false, Texts(opts.unquote, opts.interp)); + code = Texts(code, segment); + } else { + code = Texts(code, opts.interp, "(", fmt_inline(chunk->ast, comments), ")"); + } + } + return Texts(code, opts.unquote); +} + +static Text_t format_text(text_opts_t opts, ast_list_t *chunks, Table_t comments, Text_t indent) { + Text_t code = EMPTY_TEXT; + Text_t current_line = EMPTY_TEXT; + for (ast_list_t *chunk = chunks; chunk; chunk = chunk->next) { + if (chunk->ast->tag == TextLiteral) { + Text_t literal = Match(chunk->ast, TextLiteral)->text; + List_t lines = Text$lines(literal); + if (lines.length == 0) continue; + current_line = Texts(current_line, Text$escaped(*(Text_t *)lines.data, false, opts.interp)); + for (int64_t i = 1; i < lines.length; i += 1) { + add_line(&code, current_line, Texts(indent, single_indent)); + current_line = Text$escaped(*(Text_t *)(lines.data + i * lines.stride), false, opts.interp); + } + } else { + current_line = Texts(current_line, opts.interp, "(", fmt(chunk->ast, comments, indent), ")"); + } + } + add_line(&code, current_line, Texts(indent, single_indent)); + code = Texts(opts.quote, "\n", indent, single_indent, code, "\n", indent, opts.unquote); + return code; +} + OptionalText_t format_inline_code(ast_t *ast, Table_t comments) { if (range_has_comment(ast->start, ast->end, comments)) return NONE_TEXT; switch (ast->tag) { @@ -211,37 +270,21 @@ OptionalText_t format_inline_code(ast_t *ast, Table_t comments) { else return Texts(indexed, "[]"); } /*inline*/ case TextJoin: { - // TODO: choose quotation mark more smartly - Text_t source = Text$from_strn(ast->start, (int64_t)(ast->end - ast->start)); - Text_t quote = Text$to(source, I(1)); + text_opts_t opts = choose_text_options(Match(ast, TextJoin)->children); + Text_t ret = must(format_inline_text(opts, Match(ast, TextJoin)->children, comments)); const char *lang = Match(ast, TextJoin)->lang; - Text_t code = lang ? Texts("$", Text$from_str(lang), quote) : quote; - for (ast_list_t *chunk = Match(ast, TextJoin)->children; chunk; chunk = chunk->next) { - if (chunk->ast->tag == TextLiteral) { - Text_t literal = Match(chunk->ast, TextLiteral)->text; - code = Texts(code, Text$slice(Text$quoted(literal, false, quote), I(2), I(-2))); - } else { - code = Texts(code, "$(", fmt_inline(chunk->ast, comments), ")"); - } - } - return Texts(code, quote); + return lang ? Texts("$", Text$from_str(lang), ret) : ret; } /*inline*/ case InlineCCode: { DeclareMatch(c_code, ast, InlineCCode); - Text_t code = c_code->type_ast ? Texts("C_code:", format_type(c_code->type_ast), "(") : Text("C_code{"); - for (ast_list_t *chunk = c_code->chunks; chunk; chunk = chunk->next) { - if (chunk->ast->tag == TextLiteral) { - Text_t literal = Match(chunk->ast, TextLiteral)->text; - if (Text$has(literal, Text("\n"))) return NONE_TEXT; - code = Texts(code, Text$slice(Text$quoted(literal, false, Text("`")), I(2), I(-2))); - } else { - code = Texts(code, "@(", fmt_inline(chunk->ast, comments), ")"); - } - } - return Texts(code, c_code->type_ast ? Text(")") : Text("}")); + Text_t code = c_code->type_ast ? Texts("C_code:", format_type(c_code->type_ast)) : Text("C_code"); + text_opts_t opts = {.quote = Text("`"), .unquote = Text("`"), .interp = Text("@")}; + return Texts(code, must(format_inline_text(opts, Match(ast, InlineCCode)->chunks, comments))); } /*inline*/ case TextLiteral: { fail("Something went wrong, we shouldn't be formatting text literals directly"); } - /*inline*/ case Path: { return Texts("(", Text$from_str(Match(ast, Path)->path), ")"); } + /*inline*/ case Path: { + return Texts("(", Text$escaped(Text$from_str(Match(ast, Path)->path), false, Text("()")), ")"); + } /*inline*/ case Stop: { const char *target = Match(ast, Stop)->target; return target ? Texts("stop ", Text$from_str(target)) : Text("stop"); @@ -609,59 +652,18 @@ Text_t format_code(ast_t *ast, Table_t comments, Text_t indent) { } /*multiline*/ case TextJoin: { if (inlined_fits) return inlined; - // TODO: choose quotation mark more smartly - Text_t source = Text$from_strn(ast->start, (int64_t)(ast->end - ast->start)); - Text_t quote = Text$to(source, I(1)); + + text_opts_t opts = choose_text_options(Match(ast, TextJoin)->children); + Text_t ret = format_text(opts, Match(ast, TextJoin)->children, comments, indent); const char *lang = Match(ast, TextJoin)->lang; - Text_t code = EMPTY_TEXT; - Text_t current_line = EMPTY_TEXT; - for (ast_list_t *chunk = Match(ast, TextJoin)->children; chunk; chunk = chunk->next) { - if (chunk->ast->tag == TextLiteral) { - Text_t literal = Match(chunk->ast, TextLiteral)->text; - List_t lines = Text$lines(literal); - if (lines.length == 0) continue; - current_line = Texts(current_line, *(Text_t *)lines.data); - for (int64_t i = 1; i < lines.length; i += 1) { - add_line(&code, current_line, Texts(indent, single_indent)); - current_line = *(Text_t *)(lines.data + i * lines.stride); - } - } else { - current_line = Texts(current_line, "$(", fmt_inline(chunk->ast, comments), ")"); - } - } - add_line(&code, current_line, Texts(indent, single_indent)); - code = Texts(quote, "\n", indent, single_indent, code, "\n", indent, quote); - if (lang) code = Texts("$", Text$from_str(lang), code); - return code; + return lang ? Texts("$", Text$from_str(lang), ret) : ret; } /*multiline*/ case InlineCCode: { DeclareMatch(c_code, ast, InlineCCode); - if (inlined_fits && c_code->type_ast) return inlined; - - Text_t code = EMPTY_TEXT; - Text_t current_line = EMPTY_TEXT; - for (ast_list_t *chunk = c_code->chunks; chunk; chunk = chunk->next) { - if (chunk->ast->tag == TextLiteral) { - Text_t literal = Match(chunk->ast, TextLiteral)->text; - List_t lines = Text$lines(literal); - if (lines.length == 0) continue; - current_line = Texts(current_line, *(Text_t *)lines.data); - for (int64_t i = 1; i < lines.length; i += 1) { - add_line(&code, current_line, Texts(indent, single_indent)); - current_line = *(Text_t *)(lines.data + i * lines.stride); - } - } else { - current_line = Texts(current_line, "@(", fmt_inline(chunk->ast, comments), ")"); - } - } - add_line(&code, current_line, Texts(indent, single_indent)); - - if (c_code->type_ast) { - return Texts("C_code:", format_type(c_code->type_ast), "(\n", indent, single_indent, code, "\n", indent, - ")"); - } else { - return Texts("C_code{\n", indent, single_indent, code, "\n", indent, "}"); - } + if (inlined_fits && c_code->type != NULL) return inlined; + Text_t code = c_code->type_ast ? Texts("C_code:", format_type(c_code->type_ast)) : Text("C_code"); + text_opts_t opts = {.quote = Text("`"), .unquote = Text("`"), .interp = Text("@")}; + return Texts(code, format_text(opts, Match(ast, InlineCCode)->chunks, comments, indent)); } /*multiline*/ case TextLiteral: { fail("Something went wrong, we shouldn't be formatting text literals directly"); } /*multiline*/ case Path: { diff --git a/src/parse/text.c b/src/parse/text.c index c554273f..6e7201bb 100644 --- a/src/parse/text.c +++ b/src/parse/text.c @@ -162,23 +162,19 @@ ast_t *parse_inline_c(parse_ctx_t *ctx, const char *pos) { spaces(&pos); type_ast_t *type = NULL; - ast_list_t *chunks; if (match(&pos, ":")) { type = expect(ctx, start, &pos, parse_type, "I couldn't parse the type for this C_code code"); spaces(&pos); - if (!match(&pos, "(")) parser_err(ctx, start, pos, "I expected a '(' here"); - chunks = new (ast_list_t, .ast = NewAST(ctx->file, pos, pos, TextLiteral, Text("({")), - .next = _parse_text_helper(ctx, &pos, '(', ')', '@', false)); - if (type) { - REVERSE_LIST(chunks); - chunks = new (ast_list_t, .ast = NewAST(ctx->file, pos, pos, TextLiteral, Text("; })")), .next = chunks); - REVERSE_LIST(chunks); - } - } else { - if (!match(&pos, "{")) parser_err(ctx, start, pos, "I expected a '{' here"); - chunks = _parse_text_helper(ctx, &pos, '{', '}', '@', false); } + static const char *quote_chars = "\"'`|/;([{<"; + if (!strchr(quote_chars, *pos)) + parser_err(ctx, pos, pos + 1, + "This is not a valid string quotation character. Valid characters are: \"'`|/;([{<"); + + char quote = *(pos++); + char unquote = closing[(int)quote] ? closing[(int)quote] : quote; + ast_list_t *chunks = _parse_text_helper(ctx, &pos, quote, unquote, '@', false); return NewAST(ctx->file, start, pos, InlineCCode, .chunks = chunks, .type_ast = type); } diff --git a/src/stdlib/text.c b/src/stdlib/text.c index ed4023a4..57465034 100644 --- a/src/stdlib/text.c +++ b/src/stdlib/text.c @@ -1390,17 +1390,8 @@ Text_t Text$title(Text_t text, Text_t language) { } public -Text_t Text$quoted(Text_t text, bool colorize, Text_t quotation_mark) { - if (quotation_mark.length != 1) fail("Invalid quote text: ", quotation_mark, " (must have length == 1)"); - +Text_t Text$escaped(Text_t text, bool colorize, Text_t extra_escapes) { Text_t ret = colorize ? Text("\x1b[35m") : EMPTY_TEXT; - if (!Text$equal_values(quotation_mark, Text("\"")) && !Text$equal_values(quotation_mark, Text("'")) - && !Text$equal_values(quotation_mark, Text("`"))) - ret = concat2_assuming_safe(ret, Text("$")); - - ret = concat2_assuming_safe(ret, quotation_mark); - int32_t quote_char = Text$get_grapheme(quotation_mark, 0); - #define flush_unquoted() \ ({ \ if (unquoted_span > 0) { \ @@ -1454,15 +1445,18 @@ Text_t Text$quoted(Text_t text, bool colorize, Text_t quotation_mark) { break; } default: { - if (g == quote_char) { - flush_unquoted(); - if (colorize) ret = concat2_assuming_safe(ret, Text("\x1b[34;1m")); - ret = concat2_assuming_safe(ret, Text("\\")); - ret = concat2_assuming_safe(ret, quotation_mark); - if (colorize) ret = concat2_assuming_safe(ret, Text("\x1b[0;35m")); - } else { - unquoted_span += 1; + TextIter_t esc_state = NEW_TEXT_ITER_STATE(extra_escapes); + for (int64_t j = 0; j < extra_escapes.length; j++) { + int32_t esc = Text$get_grapheme_fast(&esc_state, j); + if (g == esc) { + flush_unquoted(); + if (colorize) ret = concat2_assuming_safe(ret, Text("\x1b[34;1m")); + ret = concat2_assuming_safe(ret, Text("\\")); + if (colorize) ret = concat2_assuming_safe(ret, Text("\x1b[0;35m")); + break; + } } + unquoted_span += 1; break; } } @@ -1470,10 +1464,19 @@ Text_t Text$quoted(Text_t text, bool colorize, Text_t quotation_mark) { flush_unquoted(); #undef add_escaped #undef flush_unquoted - - ret = concat2_assuming_safe(ret, quotation_mark); if (colorize) ret = concat2_assuming_safe(ret, Text("\x1b[m")); + return ret; +} + +public +Text_t Text$quoted(Text_t text, bool colorize, Text_t quotation_mark) { + if (quotation_mark.length != 1) fail("Invalid quote text: ", quotation_mark, " (must have length == 1)"); + Text_t ret = Text$escaped(text, colorize, quotation_mark); + if (!(Text$equal_values(quotation_mark, Text("\"")) || Text$equal_values(quotation_mark, Text("'")) + || Text$equal_values(quotation_mark, Text("`")))) + ret = Texts("$", quotation_mark, ret, quotation_mark); + else ret = Texts(quotation_mark, ret, quotation_mark); return ret; } diff --git a/src/stdlib/text.h b/src/stdlib/text.h index 5fa95675..d118cffd 100644 --- a/src/stdlib/text.h +++ b/src/stdlib/text.h @@ -54,6 +54,7 @@ Text_t Text$upper(Text_t text, Text_t language); Text_t Text$lower(Text_t text, Text_t language); Text_t Text$title(Text_t text, Text_t language); Text_t Text$as_text(const void *text, bool colorize, const TypeInfo_t *info); +Text_t Text$escaped(Text_t text, bool colorize, Text_t extra_escapes); Text_t Text$quoted(Text_t str, bool colorize, Text_t quotation_mark); PUREFUNC bool Text$starts_with(Text_t text, Text_t prefix, Text_t *remainder); PUREFUNC bool Text$ends_with(Text_t text, Text_t suffix, Text_t *remainder); |
