Improvements to text and inline C code formatting/parsing

author: Bruce Hill <bruce@bruce-hill.com> 2025-08-29 13:32:17 -0400
committer: Bruce Hill <bruce@bruce-hill.com> 2025-08-29 13:32:17 -0400
commit: 43105107b9d1e985e9c182b904f2ac79b17fb460 (patch)
tree: c2dbb1c0312d410a387d8da26a2475d862b5ddcf
parent: d8116c27f406ac4915aaa7cdae97fb73c9847c8a (diff)
6 files changed, 112 insertions, 109 deletions
diff --git a/src/ast.c b/src/ast.c
index 03843438..486e0262 100644
--- a/src/ast.c
+++ b/src/ast.c
@@ -291,7 +291,7 @@ Text_t ast_to_sexp(ast_t *ast) {
           ")");
         T(When, "(When ", ast_to_sexp(data.subject), when_clauses_to_sexp(data.clauses),
           optional_sexp("else", data.else_body), ")");
-        T(Reduction, "(Reduction ", quoted_text(binop_method_name(data.op)), " ", ast_to_sexp(data.key), " ",
+        T(Reduction, "(Reduction ", quoted_text(binop_operator(data.op)), " ", ast_to_sexp(data.key), " ",
           ast_to_sexp(data.iter), ")");
         T(Skip, "(Skip ", quoted_text(data.target), ")");
         T(Stop, "(Stop ", quoted_text(data.target), ")");
diff --git a/src/compile/expressions.c b/src/compile/expressions.c
index 58288c50..544b7723 100644
--- a/src/compile/expressions.c
+++ b/src/compile/expressions.c
@@ -237,7 +237,8 @@ Text_t compile(env_t *env, ast_t *ast) {
     case Index: return compile_indexing(env, ast);
     case InlineCCode: {
         type_t *t = get_type(env, ast);
-        if (t->tag == VoidType) return Texts("{\n", compile_statement(env, ast), "\n}");
+        if (Match(ast, InlineCCode)->type_ast != NULL) return Texts("({", compile_statement(env, ast), "; })");
+        else if (t->tag == VoidType) return Texts("{\n", compile_statement(env, ast), "\n}");
         else return compile_statement(env, ast);
     }
     case Use: code_err(ast, "Compiling 'use' as expression!");
diff --git a/src/formatter/formatter.c b/src/formatter/formatter.c
index 49670e0d..2d88b486 100644
--- a/src/formatter/formatter.c
+++ b/src/formatter/formatter.c
@@ -10,7 +10,6 @@
 #include "../parse/files.h"
 #include "../parse/utils.h"
 #include "../stdlib/datatypes.h"
-#include "../stdlib/integers.h"
 #include "../stdlib/optionals.h"
 #include "../stdlib/stdlib.h"
 #include "../stdlib/text.h"
@@ -28,6 +27,66 @@ Text_t format_namespace(ast_t *namespace, Table_t comments, Text_t indent) {
     return Texts("\n", indent, single_indent, fmt(namespace, comments, Texts(indent, single_indent)));
 }
 
+typedef struct {
+    Text_t quote, unquote, interp; // opening delimiter, closing delimiter, interpolation prefix
+} text_opts_t;
+
+// Choose the quote for a text literal: the first of ", ', ` that no literal chunk contains (default ").
+static text_opts_t choose_text_options(ast_list_t *chunks) {
+    bool has_double = false, has_single = false, has_backtick = false;
+    for (ast_list_t *chunk = chunks; chunk; chunk = chunk->next) {
+        if (chunk->ast->tag != TextLiteral) continue; // only literal chunks are inspected
+        Text_t literal = Match(chunk->ast, TextLiteral)->text;
+        if (Text$has(literal, Text("\""))) has_double = true;
+        if (Text$has(literal, Text("'"))) has_single = true;
+        if (Text$has(literal, Text("`"))) has_backtick = true;
+    }
+
+    // Preference order is ", then ', then `; when all three occur somewhere, fall back to ".
+    Text_t quote;
+    if (!has_double) quote = Text("\"");
+    else if (!has_single) quote = Text("'");
+    else if (!has_backtick) quote = Text("`");
+    else quote = Text("\"");
+    return (text_opts_t){.quote = quote, .unquote = quote, .interp = Text("$")};
+}
+
+// Render text chunks on a single line: opts.quote, each chunk in order, then opts.unquote.
+static OptionalText_t format_inline_text(text_opts_t opts, ast_list_t *chunks, Table_t comments) {
+    // Literal chunks get opts.unquote and opts.interp passed to Text$escaped as its extra escapes.
+    Text_t escapes = Texts(opts.unquote, opts.interp);
+    Text_t out = opts.quote;
+    for (ast_list_t *node = chunks; node; node = node->next) {
+        ast_t *piece = node->ast;
+        if (piece->tag == TextLiteral)
+            out = Texts(out, Text$escaped(Match(piece, TextLiteral)->text, false, escapes));
+        else out = Texts(out, opts.interp, "(", fmt_inline(piece, comments), ")");
+    }
+    return Texts(out, opts.unquote);
+}
+
+// Format text chunks as a multi-line literal: opts.quote, the indented lines, then opts.unquote.
+// Only opts.interp is passed to Text$escaped as an extra escape here.
+static Text_t format_text(text_opts_t opts, ast_list_t *chunks, Table_t comments, Text_t indent) {
+    Text_t body = EMPTY_TEXT, pending = EMPTY_TEXT;
+    for (ast_list_t *node = chunks; node; node = node->next) {
+        if (node->ast->tag != TextLiteral) {
+            // Interpolated chunk: append it to the line being built.
+            pending = Texts(pending, opts.interp, "(", fmt(node->ast, comments, indent), ")");
+            continue;
+        }
+        List_t lines = Text$lines(Match(node->ast, TextLiteral)->text);
+        for (int64_t i = 0; i < lines.length; i += 1) {
+            // Every line after the first ends the line being built and starts a new one.
+            if (i > 0) add_line(&body, pending, Texts(indent, single_indent));
+            Text_t line = Text$escaped(*(Text_t *)(lines.data + i * lines.stride), false, opts.interp);
+            pending = (i > 0) ? line : Texts(pending, line);
+        }
+    }
+    add_line(&body, pending, Texts(indent, single_indent));
+    return Texts(opts.quote, "\n", indent, single_indent, body, "\n", indent, opts.unquote);
+}
+
 OptionalText_t format_inline_code(ast_t *ast, Table_t comments) {
     if (range_has_comment(ast->start, ast->end, comments)) return NONE_TEXT;
     switch (ast->tag) {
@@ -211,37 +270,21 @@ OptionalText_t format_inline_code(ast_t *ast, Table_t comments) {
         else return Texts(indexed, "[]");
     }
     /*inline*/ case TextJoin: {
-        // TODO: choose quotation mark more smartly
-        Text_t source = Text$from_strn(ast->start, (int64_t)(ast->end - ast->start));
-        Text_t quote = Text$to(source, I(1));
+        text_opts_t opts = choose_text_options(Match(ast, TextJoin)->children);
+        Text_t ret = must(format_inline_text(opts, Match(ast, TextJoin)->children, comments));
         const char *lang = Match(ast, TextJoin)->lang;
-        Text_t code = lang ? Texts("$", Text$from_str(lang), quote) : quote;
-        for (ast_list_t *chunk = Match(ast, TextJoin)->children; chunk; chunk = chunk->next) {
-            if (chunk->ast->tag == TextLiteral) {
-                Text_t literal = Match(chunk->ast, TextLiteral)->text;
-                code = Texts(code, Text$slice(Text$quoted(literal, false, quote), I(2), I(-2)));
-            } else {
-                code = Texts(code, "$(", fmt_inline(chunk->ast, comments), ")");
-            }
-        }
-        return Texts(code, quote);
+        return lang ? Texts("$", Text$from_str(lang), ret) : ret;
     }
     /*inline*/ case InlineCCode: {
         DeclareMatch(c_code, ast, InlineCCode);
-        Text_t code = c_code->type_ast ? Texts("C_code:", format_type(c_code->type_ast), "(") : Text("C_code{");
-        for (ast_list_t *chunk = c_code->chunks; chunk; chunk = chunk->next) {
-            if (chunk->ast->tag == TextLiteral) {
-                Text_t literal = Match(chunk->ast, TextLiteral)->text;
-                if (Text$has(literal, Text("\n"))) return NONE_TEXT;
-                code = Texts(code, Text$slice(Text$quoted(literal, false, Text("`")), I(2), I(-2)));
-            } else {
-                code = Texts(code, "@(", fmt_inline(chunk->ast, comments), ")");
-            }
-        }
-        return Texts(code, c_code->type_ast ? Text(")") : Text("}"));
+        Text_t code = c_code->type_ast ? Texts("C_code:", format_type(c_code->type_ast)) : Text("C_code");
+        text_opts_t opts = {.quote = Text("`"), .unquote = Text("`"), .interp = Text("@")};
+        return Texts(code, must(format_inline_text(opts, Match(ast, InlineCCode)->chunks, comments)));
     }
     /*inline*/ case TextLiteral: { fail("Something went wrong, we shouldn't be formatting text literals directly"); }
-    /*inline*/ case Path: { return Texts("(", Text$from_str(Match(ast, Path)->path), ")"); }
+    /*inline*/ case Path: {
+        return Texts("(", Text$escaped(Text$from_str(Match(ast, Path)->path), false, Text("()")), ")");
+    }
     /*inline*/ case Stop: {
         const char *target = Match(ast, Stop)->target;
         return target ? Texts("stop ", Text$from_str(target)) : Text("stop");
@@ -609,59 +652,18 @@ Text_t format_code(ast_t *ast, Table_t comments, Text_t indent) {
     }
     /*multiline*/ case TextJoin: {
         if (inlined_fits) return inlined;
-        // TODO: choose quotation mark more smartly
-        Text_t source = Text$from_strn(ast->start, (int64_t)(ast->end - ast->start));
-        Text_t quote = Text$to(source, I(1));
+
+        text_opts_t opts = choose_text_options(Match(ast, TextJoin)->children);
+        Text_t ret = format_text(opts, Match(ast, TextJoin)->children, comments, indent);
         const char *lang = Match(ast, TextJoin)->lang;
-        Text_t code = EMPTY_TEXT;
-        Text_t current_line = EMPTY_TEXT;
-        for (ast_list_t *chunk = Match(ast, TextJoin)->children; chunk; chunk = chunk->next) {
-            if (chunk->ast->tag == TextLiteral) {
-                Text_t literal = Match(chunk->ast, TextLiteral)->text;
-                List_t lines = Text$lines(literal);
-                if (lines.length == 0) continue;
-                current_line = Texts(current_line, *(Text_t *)lines.data);
-                for (int64_t i = 1; i < lines.length; i += 1) {
-                    add_line(&code, current_line, Texts(indent, single_indent));
-                    current_line = *(Text_t *)(lines.data + i * lines.stride);
-                }
-            } else {
-                current_line = Texts(current_line, "$(", fmt_inline(chunk->ast, comments), ")");
-            }
-        }
-        add_line(&code, current_line, Texts(indent, single_indent));
-        code = Texts(quote, "\n", indent, single_indent, code, "\n", indent, quote);
-        if (lang) code = Texts("$", Text$from_str(lang), code);
-        return code;
+        return lang ? Texts("$", Text$from_str(lang), ret) : ret;
     }
     /*multiline*/ case InlineCCode: {
         DeclareMatch(c_code, ast, InlineCCode);
-        if (inlined_fits && c_code->type_ast) return inlined;
-
-        Text_t code = EMPTY_TEXT;
-        Text_t current_line = EMPTY_TEXT;
-        for (ast_list_t *chunk = c_code->chunks; chunk; chunk = chunk->next) {
-            if (chunk->ast->tag == TextLiteral) {
-                Text_t literal = Match(chunk->ast, TextLiteral)->text;
-                List_t lines = Text$lines(literal);
-                if (lines.length == 0) continue;
-                current_line = Texts(current_line, *(Text_t *)lines.data);
-                for (int64_t i = 1; i < lines.length; i += 1) {
-                    add_line(&code, current_line, Texts(indent, single_indent));
-                    current_line = *(Text_t *)(lines.data + i * lines.stride);
-                }
-            } else {
-                current_line = Texts(current_line, "@(", fmt_inline(chunk->ast, comments), ")");
-            }
-        }
-        add_line(&code, current_line, Texts(indent, single_indent));
-
-        if (c_code->type_ast) {
-            return Texts("C_code:", format_type(c_code->type_ast), "(\n", indent, single_indent, code, "\n", indent,
-                         ")");
-        } else {
-            return Texts("C_code{\n", indent, single_indent, code, "\n", indent, "}");
-        }
+        if (inlined_fits && c_code->type_ast != NULL) return inlined; // only annotated C_code is kept inline
+        Text_t code = c_code->type_ast ? Texts("C_code:", format_type(c_code->type_ast)) : Text("C_code");
+        text_opts_t opts = {.quote = Text("`"), .unquote = Text("`"), .interp = Text("@")};
+        return Texts(code, format_text(opts, c_code->chunks, comments, indent));
     }
     /*multiline*/ case TextLiteral: { fail("Something went wrong, we shouldn't be formatting text literals directly"); }
     /*multiline*/ case Path: {
diff --git a/src/parse/text.c b/src/parse/text.c
index c554273f..6e7201bb 100644
--- a/src/parse/text.c
+++ b/src/parse/text.c
@@ -162,23 +162,19 @@ ast_t *parse_inline_c(parse_ctx_t *ctx, const char *pos) {
 
     spaces(&pos);
     type_ast_t *type = NULL;
-    ast_list_t *chunks;
     if (match(&pos, ":")) {
         type = expect(ctx, start, &pos, parse_type, "I couldn't parse the type for this C_code code");
         spaces(&pos);
-        if (!match(&pos, "(")) parser_err(ctx, start, pos, "I expected a '(' here");
-        chunks = new (ast_list_t, .ast = NewAST(ctx->file, pos, pos, TextLiteral, Text("({")),
-                      .next = _parse_text_helper(ctx, &pos, '(', ')', '@', false));
-        if (type) {
-            REVERSE_LIST(chunks);
-            chunks = new (ast_list_t, .ast = NewAST(ctx->file, pos, pos, TextLiteral, Text("; })")), .next = chunks);
-            REVERSE_LIST(chunks);
-        }
-    } else {
-        if (!match(&pos, "{")) parser_err(ctx, start, pos, "I expected a '{' here");
-        chunks = _parse_text_helper(ctx, &pos, '{', '}', '@', false);
     }
 
+    // strchr() also matches the terminating NUL, so end-of-input has to be rejected explicitly.
+    static const char *quote_chars = "\"'`|/;([{<";
+    if (*pos == '\0' || !strchr(quote_chars, *pos))
+        parser_err(ctx, pos, pos + 1,
+                   "This is not a valid string quotation character. Valid characters are: \"'`|/;([{<");
+    char quote = *(pos++);
+    char unquote = closing[(int)quote] ? closing[(int)quote] : quote;
+    ast_list_t *chunks = _parse_text_helper(ctx, &pos, quote, unquote, '@', false);
     return NewAST(ctx->file, start, pos, InlineCCode, .chunks = chunks, .type_ast = type);
 }
 
diff --git a/src/stdlib/text.c b/src/stdlib/text.c
index ed4023a4..57465034 100644
--- a/src/stdlib/text.c
+++ b/src/stdlib/text.c
@@ -1390,17 +1390,8 @@ Text_t Text$title(Text_t text, Text_t language) {
 }
 
 public
-Text_t Text$quoted(Text_t text, bool colorize, Text_t quotation_mark) {
-    if (quotation_mark.length != 1) fail("Invalid quote text: ", quotation_mark, " (must have length == 1)");
-
+Text_t Text$escaped(Text_t text, bool colorize, Text_t extra_escapes) {
     Text_t ret = colorize ? Text("\x1b[35m") : EMPTY_TEXT;
-    if (!Text$equal_values(quotation_mark, Text("\"")) && !Text$equal_values(quotation_mark, Text("'"))
-        && !Text$equal_values(quotation_mark, Text("`")))
-        ret = concat2_assuming_safe(ret, Text("$"));
-
-    ret = concat2_assuming_safe(ret, quotation_mark);
-    int32_t quote_char = Text$get_grapheme(quotation_mark, 0);
-
 #define flush_unquoted()                                                                                               \
     ({                                                                                                                 \
         if (unquoted_span > 0) {                                                                                       \
@@ -1454,15 +1445,18 @@ Text_t Text$quoted(Text_t text, bool colorize, Text_t quotation_mark) {
             break;
         }
         default: {
-            if (g == quote_char) {
-                flush_unquoted();
-                if (colorize) ret = concat2_assuming_safe(ret, Text("\x1b[34;1m"));
-                ret = concat2_assuming_safe(ret, Text("\\"));
-                ret = concat2_assuming_safe(ret, quotation_mark);
-                if (colorize) ret = concat2_assuming_safe(ret, Text("\x1b[0;35m"));
-            } else {
-                unquoted_span += 1;
+            TextIter_t esc_state = NEW_TEXT_ITER_STATE(extra_escapes);
+            for (int64_t j = 0; j < extra_escapes.length; j++) {
+                int32_t esc = Text$get_grapheme_fast(&esc_state, j);
+                if (g == esc) {
+                    flush_unquoted();
+                    if (colorize) ret = concat2_assuming_safe(ret, Text("\x1b[34;1m"));
+                    ret = concat2_assuming_safe(ret, Text("\\"));
+                    if (colorize) ret = concat2_assuming_safe(ret, Text("\x1b[0;35m"));
+                    break;
+                }
             }
+            unquoted_span += 1;
             break;
         }
         }
@@ -1470,10 +1464,19 @@ Text_t Text$quoted(Text_t text, bool colorize, Text_t quotation_mark) {
     flush_unquoted();
 #undef add_escaped
 #undef flush_unquoted
-
-    ret = concat2_assuming_safe(ret, quotation_mark);
     if (colorize) ret = concat2_assuming_safe(ret, Text("\x1b[m"));
+    return ret;
+}
+
+public
+Text_t Text$quoted(Text_t text, bool colorize, Text_t quotation_mark) {
+    if (quotation_mark.length != 1) fail("Invalid quote text: ", quotation_mark, " (must have length == 1)");
 
+    // Wrap the escaped text in the quotation mark; any mark other than ", ', or ` also gets a "$" prefix.
+    bool plain = Text$equal_values(quotation_mark, Text("\"")) || Text$equal_values(quotation_mark, Text("'"))
+                 || Text$equal_values(quotation_mark, Text("`"));
+    Text_t esc = Text$escaped(text, colorize, quotation_mark);
+    Text_t ret = plain ? Texts(quotation_mark, esc, quotation_mark) : Texts("$", quotation_mark, esc, quotation_mark);
     return ret;
 }
 
diff --git a/src/stdlib/text.h b/src/stdlib/text.h
index 5fa95675..d118cffd 100644
--- a/src/stdlib/text.h
+++ b/src/stdlib/text.h
@@ -54,6 +54,7 @@ Text_t Text$upper(Text_t text, Text_t language);
 Text_t Text$lower(Text_t text, Text_t language);
 Text_t Text$title(Text_t text, Text_t language);
 Text_t Text$as_text(const void *text, bool colorize, const TypeInfo_t *info);
+Text_t Text$escaped(Text_t text, bool colorize, Text_t extra_escapes);
 Text_t Text$quoted(Text_t str, bool colorize, Text_t quotation_mark);
 PUREFUNC bool Text$starts_with(Text_t text, Text_t prefix, Text_t *remainder);
 PUREFUNC bool Text$ends_with(Text_t text, Text_t suffix, Text_t *remainder);
author	Bruce Hill <bruce@bruce-hill.com>	2025-08-29 13:32:17 -0400
committer	Bruce Hill <bruce@bruce-hill.com>	2025-08-29 13:32:17 -0400
commit	43105107b9d1e985e9c182b904f2ac79b17fb460 (patch)
tree	c2dbb1c0312d410a387d8da26a2475d862b5ddcf
parent	d8116c27f406ac4915aaa7cdae97fb73c9847c8a (diff)