1 // Logic for parsing text literals
7 #include "../unistr-fixed.h"
12 #include "../stdlib/text.h"
13 #include "../stdlib/util.h"
16 #include "expressions.h"
// Parses one quoted text literal beginning at `*out_pos` and collects its pieces into a list of
// AST nodes: TextLiteral nodes for literal runs, and whatever `parse_term_no_suffix` yields for
// each interpolation.
//   ctx           - parse context; `ctx->file->text` and `ctx->file->len` bound the scan.
//   out_pos       - `*out_pos` must be at an opening quote (one of the six forms matched below).
//   allow_interps - when false, the interpolation branch of the scan loop is never taken.
// NOTE(review): this excerpt omits lines (see the gaps in the embedded line numbers), so the
// return statement and any write-back to `*out_pos` are not visible here -- confirm in the full file.
20 static ast_list_t *_parse_text_helper(parse_ctx_t *ctx, const char **out_pos, bool allow_interps) {
21 const char *pos = *out_pos;
// `starting_indent` comes from `get_indent` at the opening quote (presumably the indentation of
// that line); `string_indent` is one indent step (SPACES_PER_INDENT) deeper.
23 int64_t starting_indent = get_indent(ctx, pos);
24 int64_t string_indent = starting_indent + SPACES_PER_INDENT;
// Choose the closing delimiter (`quote`), the interpolation marker (`interp`) and whether
// backslash escapes are honored. Triple-character forms are tested before their
// single-character counterparts. Backtick forms interpolate with `@`, the others with `$`.
// Only the single-character forms enable escapes.
26 const char *quote, *interp;
27 bool allow_escapes = true;
28 if (match(&pos, "\"\"\"")) { // Triple double quote
29 quote = "\"\"\"", interp = "$", allow_escapes = false;
30 } else if (match(&pos, "'''")) { // Triple single quote
31 quote = "'''", interp = "$", allow_escapes = false;
32 } else if (match(&pos, "```")) { // Triple backtick
33 quote = "```", interp = "@", allow_escapes = false;
34 } else if (match(&pos, "\"")) { // Double quote
35 quote = "\"", interp = "$", allow_escapes = true;
36 } else if (match(&pos, "'")) { // Single quote
37 quote = "'", interp = "$", allow_escapes = true;
38 } else if (match(&pos, "`")) { // Backtick
39 quote = "`", interp = "@", allow_escapes = true;
// NOTE(review): presumably the fallback for "no quote form matched"; the line that opens this
// branch is not shown in this excerpt.
41 parser_err(ctx, pos, pos, "I expected a valid text here");
// A NULL marker makes the `interp != NULL` test in the loop fail, disabling interpolation.
44 if (!allow_interps) interp = NULL;
// `chunks` is built by prepending nodes (each new node's `.next` is the old list) and is passed
// to REVERSE_LIST at the end.
46 ast_list_t *chunks = NULL;
// Literal text accumulated for the TextLiteral node currently being built.
47 Text_t chunk = EMPTY_TEXT;
// Source position used as the start of the next TextLiteral node.
48 const char *chunk_start = pos;
// Set when a newline is the very first thing inside the literal; while set, the
// "nested pair end" quote test in the loop is skipped.
49 bool leading_newline = false;
// Byte length of the run of plain characters ending at `pos` that has not yet been added to `chunk`.
50 int64_t plain_span_len = 0;
// FLUSH_PLAIN_SPAN: if a plain span is pending, append the `plain_span_len` bytes that end at
// `pos` to `chunk`. NOTE(review): the remainder of the macro body is not shown in this excerpt.
51 #define FLUSH_PLAIN_SPAN() \
53 if (plain_span_len > 0) { \
54 chunk = Texts(chunk, Text$from_strn(pos - plain_span_len, (size_t)plain_span_len)); \
59 for (const char *end = ctx->file->text + ctx->file->len; pos < end;) {
// Each iteration handles one construct at `pos`; `end` is one past the last byte of the file text.
60 const char *after_indentation = pos;
61 const char *interp_start = pos;
62 if (interp != NULL && strncmp(pos, interp, strlen(interp)) == 0) { // Interpolation
// Any literal text gathered so far becomes its own TextLiteral node ahead of the interpolation.
64 if (chunk.length > 0) {
65 ast_t *literal = NewAST(ctx->file, chunk_start, pos, TextLiteral, .text = chunk);
66 chunks = new (ast_list_t, .ast = literal, .next = chunks);
// Step over the marker; a space or tab directly after it is rejected.
69 pos += strlen(interp);
70 if (*pos == ' ' || *pos == '\t')
71 parser_err(ctx, pos, pos + 1, "Whitespace is not allowed before an interpolation here");
// The interpolated term is parsed with `parse_term_no_suffix` and added to the list.
// (`value` is declared on a line not shown in this excerpt.)
73 expect(ctx, interp_start, &pos, parse_term_no_suffix, "I expected an interpolation term here");
74 chunks = new (ast_list_t, .ast = value, .next = chunks);
76 } else if (allow_escapes && *pos == '\\') {
// Backslash escape: the string returned by `unescape` is appended to `chunk`
// (presumably `unescape` also advances `pos` past the escape -- confirm).
78 const char *c = unescape(ctx, &pos);
79 chunk = Texts(chunk, Text$from_str(c));
80 } else if (!leading_newline && strncmp(pos, quote, strlen(quote)) == 0) { // Nested pair end
// A quote sequence only ends the literal when `get_indent` here equals `starting_indent`.
// What happens otherwise is on lines not shown in this excerpt.
81 if (get_indent(ctx, pos) == starting_indent) break;
84 } else if (newline_with_indentation(&after_indentation, string_indent)) { // Newline
// Newline followed by the literal's own indentation (`string_indent`): continue after it.
86 pos = after_indentation;
// A newline seen before any text or chunk exists is only recorded via `leading_newline`.
87 if (!leading_newline && !(chunk.length > 0 || chunks)) {
88 leading_newline = true;
// NOTE(review): presumably the alternative branch (its `else` line is not shown): the newline
// is kept as a "\n" in the text.
90 chunk = Texts(chunk, Text("\n"));
92 } else if (newline_with_indentation(&after_indentation, starting_indent)) { // Line continuation (..)
// Newline followed by the starting indentation: expect either the closing quote, or two or
// more '.' characters; anything else is an error. The bodies of the first two cases are not
// shown in this excerpt.
94 pos = after_indentation;
95 if (strncmp(pos, quote, strlen(quote)) == 0) {
97 } else if (some_of(&pos, ".") >= 2) {
101 parser_err(ctx, pos, eol(pos),
102 "This multi-line string should be either indented or have '..' at the front");
104 } else { // Plain character
// Decode one UTF-8 character and extend the pending plain span by its byte length.
// (`codepoint` is declared on a line not shown in this excerpt.)
// NOTE(review): `next - pos` is evaluated before `next` is tested for NULL on the following
// line. libunistring documents `u8_next` as returning NULL at a NUL character (confirm); in
// that case the subtraction uses a null pointer and leaves `plain_span_len` with a bogus value
// before the `break`. Consider testing `next` first.
106 const char *next = (const char *)u8_next(&codepoint, (const uint8_t *)pos);
107 plain_span_len += (int64_t)(next - pos);
108 if (next == NULL) break;
114 #undef FLUSH_PLAIN_SPAN
// After the scan, the closing quote is required (presumably `expect_closing` reports the given
// message if it is absent).
116 expect_closing(ctx, &pos, quote, "I was expecting a ", quote, " to finish this string");
// Any remaining literal text becomes the final TextLiteral node.
118 if (chunk.length > 0) {
119 ast_t *literal = NewAST(ctx->file, chunk_start, pos, TextLiteral, .text = chunk);
120 chunks = new (ast_list_t, .ast = literal, .next = chunks);
// Nodes were prepended while scanning, so the list is reversed here (presumably restoring
// source order).
123 REVERSE_LIST(chunks);
// Parses a text literal at `pos`, optionally preceded by `$` and a language name, and returns a
// TextJoin node whose children are the chunks produced by `_parse_text_helper`.
//   ctx           - parse context, passed through to the helper and to error reporting.
//   pos           - candidate start of the literal.
//   allow_interps - forwarded to `_parse_text_helper`.
// Returns NULL when, after any `$` prefix, `pos` is not at a `"`, `'` or backtick.
128 ast_t *parse_text(parse_ctx_t *ctx, const char *pos, bool allow_interps) {
129 // ('"' ... '"' / "'" ... "'" / "`" ... "`")
130 // "$" [name] quote-char ... close-quote
131 const char *start = pos;
132 const char *lang = NULL;
// A `$` prefix must be followed by a language name; the line that assigns `lang` is not shown
// in this excerpt.
134 if (match(&pos, "$")) {
136 if (lang == NULL) parser_err(ctx, start, pos, "I expected a language name after the `$`");
// Not a quote character: this is not a text literal, so let the caller try something else.
139 if (!(*pos == '"' || *pos == '\'' || *pos == '`')) return NULL;
141 ast_list_t *chunks = _parse_text_helper(ctx, &pos, allow_interps);
// Optional `~colorized` suffix.
// NOTE(review): if `match` consumes the `~` and `colorized` does not follow, `pos` presumably
// stays past the `~`, so the node's span would include it -- confirm whether that is intended.
142 bool colorize = match(&pos, "~") && match_word(&pos, "colorized");
143 return NewAST(ctx->file, start, pos, TextJoin, .lang = lang, .children = chunks, .colorize = colorize);
// Parses an inline C block: the word `C_code`, an optional `:` followed by a type, and then a
// quoted body handled by `_parse_text_helper`. Produces an InlineCCode node carrying the body
// chunks and the optional type.
//   ctx - parse context.
//   pos - candidate start of the construct.
// Returns NULL when `C_code` is not at `pos`.
// NOTE(review): lines are omitted from this excerpt between the keyword check and the type
// handling, and after the type is parsed (see the gaps in the embedded line numbers).
146 ast_t *parse_inline_c(parse_ctx_t *ctx, const char *pos) {
147 const char *start = pos;
148 if (!match_word(&pos, "C_code")) return NULL;
// `type` stays NULL unless a `:` introduces an explicit type.
151 type_ast_t *type = NULL;
152 if (match(&pos, ":")) {
153 type = expect(ctx, start, &pos, parse_type, "I couldn't parse the type for this C_code code");
// The body must start with one of these characters.
// NOTE(review): `strchr` also matches the string's terminating NUL, so `*pos == '\0'` passes
// this check. In addition, the `_parse_text_helper` visible in this file only matches `"`, `'`
// and backtick forms, so the other characters listed here would reach its
// "I expected a valid text here" error -- confirm whether the wider set is still intended.
157 static const char *quote_chars = "\"'`|/;([{<";
158 if (!strchr(quote_chars, *pos))
159 parser_err(ctx, pos, pos + 1,
160 "This is not a valid string quotation character. Valid characters are: \"'`|/;([{<");
// Interpolation is always enabled for inline C bodies (third argument is `true`).
162 ast_list_t *chunks = _parse_text_helper(ctx, &pos, true);
163 return NewAST(ctx->file, start, pos, InlineCCode, .chunks = chunks, .type_ast = type);