Improved syntax for dollar-string literals

2024-08-18 14:44:15 -04:00 · 2024-08-18 14:44:15 -04:00 · f4b04a1b8c
commit f4b04a1b8c
parent 04603308af
14 changed files with 123 additions and 104 deletions
--- a/docs/strings.md
+++ b/docs/strings.md
@ -17,6 +17,7 @@ without using printf-style string formatting.
 // Basic string:
 str := "Hello world"
 str2 := 'Also a string'
+str3 := `Backticks too`
 ```

 ## Line Splits
@ -59,13 +60,18 @@ multi_line := "

 ## String Interpolations

-Inside a double quoted string, you can use curly braces (`{...}`) to insert an
+Inside a double-quoted or backtick-quoted string, you can use a dollar sign (`$`) to insert an
 expression that you want converted to a string. This is called string
 interpolation:

 ```
 // Interpolation:
-str := "Sum: {1 + 2}"
+my_var := 5
+str := "My var is $my_var!"
+// Equivalent to "My var is 5!"
+
+// Using parentheses:
+str := "Sum: $(1 + 2)"
 // equivalent to "Sum: 3"
 ```

@ -73,7 +79,7 @@ Single-quoted strings do not have interpolations:

 ```
 // No interpolation here:
-str := 'Sum: {1 + 2}'
+str := 'Sum: $(1 + 2)'
 ```

 ## String Escapes
@ -89,11 +95,12 @@ crlf := \r\n
 quote := \"
 ```

-These string literals can be used as interpolation values:
+These string literals can be used as interpolation values with or without
+parentheses, depending on which you find more readable:

 ```
-two_lines := "one{\n}two"
-has_quotes := "some {\"}quotes{\"} here"
+two_lines := "one$(\n)two"
+has_quotes := "some $\"quotes$\" here"
 ```

 However, in general it is best practice to use multi-line strings to avoid these problems:
@ -163,32 +170,35 @@ str := "
 >>> str == "{\n}blank lines{\n}"
 ```

-### Advanced $-Strings
+### Customizable $-Strings

-Sometimes you need to use many `{`s or `"`s inside a string, but you don't want
-to type `{\{}` or `{\"}` each time. In such cases, you can use the more
-advanced form of strings. The advanced form lets you explicitly specify which
-characters are used for interpolation and which characters are used for
-opening/closing the string. Advanced strings begin with a dollar sign (`$`),
-followed by what interpolation style to use, followed by the character to use
-to delimit the string, followed by the string contents and a closing string
-delimiter. The interpolation style can be a matching pair (`()`, `[]`, `{}`, or
-`<>`) or any other single character. When the interpolation style is a matching
-pair, the interpolation is any expression enclosed in that pair (e.g.
-`${}"interpolate {1 + 2}"`). When the interpolation style is a single
-character, the interpolation must be either a parenthesized expression or a
-single term with no infix operators (e.g. a variable), for example:
-`$@"Interpolate @var or @(1 + 2)"`.
+Sometimes you might need to use a lot of literal `$`s or quotation marks in a
+string. In such cases, you can use the more customizable form of strings. The
+customizable form lets you explicitly specify which character to use for
+interpolation and which characters to use for delimiting the string.
+
+The first character after the `$` is the custom interpolation character, which
+can be any of the following symbols: `~!@#%^&*+=\?`. If none of these
+characters is used, the interpolation character defaults to `$`. Since `$` is
+already the default, writing a second `$` here (i.e. a double `$$`) instead
+disables interpolation altogether.
+
+The next thing in a customizable string is the character used to delimit the
+string. The string delimiter can be any of the following symbols: `` "'`|/;([{< ``.
+If the string delimiter is one of `([{<`, then the string will continue until a
+matching `)]}>` is found, not terminating unless the delimiters are balanced
+(i.e. nested pairs of delimiters are considered part of the string).

 Here are some examples:

 ```
-$[]"In here, quotes delimit the string and square brackets interpolate: [1 + 2]"
-$@"For single-letter interpolations, the interpolation is a single term like @my_var without a closing symbol"
-$@"But you can parenthesize expressions like: @(x + y) if you need to"
-$$"Double dollars means dollar signs interpolate: $my_var $(1 + 2)"
-$${If you have a string with "quotes" and 'single quotes', you can choose something else like curly braces to delimit the string}
-$?#Here hashes delimit the string and question marks interpolate: ?(1 + 2)#
+$"Equivalent to a normal string with dollar interps: $(1 + 2)"
+$@"The same, but the AT symbol interpolates: @(1 + 2)"
+$$"No interpolation here, $ is just a literal character"
+$|This string is pipe-delimited, so it can have "quotes" and 'single quotes' and interpolates with dollar sign: $(1+2)|
+$(This string is parens-delimited, so you can have (nested) parens without ending the string)
+$=[This string is square-bracket delimited [which can be nested] and uses equals for interps: =(1 + 2)]
+$@/look ma, regex literals!/
 ```

 When strings are delimited by matching pairs (`()`, `[]`, `{}`, or `<>`), they
--- a/parse.c
+++ b/parse.c
@ -1141,7 +1141,8 @@ PARSER(parse_bool) {
 }

 PARSER(parse_text) {
-    // ["$" [name] [interp-char [closing-interp-char]]] ('"' ... '"' / "'" ... "'")
+    // ('"' ... '"' / "'" ... "'" / "`" ... "`")
+    // "$" [name] [interp-char] quote-char ... close-quote
    const char *start = pos;
    const char *lang = NULL;

@ -1156,30 +1157,29 @@ PARSER(parse_text) {
        return NewAST(ctx->file, start, pos, TextLiteral, .cord=cord);
    }

-    char open_quote, close_quote, open_interp = '\x03', close_interp = '\x02';
-    if (match(&pos, "\"")) {
-        open_quote = '"', close_quote = '"', open_interp = '{', close_interp = '}';
-    } else if (match(&pos, "'")) {
-        open_quote = '\'', close_quote = '\'';
-    } else if (match(&pos, "$")) {
+    char open_quote, close_quote, open_interp = '$';
+    if (match(&pos, "\"")) { // Double quote
+        open_quote = '"', close_quote = '"', open_interp = '$';
+    } else if (match(&pos, "`")) { // Backtick
+        open_quote = '`', close_quote = '`', open_interp = '$';
+    } else if (match(&pos, "'")) { // Single quote
+        open_quote = '\'', close_quote = '\'', open_interp = '\x03';
+    } else if (match(&pos, "$")) { // Customized strings
        lang = get_id(&pos);
-        if (pos[1] == pos[0]) {
-            // Disable interp using a double opener: $;;...; or $``text`
-            open_quote = *pos;
-            pos += 2;
-        } else {
-            // $@"...." or $()"....."
+        // $"..." or $@"...."
+        static const char *interp_chars = "~!@#$%^&*+=\\?";
+        if (match(&pos, "$")) { // Disable interpolation with $
+            open_interp = '\x03';
+        } else if (strchr(interp_chars, *pos)) {
            open_interp = *pos;
            ++pos;
-            close_interp = closing[(int)open_interp];
-            if (close_interp && *pos == close_interp)
-                ++pos;
-            open_quote = *pos;
-            ++pos;
        }
+        static const char *quote_chars = "\"'`|/;([{<";
+        if (!strchr(quote_chars, *pos))
+            parser_err(ctx, pos, pos+1, "This is not a valid string quotation character. Valid characters are: \"'`|/;([{<");
+        open_quote = *pos;
+        ++pos;
        close_quote = closing[(int)open_quote] ? closing[(int)open_quote] : open_quote;
-        if (open_interp == close_quote)
-            open_interp = '\0';
    } else {
        return NULL;
    }
@ -1202,16 +1202,9 @@ PARSER(parse_text) {
            }
            ++pos;
            ast_t *interp;
-            if (close_interp) {
-                whitespace(&pos);
-                interp = expect(ctx, interp_start, &pos, parse_expr, "I expected an interpolation expression here");
-                whitespace(&pos);
-                expect_closing(ctx, &pos, (char[]){close_interp, 0}, "I was expecting a '%c' to finish this interpolation", close_interp);
-            } else {
-                if (*pos == ' ' || *pos == '\t')
-                    parser_err(ctx, pos, pos+1, "Whitespace is not allowed before an interpolation here");
-                interp = expect(ctx, interp_start, &pos, parse_term, "I expected an interpolation term here");
-            }
+            if (*pos == ' ' || *pos == '\t')
+                parser_err(ctx, pos, pos+1, "Whitespace is not allowed before an interpolation here");
+            interp = expect(ctx, interp_start, &pos, parse_term, "I expected an interpolation term here");
            chunks = new(ast_list_t, .ast=interp, .next=chunks);
            chunk_start = pos;
        } else if (!leading_newline && *pos == open_quote && closing[(int)open_quote]) { // Nested pair begin
@ -2128,7 +2121,7 @@ PARSER(parse_say) {
    ast_list_t *chunks = NULL;
    CORD chunk = CORD_EMPTY;
    const char *chunk_start = pos;
-    const char open_interp = '{', close_interp = '}';
+    const char open_interp = '$';
    while (pos < ctx->file->text + ctx->file->len) {
        if (*pos == open_interp) { // Interpolation
            const char *interp_start = pos;
@ -2139,16 +2132,9 @@ PARSER(parse_say) {
            }
            ++pos;
            ast_t *interp;
-            if (close_interp) {
-                whitespace(&pos);
-                interp = expect(ctx, interp_start, &pos, parse_expr, "I expected an interpolation expression here");
-                whitespace(&pos);
-                expect_closing(ctx, &pos, (char[]){close_interp, 0}, "I was expecting a '%c' to finish this interpolation", close_interp);
-            } else {
-                if (*pos == ' ' || *pos == '\t')
-                    parser_err(ctx, pos, pos+1, "Whitespace is not allowed before an interpolation here");
-                interp = expect(ctx, interp_start, &pos, parse_term, "I expected an interpolation term here");
-            }
+            if (*pos == ' ' || *pos == '\t')
+                parser_err(ctx, pos, pos+1, "Whitespace is not allowed before an interpolation here");
+            interp = expect(ctx, interp_start, &pos, parse_term, "I expected an interpolation term here");
            chunks = new(ast_list_t, .ast=interp, .next=chunks);
            chunk_start = pos;
        } else if (*pos == '\r' || *pos == '\n') { // Newline
--- a/test/arrays.tm
+++ b/test/arrays.tm
@ -23,7 +23,7 @@ func main():

 		str := ""
 		for i,x in arr:
-			str ++= "({i},{x})"
+			str ++= "($i,$x)"
 		>> str
 		= "(1,10)(2,20)(3,30)"

@ -148,7 +148,7 @@ func main():
 		xs := ["A", "B", "C", "D"]
 		for i,x in xs:to(-2):
 			for y in xs:from(i+1):
-				say("{x}{y}")
+				say("$(x)$(y)")

 	do:
 		>> nums := [-7, -4, -1, 2, 5]
--- a/test/corecursive_func.tm
+++ b/test/corecursive_func.tm
@ -1,14 +1,14 @@
 func ping(x:Int)->[Text]:
 	if x > 0:
-		return ["ping: {x}"] ++ pong(x-1)
+		return ["ping: $x"] ++ pong(x-1)
 	else:
-		return ["ping: {x}"]
+		return ["ping: $x"]

 func pong(x:Int)->[Text]:
 	if x > 0:
-		return ["pong: {x}"] ++ ping(x-1)
+		return ["pong: $x"] ++ ping(x-1)
 	else:
-		return ["pong: {x}"]
+		return ["pong: $x"]

 func main():
 	>> ping(3)
--- a/test/defer.tm
+++ b/test/defer.tm
@ -16,7 +16,7 @@ func main():

    for word in ["first", "second", "third"]:
        defer:
-            say("Got {word} deferred")
+            say("Got $word deferred")

        if word == "second":
            say("<skipped>")
@ -27,7 +27,7 @@ func main():

        for i in 3:
            defer:
-                say("Inner loop deferred {i}")
+                say("Inner loop deferred $i")

            if i == 2:
                say("<skipped inner>")
--- a/test/enums.tm
+++ b/test/enums.tm
@ -5,15 +5,15 @@ func choose_text(f:Foo)->Text:
 	when f is Zero:
 		return "Zero"
 	is One(one):
-		return "One: {one}"
+		return "One: $one"
 	is Two(x, y):
-		return "Two: x={x}, y={y}"
+		return "Two: x=$x, y=$y"
 	is Three(three):
-		return "Three: {three}"
+		return "Three: $three"
 	is Four:
 		return "Four"
 	else:
-		return "else: {f}"
+		return "else: $f"

 func main():
 	>> Foo.Zero
--- a/test/for.tm
+++ b/test/for.tm
@ -2,7 +2,7 @@
 func all_nums(nums:[Int])->Text:
 	result := ""
 	for num in nums:
-		result ++= "{num},"
+		result ++= "$num,"
 	else:
 		return "EMPTY"
 	return result
@ -10,7 +10,7 @@ func all_nums(nums:[Int])->Text:
 func labeled_nums(nums:[Int])->Text:
 	result := ""
 	for i,num in nums:
-		result ++= "{i}:{num},"
+		result ++= "$i:$num,"
 	else:
 		return "EMPTY"
 	return result
@ -18,14 +18,14 @@ func labeled_nums(nums:[Int])->Text:
 func table_str(t:{Text:Text})->Text:
 	str := ""
 	for k,v in t:
-		str ++= "{k}:{v},"
+		str ++= "$k:$v,"
 	else: return "EMPTY"
 	return str

 func table_key_str(t:{Text:Text})->Text:
 	str := ""
 	for k in t:
-		str ++= "{k},"
+		str ++= "$k,"
 	else: return "EMPTY"
 	return str

--- a/test/integers.tm
+++ b/test/integers.tm
@ -28,7 +28,7 @@ func main():

 	nums := ""
 	for x in 5:
-		nums ++= "{x},"
+		nums ++= "$x,"
 	>> nums
 	= "1,2,3,4,5,"

@ -79,6 +79,6 @@ func main():
 		for in 20:
 			>> n := Int.random(-999999, 999999)
 			>> d := Int.random(-999, 999)
-			//! n={n}, d={d}:
+			//! n=$n, d=$d:
 			>> (n/d)*d + (n mod d) == n
 			= yes
--- a/test/iterators.tm
+++ b/test/iterators.tm
@ -19,15 +19,15 @@ func range(first:Int, last:Int)->func()->RangeIteration:
 func main():
    values := ["A", "B", "C", "D"]

-    >> ((++) "({foo}{baz})" for foo, baz in pairwise(values))
+    >> ((++) "($(foo)$(baz))" for foo, baz in pairwise(values))
    = "(AB)(BC)(CD)"
-    >> ["{foo}{baz}" for foo, baz in pairwise(values)]
+    >> ["$(foo)$(baz)" for foo, baz in pairwise(values)]
    = ["AB", "BC", "CD"]

    do:
        result := [:Text]
        for foo, baz in pairwise(values):
-            result:insert("{foo}{baz}")
+            result:insert("$(foo)$(baz)")
        >> result
        = ["AB", "BC", "CD"]

--- a/test/lambdas.tm
+++ b/test/lambdas.tm
@ -12,7 +12,7 @@ func main():
 	>> add_one(10)
 	= 11

-	>> shout := func(msg:Text): say("{msg:upper()}!")
+	>> shout := func(msg:Text): say("$(msg:upper())!")
 	>> shout("hello")

 	>> asdf := add_one
@ -36,7 +36,7 @@ func main():
    fn := func():
        return func():
            return func():
-                defer: //! {outer}
+                defer: //! $outer
                return outer
    >> fn()()()
 	= "Hello"
--- a/test/lang.tm
+++ b/test/lang.tm
@ -1,5 +1,5 @@
 lang HTML:
-	HEADER := $HTML$"<!DOCTYPE HTML>"
+	HEADER := $HTML"<!DOCTYPE HTML>"
 	func escape(t:Text)->HTML:
 		t = t:replace("&", "&amp;")
 		t = t:replace("<", "&lt;")
@ -9,25 +9,25 @@ lang HTML:
 		return HTML.from_unsafe_text(t)

 	func escape_int(i:Int)->HTML:
-		return HTML.from_unsafe_text("{i}")
+		return HTML.from_unsafe_text("$i")
 	
 	func paragraph(content:HTML)->HTML:
-		return $HTML$"<p>$content</p>"
+		return $HTML"<p>$content</p>"

 func main():
 	>> HTML.HEADER
 	= $HTML"<!DOCTYPE HTML>"

 	>> user := "I <3 hax"
-	>> html := $HTML$"Hello $user!"
+	>> html := $HTML"Hello $user!"
 	= $HTML"Hello I &lt;3 hax!"
-	>> html ++ $HTML$"<br>"
+	>> html ++ $HTML"<br>"
 	= $HTML"Hello I &lt;3 hax!<br>"

-	>> $HTML{}"{1 + 2}"
+	>> $HTML"$(1 + 2)"
 	= $HTML"3"

-	>> $HTML{}"{3_i8}"
+	>> $HTML"$(3_i8)"
 	= $HTML"3"

 	>> html:paragraph()
--- a/test/tables.tm
+++ b/test/tables.tm
@ -11,7 +11,7 @@ func main():

 	t_str := ""
 	for k,v in t:
-		t_str ++= "({k}:{v})"
+		t_str ++= "($k:$v)"
 	>> t_str
 	= "(one:1)(two:2)"

@ -42,7 +42,7 @@ func main():

 	t2_str := ""
 	for k,v in t2:
-		t2_str ++= "({k}:{v})"
+		t2_str ++= "($k:$v)"
 	>> t2_str
 	= "(three:3)"

--- a/test/text.tm
+++ b/test/text.tm
@ -1,6 +1,6 @@
 func main():
 	>> str := "Hello Amélie!"
-	//! Testing strings like {str}
+	//! Testing strings like $str

 	>> str:upper()
 	= "HELLO AMÉLIE!"
@ -19,7 +19,7 @@ func main():
 	>> \UE9 == \U65\U301
 	= yes

-	>> amelie := "Am{\UE9}lie"
+	>> amelie := "Am$(\UE9)lie"
 	>> amelie:clusters()
 	= ["A", "m", "é", "l", "i", "e"] : [Text]
 	>> amelie:codepoints()
@ -35,7 +35,7 @@ func main():
 	>> amelie:num_bytes()
 	= 8

-	>> amelie2 := "Am{\U65\U301}lie"
+	>> amelie2 := "Am$(\U65\U301)lie"
 	>> amelie2:clusters()
 	= ["A", "m", "é", "l", "i", "e"] : [Text]
 	>> amelie2:codepoints()
@ -103,3 +103,19 @@ func main():
 	"
 	= "line one\nline two"

+	//! Interpolation tests:
+	>> "A $(1+2)"
+	= "A 3"
+	>> 'A $(1+2)'
+	= "A $(1+2)"
+	>> `A $(1+2)`
+	= "A 3"
+
+	>> $"A $(1+2)"
+	= "A 3"
+	>> $$"A $(1+2)"
+	= "A $(1+2)"
+	>> $="A =(1+2)"
+	= "A 3"
+	>> $(one (nested) two $(1+2))
+	= "one (nested) two 3"
--- a/typecheck.c
+++ b/typecheck.c
@ -525,7 +525,14 @@ type_t *get_type(env_t *env, ast_t *ast)
    case TextLiteral: return TEXT_TYPE;
    case TextJoin: {
        const char *lang = Match(ast, TextJoin)->lang;
-        return lang ? Match(get_binding(env, lang)->type, TypeInfoType)->type : TEXT_TYPE;
+        if (lang) {
+            binding_t *b = get_binding(env, lang);
+            if (!b || b->type->tag != TypeInfoType || Match(b->type, TypeInfoType)->type->tag != TextType)
+                code_err(ast, "There is no text language called '%s'", lang);
+            return Match(get_binding(env, lang)->type, TypeInfoType)->type;
+        } else {
+            return TEXT_TYPE;
+        }
    }
    case Var: {
        auto var = Match(ast, Var);