From f4b04a1b8cd882e25fee592c819650c9b7e8566b Mon Sep 17 00:00:00 2001
From: Bruce Hill <bruce@bruce-hill.com>
Date: Sun, 18 Aug 2024 14:44:15 -0400
Subject: Improved syntax for dollar-string literals

---
 docs/strings.md          | 64 ++++++++++++++++++++++++++-------------------
 parse.c                  | 68 +++++++++++++++++++-----------------------------
 test/arrays.tm           |  4 +--
 test/corecursive_func.tm |  8 +++---
 test/defer.tm            |  4 +--
 test/enums.tm            |  8 +++---
 test/for.tm              |  8 +++---
 test/integers.tm         |  4 +--
 test/iterators.tm        |  6 ++---
 test/lambdas.tm          |  4 +--
 test/lang.tm             | 14 +++++-----
 test/tables.tm           |  4 +--
 test/text.tm             | 22 +++++++++++++---
 typecheck.c              |  9 ++++++-
 14 files changed, 123 insertions(+), 104 deletions(-)

diff --git a/docs/strings.md b/docs/strings.md
index 343b6bc3..8e740765 100644
--- a/docs/strings.md
+++ b/docs/strings.md
@@ -17,6 +17,7 @@ without using printf-style string formatting.
 // Basic string:
 str := "Hello world"
 str2 := 'Also a string'
+str3 := `Backticks too`
 ```
 
 ## Line Splits
@@ -59,13 +60,18 @@ multi_line := "
 
 ## String Interpolations
 
-Inside a double quoted string, you can use curly braces (`{...}`) to insert an
+Inside a double-quoted or backtick-quoted string, you can use a dollar sign (`$`) to insert an
 expression that you want converted to a string. This is called string
 interpolation:
 
 ```
 // Interpolation:
-str := "Sum: {1 + 2}"
+my_var := 5
+str := "My var is $my_var!"
+// Equivalent to "My var is 5!"
+
+// Using parentheses:
+str := "Sum: $(1 + 2)"
 // equivalent to "Sum: 3"
 ```
 
@@ -73,7 +79,7 @@ Single-quoted strings do not have interpolations:
 
 ```
 // No interpolation here:
-str := 'Sum: {1 + 2}'
+str := 'Sum: $(1 + 2)'
 ```
 
 ## String Escapes
@@ -89,11 +95,12 @@ crlf := \r\n
 quote := \"
 ```
 
-These string literals can be used as interpolation values:
+These string literals can be used as interpolation values with or without
+parentheses, depending on which you find more readable:
 
 ```
-two_lines := "one{\n}two"
-has_quotes := "some {\"}quotes{\"} here"
+two_lines := "one$(\n)two"
+has_quotes := "some $\"quotes$\" here"
 ```
 
 However, in general it is best practice to use multi-line strings to avoid these problems:
@@ -163,32 +170,35 @@ str := "
 >>> str == "{\n}blank lines{\n}"
 ```
 
-### Advanced $-Strings
+### Customizable $-Strings
+
+Sometimes you might need to use a lot of literal `$`s or quotation marks in a
+string. In such cases, you can use the more customizable form of strings. The
+customizable form lets you explicitly specify which character to use for
+interpolation and which characters to use for delimiting the string.
+
+The first character after the `$` is an optional custom interpolation character,
+which can be any of the following symbols: `~!@#$%^&*+=\?`. If none of these
+characters is given, the interpolation character defaults to `$`. Because
+writing `$` explicitly would be redundant, it instead disables interpolation
+altogether (i.e. a string starting with a double `$$`).
 
-Sometimes you need to use many `{`s or `"`s inside a string, but you don't want
-to type `{\{}` or `{\"}` each time. In such cases, you can use the more
-advanced form of strings. The advanced form lets you explicitly specify which
-characters are used for interpolation and which characters are used for
-opening/closing the string. Advanced strings begin with a dollar sign (`$`),
-followed by what interpolation style to use, followed by the character to use
-to delimit the string, followed by the string contents and a closing string
-delimiter. The interpolation style can be a matching pair (`()`, `[]`, `{}`, or
-`<>`) or any other single character. When the interpolation style is a matching
-pair, the interpolation is any expression enclosed in that pair (e.g.
-`${}"interpolate {1 + 2}"`). When the interpolation style is a single
-character, the interpolation must be either a parenthesized expression or a
-single term with no infix operators (e.g. a variable), for example:
-`$@"Interpolate @var or @(1 + 2)"`.
+The next thing in a customizable string is the character used to delimit the
+string. The string delimiter can be any of the following symbols: `` "'`|/;([{< ``.
+If the string delimiter is one of `([{<`, then the string will continue until a
+matching `)]}>` is found, not terminating unless the delimiters are balanced
+(i.e. nested pairs of delimiters are considered part of the string).
 
 Here are some examples:
 
 ```
-$[]"In here, quotes delimit the string and square brackets interpolate: [1 + 2]"
-$@"For single-letter interpolations, the interpolation is a single term like @my_var without a closing symbol"
-$@"But you can parenthesize expressions like: @(x + y) if you need to"
-$$"Double dollars means dollar signs interpolate: $my_var $(1 + 2)"
-$${If you have a string with "quotes" and 'single quotes', you can choose something else like curly braces to delimit the string}
-$?#Here hashes delimit the string and question marks interpolate: ?(1 + 2)#
+$"Equivalent to a normal string with dollar interps: $(1 + 2)"
+$@"The same, but the AT symbol interpolates: @(1 + 2)"
+$$"No interpolation here, $ is just a literal character"
+$|This string is pipe-delimited, so it can have "quotes" and 'single quotes' and interpolates with dollar sign: $(1+2)|
+$(This string is parens-delimited, so you can have (nested) parens without ending the string)
+$=[This string is square-bracket delimited [which can be nested] and uses equals for interps: =(1 + 2)]
+$@/look ma, regex literals!/
 ```
 
 When strings are delimited by matching pairs (`()`, `[]`, `{}`, or `<>`), they
diff --git a/parse.c b/parse.c
index f8f1a512..740791b5 100644
--- a/parse.c
+++ b/parse.c
@@ -1141,7 +1141,8 @@ PARSER(parse_bool) {
 }
 
 PARSER(parse_text) {
-    // ["$" [name] [interp-char [closing-interp-char]]] ('"' ... '"' / "'" ... "'")
+    // ('"' ... '"' / "'" ... "'" / "`" ... "`")
+    // "$" [name] [interp-char] quote-char ... close-quote
     const char *start = pos;
     const char *lang = NULL;
 
@@ -1156,30 +1157,29 @@ PARSER(parse_text) {
         return NewAST(ctx->file, start, pos, TextLiteral, .cord=cord);
     }
 
-    char open_quote, close_quote, open_interp = '\x03', close_interp = '\x02';
-    if (match(&pos, "\"")) {
-        open_quote = '"', close_quote = '"', open_interp = '{', close_interp = '}';
-    } else if (match(&pos, "'")) {
-        open_quote = '\'', close_quote = '\'';
-    } else if (match(&pos, "$")) {
+    char open_quote, close_quote, open_interp = '$';
+    if (match(&pos, "\"")) { // Double quote
+        open_quote = '"', close_quote = '"', open_interp = '$';
+    } else if (match(&pos, "`")) { // Backtick
+        open_quote = '`', close_quote = '`', open_interp = '$';
+    } else if (match(&pos, "'")) { // Single quote
+        open_quote = '\'', close_quote = '\'', open_interp = '\x03';
+    } else if (match(&pos, "$")) { // Customized strings
         lang = get_id(&pos);
-        if (pos[1] == pos[0]) {
-            // Disable interp using a double opener: $;;...; or $``text`
-            open_quote = *pos;
-            pos += 2;
-        } else {
-            // $@"...." or $()"....."
+        // $"..." or $@"...."
+        static const char *interp_chars = "~!@#$%^&*+=\\?";
+        if (match(&pos, "$")) { // Disable interpolation with $
+            open_interp = '\x03';
+        } else if (strchr(interp_chars, *pos)) {
             open_interp = *pos;
             ++pos;
-            close_interp = closing[(int)open_interp];
-            if (close_interp && *pos == close_interp)
-                ++pos;
-            open_quote = *pos;
-            ++pos;
         }
+        static const char *quote_chars = "\"'`|/;([{<";
+        if (!strchr(quote_chars, *pos))
+            parser_err(ctx, pos, pos+1, "This is not a valid string quotation character. Valid characters are: \"'`|/;([{<");
+        open_quote = *pos;
+        ++pos;
         close_quote = closing[(int)open_quote] ? closing[(int)open_quote] : open_quote;
-        if (open_interp == close_quote)
-            open_interp = '\0';
     } else {
         return NULL;
     }
@@ -1202,16 +1202,9 @@ PARSER(parse_text) {
             }
             ++pos;
             ast_t *interp;
-            if (close_interp) {
-                whitespace(&pos);
-                interp = expect(ctx, interp_start, &pos, parse_expr, "I expected an interpolation expression here");
-                whitespace(&pos);
-                expect_closing(ctx, &pos, (char[]){close_interp, 0}, "I was expecting a '%c' to finish this interpolation", close_interp);
-            } else {
-                if (*pos == ' ' || *pos == '\t')
-                    parser_err(ctx, pos, pos+1, "Whitespace is not allowed before an interpolation here");
-                interp = expect(ctx, interp_start, &pos, parse_term, "I expected an interpolation term here");
-            }
+            if (*pos == ' ' || *pos == '\t')
+                parser_err(ctx, pos, pos+1, "Whitespace is not allowed before an interpolation here");
+            interp = expect(ctx, interp_start, &pos, parse_term, "I expected an interpolation term here");
             chunks = new(ast_list_t, .ast=interp, .next=chunks);
             chunk_start = pos;
         } else if (!leading_newline && *pos == open_quote && closing[(int)open_quote]) { // Nested pair begin
@@ -2128,7 +2121,7 @@ PARSER(parse_say) {
     ast_list_t *chunks = NULL;
     CORD chunk = CORD_EMPTY;
     const char *chunk_start = pos;
-    const char open_interp = '{', close_interp = '}';
+    const char open_interp = '$';
     while (pos < ctx->file->text + ctx->file->len) {
         if (*pos == open_interp) { // Interpolation
             const char *interp_start = pos;
@@ -2139,16 +2132,9 @@ PARSER(parse_say) {
             }
             ++pos;
             ast_t *interp;
-            if (close_interp) {
-                whitespace(&pos);
-                interp = expect(ctx, interp_start, &pos, parse_expr, "I expected an interpolation expression here");
-                whitespace(&pos);
-                expect_closing(ctx, &pos, (char[]){close_interp, 0}, "I was expecting a '%c' to finish this interpolation", close_interp);
-            } else {
-                if (*pos == ' ' || *pos == '\t')
-                    parser_err(ctx, pos, pos+1, "Whitespace is not allowed before an interpolation here");
-                interp = expect(ctx, interp_start, &pos, parse_term, "I expected an interpolation term here");
-            }
+            if (*pos == ' ' || *pos == '\t')
+                parser_err(ctx, pos, pos+1, "Whitespace is not allowed before an interpolation here");
+            interp = expect(ctx, interp_start, &pos, parse_term, "I expected an interpolation term here");
             chunks = new(ast_list_t, .ast=interp, .next=chunks);
             chunk_start = pos;
         } else if (*pos == '\r' || *pos == '\n') { // Newline
diff --git a/test/arrays.tm b/test/arrays.tm
index c870ce04..76507c5b 100644
--- a/test/arrays.tm
+++ b/test/arrays.tm
@@ -23,7 +23,7 @@ func main():
 
 		str := ""
 		for i,x in arr:
-			str ++= "({i},{x})"
+			str ++= "($i,$x)"
 		>> str
 		= "(1,10)(2,20)(3,30)"
 
@@ -148,7 +148,7 @@ func main():
 		xs := ["A", "B", "C", "D"]
 		for i,x in xs:to(-2):
 			for y in xs:from(i+1):
-				say("{x}{y}")
+				say("$(x)$(y)")
 
 	do:
 		>> nums := [-7, -4, -1, 2, 5]
diff --git a/test/corecursive_func.tm b/test/corecursive_func.tm
index a5c13dde..168300d4 100644
--- a/test/corecursive_func.tm
+++ b/test/corecursive_func.tm
@@ -1,14 +1,14 @@
 func ping(x:Int)->[Text]:
 	if x > 0:
-		return ["ping: {x}"] ++ pong(x-1)
+		return ["ping: $x"] ++ pong(x-1)
 	else:
-		return ["ping: {x}"]
+		return ["ping: $x"]
 
 func pong(x:Int)->[Text]:
 	if x > 0:
-		return ["pong: {x}"] ++ ping(x-1)
+		return ["pong: $x"] ++ ping(x-1)
 	else:
-		return ["pong: {x}"]
+		return ["pong: $x"]
 
 func main():
 	>> ping(3)
diff --git a/test/defer.tm b/test/defer.tm
index 121878b1..deccaa70 100644
--- a/test/defer.tm
+++ b/test/defer.tm
@@ -16,7 +16,7 @@ func main():
 
     for word in ["first", "second", "third"]:
         defer:
-            say("Got {word} deferred")
+            say("Got $word deferred")
 
         if word == "second":
             say("<skipped>")
@@ -27,7 +27,7 @@ func main():
 
         for i in 3:
             defer:
-                say("Inner loop deferred {i}")
+                say("Inner loop deferred $i")
 
             if i == 2:
                 say("<skipped inner>")
diff --git a/test/enums.tm b/test/enums.tm
index 65ef6398..b734d487 100644
--- a/test/enums.tm
+++ b/test/enums.tm
@@ -5,15 +5,15 @@ func choose_text(f:Foo)->Text:
 	when f is Zero:
 		return "Zero"
 	is One(one):
-		return "One: {one}"
+		return "One: $one"
 	is Two(x, y):
-		return "Two: x={x}, y={y}"
+		return "Two: x=$x, y=$y"
 	is Three(three):
-		return "Three: {three}"
+		return "Three: $three"
 	is Four:
 		return "Four"
 	else:
-		return "else: {f}"
+		return "else: $f"
 
 func main():
 	>> Foo.Zero
diff --git a/test/for.tm b/test/for.tm
index a050c892..6ac77be6 100644
--- a/test/for.tm
+++ b/test/for.tm
@@ -2,7 +2,7 @@
 func all_nums(nums:[Int])->Text:
 	result := ""
 	for num in nums:
-		result ++= "{num},"
+		result ++= "$num,"
 	else:
 		return "EMPTY"
 	return result
@@ -10,7 +10,7 @@ func all_nums(nums:[Int])->Text:
 func labeled_nums(nums:[Int])->Text:
 	result := ""
 	for i,num in nums:
-		result ++= "{i}:{num},"
+		result ++= "$i:$num,"
 	else:
 		return "EMPTY"
 	return result
@@ -18,14 +18,14 @@ func labeled_nums(nums:[Int])->Text:
 func table_str(t:{Text:Text})->Text:
 	str := ""
 	for k,v in t:
-		str ++= "{k}:{v},"
+		str ++= "$k:$v,"
 	else: return "EMPTY"
 	return str
 
 func table_key_str(t:{Text:Text})->Text:
 	str := ""
 	for k in t:
-		str ++= "{k},"
+		str ++= "$k,"
 	else: return "EMPTY"
 	return str
 
diff --git a/test/integers.tm b/test/integers.tm
index 36119f01..11603b22 100644
--- a/test/integers.tm
+++ b/test/integers.tm
@@ -28,7 +28,7 @@ func main():
 
 	nums := ""
 	for x in 5:
-		nums ++= "{x},"
+		nums ++= "$x,"
 	>> nums
 	= "1,2,3,4,5,"
 
@@ -79,6 +79,6 @@ func main():
 		for in 20:
 			>> n := Int.random(-999999, 999999)
 			>> d := Int.random(-999, 999)
-			//! n={n}, d={d}:
+			//! n=$n, d=$d:
 			>> (n/d)*d + (n mod d) == n
 			= yes
diff --git a/test/iterators.tm b/test/iterators.tm
index 999194d9..1f7ce342 100644
--- a/test/iterators.tm
+++ b/test/iterators.tm
@@ -19,15 +19,15 @@ func range(first:Int, last:Int)->func()->RangeIteration:
 func main():
     values := ["A", "B", "C", "D"]
 
-    >> ((++) "({foo}{baz})" for foo, baz in pairwise(values))
+    >> ((++) "($(foo)$(baz))" for foo, baz in pairwise(values))
     = "(AB)(BC)(CD)"
-    >> ["{foo}{baz}" for foo, baz in pairwise(values)]
+    >> ["$(foo)$(baz)" for foo, baz in pairwise(values)]
     = ["AB", "BC", "CD"]
 
     do:
         result := [:Text]
         for foo, baz in pairwise(values):
-            result:insert("{foo}{baz}")
+            result:insert("$(foo)$(baz)")
         >> result
         = ["AB", "BC", "CD"]
 
diff --git a/test/lambdas.tm b/test/lambdas.tm
index 40f24abb..8d543bfc 100644
--- a/test/lambdas.tm
+++ b/test/lambdas.tm
@@ -12,7 +12,7 @@ func main():
 	>> add_one(10)
 	= 11
 
-	>> shout := func(msg:Text): say("{msg:upper()}!")
+	>> shout := func(msg:Text): say("$(msg:upper())!")
 	>> shout("hello")
 
 	>> asdf := add_one
@@ -36,7 +36,7 @@ func main():
     fn := func():
         return func():
             return func():
-                defer: //! {outer}
+                defer: //! $outer
                 return outer
     >> fn()()()
 	= "Hello"
diff --git a/test/lang.tm b/test/lang.tm
index 01551e27..dfe1c663 100644
--- a/test/lang.tm
+++ b/test/lang.tm
@@ -1,5 +1,5 @@
 lang HTML:
-	HEADER := $HTML$"<!DOCTYPE HTML>"
+	HEADER := $HTML"<!DOCTYPE HTML>"
 	func escape(t:Text)->HTML:
 		t = t:replace("&", "&amp;")
 		t = t:replace("<", "&lt;")
@@ -9,25 +9,25 @@ lang HTML:
 		return HTML.from_unsafe_text(t)
 
 	func escape_int(i:Int)->HTML:
-		return HTML.from_unsafe_text("{i}")
+		return HTML.from_unsafe_text("$i")
 	
 	func paragraph(content:HTML)->HTML:
-		return $HTML$"<p>$content</p>"
+		return $HTML"<p>$content</p>"
 
 func main():
 	>> HTML.HEADER
 	= $HTML"<!DOCTYPE HTML>"
 
 	>> user := "I <3 hax"
-	>> html := $HTML$"Hello $user!"
+	>> html := $HTML"Hello $user!"
 	= $HTML"Hello I &lt;3 hax!"
-	>> html ++ $HTML$"<br>"
+	>> html ++ $HTML"<br>"
 	= $HTML"Hello I &lt;3 hax!<br>"
 
-	>> $HTML{}"{1 + 2}"
+	>> $HTML"$(1 + 2)"
 	= $HTML"3"
 
-	>> $HTML{}"{3_i8}"
+	>> $HTML"$(3_i8)"
 	= $HTML"3"
 
 	>> html:paragraph()
diff --git a/test/tables.tm b/test/tables.tm
index d02a5272..7f8383d8 100644
--- a/test/tables.tm
+++ b/test/tables.tm
@@ -11,7 +11,7 @@ func main():
 
 	t_str := ""
 	for k,v in t:
-		t_str ++= "({k}:{v})"
+		t_str ++= "($k:$v)"
 	>> t_str
 	= "(one:1)(two:2)"
 
@@ -42,7 +42,7 @@ func main():
 
 	t2_str := ""
 	for k,v in t2:
-		t2_str ++= "({k}:{v})"
+		t2_str ++= "($k:$v)"
 	>> t2_str
 	= "(three:3)"
 
diff --git a/test/text.tm b/test/text.tm
index 4051a16f..2666b6c8 100644
--- a/test/text.tm
+++ b/test/text.tm
@@ -1,6 +1,6 @@
 func main():
 	>> str := "Hello Amélie!"
-	//! Testing strings like {str}
+	//! Testing strings like $str
 
 	>> str:upper()
 	= "HELLO AMÉLIE!"
@@ -19,7 +19,7 @@ func main():
 	>> \UE9 == \U65\U301
 	= yes
 
-	>> amelie := "Am{\UE9}lie"
+	>> amelie := "Am$(\UE9)lie"
 	>> amelie:clusters()
 	= ["A", "m", "é", "l", "i", "e"] : [Text]
 	>> amelie:codepoints()
@@ -35,7 +35,7 @@ func main():
 	>> amelie:num_bytes()
 	= 8
 
-	>> amelie2 := "Am{\U65\U301}lie"
+	>> amelie2 := "Am$(\U65\U301)lie"
 	>> amelie2:clusters()
 	= ["A", "m", "é", "l", "i", "e"] : [Text]
 	>> amelie2:codepoints()
@@ -103,3 +103,19 @@ func main():
 	"
 	= "line one\nline two"
 
+	//! Interpolation tests:
+	>> "A $(1+2)"
+	= "A 3"
+	>> 'A $(1+2)'
+	= "A $(1+2)"
+	>> `A $(1+2)`
+	= "A 3"
+
+	>> $"A $(1+2)"
+	= "A 3"
+	>> $$"A $(1+2)"
+	= "A $(1+2)"
+	>> $="A =(1+2)"
+	= "A 3"
+	>> $(one (nested) two $(1+2))
+	= "one (nested) two 3"
diff --git a/typecheck.c b/typecheck.c
index 3e7084c8..b79b903c 100644
--- a/typecheck.c
+++ b/typecheck.c
@@ -525,7 +525,14 @@ type_t *get_type(env_t *env, ast_t *ast)
     case TextLiteral: return TEXT_TYPE;
     case TextJoin: {
         const char *lang = Match(ast, TextJoin)->lang;
-        return lang ? Match(get_binding(env, lang)->type, TypeInfoType)->type : TEXT_TYPE;
+        if (lang) {
+            binding_t *b = get_binding(env, lang);
+            if (!b || b->type->tag != TypeInfoType || Match(b->type, TypeInfoType)->type->tag != TextType)
+                code_err(ast, "There is no text language called '%s'", lang);
+            return Match(get_binding(env, lang)->type, TypeInfoType)->type;
+        } else {
+            return TEXT_TYPE;
+        }
     }
     case Var: {
         auto var = Match(ast, Var);
-- 
cgit v1.2.3