aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBruce Hill <bruce@bruce-hill.com>2024-08-18 14:44:15 -0400
committerBruce Hill <bruce@bruce-hill.com>2024-08-18 14:44:15 -0400
commitf4b04a1b8cd882e25fee592c819650c9b7e8566b (patch)
treedcecb8b4f83d569ebb00beb79988222d195b8f4c
parent04603308af3a2984d42eaa9e301cac0ffbded2a4 (diff)
Improved syntax for dollar-string literals
-rw-r--r--docs/strings.md64
-rw-r--r--parse.c68
-rw-r--r--test/arrays.tm4
-rw-r--r--test/corecursive_func.tm8
-rw-r--r--test/defer.tm4
-rw-r--r--test/enums.tm8
-rw-r--r--test/for.tm8
-rw-r--r--test/integers.tm4
-rw-r--r--test/iterators.tm6
-rw-r--r--test/lambdas.tm4
-rw-r--r--test/lang.tm14
-rw-r--r--test/tables.tm4
-rw-r--r--test/text.tm22
-rw-r--r--typecheck.c9
14 files changed, 123 insertions, 104 deletions
diff --git a/docs/strings.md b/docs/strings.md
index 343b6bc3..8e740765 100644
--- a/docs/strings.md
+++ b/docs/strings.md
@@ -17,6 +17,7 @@ without using printf-style string formatting.
// Basic string:
str := "Hello world"
str2 := 'Also a string'
+str3 := `Backticks too`
```
## Line Splits
@@ -59,13 +60,18 @@ multi_line := "
## String Interpolations
-Inside a double quoted string, you can use curly braces (`{...}`) to insert an
+Inside a double-quoted or backtick-quoted string, you can use a dollar sign (`$`) to insert an
expression that you want converted to a string. This is called string
interpolation:
```
// Interpolation:
-str := "Sum: {1 + 2}"
+my_var := 5
+str := "My var is $my_var!"
+// Equivalent to "My var is 5!"
+
+// Using parentheses:
+str := "Sum: $(1 + 2)"
// equivalent to "Sum: 3"
```
@@ -73,7 +79,7 @@ Single-quoted strings do not have interpolations:
```
// No interpolation here:
-str := 'Sum: {1 + 2}'
+str := 'Sum: $(1 + 2)'
```
## String Escapes
@@ -89,11 +95,12 @@ crlf := \r\n
quote := \"
```
-These string literals can be used as interpolation values:
+These string literals can be used as interpolation values with or without
+parentheses, depending on which you find more readable:
```
-two_lines := "one{\n}two"
-has_quotes := "some {\"}quotes{\"} here"
+two_lines := "one$(\n)two"
+has_quotes := "some $\"quotes$\" here"
```
However, in general it is best practice to use multi-line strings to avoid these problems:
@@ -163,32 +170,35 @@ str := "
>>> str == "{\n}blank lines{\n}"
```
-### Advanced $-Strings
+### Customizable $-Strings
+
+Sometimes you might need to use a lot of literal `$`s or quotation marks in a
+string. In such cases, you can use the more customizable form of strings. The
+customizable form lets you explicitly specify which character to use for
+interpolation and which characters to use for delimiting the string.
+
+The first character after the `$` is the custom interpolation character, which
+can be any of the following symbols: `~!@#$%^&*+=\?`. If none of these
+characters is used, the default interpolation character is `$`. Because `$` is
+already the default, writing it explicitly in this position instead disables
+interpolation altogether (i.e. the string starts with a double `$$`).
-Sometimes you need to use many `{`s or `"`s inside a string, but you don't want
-to type `{\{}` or `{\"}` each time. In such cases, you can use the more
-advanced form of strings. The advanced form lets you explicitly specify which
-characters are used for interpolation and which characters are used for
-opening/closing the string. Advanced strings begin with a dollar sign (`$`),
-followed by what interpolation style to use, followed by the character to use
-to delimit the string, followed by the string contents and a closing string
-delimiter. The interpolation style can be a matching pair (`()`, `[]`, `{}`, or
-`<>`) or any other single character. When the interpolation style is a matching
-pair, the interpolation is any expression enclosed in that pair (e.g.
-`${}"interpolate {1 + 2}"`). When the interpolation style is a single
-character, the interpolation must be either a parenthesized expression or a
-single term with no infix operators (e.g. a variable), for example:
-`$@"Interpolate @var or @(1 + 2)"`.
+The next thing in a customizable string is the character used to delimit the
+string. The string delimiter can be any of the following symbols: `` "'`|/;([{< ``.
+If the string delimiter is one of `([{<`, then the string will continue until a
+matching `)]}>` is found, not terminating unless the delimiters are balanced
+(i.e. nested pairs of delimiters are considered part of the string).
Here are some examples:
```
-$[]"In here, quotes delimit the string and square brackets interpolate: [1 + 2]"
-$@"For single-letter interpolations, the interpolation is a single term like @my_var without a closing symbol"
-$@"But you can parenthesize expressions like: @(x + y) if you need to"
-$$"Double dollars means dollar signs interpolate: $my_var $(1 + 2)"
-$${If you have a string with "quotes" and 'single quotes', you can choose something else like curly braces to delimit the string}
-$?#Here hashes delimit the string and question marks interpolate: ?(1 + 2)#
+$"Equivalent to a normal string with dollar interps: $(1 + 2)"
+$@"The same, but the AT symbol interpolates: @(1 + 2)"
+$$"No interpolation here, $ is just a literal character"
+$|This string is pipe-delimited, so it can have "quotes" and 'single quotes' and interpolates with dollar sign: $(1+2)|
+$(This string is parens-delimited, so you can have (nested) parens without ending the string)
+$=[This string is square-bracket delimited [which can be nested] and uses equals for interps: =(1 + 2)]
+$@/look ma, regex literals!/
```
When strings are delimited by matching pairs (`()`, `[]`, `{}`, or `<>`), they
diff --git a/parse.c b/parse.c
index f8f1a512..740791b5 100644
--- a/parse.c
+++ b/parse.c
@@ -1141,7 +1141,8 @@ PARSER(parse_bool) {
}
PARSER(parse_text) {
- // ["$" [name] [interp-char [closing-interp-char]]] ('"' ... '"' / "'" ... "'")
+ // ('"' ... '"' / "'" ... "'" / "`" ... "`")
+ // "$" [name] [interp-char] quote-char ... close-quote
const char *start = pos;
const char *lang = NULL;
@@ -1156,30 +1157,29 @@ PARSER(parse_text) {
return NewAST(ctx->file, start, pos, TextLiteral, .cord=cord);
}
- char open_quote, close_quote, open_interp = '\x03', close_interp = '\x02';
- if (match(&pos, "\"")) {
- open_quote = '"', close_quote = '"', open_interp = '{', close_interp = '}';
- } else if (match(&pos, "'")) {
- open_quote = '\'', close_quote = '\'';
- } else if (match(&pos, "$")) {
+ char open_quote, close_quote, open_interp = '$';
+ if (match(&pos, "\"")) { // Double quote
+ open_quote = '"', close_quote = '"', open_interp = '$';
+ } else if (match(&pos, "`")) { // Backtick
+ open_quote = '`', close_quote = '`', open_interp = '$';
+ } else if (match(&pos, "'")) { // Single quote
+ open_quote = '\'', close_quote = '\'', open_interp = '\x03';
+ } else if (match(&pos, "$")) { // Customized strings
lang = get_id(&pos);
- if (pos[1] == pos[0]) {
- // Disable interp using a double opener: $;;...; or $``text`
- open_quote = *pos;
- pos += 2;
- } else {
- // $@"...." or $()"....."
+ // $"..." or $@"...."
+ static const char *interp_chars = "~!@#$%^&*+=\\?";
+ if (match(&pos, "$")) { // Disable interpolation with $
+ open_interp = '\x03';
+ } else if (strchr(interp_chars, *pos)) {
open_interp = *pos;
++pos;
- close_interp = closing[(int)open_interp];
- if (close_interp && *pos == close_interp)
- ++pos;
- open_quote = *pos;
- ++pos;
}
+ static const char *quote_chars = "\"'`|/;([{<";
+ if (!strchr(quote_chars, *pos))
+ parser_err(ctx, pos, pos+1, "This is not a valid string quotation character. Valid characters are: \"'`|/;([{<");
+ open_quote = *pos;
+ ++pos;
close_quote = closing[(int)open_quote] ? closing[(int)open_quote] : open_quote;
- if (open_interp == close_quote)
- open_interp = '\0';
} else {
return NULL;
}
@@ -1202,16 +1202,9 @@ PARSER(parse_text) {
}
++pos;
ast_t *interp;
- if (close_interp) {
- whitespace(&pos);
- interp = expect(ctx, interp_start, &pos, parse_expr, "I expected an interpolation expression here");
- whitespace(&pos);
- expect_closing(ctx, &pos, (char[]){close_interp, 0}, "I was expecting a '%c' to finish this interpolation", close_interp);
- } else {
- if (*pos == ' ' || *pos == '\t')
- parser_err(ctx, pos, pos+1, "Whitespace is not allowed before an interpolation here");
- interp = expect(ctx, interp_start, &pos, parse_term, "I expected an interpolation term here");
- }
+ if (*pos == ' ' || *pos == '\t')
+ parser_err(ctx, pos, pos+1, "Whitespace is not allowed before an interpolation here");
+ interp = expect(ctx, interp_start, &pos, parse_term, "I expected an interpolation term here");
chunks = new(ast_list_t, .ast=interp, .next=chunks);
chunk_start = pos;
} else if (!leading_newline && *pos == open_quote && closing[(int)open_quote]) { // Nested pair begin
@@ -2128,7 +2121,7 @@ PARSER(parse_say) {
ast_list_t *chunks = NULL;
CORD chunk = CORD_EMPTY;
const char *chunk_start = pos;
- const char open_interp = '{', close_interp = '}';
+ const char open_interp = '$';
while (pos < ctx->file->text + ctx->file->len) {
if (*pos == open_interp) { // Interpolation
const char *interp_start = pos;
@@ -2139,16 +2132,9 @@ PARSER(parse_say) {
}
++pos;
ast_t *interp;
- if (close_interp) {
- whitespace(&pos);
- interp = expect(ctx, interp_start, &pos, parse_expr, "I expected an interpolation expression here");
- whitespace(&pos);
- expect_closing(ctx, &pos, (char[]){close_interp, 0}, "I was expecting a '%c' to finish this interpolation", close_interp);
- } else {
- if (*pos == ' ' || *pos == '\t')
- parser_err(ctx, pos, pos+1, "Whitespace is not allowed before an interpolation here");
- interp = expect(ctx, interp_start, &pos, parse_term, "I expected an interpolation term here");
- }
+ if (*pos == ' ' || *pos == '\t')
+ parser_err(ctx, pos, pos+1, "Whitespace is not allowed before an interpolation here");
+ interp = expect(ctx, interp_start, &pos, parse_term, "I expected an interpolation term here");
chunks = new(ast_list_t, .ast=interp, .next=chunks);
chunk_start = pos;
} else if (*pos == '\r' || *pos == '\n') { // Newline
diff --git a/test/arrays.tm b/test/arrays.tm
index c870ce04..76507c5b 100644
--- a/test/arrays.tm
+++ b/test/arrays.tm
@@ -23,7 +23,7 @@ func main():
str := ""
for i,x in arr:
- str ++= "({i},{x})"
+ str ++= "($i,$x)"
>> str
= "(1,10)(2,20)(3,30)"
@@ -148,7 +148,7 @@ func main():
xs := ["A", "B", "C", "D"]
for i,x in xs:to(-2):
for y in xs:from(i+1):
- say("{x}{y}")
+ say("$(x)$(y)")
do:
>> nums := [-7, -4, -1, 2, 5]
diff --git a/test/corecursive_func.tm b/test/corecursive_func.tm
index a5c13dde..168300d4 100644
--- a/test/corecursive_func.tm
+++ b/test/corecursive_func.tm
@@ -1,14 +1,14 @@
func ping(x:Int)->[Text]:
if x > 0:
- return ["ping: {x}"] ++ pong(x-1)
+ return ["ping: $x"] ++ pong(x-1)
else:
- return ["ping: {x}"]
+ return ["ping: $x"]
func pong(x:Int)->[Text]:
if x > 0:
- return ["pong: {x}"] ++ ping(x-1)
+ return ["pong: $x"] ++ ping(x-1)
else:
- return ["pong: {x}"]
+ return ["pong: $x"]
func main():
>> ping(3)
diff --git a/test/defer.tm b/test/defer.tm
index 121878b1..deccaa70 100644
--- a/test/defer.tm
+++ b/test/defer.tm
@@ -16,7 +16,7 @@ func main():
for word in ["first", "second", "third"]:
defer:
- say("Got {word} deferred")
+ say("Got $word deferred")
if word == "second":
say("<skipped>")
@@ -27,7 +27,7 @@ func main():
for i in 3:
defer:
- say("Inner loop deferred {i}")
+ say("Inner loop deferred $i")
if i == 2:
say("<skipped inner>")
diff --git a/test/enums.tm b/test/enums.tm
index 65ef6398..b734d487 100644
--- a/test/enums.tm
+++ b/test/enums.tm
@@ -5,15 +5,15 @@ func choose_text(f:Foo)->Text:
when f is Zero:
return "Zero"
is One(one):
- return "One: {one}"
+ return "One: $one"
is Two(x, y):
- return "Two: x={x}, y={y}"
+ return "Two: x=$x, y=$y"
is Three(three):
- return "Three: {three}"
+ return "Three: $three"
is Four:
return "Four"
else:
- return "else: {f}"
+ return "else: $f"
func main():
>> Foo.Zero
diff --git a/test/for.tm b/test/for.tm
index a050c892..6ac77be6 100644
--- a/test/for.tm
+++ b/test/for.tm
@@ -2,7 +2,7 @@
func all_nums(nums:[Int])->Text:
result := ""
for num in nums:
- result ++= "{num},"
+ result ++= "$num,"
else:
return "EMPTY"
return result
@@ -10,7 +10,7 @@ func all_nums(nums:[Int])->Text:
func labeled_nums(nums:[Int])->Text:
result := ""
for i,num in nums:
- result ++= "{i}:{num},"
+ result ++= "$i:$num,"
else:
return "EMPTY"
return result
@@ -18,14 +18,14 @@ func labeled_nums(nums:[Int])->Text:
func table_str(t:{Text:Text})->Text:
str := ""
for k,v in t:
- str ++= "{k}:{v},"
+ str ++= "$k:$v,"
else: return "EMPTY"
return str
func table_key_str(t:{Text:Text})->Text:
str := ""
for k in t:
- str ++= "{k},"
+ str ++= "$k,"
else: return "EMPTY"
return str
diff --git a/test/integers.tm b/test/integers.tm
index 36119f01..11603b22 100644
--- a/test/integers.tm
+++ b/test/integers.tm
@@ -28,7 +28,7 @@ func main():
nums := ""
for x in 5:
- nums ++= "{x},"
+ nums ++= "$x,"
>> nums
= "1,2,3,4,5,"
@@ -79,6 +79,6 @@ func main():
for in 20:
>> n := Int.random(-999999, 999999)
>> d := Int.random(-999, 999)
- //! n={n}, d={d}:
+ //! n=$n, d=$d:
>> (n/d)*d + (n mod d) == n
= yes
diff --git a/test/iterators.tm b/test/iterators.tm
index 999194d9..1f7ce342 100644
--- a/test/iterators.tm
+++ b/test/iterators.tm
@@ -19,15 +19,15 @@ func range(first:Int, last:Int)->func()->RangeIteration:
func main():
values := ["A", "B", "C", "D"]
- >> ((++) "({foo}{baz})" for foo, baz in pairwise(values))
+ >> ((++) "($(foo)$(baz))" for foo, baz in pairwise(values))
= "(AB)(BC)(CD)"
- >> ["{foo}{baz}" for foo, baz in pairwise(values)]
+ >> ["$(foo)$(baz)" for foo, baz in pairwise(values)]
= ["AB", "BC", "CD"]
do:
result := [:Text]
for foo, baz in pairwise(values):
- result:insert("{foo}{baz}")
+ result:insert("$(foo)$(baz)")
>> result
= ["AB", "BC", "CD"]
diff --git a/test/lambdas.tm b/test/lambdas.tm
index 40f24abb..8d543bfc 100644
--- a/test/lambdas.tm
+++ b/test/lambdas.tm
@@ -12,7 +12,7 @@ func main():
>> add_one(10)
= 11
- >> shout := func(msg:Text): say("{msg:upper()}!")
+ >> shout := func(msg:Text): say("$(msg:upper())!")
>> shout("hello")
>> asdf := add_one
@@ -36,7 +36,7 @@ func main():
fn := func():
return func():
return func():
- defer: //! {outer}
+ defer: //! $outer
return outer
>> fn()()()
= "Hello"
diff --git a/test/lang.tm b/test/lang.tm
index 01551e27..dfe1c663 100644
--- a/test/lang.tm
+++ b/test/lang.tm
@@ -1,5 +1,5 @@
lang HTML:
- HEADER := $HTML$"<!DOCTYPE HTML>"
+ HEADER := $HTML"<!DOCTYPE HTML>"
func escape(t:Text)->HTML:
t = t:replace("&", "&amp;")
t = t:replace("<", "&lt;")
@@ -9,25 +9,25 @@ lang HTML:
return HTML.from_unsafe_text(t)
func escape_int(i:Int)->HTML:
- return HTML.from_unsafe_text("{i}")
+ return HTML.from_unsafe_text("$i")
func paragraph(content:HTML)->HTML:
- return $HTML$"<p>$content</p>"
+ return $HTML"<p>$content</p>"
func main():
>> HTML.HEADER
= $HTML"<!DOCTYPE HTML>"
>> user := "I <3 hax"
- >> html := $HTML$"Hello $user!"
+ >> html := $HTML"Hello $user!"
= $HTML"Hello I &lt;3 hax!"
- >> html ++ $HTML$"<br>"
+ >> html ++ $HTML"<br>"
= $HTML"Hello I &lt;3 hax!<br>"
- >> $HTML{}"{1 + 2}"
+ >> $HTML"$(1 + 2)"
= $HTML"3"
- >> $HTML{}"{3_i8}"
+ >> $HTML"$(3_i8)"
= $HTML"3"
>> html:paragraph()
diff --git a/test/tables.tm b/test/tables.tm
index d02a5272..7f8383d8 100644
--- a/test/tables.tm
+++ b/test/tables.tm
@@ -11,7 +11,7 @@ func main():
t_str := ""
for k,v in t:
- t_str ++= "({k}:{v})"
+ t_str ++= "($k:$v)"
>> t_str
= "(one:1)(two:2)"
@@ -42,7 +42,7 @@ func main():
t2_str := ""
for k,v in t2:
- t2_str ++= "({k}:{v})"
+ t2_str ++= "($k:$v)"
>> t2_str
= "(three:3)"
diff --git a/test/text.tm b/test/text.tm
index 4051a16f..2666b6c8 100644
--- a/test/text.tm
+++ b/test/text.tm
@@ -1,6 +1,6 @@
func main():
>> str := "Hello Amélie!"
- //! Testing strings like {str}
+ //! Testing strings like $str
>> str:upper()
= "HELLO AMÉLIE!"
@@ -19,7 +19,7 @@ func main():
>> \UE9 == \U65\U301
= yes
- >> amelie := "Am{\UE9}lie"
+ >> amelie := "Am$(\UE9)lie"
>> amelie:clusters()
= ["A", "m", "é", "l", "i", "e"] : [Text]
>> amelie:codepoints()
@@ -35,7 +35,7 @@ func main():
>> amelie:num_bytes()
= 8
- >> amelie2 := "Am{\U65\U301}lie"
+ >> amelie2 := "Am$(\U65\U301)lie"
>> amelie2:clusters()
= ["A", "m", "é", "l", "i", "e"] : [Text]
>> amelie2:codepoints()
@@ -103,3 +103,19 @@ func main():
"
= "line one\nline two"
+ //! Interpolation tests:
+ >> "A $(1+2)"
+ = "A 3"
+ >> 'A $(1+2)'
+ = "A $(1+2)"
+ >> `A $(1+2)`
+ = "A 3"
+
+ >> $"A $(1+2)"
+ = "A 3"
+ >> $$"A $(1+2)"
+ = "A $(1+2)"
+ >> $="A =(1+2)"
+ = "A 3"
+ >> $(one (nested) two $(1+2))
+ = "one (nested) two 3"
diff --git a/typecheck.c b/typecheck.c
index 3e7084c8..b79b903c 100644
--- a/typecheck.c
+++ b/typecheck.c
@@ -525,7 +525,14 @@ type_t *get_type(env_t *env, ast_t *ast)
case TextLiteral: return TEXT_TYPE;
case TextJoin: {
const char *lang = Match(ast, TextJoin)->lang;
- return lang ? Match(get_binding(env, lang)->type, TypeInfoType)->type : TEXT_TYPE;
+ if (lang) {
+ binding_t *b = get_binding(env, lang);
+ if (!b || b->type->tag != TypeInfoType || Match(b->type, TypeInfoType)->type->tag != TextType)
+ code_err(ast, "There is no text language called '%s'", lang);
+ return Match(get_binding(env, lang)->type, TypeInfoType)->type;
+ } else {
+ return TEXT_TYPE;
+ }
}
case Var: {
auto var = Match(ast, Var);