Improved syntax for dollar-string literals

This commit is contained in:
Bruce Hill 2024-08-18 14:44:15 -04:00
parent 04603308af
commit f4b04a1b8c
14 changed files with 123 additions and 104 deletions

View File

@ -17,6 +17,7 @@ without using printf-style string formatting.
// Basic string:
str := "Hello world"
str2 := 'Also a string'
str3 := `Backticks too`
```
## Line Splits
@ -59,13 +60,18 @@ multi_line := "
## String Interpolations
Inside a double quoted string, you can use curly braces (`{...}`) to insert an
Inside a double-quoted or backtick-quoted string, you can use a dollar sign (`$`) to insert an
expression that you want converted to a string. This is called string
interpolation:
```
// Interpolation:
str := "Sum: {1 + 2}"
my_var := 5
str := "My var is $my_var!"
// Equivalent to "My var is 5!"
// Using parentheses:
str := "Sum: $(1 + 2)"
// equivalent to "Sum: 3"
```
@ -73,7 +79,7 @@ Single-quoted strings do not have interpolations:
```
// No interpolation here:
str := 'Sum: {1 + 2}'
str := 'Sum: $(1 + 2)'
```
## String Escapes
@ -89,11 +95,12 @@ crlf := \r\n
quote := \"
```
These string literals can be used as interpolation values:
These string literals can be used as interpolation values with or without
parentheses, depending on which you find more readable:
```
two_lines := "one{\n}two"
has_quotes := "some {\"}quotes{\"} here"
two_lines := "one$(\n)two"
has_quotes := "some $\"quotes$\" here"
```
However, in general it is best practice to use multi-line strings to avoid these problems:
@ -163,32 +170,35 @@ str := "
>>> str == "$(\n)blank lines$(\n)"
```
### Advanced $-Strings
### Customizable $-Strings
Sometimes you need to use many `{`s or `"`s inside a string, but you don't want
to type `{\{}` or `{\"}` each time. In such cases, you can use the more
advanced form of strings. The advanced form lets you explicitly specify which
characters are used for interpolation and which characters are used for
opening/closing the string. Advanced strings begin with a dollar sign (`$`),
followed by what interpolation style to use, followed by the character to use
to delimit the string, followed by the string contents and a closing string
delimiter. The interpolation style can be a matching pair (`()`, `[]`, `{}`, or
`<>`) or any other single character. When the interpolation style is a matching
pair, the interpolation is any expression enclosed in that pair (e.g.
`${}"interpolate {1 + 2}"`). When the interpolation style is a single
character, the interpolation must be either a parenthesized expression or a
single term with no infix operators (e.g. a variable), for example:
`$@"Interpolate @var or @(1 + 2)"`.
Sometimes you might need to use a lot of literal `$`s or quotation marks in a
string. In such cases, you can use the more customizable form of strings. The
customizable form lets you explicitly specify which character to use for
interpolation and which characters to use for delimiting the string.
A customizable string begins with a dollar sign (`$`). The first character after
the `$` is the custom interpolation character, which can be any of the following
symbols: `~!@#$%^&*+=\?`. If none of these characters is used, the interpolation
character defaults to `$`. Because `$` is already the default, explicitly choosing
`$` here (i.e. writing a double `$$`) instead disables interpolation altogether.
The next thing in a customizable string is the character used to delimit the
string. The string delimiter can be any of the following symbols: `` "'`|/;([{< ``.
If the string delimiter is one of `([{<`, then the string continues until the
matching `)]}>` is found, and it does not terminate until the delimiters are
balanced (i.e. nested pairs of delimiters are considered part of the string).
Here are some examples:
```
$[]"In here, quotes delimit the string and square brackets interpolate: [1 + 2]"
$@"For single-letter interpolations, the interpolation is a single term like @my_var without a closing symbol"
$@"But you can parenthesize expressions like: @(x + y) if you need to"
$$"Double dollars means dollar signs interpolate: $my_var $(1 + 2)"
$${If you have a string with "quotes" and 'single quotes', you can choose something else like curly braces to delimit the string}
$?#Here hashes delimit the string and question marks interpolate: ?(1 + 2)#
$"Equivalent to a normal string with dollar interps: $(1 + 2)"
$@"The same, but the AT symbol interpolates: @(1 + 2)"
$$"No interpolation here, $ is just a literal character"
$|This string is pipe-delimited, so it can have "quotes" and 'single quotes' and interpolates with dollar sign: $(1+2)|
$(This string is parens-delimited, so you can have (nested) parens without ending the string)
$=[This string is square-bracket delimited [which can be nested] and uses equals for interps: =(1 + 2)]
$@/look ma, regex literals!/
```
When strings are delimited by matching pairs (`()`, `[]`, `{}`, or `<>`), they

68
parse.c
View File

@ -1141,7 +1141,8 @@ PARSER(parse_bool) {
}
PARSER(parse_text) {
// ["$" [name] [interp-char [closing-interp-char]]] ('"' ... '"' / "'" ... "'")
// ('"' ... '"' / "'" ... "'" / "`" ... "`")
// "$" [name] [interp-char] quote-char ... close-quote
const char *start = pos;
const char *lang = NULL;
@ -1156,30 +1157,29 @@ PARSER(parse_text) {
return NewAST(ctx->file, start, pos, TextLiteral, .cord=cord);
}
char open_quote, close_quote, open_interp = '\x03', close_interp = '\x02';
if (match(&pos, "\"")) {
open_quote = '"', close_quote = '"', open_interp = '{', close_interp = '}';
} else if (match(&pos, "'")) {
open_quote = '\'', close_quote = '\'';
} else if (match(&pos, "$")) {
char open_quote, close_quote, open_interp = '$';
if (match(&pos, "\"")) { // Double quote
open_quote = '"', close_quote = '"', open_interp = '$';
} else if (match(&pos, "`")) { // Backtick
open_quote = '`', close_quote = '`', open_interp = '$';
} else if (match(&pos, "'")) { // Single quote
open_quote = '\'', close_quote = '\'', open_interp = '\x03';
} else if (match(&pos, "$")) { // Customized strings
lang = get_id(&pos);
if (pos[1] == pos[0]) {
// Disable interp using a double opener: $;;...; or $``text`
open_quote = *pos;
pos += 2;
} else {
// $@"...." or $()"....."
// $"..." or $@"...."
static const char *interp_chars = "~!@#$%^&*+=\\?";
if (match(&pos, "$")) { // Disable interpolation with $
open_interp = '\x03';
} else if (strchr(interp_chars, *pos)) {
open_interp = *pos;
++pos;
close_interp = closing[(int)open_interp];
if (close_interp && *pos == close_interp)
++pos;
open_quote = *pos;
++pos;
}
static const char *quote_chars = "\"'`|/;([{<";
if (!strchr(quote_chars, *pos))
parser_err(ctx, pos, pos+1, "This is not a valid string quotation character. Valid characters are: \"'`|/;([{<");
open_quote = *pos;
++pos;
close_quote = closing[(int)open_quote] ? closing[(int)open_quote] : open_quote;
if (open_interp == close_quote)
open_interp = '\0';
} else {
return NULL;
}
@ -1202,16 +1202,9 @@ PARSER(parse_text) {
}
++pos;
ast_t *interp;
if (close_interp) {
whitespace(&pos);
interp = expect(ctx, interp_start, &pos, parse_expr, "I expected an interpolation expression here");
whitespace(&pos);
expect_closing(ctx, &pos, (char[]){close_interp, 0}, "I was expecting a '%c' to finish this interpolation", close_interp);
} else {
if (*pos == ' ' || *pos == '\t')
parser_err(ctx, pos, pos+1, "Whitespace is not allowed before an interpolation here");
interp = expect(ctx, interp_start, &pos, parse_term, "I expected an interpolation term here");
}
if (*pos == ' ' || *pos == '\t')
parser_err(ctx, pos, pos+1, "Whitespace is not allowed before an interpolation here");
interp = expect(ctx, interp_start, &pos, parse_term, "I expected an interpolation term here");
chunks = new(ast_list_t, .ast=interp, .next=chunks);
chunk_start = pos;
} else if (!leading_newline && *pos == open_quote && closing[(int)open_quote]) { // Nested pair begin
@ -2128,7 +2121,7 @@ PARSER(parse_say) {
ast_list_t *chunks = NULL;
CORD chunk = CORD_EMPTY;
const char *chunk_start = pos;
const char open_interp = '{', close_interp = '}';
const char open_interp = '$';
while (pos < ctx->file->text + ctx->file->len) {
if (*pos == open_interp) { // Interpolation
const char *interp_start = pos;
@ -2139,16 +2132,9 @@ PARSER(parse_say) {
}
++pos;
ast_t *interp;
if (close_interp) {
whitespace(&pos);
interp = expect(ctx, interp_start, &pos, parse_expr, "I expected an interpolation expression here");
whitespace(&pos);
expect_closing(ctx, &pos, (char[]){close_interp, 0}, "I was expecting a '%c' to finish this interpolation", close_interp);
} else {
if (*pos == ' ' || *pos == '\t')
parser_err(ctx, pos, pos+1, "Whitespace is not allowed before an interpolation here");
interp = expect(ctx, interp_start, &pos, parse_term, "I expected an interpolation term here");
}
if (*pos == ' ' || *pos == '\t')
parser_err(ctx, pos, pos+1, "Whitespace is not allowed before an interpolation here");
interp = expect(ctx, interp_start, &pos, parse_term, "I expected an interpolation term here");
chunks = new(ast_list_t, .ast=interp, .next=chunks);
chunk_start = pos;
} else if (*pos == '\r' || *pos == '\n') { // Newline

View File

@ -23,7 +23,7 @@ func main():
str := ""
for i,x in arr:
str ++= "({i},{x})"
str ++= "($i,$x)"
>> str
= "(1,10)(2,20)(3,30)"
@ -148,7 +148,7 @@ func main():
xs := ["A", "B", "C", "D"]
for i,x in xs:to(-2):
for y in xs:from(i+1):
say("{x}{y}")
say("$(x)$(y)")
do:
>> nums := [-7, -4, -1, 2, 5]

View File

@ -1,14 +1,14 @@
func ping(x:Int)->[Text]:
if x > 0:
return ["ping: {x}"] ++ pong(x-1)
return ["ping: $x"] ++ pong(x-1)
else:
return ["ping: {x}"]
return ["ping: $x"]
func pong(x:Int)->[Text]:
if x > 0:
return ["pong: {x}"] ++ ping(x-1)
return ["pong: $x"] ++ ping(x-1)
else:
return ["pong: {x}"]
return ["pong: $x"]
func main():
>> ping(3)

View File

@ -16,7 +16,7 @@ func main():
for word in ["first", "second", "third"]:
defer:
say("Got {word} deferred")
say("Got $word deferred")
if word == "second":
say("<skipped>")
@ -27,7 +27,7 @@ func main():
for i in 3:
defer:
say("Inner loop deferred {i}")
say("Inner loop deferred $i")
if i == 2:
say("<skipped inner>")

View File

@ -5,15 +5,15 @@ func choose_text(f:Foo)->Text:
when f is Zero:
return "Zero"
is One(one):
return "One: {one}"
return "One: $one"
is Two(x, y):
return "Two: x={x}, y={y}"
return "Two: x=$x, y=$y"
is Three(three):
return "Three: {three}"
return "Three: $three"
is Four:
return "Four"
else:
return "else: {f}"
return "else: $f"
func main():
>> Foo.Zero

View File

@ -2,7 +2,7 @@
func all_nums(nums:[Int])->Text:
result := ""
for num in nums:
result ++= "{num},"
result ++= "$num,"
else:
return "EMPTY"
return result
@ -10,7 +10,7 @@ func all_nums(nums:[Int])->Text:
func labeled_nums(nums:[Int])->Text:
result := ""
for i,num in nums:
result ++= "{i}:{num},"
result ++= "$i:$num,"
else:
return "EMPTY"
return result
@ -18,14 +18,14 @@ func labeled_nums(nums:[Int])->Text:
func table_str(t:{Text:Text})->Text:
str := ""
for k,v in t:
str ++= "{k}:{v},"
str ++= "$k:$v,"
else: return "EMPTY"
return str
func table_key_str(t:{Text:Text})->Text:
str := ""
for k in t:
str ++= "{k},"
str ++= "$k,"
else: return "EMPTY"
return str

View File

@ -28,7 +28,7 @@ func main():
nums := ""
for x in 5:
nums ++= "{x},"
nums ++= "$x,"
>> nums
= "1,2,3,4,5,"
@ -79,6 +79,6 @@ func main():
for in 20:
>> n := Int.random(-999999, 999999)
>> d := Int.random(-999, 999)
//! n={n}, d={d}:
//! n=$n, d=$d:
>> (n/d)*d + (n mod d) == n
= yes

View File

@ -19,15 +19,15 @@ func range(first:Int, last:Int)->func()->RangeIteration:
func main():
values := ["A", "B", "C", "D"]
>> ((++) "({foo}{baz})" for foo, baz in pairwise(values))
>> ((++) "($(foo)$(baz))" for foo, baz in pairwise(values))
= "(AB)(BC)(CD)"
>> ["{foo}{baz}" for foo, baz in pairwise(values)]
>> ["$(foo)$(baz)" for foo, baz in pairwise(values)]
= ["AB", "BC", "CD"]
do:
result := [:Text]
for foo, baz in pairwise(values):
result:insert("{foo}{baz}")
result:insert("$(foo)$(baz)")
>> result
= ["AB", "BC", "CD"]

View File

@ -12,7 +12,7 @@ func main():
>> add_one(10)
= 11
>> shout := func(msg:Text): say("{msg:upper()}!")
>> shout := func(msg:Text): say("$(msg:upper())!")
>> shout("hello")
>> asdf := add_one
@ -36,7 +36,7 @@ func main():
fn := func():
return func():
return func():
defer: //! {outer}
defer: //! $outer
return outer
>> fn()()()
= "Hello"

View File

@ -1,5 +1,5 @@
lang HTML:
HEADER := $HTML$"<!DOCTYPE HTML>"
HEADER := $HTML"<!DOCTYPE HTML>"
func escape(t:Text)->HTML:
t = t:replace("&", "&amp;")
t = t:replace("<", "&lt;")
@ -9,25 +9,25 @@ lang HTML:
return HTML.from_unsafe_text(t)
func escape_int(i:Int)->HTML:
return HTML.from_unsafe_text("{i}")
return HTML.from_unsafe_text("$i")
func paragraph(content:HTML)->HTML:
return $HTML$"<p>$content</p>"
return $HTML"<p>$content</p>"
func main():
>> HTML.HEADER
= $HTML"<!DOCTYPE HTML>"
>> user := "I <3 hax"
>> html := $HTML$"Hello $user!"
>> html := $HTML"Hello $user!"
= $HTML"Hello I &lt;3 hax!"
>> html ++ $HTML$"<br>"
>> html ++ $HTML"<br>"
= $HTML"Hello I &lt;3 hax!<br>"
>> $HTML{}"{1 + 2}"
>> $HTML"$(1 + 2)"
= $HTML"3"
>> $HTML{}"{3_i8}"
>> $HTML"$(3_i8)"
= $HTML"3"
>> html:paragraph()

View File

@ -11,7 +11,7 @@ func main():
t_str := ""
for k,v in t:
t_str ++= "({k}:{v})"
t_str ++= "($k:$v)"
>> t_str
= "(one:1)(two:2)"
@ -42,7 +42,7 @@ func main():
t2_str := ""
for k,v in t2:
t2_str ++= "({k}:{v})"
t2_str ++= "($k:$v)"
>> t2_str
= "(three:3)"

View File

@ -1,6 +1,6 @@
func main():
>> str := "Hello Amélie!"
//! Testing strings like {str}
//! Testing strings like $str
>> str:upper()
= "HELLO AMÉLIE!"
@ -19,7 +19,7 @@ func main():
>> \UE9 == \U65\U301
= yes
>> amelie := "Am{\UE9}lie"
>> amelie := "Am$(\UE9)lie"
>> amelie:clusters()
= ["A", "m", "é", "l", "i", "e"] : [Text]
>> amelie:codepoints()
@ -35,7 +35,7 @@ func main():
>> amelie:num_bytes()
= 8
>> amelie2 := "Am{\U65\U301}lie"
>> amelie2 := "Am$(\U65\U301)lie"
>> amelie2:clusters()
= ["A", "m", "é", "l", "i", "e"] : [Text]
>> amelie2:codepoints()
@ -103,3 +103,19 @@ func main():
"
= "line one\nline two"
//! Interpolation tests:
>> "A $(1+2)"
= "A 3"
>> 'A $(1+2)'
= "A $(1+2)"
>> `A $(1+2)`
= "A 3"
>> $"A $(1+2)"
= "A 3"
>> $$"A $(1+2)"
= "A $(1+2)"
>> $="A =(1+2)"
= "A 3"
>> $(one (nested) two $(1+2))
= "one (nested) two 3"

View File

@ -525,7 +525,14 @@ type_t *get_type(env_t *env, ast_t *ast)
case TextLiteral: return TEXT_TYPE;
case TextJoin: {
const char *lang = Match(ast, TextJoin)->lang;
return lang ? Match(get_binding(env, lang)->type, TypeInfoType)->type : TEXT_TYPE;
if (lang) {
binding_t *b = get_binding(env, lang);
if (!b || b->type->tag != TypeInfoType || Match(b->type, TypeInfoType)->type->tag != TextType)
code_err(ast, "There is no text language called '%s'", lang);
return Match(get_binding(env, lang)->type, TypeInfoType)->type;
} else {
return TEXT_TYPE;
}
}
case Var: {
auto var = Match(ast, Var);