From f4b04a1b8cd882e25fee592c819650c9b7e8566b Mon Sep 17 00:00:00 2001 From: Bruce Hill Date: Sun, 18 Aug 2024 14:44:15 -0400 Subject: Improved syntax for dollar-string literals --- docs/strings.md | 64 ++++++++++++++++++++++++++------------------- parse.c | 68 +++++++++++++++++++----------------------------- test/arrays.tm | 4 +-- test/corecursive_func.tm | 8 +++--- test/defer.tm | 4 +-- test/enums.tm | 8 +++--- test/for.tm | 8 +++--- test/integers.tm | 4 +-- test/iterators.tm | 6 ++--- test/lambdas.tm | 4 +-- test/lang.tm | 14 +++++----- test/tables.tm | 4 +-- test/text.tm | 22 +++++++++++++--- typecheck.c | 9 ++++++- 14 files changed, 123 insertions(+), 104 deletions(-) diff --git a/docs/strings.md b/docs/strings.md index 343b6bc3..8e740765 100644 --- a/docs/strings.md +++ b/docs/strings.md @@ -17,6 +17,7 @@ without using printf-style string formatting. // Basic string: str := "Hello world" str2 := 'Also a string' +str3 := `Backticks too` ``` ## Line Splits @@ -59,13 +60,18 @@ multi_line := " ## String Interpolations -Inside a double quoted string, you can use curly braces (`{...}`) to insert an +Inside a double quoted string, you can use a dollar sign (`$`) to insert an expression that you want converted to a string. This is called string interpolation: ``` // Interpolation: -str := "Sum: {1 + 2}" +my_var := 5 +str := "My var is $my_var!" +// Equivalent to "My var is 5!" 
+ +// Using parentheses: +str := "Sum: $(1 + 2)" // equivalent to "Sum: 3" ``` @@ -73,7 +79,7 @@ Single-quoted strings do not have interpolations: ``` // No interpolation here: -str := 'Sum: {1 + 2}' +str := 'Sum: $(1 + 2)' ``` ## String Escapes @@ -89,11 +95,12 @@ crlf := \r\n quote := \" ``` -These string literals can be used as interpolation values: +These string literals can be used as interpolation values with or without +parentheses, depending on which you find more readable: ``` -two_lines := "one{\n}two" -has_quotes := "some {\"}quotes{\"} here" +two_lines := "one$(\n)two" +has_quotes := "some $\"quotes$\" here" ``` However, in general it is best practice to use multi-line strings to avoid these problems: @@ -163,32 +170,35 @@ str := " >>> str == "{\n}blank lines{\n}" ``` -### Advanced $-Strings +### Customizable $-Strings + +Sometimes you might need to use a lot of literal `$`s or quotation marks in a +string. In such cases, you can use the more customizable form of strings. The +customizable form lets you explicitly specify which character to use for +interpolation and which characters to use for delimiting the string. + +The first character after the `$` is the custom interpolation character, which +can be any of the following symbols: `~!@#$%^&*+=\?`. If none of these +characters is used, the default interpolation character is `$`. Since this is +the default, you can disable interpolation altogether by using `$` here (i.e. a +double `$$`). -Sometimes you need to use many `{`s or `"`s inside a string, but you don't want -to type `{\{}` or `{\"}` each time. In such cases, you can use the more -advanced form of strings. The advanced form lets you explicitly specify which -characters are used for interpolation and which characters are used for -opening/closing the string. 
Advanced strings begin with a dollar sign (`$`), -followed by what interpolation style to use, followed by the character to use -to delimit the string, followed by the string contents and a closing string -delimiter. The interpolation style can be a matching pair (`()`, `[]`, `{}`, or -`<>`) or any other single character. When the interpolation style is a matching -pair, the interpolation is any expression enclosed in that pair (e.g. -`${}"interpolate {1 + 2}"`). When the interpolation style is a single -character, the interpolation must be either a parenthesized expression or a -single term with no infix operators (e.g. a variable), for example: -`$@"Interpolate @var or @(1 + 2)"`. +The next thing in a customizable string is the character used to delimit the +string. The string delimiter can be any of the following symbols: `` "'`|/;([{< ``. +If the string delimiter is one of `([{<`, then the string will continue until a +matching `)]}>` is found, not terminating unless the delimiters are balanced +(i.e. nested pairs of delimiters are considered part of the string). 
Here are some examples: ``` -$[]"In here, quotes delimit the string and square brackets interpolate: [1 + 2]" -$@"For single-letter interpolations, the interpolation is a single term like @my_var without a closing symbol" -$@"But you can parenthesize expressions like: @(x + y) if you need to" -$$"Double dollars means dollar signs interpolate: $my_var $(1 + 2)" -$${If you have a string with "quotes" and 'single quotes', you can choose something else like curly braces to delimit the string} -$?#Here hashes delimit the string and question marks interpolate: ?(1 + 2)# +$"Equivalent to a normal string with dollar interps: $(1 + 2)" +$@"The same, but the AT symbol interpolates: @(1 + 2)" +$$"No interpolation here, $ is just a literal character" +$|This string is pipe-delimited, so it can have "quotes" and 'single quotes' and interpolates with dollar sign: $(1+2)| +$(This string is parens-delimited, so you can have (nested) parens without ending the string) +$=[This string is square-bracket delimited [which can be nested] and uses equals for interps: =(1 + 2)] +$@/look ma, regex literals!/ ``` When strings are delimited by matching pairs (`()`, `[]`, `{}`, or `<>`), they diff --git a/parse.c b/parse.c index f8f1a512..740791b5 100644 --- a/parse.c +++ b/parse.c @@ -1141,7 +1141,8 @@ PARSER(parse_bool) { } PARSER(parse_text) { - // ["$" [name] [interp-char [closing-interp-char]]] ('"' ... '"' / "'" ... "'") + // ('"' ... '"' / "'" ... "'" / "`" ... "`") + // "$" [name] [interp-char] quote-char ... 
close-quote const char *start = pos; const char *lang = NULL; @@ -1156,30 +1157,29 @@ PARSER(parse_text) { return NewAST(ctx->file, start, pos, TextLiteral, .cord=cord); } - char open_quote, close_quote, open_interp = '\x03', close_interp = '\x02'; - if (match(&pos, "\"")) { - open_quote = '"', close_quote = '"', open_interp = '{', close_interp = '}'; - } else if (match(&pos, "'")) { - open_quote = '\'', close_quote = '\''; - } else if (match(&pos, "$")) { + char open_quote, close_quote, open_interp = '$'; + if (match(&pos, "\"")) { // Double quote + open_quote = '"', close_quote = '"', open_interp = '$'; + } else if (match(&pos, "`")) { // Backtick + open_quote = '`', close_quote = '`', open_interp = '$'; + } else if (match(&pos, "'")) { // Single quote + open_quote = '\'', close_quote = '\'', open_interp = '\x03'; + } else if (match(&pos, "$")) { // Customized strings lang = get_id(&pos); - if (pos[1] == pos[0]) { - // Disable interp using a double opener: $;;...; or $``text` - open_quote = *pos; - pos += 2; - } else { - // $@"...." or $()"....." + // $"..." or $@"...." + static const char *interp_chars = "~!@#$%^&*+=\\?"; + if (match(&pos, "$")) { // Disable interpolation with $ + open_interp = '\x03'; + } else if (strchr(interp_chars, *pos)) { open_interp = *pos; ++pos; - close_interp = closing[(int)open_interp]; - if (close_interp && *pos == close_interp) - ++pos; - open_quote = *pos; - ++pos; } + static const char *quote_chars = "\"'`|/;([{<"; + if (!strchr(quote_chars, *pos)) + parser_err(ctx, pos, pos+1, "This is not a valid string quotation character. Valid characters are: \"'`|/;([{<"); + open_quote = *pos; + ++pos; close_quote = closing[(int)open_quote] ? 
closing[(int)open_quote] : open_quote; - if (open_interp == close_quote) - open_interp = '\0'; } else { return NULL; } @@ -1202,16 +1202,9 @@ PARSER(parse_text) { } ++pos; ast_t *interp; - if (close_interp) { - whitespace(&pos); - interp = expect(ctx, interp_start, &pos, parse_expr, "I expected an interpolation expression here"); - whitespace(&pos); - expect_closing(ctx, &pos, (char[]){close_interp, 0}, "I was expecting a '%c' to finish this interpolation", close_interp); - } else { - if (*pos == ' ' || *pos == '\t') - parser_err(ctx, pos, pos+1, "Whitespace is not allowed before an interpolation here"); - interp = expect(ctx, interp_start, &pos, parse_term, "I expected an interpolation term here"); - } + if (*pos == ' ' || *pos == '\t') + parser_err(ctx, pos, pos+1, "Whitespace is not allowed before an interpolation here"); + interp = expect(ctx, interp_start, &pos, parse_term, "I expected an interpolation term here"); chunks = new(ast_list_t, .ast=interp, .next=chunks); chunk_start = pos; } else if (!leading_newline && *pos == open_quote && closing[(int)open_quote]) { // Nested pair begin @@ -2128,7 +2121,7 @@ PARSER(parse_say) { ast_list_t *chunks = NULL; CORD chunk = CORD_EMPTY; const char *chunk_start = pos; - const char open_interp = '{', close_interp = '}'; + const char open_interp = '$'; while (pos < ctx->file->text + ctx->file->len) { if (*pos == open_interp) { // Interpolation const char *interp_start = pos; @@ -2139,16 +2132,9 @@ PARSER(parse_say) { } ++pos; ast_t *interp; - if (close_interp) { - whitespace(&pos); - interp = expect(ctx, interp_start, &pos, parse_expr, "I expected an interpolation expression here"); - whitespace(&pos); - expect_closing(ctx, &pos, (char[]){close_interp, 0}, "I was expecting a '%c' to finish this interpolation", close_interp); - } else { - if (*pos == ' ' || *pos == '\t') - parser_err(ctx, pos, pos+1, "Whitespace is not allowed before an interpolation here"); - interp = expect(ctx, interp_start, &pos, parse_term, "I 
expected an interpolation term here"); - } + if (*pos == ' ' || *pos == '\t') + parser_err(ctx, pos, pos+1, "Whitespace is not allowed before an interpolation here"); + interp = expect(ctx, interp_start, &pos, parse_term, "I expected an interpolation term here"); chunks = new(ast_list_t, .ast=interp, .next=chunks); chunk_start = pos; } else if (*pos == '\r' || *pos == '\n') { // Newline diff --git a/test/arrays.tm b/test/arrays.tm index c870ce04..76507c5b 100644 --- a/test/arrays.tm +++ b/test/arrays.tm @@ -23,7 +23,7 @@ func main(): str := "" for i,x in arr: - str ++= "({i},{x})" + str ++= "($i,$x)" >> str = "(1,10)(2,20)(3,30)" @@ -148,7 +148,7 @@ func main(): xs := ["A", "B", "C", "D"] for i,x in xs:to(-2): for y in xs:from(i+1): - say("{x}{y}") + say("$(x)$(y)") do: >> nums := [-7, -4, -1, 2, 5] diff --git a/test/corecursive_func.tm b/test/corecursive_func.tm index a5c13dde..168300d4 100644 --- a/test/corecursive_func.tm +++ b/test/corecursive_func.tm @@ -1,14 +1,14 @@ func ping(x:Int)->[Text]: if x > 0: - return ["ping: {x}"] ++ pong(x-1) + return ["ping: $x"] ++ pong(x-1) else: - return ["ping: {x}"] + return ["ping: $x"] func pong(x:Int)->[Text]: if x > 0: - return ["pong: {x}"] ++ ping(x-1) + return ["pong: $x"] ++ ping(x-1) else: - return ["pong: {x}"] + return ["pong: $x"] func main(): >> ping(3) diff --git a/test/defer.tm b/test/defer.tm index 121878b1..deccaa70 100644 --- a/test/defer.tm +++ b/test/defer.tm @@ -16,7 +16,7 @@ func main(): for word in ["first", "second", "third"]: defer: - say("Got {word} deferred") + say("Got $word deferred") if word == "second": say("") @@ -27,7 +27,7 @@ func main(): for i in 3: defer: - say("Inner loop deferred {i}") + say("Inner loop deferred $i") if i == 2: say("") diff --git a/test/enums.tm b/test/enums.tm index 65ef6398..b734d487 100644 --- a/test/enums.tm +++ b/test/enums.tm @@ -5,15 +5,15 @@ func choose_text(f:Foo)->Text: when f is Zero: return "Zero" is One(one): - return "One: {one}" + return "One: $one" is 
Two(x, y): - return "Two: x={x}, y={y}" + return "Two: x=$x, y=$y" is Three(three): - return "Three: {three}" + return "Three: $three" is Four: return "Four" else: - return "else: {f}" + return "else: $f" func main(): >> Foo.Zero diff --git a/test/for.tm b/test/for.tm index a050c892..6ac77be6 100644 --- a/test/for.tm +++ b/test/for.tm @@ -2,7 +2,7 @@ func all_nums(nums:[Int])->Text: result := "" for num in nums: - result ++= "{num}," + result ++= "$num," else: return "EMPTY" return result @@ -10,7 +10,7 @@ func all_nums(nums:[Int])->Text: func labeled_nums(nums:[Int])->Text: result := "" for i,num in nums: - result ++= "{i}:{num}," + result ++= "$i:$num," else: return "EMPTY" return result @@ -18,14 +18,14 @@ func labeled_nums(nums:[Int])->Text: func table_str(t:{Text:Text})->Text: str := "" for k,v in t: - str ++= "{k}:{v}," + str ++= "$k:$v," else: return "EMPTY" return str func table_key_str(t:{Text:Text})->Text: str := "" for k in t: - str ++= "{k}," + str ++= "$k," else: return "EMPTY" return str diff --git a/test/integers.tm b/test/integers.tm index 36119f01..11603b22 100644 --- a/test/integers.tm +++ b/test/integers.tm @@ -28,7 +28,7 @@ func main(): nums := "" for x in 5: - nums ++= "{x}," + nums ++= "$x," >> nums = "1,2,3,4,5," @@ -79,6 +79,6 @@ func main(): for in 20: >> n := Int.random(-999999, 999999) >> d := Int.random(-999, 999) - //! n={n}, d={d}: + //! 
n=$n, d=$d: >> (n/d)*d + (n mod d) == n = yes diff --git a/test/iterators.tm b/test/iterators.tm index 999194d9..1f7ce342 100644 --- a/test/iterators.tm +++ b/test/iterators.tm @@ -19,15 +19,15 @@ func range(first:Int, last:Int)->func()->RangeIteration: func main(): values := ["A", "B", "C", "D"] - >> ((++) "({foo}{baz})" for foo, baz in pairwise(values)) + >> ((++) "($(foo)$(baz))" for foo, baz in pairwise(values)) = "(AB)(BC)(CD)" - >> ["{foo}{baz}" for foo, baz in pairwise(values)] + >> ["$(foo)$(baz)" for foo, baz in pairwise(values)] = ["AB", "BC", "CD"] do: result := [:Text] for foo, baz in pairwise(values): - result:insert("{foo}{baz}") + result:insert("$(foo)$(baz)") >> result = ["AB", "BC", "CD"] diff --git a/test/lambdas.tm b/test/lambdas.tm index 40f24abb..8d543bfc 100644 --- a/test/lambdas.tm +++ b/test/lambdas.tm @@ -12,7 +12,7 @@ func main(): >> add_one(10) = 11 - >> shout := func(msg:Text): say("{msg:upper()}!") + >> shout := func(msg:Text): say("$(msg:upper())!") >> shout("hello") >> asdf := add_one @@ -36,7 +36,7 @@ func main(): fn := func(): return func(): return func(): - defer: //! {outer} + defer: //! $outer return outer >> fn()()() = "Hello" diff --git a/test/lang.tm b/test/lang.tm index 01551e27..dfe1c663 100644 --- a/test/lang.tm +++ b/test/lang.tm @@ -1,5 +1,5 @@ lang HTML: - HEADER := $HTML$"" + HEADER := $HTML"" func escape(t:Text)->HTML: t = t:replace("&", "&") t = t:replace("<", "<") @@ -9,25 +9,25 @@ lang HTML: return HTML.from_unsafe_text(t) func escape_int(i:Int)->HTML: - return HTML.from_unsafe_text("{i}") + return HTML.from_unsafe_text("$i") func paragraph(content:HTML)->HTML: - return $HTML$"

$content

" + return $HTML"

$content

" func main(): >> HTML.HEADER = $HTML"" >> user := "I <3 hax" - >> html := $HTML$"Hello $user!" + >> html := $HTML"Hello $user!" = $HTML"Hello I <3 hax!" - >> html ++ $HTML$"
" + >> html ++ $HTML"
" = $HTML"Hello I <3 hax!
" - >> $HTML{}"{1 + 2}" + >> $HTML"$(1 + 2)" = $HTML"3" - >> $HTML{}"{3_i8}" + >> $HTML"$(3_i8)" = $HTML"3" >> html:paragraph() diff --git a/test/tables.tm b/test/tables.tm index d02a5272..7f8383d8 100644 --- a/test/tables.tm +++ b/test/tables.tm @@ -11,7 +11,7 @@ func main(): t_str := "" for k,v in t: - t_str ++= "({k}:{v})" + t_str ++= "($k:$v)" >> t_str = "(one:1)(two:2)" @@ -42,7 +42,7 @@ func main(): t2_str := "" for k,v in t2: - t2_str ++= "({k}:{v})" + t2_str ++= "($k:$v)" >> t2_str = "(three:3)" diff --git a/test/text.tm b/test/text.tm index 4051a16f..2666b6c8 100644 --- a/test/text.tm +++ b/test/text.tm @@ -1,6 +1,6 @@ func main(): >> str := "Hello Amélie!" - //! Testing strings like {str} + //! Testing strings like $str >> str:upper() = "HELLO AMÉLIE!" @@ -19,7 +19,7 @@ func main(): >> \UE9 == \U65\U301 = yes - >> amelie := "Am{\UE9}lie" + >> amelie := "Am$(\UE9)lie" >> amelie:clusters() = ["A", "m", "é", "l", "i", "e"] : [Text] >> amelie:codepoints() @@ -35,7 +35,7 @@ func main(): >> amelie:num_bytes() = 8 - >> amelie2 := "Am{\U65\U301}lie" + >> amelie2 := "Am$(\U65\U301)lie" >> amelie2:clusters() = ["A", "m", "é", "l", "i", "e"] : [Text] >> amelie2:codepoints() @@ -103,3 +103,19 @@ func main(): " = "line one\nline two" + //! Interpolation tests: + >> "A $(1+2)" + = "A 3" + >> 'A $(1+2)' + = "A $(1+2)" + >> `A $(1+2)` + = "A 3" + + >> $"A $(1+2)" + = "A 3" + >> $$"A $(1+2)" + = "A $(1+2)" + >> $="A =(1+2)" + = "A 3" + >> $(one (nested) two $(1+2)) + = "one (nested) two 3" diff --git a/typecheck.c b/typecheck.c index 3e7084c8..b79b903c 100644 --- a/typecheck.c +++ b/typecheck.c @@ -525,7 +525,14 @@ type_t *get_type(env_t *env, ast_t *ast) case TextLiteral: return TEXT_TYPE; case TextJoin: { const char *lang = Match(ast, TextJoin)->lang; - return lang ? 
Match(get_binding(env, lang)->type, TypeInfoType)->type : TEXT_TYPE; + if (lang) { + binding_t *b = get_binding(env, lang); + if (!b || b->type->tag != TypeInfoType || Match(b->type, TypeInfoType)->type->tag != TextType) + code_err(ast, "There is no text language called '%s'", lang); + return Match(get_binding(env, lang)->type, TypeInfoType)->type; + } else { + return TEXT_TYPE; + } } case Var: { auto var = Match(ast, Var); -- cgit v1.2.3