Syntax overhaul (comments back to `#`, print statements to `!!`),

using `$/.../` for patterns and using a DSL for patterns
author: Bruce Hill <bruce@bruce-hill.com> 2024-09-03 13:19:41 -0400
committer: Bruce Hill <bruce@bruce-hill.com> 2024-09-03 13:19:41 -0400
commit: 64143f0a131a053414e4b73c17bff994522b11c2 (patch)
tree: 2545507fde623f8846bf183388acdbb0234b5e65
parent: 5feecff9d93522002c74a1423d138c2aa8bc150d (diff)
13 files changed, 344 insertions, 200 deletions
diff --git a/builtins/datatypes.h b/builtins/datatypes.h
index 433e1dd9..1311797c 100644
--- a/builtins/datatypes.h
+++ b/builtins/datatypes.h
@@ -86,4 +86,6 @@ typedef struct Text_s {
     };
 } Text_t;
 
+#define Pattern_t Text_t
+
 // vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/builtins/text.c b/builtins/text.c
index 99d17577..d9da1248 100644
--- a/builtins/text.c
+++ b/builtins/text.c
@@ -1034,7 +1034,7 @@ int64_t match_uri(Text_t text, int64_t text_index)
     return text_index - start_index;
 }
 
-int64_t match(Text_t text, Text_t pattern, int64_t text_index, int64_t pattern_index)
+int64_t match(Text_t text, Pattern_t pattern, int64_t text_index, int64_t pattern_index)
 {
     if (pattern_index >= pattern.length) return 0;
     int64_t start_index = text_index;
@@ -1306,7 +1306,7 @@ int64_t match(Text_t text, Text_t pattern, int64_t text_index, int64_t pattern_i
 #undef EAT1
 #undef EAT_MANY
 
-public Int_t Text$find(Text_t text, Text_t pattern, Int_t from_index, int64_t *match_length)
+public Int_t Text$find(Text_t text, Pattern_t pattern, Int_t from_index, int64_t *match_length)
 {
     int64_t first = Int_to_Int64(from_index, false);
     if (first == 0) fail("Invalid index: 0");
@@ -1340,7 +1340,7 @@ public Int_t Text$find(Text_t text, Text_t pattern, Int_t from_index, int64_t *m
     return I(0);
 }
 
-public bool Text$has(Text_t text, Text_t pattern)
+public bool Text$has(Text_t text, Pattern_t pattern)
 {
     return !I_is_zero(Text$find(text, pattern, I_small(1), NULL));
 }
@@ -1363,26 +1363,17 @@ public int printf_text(FILE *stream, const struct printf_info *info, const void
         return Text$print(stream, t);
 }
 
-public Text_t Text$as_text(const void *text, bool colorize, const TypeInfo *info)
-{
-    (void)info;
-    if (!text) return info && info->TextInfo.lang ? Text$from_str(info->TextInfo.lang) : Text$from_str("Text");
-    Text_t as_text = Text$quoted(*(Text_t*)text, colorize);
-    if (info && info->TextInfo.lang && info != &$Text)
-        as_text = Text$concat(Text$from_str(colorize ? "\x1b[1m$" : "$"), Text$from_str(info->TextInfo.lang), as_text);
-    return as_text;
-}
-
-public Text_t Text$quoted(Text_t text, bool colorize)
+static inline Text_t _quoted(Text_t text, bool colorize, char quote_char)
 {
     // TODO: optimize for ASCII and short strings
     array_t graphemes = {.atomic=1};
 #define add_char(c) Array$insert_value(&graphemes, (uint32_t)c, I_small(0), sizeof(uint32_t))
 #define add_str(s) ({ for (char *_c = s; *_c; ++_c) Array$insert_value(&graphemes, (uint32_t)*_c, I_small(0), sizeof(uint32_t)); })
     if (colorize)
-        add_str("\x1b[35m\"");
-    else
-        add_char('"');
+        add_str("\x1b[35m");
+    if (quote_char != '"' && quote_char != '\"' && quote_char != '`')
+        add_char('$');
+    add_char(quote_char);
 
 #define add_escaped(str) ({ if (colorize) add_str("\x1b[34;1m"); add_char('\\'); add_str(str); if (colorize) add_str("\x1b[0;35m"); })
     iteration_state_t state = {0, 0};
@@ -1397,7 +1388,6 @@ public Text_t Text$quoted(Text_t text, bool colorize)
         case '\r': add_escaped("r"); break;
         case '\t': add_escaped("t"); break;
         case '\v': add_escaped("v"); break;
-        case '"': add_escaped("\""); break;
         case '\\': add_escaped("\\"); break;
         case '\x00' ... '\x06': case '\x0E' ... '\x1A':
         case '\x1C' ... '\x1F': case '\x7F' ... '\x7F': {
@@ -1411,14 +1401,19 @@ public Text_t Text$quoted(Text_t text, bool colorize)
                 add_str("\x1b[0;35m");
             break;
         }
-        default: add_char(g); break;
+        default: {
+            if (g == quote_char)
+                add_escaped(((char[2]){quote_char, 0}));
+            else
+               add_char(g);
+            break;
+        }
         }
     }
 
+    add_char(quote_char);
     if (colorize)
-        add_str("\"\x1b[m");
-    else
-        add_char('"');
+        add_str("\x1b[m");
 
     return (Text_t){.length=graphemes.length, .tag=TEXT_GRAPHEMES, .graphemes=graphemes.data};
 #undef add_str
@@ -1426,7 +1421,22 @@ public Text_t Text$quoted(Text_t text, bool colorize)
 #undef add_escaped
 }
 
-public array_t Text$find_all(Text_t text, Text_t pattern)
+public Text_t Text$as_text(const void *text, bool colorize, const TypeInfo *info)
+{
+    // `lang` is the DSL name from the type info (NULL when there is no info or no name)
+    const char *lang = info ? info->TextInfo.lang : NULL;
+    if (!text) return Text$from_str(lang ? lang : "Text"); // no value: just name the type
+    Text_t quoted = _quoted(*(Text_t*)text, colorize, info == &Pattern ? '/' : '"');
+    if (!lang || info == &$Text || info == &Pattern) return quoted; // plain Text and Pattern get no `$lang` prefix
+    return Text$concat(Text$from_str(colorize ? "\x1b[1m$" : "$"), Text$from_str(lang), quoted);
+}
+
+public Text_t Text$quoted(Text_t text, bool colorize)
+{
+    return _quoted(text, colorize, '"'); // public entry point: delegates to _quoted with `"` as the quote character
+}
+
+public array_t Text$find_all(Text_t text, Pattern_t pattern)
 {
     if (pattern.length == 0) // special case
         return (array_t){.length=0};
@@ -1446,7 +1456,7 @@ public array_t Text$find_all(Text_t text, Text_t pattern)
     return matches;
 }
 
-public Text_t Text$replace(Text_t text, Text_t pattern, Text_t replacement)
+public Text_t Text$replace(Text_t text, Pattern_t pattern, Text_t replacement)
 {
     Text_t ret = {.length=0};
 
@@ -1470,7 +1480,7 @@ public Text_t Text$replace(Text_t text, Text_t pattern, Text_t replacement)
     return ret;
 }
 
-public array_t Text$split(Text_t text, Text_t pattern)
+public array_t Text$split(Text_t text, Pattern_t pattern)
 {
     if (text.length == 0) // special case
         return (array_t){.length=0};
@@ -1656,4 +1666,42 @@ public const TypeInfo $Text = {
     .TextInfo={.lang="Text"},
 };
 
+// Escape arbitrary text so that it can be embedded in a pattern. Each `[`,
+// quotation-mark, or paired-punctuation grapheme X is emitted wrapped as
+// `[..1X]`; every other grapheme is copied through unchanged. The result is
+// built as a fresh grapheme array.
+public Pattern_t Pattern$escape_text(Text_t text)
+{
+    // TODO: optimize for ASCII and short strings
+    array_t graphemes = {.atomic=1};
+#define add_char(c) Array$insert_value(&graphemes, (uint32_t)c, I_small(0), sizeof(uint32_t))
+#define add_str(s) ({ for (char *_c = s; *_c; ++_c) Array$insert_value(&graphemes, (uint32_t)*_c, I_small(0), sizeof(uint32_t)); })
+    iteration_state_t state = {0, 0};
+    for (int64_t i = 0; i < text.length; i++) {
+        int32_t g = _next_grapheme(text, &state, i);
+        // Negative values index synthetic_graphemes; classify those by their first codepoint.
+        uint32_t g0 = g < 0 ? synthetic_graphemes[-g-1].codepoints[0] : (uint32_t)g;
+        bool needs_escape = (g == '[')
+            || uc_is_property_quotation_mark(g0)
+            || uc_is_property_paired_punctuation(g0);
+
+        if (needs_escape)
+            add_str("[..1");
+        add_char(g);
+        if (needs_escape)
+            add_char(']');
+    }
+    return (Text_t){.length=graphemes.length, .tag=TEXT_GRAPHEMES, .graphemes=graphemes.data};
+#undef add_str
+#undef add_char
+#undef add_escaped
+}
+
+public const TypeInfo Pattern = { // runtime type info for Pattern values (Pattern_t is a #define alias of Text_t)
+    .size=sizeof(Text_t),
+    .align=__alignof__(Text_t),
+    .tag=TextInfo,
+    .TextInfo={.lang="Pattern"}, // Text$as_text compares its info against &Pattern to choose `/` quoting
+};
+
 // vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/builtins/text.h b/builtins/text.h
index b3cb6d79..edf3dc9f 100644
--- a/builtins/text.h
+++ b/builtins/text.h
@@ -29,11 +29,11 @@ Text_t Text$lower(Text_t text);
 Text_t Text$title(Text_t text);
 Text_t Text$as_text(const void *text, bool colorize, const TypeInfo *info);
 Text_t Text$quoted(Text_t str, bool colorize);
-Text_t Text$replace(Text_t str, Text_t pat, Text_t replacement);
-array_t Text$split(Text_t text, Text_t pattern);
-Int_t Text$find(Text_t text, Text_t pattern, Int_t i, int64_t *match_length);
-array_t Text$find_all(Text_t text, Text_t pattern);
-bool Text$has(Text_t text, Text_t pattern);
+Text_t Text$replace(Text_t str, Pattern_t pat, Text_t replacement);
+array_t Text$split(Text_t text, Pattern_t pattern);
+Int_t Text$find(Text_t text, Pattern_t pattern, Int_t i, int64_t *match_length);
+array_t Text$find_all(Text_t text, Pattern_t pattern);
+bool Text$has(Text_t text, Pattern_t pattern);
 const char *Text$as_c_string(Text_t text);
 public Text_t Text$format(const char *fmt, ...);
 array_t Text$clusters(Text_t text);
@@ -48,4 +48,7 @@ Text_t Text$join(Text_t glue, array_t pieces);
 
 extern const TypeInfo $Text;
 
+Pattern_t Pattern$escape_text(Text_t text);
+extern const TypeInfo Pattern;
+
 // vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/docs/text.md b/docs/text.md
index 5d399edd..855c3c6c 100644
--- a/docs/text.md
+++ b/docs/text.md
@@ -262,15 +262,20 @@ Text codebase is around 1.5K lines of code).
 For more advanced use cases, consider linking against a C library for regular
 expressions or pattern matching.
 
+`Pattern` is a [domain-specific language](docs/langs.md); in other words, it's
+like a `Text`, but it has a distinct type. As a convenience, you can use
+`$/.../` to write pattern literals instead of using the general-purpose DSL
+syntax of `$Pattern"..."`.
+
 Patterns are used in a small, but very powerful API that handles many text
 functions that would normally be handled by a more extensive API:
 
 ```
-Text.find(pattern:Text, start=1, length=!&Int64?)->Int
-Text.find_all(pattern:Text)->[Text]
-Text.split(pattern:Text)->[Text]
-Text.replace(pattern:Text, replacement:Text)->[Text]
-Text.has(pattern:Text)->Bool
+Text.find(pattern:Pattern, start=1, length=!&Int64?)->Int
+Text.find_all(pattern:Pattern)->[Text]
+Text.split(pattern:Pattern)->[Text]
+Text.replace(pattern:Pattern, replacement:Text)->Text
+Text.has(pattern:Pattern)->Bool
 ```
 
 See [Text Functions](#Text-Functions) for the full API documentation.
@@ -330,6 +335,41 @@ If an exclamation mark (`!`) is placed before a pattern's name, then characters
 are matched only when they _don't_ match the pattern. For example, `[..!alpha]`
 will match all characters _except_ alphabetic ones.
 
+## Interpolating Text and Escaping
+
+To escape a character in a pattern (e.g. if you want to match the literal
+character `?`), you can use the syntax `[..1 ?]`. This is almost never
+necessary unless you have text that looks like a Tomo text pattern and has
+something like `[..` or `(?)` inside it.
+
+However, if you're trying to do an exact match of arbitrary text values, you'll
+want to have the text automatically escaped. Fortunately, Tomo's injection-safe
+DSL text interpolation supports automatic text escaping. This means that if you
+use text interpolation with the `$` sign to insert a text value, the value will
+be automatically escaped using the `[..1 ?]` rule described above:
+
+```tomo
+# Risk of code injection (would cause an error because 'xxx' is not a valid
+# pattern name):
+>> user_input := get_user_input()
+= "[..xxx]"
+
+# Interpolation automatically escapes:
+>> $/$user_input/
+= $/[..1 []..xxx]/
+
+# No error:
+>> some_text:find($/$user_input/)
+= 0
+```
+
+If you prefer, you can also use this to insert literal characters:
+
+```tomo
+>> $/literal $"[..]"/
+= $/literal [..1]]..]/
+```
+
 ## Repetitions
 
 By default, named patterns match 1 or more repetitions, but you can specify how
diff --git a/environment.c b/environment.c
index f960aaec..6ecb8652 100644
--- a/environment.c
+++ b/environment.c
@@ -60,6 +60,7 @@ env_t *new_compilation_unit(CORD *libname)
         THREAD_TYPE = Type(StructType, .name="Thread", .env=thread_env, .opaque=true);
     }
 
+
     struct {
         const char *name;
         type_t *type;
@@ -230,22 +231,25 @@ env_t *new_compilation_unit(CORD *libname)
             {"reversed", "Range$reversed", "func(range:Range)->Range"},
             {"by", "Range$by", "func(range:Range, step:Int)->Range"},
         )},
+        {"Pattern", Type(TextType, .lang="Pattern", .env=namespace_env(env, "Pattern")), "Text_t", "Pattern", TypedArray(ns_entry_t,
+            {"escape_text", "Pattern$escape_text", "func(text:Text)->Pattern"},
+        )},
         {"Text", TEXT_TYPE, "Text_t", "$Text", TypedArray(ns_entry_t,
-            {"find", "Text$find", "func(text:Text, pattern:Text, start=1, length=!&Int64)->Int"},
-            {"find_all", "Text$find_all", "func(text:Text, pattern:Text)->[Text]"},
+            {"find", "Text$find", "func(text:Text, pattern:Pattern, start=1, length=!&Int64)->Int"},
+            {"find_all", "Text$find_all", "func(text:Text, pattern:Pattern)->[Text]"},
             {"as_c_string", "CORD_to_char_star", "func(text:Text)->CString"},
             {"codepoint_names", "Text$codepoint_names", "func(text:Text)->[Text]"},
             {"from_bytes", "Text$from_bytes", "func(bytes:[Int8])->Text"},
             {"from_c_string", "Text$from_str", "func(str:CString)->Text"},
             {"from_codepoint_names", "Text$from_codepoint_names", "func(codepoint_names:[Text])->Text"},
             {"from_codepoints", "Text$from_codepoints", "func(codepoints:[Int32])->Text"},
-            {"has", "Text$has", "func(text:Text, pattern:Text)->Bool"},
+            {"has", "Text$has", "func(text:Text, pattern:Pattern)->Bool"},
             {"join", "Text$join", "func(glue:Text, pieces:[Text])->Text"},
             {"lines", "Text$lines", "func(text:Text)->[Text]"},
             {"lower", "Text$lower", "func(text:Text)->Text"},
             {"quoted", "Text$quoted", "func(text:Text, color=no)->Text"},
-            {"replace", "Text$replace", "func(text:Text, pattern:Text, replacement:Text)->Text"},
-            {"split", "Text$split", "func(text:Text, pattern='')->[Text]"},
+            {"replace", "Text$replace", "func(text:Text, pattern:Pattern, replacement:Text)->Text"},
+            {"split", "Text$split", "func(text:Text, pattern=$Pattern'')->[Text]"},
             {"slice", "Text$slice", "func(text:Text, from=1, to=-1)->Text"},
             {"title", "Text$title", "func(text:Text)->Text"},
             {"upper", "Text$upper", "func(text:Text)->Text"},
@@ -294,6 +298,10 @@ env_t *new_compilation_unit(CORD *libname)
         }
     }
 
+    set_binding(namespace_env(env, "Pattern"), "from_unsafe_text",
+                new(binding_t, .type=Type(FunctionType, .args=new(arg_t, .name="text", .type=TEXT_TYPE), .ret=Type(TextType, .lang="Pattern")),
+                    .code="(Pattern_t)"));
+
     return env;
 }
 
diff --git a/learnxiny.tm b/learnxiny.tm
index 5c8ad6c7..fcf25688 100644
--- a/learnxiny.tm
+++ b/learnxiny.tm
@@ -1,24 +1,27 @@
-// Tomo is a statically typed, garbage collected imperative language with
-// emphasis on simplicity, safety, and speed. Tomo code cross compiles to C,
-// which is compiled to a binary using your C compiler of choice.
+# Tomo is a statically typed, garbage collected imperative language with
+# emphasis on simplicity, safety, and speed. Tomo code cross compiles to C,
+# which is compiled to a binary using your C compiler of choice.
 
-// To begin with, let's define a main function:
+# To begin with, let's define a main function:
 func main():
-    // This function's code will run if you run this file.
+    # This function's code will run if you run this file.
 
-    // Print to the console
+    # Print to the console
     say("Hello world!")
 
-    // Declare an integer variable (types are inferred)
+    # You can also use !! as a shorthand:
+    !! This is the same as using say(), but a bit easier to type
+
+    # Declare a variable with ':=' (the type is inferred to be integer)
     my_variable := 123
 
-    // Assign a new value
+    # Assign a new value
     my_variable = 99
 
-    // Floating point numbers are similar, but have a decimal point:
+    # Floating point numbers are similar, but require a decimal point:
     my_num := 2.0
 
-    // Strings can use interpolation with the dollar sign $:
+    # Strings can use interpolation with the dollar sign $:
     say("My variable is $my_variable and this is a sum: $(1 + 2)")
 
     say("
@@ -30,21 +33,21 @@ func main():
         The multiline string won't include a leading or trailing newline.
     ")
 
-    // Docstring tests use ">>" and when the program runs, they will print
-    // their source code to the console on stderr.
+    # Docstring tests use ">>" and when the program runs, they will print
+    # their source code to the console on stderr.
     >> 1 + 2
 
-    // If there is a following line with "=", you can perform a check that
-    // the output matches what was expected.
+    # If there is a following line with "=", you can perform a check that
+    # the output matches what was expected.
     >> 2 + 3
     = 5
-    // If there is a mismatch, the program will halt and print a useful
-    // error message.
+    # If there is a mismatch, the program will halt and print a useful
+    # error message.
 
-    // Booleans use "yes" and "no" instead of "true" and "false"
+    # Booleans use "yes" and "no" instead of "true" and "false"
     my_bool := yes
 
-    // Conditionals:
+    # Conditionals:
     if my_bool:
         say("It worked!")
     else if my_variable == 99:
@@ -52,125 +55,157 @@ func main():
     else:
         say("else")
 
-    // Arrays:
+    # Arrays:
     my_numbers := [10, 20, 30]
 
-    // Arrays are 1-indexed, so the first element is at index 1:
+    # Arrays are 1-indexed, so the first element is at index 1:
     >> my_numbers[1]
     = 10
 
-    // Negative indices can be used to get items from the back of the array:
+    # Negative indices can be used to get items from the back of the array:
     >> my_numbers[-1]
     = 30
 
-    // If an invalid index outside the array's bounds is used (e.g.
-    // my_numbers[999]), an error message will be printed and the program will
-    // exit.
+    # If an invalid index outside the array's bounds is used (e.g.
+    # my_numbers[999]), an error message will be printed and the program will
+    # exit.
 
-    // Iteration:
+    # Iteration:
     for num in my_numbers:
         >> num
 
-    // Optionally, you can use an iteration index as well:
+    # Optionally, you can use an iteration index as well:
     for index, num in my_numbers:
-        pass // Pass means "do nothing"
+        pass # Pass means "do nothing"
 
-    // Loop control flow uses "skip" and "stop"
-    for num in my_numbers:
-        if num == 20:
-            // You can specify which loop variable you're skipping/stopping if
-            // there is any ambiguity.
-            skip num
+    # Arrays can be created with array comprehensions, which are loops:
+    >> [x*10 for x in my_numbers]
+    = [100, 200, 300]
+    >> [x*10 for x in my_numbers if x != 20]
+    = [100, 300]
 
-        if num == 30:
-            stop
+    # Loop control flow uses "skip" and "stop"
+    for x in my_numbers:
+        for y in my_numbers:
+            if x == y:
+                skip
 
-        >> num
-    
-    // Tables are efficient hash maps
+            # For readability, you can also use postfix conditionals:
+            skip if x == y
+
+            if x + y == 60:
+                # Skip or stop can specify a loop variable if you want to
+                # affect an enclosing loop:
+                stop x
+
+    # Tables are efficient hash maps
     table := {"one": 1, "two": 2}
     >> table:get("two")
     = 2
 
-    // You can supply a default argument in case a key isn't found:
+    # You can supply a default argument in case a key isn't found:
     >> table:get("xxx", default=0)
     = 0
 
-    // Otherwise, a runtime error will be raised:
-    // >> table:get("xxx")
+    # Otherwise, a runtime error will be raised:
+    # >> table:get("xxx")
 
-    // Tables can be iterated over either by key or key,value:
+    # Tables can be iterated over either by key or key,value:
     for key in table:
         pass
 
     for key, value in table:
         pass
 
-    // Tables also have ".keys" and ".values" fields to explicitly access the
-    // array of keys or values in the table.
+    # Tables also have ".keys" and ".values" fields to explicitly access the
+    # array of keys or values in the table.
     >> table.keys
     = ["one", "two"]
     >> table.values
     = [1, 2]
 
-    // Tables can have fallbacks:
+    # Tables can have a fallback table that's consulted when the key
+    # isn't found in the table itself:
     table2 := {"three": 3; fallback=table}
     >> table2:get("two")
     = 2
     >> table2:get("three")
     = 3
 
-    // If no default is provided and a missing key is looked up, the program
-    // will print an error message and halt.
+    # Tables can also be created with comprehension loops:
+    >> {x:10*x for x in 5}
+    = {1:10, 2:20, 3:30, 4:40, 5:50}
 
-    // Any types can be used in tables, for example, a table mapping arrays to
-    // strings:
-    >> {[10, 20]: "one", [30, 40, 50]: "two"}
+    # If no default is provided and a missing key is looked up, the program
+    # will print an error message and halt.
+
+    # Any types can be used in tables, for example, a table mapping arrays to
+    # strings:
+    table3 := {[10, 20]: "one", [30, 40, 50]: "two"}
+    >> table3:get([10, 20])
+    = "one"
+
+    # Sets are similar to tables, but they represent an unordered collection of
+    # unique values:
+    set := {10, 20, 30}
+    >> set:has(20)
+    = yes
+    >> set:has(999)
+    = no
 
-    // So far, the datastructures that have been discussed are all *immutable*,
-    // meaning you can't add, remove, or change their contents. If you want to
-    // have mutable data, you need to allocate an area of memory which can hold
-    // different values using the "@" operator (think: "@llocate").
+    # You can do some operations on sets:
+    other_set := {30, 40, 50}
+    >> set:with(other_set)
+    = {10, 20, 30, 40, 50}
+    >> set:without(other_set)
+    = {10, 20}
+    >> set:overlap(other_set)
+    = {30}
+
+    # So far, the datastructures that have been discussed are all *immutable*,
+    # meaning you can't add, remove, or change their contents. If you want to
+    # have mutable data, you need to allocate an area of memory which can hold
+    # different values using the "@" operator (think: "(a)llocate").
     my_arr := @[10, 20, 30]
     my_arr[1] = 999
     >> my_arr
     = @[999, 20, 30]
 
-    // To call a method, you must use ":" and the name of the method:
+    # To call a method, you must use ":" and the name of the method:
     my_arr:sort()
     >> my_arr
     = @[20, 30, 999]
 
-    // To access the immutable value that resides inside the memory area, you
-    // can use the "[]" operator:
+    # To access the immutable value that resides inside the memory area, you
+    # can use the "[]" operator:
     >> my_arr[]
     = [20, 30, 999]
 
-    // You can think of this like taking a photograph of what's at that memory
-    // location. Later, a new value might end up there, but the photograph will
-    // remain unchanged.
+    # You can think of this like taking a photograph of what's at that memory
+    # location. Later, a new value might end up there, but the photograph will
+    # remain unchanged.
     snapshot := my_arr[]
     my_arr:insert(1000)
     >> my_arr
     = @[20, 30, 999, 1000]
     >> snapshot
     = [20, 30, 999]
-    // Internally, this is implemented using copy-on-write, so it's quite
-    // efficient.
+    # Internally, this is implemented using copy-on-write, so it's quite
+    # efficient.
 
-    // These demos are defined below:
+    # These demos are defined below:
     demo_keyword_args()
     demo_structs()
     demo_enums()
     demo_lambdas()
 
-// Functions must be declared at the top level of a file and must specify the
-// types of all of their arguments and return value (if any):
+# Functions must be declared at the top level of a file and must specify the
+# types of all of their arguments and return value (if any):
 func add(x:Int, y:Int)->Int:
     return x + y
 
-// Default values for arguments can be provided in place of a type (the type is
-// inferred from the default value):
+# Default values for arguments can be provided in place of a type (the type is
+# inferred from the default value):
 func show_both(first:Int, second=0)->Text:
     return "first=$first second=$second"
 
@@ -178,15 +213,15 @@ func demo_keyword_args():
     >> show_both(1, 2)
     = "first=1 second=2"
 
-    // If unspecified, the default argument is used:
+    # If unspecified, the default argument is used:
     >> show_both(1)
     = "first=1 second=0"
 
-    // Arguments can be specified by name, in any order:
+    # Arguments can be specified by name, in any order:
     >> show_both(second=20, 10)
     = "first=10 second=20"
 
-// Here are some different type signatures:
+# Here are some different type signatures:
 func takes_many_types(
     boolean:Bool,
     integer:Int,
@@ -200,42 +235,42 @@ func takes_many_types(
 ):
     pass
 
-// Now let's define our own datastructure, a humble struct:
+# Now let's define our own datastructure, a humble struct:
 struct Person(name:Text, age:Int):
-    // We can define constants here if we want to:
+    # We can define constants here if we want to:
     max_age := 122
 
-    // Methods are defined here as well:
+    # Methods are defined here as well:
     func say_age(self:Person):
         say("My age is $self.age")
 
-    // If you want to mutate a value, you must have a mutable pointer:
+    # If you want to mutate a value, you must have a mutable pointer:
     func increase_age(self:@Person, amount=1):
         self.age += amount
 
-    // Methods don't have to take a Person as their first argument:
+    # Methods don't have to take a Person as their first argument:
     func get_cool_name()->Text:
         return "Blade"
 
 func demo_structs():
-    // Creating a struct:
+    # Creating a struct:
     alice := Person("Alice", 30)
     >> alice
     = Person(name="Alice", age=30)
 
-    // Accessing fields:
+    # Accessing fields:
     >> alice.age
     = 30
 
-    // Calling methods:
+    # Calling methods:
     alice:say_age()
 
-    // You can call static methods by using the class name and ".":
+    # You can call static methods by using the class name and ".":
     >> Person.get_cool_name()
     = "Blade"
 
-    // Comparisons, conversion to text, and hashing are all handled
-    // automatically when you create a struct:
+    # Comparisons, conversion to text, and hashing are all handled
+    # automatically when you create a struct:
     bob := Person("Bob", 30)
     >> alice == bob
     = no
@@ -248,40 +283,40 @@ func demo_structs():
     = "first"
 
 
-// Now let's look at another feature: enums. Tomo enums are tagged unions, also
-// known as "sum types". You enumerate all the different types of values
-// something could have, and it's stored internally as a small integer that
-// indicates which type it is, and any data you want to associate with it.
+# Now let's look at another feature: enums. Tomo enums are tagged unions, also
+# known as "sum types". You enumerate all the different types of values
+# something could have, and it's stored internally as a small integer that
+# indicates which type it is, and any data you want to associate with it.
 enum Shape(
     Point,
     Circle(radius:Num),
     Rectangle(width:Num, height:Num),
 ):
-    // Just like with structs, you define methods and constants inside a level
-    // of indentation:
+    # Just like with structs, you define methods and constants inside a level
+    # of indentation:
     func get_area(self:Shape)->Num:
-        // In order to work with an enum, it's most often handy to use a 'when'
-        // statement to get the internal values:
+        # In order to work with an enum, it's most often handy to use a 'when'
+        # statement to get the internal values:
         when self is Point:
             return 0
         is Circle(r):
             return Num.PI * r^2
         is Rectangle(w, h):
             return w * h
-        // 'when' statements are checked for exhaustiveness, so the compiler
-        // will give an error if you forgot any cases. You can also use 'else:'
-        // if you want a fallback to handle other cases.
+        # 'when' statements are checked for exhaustiveness, so the compiler
+        # will give an error if you forgot any cases. You can also use 'else:'
+        # if you want a fallback to handle other cases.
 
 func demo_enums():
-    // Enums are constructed like this:
+    # Enums are constructed like this:
     my_shape := Shape.Circle(1.0)
 
-    // If an enum type doesn't have any associated data, it is not invoked as a
-    // function, but is just a static value:
+    # If an enum type doesn't have any associated data, it is not invoked as a
+    # function, but is just a static value:
     other_shape := Shape.Point
 
-    // Similar to structs, enums automatically define comparisons, conversion
-    // to text, and hashing:
+    # Similar to structs, enums automatically define comparisons, conversion
+    # to text, and hashing:
     >> my_shape == other_shape
     = no
 
@@ -292,19 +327,19 @@ func demo_enums():
     = {Shape.Circle(radius=1):"nice"}
 
 func demo_lambdas():
-    // Lambdas, or anonymous functions, can be used like this:
+    # Lambdas, or anonymous functions, can be used like this:
     add_one := func(x:Int): x + 1
     >> add_one(5)
     = 6
 
-    // Lambdas can capture closure values, but only as a snapshot from when the
-    // lambda was created:
+    # Lambdas can capture closure values, but only as a snapshot from when the
+    # lambda was created:
     n := 10
     add_n := func(x:Int): x + n
     >> add_n(5)
     = 15
 
-    // The lambda's closure won't change when this variable is reassigned:
+    # The lambda's closure won't change when this variable is reassigned:
     n = -999
     >> add_n(5)
     = 15
diff --git a/parse.c b/parse.c
index 8ab3688e..36ce3ddf 100644
--- a/parse.c
+++ b/parse.c
@@ -378,7 +378,7 @@ const char *get_id(const char **inout) {
 }
 
 bool comment(const char **pos) {
-    if ((*pos)[0] == '/' && (*pos)[1] == '/' && (*pos)[2] != '!') {
+    if ((*pos)[0] == '#') {
         *pos += strcspn(*pos, "\r\n");
         return true;
     } else {
@@ -1194,6 +1194,9 @@ PARSER(parse_text) {
         open_quote = *pos;
         ++pos;
         close_quote = closing[(int)open_quote] ? closing[(int)open_quote] : open_quote;
+
+        if (!lang && open_quote == '/')
+            lang = "Pattern";
     } else {
         return NULL;
     }
@@ -2132,7 +2135,7 @@ PARSER(parse_doctest) {
 
 PARSER(parse_say) {
     const char *start = pos;
-    if (!match(&pos, "//!")) return NULL;
+    if (!match(&pos, "!!")) return NULL;
     spaces(&pos);
 
     ast_list_t *chunks = NULL;
diff --git a/test/arrays.tm b/test/arrays.tm
index 21c8cb5c..16f411a2 100644
--- a/test/arrays.tm
+++ b/test/arrays.tm
@@ -78,23 +78,23 @@ func main():
 		>> arr := [10, 20, 30]
 		>> reversed := arr:reversed()
 		= [30, 20, 10]
-		// Ensure the copy-on-write behavior triggers:
+		# Ensure the copy-on-write behavior triggers:
 		>> arr[1] = 999
 		>> reversed
 		= [30, 20, 10]
 
 	do:
 		>> nums := [10, -20, 30]
-		// Sorted function doesn't mutate original:
+		# Sorted function doesn't mutate original:
 		>> nums:sorted()
 		= [-20, 10, 30]
 		>> nums
 		= [10, -20, 30]
-		// Sort function does mutate in place:
+		# Sort function does mutate in place:
 		>> nums:sort()
 		>> nums
 		= [-20, 10, 30]
-		// Custom sort functions:
+		# Custom sort functions:
 		>> nums:sort(func(x:&%Int,y:&%Int): x:abs() <> y:abs())
 		>> nums
 		= [10, -20, 30]
@@ -148,7 +148,7 @@ func main():
 		>> [i*10 for i in 10]:by(2):by(-1)
 		= [90, 70, 50, 30, 10]
 
-		// Test iterating over array:from() and array:to()
+		# Test iterating over array:from() and array:to()
 		xs := ["A", "B", "C", "D"]
 		for i,x in xs:to(-2):
 			for y in xs:from(i+1):
diff --git a/test/integers.tm b/test/integers.tm
index 5743cbb2..e15b9473 100644
--- a/test/integers.tm
+++ b/test/integers.tm
@@ -79,7 +79,7 @@ func main():
 		for in 20:
 			>> n := Int.random(-999999, 999999)
 			>> d := Int.random(-999, 999)
-			//! n=$n, d=$d:
+			!! n=$n, d=$d:
 			>> (n/d)*d + (n mod d) == n
 			= yes
 
diff --git a/test/lambdas.tm b/test/lambdas.tm
index 8d543bfc..62b5fd36 100644
--- a/test/lambdas.tm
+++ b/test/lambdas.tm
@@ -31,12 +31,12 @@ func main():
 	>> abs100(-5)
 	= 500
 
-	// Test nested lambdas:
+	# Test nested lambdas:
     outer := "Hello"
     fn := func():
         return func():
             return func():
-                defer: //! $outer
+                defer: !! $outer
                 return outer
     >> fn()()()
 	= "Hello"
diff --git a/test/lang.tm b/test/lang.tm
index d78476de..e2093a6e 100644
--- a/test/lang.tm
+++ b/test/lang.tm
@@ -1,11 +1,11 @@
 lang HTML:
 	HEADER := $HTML"<!DOCTYPE HTML>"
 	func escape(t:Text)->HTML:
-		t = t:replace("&", "&amp;")
-		t = t:replace("<", "&lt;")
-		t = t:replace(">", "&gt;")
-		t = t:replace('"', "&quot;")
-		t = t:replace("'", "&#39;")
+		t = t:replace($/&/, "&amp;")
+		t = t:replace($/</, "&lt;")
+		t = t:replace($/>/, "&gt;")
+		t = t:replace($/"/, "&quot;")
+		t = t:replace($/'/, "&#39;")
 		return HTML.from_unsafe_text(t)
 
 	func escape_int(i:Int)->HTML:
diff --git a/test/text.tm b/test/text.tm
index d82a38c7..3049af99 100644
--- a/test/text.tm
+++ b/test/text.tm
@@ -1,6 +1,6 @@
 func main():
 	>> str := "Hello Amélie!"
-	//! Testing strings like $str
+	!! Testing strings like $str
 
 	>> str:upper()
 	= "HELLO AMÉLIE!"
@@ -40,45 +40,45 @@ func main():
 	>> amelie2:codepoint_names()
 	= ["LATIN CAPITAL LETTER A", "LATIN SMALL LETTER M", "LATIN SMALL LETTER E WITH ACUTE", "LATIN SMALL LETTER L", "LATIN SMALL LETTER I", "LATIN SMALL LETTER E"]
 
-	>> "Hello":replace("e", "X")
+	>> "Hello":replace($/e/, "X")
 	= "HXllo"
 
-	>> "Hello":has("l")
+	>> "Hello":has($/l/)
 	= yes
-	>> "Hello":has("l[..end]")
+	>> "Hello":has($/l[..end]/)
 	= no
-	>> "Hello":has("[..start]l")
+	>> "Hello":has($/[..start]l/)
 	= no
 
-	>> "Hello":has("o")
+	>> "Hello":has($/o/)
 	= yes
-	>> "Hello":has("o[..end]")
+	>> "Hello":has($/o[..end]/)
 	= yes
-	>> "Hello":has("[..start]o")
+	>> "Hello":has($/[..start]o/)
 	= no
 
-	>> "Hello":has("H")
+	>> "Hello":has($/H/)
 	= yes
-	>> "Hello":has("H[..end]")
+	>> "Hello":has($/H[..end]/)
 	= no
-	>> "Hello":has("[..start]H")
+	>> "Hello":has($/[..start]H/)
 	= yes
 
-	>> "Hello":replace("l", "")
+	>> "Hello":replace($/l/, "")
 	= "Heo"
-	>> "xxxx":replace("x", "")
+	>> "xxxx":replace($/x/, "")
 	= ""
-	>> "xxxx":replace("y", "")
+	>> "xxxx":replace($/y/, "")
 	= "xxxx"
-	>> "One two three four five six":replace("e ", "")
+	>> "One two three four five six":replace($/e /, "")
 	= "Ontwo threfour fivsix"
 
-	>> " one ":replace("[..start][..space]", "")
+	>> " one ":replace($/[..start][..space]/, "")
 	= "one "
-	>> " one ":replace("[..space][..end]", "")
+	>> " one ":replace($/[..space][..end]/, "")
 	= " one"
 
-	>> amelie:has(amelie2)
+	>> amelie:has($/$amelie2/)
 
 
 	>> multiline := "
@@ -87,7 +87,7 @@ func main():
 	"
 	= "line one\nline two"
 
-	//! Interpolation tests:
+	!! Interpolation tests:
 	>> "A $(1+2)"
 	= "A 3"
 	>> 'A $(1+2)'
@@ -104,9 +104,9 @@ func main():
 	>> $(one (nested) two $(1+2))
 	= "one (nested) two 3"
 
-	>> "one two three":replace("[..alpha]", "")
+	>> "one two three":replace($/[..alpha]/, "")
 	= "  "
-	>> "one two three":replace("[..alpha]", "word")
+	>> "one two three":replace($/[..alpha]/, "word")
 	= "word word word"
 
 	>> c := "É̩"
@@ -130,17 +130,17 @@ func main():
 	>> "":lines()
 	= []
 
-	//! Test splitting and joining text:
-	>> "one two three":split(" ")
+	!! Test splitting and joining text:
+	>> "one two three":split($/ /)
 	= ["one", "two", "three"]
 
-	>> "one,two,three,":split(",")
+	>> "one,two,three,":split($/,/)
 	= ["one", "two", "three", ""]
 
-	>> "one    two three":split("[..space]")
+	>> "one    two three":split($/[..space]/)
 	= ["one", "two", "three"]
 
-	>> "abc":split("")
+	>> "abc":split($//)
 	= ["a", "b", "c"]
 
 	>> ", ":join(["one", "two", "three"])
@@ -158,42 +158,42 @@ func main():
 	>> "":split()
 	= []
 
-	//! Test text:find_all()
-	>> " one  two three   ":find_all("[..alpha]")
+	!! Test text:find_all()
+	>> " one  two three   ":find_all($/[..alpha]/)
 	= ["one", "two", "three"]
 
-	>> " one  two three   ":find_all("[..!space]")
+	>> " one  two three   ":find_all($/[..!space]/)
 	= ["one", "two", "three"]
 
-	>> "    ":find_all("[..alpha]")
+	>> "    ":find_all($/[..alpha]/)
 	= []
 
-	>> " foo(baz(), 1)  doop() ":find_all("[..id](?)")
+	>> " foo(baz(), 1)  doop() ":find_all($/[..id](?)/)
 	= ["foo(baz(), 1)", "doop()"]
 
-	>> "":find_all("")
+	>> "":find_all($Pattern'')
 	= []
 
-	>> "Hello":find_all("")
+	>> "Hello":find_all($Pattern'')
 	= []
 
-	//! Test text:find()
-	>> " one   two  three   ":find("[..id]", start=-999)
+	!! Test text:find()
+	>> " one   two  three   ":find($/[..id]/, start=-999)
 	= 0
-	>> " one   two  three   ":find("[..id]", start=999)
+	>> " one   two  three   ":find($/[..id]/, start=999)
 	= 0
-	>> " one   two  three   ":find("[..id]")
+	>> " one   two  three   ":find($/[..id]/)
 	= 2
-	>> " one   two  three   ":find("[..id]", start=5)
+	>> " one   two  three   ":find($/[..id]/, start=5)
 	= 8
 
 	>> len := 0_i64
-	>> "   one  ":find("[..id]", length=&len)
+	>> "   one  ":find($/[..id]/, length=&len)
 	= 4
 	>> len
 	= 3_i64
 
-	//! Test text slicing:
+	!! Test text slicing:
 	>> "abcdef":slice()
 	= "abcdef"
 	>> "abcdef":slice(from=3)
@@ -220,3 +220,8 @@ func main():
 	>> Text.from_codepoint_names(["not a valid name here buddy"])
 	= ""
 
+	>> malicious := "[..xxx"
+	>> $/$malicious/
+	= $/[..1[]..xxx/
+
+
diff --git a/test/threads.tm b/test/threads.tm
index 986ab864..2f259a0e 100644
--- a/test/threads.tm
+++ b/test/threads.tm
@@ -26,7 +26,7 @@ func main():
 
     results := |:Int; max_size|
     >> thread := Thread.new(func():
-        //! In another thread!
+        !! In another thread!
         while yes:
             >> got := jobs:get()
             when got is Increment(x):
@@ -63,8 +63,8 @@ func main():
     >> results:get()
     = 1001
 
-    //! Canceling...
+    !! Canceling...
     >> thread:cancel()
-    //! Joining...
+    !! Joining...
     >> thread:join()
-    //! Done!
+    !! Done!
author	Bruce Hill <bruce@bruce-hill.com>	2024-09-03 13:19:41 -0400
committer	Bruce Hill <bruce@bruce-hill.com>	2024-09-03 13:19:41 -0400
commit	64143f0a131a053414e4b73c17bff994522b11c2 (patch)
tree	2545507fde623f8846bf183388acdbb0234b5e65
parent	5feecff9d93522002c74a1423d138c2aa8bc150d (diff)