aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBruce Hill <bruce@bruce-hill.com>2024-09-03 13:19:41 -0400
committerBruce Hill <bruce@bruce-hill.com>2024-09-03 13:19:41 -0400
commit64143f0a131a053414e4b73c17bff994522b11c2 (patch)
tree2545507fde623f8846bf183388acdbb0234b5e65
parent5feecff9d93522002c74a1423d138c2aa8bc150d (diff)
Syntax overhaul (comments back to `#`, print statements to `!!`),
using `$/.../` for patterns and using a DSL for patterns
-rw-r--r--builtins/datatypes.h2
-rw-r--r--builtins/text.c98
-rw-r--r--builtins/text.h13
-rw-r--r--docs/text.md50
-rw-r--r--environment.c18
-rw-r--r--learnxiny.tm237
-rw-r--r--parse.c7
-rw-r--r--test/arrays.tm10
-rw-r--r--test/integers.tm2
-rw-r--r--test/lambdas.tm4
-rw-r--r--test/lang.tm10
-rw-r--r--test/text.tm85
-rw-r--r--test/threads.tm8
13 files changed, 344 insertions, 200 deletions
diff --git a/builtins/datatypes.h b/builtins/datatypes.h
index 433e1dd9..1311797c 100644
--- a/builtins/datatypes.h
+++ b/builtins/datatypes.h
@@ -86,4 +86,6 @@ typedef struct Text_s {
};
} Text_t;
+#define Pattern_t Text_t
+
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/builtins/text.c b/builtins/text.c
index 99d17577..d9da1248 100644
--- a/builtins/text.c
+++ b/builtins/text.c
@@ -1034,7 +1034,7 @@ int64_t match_uri(Text_t text, int64_t text_index)
return text_index - start_index;
}
-int64_t match(Text_t text, Text_t pattern, int64_t text_index, int64_t pattern_index)
+int64_t match(Text_t text, Pattern_t pattern, int64_t text_index, int64_t pattern_index)
{
if (pattern_index >= pattern.length) return 0;
int64_t start_index = text_index;
@@ -1306,7 +1306,7 @@ int64_t match(Text_t text, Text_t pattern, int64_t text_index, int64_t pattern_i
#undef EAT1
#undef EAT_MANY
-public Int_t Text$find(Text_t text, Text_t pattern, Int_t from_index, int64_t *match_length)
+public Int_t Text$find(Text_t text, Pattern_t pattern, Int_t from_index, int64_t *match_length)
{
int64_t first = Int_to_Int64(from_index, false);
if (first == 0) fail("Invalid index: 0");
@@ -1340,7 +1340,7 @@ public Int_t Text$find(Text_t text, Text_t pattern, Int_t from_index, int64_t *m
return I(0);
}
-public bool Text$has(Text_t text, Text_t pattern)
+public bool Text$has(Text_t text, Pattern_t pattern)
{
return !I_is_zero(Text$find(text, pattern, I_small(1), NULL));
}
@@ -1363,26 +1363,17 @@ public int printf_text(FILE *stream, const struct printf_info *info, const void
return Text$print(stream, t);
}
-public Text_t Text$as_text(const void *text, bool colorize, const TypeInfo *info)
-{
- (void)info;
- if (!text) return info && info->TextInfo.lang ? Text$from_str(info->TextInfo.lang) : Text$from_str("Text");
- Text_t as_text = Text$quoted(*(Text_t*)text, colorize);
- if (info && info->TextInfo.lang && info != &$Text)
- as_text = Text$concat(Text$from_str(colorize ? "\x1b[1m$" : "$"), Text$from_str(info->TextInfo.lang), as_text);
- return as_text;
-}
-
-public Text_t Text$quoted(Text_t text, bool colorize)
+static inline Text_t _quoted(Text_t text, bool colorize, char quote_char)
{
// TODO: optimize for ASCII and short strings
array_t graphemes = {.atomic=1};
#define add_char(c) Array$insert_value(&graphemes, (uint32_t)c, I_small(0), sizeof(uint32_t))
#define add_str(s) ({ for (char *_c = s; *_c; ++_c) Array$insert_value(&graphemes, (uint32_t)*_c, I_small(0), sizeof(uint32_t)); })
if (colorize)
- add_str("\x1b[35m\"");
- else
- add_char('"');
+ add_str("\x1b[35m");
+ if (quote_char != '"' && quote_char != '\"' && quote_char != '`')
+ add_char('$');
+ add_char(quote_char);
#define add_escaped(str) ({ if (colorize) add_str("\x1b[34;1m"); add_char('\\'); add_str(str); if (colorize) add_str("\x1b[0;35m"); })
iteration_state_t state = {0, 0};
@@ -1397,7 +1388,6 @@ public Text_t Text$quoted(Text_t text, bool colorize)
case '\r': add_escaped("r"); break;
case '\t': add_escaped("t"); break;
case '\v': add_escaped("v"); break;
- case '"': add_escaped("\""); break;
case '\\': add_escaped("\\"); break;
case '\x00' ... '\x06': case '\x0E' ... '\x1A':
case '\x1C' ... '\x1F': case '\x7F' ... '\x7F': {
@@ -1411,14 +1401,19 @@ public Text_t Text$quoted(Text_t text, bool colorize)
add_str("\x1b[0;35m");
break;
}
- default: add_char(g); break;
+ default: {
+ if (g == quote_char)
+ add_escaped(((char[2]){quote_char, 0}));
+ else
+ add_char(g);
+ break;
+ }
}
}
+ add_char(quote_char);
if (colorize)
- add_str("\"\x1b[m");
- else
- add_char('"');
+ add_str("\x1b[m");
return (Text_t){.length=graphemes.length, .tag=TEXT_GRAPHEMES, .graphemes=graphemes.data};
#undef add_str
@@ -1426,7 +1421,22 @@ public Text_t Text$quoted(Text_t text, bool colorize)
#undef add_escaped
}
-public array_t Text$find_all(Text_t text, Text_t pattern)
+public Text_t Text$as_text(const void *text, bool colorize, const TypeInfo *info)
+{
+ (void)info;
+ if (!text) return info && info->TextInfo.lang ? Text$from_str(info->TextInfo.lang) : Text$from_str("Text");
+ Text_t as_text = _quoted(*(Text_t*)text, colorize, info == &Pattern ? '/' : '"');
+ if (info && info->TextInfo.lang && info != &$Text && info != &Pattern)
+ as_text = Text$concat(Text$from_str(colorize ? "\x1b[1m$" : "$"), Text$from_str(info->TextInfo.lang), as_text);
+ return as_text;
+}
+
+public Text_t Text$quoted(Text_t text, bool colorize)
+{
+ return _quoted(text, colorize, '"');
+}
+
+public array_t Text$find_all(Text_t text, Pattern_t pattern)
{
if (pattern.length == 0) // special case
return (array_t){.length=0};
@@ -1446,7 +1456,7 @@ public array_t Text$find_all(Text_t text, Text_t pattern)
return matches;
}
-public Text_t Text$replace(Text_t text, Text_t pattern, Text_t replacement)
+public Text_t Text$replace(Text_t text, Pattern_t pattern, Text_t replacement)
{
Text_t ret = {.length=0};
@@ -1470,7 +1480,7 @@ public Text_t Text$replace(Text_t text, Text_t pattern, Text_t replacement)
return ret;
}
-public array_t Text$split(Text_t text, Text_t pattern)
+public array_t Text$split(Text_t text, Pattern_t pattern)
{
if (text.length == 0) // special case
return (array_t){.length=0};
@@ -1656,4 +1666,42 @@ public const TypeInfo $Text = {
.TextInfo={.lang="Text"},
};
+public Pattern_t Pattern$escape_text(Text_t text)
+{
+ // TODO: optimize for ASCII and short strings
+ array_t graphemes = {.atomic=1};
+#define add_char(c) Array$insert_value(&graphemes, (uint32_t)c, I_small(0), sizeof(uint32_t))
+#define add_str(s) ({ for (char *_c = s; *_c; ++_c) Array$insert_value(&graphemes, (uint32_t)*_c, I_small(0), sizeof(uint32_t)); })
+ iteration_state_t state = {0, 0};
+ for (int64_t i = 0; i < text.length; i++) {
+ int32_t g = _next_grapheme(text, &state, i);
+ uint32_t g0 = g < 0 ? synthetic_graphemes[-g-1].codepoints[0] : (uint32_t)g;
+
+ if (g == '[') {
+ add_str("[..1[]");
+ } else if (uc_is_property_quotation_mark(g0)) {
+ add_str("[..1");
+ add_char(g);
+ add_char(']');
+ } else if (uc_is_property_paired_punctuation(g0)) {
+ add_str("[..1");
+ add_char(g);
+ add_char(']');
+ } else {
+ add_char(g);
+ }
+ }
+ return (Text_t){.length=graphemes.length, .tag=TEXT_GRAPHEMES, .graphemes=graphemes.data};
+#undef add_str
+#undef add_char
+#undef add_escaped
+}
+
+public const TypeInfo Pattern = {
+ .size=sizeof(Text_t),
+ .align=__alignof__(Text_t),
+ .tag=TextInfo,
+ .TextInfo={.lang="Pattern"},
+};
+
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/builtins/text.h b/builtins/text.h
index b3cb6d79..edf3dc9f 100644
--- a/builtins/text.h
+++ b/builtins/text.h
@@ -29,11 +29,11 @@ Text_t Text$lower(Text_t text);
Text_t Text$title(Text_t text);
Text_t Text$as_text(const void *text, bool colorize, const TypeInfo *info);
Text_t Text$quoted(Text_t str, bool colorize);
-Text_t Text$replace(Text_t str, Text_t pat, Text_t replacement);
-array_t Text$split(Text_t text, Text_t pattern);
-Int_t Text$find(Text_t text, Text_t pattern, Int_t i, int64_t *match_length);
-array_t Text$find_all(Text_t text, Text_t pattern);
-bool Text$has(Text_t text, Text_t pattern);
+Text_t Text$replace(Text_t str, Pattern_t pat, Text_t replacement);
+array_t Text$split(Text_t text, Pattern_t pattern);
+Int_t Text$find(Text_t text, Pattern_t pattern, Int_t i, int64_t *match_length);
+array_t Text$find_all(Text_t text, Pattern_t pattern);
+bool Text$has(Text_t text, Pattern_t pattern);
const char *Text$as_c_string(Text_t text);
public Text_t Text$format(const char *fmt, ...);
array_t Text$clusters(Text_t text);
@@ -48,4 +48,7 @@ Text_t Text$join(Text_t glue, array_t pieces);
extern const TypeInfo $Text;
+Pattern_t Pattern$escape_text(Text_t text);
+extern const TypeInfo Pattern;
+
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/docs/text.md b/docs/text.md
index 5d399edd..855c3c6c 100644
--- a/docs/text.md
+++ b/docs/text.md
@@ -262,15 +262,20 @@ Text codebase is around 1.5K lines of code).
For more advanced use cases, consider linking against a C library for regular
expressions or pattern matching.
+`Pattern` is a [domain-specific language](docs/langs.md); in other words, it's
+like a `Text`, but it has a distinct type. As a convenience, you can use
+`$/.../` to write pattern literals instead of using the general-purpose DSL
+syntax of `$Pattern"..."`.
+
Patterns are used in a small, but very powerful API that handles many text
functions that would normally be handled by a more extensive API:
```
-Text.find(pattern:Text, start=1, length=!&Int64?)->Int
-Text.find_all(pattern:Text)->[Text]
-Text.split(pattern:Text)->[Text]
-Text.replace(pattern:Text, replacement:Text)->[Text]
-Text.has(pattern:Text)->Bool
+Text.find(pattern:Pattern, start=1, length=!&Int64?)->Int
+Text.find_all(pattern:Pattern)->[Text]
+Text.split(pattern:Pattern)->[Text]
+Text.replace(pattern:Pattern, replacement:Text)->Text
+Text.has(pattern:Pattern)->Bool
```
See [Text Functions](#Text-Functions) for the full API documentation.
@@ -330,6 +335,41 @@ If an exclamation mark (`!`) is placed before a pattern's name, then characters
are matched only when they _don't_ match the pattern. For example, `[..!alpha]`
will match all characters _except_ alphabetic ones.
+## Interpolating Text and Escaping
+
+To escape a character in a pattern (e.g. if you want to match the literal
+character `?`), you can use the syntax `[..1 ?]`. This is almost never
+necessary unless you have text that looks like a Tomo text pattern and has
+something like `[..` or `(?)` inside it.
+
+However, if you're trying to do an exact match of arbitrary text values, you'll
+want to have the text automatically escaped. Fortunately, Tomo's injection-safe
+DSL text interpolation supports automatic text escaping. This means that if you
+use text interpolation with the `$` sign to insert a text value, the value will
+be automatically escaped using the `[..1 ?]` rule described above:
+
+```tomo
+# Risk of code injection (would cause an error because 'xxx' is not a valid
+# pattern name):
+>> user_input := get_user_input()
+= "[..xxx]"
+
+# Interpolation automatically escapes:
+>> $/$user_input/
+= $/[..1 []..xxx]/
+
+# No error:
+>> some_text:find($/$user_input/)
+= 0
+```
+
+If you prefer, you can also use this to insert literal characters:
+
+```tomo
+>> $/literal $"[..]"/
+= $/literal [..1]]..]/
+```
+
## Repetitions
By default, named patterns match 1 or more repetitions, but you can specify how
diff --git a/environment.c b/environment.c
index f960aaec..6ecb8652 100644
--- a/environment.c
+++ b/environment.c
@@ -60,6 +60,7 @@ env_t *new_compilation_unit(CORD *libname)
THREAD_TYPE = Type(StructType, .name="Thread", .env=thread_env, .opaque=true);
}
+
struct {
const char *name;
type_t *type;
@@ -230,22 +231,25 @@ env_t *new_compilation_unit(CORD *libname)
{"reversed", "Range$reversed", "func(range:Range)->Range"},
{"by", "Range$by", "func(range:Range, step:Int)->Range"},
)},
+ {"Pattern", Type(TextType, .lang="Pattern", .env=namespace_env(env, "Pattern")), "Text_t", "Pattern", TypedArray(ns_entry_t,
+ {"escape_text", "Pattern$escape_text", "func(text:Text)->Pattern"},
+ )},
{"Text", TEXT_TYPE, "Text_t", "$Text", TypedArray(ns_entry_t,
- {"find", "Text$find", "func(text:Text, pattern:Text, start=1, length=!&Int64)->Int"},
- {"find_all", "Text$find_all", "func(text:Text, pattern:Text)->[Text]"},
+ {"find", "Text$find", "func(text:Text, pattern:Pattern, start=1, length=!&Int64)->Int"},
+ {"find_all", "Text$find_all", "func(text:Text, pattern:Pattern)->[Text]"},
{"as_c_string", "CORD_to_char_star", "func(text:Text)->CString"},
{"codepoint_names", "Text$codepoint_names", "func(text:Text)->[Text]"},
{"from_bytes", "Text$from_bytes", "func(bytes:[Int8])->Text"},
{"from_c_string", "Text$from_str", "func(str:CString)->Text"},
{"from_codepoint_names", "Text$from_codepoint_names", "func(codepoint_names:[Text])->Text"},
{"from_codepoints", "Text$from_codepoints", "func(codepoints:[Int32])->Text"},
- {"has", "Text$has", "func(text:Text, pattern:Text)->Bool"},
+ {"has", "Text$has", "func(text:Text, pattern:Pattern)->Bool"},
{"join", "Text$join", "func(glue:Text, pieces:[Text])->Text"},
{"lines", "Text$lines", "func(text:Text)->[Text]"},
{"lower", "Text$lower", "func(text:Text)->Text"},
{"quoted", "Text$quoted", "func(text:Text, color=no)->Text"},
- {"replace", "Text$replace", "func(text:Text, pattern:Text, replacement:Text)->Text"},
- {"split", "Text$split", "func(text:Text, pattern='')->[Text]"},
+ {"replace", "Text$replace", "func(text:Text, pattern:Pattern, replacement:Text)->Text"},
+ {"split", "Text$split", "func(text:Text, pattern=$Pattern'')->[Text]"},
{"slice", "Text$slice", "func(text:Text, from=1, to=-1)->Text"},
{"title", "Text$title", "func(text:Text)->Text"},
{"upper", "Text$upper", "func(text:Text)->Text"},
@@ -294,6 +298,10 @@ env_t *new_compilation_unit(CORD *libname)
}
}
+ set_binding(namespace_env(env, "Pattern"), "from_unsafe_text",
+ new(binding_t, .type=Type(FunctionType, .args=new(arg_t, .name="text", .type=TEXT_TYPE), .ret=Type(TextType, .lang="Pattern")),
+ .code="(Pattern_t)"));
+
return env;
}
diff --git a/learnxiny.tm b/learnxiny.tm
index 5c8ad6c7..fcf25688 100644
--- a/learnxiny.tm
+++ b/learnxiny.tm
@@ -1,24 +1,27 @@
-// Tomo is a statically typed, garbage collected imperative language with
-// emphasis on simplicity, safety, and speed. Tomo code cross compiles to C,
-// which is compiled to a binary using your C compiler of choice.
+# Tomo is a statically typed, garbage collected imperative language with
+# emphasis on simplicity, safety, and speed. Tomo code cross compiles to C,
+# which is compiled to a binary using your C compiler of choice.
-// To begin with, let's define a main function:
+# To begin with, let's define a main function:
func main():
- // This function's code will run if you run this file.
+ # This function's code will run if you run this file.
- // Print to the console
+ # Print to the console
say("Hello world!")
- // Declare an integer variable (types are inferred)
+ # You can also use !! as a shorthand:
+ !! This is the same as using say(), but a bit easier to type
+
+ # Declare a variable with ':=' (the type is inferred to be integer)
my_variable := 123
- // Assign a new value
+ # Assign a new value
my_variable = 99
- // Floating point numbers are similar, but have a decimal point:
+ # Floating point numbers are similar, but require a decimal point:
my_num := 2.0
- // Strings can use interpolation with the dollar sign $:
+ # Strings can use interpolation with the dollar sign $:
say("My variable is $my_variable and this is a sum: $(1 + 2)")
say("
@@ -30,21 +33,21 @@ func main():
The multiline string won't include a leading or trailing newline.
")
- // Docstring tests use ">>" and when the program runs, they will print
- // their source code to the console on stderr.
+ # Docstring tests use ">>" and when the program runs, they will print
+ # their source code to the console on stderr.
>> 1 + 2
- // If there is a following line with "=", you can perform a check that
- // the output matches what was expected.
+ # If there is a following line with "=", you can perform a check that
+ # the output matches what was expected.
>> 2 + 3
= 5
- // If there is a mismatch, the program will halt and print a useful
- // error message.
+ # If there is a mismatch, the program will halt and print a useful
+ # error message.
- // Booleans use "yes" and "no" instead of "true" and "false"
+ # Booleans use "yes" and "no" instead of "true" and "false"
my_bool := yes
- // Conditionals:
+ # Conditionals:
if my_bool:
say("It worked!")
else if my_variable == 99:
@@ -52,125 +55,157 @@ func main():
else:
say("else")
- // Arrays:
+ # Arrays:
my_numbers := [10, 20, 30]
- // Arrays are 1-indexed, so the first element is at index 1:
+ # Arrays are 1-indexed, so the first element is at index 1:
>> my_numbers[1]
= 10
- // Negative indices can be used to get items from the back of the array:
+ # Negative indices can be used to get items from the back of the array:
>> my_numbers[-1]
= 30
- // If an invalid index outside the array's bounds is used (e.g.
- // my_numbers[999]), an error message will be printed and the program will
- // exit.
+ # If an invalid index outside the array's bounds is used (e.g.
+ # my_numbers[999]), an error message will be printed and the program will
+ # exit.
- // Iteration:
+ # Iteration:
for num in my_numbers:
>> num
- // Optionally, you can use an iteration index as well:
+ # Optionally, you can use an iteration index as well:
for index, num in my_numbers:
- pass // Pass means "do nothing"
+ pass # Pass means "do nothing"
- // Loop control flow uses "skip" and "stop"
- for num in my_numbers:
- if num == 20:
- // You can specify which loop variable you're skipping/stopping if
- // there is any ambiguity.
- skip num
+ # Arrays can be created with array comprehensions, which are loops:
+ >> [x*10 for x in my_numbers]
+ = [100, 200, 300]
+ >> [x*10 for x in my_numbers if x != 20]
+ = [100, 300]
- if num == 30:
- stop
+ # Loop control flow uses "skip" and "stop"
+ for x in my_numbers:
+ for y in my_numbers:
+ if x == y:
+ skip
- >> num
-
- // Tables are efficient hash maps
+ # For readability, you can also use postfix conditionals:
+ skip if x == y
+
+ if x + y == 60:
+ # Skip or stop can specify a loop variable if you want to
+ # affect an enclosing loop:
+ stop x
+
+ # Tables are efficient hash maps
table := {"one": 1, "two": 2}
>> table:get("two")
= 2
- // You can supply a default argument in case a key isn't found:
+ # You can supply a default argument in case a key isn't found:
>> table:get("xxx", default=0)
= 0
- // Otherwise, a runtime error will be raised:
- // >> table:get("xxx")
+ # Otherwise, a runtime error will be raised:
+ # >> table:get("xxx")
- // Tables can be iterated over either by key or key,value:
+ # Tables can be iterated over either by key or key,value:
for key in table:
pass
for key, value in table:
pass
- // Tables also have ".keys" and ".values" fields to explicitly access the
- // array of keys or values in the table.
+ # Tables also have ".keys" and ".values" fields to explicitly access the
+ # array of keys or values in the table.
>> table.keys
= ["one", "two"]
>> table.values
= [1, 2]
- // Tables can have fallbacks:
+ # Tables can have a fallback table that's consulted when a key
+ # isn't found in the table itself:
table2 := {"three": 3; fallback=table}
>> table2:get("two")
= 2
>> table2:get("three")
= 3
- // If no default is provided and a missing key is looked up, the program
- // will print an error message and halt.
+ # Tables can also be created with comprehension loops:
+ >> {x:10*x for x in 5}
+ = {1:10, 2:20, 3:30, 4:40, 5:50}
- // Any types can be used in tables, for example, a table mapping arrays to
- // strings:
- >> {[10, 20]: "one", [30, 40, 50]: "two"}
+ # If no default is provided and a missing key is looked up, the program
+ # will print an error message and halt.
+
+ # Any types can be used in tables, for example, a table mapping arrays to
+ # strings:
+ table3 := {[10, 20]: "one", [30, 40, 50]: "two"}
+ >> table3:get([10, 20])
+ = "one"
+
+ # Sets are similar to tables, but they represent an unordered collection of
+ # unique values:
+ set := {10, 20, 30}
+ >> set:has(20)
+ = yes
+ >> set:has(999)
+ = no
- // So far, the datastructures that have been discussed are all *immutable*,
- // meaning you can't add, remove, or change their contents. If you want to
- // have mutable data, you need to allocate an area of memory which can hold
- // different values using the "@" operator (think: "@llocate").
+ # You can do some operations on sets:
+ other_set := {30, 40, 50}
+ >> set:with(other_set)
+ = {10, 20, 30, 40, 50}
+ >> set:without(other_set)
+ = {10, 20}
+ >> set:overlap(other_set)
+ = {30}
+
+ # So far, the datastructures that have been discussed are all *immutable*,
+ # meaning you can't add, remove, or change their contents. If you want to
+ # have mutable data, you need to allocate an area of memory which can hold
+ # different values using the "@" operator (think: "(a)llocate").
my_arr := @[10, 20, 30]
my_arr[1] = 999
>> my_arr
= @[999, 20, 30]
- // To call a method, you must use ":" and the name of the method:
+ # To call a method, you must use ":" and the name of the method:
my_arr:sort()
>> my_arr
= @[20, 30, 999]
- // To access the immutable value that resides inside the memory area, you
- // can use the "[]" operator:
+ # To access the immutable value that resides inside the memory area, you
+ # can use the "[]" operator:
>> my_arr[]
= [20, 30, 999]
- // You can think of this like taking a photograph of what's at that memory
- // location. Later, a new value might end up there, but the photograph will
- // remain unchanged.
+ # You can think of this like taking a photograph of what's at that memory
+ # location. Later, a new value might end up there, but the photograph will
+ # remain unchanged.
snapshot := my_arr[]
my_arr:insert(1000)
>> my_arr
= @[20, 30, 999, 1000]
>> snapshot
= [20, 30, 999]
- // Internally, this is implemented using copy-on-write, so it's quite
- // efficient.
+ # Internally, this is implemented using copy-on-write, so it's quite
+ # efficient.
- // These demos are defined below:
+ # These demos are defined below:
demo_keyword_args()
demo_structs()
demo_enums()
demo_lambdas()
-// Functions must be declared at the top level of a file and must specify the
-// types of all of their arguments and return value (if any):
+# Functions must be declared at the top level of a file and must specify the
+# types of all of their arguments and return value (if any):
func add(x:Int, y:Int)->Int:
return x + y
-// Default values for arguments can be provided in place of a type (the type is
-// inferred from the default value):
+# Default values for arguments can be provided in place of a type (the type is
+# inferred from the default value):
func show_both(first:Int, second=0)->Text:
return "first=$first second=$second"
@@ -178,15 +213,15 @@ func demo_keyword_args():
>> show_both(1, 2)
= "first=1 second=2"
- // If unspecified, the default argument is used:
+ # If unspecified, the default argument is used:
>> show_both(1)
= "first=1 second=0"
- // Arguments can be specified by name, in any order:
+ # Arguments can be specified by name, in any order:
>> show_both(second=20, 10)
= "first=10 second=20"
-// Here are some different type signatures:
+# Here are some different type signatures:
func takes_many_types(
boolean:Bool,
integer:Int,
@@ -200,42 +235,42 @@ func takes_many_types(
):
pass
-// Now let's define our own datastructure, a humble struct:
+# Now let's define our own datastructure, a humble struct:
struct Person(name:Text, age:Int):
- // We can define constants here if we want to:
+ # We can define constants here if we want to:
max_age := 122
- // Methods are defined here as well:
+ # Methods are defined here as well:
func say_age(self:Person):
say("My age is $self.age")
- // If you want to mutate a value, you must have a mutable pointer:
+ # If you want to mutate a value, you must have a mutable pointer:
func increase_age(self:@Person, amount=1):
self.age += amount
- // Methods don't have to take a Person as their first argument:
+ # Methods don't have to take a Person as their first argument:
func get_cool_name()->Text:
return "Blade"
func demo_structs():
- // Creating a struct:
+ # Creating a struct:
alice := Person("Alice", 30)
>> alice
= Person(name="Alice", age=30)
- // Accessing fields:
+ # Accessing fields:
>> alice.age
= 30
- // Calling methods:
+ # Calling methods:
alice:say_age()
- // You can call static methods by using the class name and ".":
+ # You can call static methods by using the class name and ".":
>> Person.get_cool_name()
= "Blade"
- // Comparisons, conversion to text, and hashing are all handled
- // automatically when you create a struct:
+ # Comparisons, conversion to text, and hashing are all handled
+ # automatically when you create a struct:
bob := Person("Bob", 30)
>> alice == bob
= no
@@ -248,40 +283,40 @@ func demo_structs():
= "first"
-// Now let's look at another feature: enums. Tomo enums are tagged unions, also
-// known as "sum types". You enumerate all the different types of values
-// something could have, and it's stored internally as a small integer that
-// indicates which type it is, and any data you want to associate with it.
+# Now let's look at another feature: enums. Tomo enums are tagged unions, also
+# known as "sum types". You enumerate all the different types of values
+# something could have, and it's stored internally as a small integer that
+# indicates which type it is, and any data you want to associate with it.
enum Shape(
Point,
Circle(radius:Num),
Rectangle(width:Num, height:Num),
):
- // Just like with structs, you define methods and constants inside a level
- // of indentation:
+ # Just like with structs, you define methods and constants inside a level
+ # of indentation:
func get_area(self:Shape)->Num:
- // In order to work with an enum, it's most often handy to use a 'when'
- // statement to get the internal values:
+ # In order to work with an enum, it's most often handy to use a 'when'
+ # statement to get the internal values:
when self is Point:
return 0
is Circle(r):
return Num.PI * r^2
is Rectangle(w, h):
return w * h
- // 'when' statements are checked for exhaustiveness, so the compiler
- // will give an error if you forgot any cases. You can also use 'else:'
- // if you want a fallback to handle other cases.
+ # 'when' statements are checked for exhaustiveness, so the compiler
+ # will give an error if you forgot any cases. You can also use 'else:'
+ # if you want a fallback to handle other cases.
func demo_enums():
- // Enums are constructed like this:
+ # Enums are constructed like this:
my_shape := Shape.Circle(1.0)
- // If an enum type doesn't have any associated data, it is not invoked as a
- // function, but is just a static value:
+ # If an enum type doesn't have any associated data, it is not invoked as a
+ # function, but is just a static value:
other_shape := Shape.Point
- // Similar to structs, enums automatically define comparisons, conversion
- // to text, and hashing:
+ # Similar to structs, enums automatically define comparisons, conversion
+ # to text, and hashing:
>> my_shape == other_shape
= no
@@ -292,19 +327,19 @@ func demo_enums():
= {Shape.Circle(radius=1):"nice"}
func demo_lambdas():
- // Lambdas, or anonymous functions, can be used like this:
+ # Lambdas, or anonymous functions, can be used like this:
add_one := func(x:Int): x + 1
>> add_one(5)
= 6
- // Lambdas can capture closure values, but only as a snapshot from when the
- // lambda was created:
+ # Lambdas can capture closure values, but only as a snapshot from when the
+ # lambda was created:
n := 10
add_n := func(x:Int): x + n
>> add_n(5)
= 15
- // The lambda's closure won't change when this variable is reassigned:
+ # The lambda's closure won't change when this variable is reassigned:
n = -999
>> add_n(5)
= 15
diff --git a/parse.c b/parse.c
index 8ab3688e..36ce3ddf 100644
--- a/parse.c
+++ b/parse.c
@@ -378,7 +378,7 @@ const char *get_id(const char **inout) {
}
bool comment(const char **pos) {
- if ((*pos)[0] == '/' && (*pos)[1] == '/' && (*pos)[2] != '!') {
+ if ((*pos)[0] == '#') {
*pos += strcspn(*pos, "\r\n");
return true;
} else {
@@ -1194,6 +1194,9 @@ PARSER(parse_text) {
open_quote = *pos;
++pos;
close_quote = closing[(int)open_quote] ? closing[(int)open_quote] : open_quote;
+
+ if (!lang && open_quote == '/')
+ lang = "Pattern";
} else {
return NULL;
}
@@ -2132,7 +2135,7 @@ PARSER(parse_doctest) {
PARSER(parse_say) {
const char *start = pos;
- if (!match(&pos, "//!")) return NULL;
+ if (!match(&pos, "!!")) return NULL;
spaces(&pos);
ast_list_t *chunks = NULL;
diff --git a/test/arrays.tm b/test/arrays.tm
index 21c8cb5c..16f411a2 100644
--- a/test/arrays.tm
+++ b/test/arrays.tm
@@ -78,23 +78,23 @@ func main():
>> arr := [10, 20, 30]
>> reversed := arr:reversed()
= [30, 20, 10]
- // Ensure the copy-on-write behavior triggers:
+ # Ensure the copy-on-write behavior triggers:
>> arr[1] = 999
>> reversed
= [30, 20, 10]
do:
>> nums := [10, -20, 30]
- // Sorted function doesn't mutate original:
+ # Sorted function doesn't mutate original:
>> nums:sorted()
= [-20, 10, 30]
>> nums
= [10, -20, 30]
- // Sort function does mutate in place:
+ # Sort function does mutate in place:
>> nums:sort()
>> nums
= [-20, 10, 30]
- // Custom sort functions:
+ # Custom sort functions:
>> nums:sort(func(x:&%Int,y:&%Int): x:abs() <> y:abs())
>> nums
= [10, -20, 30]
@@ -148,7 +148,7 @@ func main():
>> [i*10 for i in 10]:by(2):by(-1)
= [90, 70, 50, 30, 10]
- // Test iterating over array:from() and array:to()
+ # Test iterating over array:from() and array:to()
xs := ["A", "B", "C", "D"]
for i,x in xs:to(-2):
for y in xs:from(i+1):
diff --git a/test/integers.tm b/test/integers.tm
index 5743cbb2..e15b9473 100644
--- a/test/integers.tm
+++ b/test/integers.tm
@@ -79,7 +79,7 @@ func main():
for in 20:
>> n := Int.random(-999999, 999999)
>> d := Int.random(-999, 999)
- //! n=$n, d=$d:
+ !! n=$n, d=$d:
>> (n/d)*d + (n mod d) == n
= yes
diff --git a/test/lambdas.tm b/test/lambdas.tm
index 8d543bfc..62b5fd36 100644
--- a/test/lambdas.tm
+++ b/test/lambdas.tm
@@ -31,12 +31,12 @@ func main():
>> abs100(-5)
= 500
- // Test nested lambdas:
+ # Test nested lambdas:
outer := "Hello"
fn := func():
return func():
return func():
- defer: //! $outer
+ defer: !! $outer
return outer
>> fn()()()
= "Hello"
diff --git a/test/lang.tm b/test/lang.tm
index d78476de..e2093a6e 100644
--- a/test/lang.tm
+++ b/test/lang.tm
@@ -1,11 +1,11 @@
lang HTML:
HEADER := $HTML"<!DOCTYPE HTML>"
func escape(t:Text)->HTML:
- t = t:replace("&", "&amp;")
- t = t:replace("<", "&lt;")
- t = t:replace(">", "&gt;")
- t = t:replace('"', "&quot;")
- t = t:replace("'", "&#39;")
+ t = t:replace($/&/, "&amp;")
+ t = t:replace($/</, "&lt;")
+ t = t:replace($/>/, "&gt;")
+ t = t:replace($/"/, "&quot;")
+ t = t:replace($/'/, "&#39;")
return HTML.from_unsafe_text(t)
func escape_int(i:Int)->HTML:
diff --git a/test/text.tm b/test/text.tm
index d82a38c7..3049af99 100644
--- a/test/text.tm
+++ b/test/text.tm
@@ -1,6 +1,6 @@
func main():
>> str := "Hello Amélie!"
- //! Testing strings like $str
+ !! Testing strings like $str
>> str:upper()
= "HELLO AMÉLIE!"
@@ -40,45 +40,45 @@ func main():
>> amelie2:codepoint_names()
= ["LATIN CAPITAL LETTER A", "LATIN SMALL LETTER M", "LATIN SMALL LETTER E WITH ACUTE", "LATIN SMALL LETTER L", "LATIN SMALL LETTER I", "LATIN SMALL LETTER E"]
- >> "Hello":replace("e", "X")
+ >> "Hello":replace($/e/, "X")
= "HXllo"
- >> "Hello":has("l")
+ >> "Hello":has($/l/)
= yes
- >> "Hello":has("l[..end]")
+ >> "Hello":has($/l[..end]/)
= no
- >> "Hello":has("[..start]l")
+ >> "Hello":has($/[..start]l/)
= no
- >> "Hello":has("o")
+ >> "Hello":has($/o/)
= yes
- >> "Hello":has("o[..end]")
+ >> "Hello":has($/o[..end]/)
= yes
- >> "Hello":has("[..start]o")
+ >> "Hello":has($/[..start]o/)
= no
- >> "Hello":has("H")
+ >> "Hello":has($/H/)
= yes
- >> "Hello":has("H[..end]")
+ >> "Hello":has($/H[..end]/)
= no
- >> "Hello":has("[..start]H")
+ >> "Hello":has($/[..start]H/)
= yes
- >> "Hello":replace("l", "")
+ >> "Hello":replace($/l/, "")
= "Heo"
- >> "xxxx":replace("x", "")
+ >> "xxxx":replace($/x/, "")
= ""
- >> "xxxx":replace("y", "")
+ >> "xxxx":replace($/y/, "")
= "xxxx"
- >> "One two three four five six":replace("e ", "")
+ >> "One two three four five six":replace($/e /, "")
= "Ontwo threfour fivsix"
- >> " one ":replace("[..start][..space]", "")
+ >> " one ":replace($/[..start][..space]/, "")
= "one "
- >> " one ":replace("[..space][..end]", "")
+ >> " one ":replace($/[..space][..end]/, "")
= " one"
- >> amelie:has(amelie2)
+ >> amelie:has($/$amelie2/)
>> multiline := "
@@ -87,7 +87,7 @@ func main():
"
= "line one\nline two"
- //! Interpolation tests:
+ !! Interpolation tests:
>> "A $(1+2)"
= "A 3"
>> 'A $(1+2)'
@@ -104,9 +104,9 @@ func main():
>> $(one (nested) two $(1+2))
= "one (nested) two 3"
- >> "one two three":replace("[..alpha]", "")
+ >> "one two three":replace($/[..alpha]/, "")
= " "
- >> "one two three":replace("[..alpha]", "word")
+ >> "one two three":replace($/[..alpha]/, "word")
= "word word word"
>> c := "É̩"
@@ -130,17 +130,17 @@ func main():
>> "":lines()
= []
- //! Test splitting and joining text:
- >> "one two three":split(" ")
+ !! Test splitting and joining text:
+ >> "one two three":split($/ /)
= ["one", "two", "three"]
- >> "one,two,three,":split(",")
+ >> "one,two,three,":split($/,/)
= ["one", "two", "three", ""]
- >> "one two three":split("[..space]")
+ >> "one two three":split($/[..space]/)
= ["one", "two", "three"]
- >> "abc":split("")
+ >> "abc":split($//)
= ["a", "b", "c"]
>> ", ":join(["one", "two", "three"])
@@ -158,42 +158,42 @@ func main():
>> "":split()
= []
- //! Test text:find_all()
- >> " one two three ":find_all("[..alpha]")
+ !! Test text:find_all()
+ >> " one two three ":find_all($/[..alpha]/)
= ["one", "two", "three"]
- >> " one two three ":find_all("[..!space]")
+ >> " one two three ":find_all($/[..!space]/)
= ["one", "two", "three"]
- >> " ":find_all("[..alpha]")
+ >> " ":find_all($/[..alpha]/)
= []
- >> " foo(baz(), 1) doop() ":find_all("[..id](?)")
+ >> " foo(baz(), 1) doop() ":find_all($/[..id](?)/)
= ["foo(baz(), 1)", "doop()"]
- >> "":find_all("")
+ >> "":find_all($Pattern'')
= []
- >> "Hello":find_all("")
+ >> "Hello":find_all($Pattern'')
= []
- //! Test text:find()
- >> " one two three ":find("[..id]", start=-999)
+ !! Test text:find()
+ >> " one two three ":find($/[..id]/, start=-999)
= 0
- >> " one two three ":find("[..id]", start=999)
+ >> " one two three ":find($/[..id]/, start=999)
= 0
- >> " one two three ":find("[..id]")
+ >> " one two three ":find($/[..id]/)
= 2
- >> " one two three ":find("[..id]", start=5)
+ >> " one two three ":find($/[..id]/, start=5)
= 8
>> len := 0_i64
- >> " one ":find("[..id]", length=&len)
+ >> " one ":find($/[..id]/, length=&len)
= 4
>> len
= 3_i64
- //! Test text slicing:
+ !! Test text slicing:
>> "abcdef":slice()
= "abcdef"
>> "abcdef":slice(from=3)
@@ -220,3 +220,8 @@ func main():
>> Text.from_codepoint_names(["not a valid name here buddy"])
= ""
+ >> malicious := "[..xxx"
+ >> $/$malicious/
+ = $/[..1[]..xxx/
+
+
diff --git a/test/threads.tm b/test/threads.tm
index 986ab864..2f259a0e 100644
--- a/test/threads.tm
+++ b/test/threads.tm
@@ -26,7 +26,7 @@ func main():
results := |:Int; max_size|
>> thread := Thread.new(func():
- //! In another thread!
+ !! In another thread!
while yes:
>> got := jobs:get()
when got is Increment(x):
@@ -63,8 +63,8 @@ func main():
>> results:get()
= 1001
- //! Canceling...
+ !! Canceling...
>> thread:cancel()
- //! Joining...
+ !! Joining...
>> thread:join()
- //! Done!
+ !! Done!