diff options
| author | Bruce Hill <bruce@bruce-hill.com> | 2024-09-03 13:19:41 -0400 |
|---|---|---|
| committer | Bruce Hill <bruce@bruce-hill.com> | 2024-09-03 13:19:41 -0400 |
| commit | 64143f0a131a053414e4b73c17bff994522b11c2 (patch) | |
| tree | 2545507fde623f8846bf183388acdbb0234b5e65 | |
| parent | 5feecff9d93522002c74a1423d138c2aa8bc150d (diff) | |
Syntax overhaul (comments back to `#`, print statements to `!!`),
using `$/.../` for patterns and using a DSL for patterns
| -rw-r--r-- | builtins/datatypes.h | 2 | ||||
| -rw-r--r-- | builtins/text.c | 98 | ||||
| -rw-r--r-- | builtins/text.h | 13 | ||||
| -rw-r--r-- | docs/text.md | 50 | ||||
| -rw-r--r-- | environment.c | 18 | ||||
| -rw-r--r-- | learnxiny.tm | 237 | ||||
| -rw-r--r-- | parse.c | 7 | ||||
| -rw-r--r-- | test/arrays.tm | 10 | ||||
| -rw-r--r-- | test/integers.tm | 2 | ||||
| -rw-r--r-- | test/lambdas.tm | 4 | ||||
| -rw-r--r-- | test/lang.tm | 10 | ||||
| -rw-r--r-- | test/text.tm | 85 | ||||
| -rw-r--r-- | test/threads.tm | 8 |
13 files changed, 344 insertions, 200 deletions
diff --git a/builtins/datatypes.h b/builtins/datatypes.h index 433e1dd9..1311797c 100644 --- a/builtins/datatypes.h +++ b/builtins/datatypes.h @@ -86,4 +86,6 @@ typedef struct Text_s { }; } Text_t; +#define Pattern_t Text_t + // vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 diff --git a/builtins/text.c b/builtins/text.c index 99d17577..d9da1248 100644 --- a/builtins/text.c +++ b/builtins/text.c @@ -1034,7 +1034,7 @@ int64_t match_uri(Text_t text, int64_t text_index) return text_index - start_index; } -int64_t match(Text_t text, Text_t pattern, int64_t text_index, int64_t pattern_index) +int64_t match(Text_t text, Pattern_t pattern, int64_t text_index, int64_t pattern_index) { if (pattern_index >= pattern.length) return 0; int64_t start_index = text_index; @@ -1306,7 +1306,7 @@ int64_t match(Text_t text, Text_t pattern, int64_t text_index, int64_t pattern_i #undef EAT1 #undef EAT_MANY -public Int_t Text$find(Text_t text, Text_t pattern, Int_t from_index, int64_t *match_length) +public Int_t Text$find(Text_t text, Pattern_t pattern, Int_t from_index, int64_t *match_length) { int64_t first = Int_to_Int64(from_index, false); if (first == 0) fail("Invalid index: 0"); @@ -1340,7 +1340,7 @@ public Int_t Text$find(Text_t text, Text_t pattern, Int_t from_index, int64_t *m return I(0); } -public bool Text$has(Text_t text, Text_t pattern) +public bool Text$has(Text_t text, Pattern_t pattern) { return !I_is_zero(Text$find(text, pattern, I_small(1), NULL)); } @@ -1363,26 +1363,17 @@ public int printf_text(FILE *stream, const struct printf_info *info, const void return Text$print(stream, t); } -public Text_t Text$as_text(const void *text, bool colorize, const TypeInfo *info) -{ - (void)info; - if (!text) return info && info->TextInfo.lang ? Text$from_str(info->TextInfo.lang) : Text$from_str("Text"); - Text_t as_text = Text$quoted(*(Text_t*)text, colorize); - if (info && info->TextInfo.lang && info != &$Text) - as_text = Text$concat(Text$from_str(colorize ? 
"\x1b[1m$" : "$"), Text$from_str(info->TextInfo.lang), as_text); - return as_text; -} - -public Text_t Text$quoted(Text_t text, bool colorize) +static inline Text_t _quoted(Text_t text, bool colorize, char quote_char) { // TODO: optimize for ASCII and short strings array_t graphemes = {.atomic=1}; #define add_char(c) Array$insert_value(&graphemes, (uint32_t)c, I_small(0), sizeof(uint32_t)) #define add_str(s) ({ for (char *_c = s; *_c; ++_c) Array$insert_value(&graphemes, (uint32_t)*_c, I_small(0), sizeof(uint32_t)); }) if (colorize) - add_str("\x1b[35m\""); - else - add_char('"'); + add_str("\x1b[35m"); + if (quote_char != '"' && quote_char != '\"' && quote_char != '`') + add_char('$'); + add_char(quote_char); #define add_escaped(str) ({ if (colorize) add_str("\x1b[34;1m"); add_char('\\'); add_str(str); if (colorize) add_str("\x1b[0;35m"); }) iteration_state_t state = {0, 0}; @@ -1397,7 +1388,6 @@ public Text_t Text$quoted(Text_t text, bool colorize) case '\r': add_escaped("r"); break; case '\t': add_escaped("t"); break; case '\v': add_escaped("v"); break; - case '"': add_escaped("\""); break; case '\\': add_escaped("\\"); break; case '\x00' ... '\x06': case '\x0E' ... '\x1A': case '\x1C' ... '\x1F': case '\x7F' ... 
'\x7F': { @@ -1411,14 +1401,19 @@ public Text_t Text$quoted(Text_t text, bool colorize) add_str("\x1b[0;35m"); break; } - default: add_char(g); break; + default: { + if (g == quote_char) + add_escaped(((char[2]){quote_char, 0})); + else + add_char(g); + break; + } } } + add_char(quote_char); if (colorize) - add_str("\"\x1b[m"); - else - add_char('"'); + add_str("\x1b[m"); return (Text_t){.length=graphemes.length, .tag=TEXT_GRAPHEMES, .graphemes=graphemes.data}; #undef add_str @@ -1426,7 +1421,22 @@ public Text_t Text$quoted(Text_t text, bool colorize) #undef add_escaped } -public array_t Text$find_all(Text_t text, Text_t pattern) +public Text_t Text$as_text(const void *text, bool colorize, const TypeInfo *info) +{ + (void)info; + if (!text) return info && info->TextInfo.lang ? Text$from_str(info->TextInfo.lang) : Text$from_str("Text"); + Text_t as_text = _quoted(*(Text_t*)text, colorize, info == &Pattern ? '/' : '"'); + if (info && info->TextInfo.lang && info != &$Text && info != &Pattern) + as_text = Text$concat(Text$from_str(colorize ? 
"\x1b[1m$" : "$"), Text$from_str(info->TextInfo.lang), as_text); + return as_text; +} + +public Text_t Text$quoted(Text_t text, bool colorize) +{ + return _quoted(text, colorize, '"'); +} + +public array_t Text$find_all(Text_t text, Pattern_t pattern) { if (pattern.length == 0) // special case return (array_t){.length=0}; @@ -1446,7 +1456,7 @@ public array_t Text$find_all(Text_t text, Text_t pattern) return matches; } -public Text_t Text$replace(Text_t text, Text_t pattern, Text_t replacement) +public Text_t Text$replace(Text_t text, Pattern_t pattern, Text_t replacement) { Text_t ret = {.length=0}; @@ -1470,7 +1480,7 @@ public Text_t Text$replace(Text_t text, Text_t pattern, Text_t replacement) return ret; } -public array_t Text$split(Text_t text, Text_t pattern) +public array_t Text$split(Text_t text, Pattern_t pattern) { if (text.length == 0) // special case return (array_t){.length=0}; @@ -1656,4 +1666,42 @@ public const TypeInfo $Text = { .TextInfo={.lang="Text"}, }; +public Pattern_t Pattern$escape_text(Text_t text) +{ + // TODO: optimize for ASCII and short strings + array_t graphemes = {.atomic=1}; +#define add_char(c) Array$insert_value(&graphemes, (uint32_t)c, I_small(0), sizeof(uint32_t)) +#define add_str(s) ({ for (char *_c = s; *_c; ++_c) Array$insert_value(&graphemes, (uint32_t)*_c, I_small(0), sizeof(uint32_t)); }) + iteration_state_t state = {0, 0}; + for (int64_t i = 0; i < text.length; i++) { + int32_t g = _next_grapheme(text, &state, i); + uint32_t g0 = g < 0 ? 
synthetic_graphemes[-g-1].codepoints[0] : (uint32_t)g; + + if (g == '[') { + add_str("[..1[]"); + } else if (uc_is_property_quotation_mark(g0)) { + add_str("[..1"); + add_char(g); + add_char(']'); + } else if (uc_is_property_paired_punctuation(g0)) { + add_str("[..1"); + add_char(g); + add_char(']'); + } else { + add_char(g); + } + } + return (Text_t){.length=graphemes.length, .tag=TEXT_GRAPHEMES, .graphemes=graphemes.data}; +#undef add_str +#undef add_char +#undef add_escaped +} + +public const TypeInfo Pattern = { + .size=sizeof(Text_t), + .align=__alignof__(Text_t), + .tag=TextInfo, + .TextInfo={.lang="Pattern"}, +}; + // vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 diff --git a/builtins/text.h b/builtins/text.h index b3cb6d79..edf3dc9f 100644 --- a/builtins/text.h +++ b/builtins/text.h @@ -29,11 +29,11 @@ Text_t Text$lower(Text_t text); Text_t Text$title(Text_t text); Text_t Text$as_text(const void *text, bool colorize, const TypeInfo *info); Text_t Text$quoted(Text_t str, bool colorize); -Text_t Text$replace(Text_t str, Text_t pat, Text_t replacement); -array_t Text$split(Text_t text, Text_t pattern); -Int_t Text$find(Text_t text, Text_t pattern, Int_t i, int64_t *match_length); -array_t Text$find_all(Text_t text, Text_t pattern); -bool Text$has(Text_t text, Text_t pattern); +Text_t Text$replace(Text_t str, Pattern_t pat, Text_t replacement); +array_t Text$split(Text_t text, Pattern_t pattern); +Int_t Text$find(Text_t text, Pattern_t pattern, Int_t i, int64_t *match_length); +array_t Text$find_all(Text_t text, Pattern_t pattern); +bool Text$has(Text_t text, Pattern_t pattern); const char *Text$as_c_string(Text_t text); public Text_t Text$format(const char *fmt, ...); array_t Text$clusters(Text_t text); @@ -48,4 +48,7 @@ Text_t Text$join(Text_t glue, array_t pieces); extern const TypeInfo $Text; +Pattern_t Pattern$escape_text(Text_t text); +extern const TypeInfo Pattern; + // vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 diff --git a/docs/text.md b/docs/text.md index 
5d399edd..855c3c6c 100644 --- a/docs/text.md +++ b/docs/text.md @@ -262,15 +262,20 @@ Text codebase is around 1.5K lines of code). For more advanced use cases, consider linking against a C library for regular expressions or pattern matching. +`Pattern` is a [domain-specific language](docs/langs.md), in other words, it's +like a `Text`, but it has a distinct type. As a convenience, you can use +`$/.../` to write pattern literals instead of using the general-purpose DSL +syntax of `$Pattern"..."`. + Patterns are used in a small, but very powerful API that handles many text functions that would normally be handled by a more extensive API: ``` -Text.find(pattern:Text, start=1, length=!&Int64?)->Int -Text.find_all(pattern:Text)->[Text] -Text.split(pattern:Text)->[Text] -Text.replace(pattern:Text, replacement:Text)->[Text] -Text.has(pattern:Text)->Bool +Text.find(pattern:Pattern, start=1, length=!&Int64?)->Int +Text.find_all(pattern:Pattern)->[Text] +Text.split(pattern:Pattern)->[Text] +Text.replace(pattern:Pattern, replacement:Text)->[Text] +Text.has(pattern:Pattern)->Bool ``` See [Text Functions](#Text-Functions) for the full API documentation. @@ -330,6 +335,41 @@ If an exclamation mark (`!`) is placed before a pattern's name, then characters are matched only when they _don't_ match the pattern. For example, `[..!alpha]` will match all characters _except_ alphabetic ones. +## Interpolating Text and Escaping + +To escape a character in a pattern (e.g. if you want to match the literal +character `?`), you can use the syntax `[..1 ?]`. This is almost never +necessary unless you have text that looks like a Tomo text pattern and has +something like `[..` or `(?)` inside it. + +However, if you're trying to do an exact match of arbitrary text values, you'll +want to have the text automatically escaped. Fortunately, Tomo's injection-safe +DSL text interpolation supports automatic text escaping. 
This means that if you +use text interpolation with the `$` sign to insert a text value, the value will +be automatically escaped using the `[..1 ?]` rule described above: + +```tomo +# Risk of code injection (would cause an error because 'xxx' is not a valid +# pattern name: +>> user_input := get_user_input() += "[..xxx]" + +# Interpolation automatically escapes: +>> $/$user_input/ += $/[..1 []..xxx]/ + +# No error: +>> some_text:find($/$user_input/) += 0 +``` + +If you prefer, you can also use this to insert literal characters: + +```tomo +>> $/literal $"[..]"/ += $/literal [..1]]..]/ +``` + ## Repetitions By default, named patterns match 1 or more repetitions, but you can specify how diff --git a/environment.c b/environment.c index f960aaec..6ecb8652 100644 --- a/environment.c +++ b/environment.c @@ -60,6 +60,7 @@ env_t *new_compilation_unit(CORD *libname) THREAD_TYPE = Type(StructType, .name="Thread", .env=thread_env, .opaque=true); } + struct { const char *name; type_t *type; @@ -230,22 +231,25 @@ env_t *new_compilation_unit(CORD *libname) {"reversed", "Range$reversed", "func(range:Range)->Range"}, {"by", "Range$by", "func(range:Range, step:Int)->Range"}, )}, + {"Pattern", Type(TextType, .lang="Pattern", .env=namespace_env(env, "Pattern")), "Text_t", "Pattern", TypedArray(ns_entry_t, + {"escape_text", "Pattern$escape_text", "func(text:Text)->Pattern"}, + )}, {"Text", TEXT_TYPE, "Text_t", "$Text", TypedArray(ns_entry_t, - {"find", "Text$find", "func(text:Text, pattern:Text, start=1, length=!&Int64)->Int"}, - {"find_all", "Text$find_all", "func(text:Text, pattern:Text)->[Text]"}, + {"find", "Text$find", "func(text:Text, pattern:Pattern, start=1, length=!&Int64)->Int"}, + {"find_all", "Text$find_all", "func(text:Text, pattern:Pattern)->[Text]"}, {"as_c_string", "CORD_to_char_star", "func(text:Text)->CString"}, {"codepoint_names", "Text$codepoint_names", "func(text:Text)->[Text]"}, {"from_bytes", "Text$from_bytes", "func(bytes:[Int8])->Text"}, {"from_c_string", 
"Text$from_str", "func(str:CString)->Text"}, {"from_codepoint_names", "Text$from_codepoint_names", "func(codepoint_names:[Text])->Text"}, {"from_codepoints", "Text$from_codepoints", "func(codepoints:[Int32])->Text"}, - {"has", "Text$has", "func(text:Text, pattern:Text)->Bool"}, + {"has", "Text$has", "func(text:Text, pattern:Pattern)->Bool"}, {"join", "Text$join", "func(glue:Text, pieces:[Text])->Text"}, {"lines", "Text$lines", "func(text:Text)->[Text]"}, {"lower", "Text$lower", "func(text:Text)->Text"}, {"quoted", "Text$quoted", "func(text:Text, color=no)->Text"}, - {"replace", "Text$replace", "func(text:Text, pattern:Text, replacement:Text)->Text"}, - {"split", "Text$split", "func(text:Text, pattern='')->[Text]"}, + {"replace", "Text$replace", "func(text:Text, pattern:Pattern, replacement:Text)->Text"}, + {"split", "Text$split", "func(text:Text, pattern=$Pattern'')->[Text]"}, {"slice", "Text$slice", "func(text:Text, from=1, to=-1)->Text"}, {"title", "Text$title", "func(text:Text)->Text"}, {"upper", "Text$upper", "func(text:Text)->Text"}, @@ -294,6 +298,10 @@ env_t *new_compilation_unit(CORD *libname) } } + set_binding(namespace_env(env, "Pattern"), "from_unsafe_text", + new(binding_t, .type=Type(FunctionType, .args=new(arg_t, .name="text", .type=TEXT_TYPE), .ret=Type(TextType, .lang="Pattern")), + .code="(Pattern_t)")); + return env; } diff --git a/learnxiny.tm b/learnxiny.tm index 5c8ad6c7..fcf25688 100644 --- a/learnxiny.tm +++ b/learnxiny.tm @@ -1,24 +1,27 @@ -// Tomo is a statically typed, garbage collected imperative language with -// emphasis on simplicity, safety, and speed. Tomo code cross compiles to C, -// which is compiled to a binary using your C compiler of choice. +# Tomo is a statically typed, garbage collected imperative language with +# emphasis on simplicity, safety, and speed. Tomo code cross compiles to C, +# which is compiled to a binary using your C compiler of choice. 
-// To begin with, let's define a main function: +# To begin with, let's define a main function: func main(): - // This function's code will run if you run this file. + # This function's code will run if you run this file. - // Print to the console + # Print to the console say("Hello world!") - // Declare an integer variable (types are inferred) + # You can also use !! as a shorthand: + !! This is the same as using say(), but a bit easier to type + + # Declare a variable with ':=' (the type is inferred to be integer) my_variable := 123 - // Assign a new value + # Assign a new value my_variable = 99 - // Floating point numbers are similar, but have a decimal point: + # Floating point numbers are similar, but require a decimal point: my_num := 2.0 - // Strings can use interpolation with the dollar sign $: + # Strings can use interpolation with the dollar sign $: say("My variable is $my_variable and this is a sum: $(1 + 2)") say(" @@ -30,21 +33,21 @@ func main(): The multiline string won't include a leading or trailing newline. ") - // Docstring tests use ">>" and when the program runs, they will print - // their source code to the console on stderr. + # Docstring tests use ">>" and when the program runs, they will print + # their source code to the console on stderr. >> 1 + 2 - // If there is a following line with "=", you can perform a check that - // the output matches what was expected. + # If there is a following line with "=", you can perform a check that + # the output matches what was expected. >> 2 + 3 = 5 - // If there is a mismatch, the program will halt and print a useful - // error message. + # If there is a mismatch, the program will halt and print a useful + # error message. 
- // Booleans use "yes" and "no" instead of "true" and "false" + # Booleans use "yes" and "no" instead of "true" and "false" my_bool := yes - // Conditionals: + # Conditionals: if my_bool: say("It worked!") else if my_variable == 99: @@ -52,125 +55,157 @@ func main(): else: say("else") - // Arrays: + # Arrays: my_numbers := [10, 20, 30] - // Arrays are 1-indexed, so the first element is at index 1: + # Arrays are 1-indexed, so the first element is at index 1: >> my_numbers[1] = 10 - // Negative indices can be used to get items from the back of the array: + # Negative indices can be used to get items from the back of the array: >> my_numbers[-1] = 30 - // If an invalid index outside the array's bounds is used (e.g. - // my_numbers[999]), an error message will be printed and the program will - // exit. + # If an invalid index outside the array's bounds is used (e.g. + # my_numbers[999]), an error message will be printed and the program will + # exit. - // Iteration: + # Iteration: for num in my_numbers: >> num - // Optionally, you can use an iteration index as well: + # Optionally, you can use an iteration index as well: for index, num in my_numbers: - pass // Pass means "do nothing" + pass # Pass means "do nothing" - // Loop control flow uses "skip" and "stop" - for num in my_numbers: - if num == 20: - // You can specify which loop variable you're skipping/stopping if - // there is any ambiguity. 
- skip num + # Arrays can be created with array comprehensions, which are loops: + >> [x*10 for x in my_numbers] + = [100, 200, 300] + >> [x*10 for x in my_numbers if x != 20] + = [100, 300] - if num == 30: - stop + # Loop control flow uses "skip" and "stop" + for x in my_numbers: + for y in my_numbers: + if x == y: + skip - >> num - - // Tables are efficient hash maps + # For readability, you can also use postfix conditionals: + skip if x == y + + if x + y == 60: + # Skip or stop can specify a loop variable if you want to + # affect an enclosing loop: + stop x + + # Tables are efficient hash maps table := {"one": 1, "two": 2} >> table:get("two") = 2 - // You can supply a default argument in case a key isn't found: + # You can supply a default argument in case a key isn't found: >> table:get("xxx", default=0) = 0 - // Otherwise, a runtime error will be raised: - // >> table:get("xxx") + # Otherwise, a runtime error will be raised: + # >> table:get("xxx") - // Tables can be iterated over either by key or key,value: + # Tables can be iterated over either by key or key,value: for key in table: pass for key, value in table: pass - // Tables also have ".keys" and ".values" fields to explicitly access the - // array of keys or values in the table. + # Tables also have ".keys" and ".values" fields to explicitly access the + # array of keys or values in the table. >> table.keys = ["one", "two"] >> table.values = [1, 2] - // Tables can have fallbacks: + # Tables can have a fallback table that's used as a fallback when the key + # isn't found in the table itself: table2 := {"three": 3; fallback=table} >> table2:get("two") = 2 >> table2:get("three") = 3 - // If no default is provided and a missing key is looked up, the program - // will print an error message and halt. 
+ # Tables can also be created with comprehension loops: + >> {x:10*x for x in 5} + = {1:10, 2:20, 3:30, 4:40, 5:50} - // Any types can be used in tables, for example, a table mapping arrays to - // strings: - >> {[10, 20]: "one", [30, 40, 50]: "two"} + # If no default is provided and a missing key is looked up, the program + # will print an error message and halt. + + # Any types can be used in tables, for example, a table mapping arrays to + # strings: + table3 := {[10, 20]: "one", [30, 40, 50]: "two"} + >> table3:get([10, 20]) + = "one" + + # Sets are similar to tables, but they represent an unordered collection of + # unique values: + set := {10, 20, 30} + >> set:has(20) + = yes + >> set:has(999) + = no - // So far, the datastructures that have been discussed are all *immutable*, - // meaning you can't add, remove, or change their contents. If you want to - // have mutable data, you need to allocate an area of memory which can hold - // different values using the "@" operator (think: "@llocate"). + # You can do some operations on sets: + other_set := {30, 40, 50} + >> set:with(other_set) + = {10, 20, 30, 40, 50} + >> set:without(other_set) + = {10, 20} + >> set:overlap(other_set) + = {30} + + # So far, the datastructures that have been discussed are all *immutable*, + # meaning you can't add, remove, or change their contents. If you want to + # have mutable data, you need to allocate an area of memory which can hold + # different values using the "@" operator (think: "(a)llocate"). 
my_arr := @[10, 20, 30] my_arr[1] = 999 >> my_arr = @[999, 20, 30] - // To call a method, you must use ":" and the name of the method: + # To call a method, you must use ":" and the name of the method: my_arr:sort() >> my_arr = @[20, 30, 999] - // To access the immutable value that resides inside the memory area, you - // can use the "[]" operator: + # To access the immutable value that resides inside the memory area, you + # can use the "[]" operator: >> my_arr[] = [20, 30, 999] - // You can think of this like taking a photograph of what's at that memory - // location. Later, a new value might end up there, but the photograph will - // remain unchanged. + # You can think of this like taking a photograph of what's at that memory + # location. Later, a new value might end up there, but the photograph will + # remain unchanged. snapshot := my_arr[] my_arr:insert(1000) >> my_arr = @[20, 30, 999, 1000] >> snapshot = [20, 30, 999] - // Internally, this is implemented using copy-on-write, so it's quite - // efficient. + # Internally, this is implemented using copy-on-write, so it's quite + # efficient. 
- // These demos are defined below: + # These demos are defined below: demo_keyword_args() demo_structs() demo_enums() demo_lambdas() -// Functions must be declared at the top level of a file and must specify the -// types of all of their arguments and return value (if any): +# Functions must be declared at the top level of a file and must specify the +# types of all of their arguments and return value (if any): func add(x:Int, y:Int)->Int: return x + y -// Default values for arguments can be provided in place of a type (the type is -// inferred from the default value): +# Default values for arguments can be provided in place of a type (the type is +# inferred from the default value): func show_both(first:Int, second=0)->Text: return "first=$first second=$second" @@ -178,15 +213,15 @@ func demo_keyword_args(): >> show_both(1, 2) = "first=1 second=2" - // If unspecified, the default argument is used: + # If unspecified, the default argument is used: >> show_both(1) = "first=1 second=0" - // Arguments can be specified by name, in any order: + # Arguments can be specified by name, in any order: >> show_both(second=20, 10) = "first=10 second=20" -// Here are some different type signatures: +# Here are some different type signatures: func takes_many_types( boolean:Bool, integer:Int, @@ -200,42 +235,42 @@ func takes_many_types( ): pass -// Now let's define our own datastructure, a humble struct: +# Now let's define our own datastructure, a humble struct: struct Person(name:Text, age:Int): - // We can define constants here if we want to: + # We can define constants here if we want to: max_age := 122 - // Methods are defined here as well: + # Methods are defined here as well: func say_age(self:Person): say("My age is $self.age") - // If you want to mutate a value, you must have a mutable pointer: + # If you want to mutate a value, you must have a mutable pointer: func increase_age(self:@Person, amount=1): self.age += amount - // Methods don't have to take a Person as their 
first argument: + # Methods don't have to take a Person as their first argument: func get_cool_name()->Text: return "Blade" func demo_structs(): - // Creating a struct: + # Creating a struct: alice := Person("Alice", 30) >> alice = Person(name="Alice", age=30) - // Accessing fields: + # Accessing fields: >> alice.age = 30 - // Calling methods: + # Calling methods: alice:say_age() - // You can call static methods by using the class name and ".": + # You can call static methods by using the class name and ".": >> Person.get_cool_name() = "Blade" - // Comparisons, conversion to text, and hashing are all handled - // automatically when you create a struct: + # Comparisons, conversion to text, and hashing are all handled + # automatically when you create a struct: bob := Person("Bob", 30) >> alice == bob = no @@ -248,40 +283,40 @@ func demo_structs(): = "first" -// Now let's look at another feature: enums. Tomo enums are tagged unions, also -// known as "sum types". You enumerate all the different types of values -// something could have, and it's stored internally as a small integer that -// indicates which type it is, and any data you want to associate with it. +# Now let's look at another feature: enums. Tomo enums are tagged unions, also +# known as "sum types". You enumerate all the different types of values +# something could have, and it's stored internally as a small integer that +# indicates which type it is, and any data you want to associate with it. 
enum Shape( Point, Circle(radius:Num), Rectangle(width:Num, height:Num), ): - // Just like with structs, you define methods and constants inside a level - // of indentation: + # Just like with structs, you define methods and constants inside a level + # of indentation: func get_area(self:Shape)->Num: - // In order to work with an enum, it's most often handy to use a 'when' - // statement to get the internal values: + # In order to work with an enum, it's most often handy to use a 'when' + # statement to get the internal values: when self is Point: return 0 is Circle(r): return Num.PI * r^2 is Rectangle(w, h): return w * h - // 'when' statements are checked for exhaustiveness, so the compiler - // will give an error if you forgot any cases. You can also use 'else:' - // if you want a fallback to handle other cases. + # 'when' statements are checked for exhaustiveness, so the compiler + # will give an error if you forgot any cases. You can also use 'else:' + # if you want a fallback to handle other cases. 
func demo_enums(): - // Enums are constructed like this: + # Enums are constructed like this: my_shape := Shape.Circle(1.0) - // If an enum type doesn't have any associated data, it is not invoked as a - // function, but is just a static value: + # If an enum type doesn't have any associated data, it is not invoked as a + # function, but is just a static value: other_shape := Shape.Point - // Similar to structs, enums automatically define comparisons, conversion - // to text, and hashing: + # Similar to structs, enums automatically define comparisons, conversion + # to text, and hashing: >> my_shape == other_shape = no @@ -292,19 +327,19 @@ func demo_enums(): = {Shape.Circle(radius=1):"nice"} func demo_lambdas(): - // Lambdas, or anonymous functions, can be used like this: + # Lambdas, or anonymous functions, can be used like this: add_one := func(x:Int): x + 1 >> add_one(5) = 6 - // Lambdas can capture closure values, but only as a snapshot from when the - // lambda was created: + # Lambdas can capture closure values, but only as a snapshot from when the + # lambda was created: n := 10 add_n := func(x:Int): x + n >> add_n(5) = 15 - // The lambda's closure won't change when this variable is reassigned: + # The lambda's closure won't change when this variable is reassigned: n = -999 >> add_n(5) = 15 @@ -378,7 +378,7 @@ const char *get_id(const char **inout) { } bool comment(const char **pos) { - if ((*pos)[0] == '/' && (*pos)[1] == '/' && (*pos)[2] != '!') { + if ((*pos)[0] == '#') { *pos += strcspn(*pos, "\r\n"); return true; } else { @@ -1194,6 +1194,9 @@ PARSER(parse_text) { open_quote = *pos; ++pos; close_quote = closing[(int)open_quote] ? 
closing[(int)open_quote] : open_quote; + + if (!lang && open_quote == '/') + lang = "Pattern"; } else { return NULL; } @@ -2132,7 +2135,7 @@ PARSER(parse_doctest) { PARSER(parse_say) { const char *start = pos; - if (!match(&pos, "//!")) return NULL; + if (!match(&pos, "!!")) return NULL; spaces(&pos); ast_list_t *chunks = NULL; diff --git a/test/arrays.tm b/test/arrays.tm index 21c8cb5c..16f411a2 100644 --- a/test/arrays.tm +++ b/test/arrays.tm @@ -78,23 +78,23 @@ func main(): >> arr := [10, 20, 30] >> reversed := arr:reversed() = [30, 20, 10] - // Ensure the copy-on-write behavior triggers: + # Ensure the copy-on-write behavior triggers: >> arr[1] = 999 >> reversed = [30, 20, 10] do: >> nums := [10, -20, 30] - // Sorted function doesn't mutate original: + # Sorted function doesn't mutate original: >> nums:sorted() = [-20, 10, 30] >> nums = [10, -20, 30] - // Sort function does mutate in place: + # Sort function does mutate in place: >> nums:sort() >> nums = [-20, 10, 30] - // Custom sort functions: + # Custom sort functions: >> nums:sort(func(x:&%Int,y:&%Int): x:abs() <> y:abs()) >> nums = [10, -20, 30] @@ -148,7 +148,7 @@ func main(): >> [i*10 for i in 10]:by(2):by(-1) = [90, 70, 50, 30, 10] - // Test iterating over array:from() and array:to() + # Test iterating over array:from() and array:to() xs := ["A", "B", "C", "D"] for i,x in xs:to(-2): for y in xs:from(i+1): diff --git a/test/integers.tm b/test/integers.tm index 5743cbb2..e15b9473 100644 --- a/test/integers.tm +++ b/test/integers.tm @@ -79,7 +79,7 @@ func main(): for in 20: >> n := Int.random(-999999, 999999) >> d := Int.random(-999, 999) - //! n=$n, d=$d: + !! n=$n, d=$d: >> (n/d)*d + (n mod d) == n = yes diff --git a/test/lambdas.tm b/test/lambdas.tm index 8d543bfc..62b5fd36 100644 --- a/test/lambdas.tm +++ b/test/lambdas.tm @@ -31,12 +31,12 @@ func main(): >> abs100(-5) = 500 - // Test nested lambdas: + # Test nested lambdas: outer := "Hello" fn := func(): return func(): return func(): - defer: //! 
$outer + defer: !! $outer return outer >> fn()()() = "Hello" diff --git a/test/lang.tm b/test/lang.tm index d78476de..e2093a6e 100644 --- a/test/lang.tm +++ b/test/lang.tm @@ -1,11 +1,11 @@ lang HTML: HEADER := $HTML"<!DOCTYPE HTML>" func escape(t:Text)->HTML: - t = t:replace("&", "&") - t = t:replace("<", "<") - t = t:replace(">", ">") - t = t:replace('"', """) - t = t:replace("'", "'") + t = t:replace($/&/, "&") + t = t:replace($/</, "<") + t = t:replace($/>/, ">") + t = t:replace($/"/, """) + t = t:replace($/'/, "'") return HTML.from_unsafe_text(t) func escape_int(i:Int)->HTML: diff --git a/test/text.tm b/test/text.tm index d82a38c7..3049af99 100644 --- a/test/text.tm +++ b/test/text.tm @@ -1,6 +1,6 @@ func main(): >> str := "Hello Amélie!" - //! Testing strings like $str + !! Testing strings like $str >> str:upper() = "HELLO AMÉLIE!" @@ -40,45 +40,45 @@ func main(): >> amelie2:codepoint_names() = ["LATIN CAPITAL LETTER A", "LATIN SMALL LETTER M", "LATIN SMALL LETTER E WITH ACUTE", "LATIN SMALL LETTER L", "LATIN SMALL LETTER I", "LATIN SMALL LETTER E"] - >> "Hello":replace("e", "X") + >> "Hello":replace($/e/, "X") = "HXllo" - >> "Hello":has("l") + >> "Hello":has($/l/) = yes - >> "Hello":has("l[..end]") + >> "Hello":has($/l[..end]/) = no - >> "Hello":has("[..start]l") + >> "Hello":has($/[..start]l/) = no - >> "Hello":has("o") + >> "Hello":has($/o/) = yes - >> "Hello":has("o[..end]") + >> "Hello":has($/o[..end]/) = yes - >> "Hello":has("[..start]o") + >> "Hello":has($/[..start]o/) = no - >> "Hello":has("H") + >> "Hello":has($/H/) = yes - >> "Hello":has("H[..end]") + >> "Hello":has($/H[..end]/) = no - >> "Hello":has("[..start]H") + >> "Hello":has($/[..start]H/) = yes - >> "Hello":replace("l", "") + >> "Hello":replace($/l/, "") = "Heo" - >> "xxxx":replace("x", "") + >> "xxxx":replace($/x/, "") = "" - >> "xxxx":replace("y", "") + >> "xxxx":replace($/y/, "") = "xxxx" - >> "One two three four five six":replace("e ", "") + >> "One two three four five six":replace($/e /, 
"") = "Ontwo threfour fivsix" - >> " one ":replace("[..start][..space]", "") + >> " one ":replace($/[..start][..space]/, "") = "one " - >> " one ":replace("[..space][..end]", "") + >> " one ":replace($/[..space][..end]/, "") = " one" - >> amelie:has(amelie2) + >> amelie:has($/$amelie2/) >> multiline := " @@ -87,7 +87,7 @@ func main(): " = "line one\nline two" - //! Interpolation tests: + !! Interpolation tests: >> "A $(1+2)" = "A 3" >> 'A $(1+2)' @@ -104,9 +104,9 @@ func main(): >> $(one (nested) two $(1+2)) = "one (nested) two 3" - >> "one two three":replace("[..alpha]", "") + >> "one two three":replace($/[..alpha]/, "") = " " - >> "one two three":replace("[..alpha]", "word") + >> "one two three":replace($/[..alpha]/, "word") = "word word word" >> c := "É̩" @@ -130,17 +130,17 @@ func main(): >> "":lines() = [] - //! Test splitting and joining text: - >> "one two three":split(" ") + !! Test splitting and joining text: + >> "one two three":split($/ /) = ["one", "two", "three"] - >> "one,two,three,":split(",") + >> "one,two,three,":split($/,/) = ["one", "two", "three", ""] - >> "one two three":split("[..space]") + >> "one two three":split($/[..space]/) = ["one", "two", "three"] - >> "abc":split("") + >> "abc":split($//) = ["a", "b", "c"] >> ", ":join(["one", "two", "three"]) @@ -158,42 +158,42 @@ func main(): >> "":split() = [] - //! Test text:find_all() - >> " one two three ":find_all("[..alpha]") + !! Test text:find_all() + >> " one two three ":find_all($/[..alpha]/) = ["one", "two", "three"] - >> " one two three ":find_all("[..!space]") + >> " one two three ":find_all($/[..!space]/) = ["one", "two", "three"] - >> " ":find_all("[..alpha]") + >> " ":find_all($/[..alpha]/) = [] - >> " foo(baz(), 1) doop() ":find_all("[..id](?)") + >> " foo(baz(), 1) doop() ":find_all($/[..id](?)/) = ["foo(baz(), 1)", "doop()"] - >> "":find_all("") + >> "":find_all($Pattern'') = [] - >> "Hello":find_all("") + >> "Hello":find_all($Pattern'') = [] - //! 
Test text:find() - >> " one two three ":find("[..id]", start=-999) + !! Test text:find() + >> " one two three ":find($/[..id]/, start=-999) = 0 - >> " one two three ":find("[..id]", start=999) + >> " one two three ":find($/[..id]/, start=999) = 0 - >> " one two three ":find("[..id]") + >> " one two three ":find($/[..id]/) = 2 - >> " one two three ":find("[..id]", start=5) + >> " one two three ":find($/[..id]/, start=5) = 8 >> len := 0_i64 - >> " one ":find("[..id]", length=&len) + >> " one ":find($/[..id]/, length=&len) = 4 >> len = 3_i64 - //! Test text slicing: + !! Test text slicing: >> "abcdef":slice() = "abcdef" >> "abcdef":slice(from=3) @@ -220,3 +220,8 @@ func main(): >> Text.from_codepoint_names(["not a valid name here buddy"]) = "" + >> malicious := "[..xxx" + >> $/$malicious/ + = $/[..1[]..xxx/ + + diff --git a/test/threads.tm b/test/threads.tm index 986ab864..2f259a0e 100644 --- a/test/threads.tm +++ b/test/threads.tm @@ -26,7 +26,7 @@ func main(): results := |:Int; max_size| >> thread := Thread.new(func(): - //! In another thread! + !! In another thread! while yes: >> got := jobs:get() when got is Increment(x): @@ -63,8 +63,8 @@ func main(): >> results:get() = 1001 - //! Canceling... + !! Canceling... >> thread:cancel() - //! Joining... + !! Joining... >> thread:join() - //! Done! + !! Done! |
