about | summary | refs | log | tree | commit | diff
path: root/grammars
diff options
context:
space:
mode:
author: Bruce Hill <bruce@bruce-hill.com> 2021-05-11 12:38:58 -0700
committer: Bruce Hill <bruce@bruce-hill.com> 2021-05-11 12:38:58 -0700
commit: a93220972f8130732c8cd0267c0b14db77ac19ea (patch)
tree: d1141bfdcded68c8b51d9ed6a735aea5134ab4c8 /grammars
parent: 3359a804c8fe02ea4e9bc1abb7430109affdd535 (diff)
Overhaul of |-word boundaries (| is deprecated), performance
improvements for repeating matches, tweaks to the logic of word vs. id
Diffstat (limited to 'grammars')
-rw-r--r-- grammars/bp.bp | 2
-rw-r--r-- grammars/builtins.bp | 12
-rw-r--r-- grammars/c++.bp | 5
-rw-r--r-- grammars/c.bp | 5
-rw-r--r-- grammars/go.bp | 3
-rw-r--r-- grammars/javascript.bp | 3
-rw-r--r-- grammars/lisp.bp | 1
-rw-r--r-- grammars/lua.bp | 3
-rw-r--r-- grammars/python.bp | 4
-rw-r--r-- grammars/rust.bp | 3
-rw-r--r-- grammars/shell.bp | 3
-rw-r--r-- grammars/utf8-id.bp | 8
12 files changed, 23 insertions, 29 deletions
diff --git a/grammars/bp.bp b/grammars/bp.bp
index 5e4dcc8..ed7eb80 100644
--- a/grammars/bp.bp
+++ b/grammars/bp.bp
@@ -78,6 +78,6 @@ extended-pat: Otherwise / Replace / Chain / pat
_: *(` / \t)
__: *(` / \t / \r / \n / comment)
-id: "__" / "_" / "|" / `a-z,A-Z *`a-z,A-Z,0-9,-
+id: "__" / "_" / `a-z,A-Z *`a-z,A-Z,0-9,-
comment: `# .. $
diff --git a/grammars/builtins.bp b/grammars/builtins.bp
index 354ac50..526498e 100644
--- a/grammars/builtins.bp
+++ b/grammars/builtins.bp
@@ -16,12 +16,18 @@ brackets: `[ ..%(\n/brackets/string) `]
braces: `{ ..%(\n/braces/string) `}
parens: `( ..%(\n/parens/string) `)
string: `" ..%(`\.) `" / `' ..%(`\.) `'
-id: !<`a-z,A-Z,_,0-9 (`a-z,A-Z,_ *`a-z,A-Z,_,0-9)!=keyword |
+left-id-boundary: ^ / <(\x00-x7f!=id-char) / <((\xc0-xdf \x80-xbf)!=id-char)
+ / <((\xe0-xef 2\x80-xbf)!=id-char) / <((\xf0-xf7 3\x80-xbf)!=id-char)
+right-id-boundary: !id-char
+id: left-id-boundary !`0-9 (+id-char)!=keyword
id-char: `a-z,A-Z,_,0-9
-|: !<`a-z,A-Z,_,0-9 / !`a-z,A-Z,_,0-9
var: id
keyword: !"" # No keywords defined by default
-word: |+`a-z,A-Z !`0-9,_
+left-word-boundary: ^ / <(\x00-x7f!=word-char) / <((\xc0-xdf \x80-xbf)!=word-char)
+ / <((\xe0-xef 2\x80-xbf)!=word-char) / <((\xf0-xf7 3\x80-xbf)!=word-char)
+right-word-boundary: !word-char
+word-char: `a-z,A-Z,_,0-9,-,'
+word: left-word-boundary +word-char
HEX: `0-9,A-F
Hex: `0-9,a-f,A-F
hex: `0-9,a-f
diff --git a/grammars/c++.bp b/grammars/c++.bp
index 33e3fb7..a48d5a2 100644
--- a/grammars/c++.bp
+++ b/grammars/c++.bp
@@ -8,7 +8,7 @@
comment: "//" .. $ / "/*" ..%\n "*/"
string: `" ..%(`\.) `"
-keyword: |(
+keyword:
"alignas" / "alignof" / "and" / "and_eq" / "asm" / "atomic_cancel" / "atomic_commit" /
"atomic_noexcept" / "auto" / "bitand" / "bitor" / "bool" / "break" / "case" / "catch" /
"char" / "char8_t" / "char16_t" / "char32_t" / "class" / "compl" / "concept" / "const" /
@@ -22,8 +22,7 @@ keyword: |(
"static_cast" / "struct" / "switch" / "synchronized" / "template" / "this" /
"thread_local" / "throw" / "true" / "try" / "typedef" / "typeid" / "typename" / "union" /
"unsigned" / "using" / "virtual" / "void" / "volatile" / "wchar_t" / "while" / "xor" / "xor_eq"
-)|
-function-def: ^_ 2+(keyword / id / anglebraces / `*) % __ parens (__`; / >(__`{))
+function-def: ^_ 2+(id / keyword / anglebraces / `*) % __ parens (__`; / >(__`{))
function: function-def __ braces
macro: ^{#define} ..$ *(<`\ \n..$)
import: ^({#include}/{#import}) __ (string / `<..`>)
diff --git a/grammars/c.bp b/grammars/c.bp
index be91cbe..2c74c3d 100644
--- a/grammars/c.bp
+++ b/grammars/c.bp
@@ -8,14 +8,13 @@
comment: "//" .. $ / "/*" ..%\n "*/"
string: `" ..%(`\.) `"
-keyword: |(
+keyword:
"auto" / "break" / "case" / "char" / "const" / "continue" / "default" / "do" /
"double" / "else" / "enum" / "extern" / "float" / "for" / "goto" / "if" /
"int" / "long" / "register" / "return" / "short" / "signed" / "sizeof" /
"static" / "struct" / "switch" / "typedef" / "union" / "unsigned" / "void" /
"volatile" / "while"
-)|
-function-def: ^_ 2+(keyword / id / `*) % __ parens (__`; / >(__`{))
+function-def: ^_ 2+(id / keyword / `*) % __ parens (__`; / >(__`{))
function: function-def __ braces
macro: ^{#define} ..$ *(<`\ \n..$)
import: ^{#include} __ (string / `<..`>)
diff --git a/grammars/go.bp b/grammars/go.bp
index 425325a..c9f7377 100644
--- a/grammars/go.bp
+++ b/grammars/go.bp
@@ -8,11 +8,10 @@
comment: "//" .. $ / "/*" ..%\n "*/"
string: `" ..%(`\.) `"
-keyword: |(
+keyword:
"break" / "default" / "func" / "interface" / "select" / "case" / "defer" / "go" /
"map" / "struct" / "chan" / "else" / "goto" / "package" / "switch" / "const" /
"fallthrough" / "if" / "range" / "type" / "continue" / "for" / "import" / "return" / "var"
-)|
function-def: {func} __ id __ parens __ [id / parens] >(__`{)
function: function-def __ braces
import: {import} __ (parens / string)
diff --git a/grammars/javascript.bp b/grammars/javascript.bp
index b663050..fdd76a1 100644
--- a/grammars/javascript.bp
+++ b/grammars/javascript.bp
@@ -8,7 +8,7 @@
comment: "//" .. $ / "/*" ..%\n "*/"
string: `" ..%(`\.) `" / `' ..%(`\.) `' / `/ ..%(`\.) `/
-keyword: |(
+keyword:
"abstract" / "arguments" / "await" / "boolean" / "break" / "byte" / "case" /
"catch" / "char" / "class" / "const" / "continue" / "debugger" / "default" /
"delete" / "do" / "double" / "else" / "enum" / "eval" / "export" / "extends" /
@@ -18,7 +18,6 @@ keyword: |(
"public" / "return" / "short" / "static" / "super" / "switch" / "synchronized" /
"this" / "throw" / "throws" / "transient" / "true" / "try" / "typeof" / "var" /
"void" / "volatile" / "while" / "with" / "yield"
-)|
function-def: {function} __ [id__] parens / (id / parens) __ "=>"
function: function-def __ braces
import: {import} ..%braces (`; / $)
diff --git a/grammars/lisp.bp b/grammars/lisp.bp
index 3e1f4dc..d02a458 100644
--- a/grammars/lisp.bp
+++ b/grammars/lisp.bp
@@ -13,4 +13,3 @@ function-def: `(__{defun}__id
function: function-def ..%parens `)
id-char: `A-Z,a-z,0-9,!,$,%,&,*,+,-,.,/,:,<,=,>,?,@,^,_,~
id: !<`A-Z,a-z,0-9,!,$,%,&,*,+,-,.,/,:,<,=,>,?,@,^,_,~ +`A-Z,a-z,0-9,!,$,%,&,*,+,-,.,/,:,<,=,>,?,@,^,_,~
-|: !<`A-Z,a-z,0-9,!,$,%,&,*,+,-,.,/,:,<,=,>,?,@,^,_,~ / !`A-Z,a-z,0-9,!,$,%,&,*,+,-,.,/,:,<,=,>,?,@,^,_,~
diff --git a/grammars/lua.bp b/grammars/lua.bp
index 5554167..6967f1e 100644
--- a/grammars/lua.bp
+++ b/grammars/lua.bp
@@ -9,11 +9,10 @@
comment: "--" (`[ @eqs=*`= `[ ..%\n (`]eqs`]) / ..$)
string: `"..%(`\.) `" / `' ..%(`\.) `' / `[ @eqs=*`= `[ ..%\n (`]eqs`])
table: `{ ..%(table/string/comment/\n) `}
-keyword: |(
+keyword:
"and" / "break" / "do" / "else" / "elseif" / "end" / "false" / "for" /
"function" / "goto" / "if" / "in" / "local" / "nil" / "not" / "or" /
"repeat" / "return" / "then" / "true" / "until" / "while"
-)|
function-def: {function}[_id (*(`.id)[`:id])]_ parens
block: function / ({do}/{then}) ..%(comment/string/block/\n) {end}
function: function-def ..%(comment/string/block/\n) {end}
diff --git a/grammars/python.bp b/grammars/python.bp
index daab6a5..37d6a88 100644
--- a/grammars/python.bp
+++ b/grammars/python.bp
@@ -8,11 +8,11 @@
comment: `# ..$
string: "'''" ..%\n "'''" / '"""' ..%\n '"""' / `" ..%(`\.) `" / `' ..%(`\.) `'
-keyword: |("and" / "as" / "assert" / "break" / "class" / "continue" / "def" /
+keyword: "and" / "as" / "assert" / "break" / "class" / "continue" / "def" /
"del" / "elif" / "else" / "except" / "finally" / "for" / "from" /
"global" / "if" / "import" / "in" / "is" / "lambda" / "None" / "nonlocal" /
"not" / "or" / "pass" / "raise" / "return" / "try" / "while" /
- "with" / "yield")|
+ "with" / "yield"
class: class-def +(\N ..$)
class-def: ^_{class}_id[_parens]_`:
function: function-def +(\N ..$)
diff --git a/grammars/rust.bp b/grammars/rust.bp
index bb58a61..97a1e73 100644
--- a/grammars/rust.bp
+++ b/grammars/rust.bp
@@ -8,12 +8,11 @@
comment: "//" .. $ / "/*" ..%(comment / \n) "*/"
string: `" ..%(`\.) `"
-keyword: |(
+keyword:
"as" / "break" / "const" / "continue" / "crate" / "else" / "enum" / "extern" /
"false" / "fn" / "for" / "if" / "impl" / "in" / "let" / "loop" / "match" /
"mod" / "move" / "mut" / "pub" / "ref" / "return" / "self" / "Self" / "static" /
"struct" / "super" / "trait" / "true" / "type" / "unsafe" / "use" / "where" / "while"
-)|
function-def: {fn} __ id __ parens __ ["->"__(id / parens)] >(__`{)
function: function-def __ braces
import: {use} _ *(id / braces) % "::" _ `;
diff --git a/grammars/shell.bp b/grammars/shell.bp
index 1ad182d..ff13d0b 100644
--- a/grammars/shell.bp
+++ b/grammars/shell.bp
@@ -9,11 +9,10 @@
comment: `#..$
string: `" ..%(`\./subcommand/\n) `" / `' ..%\n `' / "<<" _ @delim=id _$ ..%\n (^delim$)
subcommand: `` ..%\n `` / "$" parens
-keyword: |(
+keyword:
"echo" / "read" / "set" / "unset" / "readonly" / "shift" / "export" / "if" / "fi" /
"else" / "while" / "do" / "done" / "for" / "until" / "case" / "esac" / "break" /
"continue" / "exit" / "return" / "trap" / "wait" / "eval" / "exec" / "ulimit" / "umask"
-)|
function-def: ^_ ["function"_] id _ `(_`) >(__`{)
function: function-def __ braces
var: `$ (id / braces)
diff --git a/grammars/utf8-id.bp b/grammars/utf8-id.bp
index 9d5381d..b8ba2d3 100644
--- a/grammars/utf8-id.bp
+++ b/grammars/utf8-id.bp
@@ -1,11 +1,7 @@
# Definitions of UTF8-compliant identifiers
-id: | utf8-id-start *utf8-id-cont
+id: left-word-boundary (utf8-id-start *utf8-id-cont)!=keyword
id-char: utf8-id-cont / utf8-id-start
-|: !id-char / (
- !<(\x00-x7f==id-char)
- !<((\xc0-xdf \x80-xbf)==id-char)
- !<((\xe0-xef 2\x80-xbf)==id-char)
- !<((\xf0-xf7 3\x80-xbf)==id-char))
+word-char: utf8-id-cont / utf8-id-start
utf8-id-start: `A-Z / `a-z / !\x00-x7F (
\xc2 (\xaa / \xb5 / \xba)