about | summary | refs | log | tree | commit | diff
path: root/grammars/builtins.bp
diff options
context:
space:
mode:
author Bruce Hill <bruce@bruce-hill.com> 2021-07-19 19:40:43 -0700
committer Bruce Hill <bruce@bruce-hill.com> 2021-07-19 19:40:43 -0700
commit 711fe47a7f651f38e090c9a20ecef11feba6f705 (patch)
tree 98a3283e7c630919a08f8bd95326ceeb1a93da9e /grammars/builtins.bp
parent 62e7d654bd70db89cb38e5d9efeb9a9b0e9cf202 (diff)
Overhaul of word boundaries/edges. Now they use \b, which is implemented
in C, and the C code understands UTF8 id chars.
Diffstat (limited to 'grammars/builtins.bp')
-rw-r--r-- grammars/builtins.bp 18
1 file changed, 3 insertions, 15 deletions
diff --git a/grammars/builtins.bp b/grammars/builtins.bp
index 98e198d..437265c 100644
--- a/grammars/builtins.bp
+++ b/grammars/builtins.bp
@@ -3,12 +3,6 @@
nodent: \N !(\t/` )
indent: \N (` /\t)
dedent: $ !(nodent/indent)
-utf8-codepoint: (
- \x00-x7f
- / \xc0-xdf 1\x80-xbf
- / \xe0-xef 2\x80-xbf
- / \xf0-xf7 3\x80-xbf
-)
crlf: \r\n
cr: \r
anglebraces: `< ..%(\n/anglebraces/string) `>
@@ -17,16 +11,10 @@ braces: `{ ..%(\n/braces/string) `}
parens: `( ..%(\n/parens/string) `)
string: `" ..%string-escape `" / `' ..%string-escape `'
string-escape: `\ (`x 2 Hex / 1-3 `0-7 / `u 1-4 Hex / .)
-left-id-edge: !<id-char
-right-id-edge: !id-char
-id: left-id-edge !`0-9 !(keyword !id-char) +id-char
-id-char: `a-z,A-Z,_,0-9
-var: id
+id: \I *\i
+var: \I *\i
keyword: !"" # No keywords defined by default
-left-word-edge: !<word-char
-right-word-edge: !word-char
-word-char: `a-z,A-Z,_,0-9
-word: left-word-edge +word-char
+word: \b +\i
HEX: `0-9,A-F
Hex: `0-9,a-f,A-F
hex: `0-9,a-f