about summary refs log tree commit diff
diff options
context:
space:
mode:
author Bruce Hill <bruce@bruce-hill.com> 2020-09-28 16:14:06 -0700
committer Bruce Hill <bruce@bruce-hill.com> 2020-09-28 16:14:06 -0700
commit 5049bd7cad8478ecb3f16f8aa7b9b741825922d7 (patch)
tree c07da0b9c290abd439228329f070f581f76a9272
parent 88571d7639d1bfa134b9b4f89ddd031b11fe8f69 (diff)
New grammar: [<pat>] instead of 0-1(<pat>), and @foo=<pat> instead of
@[foo]<pat>
-rw-r--r-- bpeg.1 6
-rw-r--r-- compiler.c 22
-rw-r--r-- grammars/bpeg.bpeg 65
-rw-r--r-- grammars/builtins.bpeg 28
-rw-r--r-- grammars/html.bpeg 10
5 files changed, 78 insertions, 53 deletions
diff --git a/bpeg.1 b/bpeg.1
index a55849c..3d2b29e 100644
--- a/bpeg.1
+++ b/bpeg.1
@@ -111,10 +111,12 @@ The \fBescape-sequence-range-\fI<esc1>\fB-to-\fI<esc2>\fR
.B !\fI<pat>\fR
\fBNot-\fI<pat>\fR
+.B [\fI<pat>\fR]
+\fBMaybe-\fI<pat>\fR
+
.B \fI<N> <pat>\fR
.B \fI<MIN>\fB-\fI<MAX> <pat>\fR
.B \fI<MIN>\fB+ \fI<pat>\fR
-.B \fI<MAX>\fB- \fI<pat>\fR
\fI<MIN>\fB-to-\fI<MAX>\fB-\fI<pat>\fBs\fR (repetitions of a pattern)
.B \fI<repeating-pat>\fR \fB%\fI <sep>\fR
@@ -130,7 +132,7 @@ The \fBescape-sequence-range-\fI<esc1>\fB-to-\fI<esc2>\fR
.B @\fI<pat>\fR
\fBCapture-\fI<pat>\fR
-.B @[\fI<name>\fB]\fI<pat>\fR
+.B @\fI<name>\fB=\fI<pat>\fR
\fBLet-\fI<name>\fB-equal-\fI<pat>\fR (named capture)
.B {\fI<pat>\fB => "\fI<replacement>\fB"}
diff --git a/compiler.c b/compiler.c
index 60da7d8..49c4996 100644
--- a/compiler.c
+++ b/compiler.c
@@ -252,16 +252,24 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str)
check(matchchar(&str, ')'), "Expected closing ')' instead of \"%s\"", str);
break;
}
+ // Square brackets
+ case '[': {
+ vm_op_t *pat = bpeg_simplepattern(f, str);
+ check(pat, "Expected pattern inside square brackets");
+ pat = expand_choices(f, pat);
+ str = pat->end;
+ str = after_spaces(str);
+ check(matchchar(&str, ']'), "Expected closing ']' instead of \"%s\"", str);
+ set_range(op, 0, 1, pat, NULL);
+ break;
+ }
// Capture
case '@': {
op->op = VM_CAPTURE;
- str = after_spaces(str);
- if (matchchar(&str, '[')) {
- char *closing = strchr(str, ']');
- check(closing, "Expected closing ']'");
- op->args.capture.name = strndup(str, (size_t)(closing-str));
- str = closing;
- check(matchchar(&str, ']'), "Expected closing ']'");
+ const char *a = *str == '!' ? &str[1] : after_name(str);
+ if (a > str && *after_spaces(a) == '=') {
+ op->args.capture.name = strndup(str, (size_t)(a-str));
+ str = a + 1;
}
vm_op_t *pat = bpeg_simplepattern(f, str);
check(pat, "Expected pattern after @");
diff --git a/grammars/bpeg.bpeg b/grammars/bpeg.bpeg
index f3e2f48..380bf9c 100644
--- a/grammars/bpeg.bpeg
+++ b/grammars/bpeg.bpeg
@@ -1,60 +1,61 @@
# This is a file defining the BPEG grammar using BPEG syntax
-Grammar: __ 0+(Def 0-1(__`;))%__ __ ($$ / @[!]{... => "Could not parse this code"})
-Def: @[name]id _ `: __ (
- @[definition]extended-pat
- / $$ @[!]{=>"No definition for rule"}
- / @[!]{...>(`;/id_`:/$) => "Invalid definition: @0"})
+Grammar: __ 0+(Def [__`;])%__ __ ($$ / @!={... => "Could not parse this code"})
+Def: @name=id _ `: __ (
+ @definition=extended-pat
+ / $$ @!={=>"No definition for rule"}
+ / @!={...>(`;/id_`:/$) => "Invalid definition: @0"})
# This is used for command line arguments:
-String-pattern: 0+(`\ (escape-sequence / pat 0-1`;) / .)
+String-pattern: 0+(`\ (escape-sequence / pat [`;]) / .)
pat: suffixed-pat / simple-pat
simple-pat: Upto-and / Dot / String / Char-range / Char / Escape-range / Escape / No
- / Nodent / Repeat / After / Before / Capture / Replace / Ref / parens
+ / Nodent / Repeat / Optional / After / Before / Capture / Replace / Ref / parens
-suffixed-pat: Eq-pat / simple-pat 0-1(
- @[!]{`* => "'*' is not a BPEG operator. Use 0+<pat> instead of <pat>*"}
- / @[!]{`+ => "'+' is not a BPEG operator. Use 1+<pat> instead of <pat>+"}
- / @[!]{`? => "'?' is not a BPEG operator. Use 0-1<pat> instead of <pat>?"}
-)
+suffixed-pat: Eq-pat / simple-pat [
+ @!={`* => "'*' is not a BPEG operator. Use 0+<pat> instead of <pat>*"}
+ / @!={`+ => "'+' is not a BPEG operator. Use 1+<pat> instead of <pat>+"}
+ / @!={`? => "'?' is not a BPEG operator. Use [<pat>] instead of <pat>?"}
+]
-Eq-pat: @[first]simple-pat "==" @[second]pat
+Eq-pat: @first=simple-pat "==" @second=pat
Dot: `. !`.
String: (
- `" @[s]0+(Escape / !`".) (`" / @[!]{=> "Expected closing quote here"})
- / `' @[s]0+(Escape / !`'.) (`' / @[!]{=> "Expected closing quote here"})
+ `" @s=0+(Escape / !`".) (`" / @!={=> "Expected closing quote here"})
+ / `' @s=0+(Escape / !`'.) (`' / @!={=> "Expected closing quote here"})
)
-Char-range: `` @[low]. `- (@[high]. / @[!]{=> "Expected a second character to form a character range"})
-Char: `` (@[s]. / @[!]{=> "Expected a character following the '`'"})
-Escape-range: `\ @[low]escape-sequence `- @[high]escape-sequence
-Escape: `\ (@[s]escape-sequence
- / $ @[!]{=>"Backslashes are used for escape sequences, not splitting lines"}
- / @[!]{. 0+(Abc/`0-9) => "Invalid escape sequence: '@0'"}
+Char-range: `` @low=. `- (@high=. / @!={=> "Expected a second character to form a character range"})
+Char: `` (@s=. / @!={=> "Expected a character following the '`'"})
+Escape-range: `\ @low=escape-sequence `- @high=escape-sequence
+Escape: `\ (@s=escape-sequence
+ / $ @!={=>"Backslashes are used for escape sequences, not splitting lines"}
+ / @!={. 0+(Abc/`0-9) => "Invalid escape sequence: '@0'"}
)
escape-sequence: (
`n/`t/`r/`e/`b/`a/`v
/ 1-3 `0-7
/ `x 2 (`0-9/`a-f/`A-F)
)
-No: `! (_@pat / @[!]{=>"Expected a pattern after the exclamation mark"})
+No: `! (_@pat / @!={=>"Expected a pattern after the exclamation mark"})
Nodent: `|
-Upto-and: 2-3`. 0-1(_@simple-pat)
+Upto-and: 2-3`. [_@simple-pat]
Repeat: (
- @[min]int _ `- _ @[max]int
- / @[min]int _ `+ @[max]''
- / @[min]@[max]int
- ) __ @[repeat-pat]pat 0-1(__`%__@[sep]pat)
+ @min=int _ `- _ @max=int
+ / @min=int _ `+ @max=''
+ / @min=@max=int
+ ) __ @repeat-pat=pat [__`%__@sep=pat]
+Optional: `[ __ extended-pat (__`] / @!={=> "Expected closing square bracket here"})
After: `< _ pat
Before: `> _ pat
-Capture: `@ 0-1(_ `[ @[capture-name](...>(`]/$$)) (`] / @[!]{=>"Expected closing bracket here"})) _ @[capture]pat
+Capture: `@ [_ `[ @capture-name=(...>(`]/$$)) (`] / @!={=>"Expected closing bracket here"})] _ @capture=pat
Replace: `{ __ (
- 0-1(@[replace-pat]extended-pat __) "=>" 0-1(__ @[replacement]String)
- ) __ (`} / @[!]{=> "Expected closing brace here"})
-Ref: @[name]id !(_`:)
+ [@replace-pat=extended-pat __] "=>" [__ @replacement=String]
+ ) __ (`} / @!={=> "Expected closing brace here"})
+Ref: @name=id !(_`:)
-parens: `( __ extended-pat (__ `) / @[!]{=> "Expected closing parenthesis here"})
+parens: `( __ extended-pat (__ `) / @!={=> "Expected closing parenthesis here"})
Chain: 2+@pat%__
Otherwise: 2+@(Chain/pat)%(__`/__)
diff --git a/grammars/builtins.bpeg b/grammars/builtins.bpeg
index 72da396..1633a30 100644
--- a/grammars/builtins.bpeg
+++ b/grammars/builtins.bpeg
@@ -24,7 +24,7 @@ find-all: (
add-filename
0+ (!..pattern {..\n=>})
1+ (>..pattern add-line-number 1+(..hl-pattern) ..\n / {..\n=>})
- 0-1{!<\n => "\n"}
+ [{!<\n => "\n"}]
)
only-matches: (
(include-binary-files / is-text-file)
@@ -32,13 +32,27 @@ only-matches: (
add-filename
1+{...@hl-pattern=>'@1\n'}
)
-add-filename: 0-1(print-filenames (is-tty {=>"\033[33;1;4m@&:\033[0m\n"} / {=>"@&:\n"}))
-add-line-number: 0-1(print-line-numbers (is-tty {=>"\033[2m@#\033[5G|\033[0m "} / {=>"@#| "}))
-hl-pattern: {@[match]pattern define-highlights => "@[hl-start]@[match]@[hl-end]"}
-hl-replacement: {@[match]replacement define-highlights => "@[hl-start]@[match]@[hl-end]" }
-define-highlights: highlight @[hl-start]{=>"\033[31;1m"} @[hl-end]{=>"\033[0m"} / @[hl-start]"" @[hl-end]""
+add-filename: [print-filenames (is-tty {=>"\033[33;1;4m@&:\033[0m\n"} / {=>"@&:\n"})]
+add-line-number: [print-line-numbers (is-tty {=>"\033[2m@#\033[5G|\033[0m "} / {=>"@#| "})]
+hl-pattern: {@match=pattern define-highlights => "@hl-start;@match;@hl-end;"}
+hl-replacement: {@match=replacement define-highlights => "@hl-start;@match;@hl-end;" }
+define-highlights: highlight @hl-start={=>"\033[31;1m"} @hl-end={=>"\033[0m"} / @hl-start="" @hl-end=""
# Helper definitions (commonly used)
+#(
+url: (
+ "file://" 1+(`/ 0+url-char)
+ / "mailto:" email
+ / ("https"/"http"/"ftp") "://" [1+url-char [`: 1+url-char] `@] (ipv4/ipv6/domain) [`: int] [url-path]
+)
+url-path: 1+(`/ 0+url-char) [`? 1+(1+url-char`=1+url-char]
+ipv4: 4 int % `.
+ipv6: 8 (4 Hex) % `:
+domain: 1+(Abc/digit/`-)%`.
+url-char: Abc/digit/`$/`-/`_/`./`+/`!/`*/`'/`(/`)/`,/`%
+
+url: @(https?|ftp)://(-\.)?([^\s/?\.#-]+\.?)+(/[^\s]*)?$@iS
+)#
indent: \n|1+(\t/' ')
dedent: $ !(\n|)
indented-block: |` ..$ 0+(\n|..$)
@@ -60,7 +74,7 @@ word: !<(`a-z/`A-Z/`_/`0-9) 1+(`a-z/`A-Z) !>(`0-9/`_)
HEX: `0-9/`A-F
Hex: `0-9/`a-f/`A-F
hex: `0-9/`a-f
-number: 1+`0-9 0-1(`. 0+`0-9) / `. 1+`0-9
+number: 1+`0-9 [`. 0+`0-9] / `. 1+`0-9
int: 1+`0-9
digit: `0-9
Abc: `a-z/`A-Z
diff --git a/grammars/html.bpeg b/grammars/html.bpeg
index 908fa69..5dd93cd 100644
--- a/grammars/html.bpeg
+++ b/grammars/html.bpeg
@@ -1,5 +1,5 @@
# HTML grammar
-HTML: __ 0-1(doctype __) 0+html-element%__ __
+HTML: __ [doctype __] 0+html-element%__ __
doctype: "<!DOCTYPE" ..`>
@@ -9,13 +9,13 @@ html-element: (
/ >(`<("template")) template-element
/ normal-element)
-void-element: `< @[tag](id==match-tag) __attributes__ 0-1`/ __ `>
+void-element: `< @tag=(id==match-tag) __attributes__ [`/] __ `>
-template-element: `< @[tag](id==match-tag) __`> __ >match-body @[body]0+(!`<$. / comment / html-element / !("</"tag__`>)$.) ("</"tag__`>)
+template-element: `< @tag=(id==match-tag) __`> __ >match-body @body=0+(!`<$. / comment / html-element / !("</"tag__`>)$.) ("</"tag__`>)
-raw-element: `< @[tag](id==match-tag) __attributes__ `> >match-body @[body].. ("</"tag__`>)
+raw-element: `< @tag=(id==match-tag) __attributes__ `> >match-body @body=.. ("</"tag__`>)
-normal-element: `< @[tag](id==match-tag) __attributes__ `> >match-body @[body]0+(!`<$. / comment / html-element / !("</"tag__`>)$.) "</"tag__`>
+normal-element: `< @tag=(id==match-tag) __attributes__ `> >match-body @body=0+(!`<$. / comment / html-element / !("</"tag__`>)$.) "</"tag__`>
comment: "<!--" ..."-->"