diff options
| author | Bruce Hill <bruce@bruce-hill.com> | 2020-09-28 16:14:06 -0700 |
|---|---|---|
| committer | Bruce Hill <bruce@bruce-hill.com> | 2020-09-28 16:14:06 -0700 |
| commit | 5049bd7cad8478ecb3f16f8aa7b9b741825922d7 (patch) | |
| tree | c07da0b9c290abd439228329f070f581f76a9272 | |
| parent | 88571d7639d1bfa134b9b4f89ddd031b11fe8f69 (diff) | |
New grammar: [<pat>] instead of 0-1(<pat>), and @foo=<pat> instead of
@[foo]<pat>
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | bpeg.1 | 6 |
| -rw-r--r-- | compiler.c | 22 |
| -rw-r--r-- | grammars/bpeg.bpeg | 65 |
| -rw-r--r-- | grammars/builtins.bpeg | 28 |
| -rw-r--r-- | grammars/html.bpeg | 10 |
5 files changed, 78 insertions, 53 deletions
@@ -111,10 +111,12 @@ The \fBescape-sequence-range-\fI<esc1>\fB-to-\fI<esc2>\fR .B !\fI<pat>\fR \fBNot-\fI<pat>\fR +.B [\fI<pat>\fR] +\fBMaybe-\fI<pat>\fR + .B \fI<N> <pat>\fR .B \fI<MIN>\fB-\fI<MAX> <pat>\fR .B \fI<MIN>\fB+ \fI<pat>\fR -.B \fI<MAX>\fB- \fI<pat>\fR \fI<MIN>\fB-to-\fI<MAX>\fB-\fI<pat>\fBs\fR (repetitions of a pattern) .B \fI<repeating-pat>\fR \fB%\fI <sep>\fR @@ -130,7 +132,7 @@ The \fBescape-sequence-range-\fI<esc1>\fB-to-\fI<esc2>\fR .B @\fI<pat>\fR \fBCapture-\fI<pat>\fR -.B @[\fI<name>\fB]\fI<pat>\fR +.B @\fI<name>\fB=\fI<pat>\fR \fBLet-\fI<name>\fB-equal-\fI<pat>\fR (named capture) .B {\fI<pat>\fB => "\fI<replacement>\fB"} @@ -252,16 +252,24 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str) check(matchchar(&str, ')'), "Expected closing ')' instead of \"%s\"", str); break; } + // Square brackets + case '[': { + vm_op_t *pat = bpeg_simplepattern(f, str); + check(pat, "Expected pattern inside square brackets"); + pat = expand_choices(f, pat); + str = pat->end; + str = after_spaces(str); + check(matchchar(&str, ']'), "Expected closing ']' instead of \"%s\"", str); + set_range(op, 0, 1, pat, NULL); + break; + } // Capture case '@': { op->op = VM_CAPTURE; - str = after_spaces(str); - if (matchchar(&str, '[')) { - char *closing = strchr(str, ']'); - check(closing, "Expected closing ']'"); - op->args.capture.name = strndup(str, (size_t)(closing-str)); - str = closing; - check(matchchar(&str, ']'), "Expected closing ']'"); + const char *a = *str == '!' ? &str[1] : after_name(str); + if (a > str && *after_spaces(a) == '=') { + op->args.capture.name = strndup(str, (size_t)(a-str)); + str = a + 1; } vm_op_t *pat = bpeg_simplepattern(f, str); check(pat, "Expected pattern after @"); diff --git a/grammars/bpeg.bpeg b/grammars/bpeg.bpeg index f3e2f48..380bf9c 100644 --- a/grammars/bpeg.bpeg +++ b/grammars/bpeg.bpeg @@ -1,60 +1,61 @@ # This is a file defining the BPEG grammar using BPEG syntax -Grammar: __ 0+(Def 0-1(__`;))%__ __ ($$ / @[!]{... 
=> "Could not parse this code"}) -Def: @[name]id _ `: __ ( - @[definition]extended-pat - / $$ @[!]{=>"No definition for rule"} - / @[!]{...>(`;/id_`:/$) => "Invalid definition: @0"}) +Grammar: __ 0+(Def [__`;])%__ __ ($$ / @!={... => "Could not parse this code"}) +Def: @name=id _ `: __ ( + @definition=extended-pat + / $$ @!={=>"No definition for rule"} + / @!={...>(`;/id_`:/$) => "Invalid definition: @0"}) # This is used for command line arguments: -String-pattern: 0+(`\ (escape-sequence / pat 0-1`;) / .) +String-pattern: 0+(`\ (escape-sequence / pat [`;]) / .) pat: suffixed-pat / simple-pat simple-pat: Upto-and / Dot / String / Char-range / Char / Escape-range / Escape / No - / Nodent / Repeat / After / Before / Capture / Replace / Ref / parens + / Nodent / Repeat / Optional / After / Before / Capture / Replace / Ref / parens -suffixed-pat: Eq-pat / simple-pat 0-1( - @[!]{`* => "'*' is not a BPEG operator. Use 0+<pat> instead of <pat>*"} - / @[!]{`+ => "'+' is not a BPEG operator. Use 1+<pat> instead of <pat>+"} - / @[!]{`? => "'?' is not a BPEG operator. Use 0-1<pat> instead of <pat>?"} -) +suffixed-pat: Eq-pat / simple-pat [ + @!={`* => "'*' is not a BPEG operator. Use 0+<pat> instead of <pat>*"} + / @!={`+ => "'+' is not a BPEG operator. Use 1+<pat> instead of <pat>+"} + / @!={`? => "'?' is not a BPEG operator. Use [<pat>] instead of <pat>?"} +] -Eq-pat: @[first]simple-pat "==" @[second]pat +Eq-pat: @first=simple-pat "==" @second=pat Dot: `. !`. String: ( - `" @[s]0+(Escape / !`".) (`" / @[!]{=> "Expected closing quote here"}) - / `' @[s]0+(Escape / !`'.) (`' / @[!]{=> "Expected closing quote here"}) + `" @s=0+(Escape / !`".) (`" / @!={=> "Expected closing quote here"}) + / `' @s=0+(Escape / !`'.) (`' / @!={=> "Expected closing quote here"}) ) -Char-range: `` @[low]. `- (@[high]. / @[!]{=> "Expected a second character to form a character range"}) -Char: `` (@[s]. 
/ @[!]{=> "Expected a character following the '`'"}) -Escape-range: `\ @[low]escape-sequence `- @[high]escape-sequence -Escape: `\ (@[s]escape-sequence - / $ @[!]{=>"Backslashes are used for escape sequences, not splitting lines"} - / @[!]{. 0+(Abc/`0-9) => "Invalid escape sequence: '@0'"} +Char-range: `` @low=. `- (@high=. / @!={=> "Expected a second character to form a character range"}) +Char: `` (@s=. / @!={=> "Expected a character following the '`'"}) +Escape-range: `\ @low=escape-sequence `- @high=escape-sequence +Escape: `\ (@s=escape-sequence + / $ @!={=>"Backslashes are used for escape sequences, not splitting lines"} + / @!={. 0+(Abc/`0-9) => "Invalid escape sequence: '@0'"} ) escape-sequence: ( `n/`t/`r/`e/`b/`a/`v / 1-3 `0-7 / `x 2 (`0-9/`a-f/`A-F) ) -No: `! (_@pat / @[!]{=>"Expected a pattern after the exclamation mark"}) +No: `! (_@pat / @!={=>"Expected a pattern after the exclamation mark"}) Nodent: `| -Upto-and: 2-3`. 0-1(_@simple-pat) +Upto-and: 2-3`. [_@simple-pat] Repeat: ( - @[min]int _ `- _ @[max]int - / @[min]int _ `+ @[max]'' - / @[min]@[max]int - ) __ @[repeat-pat]pat 0-1(__`%__@[sep]pat) + @min=int _ `- _ @max=int + / @min=int _ `+ @max='' + / @min=@max=int + ) __ @repeat-pat=pat [__`%__@sep=pat] +Optional: `[ __ extended-pat (__`] / @!={=> "Expected closing square bracket here"}) After: `< _ pat Before: `> _ pat -Capture: `@ 0-1(_ `[ @[capture-name](...>(`]/$$)) (`] / @[!]{=>"Expected closing bracket here"})) _ @[capture]pat +Capture: `@ [_ `[ @capture-name=(...>(`]/$$)) (`] / @!={=>"Expected closing bracket here"})] _ @capture=pat Replace: `{ __ ( - 0-1(@[replace-pat]extended-pat __) "=>" 0-1(__ @[replacement]String) - ) __ (`} / @[!]{=> "Expected closing brace here"}) -Ref: @[name]id !(_`:) + [@replace-pat=extended-pat __] "=>" [__ @replacement=String] + ) __ (`} / @!={=> "Expected closing brace here"}) +Ref: @name=id !(_`:) -parens: `( __ extended-pat (__ `) / @[!]{=> "Expected closing parenthesis here"}) +parens: `( __ extended-pat (__ 
`) / @!={=> "Expected closing parenthesis here"}) Chain: 2+@pat%__ Otherwise: 2+@(Chain/pat)%(__`/__) diff --git a/grammars/builtins.bpeg b/grammars/builtins.bpeg index 72da396..1633a30 100644 --- a/grammars/builtins.bpeg +++ b/grammars/builtins.bpeg @@ -24,7 +24,7 @@ find-all: ( add-filename 0+ (!..pattern {..\n=>}) 1+ (>..pattern add-line-number 1+(..hl-pattern) ..\n / {..\n=>}) - 0-1{!<\n => "\n"} + [{!<\n => "\n"}] ) only-matches: ( (include-binary-files / is-text-file) @@ -32,13 +32,27 @@ only-matches: ( add-filename 1+{...@hl-pattern=>'@1\n'} ) -add-filename: 0-1(print-filenames (is-tty {=>"\033[33;1;4m@&:\033[0m\n"} / {=>"@&:\n"})) -add-line-number: 0-1(print-line-numbers (is-tty {=>"\033[2m@#\033[5G|\033[0m "} / {=>"@#| "})) -hl-pattern: {@[match]pattern define-highlights => "@[hl-start]@[match]@[hl-end]"} -hl-replacement: {@[match]replacement define-highlights => "@[hl-start]@[match]@[hl-end]" } -define-highlights: highlight @[hl-start]{=>"\033[31;1m"} @[hl-end]{=>"\033[0m"} / @[hl-start]"" @[hl-end]"" +add-filename: [print-filenames (is-tty {=>"\033[33;1;4m@&:\033[0m\n"} / {=>"@&:\n"})] +add-line-number: [print-line-numbers (is-tty {=>"\033[2m@#\033[5G|\033[0m "} / {=>"@#| "})] +hl-pattern: {@match=pattern define-highlights => "@hl-start;@match;@hl-end;"} +hl-replacement: {@match=replacement define-highlights => "@hl-start;@match;@hl-end;" } +define-highlights: highlight @hl-start={=>"\033[31;1m"} @hl-end={=>"\033[0m"} / @hl-start="" @hl-end="" # Helper definitions (commonly used) +#( +url: ( + "file://" 1+(`/ 0+url-char) + / "mailto:" email + / ("https"/"http"/"ftp") "://" [1+url-char [`: 1+url-char] `@] (ipv4/ipv6/domain) [`: int] [url-path] +) +url-path: 1+(`/ 0+url-char) [`? 1+(1+url-char`=1+url-char] +ipv4: 4 int % `. +ipv6: 8 (4 Hex) % `: +domain: 1+(Abc/digit/`-)%`. 
+url-char: Abc/digit/`$/`-/`_/`./`+/`!/`*/`'/`(/`)/`,/`% + +url: @(https?|ftp)://(-\.)?([^\s/?\.#-]+\.?)+(/[^\s]*)?$@iS +)# indent: \n|1+(\t/' ') dedent: $ !(\n|) indented-block: |` ..$ 0+(\n|..$) @@ -60,7 +74,7 @@ word: !<(`a-z/`A-Z/`_/`0-9) 1+(`a-z/`A-Z) !>(`0-9/`_) HEX: `0-9/`A-F Hex: `0-9/`a-f/`A-F hex: `0-9/`a-f -number: 1+`0-9 0-1(`. 0+`0-9) / `. 1+`0-9 +number: 1+`0-9 [`. 0+`0-9] / `. 1+`0-9 int: 1+`0-9 digit: `0-9 Abc: `a-z/`A-Z diff --git a/grammars/html.bpeg b/grammars/html.bpeg index 908fa69..5dd93cd 100644 --- a/grammars/html.bpeg +++ b/grammars/html.bpeg @@ -1,5 +1,5 @@ # HTML grammar -HTML: __ 0-1(doctype __) 0+html-element%__ __ +HTML: __ [doctype __] 0+html-element%__ __ doctype: "<!DOCTYPE" ..`> @@ -9,13 +9,13 @@ html-element: ( / >(`<("template")) template-element / normal-element) -void-element: `< @[tag](id==match-tag) __attributes__ 0-1`/ __ `> +void-element: `< @tag=(id==match-tag) __attributes__ [`/] __ `> -template-element: `< @[tag](id==match-tag) __`> __ >match-body @[body]0+(!`<$. / comment / html-element / !("</"tag__`>)$.) ("</"tag__`>) +template-element: `< @tag=(id==match-tag) __`> __ >match-body @body=0+(!`<$. / comment / html-element / !("</"tag__`>)$.) ("</"tag__`>) -raw-element: `< @[tag](id==match-tag) __attributes__ `> >match-body @[body].. ("</"tag__`>) +raw-element: `< @tag=(id==match-tag) __attributes__ `> >match-body @body=.. ("</"tag__`>) -normal-element: `< @[tag](id==match-tag) __attributes__ `> >match-body @[body]0+(!`<$. / comment / html-element / !("</"tag__`>)$.) "</"tag__`> +normal-element: `< @tag=(id==match-tag) __attributes__ `> >match-body @body=0+(!`<$. / comment / html-element / !("</"tag__`>)$.) "</"tag__`> comment: "<!--" ..."-->" |
