Moved */+ back to prefix, and dropped ?
This commit is contained in:
parent
699e7c8b98
commit
90b8db84a4
@ -49,8 +49,8 @@ Pattern | Meaning
|
||||
`2-4 pat` | Between 2 and 4 occurrences of `pat` (inclusive)
|
||||
`5+ pat` | 5 or more occurrences of `pat`
|
||||
`5+ pat % sep` | 5 or more occurrences of `pat`, separated by `sep` (e.g. `0+ int % ","` matches `1,2,3`)
|
||||
`pat*` `pat* % sep`| 0 or more occurrences of `pat` (optionally separated by `sep`)
|
||||
`pat+` `pat+ % sep`| 1 or more occurrences of `pat` (optionally separated by `sep`)
|
||||
`*pat` | 0 or more occurrences of `pat` (shorthand for `0+pat`)
|
||||
`+pat` | 1 or more occurrences of `pat` (shorthand for `1+pat`)
|
||||
`<pat` | `pat` matches just before the current position (lookbehind)
|
||||
`>pat` | `pat` matches starting at the current position (lookahead)
|
||||
`@pat` | Capture `pat` (used for text replacement and backreferences)
|
||||
|
8
bpeg.1
8
bpeg.1
@ -122,11 +122,11 @@ The \fBescape-sequence-range-\fI<esc1>\fB-to-\fI<esc2>\fR
|
||||
.B \fI<MIN>\fB+ \fI<pat>\fR
|
||||
\fI<MIN>\fB-to-\fI<MAX>\fB-\fI<pat>\fBs\fR (repetitions of a pattern)
|
||||
|
||||
.B \fI<pat>\fR*
|
||||
\fI<pat>\fB-zero-or-more-times\fR
|
||||
.B *\fI<pat>\fR
|
||||
\fBsome-\fI<pat>\fBs\fR
|
||||
|
||||
.B \fI<pat>\fR+
|
||||
\fI<pat>\fB-one-or-more-times\fR
|
||||
.B +\fI<pat>\fR
|
||||
\fBat-least-one-\fI<pat>\fBs\fR
|
||||
|
||||
.B \fI<repeating-pat>\fR \fB%\fI <sep>\fR
|
||||
\fI<repeating-pat>\fB-separated-by-\fI<sep>\fR (equivalent to \fI<pat>
|
||||
|
34
compiler.c
34
compiler.c
@ -269,6 +269,22 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str)
|
||||
set_range(op, 0, 1, pat, NULL);
|
||||
break;
|
||||
}
|
||||
// Repeating
|
||||
case '*': case '+': {
|
||||
ssize_t min = c == '*' ? 0 : 1;
|
||||
vm_op_t *pat = bpeg_simplepattern(f, str);
|
||||
check(pat, "Expected pattern after '%c'", *str);
|
||||
str = pat->end;
|
||||
str = after_spaces(str);
|
||||
vm_op_t *sep = NULL;
|
||||
if (matchchar(&str, '%')) {
|
||||
sep = bpeg_simplepattern(f, str);
|
||||
check(sep, "Expected pattern for separator after '%%'");
|
||||
str = sep->end;
|
||||
}
|
||||
set_range(op, min, -1, pat, sep);
|
||||
break;
|
||||
}
|
||||
// Capture
|
||||
case '@': {
|
||||
op->op = VM_CAPTURE;
|
||||
@ -373,23 +389,7 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str)
|
||||
postfix:
|
||||
if (f ? str >= f->end : !*str) return op;
|
||||
str = after_spaces(str);
|
||||
if (*str == '*' || *str == '+' || *str == '?') { // Repetitions: <pat>*, <pat>+, <pat>?
|
||||
char operator = *str;
|
||||
++str;
|
||||
vm_op_t *pat = op;
|
||||
vm_op_t *sep = NULL;
|
||||
if (operator != '?' && matchchar(&str, '%')) {
|
||||
sep = bpeg_simplepattern(f, str);
|
||||
check(sep, "Expected pattern for separator after '%%'");
|
||||
str = sep->end;
|
||||
}
|
||||
op = calloc(sizeof(vm_op_t), 1);
|
||||
set_range(op, operator == '+' ? 1 : 0, operator == '?' ? 1 : -1, pat, sep);
|
||||
op->start = pat->start;
|
||||
op->end = str;
|
||||
op->len = -1;
|
||||
goto postfix;
|
||||
} else if ((str[0] == '=' || str[0] == '!') && str[1] == '=') { // Equality <pat1>==<pat2> and inequality <pat1>!=<pat2>
|
||||
if ((str[0] == '=' || str[0] == '!') && str[1] == '=') { // Equality <pat1>==<pat2> and inequality <pat1>!=<pat2>
|
||||
int equal = str[0] == '=';
|
||||
str = after_spaces(str+2);
|
||||
vm_op_t *first = op;
|
||||
|
@ -1,43 +1,37 @@
|
||||
# This is a file defining the BPEG grammar using BPEG syntax
|
||||
|
||||
Grammar: __ 0+(Def [__`;])%__ __ ($$ / @!={... => "Could not parse this code"})
|
||||
Grammar: __ *(Def [__`;])%__ __ ($$ / @!={... => "Could not parse this code"})
|
||||
Def: @name=id _ `: __ (
|
||||
@definition=extended-pat
|
||||
/ $$ @!={=>"No definition for rule"}
|
||||
/ @!={...>(`;/id_`:/$) => "Invalid definition: @0"})
|
||||
|
||||
# This is used for command line arguments:
|
||||
String-pattern: 0+(`\ (escape-sequence / pat [`;]) / .)
|
||||
String-pattern: *(`\ (escape-sequence / pat [`;]) / .)
|
||||
|
||||
pat: simple-pat !(__("!="/"=="/`*/`+/`?)) / suffixed-pat
|
||||
pat: simple-pat !(__("!="/"==")) / suffixed-pat
|
||||
simple-pat: Upto-and / Dot / String / Char-range / Char / Escape-range / Escape / No
|
||||
/ Nodent / Repeat / Optional / After / Before / Capture / Replace / Ref / parens
|
||||
|
||||
suffixed-pat: (
|
||||
Eq-pat
|
||||
/ Not-eq-pat
|
||||
/ Star-pat
|
||||
/ Plus-pat
|
||||
/ Question-pat
|
||||
)
|
||||
|
||||
Eq-pat: @first=pat__"=="__@second=pat
|
||||
Not-eq-pat: @first=pat__"!="__@second=pat
|
||||
Star-pat: pat __ `* @min={=>"0"} @max="" [__`%__@sep=pat]
|
||||
Plus-pat: pat __ `+ @min={=>"1"} @max="" [__`%__@sep=pat]
|
||||
Question-pat: pat __ `?
|
||||
|
||||
Dot: `. !`.
|
||||
String: (
|
||||
`" @s=0+(Escape / !`".) (`" / @!={=> "Expected closing quote here"})
|
||||
/ `' @s=0+(Escape / !`'.) (`' / @!={=> "Expected closing quote here"})
|
||||
`" @s=*(Escape / !`".) (`" / @!={=> "Expected closing quote here"})
|
||||
/ `' @s=*(Escape / !`'.) (`' / @!={=> "Expected closing quote here"})
|
||||
)
|
||||
Char-range: `` @low=. `- (@high=. / @!={=> "Expected a second character to form a character range"})
|
||||
Char: `` (@s=. / @!={=> "Expected a character following the '`'"})
|
||||
Escape-range: `\ @low=escape-sequence `- @high=escape-sequence
|
||||
Escape: `\ (@s=escape-sequence
|
||||
/ $ @!={=>"Backslashes are used for escape sequences, not splitting lines"}
|
||||
/ @!={. 0+(Abc/`0-9) => "Invalid escape sequence: '@0'"}
|
||||
/ @!={. *(Abc/`0-9) => "Invalid escape sequence: '@0'"}
|
||||
)
|
||||
escape-sequence: (
|
||||
`n/`t/`r/`e/`b/`a/`v
|
||||
@ -68,17 +62,17 @@ Otherwise: 2+@(Chain/pat)%(__`/__)
|
||||
extended-pat: Otherwise / Chain / pat
|
||||
|
||||
# Special-symbol rules:
|
||||
_: 0+(` / \t)
|
||||
__: 0+(` / \t / \r / \n / comment)
|
||||
_: *(` / \t)
|
||||
__: *(` / \t / \r / \n / comment)
|
||||
$$: !$.
|
||||
$: !.
|
||||
^^: !<$.
|
||||
^: !<.
|
||||
|
||||
id: "^^" / "^" / "__" / "_" / "$$" / "$" / (`a-z/`A-Z) 0+(`a-z/`A-Z/`0-9/`-)
|
||||
id: "^^" / "^" / "__" / "_" / "$$" / "$" / (`a-z/`A-Z) *(`a-z/`A-Z/`0-9/`-)
|
||||
|
||||
line-comment: `# .. $
|
||||
block-comment: "#(" 0+(block-comment / !")#" .) ")#"
|
||||
block-comment: "#(" *(block-comment / !")#" .) ")#"
|
||||
|
||||
# Note: comments are undefined by default in regular BPEG
|
||||
comment: block-comment / line-comment
|
||||
|
@ -16,21 +16,21 @@ replace-all: (
|
||||
(include-binary-files / is-text-file)
|
||||
define-highlights
|
||||
add-filename
|
||||
0+(...(>pattern hl-replacement)) ...
|
||||
*(...(>pattern hl-replacement)) ...
|
||||
)
|
||||
find-all: (
|
||||
(include-binary-files / is-text-file)
|
||||
define-highlights
|
||||
add-filename
|
||||
0+ (!..pattern {..\n=>})
|
||||
1+ (>..pattern add-line-number 1+(..hl-pattern) ..\n / {..\n=>})
|
||||
*(!..pattern {..\n=>})
|
||||
+(>..pattern add-line-number +(..hl-pattern) ..\n / {..\n=>})
|
||||
[{!<\n => "\n"}]
|
||||
)
|
||||
only-matches: (
|
||||
(include-binary-files / is-text-file)
|
||||
define-highlights
|
||||
add-filename
|
||||
1+{...@hl-pattern =>'@1\n'}
|
||||
+{...@hl-pattern =>'@1\n'}
|
||||
)
|
||||
add-filename: [print-filenames (is-tty {=>"\033[33;1;4m@&:\033[0m\n"} / {=>"@&:\n"})]
|
||||
add-line-number: [print-line-numbers (is-tty {=>"\033[2m@#\033[5G|\033[0m "} / {=>"@#| "})]
|
||||
@ -41,21 +41,21 @@ define-highlights: highlight @hl-start={=>"\033[31;1m"} @hl-end={=>"\033[0m"} /
|
||||
# Helper definitions (commonly used)
|
||||
#(
|
||||
url: (
|
||||
"file://" 1+(`/ 0+url-char)
|
||||
"file://" +(`/ *url-char)
|
||||
/ "mailto:" email
|
||||
/ ("https"/"http"/"ftp") "://" [1+url-char [`: 1+url-char] `@] (ipv4/ipv6/domain) [`: int] [url-path]
|
||||
/ ("https"/"http"/"ftp") "://" [+url-char [`: +url-char] `@] (ipv4/ipv6/domain) [`: int] [url-path]
|
||||
)
|
||||
url-path: 1+(`/ 0+url-char) [`? 1+(1+url-char`=1+url-char]
|
||||
url-path: +(`/ *url-char) [`? +(+url-char`=+url-char]
|
||||
ipv4: 4 int % `.
|
||||
ipv6: 8 (4 Hex) % `:
|
||||
domain: 1+(Abc/digit/`-)%`.
|
||||
domain: +(Abc/digit/`-)%`.
|
||||
url-char: Abc/digit/`$/`-/`_/`./`+/`!/`*/`'/`(/`)/`,/`%
|
||||
|
||||
url: @(https?|ftp)://(-\.)?([^\s/?\.#-]+\.?)+(/[^\s]*)?$@iS
|
||||
)#
|
||||
indent: \n|1+(\t/' ')
|
||||
indent: \n|+(\t/' ')
|
||||
dedent: $ !(\n|)
|
||||
indented-block: |` ..$ 0+(\n|..$)
|
||||
indented-block: |` ..$ *(\n|..$)
|
||||
utf8-codepoint: (
|
||||
\x00-x7f
|
||||
/ \xc0-xdf 1\x80-xbf
|
||||
@ -64,18 +64,18 @@ utf8-codepoint: (
|
||||
)
|
||||
crlf: \r\n
|
||||
cr: \r
|
||||
anglebraces: `< 0+(anglebraces / !`>$.) `>
|
||||
brackets: `[ 0+(brackets / !`]$.) `]
|
||||
braces: `{ 0+(braces / !`}$.) `}
|
||||
parens: `( 0+(parens / !`)$.) `)
|
||||
id: !<(`a-z/`A-Z/`_/`0-9) (`a-z/`A-Z/`_) 0+(`a-z/`A-Z/`_/`0-9)
|
||||
anglebraces: `< *(anglebraces / !`>$.) `>
|
||||
brackets: `[ *(brackets / !`]$.) `]
|
||||
braces: `{ *(braces / !`}$.) `}
|
||||
parens: `( *(parens / !`)$.) `)
|
||||
id: !<(`a-z/`A-Z/`_/`0-9) (`a-z/`A-Z/`_) *(`a-z/`A-Z/`_/`0-9)
|
||||
id-char: `a-z/`A-Z/`_/`0-9
|
||||
word: !<(`a-z/`A-Z/`_/`0-9) 1+(`a-z/`A-Z) !>(`0-9/`_)
|
||||
word: !<(`a-z/`A-Z/`_/`0-9) +(`a-z/`A-Z) !>(`0-9/`_)
|
||||
HEX: `0-9/`A-F
|
||||
Hex: `0-9/`a-f/`A-F
|
||||
hex: `0-9/`a-f
|
||||
number: 1+`0-9 [`. 0+`0-9] / `. 1+`0-9
|
||||
int: 1+`0-9
|
||||
number: +`0-9 [`. *`0-9] / `. +`0-9
|
||||
int: +`0-9
|
||||
digit: `0-9
|
||||
Abc: `a-z/`A-Z
|
||||
ABC: `A-Z
|
||||
@ -94,5 +94,5 @@ $$: !$.
|
||||
$: !.
|
||||
^^: !<$.
|
||||
^: !<.
|
||||
__: 0+(` /\t/\n/\r/comment)
|
||||
_: 0+(` /\t)
|
||||
__: *(` /\t/\n/\r/comment)
|
||||
_: *(` /\t)
|
||||
|
@ -1,5 +1,5 @@
|
||||
# HTML grammar
|
||||
HTML: __ [doctype __] 0+html-element%__ __
|
||||
HTML: __ [doctype __] *html-element%__ __
|
||||
|
||||
doctype: "<!DOCTYPE" ..`>
|
||||
|
||||
@ -11,16 +11,16 @@ html-element: (
|
||||
|
||||
void-element: `< @tag=(id==match-tag) __attributes__ [`/] __ `>
|
||||
|
||||
template-element: `< @tag=(id==match-tag) __`> __ >match-body @body=0+(!`<$. / comment / html-element / !("</"tag__`>)$.) ("</"tag__`>)
|
||||
template-element: `< @tag=(id==match-tag) __`> __ >match-body @body=*(!`<$. / comment / html-element / !("</"tag__`>)$.) ("</"tag__`>)
|
||||
|
||||
raw-element: `< @tag=(id==match-tag) __attributes__ `> >match-body @body=.. ("</"tag__`>)
|
||||
|
||||
normal-element: `< @tag=(id==match-tag) __attributes__ `> >match-body @body=0+(!`<$. / comment / html-element / !("</"tag__`>)$.) "</"tag__`>
|
||||
normal-element: `< @tag=(id==match-tag) __attributes__ `> >match-body @body=*(!`<$. / comment / html-element / !("</"tag__`>)$.) "</"tag__`>
|
||||
|
||||
comment: "<!--" ..."-->"
|
||||
|
||||
attributes: 0+attribute%__
|
||||
attribute: (1+id%`:)__`=__ (id / `" ..`" / `' ..`')
|
||||
attribute: (1+id%`:)__`=__ (id / `" ..`" / `' ..`')
|
||||
attributes: *attribute%__
|
||||
attribute: (+id%`:)__`=__ (id / `" ..`" / `' ..`')
|
||||
attribute: (+id%`:)__`=__ (id / `" ..`" / `' ..`')
|
||||
match-tag: id
|
||||
match-body: ''
|
||||
|
@ -1,5 +1,5 @@
|
||||
# Definitions of UTF8-compliant identifiers
|
||||
utf8-id: utf8-id-start 0+utf8-id-cont
|
||||
utf8-id: utf8-id-start *utf8-id-cont
|
||||
|
||||
utf8-id-start: `A-Z / `a-z / !\x00-x7F (
|
||||
\xc2 (\xaa / \xb5 / \xba)
|
||||
|
Loading…
Reference in New Issue
Block a user