Moved */+ back to prefix, and dropped ?

This commit is contained in:
Bruce Hill 2020-09-28 18:08:23 -07:00
parent 699e7c8b98
commit 90b8db84a4
7 changed files with 60 additions and 66 deletions

View File

@ -49,8 +49,8 @@ Pattern | Meaning
`2-4 pat` | Between 2 and 4 occurrences of `pat` (inclusive)
`5+ pat` | 5 or more occurrences of `pat`
`5+ pat % sep` | 5 or more occurrences of `pat`, separated by `sep` (e.g. `0+ int % ","` matches `1,2,3`)
`pat*` `pat* % sep`| 0 or more occurrences of `pat` (optionally separated by `sep`)
`pat+` `pat+ % sep`| 1 or more occurrences of `pat` (optionally separated by `sep`)
`*pat` | 0 or more occurrences of `pat` (shorthand for `0+pat`)
`+pat` | 1 or more occurrences of `pat` (shorthand for `1+pat`)
`<pat` | `pat` matches just before the current position (lookbehind)
`>pat` | `pat` matches just in front of the current position (lookahead)
`@pat` | Capture `pat` (used for text replacement and backreferences)

8
bpeg.1
View File

@ -122,11 +122,11 @@ The \fBescape-sequence-range-\fI<esc1>\fB-to-\fI<esc2>\fR
.B \fI<MIN>\fB+ \fI<pat>\fR
\fI<MIN>\fB-to-\fI<MAX>\fB-\fI<pat>\fBs\fR (repetitions of a pattern)
.B \fI<pat>\fR*
\fI<pat>\fB-zero-or-more-times\fR
.B *\fI<pat>\fR
\fBsome-\fI<pat>\fBs\fR
.B \fI<pat>\fR+
\fI<pat>\fB-one-or-more-times\fR
.B +\fI<pat>\fR
\fBat-least-one-\fI<pat>\fBs\fR
.B \fI<repeating-pat>\fR \fB%\fI <sep>\fR
\fI<repeating-pat>\fB-separated-by-\fI<sep>\fR (equivalent to \fI<pat>

View File

@ -269,6 +269,22 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str)
set_range(op, 0, 1, pat, NULL);
break;
}
// Repeating
case '*': case '+': {
ssize_t min = c == '*' ? 0 : 1;
vm_op_t *pat = bpeg_simplepattern(f, str);
check(pat, "Expected pattern after '%c'", *str);
str = pat->end;
str = after_spaces(str);
vm_op_t *sep = NULL;
if (matchchar(&str, '%')) {
sep = bpeg_simplepattern(f, str);
check(sep, "Expected pattern for separator after '%%'");
str = sep->end;
}
set_range(op, min, -1, pat, sep);
break;
}
// Capture
case '@': {
op->op = VM_CAPTURE;
@ -373,23 +389,7 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str)
postfix:
if (f ? str >= f->end : !*str) return op;
str = after_spaces(str);
if (*str == '*' || *str == '+' || *str == '?') { // Repetitions: <pat>*, <pat>+, <pat>?
char operator = *str;
++str;
vm_op_t *pat = op;
vm_op_t *sep = NULL;
if (operator != '?' && matchchar(&str, '%')) {
sep = bpeg_simplepattern(f, str);
check(sep, "Expected pattern for separator after '%%'");
str = sep->end;
}
op = calloc(sizeof(vm_op_t), 1);
set_range(op, operator == '+' ? 1 : 0, operator == '?' ? 1 : -1, pat, sep);
op->start = pat->start;
op->end = str;
op->len = -1;
goto postfix;
} else if ((str[0] == '=' || str[0] == '!') && str[1] == '=') { // Equality <pat1>==<pat2> and inequality <pat1>!=<pat2>
if ((str[0] == '=' || str[0] == '!') && str[1] == '=') { // Equality <pat1>==<pat2> and inequality <pat1>!=<pat2>
int equal = str[0] == '=';
str = after_spaces(str+2);
vm_op_t *first = op;

View File

@ -1,43 +1,37 @@
# This is a file defining the BPEG grammar using BPEG syntax
Grammar: __ 0+(Def [__`;])%__ __ ($$ / @!={... => "Could not parse this code"})
Grammar: __ *(Def [__`;])%__ __ ($$ / @!={... => "Could not parse this code"})
Def: @name=id _ `: __ (
@definition=extended-pat
/ $$ @!={=>"No definition for rule"}
/ @!={...>(`;/id_`:/$) => "Invalid definition: @0"})
# This is used for command line arguments:
String-pattern: 0+(`\ (escape-sequence / pat [`;]) / .)
String-pattern: *(`\ (escape-sequence / pat [`;]) / .)
pat: simple-pat !(__("!="/"=="/`*/`+/`?)) / suffixed-pat
pat: simple-pat !(__("!="/"==")) / suffixed-pat
simple-pat: Upto-and / Dot / String / Char-range / Char / Escape-range / Escape / No
/ Nodent / Repeat / Optional / After / Before / Capture / Replace / Ref / parens
suffixed-pat: (
Eq-pat
/ Not-eq-pat
/ Star-pat
/ Plus-pat
/ Question-pat
)
Eq-pat: @first=pat__"=="__@second=pat
Not-eq-pat: @first=pat__"!="__@second=pat
Star-pat: pat __ `* @min={=>"0"} @max="" [__`%__@sep=pat]
Plus-pat: pat __ `+ @min={=>"1"} @max="" [__`%__@sep=pat]
Question-pat: pat __ `?
Dot: `. !`.
String: (
`" @s=0+(Escape / !`".) (`" / @!={=> "Expected closing quote here"})
/ `' @s=0+(Escape / !`'.) (`' / @!={=> "Expected closing quote here"})
`" @s=*(Escape / !`".) (`" / @!={=> "Expected closing quote here"})
/ `' @s=*(Escape / !`'.) (`' / @!={=> "Expected closing quote here"})
)
Char-range: `` @low=. `- (@high=. / @!={=> "Expected a second character to form a character range"})
Char: `` (@s=. / @!={=> "Expected a character following the '`'"})
Escape-range: `\ @low=escape-sequence `- @high=escape-sequence
Escape: `\ (@s=escape-sequence
/ $ @!={=>"Backslashes are used for escape sequences, not splitting lines"}
/ @!={. 0+(Abc/`0-9) => "Invalid escape sequence: '@0'"}
/ @!={. *(Abc/`0-9) => "Invalid escape sequence: '@0'"}
)
escape-sequence: (
`n/`t/`r/`e/`b/`a/`v
@ -68,17 +62,17 @@ Otherwise: 2+@(Chain/pat)%(__`/__)
extended-pat: Otherwise / Chain / pat
# Special-symbol rules:
_: 0+(` / \t)
__: 0+(` / \t / \r / \n / comment)
_: *(` / \t)
__: *(` / \t / \r / \n / comment)
$$: !$.
$: !.
^^: !<$.
^: !<.
id: "^^" / "^" / "__" / "_" / "$$" / "$" / (`a-z/`A-Z) 0+(`a-z/`A-Z/`0-9/`-)
id: "^^" / "^" / "__" / "_" / "$$" / "$" / (`a-z/`A-Z) *(`a-z/`A-Z/`0-9/`-)
line-comment: `# .. $
block-comment: "#(" 0+(block-comment / !")#" .) ")#"
block-comment: "#(" *(block-comment / !")#" .) ")#"
# Note: comments are undefined by default in regular BPEG
comment: block-comment / line-comment

View File

@ -16,21 +16,21 @@ replace-all: (
(include-binary-files / is-text-file)
define-highlights
add-filename
0+(...(>pattern hl-replacement)) ...
*(...(>pattern hl-replacement)) ...
)
find-all: (
(include-binary-files / is-text-file)
define-highlights
add-filename
0+ (!..pattern {..\n=>})
1+ (>..pattern add-line-number 1+(..hl-pattern) ..\n / {..\n=>})
*(!..pattern {..\n=>})
+(>..pattern add-line-number +(..hl-pattern) ..\n / {..\n=>})
[{!<\n => "\n"}]
)
only-matches: (
(include-binary-files / is-text-file)
define-highlights
add-filename
1+{...@hl-pattern =>'@1\n'}
+{...@hl-pattern =>'@1\n'}
)
add-filename: [print-filenames (is-tty {=>"\033[33;1;4m@&:\033[0m\n"} / {=>"@&:\n"})]
add-line-number: [print-line-numbers (is-tty {=>"\033[2m@#\033[5G|\033[0m "} / {=>"@#| "})]
@ -41,21 +41,21 @@ define-highlights: highlight @hl-start={=>"\033[31;1m"} @hl-end={=>"\033[0m"} /
# Helper definitions (commonly used)
#(
url: (
"file://" 1+(`/ 0+url-char)
"file://" +(`/ *url-char)
/ "mailto:" email
/ ("https"/"http"/"ftp") "://" [1+url-char [`: 1+url-char] `@] (ipv4/ipv6/domain) [`: int] [url-path]
/ ("https"/"http"/"ftp") "://" [+url-char [`: +url-char] `@] (ipv4/ipv6/domain) [`: int] [url-path]
)
url-path: 1+(`/ 0+url-char) [`? 1+(1+url-char`=1+url-char]
url-path: +(`/ *url-char) [`? +(+url-char`=+url-char)]
ipv4: 4 int % `.
ipv6: 8 (4 Hex) % `:
domain: 1+(Abc/digit/`-)%`.
domain: +(Abc/digit/`-)%`.
url-char: Abc/digit/`$/`-/`_/`./`+/`!/`*/`'/`(/`)/`,/`%
url: @(https?|ftp)://(-\.)?([^\s/?\.#-]+\.?)+(/[^\s]*)?$@iS
)#
indent: \n|1+(\t/' ')
indent: \n|+(\t/' ')
dedent: $ !(\n|)
indented-block: |` ..$ 0+(\n|..$)
indented-block: |` ..$ *(\n|..$)
utf8-codepoint: (
\x00-x7f
/ \xc0-xdf 1\x80-xbf
@ -64,18 +64,18 @@ utf8-codepoint: (
)
crlf: \r\n
cr: \r
anglebraces: `< 0+(anglebraces / !`>$.) `>
brackets: `[ 0+(brackets / !`]$.) `]
braces: `{ 0+(braces / !`}$.) `}
parens: `( 0+(parens / !`)$.) `)
id: !<(`a-z/`A-Z/`_/`0-9) (`a-z/`A-Z/`_) 0+(`a-z/`A-Z/`_/`0-9)
anglebraces: `< *(anglebraces / !`>$.) `>
brackets: `[ *(brackets / !`]$.) `]
braces: `{ *(braces / !`}$.) `}
parens: `( *(parens / !`)$.) `)
id: !<(`a-z/`A-Z/`_/`0-9) (`a-z/`A-Z/`_) *(`a-z/`A-Z/`_/`0-9)
id-char: `a-z/`A-Z/`_/`0-9
word: !<(`a-z/`A-Z/`_/`0-9) 1+(`a-z/`A-Z) !>(`0-9/`_)
word: !<(`a-z/`A-Z/`_/`0-9) +(`a-z/`A-Z) !>(`0-9/`_)
HEX: `0-9/`A-F
Hex: `0-9/`a-f/`A-F
hex: `0-9/`a-f
number: 1+`0-9 [`. 0+`0-9] / `. 1+`0-9
int: 1+`0-9
number: +`0-9 [`. *`0-9] / `. +`0-9
int: +`0-9
digit: `0-9
Abc: `a-z/`A-Z
ABC: `A-Z
@ -94,5 +94,5 @@ $$: !$.
$: !.
^^: !<$.
^: !<.
__: 0+(` /\t/\n/\r/comment)
_: 0+(` /\t)
__: *(` /\t/\n/\r/comment)
_: *(` /\t)

View File

@ -1,5 +1,5 @@
# HTML grammar
HTML: __ [doctype __] 0+html-element%__ __
HTML: __ [doctype __] *html-element%__ __
doctype: "<!DOCTYPE" ..`>
@ -11,16 +11,16 @@ html-element: (
void-element: `< @tag=(id==match-tag) __attributes__ [`/] __ `>
template-element: `< @tag=(id==match-tag) __`> __ >match-body @body=0+(!`<$. / comment / html-element / !("</"tag__`>)$.) ("</"tag__`>)
template-element: `< @tag=(id==match-tag) __`> __ >match-body @body=*(!`<$. / comment / html-element / !("</"tag__`>)$.) ("</"tag__`>)
raw-element: `< @tag=(id==match-tag) __attributes__ `> >match-body @body=.. ("</"tag__`>)
normal-element: `< @tag=(id==match-tag) __attributes__ `> >match-body @body=0+(!`<$. / comment / html-element / !("</"tag__`>)$.) "</"tag__`>
normal-element: `< @tag=(id==match-tag) __attributes__ `> >match-body @body=*(!`<$. / comment / html-element / !("</"tag__`>)$.) "</"tag__`>
comment: "<!--" ..."-->"
attributes: 0+attribute%__
attribute: (1+id%`:)__`=__ (id / `" ..`" / `' ..`')
attribute: (1+id%`:)__`=__ (id / `" ..`" / `' ..`')
attributes: *attribute%__
attribute: (+id%`:)__`=__ (id / `" ..`" / `' ..`')
attribute: (+id%`:)__`=__ (id / `" ..`" / `' ..`')
match-tag: id
match-body: ''

View File

@ -1,5 +1,5 @@
# Definitions of UTF8-compliant identifiers
utf8-id: utf8-id-start 0+utf8-id-cont
utf8-id: utf8-id-start *utf8-id-cont
utf8-id-start: `A-Z / `a-z / !\x00-x7F (
\xc2 (\xaa / \xb5 / \xba)