Removed the requirement for semicolons, changed '=' -> ':' for definitions, added better error reporting for failed BPEG grammars
This commit is contained in:
Bruce Hill 2020-09-16 22:36:38 -07:00
parent 9ee7102f51
commit 82952fa5e9
6 changed files with 137 additions and 121 deletions

View File

@ -86,9 +86,8 @@ static vm_op_t *chain_together(vm_op_t *first, vm_op_t *second)
*/
vm_op_t *bpeg_simplepattern(file_t *f, const char *str)
{
if (!*str) return NULL;
str = after_spaces(str);
check(*str, "Expected a pattern");
if (!*str) return NULL;
vm_op_t *op = calloc(sizeof(vm_op_t), 1);
op->start = str;
op->len = -1;
@ -320,6 +319,11 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str)
} else {
op->args.s = strndup(&c, 1);
}
if (*after_spaces(str) == ':') {
free((char*)op->args.s);
free(op);
return NULL;
}
op->op = VM_REF;
break;
}
@ -333,6 +337,10 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str)
--str;
const char *refname = str;
str = after_name(str);
if (*after_spaces(str) == ':') {
free(op);
return NULL;
}
op->op = VM_REF;
op->args.s = strndup(refname, (size_t)(str - refname));
break;

View File

@ -77,7 +77,7 @@ size_t get_line_number(file_t *f, const char *p)
if (f->lines[n] > p)
return n;
}
return 0;
return f->nlines;
}
size_t get_char_number(file_t *f, const char *p)

View File

@ -33,25 +33,33 @@ void add_def(grammar_t *g, file_t *f, const char *src, const char *name, vm_op_t
*/
vm_op_t *load_grammar(grammar_t *g, file_t *f)
{
check(f, "Null file provided");
vm_op_t *ret = NULL;
const char *src = f->contents;
do {
src = after_spaces(src);
if (!*src) break;
src = after_spaces(src);
while (*src) {
const char *name = src;
const char *name_end = after_name(name);
check(name_end > name, "Invalid name for definition");
name = strndup(name, (size_t)(name_end-name));
src = after_spaces(name_end);
check(matchchar(&src, '='), "Expected '=' in definition");
check(matchchar(&src, ':'), "Expected ':' in definition");
vm_op_t *op = bpeg_pattern(f, src);
check(op, "Couldn't load definition");
if (op == NULL) break;
//check(op, "Couldn't load definition");
add_def(g, f, src, name, op);
if (ret == NULL) {
ret = op;
}
src = op->end;
} while (*src && matchchar(&src, ';'));
src = after_spaces(src);
if (*src && matchchar(&src, ';'))
src = after_spaces(src);
}
if (src < &f->contents[f->length-1]) {
fprint_line(stderr, f, src, NULL, "Invalid BPEG pattern");
_exit(1);
}
return ret;
}

View File

@ -1,66 +1,66 @@
# This is a file defining the BPEG grammar using BPEG syntax
Grammar = __ 0+Def%(__`;__) 0-1(`;__);
Def = @[name]Ref __ `= __ @[definition]extended-pat;
Grammar: __ 0+(Def 0-1(__`;))%__ __
Def: @[name]id __ `: __ @[definition]extended-pat
# This is used for command line arguments:
String-pattern = 0+(`\ pat 0-1`; / .);
String-pattern: 0+(`\ pat 0-1`; / .)
pat = suffixed-pat / simple-pat;
simple-pat = Empty / Upto-and / Dot / String / Char-range / Char / Escape-range / Escape / No
/ Nodent / Repeat / After / Before / Capture / Replace / Ref / parens;
suffixed-pat = Eq-pat;
pat: suffixed-pat / simple-pat
simple-pat: Empty / Upto-and / Dot / String / Char-range / Char / Escape-range / Escape / No
/ Nodent / Repeat / After / Before / Capture / Replace / Ref / parens
suffixed-pat: Eq-pat
Eq-pat = @[first]simple-pat "==" @[second]pat;
Eq-pat: @[first]simple-pat "==" @[second]pat
Empty = `/ >(__ (`)/`}));
Dot = `. !`.;
String = (
Empty: `/ >(__ (`)/`}))
Dot: `. !`.
String: (
`" @[s]0+(Escape / !`"$.) `"
/ `' @[s]0+(Escape / !`'$.) `'
);
Char-range = `` @[low]. `- @[high].;
Char = `` @[s].;
Escape-range = `\ @[low]escape-sequence `- @[high]escape-sequence;
Escape = `\ @[s]escape-sequence;
escape-sequence = (
)
Char-range: `` @[low]. `- @[high].
Char: `` @[s].
Escape-range: `\ @[low]escape-sequence `- @[high]escape-sequence
Escape: `\ @[s]escape-sequence
escape-sequence: (
1-3 `0-7
/ `x 2 (`0-9/`a-f/`A-F)
/`a/`b/`e/`n/`r/`t/`v / . / \n
);
No = `! _ @pat;
Nodent = `|;
Upto-and = 2-3`. 0-1(_@pat);
Repeat = (
)
No: `! _ @pat
Nodent: `|
Upto-and: 2-3`. 0-1(_@pat)
Repeat: (
@[min]int _ `- _ @[max]int
/ @[min]int _ `+ @[max]''
/ @[min]@[max]int
) _ @[repeat-pat]pat 0-1( __ `% __ @[sep]pat);
After = `< _ pat;
Before = `> _ pat;
Capture = `@ 0-1(_ `[ @[capture-name]Ref `]) _ @[capture]pat;
Replace = `{ __ (
) _ @[repeat-pat]pat 0-1( __ `% __ @[sep]pat)
After: `< _ pat
Before: `> _ pat
Capture: `@ 0-1(_ `[ @[capture-name](...>`]) `]) _ @[capture]pat
Replace: `{ __ (
0-1(@[replace-pat]extended-pat __) "=>" 0-1(__ @[replacement]String)
) __ `};
Ref = @[name](
"^^" / "^" / "__" / "_" / "$$" / "$" /
(`a-z/`A-Z) 0+(`a-z/`A-Z/`0-9/`-));
) __ `}
Ref: @[name]id !>(__`:)
parens = `( __ extended-pat __ `);
parens: `( __ extended-pat __ `)
Chain = 2+@pat % (__);
Otherwise = 2+@(Chain/pat) % (__`/__);
extended-pat = Otherwise / Chain / pat;
Chain: 2+@pat % (__)
Otherwise: 2+@(Chain/pat) % (__`/__)
extended-pat: Otherwise / Chain / pat
# Special-symbol rules:
_ = 0+(` / \t);
__ = 0+(` / \t / \r / \n / comment);
$$ = !$.;
$ = !.;
^^ = !<$.;
^ = !<.;
_: 0+(` / \t)
__: 0+(` / \t / \r / \n / comment)
$$: !$.
$: !.
^^: !<$.
^: !<.
hash-comment = `# .. $;
id: "^^" / "^" / "__" / "_" / "$$" / "$" / (`a-z/`A-Z) 0+(`a-z/`A-Z/`0-9/`-)
hash-comment: `# .. $
# Note: comments are undefined by default in regular BPEG
comment = hash-comment;
comment: hash-comment

View File

@ -1,61 +1,61 @@
yes = '';
no = !'';
yes: ''
no: !''
# Configurable options:
is-tty = no; # Defined as either always-match or always-fail, depending on stdout
print-line-numbers = is-tty;
print-filenames = is-tty;
highlight = is-tty;
is-tty: no # Defined as either always-match or always-fail, depending on stdout
print-line-numbers: is-tty
print-filenames: is-tty
highlight: is-tty
# Meta-rules for acting on everything:
pattern = !''; # Not defined by default
replacement = !''; # Not defined by default
replace-all = define-highlights add-filename 1+(...@hl-replacement) ...;
find-all = define-highlights add-filename 1+find-next%\n 0-1{!<\n => "\n"};
find-next = matching-line / {..\n =>} find-next;
only-matches = define-highlights 1+{...@hl-pattern=>'@1\n'};
matching-line = add-line-number 1+(..hl-pattern) ..$;
add-filename = 0-1(print-filenames (is-tty {=>"\033[33;1;4m@&:\033[0m\n"} / {=>"@&:\n"}));
add-line-number = 0-1(print-line-numbers (is-tty {=>"\033[2m@#\033[5G|\033[0m "} / {=>"@#| "}));
hl-pattern = {@[match]pattern define-highlights => "@[hl-start]@[match]@[hl-end]"};
hl-replacement = {@[match]replacement define-highlights => "@[hl-start]@[match]@[hl-end]" };
define-highlights = highlight @[hl-start]{=>"\033[31;1m"} @[hl-end]{=>"\033[0m"} / @[hl-start]"" @[hl-end]"";
pattern: !'' # Not defined by default
replacement: !'' # Not defined by default
replace-all: define-highlights add-filename 1+(...@hl-replacement) ...
find-all: define-highlights add-filename 1+find-next%\n 0-1{!<\n => "\n"}
find-next: matching-line / {..\n =>} find-next
only-matches: define-highlights 1+{...@hl-pattern=>'@1\n'}
matching-line: add-line-number 1+(..hl-pattern) ..$
add-filename: 0-1(print-filenames (is-tty {=>"\033[33;1;4m@&:\033[0m\n"} / {=>"@&:\n"}))
add-line-number: 0-1(print-line-numbers (is-tty {=>"\033[2m@#\033[5G|\033[0m "} / {=>"@#| "}))
hl-pattern: {@[match]pattern define-highlights => "@[hl-start]@[match]@[hl-end]"}
hl-replacement: {@[match]replacement define-highlights => "@[hl-start]@[match]@[hl-end]" }
define-highlights: highlight @[hl-start]{=>"\033[31;1m"} @[hl-end]{=>"\033[0m"} / @[hl-start]"" @[hl-end]""
# Helper definitions (commonly used)
indent = \n|1+(\t/' ');
dedent = $ !(\n|);
indented-block = |` ..$ 0+(\n|..$);
crlf = \r\n;
cr = \r; r = \r;
anglebraces = `< 0+(anglebraces / !`>.) `>;
brackets = `[ 0+(brackets / !`].) `];
braces = `{ 0+(braces / !`}.) `};
parens = `( 0+(parens / !`).) `);
id = !<(`a-z/`A-Z/`_/`0-9) (`a-z/`A-Z/`_) 0+(`a-z/`A-Z/`_/`0-9);
id-char = `a-z/`A-Z/`_/`0-9;
word = !<(`a-z/`A-Z/`_/`0-9) 1+(`a-z/`A-Z) !>(`0-9/`_);
HEX = `0-9/`A-F;
Hex = `0-9/`a-f/`A-F;
hex = `0-9/`a-f;
number = 1+`0-9 0-1(`. 0+`0-9) / `. 1+`0-9;
int = 1+`0-9;
digit = `0-9;
Abc = `a-z/`A-Z;
ABC = `A-Z;
abc = `a-z;
esc = \e; e = \e;
tab = \t; t = \t;
nl = \n; lf = \n; n = \n;
c-block-comment = '/*' ... '*/';
c-line-comment = '//' ..$;
c-comment = c-line-comment / c-block-comment;
hash-comment = `# ..$;
comment = !''; # No default definition, can be overridden
WS = ` /\t/\n/\r/comment;
ws = ` /\t;
$$ = !$.;
$ = !.;
^^ = !<$.;
^ = !<.;
__ = 0+(` /\t/\n/\r/comment);
_ = 0+(` /\t);
indent: \n|1+(\t/' ')
dedent: $ !(\n|)
indented-block: |` ..$ 0+(\n|..$)
crlf: \r\n
cr: \r; r: \r
anglebraces: `< 0+(anglebraces / !`>.) `>
brackets: `[ 0+(brackets / !`].) `]
braces: `{ 0+(braces / !`}.) `}
parens: `( 0+(parens / !`).) `)
id: !<(`a-z/`A-Z/`_/`0-9) (`a-z/`A-Z/`_) 0+(`a-z/`A-Z/`_/`0-9)
id-char: `a-z/`A-Z/`_/`0-9
word: !<(`a-z/`A-Z/`_/`0-9) 1+(`a-z/`A-Z) !>(`0-9/`_)
HEX: `0-9/`A-F
Hex: `0-9/`a-f/`A-F
hex: `0-9/`a-f
number: 1+`0-9 0-1(`. 0+`0-9) / `. 1+`0-9
int: 1+`0-9
digit: `0-9
Abc: `a-z/`A-Z
ABC: `A-Z
abc: `a-z
esc: \e; e: \e
tab: \t; t: \t
nl: \n; lf: \n; n: \n
c-block-comment: '/*' ... '*/'
c-line-comment: '//' ..$
c-comment: c-line-comment / c-block-comment
hash-comment: `# ..$
comment: !''; # No default definition, can be overridden
WS: ` /\t/\n/\r/comment
ws: ` /\t
$$: !$.
$: !.
^^: !<$.
^: !<.
__: 0+(` /\t/\n/\r/comment)
_: 0+(` /\t)

View File

@ -1,26 +1,26 @@
# HTML grammar
HTML = __ 0-1(doctype __) 0+html-element%__ __;
HTML: __ 0-1(doctype __) 0+html-element%__ __
doctype = "<!DOCTYPE" ..`>;
doctype: "<!DOCTYPE" ..`>
html-element = (
html-element: (
>(`<("area"/"base"/"br"/"col"/"embed"/"hr"/"img"/"input"/"link"/"meta"/"param"/"source"/"track"/"wbr")) void-element
/ >(`<("script"/"style"/"textarea"/"title")) raw-element
/ >(`<("template")) template-element
/ normal-element);
/ normal-element)
void-element = `< @[tag](id==match-tag) __attributes__ 0-1`/ __ `>;
void-element: `< @[tag](id==match-tag) __attributes__ 0-1`/ __ `>
template-element = `< @[tag](id==match-tag) __`> __ >match-body @[body]0+(!`<$. / comment / html-element / !("</"tag__`>)$.) ("</"tag__`>);
template-element: `< @[tag](id==match-tag) __`> __ >match-body @[body]0+(!`<$. / comment / html-element / !("</"tag__`>)$.) ("</"tag__`>)
raw-element = `< @[tag](id==match-tag) __attributes__ `> >match-body @[body].. ("</"tag__`>);
raw-element: `< @[tag](id==match-tag) __attributes__ `> >match-body @[body].. ("</"tag__`>)
normal-element = `< @[tag](id==match-tag) __attributes__ `> >match-body @[body]0+(!`<$. / comment / html-element / !("</"tag__`>)$.) "</"tag__`>;
normal-element: `< @[tag](id==match-tag) __attributes__ `> >match-body @[body]0+(!`<$. / comment / html-element / !("</"tag__`>)$.) "</"tag__`>
comment = "<!--" ..."-->";
comment: "<!--" ..."-->"
attributes = 0+attribute%__;
attribute = (1+id%`:)__`=__ (id / `" ..`" / `' ..`');
attribute = (1+id%`:)__`=__ (id / `" ..`" / `' ..`');
match-tag = id;
match-body = '';
attributes: 0+attribute%__
attribute: (1+id%`:)__`=__ (id / `" ..`" / `' ..`')
attribute: (1+id%`:)__`=__ (id / `" ..`" / `' ..`')
match-tag: id
match-body: ''