Removed the requirement for semicolons, changed '=' to ':' in
definitions, and added better error reporting for failed BPEG grammars.
This commit is contained in:
parent
9ee7102f51
commit
82952fa5e9
12
compiler.c
12
compiler.c
@ -86,9 +86,8 @@ static vm_op_t *chain_together(vm_op_t *first, vm_op_t *second)
|
||||
*/
|
||||
vm_op_t *bpeg_simplepattern(file_t *f, const char *str)
|
||||
{
|
||||
if (!*str) return NULL;
|
||||
str = after_spaces(str);
|
||||
check(*str, "Expected a pattern");
|
||||
if (!*str) return NULL;
|
||||
vm_op_t *op = calloc(sizeof(vm_op_t), 1);
|
||||
op->start = str;
|
||||
op->len = -1;
|
||||
@ -320,6 +319,11 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str)
|
||||
} else {
|
||||
op->args.s = strndup(&c, 1);
|
||||
}
|
||||
if (*after_spaces(str) == ':') {
|
||||
free((char*)op->args.s);
|
||||
free(op);
|
||||
return NULL;
|
||||
}
|
||||
op->op = VM_REF;
|
||||
break;
|
||||
}
|
||||
@ -333,6 +337,10 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str)
|
||||
--str;
|
||||
const char *refname = str;
|
||||
str = after_name(str);
|
||||
if (*after_spaces(str) == ':') {
|
||||
free(op);
|
||||
return NULL;
|
||||
}
|
||||
op->op = VM_REF;
|
||||
op->args.s = strndup(refname, (size_t)(str - refname));
|
||||
break;
|
||||
|
@ -77,7 +77,7 @@ size_t get_line_number(file_t *f, const char *p)
|
||||
if (f->lines[n] > p)
|
||||
return n;
|
||||
}
|
||||
return 0;
|
||||
return f->nlines;
|
||||
}
|
||||
|
||||
size_t get_char_number(file_t *f, const char *p)
|
||||
|
20
grammar.c
20
grammar.c
@ -33,25 +33,33 @@ void add_def(grammar_t *g, file_t *f, const char *src, const char *name, vm_op_t
|
||||
*/
|
||||
vm_op_t *load_grammar(grammar_t *g, file_t *f)
|
||||
{
|
||||
check(f, "Null file provided");
|
||||
vm_op_t *ret = NULL;
|
||||
const char *src = f->contents;
|
||||
do {
|
||||
src = after_spaces(src);
|
||||
if (!*src) break;
|
||||
src = after_spaces(src);
|
||||
while (*src) {
|
||||
const char *name = src;
|
||||
const char *name_end = after_name(name);
|
||||
check(name_end > name, "Invalid name for definition");
|
||||
name = strndup(name, (size_t)(name_end-name));
|
||||
src = after_spaces(name_end);
|
||||
check(matchchar(&src, '='), "Expected '=' in definition");
|
||||
check(matchchar(&src, ':'), "Expected ':' in definition");
|
||||
vm_op_t *op = bpeg_pattern(f, src);
|
||||
check(op, "Couldn't load definition");
|
||||
if (op == NULL) break;
|
||||
//check(op, "Couldn't load definition");
|
||||
add_def(g, f, src, name, op);
|
||||
if (ret == NULL) {
|
||||
ret = op;
|
||||
}
|
||||
src = op->end;
|
||||
} while (*src && matchchar(&src, ';'));
|
||||
src = after_spaces(src);
|
||||
if (*src && matchchar(&src, ';'))
|
||||
src = after_spaces(src);
|
||||
}
|
||||
if (src < &f->contents[f->length-1]) {
|
||||
fprint_line(stderr, f, src, NULL, "Invalid BPEG pattern");
|
||||
_exit(1);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -1,66 +1,66 @@
|
||||
# This is a file defining the BPEG grammar using BPEG syntax
|
||||
|
||||
Grammar = __ 0+Def%(__`;__) 0-1(`;__);
|
||||
Def = @[name]Ref __ `= __ @[definition]extended-pat;
|
||||
Grammar: __ 0+(Def 0-1(__`;))%__ __
|
||||
Def: @[name]id __ `: __ @[definition]extended-pat
|
||||
|
||||
# This is used for command line arguments:
|
||||
String-pattern = 0+(`\ pat 0-1`; / .);
|
||||
String-pattern: 0+(`\ pat 0-1`; / .)
|
||||
|
||||
pat = suffixed-pat / simple-pat;
|
||||
simple-pat = Empty / Upto-and / Dot / String / Char-range / Char / Escape-range / Escape / No
|
||||
/ Nodent / Repeat / After / Before / Capture / Replace / Ref / parens;
|
||||
suffixed-pat = Eq-pat;
|
||||
pat: suffixed-pat / simple-pat
|
||||
simple-pat: Empty / Upto-and / Dot / String / Char-range / Char / Escape-range / Escape / No
|
||||
/ Nodent / Repeat / After / Before / Capture / Replace / Ref / parens
|
||||
suffixed-pat: Eq-pat
|
||||
|
||||
Eq-pat = @[first]simple-pat "==" @[second]pat;
|
||||
Eq-pat: @[first]simple-pat "==" @[second]pat
|
||||
|
||||
Empty = `/ >(__ (`)/`}));
|
||||
Dot = `. !`.;
|
||||
String = (
|
||||
Empty: `/ >(__ (`)/`}))
|
||||
Dot: `. !`.
|
||||
String: (
|
||||
`" @[s]0+(Escape / !`"$.) `"
|
||||
/ `' @[s]0+(Escape / !`'$.) `'
|
||||
);
|
||||
Char-range = `` @[low]. `- @[high].;
|
||||
Char = `` @[s].;
|
||||
Escape-range = `\ @[low]escape-sequence `- @[high]escape-sequence;
|
||||
Escape = `\ @[s]escape-sequence;
|
||||
escape-sequence = (
|
||||
)
|
||||
Char-range: `` @[low]. `- @[high].
|
||||
Char: `` @[s].
|
||||
Escape-range: `\ @[low]escape-sequence `- @[high]escape-sequence
|
||||
Escape: `\ @[s]escape-sequence
|
||||
escape-sequence: (
|
||||
1-3 `0-7
|
||||
/ `x 2 (`0-9/`a-f/`A-F)
|
||||
/`a/`b/`e/`n/`r/`t/`v / . / \n
|
||||
);
|
||||
No = `! _ @pat;
|
||||
Nodent = `|;
|
||||
Upto-and = 2-3`. 0-1(_@pat);
|
||||
Repeat = (
|
||||
)
|
||||
No: `! _ @pat
|
||||
Nodent: `|
|
||||
Upto-and: 2-3`. 0-1(_@pat)
|
||||
Repeat: (
|
||||
@[min]int _ `- _ @[max]int
|
||||
/ @[min]int _ `+ @[max]''
|
||||
/ @[min]@[max]int
|
||||
) _ @[repeat-pat]pat 0-1( __ `% __ @[sep]pat);
|
||||
After = `< _ pat;
|
||||
Before = `> _ pat;
|
||||
Capture = `@ 0-1(_ `[ @[capture-name]Ref `]) _ @[capture]pat;
|
||||
Replace = `{ __ (
|
||||
) _ @[repeat-pat]pat 0-1( __ `% __ @[sep]pat)
|
||||
After: `< _ pat
|
||||
Before: `> _ pat
|
||||
Capture: `@ 0-1(_ `[ @[capture-name](...>`]) `]) _ @[capture]pat
|
||||
Replace: `{ __ (
|
||||
0-1(@[replace-pat]extended-pat __) "=>" 0-1(__ @[replacement]String)
|
||||
) __ `};
|
||||
Ref = @[name](
|
||||
"^^" / "^" / "__" / "_" / "$$" / "$" /
|
||||
(`a-z/`A-Z) 0+(`a-z/`A-Z/`0-9/`-));
|
||||
) __ `}
|
||||
Ref: @[name]id !>(__`:)
|
||||
|
||||
parens = `( __ extended-pat __ `);
|
||||
parens: `( __ extended-pat __ `)
|
||||
|
||||
Chain = 2+@pat % (__);
|
||||
Otherwise = 2+@(Chain/pat) % (__`/__);
|
||||
extended-pat = Otherwise / Chain / pat;
|
||||
Chain: 2+@pat % (__)
|
||||
Otherwise: 2+@(Chain/pat) % (__`/__)
|
||||
extended-pat: Otherwise / Chain / pat
|
||||
|
||||
# Special-symbol rules:
|
||||
_ = 0+(` / \t);
|
||||
__ = 0+(` / \t / \r / \n / comment);
|
||||
$$ = !$.;
|
||||
$ = !.;
|
||||
^^ = !<$.;
|
||||
^ = !<.;
|
||||
_: 0+(` / \t)
|
||||
__: 0+(` / \t / \r / \n / comment)
|
||||
$$: !$.
|
||||
$: !.
|
||||
^^: !<$.
|
||||
^: !<.
|
||||
|
||||
hash-comment = `# .. $;
|
||||
id: "^^" / "^" / "__" / "_" / "$$" / "$" / (`a-z/`A-Z) 0+(`a-z/`A-Z/`0-9/`-)
|
||||
|
||||
hash-comment: `# .. $
|
||||
|
||||
# Note: comments are undefined by default in regular BPEG
|
||||
comment = hash-comment;
|
||||
comment: hash-comment
|
||||
|
@ -1,61 +1,61 @@
|
||||
yes = '';
|
||||
no = !'';
|
||||
yes: ''
|
||||
no: !''
|
||||
|
||||
# Configurable options:
|
||||
is-tty = no; # Defined as either always-match or always-fail, depending on stdout
|
||||
print-line-numbers = is-tty;
|
||||
print-filenames = is-tty;
|
||||
highlight = is-tty;
|
||||
is-tty: no # Defined as either always-match or always-fail, depending on stdout
|
||||
print-line-numbers: is-tty
|
||||
print-filenames: is-tty
|
||||
highlight: is-tty
|
||||
|
||||
# Meta-rules for acting on everything:
|
||||
pattern = !''; # Not defined by default
|
||||
replacement = !''; # Not defined by default
|
||||
replace-all = define-highlights add-filename 1+(...@hl-replacement) ...;
|
||||
find-all = define-highlights add-filename 1+find-next%\n 0-1{!<\n => "\n"};
|
||||
find-next = matching-line / {..\n =>} find-next;
|
||||
only-matches = define-highlights 1+{...@hl-pattern=>'@1\n'};
|
||||
matching-line = add-line-number 1+(..hl-pattern) ..$;
|
||||
add-filename = 0-1(print-filenames (is-tty {=>"\033[33;1;4m@&:\033[0m\n"} / {=>"@&:\n"}));
|
||||
add-line-number = 0-1(print-line-numbers (is-tty {=>"\033[2m@#\033[5G|\033[0m "} / {=>"@#| "}));
|
||||
hl-pattern = {@[match]pattern define-highlights => "@[hl-start]@[match]@[hl-end]"};
|
||||
hl-replacement = {@[match]replacement define-highlights => "@[hl-start]@[match]@[hl-end]" };
|
||||
define-highlights = highlight @[hl-start]{=>"\033[31;1m"} @[hl-end]{=>"\033[0m"} / @[hl-start]"" @[hl-end]"";
|
||||
pattern: !'' # Not defined by default
|
||||
replacement: !'' # Not defined by default
|
||||
replace-all: define-highlights add-filename 1+(...@hl-replacement) ...
|
||||
find-all: define-highlights add-filename 1+find-next%\n 0-1{!<\n => "\n"}
|
||||
find-next: matching-line / {..\n =>} find-next
|
||||
only-matches: define-highlights 1+{...@hl-pattern=>'@1\n'}
|
||||
matching-line: add-line-number 1+(..hl-pattern) ..$
|
||||
add-filename: 0-1(print-filenames (is-tty {=>"\033[33;1;4m@&:\033[0m\n"} / {=>"@&:\n"}))
|
||||
add-line-number: 0-1(print-line-numbers (is-tty {=>"\033[2m@#\033[5G|\033[0m "} / {=>"@#| "}))
|
||||
hl-pattern: {@[match]pattern define-highlights => "@[hl-start]@[match]@[hl-end]"}
|
||||
hl-replacement: {@[match]replacement define-highlights => "@[hl-start]@[match]@[hl-end]" }
|
||||
define-highlights: highlight @[hl-start]{=>"\033[31;1m"} @[hl-end]{=>"\033[0m"} / @[hl-start]"" @[hl-end]""
|
||||
|
||||
# Helper definitions (commonly used)
|
||||
indent = \n|1+(\t/' ');
|
||||
dedent = $ !(\n|);
|
||||
indented-block = |` ..$ 0+(\n|..$);
|
||||
crlf = \r\n;
|
||||
cr = \r; r = \r;
|
||||
anglebraces = `< 0+(anglebraces / !`>.) `>;
|
||||
brackets = `[ 0+(brackets / !`].) `];
|
||||
braces = `{ 0+(braces / !`}.) `};
|
||||
parens = `( 0+(parens / !`).) `);
|
||||
id = !<(`a-z/`A-Z/`_/`0-9) (`a-z/`A-Z/`_) 0+(`a-z/`A-Z/`_/`0-9);
|
||||
id-char = `a-z/`A-Z/`_/`0-9;
|
||||
word = !<(`a-z/`A-Z/`_/`0-9) 1+(`a-z/`A-Z) !>(`0-9/`_);
|
||||
HEX = `0-9/`A-F;
|
||||
Hex = `0-9/`a-f/`A-F;
|
||||
hex = `0-9/`a-f;
|
||||
number = 1+`0-9 0-1(`. 0+`0-9) / `. 1+`0-9;
|
||||
int = 1+`0-9;
|
||||
digit = `0-9;
|
||||
Abc = `a-z/`A-Z;
|
||||
ABC = `A-Z;
|
||||
abc = `a-z;
|
||||
esc = \e; e = \e;
|
||||
tab = \t; t = \t;
|
||||
nl = \n; lf = \n; n = \n;
|
||||
c-block-comment = '/*' ... '*/';
|
||||
c-line-comment = '//' ..$;
|
||||
c-comment = c-line-comment / c-block-comment;
|
||||
hash-comment = `# ..$;
|
||||
comment = !''; # No default definition, can be overridden
|
||||
WS = ` /\t/\n/\r/comment;
|
||||
ws = ` /\t;
|
||||
$$ = !$.;
|
||||
$ = !.;
|
||||
^^ = !<$.;
|
||||
^ = !<.;
|
||||
__ = 0+(` /\t/\n/\r/comment);
|
||||
_ = 0+(` /\t);
|
||||
indent: \n|1+(\t/' ')
|
||||
dedent: $ !(\n|)
|
||||
indented-block: |` ..$ 0+(\n|..$)
|
||||
crlf: \r\n
|
||||
cr: \r; r: \r
|
||||
anglebraces: `< 0+(anglebraces / !`>.) `>
|
||||
brackets: `[ 0+(brackets / !`].) `]
|
||||
braces: `{ 0+(braces / !`}.) `}
|
||||
parens: `( 0+(parens / !`).) `)
|
||||
id: !<(`a-z/`A-Z/`_/`0-9) (`a-z/`A-Z/`_) 0+(`a-z/`A-Z/`_/`0-9)
|
||||
id-char: `a-z/`A-Z/`_/`0-9
|
||||
word: !<(`a-z/`A-Z/`_/`0-9) 1+(`a-z/`A-Z) !>(`0-9/`_)
|
||||
HEX: `0-9/`A-F
|
||||
Hex: `0-9/`a-f/`A-F
|
||||
hex: `0-9/`a-f
|
||||
number: 1+`0-9 0-1(`. 0+`0-9) / `. 1+`0-9
|
||||
int: 1+`0-9
|
||||
digit: `0-9
|
||||
Abc: `a-z/`A-Z
|
||||
ABC: `A-Z
|
||||
abc: `a-z
|
||||
esc: \e; e: \e
|
||||
tab: \t; t: \t
|
||||
nl: \n; lf: \n; n: \n
|
||||
c-block-comment: '/*' ... '*/'
|
||||
c-line-comment: '//' ..$
|
||||
c-comment: c-line-comment / c-block-comment
|
||||
hash-comment: `# ..$
|
||||
comment: !''; # No default definition, can be overridden
|
||||
WS: ` /\t/\n/\r/comment
|
||||
ws: ` /\t
|
||||
$$: !$.
|
||||
$: !.
|
||||
^^: !<$.
|
||||
^: !<.
|
||||
__: 0+(` /\t/\n/\r/comment)
|
||||
_: 0+(` /\t)
|
||||
|
@ -1,26 +1,26 @@
|
||||
# HTML grammar
|
||||
HTML = __ 0-1(doctype __) 0+html-element%__ __;
|
||||
HTML: __ 0-1(doctype __) 0+html-element%__ __
|
||||
|
||||
doctype = "<!DOCTYPE" ..`>;
|
||||
doctype: "<!DOCTYPE" ..`>
|
||||
|
||||
html-element = (
|
||||
html-element: (
|
||||
>(`<("area"/"base"/"br"/"col"/"embed"/"hr"/"img"/"input"/"link"/"meta"/"param"/"source"/"track"/"wbr")) void-element
|
||||
/ >(`<("script"/"style"/"textarea"/"title")) raw-element
|
||||
/ >(`<("template")) template-element
|
||||
/ normal-element);
|
||||
/ normal-element)
|
||||
|
||||
void-element = `< @[tag](id==match-tag) __attributes__ 0-1`/ __ `>;
|
||||
void-element: `< @[tag](id==match-tag) __attributes__ 0-1`/ __ `>
|
||||
|
||||
template-element = `< @[tag](id==match-tag) __`> __ >match-body @[body]0+(!`<$. / comment / html-element / !("</"tag__`>)$.) ("</"tag__`>);
|
||||
template-element: `< @[tag](id==match-tag) __`> __ >match-body @[body]0+(!`<$. / comment / html-element / !("</"tag__`>)$.) ("</"tag__`>)
|
||||
|
||||
raw-element = `< @[tag](id==match-tag) __attributes__ `> >match-body @[body].. ("</"tag__`>);
|
||||
raw-element: `< @[tag](id==match-tag) __attributes__ `> >match-body @[body].. ("</"tag__`>)
|
||||
|
||||
normal-element = `< @[tag](id==match-tag) __attributes__ `> >match-body @[body]0+(!`<$. / comment / html-element / !("</"tag__`>)$.) "</"tag__`>;
|
||||
normal-element: `< @[tag](id==match-tag) __attributes__ `> >match-body @[body]0+(!`<$. / comment / html-element / !("</"tag__`>)$.) "</"tag__`>
|
||||
|
||||
comment = "<!--" ..."-->";
|
||||
comment: "<!--" ..."-->"
|
||||
|
||||
attributes = 0+attribute%__;
|
||||
attribute = (1+id%`:)__`=__ (id / `" ..`" / `' ..`');
|
||||
attribute = (1+id%`:)__`=__ (id / `" ..`" / `' ..`');
|
||||
match-tag = id;
|
||||
match-body = '';
|
||||
attributes: 0+attribute%__
|
||||
attribute: (1+id%`:)__`=__ (id / `" ..`" / `' ..`')
|
||||
attribute: (1+id%`:)__`=__ (id / `" ..`" / `' ..`')
|
||||
match-tag: id
|
||||
match-body: ''
|
||||
|
Loading…
Reference in New Issue
Block a user