about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- compiler.c 12
-rw-r--r-- file_loader.c 2
-rw-r--r-- grammar.c 20
-rw-r--r-- grammars/bpeg.bpeg 86
-rw-r--r-- grammars/builtins.bpeg 110
-rw-r--r-- grammars/html.bpeg 28
6 files changed, 137 insertions, 121 deletions
diff --git a/compiler.c b/compiler.c
index f5bcd90..c3c6fba 100644
--- a/compiler.c
+++ b/compiler.c
@@ -86,9 +86,8 @@ static vm_op_t *chain_together(vm_op_t *first, vm_op_t *second)
*/
vm_op_t *bpeg_simplepattern(file_t *f, const char *str)
{
- if (!*str) return NULL;
str = after_spaces(str);
- check(*str, "Expected a pattern");
+ if (!*str) return NULL;
vm_op_t *op = calloc(sizeof(vm_op_t), 1);
op->start = str;
op->len = -1;
@@ -320,6 +319,11 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str)
} else {
op->args.s = strndup(&c, 1);
}
+ if (*after_spaces(str) == ':') {
+ free((char*)op->args.s);
+ free(op);
+ return NULL;
+ }
op->op = VM_REF;
break;
}
@@ -333,6 +337,10 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str)
--str;
const char *refname = str;
str = after_name(str);
+ if (*after_spaces(str) == ':') {
+ free(op);
+ return NULL;
+ }
op->op = VM_REF;
op->args.s = strndup(refname, (size_t)(str - refname));
break;
diff --git a/file_loader.c b/file_loader.c
index 33339b0..1bc3d20 100644
--- a/file_loader.c
+++ b/file_loader.c
@@ -77,7 +77,7 @@ size_t get_line_number(file_t *f, const char *p)
if (f->lines[n] > p)
return n;
}
- return 0;
+ return f->nlines;
}
size_t get_char_number(file_t *f, const char *p)
diff --git a/grammar.c b/grammar.c
index b4621ee..ec0f40a 100644
--- a/grammar.c
+++ b/grammar.c
@@ -33,25 +33,33 @@ void add_def(grammar_t *g, file_t *f, const char *src, const char *name, vm_op_t
*/
vm_op_t *load_grammar(grammar_t *g, file_t *f)
{
+ check(f, "Null file provided");
vm_op_t *ret = NULL;
const char *src = f->contents;
- do {
- src = after_spaces(src);
- if (!*src) break;
+ src = after_spaces(src);
+ while (*src) {
const char *name = src;
const char *name_end = after_name(name);
check(name_end > name, "Invalid name for definition");
name = strndup(name, (size_t)(name_end-name));
src = after_spaces(name_end);
- check(matchchar(&src, '='), "Expected '=' in definition");
+ check(matchchar(&src, ':'), "Expected ':' in definition");
vm_op_t *op = bpeg_pattern(f, src);
- check(op, "Couldn't load definition");
+ if (op == NULL) break;
+ //check(op, "Couldn't load definition");
add_def(g, f, src, name, op);
if (ret == NULL) {
ret = op;
}
src = op->end;
- } while (*src && matchchar(&src, ';'));
+ src = after_spaces(src);
+ if (*src && matchchar(&src, ';'))
+ src = after_spaces(src);
+ }
+ if (src < &f->contents[f->length-1]) {
+ fprint_line(stderr, f, src, NULL, "Invalid BPEG pattern");
+ _exit(1);
+ }
return ret;
}
diff --git a/grammars/bpeg.bpeg b/grammars/bpeg.bpeg
index b1381bc..56a96d0 100644
--- a/grammars/bpeg.bpeg
+++ b/grammars/bpeg.bpeg
@@ -1,66 +1,66 @@
# This is a file defining the BPEG grammar using BPEG syntax
-Grammar = __ 0+Def%(__`;__) 0-1(`;__);
-Def = @[name]Ref __ `= __ @[definition]extended-pat;
+Grammar: __ 0+(Def 0-1(__`;))%__ __
+Def: @[name]id __ `: __ @[definition]extended-pat
# This is used for command line arguments:
-String-pattern = 0+(`\ pat 0-1`; / .);
+String-pattern: 0+(`\ pat 0-1`; / .)
-pat = suffixed-pat / simple-pat;
-simple-pat = Empty / Upto-and / Dot / String / Char-range / Char / Escape-range / Escape / No
- / Nodent / Repeat / After / Before / Capture / Replace / Ref / parens;
-suffixed-pat = Eq-pat;
+pat: suffixed-pat / simple-pat
+simple-pat: Empty / Upto-and / Dot / String / Char-range / Char / Escape-range / Escape / No
+ / Nodent / Repeat / After / Before / Capture / Replace / Ref / parens
+suffixed-pat: Eq-pat
-Eq-pat = @[first]simple-pat "==" @[second]pat;
+Eq-pat: @[first]simple-pat "==" @[second]pat
-Empty = `/ >(__ (`)/`}));
-Dot = `. !`.;
-String = (
+Empty: `/ >(__ (`)/`}))
+Dot: `. !`.
+String: (
`" @[s]0+(Escape / !`"$.) `"
/ `' @[s]0+(Escape / !`'$.) `'
- );
-Char-range = `` @[low]. `- @[high].;
-Char = `` @[s].;
-Escape-range = `\ @[low]escape-sequence `- @[high]escape-sequence;
-Escape = `\ @[s]escape-sequence;
-escape-sequence = (
+ )
+Char-range: `` @[low]. `- @[high].
+Char: `` @[s].
+Escape-range: `\ @[low]escape-sequence `- @[high]escape-sequence
+Escape: `\ @[s]escape-sequence
+escape-sequence: (
1-3 `0-7
/ `x 2 (`0-9/`a-f/`A-F)
/`a/`b/`e/`n/`r/`t/`v / . / \n
- );
-No = `! _ @pat;
-Nodent = `|;
-Upto-and = 2-3`. 0-1(_@pat);
-Repeat = (
+ )
+No: `! _ @pat
+Nodent: `|
+Upto-and: 2-3`. 0-1(_@pat)
+Repeat: (
@[min]int _ `- _ @[max]int
/ @[min]int _ `+ @[max]''
/ @[min]@[max]int
- ) _ @[repeat-pat]pat 0-1( __ `% __ @[sep]pat);
-After = `< _ pat;
-Before = `> _ pat;
-Capture = `@ 0-1(_ `[ @[capture-name]Ref `]) _ @[capture]pat;
-Replace = `{ __ (
+ ) _ @[repeat-pat]pat 0-1( __ `% __ @[sep]pat)
+After: `< _ pat
+Before: `> _ pat
+Capture: `@ 0-1(_ `[ @[capture-name](...>`]) `]) _ @[capture]pat
+Replace: `{ __ (
0-1(@[replace-pat]extended-pat __) "=>" 0-1(__ @[replacement]String)
- ) __ `};
-Ref = @[name](
- "^^" / "^" / "__" / "_" / "$$" / "$" /
- (`a-z/`A-Z) 0+(`a-z/`A-Z/`0-9/`-));
+ ) __ `}
+Ref: @[name]id !>(__`:)
-parens = `( __ extended-pat __ `);
+parens: `( __ extended-pat __ `)
-Chain = 2+@pat % (__);
-Otherwise = 2+@(Chain/pat) % (__`/__);
-extended-pat = Otherwise / Chain / pat;
+Chain: 2+@pat % (__)
+Otherwise: 2+@(Chain/pat) % (__`/__)
+extended-pat: Otherwise / Chain / pat
# Special-symbol rules:
-_ = 0+(` / \t);
-__ = 0+(` / \t / \r / \n / comment);
-$$ = !$.;
-$ = !.;
-^^ = !<$.;
-^ = !<.;
+_: 0+(` / \t)
+__: 0+(` / \t / \r / \n / comment)
+$$: !$.
+$: !.
+^^: !<$.
+^: !<.
-hash-comment = `# .. $;
+id: "^^" / "^" / "__" / "_" / "$$" / "$" / (`a-z/`A-Z) 0+(`a-z/`A-Z/`0-9/`-)
+
+hash-comment: `# .. $
# Note: comments are undefined by default in regular BPEG
-comment = hash-comment;
+comment: hash-comment
diff --git a/grammars/builtins.bpeg b/grammars/builtins.bpeg
index 7446212..7490079 100644
--- a/grammars/builtins.bpeg
+++ b/grammars/builtins.bpeg
@@ -1,61 +1,61 @@
-yes = '';
-no = !'';
+yes: ''
+no: !''
# Configurable options:
-is-tty = no; # Defined as either always-match or always-fail, depending on stdout
-print-line-numbers = is-tty;
-print-filenames = is-tty;
-highlight = is-tty;
+is-tty: no # Defined as either always-match or always-fail, depending on stdout
+print-line-numbers: is-tty
+print-filenames: is-tty
+highlight: is-tty
# Meta-rules for acting on everything:
-pattern = !''; # Not defined by default
-replacement = !''; # Not defined by default
-replace-all = define-highlights add-filename 1+(...@hl-replacement) ...;
-find-all = define-highlights add-filename 1+find-next%\n 0-1{!<\n => "\n"};
-find-next = matching-line / {..\n =>} find-next;
-only-matches = define-highlights 1+{...@hl-pattern=>'@1\n'};
-matching-line = add-line-number 1+(..hl-pattern) ..$;
-add-filename = 0-1(print-filenames (is-tty {=>"\033[33;1;4m@&:\033[0m\n"} / {=>"@&:\n"}));
-add-line-number = 0-1(print-line-numbers (is-tty {=>"\033[2m@#\033[5G|\033[0m "} / {=>"@#| "}));
-hl-pattern = {@[match]pattern define-highlights => "@[hl-start]@[match]@[hl-end]"};
-hl-replacement = {@[match]replacement define-highlights => "@[hl-start]@[match]@[hl-end]" };
-define-highlights = highlight @[hl-start]{=>"\033[31;1m"} @[hl-end]{=>"\033[0m"} / @[hl-start]"" @[hl-end]"";
+pattern: !'' # Not defined by default
+replacement: !'' # Not defined by default
+replace-all: define-highlights add-filename 1+(...@hl-replacement) ...
+find-all: define-highlights add-filename 1+find-next%\n 0-1{!<\n => "\n"}
+find-next: matching-line / {..\n =>} find-next
+only-matches: define-highlights 1+{...@hl-pattern=>'@1\n'}
+matching-line: add-line-number 1+(..hl-pattern) ..$
+add-filename: 0-1(print-filenames (is-tty {=>"\033[33;1;4m@&:\033[0m\n"} / {=>"@&:\n"}))
+add-line-number: 0-1(print-line-numbers (is-tty {=>"\033[2m@#\033[5G|\033[0m "} / {=>"@#| "}))
+hl-pattern: {@[match]pattern define-highlights => "@[hl-start]@[match]@[hl-end]"}
+hl-replacement: {@[match]replacement define-highlights => "@[hl-start]@[match]@[hl-end]" }
+define-highlights: highlight @[hl-start]{=>"\033[31;1m"} @[hl-end]{=>"\033[0m"} / @[hl-start]"" @[hl-end]""
# Helper definitions (commonly used)
-indent = \n|1+(\t/' ');
-dedent = $ !(\n|);
-indented-block = |` ..$ 0+(\n|..$);
-crlf = \r\n;
-cr = \r; r = \r;
-anglebraces = `< 0+(anglebraces / !`>.) `>;
-brackets = `[ 0+(brackets / !`].) `];
-braces = `{ 0+(braces / !`}.) `};
-parens = `( 0+(parens / !`).) `);
-id = !<(`a-z/`A-Z/`_/`0-9) (`a-z/`A-Z/`_) 0+(`a-z/`A-Z/`_/`0-9);
-id-char = `a-z/`A-Z/`_/`0-9;
-word = !<(`a-z/`A-Z/`_/`0-9) 1+(`a-z/`A-Z) !>(`0-9/`_);
-HEX = `0-9/`A-F;
-Hex = `0-9/`a-f/`A-F;
-hex = `0-9/`a-f;
-number = 1+`0-9 0-1(`. 0+`0-9) / `. 1+`0-9;
-int = 1+`0-9;
-digit = `0-9;
-Abc = `a-z/`A-Z;
-ABC = `A-Z;
-abc = `a-z;
-esc = \e; e = \e;
-tab = \t; t = \t;
-nl = \n; lf = \n; n = \n;
-c-block-comment = '/*' ... '*/';
-c-line-comment = '//' ..$;
-c-comment = c-line-comment / c-block-comment;
-hash-comment = `# ..$;
-comment = !''; # No default definition, can be overridden
-WS = ` /\t/\n/\r/comment;
-ws = ` /\t;
-$$ = !$.;
-$ = !.;
-^^ = !<$.;
-^ = !<.;
-__ = 0+(` /\t/\n/\r/comment);
-_ = 0+(` /\t);
+indent: \n|1+(\t/' ')
+dedent: $ !(\n|)
+indented-block: |` ..$ 0+(\n|..$)
+crlf: \r\n
+cr: \r; r: \r
+anglebraces: `< 0+(anglebraces / !`>.) `>
+brackets: `[ 0+(brackets / !`].) `]
+braces: `{ 0+(braces / !`}.) `}
+parens: `( 0+(parens / !`).) `)
+id: !<(`a-z/`A-Z/`_/`0-9) (`a-z/`A-Z/`_) 0+(`a-z/`A-Z/`_/`0-9)
+id-char: `a-z/`A-Z/`_/`0-9
+word: !<(`a-z/`A-Z/`_/`0-9) 1+(`a-z/`A-Z) !>(`0-9/`_)
+HEX: `0-9/`A-F
+Hex: `0-9/`a-f/`A-F
+hex: `0-9/`a-f
+number: 1+`0-9 0-1(`. 0+`0-9) / `. 1+`0-9
+int: 1+`0-9
+digit: `0-9
+Abc: `a-z/`A-Z
+ABC: `A-Z
+abc: `a-z
+esc: \e; e: \e
+tab: \t; t: \t
+nl: \n; lf: \n; n: \n
+c-block-comment: '/*' ... '*/'
+c-line-comment: '//' ..$
+c-comment: c-line-comment / c-block-comment
+hash-comment: `# ..$
+comment: !''; # No default definition, can be overridden
+WS: ` /\t/\n/\r/comment
+ws: ` /\t
+$$: !$.
+$: !.
+^^: !<$.
+^: !<.
+__: 0+(` /\t/\n/\r/comment)
+_: 0+(` /\t)
diff --git a/grammars/html.bpeg b/grammars/html.bpeg
index 3a2d9f2..908fa69 100644
--- a/grammars/html.bpeg
+++ b/grammars/html.bpeg
@@ -1,26 +1,26 @@
# HTML grammar
-HTML = __ 0-1(doctype __) 0+html-element%__ __;
+HTML: __ 0-1(doctype __) 0+html-element%__ __
-doctype = "<!DOCTYPE" ..`>;
+doctype: "<!DOCTYPE" ..`>
-html-element = (
+html-element: (
>(`<("area"/"base"/"br"/"col"/"embed"/"hr"/"img"/"input"/"link"/"meta"/"param"/"source"/"track"/"wbr")) void-element
/ >(`<("script"/"style"/"textarea"/"title")) raw-element
/ >(`<("template")) template-element
- / normal-element);
+ / normal-element)
-void-element = `< @[tag](id==match-tag) __attributes__ 0-1`/ __ `>;
+void-element: `< @[tag](id==match-tag) __attributes__ 0-1`/ __ `>
-template-element = `< @[tag](id==match-tag) __`> __ >match-body @[body]0+(!`<$. / comment / html-element / !("</"tag__`>)$.) ("</"tag__`>);
+template-element: `< @[tag](id==match-tag) __`> __ >match-body @[body]0+(!`<$. / comment / html-element / !("</"tag__`>)$.) ("</"tag__`>)
-raw-element = `< @[tag](id==match-tag) __attributes__ `> >match-body @[body].. ("</"tag__`>);
+raw-element: `< @[tag](id==match-tag) __attributes__ `> >match-body @[body].. ("</"tag__`>)
-normal-element = `< @[tag](id==match-tag) __attributes__ `> >match-body @[body]0+(!`<$. / comment / html-element / !("</"tag__`>)$.) "</"tag__`>;
+normal-element: `< @[tag](id==match-tag) __attributes__ `> >match-body @[body]0+(!`<$. / comment / html-element / !("</"tag__`>)$.) "</"tag__`>
-comment = "<!--" ..."-->";
+comment: "<!--" ..."-->"
-attributes = 0+attribute%__;
-attribute = (1+id%`:)__`=__ (id / `" ..`" / `' ..`');
-attribute = (1+id%`:)__`=__ (id / `" ..`" / `' ..`');
-match-tag = id;
-match-body = '';
+attributes: 0+attribute%__
+attribute: (1+id%`:)__`=__ (id / `" ..`" / `' ..`')
+attribute: (1+id%`:)__`=__ (id / `" ..`" / `' ..`')
+match-tag: id
+match-body: ''