diff options
| -rw-r--r-- | compiler.c | 36 | ||||
| -rw-r--r-- | grammars/bpeg.bpeg (renamed from bpeg.bpeg) | 32 | ||||
| -rw-r--r-- | grammars/builtins.bpeg | 32 | ||||
| -rw-r--r-- | grammars/html.bpeg | 12 | ||||
| -rw-r--r-- | types.h | 2 | ||||
| -rw-r--r-- | vm.c | 11 |
6 files changed, 50 insertions, 75 deletions
@@ -101,10 +101,11 @@ vm_op_t *bpeg_simplepattern(const char *str) if (matchchar(&str, '.')) // "..." op->multiline = 1; vm_op_t *till = bpeg_simplepattern(str); - // Don't advance str, the following pattern will be re-matched. - op->op = VM_UPTO; + op->op = VM_UPTO_AND; op->len = -1; op->args.pat = till; + if (till) + str = till->end; break; } else { anychar: @@ -205,38 +206,15 @@ vm_op_t *bpeg_simplepattern(const char *str) check(pat, "Expected pattern after repetition count"); str = pat->end; str = after_spaces(str); + vm_op_t *sep = NULL; if (matchchar(&str, '%')) { - vm_op_t *sep = bpeg_simplepattern(str); + sep = bpeg_simplepattern(str); check(sep, "Expected pattern for separator after '%%'"); str = sep->end; - set_range(op, min, max, pat, sep); } else { str = pat->end; - set_range(op, min, max, pat, NULL); - } - break; - } - // Special repetitions: - case '+': case '*': case '?': { - ssize_t min = -1, max = -1; - switch (c) { - case '+': min = 1, max = -1; break; - case '*': min = 0, max = -1; break; - case '?': min = 0, max = 1; break; - } - vm_op_t *pat = bpeg_simplepattern(str); - check(pat, "Expected pattern after '%c'", c); - str = pat->end; - str = after_spaces(str); - if (matchchar(&str, '%')) { - vm_op_t *sep = bpeg_simplepattern(str); - check(sep, "Expected pattern for separator after '%%'"); - str = sep->end; - set_range(op, min, max, pat, sep); - } else { - str = pat->end; - set_range(op, min, max, pat, NULL); } + set_range(op, min, max, pat, sep); break; } // Lookbehind @@ -269,7 +247,7 @@ vm_op_t *bpeg_simplepattern(const char *str) op = expand_choices(op); str = op->end; str = after_spaces(str); - check(matchchar(&str, ')'), "Expected closing parenthesis"); + check(matchchar(&str, ')'), "Expected closing ')' instead of \"%s\"", str); break; } // Capture diff --git a/bpeg.bpeg b/grammars/bpeg.bpeg index de0c676..dd3a3b4 100644 --- a/bpeg.bpeg +++ b/grammars/bpeg.bpeg @@ -1,13 +1,13 @@ # This is a file defining the BPEG grammar using BPEG syntax -Grammar = __ *Def%(__`;__) ?(`;__); +Grammar = __ 0+Def%(__`;__) 0-1(`;__); Def = @[name]Ref __ `= __ @[definition]extended-pat; # This is used for command line arguments: -String-pattern = *(`\ pat ?`; / .); +String-pattern = 0+(`\ pat 0-1`; / .); pat = suffixed-pat / simple-pat; -simple-pat = Empty / Upto / Dot / String / Char-range / Char / Escape-range / Escape / No +simple-pat = Empty / Upto-and / Dot / String / Char-range / Char / Escape-range / Escape / No / Nodent / Repeat / After / Before / Capture / Replace / Ref / parens; suffixed-pat = Eq-pat; @@ -16,8 +16,8 @@ Eq-pat = @[first]simple-pat "==" @[second]pat; Empty = `/ >(__ (`)/`})); Dot = `. !`.; String = ( - `" @[s]*(Escape / !`"$.) `" - / `' @[s]*(Escape / !`'$.) `' + `" @[s]0+(Escape / !`"$.) `" + / `' @[s]0+(Escape / !`'$.) `' ); Char-range = `` @[low]. `- @[high].; Char = `` @[s].; @@ -30,35 +30,31 @@ escape-sequence = ( ); No = `! _ @pat; Nodent = `|; -Upto = 2-3`. ?>(_@pat); +Upto-and = 2-3`. 0-1(_@pat); Repeat = ( @[min]int _ `- _ @[max]int - /{@[min]{=>"0"}=>} @[max]int _ `- / @[min]int _ `+ @[max](/) / @[min]@[max]int - /{@[min]{=>"1"}=>} `+ @[max](/) - /{@[min]{=>"0"}=>} `* @[max](/) - /{@[min]{=>"0"}=>} `? {@[max]{=>"1"}=>} - ) _ @[repeat-pat]pat ?( __ `% __ @[sep]pat); + ) _ @[repeat-pat]pat 0-1( __ `% __ @[sep]pat); After = `< _ pat; Before = `> _ pat; -Capture = `@ ?(_ `[ @[capture-name]Ref `]) _ @[capture]pat; +Capture = `@ 0-1(_ `[ @[capture-name]Ref `]) _ @[capture]pat; Replace = `{ __ ( - ?(@[replace-pat]extended-pat __) "=>" ?(__ @[replacement]String) + 0-1(@[replace-pat]extended-pat __) "=>" 0-1(__ @[replacement]String) ) __ `}; Ref = @[name]( "^^" / "^" / "__" / "_" / "$$" / "$" / - (`a-z/`A-Z) *(`a-z/`A-Z/`0-9/`-)); + (`a-z/`A-Z) 0+(`a-z/`A-Z/`0-9/`-)); parens = `( __ extended-pat __ `); -Chain = +@pat % (__); -Otherwise = +@(Chain/pat) % (__`/__); +Chain = 2+@pat % (__); +Otherwise = 2+@(Chain/pat) % (__`/__); extended-pat = Otherwise / Chain / pat; # Special-symbol rules: -_ = *(` / \t); -__ = *(` / \t / \r / \n / comment); +_ = 0+(` / \t); +__ = 0+(` / \t / \r / \n / comment); $$ = !$.; $ = !.; ^^ = !<$.; diff --git a/grammars/builtins.bpeg b/grammars/builtins.bpeg index b90ac12..6ba31df 100644 --- a/grammars/builtins.bpeg +++ b/grammars/builtins.bpeg @@ -1,29 +1,29 @@ # Meta-rules for acting on everything pattern = !(/); # Not defined by default replacement = !(/); # Not defined by default -replace-all = +(...@replacement) ...; -find-all = +find-next%\n ?{!<\n => "\n"}; +replace-all = 1+(...@replacement) ...; +find-all = 1+find-next%\n 0-1{!<\n => "\n"}; find-next = matching-line / {..\n =>} find-next; -only-matches = +{...@pattern=>'@1\n'}; -matching-line = +(..@pattern) ..$; +only-matches = 1+{...@pattern=>'@1\n'}; +matching-line = 1+(..@pattern) ..$; # Helper definitions (commonly used) -indent = \n|+(\t/' '); +indent = \n|1+(\t/' '); dedent = $ !(\n|); -indented-block = |` ..$ *(\n|..$); +indented-block = |` ..$ 0+(\n|..$); crlf = \r\n; cr = \r; r = \r; -anglebraces = `< *(anglebraces / !`>.) `>; -brackets = `[ *(brackets / !`].) `]; -braces = `{ *(braces / !`}.) `}; -parens = `( *(parens / !`).) `); -id = !<(`a-z/`A-Z/`_/`0-9) (`a-z/`A-Z/`_) *(`a-z/`A-Z/`_/`0-9); -word = !<(`a-z/`A-Z/`_/`0-9) +(`a-z/`A-Z) !>(`0-9/`_); +anglebraces = `< 0+(anglebraces / !`>.) `>; +brackets = `[ 0+(brackets / !`].) `]; +braces = `{ 0+(braces / !`}.) `}; +parens = `( 0+(parens / !`).) `); +id = !<(`a-z/`A-Z/`_/`0-9) (`a-z/`A-Z/`_) 0+(`a-z/`A-Z/`_/`0-9); +word = !<(`a-z/`A-Z/`_/`0-9) 1+(`a-z/`A-Z) !>(`0-9/`_); HEX = `0-9/`A-F; Hex = `0-9/`a-f/`A-F; hex = `0-9/`a-f; -number = +`0-9 ?(`. *`0-9) / `. +`0-9; -int = +`0-9; +number = 1+`0-9 0-1(`. 0+`0-9) / `. 1+`0-9; +int = 1+`0-9; digit = `0-9; Abc = `a-z/`A-Z; ABC = `A-Z; @@ -42,5 +42,5 @@ $$ = !$.; $ = !.; ^^ = !<$.; ^ = !<.; -__ = *(` /\t/\n/\r/comment); -_ = *(` /\t); +__ = 0+(` /\t/\n/\r/comment); +_ = 0+(` /\t); diff --git a/grammars/html.bpeg b/grammars/html.bpeg index 7af1f63..451e61c 100644 --- a/grammars/html.bpeg +++ b/grammars/html.bpeg @@ -1,5 +1,5 @@ # HTML grammar -HTML = __ ?(doctype __) *html-element%__ __; +HTML = __ 0-1(doctype __) 0+html-element%__ __; doctype = "<!DOCTYPE" ..`>; @@ -9,19 +9,19 @@ html-element = ( / >(`<("template")) template-element / normal-element); -void-element = `< @[tag](id==match-tag) __attributes__ ?`/ __ `>; +void-element = `< @[tag](id==match-tag) __attributes__ 0-1`/ __ `>; template-element = `< @[tag](id==match-tag) __`> __ >match-body @[body]0+(!`<$. / comment / html-element / !("</"tag__`>)$.) ("</"tag__`>); raw-element = `< @[tag](id==match-tag) __attributes__ `> >match-body @[body].. ("</"tag__`>); -normal-element = `< @[tag](id==match-tag) __attributes__ `> >match-body @[body]*(!`<$. / comment / html-element / !("</"tag__`>)$.) "</"tag__`>; +normal-element = `< @[tag](id==match-tag) __attributes__ `> >match-body @[body]0+(!`<$. / comment / html-element / !("</"tag__`>)$.) "</"tag__`>; comment = "<!--" ..."-->"; -attributes = *(!(attribute==match-attribute))%__ __(attribute==match-attribute)__ *attribute%__; -attribute = (+id%`:)__`=__ (id / `" ..`" / `' ..`'); -attribute = (+id%`:)__`=__ (id / `" ..`" / `' ..`'); +attributes = 0+(!(attribute==match-attribute))%__ __(attribute==match-attribute)__ 0+attribute%__; +attribute = (1+id%`:)__`=__ (id / `" ..`" / `' ..`'); +attribute = (1+id%`:)__`=__ (id / `" ..`" / `' ..`'); match-attribute = attribute; match-tag = id; match-body = (/); @@ -20,7 +20,7 @@ enum VMOpcode { VM_STRING, VM_RANGE, VM_NOT, - VM_UPTO, + VM_UPTO_AND, VM_REPEAT, VM_BEFORE, VM_AFTER, @@ -19,7 +19,7 @@ static const char *opcode_names[] = { [VM_STRING] = "STRING", [VM_RANGE] = "RANGE", [VM_NOT] = "NOT", - [VM_UPTO] = "UPTO", + [VM_UPTO_AND] = "UPTO_AND", [VM_REPEAT] = "REPEAT", [VM_BEFORE] = "BEFORE", [VM_AFTER] = "AFTER", @@ -127,7 +127,7 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, unsigned int m->end = str; return m; } - case VM_UPTO: { + case VM_UPTO_AND: { match_t *m = calloc(sizeof(match_t), 1); m->start = str; m->op = op; @@ -136,7 +136,8 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, unsigned int prev = str; match_t *p = _match(g, str, op->args.pat, flags, rec); if (p) { - destroy_match(&p); + m->child = p; + str = p->end; break; } // This isn't in the for() structure because there needs to @@ -410,8 +411,8 @@ void print_pattern(vm_op_t *op) fprintf(stderr, ")"); break; } - case VM_UPTO: { - fprintf(stderr, "text up to ("); + case VM_UPTO_AND: { + fprintf(stderr, "text up to and including ("); print_pattern(op->args.pat); fprintf(stderr, ")"); break; |
