diff options
| -rw-r--r-- | compiler.c | 12 | ||||
| -rw-r--r-- | file_loader.c | 2 | ||||
| -rw-r--r-- | grammar.c | 20 | ||||
| -rw-r--r-- | grammars/bpeg.bpeg | 86 | ||||
| -rw-r--r-- | grammars/builtins.bpeg | 110 | ||||
| -rw-r--r-- | grammars/html.bpeg | 28 |
6 files changed, 137 insertions, 121 deletions
@@ -86,9 +86,8 @@ static vm_op_t *chain_together(vm_op_t *first, vm_op_t *second) */ vm_op_t *bpeg_simplepattern(file_t *f, const char *str) { - if (!*str) return NULL; str = after_spaces(str); - check(*str, "Expected a pattern"); + if (!*str) return NULL; vm_op_t *op = calloc(sizeof(vm_op_t), 1); op->start = str; op->len = -1; @@ -320,6 +319,11 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str) } else { op->args.s = strndup(&c, 1); } + if (*after_spaces(str) == ':') { + free((char*)op->args.s); + free(op); + return NULL; + } op->op = VM_REF; break; } @@ -333,6 +337,10 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str) --str; const char *refname = str; str = after_name(str); + if (*after_spaces(str) == ':') { + free(op); + return NULL; + } op->op = VM_REF; op->args.s = strndup(refname, (size_t)(str - refname)); break; diff --git a/file_loader.c b/file_loader.c index 33339b0..1bc3d20 100644 --- a/file_loader.c +++ b/file_loader.c @@ -77,7 +77,7 @@ size_t get_line_number(file_t *f, const char *p) if (f->lines[n] > p) return n; } - return 0; + return f->nlines; } size_t get_char_number(file_t *f, const char *p) @@ -33,25 +33,33 @@ void add_def(grammar_t *g, file_t *f, const char *src, const char *name, vm_op_t */ vm_op_t *load_grammar(grammar_t *g, file_t *f) { + check(f, "Null file provided"); vm_op_t *ret = NULL; const char *src = f->contents; - do { - src = after_spaces(src); - if (!*src) break; + src = after_spaces(src); + while (*src) { const char *name = src; const char *name_end = after_name(name); check(name_end > name, "Invalid name for definition"); name = strndup(name, (size_t)(name_end-name)); src = after_spaces(name_end); - check(matchchar(&src, '='), "Expected '=' in definition"); + check(matchchar(&src, ':'), "Expected ':' in definition"); vm_op_t *op = bpeg_pattern(f, src); - check(op, "Couldn't load definition"); + if (op == NULL) break; + //check(op, "Couldn't load definition"); add_def(g, f, src, name, op); if (ret == NULL) { ret = op; } src = op->end; - } while (*src && matchchar(&src, ';')); + src = after_spaces(src); + if (*src && matchchar(&src, ';')) + src = after_spaces(src); + } + if (src < &f->contents[f->length-1]) { + fprint_line(stderr, f, src, NULL, "Invalid BPEG pattern"); + _exit(1); + } return ret; } diff --git a/grammars/bpeg.bpeg b/grammars/bpeg.bpeg index b1381bc..56a96d0 100644 --- a/grammars/bpeg.bpeg +++ b/grammars/bpeg.bpeg @@ -1,66 +1,66 @@ # This is a file defining the BPEG grammar using BPEG syntax -Grammar = __ 0+Def%(__`;__) 0-1(`;__); -Def = @[name]Ref __ `= __ @[definition]extended-pat; +Grammar: __ 0+(Def 0-1(__`;))%__ __ +Def: @[name]id __ `: __ @[definition]extended-pat # This is used for command line arguments: -String-pattern = 0+(`\ pat 0-1`; / .); +String-pattern: 0+(`\ pat 0-1`; / .) -pat = suffixed-pat / simple-pat; -simple-pat = Empty / Upto-and / Dot / String / Char-range / Char / Escape-range / Escape / No - / Nodent / Repeat / After / Before / Capture / Replace / Ref / parens; -suffixed-pat = Eq-pat; +pat: suffixed-pat / simple-pat +simple-pat: Empty / Upto-and / Dot / String / Char-range / Char / Escape-range / Escape / No + / Nodent / Repeat / After / Before / Capture / Replace / Ref / parens +suffixed-pat: Eq-pat -Eq-pat = @[first]simple-pat "==" @[second]pat; +Eq-pat: @[first]simple-pat "==" @[second]pat -Empty = `/ >(__ (`)/`})); -Dot = `. !`.; -String = ( +Empty: `/ >(__ (`)/`})) +Dot: `. !`. +String: ( `" @[s]0+(Escape / !`"$.) `" / `' @[s]0+(Escape / !`'$.) `' - ); -Char-range = `` @[low]. `- @[high].; -Char = `` @[s].; -Escape-range = `\ @[low]escape-sequence `- @[high]escape-sequence; -Escape = `\ @[s]escape-sequence; -escape-sequence = ( + ) +Char-range: `` @[low]. `- @[high]. +Char: `` @[s]. +Escape-range: `\ @[low]escape-sequence `- @[high]escape-sequence +Escape: `\ @[s]escape-sequence +escape-sequence: ( 1-3 `0-7 / `x 2 (`0-9/`a-f/`A-F) /`a/`b/`e/`n/`r/`t/`v / . / \n - ); -No = `! _ @pat; -Nodent = `|; -Upto-and = 2-3`. 0-1(_@pat); -Repeat = ( + ) +No: `! _ @pat +Nodent: `| +Upto-and: 2-3`. 0-1(_@pat) +Repeat: ( @[min]int _ `- _ @[max]int / @[min]int _ `+ @[max]'' / @[min]@[max]int - ) _ @[repeat-pat]pat 0-1( __ `% __ @[sep]pat); -After = `< _ pat; -Before = `> _ pat; -Capture = `@ 0-1(_ `[ @[capture-name]Ref `]) _ @[capture]pat; -Replace = `{ __ ( + ) _ @[repeat-pat]pat 0-1( __ `% __ @[sep]pat) +After: `< _ pat +Before: `> _ pat +Capture: `@ 0-1(_ `[ @[capture-name](...>`]) `]) _ @[capture]pat +Replace: `{ __ ( 0-1(@[replace-pat]extended-pat __) "=>" 0-1(__ @[replacement]String) - ) __ `}; -Ref = @[name]( - "^^" / "^" / "__" / "_" / "$$" / "$" / - (`a-z/`A-Z) 0+(`a-z/`A-Z/`0-9/`-)); + ) __ `} +Ref: @[name]id !>(__`:) -parens = `( __ extended-pat __ `); +parens: `( __ extended-pat __ `) -Chain = 2+@pat % (__); -Otherwise = 2+@(Chain/pat) % (__`/__); -extended-pat = Otherwise / Chain / pat; +Chain: 2+@pat % (__) +Otherwise: 2+@(Chain/pat) % (__`/__) +extended-pat: Otherwise / Chain / pat # Special-symbol rules: -_ = 0+(` / \t); -__ = 0+(` / \t / \r / \n / comment); -$$ = !$.; -$ = !.; -^^ = !<$.; -^ = !<.; +_: 0+(` / \t) +__: 0+(` / \t / \r / \n / comment) +$$: !$. +$: !. +^^: !<$. +^: !<. -hash-comment = `# .. $; +id: "^^" / "^" / "__" / "_" / "$$" / "$" / (`a-z/`A-Z) 0+(`a-z/`A-Z/`0-9/`-) + +hash-comment: `# .. $ # Note: comments are undefined by default in regular BPEG -comment = hash-comment; +comment: hash-comment diff --git a/grammars/builtins.bpeg b/grammars/builtins.bpeg index 7446212..7490079 100644 --- a/grammars/builtins.bpeg +++ b/grammars/builtins.bpeg @@ -1,61 +1,61 @@ -yes = ''; -no = !''; +yes: '' +no: !'' # Configurable options: -is-tty = no; # Defined as either always-match or always-fail, depending on stdout -print-line-numbers = is-tty; -print-filenames = is-tty; -highlight = is-tty; +is-tty: no # Defined as either always-match or always-fail, depending on stdout +print-line-numbers: is-tty +print-filenames: is-tty +highlight: is-tty # Meta-rules for acting on everything: -pattern = !''; # Not defined by default -replacement = !''; # Not defined by default -replace-all = define-highlights add-filename 1+(...@hl-replacement) ...; -find-all = define-highlights add-filename 1+find-next%\n 0-1{!<\n => "\n"}; -find-next = matching-line / {..\n =>} find-next; -only-matches = define-highlights 1+{...@hl-pattern=>'@1\n'}; -matching-line = add-line-number 1+(..hl-pattern) ..$; -add-filename = 0-1(print-filenames (is-tty {=>"\033[33;1;4m@&:\033[0m\n"} / {=>"@&:\n"})); -add-line-number = 0-1(print-line-numbers (is-tty {=>"\033[2m@#\033[5G|\033[0m "} / {=>"@#| "})); -hl-pattern = {@[match]pattern define-highlights => "@[hl-start]@[match]@[hl-end]"}; -hl-replacement = {@[match]replacement define-highlights => "@[hl-start]@[match]@[hl-end]" }; -define-highlights = highlight @[hl-start]{=>"\033[31;1m"} @[hl-end]{=>"\033[0m"} / @[hl-start]"" @[hl-end]""; +pattern: !'' # Not defined by default +replacement: !'' # Not defined by default +replace-all: define-highlights add-filename 1+(...@hl-replacement) ... +find-all: define-highlights add-filename 1+find-next%\n 0-1{!<\n => "\n"} +find-next: matching-line / {..\n =>} find-next +only-matches: define-highlights 1+{...@hl-pattern=>'@1\n'} +matching-line: add-line-number 1+(..hl-pattern) ..$ +add-filename: 0-1(print-filenames (is-tty {=>"\033[33;1;4m@&:\033[0m\n"} / {=>"@&:\n"})) +add-line-number: 0-1(print-line-numbers (is-tty {=>"\033[2m@#\033[5G|\033[0m "} / {=>"@#| "})) +hl-pattern: {@[match]pattern define-highlights => "@[hl-start]@[match]@[hl-end]"} +hl-replacement: {@[match]replacement define-highlights => "@[hl-start]@[match]@[hl-end]" } +define-highlights: highlight @[hl-start]{=>"\033[31;1m"} @[hl-end]{=>"\033[0m"} / @[hl-start]"" @[hl-end]"" # Helper definitions (commonly used) -indent = \n|1+(\t/' '); -dedent = $ !(\n|); -indented-block = |` ..$ 0+(\n|..$); -crlf = \r\n; -cr = \r; r = \r; -anglebraces = `< 0+(anglebraces / !`>.) `>; -brackets = `[ 0+(brackets / !`].) `]; -braces = `{ 0+(braces / !`}.) `}; -parens = `( 0+(parens / !`).) `); -id = !<(`a-z/`A-Z/`_/`0-9) (`a-z/`A-Z/`_) 0+(`a-z/`A-Z/`_/`0-9); -id-char = `a-z/`A-Z/`_/`0-9; -word = !<(`a-z/`A-Z/`_/`0-9) 1+(`a-z/`A-Z) !>(`0-9/`_); -HEX = `0-9/`A-F; -Hex = `0-9/`a-f/`A-F; -hex = `0-9/`a-f; -number = 1+`0-9 0-1(`. 0+`0-9) / `. 1+`0-9; -int = 1+`0-9; -digit = `0-9; -Abc = `a-z/`A-Z; -ABC = `A-Z; -abc = `a-z; -esc = \e; e = \e; -tab = \t; t = \t; -nl = \n; lf = \n; n = \n; -c-block-comment = '/*' ... '*/'; -c-line-comment = '//' ..$; -c-comment = c-line-comment / c-block-comment; -hash-comment = `# ..$; -comment = !''; # No default definition, can be overridden -WS = ` /\t/\n/\r/comment; -ws = ` /\t; -$$ = !$.; -$ = !.; -^^ = !<$.; -^ = !<.; -__ = 0+(` /\t/\n/\r/comment); -_ = 0+(` /\t); +indent: \n|1+(\t/' ') +dedent: $ !(\n|) +indented-block: |` ..$ 0+(\n|..$) +crlf: \r\n +cr: \r; r: \r +anglebraces: `< 0+(anglebraces / !`>.) `> +brackets: `[ 0+(brackets / !`].) `] +braces: `{ 0+(braces / !`}.) `} +parens: `( 0+(parens / !`).) `) +id: !<(`a-z/`A-Z/`_/`0-9) (`a-z/`A-Z/`_) 0+(`a-z/`A-Z/`_/`0-9) +id-char: `a-z/`A-Z/`_/`0-9 +word: !<(`a-z/`A-Z/`_/`0-9) 1+(`a-z/`A-Z) !>(`0-9/`_) +HEX: `0-9/`A-F +Hex: `0-9/`a-f/`A-F +hex: `0-9/`a-f +number: 1+`0-9 0-1(`. 0+`0-9) / `. 1+`0-9 +int: 1+`0-9 +digit: `0-9 +Abc: `a-z/`A-Z +ABC: `A-Z +abc: `a-z +esc: \e; e: \e +tab: \t; t: \t +nl: \n; lf: \n; n: \n +c-block-comment: '/*' ... '*/' +c-line-comment: '//' ..$ +c-comment: c-line-comment / c-block-comment +hash-comment: `# ..$ +comment: !''; # No default definition, can be overridden +WS: ` /\t/\n/\r/comment +ws: ` /\t +$$: !$. +$: !. +^^: !<$. +^: !<. +__: 0+(` /\t/\n/\r/comment) +_: 0+(` /\t) diff --git a/grammars/html.bpeg b/grammars/html.bpeg index 3a2d9f2..908fa69 100644 --- a/grammars/html.bpeg +++ b/grammars/html.bpeg @@ -1,26 +1,26 @@ # HTML grammar -HTML = __ 0-1(doctype __) 0+html-element%__ __; +HTML: __ 0-1(doctype __) 0+html-element%__ __ -doctype = "<!DOCTYPE" ..`>; +doctype: "<!DOCTYPE" ..`> -html-element = ( +html-element: ( >(`<("area"/"base"/"br"/"col"/"embed"/"hr"/"img"/"input"/"link"/"meta"/"param"/"source"/"track"/"wbr")) void-element / >(`<("script"/"style"/"textarea"/"title")) raw-element / >(`<("template")) template-element - / normal-element); + / normal-element) -void-element = `< @[tag](id==match-tag) __attributes__ 0-1`/ __ `>; +void-element: `< @[tag](id==match-tag) __attributes__ 0-1`/ __ `> -template-element = `< @[tag](id==match-tag) __`> __ >match-body @[body]0+(!`<$. / comment / html-element / !("</"tag__`>)$.) ("</"tag__`>); +template-element: `< @[tag](id==match-tag) __`> __ >match-body @[body]0+(!`<$. / comment / html-element / !("</"tag__`>)$.) ("</"tag__`>) -raw-element = `< @[tag](id==match-tag) __attributes__ `> >match-body @[body].. ("</"tag__`>); +raw-element: `< @[tag](id==match-tag) __attributes__ `> >match-body @[body].. ("</"tag__`>) -normal-element = `< @[tag](id==match-tag) __attributes__ `> >match-body @[body]0+(!`<$. / comment / html-element / !("</"tag__`>)$.) "</"tag__`>; +normal-element: `< @[tag](id==match-tag) __attributes__ `> >match-body @[body]0+(!`<$. / comment / html-element / !("</"tag__`>)$.) "</"tag__`> -comment = "<!--" ..."-->"; +comment: "<!--" ..."-->" -attributes = 0+attribute%__; -attribute = (1+id%`:)__`=__ (id / `" ..`" / `' ..`'); -attribute = (1+id%`:)__`=__ (id / `" ..`" / `' ..`'); -match-tag = id; -match-body = ''; +attributes: 0+attribute%__ +attribute: (1+id%`:)__`=__ (id / `" ..`" / `' ..`') +attribute: (1+id%`:)__`=__ (id / `" ..`" / `' ..`') +match-tag: id +match-body: '' |
