diff options
| field | value | date |
|---|---|---|
| author | Bruce Hill <bruce@bruce-hill.com> | 2020-09-10 22:42:47 -0700 |
| committer | Bruce Hill <bruce@bruce-hill.com> | 2020-09-10 22:42:47 -0700 |
| commit | b93d8979bd9ea4148ea7e1d08d00ca846e151cf1 (patch) | |
| tree | 5762fbe37037115cdcee606b110000ce16232fcc | |
| parent | 95ec009a7c6cfb2ae43e63e25744ff5f76b88bf5 (diff) | |
Misc. tweaks and visualization changes, capitalized some rules
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | bpeg.bpeg | 78 |
| -rw-r--r-- | bpeg.c | 39 |
| -rw-r--r-- | bpeg.h | 21 |
3 files changed, 87 insertions, 51 deletions
@@ -1,60 +1,66 @@ # This is a file defining the BPEG grammar using BPEG syntax -grammar; -grammar = __ @[main-pattern]extended-pat __ *((__`;__) def) ?(`;__); -def = @[name]ref __ `= __ @[definition]extended-pat; +Grammar; +Grammar = __ @[main-pattern]extended-pat __ *((__`;__) Def) ?(`;__); +Def = @[name]Ref __ `= __ @[definition]extended-pat; # This is used for command line arguments: -string-grammar = *(`\ pat ?`; / .); +String-grammar = *(`\ pat ?`; / .); -pat = empty / dot / string / char-range / char / escape-range / escape / no / anything-but - / upto-and / repeat / after / before / capture / replace / ref / parens; +pat = Empty / Dot / String / Char-range / Char / Escape-range / Escape / No / Anything-but + / Upto-and / Repeat / After / Before / Capture / Replace / Ref / parens; -empty = `/ >(__ (`)/`})); -dot = `.; -string = ( - `" @[s]*(escape / ~`") `" - / `' @[s]*(escape / ~`') `' +Empty = `/ >(__ (`)/`})); +Dot = `.; +String = ( + `" @[s]*(Escape / ~`") `" + / `' @[s]*(Escape / ~`') `' ); -char-range = `` @[low]. `- @[high].; -char = `` @[s].; -escape-range = `\ @[low]escape-sequence `- @[high]escape-sequence; -escape = `\ @[s]escape-sequence; +Char-range = `` @[low]. `- @[high].; +Char = `` @[s].; +Escape-range = `\ @[low]escape-sequence `- @[high]escape-sequence; +Escape = `\ @[s]escape-sequence; escape-sequence = ( 1-3 `0-7 / `x 2 (`0-9/`a-f/`A-F) /`a/`b/`e/`n/`r/`t/`v / . / \n ); -no = `! _ @pat; -anything-but = `~ ?`~ _ @pat; -upto-and = `& ?`& _ @pat; -repeat = ( - @[min]int _ `- _ @[max]int - / @[min]{=>"0"} @[max]int _ `- - / @[min]int _ `+ @[max](/) - / @[min]@[max]int - / `+ @[min]{=>"1"} @[max](/) - / `* @[min]{=>"0"} @[max](/) - / `? @[min]{=>"0"} @[max]{=>"1"} +No = `! 
_ @pat; +Anything-but = `~ ?`~ _ @pat; +Upto-and = `& ?`& _ @pat; +Repeat = ( + @[min]int _ `- _ @[max]int + /{@[min]{=>"0"}=>} @[max]int _ `- + / @[min]int _ `+ @[max](/) + / @[min]@[max]int + /{@[min]{=>"1"}=>} `+ @[max](/) + /{@[min]{=>"0"}=>} `* @[max](/) + /{@[min]{=>"0"}=>} `? {@[max]{=>"1"}=>} ) _ @[repeat-pat]pat ?( __ `% __ @[sep]pat); -after = `< _ pat; -before = `> _ pat; -capture = `@ ?(_ `[ @[capture-name]ref `]) _ @[capture]pat; -replace = `{ __ ( - ?(@[replace-pat]extended-pat __) "=>" ?(__ @[replacement]string) +After = `< _ pat; +Before = `> _ pat; +Capture = `@ ?(_ `[ @[capture-name]Ref `]) _ @[capture]pat; +Replace = `{ __ ( + ?(@[replace-pat]extended-pat __) "=>" ?(__ @[replacement]String) ) __ `}; -ref = @[name]( +Ref = @[name]( "^^" / "^" / "__" / "_" / "$$" / "$" / (`a-z/`A-Z) *(`a-z/`A-Z/`0-9/`-)); parens = `( __ extended-pat __ `); -chain = +@pat % (__); -otherwise = +@(chain/pat) % (__`/__); -extended-pat = otherwise / chain / pat; +Chain = +@pat % (__); +Otherwise = +@(Chain/pat) % (__`/__); +extended-pat = Otherwise / Chain / pat; -_ = *(` / \t); +# Special-symbol rules: +_ = *(` / \t); __ = *(` / \t / \r / \n / comment); +$$ = !$.; +$ = !.; +^^ = !<$.; +^ = !<.; + hash-comment = `# *.; # Note: comments are undefined by default in regular BPEG @@ -52,10 +52,11 @@ static match_t *free_match(match_t *m) */ static match_t *match(const char *str, vm_op_t *op) { - tailcall: + //tailcall: switch (op->op) { case VM_EMPTY: { match_t *m = calloc(sizeof(match_t), 1); + m->op = op; m->start = str; m->end = str; return m; @@ -64,6 +65,7 @@ static match_t *match(const char *str, vm_op_t *op) if (!*str || (!op->multiline && *str == '\n')) return NULL; match_t *m = calloc(sizeof(match_t), 1); + m->op = op; m->start = str; m->end = str+1; return m; @@ -72,6 +74,7 @@ static match_t *match(const char *str, vm_op_t *op) if (strncmp(str, op->args.s, op->len) != 0) return NULL; match_t *m = calloc(sizeof(match_t), 1); + m->op = op; m->start = str; m->end = 
str + op->len; return m; @@ -80,6 +83,7 @@ static match_t *match(const char *str, vm_op_t *op) if (*str < op->args.range.low || *str > op->args.range.high) return NULL; match_t *m = calloc(sizeof(match_t), 1); + m->op = op; m->start = str; m->end = str + 1; return m; @@ -94,6 +98,7 @@ static match_t *match(const char *str, vm_op_t *op) return NULL; } m = calloc(sizeof(match_t), 1); + m->op = op; m->start = str; if (op->op == VM_ANYTHING_BUT) ++str; m->end = str; @@ -102,6 +107,7 @@ static match_t *match(const char *str, vm_op_t *op) case VM_UPTO_AND: { match_t *m = calloc(sizeof(match_t), 1); m->start = str; + m->op = op; match_t *p = NULL; for (const char *prev = NULL; p == NULL && prev < str; ) { prev = str; @@ -121,6 +127,7 @@ static match_t *match(const char *str, vm_op_t *op) match_t *m = calloc(sizeof(match_t), 1); m->start = str; m->end = str; + m->op = op; if (op->args.repetitions.max == 0) return m; match_t **dest = &m->child; @@ -171,6 +178,7 @@ static match_t *match(const char *str, vm_op_t *op) match_t *m = calloc(sizeof(match_t), 1); m->start = str; m->end = str; + m->op = op; return m; } case VM_BEFORE: { @@ -180,6 +188,7 @@ static match_t *match(const char *str, vm_op_t *op) match_t *m = calloc(sizeof(match_t), 1); m->start = str; m->end = str; + m->op = op; return m; } case VM_CAPTURE: { @@ -188,6 +197,7 @@ static match_t *match(const char *str, vm_op_t *op) match_t *m = calloc(sizeof(match_t), 1); m->start = str; m->end = p->end; + m->op = op; m->child = p; m->is_capture = 1; if (op->args.capture.name) @@ -210,6 +220,7 @@ static match_t *match(const char *str, vm_op_t *op) match_t *m = calloc(sizeof(match_t), 1); m->start = str; m->end = m2->end; + m->op = op; m->child = m1; m1->nextsibling = m2; return m; @@ -217,6 +228,7 @@ static match_t *match(const char *str, vm_op_t *op) case VM_REPLACE: { match_t *m = calloc(sizeof(match_t), 1); m->start = str; + m->op = op; if (op->args.replace.replace_pat) { match_t *p = match(str, 
op->args.replace.replace_pat); if (p == NULL) return NULL; @@ -234,8 +246,20 @@ static match_t *match(const char *str, vm_op_t *op) for (int i = ndefs-1; i >= 0; i--) { if (streq(defs[i].name, op->args.s)) { // Bingo! + /* op = defs[i].op; goto tailcall; + */ + match_t *p = match(str, defs[i].op); + if (p == NULL) return NULL; + match_t *m = calloc(sizeof(match_t), 1); + m->start = p->start; + m->end = p->end; + m->op = op; + m->child = p; + m->name_or_replacement = defs[i].name; + m->is_ref = 1; + return m; } } check(0, "Unknown identifier: '%s'", op->args.s); @@ -905,8 +929,11 @@ static void print_match(match_t *m, const char *color) } } } else { - if (m->is_capture && m->name_or_replacement) - printf("\033[0;2;33m[%s:", m->name_or_replacement); + const char *name = m->name_or_replacement; + if (verbose && m->is_ref && name && isupper(name[0])) + printf("\033[0;2;35m{%s:", name); + //if (m->is_capture && name) + // printf("\033[0;2;33m[%s:", name); const char *prev = m->start; for (match_t *child = m->child; child; child = child->nextsibling) { if (child->start > prev) @@ -916,8 +943,10 @@ static void print_match(match_t *m, const char *color) } if (m->end > prev) printf("%s%.*s", color, (int)(m->end - prev), prev); - if (m->is_capture && m->name_or_replacement) - printf("\033[0;2;33m]"); + if (verbose && m->is_ref && name && isupper(name[0])) + printf("\033[0;2;35m}"); + //if (m->is_capture && name) + // printf("\033[0;2;33m]"); } } @@ -19,16 +19,6 @@ const char *usage = ( " -s --slow\t run in slow mode for debugging\n" " -r --replace <replacement> replace the input pattern with the given replacement\n" " -g --grammar <grammar file> use the specified file as a grammar\n"); -/* - * Pattern matching result object - */ -typedef struct match_s { - // Where the match starts and ends (end is after the last character) - const char *start, *end; - unsigned int is_capture:1, is_replacement:1; - const char *name_or_replacement; - struct match_s *child, *nextsibling; -} 
match_t; /* * BPEG virtual machine opcodes @@ -85,6 +75,17 @@ typedef struct vm_op_s { } args; } vm_op_t; +/* + * Pattern matching result object + */ +typedef struct match_s { + // Where the match starts and ends (end is after the last character) + const char *start, *end; + unsigned int is_capture:1, is_replacement:1, is_ref:1; + const char *name_or_replacement; + struct match_s *child, *nextsibling; + vm_op_t *op; +} match_t; static inline const char *after_spaces(const char *str); static match_t *free_match(match_t *m); |
