From a82164505e89dc8257ef87844dfef1476e235a7f Mon Sep 17 00:00:00 2001 From: Bruce Hill Date: Mon, 14 Sep 2020 01:21:49 -0700 Subject: Added nodent support (|) --- bpeg.bpeg | 3 ++- compiler.c | 15 ++++++++++----- grammars/builtins.bpeg | 9 ++++++--- types.h | 1 + utils.c | 6 +++--- vm.c | 30 ++++++++++++++++++++++++++++++ 6 files changed, 52 insertions(+), 12 deletions(-) diff --git a/bpeg.bpeg b/bpeg.bpeg index ae2d3fb..de0c676 100644 --- a/bpeg.bpeg +++ b/bpeg.bpeg @@ -8,7 +8,7 @@ String-pattern = *(`\ pat ?`; / .); pat = suffixed-pat / simple-pat; simple-pat = Empty / Upto / Dot / String / Char-range / Char / Escape-range / Escape / No - / Repeat / After / Before / Capture / Replace / Ref / parens; + / Nodent / Repeat / After / Before / Capture / Replace / Ref / parens; suffixed-pat = Eq-pat; Eq-pat = @[first]simple-pat "==" @[second]pat; @@ -29,6 +29,7 @@ escape-sequence = ( /`a/`b/`e/`n/`r/`t/`v / . / \n ); No = `! _ @pat; +Nodent = `|; Upto = 2-3`. ?>(_@pat); Repeat = ( @[min]int _ `- _ @[max]int diff --git a/compiler.c b/compiler.c index 267ea27..8279285 100644 --- a/compiler.c +++ b/compiler.c @@ -211,6 +211,7 @@ vm_op_t *bpeg_simplepattern(const char *str) str = sep->end; set_range(op, min, max, pat, sep); } else { + str = pat->end; set_range(op, min, max, pat, NULL); } break; @@ -233,6 +234,7 @@ vm_op_t *bpeg_simplepattern(const char *str) str = sep->end; set_range(op, min, max, pat, sep); } else { + str = pat->end; set_range(op, min, max, pat, NULL); } break; @@ -345,8 +347,8 @@ vm_op_t *bpeg_simplepattern(const char *str) } // Empty choice (/) or {/} case '/': { - str = after_spaces(str); - if (*str == ')' || *str == '}') { + const char *next = after_spaces(str); + if (*next == ')' || *next == '}') { op->op = VM_EMPTY; } else { free(op); @@ -354,6 +356,10 @@ vm_op_t *bpeg_simplepattern(const char *str) } break; } + case '|': { + op->op = VM_NODENT; + break; + } default: { // Reference if (isalpha(c)) { @@ -373,9 +379,8 @@ vm_op_t *bpeg_simplepattern(const char *str) // Postfix operators: postfix: - str = after_spaces(str); - if (strncmp(str, "==", 2) == 0) { - str += 2; + if (strncmp(after_spaces(str), "==", 2) == 0) { + str = after_spaces(str)+2; vm_op_t *first = op; vm_op_t *second = bpeg_simplepattern(str); check(second, "Expected pattern after '=='"); diff --git a/grammars/builtins.bpeg b/grammars/builtins.bpeg index c871408..df03104 100644 --- a/grammars/builtins.bpeg +++ b/grammars/builtins.bpeg @@ -2,12 +2,15 @@ pattern = !(/); # Not defined by default replacement = !(/); # Not defined by default replace-all = +(...@replacement) ...; -find-all = {... >matching-line =>} +(matching-line/non-matching-line) ?{!<\n => "\n"}; +find-all = +find-next%\n ?{!<\n => "\n"}; +find-next = matching-line / {..\n =>} find-next; only-matches = +{...@pattern=>'@1\n'}; -matching-line = +(..@pattern)..$ ?\n; -non-matching-line = {..$=>}; +matching-line = +(..@pattern) ..$; # Helper definitions (commonly used) +indent = \n |+(\t/' '); +dedent = >(\n !|); +indented-block = |` ..$ *(\n|..$); crlf = \r\n; cr = \r; r = \r; anglebraces = `< *(anglebraces / !`>.) `>; diff --git a/types.h b/types.h index c8f7a17..36e1aa6 100644 --- a/types.h +++ b/types.h @@ -26,6 +26,7 @@ enum VMOpcode { VM_REPLACE, VM_REF, VM_BACKREF, + VM_NODENT, }; /* diff --git a/utils.c b/utils.c index 5078c01..7844934 100644 --- a/utils.c +++ b/utils.c @@ -46,9 +46,9 @@ const char *after_name(const char *str) */ int matchchar(const char **str, char c) { - *str = after_spaces(*str); - if (**str == c) { - ++(*str); + const char *next = after_spaces(*str); + if (*next == c) { + *str = &next[1]; return 1; } else { return 0; diff --git a/vm.c b/vm.c index 2a44dd7..9974547 100644 --- a/vm.c +++ b/vm.c @@ -30,6 +30,7 @@ static const char *opcode_names[] = { [VM_EQUAL] = "EQUAL", [VM_REF] = "REF", [VM_BACKREF] = "BACKREF", + [VM_NODENT] = "NODENT", }; const char *opcode_name(enum VMOpcode o) @@ -337,6 +338,35 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, recursive_ref case VM_BACKREF: { return match_backref(str, op, (match_t*)op->args.backref); } + case VM_NODENT: { + if (str[-1] == '\0') { // First line + match_t *m = calloc(sizeof(match_t), 1); + m->start = str; + m->end = str; + m->op = op; + return m; + } else if (str[-1] != '\n') { + return NULL; // Not at beginning of line + } + const char *p = &str[-1]; + while (*p == '\n') --p; // Skip blank lines + while (p[-1] && p[-1] != '\n') --p; // Backtrack to start of last (nonblank) line + // Count indentation: + char denter = *p; + int dents = 0; + if (denter == ' ' || denter == '\t') { + for (; *p == denter; ++p) ++dents; + } + for (int i = 0; i < dents; i++) { + if (str[i] != denter) return NULL; + } + + match_t *m = calloc(sizeof(match_t), 1); + m->start = str; + m->end = &str[dents]; + m->op = op; + return m; + } default: { fprintf(stderr, "Unknown opcode: %d", op->op); _exit(1); -- cgit v1.2.3