diff options
| author | Bruce Hill <bruce@bruce-hill.com> | 2020-09-14 01:21:49 -0700 |
|---|---|---|
| committer | Bruce Hill <bruce@bruce-hill.com> | 2020-09-14 01:21:49 -0700 |
| commit | a82164505e89dc8257ef87844dfef1476e235a7f (patch) | |
| tree | f545e4a76b0ff0472bf6046b951230672ad18ce4 | |
| parent | 9f2d5464d6fd2c2aeb6dc234c64bd3aafe22d6e0 (diff) | |
Added nodent support (|)
| -rw-r--r-- | bpeg.bpeg | 3 | ||||
| -rw-r--r-- | compiler.c | 15 | ||||
| -rw-r--r-- | grammars/builtins.bpeg | 9 | ||||
| -rw-r--r-- | types.h | 1 | ||||
| -rw-r--r-- | utils.c | 6 | ||||
| -rw-r--r-- | vm.c | 30 |
6 files changed, 52 insertions, 12 deletions
@@ -8,7 +8,7 @@ String-pattern = *(`\ pat ?`; / .); pat = suffixed-pat / simple-pat; simple-pat = Empty / Upto / Dot / String / Char-range / Char / Escape-range / Escape / No - / Repeat / After / Before / Capture / Replace / Ref / parens; + / Nodent / Repeat / After / Before / Capture / Replace / Ref / parens; suffixed-pat = Eq-pat; Eq-pat = @[first]simple-pat "==" @[second]pat; @@ -29,6 +29,7 @@ escape-sequence = ( /`a/`b/`e/`n/`r/`t/`v / . / \n ); No = `! _ @pat; +Nodent = `|; Upto = 2-3`. ?>(_@pat); Repeat = ( @[min]int _ `- _ @[max]int @@ -211,6 +211,7 @@ vm_op_t *bpeg_simplepattern(const char *str) str = sep->end; set_range(op, min, max, pat, sep); } else { + str = pat->end; set_range(op, min, max, pat, NULL); } break; @@ -233,6 +234,7 @@ vm_op_t *bpeg_simplepattern(const char *str) str = sep->end; set_range(op, min, max, pat, sep); } else { + str = pat->end; set_range(op, min, max, pat, NULL); } break; @@ -345,8 +347,8 @@ vm_op_t *bpeg_simplepattern(const char *str) } // Empty choice (/) or {/} case '/': { - str = after_spaces(str); - if (*str == ')' || *str == '}') { + const char *next = after_spaces(str); + if (*next == ')' || *next == '}') { op->op = VM_EMPTY; } else { free(op); @@ -354,6 +356,10 @@ vm_op_t *bpeg_simplepattern(const char *str) } break; } + case '|': { + op->op = VM_NODENT; + break; + } default: { // Reference if (isalpha(c)) { @@ -373,9 +379,8 @@ vm_op_t *bpeg_simplepattern(const char *str) // Postfix operators: postfix: - str = after_spaces(str); - if (strncmp(str, "==", 2) == 0) { - str += 2; + if (strncmp(after_spaces(str), "==", 2) == 0) { + str = after_spaces(str)+2; vm_op_t *first = op; vm_op_t *second = bpeg_simplepattern(str); check(second, "Expected pattern after '=='"); diff --git a/grammars/builtins.bpeg b/grammars/builtins.bpeg index c871408..df03104 100644 --- a/grammars/builtins.bpeg +++ b/grammars/builtins.bpeg @@ -2,12 +2,15 @@ pattern = !(/); # Not defined by default replacement = !(/); # Not defined by default replace-all = +(...@replacement) ...; -find-all = {... >matching-line =>} +(matching-line/non-matching-line) ?{!<\n => "\n"}; +find-all = +find-next%\n ?{!<\n => "\n"}; +find-next = matching-line / {..\n =>} find-next; only-matches = +{...@pattern=>'@1\n'}; -matching-line = +(..@pattern)..$ ?\n; -non-matching-line = {..$=>}; +matching-line = +(..@pattern) ..$; # Helper definitions (commonly used) +indent = \n |+(\t/' '); +dedent = >(\n !|); +indented-block = |` ..$ *(\n|..$); crlf = \r\n; cr = \r; r = \r; anglebraces = `< *(anglebraces / !`>.) `>; @@ -26,6 +26,7 @@ enum VMOpcode { VM_REPLACE, VM_REF, VM_BACKREF, + VM_NODENT, }; /* @@ -46,9 +46,9 @@ const char *after_name(const char *str) */ int matchchar(const char **str, char c) { - *str = after_spaces(*str); - if (**str == c) { - ++(*str); + const char *next = after_spaces(*str); + if (*next == c) { + *str = &next[1]; return 1; } else { return 0; @@ -30,6 +30,7 @@ static const char *opcode_names[] = { [VM_EQUAL] = "EQUAL", [VM_REF] = "REF", [VM_BACKREF] = "BACKREF", + [VM_NODENT] = "NODENT", }; const char *opcode_name(enum VMOpcode o) @@ -337,6 +338,35 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, recursive_ref case VM_BACKREF: { return match_backref(str, op, (match_t*)op->args.backref); } + case VM_NODENT: { + if (str[-1] == '\0') { // First line + match_t *m = calloc(sizeof(match_t), 1); + m->start = str; + m->end = str; + m->op = op; + return m; + } else if (str[-1] != '\n') { + return NULL; // Not at beginning of line + } + const char *p = &str[-1]; + while (*p == '\n') --p; // Skip blank lines + while (p[-1] && p[-1] != '\n') --p; // Backtrack to start of last (nonblank) line + // Count indentation: + char denter = *p; + int dents = 0; + if (denter == ' ' || denter == '\t') { + for (; *p == denter; ++p) ++dents; + } + for (int i = 0; i < dents; i++) { + if (str[i] != denter) return NULL; + } + + match_t *m = calloc(sizeof(match_t), 1); + m->start = str; + m->end = &str[dents]; + m->op = op; + return m; + } default: { fprintf(stderr, "Unknown opcode: %d", op->op); _exit(1); |
