From 204185ac43f74160e20129e08af353563a40488e Mon Sep 17 00:00:00 2001 From: Bruce Hill Date: Fri, 18 Sep 2020 22:32:36 -0700 Subject: Added rule to help skip binary files, and improved looping logic --- grammars/builtins.bpeg | 29 ++++++++++++++++++++++++++--- vm.c | 30 +++++++++++++++++++----------- 2 files changed, 45 insertions(+), 14 deletions(-) diff --git a/grammars/builtins.bpeg b/grammars/builtins.bpeg index 7490079..7f45f52 100644 --- a/grammars/builtins.bpeg +++ b/grammars/builtins.bpeg @@ -6,14 +6,31 @@ is-tty: no # Defined as either always-match or always-fail, depending on stdout print-line-numbers: is-tty print-filenames: is-tty highlight: is-tty +include-binary-files: no; +is-text-file: ^^ >32(\t/\n/\r/\x20-x7e/!\x00-x7f utf8-codepoint/$$) # Meta-rules for acting on everything: pattern: !'' # Not defined by default replacement: !'' # Not defined by default -replace-all: define-highlights add-filename 1+(...@hl-replacement) ... -find-all: define-highlights add-filename 1+find-next%\n 0-1{!<\n => "\n"} +replace-all: ( + (include-binary-files / is-text-file) + define-highlights + add-filename + 1+(...@hl-replacement) ... +) +find-all: ( + (include-binary-files / is-text-file) + define-highlights + add-filename + 1+find-next%\n 0-1{!<\n => "\n"} +) +only-matches: ( + (include-binary-files / is-text-file) + define-highlights + add-filename + 1+{...@hl-pattern=>'@1\n'} +) find-next: matching-line / {..\n =>} find-next -only-matches: define-highlights 1+{...@hl-pattern=>'@1\n'} matching-line: add-line-number 1+(..hl-pattern) ..$ add-filename: 0-1(print-filenames (is-tty {=>"\033[33;1;4m@&:\033[0m\n"} / {=>"@&:\n"})) add-line-number: 0-1(print-line-numbers (is-tty {=>"\033[2m@#\033[5G|\033[0m "} / {=>"@#| "})) @@ -25,6 +42,12 @@ define-highlights: highlight @[hl-start]{=>"\033[31;1m"} @[hl-end]{=>"\033[0m"} indent: \n|1+(\t/' ') dedent: $ !(\n|) indented-block: |` ..$ 0+(\n|..$) +utf8-codepoint: ( + \x00-x7f + / \xc0-xdf 1\x80-xbf + / \xe0-xef 2\x80-xbf + / \xf0-xf7 3\x80-xbf +) crlf: \r\n cr: \r; r: \r anglebraces: `< 0+(anglebraces / !`>.) `> diff --git a/vm.c b/vm.c index 4cdc307..c57d6c3 100644 --- a/vm.c +++ b/vm.c @@ -157,9 +157,10 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un m->op = op; match_t **dest = &m->child; - const char *prev = str; size_t reps = 0; - for (;;) { + ssize_t max = op->args.repetitions.max; + for (reps = 0; max == -1 || reps < (size_t)max; ++reps) { + const char *start = str; // Separator match_t *sep = NULL; if (op->args.repetitions.sep != NULL && reps > 0) { @@ -168,9 +169,23 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un str = sep->end; } match_t *p = _match(g, f, str, op->args.repetitions.repeat_pat, flags, rec); - if (p == NULL || (p->end == prev && reps > 0)) { // Prevent infinite loops + if (p == NULL) { + destroy_match(&sep); + break; + } + if (p->end == start && reps > 0) { + // Since no forward progress was made on either `pat` or + // `sep` and BPEG does not have mutable state, it's + // guaranteed that no progress will be made on the next + // loop either. We know that this will continue to loop + // until reps==max, so let's just cut to the chase instead + // of looping infinitely. destroy_match(&sep); destroy_match(&p); + if (op->args.repetitions.max == -1) + reps = ~(size_t)0; + else + reps = (size_t)op->args.repetitions.max; break; } if (sep) { @@ -180,16 +195,9 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un *dest = p; dest = &p->nextsibling; str = p->end; - prev = str; - - ++reps; - if (op->args.repetitions.max != -1 && reps > (size_t)op->args.repetitions.max) { - destroy_match(&m); - return NULL; - } } - if ((ssize_t)reps < op->args.repetitions.min) { + if (reps < (size_t)op->args.repetitions.min) { destroy_match(&m); return NULL; } -- cgit v1.2.3