about summary refs log tree commit diff
diff options
context:
space:
mode:
author Bruce Hill <bruce@bruce-hill.com> 2020-09-18 22:32:36 -0700
committer Bruce Hill <bruce@bruce-hill.com> 2020-09-18 22:32:36 -0700
commit 204185ac43f74160e20129e08af353563a40488e (patch)
tree 0c1261e45c72a280d886051b47547f0d53588d80
parent 53ef6fd628201aa32b16934efb23ee8039dbce2e (diff)
Added rule to help skip binary files, and improved looping logic
-rw-r--r-- grammars/builtins.bpeg 29
-rw-r--r-- vm.c 30
2 files changed, 45 insertions, 14 deletions
diff --git a/grammars/builtins.bpeg b/grammars/builtins.bpeg
index 7490079..7f45f52 100644
--- a/grammars/builtins.bpeg
+++ b/grammars/builtins.bpeg
@@ -6,14 +6,31 @@ is-tty: no # Defined as either always-match or always-fail, depending on stdout
print-line-numbers: is-tty
print-filenames: is-tty
highlight: is-tty
+include-binary-files: no;
+is-text-file: ^^ >32(\t/\n/\r/\x20-x7e/!\x00-x7f utf8-codepoint/$$)
# Meta-rules for acting on everything:
pattern: !'' # Not defined by default
replacement: !'' # Not defined by default
-replace-all: define-highlights add-filename 1+(...@hl-replacement) ...
-find-all: define-highlights add-filename 1+find-next%\n 0-1{!<\n => "\n"}
+replace-all: (
+ (include-binary-files / is-text-file)
+ define-highlights
+ add-filename
+ 1+(...@hl-replacement) ...
+)
+find-all: (
+ (include-binary-files / is-text-file)
+ define-highlights
+ add-filename
+ 1+find-next%\n 0-1{!<\n => "\n"}
+)
+only-matches: (
+ (include-binary-files / is-text-file)
+ define-highlights
+ add-filename
+ 1+{...@hl-pattern=>'@1\n'}
+)
find-next: matching-line / {..\n =>} find-next
-only-matches: define-highlights 1+{...@hl-pattern=>'@1\n'}
matching-line: add-line-number 1+(..hl-pattern) ..$
add-filename: 0-1(print-filenames (is-tty {=>"\033[33;1;4m@&:\033[0m\n"} / {=>"@&:\n"}))
add-line-number: 0-1(print-line-numbers (is-tty {=>"\033[2m@#\033[5G|\033[0m "} / {=>"@#| "}))
@@ -25,6 +42,12 @@ define-highlights: highlight @[hl-start]{=>"\033[31;1m"} @[hl-end]{=>"\033[0m"}
indent: \n|1+(\t/' ')
dedent: $ !(\n|)
indented-block: |` ..$ 0+(\n|..$)
+utf8-codepoint: (
+ \x00-x7f
+ / \xc0-xdf 1\x80-xbf
+ / \xe0-xef 2\x80-xbf
+ / \xf0-xf7 3\x80-xbf
+)
crlf: \r\n
cr: \r; r: \r
anglebraces: `< 0+(anglebraces / !`>.) `>
diff --git a/vm.c b/vm.c
index 4cdc307..c57d6c3 100644
--- a/vm.c
+++ b/vm.c
@@ -157,9 +157,10 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un
m->op = op;
match_t **dest = &m->child;
- const char *prev = str;
size_t reps = 0;
- for (;;) {
+ ssize_t max = op->args.repetitions.max;
+ for (reps = 0; max == -1 || reps < (size_t)max; ++reps) {
+ const char *start = str;
// Separator
match_t *sep = NULL;
if (op->args.repetitions.sep != NULL && reps > 0) {
@@ -168,9 +169,23 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un
str = sep->end;
}
match_t *p = _match(g, f, str, op->args.repetitions.repeat_pat, flags, rec);
- if (p == NULL || (p->end == prev && reps > 0)) { // Prevent infinite loops
+ if (p == NULL) {
+ destroy_match(&sep);
+ break;
+ }
+ if (p->end == start && reps > 0) {
+ // Since no forward progress was made on either `pat` or
+ // `sep` and BPEG does not have mutable state, it's
+ // guaranteed that no progress will be made on the next
+ // loop either. We know that this will continue to loop
+ // until reps==max, so let's just cut to the chase instead
+ // of looping infinitely.
destroy_match(&sep);
destroy_match(&p);
+ if (op->args.repetitions.max == -1)
+ reps = ~(size_t)0;
+ else
+ reps = (size_t)op->args.repetitions.max;
break;
}
if (sep) {
@@ -180,16 +195,9 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un
*dest = p;
dest = &p->nextsibling;
str = p->end;
- prev = str;
-
- ++reps;
- if (op->args.repetitions.max != -1 && reps > (size_t)op->args.repetitions.max) {
- destroy_match(&m);
- return NULL;
- }
}
- if ((ssize_t)reps < op->args.repetitions.min) {
+ if (reps < (size_t)op->args.repetitions.min) {
destroy_match(&m);
return NULL;
}