Added rule to help skip binary files, and improved looping logic

This commit is contained in:
Bruce Hill 2020-09-18 22:32:36 -07:00
parent 53ef6fd628
commit 204185ac43
2 changed files with 45 additions and 14 deletions

View File

@@ -6,14 +6,31 @@ is-tty: no # Defined as either always-match or always-fail, depending on stdout
print-line-numbers: is-tty
print-filenames: is-tty
highlight: is-tty
# Binary-file switch: defined as `no` here; the rules that reference it try
# it first and fall back to is-text-file (presumably meant to be overridden
# when binary files should be searched too -- confirm against the CLI).
include-binary-files: no;
# Text-file heuristic: anchored with ^^ and checked through a `>` lookahead,
# it requires 32 consecutive items that are each a tab, newline, carriage
# return, printable ASCII byte (\x20-\x7e), a non-ASCII utf8-codepoint, or $$.
# (Reading ^^ / $$ as start / end of file is assumed from BPEG syntax.)
is-text-file: ^^ >32(\t/\n/\r/\x20-x7e/!\x00-x7f utf8-codepoint/$$)
# Meta-rules for acting on everything:
pattern: !'' # Not defined by default
replacement: !'' # Not defined by default
replace-all: define-highlights add-filename 1+(...@hl-replacement) ...
find-all: define-highlights add-filename 1+find-next%\n 0-1{!<\n => "\n"}
# replace-all: whole-file rule for replacement mode. The leading
# (include-binary-files / is-text-file) guard comes first in the sequence, so
# the rule only proceeds when one of those two patterns matches; after that it
# runs define-highlights and add-filename, then requires one or more
# hl-replacement captures (each preceded by `...`), followed by a trailing `...`.
replace-all: (
(include-binary-files / is-text-file)
define-highlights
add-filename
1+(...@hl-replacement) ...
)
# find-all: whole-file rule for search mode. The leading
# (include-binary-files / is-text-file) guard comes first in the sequence, so
# the rule only proceeds when one of those two patterns matches; after that it
# runs define-highlights and add-filename, then one or more find-next results
# separated by \n. The final 0-1{!<\n => "\n"} optionally inserts a "\n" when
# the preceding character is not already a newline.
find-all: (
(include-binary-files / is-text-file)
define-highlights
add-filename
1+find-next%\n 0-1{!<\n => "\n"}
)
# only-matches: whole-file rule that outputs just the matched text. The leading
# (include-binary-files / is-text-file) guard comes first in the sequence, so
# the rule only proceeds when one of those two patterns matches; after that it
# runs define-highlights and add-filename, then one or more replacements that
# rewrite `...@hl-pattern` to capture 1 followed by "\n" (presumably yielding
# one match per output line).
only-matches: (
(include-binary-files / is-text-file)
define-highlights
add-filename
1+{...@hl-pattern=>'@1\n'}
)
find-next: matching-line / {..\n =>} find-next
only-matches: define-highlights 1+{...@hl-pattern=>'@1\n'}
matching-line: add-line-number 1+(..hl-pattern) ..$
add-filename: 0-1(print-filenames (is-tty {=>"\033[33;1;4m@&:\033[0m\n"} / {=>"@&:\n"}))
add-line-number: 0-1(print-line-numbers (is-tty {=>"\033[2m@#\033[5G|\033[0m "} / {=>"@#| "}))
@@ -25,6 +42,12 @@ define-highlights: highlight @[hl-start]{=>"\033[31;1m"} @[hl-end]{=>"\033[0m"}
indent: \n|1+(\t/' ')
dedent: $ !(\n|)
indented-block: |` ..$ 0+(\n|..$)
# utf8-codepoint: one UTF-8 encoded codepoint, selected by its lead byte:
#   \x00-\x7f            single byte (ASCII)
#   \xc0-\xdf + 1 byte   two-byte sequence
#   \xe0-\xef + 2 bytes  three-byte sequence
#   \xf0-\xf7 + 3 bytes  four-byte sequence
# where every trailing byte must be a continuation byte in \x80-\xbf.
# This is a structural check only: overlong forms (e.g. lead bytes \xc0/\xc1)
# and lead bytes \xf5-\xf7 fall inside these ranges and are not rejected.
utf8-codepoint: (
\x00-x7f
/ \xc0-xdf 1\x80-xbf
/ \xe0-xef 2\x80-xbf
/ \xf0-xf7 3\x80-xbf
)
crlf: \r\n
cr: \r; r: \r
anglebraces: `< 0+(anglebraces / !`>.) `>

30
vm.c
View File

@@ -157,9 +157,10 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un
m->op = op;
match_t **dest = &m->child;
const char *prev = str;
size_t reps = 0;
for (;;) {
ssize_t max = op->args.repetitions.max;
for (reps = 0; max == -1 || reps < (size_t)max; ++reps) {
const char *start = str;
// Separator
match_t *sep = NULL;
if (op->args.repetitions.sep != NULL && reps > 0) {
@@ -168,9 +169,23 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un
str = sep->end;
}
match_t *p = _match(g, f, str, op->args.repetitions.repeat_pat, flags, rec);
if (p == NULL || (p->end == prev && reps > 0)) { // Prevent infinite loops
if (p == NULL) {
destroy_match(&sep);
break;
}
if (p->end == start && reps > 0) {
// Since no forward progress was made on either `pat` or
// `sep` and BPEG does not have mutable state, it's
// guaranteed that no progress will be made on the next
// loop either. We know that this will continue to loop
// until reps==max, so let's just cut to the chase instead
// of looping infinitely.
destroy_match(&sep);
destroy_match(&p);
if (op->args.repetitions.max == -1)
reps = ~(size_t)0;
else
reps = (size_t)op->args.repetitions.max;
break;
}
if (sep) {
@@ -180,16 +195,9 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un
*dest = p;
dest = &p->nextsibling;
str = p->end;
prev = str;
++reps;
if (op->args.repetitions.max != -1 && reps > (size_t)op->args.repetitions.max) {
destroy_match(&m);
return NULL;
}
}
if ((ssize_t)reps < op->args.repetitions.min) {
if (reps < (size_t)op->args.repetitions.min) {
destroy_match(&m);
return NULL;
}