Major overhaul of how different modes of behavior work. Approximately 2x

speedup and 2x memory footprint reduction. Also removed --mode and
VM_HIDE (~ operator), and added --context. Printing works better now.
This commit is contained in:
Bruce Hill 2021-01-15 01:19:10 -08:00
parent 9b70cb4f62
commit 8ff80b09cc
11 changed files with 370 additions and 215 deletions

View File

@ -19,7 +19,7 @@ It's written in pure C with no dependencies.
* `-p` `--pattern <pat>` provide a pattern (equivalent to `bp '\(<pat>)'`)
* `-P` `--pattern-string <pat>` provide a string pattern (equivalent to `bp '<pat>'`, but may be useful if `'<pat>'` begins with a '-')
* `-r` `--replace <replacement>` replace the input pattern with the given replacement
* `-m` `--mode <mode>` set the behavior mode (default: `find-all`)
* `-c` `--context <N>` change how many lines of context are printed (`0`: no context, `all`: the whole file, `<N>`: matching lines and `<N-1>` lines before/after)
* `-g` `--grammar <grammar file>` use the specified file as a grammar
See `man ./bp.1` for more details.

12
bp.1
View File

@ -18,7 +18,7 @@ bp \- Bruce's Parsing Expression Grammar tool
[\fI-D\fR|\fI--define-string\fR \fI<name>\fR:\fI<string-pattern>\fR]
[\fI-r\fR|\fI--replace\fR \fI<replacement>\fR]
[\fI-g\fR|\fI--grammar\fR \fI<grammar file>\fR]
[\fI-m\fR|\fI--mode\fR \fI<mode>\fR]
[\fI-c\fR|\fI--context\fR \fI<N>\fR]
\fI<pattern>\fR
[[--] \fI<input files...>\fR]
.SH DESCRIPTION
@ -54,10 +54,12 @@ Replace all occurrences of the main pattern with the given string.
.B \-g\fR, \fB--grammar \fI<grammar file>\fR
Load the grammar from the given file.
.B \-m\fR, \fB--mode \fI<mode>\fR
The mode to operate in. Options are: \fIfind-all\fR (the default),
\fIonly-matches\fR, \fIpattern\fR, \fIreplacement\fR, \fIreplace-all\fR
(implied by \fB--replace\fR), or any other grammar rule name.
.B \-c\fR, \fB--context \fI<N>\fR
The number of lines of context to print. If \fI<N>\fR is 0, print only the
exact text of the matches. If \fI<N>\fR is "all", print the entire file.
Otherwise, if \fI<N>\fR is a positive integer, print the whole line on which
matches occur, as well as the \fI<N-1>\fR lines before and after the match. The
default value for this argument is 1 (print whole lines where matches occur).
.B \--help
Print the usage and exit.

250
bp.c
View File

@ -36,15 +36,16 @@ static const char *usage = (
" -p --pattern <pat> provide a pattern (equivalent to bp '\\(<pat>)')\n"
" -P --pattern-string <pat> provide a string pattern (may be useful if '<pat>' begins with a '-')\n"
" -r --replace <replacement> replace the input pattern with the given replacement\n"
" -m --mode <mode> set the behavior mode (defult: find-all)\n"
" -c --context <context> set number of lines of context to print (all: the whole file, 0: only the match, 1: the line, N: N lines of context)\n"
" -g --grammar <grammar file> use the specified file as a grammar\n");
static print_options_t print_options = 0;
#define USE_DEFAULT_CONTEXT -2
#define ALL_CONTEXT -1
static int print_color = 0;
static int print_line_numbers = 0;
__attribute__((nonnull))
static char *getflag(const char *flag, char *argv[], int *i);
__attribute__((nonnull(3)))
static int process_file(def_t *defs, const char *filename, vm_op_t *pattern, unsigned int flags);
//
// Return a pointer to the value part of a flag, if present, otherwise NULL.
@ -66,66 +67,173 @@ static char *getflag(const char *flag, char *argv[], int *i)
return NULL;
}
//
// Scan the first few dozen bytes of a file and return 1 if the contents all
// look like text, otherwise return 0. A byte counts as text if it is a tab,
// newline, carriage return, or any value >= 0x20. Files that cannot be opened
// or read yield 0; a file with no bytes to inspect yields 1.
//
static int is_text_file(const char *filename)
{
    int fd = open(filename, O_RDONLY);
    if (fd < 0) return 0;

    unsigned char buf[64];
    ssize_t len = read(fd, buf, sizeof(buf));
    // Release the descriptor before looking at the result, so that a failed
    // read() does not leak it.
    (void)close(fd);
    if (len < 0) return 0;

    for (ssize_t i = 0; i < len; i++) {
        if (!(buf[i] == '\t' || buf[i] == '\n' || buf[i] == '\r'
              || buf[i] >= 0x20))
            return 0;
    }
    return 1;
}
//
// Print matches in JSON format.
// Emits one JSON object per match found by next_match(), separated by ",\n"
// (the caller is responsible for any enclosing brackets). A missing filename
// is reported as "-". Returns the number of matches printed.
// NOTE(review): the filename is written verbatim, without JSON escaping.
//
static int print_matches_as_json(def_t *defs, file_t *f, vm_op_t *pattern, unsigned int flags)
{
    int matches = 0;
    const char *filename = f->filename ? f->filename : "-";
    for (match_t *m = NULL; (m = next_match(defs, f, m, pattern, flags)); ) {
        if (++matches > 1)
            printf(",\n");
        printf("{\"filename\":\"%s\",", filename);
        // The pointer difference is a ptrdiff_t; cast it so the argument
        // really matches the %ld conversion.
        printf("\"tree\":{\"rule\":\"text\",\"start\":%d,\"end\":%ld,\"children\":[",
               0, (long)(f->end - f->contents));
        json_match(f->contents, m, (flags & BP_VERBOSE) ? 1 : 0);
        printf("]}}\n");
    }
    return matches;
}
//
// Print matches in a visual explanation style
// The filename is printed as a header before the first match (underlined and
// colored when print_color is set); later matches are separated by two
// newlines. Returns the number of matches that were visualized.
//
static int explain_matches(def_t *defs, file_t *f, vm_op_t *pattern, unsigned int flags)
{
    int matches = 0;
    for (match_t *m = NULL; (m = next_match(defs, f, m, pattern, flags)); ) {
        if (++matches == 1) {
            // print_matches_as_json() treats f->filename as possibly NULL,
            // so never hand a NULL pointer to %s here: skip the header instead.
            if (f->filename) {
                if (print_color)
                    printf("\033[0;1;4;33m%s\033[0m\n", f->filename);
                else
                    printf("%s:\n", f->filename);
            }
        } else {
            printf("\n\n");
        }
        visualize_match(m);
    }
    return matches;
}
//
// Replace a file's contents with the text version of a match.
// (Useful for replacements)
// The output file is only opened (and thereby truncated) once a first match
// has been found, so files without matches are left untouched. The name of
// each modified file is printed to stdout. Returns the number of matches.
// NOTE(review): if print_errors() reports an error after the file has been
// opened for writing, the process exits with the file partially written.
//
static int inplace_modify_file(def_t *defs, file_t *f, vm_op_t *pattern, int context, unsigned int flags)
{
    // Need to do this before matching:
    intern_file(f);
    // Color and line numbers are disabled for text written back to the file.
    printer_t pr = {
        .file = f,
        .context_lines = context,
        .use_color = 0,
        .print_line_numbers = 0,
    };
    FILE *inplace_file = NULL; // Lazy-open this on the first match
    int matches = 0;
    for (match_t *m = NULL; (m = next_match(defs, f, m, pattern, flags)); ) {
        ++matches;
        if (print_errors(&pr, m) > 0)
            exit(1);
        // Lazy-open file for writing upon first match:
        if (inplace_file == NULL) {
            inplace_file = fopen(f->filename, "w");
            check(inplace_file, "Could not open file for writing: %s\n", f->filename);
        }
        print_match(inplace_file, &pr, m);
    }
    if (inplace_file) {
        // Write the text that follows the last match. Without this call the
        // rewritten file would end right after the final match.
        print_match(inplace_file, &pr, NULL);
        printf("%s\n", f->filename);
        fclose(inplace_file);
    }
    return matches;
}
//
// Print all the matches in a file.
// Before the first match of a file, the filename is printed as a header (with
// a blank line separating it from the output of previously printed files).
// After the last match, trailing context is printed and a final newline is
// added if the output did not already end with one. Exits the process if
// print_errors() reports an error. Returns the number of matches.
//
static int print_matches(def_t *defs, file_t *f, vm_op_t *pattern, int context, unsigned int flags)
{
    static int printed_filenames = 0; // Persists across calls (i.e. across files)
    int matches = 0;
    printer_t pr = {
        .file = f,
        .context_lines = context,
        .use_color = print_color,
        .print_line_numbers = print_line_numbers,
    };
    for (match_t *m = NULL; (m = next_match(defs, f, m, pattern, flags)); ) {
        if (print_errors(&pr, m) > 0)
            exit(1);
        if (++matches == 1) {
            if (printed_filenames++ > 0) printf("\n");
            // print_matches_as_json() treats f->filename as possibly NULL,
            // so never hand a NULL pointer to %s here: skip the header instead.
            if (f->filename) {
                if (print_color)
                    printf("\033[0;1;4;33m%s\033[0m\n", f->filename);
                else
                    printf("%s:\n", f->filename);
            }
        }
        print_match(stdout, &pr, m);
    }
    if (matches > 0) {
        // Print trailing context lines:
        print_match(stdout, &pr, NULL);
        // Ensure a trailing newline:
        if (pr.pos > f->contents && pr.pos[-1] != '\n') printf("\n");
    }
    return matches;
}
//
// For a given filename, open the file and attempt to match the given pattern
// against it, printing any results according to the flags.
//
static int process_file(def_t *defs, const char *filename, vm_op_t *pattern, unsigned int flags)
static int process_file(def_t *defs, const char *filename, vm_op_t *pattern, int context, unsigned int flags)
{
static int printed_matches = 0;
int success = 0;
file_t *f = load_file(NULL, filename);
check(f, "Could not open file: %s", filename);
if (flags & BP_INPLACE) // Need to do this before matching
intern_file(f);
match_t *m = match(defs, f, f->contents, pattern, flags);
if (m && print_errors(f, m, print_options) > 0)
exit(1);
if (m != NULL && m->end > m->start + 1) {
success = 1;
++printed_matches;
if (flags & BP_EXPLAIN) {
if (filename)
printf("\033[1;4m%s\033[0m\n", filename);
visualize_match(m);
} else if (flags & BP_LISTFILES) {
printf("%s\n", filename);
} else if (flags & BP_JSON) {
if (printed_matches > 1)
printf(",\n");
printf("{\"filename\":\"%s\",", filename ? filename : "-");
printf("\"tree\":{\"rule\":\"text\",\"start\":%d,\"end\":%ld,\"children\":[",
0, f->end - f->contents);
json_match(f->contents, m, (flags & BP_VERBOSE) ? 1 : 0);
printf("]}}\n");
} else if (flags & BP_INPLACE && filename) {
FILE *out = fopen(filename, "w");
print_match(out, f, m, 0);
fclose(out);
printf("%s\n", filename);
} else {
if (printed_matches > 1)
fputc('\n', stdout);
if (filename) {
if (print_options & PRINT_COLOR)
printf("\033[1;4;33m%s\033[0m\n", filename);
else
printf("%s:\n", filename);
}
print_match(stdout, f, m,
filename ? print_options : print_options & (print_options_t)~PRINT_LINE_NUMBERS);
int matches = 0;
if (flags & BP_EXPLAIN) {
matches += explain_matches(defs, f, pattern, flags);
} else if (flags & BP_LISTFILES) {
match_t *m = next_match(defs, f, NULL, pattern, flags);
if (m) {
recycle_if_unused(&m);
printf("%s\n", f->filename);
matches += 1;
}
} else if (flags & BP_JSON) {
matches += print_matches_as_json(defs, f, pattern, flags);
} else if (flags & BP_INPLACE) {
matches += inplace_modify_file(defs, f, pattern, context, flags);
} else {
matches += print_matches(defs, f, pattern, context, flags);
}
recycle_if_unused(&m);
#ifdef DEBUG_HEAP
check(recycle_all_matches() == 0, "Memory leak: there should no longer be any matches in use at this point.");
#endif
destroy_file(&f);
return success;
fflush(stdout);
return matches;
}
#define FLAG(f) (flag=getflag((f), argv, &i))
@ -133,9 +241,9 @@ static int process_file(def_t *defs, const char *filename, vm_op_t *pattern, uns
int main(int argc, char *argv[])
{
unsigned int flags = 0;
int context = USE_DEFAULT_CONTEXT;
char *flag = NULL;
char path[PATH_MAX] = {0};
const char *mode = "find-all";
def_t *defs = NULL;
@ -145,6 +253,10 @@ int main(int argc, char *argv[])
file_t *pat_file = spoof_file(&loaded_files, "<pattern>", "pattern");
vm_op_t *pattern = bp_pattern(pat_file, pat_file->contents);
// Define an opcode that is just a reference to the rule `replacement`
file_t *rep_file = spoof_file(&loaded_files, "<replacement>", "replacement");
vm_op_t *replacement = bp_pattern(rep_file, rep_file->contents);
// Load builtins:
if (access("/etc/xdg/bp/builtins.bp", R_OK) != -1) {
file_t *f = load_file(&loaded_files, "/etc/xdg/bp/builtins.bp");
@ -174,6 +286,7 @@ int main(int argc, char *argv[])
flags |= BP_JSON;
} else if (streq(argv[i], "--inplace")) {
flags |= BP_INPLACE;
context = ALL_CONTEXT;
} else if (streq(argv[i], "--ignore-case")) {
flags |= BP_IGNORECASE;
} else if (streq(argv[i], "--list-files")) {
@ -185,7 +298,8 @@ int main(int argc, char *argv[])
vm_op_t *rep = bp_replacement(replace_file, pattern, replace_file->contents);
check(rep, "Replacement failed to compile: %s", flag);
defs = with_def(defs, replace_file, strlen("replacement"), "replacement", rep);
mode = "replace-all";
pattern = replacement;
if (context == USE_DEFAULT_CONTEXT) context = 1;
} else if (FLAG("--grammar") || FLAG("-g")) {
file_t *f = load_file(&loaded_files, flag);
if (f == NULL) {
@ -231,8 +345,11 @@ int main(int argc, char *argv[])
check(p, "Pattern failed to compile: %s", flag);
defs = with_def(defs, arg_file, strlen("pattern"), "pattern", p);
++npatterns;
} else if (FLAG("--mode") || FLAG("-m")) {
mode = flag;
} else if (FLAG("--context") || FLAG("-c")) {
if (streq(flag, "all"))
context = ALL_CONTEXT;
else
context = (int)strtol(flag, NULL, 10);
} else if (argv[i][0] == '-' && argv[i][1] && argv[i][1] != '-') { // single-char flags
for (char *c = &argv[i][1]; *c; ++c) {
switch (*c) {
@ -240,7 +357,7 @@ int main(int argc, char *argv[])
case 'v': flags |= BP_VERBOSE; break; // -v
case 'e': flags |= BP_EXPLAIN; break; // -e
case 'j': flags |= BP_JSON; break; // -j
case 'I': flags |= BP_INPLACE; break; // -I
case 'I': flags |= BP_INPLACE; context = ALL_CONTEXT; break; // -I
case 'i': flags |= BP_IGNORECASE; break; // -i
case 'l': flags |= BP_LISTFILES; break; // -l
default:
@ -268,24 +385,28 @@ int main(int argc, char *argv[])
return 1;
}
if (isatty(STDOUT_FILENO)) {
print_options |= PRINT_COLOR | PRINT_LINE_NUMBERS;
if (context < 0) {
if (context == USE_DEFAULT_CONTEXT) context = 1;
else if (context != ALL_CONTEXT) context = 0;
}
// Define an opcode that is just a reference to the overarching mode (e.g. find-all)
if (lookup(defs, mode) == NULL) {
printf("The mode '%s' is not defined.\n", mode);
if (flags & BP_INPLACE && context != ALL_CONTEXT) {
printf("--inplace and --context are mutually exclusive.\n"
"Please drop one of the two arguments and try again.\n");
return 1;
}
file_t *mode_file = spoof_file(&loaded_files, "<mode>", mode);
vm_op_t *mode_op = bp_pattern(mode_file, mode_file->contents);
if (isatty(STDOUT_FILENO)) {
print_color = 1;
print_line_numbers = 1;
}
int found = 0;
if (flags & BP_JSON) printf("[");
if (i < argc) {
// Files pass in as command line args:
for (int nfiles = 0; i < argc; nfiles++, i++) {
found += process_file(defs, argv[i], mode_op, flags);
found += process_file(defs, argv[i], pattern, context, flags);
}
} else if (isatty(STDIN_FILENO)) {
// No files, no piped in input, so use * **/*:
@ -293,12 +414,13 @@ int main(int argc, char *argv[])
glob("*", 0, NULL, &globbuf);
glob("**/*", GLOB_APPEND, NULL, &globbuf);
for (size_t i = 0; i < globbuf.gl_pathc; i++) {
found += process_file(defs, globbuf.gl_pathv[i], mode_op, flags);
if (is_text_file(globbuf.gl_pathv[i]))
found += process_file(defs, globbuf.gl_pathv[i], pattern, context, flags);
}
globfree(&globbuf);
} else {
// Piped in input:
found += process_file(defs, NULL, mode_op, flags);
found += process_file(defs, NULL, pattern, context, flags);
}
if (flags & BP_JSON) printf("]\n");

View File

@ -465,17 +465,6 @@ static vm_op_t *_bp_simplepattern(file_t *f, const char *str)
op->end = pat->end;
return op;
}
// Hide
case '~': {
vm_op_t *pat = bp_simplepattern(f, str);
if (!pat)
file_err(f, str, str, "There should be a pattern after this '~'");
vm_op_t *op = new_op(f, start, VM_HIDE);
op->len = 0;
op->args.pat = pat;
op->end = pat->end;
return op;
}
// Special rules:
case '_': case '^': case '$': case '|': {
const char *name = NULL;

View File

@ -11,7 +11,7 @@ String-pattern: ..$$ % (\n / Nodent / Escape / `\ pat [`;])
pat: simple-pat !(__("!="/"==")) / suffixed-pat
simple-pat: Upto-and / Dot / String / Chars / Nodent / Escape-range
/ Escape / Repeat / Optional / No / Hide / After / Before / Capture
/ Escape / Repeat / Optional / No / After / Before / Capture
/ Ref / parens
suffixed-pat: (
@ -41,7 +41,6 @@ escape-sequence: (
/ `x 2 `0-9,a-f,A-F
)
No: `! (__@pat / @!=(''=>"Expected a pattern after the exclamation mark"))
Hide: `~ (__@pat / @!=(''=>"Expected a pattern after the tilde"))
Nodent: `\ `N
Upto-and: ".." [__@first=simple-pat] [__`%__@second=simple-pat]
Repeat: (

View File

@ -9,26 +9,6 @@ is-text-file: >32(\t/\n/\r/\x20-x7e/!\x00-x7f utf8-codepoint/$$)
# Meta-rules for acting on everything:
pattern: !'' # Not defined by default
replacement: !'' # Not defined by default
replace-all: (
(include-binary-files / is-text-file)
+(..replacement%\n) ..%\n
)
find-all: (
(include-binary-files / is-text-file)
*(!..pattern ~(..\n))
+(+(..@pattern) ..(\n/$$) / ~(..\n))
[!<\n => "\n"]
)
find-lines: (
(include-binary-files / is-text-file)
*(!(pattern$) ~(..\n))
+(@pattern (\n/$$) / ~(..\n))
[!<\n => "\n"]
)
only-matches: (
(include-binary-files / is-text-file)
+(..@pattern%\n =>'@1\n')
)
# Helper definitions (commonly used)
url: (

View File

@ -16,21 +16,15 @@ typedef struct match_node_s {
struct match_node_s *next;
} match_node_t;
typedef struct {
size_t line, printed_line;
const char *color;
} print_state_t;
static const char *color_hl = "\033[0;31;1m";
static const char *color_normal = "\033[0m";
__attribute__((nonnull, pure))
static int height_of_match(match_t *m);
__attribute__((nonnull))
static void _visualize_matches(match_node_t *firstmatch, int depth, const char *text, size_t textlen);
__attribute__((nonnull))
static void _visualize_patterns(match_t *m);
__attribute__((nonnull))
static void print_line_number(FILE *out, print_state_t *state, print_options_t options);
__attribute__((nonnull))
static void _print_match(FILE *out, file_t *f, match_t *m, print_state_t *state, print_options_t options);
__attribute__((nonnull(1,2)))
static inline void print_line_number(FILE *out, printer_t *pr, size_t line_number, const char *color);
//
// Return the height of a match object (i.e. the number of descendents of the
@ -170,88 +164,139 @@ static void _visualize_matches(match_node_t *firstmatch, int depth, const char *
}
//
// Recursively look for references to a rule called "pattern" and print an
// explanation for each one.
// Print a visualization of a match object.
//
static void _visualize_patterns(match_t *m)
void visualize_match(match_t *m)
{
if (m->op->type == VM_REF && streq(m->op->args.s, "pattern")) {
m = m->child;
match_node_t first = {.m = m};
_visualize_matches(&first, 0, m->start, (size_t)(m->end - m->start));
printf("\033[?7l"); // Disable line wrapping
match_node_t first = {.m = m};
_visualize_matches(&first, 0, m->start, (size_t)(m->end - m->start));
printf("\033[?7h"); // Re-enable line wrapping
}
//
// Print a line number, if it needs to be printed.
// line number of 0 means "just print an empty space for the number"
//
__attribute__((nonnull(1,2)))
static inline void print_line_number(FILE *out, printer_t *pr, size_t line_number, const char *color)
{
if (!pr->print_line_numbers) return;
if (!pr->needs_line_number) return;
if (line_number == 0) {
if (color) fprintf(out, "\033[0;2m \033(0\x78\033(B%s", color);
else fprintf(out, " |");
} else {
for (match_t *c = m->child; c; c = c->nextsibling)
_visualize_patterns(c);
if (color) fprintf(out, "\033[0;2m% 5ld\033(0\x78\033(B%s", line_number, color);
else fprintf(out, "% 5ld|", line_number);
}
pr->needs_line_number = 0;
}
//
// Write the file text in [start, end) to `out`, one line-sized piece at a
// time, so that a line number can be emitted in front of each piece when the
// printer asks for one. If `color` is non-NULL it is written before every
// piece. Afterwards the printer's position is set to `end`.
//
__attribute__((nonnull(1,2,3,4)))
static void print_between(FILE *out, printer_t *pr, const char *start, const char *end, const char *color)
{
    file_t *file = pr->file;
    for (const char *cursor = start; cursor < end; ) {
        size_t lineno = get_line_number(file, cursor);
        print_line_number(out, pr, lineno, color);

        // Stop at the start of the following line, but never beyond `end`:
        const char *stop = get_line(file, lineno + 1);
        if (stop == NULL || stop > end)
            stop = end;

        if (color != NULL)
            fputs(color, out);
        fprintf(out, "%.*s", (int)(stop - cursor), cursor);

        // A piece ending in a newline means the next output begins a new line:
        if (stop[-1] == '\n')
            pr->needs_line_number = 1;
        cursor = stop;
    }
    pr->pos = end;
}
//
// Compute where the leading context for a match at `pos` begins, based on
// `pr->context_lines`:
//   -1: everything from the printer's current position onward
//   >0: the start of the line (context_lines - 1) lines above `pos`, clamped
//       to line 1 and never earlier than the printer's current position
//   otherwise: no context, i.e. `pos` itself
//
static const char *context_before(printer_t *pr, const char *pos)
{
    const int lines = pr->context_lines;
    if (lines == -1)
        return pr->pos;
    if (lines <= 0)
        return pos;

    const size_t extra = (size_t)(lines - 1);
    size_t first_line = get_line_number(pr->file, pos);
    first_line = (first_line > extra) ? first_line - extra : 1;

    const char *line_start = get_line(pr->file, first_line);
    if (line_start == NULL || line_start < pr->pos)
        return pr->pos;
    return line_start;
}
//
// For a match object, print a visual explanation for each "pattern" matched
// inside it.
// Return a pointer to the last character of context information after `pos`,
// according to the context settings in `pr`
//
void visualize_match(match_t *m)
static const char *context_after(printer_t *pr, const char *pos)
{
    // Returns the (exclusive) end of the trailing context for text ending at
    // `pos`, based on `pr->context_lines`:
    //   -1: the end of the file
    //   >0: the start of the line following the last context line, or the end
    //       of the file when get_line() has no such line
    //   otherwise: no context, i.e. `pos` itself
    const int lines = pr->context_lines;
    if (lines == -1)
        return pr->file->end;
    if (lines <= 0)
        return pos;

    size_t last_line = get_line_number(pr->file, pos) + (size_t)(lines - 1);
    const char *next_line = get_line(pr->file, last_line + 1);
    if (next_line == NULL)
        return pr->file->end;
    return next_line;
}
//
// Print a line number.
// Print the text of a match (no context).
//
static void print_line_number(FILE *out, print_state_t *state, print_options_t options)
void _print_match(FILE *out, printer_t *pr, match_t *m)
{
state->printed_line = state->line;
if (!(options & PRINT_LINE_NUMBERS)) return;
if (options & PRINT_COLOR)
fprintf(out, "\033[0;2m% 5ld\033(0\x78\033(B%s", state->line, state->color);
else
fprintf(out, "% 5ld|", state->line);
}
pr->pos = m->start;
if (m->op->type == VM_REPLACE) {
size_t line_start = get_line_number(pr->file, m->start);
size_t line_end = get_line_number(pr->file, m->end);
size_t line = line_start;
//
// Helper function for print_match(), using a struct to keep track of some state.
//
static void _print_match(FILE *out, file_t *f, match_t *m, print_state_t *state, print_options_t options)
{
static const char *hl = "\033[0;31;1m";
const char *old_color = state->color;
if (m->op->type == VM_HIDE) {
// TODO: handle replacements?
for (const char *p = m->start; p < m->end; p++) {
if (*p == '\n') ++state->line;
}
} else if (m->op->type == VM_REPLACE) {
if (options & PRINT_COLOR && state->color != hl) {
state->color = hl;
fprintf(out, "%s", state->color);
}
if (pr->use_color) printf("%s", color_hl);
const char *text = m->op->args.replace.text;
const char *end = &text[m->op->args.replace.len];
// TODO: clean up the line numbering code
for (const char *r = text; r < end; ) {
print_line_number(out, pr, line > line_end ? 0 : line, pr->use_color ? color_hl : NULL);
// Capture substitution
if (*r == '@' && r[1] && r[1] != '@') {
++r;
match_t *cap = get_capture(m, &r);
if (cap != NULL) {
_print_match(out, f, cap, state, options);
print_match(out, pr, cap);
continue;
} else {
--r;
}
}
if (state->printed_line != state->line)
print_line_number(out, state, options);
if (*r == '\\') {
++r;
unsigned char c = unescapechar(r, &r);
fputc(c, out);
if (c == '\n') ++state->line;
if (c == '\n') {
++line;
pr->needs_line_number = 1;
}
continue;
} else if (*r == '\n') {
fputc('\n', out);
++state->line;
++line;
pr->needs_line_number = 1;
++r;
continue;
} else {
@ -260,70 +305,74 @@ static void _print_match(FILE *out, file_t *f, match_t *m, print_state_t *state,
continue;
}
}
print_line_number(out, pr, line > line_end ? 0 : line, pr->use_color ? color_hl : NULL);
} else {
if (m->op->type == VM_CAPTURE) {
if (options & PRINT_COLOR && state->color != hl) {
state->color = hl;
fprintf(out, "%s", state->color);
}
}
const char *prev = m->start;
for (match_t *child = m->child; child; child = child->nextsibling) {
// Skip children from e.g. zero-width matches like >@foo
if (!(prev <= child->start && child->start <= m->end &&
prev <= child->end && child->end <= m->end))
continue;
if (child->start > prev) {
for (const char *p = prev; p < child->start; ++p) {
if (state->printed_line != state->line)
print_line_number(out, state, options);
fputc(*p, out);
if (*p == '\n') ++state->line;
}
}
_print_match(out, f, child, state, options);
if (child->start > prev)
print_between(out, pr, prev, child->start, pr->use_color ? color_hl : NULL);
print_match(out, pr, child);
prev = child->end;
}
if (m->end > prev) {
for (const char *p = prev; p < m->end; ++p) {
if (state->printed_line != state->line)
print_line_number(out, state, options);
fputc(*p, out);
if (*p == '\n') ++state->line;
}
}
}
if (options & PRINT_COLOR && old_color != state->color) {
fprintf(out, "%s", old_color);
state->color = old_color;
if (m->end > prev)
print_between(out, pr, prev, m->end, pr->use_color ? color_hl : NULL);
}
pr->pos = m->end;
}
//
// Print a match with replacements and highlighting.
// Print the text of a match and any context.
//
void print_match(FILE *out, file_t *f, match_t *m, print_options_t options)
void print_match(FILE *out, printer_t *pr, match_t *m)
{
print_state_t state = {.line = 1, .color = "\033[0m"};
_print_match(out, f, m, &state, options);
int first = (pr->pos == NULL);
if (first) { // First match printed:
pr->pos = pr->file->contents;
pr->needs_line_number = 1;
}
if (m) {
const char *before_m = context_before(pr, m->start);
if (!first) {
const char *after_last = context_after(pr, pr->pos);
if (after_last >= before_m) {
// Overlapping ranges:
before_m = pr->pos;
} else {
// Non-overlapping ranges:
print_between(out, pr, pr->pos, after_last, pr->use_color ? color_normal : NULL);
if (pr->context_lines > 1)
printf("\n"); // Gap between chunks
}
}
print_between(out, pr, before_m, m->start, pr->use_color ? color_normal : NULL);
_print_match(out, pr, m);
if (pr->use_color) printf("%s", color_normal);
} else {
// After the last match is printed, print the trailing context:
const char *after_last = context_after(pr, pr->pos);
print_between(out, pr, pr->pos, after_last, pr->use_color ? color_normal : NULL);
}
}
//
// Print any errors that are present in the given match object.
//
int print_errors(file_t *f, match_t *m, print_options_t options)
int print_errors(printer_t *pr, match_t *m)
{
int ret = 0;
if (m->op->type == VM_CAPTURE && m->op->args.capture.name && streq(m->op->args.capture.name, "!")) {
printf("\033[31;1m");
print_match(stdout, f, m, options);
print_match(stdout, pr, m);
printf("\033[0m\n");
fprint_line(stdout, f, m->start, m->end, " ");
fprint_line(stdout, pr->file, m->start, m->end, " ");
return 1;
}
if (m->child) ret += print_errors(f, m->child, options);
if (m->nextsibling) ret += print_errors(f, m->nextsibling, options);
if (m->child) ret += print_errors(pr, m->child);
if (m->nextsibling) ret += print_errors(pr, m->nextsibling);
return ret;
}

View File

@ -6,17 +6,21 @@
#include "types.h"
typedef enum {
PRINT_COLOR = 1<<0,
PRINT_LINE_NUMBERS = 1<<1,
} print_options_t;
// State carried across successive print_match() calls for a single file.
typedef struct {
file_t *file; // The file whose text is being printed
const char *pos; // Where printing has reached so far; NULL until print_match() is first called
int context_lines; // -1: print everything, 0: only the matched text, N > 0: N lines of context
unsigned int needs_line_number:1; // Set when a line number should precede the next output; cleared once one is printed
unsigned int use_color:1; // Nonzero to emit terminal color escape sequences
unsigned int print_line_numbers:1; // Nonzero to print line numbers at all
} printer_t;
__attribute__((nonnull))
void visualize_match(match_t *m);
__attribute__((nonnull(1,2)))
void print_match(FILE *out, printer_t *pr, match_t *m);
__attribute__((nonnull))
void print_match(FILE *out, file_t *f, match_t *m, print_options_t options);
__attribute__((nonnull))
int print_errors(file_t *f, match_t *m, print_options_t options);
int print_errors(printer_t *pr, match_t *m);
#endif
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1

View File

@ -30,7 +30,6 @@ enum VMOpcode {
VM_BEFORE,
VM_AFTER,
VM_CAPTURE,
VM_HIDE,
VM_OTHERWISE,
VM_CHAIN,
VM_EQUAL,

29
vm.c
View File

@ -121,6 +121,25 @@ static const char *match_backref(const char *str, vm_op_t *op, match_t *cap, uns
}
//
// Scan forward through the file for the next place where `op` matches.
// With a NULL `prev`, scanning starts at the beginning of the file. Otherwise
// it resumes at the end of `prev` (one byte further if `prev` was empty, so the
// scan always advances), and `prev` is handed to recycle_if_unused().
// Returns the match found, or NULL once the end of the file is reached.
//
match_t *next_match(def_t *defs, file_t *f, match_t *prev, vm_op_t *op, unsigned int flags)
{
    const char *cursor = f->contents;
    if (prev != NULL) {
        cursor = prev->end;
        if (prev->end <= prev->start)
            ++cursor; // Step past a zero-width match
        recycle_if_unused(&prev);
    }
    while (cursor < f->end) {
        match_t *found = match(defs, f, cursor, op, flags);
        if (found != NULL)
            return found;
        ++cursor;
    }
    return NULL;
}
//
// Run virtual machine operation against a string and return
// a match struct, or NULL if no match is found.
@ -316,16 +335,6 @@ match_t *match(def_t *defs, file_t *f, const char *str, vm_op_t *op, unsigned in
ADD_OWNER(m->child, p);
return m;
}
case VM_HIDE: {
match_t *p = match(defs, f, str, op->args.pat, flags);
if (p == NULL) return NULL;
match_t *m = new_match();
m->start = str;
m->end = p->end;
m->op = op;
ADD_OWNER(m->child, p);
return m;
}
case VM_OTHERWISE: {
match_t *m = match(defs, f, str, op->args.multiple.first, flags);
if (m == NULL) m = match(defs, f, str, op->args.multiple.second, flags);

2
vm.h
View File

@ -8,6 +8,8 @@
#include "types.h"
__attribute__((nonnull(2,4)))
match_t *next_match(def_t *defs, file_t *f, match_t *prev, vm_op_t *op, unsigned int flags);
__attribute__((hot, nonnull(2,3,4)))
match_t *match(def_t *defs, file_t *f, const char *str, vm_op_t *op, unsigned int flags);
__attribute__((nonnull))