diff options
| -rw-r--r-- | bpeg.c | 12 | ||||
| -rw-r--r-- | compiler.c | 4 | ||||
| -rw-r--r-- | file_loader.c | 9 | ||||
| -rw-r--r-- | file_loader.h | 1 | ||||
| -rw-r--r-- | grammars/builtins.bpeg | 12 | ||||
| -rw-r--r-- | types.h | 1 | ||||
| -rw-r--r-- | vm.c | 98 | ||||
| -rw-r--r-- | vm.h | 2 |
8 files changed, 87 insertions, 52 deletions
@@ -54,11 +54,7 @@ static int run_match(grammar_t *g, const char *filename, vm_op_t *pattern, unsig file_t *f = load_file(filename); match_t *m = match(g, f, f->contents, pattern, flags); if (m != NULL && m->end > m->start + 1) { - if (filename != NULL) { - if (isatty(STDOUT_FILENO)) printf("\033[1;4;33m%s\033[0m\n", filename); - else printf("%s\n", filename); - } - print_match(m, isatty(STDOUT_FILENO) ? "\033[0m" : NULL, (flags & BPEG_VERBOSE) != 0); + print_match(f, m, isatty(STDOUT_FILENO) ? "\033[0m" : NULL, (flags & BPEG_VERBOSE) != 0); destroy_file(&f); return 0; } else { @@ -158,6 +154,12 @@ int main(int argc, char *argv[]) } } + if (isatty(STDOUT_FILENO)) { + vm_op_t *p = bpeg_pattern(NULL, "(/)"); + check(p, "Failed to compile is-tty"); + add_def(g, NULL, "(/)", "is-tty", p); + } + vm_op_t *pattern = lookup(g, rule); check(pattern != NULL, "No such rule: '%s'", rule); @@ -252,6 +252,10 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str) } // Capture case '@': { + if (matchchar(&str, '@')) { // @@ -> current position + op->op = VM_POSITION; + break; + } op->op = VM_CAPTURE; str = after_spaces(str); if (matchchar(&str, '[')) { diff --git a/file_loader.c b/file_loader.c index b7c3817..33339b0 100644 --- a/file_loader.c +++ b/file_loader.c @@ -38,9 +38,10 @@ file_t *load_file(const char *filename) // Calculate line numbers: size_t linecap = 10; f->lines = calloc(sizeof(const char*), linecap); - f->nlines = 1; + f->nlines = 0; char *p = f->contents; for (size_t n = 0; p && *p; ++n) { + ++f->nlines; if (n >= linecap) f->lines = realloc(f->lines, sizeof(const char*)*(linecap *= 2)); f->lines[n] = p; @@ -79,6 +80,12 @@ size_t get_line_number(file_t *f, const char *p) return 0; } +size_t get_char_number(file_t *f, const char *p) +{ + size_t linenum = get_line_number(f, p); + return 1 + (size_t)(p - f->lines[linenum-1]); +} + const char *get_line(file_t *f, size_t line_number) { if (line_number == 0 || line_number > f->nlines) return NULL; diff --git a/file_loader.h b/file_loader.h index cb49373..37399ae 100644 --- a/file_loader.h +++ b/file_loader.h @@ -16,6 +16,7 @@ typedef struct { file_t *load_file(const char *filename); void destroy_file(file_t **f); size_t get_line_number(file_t *f, const char *p); +size_t get_char_number(file_t *f, const char *p); const char *get_line(file_t *f, size_t line_number); void fprint_line(FILE *dest, file_t *f, const char *start, const char *end, const char *msg); diff --git a/grammars/builtins.bpeg b/grammars/builtins.bpeg index 6ba31df..1191bb4 100644 --- a/grammars/builtins.bpeg +++ b/grammars/builtins.bpeg @@ -1,11 +1,17 @@ # Meta-rules for acting on everything pattern = !(/); # Not defined by default replacement = !(/); # Not defined by default -replace-all = 1+(...@replacement) ...; -find-all = 1+find-next%\n 0-1{!<\n => "\n"}; +replace-all = add-filename 1+(...@replacement) ...; +find-all = add-filename 1+find-next%\n 0-1{!<\n => "\n"}; find-next = matching-line / {..\n =>} find-next; only-matches = 1+{...@pattern=>'@1\n'}; -matching-line = 1+(..@pattern) ..$; +matching-line = add-linenum 1+(..@pattern) ..$; + +is-tty = !(/); # Defined as either always-match or always-fail, depending on stdout +print-linenums = is-tty; +print-filenames = is-tty; +add-filename = 0-1(print-filenames (is-tty {=>"\033[33;1;4m@&:\033[0m\n"} / {=>"@&:\n"})); +add-linenum = 0-1(print-linenums (is-tty {=>"\033[2m@#\033[5G|\033[0m "} / {=>"@#| "})); # Helper definitions (commonly used) indent = \n|1+(\t/' '); @@ -77,7 +77,6 @@ typedef struct vm_op_s { typedef struct match_s { // Where the match starts and ends (end is after the last character) const char *start, *end; - unsigned int is_capture:1, is_replacement:1, is_ref:1; const char *name_or_replacement; struct match_s *child, *nextsibling; vm_op_t *op; @@ -1,6 +1,9 @@ /* * vm.c - Code for the BPEG virtual machine that performs the matching. */ + +#include <ctype.h> + #include "vm.h" #include "grammar.h" #include "utils.h" @@ -54,7 +57,7 @@ static size_t push_backrefs(grammar_t *g, match_t *m) if (m == NULL) return 0; if (m->op->op == VM_REF) return 0; size_t count = 0; - if (m->is_capture && m->name_or_replacement) { + if (m->op->op == VM_CAPTURE && m->name_or_replacement) { ++count; push_backref(g, m->name_or_replacement, m->child); } @@ -232,7 +235,6 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un m->end = p->end; m->op = op; m->child = p; - m->is_capture = 1; if (op->args.capture.name) m->name_or_replacement = op->args.capture.name; return m; @@ -292,7 +294,6 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un } else { m->end = m->start; } - m->is_replacement = 1; m->name_or_replacement = op->args.replace.replacement; return m; } @@ -333,7 +334,6 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un m->op = op; m->child = best; m->name_or_replacement = op->args.s; - m->is_ref = 1; return m; } case VM_BACKREF: { @@ -470,6 +470,10 @@ void print_pattern(vm_op_t *op) fprintf(stderr, " with \"%s\"", op->args.replace.replacement); break; } + case VM_NODENT: { + fprintf(stderr, "the start of a line with the same indentation as the previous line"); + break; + } default: break; } } @@ -481,8 +485,8 @@ static match_t *get_capture_n(match_t *m, int *n) { if (!m) return NULL; if (*n == 0) return m; - if (m->is_capture && *n == 1) return m; - if (m->is_capture) --(*n); + if (m->op->op == VM_CAPTURE && *n == 1) return m; + if (m->op->op == VM_CAPTURE) --(*n); for (match_t *c = m->child; c; c = c->nextsibling) { match_t *cap = get_capture_n(c, n); if (cap) return cap; @@ -495,7 +499,7 @@ static match_t *get_capture_n(match_t *m, int *n) */ static match_t *get_capture_named(match_t *m, const char *name) { - if (m->is_capture && m->name_or_replacement && streq(m->name_or_replacement, name)) + if (m->op->op == VM_CAPTURE && m->name_or_replacement && streq(m->name_or_replacement, name)) return m; for (match_t *c = m->child; c; c = c->nextsibling) { match_t *cap = get_capture_named(c, name); @@ -504,12 +508,30 @@ static match_t *get_capture_named(match_t *m, const char *name) return NULL; } +static match_t *get_cap(match_t *m, const char **r) +{ + if (isdigit(**r)) { + int n = (int)strtol(*r, (char**)r, 10); + return get_capture_n(m->child, &n); + } else if (**r == '[') { + char *closing = strchr(*r+1, ']'); + if (!closing) return NULL; + ++(*r); + char *name = strndup(*r, (size_t)(closing-*r)); + match_t *cap = get_capture_named(m, name); + free(name); + *r = closing + 1; + return cap; + } + return NULL; +} + /* * Print a match with replacements and highlighting. */ -void print_match(match_t *m, const char *color, int verbose) +void print_match(file_t *f, match_t *m, const char *color, int verbose) { - if (m->is_replacement) { + if (m->op->op == VM_REPLACE) { if (color) printf("\033[0;34m"); for (const char *r = m->name_or_replacement; *r; ) { if (*r == '\\') { @@ -523,42 +545,36 @@ void print_match(match_t *m, const char *color, int verbose) } ++r; - match_t *cap = NULL; - switch (*r) { - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': { - int n = (int)strtol(r, (char**)&r, 10); - cap = get_capture_n(m->child, &n); - break; - } - case '[': { - char *closing = strchr(r+1, ']'); - if (!closing) { - fputc('@', stdout); - break; - } - ++r; - char *name = strndup(r, (size_t)(closing-r)); - cap = get_capture_named(m, name); - free(name); - r = closing + 1; - break; - } - default: { - fputc('@', stdout); - break; - } + if (*r == '@') { + fputc('@', stdout); + continue; } + if (*r == '#') { + ++r; + printf("%ld", get_line_number(f, m->start)); + continue; + } else if (*r == ':') { + ++r; + printf("%ld", get_char_number(f, m->start)); + continue; + } else if (*r == '&') { + ++r; + printf("%s", f->filename ? f->filename : "-"); + continue; + } + match_t *cap = get_cap(m, &r); if (cap != NULL) { - print_match(cap, color ? "\033[0;35m" : NULL, verbose); + print_match(f, cap, color ? "\033[0;35m" : NULL, verbose); if (color) printf("\033[0;34m"); + } else { + fputc('@', stdout); } } } else { const char *name = m->name_or_replacement; - if (verbose && m->is_ref && name) + if (verbose && m->op->op == VM_REF && name) printf(color ? "\033[0;2;35m{%s:" : "{%s", name); - //if (m->is_capture && name) + //if (m->op->op == VM_CAPTURE && name) // printf("\033[0;2;33m[%s:", name); const char *prev = m->start; for (match_t *child = m->child; child; child = child->nextsibling) { @@ -568,14 +584,14 @@ void print_match(match_t *m, const char *color, int verbose) continue; if (child->start > prev) printf("%s%.*s", color ? color : "", (int)(child->start - prev), prev); - print_match(child, color ? (m->is_capture ? "\033[0;31;1m" : color) : NULL, verbose); + print_match(f, child, color ? (m->op->op == VM_CAPTURE ? "\033[0;31;1m" : color) : NULL, verbose); prev = child->end; } if (m->end > prev) printf("%s%.*s", color ? color : "", (int)(m->end - prev), prev); - if (verbose && m->is_ref && name) + if (verbose && m->op->op == VM_REF && name) printf(color ? "\033[0;2;35m}" : "}"); - //if (m->is_capture && name) + //if (m->op->op == VM_CAPTURE && name) // printf("\033[0;2;33m]"); } } @@ -588,7 +604,7 @@ static match_t *match_backref(const char *str, vm_op_t *op, match_t *cap, unsign ret->op = op; match_t **dest = &ret->child; - if (cap->is_replacement) { + if (cap->op->op == VM_REPLACE) { for (const char *r = cap->name_or_replacement; *r; ) { if (*r == '\\') { ++r; @@ -15,7 +15,7 @@ const char *opcode_name(enum VMOpcode o); match_t *match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, unsigned int flags); void destroy_match(match_t **m); void print_pattern(vm_op_t *op); -void print_match(match_t *m, const char *color, int verbose); +void print_match(file_t *f, match_t *m, const char *color, int verbose); #endif // vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1 |
