aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--bpeg.c12
-rw-r--r--compiler.c4
-rw-r--r--file_loader.c9
-rw-r--r--file_loader.h1
-rw-r--r--grammars/builtins.bpeg12
-rw-r--r--types.h1
-rw-r--r--vm.c98
-rw-r--r--vm.h2
8 files changed, 87 insertions, 52 deletions
diff --git a/bpeg.c b/bpeg.c
index b8b4c0d..237eef4 100644
--- a/bpeg.c
+++ b/bpeg.c
@@ -54,11 +54,7 @@ static int run_match(grammar_t *g, const char *filename, vm_op_t *pattern, unsig
file_t *f = load_file(filename);
match_t *m = match(g, f, f->contents, pattern, flags);
if (m != NULL && m->end > m->start + 1) {
- if (filename != NULL) {
- if (isatty(STDOUT_FILENO)) printf("\033[1;4;33m%s\033[0m\n", filename);
- else printf("%s\n", filename);
- }
- print_match(m, isatty(STDOUT_FILENO) ? "\033[0m" : NULL, (flags & BPEG_VERBOSE) != 0);
+ print_match(f, m, isatty(STDOUT_FILENO) ? "\033[0m" : NULL, (flags & BPEG_VERBOSE) != 0);
destroy_file(&f);
return 0;
} else {
@@ -158,6 +154,12 @@ int main(int argc, char *argv[])
}
}
+ if (isatty(STDOUT_FILENO)) {
+ vm_op_t *p = bpeg_pattern(NULL, "(/)");
+ check(p, "Failed to compile is-tty");
+ add_def(g, NULL, "(/)", "is-tty", p);
+ }
+
vm_op_t *pattern = lookup(g, rule);
check(pattern != NULL, "No such rule: '%s'", rule);
diff --git a/compiler.c b/compiler.c
index fe7ad3f..99ff44f 100644
--- a/compiler.c
+++ b/compiler.c
@@ -252,6 +252,10 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str)
}
// Capture
case '@': {
+ if (matchchar(&str, '@')) { // @@ -> current position
+ op->op = VM_POSITION;
+ break;
+ }
op->op = VM_CAPTURE;
str = after_spaces(str);
if (matchchar(&str, '[')) {
diff --git a/file_loader.c b/file_loader.c
index b7c3817..33339b0 100644
--- a/file_loader.c
+++ b/file_loader.c
@@ -38,9 +38,10 @@ file_t *load_file(const char *filename)
// Calculate line numbers:
size_t linecap = 10;
f->lines = calloc(sizeof(const char*), linecap);
- f->nlines = 1;
+ f->nlines = 0;
char *p = f->contents;
for (size_t n = 0; p && *p; ++n) {
+ ++f->nlines;
if (n >= linecap)
f->lines = realloc(f->lines, sizeof(const char*)*(linecap *= 2));
f->lines[n] = p;
@@ -79,6 +80,12 @@ size_t get_line_number(file_t *f, const char *p)
return 0;
}
+/*
+ * Return the 1-based offset of `p` within the line of `f` that contains it,
+ * or 0 if get_line_number() reports no line for `p`.
+ */
+size_t get_char_number(file_t *f, const char *p)
+{
+    size_t linenum = get_line_number(f, p);
+    // get_line_number() can return 0, and get_line() treats line 0 as
+    // invalid. Indexing lines[linenum-1] in that case would wrap around to
+    // SIZE_MAX and read out of bounds, so report "no position" instead.
+    if (linenum == 0) return 0;
+    return 1 + (size_t)(p - f->lines[linenum-1]);
+}
+
const char *get_line(file_t *f, size_t line_number)
{
if (line_number == 0 || line_number > f->nlines) return NULL;
diff --git a/file_loader.h b/file_loader.h
index cb49373..37399ae 100644
--- a/file_loader.h
+++ b/file_loader.h
@@ -16,6 +16,7 @@ typedef struct {
file_t *load_file(const char *filename);
void destroy_file(file_t **f);
size_t get_line_number(file_t *f, const char *p);
+size_t get_char_number(file_t *f, const char *p);
const char *get_line(file_t *f, size_t line_number);
void fprint_line(FILE *dest, file_t *f, const char *start, const char *end, const char *msg);
diff --git a/grammars/builtins.bpeg b/grammars/builtins.bpeg
index 6ba31df..1191bb4 100644
--- a/grammars/builtins.bpeg
+++ b/grammars/builtins.bpeg
@@ -1,11 +1,17 @@
# Meta-rules for acting on everything
pattern = !(/); # Not defined by default
replacement = !(/); # Not defined by default
-replace-all = 1+(...@replacement) ...;
-find-all = 1+find-next%\n 0-1{!<\n => "\n"};
+replace-all = add-filename 1+(...@replacement) ...;
+find-all = add-filename 1+find-next%\n 0-1{!<\n => "\n"};
find-next = matching-line / {..\n =>} find-next;
only-matches = 1+{...@pattern=>'@1\n'};
-matching-line = 1+(..@pattern) ..$;
+matching-line = add-linenum 1+(..@pattern) ..$;
+
+is-tty = !(/); # Defined as either always-match or always-fail, depending on stdout
+print-linenums = is-tty;
+print-filenames = is-tty;
+add-filename = 0-1(print-filenames (is-tty {=>"\033[33;1;4m@&:\033[0m\n"} / {=>"@&:\n"}));
+add-linenum = 0-1(print-linenums (is-tty {=>"\033[2m@#\033[5G|\033[0m "} / {=>"@#| "}));
# Helper definitions (commonly used)
indent = \n|1+(\t/' ');
diff --git a/types.h b/types.h
index de408cc..2bef298 100644
--- a/types.h
+++ b/types.h
@@ -77,7 +77,6 @@ typedef struct vm_op_s {
typedef struct match_s {
// Where the match starts and ends (end is after the last character)
const char *start, *end;
- unsigned int is_capture:1, is_replacement:1, is_ref:1;
const char *name_or_replacement;
struct match_s *child, *nextsibling;
vm_op_t *op;
diff --git a/vm.c b/vm.c
index 637c4b8..da2b7df 100644
--- a/vm.c
+++ b/vm.c
@@ -1,6 +1,9 @@
/*
* vm.c - Code for the BPEG virtual machine that performs the matching.
*/
+
+#include <ctype.h>
+
#include "vm.h"
#include "grammar.h"
#include "utils.h"
@@ -54,7 +57,7 @@ static size_t push_backrefs(grammar_t *g, match_t *m)
if (m == NULL) return 0;
if (m->op->op == VM_REF) return 0;
size_t count = 0;
- if (m->is_capture && m->name_or_replacement) {
+ if (m->op->op == VM_CAPTURE && m->name_or_replacement) {
++count;
push_backref(g, m->name_or_replacement, m->child);
}
@@ -232,7 +235,6 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un
m->end = p->end;
m->op = op;
m->child = p;
- m->is_capture = 1;
if (op->args.capture.name)
m->name_or_replacement = op->args.capture.name;
return m;
@@ -292,7 +294,6 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un
} else {
m->end = m->start;
}
- m->is_replacement = 1;
m->name_or_replacement = op->args.replace.replacement;
return m;
}
@@ -333,7 +334,6 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un
m->op = op;
m->child = best;
m->name_or_replacement = op->args.s;
- m->is_ref = 1;
return m;
}
case VM_BACKREF: {
@@ -470,6 +470,10 @@ void print_pattern(vm_op_t *op)
fprintf(stderr, " with \"%s\"", op->args.replace.replacement);
break;
}
+ case VM_NODENT: {
+ fprintf(stderr, "the start of a line with the same indentation as the previous line");
+ break;
+ }
default: break;
}
}
@@ -481,8 +485,8 @@ static match_t *get_capture_n(match_t *m, int *n)
{
if (!m) return NULL;
if (*n == 0) return m;
- if (m->is_capture && *n == 1) return m;
- if (m->is_capture) --(*n);
+ if (m->op->op == VM_CAPTURE && *n == 1) return m;
+ if (m->op->op == VM_CAPTURE) --(*n);
for (match_t *c = m->child; c; c = c->nextsibling) {
match_t *cap = get_capture_n(c, n);
if (cap) return cap;
@@ -495,7 +499,7 @@ static match_t *get_capture_n(match_t *m, int *n)
*/
static match_t *get_capture_named(match_t *m, const char *name)
{
- if (m->is_capture && m->name_or_replacement && streq(m->name_or_replacement, name))
+ if (m->op->op == VM_CAPTURE && m->name_or_replacement && streq(m->name_or_replacement, name))
return m;
for (match_t *c = m->child; c; c = c->nextsibling) {
match_t *cap = get_capture_named(c, name);
@@ -504,12 +508,30 @@ static match_t *get_capture_named(match_t *m, const char *name)
return NULL;
}
+/*
+ * Resolve the capture reference that starts at `*r`.
+ *
+ * Two forms are recognized:
+ *   - a decimal number N: the Nth capture, searched via get_capture_n()
+ *     starting from m->child;
+ *   - "[name]": the capture with that name, searched via
+ *     get_capture_named() starting from m itself.
+ *
+ * On a recognized form, `*r` is advanced past the reference (past the digits,
+ * or past the closing ']') whether or not a capture was found. If `*r` does
+ * not start a reference, a '[' has no closing ']', or the name cannot be
+ * allocated, `*r` is left untouched and NULL is returned.
+ */
+static match_t *get_cap(match_t *m, const char **r)
+{
+    // <ctype.h> classifiers need a value representable as unsigned char;
+    // a plain char holding a byte >= 0x80 may be negative.
+    if (isdigit((unsigned char)**r)) {
+        // Use a local end pointer rather than casting the const char** to
+        // char** and letting strtol() write through it.
+        char *end = NULL;
+        int n = (int)strtol(*r, &end, 10);
+        *r = end;
+        return get_capture_n(m->child, &n);
+    } else if (**r == '[') {
+        const char *start = *r + 1;
+        const char *closing = strchr(start, ']');
+        if (!closing) return NULL;
+        char *name = strndup(start, (size_t)(closing - start));
+        if (!name) return NULL; // allocation failed: treat as "no capture"
+        match_t *cap = get_capture_named(m, name);
+        free(name);
+        *r = closing + 1;
+        return cap;
+    }
+    return NULL;
+}
+
/*
* Print a match with replacements and highlighting.
*/
-void print_match(match_t *m, const char *color, int verbose)
+void print_match(file_t *f, match_t *m, const char *color, int verbose)
{
- if (m->is_replacement) {
+ if (m->op->op == VM_REPLACE) {
if (color) printf("\033[0;34m");
for (const char *r = m->name_or_replacement; *r; ) {
if (*r == '\\') {
@@ -523,42 +545,36 @@ void print_match(match_t *m, const char *color, int verbose)
}
++r;
- match_t *cap = NULL;
- switch (*r) {
- case '0': case '1': case '2': case '3': case '4':
- case '5': case '6': case '7': case '8': case '9': {
- int n = (int)strtol(r, (char**)&r, 10);
- cap = get_capture_n(m->child, &n);
- break;
- }
- case '[': {
- char *closing = strchr(r+1, ']');
- if (!closing) {
- fputc('@', stdout);
- break;
- }
- ++r;
- char *name = strndup(r, (size_t)(closing-r));
- cap = get_capture_named(m, name);
- free(name);
- r = closing + 1;
- break;
- }
- default: {
- fputc('@', stdout);
- break;
- }
+ if (*r == '@') {
+ fputc('@', stdout);
+ continue;
}
+ if (*r == '#') {
+ ++r;
+ printf("%ld", get_line_number(f, m->start));
+ continue;
+ } else if (*r == ':') {
+ ++r;
+ printf("%ld", get_char_number(f, m->start));
+ continue;
+ } else if (*r == '&') {
+ ++r;
+ printf("%s", f->filename ? f->filename : "-");
+ continue;
+ }
+ match_t *cap = get_cap(m, &r);
if (cap != NULL) {
- print_match(cap, color ? "\033[0;35m" : NULL, verbose);
+ print_match(f, cap, color ? "\033[0;35m" : NULL, verbose);
if (color) printf("\033[0;34m");
+ } else {
+ fputc('@', stdout);
}
}
} else {
const char *name = m->name_or_replacement;
- if (verbose && m->is_ref && name)
+ if (verbose && m->op->op == VM_REF && name)
printf(color ? "\033[0;2;35m{%s:" : "{%s", name);
- //if (m->is_capture && name)
+ //if (m->op->op == VM_CAPTURE && name)
// printf("\033[0;2;33m[%s:", name);
const char *prev = m->start;
for (match_t *child = m->child; child; child = child->nextsibling) {
@@ -568,14 +584,14 @@ void print_match(match_t *m, const char *color, int verbose)
continue;
if (child->start > prev)
printf("%s%.*s", color ? color : "", (int)(child->start - prev), prev);
- print_match(child, color ? (m->is_capture ? "\033[0;31;1m" : color) : NULL, verbose);
+ print_match(f, child, color ? (m->op->op == VM_CAPTURE ? "\033[0;31;1m" : color) : NULL, verbose);
prev = child->end;
}
if (m->end > prev)
printf("%s%.*s", color ? color : "", (int)(m->end - prev), prev);
- if (verbose && m->is_ref && name)
+ if (verbose && m->op->op == VM_REF && name)
printf(color ? "\033[0;2;35m}" : "}");
- //if (m->is_capture && name)
+ //if (m->op->op == VM_CAPTURE && name)
// printf("\033[0;2;33m]");
}
}
@@ -588,7 +604,7 @@ static match_t *match_backref(const char *str, vm_op_t *op, match_t *cap, unsign
ret->op = op;
match_t **dest = &ret->child;
- if (cap->is_replacement) {
+ if (cap->op->op == VM_REPLACE) {
for (const char *r = cap->name_or_replacement; *r; ) {
if (*r == '\\') {
++r;
diff --git a/vm.h b/vm.h
index feb8eeb..2e2f888 100644
--- a/vm.h
+++ b/vm.h
@@ -15,7 +15,7 @@ const char *opcode_name(enum VMOpcode o);
match_t *match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, unsigned int flags);
void destroy_match(match_t **m);
void print_pattern(vm_op_t *op);
-void print_match(match_t *m, const char *color, int verbose);
+void print_match(file_t *f, match_t *m, const char *color, int verbose);
#endif
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1