aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--grammar.c13
-rw-r--r--grammar.h2
-rw-r--r--viz.c112
-rw-r--r--viz.h3
-rw-r--r--vm.c327
-rw-r--r--vm.h2
6 files changed, 221 insertions, 238 deletions
diff --git a/grammar.c b/grammar.c
index 229fb0e..aa75522 100644
--- a/grammar.c
+++ b/grammar.c
@@ -98,6 +98,19 @@ void push_backref(grammar_t *g, const char *name, match_t *capture)
g->backrefs[i].op = op;
}
+/*
+ * Walk the match tree rooted at `m` (the node itself, then its children, then
+ * its following siblings) and push a backref onto `g` for every named capture.
+ * A VM_REF match stops the walk at that node: nothing is pushed for it, and
+ * neither its children nor its siblings are visited.
+ * Returns the number of backrefs that were pushed.
+ */
+size_t push_backrefs(grammar_t *g, match_t *m)
+{
+    if (m->op->op == VM_REF)
+        return 0;
+
+    size_t pushed = 0;
+    // Only captures can carry a name; unnamed captures leave `name` NULL
+    const char *name = (m->op->op == VM_CAPTURE) ? m->op->args.capture.name : NULL;
+    if (name) {
+        push_backref(g, name, m->child);
+        pushed = 1;
+    }
+
+    if (m->child)
+        pushed += push_backrefs(g, m->child);
+    if (m->nextsibling)
+        pushed += push_backrefs(g, m->nextsibling);
+    return pushed;
+}
+
void pop_backrefs(grammar_t *g, size_t count)
{
check(count <= g->backrefcount, "Attempt to pop %ld backrefs when there are only %ld", count, g->backrefcount);
diff --git a/grammar.h b/grammar.h
index 8e1980d..0e77cad 100644
--- a/grammar.h
+++ b/grammar.h
@@ -13,6 +13,8 @@ void add_def(grammar_t *g, file_t *f, const char *src, const char *name, vm_op_t
__attribute__((nonnull))
void push_backref(grammar_t *g, const char *name, match_t *capture);
__attribute__((nonnull))
+size_t push_backrefs(grammar_t *g, match_t *m);
+__attribute__((nonnull))
void pop_backrefs(grammar_t *g, size_t count);
__attribute__((nonnull))
vm_op_t *load_grammar(grammar_t *g, file_t *f);
diff --git a/viz.c b/viz.c
index 311564b..a4aa565 100644
--- a/viz.c
+++ b/viz.c
@@ -9,7 +9,12 @@
#include "types.h"
#include "utils.h"
#include "viz.h"
+#include "vm.h"
+typedef struct { // mutable state threaded through the recursive match printer
+ size_t line, printed_line; // line: current line number (bumped at each '\n'); printed_line: value of line when print_line_number() last ran
+ const char *color; // escape sequence currently in effect on the output stream
+} print_state_t;
static int match_height(match_t *m)
{
@@ -147,4 +152,111 @@ void visualize_match(match_t *m)
printf("\033[?7h");
}
+/*
+ * Mark the current line as having had its prefix handled and, when
+ * PRINT_LINE_NUMBERS is set, write the line number to `out`.
+ * With PRINT_COLOR the number is dimmed and followed by a line-drawing
+ * vertical bar (DEC special graphics 'x'), after which the active color in
+ * `state` is re-emitted; otherwise the number is followed by a plain '|'.
+ */
+static void print_line_number(FILE *out, print_state_t *state, print_options_t options)
+{
+    // Recorded even when numbering is off, so callers comparing printed_line
+    // against line do not call back in for every character of the same line.
+    state->printed_line = state->line;
+    if (!(options & PRINT_LINE_NUMBERS)) return;
+
+    // state->line is a size_t, but "%ld" requires a long argument: convert
+    // explicitly instead of passing a mismatched type through varargs.
+    long lineno = (long)state->line;
+    if (options & PRINT_COLOR)
+        fprintf(out, "\033[0;2m% 5ld\033(0\x78\033(B%s", lineno, state->color);
+    else
+        fprintf(out, "% 5ld|", lineno);
+}
+
+/*
+ * Print a match with replacements and highlighting.
+ * Recursively writes the text covered by `m` to `out`: a VM_HIDE match emits
+ * nothing (its newlines are still counted), a VM_REPLACE match emits its
+ * replacement text with @-references expanded to the referenced captures, and
+ * any other match emits its own text interleaved with its children.
+ * `state` carries the line counter and the active color across the recursion;
+ * the color in effect on entry is restored before returning.
+ * `f` is only passed along to recursive calls.
+ */
+static void _print_match(FILE *out, file_t *f, match_t *m, print_state_t *state, print_options_t options)
+{
+ static const char *hl = "\033[0;31;1m"; // highlight color (reset, red, bold)
+ const char *old_color = state->color; // remembered so it can be restored on exit
+ if (m->op->op == VM_HIDE) {
+ // TODO: handle replacements?
+ for (const char *p = m->start; p < m->end; p++) { // nothing is printed, but keep the line counter in sync
+ if (*p == '\n') ++state->line;
+ }
+ } else if (m->op->op == VM_REPLACE) {
+ if (options & PRINT_COLOR && state->color != hl) { // pointer comparison: only switch if not already highlighting
+ state->color = hl;
+ fprintf(out, "%s", state->color);
+ }
+ const char *text = m->op->args.replace.text;
+ const char *end = &text[m->op->args.replace.len];
+ for (const char *r = text; r < end; ) { // r is advanced inside the body
+ if (*r == '@' && r[1] && r[1] != '@') { // '@' followed by something other than '@' or NUL: possible capture reference
+ ++r;
+ match_t *cap = get_capture(m, &r); // may advance r past the reference
+ if (cap != NULL) {
+ _print_match(out, f, cap, state, options);
+ continue;
+ } else {
+ --r; // no such capture: step back one character (this is the '@' only if get_capture left r untouched)
+ }
+ }
+
+ if (state->printed_line != state->line) // first output on this line: emit the line-number prefix
+ print_line_number(out, state, options);
+
+ if (*r == '\\') {
+ ++r;
+ unsigned char c = unescapechar(r, &r); // decodes the escape and advances r
+ fputc(c, out);
+ if (c == '\n') ++state->line;
+ continue;
+ } else if (*r == '\n') {
+ fputc('\n', out);
+ ++state->line;
+ ++r;
+ continue;
+ } else {
+ fputc(*r, out);
+ ++r;
+ continue;
+ }
+ }
+ } else {
+ if (m->op->op == VM_CAPTURE) { // captures are highlighted when color is enabled
+ if (options & PRINT_COLOR && state->color != hl) {
+ state->color = hl;
+ fprintf(out, "%s", state->color);
+ }
+ }
+
+ const char *prev = m->start; // end of the text emitted so far within this match
+ for (match_t *child = m->child; child; child = child->nextsibling) {
+ // Skip children from e.g. zero-width matches like >@foo
+ if (!(prev <= child->start && child->start <= m->end &&
+ prev <= child->end && child->end <= m->end)) // child must lie within [prev, m->end]
+ continue;
+ if (child->start > prev) { // literal text between the previous child and this one
+ for (const char *p = prev; p < child->start; ++p) {
+ if (state->printed_line != state->line)
+ print_line_number(out, state, options);
+ fputc(*p, out);
+ if (*p == '\n') ++state->line;
+ }
+ }
+ _print_match(out, f, child, state, options);
+ prev = child->end;
+ }
+ if (m->end > prev) { // trailing text after the last printed child
+ for (const char *p = prev; p < m->end; ++p) {
+ if (state->printed_line != state->line)
+ print_line_number(out, state, options);
+ fputc(*p, out);
+ if (*p == '\n') ++state->line;
+ }
+ }
+ }
+ if (options & PRINT_COLOR && old_color != state->color) { // restore the color that was active on entry
+ fprintf(out, "%s", old_color);
+ state->color = old_color;
+ }
+}
+
+/*
+ * Entry point for printing a match tree: print `m` to `out` according to
+ * `options`, starting the line count at 1 with the attribute-reset escape as
+ * the base color.
+ */
+void print_match(FILE *out, file_t *f, match_t *m, print_options_t options)
+{
+    print_state_t state = {0}; // printed_line starts at 0, so line 1 still gets its prefix
+    state.line = 1;
+    state.color = "\033[0m";
+    _print_match(out, f, m, &state, options);
+}
+
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
diff --git a/viz.h b/viz.h
index a918661..8b11bcc 100644
--- a/viz.h
+++ b/viz.h
@@ -9,7 +9,10 @@ typedef struct match_node_s {
struct match_node_s *next;
} match_node_t;
+__attribute__((nonnull))
void visualize_match(match_t *m);
+__attribute__((nonnull))
+void print_match(FILE *out, file_t *f, match_t *m, print_options_t options);
#endif
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
diff --git a/vm.c b/vm.c
index 0bbb013..b81fd2c 100644
--- a/vm.c
+++ b/vm.c
@@ -12,10 +12,6 @@
#include "utils.h"
#include "vm.h"
-static match_t *match_backref(const char *str, vm_op_t *op, match_t *m, unsigned int flags);
-static size_t push_backrefs(grammar_t *g, match_t *m);
-static match_t *get_capture_n(match_t *m, int *n);
-static match_t *get_capture_named(match_t *m, const char *name);
/*
* The names of the opcodes (keep in sync with the enum definition above)
@@ -74,20 +70,6 @@ void destroy_match(match_t **m)
*m = NULL;
}
-static size_t push_backrefs(grammar_t *g, match_t *m)
-{
- if (m == NULL) return 0;
- if (m->op->op == VM_REF) return 0;
- size_t count = 0;
- if (m->op->op == VM_CAPTURE && m->op->args.capture.name) {
- ++count;
- push_backref(g, m->op->args.capture.name, m->child);
- }
- if (m->child) count += push_backrefs(g, m->child);
- if (m->nextsibling) count += push_backrefs(g, m->nextsibling);
- return count;
-}
-
typedef struct recursive_ref_s {
const vm_op_t *op;
const char *pos;
@@ -96,6 +78,85 @@ typedef struct recursive_ref_s {
match_t *result;
} recursive_ref_t;
+/*
+ * Attempt to match text against a previously captured value.
+ *
+ * `str` is where matching starts, `op` must be a VM_BACKREF op, and `cap` is
+ * the captured match to compare against.
+ *  - If `cap` is a VM_REPLACE match, the text at `str` must equal the
+ *    replacement text: escapes are decoded, and each @-reference that resolves
+ *    to a capture is matched recursively against that capture.
+ *  - Otherwise the text at `str` must equal the captured source text, with each
+ *    child of `cap` matched recursively. BP_IGNORECASE in `flags` makes these
+ *    comparisons case-insensitive.
+ *
+ * Returns a new match spanning the consumed text (with one child per recursive
+ * backref match), or NULL if the text does not match.
+ */
+static match_t *match_backref(const char *str, vm_op_t *op, match_t *cap, unsigned int flags)
+{
+    check(op->op == VM_BACKREF, "Attempt to match backref against something that's not a backref");
+    match_t *ret = new(match_t);
+    ret->start = str;
+    ret->op = op;
+    match_t **dest = &ret->child; // where the next recursive sub-match gets linked
+
+    if (cap->op->op == VM_REPLACE) {
+        const char *text = cap->op->args.replace.text;
+        const char *end = &text[cap->op->args.replace.len];
+        for (const char *r = text; r < end; ) {
+            if (*r == '\\') {
+                ++r;
+                // Compare as unsigned char so bytes >= 0x80 match regardless of
+                // whether plain char is signed on this platform.
+                unsigned char expected = (unsigned char)unescapechar(r, &r);
+                if ((unsigned char)*(str++) != expected) {
+                    destroy_match(&ret);
+                    return NULL;
+                }
+                // An escape is a literal character: it must not fall through
+                // into the @-reference handling below.
+                continue;
+            } else if (*r != '@') {
+                if (*(str++) != *r) {
+                    destroy_match(&ret);
+                    return NULL;
+                }
+                ++r;
+                continue;
+            }
+
+            // *r == '@': resolve the reference and match the input against it
+            ++r;
+            match_t *value = get_capture(cap, &r);
+            if (value != NULL) {
+                *dest = match_backref(str, op, value, flags);
+                if (*dest == NULL) {
+                    destroy_match(&ret);
+                    return NULL;
+                }
+                str = (*dest)->end;
+                dest = &(*dest)->nextsibling;
+            }
+        }
+    } else {
+        const char *prev = cap->start; // end of the captured text compared so far
+        for (match_t *child = cap->child; child; child = child->nextsibling) {
+            if (child->start > prev) {
+                // Literal text between the previous child and this one
+                size_t len = (size_t)(child->start - prev);
+                if ((flags & BP_IGNORECASE) ? memicmp(str, prev, len) != 0
+                                            : memcmp(str, prev, len) != 0) {
+                    destroy_match(&ret);
+                    return NULL;
+                }
+                str += len;
+                prev = child->start;
+            }
+            // Children that start before already-compared text are skipped
+            if (child->start < prev) continue;
+            *dest = match_backref(str, op, child, flags);
+            if (*dest == NULL) {
+                destroy_match(&ret);
+                return NULL;
+            }
+            str = (*dest)->end;
+            dest = &(*dest)->nextsibling;
+            prev = child->end;
+        }
+        if (cap->end > prev) {
+            // Trailing captured text after the last child
+            size_t len = (size_t)(cap->end - prev);
+            if ((flags & BP_IGNORECASE) ? memicmp(str, prev, len) != 0
+                                        : memcmp(str, prev, len) != 0) {
+                destroy_match(&ret);
+                return NULL;
+            }
+            str += len;
+        }
+    }
+    ret->end = str;
+    return ret;
+}
+
/*
* Run virtual machine operation against a string and return
@@ -434,48 +495,49 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un
}
}
-
/*
* Get a specific numbered pattern capture.
*/
-static match_t *get_capture_n(match_t *m, int *n)
+static match_t *get_capture_by_num(match_t *m, int *n)
{
- if (!m) return NULL;
+ if (!m) return NULL; // keep this guard: get_capture() passes m->child without checking it for NULL
if (*n == 0) return m;
if (m->op->op == VM_CAPTURE && *n == 1) return m;
if (m->op->op == VM_CAPTURE) --(*n);
for (match_t *c = m->child; c; c = c->nextsibling) {
- match_t *cap = get_capture_n(c, n);
+ match_t *cap = get_capture_by_num(c, n);
if (cap) return cap;
}
return NULL;
}
/*
- * Get a named capture.
+ * Get a capture with a specific name.
*/
-static match_t *get_capture_named(match_t *m, const char *name)
+static match_t *get_capture_by_name(match_t *m, const char *name)
{
if (m->op->op == VM_CAPTURE && m->op->args.capture.name
&& streq(m->op->args.capture.name, name))
return m;
for (match_t *c = m->child; c; c = c->nextsibling) {
- match_t *cap = get_capture_named(c, name);
+ match_t *cap = get_capture_by_name(c, name);
if (cap) return cap;
}
return NULL;
}
-static match_t *get_cap(match_t *m, const char **r)
+/*
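+ * Get the capture referenced at *r, either by number (e.g. "1") or by name
+ * (e.g. "foo;"), advancing *r past the reference that was parsed.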
+ */
+match_t *get_capture(match_t *m, const char **r)
{
if (isdigit(**r)) {
int n = (int)strtol(*r, (char**)r, 10);
- return get_capture_n(m->child, &n);
+ return get_capture_by_num(m->child, &n);
} else {
const char *end = after_name(*r);
if (end == *r) return NULL;
char *name = strndup(*r, (size_t)(end-*r));
- match_t *cap = get_capture_named(m, name);
+ match_t *cap = get_capture_by_name(m, name);
free(name);
*r = end;
if (**r == ';') ++(*r);
@@ -484,215 +546,6 @@ static match_t *get_cap(match_t *m, const char **r)
return NULL;
}
-typedef struct {
- size_t line, printed_line;
- const char *color;
-} print_state_t;
-
-static void print_line_number(FILE *out, print_state_t *state, print_options_t options)
-{
- state->printed_line = state->line;
- if (!(options & PRINT_LINE_NUMBERS)) return;
- if (options & PRINT_COLOR)
- fprintf(out, "\033[0;2m% 5ld\033(0\x78\033(B%s", state->line, state->color);
- else
- fprintf(out, "% 5ld|", state->line);
-}
-
-/*
- * Print a match with replacements and highlighting.
- */
-static void _print_match(FILE *out, file_t *f, match_t *m, print_state_t *state, print_options_t options)
-{
- static const char *hl = "\033[0;31;1m";
- const char *old_color = state->color;
- if (m->op->op == VM_HIDE) {
- // TODO: handle replacements?
- for (const char *p = m->start; p < m->end; p++) {
- if (*p == '\n') ++state->line;
- }
- } else if (m->op->op == VM_REPLACE) {
- if (options & PRINT_COLOR && state->color != hl) {
- state->color = hl;
- fprintf(out, "%s", state->color);
- }
- const char *text = m->op->args.replace.text;
- const char *end = &text[m->op->args.replace.len];
- for (const char *r = text; r < end; ) {
- if (*r == '@' && r[1] && r[1] != '@') {
- ++r;
- match_t *cap = get_cap(m, &r);
- if (cap != NULL) {
- _print_match(out, f, cap, state, options);
- continue;
- } else {
- --r;
- }
- }
-
- if (state->printed_line != state->line)
- print_line_number(out, state, options);
-
- if (*r == '\\') {
- ++r;
- unsigned char c = unescapechar(r, &r);
- fputc(c, out);
- if (c == '\n') ++state->line;
- continue;
- } else if (*r == '\n') {
- fputc('\n', out);
- ++state->line;
- ++r;
- continue;
- } else {
- fputc(*r, out);
- ++r;
- continue;
- }
- }
- } else {
- if (m->op->op == VM_CAPTURE) {
- if (options & PRINT_COLOR && state->color != hl) {
- state->color = hl;
- fprintf(out, "%s", state->color);
- }
- }
-
- const char *prev = m->start;
- for (match_t *child = m->child; child; child = child->nextsibling) {
- // Skip children from e.g. zero-width matches like >@foo
- if (!(prev <= child->start && child->start <= m->end &&
- prev <= child->end && child->end <= m->end))
- continue;
- if (child->start > prev) {
- for (const char *p = prev; p < child->start; ++p) {
- if (state->printed_line != state->line)
- print_line_number(out, state, options);
- fputc(*p, out);
- if (*p == '\n') ++state->line;
- }
- }
- _print_match(out, f, child, state, options);
- prev = child->end;
- }
- if (m->end > prev) {
- for (const char *p = prev; p < m->end; ++p) {
- if (state->printed_line != state->line)
- print_line_number(out, state, options);
- fputc(*p, out);
- if (*p == '\n') ++state->line;
- }
- }
- }
- if (options & PRINT_COLOR && old_color != state->color) {
- fprintf(out, "%s", old_color);
- state->color = old_color;
- }
-}
-
-void print_match(FILE *out, file_t *f, match_t *m, print_options_t options)
-{
- print_state_t state = {.line = 1, .color = "\033[0m"};
- _print_match(out, f, m, &state, options);
-}
-
-static match_t *match_backref(const char *str, vm_op_t *op, match_t *cap, unsigned int flags)
-{
- check(op->op == VM_BACKREF, "Attempt to match backref against something that's not a backref");
- match_t *ret = new(match_t);
- ret->start = str;
- ret->op = op;
- match_t **dest = &ret->child;
-
- if (cap->op->op == VM_REPLACE) {
- const char *text = cap->op->args.replace.text;
- const char *end = &text[cap->op->args.replace.len];
- for (const char *r = text; r < end; ) {
- if (*r == '\\') {
- ++r;
- if (*(str++) != unescapechar(r, &r)) {
- destroy_match(&ret);
- return NULL;
- }
- } else if (*r != '@') {
- if (*(str++) != *r) {
- destroy_match(&ret);
- return NULL;
- }
- ++r;
- continue;
- }
-
- ++r;
- match_t *cap = NULL;
- switch (*r) {
- case '0': case '1': case '2': case '3': case '4':
- case '5': case '6': case '7': case '8': case '9': {
- int n = (int)strtol(r, (char**)&r, 10);
- cap = get_capture_n(cap->child, &n);
- break;
- }
- default: {
- const char *end = after_name(r);
- if (end == r) {
- destroy_match(&ret);
- return NULL;
- }
- char *name = strndup(r, (size_t)(end-r));
- cap = get_capture_named(cap, name);
- free(name);
- r = end;
- if (*r == ';') ++r;
- break;
- }
- }
- if (cap != NULL) {
- *dest = match_backref(str, op, cap, flags);
- if (*dest == NULL) {
- destroy_match(&ret);
- return NULL;
- }
- str = (*dest)->end;
- dest = &(*dest)->nextsibling;
- }
- }
- } else {
- const char *prev = cap->start;
- for (match_t *child = cap->child; child; child = child->nextsibling) {
- if (child->start > prev) {
- size_t len = (size_t)(child->start - prev);
- if ((flags & BP_IGNORECASE) ? memicmp(str, prev, len) != 0
- : memcmp(str, prev, len) != 0) {
- destroy_match(&ret);
- return NULL;
- }
- str += len;
- prev = child->start;
- }
- if (child->start < prev) continue;
- *dest = match_backref(str, op, child, flags);
- if (*dest == NULL) {
- destroy_match(&ret);
- return NULL;
- }
- str = (*dest)->end;
- dest = &(*dest)->nextsibling;
- prev = child->end;
- }
- if (cap->end > prev) {
- size_t len = (size_t)(cap->end - prev);
- if ((flags & BP_IGNORECASE) ? memicmp(str, prev, len) != 0
- : memcmp(str, prev, len) != 0) {
- destroy_match(&ret);
- return NULL;
- }
- str += len;
- }
- }
- ret->end = str;
- return ret;
-}
-
match_t *match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, unsigned int flags)
{
return _match(g, f, str, op, flags, NULL);
diff --git a/vm.h b/vm.h
index 758b9d5..2c17743 100644
--- a/vm.h
+++ b/vm.h
@@ -19,7 +19,7 @@ match_t *match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, unsigned i
__attribute__((nonnull))
void destroy_match(match_t **m);
__attribute__((nonnull))
-void print_match(FILE *out, file_t *f, match_t *m, print_options_t options);
+match_t *get_capture(match_t *m, const char **r);
#endif
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1