Refactoring, moving visualization and virtual machine code into proper
files
This commit is contained in:
parent
7da6cde0fd
commit
668c7baf55
13
grammar.c
13
grammar.c
@ -98,6 +98,19 @@ void push_backref(grammar_t *g, const char *name, match_t *capture)
|
||||
g->backrefs[i].op = op;
|
||||
}
|
||||
|
||||
/*
 * Walk a match tree and register every named capture as a backref.
 * Each named VM_CAPTURE node has its child pushed under the capture's name.
 * The walk covers `m`, its descendants, and the siblings that follow it, but
 * stops at the first VM_REF node it meets at any sibling level (that node's
 * subtree and its later siblings are not visited).
 * Returns the number of backrefs pushed.
 */
size_t push_backrefs(grammar_t *g, match_t *m)
{
    size_t pushed = 0;
    for (match_t *node = m; node != NULL; node = node->nextsibling) {
        if (node->op->op == VM_REF)
            break;
        const char *capture_name =
            (node->op->op == VM_CAPTURE) ? node->op->args.capture.name : NULL;
        if (capture_name != NULL) {
            push_backref(g, capture_name, node->child);
            pushed += 1;
        }
        if (node->child != NULL)
            pushed += push_backrefs(g, node->child);
    }
    return pushed;
}
|
||||
|
||||
void pop_backrefs(grammar_t *g, size_t count)
|
||||
{
|
||||
check(count <= g->backrefcount, "Attempt to pop %ld backrefs when there are only %ld", count, g->backrefcount);
|
||||
|
@ -13,6 +13,8 @@ void add_def(grammar_t *g, file_t *f, const char *src, const char *name, vm_op_t
|
||||
__attribute__((nonnull))
|
||||
void push_backref(grammar_t *g, const char *name, match_t *capture);
|
||||
__attribute__((nonnull))
|
||||
size_t push_backrefs(grammar_t *g, match_t *m);
|
||||
__attribute__((nonnull))
|
||||
void pop_backrefs(grammar_t *g, size_t count);
|
||||
__attribute__((nonnull))
|
||||
vm_op_t *load_grammar(grammar_t *g, file_t *f);
|
||||
|
112
viz.c
112
viz.c
@ -9,7 +9,12 @@
|
||||
#include "types.h"
|
||||
#include "utils.h"
|
||||
#include "viz.h"
|
||||
#include "vm.h"
|
||||
|
||||
/*
 * Bookkeeping threaded through one print_match() traversal.
 */
typedef struct {
    size_t line;          // line number the traversal is currently on
    size_t printed_line;  // line number most recently passed through print_line_number()
    const char *color;    // escape sequence of the color currently in effect
} print_state_t;
|
||||
|
||||
static int match_height(match_t *m)
|
||||
{
|
||||
@ -147,4 +152,111 @@ void visualize_match(match_t *m)
|
||||
printf("\033[?7h");
|
||||
}
|
||||
|
||||
/*
 * Emit the line-number gutter for the current line and record that the line
 * has been handled (printed_line is updated even when nothing is written, so
 * callers comparing printed_line against line do not call again for the same
 * line). Nothing is written unless PRINT_LINE_NUMBERS is set.
 */
static void print_line_number(FILE *out, print_state_t *state, print_options_t options)
{
    state->printed_line = state->line;
    if (!(options & PRINT_LINE_NUMBERS)) return;

    // state->line is a size_t; convert explicitly so the argument type
    // matches the %ld conversion (a mismatch is undefined behavior).
    long lineno = (long)state->line;
    if (options & PRINT_COLOR) {
        // Dim line number, then "\033(0" + 'x' (0x78) + "\033(B" (a vertical
        // bar in the DEC line-drawing set on VT100-style terminals), then the
        // color that was active before the gutter.
        fprintf(out, "\033[0;2m% 5ld\033(0\x78\033(B%s", lineno, state->color);
    } else {
        fprintf(out, "% 5ld|", lineno);
    }
}
|
||||
|
||||
/*
|
||||
* Print a match with replacements and highlighting.
|
||||
*/
|
||||
static void _print_match(FILE *out, file_t *f, match_t *m, print_state_t *state, print_options_t options)
|
||||
{
|
||||
static const char *hl = "\033[0;31;1m";
|
||||
const char *old_color = state->color;
|
||||
if (m->op->op == VM_HIDE) {
|
||||
// TODO: handle replacements?
|
||||
for (const char *p = m->start; p < m->end; p++) {
|
||||
if (*p == '\n') ++state->line;
|
||||
}
|
||||
} else if (m->op->op == VM_REPLACE) {
|
||||
if (options & PRINT_COLOR && state->color != hl) {
|
||||
state->color = hl;
|
||||
fprintf(out, "%s", state->color);
|
||||
}
|
||||
const char *text = m->op->args.replace.text;
|
||||
const char *end = &text[m->op->args.replace.len];
|
||||
for (const char *r = text; r < end; ) {
|
||||
if (*r == '@' && r[1] && r[1] != '@') {
|
||||
++r;
|
||||
match_t *cap = get_capture(m, &r);
|
||||
if (cap != NULL) {
|
||||
_print_match(out, f, cap, state, options);
|
||||
continue;
|
||||
} else {
|
||||
--r;
|
||||
}
|
||||
}
|
||||
|
||||
if (state->printed_line != state->line)
|
||||
print_line_number(out, state, options);
|
||||
|
||||
if (*r == '\\') {
|
||||
++r;
|
||||
unsigned char c = unescapechar(r, &r);
|
||||
fputc(c, out);
|
||||
if (c == '\n') ++state->line;
|
||||
continue;
|
||||
} else if (*r == '\n') {
|
||||
fputc('\n', out);
|
||||
++state->line;
|
||||
++r;
|
||||
continue;
|
||||
} else {
|
||||
fputc(*r, out);
|
||||
++r;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (m->op->op == VM_CAPTURE) {
|
||||
if (options & PRINT_COLOR && state->color != hl) {
|
||||
state->color = hl;
|
||||
fprintf(out, "%s", state->color);
|
||||
}
|
||||
}
|
||||
|
||||
const char *prev = m->start;
|
||||
for (match_t *child = m->child; child; child = child->nextsibling) {
|
||||
// Skip children from e.g. zero-width matches like >@foo
|
||||
if (!(prev <= child->start && child->start <= m->end &&
|
||||
prev <= child->end && child->end <= m->end))
|
||||
continue;
|
||||
if (child->start > prev) {
|
||||
for (const char *p = prev; p < child->start; ++p) {
|
||||
if (state->printed_line != state->line)
|
||||
print_line_number(out, state, options);
|
||||
fputc(*p, out);
|
||||
if (*p == '\n') ++state->line;
|
||||
}
|
||||
}
|
||||
_print_match(out, f, child, state, options);
|
||||
prev = child->end;
|
||||
}
|
||||
if (m->end > prev) {
|
||||
for (const char *p = prev; p < m->end; ++p) {
|
||||
if (state->printed_line != state->line)
|
||||
print_line_number(out, state, options);
|
||||
fputc(*p, out);
|
||||
if (*p == '\n') ++state->line;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (options & PRINT_COLOR && old_color != state->color) {
|
||||
fprintf(out, "%s", old_color);
|
||||
state->color = old_color;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Print a whole match tree to `out`, starting at line 1 with the terminal's
 * reset sequence as the base color. printed_line starts at 0, which differs
 * from line, so the gutter is requested before the first byte printed.
 */
void print_match(FILE *out, file_t *f, match_t *m, print_options_t options)
{
    print_state_t state = {0};
    state.line = 1;
    state.color = "\033[0m";
    _print_match(out, f, m, &state, options);
}
|
||||
|
||||
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
|
||||
|
3
viz.h
3
viz.h
@ -9,7 +9,10 @@ typedef struct match_node_s {
|
||||
struct match_node_s *next;
|
||||
} match_node_t;
|
||||
|
||||
__attribute__((nonnull))
|
||||
void visualize_match(match_t *m);
|
||||
__attribute__((nonnull))
|
||||
void print_match(FILE *out, file_t *f, match_t *m, print_options_t options);
|
||||
|
||||
#endif
|
||||
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
|
||||
|
327
vm.c
327
vm.c
@ -12,10 +12,6 @@
|
||||
#include "utils.h"
|
||||
#include "vm.h"
|
||||
|
||||
static match_t *match_backref(const char *str, vm_op_t *op, match_t *m, unsigned int flags);
|
||||
static size_t push_backrefs(grammar_t *g, match_t *m);
|
||||
static match_t *get_capture_n(match_t *m, int *n);
|
||||
static match_t *get_capture_named(match_t *m, const char *name);
|
||||
|
||||
/*
|
||||
* The names of the opcodes (keep in sync with the enum definition above)
|
||||
@ -74,20 +70,6 @@ void destroy_match(match_t **m)
|
||||
*m = NULL;
|
||||
}
|
||||
|
||||
static size_t push_backrefs(grammar_t *g, match_t *m)
|
||||
{
|
||||
if (m == NULL) return 0;
|
||||
if (m->op->op == VM_REF) return 0;
|
||||
size_t count = 0;
|
||||
if (m->op->op == VM_CAPTURE && m->op->args.capture.name) {
|
||||
++count;
|
||||
push_backref(g, m->op->args.capture.name, m->child);
|
||||
}
|
||||
if (m->child) count += push_backrefs(g, m->child);
|
||||
if (m->nextsibling) count += push_backrefs(g, m->nextsibling);
|
||||
return count;
|
||||
}
|
||||
|
||||
typedef struct recursive_ref_s {
|
||||
const vm_op_t *op;
|
||||
const char *pos;
|
||||
@ -96,6 +78,85 @@ typedef struct recursive_ref_s {
|
||||
match_t *result;
|
||||
} recursive_ref_t;
|
||||
|
||||
/*
|
||||
* Attempt to match text against a previously captured value.
|
||||
*/
|
||||
static match_t *match_backref(const char *str, vm_op_t *op, match_t *cap, unsigned int flags)
|
||||
{
|
||||
check(op->op == VM_BACKREF, "Attempt to match backref against something that's not a backref");
|
||||
match_t *ret = new(match_t);
|
||||
ret->start = str;
|
||||
ret->op = op;
|
||||
match_t **dest = &ret->child;
|
||||
|
||||
if (cap->op->op == VM_REPLACE) {
|
||||
const char *text = cap->op->args.replace.text;
|
||||
const char *end = &text[cap->op->args.replace.len];
|
||||
for (const char *r = text; r < end; ) {
|
||||
if (*r == '\\') {
|
||||
++r;
|
||||
if (*(str++) != unescapechar(r, &r)) {
|
||||
destroy_match(&ret);
|
||||
return NULL;
|
||||
}
|
||||
} else if (*r != '@') {
|
||||
if (*(str++) != *r) {
|
||||
destroy_match(&ret);
|
||||
return NULL;
|
||||
}
|
||||
++r;
|
||||
continue;
|
||||
}
|
||||
|
||||
++r;
|
||||
match_t *value = get_capture(cap, &r);
|
||||
if (value != NULL) {
|
||||
*dest = match_backref(str, op, value, flags);
|
||||
if (*dest == NULL) {
|
||||
destroy_match(&ret);
|
||||
return NULL;
|
||||
}
|
||||
str = (*dest)->end;
|
||||
dest = &(*dest)->nextsibling;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
const char *prev = cap->start;
|
||||
for (match_t *child = cap->child; child; child = child->nextsibling) {
|
||||
if (child->start > prev) {
|
||||
size_t len = (size_t)(child->start - prev);
|
||||
if ((flags & BP_IGNORECASE) ? memicmp(str, prev, len) != 0
|
||||
: memcmp(str, prev, len) != 0) {
|
||||
destroy_match(&ret);
|
||||
return NULL;
|
||||
}
|
||||
str += len;
|
||||
prev = child->start;
|
||||
}
|
||||
if (child->start < prev) continue;
|
||||
*dest = match_backref(str, op, child, flags);
|
||||
if (*dest == NULL) {
|
||||
destroy_match(&ret);
|
||||
return NULL;
|
||||
}
|
||||
str = (*dest)->end;
|
||||
dest = &(*dest)->nextsibling;
|
||||
prev = child->end;
|
||||
}
|
||||
if (cap->end > prev) {
|
||||
size_t len = (size_t)(cap->end - prev);
|
||||
if ((flags & BP_IGNORECASE) ? memicmp(str, prev, len) != 0
|
||||
: memcmp(str, prev, len) != 0) {
|
||||
destroy_match(&ret);
|
||||
return NULL;
|
||||
}
|
||||
str += len;
|
||||
}
|
||||
}
|
||||
ret->end = str;
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Run virtual machine operation against a string and return
|
||||
@ -434,48 +495,49 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Get a specific numbered pattern capture.
|
||||
*/
|
||||
static match_t *get_capture_n(match_t *m, int *n)
|
||||
static match_t *get_capture_by_num(match_t *m, int *n)
|
||||
{
|
||||
if (!m) return NULL;
|
||||
if (*n == 0) return m;
|
||||
if (m->op->op == VM_CAPTURE && *n == 1) return m;
|
||||
if (m->op->op == VM_CAPTURE) --(*n);
|
||||
for (match_t *c = m->child; c; c = c->nextsibling) {
|
||||
match_t *cap = get_capture_n(c, n);
|
||||
match_t *cap = get_capture_by_num(c, n);
|
||||
if (cap) return cap;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Get a named capture.
|
||||
* Get a capture with a specific name.
|
||||
*/
|
||||
static match_t *get_capture_named(match_t *m, const char *name)
|
||||
static match_t *get_capture_by_name(match_t *m, const char *name)
|
||||
{
|
||||
if (m->op->op == VM_CAPTURE && m->op->args.capture.name
|
||||
&& streq(m->op->args.capture.name, name))
|
||||
return m;
|
||||
for (match_t *c = m->child; c; c = c->nextsibling) {
|
||||
match_t *cap = get_capture_named(c, name);
|
||||
match_t *cap = get_capture_by_name(c, name);
|
||||
if (cap) return cap;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static match_t *get_cap(match_t *m, const char **r)
|
||||
/*
|
||||
* Get a capture by number (if *r starts with a digit) or by name, advancing *r past the reference.
|
||||
*/
|
||||
match_t *get_capture(match_t *m, const char **r)
|
||||
{
|
||||
if (isdigit(**r)) {
|
||||
int n = (int)strtol(*r, (char**)r, 10);
|
||||
return get_capture_n(m->child, &n);
|
||||
return get_capture_by_num(m->child, &n);
|
||||
} else {
|
||||
const char *end = after_name(*r);
|
||||
if (end == *r) return NULL;
|
||||
char *name = strndup(*r, (size_t)(end-*r));
|
||||
match_t *cap = get_capture_named(m, name);
|
||||
match_t *cap = get_capture_by_name(m, name);
|
||||
free(name);
|
||||
*r = end;
|
||||
if (**r == ';') ++(*r);
|
||||
@ -484,215 +546,6 @@ static match_t *get_cap(match_t *m, const char **r)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
size_t line, printed_line;
|
||||
const char *color;
|
||||
} print_state_t;
|
||||
|
||||
static void print_line_number(FILE *out, print_state_t *state, print_options_t options)
|
||||
{
|
||||
state->printed_line = state->line;
|
||||
if (!(options & PRINT_LINE_NUMBERS)) return;
|
||||
if (options & PRINT_COLOR)
|
||||
fprintf(out, "\033[0;2m% 5ld\033(0\x78\033(B%s", state->line, state->color);
|
||||
else
|
||||
fprintf(out, "% 5ld|", state->line);
|
||||
}
|
||||
|
||||
/*
|
||||
* Print a match with replacements and highlighting.
|
||||
*/
|
||||
static void _print_match(FILE *out, file_t *f, match_t *m, print_state_t *state, print_options_t options)
|
||||
{
|
||||
static const char *hl = "\033[0;31;1m";
|
||||
const char *old_color = state->color;
|
||||
if (m->op->op == VM_HIDE) {
|
||||
// TODO: handle replacements?
|
||||
for (const char *p = m->start; p < m->end; p++) {
|
||||
if (*p == '\n') ++state->line;
|
||||
}
|
||||
} else if (m->op->op == VM_REPLACE) {
|
||||
if (options & PRINT_COLOR && state->color != hl) {
|
||||
state->color = hl;
|
||||
fprintf(out, "%s", state->color);
|
||||
}
|
||||
const char *text = m->op->args.replace.text;
|
||||
const char *end = &text[m->op->args.replace.len];
|
||||
for (const char *r = text; r < end; ) {
|
||||
if (*r == '@' && r[1] && r[1] != '@') {
|
||||
++r;
|
||||
match_t *cap = get_cap(m, &r);
|
||||
if (cap != NULL) {
|
||||
_print_match(out, f, cap, state, options);
|
||||
continue;
|
||||
} else {
|
||||
--r;
|
||||
}
|
||||
}
|
||||
|
||||
if (state->printed_line != state->line)
|
||||
print_line_number(out, state, options);
|
||||
|
||||
if (*r == '\\') {
|
||||
++r;
|
||||
unsigned char c = unescapechar(r, &r);
|
||||
fputc(c, out);
|
||||
if (c == '\n') ++state->line;
|
||||
continue;
|
||||
} else if (*r == '\n') {
|
||||
fputc('\n', out);
|
||||
++state->line;
|
||||
++r;
|
||||
continue;
|
||||
} else {
|
||||
fputc(*r, out);
|
||||
++r;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (m->op->op == VM_CAPTURE) {
|
||||
if (options & PRINT_COLOR && state->color != hl) {
|
||||
state->color = hl;
|
||||
fprintf(out, "%s", state->color);
|
||||
}
|
||||
}
|
||||
|
||||
const char *prev = m->start;
|
||||
for (match_t *child = m->child; child; child = child->nextsibling) {
|
||||
// Skip children from e.g. zero-width matches like >@foo
|
||||
if (!(prev <= child->start && child->start <= m->end &&
|
||||
prev <= child->end && child->end <= m->end))
|
||||
continue;
|
||||
if (child->start > prev) {
|
||||
for (const char *p = prev; p < child->start; ++p) {
|
||||
if (state->printed_line != state->line)
|
||||
print_line_number(out, state, options);
|
||||
fputc(*p, out);
|
||||
if (*p == '\n') ++state->line;
|
||||
}
|
||||
}
|
||||
_print_match(out, f, child, state, options);
|
||||
prev = child->end;
|
||||
}
|
||||
if (m->end > prev) {
|
||||
for (const char *p = prev; p < m->end; ++p) {
|
||||
if (state->printed_line != state->line)
|
||||
print_line_number(out, state, options);
|
||||
fputc(*p, out);
|
||||
if (*p == '\n') ++state->line;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (options & PRINT_COLOR && old_color != state->color) {
|
||||
fprintf(out, "%s", old_color);
|
||||
state->color = old_color;
|
||||
}
|
||||
}
|
||||
|
||||
void print_match(FILE *out, file_t *f, match_t *m, print_options_t options)
|
||||
{
|
||||
print_state_t state = {.line = 1, .color = "\033[0m"};
|
||||
_print_match(out, f, m, &state, options);
|
||||
}
|
||||
|
||||
static match_t *match_backref(const char *str, vm_op_t *op, match_t *cap, unsigned int flags)
|
||||
{
|
||||
check(op->op == VM_BACKREF, "Attempt to match backref against something that's not a backref");
|
||||
match_t *ret = new(match_t);
|
||||
ret->start = str;
|
||||
ret->op = op;
|
||||
match_t **dest = &ret->child;
|
||||
|
||||
if (cap->op->op == VM_REPLACE) {
|
||||
const char *text = cap->op->args.replace.text;
|
||||
const char *end = &text[cap->op->args.replace.len];
|
||||
for (const char *r = text; r < end; ) {
|
||||
if (*r == '\\') {
|
||||
++r;
|
||||
if (*(str++) != unescapechar(r, &r)) {
|
||||
destroy_match(&ret);
|
||||
return NULL;
|
||||
}
|
||||
} else if (*r != '@') {
|
||||
if (*(str++) != *r) {
|
||||
destroy_match(&ret);
|
||||
return NULL;
|
||||
}
|
||||
++r;
|
||||
continue;
|
||||
}
|
||||
|
||||
++r;
|
||||
match_t *cap = NULL;
|
||||
switch (*r) {
|
||||
case '0': case '1': case '2': case '3': case '4':
|
||||
case '5': case '6': case '7': case '8': case '9': {
|
||||
int n = (int)strtol(r, (char**)&r, 10);
|
||||
cap = get_capture_n(cap->child, &n);
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
const char *end = after_name(r);
|
||||
if (end == r) {
|
||||
destroy_match(&ret);
|
||||
return NULL;
|
||||
}
|
||||
char *name = strndup(r, (size_t)(end-r));
|
||||
cap = get_capture_named(cap, name);
|
||||
free(name);
|
||||
r = end;
|
||||
if (*r == ';') ++r;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (cap != NULL) {
|
||||
*dest = match_backref(str, op, cap, flags);
|
||||
if (*dest == NULL) {
|
||||
destroy_match(&ret);
|
||||
return NULL;
|
||||
}
|
||||
str = (*dest)->end;
|
||||
dest = &(*dest)->nextsibling;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
const char *prev = cap->start;
|
||||
for (match_t *child = cap->child; child; child = child->nextsibling) {
|
||||
if (child->start > prev) {
|
||||
size_t len = (size_t)(child->start - prev);
|
||||
if ((flags & BP_IGNORECASE) ? memicmp(str, prev, len) != 0
|
||||
: memcmp(str, prev, len) != 0) {
|
||||
destroy_match(&ret);
|
||||
return NULL;
|
||||
}
|
||||
str += len;
|
||||
prev = child->start;
|
||||
}
|
||||
if (child->start < prev) continue;
|
||||
*dest = match_backref(str, op, child, flags);
|
||||
if (*dest == NULL) {
|
||||
destroy_match(&ret);
|
||||
return NULL;
|
||||
}
|
||||
str = (*dest)->end;
|
||||
dest = &(*dest)->nextsibling;
|
||||
prev = child->end;
|
||||
}
|
||||
if (cap->end > prev) {
|
||||
size_t len = (size_t)(cap->end - prev);
|
||||
if ((flags & BP_IGNORECASE) ? memicmp(str, prev, len) != 0
|
||||
: memcmp(str, prev, len) != 0) {
|
||||
destroy_match(&ret);
|
||||
return NULL;
|
||||
}
|
||||
str += len;
|
||||
}
|
||||
}
|
||||
ret->end = str;
|
||||
return ret;
|
||||
}
|
||||
|
||||
match_t *match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, unsigned int flags)
|
||||
{
|
||||
return _match(g, f, str, op, flags, NULL);
|
||||
|
2
vm.h
2
vm.h
@ -19,7 +19,7 @@ match_t *match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, unsigned i
|
||||
__attribute__((nonnull))
|
||||
void destroy_match(match_t **m);
|
||||
__attribute__((nonnull))
|
||||
void print_match(FILE *out, file_t *f, match_t *m, print_options_t options);
|
||||
match_t *get_capture(match_t *m, const char **r);
|
||||
|
||||
#endif
|
||||
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
|
||||
|
Loading…
Reference in New Issue
Block a user