Refactoring, moving visualization and virtual machine code into proper files
This commit is contained in:
Bruce Hill 2021-01-08 01:00:27 -08:00
parent 7da6cde0fd
commit 668c7baf55
6 changed files with 221 additions and 238 deletions

View File

@ -98,6 +98,19 @@ void push_backref(grammar_t *g, const char *name, match_t *capture)
g->backrefs[i].op = op;
}
/*
 * Walk a match, its descendants, and its following siblings, pushing a
 * backref for every named capture encountered.  A VM_REF node is not
 * descended into and also stops the walk over any siblings that follow it.
 * Returns the number of backrefs that were pushed.
 */
size_t push_backrefs(grammar_t *g, match_t *m)
{
    size_t pushed = 0;
    match_t *node = m;
    do {
        if (node->op->op == VM_REF)
            break;
        if (node->op->op == VM_CAPTURE && node->op->args.capture.name) {
            // The backref is bound to the capture's child, not the capture node itself
            push_backref(g, node->op->args.capture.name, node->child);
            ++pushed;
        }
        if (node->child)
            pushed += push_backrefs(g, node->child);
        node = node->nextsibling;
    } while (node);
    return pushed;
}
void pop_backrefs(grammar_t *g, size_t count)
{
check(count <= g->backrefcount, "Attempt to pop %ld backrefs when there are only %ld", count, g->backrefcount);

View File

@ -13,6 +13,8 @@ void add_def(grammar_t *g, file_t *f, const char *src, const char *name, vm_op_t
__attribute__((nonnull))
void push_backref(grammar_t *g, const char *name, match_t *capture);
__attribute__((nonnull))
size_t push_backrefs(grammar_t *g, match_t *m);
__attribute__((nonnull))
void pop_backrefs(grammar_t *g, size_t count);
__attribute__((nonnull))
vm_op_t *load_grammar(grammar_t *g, file_t *f);

112
viz.c
View File

@ -9,7 +9,12 @@
#include "types.h"
#include "utils.h"
#include "viz.h"
#include "vm.h"
/*
 * Bookkeeping carried through a recursive print of a match.
 */
typedef struct {
    // Line number of the text currently being printed
    size_t line;
    // Most recent line number for which a line-number prefix was handled
    size_t printed_line;
    // Color escape sequence currently in effect
    const char *color;
} print_state_t;
static int match_height(match_t *m)
{
@ -147,4 +152,111 @@ void visualize_match(match_t *m)
printf("\033[?7h");
}
/*
 * Print the line-number prefix for the current line and record that the
 * current line has been handled.  state->printed_line is updated even when
 * PRINT_LINE_NUMBERS is not set, in which case nothing is written.
 */
static void print_line_number(FILE *out, print_state_t *state, print_options_t options)
{
    state->printed_line = state->line;
    if (!(options & PRINT_LINE_NUMBERS)) return;
    // state->line is a size_t; convert it explicitly so the argument type
    // matches the %ld conversion (a mismatched argument is undefined behavior).
    long line = (long)state->line;
    if (options & PRINT_COLOR) {
        // Number in a dim style, a separator, then switch back to the active color
        fprintf(out, "\033[0;2m% 5ld\033(0\x78\033(B%s", line, state->color);
    } else {
        fprintf(out, "% 5ld|", line);
    }
}
/*
 * Copy the source text in [start, end) to `out` unchanged.  Before each
 * character, a line-number prefix is emitted if the current line has not been
 * handled yet; after each newline the line counter is advanced.
 */
static void print_text_range(FILE *out, const char *start, const char *end, print_state_t *state, print_options_t options)
{
    for (const char *p = start; p < end; ++p) {
        if (state->printed_line != state->line)
            print_line_number(out, state, options);
        fputc(*p, out);
        if (*p == '\n') ++state->line;
    }
}

/*
 * Print a match with replacements and highlighting.
 *
 * - VM_HIDE matches print nothing, but newlines inside them still advance
 *   the line counter.
 * - VM_REPLACE matches print their replacement text, expanding "@" capture
 *   references and backslash escapes.
 * - Any other match prints its own source text, recursing into children;
 *   VM_CAPTURE matches are highlighted when PRINT_COLOR is set.
 *
 * The color that was active on entry is restored before returning.
 */
static void _print_match(FILE *out, file_t *f, match_t *m, print_state_t *state, print_options_t options)
{
    static const char *hl = "\033[0;31;1m";
    const char *old_color = state->color;

    if (m->op->op == VM_HIDE) {
        // TODO: handle replacements?
        for (const char *p = m->start; p < m->end; p++) {
            if (*p == '\n') ++state->line;
        }
    } else if (m->op->op == VM_REPLACE) {
        if ((options & PRINT_COLOR) && state->color != hl) {
            state->color = hl;
            fprintf(out, "%s", state->color);
        }
        const char *text = m->op->args.replace.text;
        const char *end = &text[m->op->args.replace.len];
        for (const char *r = text; r < end; ) {
            // An "@" that is not followed by another "@" may reference a capture
            if (*r == '@' && r[1] && r[1] != '@') {
                ++r;
                match_t *cap = get_capture(m, &r);
                if (cap != NULL) {
                    _print_match(out, f, cap, state, options);
                    continue;
                }
                // No such capture: step back one character and print literally
                --r;
            }

            if (state->printed_line != state->line)
                print_line_number(out, state, options);

            if (*r == '\\') {
                // Backslash escape: unescapechar() also advances r past the escape
                ++r;
                unsigned char c = unescapechar(r, &r);
                fputc(c, out);
                if (c == '\n') ++state->line;
            } else {
                fputc(*r, out);
                if (*r == '\n') ++state->line;
                ++r;
            }
        }
    } else {
        if (m->op->op == VM_CAPTURE) {
            if ((options & PRINT_COLOR) && state->color != hl) {
                state->color = hl;
                fprintf(out, "%s", state->color);
            }
        }

        const char *prev = m->start;
        for (match_t *child = m->child; child; child = child->nextsibling) {
            // Skip children from e.g. zero-width matches like >@foo
            if (!(prev <= child->start && child->start <= m->end &&
                  prev <= child->end && child->end <= m->end))
                continue;
            // Text between the previous child and this one, then the child itself
            print_text_range(out, prev, child->start, state, options);
            _print_match(out, f, child, state, options);
            prev = child->end;
        }
        // Whatever text remains after the last printed child
        print_text_range(out, prev, m->end, state, options);
    }

    if ((options & PRINT_COLOR) && old_color != state->color) {
        fprintf(out, "%s", old_color);
        state->color = old_color;
    }
}
/*
 * Print a match to `out`.  Printing starts at line 1, with no line number
 * handled yet, and with "\033[0m" as the initial color.
 */
void print_match(FILE *out, file_t *f, match_t *m, print_options_t options)
{
    print_state_t state = {
        .line = 1,
        .printed_line = 0,
        .color = "\033[0m",
    };
    _print_match(out, f, m, &state, options);
}
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1

3
viz.h
View File

@ -9,7 +9,10 @@ typedef struct match_node_s {
struct match_node_s *next;
} match_node_t;
__attribute__((nonnull))
void visualize_match(match_t *m);
__attribute__((nonnull))
void print_match(FILE *out, file_t *f, match_t *m, print_options_t options);
#endif
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1

327
vm.c
View File

@ -12,10 +12,6 @@
#include "utils.h"
#include "vm.h"
static match_t *match_backref(const char *str, vm_op_t *op, match_t *m, unsigned int flags);
static size_t push_backrefs(grammar_t *g, match_t *m);
static match_t *get_capture_n(match_t *m, int *n);
static match_t *get_capture_named(match_t *m, const char *name);
/*
* The names of the opcodes (keep in sync with the enum definition above)
@ -74,20 +70,6 @@ void destroy_match(match_t **m)
*m = NULL;
}
/*
 * Walk a match, its descendants, and its following siblings, pushing a
 * backref for every named capture encountered.  A NULL match pushes nothing.
 * A VM_REF node is not descended into and also stops the walk over any
 * siblings that follow it.  Returns the number of backrefs that were pushed.
 */
static size_t push_backrefs(grammar_t *g, match_t *m)
{
    size_t pushed = 0;
    for (match_t *node = m; node != NULL; node = node->nextsibling) {
        if (node->op->op == VM_REF)
            break;
        if (node->op->op == VM_CAPTURE && node->op->args.capture.name) {
            // The backref is bound to the capture's child, not the capture node itself
            push_backref(g, node->op->args.capture.name, node->child);
            ++pushed;
        }
        if (node->child)
            pushed += push_backrefs(g, node->child);
    }
    return pushed;
}
typedef struct recursive_ref_s {
const vm_op_t *op;
const char *pos;
@ -96,6 +78,85 @@ typedef struct recursive_ref_s {
match_t *result;
} recursive_ref_t;
/*
 * Attempt to match text against a previously captured value.
 *
 * str:   where in the input to start matching
 * op:    the VM_BACKREF op being matched; it is recorded on every match
 *        node this function creates
 * cap:   the earlier capture whose content must appear at `str`
 * flags: BP_IGNORECASE makes the raw-text comparisons of a non-replacement
 *        capture case-insensitive
 *
 * Returns a new match covering the matched text, with one child per nested
 * capture that was matched, or NULL if the text at `str` does not match.
 */
static match_t *match_backref(const char *str, vm_op_t *op, match_t *cap, unsigned int flags)
{
    check(op->op == VM_BACKREF, "Attempt to match backref against something that's not a backref");
    match_t *ret = new(match_t);
    ret->start = str;
    ret->op = op;
    match_t **dest = &ret->child;

    if (cap->op->op == VM_REPLACE) {
        // The capture was a replacement, so match against the replacement
        // text rather than the original source text.
        const char *text = cap->op->args.replace.text;
        const char *end = &text[cap->op->args.replace.len];
        for (const char *r = text; r < end; ) {
            if (*r == '\\') {
                ++r;
                if (*(str++) != unescapechar(r, &r)) {
                    destroy_match(&ret);
                    return NULL;
                }
                // The escape has been fully consumed; without this, control
                // would fall through to the "@" handling below, skip a
                // character, and misread what follows as a capture reference.
                continue;
            } else if (*r != '@') {
                if (*(str++) != *r) {
                    destroy_match(&ret);
                    return NULL;
                }
                ++r;
                continue;
            }

            // *r is '@': skip it and resolve the capture reference that follows
            ++r;
            match_t *value = get_capture(cap, &r);
            if (value != NULL) {
                *dest = match_backref(str, op, value, flags);
                if (*dest == NULL) {
                    destroy_match(&ret);
                    return NULL;
                }
                str = (*dest)->end;
                dest = &(*dest)->nextsibling;
            }
        }
    } else {
        // Alternate between comparing the raw text that lies between children
        // and recursively matching each child.
        const char *prev = cap->start;
        for (match_t *child = cap->child; child; child = child->nextsibling) {
            if (child->start > prev) {
                size_t len = (size_t)(child->start - prev);
                if ((flags & BP_IGNORECASE) ? memicmp(str, prev, len) != 0
                                            : memcmp(str, prev, len) != 0) {
                    destroy_match(&ret);
                    return NULL;
                }
                str += len;
                prev = child->start;
            }
            // Children that begin before the text already consumed are skipped
            if (child->start < prev) continue;
            *dest = match_backref(str, op, child, flags);
            if (*dest == NULL) {
                destroy_match(&ret);
                return NULL;
            }
            str = (*dest)->end;
            dest = &(*dest)->nextsibling;
            prev = child->end;
        }
        // Raw text remaining after the last child
        if (cap->end > prev) {
            size_t len = (size_t)(cap->end - prev);
            if ((flags & BP_IGNORECASE) ? memicmp(str, prev, len) != 0
                                        : memcmp(str, prev, len) != 0) {
                destroy_match(&ret);
                return NULL;
            }
            str += len;
        }
    }
    ret->end = str;
    return ret;
}
/*
* Run virtual machine operation against a string and return
@ -434,48 +495,49 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un
}
}
/*
* Get a specific numbered pattern capture.
*/
static match_t *get_capture_n(match_t *m, int *n)
static match_t *get_capture_by_num(match_t *m, int *n)
{
if (!m) return NULL;
if (*n == 0) return m;
if (m->op->op == VM_CAPTURE && *n == 1) return m;
if (m->op->op == VM_CAPTURE) --(*n);
for (match_t *c = m->child; c; c = c->nextsibling) {
match_t *cap = get_capture_n(c, n);
match_t *cap = get_capture_by_num(c, n);
if (cap) return cap;
}
return NULL;
}
/*
* Get a named capture.
* Get a capture with a specific name.
*/
static match_t *get_capture_named(match_t *m, const char *name)
static match_t *get_capture_by_name(match_t *m, const char *name)
{
if (m->op->op == VM_CAPTURE && m->op->args.capture.name
&& streq(m->op->args.capture.name, name))
return m;
for (match_t *c = m->child; c; c = c->nextsibling) {
match_t *cap = get_capture_named(c, name);
match_t *cap = get_capture_by_name(c, name);
if (cap) return cap;
}
return NULL;
}
static match_t *get_cap(match_t *m, const char **r)
/*
* Get a capture by number (when the reference starts with a digit) or by name.
*/
match_t *get_capture(match_t *m, const char **r)
{
if (isdigit(**r)) {
int n = (int)strtol(*r, (char**)r, 10);
return get_capture_n(m->child, &n);
return get_capture_by_num(m->child, &n);
} else {
const char *end = after_name(*r);
if (end == *r) return NULL;
char *name = strndup(*r, (size_t)(end-*r));
match_t *cap = get_capture_named(m, name);
match_t *cap = get_capture_by_name(m, name);
free(name);
*r = end;
if (**r == ';') ++(*r);
@ -484,215 +546,6 @@ static match_t *get_cap(match_t *m, const char **r)
return NULL;
}
/*
 * Bookkeeping carried through a recursive print of a match.
 */
typedef struct {
    // Line number of the text currently being printed
    size_t line;
    // Most recent line number for which a line-number prefix was handled
    size_t printed_line;
    // Color escape sequence currently in effect
    const char *color;
} print_state_t;
/*
 * Print the line-number prefix for the current line and record that the
 * current line has been handled.  state->printed_line is updated even when
 * PRINT_LINE_NUMBERS is not set, in which case nothing is written.
 */
static void print_line_number(FILE *out, print_state_t *state, print_options_t options)
{
    state->printed_line = state->line;
    if (!(options & PRINT_LINE_NUMBERS)) return;
    // state->line is a size_t; convert it explicitly so the argument type
    // matches the %ld conversion (a mismatched argument is undefined behavior).
    long line = (long)state->line;
    if (options & PRINT_COLOR) {
        // Number in a dim style, a separator, then switch back to the active color
        fprintf(out, "\033[0;2m% 5ld\033(0\x78\033(B%s", line, state->color);
    } else {
        fprintf(out, "% 5ld|", line);
    }
}
/*
 * Copy the source text in [start, end) to `out` unchanged.  Before each
 * character, a line-number prefix is emitted if the current line has not been
 * handled yet; after each newline the line counter is advanced.
 */
static void print_text_range(FILE *out, const char *start, const char *end, print_state_t *state, print_options_t options)
{
    for (const char *p = start; p < end; ++p) {
        if (state->printed_line != state->line)
            print_line_number(out, state, options);
        fputc(*p, out);
        if (*p == '\n') ++state->line;
    }
}

/*
 * Print a match with replacements and highlighting.
 *
 * - VM_HIDE matches print nothing, but newlines inside them still advance
 *   the line counter.
 * - VM_REPLACE matches print their replacement text, expanding "@" capture
 *   references and backslash escapes.
 * - Any other match prints its own source text, recursing into children;
 *   VM_CAPTURE matches are highlighted when PRINT_COLOR is set.
 *
 * The color that was active on entry is restored before returning.
 */
static void _print_match(FILE *out, file_t *f, match_t *m, print_state_t *state, print_options_t options)
{
    static const char *hl = "\033[0;31;1m";
    const char *old_color = state->color;

    if (m->op->op == VM_HIDE) {
        // TODO: handle replacements?
        for (const char *p = m->start; p < m->end; p++) {
            if (*p == '\n') ++state->line;
        }
    } else if (m->op->op == VM_REPLACE) {
        if ((options & PRINT_COLOR) && state->color != hl) {
            state->color = hl;
            fprintf(out, "%s", state->color);
        }
        const char *text = m->op->args.replace.text;
        const char *end = &text[m->op->args.replace.len];
        for (const char *r = text; r < end; ) {
            // An "@" that is not followed by another "@" may reference a capture
            if (*r == '@' && r[1] && r[1] != '@') {
                ++r;
                match_t *cap = get_cap(m, &r);
                if (cap != NULL) {
                    _print_match(out, f, cap, state, options);
                    continue;
                }
                // No such capture: step back one character and print literally
                --r;
            }

            if (state->printed_line != state->line)
                print_line_number(out, state, options);

            if (*r == '\\') {
                // Backslash escape: unescapechar() also advances r past the escape
                ++r;
                unsigned char c = unescapechar(r, &r);
                fputc(c, out);
                if (c == '\n') ++state->line;
            } else {
                fputc(*r, out);
                if (*r == '\n') ++state->line;
                ++r;
            }
        }
    } else {
        if (m->op->op == VM_CAPTURE) {
            if ((options & PRINT_COLOR) && state->color != hl) {
                state->color = hl;
                fprintf(out, "%s", state->color);
            }
        }

        const char *prev = m->start;
        for (match_t *child = m->child; child; child = child->nextsibling) {
            // Skip children from e.g. zero-width matches like >@foo
            if (!(prev <= child->start && child->start <= m->end &&
                  prev <= child->end && child->end <= m->end))
                continue;
            // Text between the previous child and this one, then the child itself
            print_text_range(out, prev, child->start, state, options);
            _print_match(out, f, child, state, options);
            prev = child->end;
        }
        // Whatever text remains after the last printed child
        print_text_range(out, prev, m->end, state, options);
    }

    if ((options & PRINT_COLOR) && old_color != state->color) {
        fprintf(out, "%s", old_color);
        state->color = old_color;
    }
}
/*
 * Print a match to `out`.  Printing starts at line 1, with no line number
 * handled yet, and with "\033[0m" as the initial color.
 */
void print_match(FILE *out, file_t *f, match_t *m, print_options_t options)
{
    print_state_t state = {
        .line = 1,
        .printed_line = 0,
        .color = "\033[0m",
    };
    _print_match(out, f, m, &state, options);
}
/*
 * Attempt to match text against a previously captured value.
 *
 * str:   where in the input to start matching
 * op:    the VM_BACKREF op being matched; it is recorded on every match
 *        node this function creates
 * cap:   the earlier capture whose content must appear at `str`
 * flags: BP_IGNORECASE makes the raw-text comparisons of a non-replacement
 *        capture case-insensitive
 *
 * Returns a new match covering the matched text, with one child per nested
 * capture that was matched, or NULL if the text at `str` does not match.
 */
static match_t *match_backref(const char *str, vm_op_t *op, match_t *cap, unsigned int flags)
{
    check(op->op == VM_BACKREF, "Attempt to match backref against something that's not a backref");
    match_t *ret = new(match_t);
    ret->start = str;
    ret->op = op;
    match_t **dest = &ret->child;

    if (cap->op->op == VM_REPLACE) {
        // The capture was a replacement, so match against the replacement
        // text rather than the original source text.
        const char *text = cap->op->args.replace.text;
        const char *end = &text[cap->op->args.replace.len];
        for (const char *r = text; r < end; ) {
            if (*r == '\\') {
                ++r;
                if (*(str++) != unescapechar(r, &r)) {
                    destroy_match(&ret);
                    return NULL;
                }
                // The escape has been fully consumed; without this, control
                // would fall through to the "@" handling below, skip a
                // character, and misread what follows as a capture reference.
                continue;
            } else if (*r != '@') {
                if (*(str++) != *r) {
                    destroy_match(&ret);
                    return NULL;
                }
                ++r;
                continue;
            }

            // *r is '@': skip it and resolve the capture reference that follows.
            // The result goes in its own variable: reusing the name `cap` here
            // would shadow the parameter with a NULL pointer, which the
            // lookups below would then dereference.
            ++r;
            match_t *value = NULL;
            if (*r >= '0' && *r <= '9') {
                // Numbered reference, e.g. "@1"
                int n = (int)strtol(r, (char**)&r, 10);
                value = get_capture_n(cap->child, &n);
            } else {
                // Named reference, e.g. "@foo", optionally terminated by ';'
                const char *name_end = after_name(r);
                if (name_end == r) {
                    destroy_match(&ret);
                    return NULL;
                }
                char *name = strndup(r, (size_t)(name_end - r));
                value = get_capture_named(cap, name);
                free(name);
                r = name_end;
                if (*r == ';') ++r;
            }

            if (value != NULL) {
                *dest = match_backref(str, op, value, flags);
                if (*dest == NULL) {
                    destroy_match(&ret);
                    return NULL;
                }
                str = (*dest)->end;
                dest = &(*dest)->nextsibling;
            }
        }
    } else {
        // Alternate between comparing the raw text that lies between children
        // and recursively matching each child.
        const char *prev = cap->start;
        for (match_t *child = cap->child; child; child = child->nextsibling) {
            if (child->start > prev) {
                size_t len = (size_t)(child->start - prev);
                if ((flags & BP_IGNORECASE) ? memicmp(str, prev, len) != 0
                                            : memcmp(str, prev, len) != 0) {
                    destroy_match(&ret);
                    return NULL;
                }
                str += len;
                prev = child->start;
            }
            // Children that begin before the text already consumed are skipped
            if (child->start < prev) continue;
            *dest = match_backref(str, op, child, flags);
            if (*dest == NULL) {
                destroy_match(&ret);
                return NULL;
            }
            str = (*dest)->end;
            dest = &(*dest)->nextsibling;
            prev = child->end;
        }
        // Raw text remaining after the last child
        if (cap->end > prev) {
            size_t len = (size_t)(cap->end - prev);
            if ((flags & BP_IGNORECASE) ? memicmp(str, prev, len) != 0
                                        : memcmp(str, prev, len) != 0) {
                destroy_match(&ret);
                return NULL;
            }
            str += len;
        }
    }
    ret->end = str;
    return ret;
}
match_t *match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, unsigned int flags)
{
return _match(g, f, str, op, flags, NULL);

2
vm.h
View File

@ -19,7 +19,7 @@ match_t *match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, unsigned i
__attribute__((nonnull))
void destroy_match(match_t **m);
__attribute__((nonnull))
void print_match(FILE *out, file_t *f, match_t *m, print_options_t options);
match_t *get_capture(match_t *m, const char **r);
#endif
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1