Refactor the grammar logic to use *only* a single linked list for all
grammar rules and backrefs. This simplifies things a lot.
This commit is contained in:
Bruce Hill 2021-01-10 01:45:40 -08:00
parent b8a5d399d7
commit 421880be12
6 changed files with 83 additions and 124 deletions

32
bp.c
View File

@ -72,14 +72,14 @@ static int print_errors(file_t *f, match_t *m)
return ret;
}
static int run_match(grammar_t *g, const char *filename, vm_op_t *pattern, unsigned int flags)
static int run_match(def_t *defs, const char *filename, vm_op_t *pattern, unsigned int flags)
{
static int printed_matches = 0;
file_t *f = load_file(filename);
check(f, "Could not open file: %s", filename);
if (flags & BP_INPLACE) // Need to do this before matching
intern_file(f);
match_t *m = match(g, f, f->contents, pattern, flags);
match_t *m = match(defs, f, f->contents, pattern, flags);
if (m && print_errors(f, m) > 0)
_exit(1);
if (m != NULL && m->end > m->start + 1) {
@ -133,14 +133,14 @@ int main(int argc, char *argv[])
char path[PATH_MAX] = {0};
const char *rule = "find-all";
grammar_t *g = new(grammar_t);
def_t *defs = NULL;
// Load builtins:
if (access("/etc/xdg/bp/builtins.bp", R_OK) != -1)
load_grammar(g, load_file("/etc/xdg/bp/builtins.bp")); // Keep in memory for debugging output
defs = load_grammar(defs, load_file("/etc/xdg/bp/builtins.bp")); // Keep in memory for debugging output
sprintf(path, "%s/.config/bp/builtins.bp", getenv("HOME"));
if (access(path, R_OK) != -1)
load_grammar(g, load_file(path)); // Keep in memory for debugging output
defs = load_grammar(defs, load_file(path)); // Keep in memory for debugging output
int i, npatterns = 0;
check(argc > 1, "%s", usage);
@ -172,7 +172,7 @@ int main(int argc, char *argv[])
file_t *replace_file = spoof_file("<replace argument>", flag);
vm_op_t *rep = bp_replacement(replace_file, patref, replace_file->contents);
check(rep, "Replacement failed to compile: %s", flag);
add_def(g, replace_file, "replacement", rep);
defs = with_def(defs, replace_file, "replacement", rep);
rule = "replace-all";
} else if (FLAG("--grammar") || FLAG("-g")) {
file_t *f = load_file(flag);
@ -185,7 +185,7 @@ int main(int argc, char *argv[])
f = load_file(path);
}
check(f != NULL, "Couldn't find grammar: %s", flag);
load_grammar(g, f); // Keep in memory for debug output
defs = load_grammar(defs, f); // Keep in memory for debug output
} else if (FLAG("--define") || FLAG("-d")) {
char *def = flag;
char *eq = strchr(def, ':');
@ -195,7 +195,7 @@ int main(int argc, char *argv[])
file_t *def_file = spoof_file(def, src);
vm_op_t *pat = bp_pattern(def_file, def_file->contents);
check(pat, "Failed to compile pattern: %s", flag);
add_def(g, def_file, def, pat);
defs = with_def(defs, def_file, def, pat);
} else if (FLAG("--define-string") || FLAG("-D")) {
char *def = flag;
char *eq = strchr(def, ':');
@ -205,19 +205,19 @@ int main(int argc, char *argv[])
file_t *def_file = spoof_file(def, src);
vm_op_t *pat = bp_stringpattern(def_file, def_file->contents);
check(pat, "Failed to compile pattern: %s", src);
add_def(g, def_file, def, pat);
defs = with_def(defs, def_file, def, pat);
} else if (FLAG("--pattern") || FLAG("-p")) {
check(npatterns == 0, "Cannot define multiple patterns");
file_t *arg_file = spoof_file("<pattern argument>", flag);
vm_op_t *p = bp_pattern(arg_file, arg_file->contents);
check(p, "Pattern failed to compile: %s", flag);
add_def(g, arg_file, "pattern", p);
defs = with_def(defs, arg_file, "pattern", p);
++npatterns;
} else if (FLAG("--pattern-string") || FLAG("-P")) {
file_t *arg_file = spoof_file("<pattern argument>", flag);
vm_op_t *p = bp_stringpattern(arg_file, arg_file->contents);
check(p, "Pattern failed to compile: %s", flag);
add_def(g, arg_file, "pattern", p);
defs = with_def(defs, arg_file, "pattern", p);
++npatterns;
} else if (FLAG("--mode") || FLAG("-m")) {
rule = flag;
@ -242,7 +242,7 @@ int main(int argc, char *argv[])
file_t *arg_file = spoof_file("<pattern argument>", argv[i]);
vm_op_t *p = bp_stringpattern(arg_file, arg_file->contents);
check(p, "Pattern failed to compile: %s", argv[i]);
add_def(g, arg_file, "pattern", p);
defs = with_def(defs, arg_file, "pattern", p);
++npatterns;
} else {
printf("Unrecognized flag: %s\n\n%s\n", argv[i], usage);
@ -260,7 +260,7 @@ int main(int argc, char *argv[])
print_options |= PRINT_COLOR | PRINT_LINE_NUMBERS;
}
vm_op_t *pattern = lookup(g, rule);
vm_op_t *pattern = lookup(defs, rule);
check(pattern != NULL, "No such rule: '%s'", rule);
int ret = 1;
@ -268,7 +268,7 @@ int main(int argc, char *argv[])
if (i < argc) {
// Files pass in as command line args:
for (int nfiles = 0; i < argc; nfiles++, i++) {
ret &= run_match(g, argv[i], pattern, flags);
ret &= run_match(defs, argv[i], pattern, flags);
}
} else if (isatty(STDIN_FILENO)) {
// No files, no piped in input, so use * **/*:
@ -276,12 +276,12 @@ int main(int argc, char *argv[])
glob("*", 0, NULL, &globbuf);
glob("**/*", GLOB_APPEND, NULL, &globbuf);
for (size_t i = 0; i < globbuf.gl_pathc; i++) {
ret &= run_match(g, globbuf.gl_pathv[i], pattern, flags);
ret &= run_match(defs, globbuf.gl_pathv[i], pattern, flags);
}
globfree(&globbuf);
} else {
// Piped in input:
ret &= run_match(g, NULL, pattern, flags);
ret &= run_match(defs, NULL, pattern, flags);
}
if (flags & BP_JSON) printf("]\n");

View File

@ -11,28 +11,27 @@
#include "utils.h"
/*
* Add a definition to the grammar
* Return a new list of definitions with one added to the front
*/
void add_def(grammar_t *g, file_t *f, const char *name, vm_op_t *op)
def_t *with_def(def_t *defs, file_t *f, const char *name, vm_op_t *op)
{
def_t *def = new(def_t);
def->next = g->firstdef;
def->next = defs;
def->file = f;
def->name = name;
def->op = op;
g->firstdef = def;
return def;
}
/*
* Load the given grammar (semicolon-separated definitions), adding each
* definition to the front of the given list, and return the updated list.
*/
vm_op_t *load_grammar(grammar_t *g, file_t *f)
def_t *load_grammar(def_t *defs, file_t *f)
{
vm_op_t *ret = NULL;
const char *src = f->contents;
src = after_spaces(src);
while (*src) {
while (src < f->end) {
const char *name = src;
src = after_name(name);
check(src > name, "Invalid name for definition: %s", name);
@ -40,11 +39,7 @@ vm_op_t *load_grammar(grammar_t *g, file_t *f)
check(matchchar(&src, ':'), "Expected ':' in definition");
vm_op_t *op = bp_pattern(f, src);
if (op == NULL) break;
//check(op, "Couldn't load definition");
add_def(g, f, name, op);
if (ret == NULL) {
ret = op;
}
defs = with_def(defs, f, name, op);
src = op->end;
src = after_spaces(src);
if (matchchar(&src, ';'))
@ -54,21 +49,17 @@ vm_op_t *load_grammar(grammar_t *g, file_t *f)
fprint_line(stderr, f, src, NULL, "Invalid BP pattern");
_exit(1);
}
return ret;
return defs;
}
/*
* Look up a backreference or grammar definition by name
*/
vm_op_t *lookup(grammar_t *g, const char *name)
vm_op_t *lookup(def_t *defs, const char *name)
{
for (backref_t *b = g->firstbackref; b; b = b->next) {
if (streq(b->name, name))
return b->op;
}
for (def_t *d = g->firstdef; d; d = d->next) {
if (streq(d->name, name))
return d->op;
for ( ; defs; defs = defs->next) {
if (streq(defs->name, name))
return defs->op;
}
return NULL;
}
@ -76,49 +67,29 @@ vm_op_t *lookup(grammar_t *g, const char *name)
/*
* Return a new list of definitions with a backreference to the given match
* added to the front
*/
void push_backref(grammar_t *g, const char *name, match_t *capture)
static def_t *with_backref(def_t *defs, file_t *f, const char *name, match_t *m)
{
backref_t *backref = new(backref_t);
backref->name = name;
vm_op_t *op = new(vm_op_t);
op->op = VM_BACKREF;
op->start = capture->start;
op->end = capture->end;
op->start = m->start;
op->end = m->end;
op->len = -1; // TODO: maybe calculate this? (nontrivial because of replacements)
op->args.backref = capture;
backref->op = op;
backref->next = g->firstbackref;
g->firstbackref = backref;
op->args.backref = m;
return with_def(defs, f, name, op);
}
/*
* Return a new list of definitions with a backreference added for every named
* capture contained in a match (matches of VM_REF ops are not descended into)
*/
size_t push_backrefs(grammar_t *g, match_t *m)
def_t *with_backrefs(def_t *defs, file_t *f, match_t *m)
{
if (m->op->op == VM_REF) return 0;
size_t count = 0;
if (m->op->op == VM_CAPTURE && m->op->args.capture.name) {
++count;
push_backref(g, m->op->args.capture.name, m->child);
}
if (m->child) count += push_backrefs(g, m->child);
if (m->nextsibling) count += push_backrefs(g, m->nextsibling);
return count;
}
/*
* Pop a number of backreferences off the backreference stack
*/
void pop_backrefs(grammar_t *g, size_t count)
{
for ( ; count > 0; count--) {
backref_t *b = g->firstbackref;
g->firstbackref = b->next;
check(b, "Attempt to pop %ld more backrefs than there are", count);
xfree(&b->op);
xfree(&b);
if (m->op->op != VM_REF) {
if (m->op->op == VM_CAPTURE && m->op->args.capture.name)
defs = with_backref(defs, f, m->op->args.capture.name, m->child);
if (m->child) defs = with_backrefs(defs, f, m->child);
if (m->nextsibling) defs = with_backrefs(defs, f, m->nextsibling);
}
return defs;
}
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1

View File

@ -7,18 +7,14 @@
#include "file_loader.h"
#include "types.h"
__attribute__((nonnull(1,3,4)))
void add_def(grammar_t *g, file_t *f, const char *name, vm_op_t *op);
__attribute__((nonnull))
void push_backref(grammar_t *g, const char *name, match_t *capture);
__attribute__((nonnull))
size_t push_backrefs(grammar_t *g, match_t *m);
__attribute__((nonnull))
void pop_backrefs(grammar_t *g, size_t count);
__attribute__((nonnull))
vm_op_t *load_grammar(grammar_t *g, file_t *f);
__attribute__((pure, nonnull))
vm_op_t *lookup(grammar_t *g, const char *name);
__attribute__((nonnull(2,3,4)))
def_t *with_def(def_t *defs, file_t *f, const char *name, vm_op_t *op);
__attribute__((nonnull(2,3)))
def_t *with_backrefs(def_t *defs, file_t *f, match_t *m);
__attribute__((nonnull(2)))
def_t *load_grammar(def_t *defs, file_t *f);
__attribute__((pure, nonnull(2)))
vm_op_t *lookup(def_t *defs, const char *name);
#endif
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1

19
types.h
View File

@ -89,7 +89,7 @@ typedef struct match_s {
} match_t;
/*
* Pattern matching rule definition
* Pattern matching rule definition(s)
*/
typedef struct def_s {
const char *name;
@ -98,22 +98,5 @@ typedef struct def_s {
struct def_s *next;
} def_t;
/*
* Backreference (look up previous capture by name)
*/
typedef struct backref_s {
const char *name;
vm_op_t *op;
struct backref_s *next;
} backref_t;
/*
* Grammar (a collection of definitions)
*/
typedef struct {
def_t *firstdef;
backref_t *firstbackref;
} grammar_t;
#endif
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1

55
vm.c
View File

@ -163,7 +163,7 @@ static match_t *match_backref(const char *str, vm_op_t *op, match_t *cap, unsign
* a match struct, or NULL if no match is found.
* The returned value should be released with destroy_match() to avoid leaking memory.
*/
static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, unsigned int flags, recursive_ref_t *rec)
static match_t *_match(def_t *defs, file_t *f, const char *str, vm_op_t *op, unsigned int flags, recursive_ref_t *rec)
{
switch (op->op) {
case VM_ANYCHAR: {
@ -197,7 +197,7 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un
return m;
}
case VM_NOT: {
match_t *m = _match(g, f, str, op->args.pat, flags, rec);
match_t *m = _match(defs, f, str, op->args.pat, flags, rec);
if (m != NULL) {
destroy_match(&m);
return NULL;
@ -219,7 +219,7 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un
for (const char *prev = NULL; prev < str; ) {
prev = str;
if (op->args.multiple.first) {
match_t *p = _match(g, f, str, op->args.multiple.first, flags, rec);
match_t *p = _match(defs, f, str, op->args.multiple.first, flags, rec);
if (p) {
*dest = p;
m->end = p->end;
@ -230,7 +230,7 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un
return m;
}
if (op->args.multiple.second) {
match_t *p = _match(g, f, str, op->args.multiple.second, flags, rec);
match_t *p = _match(defs, f, str, op->args.multiple.second, flags, rec);
if (p) {
*dest = p;
dest = &p->nextsibling;
@ -264,11 +264,11 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un
// Separator
match_t *sep = NULL;
if (op->args.repetitions.sep != NULL && reps > 0) {
sep = _match(g, f, str, op->args.repetitions.sep, flags, rec);
sep = _match(defs, f, str, op->args.repetitions.sep, flags, rec);
if (sep == NULL) break;
str = sep->end;
}
match_t *p = _match(g, f, str, op->args.repetitions.repeat_pat, flags, rec);
match_t *p = _match(defs, f, str, op->args.repetitions.repeat_pat, flags, rec);
if (p == NULL) {
destroy_match(&sep);
break;
@ -308,7 +308,7 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un
ssize_t backtrack = op->args.pat->len;
check(backtrack != -1, "'<' is only allowed for fixed-length operations");
if (str - backtrack < f->contents) return NULL;
match_t *before = _match(g, f, str - backtrack, op->args.pat, flags, rec);
match_t *before = _match(defs, f, str - backtrack, op->args.pat, flags, rec);
if (before == NULL) return NULL;
match_t *m = new(match_t);
m->start = str;
@ -318,7 +318,7 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un
return m;
}
case VM_BEFORE: {
match_t *after = _match(g, f, str, op->args.pat, flags, rec);
match_t *after = _match(defs, f, str, op->args.pat, flags, rec);
if (after == NULL) return NULL;
match_t *m = new(match_t);
m->start = str;
@ -328,7 +328,7 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un
return m;
}
case VM_CAPTURE: {
match_t *p = _match(g, f, str, op->args.pat, flags, rec);
match_t *p = _match(defs, f, str, op->args.pat, flags, rec);
if (p == NULL) return NULL;
match_t *m = new(match_t);
m->start = str;
@ -338,7 +338,7 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un
return m;
}
case VM_HIDE: {
match_t *p = _match(g, f, str, op->args.pat, flags, rec);
match_t *p = _match(defs, f, str, op->args.pat, flags, rec);
if (p == NULL) return NULL;
match_t *m = new(match_t);
m->start = str;
@ -348,17 +348,26 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un
return m;
}
case VM_OTHERWISE: {
match_t *m = _match(g, f, str, op->args.multiple.first, flags, rec);
if (m == NULL) m = _match(g, f, str, op->args.multiple.second, flags, rec);
match_t *m = _match(defs, f, str, op->args.multiple.first, flags, rec);
if (m == NULL) m = _match(defs, f, str, op->args.multiple.second, flags, rec);
return m;
}
case VM_CHAIN: {
match_t *m1 = _match(g, f, str, op->args.multiple.first, flags, rec);
match_t *m1 = _match(defs, f, str, op->args.multiple.first, flags, rec);
if (m1 == NULL) return NULL;
size_t nbackrefs = push_backrefs(g, m1);
match_t *m2 = _match(g, f, m1->end, op->args.multiple.second, flags, rec);
pop_backrefs(g, nbackrefs);
match_t *m2;
{ // Push backrefs and run matching, then cleanup
def_t *defs2 = with_backrefs(defs, f, m1);
m2 = _match(defs2, f, m1->end, op->args.multiple.second, flags, rec);
while (defs2 != defs) {
def_t *next = defs2->next;
defs2->next = NULL;
xfree(&defs2);
defs2 = next;
}
}
if (m2 == NULL) {
destroy_match(&m1);
return NULL;
@ -372,7 +381,7 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un
return m;
}
case VM_EQUAL: case VM_NOT_EQUAL: {
match_t *m1 = _match(g, f, str, op->args.multiple.first, flags, rec);
match_t *m1 = _match(defs, f, str, op->args.multiple.first, flags, rec);
if (m1 == NULL) return NULL;
// <p1>==<p2> matches iff the text of <p1> matches <p2>
@ -384,7 +393,7 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un
.nlines=1 + get_line_number(f, m1->end)-get_line_number(f, m1->start),
.mmapped=f->mmapped,
};
match_t *m2 = _match(g, &inner, str, op->args.multiple.second, flags, rec);
match_t *m2 = _match(defs, &inner, str, op->args.multiple.second, flags, rec);
if ((m2 == NULL) == (op->op == VM_EQUAL)) {
destroy_match(&m1);
destroy_match(&m2);
@ -405,7 +414,7 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un
case VM_REPLACE: {
match_t *p = NULL;
if (op->args.replace.pat) {
p = _match(g, f, str, op->args.replace.pat, flags, rec);
p = _match(defs, f, str, op->args.replace.pat, flags, rec);
if (p == NULL) return NULL;
}
match_t *m = new(match_t);
@ -420,7 +429,7 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un
return m;
}
case VM_REF: {
vm_op_t *r = lookup(g, op->args.s);
vm_op_t *r = lookup(defs, op->args.s);
check(r != NULL, "Unknown identifier: '%s'", op->args.s);
// Prevent infinite left recursion:
@ -440,7 +449,7 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un
};
match_t *best = NULL;
left_recursive:;
match_t *p = _match(g, f, str, r, flags, &wrap);
match_t *p = _match(defs, f, str, r, flags, &wrap);
if (p == NULL) return best;
if (wrap.hit && (best == NULL || p->end > best->end)) {
best = p;
@ -546,9 +555,9 @@ match_t *get_capture(match_t *m, const char **r)
return NULL;
}
match_t *match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, unsigned int flags)
match_t *match(def_t *defs, file_t *f, const char *str, vm_op_t *op, unsigned int flags)
{
return _match(g, f, str, op, flags, NULL);
return _match(defs, f, str, op, flags, NULL);
}
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1

4
vm.h
View File

@ -9,8 +9,8 @@
#include "types.h"
const char *opcode_name(enum VMOpcode o);
__attribute__((hot, nonnull))
match_t *match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, unsigned int flags);
__attribute__((hot, nonnull(2,3,4)))
match_t *match(def_t *defs, file_t *f, const char *str, vm_op_t *op, unsigned int flags);
__attribute__((nonnull))
void destroy_match(match_t **m);
__attribute__((nonnull))