aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--bp.c59
-rw-r--r--compiler.c18
-rw-r--r--file_loader.c19
-rw-r--r--file_loader.h9
-rw-r--r--grammar.c15
-rw-r--r--grammar.h6
-rw-r--r--types.h9
-rw-r--r--vm.c241
-rw-r--r--vm.h2
9 files changed, 222 insertions, 156 deletions
diff --git a/bp.c b/bp.c
index 309e187..bab03ea 100644
--- a/bp.c
+++ b/bp.c
@@ -74,13 +74,14 @@ static int process_file(def_t *defs, const char *filename, vm_op_t *pattern, uns
{
static int printed_matches = 0;
int success = 0;
- file_t *f = load_file(filename);
+ file_t *f = load_file(NULL, filename);
check(f, "Could not open file: %s", filename);
if (flags & BP_INPLACE) // Need to do this before matching
intern_file(f);
match_t *m = match(defs, f, f->contents, pattern, flags);
if (m && print_errors(f, m, print_options) > 0)
_exit(1);
+
if (m != NULL && m->end > m->start + 1) {
success = 1;
++printed_matches;
@@ -137,12 +138,18 @@ int main(int argc, char *argv[])
def_t *defs = NULL;
+ file_t *loaded_files = NULL;
+
// Load builtins:
- if (access("/etc/xdg/bp/builtins.bp", R_OK) != -1)
- defs = load_grammar(defs, load_file("/etc/xdg/bp/builtins.bp")); // Keep in memory for debugging output
+ if (access("/etc/xdg/bp/builtins.bp", R_OK) != -1) {
+ file_t *f = load_file(&loaded_files, "/etc/xdg/bp/builtins.bp");
+ defs = load_grammar(defs, f);
+ }
sprintf(path, "%s/.config/bp/builtins.bp", getenv("HOME"));
- if (access(path, R_OK) != -1)
- defs = load_grammar(defs, load_file(path)); // Keep in memory for debugging output
+ if (access(path, R_OK) != -1) {
+ file_t *f = load_file(&loaded_files, path);
+ defs = load_grammar(defs, f);
+ }
int i, npatterns = 0;
check(argc > 1, "%s", usage);
@@ -169,22 +176,22 @@ int main(int argc, char *argv[])
} else if (FLAG("--replace") || FLAG("-r")) {
// TODO: spoof file as sprintf("pattern => '%s'", flag)
// except that would require handling edge cases like quotation marks etc.
- file_t *pat_file = spoof_file("<pattern>", "pattern");
+ file_t *pat_file = spoof_file(&loaded_files, "<pattern>", "pattern");
vm_op_t *patref = bp_pattern(pat_file, pat_file->contents);
- file_t *replace_file = spoof_file("<replace argument>", flag);
+ file_t *replace_file = spoof_file(&loaded_files, "<replace argument>", flag);
vm_op_t *rep = bp_replacement(replace_file, patref, replace_file->contents);
check(rep, "Replacement failed to compile: %s", flag);
- defs = with_def(defs, replace_file, "replacement", rep);
+ defs = with_def(defs, replace_file, strlen("replacement"), "replacement", rep);
rule = "replace-all";
} else if (FLAG("--grammar") || FLAG("-g")) {
- file_t *f = load_file(flag);
+ file_t *f = load_file(&loaded_files, flag);
if (f == NULL) {
sprintf(path, "%s/.config/bp/%s.bp", getenv("HOME"), flag);
- f = load_file(path);
+ f = load_file(&loaded_files, path);
}
if (f == NULL) {
sprintf(path, "/etc/xdg/bp/%s.bp", flag);
- f = load_file(path);
+ f = load_file(&loaded_files, path);
}
check(f != NULL, "Couldn't find grammar: %s", flag);
defs = load_grammar(defs, f); // Keep in memory for debug output
@@ -194,32 +201,32 @@ int main(int argc, char *argv[])
check(eq, "Rule definitions must include an ':'\n\n%s", usage);
*eq = '\0';
char *src = ++eq;
- file_t *def_file = spoof_file(def, src);
+ file_t *def_file = spoof_file(&loaded_files, def, src);
vm_op_t *pat = bp_pattern(def_file, def_file->contents);
check(pat, "Failed to compile pattern: %s", flag);
- defs = with_def(defs, def_file, def, pat);
+ defs = with_def(defs, def_file, strlen(def), def, pat);
} else if (FLAG("--define-string") || FLAG("-D")) {
char *def = flag;
char *eq = strchr(def, ':');
check(eq, "Rule definitions must include an ':'\n\n%s", usage);
*eq = '\0';
char *src = ++eq;
- file_t *def_file = spoof_file(def, src);
+ file_t *def_file = spoof_file(&loaded_files, def, src);
vm_op_t *pat = bp_stringpattern(def_file, def_file->contents);
check(pat, "Failed to compile pattern: %s", src);
- defs = with_def(defs, def_file, def, pat);
+ defs = with_def(defs, def_file, strlen(def), def, pat);
} else if (FLAG("--pattern") || FLAG("-p")) {
check(npatterns == 0, "Cannot define multiple patterns");
- file_t *arg_file = spoof_file("<pattern argument>", flag);
+ file_t *arg_file = spoof_file(&loaded_files, "<pattern argument>", flag);
vm_op_t *p = bp_pattern(arg_file, arg_file->contents);
check(p, "Pattern failed to compile: %s", flag);
- defs = with_def(defs, arg_file, "pattern", p);
+ defs = with_def(defs, arg_file, strlen("pattern"), "pattern", p);
++npatterns;
} else if (FLAG("--pattern-string") || FLAG("-P")) {
- file_t *arg_file = spoof_file("<pattern argument>", flag);
+ file_t *arg_file = spoof_file(&loaded_files, "<pattern argument>", flag);
vm_op_t *p = bp_stringpattern(arg_file, arg_file->contents);
check(p, "Pattern failed to compile: %s", flag);
- defs = with_def(defs, arg_file, "pattern", p);
+ defs = with_def(defs, arg_file, strlen("pattern"), "pattern", p);
++npatterns;
} else if (FLAG("--mode") || FLAG("-m")) {
rule = flag;
@@ -241,10 +248,10 @@ int main(int argc, char *argv[])
} else if (argv[i][0] != '-') {
if (npatterns > 0) break;
// TODO: spoof file with quotation marks for better debugging
- file_t *arg_file = spoof_file("<pattern argument>", argv[i]);
+ file_t *arg_file = spoof_file(&loaded_files, "<pattern argument>", argv[i]);
vm_op_t *p = bp_stringpattern(arg_file, arg_file->contents);
check(p, "Pattern failed to compile: %s", argv[i]);
- defs = with_def(defs, arg_file, "pattern", p);
+ defs = with_def(defs, arg_file, strlen("pattern"), "pattern", p);
++npatterns;
} else {
printf("Unrecognized flag: %s\n\n%s\n", argv[i], usage);
@@ -262,8 +269,9 @@ int main(int argc, char *argv[])
print_options |= PRINT_COLOR | PRINT_LINE_NUMBERS;
}
- vm_op_t *pattern = lookup(defs, rule);
- check(pattern != NULL, "No such rule: '%s'", rule);
+ def_t *pattern_def = lookup(defs, rule);
+ check(pattern_def != NULL, "No such rule: '%s'", rule);
+ vm_op_t *pattern = pattern_def->op;
int found = 0;
if (flags & BP_JSON) printf("[");
@@ -288,6 +296,11 @@ int main(int argc, char *argv[])
if (flags & BP_JSON) printf("]\n");
free_defs(&defs, NULL);
+ while (loaded_files) {
+ file_t *next = loaded_files->next;
+ destroy_file(&loaded_files);
+ loaded_files = next;
+ }
return (found > 0) ? 0 : 1;
}
diff --git a/compiler.c b/compiler.c
index 87b92c6..882acdb 100644
--- a/compiler.c
+++ b/compiler.c
@@ -140,7 +140,7 @@ static vm_op_t *chain_together(file_t *f, vm_op_t *first, vm_op_t *second)
{
if (first == NULL) return second;
if (second == NULL) return first;
- check(first->type != VM_CHAIN, "A chain should not be the first item in a chain.\n");
+ check(first->type != VM_CHAIN, "A chain should not be the first item in a chain.");
vm_op_t *chain = new_op(f, first->start, VM_CHAIN);
chain->start = first->start;
if (first->len >= 0 && second->len >= 0)
@@ -160,7 +160,7 @@ vm_op_t *bp_simplepattern(file_t *f, const char *str)
vm_op_t *op = _bp_simplepattern(f, str);
if (op == NULL) return op;
- check(op->end != NULL, "op->end is uninitialized!\n");
+ check(op->end != NULL, "op->end is uninitialized!");
// Expand postfix operators (if any)
str = after_spaces(op->end);
@@ -516,8 +516,6 @@ vm_op_t *bp_stringpattern(file_t *f, const char *str)
{
vm_op_t *ret = NULL;
while (*str) {
- vm_op_t *strop = new_op(f, str, VM_STRING);
- strop->len = 0;
char *start = (char*)str;
vm_op_t *interp = NULL;
for (; *str; str++) {
@@ -553,13 +551,11 @@ vm_op_t *bp_stringpattern(file_t *f, const char *str)
// Note: an unescaped string is guaranteed to be no longer than the
// escaped string, so this is safe to do inplace.
len = unescape_string(literal, literal, len);
- strop->len = (ssize_t)len;
- strop->args.s = literal;
- strop->end = str;
-
- if (strop->len == 0) {
- xfree(&strop);
- } else {
+ if (len > 0) {
+ vm_op_t *strop = new_op(f, str, VM_STRING);
+ strop->len = (ssize_t)len;
+ strop->args.s = literal;
+ strop->end = str;
ret = chain_together(f, ret, strop);
}
if (interp) {
diff --git a/file_loader.c b/file_loader.c
index 27f871f..6e4aa3f 100644
--- a/file_loader.c
+++ b/file_loader.c
@@ -42,7 +42,7 @@ static void populate_lines(file_t *f)
//
// Read an entire file into memory.
//
-file_t *load_file(const char *filename)
+file_t *load_file(file_t **files, const char *filename)
{
if (filename == NULL) filename = "-";
int fd = streq(filename, "-") ? STDIN_FILENO : open(filename, O_RDONLY);
@@ -79,20 +79,28 @@ file_t *load_file(const char *filename)
finished_loading:
f->end = &f->contents[length];
populate_lines(f);
+ if (files != NULL) {
+ f->next = *files;
+ *files = f;
+ }
return f;
}
//
// Create a virtual file from a string.
//
-file_t *spoof_file(const char *filename, char *text)
+file_t *spoof_file(file_t **files, const char *filename, char *text)
{
if (filename == NULL) filename = "";
file_t *f = new(file_t);
f->filename = strdup(filename);
- f->contents = text;
+ f->contents = strdup(text);
f->end = &f->contents[strlen(text)];
populate_lines(f);
+ if (files != NULL) {
+ f->next = *files;
+ *files = f;
+ }
return f;
}
@@ -122,24 +130,23 @@ void destroy_file(file_t **f)
{
if ((*f)->filename) {
xfree(&((*f)->filename));
- (*f)->filename = NULL;
}
if ((*f)->lines) {
xfree(&((*f)->lines));
- (*f)->lines = NULL;
}
if ((*f)->contents) {
if ((*f)->mmapped) {
munmap((*f)->contents, (size_t)((*f)->end - (*f)->contents));
+ (*f)->contents = NULL;
} else {
xfree(&((*f)->contents));
}
- (*f)->contents = NULL;
}
while ((*f)->ops) {
+ destroy_op(&(*f)->ops->op);
allocated_op_t *tofree = (*f)->ops;
(*f)->ops = tofree->next;
memset(tofree, 'A', sizeof(allocated_op_t)); // Sentinel
diff --git a/file_loader.h b/file_loader.h
index 2a47d40..85bb920 100644
--- a/file_loader.h
+++ b/file_loader.h
@@ -8,7 +8,8 @@
struct allocated_op_s; // declared in types.h
-typedef struct {
+typedef struct file_s {
+ struct file_s *next;
const char *filename;
char *contents, **lines, *end;
size_t nlines;
@@ -16,9 +17,9 @@ typedef struct {
unsigned int mmapped:1;
} file_t;
-file_t *load_file(const char *filename);
-__attribute__((nonnull(2), returns_nonnull))
-file_t *spoof_file(const char *filename, char *text);
+file_t *load_file(file_t **files, const char *filename);
+__attribute__((nonnull(3), returns_nonnull))
+file_t *spoof_file(file_t **files, const char *filename, char *text);
__attribute__((nonnull))
void intern_file(file_t *f);
__attribute__((nonnull))
diff --git a/grammar.c b/grammar.c
index 08e7083..06b2693 100644
--- a/grammar.c
+++ b/grammar.c
@@ -16,11 +16,12 @@ static def_t *with_backref(def_t *defs, file_t *f, const char *name, match_t *m)
//
// Return a new list of definitions with one added to the front
//
-def_t *with_def(def_t *defs, file_t *f, const char *name, vm_op_t *op)
+def_t *with_def(def_t *defs, file_t *f, size_t namelen, const char *name, vm_op_t *op)
{
def_t *def = new(def_t);
def->next = defs;
def->file = f;
+ def->namelen = namelen;
def->name = name;
def->op = op;
return def;
@@ -38,11 +39,11 @@ def_t *load_grammar(def_t *defs, file_t *f)
const char *name = src;
src = after_name(name);
check(src > name, "Invalid name for definition: %s", name);
- name = strndup(name, (size_t)(src-name));
+ size_t namelen = (size_t)(src - name);
check(matchchar(&src, ':'), "Expected ':' in definition");
vm_op_t *op = bp_pattern(f, src);
if (op == NULL) break;
- defs = with_def(defs, f, name, op);
+ defs = with_def(defs, f, namelen, name, op);
src = op->end;
src = after_spaces(src);
if (matchchar(&src, ';'))
@@ -58,11 +59,11 @@ def_t *load_grammar(def_t *defs, file_t *f)
//
// Look up a backreference or grammar definition by name
//
-vm_op_t *lookup(def_t *defs, const char *name)
+def_t *lookup(def_t *defs, const char *name)
{
for ( ; defs; defs = defs->next) {
- if (streq(defs->name, name))
- return defs->op;
+ if (strlen(name) == defs->namelen && strncmp(defs->name, name, defs->namelen) == 0)
+ return defs;
}
return NULL;
}
@@ -76,7 +77,7 @@ static def_t *with_backref(def_t *defs, file_t *f, const char *name, match_t *m)
op->end = m->end;
op->len = -1; // TODO: maybe calculate this? (nontrivial because of replacements)
op->args.backref = m;
- return with_def(defs, f, name, op);
+ return with_def(defs, f, strlen(name), name, op);
}
//
diff --git a/grammar.h b/grammar.h
index 6822c24..8d69e76 100644
--- a/grammar.h
+++ b/grammar.h
@@ -7,14 +7,14 @@
#include "file_loader.h"
#include "types.h"
-__attribute__((nonnull(2,3,4), returns_nonnull))
-def_t *with_def(def_t *defs, file_t *f, const char *name, vm_op_t *op);
+__attribute__((nonnull(2,4,5), returns_nonnull))
+def_t *with_def(def_t *defs, file_t *f, size_t namelen, const char *name, vm_op_t *op);
__attribute__((nonnull(2,3)))
def_t *with_backrefs(def_t *defs, file_t *f, match_t *m);
__attribute__((nonnull(2)))
def_t *load_grammar(def_t *defs, file_t *f);
__attribute__((pure, nonnull(2)))
-vm_op_t *lookup(def_t *defs, const char *name);
+def_t *lookup(def_t *defs, const char *name);
__attribute__((nonnull(1)))
void free_defs(def_t **defs, def_t *stop);
diff --git a/types.h b/types.h
index ad7781b..b8c2b68 100644
--- a/types.h
+++ b/types.h
@@ -39,6 +39,7 @@ enum VMOpcode {
VM_REF,
VM_BACKREF,
VM_NODENT,
+ VM_CANNED,
};
struct match_s; // forward declared to resolve circular struct defs
@@ -74,6 +75,12 @@ typedef struct vm_op_s {
char *name;
} capture;
struct match_s *backref;
+ struct {
+ struct match_s *match;
+ unsigned int visits;
+ const char *at;
+ struct vm_op_s *fallback;
+ } canned;
struct vm_op_s *pat;
} args;
} vm_op_t;
@@ -86,12 +93,14 @@ typedef struct match_s {
const char *start, *end;
struct match_s *child, *nextsibling;
vm_op_t *op;
+ unsigned int refcount;
} match_t;
//
// Pattern matching rule definition(s)
//
typedef struct def_s {
+ size_t namelen;
const char *name;
file_t *file;
vm_op_t *op;
diff --git a/vm.c b/vm.c
index 0e5293f..9e35334 100644
--- a/vm.c
+++ b/vm.c
@@ -12,25 +12,25 @@
#include "utils.h"
#include "vm.h"
-typedef struct recursive_ref_s {
- const vm_op_t *op;
- const char *pos;
- struct recursive_ref_s *prev;
- int hit;
- match_t *result;
-} recursive_ref_t;
-
__attribute__((nonnull, pure))
static inline const char *next_char(file_t *f, const char *str);
__attribute__((nonnull))
static const char *match_backref(const char *str, vm_op_t *op, match_t *cap, unsigned int flags);
__attribute__((hot, nonnull(2,3,4)))
-static match_t *_match(def_t *defs, file_t *f, const char *str, vm_op_t *op, unsigned int flags, recursive_ref_t *rec);
+static match_t *_match(def_t *defs, file_t *f, const char *str, vm_op_t *op, unsigned int flags);
__attribute__((nonnull))
static match_t *get_capture_by_num(match_t *m, int *n);
__attribute__((nonnull, pure))
static match_t *get_capture_by_name(match_t *m, const char *name);
+static inline void set_owner(match_t *m, match_t **owner)
+{
+ check(owner != &m->child, "Circular ownership");
+ check(owner != &m->nextsibling, "Circular ownership");
+ *owner = m;
+ ++m->refcount;
+}
+
//
// Return the location of the next character or UTF8 codepoint.
// (i.e. skip forward one codepoint at a time, not one byte at a time)
@@ -56,9 +56,14 @@ static inline const char *next_char(file_t *f, const char *str)
//
void destroy_match(match_t **m)
{
- if (!*m) return;
+ if (*m == NULL) return;
+ if (--(*m)->refcount > 0) {
+ *m = NULL;
+ return;
+ }
destroy_match(&((*m)->child));
destroy_match(&((*m)->nextsibling));
+ free(*m);
*m = NULL;
}
@@ -126,9 +131,18 @@ static const char *match_backref(const char *str, vm_op_t *op, match_t *cap, uns
// a match struct, or NULL if no match is found.
// The returned value should be free()'d to avoid memory leaking.
//
-static match_t *_match(def_t *defs, file_t *f, const char *str, vm_op_t *op, unsigned int flags, recursive_ref_t *rec)
+static match_t *_match(def_t *defs, file_t *f, const char *str, vm_op_t *op, unsigned int flags)
{
switch (op->type) {
+ case VM_CANNED: {
+ if (str == op->args.canned.at) {
+ ++op->args.canned.visits;
+ // TODO: deep copy?
+ return op->args.canned.match;
+ } else {
+ return _match(defs, f, str, op->args.canned.fallback, flags);
+ }
+ }
case VM_ANYCHAR: {
if (str >= f->end || *str == '\n')
return NULL;
@@ -141,7 +155,7 @@ static match_t *_match(def_t *defs, file_t *f, const char *str, vm_op_t *op, uns
case VM_STRING: {
if (&str[op->len] > f->end) return NULL;
if ((flags & BP_IGNORECASE) ? memicmp(str, op->args.s, (size_t)op->len) != 0
- : memcmp(str, op->args.s, (size_t)op->len) != 0)
+ : memcmp(str, op->args.s, (size_t)op->len) != 0)
return NULL;
match_t *m = new(match_t);
m->op = op;
@@ -160,7 +174,7 @@ static match_t *_match(def_t *defs, file_t *f, const char *str, vm_op_t *op, uns
return m;
}
case VM_NOT: {
- match_t *m = _match(defs, f, str, op->args.pat, flags, rec);
+ match_t *m = _match(defs, f, str, op->args.pat, flags);
if (m != NULL) {
destroy_match(&m);
return NULL;
@@ -175,43 +189,45 @@ static match_t *_match(def_t *defs, file_t *f, const char *str, vm_op_t *op, uns
match_t *m = new(match_t);
m->start = str;
m->op = op;
- if (!op->args.multiple.first && !op->args.multiple.second) {
+
+ vm_op_t *pat = op->args.multiple.first, *skip = op->args.multiple.second;
+ if (!pat && !skip) {
while (str < f->end && *str != '\n') ++str;
- } else {
- match_t **dest = &m->child;
- for (const char *prev = NULL; prev < str; ) {
- prev = str;
- if (op->args.multiple.first) {
- match_t *p = _match(defs, f, str, op->args.multiple.first, flags, rec);
- if (p) {
- *dest = p;
- m->end = p->end;
- return m;
- }
- } else if (str == f->end) {
- m->end = str;
+ m->end = str;
+ return m;
+ }
+
+ match_t **dest = &m->child;
+ for (const char *prev = NULL; prev < str; ) {
+ prev = str;
+ if (pat) {
+ match_t *p = _match(defs, f, str, pat, flags);
+ if (p != NULL) {
+ set_owner(p, dest);
+ m->end = p->end;
return m;
}
- if (op->args.multiple.second) {
- match_t *p = _match(defs, f, str, op->args.multiple.second, flags, rec);
- if (p) {
- *dest = p;
- dest = &p->nextsibling;
- str = p->end;
- continue;
- }
+ } else if (str == f->end) {
+ m->end = str;
+ return m;
+ }
+ if (skip) {
+ match_t *s = _match(defs, f, str, skip, flags);
+ if (s != NULL) {
+ set_owner(s, dest);
+ dest = &s->nextsibling;
+ str = s->end;
+ continue;
}
- // This isn't in the for() structure because there needs to
- // be at least once chance to match the pattern, even if
- // we're at the end of the string already (e.g. "..$").
- if (str < f->end && *str != '\n')
- str = next_char(f, str);
}
- destroy_match(&m);
- return NULL;
+ // This isn't in the for() structure because there needs to
+ // be at least once chance to match the pattern, even if
+ // we're at the end of the string already (e.g. "..$").
+ if (str < f->end && *str != '\n')
+ str = next_char(f, str);
}
- m->end = str;
- return m;
+ destroy_match(&m);
+ return NULL;
}
case VM_REPEAT: {
match_t *m = new(match_t);
@@ -227,11 +243,11 @@ static match_t *_match(def_t *defs, file_t *f, const char *str, vm_op_t *op, uns
// Separator
match_t *sep = NULL;
if (op->args.repetitions.sep != NULL && reps > 0) {
- sep = _match(defs, f, str, op->args.repetitions.sep, flags, rec);
+ sep = _match(defs, f, str, op->args.repetitions.sep, flags);
if (sep == NULL) break;
str = sep->end;
}
- match_t *p = _match(defs, f, str, op->args.repetitions.repeat_pat, flags, rec);
+ match_t *p = _match(defs, f, str, op->args.repetitions.repeat_pat, flags);
if (p == NULL) {
str = start;
destroy_match(&sep);
@@ -253,10 +269,10 @@ static match_t *_match(def_t *defs, file_t *f, const char *str, vm_op_t *op, uns
break;
}
if (sep) {
- *dest = sep;
+ set_owner(sep, dest);
dest = &sep->nextsibling;
}
- *dest = p;
+ set_owner(p, dest);
dest = &p->nextsibling;
str = p->end;
}
@@ -272,58 +288,58 @@ static match_t *_match(def_t *defs, file_t *f, const char *str, vm_op_t *op, uns
ssize_t backtrack = op->args.pat->len;
check(backtrack != -1, "'<' is only allowed for fixed-length operations");
if (str - backtrack < f->contents) return NULL;
- match_t *before = _match(defs, f, str - backtrack, op->args.pat, flags, rec);
+ match_t *before = _match(defs, f, str - backtrack, op->args.pat, flags);
if (before == NULL) return NULL;
match_t *m = new(match_t);
m->start = str;
m->end = str;
m->op = op;
- m->child = before;
+ set_owner(before, &m->child);
return m;
}
case VM_BEFORE: {
- match_t *after = _match(defs, f, str, op->args.pat, flags, rec);
+ match_t *after = _match(defs, f, str, op->args.pat, flags);
if (after == NULL) return NULL;
match_t *m = new(match_t);
m->start = str;
m->end = str;
m->op = op;
- m->child = after;
+ set_owner(after, &m->child);
return m;
}
case VM_CAPTURE: {
- match_t *p = _match(defs, f, str, op->args.pat, flags, rec);
+ match_t *p = _match(defs, f, str, op->args.pat, flags);
if (p == NULL) return NULL;
match_t *m = new(match_t);
m->start = str;
m->end = p->end;
m->op = op;
- m->child = p;
+ set_owner(p, &m->child);
return m;
}
case VM_HIDE: {
- match_t *p = _match(defs, f, str, op->args.pat, flags, rec);
+ match_t *p = _match(defs, f, str, op->args.pat, flags);
if (p == NULL) return NULL;
match_t *m = new(match_t);
m->start = str;
m->end = p->end;
m->op = op;
- m->child = p;
+ set_owner(p, &m->child);
return m;
}
case VM_OTHERWISE: {
- match_t *m = _match(defs, f, str, op->args.multiple.first, flags, rec);
- if (m == NULL) m = _match(defs, f, str, op->args.multiple.second, flags, rec);
+ match_t *m = _match(defs, f, str, op->args.multiple.first, flags);
+ if (m == NULL) m = _match(defs, f, str, op->args.multiple.second, flags);
return m;
}
case VM_CHAIN: {
- match_t *m1 = _match(defs, f, str, op->args.multiple.first, flags, rec);
+ match_t *m1 = _match(defs, f, str, op->args.multiple.first, flags);
if (m1 == NULL) return NULL;
match_t *m2;
{ // Push backrefs and run matching, then cleanup
def_t *defs2 = with_backrefs(defs, f, m1);
- m2 = _match(defs2, f, m1->end, op->args.multiple.second, flags, rec);
+ m2 = _match(defs2, f, m1->end, op->args.multiple.second, flags);
free_defs(&defs2, defs);
}
@@ -335,12 +351,12 @@ static match_t *_match(def_t *defs, file_t *f, const char *str, vm_op_t *op, uns
m->start = str;
m->end = m2->end;
m->op = op;
- m->child = m1;
- m1->nextsibling = m2;
+ set_owner(m1, &m->child);
+ set_owner(m2, &m1->nextsibling);
return m;
}
case VM_EQUAL: case VM_NOT_EQUAL: {
- match_t *m1 = _match(defs, f, str, op->args.multiple.first, flags, rec);
+ match_t *m1 = _match(defs, f, str, op->args.multiple.first, flags);
if (m1 == NULL) return NULL;
// <p1>==<p2> matches iff the text of <p1> matches <p2>
@@ -352,19 +368,19 @@ static match_t *_match(def_t *defs, file_t *f, const char *str, vm_op_t *op, uns
.nlines=1 + get_line_number(f, m1->end)-get_line_number(f, m1->start),
.mmapped=f->mmapped,
};
- match_t *m2 = _match(defs, &inner, str, op->args.multiple.second, flags, rec);
+ match_t *m2 = _match(defs, &inner, str, op->args.multiple.second, flags);
if ((m2 == NULL) == (op->type == VM_EQUAL)) {
destroy_match(&m1);
- destroy_match(&m2);
+ if (m2 != NULL) destroy_match(&m2);
return NULL;
}
match_t *m = new(match_t);
m->start = m1->start;
m->end = m1->end;
m->op = op;
- m->child = m1;
+ set_owner(m1, &m->child);
if (op->type == VM_EQUAL) {
- m1->nextsibling = m2;
+ set_owner(m2, &m1->nextsibling);
} else {
destroy_match(&m2);
}
@@ -373,14 +389,14 @@ static match_t *_match(def_t *defs, file_t *f, const char *str, vm_op_t *op, uns
case VM_REPLACE: {
match_t *p = NULL;
if (op->args.replace.pat) {
- p = _match(defs, f, str, op->args.replace.pat, flags, rec);
+ p = _match(defs, f, str, op->args.replace.pat, flags);
if (p == NULL) return NULL;
}
match_t *m = new(match_t);
m->start = str;
m->op = op;
if (p) {
- m->child = p;
+ set_owner(p, &m->child);
m->end = p->end;
} else {
m->end = m->start;
@@ -388,41 +404,43 @@ static match_t *_match(def_t *defs, file_t *f, const char *str, vm_op_t *op, uns
return m;
}
case VM_REF: {
- vm_op_t *r = lookup(defs, op->args.s);
- check(r != NULL, "Unknown identifier: '%s'", op->args.s);
-
- // Prevent infinite left recursion:
- for (recursive_ref_t *p = rec; p; p = p->prev) {
- if (p->pos == str && p->op == r) {
- ++p->hit;
- return p->result;
- }
- }
+ def_t *def = lookup(defs, op->args.s);
+ check(def != NULL, "Unknown identifier: '%s'", op->args.s);
+ vm_op_t *ref = def->op;
- recursive_ref_t wrap = {
- .op = r,
- .pos = str,
- .prev = rec,
- .hit = 0,
- .result = NULL,
+ vm_op_t fail_op = {
+ .type = VM_CANNED,
+ .start = ref->start,
+ .end = ref->end,
+ .len = 0,
+ .args.canned = {
+ .match = NULL,
+ .visits = 0,
+ .at = str,
+ .fallback = ref,
+ },
+ };
+ def_t defs2 = {
+ .namelen = def->namelen,
+ .name = def->name,
+ .file = def->file,
+ .op = &fail_op,
+ .next = defs,
};
- match_t *best = NULL;
- left_recursive:;
- match_t *p = _match(defs, f, str, r, flags, &wrap);
- if (p == NULL) return best;
- if (wrap.hit && (best == NULL || p->end > best->end)) {
- best = p;
- wrap.hit = 0;
- wrap.result = p;
- goto left_recursive;
- } else if (best == NULL) {
- best = p;
+
+ const char *prev = str;
+ match_t *m = _match(&defs2, f, str, ref, flags);
+ if (m == NULL) return NULL;
+
+ while (fail_op.args.canned.visits > 0 && m->end > prev) {
+ fail_op.args.canned.visits = 0;
+ fail_op.args.canned.match = m;
+ prev = m->end;
+ match_t *m2 = _match(&defs2, f, str, ref, flags);
+ if (m2 == NULL) break;
+ m = m2;
}
- match_t *m = new(match_t);
- m->start = best->start;
- m->end = best->end;
- m->op = op;
- m->child = best;
+
return m;
}
case VM_BACKREF: {
@@ -526,7 +544,26 @@ match_t *get_capture(match_t *m, const char **id)
//
match_t *match(def_t *defs, file_t *f, const char *str, vm_op_t *op, unsigned int flags)
{
- return _match(defs, f, str, op, flags, NULL);
+ return _match(defs, f, str, op, flags);
+}
+
+// Deallocate memory associated with an op
+void destroy_op(vm_op_t *op)
+{
+ switch (op->type) {
+ case VM_STRING: case VM_REF:
+ xfree(&op->args.s);
+ break;
+ case VM_CAPTURE:
+ if (op->args.capture.name)
+ xfree(&op->args.capture.name);
+ break;
+ case VM_REPLACE:
+ if (op->args.replace.text)
+ xfree(&op->args.replace.text);
+ break;
+ default: break;
+ }
}
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
diff --git a/vm.h b/vm.h
index 6417f32..2127eca 100644
--- a/vm.h
+++ b/vm.h
@@ -14,6 +14,8 @@ __attribute__((nonnull))
void destroy_match(match_t **m);
__attribute__((nonnull))
match_t *get_capture(match_t *m, const char **id);
+__attribute__((nonnull))
+void destroy_op(vm_op_t *op);
#endif
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1