diff options
| -rw-r--r-- | bp.c | 32 | ||||
| -rw-r--r-- | grammar.c | 77 | ||||
| -rw-r--r-- | grammar.h | 20 | ||||
| -rw-r--r-- | types.h | 19 | ||||
| -rw-r--r-- | vm.c | 55 | ||||
| -rw-r--r-- | vm.h | 4 |
6 files changed, 83 insertions, 124 deletions
@@ -72,14 +72,14 @@ static int print_errors(file_t *f, match_t *m) return ret; } -static int run_match(grammar_t *g, const char *filename, vm_op_t *pattern, unsigned int flags) +static int run_match(def_t *defs, const char *filename, vm_op_t *pattern, unsigned int flags) { static int printed_matches = 0; file_t *f = load_file(filename); check(f, "Could not open file: %s", filename); if (flags & BP_INPLACE) // Need to do this before matching intern_file(f); - match_t *m = match(g, f, f->contents, pattern, flags); + match_t *m = match(defs, f, f->contents, pattern, flags); if (m && print_errors(f, m) > 0) _exit(1); if (m != NULL && m->end > m->start + 1) { @@ -133,14 +133,14 @@ int main(int argc, char *argv[]) char path[PATH_MAX] = {0}; const char *rule = "find-all"; - grammar_t *g = new(grammar_t); + def_t *defs = NULL; // Load builtins: if (access("/etc/xdg/bp/builtins.bp", R_OK) != -1) - load_grammar(g, load_file("/etc/xdg/bp/builtins.bp")); // Keep in memory for debugging output + defs = load_grammar(defs, load_file("/etc/xdg/bp/builtins.bp")); // Keep in memory for debugging output sprintf(path, "%s/.config/bp/builtins.bp", getenv("HOME")); if (access(path, R_OK) != -1) - load_grammar(g, load_file(path)); // Keep in memory for debugging output + defs = load_grammar(defs, load_file(path)); // Keep in memory for debugging output int i, npatterns = 0; check(argc > 1, "%s", usage); @@ -172,7 +172,7 @@ int main(int argc, char *argv[]) file_t *replace_file = spoof_file("<replace argument>", flag); vm_op_t *rep = bp_replacement(replace_file, patref, replace_file->contents); check(rep, "Replacement failed to compile: %s", flag); - add_def(g, replace_file, "replacement", rep); + defs = with_def(defs, replace_file, "replacement", rep); rule = "replace-all"; } else if (FLAG("--grammar") || FLAG("-g")) { file_t *f = load_file(flag); @@ -185,7 +185,7 @@ int main(int argc, char *argv[]) f = load_file(path); } check(f != NULL, "Couldn't find grammar: %s", flag); - load_grammar(g, f); // Keep in memory for debug output + defs = load_grammar(defs, f); // Keep in memory for debug output } else if (FLAG("--define") || FLAG("-d")) { char *def = flag; char *eq = strchr(def, ':'); @@ -195,7 +195,7 @@ int main(int argc, char *argv[]) file_t *def_file = spoof_file(def, src); vm_op_t *pat = bp_pattern(def_file, def_file->contents); check(pat, "Failed to compile pattern: %s", flag); - add_def(g, def_file, def, pat); + defs = with_def(defs, def_file, def, pat); } else if (FLAG("--define-string") || FLAG("-D")) { char *def = flag; char *eq = strchr(def, ':'); @@ -205,19 +205,19 @@ int main(int argc, char *argv[]) file_t *def_file = spoof_file(def, src); vm_op_t *pat = bp_stringpattern(def_file, def_file->contents); check(pat, "Failed to compile pattern: %s", src); - add_def(g, def_file, def, pat); + defs = with_def(defs, def_file, def, pat); } else if (FLAG("--pattern") || FLAG("-p")) { check(npatterns == 0, "Cannot define multiple patterns"); file_t *arg_file = spoof_file("<pattern argument>", flag); vm_op_t *p = bp_pattern(arg_file, arg_file->contents); check(p, "Pattern failed to compile: %s", flag); - add_def(g, arg_file, "pattern", p); + defs = with_def(defs, arg_file, "pattern", p); ++npatterns; } else if (FLAG("--pattern-string") || FLAG("-P")) { file_t *arg_file = spoof_file("<pattern argument>", flag); vm_op_t *p = bp_stringpattern(arg_file, arg_file->contents); check(p, "Pattern failed to compile: %s", flag); - add_def(g, arg_file, "pattern", p); + defs = with_def(defs, arg_file, "pattern", p); ++npatterns; } else if (FLAG("--mode") || FLAG("-m")) { rule = flag; @@ -242,7 +242,7 @@ int main(int argc, char *argv[]) file_t *arg_file = spoof_file("<pattern argument>", argv[i]); vm_op_t *p = bp_stringpattern(arg_file, arg_file->contents); check(p, "Pattern failed to compile: %s", argv[i]); - add_def(g, arg_file, "pattern", p); + defs = with_def(defs, arg_file, "pattern", p); ++npatterns; } else { printf("Unrecognized flag: %s\n\n%s\n", argv[i], usage); @@ -260,7 +260,7 @@ int main(int argc, char *argv[]) print_options |= PRINT_COLOR | PRINT_LINE_NUMBERS; } - vm_op_t *pattern = lookup(g, rule); + vm_op_t *pattern = lookup(defs, rule); check(pattern != NULL, "No such rule: '%s'", rule); int ret = 1; @@ -268,7 +268,7 @@ int main(int argc, char *argv[]) if (i < argc) { // Files pass in as command line args: for (int nfiles = 0; i < argc; nfiles++, i++) { - ret &= run_match(g, argv[i], pattern, flags); + ret &= run_match(defs, argv[i], pattern, flags); } } else if (isatty(STDIN_FILENO)) { // No files, no piped in input, so use * **/*: @@ -276,12 +276,12 @@ int main(int argc, char *argv[]) glob("*", 0, NULL, &globbuf); glob("**/*", GLOB_APPEND, NULL, &globbuf); for (size_t i = 0; i < globbuf.gl_pathc; i++) { - ret &= run_match(g, globbuf.gl_pathv[i], pattern, flags); + ret &= run_match(defs, globbuf.gl_pathv[i], pattern, flags); } globfree(&globbuf); } else { // Piped in input: - ret &= run_match(g, NULL, pattern, flags); + ret &= run_match(defs, NULL, pattern, flags); } if (flags & BP_JSON) printf("]\n"); @@ -11,28 +11,27 @@ #include "utils.h" /* - * Add a definition to the grammar + * Return a new list of definitions with one added to the front */ -void add_def(grammar_t *g, file_t *f, const char *name, vm_op_t *op) +def_t *with_def(def_t *defs, file_t *f, const char *name, vm_op_t *op) { def_t *def = new(def_t); - def->next = g->firstdef; + def->next = defs; def->file = f; def->name = name; def->op = op; - g->firstdef = def; + return def; } /* * Load the given grammar (semicolon-separated definitions) * and return the first rule defined. */ -vm_op_t *load_grammar(grammar_t *g, file_t *f) +def_t *load_grammar(def_t *defs, file_t *f) { - vm_op_t *ret = NULL; const char *src = f->contents; src = after_spaces(src); - while (*src) { + while (src < f->end) { const char *name = src; src = after_name(name); check(src > name, "Invalid name for definition: %s", name); @@ -40,11 +39,7 @@ vm_op_t *load_grammar(grammar_t *g, file_t *f) check(matchchar(&src, ':'), "Expected ':' in definition"); vm_op_t *op = bp_pattern(f, src); if (op == NULL) break; - //check(op, "Couldn't load definition"); - add_def(g, f, name, op); - if (ret == NULL) { - ret = op; - } + defs = with_def(defs, f, name, op); src = op->end; src = after_spaces(src); if (matchchar(&src, ';')) @@ -54,21 +49,17 @@ vm_op_t *load_grammar(grammar_t *g, file_t *f) fprint_line(stderr, f, src, NULL, "Invalid BP pattern"); _exit(1); } - return ret; + return defs; } /* * Look up a backreference or grammar definition by name */ -vm_op_t *lookup(grammar_t *g, const char *name) +vm_op_t *lookup(def_t *defs, const char *name) { - for (backref_t *b = g->firstbackref; b; b = b->next) { - if (streq(b->name, name)) - return b->op; - } - for (def_t *d = g->firstdef; d; d = d->next) { - if (streq(d->name, name)) - return d->op; + for ( ; defs; defs = defs->next) { + if (streq(defs->name, name)) + return defs->op; } return NULL; } @@ -76,49 +67,29 @@ vm_op_t *lookup(grammar_t *g, const char *name) /* * Push a backreference onto the backreference stack */ -void push_backref(grammar_t *g, const char *name, match_t *capture) +static def_t *with_backref(def_t *defs, file_t *f, const char *name, match_t *m) { - backref_t *backref = new(backref_t); - backref->name = name; vm_op_t *op = new(vm_op_t); op->op = VM_BACKREF; - op->start = capture->start; - op->end = capture->end; + op->start = m->start; + op->end = m->end; op->len = -1; // TODO: maybe calculate this? (nontrivial because of replacements) - op->args.backref = capture; - backref->op = op; - backref->next = g->firstbackref; - g->firstbackref = backref; + op->args.backref = m; + return with_def(defs, f, name, op); } /* * Push all the backreferences contained in a match onto the backreference stack */ -size_t push_backrefs(grammar_t *g, match_t *m) -{ - if (m->op->op == VM_REF) return 0; - size_t count = 0; - if (m->op->op == VM_CAPTURE && m->op->args.capture.name) { - ++count; - push_backref(g, m->op->args.capture.name, m->child); - } - if (m->child) count += push_backrefs(g, m->child); - if (m->nextsibling) count += push_backrefs(g, m->nextsibling); - return count; -} - -/* - * Pop a number of backreferences off the backreference stack - */ -void pop_backrefs(grammar_t *g, size_t count) +def_t *with_backrefs(def_t *defs, file_t *f, match_t *m) { - for ( ; count > 0; count--) { - backref_t *b = g->firstbackref; - g->firstbackref = b->next; - check(b, "Attempt to pop %ld more backrefs than there are", count); - xfree(&b->op); - xfree(&b); + if (m->op->op != VM_REF) { + if (m->op->op == VM_CAPTURE && m->op->args.capture.name) + defs = with_backref(defs, f, m->op->args.capture.name, m->child); + if (m->child) defs = with_backrefs(defs, f, m->child); + if (m->nextsibling) defs = with_backrefs(defs, f, m->nextsibling); } + return defs; } // vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1 @@ -7,18 +7,14 @@ #include "file_loader.h" #include "types.h" -__attribute__((nonnull(1,3,4))) -void add_def(grammar_t *g, file_t *f, const char *name, vm_op_t *op); -__attribute__((nonnull)) -void push_backref(grammar_t *g, const char *name, match_t *capture); -__attribute__((nonnull)) -size_t push_backrefs(grammar_t *g, match_t *m); -__attribute__((nonnull)) -void pop_backrefs(grammar_t *g, size_t count); -__attribute__((nonnull)) -vm_op_t *load_grammar(grammar_t *g, file_t *f); -__attribute__((pure, nonnull)) -vm_op_t *lookup(grammar_t *g, const char *name); +__attribute__((nonnull(2,3,4))) +def_t *with_def(def_t *defs, file_t *f, const char *name, vm_op_t *op); +__attribute__((nonnull(2,3))) +def_t *with_backrefs(def_t *defs, file_t *f, match_t *m); +__attribute__((nonnull(2))) +def_t *load_grammar(def_t *defs, file_t *f); +__attribute__((pure, nonnull(2))) +vm_op_t *lookup(def_t *defs, const char *name); #endif // vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1 @@ -89,7 +89,7 @@ typedef struct match_s { } match_t; /* - * Pattern matching rule definition + * Pattern matching rule definition(s) */ typedef struct def_s { const char *name; @@ -98,22 +98,5 @@ typedef struct def_s { struct def_s *next; } def_t; -/* - * Backreference (look up previous capture by name) - */ -typedef struct backref_s { - const char *name; - vm_op_t *op; - struct backref_s *next; -} backref_t; - -/* - * Grammar (a collection of definitions) - */ -typedef struct { - def_t *firstdef; - backref_t *firstbackref; -} grammar_t; - #endif // vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1 @@ -163,7 +163,7 @@ static match_t *match_backref(const char *str, vm_op_t *op, match_t *cap, unsign * a match struct, or NULL if no match is found. * The returned value should be free()'d to avoid memory leaking. */ -static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, unsigned int flags, recursive_ref_t *rec) +static match_t *_match(def_t *defs, file_t *f, const char *str, vm_op_t *op, unsigned int flags, recursive_ref_t *rec) { switch (op->op) { case VM_ANYCHAR: { @@ -197,7 +197,7 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un return m; } case VM_NOT: { - match_t *m = _match(g, f, str, op->args.pat, flags, rec); + match_t *m = _match(defs, f, str, op->args.pat, flags, rec); if (m != NULL) { destroy_match(&m); return NULL; @@ -219,7 +219,7 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un for (const char *prev = NULL; prev < str; ) { prev = str; if (op->args.multiple.first) { - match_t *p = _match(g, f, str, op->args.multiple.first, flags, rec); + match_t *p = _match(defs, f, str, op->args.multiple.first, flags, rec); if (p) { *dest = p; m->end = p->end; @@ -230,7 +230,7 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un return m; } if (op->args.multiple.second) { - match_t *p = _match(g, f, str, op->args.multiple.second, flags, rec); + match_t *p = _match(defs, f, str, op->args.multiple.second, flags, rec); if (p) { *dest = p; dest = &p->nextsibling; @@ -264,11 +264,11 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un // Separator match_t *sep = NULL; if (op->args.repetitions.sep != NULL && reps > 0) { - sep = _match(g, f, str, op->args.repetitions.sep, flags, rec); + sep = _match(defs, f, str, op->args.repetitions.sep, flags, rec); if (sep == NULL) break; str = sep->end; } - match_t *p = _match(g, f, str, op->args.repetitions.repeat_pat, flags, rec); + match_t *p = _match(defs, f, str, op->args.repetitions.repeat_pat, flags, rec); if (p == NULL) { destroy_match(&sep); break; @@ -308,7 +308,7 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un ssize_t backtrack = op->args.pat->len; check(backtrack != -1, "'<' is only allowed for fixed-length operations"); if (str - backtrack < f->contents) return NULL; - match_t *before = _match(g, f, str - backtrack, op->args.pat, flags, rec); + match_t *before = _match(defs, f, str - backtrack, op->args.pat, flags, rec); if (before == NULL) return NULL; match_t *m = new(match_t); m->start = str; @@ -318,7 +318,7 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un return m; } case VM_BEFORE: { - match_t *after = _match(g, f, str, op->args.pat, flags, rec); + match_t *after = _match(defs, f, str, op->args.pat, flags, rec); if (after == NULL) return NULL; match_t *m = new(match_t); m->start = str; @@ -328,7 +328,7 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un return m; } case VM_CAPTURE: { - match_t *p = _match(g, f, str, op->args.pat, flags, rec); + match_t *p = _match(defs, f, str, op->args.pat, flags, rec); if (p == NULL) return NULL; match_t *m = new(match_t); m->start = str; @@ -338,7 +338,7 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un return m; } case VM_HIDE: { - match_t *p = _match(g, f, str, op->args.pat, flags, rec); + match_t *p = _match(defs, f, str, op->args.pat, flags, rec); if (p == NULL) return NULL; match_t *m = new(match_t); m->start = str; @@ -348,17 +348,26 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un return m; } case VM_OTHERWISE: { - match_t *m = _match(g, f, str, op->args.multiple.first, flags, rec); - if (m == NULL) m = _match(g, f, str, op->args.multiple.second, flags, rec); + match_t *m = _match(defs, f, str, op->args.multiple.first, flags, rec); + if (m == NULL) m = _match(defs, f, str, op->args.multiple.second, flags, rec); return m; } case VM_CHAIN: { - match_t *m1 = _match(g, f, str, op->args.multiple.first, flags, rec); + match_t *m1 = _match(defs, f, str, op->args.multiple.first, flags, rec); if (m1 == NULL) return NULL; - size_t nbackrefs = push_backrefs(g, m1); - match_t *m2 = _match(g, f, m1->end, op->args.multiple.second, flags, rec); - pop_backrefs(g, nbackrefs); + match_t *m2; + { // Push backrefs and run matching, then cleanup + def_t *defs2 = with_backrefs(defs, f, m1); + m2 = _match(defs2, f, m1->end, op->args.multiple.second, flags, rec); + while (defs2 != defs) { + def_t *next = defs2->next; + defs2->next = NULL; + xfree(&defs2); + defs2 = next; + } + } + if (m2 == NULL) { destroy_match(&m1); return NULL; @@ -372,7 +381,7 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un return m; } case VM_EQUAL: case VM_NOT_EQUAL: { - match_t *m1 = _match(g, f, str, op->args.multiple.first, flags, rec); + match_t *m1 = _match(defs, f, str, op->args.multiple.first, flags, rec); if (m1 == NULL) return NULL; // <p1>==<p2> matches iff the text of <p1> matches <p2> @@ -384,7 +393,7 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un .nlines=1 + get_line_number(f, m1->end)-get_line_number(f, m1->start), .mmapped=f->mmapped, }; - match_t *m2 = _match(g, &inner, str, op->args.multiple.second, flags, rec); + match_t *m2 = _match(defs, &inner, str, op->args.multiple.second, flags, rec); if ((m2 == NULL) == (op->op == VM_EQUAL)) { destroy_match(&m1); destroy_match(&m2); @@ -405,7 +414,7 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un case VM_REPLACE: { match_t *p = NULL; if (op->args.replace.pat) { - p = _match(g, f, str, op->args.replace.pat, flags, rec); + p = _match(defs, f, str, op->args.replace.pat, flags, rec); if (p == NULL) return NULL; } match_t *m = new(match_t); @@ -420,7 +429,7 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un return m; } case VM_REF: { - vm_op_t *r = lookup(g, op->args.s); + vm_op_t *r = lookup(defs, op->args.s); check(r != NULL, "Unknown identifier: '%s'", op->args.s); // Prevent infinite left recursion: @@ -440,7 +449,7 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un }; match_t *best = NULL; left_recursive:; - match_t *p = _match(g, f, str, r, flags, &wrap); + match_t *p = _match(defs, f, str, r, flags, &wrap); if (p == NULL) return best; if (wrap.hit && (best == NULL || p->end > best->end)) { best = p; @@ -546,9 +555,9 @@ match_t *get_capture(match_t *m, const char **r) return NULL; } -match_t *match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, unsigned int flags) +match_t *match(def_t *defs, file_t *f, const char *str, vm_op_t *op, unsigned int flags) { - return _match(g, f, str, op, flags, NULL); + return _match(defs, f, str, op, flags, NULL); } // vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1 @@ -9,8 +9,8 @@ #include "types.h" const char *opcode_name(enum VMOpcode o); -__attribute__((hot, nonnull)) -match_t *match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, unsigned int flags); +__attribute__((hot, nonnull(2,3,4))) +match_t *match(def_t *defs, file_t *f, const char *str, vm_op_t *op, unsigned int flags); __attribute__((nonnull)) void destroy_match(match_t **m); __attribute__((nonnull)) |
