diff options
| -rw-r--r-- | bpeg.c | 146 | ||||
| -rw-r--r-- | compiler.c | 1 | ||||
| -rw-r--r-- | grammar.c | 37 | ||||
| -rw-r--r-- | grammar.h | 5 | ||||
| -rw-r--r-- | types.h | 1 | ||||
| -rw-r--r-- | vm.c | 36 |
6 files changed, 122 insertions, 104 deletions
@@ -52,29 +52,63 @@ static const char *usage = ( " -r --replace <replacement> replace the input pattern with the given replacement\n" " -g --grammar <grammar file> use the specified file as a grammar\n"); +static char *getflag(const char *flag, char *argv[], int *i) +{ + size_t n = strlen(flag); + if (strncmp(argv[*i], flag, n) == 0) { + if (argv[*i][n] == '=') { + return &argv[*i][n+1]; + } else if (argv[*i][n] == '\0') { + ++(*i); + return argv[*i]; + } + } + return NULL; +} +#define FLAG(f) (flag=getflag((f), argv, &i)) int main(int argc, char *argv[]) { int verbose = 0; - const char *pattern = NULL, - *replacement = NULL, - *grammarfile = NULL, - *infile = NULL; + char *flag = NULL; + const char *rule = "find-all"; grammar_t *g = new_grammar(); - for (int i = 1; i < argc; i++) { - if (streq(argv[i], "--help") || streq(argv[i], "-h")) { + int i, npatterns = 0; + for (i = 1; i < argc; i++) { + if (streq(argv[i], "--")) { + ++i; + break; + } else if (FLAG("--help") || FLAG("-h")) { printf("%s\n", usage); return 0; - } else if (streq(argv[i], "--verbose") || streq(argv[i], "-v")) { + } else if (FLAG("--verbose") || FLAG("-v")) { verbose = 1; - } else if (streq(argv[i], "--replace") || streq(argv[i], "-r")) { - replacement = argv[++i]; - } else if (streq(argv[i], "--grammar") || streq(argv[i], "-g")) { - grammarfile = argv[++i]; - } else if (streq(argv[i], "--define") || streq(argv[i], "-d")) { - char *def = argv[++i]; + } else if (FLAG("--pattern") || FLAG("-p")) { + vm_op_t *p = bpeg_pattern(flag); + check(p, "Pattern failed to compile"); + add_def(g, flag, "pattern", p); + ++npatterns; + } else if (FLAG("--replace") || FLAG("-r")) { + vm_op_t *p = bpeg_replacement(bpeg_pattern("pattern"), flag); + check(p, "Replacement failed to compile"); + add_def(g, flag, "replacement", p); + rule = "replace-all"; + } else if (FLAG("--grammar") || FLAG("-g")) { + const char *grammarfile = flag; + // load grammar from a file (semicolon mode) + char *grammar; + if (streq(grammarfile, "-")) { + grammar = readfile(STDIN_FILENO); + } else { + int fd = open(grammarfile, O_RDONLY); + check(fd >= 0, "Couldn't open file: %s", argv[2]); + grammar = readfile(fd); + } + load_grammar(g, grammar); + } else if (FLAG("--define") || FLAG("-d")) { + char *def = flag; char *eq = strchr(def, '='); check(eq, usage); *eq = '\0'; @@ -82,71 +116,51 @@ int main(int argc, char *argv[]) vm_op_t *pat = bpeg_pattern(src); check(pat, "Failed to compile pattern"); add_def(g, src, def, pat); - } else if (pattern == NULL) { - pattern = argv[i]; - } else if (infile == NULL) { - infile = argv[i]; + } else if (argv[i][0] != '-') { + if (npatterns > 0) break; + vm_op_t *p = bpeg_stringpattern(argv[i]); + check(p, "Pattern failed to compile"); + add_def(g, argv[i], "pattern", p); + ++npatterns; } } - check(pattern != NULL || grammarfile != NULL, usage); - - if (grammarfile) { - // load grammar from a file (semicolon mode) - char *grammar; - if (streq(grammarfile, "-")) { - grammar = readfile(STDIN_FILENO); - } else { - int fd = open(grammarfile, O_RDONLY); - check(fd >= 0, "Couldn't open file: %s", argv[2]); - grammar = readfile(fd); - } - load_grammar(g, grammar); - } else { - // load grammar in start-with-string mode: - vm_op_t *pat = bpeg_stringpattern(pattern); - if (replacement) - pat = bpeg_replacement(pat, replacement); - - add_def(g, pattern, "pattern", pat); - - if (replacement) { - load_grammar(g, - "replace-all = *&&@pattern &&$$;\n" - ); - } else { - load_grammar(g, - "find-all = *(matching-line / {&&(\\n/$$)=>});\n" - "matching-line = +&@pattern *. $ ?\\n;" - ); - } - } + vm_op_t *pattern = lookup(g, rule); + check(pattern != NULL, usage); if (verbose) { - print_pattern(g->pattern); + print_pattern(pattern); } char *input; - if (infile == NULL || streq(infile, "-")) { + if (i == argc) { + // Force stdin if no files given input = readfile(STDIN_FILENO); - } else { - int fd = open(infile, O_RDONLY); - check(fd >= 0, "Couldn't open file: %s", argv[2]); - input = readfile(fd); + goto run_match; } + for ( ; i < argc; i++) { + if (argv[i] == NULL || streq(argv[i], "-")) { + input = readfile(STDIN_FILENO); + } else { + int fd = open(argv[i], O_RDONLY); + check(fd >= 0, "Couldn't open file: %s", argv[2]); + input = readfile(fd); + } - // Ensure string has a null byte to the left: - char *lpadded = calloc(sizeof(char), strlen(input)+2); - stpcpy(&lpadded[1], input); - input = &lpadded[1]; + run_match:; + // Ensure string has a null byte to the left: + char *lpadded = calloc(sizeof(char), strlen(input)+2); + stpcpy(&lpadded[1], input); + input = &lpadded[1]; - match_t *m = match(g, input, g->pattern); - if (m == NULL) { - printf("No match\n"); - return 1; - } else { - print_match(m, "\033[0m", verbose); - //printf("\033[0;2m%s\n", m->end); + match_t *m = match(g, input, pattern); + if (m == NULL) { + printf("No match\n"); + return 1; + } else { + print_match(m, "\033[0m", verbose); + //printf("\033[0;2m%s\n", m->end); + } } return 0; @@ -436,6 +436,7 @@ vm_op_t *bpeg_stringpattern(const char *str) */ vm_op_t *bpeg_replacement(vm_op_t *pat, const char *replacement) { + check(pat, "Null pattern used in replacement"); vm_op_t *op = calloc(sizeof(vm_op_t), 1); op->op = VM_REPLACE; op->start = pat->start; @@ -7,6 +7,14 @@ #include "utils.h" const char *BPEG_BUILTIN_GRAMMAR = ( + // Meta-rules for acting on everything + "pattern = !(/);\n" // Not defined by default + "replacement = {!(/)=>};\n" // Not defined by default + "replace-all = *&&@replacement &&$$;\n" + "find-all = *(matching-line / {&&(\\n/$$)=>});\n" + "matching-line = +&@pattern *. $ ?\\n;\n" + + // Helper definitions (commonly used) "crlf=\\r\\n;\n" "cr=\\r;\n" "r=\\r;\n" "anglebraces=`< *(anglebraces / ~~`>) `>;\n" @@ -61,9 +69,13 @@ void add_def(grammar_t *g, const char *src, const char *name, vm_op_t *op) ++g->size; } -void load_grammar(grammar_t *g, const char *src) +/* + * Load the given grammar (semicolon-separated definitions) + * and return the first rule defined. + */ +vm_op_t *load_grammar(grammar_t *g, const char *src) { - vm_op_t *mainpat = NULL; + vm_op_t *ret = NULL; do { src = after_spaces(src); if (!*src) break; @@ -76,20 +88,21 @@ void load_grammar(grammar_t *g, const char *src) vm_op_t *op = bpeg_pattern(src); check(op, "Couldn't load definition"); add_def(g, src, name, op); - if (mainpat == NULL) { - mainpat = op; - g->pattern = op; + if (ret == NULL) { + ret = op; } src = op->end; } while (*src && matchchar(&src, ';')); + return ret; } -/* - * Print a BPEG grammar in human-readable form. - */ -void print_grammar(grammar_t *g) +vm_op_t *lookup(grammar_t *g, const char *name) { - if (g->pattern) print_pattern(g->pattern); + // Search backwards so newer defs take precedence + for (int i = g->size-1; i >= 0; i--) { + if (streq(g->definitions[i].name, name)) { + return g->definitions[i].op; + } + } + return NULL; } - -// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1 @@ -11,9 +11,8 @@ grammar_t *new_grammar(void); void add_def(grammar_t *g, const char *src, const char *name, vm_op_t *op); -void load_grammar(grammar_t *g, const char *source); -void print_grammar(grammar_t *g); - +vm_op_t *load_grammar(grammar_t *g, const char *source); +vm_op_t *lookup(grammar_t *g, const char *name); #endif // vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1 @@ -81,7 +81,6 @@ typedef struct { } def_t; typedef struct { - vm_op_t *pattern; def_t *definitions; size_t size, capacity; } grammar_t; @@ -2,6 +2,7 @@ * vm.c - Code for the BPEG virtual machine that performs the matching. */ #include "vm.h" +#include "grammar.h" #include "utils.h" /* @@ -212,28 +213,19 @@ match_t *match(grammar_t *g, const char *str, vm_op_t *op) return m; } case VM_REF: { - // Search backwards so newer defs take precedence - for (int i = g->size-1; i >= 0; i--) { - if (streq(g->definitions[i].name, op->args.s)) { - // Bingo! - /* - op = g->definitions[i].op; - goto tailcall; - */ - match_t *p = match(g, str, g->definitions[i].op); - if (p == NULL) return NULL; - match_t *m = calloc(sizeof(match_t), 1); - m->start = p->start; - m->end = p->end; - m->op = op; - m->child = p; - m->name_or_replacement = g->definitions[i].name; - m->is_ref = 1; - return m; - } - } - check(0, "Unknown identifier: '%s'", op->args.s); - return NULL; + vm_op_t *r = lookup(g, op->args.s); + check(r != NULL, "Unknown identifier: '%s'", op->args.s); + // Could use tailcall here, but current have disabled + match_t *p = match(g, str, r); + if (p == NULL) return NULL; + match_t *m = calloc(sizeof(match_t), 1); + m->start = p->start; + m->end = p->end; + m->op = op; + m->child = p; + m->name_or_replacement = op->args.s; + m->is_ref = 1; + return m; } default: { fprintf(stderr, "Unknown opcode: %d", op->op); |
