diff --git a/Makefile b/Makefile index e1ef268..5e50c9c 100644 --- a/Makefile +++ b/Makefile @@ -7,7 +7,7 @@ all: bpeg clean: rm -f bpeg -bpeg: bpeg.c bpeg.h vm.h utils.h +bpeg: bpeg.c bpeg.h utils.h cc $(CFLAGS) $(OFLAGS) $< -o $@ .PHONY: all clean diff --git a/bpeg.c b/bpeg.c index d09dba3..31c2016 100644 --- a/bpeg.c +++ b/bpeg.c @@ -219,7 +219,7 @@ static match_t *match(const char *str, vm_op_t *op) m->end = m->start; } // TODO: handle captures - m->replacement = op->args.replace.replacement; + m->capture.replacement = op->args.replace.replacement; return m; } case VM_REF: { @@ -666,16 +666,19 @@ static vm_op_t *compile_bpeg(const char *str) return op; } -static void load_def(const char *name, const char *def) +static vm_op_t *load_def(const char *name, const char *def) { defs[ndefs].name = name; - defs[ndefs].op = compile_bpeg(def); + vm_op_t *op = compile_bpeg(def); + defs[ndefs].op = op; ++ndefs; + return op; } static void load_defs(void) { - load_def("_", "` /\\t/\\n/\\r"); + load_def("_", "*(` /\\t/\\n/\\r)"); + load_def("__", "+(` /\\t/\\n/\\r)"); load_def("nl", "\\n"); load_def("crlf", "\\r\\n"); load_def("abc", "`a,z"); @@ -696,15 +699,28 @@ static void load_defs(void) int main(int argc, char *argv[]) { + check(argc == 3, "Usage: bpeg "); load_defs(); - char *lang = argc > 1 ? argv[1] : "'x''y'"; + const char *lang = argc > 1 ? 
argv[1] : "'x''y'"; vm_op_t *op = compile_bpeg(lang); check(op, "Failed to compile_bpeg input"); op = expand_choices(op); - // TODO: check for semicolon and more rules - + const char *defs = after_spaces(op->end); + while (*defs == ';') { + defs = after_spaces(++defs); + const char *name = defs; + check(isalpha(*name), "Definition must begin with a name"); + while (isalpha(*defs)) ++defs; + name = strndup(name, (size_t)(defs-name)); + defs = after_spaces(defs); + check(*defs == '=', "Expected '=' in definition"); + ++defs; + vm_op_t *def = load_def(name, defs); + check(def, "Couldn't load definition"); + defs = def->end; + } char *str = argc > 2 ? argv[2] : "xyz"; @@ -725,3 +741,5 @@ int main(int argc, char *argv[]) return 0; } + +//vim: ts=4 diff --git a/bpeg.h b/bpeg.h index 8bc813a..a855e8a 100644 --- a/bpeg.h +++ b/bpeg.h @@ -8,18 +8,75 @@ #include #include "utils.h" -#include "vm.h" +/* + * Pattern matching result object + */ typedef struct match_s { + // Where the match starts and ends (end is after the last character) const char *start, *end; union { unsigned int is_capture:1; const char *name; + const char *replacement; } capture; - const char *replacement; struct match_s *child, *nextsibling; } match_t; +/* + * BPEG virtual machine opcodes + */ +enum VMOpcode { + VM_EMPTY = 0, + VM_ANYCHAR = 1, + VM_STRING, + VM_RANGE, + VM_NOT, + VM_UPTO, + VM_UPTO_AND, + VM_REPEAT, + VM_BEFORE, + VM_AFTER, + VM_CAPTURE, + VM_OTHERWISE, + VM_CHAIN, + VM_REPLACE, + VM_REF, +}; + +/* + * A struct representing a BPEG virtual machine operation + */ +typedef struct vm_op_s { + enum VMOpcode op; + const char *start, *end; + // Length of the match, if constant, otherwise -1 + ssize_t len; + union { + const char *s; + struct { + char low, high; + } range; + struct { + ssize_t min, max; + struct vm_op_s *sep, *repeat_pat; + } repetitions; + struct { + struct vm_op_s *first, *second; + } multiple; + struct { + struct vm_op_s *replace_pat; + const char *replacement; + } replace; + 
struct { + struct vm_op_s *capture_pat; + char *name; + } capture; + struct vm_op_s *pat; + } args; +} vm_op_t; + + static match_t *free_match(match_t *m); static match_t *match(const char *str, vm_op_t *op); static void set_range(vm_op_t *op, ssize_t min, ssize_t max, vm_op_t *pat, vm_op_t *sep); diff --git a/vm.h b/vm.h deleted file mode 100644 index 2123c35..0000000 --- a/vm.h +++ /dev/null @@ -1,50 +0,0 @@ -/* - * vm.h - Source code for the BPEG virtual machine datatypes - */ - -enum VMOpcode { - VM_EMPTY = 0, - VM_ANYCHAR = 1, - VM_STRING, - VM_RANGE, - VM_NOT, - VM_UPTO, - VM_UPTO_AND, - VM_REPEAT, - VM_BEFORE, - VM_AFTER, - VM_CAPTURE, - VM_OTHERWISE, - VM_CHAIN, - VM_REPLACE, - VM_REF, -}; - -typedef struct vm_op_s { - enum VMOpcode op; - const char *start, *end; - ssize_t len; - union { - const char *s; - struct { - char low, high; - } range; - struct { - ssize_t min, max; - struct vm_op_s *sep, *repeat_pat; - } repetitions; - struct { - struct vm_op_s *first, *second; - } multiple; - struct { - struct vm_op_s *replace_pat; - const char *replacement; - } replace; - struct { - struct vm_op_s *capture_pat; - char *name; - } capture; - struct vm_op_s *pat; - } args; -} vm_op_t; -