diff options
| author | Bruce Hill <bruce@bruce-hill.com> | 2020-09-07 23:34:41 -0700 |
|---|---|---|
| committer | Bruce Hill <bruce@bruce-hill.com> | 2020-09-07 23:34:41 -0700 |
| commit | a76c3338775082bcf61706bb0c002ad4b47abaed (patch) | |
| tree | 4372365e3da26214f7f855cab57c2e9e43162695 | |
| parent | f7cdd6d4d2c492683f0b5f65fa2fe788772df7c8 (diff) | |
Updates and cleanup
| -rw-r--r-- | Makefile | 2 | ||||
| -rw-r--r-- | bpeg.c | 32 | ||||
| -rw-r--r-- | bpeg.h | 61 | ||||
| -rw-r--r-- | vm.h | 50 |
4 files changed, 85 insertions, 60 deletions
@@ -7,7 +7,7 @@ all: bpeg clean: rm -f bpeg -bpeg: bpeg.c bpeg.h vm.h utils.h +bpeg: bpeg.c bpeg.h utils.h cc $(CFLAGS) $(OFLAGS) $< -o $@ .PHONY: all clean @@ -219,7 +219,7 @@ static match_t *match(const char *str, vm_op_t *op) m->end = m->start; } // TODO: handle captures - m->replacement = op->args.replace.replacement; + m->capture.replacement = op->args.replace.replacement; return m; } case VM_REF: { @@ -666,16 +666,19 @@ static vm_op_t *compile_bpeg(const char *str) return op; } -static void load_def(const char *name, const char *def) +static vm_op_t *load_def(const char *name, const char *def) { defs[ndefs].name = name; - defs[ndefs].op = compile_bpeg(def); + vm_op_t *op = compile_bpeg(def); + defs[ndefs].op = op; ++ndefs; + return op; } static void load_defs(void) { - load_def("_", "` /\\t/\\n/\\r"); + load_def("_", "*(` /\\t/\\n/\\r)"); + load_def("__", "+(` /\\t/\\n/\\r)"); load_def("nl", "\\n"); load_def("crlf", "\\r\\n"); load_def("abc", "`a,z"); @@ -696,15 +699,28 @@ static void load_defs(void) int main(int argc, char *argv[]) { + check(argc == 3, "Usage: bpeg <pat> <str>"); load_defs(); - char *lang = argc > 1 ? argv[1] : "'x''y'"; + const char *lang = argc > 1 ? argv[1] : "'x''y'"; vm_op_t *op = compile_bpeg(lang); check(op, "Failed to compile_bpeg input"); op = expand_choices(op); - // TODO: check for semicolon and more rules - + const char *defs = after_spaces(op->end); + while (*defs == ';') { + defs = after_spaces(++defs); + const char *name = defs; + check(isalpha(*name), "Definition must begin with a name"); + while (isalpha(*defs)) ++defs; + name = strndup(name, (size_t)(defs-name)); + defs = after_spaces(defs); + check(*defs == '=', "Expected '=' in definition"); + ++defs; + vm_op_t *def = load_def(name, defs); + check(def, "Couldn't load definition"); + defs = def->end; + } char *str = argc > 2 ? argv[2] : "xyz"; @@ -725,3 +741,5 @@ int main(int argc, char *argv[]) return 0; } + +//vim: ts=4 @@ -8,18 +8,75 @@ #include <unistd.h> #include "utils.h" -#include "vm.h" +/* + * Pattern matching result object + */ typedef struct match_s { + // Where the match starts and ends (end is after the last character) const char *start, *end; union { unsigned int is_capture:1; const char *name; + const char *replacement; } capture; - const char *replacement; struct match_s *child, *nextsibling; } match_t; +/* + * BPEG virtual machine opcodes + */ +enum VMOpcode { + VM_EMPTY = 0, + VM_ANYCHAR = 1, + VM_STRING, + VM_RANGE, + VM_NOT, + VM_UPTO, + VM_UPTO_AND, + VM_REPEAT, + VM_BEFORE, + VM_AFTER, + VM_CAPTURE, + VM_OTHERWISE, + VM_CHAIN, + VM_REPLACE, + VM_REF, +}; + +/* + * A struct reperesenting a BPEG virtual machine operation + */ +typedef struct vm_op_s { + enum VMOpcode op; + const char *start, *end; + // Length of the match, if constant, otherwise -1 + ssize_t len; + union { + const char *s; + struct { + char low, high; + } range; + struct { + ssize_t min, max; + struct vm_op_s *sep, *repeat_pat; + } repetitions; + struct { + struct vm_op_s *first, *second; + } multiple; + struct { + struct vm_op_s *replace_pat; + const char *replacement; + } replace; + struct { + struct vm_op_s *capture_pat; + char *name; + } capture; + struct vm_op_s *pat; + } args; +} vm_op_t; + + static match_t *free_match(match_t *m); static match_t *match(const char *str, vm_op_t *op); static void set_range(vm_op_t *op, ssize_t min, ssize_t max, vm_op_t *pat, vm_op_t *sep); @@ -1,50 +0,0 @@ -/* - * vm.h - Source code for the BPEG virtual machine datatypes - */ - -enum VMOpcode { - VM_EMPTY = 0, - VM_ANYCHAR = 1, - VM_STRING, - VM_RANGE, - VM_NOT, - VM_UPTO, - VM_UPTO_AND, - VM_REPEAT, - VM_BEFORE, - VM_AFTER, - VM_CAPTURE, - VM_OTHERWISE, - VM_CHAIN, - VM_REPLACE, - VM_REF, -}; - -typedef struct vm_op_s { - enum VMOpcode op; - const char *start, *end; - ssize_t len; - union { - const char *s; - struct { - char low, high; - } range; - struct { - ssize_t min, max; - struct vm_op_s *sep, *repeat_pat; - } repetitions; - struct { - struct vm_op_s *first, *second; - } multiple; - struct { - struct vm_op_s *replace_pat; - const char *replacement; - } replace; - struct { - struct vm_op_s *capture_pat; - char *name; - } capture; - struct vm_op_s *pat; - } args; -} vm_op_t; - |
