/* * bpeg.h - Header file for the bpeg parser */ #include #include #include #include #include #include "utils.h" /* * Pattern matching result object */ typedef struct match_s { // Where the match starts and ends (end is after the last character) const char *start, *end; union { unsigned int is_capture:1; const char *name; const char *replacement; } capture; struct match_s *child, *nextsibling; } match_t; /* * BPEG virtual machine opcodes */ enum VMOpcode { VM_EMPTY = 0, VM_ANYCHAR = 1, VM_STRING, VM_RANGE, VM_NOT, VM_UPTO, VM_UPTO_AND, VM_REPEAT, VM_BEFORE, VM_AFTER, VM_CAPTURE, VM_OTHERWISE, VM_CHAIN, VM_REPLACE, VM_REF, }; /* * A struct reperesenting a BPEG virtual machine operation */ typedef struct vm_op_s { enum VMOpcode op; const char *start, *end; // Length of the match, if constant, otherwise -1 ssize_t len; union { const char *s; struct { char low, high; } range; struct { ssize_t min, max; struct vm_op_s *sep, *repeat_pat; } repetitions; struct { struct vm_op_s *first, *second; } multiple; struct { struct vm_op_s *replace_pat; const char *replacement; } replace; struct { struct vm_op_s *capture_pat; char *name; } capture; struct vm_op_s *pat; } args; } vm_op_t; static match_t *free_match(match_t *m); static match_t *match(const char *str, vm_op_t *op); static void set_range(vm_op_t *op, ssize_t min, ssize_t max, vm_op_t *pat, vm_op_t *sep); static inline const char *after_spaces(const char *str); static vm_op_t *expand_choices(vm_op_t *op); static vm_op_t *expand_chain(vm_op_t *first); static vm_op_t *compile_bpeg(const char *str); typedef struct { const char *name; vm_op_t *op; } def_t; static def_t defs[1024] = {{NULL, NULL}}; size_t ndefs = 0; static int verbose = 1;