/* * bpeg.h - Header file for the bpeg parser */ #include #include #include #include #include #include #include "utils.h" const char *usage = ( "Usage:\n" " bpeg [flags] [...]\n\n" "Flags:\n" " -h --help\t print the usage and quit\n" " -v --verbose\t print verbose debugging info\n" " -s --slow\t run in slow mode for debugging\n" " -r --replace replace the input pattern with the given replacement\n" " -g --grammar use the specified file as a grammar\n"); /* * Pattern matching result object */ typedef struct match_s { // Where the match starts and ends (end is after the last character) const char *start, *end; unsigned int is_capture:1, is_replacement:1; const char *name_or_replacement; struct match_s *child, *nextsibling; } match_t; /* * BPEG virtual machine opcodes */ enum VMOpcode { VM_EMPTY = 0, VM_ANYCHAR = 1, VM_ANYTHING_BUT, VM_STRING, VM_RANGE, VM_NOT, VM_UPTO_AND, VM_REPEAT, VM_BEFORE, VM_AFTER, VM_CAPTURE, VM_OTHERWISE, VM_CHAIN, VM_REPLACE, VM_REF, }; /* * A struct reperesenting a BPEG virtual machine operation */ typedef struct vm_op_s { enum VMOpcode op; unsigned int multiline:1; const char *start, *end; // Length of the match, if constant, otherwise -1 ssize_t len; union { const char *s; struct { char low, high; } range; struct { ssize_t min, max; struct vm_op_s *sep, *repeat_pat; } repetitions; // TODO: use a linked list instead of a binary tree struct { struct vm_op_s *first, *second; } multiple; struct { struct vm_op_s *replace_pat; const char *replacement; } replace; struct { struct vm_op_s *capture_pat; char *name; } capture; struct vm_op_s *pat; } args; } vm_op_t; static inline const char *after_spaces(const char *str); static match_t *free_match(match_t *m); static match_t *match(const char *str, vm_op_t *op); static vm_op_t *compile_bpeg(const char *source, const char *str); static vm_op_t *load_grammar(const char *grammar); static vm_op_t *add_def(const char *name, const char *source, vm_op_t *op); static vm_op_t *load_def(const char *name, const char *source); static vm_op_t *chain_together(vm_op_t *first, vm_op_t *second); static vm_op_t *compile_bpeg_string(const char *source, const char *str); static vm_op_t *expand_chain(const char *source, vm_op_t *first); static vm_op_t *expand_choices(const char *source, vm_op_t *op); static void print_match(match_t *m, const char *color); static void set_range(vm_op_t *op, ssize_t min, ssize_t max, vm_op_t *pat, vm_op_t *sep); typedef struct { const char *name; const char *source; vm_op_t *op; } def_t; static def_t defs[1024] = {{NULL, NULL, NULL}}; size_t ndefs = 0;