From 17460950a9c60f4550c449d7b352b129f06b6e0d Mon Sep 17 00:00:00 2001 From: Bruce Hill Date: Mon, 14 Sep 2020 12:16:15 -0700 Subject: Added --ignore-case flag --- README.md | 1 + bpeg.1 | 4 ++++ bpeg.c | 19 +++++++++++-------- types.h | 5 +++++ vm.c | 55 +++++++++++++++++++++++++++++-------------------------- vm.h | 2 +- 6 files changed, 51 insertions(+), 35 deletions(-) diff --git a/README.md b/README.md index 5dc4578..75458d6 100644 --- a/README.md +++ b/README.md @@ -9,6 +9,7 @@ It's written in pure C with no dependencies. ### Flags * `-h` `--help` print the usage and quit * `-v` `--verbose` print verbose debugging info +* `-i` `--ignore-case` perform a case-insensitive match * `-d` `--define =` define a grammar rule * `-D` `--define-string =` define a grammar rule (string-pattern) * `-p` `--pattern ` provide a pattern (equivalent to bpeg ' diff --git a/bpeg.1 b/bpeg.1 index 6088e44..c536d82 100644 --- a/bpeg.1 +++ b/bpeg.1 @@ -7,6 +7,7 @@ bpeg \- Bruce's Parsing Expression Grammar tool .B bpeg [\fI-h\fR|\fI--help\fR] [\fI-v\fR|\fI--verbose\fR] +[\fI-i\fR|\fI--ignore-case\fR] [\fI-p\fR|\fI--pattern\fR \fI\fR] [\fI-P\fR|\fI--pattern-string\fR \fI\fR] [\fI-d\fR|\fI--define\fR \fI\fR=\fI\fR] @@ -22,6 +23,9 @@ bpeg \- Bruce's Parsing Expression Grammar tool .B \-v\fR, \fB--verbose Print debugging information. +.B \-i\fR, \fB--ignore-case +Perform pattern matching case-insensitively. + .B \-d\fR, \fB--define \fI\fR=\fI\fR Define a grammar rule using a bpeg pattern. 
diff --git a/bpeg.c b/bpeg.c index cd25eb8..44a70fa 100644 --- a/bpeg.c +++ b/bpeg.c @@ -22,6 +22,7 @@ static const char *usage = ( "Flags:\n" " -h --help\t print the usage and quit\n" " -v --verbose\t print verbose debugging info\n" + " -i --ignore-case\t perform matching case-insensitively\n" " -d --define =\t define a grammar rule\n" " -D --define-string =\t define a grammar rule (string-pattern)\n" " -p --pattern \t provide a pattern (equivalent to bpeg '\\()')\n" @@ -44,7 +45,7 @@ static char *getflag(const char *flag, char *argv[], int *i) return NULL; } -static int run_match(grammar_t *g, const char *filename, vm_op_t *pattern, int verbose) +static int run_match(grammar_t *g, const char *filename, vm_op_t *pattern, unsigned int flags) { char *input; if (filename == NULL || streq(filename, "-")) { @@ -54,13 +55,13 @@ static int run_match(grammar_t *g, const char *filename, vm_op_t *pattern, int v check(fd >= 0, "Couldn't open file: %s", filename); input = readfile(fd); } - match_t *m = match(g, input, pattern); + match_t *m = match(g, input, pattern, flags); if (m != NULL && m->end > m->start + 1) { if (filename != NULL) { if (isatty(STDOUT_FILENO)) printf("\033[1;4;33m%s\033[0m\n", filename); else printf("%s\n", filename); } - print_match(m, isatty(STDOUT_FILENO) ? 
"\033[0m" : NULL, (flags & BPEG_VERBOSE) != 0); freefile(input); return 0; } else { @@ -73,7 +74,7 @@ static int run_match(grammar_t *g, const char *filename, vm_op_t *pattern, int v int main(int argc, char *argv[]) { - int verbose = 0; + unsigned int flags = 0; char *flag = NULL; char path[PATH_MAX] = {0}; const char *rule = "find-all"; @@ -97,7 +98,9 @@ int main(int argc, char *argv[]) printf("%s\n", usage); return 0; } else if (streq(argv[i], "--verbose") || streq(argv[i], "-v")) { - verbose = 1; + flags |= BPEG_VERBOSE; + } else if (streq(argv[i], "--ignore-case") || streq(argv[i], "-i")) { + flags |= BPEG_IGNORECASE; } else if (FLAG("--replace") || FLAG("-r")) { vm_op_t *p = bpeg_replacement(bpeg_pattern("pattern"), flag); check(p, "Replacement failed to compile"); @@ -170,7 +173,7 @@ int main(int argc, char *argv[]) if (i < argc) { // Files pass in as command line args: for (int nfiles = 0; i < argc; nfiles++, i++) { - ret |= run_match(g, argv[i], pattern, verbose); + ret |= run_match(g, argv[i], pattern, flags); } } else if (isatty(STDIN_FILENO)) { // No files, no piped in input, so use * **/*: @@ -178,12 +181,12 @@ int main(int argc, char *argv[]) glob("*", 0, NULL, &globbuf); glob("**/*", GLOB_APPEND, NULL, &globbuf); for (size_t i = 0; i < globbuf.gl_pathc; i++) { - ret |= run_match(g, globbuf.gl_pathv[i], pattern, verbose); + ret |= run_match(g, globbuf.gl_pathv[i], pattern, flags); } globfree(&globbuf); } else { // Piped in input: - ret |= run_match(g, NULL, pattern, verbose); + ret |= run_match(g, NULL, pattern, flags); } diff --git a/types.h b/types.h index 36e1aa6..b2461a2 100644 --- a/types.h +++ b/types.h @@ -6,6 +6,11 @@ #include +enum BPEGFlag { + BPEG_VERBOSE = 1 << 0, + BPEG_IGNORECASE = 1 << 1, +}; + /* * BPEG virtual machine opcodes (these must be kept in sync with the names in vm.c) */ diff --git a/vm.c b/vm.c index 9974547..77d6d69 100644 --- a/vm.c +++ b/vm.c @@ -5,7 +5,7 @@ #include "grammar.h" #include "utils.h" -static match_t 
*match_backref(const char *str, vm_op_t *op, match_t *m); +static match_t *match_backref(const char *str, vm_op_t *op, match_t *m, unsigned int flags); static size_t push_backrefs(grammar_t *g, match_t *m); static match_t *get_capture_n(match_t *m, int *n); static match_t *get_capture_named(match_t *m, const char *name); @@ -77,7 +77,7 @@ typedef struct recursive_ref_s { * a match struct, or NULL if no match is found. * The returned value should be free()'d to avoid memory leaking. */ -static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, recursive_ref_t *rec) +static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, unsigned int flags, recursive_ref_t *rec) { switch (op->op) { case VM_EMPTY: { @@ -97,7 +97,8 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, recursive_ref return m; } case VM_STRING: { - if (strncmp(str, op->args.s, (size_t)op->len) != 0) + if ((flags & BPEG_IGNORECASE) ? strncasecmp(str, op->args.s, (size_t)op->len) != 0 + : strncmp(str, op->args.s, (size_t)op->len) != 0) return NULL; match_t *m = calloc(sizeof(match_t), 1); m->op = op; @@ -115,7 +116,7 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, recursive_ref return m; } case VM_NOT: { - match_t *m = _match(g, str, op->args.pat, rec); + match_t *m = _match(g, str, op->args.pat, flags, rec); if (m != NULL) { destroy_match(&m); return NULL; @@ -133,7 +134,7 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, recursive_ref if (op->args.pat) { for (const char *prev = NULL; prev < str; ) { prev = str; - match_t *p = _match(g, str, op->args.pat, rec); + match_t *p = _match(g, str, op->args.pat, flags, rec); if (p) { destroy_match(&p); break; @@ -164,11 +165,11 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, recursive_ref // Separator match_t *sep = NULL; if (op->args.repetitions.sep != NULL && reps > 0) { - sep = _match(g, str, op->args.repetitions.sep, rec); + sep = _match(g, str, 
op->args.repetitions.sep, flags, rec); if (sep == NULL) break; str = sep->end; } - match_t *p = _match(g, str, op->args.repetitions.repeat_pat, rec); + match_t *p = _match(g, str, op->args.repetitions.repeat_pat, flags, rec); if (p == NULL || (p->end == prev && reps > 0)) { // Prevent infinite loops destroy_match(&sep); destroy_match(&p); @@ -204,7 +205,7 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, recursive_ref for (int i = 0; i < backtrack; i++) { if (str[-i] == '\0') return NULL; } - match_t *before = _match(g, str - backtrack, op->args.pat, rec); + match_t *before = _match(g, str - backtrack, op->args.pat, flags, rec); if (before == NULL) return NULL; match_t *m = calloc(sizeof(match_t), 1); m->start = str; @@ -214,7 +215,7 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, recursive_ref return m; } case VM_BEFORE: { - match_t *after = _match(g, str, op->args.pat, rec); + match_t *after = _match(g, str, op->args.pat, flags, rec); if (after == NULL) return NULL; match_t *m = calloc(sizeof(match_t), 1); m->start = str; @@ -224,7 +225,7 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, recursive_ref return m; } case VM_CAPTURE: { - match_t *p = _match(g, str, op->args.pat, rec); + match_t *p = _match(g, str, op->args.pat, flags, rec); if (p == NULL) return NULL; match_t *m = calloc(sizeof(match_t), 1); m->start = str; @@ -237,16 +238,16 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, recursive_ref return m; } case VM_OTHERWISE: { - match_t *m = _match(g, str, op->args.multiple.first, rec); - if (m == NULL) m = _match(g, str, op->args.multiple.second, rec); + match_t *m = _match(g, str, op->args.multiple.first, flags, rec); + if (m == NULL) m = _match(g, str, op->args.multiple.second, flags, rec); return m; } case VM_CHAIN: { - match_t *m1 = _match(g, str, op->args.multiple.first, rec); + match_t *m1 = _match(g, str, op->args.multiple.first, flags, rec); if (m1 == NULL) return 
NULL; size_t nbackrefs = push_backrefs(g, m1); - match_t *m2 = _match(g, m1->end, op->args.multiple.second, rec); + match_t *m2 = _match(g, m1->end, op->args.multiple.second, flags, rec); pop_backrefs(g, nbackrefs); if (m2 == NULL) { destroy_match(&m1); @@ -261,11 +262,11 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, recursive_ref return m; } case VM_EQUAL: { - match_t *m1 = _match(g, str, op->args.multiple.first, rec); + match_t *m1 = _match(g, str, op->args.multiple.first, flags, rec); if (m1 == NULL) return NULL; // == matches iff both have the same start and end point: - match_t *m2 = _match(g, str, op->args.multiple.second, rec); + match_t *m2 = _match(g, str, op->args.multiple.second, flags, rec); if (m2 == NULL || m2->end != m1->end) { destroy_match(&m1); destroy_match(&m2); @@ -284,7 +285,7 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, recursive_ref m->start = str; m->op = op; if (op->args.replace.replace_pat) { - match_t *p = _match(g, str, op->args.replace.replace_pat, rec); + match_t *p = _match(g, str, op->args.replace.replace_pat, flags, rec); if (p == NULL) return NULL; m->child = p; m->end = p->end; @@ -316,7 +317,7 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, recursive_ref }; match_t *best = NULL; left_recursive:; - match_t *p = _match(g, str, r, &wrap); + match_t *p = _match(g, str, r, flags, &wrap); if (p == NULL) return best; if (wrap.hit && (best == NULL || p->end > best->end)) { best = p; @@ -336,7 +337,7 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, recursive_ref return m; } case VM_BACKREF: { - return match_backref(str, op, (match_t*)op->args.backref); + return match_backref(str, op, (match_t*)op->args.backref, flags); } case VM_NODENT: { if (str[-1] == '\0') { // First line @@ -578,7 +579,7 @@ void print_match(match_t *m, const char *color, int verbose) } } -static match_t *match_backref(const char *str, vm_op_t *op, match_t *cap) +static match_t 
*match_backref(const char *str, vm_op_t *op, match_t *cap, unsigned int flags) { check(op->op == VM_BACKREF, "Attempt to match backref against something that's not a backref"); match_t *ret = calloc(sizeof(match_t), 1); @@ -636,7 +637,7 @@ static match_t *match_backref(const char *str, vm_op_t *op, match_t *cap) } } if (cap != NULL) { - *dest = match_backref(str, op, cap); + *dest = match_backref(str, op, cap, flags); if (*dest == NULL) { destroy_match(&ret); return NULL; @@ -650,7 +651,8 @@ static match_t *match_backref(const char *str, vm_op_t *op, match_t *cap) for (match_t *child = cap->child; child; child = child->nextsibling) { if (child->start > prev) { size_t len = (size_t)(child->start - prev); - if (strncmp(str, prev, len) != 0) { + if ((flags & BPEG_IGNORECASE) ? strncasecmp(str, prev, len) != 0 + : strncmp(str, prev, len) != 0) { destroy_match(&ret); return NULL; } @@ -658,7 +660,7 @@ static match_t *match_backref(const char *str, vm_op_t *op, match_t *cap) prev = child->start; } if (child->start < prev) continue; - *dest = match_backref(str, op, child); + *dest = match_backref(str, op, child, flags); if (*dest == NULL) { destroy_match(&ret); return NULL; @@ -669,7 +671,8 @@ static match_t *match_backref(const char *str, vm_op_t *op, match_t *cap) } if (cap->end > prev) { size_t len = (size_t)(cap->end - prev); - if (strncmp(str, prev, len) != 0) { + if ((flags & BPEG_IGNORECASE) ? 
strncasecmp(str, prev, len) != 0 + : strncmp(str, prev, len) != 0) { destroy_match(&ret); return NULL; } @@ -680,9 +683,9 @@ static match_t *match_backref(const char *str, vm_op_t *op, match_t *cap) return ret; } -match_t *match(grammar_t *g, const char *str, vm_op_t *op) +match_t *match(grammar_t *g, const char *str, vm_op_t *op, unsigned int flags) { - return _match(g, str, op, NULL); + return _match(g, str, op, flags, NULL); } // vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1 diff --git a/vm.h b/vm.h index db1e9f5..6d0a1b4 100644 --- a/vm.h +++ b/vm.h @@ -11,7 +11,7 @@ #include "types.h" const char *opcode_name(enum VMOpcode o); -match_t *match(grammar_t *g, const char *str, vm_op_t *op); +match_t *match(grammar_t *g, const char *str, vm_op_t *op, unsigned int flags); void destroy_match(match_t **m); void print_pattern(vm_op_t *op); void print_match(match_t *m, const char *color, int verbose); -- cgit v1.2.3