Added --ignore-case flag

This commit is contained in:
Bruce Hill 2020-09-14 12:16:15 -07:00
parent 45c4fb1049
commit 17460950a9
6 changed files with 51 additions and 35 deletions

View File

@ -9,6 +9,7 @@ It's written in pure C with no dependencies.
### Flags
* `-h` `--help` print the usage and quit
* `-v` `--verbose` print verbose debugging info
* `-i` `--ignore-case` perform a case-insensitive match
* `-d` `--define <name>=<def>` define a grammar rule
* `-D` `--define-string <name>=<def>` define a grammar rule (string-pattern)
* `-p` `--pattern <pat>` provide a pattern (equivalent to bpeg '\(<pat>)')

4
bpeg.1
View File

@ -7,6 +7,7 @@ bpeg \- Bruce's Parsing Expression Grammar tool
.B bpeg
[\fI-h\fR|\fI--help\fR]
[\fI-v\fR|\fI--verbose\fR]
[\fI-i\fR|\fI--ignore-case\fR]
[\fI-p\fR|\fI--pattern\fR \fI<pattern>\fR]
[\fI-P\fR|\fI--pattern-string\fR \fI<string-pattern>\fR]
[\fI-d\fR|\fI--define\fR \fI<name>\fR=\fI<pattern>\fR]
@ -22,6 +23,9 @@ bpeg \- Bruce's Parsing Expression Grammar tool
.B \-v\fR, \fB--verbose
Print debugging information.
.B \-i\fR, \fB--ignore-case
Perform pattern matching case-insensitively.
.B \-d\fR, \fB--define \fI<name>\fR=\fI<pattern>\fR
Define a grammar rule using a bpeg pattern.

19
bpeg.c
View File

@ -22,6 +22,7 @@ static const char *usage = (
"Flags:\n"
" -h --help\t print the usage and quit\n"
" -v --verbose\t print verbose debugging info\n"
" -i --ignore-case\t perform matching case-insensitively\n"
" -d --define <name>=<def>\t define a grammar rule\n"
" -D --define-string <name>=<def>\t define a grammar rule (string-pattern)\n"
" -p --pattern <pat>\t provide a pattern (equivalent to bpeg '\\(<pat>)')\n"
@ -44,7 +45,7 @@ static char *getflag(const char *flag, char *argv[], int *i)
return NULL;
}
static int run_match(grammar_t *g, const char *filename, vm_op_t *pattern, int verbose)
static int run_match(grammar_t *g, const char *filename, vm_op_t *pattern, unsigned int flags)
{
char *input;
if (filename == NULL || streq(filename, "-")) {
@ -54,13 +55,13 @@ static int run_match(grammar_t *g, const char *filename, vm_op_t *pattern, int v
check(fd >= 0, "Couldn't open file: %s", filename);
input = readfile(fd);
}
match_t *m = match(g, input, pattern);
match_t *m = match(g, input, pattern, flags);
if (m != NULL && m->end > m->start + 1) {
if (filename != NULL) {
if (isatty(STDOUT_FILENO)) printf("\033[1;4;33m%s\033[0m\n", filename);
else printf("%s\n", filename);
}
print_match(m, isatty(STDOUT_FILENO) ? "\033[0m" : NULL, verbose);
print_match(m, isatty(STDOUT_FILENO) ? "\033[0m" : NULL, (flags & BPEG_VERBOSE) != 0);
freefile(input);
return 0;
} else {
@ -73,7 +74,7 @@ static int run_match(grammar_t *g, const char *filename, vm_op_t *pattern, int v
int main(int argc, char *argv[])
{
int verbose = 0;
unsigned int flags = 0;
char *flag = NULL;
char path[PATH_MAX] = {0};
const char *rule = "find-all";
@ -97,7 +98,9 @@ int main(int argc, char *argv[])
printf("%s\n", usage);
return 0;
} else if (streq(argv[i], "--verbose") || streq(argv[i], "-v")) {
verbose = 1;
flags |= BPEG_VERBOSE;
} else if (streq(argv[i], "--ignore-case") || streq(argv[i], "-i")) {
flags |= BPEG_IGNORECASE;
} else if (FLAG("--replace") || FLAG("-r")) {
vm_op_t *p = bpeg_replacement(bpeg_pattern("pattern"), flag);
check(p, "Replacement failed to compile");
@ -170,7 +173,7 @@ int main(int argc, char *argv[])
if (i < argc) {
// Files pass in as command line args:
for (int nfiles = 0; i < argc; nfiles++, i++) {
ret |= run_match(g, argv[i], pattern, verbose);
ret |= run_match(g, argv[i], pattern, flags);
}
} else if (isatty(STDIN_FILENO)) {
// No files, no piped in input, so use * **/*:
@ -178,12 +181,12 @@ int main(int argc, char *argv[])
glob("*", 0, NULL, &globbuf);
glob("**/*", GLOB_APPEND, NULL, &globbuf);
for (size_t i = 0; i < globbuf.gl_pathc; i++) {
ret |= run_match(g, globbuf.gl_pathv[i], pattern, verbose);
ret |= run_match(g, globbuf.gl_pathv[i], pattern, flags);
}
globfree(&globbuf);
} else {
// Piped in input:
ret |= run_match(g, NULL, pattern, verbose);
ret |= run_match(g, NULL, pattern, flags);
}

View File

@ -6,6 +6,11 @@
#include <sys/types.h>
/*
 * Option bits that are OR'd together into the `unsigned int flags` value
 * handed to match(). Each constant occupies its own bit so that any
 * combination can be tested with a bitwise AND.
 */
enum BPEGFlag {
    BPEG_VERBOSE    = 0x1, /* set by -v/--verbose; forwarded to print_match() as its verbose argument */
    BPEG_IGNORECASE = 0x2, /* set by -i/--ignore-case; string comparisons use strncasecmp() instead of strncmp() */
};
/*
* BPEG virtual machine opcodes (these must be kept in sync with the names in vm.c)
*/

55
vm.c
View File

@ -5,7 +5,7 @@
#include "grammar.h"
#include "utils.h"
static match_t *match_backref(const char *str, vm_op_t *op, match_t *m);
static match_t *match_backref(const char *str, vm_op_t *op, match_t *m, unsigned int flags);
static size_t push_backrefs(grammar_t *g, match_t *m);
static match_t *get_capture_n(match_t *m, int *n);
static match_t *get_capture_named(match_t *m, const char *name);
@ -77,7 +77,7 @@ typedef struct recursive_ref_s {
* a match struct, or NULL if no match is found.
* The returned value should be free()'d to avoid memory leaking.
*/
static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, recursive_ref_t *rec)
static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, unsigned int flags, recursive_ref_t *rec)
{
switch (op->op) {
case VM_EMPTY: {
@ -97,7 +97,8 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, recursive_ref
return m;
}
case VM_STRING: {
if (strncmp(str, op->args.s, (size_t)op->len) != 0)
if ((flags & BPEG_IGNORECASE) ? strncasecmp(str, op->args.s, (size_t)op->len) != 0
: strncmp(str, op->args.s, (size_t)op->len) != 0)
return NULL;
match_t *m = calloc(sizeof(match_t), 1);
m->op = op;
@ -115,7 +116,7 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, recursive_ref
return m;
}
case VM_NOT: {
match_t *m = _match(g, str, op->args.pat, rec);
match_t *m = _match(g, str, op->args.pat, flags, rec);
if (m != NULL) {
destroy_match(&m);
return NULL;
@ -133,7 +134,7 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, recursive_ref
if (op->args.pat) {
for (const char *prev = NULL; prev < str; ) {
prev = str;
match_t *p = _match(g, str, op->args.pat, rec);
match_t *p = _match(g, str, op->args.pat, flags, rec);
if (p) {
destroy_match(&p);
break;
@ -164,11 +165,11 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, recursive_ref
// Separator
match_t *sep = NULL;
if (op->args.repetitions.sep != NULL && reps > 0) {
sep = _match(g, str, op->args.repetitions.sep, rec);
sep = _match(g, str, op->args.repetitions.sep, flags, rec);
if (sep == NULL) break;
str = sep->end;
}
match_t *p = _match(g, str, op->args.repetitions.repeat_pat, rec);
match_t *p = _match(g, str, op->args.repetitions.repeat_pat, flags, rec);
if (p == NULL || (p->end == prev && reps > 0)) { // Prevent infinite loops
destroy_match(&sep);
destroy_match(&p);
@ -204,7 +205,7 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, recursive_ref
for (int i = 0; i < backtrack; i++) {
if (str[-i] == '\0') return NULL;
}
match_t *before = _match(g, str - backtrack, op->args.pat, rec);
match_t *before = _match(g, str - backtrack, op->args.pat, flags, rec);
if (before == NULL) return NULL;
match_t *m = calloc(sizeof(match_t), 1);
m->start = str;
@ -214,7 +215,7 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, recursive_ref
return m;
}
case VM_BEFORE: {
match_t *after = _match(g, str, op->args.pat, rec);
match_t *after = _match(g, str, op->args.pat, flags, rec);
if (after == NULL) return NULL;
match_t *m = calloc(sizeof(match_t), 1);
m->start = str;
@ -224,7 +225,7 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, recursive_ref
return m;
}
case VM_CAPTURE: {
match_t *p = _match(g, str, op->args.pat, rec);
match_t *p = _match(g, str, op->args.pat, flags, rec);
if (p == NULL) return NULL;
match_t *m = calloc(sizeof(match_t), 1);
m->start = str;
@ -237,16 +238,16 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, recursive_ref
return m;
}
case VM_OTHERWISE: {
match_t *m = _match(g, str, op->args.multiple.first, rec);
if (m == NULL) m = _match(g, str, op->args.multiple.second, rec);
match_t *m = _match(g, str, op->args.multiple.first, flags, rec);
if (m == NULL) m = _match(g, str, op->args.multiple.second, flags, rec);
return m;
}
case VM_CHAIN: {
match_t *m1 = _match(g, str, op->args.multiple.first, rec);
match_t *m1 = _match(g, str, op->args.multiple.first, flags, rec);
if (m1 == NULL) return NULL;
size_t nbackrefs = push_backrefs(g, m1);
match_t *m2 = _match(g, m1->end, op->args.multiple.second, rec);
match_t *m2 = _match(g, m1->end, op->args.multiple.second, flags, rec);
pop_backrefs(g, nbackrefs);
if (m2 == NULL) {
destroy_match(&m1);
@ -261,11 +262,11 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, recursive_ref
return m;
}
case VM_EQUAL: {
match_t *m1 = _match(g, str, op->args.multiple.first, rec);
match_t *m1 = _match(g, str, op->args.multiple.first, flags, rec);
if (m1 == NULL) return NULL;
// <p1>==<p2> matches iff both have the same start and end point:
match_t *m2 = _match(g, str, op->args.multiple.second, rec);
match_t *m2 = _match(g, str, op->args.multiple.second, flags, rec);
if (m2 == NULL || m2->end != m1->end) {
destroy_match(&m1);
destroy_match(&m2);
@ -284,7 +285,7 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, recursive_ref
m->start = str;
m->op = op;
if (op->args.replace.replace_pat) {
match_t *p = _match(g, str, op->args.replace.replace_pat, rec);
match_t *p = _match(g, str, op->args.replace.replace_pat, flags, rec);
if (p == NULL) return NULL;
m->child = p;
m->end = p->end;
@ -316,7 +317,7 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, recursive_ref
};
match_t *best = NULL;
left_recursive:;
match_t *p = _match(g, str, r, &wrap);
match_t *p = _match(g, str, r, flags, &wrap);
if (p == NULL) return best;
if (wrap.hit && (best == NULL || p->end > best->end)) {
best = p;
@ -336,7 +337,7 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, recursive_ref
return m;
}
case VM_BACKREF: {
return match_backref(str, op, (match_t*)op->args.backref);
return match_backref(str, op, (match_t*)op->args.backref, flags);
}
case VM_NODENT: {
if (str[-1] == '\0') { // First line
@ -578,7 +579,7 @@ void print_match(match_t *m, const char *color, int verbose)
}
}
static match_t *match_backref(const char *str, vm_op_t *op, match_t *cap)
static match_t *match_backref(const char *str, vm_op_t *op, match_t *cap, unsigned int flags)
{
check(op->op == VM_BACKREF, "Attempt to match backref against something that's not a backref");
match_t *ret = calloc(sizeof(match_t), 1);
@ -636,7 +637,7 @@ static match_t *match_backref(const char *str, vm_op_t *op, match_t *cap)
}
}
if (cap != NULL) {
*dest = match_backref(str, op, cap);
*dest = match_backref(str, op, cap, flags);
if (*dest == NULL) {
destroy_match(&ret);
return NULL;
@ -650,7 +651,8 @@ static match_t *match_backref(const char *str, vm_op_t *op, match_t *cap)
for (match_t *child = cap->child; child; child = child->nextsibling) {
if (child->start > prev) {
size_t len = (size_t)(child->start - prev);
if (strncmp(str, prev, len) != 0) {
if ((flags & BPEG_IGNORECASE) ? strncasecmp(str, prev, len) != 0
: strncmp(str, prev, len) != 0) {
destroy_match(&ret);
return NULL;
}
@ -658,7 +660,7 @@ static match_t *match_backref(const char *str, vm_op_t *op, match_t *cap)
prev = child->start;
}
if (child->start < prev) continue;
*dest = match_backref(str, op, child);
*dest = match_backref(str, op, child, flags);
if (*dest == NULL) {
destroy_match(&ret);
return NULL;
@ -669,7 +671,8 @@ static match_t *match_backref(const char *str, vm_op_t *op, match_t *cap)
}
if (cap->end > prev) {
size_t len = (size_t)(cap->end - prev);
if (strncmp(str, prev, len) != 0) {
if ((flags & BPEG_IGNORECASE) ? strncasecmp(str, prev, len) != 0
: strncmp(str, prev, len) != 0) {
destroy_match(&ret);
return NULL;
}
@ -680,9 +683,9 @@ static match_t *match_backref(const char *str, vm_op_t *op, match_t *cap)
return ret;
}
match_t *match(grammar_t *g, const char *str, vm_op_t *op)
match_t *match(grammar_t *g, const char *str, vm_op_t *op, unsigned int flags)
{
return _match(g, str, op, NULL);
return _match(g, str, op, flags, NULL);
}
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1

2
vm.h
View File

@ -11,7 +11,7 @@
#include "types.h"
const char *opcode_name(enum VMOpcode o);
match_t *match(grammar_t *g, const char *str, vm_op_t *op);
match_t *match(grammar_t *g, const char *str, vm_op_t *op, unsigned int flags);
void destroy_match(match_t **m);
void print_pattern(vm_op_t *op);
void print_match(match_t *m, const char *color, int verbose);