Added --ignore-case flag
This commit is contained in:
parent
45c4fb1049
commit
17460950a9
@ -9,6 +9,7 @@ It's written in pure C with no dependencies.
|
||||
### Flags
|
||||
* `-h` `--help` print the usage and quit
|
||||
* `-v` `--verbose` print verbose debugging info
|
||||
* `-i` `--ignore-case` perform a case-insensitive match
|
||||
* `-d` `--define <name>=<def>` define a grammar rule
|
||||
* `-D` `--define-string <name>=<def>` define a grammar rule (string-pattern)
|
||||
* `-p` `--pattern <pat>` provide a pattern (equivalent to bpeg '\(<pat>)')
|
||||
|
4
bpeg.1
4
bpeg.1
@ -7,6 +7,7 @@ bpeg \- Bruce's Parsing Expression Grammar tool
|
||||
.B bpeg
|
||||
[\fI-h\fR|\fI--help\fR]
|
||||
[\fI-v\fR|\fI--verbose\fR]
|
||||
[\fI-i\fR|\fI--ignore-case\fR]
|
||||
[\fI-p\fR|\fI--pattern\fR \fI<pattern>\fR]
|
||||
[\fI-P\fR|\fI--pattern-string\fR \fI<string-pattern>\fR]
|
||||
[\fI-d\fR|\fI--define\fR \fI<name>\fR=\fI<pattern>\fR]
|
||||
@ -22,6 +23,9 @@ bpeg \- Bruce's Parsing Expression Grammar tool
|
||||
.B \-v\fR, \fB--verbose
|
||||
Print debugging information.
|
||||
|
||||
.B \-i\fR, \fB--ignore-case
|
||||
Perform pattern matching case-insensitively.
|
||||
|
||||
.B \-d\fR, \fB--define \fI<name>\fR=\fI<pattern>\fR
|
||||
Define a grammar rule using a bpeg pattern.
|
||||
|
||||
|
19
bpeg.c
19
bpeg.c
@ -22,6 +22,7 @@ static const char *usage = (
|
||||
"Flags:\n"
|
||||
" -h --help\t print the usage and quit\n"
|
||||
" -v --verbose\t print verbose debugging info\n"
|
||||
" -i --ignore-case\t perform matching case-insensitively\n"
|
||||
" -d --define <name>=<def>\t define a grammar rule\n"
|
||||
" -D --define-string <name>=<def>\t define a grammar rule (string-pattern)\n"
|
||||
" -p --pattern <pat>\t provide a pattern (equivalent to bpeg '\\(<pat>)')\n"
|
||||
@ -44,7 +45,7 @@ static char *getflag(const char *flag, char *argv[], int *i)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static int run_match(grammar_t *g, const char *filename, vm_op_t *pattern, int verbose)
|
||||
static int run_match(grammar_t *g, const char *filename, vm_op_t *pattern, unsigned int flags)
|
||||
{
|
||||
char *input;
|
||||
if (filename == NULL || streq(filename, "-")) {
|
||||
@ -54,13 +55,13 @@ static int run_match(grammar_t *g, const char *filename, vm_op_t *pattern, int v
|
||||
check(fd >= 0, "Couldn't open file: %s", filename);
|
||||
input = readfile(fd);
|
||||
}
|
||||
match_t *m = match(g, input, pattern);
|
||||
match_t *m = match(g, input, pattern, flags);
|
||||
if (m != NULL && m->end > m->start + 1) {
|
||||
if (filename != NULL) {
|
||||
if (isatty(STDOUT_FILENO)) printf("\033[1;4;33m%s\033[0m\n", filename);
|
||||
else printf("%s\n", filename);
|
||||
}
|
||||
print_match(m, isatty(STDOUT_FILENO) ? "\033[0m" : NULL, verbose);
|
||||
print_match(m, isatty(STDOUT_FILENO) ? "\033[0m" : NULL, (flags & BPEG_VERBOSE) != 0);
|
||||
freefile(input);
|
||||
return 0;
|
||||
} else {
|
||||
@ -73,7 +74,7 @@ static int run_match(grammar_t *g, const char *filename, vm_op_t *pattern, int v
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int verbose = 0;
|
||||
unsigned int flags = 0;
|
||||
char *flag = NULL;
|
||||
char path[PATH_MAX] = {0};
|
||||
const char *rule = "find-all";
|
||||
@ -97,7 +98,9 @@ int main(int argc, char *argv[])
|
||||
printf("%s\n", usage);
|
||||
return 0;
|
||||
} else if (streq(argv[i], "--verbose") || streq(argv[i], "-v")) {
|
||||
verbose = 1;
|
||||
flags |= BPEG_VERBOSE;
|
||||
} else if (streq(argv[i], "--ignore-case") || streq(argv[i], "-i")) {
|
||||
flags |= BPEG_IGNORECASE;
|
||||
} else if (FLAG("--replace") || FLAG("-r")) {
|
||||
vm_op_t *p = bpeg_replacement(bpeg_pattern("pattern"), flag);
|
||||
check(p, "Replacement failed to compile");
|
||||
@ -170,7 +173,7 @@ int main(int argc, char *argv[])
|
||||
if (i < argc) {
|
||||
// Files pass in as command line args:
|
||||
for (int nfiles = 0; i < argc; nfiles++, i++) {
|
||||
ret |= run_match(g, argv[i], pattern, verbose);
|
||||
ret |= run_match(g, argv[i], pattern, flags);
|
||||
}
|
||||
} else if (isatty(STDIN_FILENO)) {
|
||||
// No files, no piped in input, so use * **/*:
|
||||
@ -178,12 +181,12 @@ int main(int argc, char *argv[])
|
||||
glob("*", 0, NULL, &globbuf);
|
||||
glob("**/*", GLOB_APPEND, NULL, &globbuf);
|
||||
for (size_t i = 0; i < globbuf.gl_pathc; i++) {
|
||||
ret |= run_match(g, globbuf.gl_pathv[i], pattern, verbose);
|
||||
ret |= run_match(g, globbuf.gl_pathv[i], pattern, flags);
|
||||
}
|
||||
globfree(&globbuf);
|
||||
} else {
|
||||
// Piped in input:
|
||||
ret |= run_match(g, NULL, pattern, verbose);
|
||||
ret |= run_match(g, NULL, pattern, flags);
|
||||
}
|
||||
|
||||
|
||||
|
5
types.h
5
types.h
@ -6,6 +6,11 @@
|
||||
|
||||
#include <sys/types.h>
|
||||
|
||||
enum BPEGFlag {
|
||||
BPEG_VERBOSE = 1 << 0,
|
||||
BPEG_IGNORECASE = 1 << 1,
|
||||
};
|
||||
|
||||
/*
|
||||
* BPEG virtual machine opcodes (these must be kept in sync with the names in vm.c)
|
||||
*/
|
||||
|
55
vm.c
55
vm.c
@ -5,7 +5,7 @@
|
||||
#include "grammar.h"
|
||||
#include "utils.h"
|
||||
|
||||
static match_t *match_backref(const char *str, vm_op_t *op, match_t *m);
|
||||
static match_t *match_backref(const char *str, vm_op_t *op, match_t *m, unsigned int flags);
|
||||
static size_t push_backrefs(grammar_t *g, match_t *m);
|
||||
static match_t *get_capture_n(match_t *m, int *n);
|
||||
static match_t *get_capture_named(match_t *m, const char *name);
|
||||
@ -77,7 +77,7 @@ typedef struct recursive_ref_s {
|
||||
* a match struct, or NULL if no match is found.
|
||||
* The returned value should be free()'d to avoid memory leaking.
|
||||
*/
|
||||
static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, recursive_ref_t *rec)
|
||||
static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, unsigned int flags, recursive_ref_t *rec)
|
||||
{
|
||||
switch (op->op) {
|
||||
case VM_EMPTY: {
|
||||
@ -97,7 +97,8 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, recursive_ref
|
||||
return m;
|
||||
}
|
||||
case VM_STRING: {
|
||||
if (strncmp(str, op->args.s, (size_t)op->len) != 0)
|
||||
if ((flags & BPEG_IGNORECASE) ? strncasecmp(str, op->args.s, (size_t)op->len) != 0
|
||||
: strncmp(str, op->args.s, (size_t)op->len) != 0)
|
||||
return NULL;
|
||||
match_t *m = calloc(sizeof(match_t), 1);
|
||||
m->op = op;
|
||||
@ -115,7 +116,7 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, recursive_ref
|
||||
return m;
|
||||
}
|
||||
case VM_NOT: {
|
||||
match_t *m = _match(g, str, op->args.pat, rec);
|
||||
match_t *m = _match(g, str, op->args.pat, flags, rec);
|
||||
if (m != NULL) {
|
||||
destroy_match(&m);
|
||||
return NULL;
|
||||
@ -133,7 +134,7 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, recursive_ref
|
||||
if (op->args.pat) {
|
||||
for (const char *prev = NULL; prev < str; ) {
|
||||
prev = str;
|
||||
match_t *p = _match(g, str, op->args.pat, rec);
|
||||
match_t *p = _match(g, str, op->args.pat, flags, rec);
|
||||
if (p) {
|
||||
destroy_match(&p);
|
||||
break;
|
||||
@ -164,11 +165,11 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, recursive_ref
|
||||
// Separator
|
||||
match_t *sep = NULL;
|
||||
if (op->args.repetitions.sep != NULL && reps > 0) {
|
||||
sep = _match(g, str, op->args.repetitions.sep, rec);
|
||||
sep = _match(g, str, op->args.repetitions.sep, flags, rec);
|
||||
if (sep == NULL) break;
|
||||
str = sep->end;
|
||||
}
|
||||
match_t *p = _match(g, str, op->args.repetitions.repeat_pat, rec);
|
||||
match_t *p = _match(g, str, op->args.repetitions.repeat_pat, flags, rec);
|
||||
if (p == NULL || (p->end == prev && reps > 0)) { // Prevent infinite loops
|
||||
destroy_match(&sep);
|
||||
destroy_match(&p);
|
||||
@ -204,7 +205,7 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, recursive_ref
|
||||
for (int i = 0; i < backtrack; i++) {
|
||||
if (str[-i] == '\0') return NULL;
|
||||
}
|
||||
match_t *before = _match(g, str - backtrack, op->args.pat, rec);
|
||||
match_t *before = _match(g, str - backtrack, op->args.pat, flags, rec);
|
||||
if (before == NULL) return NULL;
|
||||
match_t *m = calloc(sizeof(match_t), 1);
|
||||
m->start = str;
|
||||
@ -214,7 +215,7 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, recursive_ref
|
||||
return m;
|
||||
}
|
||||
case VM_BEFORE: {
|
||||
match_t *after = _match(g, str, op->args.pat, rec);
|
||||
match_t *after = _match(g, str, op->args.pat, flags, rec);
|
||||
if (after == NULL) return NULL;
|
||||
match_t *m = calloc(sizeof(match_t), 1);
|
||||
m->start = str;
|
||||
@ -224,7 +225,7 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, recursive_ref
|
||||
return m;
|
||||
}
|
||||
case VM_CAPTURE: {
|
||||
match_t *p = _match(g, str, op->args.pat, rec);
|
||||
match_t *p = _match(g, str, op->args.pat, flags, rec);
|
||||
if (p == NULL) return NULL;
|
||||
match_t *m = calloc(sizeof(match_t), 1);
|
||||
m->start = str;
|
||||
@ -237,16 +238,16 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, recursive_ref
|
||||
return m;
|
||||
}
|
||||
case VM_OTHERWISE: {
|
||||
match_t *m = _match(g, str, op->args.multiple.first, rec);
|
||||
if (m == NULL) m = _match(g, str, op->args.multiple.second, rec);
|
||||
match_t *m = _match(g, str, op->args.multiple.first, flags, rec);
|
||||
if (m == NULL) m = _match(g, str, op->args.multiple.second, flags, rec);
|
||||
return m;
|
||||
}
|
||||
case VM_CHAIN: {
|
||||
match_t *m1 = _match(g, str, op->args.multiple.first, rec);
|
||||
match_t *m1 = _match(g, str, op->args.multiple.first, flags, rec);
|
||||
if (m1 == NULL) return NULL;
|
||||
|
||||
size_t nbackrefs = push_backrefs(g, m1);
|
||||
match_t *m2 = _match(g, m1->end, op->args.multiple.second, rec);
|
||||
match_t *m2 = _match(g, m1->end, op->args.multiple.second, flags, rec);
|
||||
pop_backrefs(g, nbackrefs);
|
||||
if (m2 == NULL) {
|
||||
destroy_match(&m1);
|
||||
@ -261,11 +262,11 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, recursive_ref
|
||||
return m;
|
||||
}
|
||||
case VM_EQUAL: {
|
||||
match_t *m1 = _match(g, str, op->args.multiple.first, rec);
|
||||
match_t *m1 = _match(g, str, op->args.multiple.first, flags, rec);
|
||||
if (m1 == NULL) return NULL;
|
||||
|
||||
// <p1>==<p2> matches iff both have the same start and end point:
|
||||
match_t *m2 = _match(g, str, op->args.multiple.second, rec);
|
||||
match_t *m2 = _match(g, str, op->args.multiple.second, flags, rec);
|
||||
if (m2 == NULL || m2->end != m1->end) {
|
||||
destroy_match(&m1);
|
||||
destroy_match(&m2);
|
||||
@ -284,7 +285,7 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, recursive_ref
|
||||
m->start = str;
|
||||
m->op = op;
|
||||
if (op->args.replace.replace_pat) {
|
||||
match_t *p = _match(g, str, op->args.replace.replace_pat, rec);
|
||||
match_t *p = _match(g, str, op->args.replace.replace_pat, flags, rec);
|
||||
if (p == NULL) return NULL;
|
||||
m->child = p;
|
||||
m->end = p->end;
|
||||
@ -316,7 +317,7 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, recursive_ref
|
||||
};
|
||||
match_t *best = NULL;
|
||||
left_recursive:;
|
||||
match_t *p = _match(g, str, r, &wrap);
|
||||
match_t *p = _match(g, str, r, flags, &wrap);
|
||||
if (p == NULL) return best;
|
||||
if (wrap.hit && (best == NULL || p->end > best->end)) {
|
||||
best = p;
|
||||
@ -336,7 +337,7 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, recursive_ref
|
||||
return m;
|
||||
}
|
||||
case VM_BACKREF: {
|
||||
return match_backref(str, op, (match_t*)op->args.backref);
|
||||
return match_backref(str, op, (match_t*)op->args.backref, flags);
|
||||
}
|
||||
case VM_NODENT: {
|
||||
if (str[-1] == '\0') { // First line
|
||||
@ -578,7 +579,7 @@ void print_match(match_t *m, const char *color, int verbose)
|
||||
}
|
||||
}
|
||||
|
||||
static match_t *match_backref(const char *str, vm_op_t *op, match_t *cap)
|
||||
static match_t *match_backref(const char *str, vm_op_t *op, match_t *cap, unsigned int flags)
|
||||
{
|
||||
check(op->op == VM_BACKREF, "Attempt to match backref against something that's not a backref");
|
||||
match_t *ret = calloc(sizeof(match_t), 1);
|
||||
@ -636,7 +637,7 @@ static match_t *match_backref(const char *str, vm_op_t *op, match_t *cap)
|
||||
}
|
||||
}
|
||||
if (cap != NULL) {
|
||||
*dest = match_backref(str, op, cap);
|
||||
*dest = match_backref(str, op, cap, flags);
|
||||
if (*dest == NULL) {
|
||||
destroy_match(&ret);
|
||||
return NULL;
|
||||
@ -650,7 +651,8 @@ static match_t *match_backref(const char *str, vm_op_t *op, match_t *cap)
|
||||
for (match_t *child = cap->child; child; child = child->nextsibling) {
|
||||
if (child->start > prev) {
|
||||
size_t len = (size_t)(child->start - prev);
|
||||
if (strncmp(str, prev, len) != 0) {
|
||||
if ((flags & BPEG_IGNORECASE) ? strncasecmp(str, prev, len) != 0
|
||||
: strncmp(str, prev, len) != 0) {
|
||||
destroy_match(&ret);
|
||||
return NULL;
|
||||
}
|
||||
@ -658,7 +660,7 @@ static match_t *match_backref(const char *str, vm_op_t *op, match_t *cap)
|
||||
prev = child->start;
|
||||
}
|
||||
if (child->start < prev) continue;
|
||||
*dest = match_backref(str, op, child);
|
||||
*dest = match_backref(str, op, child, flags);
|
||||
if (*dest == NULL) {
|
||||
destroy_match(&ret);
|
||||
return NULL;
|
||||
@ -669,7 +671,8 @@ static match_t *match_backref(const char *str, vm_op_t *op, match_t *cap)
|
||||
}
|
||||
if (cap->end > prev) {
|
||||
size_t len = (size_t)(cap->end - prev);
|
||||
if (strncmp(str, prev, len) != 0) {
|
||||
if ((flags & BPEG_IGNORECASE) ? strncasecmp(str, prev, len) != 0
|
||||
: strncmp(str, prev, len) != 0) {
|
||||
destroy_match(&ret);
|
||||
return NULL;
|
||||
}
|
||||
@ -680,9 +683,9 @@ static match_t *match_backref(const char *str, vm_op_t *op, match_t *cap)
|
||||
return ret;
|
||||
}
|
||||
|
||||
match_t *match(grammar_t *g, const char *str, vm_op_t *op)
|
||||
match_t *match(grammar_t *g, const char *str, vm_op_t *op, unsigned int flags)
|
||||
{
|
||||
return _match(g, str, op, NULL);
|
||||
return _match(g, str, op, flags, NULL);
|
||||
}
|
||||
|
||||
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
|
||||
|
2
vm.h
2
vm.h
@ -11,7 +11,7 @@
|
||||
#include "types.h"
|
||||
|
||||
const char *opcode_name(enum VMOpcode o);
|
||||
match_t *match(grammar_t *g, const char *str, vm_op_t *op);
|
||||
match_t *match(grammar_t *g, const char *str, vm_op_t *op, unsigned int flags);
|
||||
void destroy_match(match_t **m);
|
||||
void print_pattern(vm_op_t *op);
|
||||
void print_match(match_t *m, const char *color, int verbose);
|
||||
|
Loading…
Reference in New Issue
Block a user