Improved handling of CLI flags
This commit is contained in:
parent
2f05677c98
commit
d0538fa25f
146
bpeg.c
146
bpeg.c
@ -52,29 +52,63 @@ static const char *usage = (
|
||||
" -r --replace <replacement> replace the input pattern with the given replacement\n"
|
||||
" -g --grammar <grammar file> use the specified file as a grammar\n");
|
||||
|
||||
/*
 * Check whether the argument at argv[*i] is the given command line flag and,
 * if so, return the value attached to it.
 *
 * Two spellings are recognized:
 *   --flag=value   returns a pointer to "value" inside argv[*i]; *i is not
 *                  changed.
 *   --flag value   advances *i to the following argument and returns it.
 *
 * Returns NULL when argv[*i] is not this flag. Also returns NULL, leaving *i
 * untouched, when the flag is the last argument and so has no value. In that
 * case *i still indexes a real argument, so callers that probe several flags
 * in a row never hand the terminating NULL entry of argv to strncmp().
 *
 * argv must be NULL-terminated, as the argv passed to main() is.
 */
static char *getflag(const char *flag, char *argv[], int *i)
{
    char *arg = argv[*i];
    if (arg == NULL)
        return NULL;

    size_t n = strlen(flag);
    if (strncmp(arg, flag, n) != 0)
        return NULL;

    if (arg[n] == '=') {
        // "--flag=value": the value lives in the same argument
        return &arg[n + 1];
    }

    if (arg[n] == '\0') {
        // "--flag value": only consume the next argument if there is one
        if (argv[*i + 1] == NULL)
            return NULL;
        ++(*i);
        return argv[*i];
    }

    // Merely shares a prefix with the flag (e.g. "--patterns" vs "--pattern")
    return NULL;
}
|
||||
#define FLAG(f) (flag=getflag((f), argv, &i))
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int verbose = 0;
|
||||
const char *pattern = NULL,
|
||||
*replacement = NULL,
|
||||
*grammarfile = NULL,
|
||||
*infile = NULL;
|
||||
char *flag = NULL;
|
||||
const char *rule = "find-all";
|
||||
|
||||
grammar_t *g = new_grammar();
|
||||
|
||||
for (int i = 1; i < argc; i++) {
|
||||
if (streq(argv[i], "--help") || streq(argv[i], "-h")) {
|
||||
int i, npatterns = 0;
|
||||
for (i = 1; i < argc; i++) {
|
||||
if (streq(argv[i], "--")) {
|
||||
++i;
|
||||
break;
|
||||
} else if (FLAG("--help") || FLAG("-h")) {
|
||||
printf("%s\n", usage);
|
||||
return 0;
|
||||
} else if (streq(argv[i], "--verbose") || streq(argv[i], "-v")) {
|
||||
} else if (FLAG("--verbose") || FLAG("-v")) {
|
||||
verbose = 1;
|
||||
} else if (streq(argv[i], "--replace") || streq(argv[i], "-r")) {
|
||||
replacement = argv[++i];
|
||||
} else if (streq(argv[i], "--grammar") || streq(argv[i], "-g")) {
|
||||
grammarfile = argv[++i];
|
||||
} else if (streq(argv[i], "--define") || streq(argv[i], "-d")) {
|
||||
char *def = argv[++i];
|
||||
} else if (FLAG("--pattern") || FLAG("-p")) {
|
||||
vm_op_t *p = bpeg_pattern(flag);
|
||||
check(p, "Pattern failed to compile");
|
||||
add_def(g, flag, "pattern", p);
|
||||
++npatterns;
|
||||
} else if (FLAG("--replace") || FLAG("-r")) {
|
||||
vm_op_t *p = bpeg_replacement(bpeg_pattern("pattern"), flag);
|
||||
check(p, "Replacement failed to compile");
|
||||
add_def(g, flag, "replacement", p);
|
||||
rule = "replace-all";
|
||||
} else if (FLAG("--grammar") || FLAG("-g")) {
|
||||
const char *grammarfile = flag;
|
||||
// load grammar from a file (semicolon mode)
|
||||
char *grammar;
|
||||
if (streq(grammarfile, "-")) {
|
||||
grammar = readfile(STDIN_FILENO);
|
||||
} else {
|
||||
int fd = open(grammarfile, O_RDONLY);
|
||||
check(fd >= 0, "Couldn't open file: %s", argv[2]);
|
||||
grammar = readfile(fd);
|
||||
}
|
||||
load_grammar(g, grammar);
|
||||
} else if (FLAG("--define") || FLAG("-d")) {
|
||||
char *def = flag;
|
||||
char *eq = strchr(def, '=');
|
||||
check(eq, usage);
|
||||
*eq = '\0';
|
||||
@ -82,71 +116,51 @@ int main(int argc, char *argv[])
|
||||
vm_op_t *pat = bpeg_pattern(src);
|
||||
check(pat, "Failed to compile pattern");
|
||||
add_def(g, src, def, pat);
|
||||
} else if (pattern == NULL) {
|
||||
pattern = argv[i];
|
||||
} else if (infile == NULL) {
|
||||
infile = argv[i];
|
||||
} else if (argv[i][0] != '-') {
|
||||
if (npatterns > 0) break;
|
||||
vm_op_t *p = bpeg_stringpattern(argv[i]);
|
||||
check(p, "Pattern failed to compile");
|
||||
add_def(g, argv[i], "pattern", p);
|
||||
++npatterns;
|
||||
}
|
||||
}
|
||||
|
||||
check(pattern != NULL || grammarfile != NULL, usage);
|
||||
|
||||
if (grammarfile) {
|
||||
// load grammar from a file (semicolon mode)
|
||||
char *grammar;
|
||||
if (streq(grammarfile, "-")) {
|
||||
grammar = readfile(STDIN_FILENO);
|
||||
} else {
|
||||
int fd = open(grammarfile, O_RDONLY);
|
||||
check(fd >= 0, "Couldn't open file: %s", argv[2]);
|
||||
grammar = readfile(fd);
|
||||
}
|
||||
load_grammar(g, grammar);
|
||||
} else {
|
||||
// load grammar in start-with-string mode:
|
||||
vm_op_t *pat = bpeg_stringpattern(pattern);
|
||||
if (replacement)
|
||||
pat = bpeg_replacement(pat, replacement);
|
||||
|
||||
add_def(g, pattern, "pattern", pat);
|
||||
|
||||
if (replacement) {
|
||||
load_grammar(g,
|
||||
"replace-all = *&&@pattern &&$$;\n"
|
||||
);
|
||||
} else {
|
||||
load_grammar(g,
|
||||
"find-all = *(matching-line / {&&(\\n/$$)=>});\n"
|
||||
"matching-line = +&@pattern *. $ ?\\n;"
|
||||
);
|
||||
}
|
||||
}
|
||||
vm_op_t *pattern = lookup(g, rule);
|
||||
check(pattern != NULL, usage);
|
||||
|
||||
if (verbose) {
|
||||
print_pattern(g->pattern);
|
||||
print_pattern(pattern);
|
||||
}
|
||||
|
||||
char *input;
|
||||
if (infile == NULL || streq(infile, "-")) {
|
||||
if (i == argc) {
|
||||
// Force stdin if no files given
|
||||
input = readfile(STDIN_FILENO);
|
||||
} else {
|
||||
int fd = open(infile, O_RDONLY);
|
||||
check(fd >= 0, "Couldn't open file: %s", argv[2]);
|
||||
input = readfile(fd);
|
||||
goto run_match;
|
||||
}
|
||||
for ( ; i < argc; i++) {
|
||||
if (argv[i] == NULL || streq(argv[i], "-")) {
|
||||
input = readfile(STDIN_FILENO);
|
||||
} else {
|
||||
int fd = open(argv[i], O_RDONLY);
|
||||
check(fd >= 0, "Couldn't open file: %s", argv[2]);
|
||||
input = readfile(fd);
|
||||
}
|
||||
|
||||
// Ensure string has a null byte to the left:
|
||||
char *lpadded = calloc(sizeof(char), strlen(input)+2);
|
||||
stpcpy(&lpadded[1], input);
|
||||
input = &lpadded[1];
|
||||
run_match:;
|
||||
// Ensure string has a null byte to the left:
|
||||
char *lpadded = calloc(sizeof(char), strlen(input)+2);
|
||||
stpcpy(&lpadded[1], input);
|
||||
input = &lpadded[1];
|
||||
|
||||
match_t *m = match(g, input, g->pattern);
|
||||
if (m == NULL) {
|
||||
printf("No match\n");
|
||||
return 1;
|
||||
} else {
|
||||
print_match(m, "\033[0m", verbose);
|
||||
//printf("\033[0;2m%s\n", m->end);
|
||||
match_t *m = match(g, input, pattern);
|
||||
if (m == NULL) {
|
||||
printf("No match\n");
|
||||
return 1;
|
||||
} else {
|
||||
print_match(m, "\033[0m", verbose);
|
||||
//printf("\033[0;2m%s\n", m->end);
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
@ -436,6 +436,7 @@ vm_op_t *bpeg_stringpattern(const char *str)
|
||||
*/
|
||||
vm_op_t *bpeg_replacement(vm_op_t *pat, const char *replacement)
|
||||
{
|
||||
check(pat, "Null pattern used in replacement");
|
||||
vm_op_t *op = calloc(sizeof(vm_op_t), 1);
|
||||
op->op = VM_REPLACE;
|
||||
op->start = pat->start;
|
||||
|
37
grammar.c
37
grammar.c
@ -7,6 +7,14 @@
|
||||
#include "utils.h"
|
||||
|
||||
const char *BPEG_BUILTIN_GRAMMAR = (
|
||||
// Meta-rules for acting on everything
|
||||
"pattern = !(/);\n" // Not defined by default
|
||||
"replacement = {!(/)=>};\n" // Not defined by default
|
||||
"replace-all = *&&@replacement &&$$;\n"
|
||||
"find-all = *(matching-line / {&&(\\n/$$)=>});\n"
|
||||
"matching-line = +&@pattern *. $ ?\\n;\n"
|
||||
|
||||
// Helper definitions (commonly used)
|
||||
"crlf=\\r\\n;\n"
|
||||
"cr=\\r;\n" "r=\\r;\n"
|
||||
"anglebraces=`< *(anglebraces / ~~`>) `>;\n"
|
||||
@ -61,9 +69,13 @@ void add_def(grammar_t *g, const char *src, const char *name, vm_op_t *op)
|
||||
++g->size;
|
||||
}
|
||||
|
||||
void load_grammar(grammar_t *g, const char *src)
|
||||
/*
|
||||
* Load the given grammar (semicolon-separated definitions)
|
||||
* and return the first rule defined.
|
||||
*/
|
||||
vm_op_t *load_grammar(grammar_t *g, const char *src)
|
||||
{
|
||||
vm_op_t *mainpat = NULL;
|
||||
vm_op_t *ret = NULL;
|
||||
do {
|
||||
src = after_spaces(src);
|
||||
if (!*src) break;
|
||||
@ -76,20 +88,21 @@ void load_grammar(grammar_t *g, const char *src)
|
||||
vm_op_t *op = bpeg_pattern(src);
|
||||
check(op, "Couldn't load definition");
|
||||
add_def(g, src, name, op);
|
||||
if (mainpat == NULL) {
|
||||
mainpat = op;
|
||||
g->pattern = op;
|
||||
if (ret == NULL) {
|
||||
ret = op;
|
||||
}
|
||||
src = op->end;
|
||||
} while (*src && matchchar(&src, ';'));
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Print a BPEG grammar in human-readable form.
|
||||
*/
|
||||
void print_grammar(grammar_t *g)
|
||||
vm_op_t *lookup(grammar_t *g, const char *name)
|
||||
{
|
||||
if (g->pattern) print_pattern(g->pattern);
|
||||
// Search backwards so newer defs take precedence
|
||||
for (int i = g->size-1; i >= 0; i--) {
|
||||
if (streq(g->definitions[i].name, name)) {
|
||||
return g->definitions[i].op;
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
|
||||
|
@ -11,9 +11,8 @@
|
||||
|
||||
grammar_t *new_grammar(void);
|
||||
void add_def(grammar_t *g, const char *src, const char *name, vm_op_t *op);
|
||||
void load_grammar(grammar_t *g, const char *source);
|
||||
void print_grammar(grammar_t *g);
|
||||
|
||||
vm_op_t *load_grammar(grammar_t *g, const char *source);
|
||||
vm_op_t *lookup(grammar_t *g, const char *name);
|
||||
|
||||
#endif
|
||||
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
|
||||
|
1
types.h
1
types.h
@ -81,7 +81,6 @@ typedef struct {
|
||||
} def_t;
|
||||
|
||||
typedef struct {
|
||||
vm_op_t *pattern;
|
||||
def_t *definitions;
|
||||
size_t size, capacity;
|
||||
} grammar_t;
|
||||
|
36
vm.c
36
vm.c
@ -2,6 +2,7 @@
|
||||
* vm.c - Code for the BPEG virtual machine that performs the matching.
|
||||
*/
|
||||
#include "vm.h"
|
||||
#include "grammar.h"
|
||||
#include "utils.h"
|
||||
|
||||
/*
|
||||
@ -212,28 +213,19 @@ match_t *match(grammar_t *g, const char *str, vm_op_t *op)
|
||||
return m;
|
||||
}
|
||||
case VM_REF: {
|
||||
// Search backwards so newer defs take precedence
|
||||
for (int i = g->size-1; i >= 0; i--) {
|
||||
if (streq(g->definitions[i].name, op->args.s)) {
|
||||
// Bingo!
|
||||
/*
|
||||
op = g->definitions[i].op;
|
||||
goto tailcall;
|
||||
*/
|
||||
match_t *p = match(g, str, g->definitions[i].op);
|
||||
if (p == NULL) return NULL;
|
||||
match_t *m = calloc(sizeof(match_t), 1);
|
||||
m->start = p->start;
|
||||
m->end = p->end;
|
||||
m->op = op;
|
||||
m->child = p;
|
||||
m->name_or_replacement = g->definitions[i].name;
|
||||
m->is_ref = 1;
|
||||
return m;
|
||||
}
|
||||
}
|
||||
check(0, "Unknown identifier: '%s'", op->args.s);
|
||||
return NULL;
|
||||
vm_op_t *r = lookup(g, op->args.s);
|
||||
check(r != NULL, "Unknown identifier: '%s'", op->args.s);
|
||||
// Could use a tail call here, but it is currently disabled
|
||||
match_t *p = match(g, str, r);
|
||||
if (p == NULL) return NULL;
|
||||
match_t *m = calloc(sizeof(match_t), 1);
|
||||
m->start = p->start;
|
||||
m->end = p->end;
|
||||
m->op = op;
|
||||
m->child = p;
|
||||
m->name_or_replacement = op->args.s;
|
||||
m->is_ref = 1;
|
||||
return m;
|
||||
}
|
||||
default: {
|
||||
fprintf(stderr, "Unknown opcode: %d", op->op);
|
||||
|
Loading…
Reference in New Issue
Block a user