Improved handling of CLI flags

This commit is contained in:
Bruce Hill 2020-09-11 02:55:15 -07:00
parent 2f05677c98
commit d0538fa25f
6 changed files with 122 additions and 104 deletions

146
bpeg.c
View File

@ -52,29 +52,63 @@ static const char *usage = (
" -r --replace <replacement> replace the input pattern with the given replacement\n"
" -g --grammar <grammar file> use the specified file as a grammar\n");
/*
 * Check whether argv[*i] is the given flag and, if so, return its value.
 * Two spellings are accepted:
 *   "<flag>=<value>"    returns a pointer to <value> inside argv[*i]
 *   "<flag>" "<value>"  advances *i to the next argument and returns it
 * Returns NULL and leaves *i untouched when argv[*i] is not this flag, or
 * when the flag is the last argument and so has no value to consume.
 * argv must be NULL-terminated (as the argv passed to main() is).
 */
static char *getflag(const char *flag, char *argv[], int *i)
{
    char *arg = argv[*i];
    if (arg == NULL) {
        return NULL;
    }
    size_t n = strlen(flag);
    if (strncmp(arg, flag, n) != 0) {
        return NULL;
    }
    if (arg[n] == '=') {
        return &arg[n+1];
    }
    // Only step forward when a value really follows. Otherwise *i would land
    // on argv's NULL terminator, the caller would see NULL ("no match"), and
    // the next getflag() call would hand that NULL to strncmp().
    if (arg[n] == '\0' && argv[*i + 1] != NULL) {
        ++(*i);
        return argv[*i];
    }
    return NULL;
}
#define FLAG(f) (flag=getflag((f), argv, &i))
int main(int argc, char *argv[])
{
int verbose = 0;
const char *pattern = NULL,
*replacement = NULL,
*grammarfile = NULL,
*infile = NULL;
char *flag = NULL;
const char *rule = "find-all";
grammar_t *g = new_grammar();
for (int i = 1; i < argc; i++) {
if (streq(argv[i], "--help") || streq(argv[i], "-h")) {
int i, npatterns = 0;
for (i = 1; i < argc; i++) {
if (streq(argv[i], "--")) {
++i;
break;
} else if (FLAG("--help") || FLAG("-h")) {
printf("%s\n", usage);
return 0;
} else if (streq(argv[i], "--verbose") || streq(argv[i], "-v")) {
} else if (FLAG("--verbose") || FLAG("-v")) {
verbose = 1;
} else if (streq(argv[i], "--replace") || streq(argv[i], "-r")) {
replacement = argv[++i];
} else if (streq(argv[i], "--grammar") || streq(argv[i], "-g")) {
grammarfile = argv[++i];
} else if (streq(argv[i], "--define") || streq(argv[i], "-d")) {
char *def = argv[++i];
} else if (FLAG("--pattern") || FLAG("-p")) {
vm_op_t *p = bpeg_pattern(flag);
check(p, "Pattern failed to compile");
add_def(g, flag, "pattern", p);
++npatterns;
} else if (FLAG("--replace") || FLAG("-r")) {
vm_op_t *p = bpeg_replacement(bpeg_pattern("pattern"), flag);
check(p, "Replacement failed to compile");
add_def(g, flag, "replacement", p);
rule = "replace-all";
} else if (FLAG("--grammar") || FLAG("-g")) {
const char *grammarfile = flag;
// load grammar from a file (semicolon mode)
char *grammar;
if (streq(grammarfile, "-")) {
grammar = readfile(STDIN_FILENO);
} else {
int fd = open(grammarfile, O_RDONLY);
check(fd >= 0, "Couldn't open file: %s", argv[2]);
grammar = readfile(fd);
}
load_grammar(g, grammar);
} else if (FLAG("--define") || FLAG("-d")) {
char *def = flag;
char *eq = strchr(def, '=');
check(eq, usage);
*eq = '\0';
@ -82,71 +116,51 @@ int main(int argc, char *argv[])
vm_op_t *pat = bpeg_pattern(src);
check(pat, "Failed to compile pattern");
add_def(g, src, def, pat);
} else if (pattern == NULL) {
pattern = argv[i];
} else if (infile == NULL) {
infile = argv[i];
} else if (argv[i][0] != '-') {
if (npatterns > 0) break;
vm_op_t *p = bpeg_stringpattern(argv[i]);
check(p, "Pattern failed to compile");
add_def(g, argv[i], "pattern", p);
++npatterns;
}
}
check(pattern != NULL || grammarfile != NULL, usage);
if (grammarfile) {
// load grammar from a file (semicolon mode)
char *grammar;
if (streq(grammarfile, "-")) {
grammar = readfile(STDIN_FILENO);
} else {
int fd = open(grammarfile, O_RDONLY);
check(fd >= 0, "Couldn't open file: %s", argv[2]);
grammar = readfile(fd);
}
load_grammar(g, grammar);
} else {
// load grammar in start-with-string mode:
vm_op_t *pat = bpeg_stringpattern(pattern);
if (replacement)
pat = bpeg_replacement(pat, replacement);
add_def(g, pattern, "pattern", pat);
if (replacement) {
load_grammar(g,
"replace-all = *&&@pattern &&$$;\n"
);
} else {
load_grammar(g,
"find-all = *(matching-line / {&&(\\n/$$)=>});\n"
"matching-line = +&@pattern *. $ ?\\n;"
);
}
}
vm_op_t *pattern = lookup(g, rule);
check(pattern != NULL, usage);
if (verbose) {
print_pattern(g->pattern);
print_pattern(pattern);
}
char *input;
if (infile == NULL || streq(infile, "-")) {
if (i == argc) {
// Force stdin if no files given
input = readfile(STDIN_FILENO);
} else {
int fd = open(infile, O_RDONLY);
check(fd >= 0, "Couldn't open file: %s", argv[2]);
input = readfile(fd);
goto run_match;
}
for ( ; i < argc; i++) {
if (argv[i] == NULL || streq(argv[i], "-")) {
input = readfile(STDIN_FILENO);
} else {
int fd = open(argv[i], O_RDONLY);
check(fd >= 0, "Couldn't open file: %s", argv[2]);
input = readfile(fd);
}
// Ensure string has a null byte to the left:
char *lpadded = calloc(sizeof(char), strlen(input)+2);
stpcpy(&lpadded[1], input);
input = &lpadded[1];
run_match:;
// Ensure string has a null byte to the left:
char *lpadded = calloc(sizeof(char), strlen(input)+2);
stpcpy(&lpadded[1], input);
input = &lpadded[1];
match_t *m = match(g, input, g->pattern);
if (m == NULL) {
printf("No match\n");
return 1;
} else {
print_match(m, "\033[0m", verbose);
//printf("\033[0;2m%s\n", m->end);
match_t *m = match(g, input, pattern);
if (m == NULL) {
printf("No match\n");
return 1;
} else {
print_match(m, "\033[0m", verbose);
//printf("\033[0;2m%s\n", m->end);
}
}
return 0;

View File

@ -436,6 +436,7 @@ vm_op_t *bpeg_stringpattern(const char *str)
*/
vm_op_t *bpeg_replacement(vm_op_t *pat, const char *replacement)
{
check(pat, "Null pattern used in replacement");
vm_op_t *op = calloc(sizeof(vm_op_t), 1);
op->op = VM_REPLACE;
op->start = pat->start;

View File

@ -7,6 +7,14 @@
#include "utils.h"
const char *BPEG_BUILTIN_GRAMMAR = (
// Meta-rules for acting on everything
"pattern = !(/);\n" // Not defined by default
"replacement = {!(/)=>};\n" // Not defined by default
"replace-all = *&&@replacement &&$$;\n"
"find-all = *(matching-line / {&&(\\n/$$)=>});\n"
"matching-line = +&@pattern *. $ ?\\n;\n"
// Helper definitions (commonly used)
"crlf=\\r\\n;\n"
"cr=\\r;\n" "r=\\r;\n"
"anglebraces=`< *(anglebraces / ~~`>) `>;\n"
@ -61,9 +69,13 @@ void add_def(grammar_t *g, const char *src, const char *name, vm_op_t *op)
++g->size;
}
void load_grammar(grammar_t *g, const char *src)
/*
* Load the given grammar (semicolon-separated definitions)
* and return the first rule defined.
*/
vm_op_t *load_grammar(grammar_t *g, const char *src)
{
vm_op_t *mainpat = NULL;
vm_op_t *ret = NULL;
do {
src = after_spaces(src);
if (!*src) break;
@ -76,20 +88,21 @@ void load_grammar(grammar_t *g, const char *src)
vm_op_t *op = bpeg_pattern(src);
check(op, "Couldn't load definition");
add_def(g, src, name, op);
if (mainpat == NULL) {
mainpat = op;
g->pattern = op;
if (ret == NULL) {
ret = op;
}
src = op->end;
} while (*src && matchchar(&src, ';'));
return ret;
}
/*
* Print a BPEG grammar in human-readable form.
*/
void print_grammar(grammar_t *g)
/*
 * Find a definition in grammar `g` by name.
 * Returns the `op` of the matching definition, or NULL if no definition
 * has that name.
 */
vm_op_t *lookup(grammar_t *g, const char *name)
{
// NOTE(review): this line looks like residue from the removed
// print_grammar() body in this diff view -- confirm it belongs here.
if (g->pattern) print_pattern(g->pattern);
// Search backwards so newer defs take precedence
// (scans from index size-1 down to 0; the first name match is returned)
for (int i = g->size-1; i >= 0; i--) {
if (streq(g->definitions[i].name, name)) {
return g->definitions[i].op;
}
}
// No definition with this name
return NULL;
}
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1

View File

@ -11,9 +11,8 @@
grammar_t *new_grammar(void);
void add_def(grammar_t *g, const char *src, const char *name, vm_op_t *op);
void load_grammar(grammar_t *g, const char *source);
void print_grammar(grammar_t *g);
vm_op_t *load_grammar(grammar_t *g, const char *source);
vm_op_t *lookup(grammar_t *g, const char *name);
#endif
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1

View File

@ -81,7 +81,6 @@ typedef struct {
} def_t;
// A grammar: a collection of named definitions (def_t entries).
typedef struct {
// Op that main() prints in verbose mode and passes to match() in this view.
// NOTE(review): this diff appears to drop the field in favour of
// lookup(g, rule) -- confirm whether it still exists.
vm_op_t *pattern;
// Definitions, indexed 0..size-1; lookup() scans them by name.
def_t *definitions;
// size: number of definitions in use (incremented by add_def).
// capacity: presumably the number of allocated slots -- TODO confirm.
size_t size, capacity;
} grammar_t;

36
vm.c
View File

@ -2,6 +2,7 @@
* vm.c - Code for the BPEG virtual machine that performs the matching.
*/
#include "vm.h"
#include "grammar.h"
#include "utils.h"
/*
@ -212,28 +213,19 @@ match_t *match(grammar_t *g, const char *str, vm_op_t *op)
return m;
}
case VM_REF: {
// Search backwards so newer defs take precedence
for (int i = g->size-1; i >= 0; i--) {
if (streq(g->definitions[i].name, op->args.s)) {
// Bingo!
/*
op = g->definitions[i].op;
goto tailcall;
*/
match_t *p = match(g, str, g->definitions[i].op);
if (p == NULL) return NULL;
match_t *m = calloc(sizeof(match_t), 1);
m->start = p->start;
m->end = p->end;
m->op = op;
m->child = p;
m->name_or_replacement = g->definitions[i].name;
m->is_ref = 1;
return m;
}
}
check(0, "Unknown identifier: '%s'", op->args.s);
return NULL;
vm_op_t *r = lookup(g, op->args.s);
check(r != NULL, "Unknown identifier: '%s'", op->args.s);
// Could use a tail call here, but it is currently disabled
match_t *p = match(g, str, r);
if (p == NULL) return NULL;
match_t *m = calloc(sizeof(match_t), 1);
m->start = p->start;
m->end = p->end;
m->op = op;
m->child = p;
m->name_or_replacement = op->args.s;
m->is_ref = 1;
return m;
}
default: {
fprintf(stderr, "Unknown opcode: %d", op->op);