about summary refs log tree commit diff
diff options
context:
space:
mode:
author Bruce Hill <bruce@bruce-hill.com> 2020-09-11 02:55:15 -0700
committer Bruce Hill <bruce@bruce-hill.com> 2020-09-11 02:55:15 -0700
commit d0538fa25ffa60fc3ef17048aa2c46423a1bf036 (patch)
tree 182cc28b637112337c41fdd0ca983713360fad4b
parent 2f05677c98f655615ac0229924ded18dd8fe4e0d (diff)
Improved handling of CLI flags
-rw-r--r-- bpeg.c 146
-rw-r--r-- compiler.c 1
-rw-r--r-- grammar.c 37
-rw-r--r-- grammar.h 5
-rw-r--r-- types.h 1
-rw-r--r-- vm.c 36
6 files changed, 122 insertions, 104 deletions
diff --git a/bpeg.c b/bpeg.c
index 1ad576d..89f262a 100644
--- a/bpeg.c
+++ b/bpeg.c
@@ -52,29 +52,63 @@ static const char *usage = (
" -r --replace <replacement> replace the input pattern with the given replacement\n"
" -g --grammar <grammar file> use the specified file as a grammar\n");
+static char *getflag(const char *flag, char *argv[], int *i)
+{
+ size_t n = strlen(flag);
+ if (strncmp(argv[*i], flag, n) == 0) {
+ if (argv[*i][n] == '=') {
+ return &argv[*i][n+1];
+ } else if (argv[*i][n] == '\0') {
+ ++(*i);
+ return argv[*i];
+ }
+ }
+ return NULL;
+}
+#define FLAG(f) (flag=getflag((f), argv, &i))
int main(int argc, char *argv[])
{
int verbose = 0;
- const char *pattern = NULL,
- *replacement = NULL,
- *grammarfile = NULL,
- *infile = NULL;
+ char *flag = NULL;
+ const char *rule = "find-all";
grammar_t *g = new_grammar();
- for (int i = 1; i < argc; i++) {
- if (streq(argv[i], "--help") || streq(argv[i], "-h")) {
+ int i, npatterns = 0;
+ for (i = 1; i < argc; i++) {
+ if (streq(argv[i], "--")) {
+ ++i;
+ break;
+ } else if (FLAG("--help") || FLAG("-h")) {
printf("%s\n", usage);
return 0;
- } else if (streq(argv[i], "--verbose") || streq(argv[i], "-v")) {
+ } else if (FLAG("--verbose") || FLAG("-v")) {
verbose = 1;
- } else if (streq(argv[i], "--replace") || streq(argv[i], "-r")) {
- replacement = argv[++i];
- } else if (streq(argv[i], "--grammar") || streq(argv[i], "-g")) {
- grammarfile = argv[++i];
- } else if (streq(argv[i], "--define") || streq(argv[i], "-d")) {
- char *def = argv[++i];
+ } else if (FLAG("--pattern") || FLAG("-p")) {
+ vm_op_t *p = bpeg_pattern(flag);
+ check(p, "Pattern failed to compile");
+ add_def(g, flag, "pattern", p);
+ ++npatterns;
+ } else if (FLAG("--replace") || FLAG("-r")) {
+ vm_op_t *p = bpeg_replacement(bpeg_pattern("pattern"), flag);
+ check(p, "Replacement failed to compile");
+ add_def(g, flag, "replacement", p);
+ rule = "replace-all";
+ } else if (FLAG("--grammar") || FLAG("-g")) {
+ const char *grammarfile = flag;
+ // load grammar from a file (semicolon mode)
+ char *grammar;
+ if (streq(grammarfile, "-")) {
+ grammar = readfile(STDIN_FILENO);
+ } else {
+ int fd = open(grammarfile, O_RDONLY);
+ check(fd >= 0, "Couldn't open file: %s", argv[2]);
+ grammar = readfile(fd);
+ }
+ load_grammar(g, grammar);
+ } else if (FLAG("--define") || FLAG("-d")) {
+ char *def = flag;
char *eq = strchr(def, '=');
check(eq, usage);
*eq = '\0';
@@ -82,71 +116,51 @@ int main(int argc, char *argv[])
vm_op_t *pat = bpeg_pattern(src);
check(pat, "Failed to compile pattern");
add_def(g, src, def, pat);
- } else if (pattern == NULL) {
- pattern = argv[i];
- } else if (infile == NULL) {
- infile = argv[i];
+ } else if (argv[i][0] != '-') {
+ if (npatterns > 0) break;
+ vm_op_t *p = bpeg_stringpattern(argv[i]);
+ check(p, "Pattern failed to compile");
+ add_def(g, argv[i], "pattern", p);
+ ++npatterns;
}
}
- check(pattern != NULL || grammarfile != NULL, usage);
-
- if (grammarfile) {
- // load grammar from a file (semicolon mode)
- char *grammar;
- if (streq(grammarfile, "-")) {
- grammar = readfile(STDIN_FILENO);
- } else {
- int fd = open(grammarfile, O_RDONLY);
- check(fd >= 0, "Couldn't open file: %s", argv[2]);
- grammar = readfile(fd);
- }
- load_grammar(g, grammar);
- } else {
- // load grammar in start-with-string mode:
- vm_op_t *pat = bpeg_stringpattern(pattern);
- if (replacement)
- pat = bpeg_replacement(pat, replacement);
-
- add_def(g, pattern, "pattern", pat);
-
- if (replacement) {
- load_grammar(g,
- "replace-all = *&&@pattern &&$$;\n"
- );
- } else {
- load_grammar(g,
- "find-all = *(matching-line / {&&(\\n/$$)=>});\n"
- "matching-line = +&@pattern *. $ ?\\n;"
- );
- }
- }
+ vm_op_t *pattern = lookup(g, rule);
+ check(pattern != NULL, usage);
if (verbose) {
- print_pattern(g->pattern);
+ print_pattern(pattern);
}
char *input;
- if (infile == NULL || streq(infile, "-")) {
+ if (i == argc) {
+ // Force stdin if no files given
input = readfile(STDIN_FILENO);
- } else {
- int fd = open(infile, O_RDONLY);
- check(fd >= 0, "Couldn't open file: %s", argv[2]);
- input = readfile(fd);
+ goto run_match;
}
+ for ( ; i < argc; i++) {
+ if (argv[i] == NULL || streq(argv[i], "-")) {
+ input = readfile(STDIN_FILENO);
+ } else {
+ int fd = open(argv[i], O_RDONLY);
+ check(fd >= 0, "Couldn't open file: %s", argv[2]);
+ input = readfile(fd);
+ }
- // Ensure string has a null byte to the left:
- char *lpadded = calloc(sizeof(char), strlen(input)+2);
- stpcpy(&lpadded[1], input);
- input = &lpadded[1];
+ run_match:;
+ // Ensure string has a null byte to the left:
+ char *lpadded = calloc(sizeof(char), strlen(input)+2);
+ stpcpy(&lpadded[1], input);
+ input = &lpadded[1];
- match_t *m = match(g, input, g->pattern);
- if (m == NULL) {
- printf("No match\n");
- return 1;
- } else {
- print_match(m, "\033[0m", verbose);
- //printf("\033[0;2m%s\n", m->end);
+ match_t *m = match(g, input, pattern);
+ if (m == NULL) {
+ printf("No match\n");
+ return 1;
+ } else {
+ print_match(m, "\033[0m", verbose);
+ //printf("\033[0;2m%s\n", m->end);
+ }
}
return 0;
diff --git a/compiler.c b/compiler.c
index 067101b..eadfd38 100644
--- a/compiler.c
+++ b/compiler.c
@@ -436,6 +436,7 @@ vm_op_t *bpeg_stringpattern(const char *str)
*/
vm_op_t *bpeg_replacement(vm_op_t *pat, const char *replacement)
{
+ check(pat, "Null pattern used in replacement");
vm_op_t *op = calloc(sizeof(vm_op_t), 1);
op->op = VM_REPLACE;
op->start = pat->start;
diff --git a/grammar.c b/grammar.c
index 6001211..38403d8 100644
--- a/grammar.c
+++ b/grammar.c
@@ -7,6 +7,14 @@
#include "utils.h"
const char *BPEG_BUILTIN_GRAMMAR = (
+ // Meta-rules for acting on everything
+ "pattern = !(/);\n" // Not defined by default
+ "replacement = {!(/)=>};\n" // Not defined by default
+ "replace-all = *&&@replacement &&$$;\n"
+ "find-all = *(matching-line / {&&(\\n/$$)=>});\n"
+ "matching-line = +&@pattern *. $ ?\\n;\n"
+
+ // Helper definitions (commonly used)
"crlf=\\r\\n;\n"
"cr=\\r;\n" "r=\\r;\n"
"anglebraces=`< *(anglebraces / ~~`>) `>;\n"
@@ -61,9 +69,13 @@ void add_def(grammar_t *g, const char *src, const char *name, vm_op_t *op)
++g->size;
}
-void load_grammar(grammar_t *g, const char *src)
+/*
+ * Load the given grammar (semicolon-separated definitions)
+ * and return the first rule defined.
+ */
+vm_op_t *load_grammar(grammar_t *g, const char *src)
{
- vm_op_t *mainpat = NULL;
+ vm_op_t *ret = NULL;
do {
src = after_spaces(src);
if (!*src) break;
@@ -76,20 +88,21 @@ void load_grammar(grammar_t *g, const char *src)
vm_op_t *op = bpeg_pattern(src);
check(op, "Couldn't load definition");
add_def(g, src, name, op);
- if (mainpat == NULL) {
- mainpat = op;
- g->pattern = op;
+ if (ret == NULL) {
+ ret = op;
}
src = op->end;
} while (*src && matchchar(&src, ';'));
+ return ret;
}
-/*
- * Print a BPEG grammar in human-readable form.
- */
-void print_grammar(grammar_t *g)
+vm_op_t *lookup(grammar_t *g, const char *name)
{
- if (g->pattern) print_pattern(g->pattern);
+ // Search backwards so newer defs take precedence
+ for (int i = g->size-1; i >= 0; i--) {
+ if (streq(g->definitions[i].name, name)) {
+ return g->definitions[i].op;
+ }
+ }
+ return NULL;
}
-
-// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
diff --git a/grammar.h b/grammar.h
index e477cc6..56d7ef6 100644
--- a/grammar.h
+++ b/grammar.h
@@ -11,9 +11,8 @@
grammar_t *new_grammar(void);
void add_def(grammar_t *g, const char *src, const char *name, vm_op_t *op);
-void load_grammar(grammar_t *g, const char *source);
-void print_grammar(grammar_t *g);
-
+vm_op_t *load_grammar(grammar_t *g, const char *source);
+vm_op_t *lookup(grammar_t *g, const char *name);
#endif
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
diff --git a/types.h b/types.h
index 27e9388..f8899b3 100644
--- a/types.h
+++ b/types.h
@@ -81,7 +81,6 @@ typedef struct {
} def_t;
typedef struct {
- vm_op_t *pattern;
def_t *definitions;
size_t size, capacity;
} grammar_t;
diff --git a/vm.c b/vm.c
index 565f94d..dade953 100644
--- a/vm.c
+++ b/vm.c
@@ -2,6 +2,7 @@
* vm.c - Code for the BPEG virtual machine that performs the matching.
*/
#include "vm.h"
+#include "grammar.h"
#include "utils.h"
/*
@@ -212,28 +213,19 @@ match_t *match(grammar_t *g, const char *str, vm_op_t *op)
return m;
}
case VM_REF: {
- // Search backwards so newer defs take precedence
- for (int i = g->size-1; i >= 0; i--) {
- if (streq(g->definitions[i].name, op->args.s)) {
- // Bingo!
- /*
- op = g->definitions[i].op;
- goto tailcall;
- */
- match_t *p = match(g, str, g->definitions[i].op);
- if (p == NULL) return NULL;
- match_t *m = calloc(sizeof(match_t), 1);
- m->start = p->start;
- m->end = p->end;
- m->op = op;
- m->child = p;
- m->name_or_replacement = g->definitions[i].name;
- m->is_ref = 1;
- return m;
- }
- }
- check(0, "Unknown identifier: '%s'", op->args.s);
- return NULL;
+ vm_op_t *r = lookup(g, op->args.s);
+ check(r != NULL, "Unknown identifier: '%s'", op->args.s);
+ // Could use a tail call here, but it is currently disabled
+ match_t *p = match(g, str, r);
+ if (p == NULL) return NULL;
+ match_t *m = calloc(sizeof(match_t), 1);
+ m->start = p->start;
+ m->end = p->end;
+ m->op = op;
+ m->child = p;
+ m->name_or_replacement = op->args.s;
+ m->is_ref = 1;
+ return m;
}
default: {
fprintf(stderr, "Unknown opcode: %d", op->op);