bp/bpeg.c

201 lines
7.2 KiB
C
Raw Normal View History

2020-09-07 23:22:43 -07:00
/*
 * bpeg.c - Source code for the bpeg parser
 *
 * See `man ./bpeg.1` for more details
 */
2020-09-11 01:28:06 -07:00
#include <fcntl.h>
#include <glob.h>
2020-09-12 15:49:51 -07:00
#include <limits.h>
2020-09-11 01:28:06 -07:00
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "compiler.h"
#include "grammar.h"
#include "utils.h"
#include "vm.h"
// Help text printed for -h/--help and appended to command-line usage errors.
static const char *usage = (
    "BPEG - a Parsing Expression Grammar command line tool\n\n"
    "Usage:\n"
    " bpeg [flags] <pattern> [<input files>...]\n\n"
    "Flags:\n"
    " -h --help print the usage and quit\n"
    " -v --verbose print verbose debugging info\n"
    " -i --ignore-case perform matching case-insensitively\n"
    " -d --define <name>=<def> define a grammar rule\n"
    " -D --define-string <name>=<def> define a grammar rule (string-pattern)\n"
    " -p --pattern <pat> provide a pattern (equivalent to bpeg '\\(<pat>)')\n"
    " -P --pattern-string <pat> provide a string pattern (may be useful if '<pat>' begins with a '-')\n"
    " -r --replace <replacement> replace the input pattern with the given replacement\n"
    " -m --mode <mode> set the behavior mode (default: find-all)\n"
    " -g --grammar <grammar file> use the specified file as a grammar\n");
2020-09-07 23:05:38 -07:00
2020-09-11 02:55:15 -07:00
static char *getflag(const char *flag, char *argv[], int *i)
{
size_t n = strlen(flag);
2020-09-14 12:39:31 -07:00
check(argv[*i], "Attempt to get flag from NULL argument");
2020-09-11 02:55:15 -07:00
if (strncmp(argv[*i], flag, n) == 0) {
if (argv[*i][n] == '=') {
return &argv[*i][n+1];
} else if (argv[*i][n] == '\0') {
2020-09-14 12:39:31 -07:00
check(argv[*i+1], "Expected argument after '%s'\n\n%s", flag, usage);
2020-09-11 02:55:15 -07:00
++(*i);
return argv[*i];
}
}
return NULL;
}
2020-09-14 12:16:15 -07:00
/*
 * Run `pattern` against the contents of a single input and print any match.
 *
 * g:        grammar handed through to match()
 * filename: file to read; NULL or "-" selects standard input
 * pattern:  compiled pattern to match
 * flags:    BPEG_* option bits; BPEG_VERBOSE is also forwarded to print_match()
 *
 * Returns 0 when match() produced a result with end > start + 1 (which is
 * then printed), and 1 otherwise.
 */
static int run_match(grammar_t *g, const char *filename, vm_op_t *pattern, unsigned int flags)
{
    int fd;
    if (filename == NULL || streq(filename, "-")) {
        fd = STDIN_FILENO;
    } else {
        fd = open(filename, O_RDONLY);
        check(fd >= 0, "Couldn't open file: %s", filename);
    }
    char *input = readfile(fd);

    match_t *m = match(g, input, pattern, flags);
    if (m == NULL || m->end <= m->start + 1) {
        freefile(input);
        return 1;
    }

    // Only decorate the output with escape sequences when writing to a terminal.
    const int to_tty = isatty(STDOUT_FILENO);
    if (filename != NULL) {
        if (to_tty)
            printf("\033[1;4;33m%s\033[0m\n", filename);
        else
            printf("%s\n", filename);
    }
    print_match(m, to_tty ? "\033[0m" : NULL, (flags & BPEG_VERBOSE) != 0);
    freefile(input);
    return 0;
}
2020-09-11 02:55:15 -07:00
#define FLAG(f) (flag=getflag((f), argv, &i))
int main(int argc, char *argv[])
{
2020-09-14 12:16:15 -07:00
unsigned int flags = 0;
2020-09-11 02:55:15 -07:00
char *flag = NULL;
char path[PATH_MAX] = {0};
2020-09-11 02:55:15 -07:00
const char *rule = "find-all";
2020-09-11 01:28:06 -07:00
grammar_t *g = new_grammar();
// Load builtins:
2020-09-12 15:11:44 -07:00
int fd;
if ((fd=open("/etc/xdg/bpeg/builtins.bpeg", O_RDONLY)) >= 0)
2020-09-12 20:05:55 -07:00
load_grammar(g, readfile(fd)); // Keep in memory for debugging output
2020-09-12 15:49:51 -07:00
sprintf(path, "%s/.config/bpeg/builtins.bpeg", getenv("HOME"));
if ((fd=open(path, O_RDONLY)) >= 0)
2020-09-12 20:05:55 -07:00
load_grammar(g, readfile(fd)); // Keep in memory for debugging output
2020-09-12 15:11:44 -07:00
2020-09-11 02:55:15 -07:00
int i, npatterns = 0;
2020-09-14 12:39:31 -07:00
check(argc > 1, "%s", usage);
2020-09-11 02:55:15 -07:00
for (i = 1; i < argc; i++) {
if (streq(argv[i], "--")) {
++i;
break;
} else if (streq(argv[i], "--help") || streq(argv[i], "-h")) {
printf("%s\n", usage);
return 0;
} else if (streq(argv[i], "--verbose") || streq(argv[i], "-v")) {
2020-09-14 12:16:15 -07:00
flags |= BPEG_VERBOSE;
} else if (streq(argv[i], "--ignore-case") || streq(argv[i], "-i")) {
flags |= BPEG_IGNORECASE;
2020-09-11 02:55:15 -07:00
} else if (FLAG("--replace") || FLAG("-r")) {
vm_op_t *p = bpeg_replacement(bpeg_pattern("pattern"), flag);
check(p, "Replacement failed to compile");
add_def(g, flag, "replacement", p);
rule = "replace-all";
} else if (FLAG("--grammar") || FLAG("-g")) {
2020-09-12 15:11:44 -07:00
int fd;
2020-09-12 15:49:51 -07:00
if (streq(flag, "-")) {
2020-09-12 15:11:44 -07:00
fd = STDIN_FILENO;
2020-09-11 02:55:15 -07:00
} else {
2020-09-12 15:49:51 -07:00
fd = open(flag, O_RDONLY);
if (fd < 0) {
sprintf(path, "%s/.config/bpeg/%s.bpeg", getenv("HOME"), flag);
fd = open(path, O_RDONLY);
}
if (fd < 0) {
sprintf(path, "/etc/xdg/bpeg/%s.bpeg", flag);
fd = open(path, O_RDONLY);
}
check(fd >= 0, "Couldn't find grammar: %s", flag);
2020-09-11 02:55:15 -07:00
}
2020-09-12 20:05:55 -07:00
load_grammar(g, readfile(fd)); // Keep in memory for debug output
2020-09-11 02:55:15 -07:00
} else if (FLAG("--define") || FLAG("-d")) {
char *def = flag;
2020-09-10 22:50:49 -07:00
char *eq = strchr(def, '=');
2020-09-14 12:39:31 -07:00
check(eq, "Rule definitions must include an '='\n\n%s", usage);
2020-09-10 22:50:49 -07:00
*eq = '\0';
2020-09-11 01:28:06 -07:00
char *src = ++eq;
vm_op_t *pat = bpeg_pattern(src);
check(pat, "Failed to compile pattern");
add_def(g, src, def, pat);
2020-09-13 20:33:11 -07:00
} else if (FLAG("--define-string") || FLAG("-D")) {
char *def = flag;
char *eq = strchr(def, '=');
2020-09-14 12:39:31 -07:00
check(eq, "Rule definitions must include an '='\n\n%s", usage);
2020-09-13 20:33:11 -07:00
*eq = '\0';
char *src = ++eq;
vm_op_t *pat = bpeg_stringpattern(src);
check(pat, "Failed to compile pattern");
add_def(g, src, def, pat);
} else if (FLAG("--pattern") || FLAG("-p")) {
2020-09-12 18:20:13 -07:00
check(npatterns == 0, "Cannot define multiple patterns");
2020-09-12 20:05:55 -07:00
vm_op_t *p = bpeg_pattern(flag);
check(p, "Pattern failed to compile: '%s'", flag);
add_def(g, flag, "pattern", p);
++npatterns;
} else if (FLAG("--pattern-string") || FLAG("-P")) {
2020-09-12 20:05:55 -07:00
vm_op_t *p = bpeg_stringpattern(flag);
2020-09-12 18:20:13 -07:00
check(p, "Pattern failed to compile");
2020-09-12 20:05:55 -07:00
add_def(g, flag, "pattern", p);
2020-09-12 18:20:13 -07:00
++npatterns;
} else if (FLAG("--mode") || FLAG("-m")) {
rule = flag;
2020-09-11 02:55:15 -07:00
} else if (argv[i][0] != '-') {
if (npatterns > 0) break;
vm_op_t *p = bpeg_stringpattern(argv[i]);
check(p, "Pattern failed to compile");
add_def(g, argv[i], "pattern", p);
++npatterns;
2020-09-12 20:05:55 -07:00
} else {
2020-09-14 12:39:31 -07:00
printf("Unrecognized flag: %s\n\n%s\n", argv[i], usage);
2020-09-12 20:05:55 -07:00
return 1;
}
}
2020-09-07 23:05:38 -07:00
2020-09-11 02:55:15 -07:00
vm_op_t *pattern = lookup(g, rule);
check(pattern != NULL, "No such rule: '%s'", rule);
int ret = 0;
if (i < argc) {
// Files pass in as command line args:
for (int nfiles = 0; i < argc; nfiles++, i++) {
2020-09-14 12:16:15 -07:00
ret |= run_match(g, argv[i], pattern, flags);
2020-09-11 02:55:15 -07:00
}
} else if (isatty(STDIN_FILENO)) {
// No files, no piped in input, so use * **/*:
glob_t globbuf;
glob("*", 0, NULL, &globbuf);
glob("**/*", GLOB_APPEND, NULL, &globbuf);
for (size_t i = 0; i < globbuf.gl_pathc; i++) {
2020-09-14 12:16:15 -07:00
ret |= run_match(g, globbuf.gl_pathv[i], pattern, flags);
2020-09-11 02:55:15 -07:00
}
globfree(&globbuf);
} else {
// Piped in input:
2020-09-14 12:16:15 -07:00
ret |= run_match(g, NULL, pattern, flags);
2020-09-07 23:05:38 -07:00
}
return ret;
2020-09-07 23:05:38 -07:00
}
2020-09-07 23:34:41 -07:00
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1