2020-09-07 23:22:43 -07:00
|
|
|
/*
|
2020-09-10 02:02:40 -07:00
|
|
|
* bpeg.c - Source code for the bpeg parser
|
2020-09-07 23:22:43 -07:00
|
|
|
*
|
2020-12-12 16:31:53 -08:00
|
|
|
* See `man ./bp.1` for more details
|
2020-09-07 23:22:43 -07:00
|
|
|
*/
|
2020-09-11 01:28:06 -07:00
|
|
|
#include <fcntl.h>
|
2020-09-13 15:55:09 -07:00
|
|
|
#include <glob.h>
|
2020-09-12 15:49:51 -07:00
|
|
|
#include <limits.h>
|
2020-09-11 01:28:06 -07:00
|
|
|
#include <stdio.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <string.h>
|
|
|
|
#include <unistd.h>
|
|
|
|
|
|
|
|
#include "compiler.h"
|
2020-09-16 19:35:43 -07:00
|
|
|
#include "file_loader.h"
|
2020-09-11 01:28:06 -07:00
|
|
|
#include "grammar.h"
|
|
|
|
#include "utils.h"
|
2020-12-12 16:31:53 -08:00
|
|
|
#include "viz.h"
|
2020-09-11 01:28:06 -07:00
|
|
|
#include "vm.h"
|
|
|
|
|
|
|
|
/*
 * Help text printed for -h/--help and appended to argument-parsing errors.
 * Every flag handled by main() should have an entry here.
 */
static const char *usage =
    "BP - a Parsing Expression Grammar command line tool\n\n"
    "Usage:\n"
    " bp [flags] <pattern> [<input files>...]\n\n"
    "Flags:\n"
    " -h --help print the usage and quit\n"
    " -v --verbose print verbose debugging info\n"
    " -e --explain explain the matches\n"
    " -j --json print matches as a list of JSON objects\n"
    " -i --ignore-case perform matching case-insensitively\n"
    " -d --define <name>:<def> define a grammar rule\n"
    " -D --define-string <name>:<def> define a grammar rule (string-pattern)\n"
    " -p --pattern <pat> provide a pattern (equivalent to bp '\\(<pat>)')\n"
    " -P --pattern-string <pat> provide a string pattern (may be useful if '<pat>' begins with a '-')\n"
    " -r --replace <replacement> replace the input pattern with the given replacement\n"
    " -m --mode <mode> set the behavior mode (default: find-all)\n"
    " -g --grammar <grammar file> use the specified file as a grammar\n";
|
2020-09-07 23:05:38 -07:00
|
|
|
|
2020-12-12 16:31:53 -08:00
|
|
|
// Output options passed to print_match(). Starts empty; main() turns on
// PRINT_COLOR and PRINT_LINE_NUMBERS when stdout is a terminal.
static print_options_t print_options = 0;
|
|
|
|
|
2020-09-11 02:55:15 -07:00
|
|
|
static char *getflag(const char *flag, char *argv[], int *i)
|
|
|
|
{
|
|
|
|
size_t n = strlen(flag);
|
2020-09-14 12:39:31 -07:00
|
|
|
check(argv[*i], "Attempt to get flag from NULL argument");
|
2020-09-11 02:55:15 -07:00
|
|
|
if (strncmp(argv[*i], flag, n) == 0) {
|
|
|
|
if (argv[*i][n] == '=') {
|
|
|
|
return &argv[*i][n+1];
|
|
|
|
} else if (argv[*i][n] == '\0') {
|
2020-09-14 12:39:31 -07:00
|
|
|
check(argv[*i+1], "Expected argument after '%s'\n\n%s", flag, usage);
|
2020-09-11 02:55:15 -07:00
|
|
|
++(*i);
|
|
|
|
return argv[*i];
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return NULL;
|
|
|
|
}
|
2020-09-13 15:55:09 -07:00
|
|
|
|
2020-09-17 00:29:11 -07:00
|
|
|
/*
 * Scan a match tree for captures named "!" and report each one found.
 *
 * A reported match is printed in bold red via print_match(), followed by the
 * output of fprint_line() for its span. The walk covers `m`, its children and
 * its following siblings; it does not descend into a reported match, nor does
 * it continue on to that match's later siblings.
 *
 * Returns the number of matches reported.
 */
static int print_errors(file_t *f, match_t *m)
{
    int nerrors = 0;
    for (match_t *cur = m; cur != NULL; cur = cur->nextsibling) {
        int is_error = cur->op->op == VM_CAPTURE
            && cur->value.name
            && streq(cur->value.name, "!");
        if (is_error) {
            printf("\033[31;1m");
            print_match(f, cur, print_options);
            printf("\033[0m\n");
            fprint_line(stdout, f, cur->start, cur->end, " ");
            // Stop here: nothing past a reported match is examined.
            return nerrors + 1;
        }
        if (cur->child)
            nerrors += print_errors(f, cur->child);
    }
    return nerrors;
}
|
|
|
|
|
2020-09-14 12:16:15 -07:00
|
|
|
/*
 * Run `pattern` against one input and print the result.
 *
 * g        - grammar handed to match()
 * filename - path handed to load_file(); main() passes NULL for piped input
 * pattern  - compiled pattern to match against the file contents
 * flags    - BPEG_* bit flags; BPEG_EXPLAIN and BPEG_JSON select the output
 *            format, BPEG_VERBOSE is forwarded to json_match()
 *
 * Returns 0 if a match was printed and 1 otherwise. If print_errors()
 * reports anything for the match, the process exits with status 1.
 */
static int run_match(grammar_t *g, const char *filename, vm_op_t *pattern, unsigned int flags)
{
    // Count of results printed so far across all calls, used to decide
    // whether a separator is needed before the next one.
    static int printed_matches = 0;

    file_t *f = load_file(filename);
    // filename may be NULL, which must never be passed to a %s conversion.
    check(f, "Could not open file: %s", filename ? filename : "<stdin>");

    match_t *m = match(g, f, f->contents, pattern, flags);
    if (m && print_errors(f, m) > 0) {
        // _exit() does not flush stdio buffers, so push out the error report
        // first or it is lost whenever stdout is not line-buffered.
        fflush(stdout);
        _exit(1);
    }

    // A match spanning at most one byte is treated the same as no match.
    if (m == NULL || m->end <= m->start + 1) {
        destroy_file(&f);
        return 1;
    }

    ++printed_matches;

    if (flags & BPEG_EXPLAIN) {
        if (filename)
            printf("\033[1;4m%s\033[0m\n", filename);
        visualize_match(m);
    } else if (flags & BPEG_JSON) {
        // main() prints the surrounding "[" and "]"; entries are comma-separated.
        if (printed_matches > 1)
            fprintf(stdout, ",\n");
        // NOTE(review): filename is emitted without JSON escaping.
        printf("{\"filename\":\"%s\",", filename ? filename : "-");
        // Cast so the argument type matches the %ld conversion exactly.
        printf("\"tree\":{\"rule\":\"text\",\"start\":%d,\"end\":%ld,\"children\":[",
               0, (long)(f->end - f->contents));
        json_match(stdout, f->contents, m, (flags & BPEG_VERBOSE) ? 1 : 0);
        printf("]}}\n");
    } else {
        // Blank line between the output of successive files.
        if (printed_matches > 1)
            fputc('\n', stdout);
        if (filename) {
            if (print_options & PRINT_COLOR)
                printf("\033[1;4;33m%s\033[0m\n", filename);
            else
                printf("%s:\n", filename);
        }
        print_match(f, m, print_options);
    }

    destroy_file(&f);
    return 0;
}
|
|
|
|
|
2020-09-11 02:55:15 -07:00
|
|
|
// Shorthand for main()'s argument loop: calls getflag() for option name `f`
// using the `argv` and `i` in the enclosing scope, stores the result in the
// enclosing `flag` variable, and evaluates to it (NULL when argv[i] is not `f`).
#define FLAG(f) (flag=getflag((f), argv, &i))
|
2020-09-09 22:29:09 -07:00
|
|
|
|
|
|
|
int main(int argc, char *argv[])
|
|
|
|
{
|
2020-09-14 12:16:15 -07:00
|
|
|
unsigned int flags = 0;
|
2020-09-11 02:55:15 -07:00
|
|
|
char *flag = NULL;
|
2020-09-13 23:31:38 -07:00
|
|
|
char path[PATH_MAX] = {0};
|
2020-09-11 02:55:15 -07:00
|
|
|
const char *rule = "find-all";
|
2020-09-09 22:29:09 -07:00
|
|
|
|
2020-09-11 01:28:06 -07:00
|
|
|
grammar_t *g = new_grammar();
|
|
|
|
|
2020-09-13 23:31:38 -07:00
|
|
|
// Load builtins:
|
2020-12-12 16:31:53 -08:00
|
|
|
if (access("/etc/xdg/bp/builtins.bp", R_OK) != -1)
|
|
|
|
load_grammar(g, load_file("/etc/xdg/bp/builtins.bp")); // Keep in memory for debugging output
|
|
|
|
sprintf(path, "%s/.config/bp/builtins.bp", getenv("HOME"));
|
2020-09-16 19:35:43 -07:00
|
|
|
if (access(path, R_OK) != -1)
|
|
|
|
load_grammar(g, load_file(path)); // Keep in memory for debugging output
|
2020-09-12 15:11:44 -07:00
|
|
|
|
2020-09-11 02:55:15 -07:00
|
|
|
int i, npatterns = 0;
|
2020-09-14 12:39:31 -07:00
|
|
|
check(argc > 1, "%s", usage);
|
2020-09-11 02:55:15 -07:00
|
|
|
for (i = 1; i < argc; i++) {
|
|
|
|
if (streq(argv[i], "--")) {
|
|
|
|
++i;
|
|
|
|
break;
|
2020-09-13 23:31:38 -07:00
|
|
|
} else if (streq(argv[i], "--help") || streq(argv[i], "-h")) {
|
2020-09-09 22:29:09 -07:00
|
|
|
printf("%s\n", usage);
|
|
|
|
return 0;
|
2020-09-13 23:31:38 -07:00
|
|
|
} else if (streq(argv[i], "--verbose") || streq(argv[i], "-v")) {
|
2020-09-14 12:16:15 -07:00
|
|
|
flags |= BPEG_VERBOSE;
|
2020-12-12 16:31:53 -08:00
|
|
|
} else if (streq(argv[i], "--explain") || streq(argv[i], "-e")) {
|
|
|
|
flags |= BPEG_EXPLAIN;
|
2020-12-14 21:55:36 -08:00
|
|
|
} else if (streq(argv[i], "--json") || streq(argv[i], "-j")) {
|
|
|
|
flags |= BPEG_JSON;
|
2020-09-14 12:16:15 -07:00
|
|
|
} else if (streq(argv[i], "--ignore-case") || streq(argv[i], "-i")) {
|
|
|
|
flags |= BPEG_IGNORECASE;
|
2020-09-11 02:55:15 -07:00
|
|
|
} else if (FLAG("--replace") || FLAG("-r")) {
|
2020-09-28 21:30:43 -07:00
|
|
|
file_t *replace_file = spoof_file("<replace argument>", flag);
|
2020-09-28 22:02:00 -07:00
|
|
|
vm_op_t *patref = bpeg_pattern(replace_file, "pattern");
|
|
|
|
vm_op_t *rep = bpeg_replacement(replace_file, patref, flag);
|
|
|
|
check(rep, "Replacement failed to compile: %s", flag);
|
|
|
|
add_def(g, replace_file, flag, "replacement", rep);
|
2020-09-11 02:55:15 -07:00
|
|
|
rule = "replace-all";
|
|
|
|
} else if (FLAG("--grammar") || FLAG("-g")) {
|
2020-09-16 19:35:43 -07:00
|
|
|
file_t *f = load_file(flag);
|
|
|
|
if (f == NULL) {
|
2020-12-12 16:31:53 -08:00
|
|
|
sprintf(path, "%s/.config/bp/%s.bp", getenv("HOME"), flag);
|
2020-09-16 19:35:43 -07:00
|
|
|
f = load_file(path);
|
|
|
|
}
|
|
|
|
if (f == NULL) {
|
2020-12-12 16:31:53 -08:00
|
|
|
sprintf(path, "/etc/xdg/bp/%s.bp", flag);
|
2020-09-16 19:35:43 -07:00
|
|
|
f = load_file(path);
|
2020-09-11 02:55:15 -07:00
|
|
|
}
|
2020-09-16 19:35:43 -07:00
|
|
|
check(f != NULL, "Couldn't find grammar: %s", flag);
|
|
|
|
load_grammar(g, f); // Keep in memory for debug output
|
2020-09-11 02:55:15 -07:00
|
|
|
} else if (FLAG("--define") || FLAG("-d")) {
|
|
|
|
char *def = flag;
|
2020-09-16 22:39:33 -07:00
|
|
|
char *eq = strchr(def, ':');
|
|
|
|
check(eq, "Rule definitions must include an ':'\n\n%s", usage);
|
2020-09-10 22:50:49 -07:00
|
|
|
*eq = '\0';
|
2020-09-11 01:28:06 -07:00
|
|
|
char *src = ++eq;
|
2020-09-28 21:30:43 -07:00
|
|
|
file_t *def_file = spoof_file(def, flag);
|
|
|
|
vm_op_t *pat = bpeg_pattern(def_file, src);
|
2020-09-28 22:02:00 -07:00
|
|
|
check(pat, "Failed to compile pattern: %s", flag);
|
2020-09-28 21:30:43 -07:00
|
|
|
add_def(g, def_file, src, def, pat);
|
2020-09-13 20:33:11 -07:00
|
|
|
} else if (FLAG("--define-string") || FLAG("-D")) {
|
|
|
|
char *def = flag;
|
2020-09-16 22:39:33 -07:00
|
|
|
char *eq = strchr(def, ':');
|
|
|
|
check(eq, "Rule definitions must include an ':'\n\n%s", usage);
|
2020-09-13 20:33:11 -07:00
|
|
|
*eq = '\0';
|
|
|
|
char *src = ++eq;
|
2020-09-28 21:30:43 -07:00
|
|
|
file_t *def_file = spoof_file(def, flag);
|
|
|
|
vm_op_t *pat = bpeg_stringpattern(def_file, src);
|
2020-09-28 22:02:00 -07:00
|
|
|
check(pat, "Failed to compile pattern: %s", flag);
|
2020-09-28 21:30:43 -07:00
|
|
|
add_def(g, def_file, src, def, pat);
|
2020-09-13 23:31:38 -07:00
|
|
|
} else if (FLAG("--pattern") || FLAG("-p")) {
|
2020-09-12 18:20:13 -07:00
|
|
|
check(npatterns == 0, "Cannot define multiple patterns");
|
2020-09-28 21:30:43 -07:00
|
|
|
file_t *arg_file = spoof_file("<pattern argument>", flag);
|
|
|
|
vm_op_t *p = bpeg_pattern(arg_file, flag);
|
2020-09-28 22:02:00 -07:00
|
|
|
check(p, "Pattern failed to compile: %s", flag);
|
2020-09-28 21:30:43 -07:00
|
|
|
add_def(g, arg_file, flag, "pattern", p);
|
2020-09-12 20:05:55 -07:00
|
|
|
++npatterns;
|
2020-09-13 23:31:38 -07:00
|
|
|
} else if (FLAG("--pattern-string") || FLAG("-P")) {
|
2020-09-28 21:30:43 -07:00
|
|
|
file_t *arg_file = spoof_file("<pattern argument>", flag);
|
|
|
|
vm_op_t *p = bpeg_stringpattern(arg_file, flag);
|
2020-09-28 22:02:00 -07:00
|
|
|
check(p, "Pattern failed to compile: %s", flag);
|
2020-09-28 21:30:43 -07:00
|
|
|
add_def(g, arg_file, flag, "pattern", p);
|
2020-09-12 18:20:13 -07:00
|
|
|
++npatterns;
|
2020-09-13 00:37:17 -07:00
|
|
|
} else if (FLAG("--mode") || FLAG("-m")) {
|
|
|
|
rule = flag;
|
2020-09-11 02:55:15 -07:00
|
|
|
} else if (argv[i][0] != '-') {
|
|
|
|
if (npatterns > 0) break;
|
2020-09-28 22:02:00 -07:00
|
|
|
file_t *arg_file = spoof_file("<pattern argument>", argv[i]);
|
2020-09-28 21:30:43 -07:00
|
|
|
vm_op_t *p = bpeg_stringpattern(arg_file, argv[i]);
|
2020-09-28 22:02:00 -07:00
|
|
|
check(p, "Pattern failed to compile: %s", argv[i]);
|
2020-09-28 21:30:43 -07:00
|
|
|
add_def(g, arg_file, argv[i], "pattern", p);
|
2020-09-11 02:55:15 -07:00
|
|
|
++npatterns;
|
2020-09-12 20:05:55 -07:00
|
|
|
} else {
|
2020-09-14 12:39:31 -07:00
|
|
|
printf("Unrecognized flag: %s\n\n%s\n", argv[i], usage);
|
2020-09-12 20:05:55 -07:00
|
|
|
return 1;
|
2020-09-09 22:29:09 -07:00
|
|
|
}
|
|
|
|
}
|
2020-09-07 23:05:38 -07:00
|
|
|
|
2020-09-16 20:38:58 -07:00
|
|
|
if (isatty(STDOUT_FILENO)) {
|
2020-12-12 16:31:53 -08:00
|
|
|
print_options |= PRINT_COLOR | PRINT_LINE_NUMBERS;
|
2020-09-16 20:38:58 -07:00
|
|
|
}
|
|
|
|
|
2020-09-11 02:55:15 -07:00
|
|
|
vm_op_t *pattern = lookup(g, rule);
|
2020-09-12 01:32:59 -07:00
|
|
|
check(pattern != NULL, "No such rule: '%s'", rule);
|
2020-09-09 22:29:09 -07:00
|
|
|
|
2020-09-16 21:34:55 -07:00
|
|
|
int ret = 1;
|
2020-12-14 21:55:36 -08:00
|
|
|
if (flags & BPEG_JSON) printf("[");
|
2020-09-13 15:55:09 -07:00
|
|
|
if (i < argc) {
|
|
|
|
// Files pass in as command line args:
|
|
|
|
for (int nfiles = 0; i < argc; nfiles++, i++) {
|
2020-09-16 21:34:55 -07:00
|
|
|
ret &= run_match(g, argv[i], pattern, flags);
|
2020-09-11 02:55:15 -07:00
|
|
|
}
|
2020-09-13 15:55:09 -07:00
|
|
|
} else if (isatty(STDIN_FILENO)) {
|
|
|
|
// No files, no piped in input, so use * **/*:
|
|
|
|
glob_t globbuf;
|
|
|
|
glob("*", 0, NULL, &globbuf);
|
|
|
|
glob("**/*", GLOB_APPEND, NULL, &globbuf);
|
|
|
|
for (size_t i = 0; i < globbuf.gl_pathc; i++) {
|
2020-09-16 21:34:55 -07:00
|
|
|
ret &= run_match(g, globbuf.gl_pathv[i], pattern, flags);
|
2020-09-11 02:55:15 -07:00
|
|
|
}
|
2020-09-13 15:55:09 -07:00
|
|
|
globfree(&globbuf);
|
|
|
|
} else {
|
|
|
|
// Piped in input:
|
2020-09-16 21:34:55 -07:00
|
|
|
ret &= run_match(g, NULL, pattern, flags);
|
2020-09-07 23:05:38 -07:00
|
|
|
}
|
2020-12-14 21:55:36 -08:00
|
|
|
if (flags & BPEG_JSON) printf("]");
|
2020-09-07 23:05:38 -07:00
|
|
|
|
2020-09-13 15:55:09 -07:00
|
|
|
return ret;
|
2020-09-07 23:05:38 -07:00
|
|
|
}
|
2020-09-07 23:34:41 -07:00
|
|
|
|
2020-09-11 01:38:44 -07:00
|
|
|
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
|