2020-09-07 23:22:43 -07:00
|
|
|
/*
|
2020-09-10 02:02:40 -07:00
|
|
|
* bpeg.c - Source code for the bpeg parser
|
2020-09-07 23:22:43 -07:00
|
|
|
*
|
|
|
|
* Grammar:
|
|
|
|
* # <comment> comment
|
2020-09-10 03:20:07 -07:00
|
|
|
* . any character (multiline: $.)
|
|
|
|
* ^ beginning of a line (^^: beginning of file)
|
|
|
|
* $ end of a line ($$: end of file)
|
|
|
|
* _ 0 or more spaces or tabs (__: include newlines and comments)
|
2020-09-10 00:27:51 -07:00
|
|
|
* `<c> character <c>
|
|
|
|
* `<a>-<z> character between <a> and <z>
|
|
|
|
* \<e> escape sequence (e.g. \n, \033)
|
2020-09-10 02:14:47 -07:00
|
|
|
* \<e1>-<e2> escape sequence range (e.g. \x00-\xF0)
|
2020-09-07 23:22:43 -07:00
|
|
|
* ! <pat> no <pat>
|
2020-09-10 03:20:07 -07:00
|
|
|
* ~ <pat> any character as long as it doesn't match <pat> (multiline: ~~<pat>)
|
|
|
|
* & <pat> up to and including <pat> (aka *~<pat> <pat>) (multiline: &&<pat>)
|
2020-09-07 23:22:43 -07:00
|
|
|
* <N=1> + <pat> [% <sep="">] <N> or more <pat>s (separated by <sep>)
|
|
|
|
* * <pat> [% <sep="">] sugar for "0+ <pat> [% <sep>]"
|
|
|
|
* <N=1> - <pat> [% <sep="">] <N> or fewer <pat>s (separated by <sep>)
|
|
|
|
* ? <pat> sugar for "1- <pat>"
|
|
|
|
* <N> - <M> <pat> <N> to <M> (inclusive) <pat>s
|
|
|
|
* < <pat> after <pat>, ...
|
|
|
|
* > <pat> before <pat>, ...
|
|
|
|
* ( <pat> ) <pat>
|
|
|
|
* @ <pat> capture <pat>
|
|
|
|
* @ [ <name> ] <pat> <pat> named <name>
|
2020-09-10 02:02:40 -07:00
|
|
|
* { <pat> => <str> } <pat> replaced with <str>
|
|
|
|
* "@1" or "@[1]" first capture
|
|
|
|
* "@foo" or "@[foo]" capture named "foo"
|
|
|
|
* <pat1> <pat2> <pat1> followed by <pat2>
|
|
|
|
* <pat> / <alt> <pat> otherwise <alt>
|
2020-09-07 23:22:43 -07:00
|
|
|
* ; <name> = <pat> <name> is defined to be <pat>
|
|
|
|
*/
|
2020-09-11 01:28:06 -07:00
|
|
|
#include <fcntl.h>
|
2020-09-12 15:49:51 -07:00
|
|
|
#include <limits.h>
|
2020-09-11 01:28:06 -07:00
|
|
|
#include <stdio.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <string.h>
|
|
|
|
#include <unistd.h>
|
|
|
|
|
|
|
|
#include "compiler.h"
|
|
|
|
#include "grammar.h"
|
|
|
|
#include "utils.h"
|
|
|
|
#include "vm.h"
|
|
|
|
|
|
|
|
// Help text printed for -h/--help and on command line errors.
// Keep this in sync with the flags handled in main().
static const char *usage =
    "Usage:\n"
    " bpeg [flags] <pattern> [<input files>...]\n\n"
    "Flags:\n"
    " -h --help\t print the usage and quit\n"
    " -v --verbose\t print verbose debugging info\n"
    " -d --define <name>=<def> Define a grammar rule\n"
    " -e --escaped <pattern> give the pattern as a bpeg pattern\n"
    " -s --string <pattern> give the pattern as a string pattern\n"
    " -m --mode <rule> match using the named grammar rule (default: find-all)\n"
    " -r --replace <replacement> replace the input pattern with the given replacement\n"
    " -g --grammar <grammar file> use the specified file as a grammar\n";
|
2020-09-07 23:05:38 -07:00
|
|
|
|
2020-09-11 02:55:15 -07:00
|
|
|
/*
 * Check whether the argument at argv[*i] is the flag `flag` and, if it is,
 * return the value supplied for it. Two spellings are recognized:
 *
 *   <flag>=<value>   returns a pointer to <value> inside argv[*i]
 *   <flag> <value>   advances *i by one and returns the following argument
 *
 * Returns NULL (leaving *i untouched) when argv[*i] is not this flag, or when
 * the flag is the final argument and therefore has no value.
 *
 * `argv` must be NULL-terminated, as the argv passed to main() is.
 */
static char *getflag(const char *flag, char *argv[], int *i)
{
    char *arg = argv[*i];
    // Already at the terminating NULL: there is nothing left to inspect
    if (arg == NULL) return NULL;

    size_t n = strlen(flag);
    if (strncmp(arg, flag, n) != 0) return NULL;

    if (arg[n] == '=') return &arg[n+1];

    // Only consume the next argument if it exists; otherwise the caller would
    // be left with *i pointing at the NULL terminator and no way to tell a
    // missing value apart from "not this flag".
    if (arg[n] == '\0' && argv[*i + 1] != NULL) {
        ++(*i);
        return argv[*i];
    }
    return NULL;
}
|
|
|
|
// Try to read argv[i] as the flag `name`. The result of getflag() is stored
// in `flag` and is also the value of the expression, so it can be used
// directly as a condition. Expects `flag`, `argv` and `i` to be in scope
// where the macro is used.
#define FLAG(name) (flag = getflag((name), argv, &i))
|
2020-09-09 22:29:09 -07:00
|
|
|
|
|
|
|
int main(int argc, char *argv[])
|
|
|
|
{
|
2020-09-11 01:28:06 -07:00
|
|
|
int verbose = 0;
|
2020-09-11 02:55:15 -07:00
|
|
|
char *flag = NULL;
|
|
|
|
const char *rule = "find-all";
|
2020-09-09 22:29:09 -07:00
|
|
|
|
2020-09-11 01:28:06 -07:00
|
|
|
grammar_t *g = new_grammar();
|
|
|
|
|
2020-09-12 15:11:44 -07:00
|
|
|
int fd;
|
|
|
|
if ((fd=open("/etc/xdg/bpeg/builtins.bpeg", O_RDONLY)) >= 0)
|
2020-09-12 20:05:55 -07:00
|
|
|
load_grammar(g, readfile(fd)); // Keep in memory for debugging output
|
2020-09-12 15:49:51 -07:00
|
|
|
char path[PATH_MAX] = {0};
|
|
|
|
sprintf(path, "%s/.config/bpeg/builtins.bpeg", getenv("HOME"));
|
|
|
|
if ((fd=open(path, O_RDONLY)) >= 0)
|
2020-09-12 20:05:55 -07:00
|
|
|
load_grammar(g, readfile(fd)); // Keep in memory for debugging output
|
2020-09-12 15:11:44 -07:00
|
|
|
|
2020-09-11 02:55:15 -07:00
|
|
|
int i, npatterns = 0;
|
|
|
|
for (i = 1; i < argc; i++) {
|
|
|
|
if (streq(argv[i], "--")) {
|
|
|
|
++i;
|
|
|
|
break;
|
|
|
|
} else if (FLAG("--help") || FLAG("-h")) {
|
2020-09-09 22:29:09 -07:00
|
|
|
printf("%s\n", usage);
|
|
|
|
return 0;
|
2020-09-11 02:55:15 -07:00
|
|
|
} else if (FLAG("--verbose") || FLAG("-v")) {
|
2020-09-09 22:29:09 -07:00
|
|
|
verbose = 1;
|
2020-09-11 02:55:15 -07:00
|
|
|
} else if (FLAG("--replace") || FLAG("-r")) {
|
|
|
|
vm_op_t *p = bpeg_replacement(bpeg_pattern("pattern"), flag);
|
|
|
|
check(p, "Replacement failed to compile");
|
|
|
|
add_def(g, flag, "replacement", p);
|
|
|
|
rule = "replace-all";
|
|
|
|
} else if (FLAG("--grammar") || FLAG("-g")) {
|
2020-09-12 15:11:44 -07:00
|
|
|
int fd;
|
2020-09-12 15:49:51 -07:00
|
|
|
if (streq(flag, "-")) {
|
2020-09-12 15:11:44 -07:00
|
|
|
fd = STDIN_FILENO;
|
2020-09-11 02:55:15 -07:00
|
|
|
} else {
|
2020-09-12 15:49:51 -07:00
|
|
|
fd = open(flag, O_RDONLY);
|
|
|
|
if (fd < 0) {
|
|
|
|
sprintf(path, "%s/.config/bpeg/%s.bpeg", getenv("HOME"), flag);
|
|
|
|
fd = open(path, O_RDONLY);
|
|
|
|
}
|
|
|
|
if (fd < 0) {
|
|
|
|
sprintf(path, "/etc/xdg/bpeg/%s.bpeg", flag);
|
|
|
|
fd = open(path, O_RDONLY);
|
|
|
|
}
|
|
|
|
check(fd >= 0, "Couldn't find grammar: %s", flag);
|
2020-09-11 02:55:15 -07:00
|
|
|
}
|
2020-09-12 20:05:55 -07:00
|
|
|
load_grammar(g, readfile(fd)); // Keep in memory for debug output
|
2020-09-11 02:55:15 -07:00
|
|
|
} else if (FLAG("--define") || FLAG("-d")) {
|
|
|
|
char *def = flag;
|
2020-09-10 22:50:49 -07:00
|
|
|
char *eq = strchr(def, '=');
|
|
|
|
check(eq, usage);
|
|
|
|
*eq = '\0';
|
2020-09-11 01:28:06 -07:00
|
|
|
char *src = ++eq;
|
|
|
|
vm_op_t *pat = bpeg_pattern(src);
|
|
|
|
check(pat, "Failed to compile pattern");
|
|
|
|
add_def(g, src, def, pat);
|
2020-09-12 18:20:13 -07:00
|
|
|
} else if (FLAG("--escaped") || FLAG("-e")) {
|
|
|
|
check(npatterns == 0, "Cannot define multiple patterns");
|
2020-09-12 20:05:55 -07:00
|
|
|
vm_op_t *p = bpeg_pattern(flag);
|
|
|
|
check(p, "Pattern failed to compile: '%s'", flag);
|
|
|
|
add_def(g, flag, "pattern", p);
|
|
|
|
++npatterns;
|
|
|
|
} else if (FLAG("--string") || FLAG("-s")) {
|
|
|
|
vm_op_t *p = bpeg_stringpattern(flag);
|
2020-09-12 18:20:13 -07:00
|
|
|
check(p, "Pattern failed to compile");
|
2020-09-12 20:05:55 -07:00
|
|
|
add_def(g, flag, "pattern", p);
|
2020-09-12 18:20:13 -07:00
|
|
|
++npatterns;
|
2020-09-13 00:37:17 -07:00
|
|
|
} else if (FLAG("--mode") || FLAG("-m")) {
|
|
|
|
rule = flag;
|
2020-09-11 02:55:15 -07:00
|
|
|
} else if (argv[i][0] != '-') {
|
|
|
|
if (npatterns > 0) break;
|
|
|
|
vm_op_t *p = bpeg_stringpattern(argv[i]);
|
|
|
|
check(p, "Pattern failed to compile");
|
|
|
|
add_def(g, argv[i], "pattern", p);
|
|
|
|
++npatterns;
|
2020-09-12 20:05:55 -07:00
|
|
|
} else {
|
|
|
|
printf("Unrecognized flag: %s\n%s\n", argv[i], usage);
|
|
|
|
return 1;
|
2020-09-09 22:29:09 -07:00
|
|
|
}
|
|
|
|
}
|
2020-09-07 23:05:38 -07:00
|
|
|
|
2020-09-11 02:55:15 -07:00
|
|
|
vm_op_t *pattern = lookup(g, rule);
|
2020-09-12 01:32:59 -07:00
|
|
|
check(pattern != NULL, "No such rule: '%s'", rule);
|
2020-09-09 22:29:09 -07:00
|
|
|
|
|
|
|
if (verbose) {
|
2020-09-11 02:55:15 -07:00
|
|
|
print_pattern(pattern);
|
2020-09-09 22:29:09 -07:00
|
|
|
}
|
2020-09-08 20:47:22 -07:00
|
|
|
|
2020-09-08 01:11:28 -07:00
|
|
|
char *input;
|
2020-09-11 02:55:15 -07:00
|
|
|
if (i == argc) {
|
|
|
|
// Force stdin if no files given
|
2020-09-09 22:29:09 -07:00
|
|
|
input = readfile(STDIN_FILENO);
|
2020-09-11 02:55:15 -07:00
|
|
|
goto run_match;
|
2020-09-08 01:11:28 -07:00
|
|
|
}
|
2020-09-11 03:00:30 -07:00
|
|
|
for (int nfiles = 0; i < argc; nfiles++, i++) {
|
2020-09-11 02:55:15 -07:00
|
|
|
if (argv[i] == NULL || streq(argv[i], "-")) {
|
|
|
|
input = readfile(STDIN_FILENO);
|
|
|
|
} else {
|
|
|
|
int fd = open(argv[i], O_RDONLY);
|
2020-09-11 03:00:30 -07:00
|
|
|
check(fd >= 0, "Couldn't open file: %s", argv[i]);
|
2020-09-11 02:55:15 -07:00
|
|
|
input = readfile(fd);
|
2020-09-11 03:00:30 -07:00
|
|
|
if (nfiles > 0) printf("\n");
|
|
|
|
printf("\033[1;4;33m%s\033[0m\n", argv[i]);
|
2020-09-11 02:55:15 -07:00
|
|
|
}
|
2020-09-07 23:05:38 -07:00
|
|
|
|
2020-09-11 02:55:15 -07:00
|
|
|
run_match:;
|
|
|
|
// Ensure string has a null byte to the left:
|
|
|
|
char *lpadded = calloc(sizeof(char), strlen(input)+2);
|
|
|
|
stpcpy(&lpadded[1], input);
|
|
|
|
input = &lpadded[1];
|
2020-09-07 23:05:38 -07:00
|
|
|
|
2020-09-11 02:55:15 -07:00
|
|
|
match_t *m = match(g, input, pattern);
|
|
|
|
if (m == NULL) {
|
|
|
|
printf("No match\n");
|
|
|
|
return 1;
|
|
|
|
} else {
|
2020-09-13 00:37:17 -07:00
|
|
|
print_match(m, isatty(STDOUT_FILENO) ? "\033[0m" : NULL, verbose);
|
2020-09-11 02:55:15 -07:00
|
|
|
//printf("\033[0;2m%s\n", m->end);
|
|
|
|
}
|
2020-09-12 20:05:55 -07:00
|
|
|
freefile(input);
|
2020-09-07 23:05:38 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
2020-09-07 23:34:41 -07:00
|
|
|
|
2020-09-11 01:38:44 -07:00
|
|
|
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
|