bp/bpeg.c

156 lines
5.3 KiB
C
Raw Normal View History

2020-09-07 23:22:43 -07:00
/*
* bpeg.c - Source code for the bpeg parser
2020-09-07 23:22:43 -07:00
*
* Grammar:
* # <comment> comment
2020-09-10 03:20:07 -07:00
* . any character (multiline: $.)
* ^ beginning of a line (^^: beginning of file)
* $ end of a line ($$: end of file)
* _ 0 or more spaces or tabs (__: include newlines and comments)
* `<c> character <c>
* `<a>-<z> character between <a> and <z>
* \<e> escape sequence (e.g. \n, \033)
2020-09-10 02:14:47 -07:00
* \<e1>-<e2> escape sequence range (e.g. \x00-\xF0)
2020-09-07 23:22:43 -07:00
* ! <pat> no <pat>
2020-09-10 03:20:07 -07:00
* ~ <pat> any character as long as it doesn't match <pat> (multiline: ~~<pat>)
* & <pat> up to and including <pat> (aka *~<pat> <pat>) (multiline: &&<pat>)
2020-09-07 23:22:43 -07:00
* <N=1> + <pat> [% <sep="">] <N> or more <pat>s (separated by <sep>)
* * <pat> [% <sep="">] sugar for "0+ <pat> [% <sep>]"
* <N=1> - <pat> [% <sep="">] <N> or fewer <pat>s (separated by <sep>)
* ? <pat> sugar for "1- <pat>"
* <N> - <M> <pat> <N> to <M> (inclusive) <pat>s
* < <pat> after <pat>, ...
* > <pat> before <pat>, ...
* ( <pat> ) <pat>
* @ <pat> capture <pat>
* @ [ <name> ] <pat> <pat> named <name>
* { <pat> => <str> } <pat> replaced with <str>
* "@1" or "@[1]" first capture
* "@foo" or "@[foo]" capture named "foo"
* <pat1> <pat2> <pat1> followed by <pat2>
* <pat> / <alt> <pat> otherwise <alt>
2020-09-07 23:22:43 -07:00
* ; <name> = <pat> <name> is defined to be <pat>
*/
2020-09-11 01:28:06 -07:00
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "compiler.h"
#include "grammar.h"
#include "utils.h"
#include "vm.h"
/*
 * Help text for the command line interface. Printed to stdout by --help and
 * passed to check() when the command line cannot be understood.
 */
static const char *usage = (
    "Usage:\n"
    " bpeg [flags] <pattern> [<input files>...]\n\n"
    "Flags:\n"
    " -h --help\t print the usage and quit\n"
    " -v --verbose\t print verbose debugging info\n"
    " -d --define <name>=<def> Define a grammar rule\n"
    " -r --replace <replacement> replace the input pattern with the given replacement\n"
    " -g --grammar <grammar file> use the specified file as a grammar\n");
2020-09-07 23:05:38 -07:00
int main(int argc, char *argv[])
{
2020-09-11 01:28:06 -07:00
int verbose = 0;
const char *pattern = NULL,
*replacement = NULL,
*grammarfile = NULL,
*infile = NULL;
2020-09-11 01:28:06 -07:00
grammar_t *g = new_grammar();
for (int i = 1; i < argc; i++) {
if (streq(argv[i], "--help") || streq(argv[i], "-h")) {
printf("%s\n", usage);
return 0;
} else if (streq(argv[i], "--verbose") || streq(argv[i], "-v")) {
verbose = 1;
} else if (streq(argv[i], "--replace") || streq(argv[i], "-r")) {
replacement = argv[++i];
} else if (streq(argv[i], "--grammar") || streq(argv[i], "-g")) {
grammarfile = argv[++i];
2020-09-10 22:50:49 -07:00
} else if (streq(argv[i], "--define") || streq(argv[i], "-d")) {
char *def = argv[++i];
char *eq = strchr(def, '=');
check(eq, usage);
*eq = '\0';
2020-09-11 01:28:06 -07:00
char *src = ++eq;
vm_op_t *pat = bpeg_pattern(src);
check(pat, "Failed to compile pattern");
add_def(g, src, def, pat);
} else if (pattern == NULL) {
pattern = argv[i];
} else if (infile == NULL) {
infile = argv[i];
}
}
2020-09-07 23:05:38 -07:00
check(pattern != NULL || grammarfile != NULL, usage);
if (grammarfile) {
// load grammar from a file (semicolon mode)
char *grammar;
if (streq(grammarfile, "-")) {
grammar = readfile(STDIN_FILENO);
} else {
int fd = open(grammarfile, O_RDONLY);
check(fd >= 0, "Couldn't open file: %s", argv[2]);
grammar = readfile(fd);
}
2020-09-11 01:28:06 -07:00
load_grammar(g, grammar);
} else {
// load grammar in start-with-string mode:
2020-09-11 01:28:06 -07:00
vm_op_t *pat = bpeg_stringpattern(pattern);
if (replacement)
pat = bpeg_replacement(pat, replacement);
2020-09-11 01:28:06 -07:00
add_def(g, pattern, "pattern", pat);
2020-09-11 02:07:53 -07:00
if (replacement) {
load_grammar(g,
"replace-all = *&&@pattern &&$$;\n"
);
} else {
load_grammar(g,
"find-all = *(matching-line / {&&(\\n/$$)=>});\n"
"matching-line = +&@pattern *. $ ?\\n;"
2020-09-11 02:04:01 -07:00
);
2020-09-11 02:07:53 -07:00
}
}
if (verbose) {
2020-09-11 01:28:06 -07:00
print_pattern(g->pattern);
}
2020-09-08 20:47:22 -07:00
char *input;
if (infile == NULL || streq(infile, "-")) {
input = readfile(STDIN_FILENO);
} else {
int fd = open(infile, O_RDONLY);
check(fd >= 0, "Couldn't open file: %s", argv[2]);
input = readfile(fd);
}
2020-09-07 23:05:38 -07:00
// Ensure string has a null byte to the left:
char *lpadded = calloc(sizeof(char), strlen(input)+2);
stpcpy(&lpadded[1], input);
input = &lpadded[1];
2020-09-07 23:05:38 -07:00
2020-09-11 01:28:06 -07:00
match_t *m = match(g, input, g->pattern);
2020-09-07 23:05:38 -07:00
if (m == NULL) {
printf("No match\n");
2020-09-08 01:45:33 -07:00
return 1;
2020-09-07 23:05:38 -07:00
} else {
2020-09-11 01:28:06 -07:00
print_match(m, "\033[0m", verbose);
2020-09-11 02:04:01 -07:00
//printf("\033[0;2m%s\n", m->end);
2020-09-07 23:05:38 -07:00
}
return 0;
}
2020-09-07 23:34:41 -07:00
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1