WIP
This commit is contained in:
parent
3483cd75cb
commit
6c237850e9
2
Makefile
2
Makefile
@ -6,7 +6,7 @@ CWARN=-Wall -Wpedantic -Wextra -Wno-unknown-pragmas -Wno-missing-field-initializ
|
||||
G ?=
|
||||
O ?= -O3
|
||||
|
||||
CFILES=compiler.c grammar.c utils.c vm.c
|
||||
CFILES=compiler.c grammar.c utils.c vm.c file_loader.c
|
||||
OBJFILES=$(CFILES:.c=.o)
|
||||
|
||||
all: $(NAME)
|
||||
|
74
bpeg.c
74
bpeg.c
@ -12,6 +12,7 @@
|
||||
#include <unistd.h>
|
||||
|
||||
#include "compiler.h"
|
||||
#include "file_loader.h"
|
||||
#include "grammar.h"
|
||||
#include "utils.h"
|
||||
#include "vm.h"
|
||||
@ -50,25 +51,18 @@ static char *getflag(const char *flag, char *argv[], int *i)
|
||||
|
||||
static int run_match(grammar_t *g, const char *filename, vm_op_t *pattern, unsigned int flags)
|
||||
{
|
||||
char *input;
|
||||
if (filename == NULL || streq(filename, "-")) {
|
||||
input = readfile(STDIN_FILENO);
|
||||
} else {
|
||||
int fd = open(filename, O_RDONLY);
|
||||
check(fd >= 0, "Couldn't open file: %s", filename);
|
||||
input = readfile(fd);
|
||||
}
|
||||
match_t *m = match(g, input, pattern, flags);
|
||||
file_t *f = load_file(filename);
|
||||
match_t *m = match(g, f, f->contents, pattern, flags);
|
||||
if (m != NULL && m->end > m->start + 1) {
|
||||
if (filename != NULL) {
|
||||
if (isatty(STDOUT_FILENO)) printf("\033[1;4;33m%s\033[0m\n", filename);
|
||||
else printf("%s\n", filename);
|
||||
}
|
||||
print_match(m, isatty(STDOUT_FILENO) ? "\033[0m" : NULL, (flags & BPEG_VERBOSE) != 0);
|
||||
freefile(input);
|
||||
destroy_file(&f);
|
||||
return 0;
|
||||
} else {
|
||||
freefile(input);
|
||||
destroy_file(&f);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
@ -85,12 +79,11 @@ int main(int argc, char *argv[])
|
||||
grammar_t *g = new_grammar();
|
||||
|
||||
// Load builtins:
|
||||
int fd;
|
||||
if ((fd=open("/etc/xdg/bpeg/builtins.bpeg", O_RDONLY)) >= 0)
|
||||
load_grammar(g, readfile(fd)); // Keep in memory for debugging output
|
||||
if (access("/etc/xdg/bpeg/builtins.bpeg", R_OK) != -1)
|
||||
load_grammar(g, load_file("/etc/xdg/bpeg/builtins.bpeg")); // Keep in memory for debugging output
|
||||
sprintf(path, "%s/.config/bpeg/builtins.bpeg", getenv("HOME"));
|
||||
if ((fd=open(path, O_RDONLY)) >= 0)
|
||||
load_grammar(g, readfile(fd)); // Keep in memory for debugging output
|
||||
if (access(path, R_OK) != -1)
|
||||
load_grammar(g, load_file(path)); // Keep in memory for debugging output
|
||||
|
||||
int i, npatterns = 0;
|
||||
check(argc > 1, "%s", usage);
|
||||
@ -106,63 +99,58 @@ int main(int argc, char *argv[])
|
||||
} else if (streq(argv[i], "--ignore-case") || streq(argv[i], "-i")) {
|
||||
flags |= BPEG_IGNORECASE;
|
||||
} else if (FLAG("--replace") || FLAG("-r")) {
|
||||
vm_op_t *p = bpeg_replacement(bpeg_pattern("pattern"), flag);
|
||||
vm_op_t *p = bpeg_replacement(bpeg_pattern(NULL, "pattern"), flag);
|
||||
check(p, "Replacement failed to compile");
|
||||
add_def(g, flag, "replacement", p);
|
||||
add_def(g, NULL, flag, "replacement", p);
|
||||
rule = "replace-all";
|
||||
} else if (FLAG("--grammar") || FLAG("-g")) {
|
||||
int fd;
|
||||
if (streq(flag, "-")) {
|
||||
fd = STDIN_FILENO;
|
||||
} else {
|
||||
fd = open(flag, O_RDONLY);
|
||||
if (fd < 0) {
|
||||
sprintf(path, "%s/.config/bpeg/%s.bpeg", getenv("HOME"), flag);
|
||||
fd = open(path, O_RDONLY);
|
||||
}
|
||||
if (fd < 0) {
|
||||
sprintf(path, "/etc/xdg/bpeg/%s.bpeg", flag);
|
||||
fd = open(path, O_RDONLY);
|
||||
}
|
||||
check(fd >= 0, "Couldn't find grammar: %s", flag);
|
||||
file_t *f = load_file(flag);
|
||||
if (f == NULL) {
|
||||
sprintf(path, "%s/.config/bpeg/%s.bpeg", getenv("HOME"), flag);
|
||||
f = load_file(path);
|
||||
}
|
||||
load_grammar(g, readfile(fd)); // Keep in memory for debug output
|
||||
if (f == NULL) {
|
||||
sprintf(path, "/etc/xdg/bpeg/%s.bpeg", flag);
|
||||
f = load_file(path);
|
||||
}
|
||||
check(f != NULL, "Couldn't find grammar: %s", flag);
|
||||
load_grammar(g, f); // Keep in memory for debug output
|
||||
} else if (FLAG("--define") || FLAG("-d")) {
|
||||
char *def = flag;
|
||||
char *eq = strchr(def, '=');
|
||||
check(eq, "Rule definitions must include an '='\n\n%s", usage);
|
||||
*eq = '\0';
|
||||
char *src = ++eq;
|
||||
vm_op_t *pat = bpeg_pattern(src);
|
||||
vm_op_t *pat = bpeg_pattern(NULL, src);
|
||||
check(pat, "Failed to compile pattern");
|
||||
add_def(g, src, def, pat);
|
||||
add_def(g, NULL, src, def, pat);
|
||||
} else if (FLAG("--define-string") || FLAG("-D")) {
|
||||
char *def = flag;
|
||||
char *eq = strchr(def, '=');
|
||||
check(eq, "Rule definitions must include an '='\n\n%s", usage);
|
||||
*eq = '\0';
|
||||
char *src = ++eq;
|
||||
vm_op_t *pat = bpeg_stringpattern(src);
|
||||
vm_op_t *pat = bpeg_stringpattern(NULL, src);
|
||||
check(pat, "Failed to compile pattern");
|
||||
add_def(g, src, def, pat);
|
||||
add_def(g, NULL, src, def, pat);
|
||||
} else if (FLAG("--pattern") || FLAG("-p")) {
|
||||
check(npatterns == 0, "Cannot define multiple patterns");
|
||||
vm_op_t *p = bpeg_pattern(flag);
|
||||
vm_op_t *p = bpeg_pattern(NULL, flag);
|
||||
check(p, "Pattern failed to compile: '%s'", flag);
|
||||
add_def(g, flag, "pattern", p);
|
||||
add_def(g, NULL, flag, "pattern", p);
|
||||
++npatterns;
|
||||
} else if (FLAG("--pattern-string") || FLAG("-P")) {
|
||||
vm_op_t *p = bpeg_stringpattern(flag);
|
||||
vm_op_t *p = bpeg_stringpattern(NULL, flag);
|
||||
check(p, "Pattern failed to compile");
|
||||
add_def(g, flag, "pattern", p);
|
||||
add_def(g, NULL, flag, "pattern", p);
|
||||
++npatterns;
|
||||
} else if (FLAG("--mode") || FLAG("-m")) {
|
||||
rule = flag;
|
||||
} else if (argv[i][0] != '-') {
|
||||
if (npatterns > 0) break;
|
||||
vm_op_t *p = bpeg_stringpattern(argv[i]);
|
||||
vm_op_t *p = bpeg_stringpattern(NULL, argv[i]);
|
||||
check(p, "Pattern failed to compile");
|
||||
add_def(g, argv[i], "pattern", p);
|
||||
add_def(g, NULL, argv[i], "pattern", p);
|
||||
++npatterns;
|
||||
} else {
|
||||
printf("Unrecognized flag: %s\n\n%s\n", argv[i], usage);
|
||||
|
54
compiler.c
54
compiler.c
@ -5,8 +5,8 @@
|
||||
#include "compiler.h"
|
||||
#include "utils.h"
|
||||
|
||||
static vm_op_t *expand_chain(vm_op_t *first);
|
||||
static vm_op_t *expand_choices(vm_op_t *first);
|
||||
static vm_op_t *expand_chain(file_t *f, vm_op_t *first);
|
||||
static vm_op_t *expand_choices(file_t *f, vm_op_t *first);
|
||||
static vm_op_t *chain_together(vm_op_t *first, vm_op_t *second);
|
||||
static void set_range(vm_op_t *op, ssize_t min, ssize_t max, vm_op_t *pat, vm_op_t *sep);
|
||||
|
||||
@ -31,11 +31,11 @@ static void set_range(vm_op_t *op, ssize_t min, ssize_t max, vm_op_t *pat, vm_op
|
||||
* followed by any patterns (e.g. "`x `y"), otherwise return
|
||||
* the original input.
|
||||
*/
|
||||
static vm_op_t *expand_chain(vm_op_t *first)
|
||||
static vm_op_t *expand_chain(file_t *f, vm_op_t *first)
|
||||
{
|
||||
vm_op_t *second = bpeg_simplepattern(first->end);
|
||||
vm_op_t *second = bpeg_simplepattern(f, first->end);
|
||||
if (second == NULL) return first;
|
||||
second = expand_chain(second);
|
||||
second = expand_chain(f, second);
|
||||
check(second->end > first->end, "No forward progress in chain!");
|
||||
return chain_together(first, second);
|
||||
}
|
||||
@ -45,14 +45,14 @@ static vm_op_t *expand_chain(vm_op_t *first)
|
||||
* followed by any "/"-separated patterns (e.g. "`x/`y"), otherwise
|
||||
* return the original input.
|
||||
*/
|
||||
static vm_op_t *expand_choices(vm_op_t *first)
|
||||
static vm_op_t *expand_choices(file_t *f, vm_op_t *first)
|
||||
{
|
||||
first = expand_chain(first);
|
||||
first = expand_chain(f, first);
|
||||
const char *str = first->end;
|
||||
if (!matchchar(&str, '/')) return first;
|
||||
vm_op_t *second = bpeg_simplepattern(str);
|
||||
vm_op_t *second = bpeg_simplepattern(f, str);
|
||||
check(second, "Expected pattern after '/'");
|
||||
second = expand_choices(second);
|
||||
second = expand_choices(f, second);
|
||||
vm_op_t *choice = calloc(sizeof(vm_op_t), 1);
|
||||
choice->op = VM_OTHERWISE;
|
||||
choice->start = first->start;
|
||||
@ -84,7 +84,7 @@ static vm_op_t *chain_together(vm_op_t *first, vm_op_t *second)
|
||||
/*
|
||||
* Compile a string of BPEG code into virtual machine opcodes
|
||||
*/
|
||||
vm_op_t *bpeg_simplepattern(const char *str)
|
||||
vm_op_t *bpeg_simplepattern(file_t *f, const char *str)
|
||||
{
|
||||
if (!*str) return NULL;
|
||||
str = after_spaces(str);
|
||||
@ -100,7 +100,7 @@ vm_op_t *bpeg_simplepattern(const char *str)
|
||||
if (matchchar(&str, '.')) { // ".."
|
||||
if (matchchar(&str, '.')) // "..."
|
||||
op->multiline = 1;
|
||||
vm_op_t *till = bpeg_simplepattern(str);
|
||||
vm_op_t *till = bpeg_simplepattern(f, str);
|
||||
op->op = VM_UPTO_AND;
|
||||
op->len = -1;
|
||||
op->args.pat = till;
|
||||
@ -177,7 +177,7 @@ vm_op_t *bpeg_simplepattern(const char *str)
|
||||
}
|
||||
// Not <pat>
|
||||
case '!': {
|
||||
vm_op_t *p = bpeg_simplepattern(str);
|
||||
vm_op_t *p = bpeg_simplepattern(f, str);
|
||||
check(p, "Expected pattern after '!'\n");
|
||||
str = p->end;
|
||||
op->op = VM_NOT;
|
||||
@ -202,13 +202,13 @@ vm_op_t *bpeg_simplepattern(const char *str)
|
||||
} else {
|
||||
min = n1, max = n1;
|
||||
}
|
||||
vm_op_t *pat = bpeg_simplepattern(str);
|
||||
vm_op_t *pat = bpeg_simplepattern(f, str);
|
||||
check(pat, "Expected pattern after repetition count");
|
||||
str = pat->end;
|
||||
str = after_spaces(str);
|
||||
vm_op_t *sep = NULL;
|
||||
if (matchchar(&str, '%')) {
|
||||
sep = bpeg_simplepattern(str);
|
||||
sep = bpeg_simplepattern(f, str);
|
||||
check(sep, "Expected pattern for separator after '%%'");
|
||||
str = sep->end;
|
||||
} else {
|
||||
@ -219,7 +219,7 @@ vm_op_t *bpeg_simplepattern(const char *str)
|
||||
}
|
||||
// Lookbehind
|
||||
case '<': {
|
||||
vm_op_t *pat = bpeg_simplepattern(str);
|
||||
vm_op_t *pat = bpeg_simplepattern(f, str);
|
||||
check(pat, "Expected pattern after <");
|
||||
str = pat->end;
|
||||
check(pat->len != -1, "Lookbehind patterns must have a fixed length");
|
||||
@ -231,7 +231,7 @@ vm_op_t *bpeg_simplepattern(const char *str)
|
||||
}
|
||||
// Lookahead
|
||||
case '>': {
|
||||
vm_op_t *pat = bpeg_simplepattern(str);
|
||||
vm_op_t *pat = bpeg_simplepattern(f, str);
|
||||
check(pat, "Expected pattern after >");
|
||||
str = pat->end;
|
||||
op->op = VM_BEFORE;
|
||||
@ -242,9 +242,9 @@ vm_op_t *bpeg_simplepattern(const char *str)
|
||||
// Parentheses
|
||||
case '(': {
|
||||
free(op);
|
||||
op = bpeg_simplepattern(str);
|
||||
op = bpeg_simplepattern(f, str);
|
||||
check(op, "Expected pattern inside parentheses");
|
||||
op = expand_choices(op);
|
||||
op = expand_choices(f, op);
|
||||
str = op->end;
|
||||
str = after_spaces(str);
|
||||
check(matchchar(&str, ')'), "Expected closing ')' instead of \"%s\"", str);
|
||||
@ -261,7 +261,7 @@ vm_op_t *bpeg_simplepattern(const char *str)
|
||||
str = closing;
|
||||
check(matchchar(&str, ']'), "Expected closing ']'");
|
||||
}
|
||||
vm_op_t *pat = bpeg_simplepattern(str);
|
||||
vm_op_t *pat = bpeg_simplepattern(f, str);
|
||||
check(pat, "Expected pattern after @");
|
||||
str = pat->end;
|
||||
op->args.capture.capture_pat = pat;
|
||||
@ -275,9 +275,9 @@ vm_op_t *bpeg_simplepattern(const char *str)
|
||||
if (strncmp(str, "=>", 2) == 0) {
|
||||
str += strlen("=>");
|
||||
} else {
|
||||
pat = bpeg_simplepattern(str);
|
||||
pat = bpeg_simplepattern(f, str);
|
||||
check(pat, "Invalid pattern after '{'");
|
||||
pat = expand_choices(pat);
|
||||
pat = expand_choices(f, pat);
|
||||
str = pat->end;
|
||||
str = after_spaces(str);
|
||||
check(matchchar(&str, '=') && matchchar(&str, '>'),
|
||||
@ -360,7 +360,7 @@ vm_op_t *bpeg_simplepattern(const char *str)
|
||||
if (strncmp(after_spaces(str), "==", 2) == 0) {
|
||||
str = after_spaces(str)+2;
|
||||
vm_op_t *first = op;
|
||||
vm_op_t *second = bpeg_simplepattern(str);
|
||||
vm_op_t *second = bpeg_simplepattern(f, str);
|
||||
check(second, "Expected pattern after '=='");
|
||||
check(first->len == -1 || second->len == -1 || first->len == second->len,
|
||||
"Two patterns cannot possibly match the same (different lengths: %ld != %ld)",
|
||||
@ -382,7 +382,7 @@ vm_op_t *bpeg_simplepattern(const char *str)
|
||||
/*
|
||||
* Similar to bpeg_simplepattern, except that the pattern begins with an implicit, unclosable quote.
|
||||
*/
|
||||
vm_op_t *bpeg_stringpattern(const char *str)
|
||||
vm_op_t *bpeg_stringpattern(file_t *f, const char *str)
|
||||
{
|
||||
vm_op_t *ret = NULL;
|
||||
while (*str) {
|
||||
@ -395,7 +395,7 @@ vm_op_t *bpeg_stringpattern(const char *str)
|
||||
for (; *str; str++) {
|
||||
if (*str == '\\') {
|
||||
check(str[1], "Expected more string contents after backslash");
|
||||
interp = bpeg_simplepattern(str + 1);
|
||||
interp = bpeg_simplepattern(f, str + 1);
|
||||
check(interp != NULL, "No valid BPEG pattern detected after backslash");
|
||||
break;
|
||||
}
|
||||
@ -448,10 +448,10 @@ vm_op_t *bpeg_replacement(vm_op_t *pat, const char *replacement)
|
||||
return op;
|
||||
}
|
||||
|
||||
vm_op_t *bpeg_pattern(const char *str)
|
||||
vm_op_t *bpeg_pattern(file_t *f, const char *str)
|
||||
{
|
||||
vm_op_t *op = bpeg_simplepattern(str);
|
||||
if (op != NULL) op = expand_choices(op);
|
||||
vm_op_t *op = bpeg_simplepattern(f, str);
|
||||
if (op != NULL) op = expand_choices(f, op);
|
||||
return op;
|
||||
}
|
||||
|
||||
|
@ -7,11 +7,12 @@
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "types.h"
|
||||
#include "file_loader.h"
|
||||
|
||||
vm_op_t *bpeg_simplepattern(const char *str);
|
||||
vm_op_t *bpeg_stringpattern(const char *str);
|
||||
vm_op_t *bpeg_simplepattern(file_t *f, const char *str);
|
||||
vm_op_t *bpeg_stringpattern(file_t *f, const char *str);
|
||||
vm_op_t *bpeg_replacement(vm_op_t *pat, const char *replacement);
|
||||
vm_op_t *bpeg_pattern(const char *str);
|
||||
vm_op_t *bpeg_pattern(file_t *f, const char *str);
|
||||
|
||||
#endif
|
||||
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
|
||||
|
102
file_loader.c
Normal file
102
file_loader.c
Normal file
@ -0,0 +1,102 @@
|
||||
/*
|
||||
* file_loader.c - Implementation of some file loading functionality.
|
||||
*/
|
||||
|
||||
#include <ctype.h>
|
||||
#include <fcntl.h>
|
||||
#include <limits.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "file_loader.h"
|
||||
|
||||
/*
|
||||
* Read an entire file into memory.
|
||||
*/
|
||||
file_t *load_file(const char *filename)
|
||||
{
|
||||
if (filename == NULL) filename = "-";
|
||||
int fd = strcmp(filename, "-") != 0 ? open(filename, O_RDONLY) : STDIN_FILENO;
|
||||
if (fd < 0) return NULL;
|
||||
file_t *f = calloc(sizeof(file_t), 1);
|
||||
f->filename = strdup(filename);
|
||||
// TODO: use mmap when possible
|
||||
f->mmapped = 0;
|
||||
size_t capacity = 1000;
|
||||
f->length = 0;
|
||||
f->contents = calloc(sizeof(char), capacity+1);
|
||||
ssize_t just_read;
|
||||
while ((just_read=read(fd, &f->contents[f->length], capacity - f->length)) > 0) {
|
||||
f->length += (size_t)just_read;
|
||||
if (f->length >= capacity)
|
||||
f->contents = realloc(f->contents, sizeof(char)*(capacity *= 2) + 1);
|
||||
}
|
||||
f->contents[f->length] = '\0';
|
||||
close(fd);
|
||||
|
||||
// Calculate line numbers:
|
||||
size_t linecap = 10;
|
||||
f->lines = calloc(sizeof(const char*), linecap);
|
||||
f->nlines = 1;
|
||||
char *p = f->contents;
|
||||
for (size_t n = 0; p && *p; ++n) {
|
||||
if (n >= linecap)
|
||||
f->lines = realloc(f->lines, sizeof(const char*)*(linecap *= 2));
|
||||
f->lines[n] = p;
|
||||
p = strchr(p, '\n');
|
||||
if (p) ++p;
|
||||
}
|
||||
|
||||
return f;
|
||||
}
|
||||
|
||||
/*
 * Free a file_t created by load_file() along with everything it owns, then
 * set the caller's pointer to NULL.
 *
 * f: address of the file pointer to release. A NULL address, or a pointer
 *    that is already NULL, is accepted and ignored, so the function is safe
 *    to call after a failed load_file() and safe to call twice.
 */
void destroy_file(file_t **f)
{
    if (f == NULL || *f == NULL) return;

    file_t *doomed = *f;
    // free(NULL) is a no-op, so the members need no individual guards.
    free((char*)doomed->filename);
    doomed->filename = NULL;
    free(doomed->lines);
    doomed->lines = NULL;
    free(doomed->contents);
    doomed->contents = NULL;
    free(doomed);
    *f = NULL;
}
|
||||
|
||||
/*
 * Return the 1-based number of the line that contains the position p.
 *
 * f: a loaded file; p: a pointer into f->contents.
 *
 * The result is the last line whose start is at or before p, so any position
 * on the final line (including the terminating NUL) yields f->nlines.
 * Positions before the second line yield 1. Returns 0 only when there is no
 * line index to search.
 */
size_t get_line_number(file_t *f, const char *p)
{
    if (f == NULL || f->lines == NULL || f->nlines == 0) return 0;

    // Binary search for the greatest index whose line start is <= p.
    // Invariant: the answer lies in [lo, hi).
    size_t lo = 0, hi = f->nlines;
    while (hi - lo > 1) {
        size_t mid = lo + (hi - lo) / 2;
        if (f->lines[mid] <= p)
            lo = mid;
        else
            hi = mid;
    }
    return lo + 1;
}
|
||||
|
||||
/*
 * Look up the start of a line by its 1-based number.
 *
 * Returns a pointer to the first character of that line, or NULL when
 * line_number is 0 or greater than the number of lines in the file.
 */
const char *get_line(file_t *f, size_t line_number)
{
    int in_range = line_number >= 1 && line_number <= f->nlines;
    if (!in_range) {
        return NULL;
    }
    size_t index = line_number - 1;
    return f->lines[index];
}
|
||||
|
||||
/*
 * Print a message about a span of the file, followed by the source line it
 * refers to with the span highlighted.
 *
 * dest:  stream to write to.
 * f:     the loaded file that start/end point into.
 * start: first character of the span; its line is the one that gets printed.
 * end:   one past the last character of the span. NULL, or a position beyond
 *        the end of the line, means "up to the end of the line".
 * msg:   text printed after the "filename:line:column:" prefix.
 *
 * If the line containing start cannot be determined, only the filename and
 * the message are printed.
 */
void fprint_line(FILE *dest, file_t *f, const char *start, const char *end, const char *msg)
{
    size_t linenum = get_line_number(f, start);
    const char *line = get_line(f, linenum);
    if (line == NULL || start < line) {
        // No usable line to show; avoid doing arithmetic on a NULL pointer.
        fprintf(dest, "\033[1m%s:\033[0m %s\n", f->filename, msg);
        return;
    }

    // The line ends at its newline, or at the NUL when it is the last line
    // of a file without a trailing newline.
    const char *eol = strchr(line, '\n');
    if (eol == NULL) eol = line + strlen(line);

    // Clamp the span to the line so every precision below is non-negative.
    if (start > eol) start = eol;
    if (end == NULL || end > eol) end = eol;
    if (end < start) end = start;

    size_t charnum = 1 + (size_t)(start - line);
    fprintf(dest, "\033[1m%s:%zu:%zu:\033[0m %s\n",
            f->filename, linenum, charnum, msg);
    // Text before the span, the highlighted span, then the rest of the line.
    fprintf(dest, "\033[2m%5zu |\033[0m %.*s\033[31;4;1m%.*s\033[0m%.*s\n",
            linenum,
            (int)(start - line), line,
            (int)(end - start), start,
            (int)(eol - end), end);
}
|
22
file_loader.h
Normal file
22
file_loader.h
Normal file
@ -0,0 +1,22 @@
|
||||
/*
|
||||
* file_loader.h - Definitions of an API for loading files.
|
||||
*/
|
||||
#ifndef FILE_LOADER__H
|
||||
#define FILE_LOADER__H
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
/*
 * An in-memory copy of a file plus an index of its lines.
 */
typedef struct {
    const char *filename;   // Name the file was loaded under
    char *contents;         // The file's bytes
    char **lines;           // Pointers into contents, one per line start
    size_t length;          // Number of bytes in contents
    size_t nlines;          // Number of entries in lines
    unsigned int mmapped:1; // Flag reserved for mmap-backed contents
} file_t;
|
||||
|
||||
file_t *load_file(const char *filename);
|
||||
void destroy_file(file_t **f);
|
||||
size_t get_line_number(file_t *f, const char *p);
|
||||
const char *get_line(file_t *f, size_t line_number);
|
||||
void fprint_line(FILE *dest, file_t *f, const char *start, const char *end, const char *msg);
|
||||
|
||||
#endif
|
11
grammar.c
11
grammar.c
@ -4,6 +4,7 @@
|
||||
|
||||
#include "grammar.h"
|
||||
#include "compiler.h"
|
||||
#include "file_loader.h"
|
||||
#include "utils.h"
|
||||
|
||||
grammar_t *new_grammar(void)
|
||||
@ -13,12 +14,13 @@ grammar_t *new_grammar(void)
|
||||
return g;
|
||||
}
|
||||
|
||||
void add_def(grammar_t *g, const char *src, const char *name, vm_op_t *op)
|
||||
void add_def(grammar_t *g, file_t *f, const char *src, const char *name, vm_op_t *op)
|
||||
{
|
||||
if (g->defcount >= g->defcapacity) {
|
||||
g->definitions = realloc(g->definitions, sizeof(&g->definitions[0])*(g->defcapacity += 32));
|
||||
}
|
||||
int i = g->defcount;
|
||||
g->definitions[i].file = f;
|
||||
g->definitions[i].source = src;
|
||||
g->definitions[i].name = name;
|
||||
g->definitions[i].op = op;
|
||||
@ -29,9 +31,10 @@ void add_def(grammar_t *g, const char *src, const char *name, vm_op_t *op)
|
||||
* Load the given grammar (semicolon-separated definitions)
|
||||
* and return the first rule defined.
|
||||
*/
|
||||
vm_op_t *load_grammar(grammar_t *g, const char *src)
|
||||
vm_op_t *load_grammar(grammar_t *g, file_t *f)
|
||||
{
|
||||
vm_op_t *ret = NULL;
|
||||
const char *src = f->contents;
|
||||
do {
|
||||
src = after_spaces(src);
|
||||
if (!*src) break;
|
||||
@ -41,9 +44,9 @@ vm_op_t *load_grammar(grammar_t *g, const char *src)
|
||||
name = strndup(name, (size_t)(name_end-name));
|
||||
src = after_spaces(name_end);
|
||||
check(matchchar(&src, '='), "Expected '=' in definition");
|
||||
vm_op_t *op = bpeg_pattern(src);
|
||||
vm_op_t *op = bpeg_pattern(f, src);
|
||||
check(op, "Couldn't load definition");
|
||||
add_def(g, src, name, op);
|
||||
add_def(g, f, src, name, op);
|
||||
if (ret == NULL) {
|
||||
ret = op;
|
||||
}
|
||||
|
@ -7,13 +7,14 @@
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "file_loader.h"
|
||||
#include "types.h"
|
||||
|
||||
grammar_t *new_grammar(void);
|
||||
void add_def(grammar_t *g, const char *src, const char *name, vm_op_t *op);
|
||||
void add_def(grammar_t *g, file_t *f, const char *src, const char *name, vm_op_t *op);
|
||||
void push_backref(grammar_t *g, const char *name, match_t *capture);
|
||||
void pop_backrefs(grammar_t *g, size_t count);
|
||||
vm_op_t *load_grammar(grammar_t *g, const char *source);
|
||||
vm_op_t *load_grammar(grammar_t *g, file_t *f);
|
||||
vm_op_t *lookup(grammar_t *g, const char *name);
|
||||
|
||||
#endif
|
||||
|
3
types.h
3
types.h
@ -6,6 +6,8 @@
|
||||
|
||||
#include <sys/types.h>
|
||||
|
||||
#include "file_loader.h"
|
||||
|
||||
enum BPEGFlag {
|
||||
BPEG_VERBOSE = 1 << 0,
|
||||
BPEG_IGNORECASE = 1 << 1,
|
||||
@ -85,6 +87,7 @@ typedef struct match_s {
|
||||
typedef struct {
|
||||
const char *name;
|
||||
const char *source;
|
||||
file_t *file;
|
||||
vm_op_t *op;
|
||||
} def_t;
|
||||
|
||||
|
23
utils.c
23
utils.c
@ -153,28 +153,5 @@ size_t unescape_string(char *dest, const char *src, size_t bufsize)
|
||||
#undef PUT
|
||||
}
|
||||
|
||||
/*
|
||||
* Read an entire file into memory. (Guaranteeing that ret[-1] == '\0')
|
||||
*/
|
||||
char *readfile(int fd)
|
||||
{
|
||||
size_t capacity = 1000, len = 0;
|
||||
char *buf = calloc(sizeof(char), capacity+1);
|
||||
buf[len++] = '\0';
|
||||
ssize_t just_read;
|
||||
while ((just_read=read(fd, &buf[len], capacity-len)) > 0) {
|
||||
len += (size_t)just_read;
|
||||
if (len >= capacity)
|
||||
buf = realloc(buf, sizeof(char)*(capacity *= 2));
|
||||
}
|
||||
buf[len] = '\0';
|
||||
close(fd);
|
||||
return &buf[1];
|
||||
}
|
||||
|
||||
void freefile(char *f)
|
||||
{
|
||||
free(&f[-1]);
|
||||
}
|
||||
|
||||
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
|
||||
|
2
utils.h
2
utils.h
@ -17,8 +17,6 @@
|
||||
#define check(cond, ...) do { if (!(cond)) { fprintf(stderr, __VA_ARGS__); fwrite("\n", 1, 1, stderr); _exit(1); } } while(0)
|
||||
#define debug(...) do { if (verbose) fprintf(stderr, __VA_ARGS__); } while(0)
|
||||
|
||||
char *readfile(int fd);
|
||||
void freefile(char *f);
|
||||
char unescapechar(const char *escaped, const char **end);
|
||||
const char *after_name(const char *str);
|
||||
const char *after_spaces(const char *str);
|
||||
|
50
vm.c
50
vm.c
@ -77,7 +77,7 @@ typedef struct recursive_ref_s {
|
||||
* a match struct, or NULL if no match is found.
|
||||
* The returned value should be free()'d to avoid memory leaking.
|
||||
*/
|
||||
static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, unsigned int flags, recursive_ref_t *rec)
|
||||
static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, unsigned int flags, recursive_ref_t *rec)
|
||||
{
|
||||
switch (op->op) {
|
||||
case VM_EMPTY: {
|
||||
@ -116,7 +116,7 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, unsigned int
|
||||
return m;
|
||||
}
|
||||
case VM_NOT: {
|
||||
match_t *m = _match(g, str, op->args.pat, flags, rec);
|
||||
match_t *m = _match(g, f, str, op->args.pat, flags, rec);
|
||||
if (m != NULL) {
|
||||
destroy_match(&m);
|
||||
return NULL;
|
||||
@ -134,7 +134,7 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, unsigned int
|
||||
if (op->args.pat) {
|
||||
for (const char *prev = NULL; prev < str; ) {
|
||||
prev = str;
|
||||
match_t *p = _match(g, str, op->args.pat, flags, rec);
|
||||
match_t *p = _match(g, f, str, op->args.pat, flags, rec);
|
||||
if (p) {
|
||||
m->child = p;
|
||||
m->end = p->end;
|
||||
@ -168,11 +168,11 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, unsigned int
|
||||
// Separator
|
||||
match_t *sep = NULL;
|
||||
if (op->args.repetitions.sep != NULL && reps > 0) {
|
||||
sep = _match(g, str, op->args.repetitions.sep, flags, rec);
|
||||
sep = _match(g, f, str, op->args.repetitions.sep, flags, rec);
|
||||
if (sep == NULL) break;
|
||||
str = sep->end;
|
||||
}
|
||||
match_t *p = _match(g, str, op->args.repetitions.repeat_pat, flags, rec);
|
||||
match_t *p = _match(g, f, str, op->args.repetitions.repeat_pat, flags, rec);
|
||||
if (p == NULL || (p->end == prev && reps > 0)) { // Prevent infinite loops
|
||||
destroy_match(&sep);
|
||||
destroy_match(&p);
|
||||
@ -204,11 +204,8 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, unsigned int
|
||||
case VM_AFTER: {
|
||||
ssize_t backtrack = op->args.pat->len;
|
||||
check(backtrack != -1, "'<' is only allowed for fixed-length operations");
|
||||
// Check for necessary space:
|
||||
for (int i = 0; i < backtrack; i++) {
|
||||
if (str[-i] == '\0') return NULL;
|
||||
}
|
||||
match_t *before = _match(g, str - backtrack, op->args.pat, flags, rec);
|
||||
if (str - backtrack < f->contents) return NULL;
|
||||
match_t *before = _match(g, f, str - backtrack, op->args.pat, flags, rec);
|
||||
if (before == NULL) return NULL;
|
||||
match_t *m = calloc(sizeof(match_t), 1);
|
||||
m->start = str;
|
||||
@ -218,7 +215,7 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, unsigned int
|
||||
return m;
|
||||
}
|
||||
case VM_BEFORE: {
|
||||
match_t *after = _match(g, str, op->args.pat, flags, rec);
|
||||
match_t *after = _match(g, f, str, op->args.pat, flags, rec);
|
||||
if (after == NULL) return NULL;
|
||||
match_t *m = calloc(sizeof(match_t), 1);
|
||||
m->start = str;
|
||||
@ -228,7 +225,7 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, unsigned int
|
||||
return m;
|
||||
}
|
||||
case VM_CAPTURE: {
|
||||
match_t *p = _match(g, str, op->args.pat, flags, rec);
|
||||
match_t *p = _match(g, f, str, op->args.pat, flags, rec);
|
||||
if (p == NULL) return NULL;
|
||||
match_t *m = calloc(sizeof(match_t), 1);
|
||||
m->start = str;
|
||||
@ -241,16 +238,16 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, unsigned int
|
||||
return m;
|
||||
}
|
||||
case VM_OTHERWISE: {
|
||||
match_t *m = _match(g, str, op->args.multiple.first, flags, rec);
|
||||
if (m == NULL) m = _match(g, str, op->args.multiple.second, flags, rec);
|
||||
match_t *m = _match(g, f, str, op->args.multiple.first, flags, rec);
|
||||
if (m == NULL) m = _match(g, f, str, op->args.multiple.second, flags, rec);
|
||||
return m;
|
||||
}
|
||||
case VM_CHAIN: {
|
||||
match_t *m1 = _match(g, str, op->args.multiple.first, flags, rec);
|
||||
match_t *m1 = _match(g, f, str, op->args.multiple.first, flags, rec);
|
||||
if (m1 == NULL) return NULL;
|
||||
|
||||
size_t nbackrefs = push_backrefs(g, m1);
|
||||
match_t *m2 = _match(g, m1->end, op->args.multiple.second, flags, rec);
|
||||
match_t *m2 = _match(g, f, m1->end, op->args.multiple.second, flags, rec);
|
||||
pop_backrefs(g, nbackrefs);
|
||||
if (m2 == NULL) {
|
||||
destroy_match(&m1);
|
||||
@ -265,11 +262,11 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, unsigned int
|
||||
return m;
|
||||
}
|
||||
case VM_EQUAL: {
|
||||
match_t *m1 = _match(g, str, op->args.multiple.first, flags, rec);
|
||||
match_t *m1 = _match(g, f, str, op->args.multiple.first, flags, rec);
|
||||
if (m1 == NULL) return NULL;
|
||||
|
||||
// <p1>==<p2> matches iff both have the same start and end point:
|
||||
match_t *m2 = _match(g, str, op->args.multiple.second, flags, rec);
|
||||
match_t *m2 = _match(g, f, str, op->args.multiple.second, flags, rec);
|
||||
if (m2 == NULL || m2->end != m1->end) {
|
||||
destroy_match(&m1);
|
||||
destroy_match(&m2);
|
||||
@ -288,7 +285,7 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, unsigned int
|
||||
m->start = str;
|
||||
m->op = op;
|
||||
if (op->args.replace.replace_pat) {
|
||||
match_t *p = _match(g, str, op->args.replace.replace_pat, flags, rec);
|
||||
match_t *p = _match(g, f, str, op->args.replace.replace_pat, flags, rec);
|
||||
if (p == NULL) return NULL;
|
||||
m->child = p;
|
||||
m->end = p->end;
|
||||
@ -320,7 +317,7 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, unsigned int
|
||||
};
|
||||
match_t *best = NULL;
|
||||
left_recursive:;
|
||||
match_t *p = _match(g, str, r, flags, &wrap);
|
||||
match_t *p = _match(g, f, str, r, flags, &wrap);
|
||||
if (p == NULL) return best;
|
||||
if (wrap.hit && (best == NULL || p->end > best->end)) {
|
||||
best = p;
|
||||
@ -343,7 +340,10 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, unsigned int
|
||||
return match_backref(str, op, (match_t*)op->args.backref, flags);
|
||||
}
|
||||
case VM_NODENT: {
|
||||
if (str[-1] == '\0') { // First line
|
||||
size_t linenum = get_line_number(f, str);
|
||||
if (linenum == 1) { // First line
|
||||
if (str > f->contents)
|
||||
return NULL;
|
||||
match_t *m = calloc(sizeof(match_t), 1);
|
||||
m->start = str;
|
||||
m->end = str;
|
||||
@ -352,9 +352,7 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, unsigned int
|
||||
} else if (str[-1] != '\n') {
|
||||
return NULL; // Not at beginning of line
|
||||
}
|
||||
const char *p = &str[-1];
|
||||
while (*p == '\n') --p; // Skip blank lines
|
||||
while (p[-1] && p[-1] != '\n') --p; // Backtrack to start of last (nonblank) line
|
||||
const char *p = get_line(f, linenum - 1);
|
||||
// Count indentation:
|
||||
char denter = *p;
|
||||
int dents = 0;
|
||||
@ -686,9 +684,9 @@ static match_t *match_backref(const char *str, vm_op_t *op, match_t *cap, unsign
|
||||
return ret;
|
||||
}
|
||||
|
||||
match_t *match(grammar_t *g, const char *str, vm_op_t *op, unsigned int flags)
|
||||
match_t *match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, unsigned int flags)
|
||||
{
|
||||
return _match(g, str, op, flags, NULL);
|
||||
return _match(g, f, str, op, flags, NULL);
|
||||
}
|
||||
|
||||
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
|
||||
|
2
vm.h
2
vm.h
@ -12,7 +12,7 @@
|
||||
#include "types.h"
|
||||
|
||||
const char *opcode_name(enum VMOpcode o);
|
||||
match_t *match(grammar_t *g, const char *str, vm_op_t *op, unsigned int flags);
|
||||
match_t *match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, unsigned int flags);
|
||||
void destroy_match(match_t **m);
|
||||
void print_pattern(vm_op_t *op);
|
||||
void print_match(match_t *m, const char *color, int verbose);
|
||||
|
Loading…
Reference in New Issue
Block a user