Updates and cleanup
This commit is contained in:
parent
f7cdd6d4d2
commit
a76c333877
2
Makefile
2
Makefile
@ -7,7 +7,7 @@ all: bpeg
|
||||
clean:
|
||||
rm -f bpeg
|
||||
|
||||
bpeg: bpeg.c bpeg.h vm.h utils.h
|
||||
bpeg: bpeg.c bpeg.h utils.h
|
||||
cc $(CFLAGS) $(OFLAGS) $< -o $@
|
||||
|
||||
.PHONY: all clean
|
||||
|
32
bpeg.c
32
bpeg.c
@ -219,7 +219,7 @@ static match_t *match(const char *str, vm_op_t *op)
|
||||
m->end = m->start;
|
||||
}
|
||||
// TODO: handle captures
|
||||
m->replacement = op->args.replace.replacement;
|
||||
m->capture.replacement = op->args.replace.replacement;
|
||||
return m;
|
||||
}
|
||||
case VM_REF: {
|
||||
@ -666,16 +666,19 @@ static vm_op_t *compile_bpeg(const char *str)
|
||||
return op;
|
||||
}
|
||||
|
||||
static void load_def(const char *name, const char *def)
|
||||
static vm_op_t *load_def(const char *name, const char *def)
|
||||
{
|
||||
defs[ndefs].name = name;
|
||||
defs[ndefs].op = compile_bpeg(def);
|
||||
vm_op_t *op = compile_bpeg(def);
|
||||
defs[ndefs].op = op;
|
||||
++ndefs;
|
||||
return op;
|
||||
}
|
||||
|
||||
static void load_defs(void)
|
||||
{
|
||||
load_def("_", "` /\\t/\\n/\\r");
|
||||
load_def("_", "*(` /\\t/\\n/\\r)");
|
||||
load_def("__", "+(` /\\t/\\n/\\r)");
|
||||
load_def("nl", "\\n");
|
||||
load_def("crlf", "\\r\\n");
|
||||
load_def("abc", "`a,z");
|
||||
@ -696,15 +699,28 @@ static void load_defs(void)
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
check(argc == 3, "Usage: bpeg <pat> <str>");
|
||||
load_defs();
|
||||
|
||||
char *lang = argc > 1 ? argv[1] : "'x''y'";
|
||||
const char *lang = argc > 1 ? argv[1] : "'x''y'";
|
||||
vm_op_t *op = compile_bpeg(lang);
|
||||
check(op, "Failed to compile_bpeg input");
|
||||
op = expand_choices(op);
|
||||
|
||||
// TODO: check for semicolon and more rules
|
||||
|
||||
const char *defs = after_spaces(op->end);
|
||||
while (*defs == ';') {
|
||||
defs = after_spaces(++defs);
|
||||
const char *name = defs;
|
||||
check(isalpha(*name), "Definition must begin with a name");
|
||||
while (isalpha(*defs)) ++defs;
|
||||
name = strndup(name, (size_t)(defs-name));
|
||||
defs = after_spaces(defs);
|
||||
check(*defs == '=', "Expected '=' in definition");
|
||||
++defs;
|
||||
vm_op_t *def = load_def(name, defs);
|
||||
check(def, "Couldn't load definition");
|
||||
defs = def->end;
|
||||
}
|
||||
|
||||
char *str = argc > 2 ? argv[2] : "xyz";
|
||||
|
||||
@ -725,3 +741,5 @@ int main(int argc, char *argv[])
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
//vim: ts=4
|
||||
|
61
bpeg.h
61
bpeg.h
@ -8,18 +8,75 @@
|
||||
#include <unistd.h>
|
||||
|
||||
#include "utils.h"
|
||||
#include "vm.h"
|
||||
|
||||
/*
|
||||
* Pattern matching result object
|
||||
*/
|
||||
typedef struct match_s {
|
||||
// Where the match starts and ends (end is after the last character)
|
||||
const char *start, *end;
|
||||
union {
|
||||
unsigned int is_capture:1;
|
||||
const char *name;
|
||||
const char *replacement;
|
||||
} capture;
|
||||
const char *replacement;
|
||||
struct match_s *child, *nextsibling;
|
||||
} match_t;
|
||||
|
||||
/*
|
||||
* BPEG virtual machine opcodes
|
||||
*/
|
||||
enum VMOpcode {
|
||||
VM_EMPTY = 0,
|
||||
VM_ANYCHAR = 1,
|
||||
VM_STRING,
|
||||
VM_RANGE,
|
||||
VM_NOT,
|
||||
VM_UPTO,
|
||||
VM_UPTO_AND,
|
||||
VM_REPEAT,
|
||||
VM_BEFORE,
|
||||
VM_AFTER,
|
||||
VM_CAPTURE,
|
||||
VM_OTHERWISE,
|
||||
VM_CHAIN,
|
||||
VM_REPLACE,
|
||||
VM_REF,
|
||||
};
|
||||
|
||||
/*
|
||||
* A struct representing a BPEG virtual machine operation
|
||||
*/
|
||||
typedef struct vm_op_s {
|
||||
enum VMOpcode op;
|
||||
const char *start, *end;
|
||||
// Length of the match, if constant, otherwise -1
|
||||
ssize_t len;
|
||||
union {
|
||||
const char *s;
|
||||
struct {
|
||||
char low, high;
|
||||
} range;
|
||||
struct {
|
||||
ssize_t min, max;
|
||||
struct vm_op_s *sep, *repeat_pat;
|
||||
} repetitions;
|
||||
struct {
|
||||
struct vm_op_s *first, *second;
|
||||
} multiple;
|
||||
struct {
|
||||
struct vm_op_s *replace_pat;
|
||||
const char *replacement;
|
||||
} replace;
|
||||
struct {
|
||||
struct vm_op_s *capture_pat;
|
||||
char *name;
|
||||
} capture;
|
||||
struct vm_op_s *pat;
|
||||
} args;
|
||||
} vm_op_t;
|
||||
|
||||
|
||||
static match_t *free_match(match_t *m);
|
||||
static match_t *match(const char *str, vm_op_t *op);
|
||||
static void set_range(vm_op_t *op, ssize_t min, ssize_t max, vm_op_t *pat, vm_op_t *sep);
|
||||
|
50
vm.h
50
vm.h
@ -1,50 +0,0 @@
|
||||
/*
|
||||
* vm.h - Source code for the BPEG virtual machine datatypes
|
||||
*/
|
||||
|
||||
enum VMOpcode {
|
||||
VM_EMPTY = 0,
|
||||
VM_ANYCHAR = 1,
|
||||
VM_STRING,
|
||||
VM_RANGE,
|
||||
VM_NOT,
|
||||
VM_UPTO,
|
||||
VM_UPTO_AND,
|
||||
VM_REPEAT,
|
||||
VM_BEFORE,
|
||||
VM_AFTER,
|
||||
VM_CAPTURE,
|
||||
VM_OTHERWISE,
|
||||
VM_CHAIN,
|
||||
VM_REPLACE,
|
||||
VM_REF,
|
||||
};
|
||||
|
||||
typedef struct vm_op_s {
|
||||
enum VMOpcode op;
|
||||
const char *start, *end;
|
||||
ssize_t len;
|
||||
union {
|
||||
const char *s;
|
||||
struct {
|
||||
char low, high;
|
||||
} range;
|
||||
struct {
|
||||
ssize_t min, max;
|
||||
struct vm_op_s *sep, *repeat_pat;
|
||||
} repetitions;
|
||||
struct {
|
||||
struct vm_op_s *first, *second;
|
||||
} multiple;
|
||||
struct {
|
||||
struct vm_op_s *replace_pat;
|
||||
const char *replacement;
|
||||
} replace;
|
||||
struct {
|
||||
struct vm_op_s *capture_pat;
|
||||
char *name;
|
||||
} capture;
|
||||
struct vm_op_s *pat;
|
||||
} args;
|
||||
} vm_op_t;
|
||||
|
Loading…
Reference in New Issue
Block a user