aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- Makefile 2
-rw-r--r-- bpeg.c 32
-rw-r--r-- bpeg.h 61
-rw-r--r-- vm.h 50
4 files changed, 85 insertions, 60 deletions
diff --git a/Makefile b/Makefile
index e1ef268..5e50c9c 100644
--- a/Makefile
+++ b/Makefile
@@ -7,7 +7,7 @@ all: bpeg
clean:
rm -f bpeg
-bpeg: bpeg.c bpeg.h vm.h utils.h
+bpeg: bpeg.c bpeg.h utils.h
cc $(CFLAGS) $(OFLAGS) $< -o $@
.PHONY: all clean
diff --git a/bpeg.c b/bpeg.c
index d09dba3..31c2016 100644
--- a/bpeg.c
+++ b/bpeg.c
@@ -219,7 +219,7 @@ static match_t *match(const char *str, vm_op_t *op)
m->end = m->start;
}
// TODO: handle captures
- m->replacement = op->args.replace.replacement;
+ m->capture.replacement = op->args.replace.replacement;
return m;
}
case VM_REF: {
@@ -666,16 +666,19 @@ static vm_op_t *compile_bpeg(const char *str)
return op;
}
-static void load_def(const char *name, const char *def)
+static vm_op_t *load_def(const char *name, const char *def)
{
defs[ndefs].name = name;
- defs[ndefs].op = compile_bpeg(def);
+ vm_op_t *op = compile_bpeg(def);
+ defs[ndefs].op = op;
++ndefs;
+ return op;
}
static void load_defs(void)
{
- load_def("_", "` /\\t/\\n/\\r");
+ load_def("_", "*(` /\\t/\\n/\\r)");
+ load_def("__", "+(` /\\t/\\n/\\r)");
load_def("nl", "\\n");
load_def("crlf", "\\r\\n");
load_def("abc", "`a,z");
@@ -696,15 +699,28 @@ static void load_defs(void)
int main(int argc, char *argv[])
{
+ check(argc == 3, "Usage: bpeg <pat> <str>");
load_defs();
- char *lang = argc > 1 ? argv[1] : "'x''y'";
+ const char *lang = argc > 1 ? argv[1] : "'x''y'";
vm_op_t *op = compile_bpeg(lang);
check(op, "Failed to compile_bpeg input");
op = expand_choices(op);
- // TODO: check for semicolon and more rules
-
+ const char *defs = after_spaces(op->end);
+ while (*defs == ';') {
+ defs = after_spaces(++defs);
+ const char *name = defs;
+ check(isalpha(*name), "Definition must begin with a name");
+ while (isalpha(*defs)) ++defs;
+ name = strndup(name, (size_t)(defs-name));
+ defs = after_spaces(defs);
+ check(*defs == '=', "Expected '=' in definition");
+ ++defs;
+ vm_op_t *def = load_def(name, defs);
+ check(def, "Couldn't load definition");
+ defs = def->end;
+ }
char *str = argc > 2 ? argv[2] : "xyz";
@@ -725,3 +741,5 @@ int main(int argc, char *argv[])
return 0;
}
+
+//vim: ts=4
diff --git a/bpeg.h b/bpeg.h
index 8bc813a..a855e8a 100644
--- a/bpeg.h
+++ b/bpeg.h
@@ -8,18 +8,75 @@
#include <unistd.h>
#include "utils.h"
-#include "vm.h"
+/*
+ * Pattern matching result object
+ */
typedef struct match_s {
+ // Where the match starts and ends (end is after the last character)
const char *start, *end;
union {
unsigned int is_capture:1;
const char *name;
+ const char *replacement;
} capture;
- const char *replacement;
struct match_s *child, *nextsibling;
} match_t;
+/*
+ * BPEG virtual machine opcodes
+ */
+enum VMOpcode {
+ VM_EMPTY = 0,
+ VM_ANYCHAR = 1,
+ VM_STRING,
+ VM_RANGE,
+ VM_NOT,
+ VM_UPTO,
+ VM_UPTO_AND,
+ VM_REPEAT,
+ VM_BEFORE,
+ VM_AFTER,
+ VM_CAPTURE,
+ VM_OTHERWISE,
+ VM_CHAIN,
+ VM_REPLACE,
+ VM_REF,
+};
+
+/*
+ * A struct representing a BPEG virtual machine operation
+ */
+typedef struct vm_op_s {
+ enum VMOpcode op;
+ const char *start, *end;
+ // Length of the match, if constant, otherwise -1
+ ssize_t len;
+ union {
+ const char *s;
+ struct {
+ char low, high;
+ } range;
+ struct {
+ ssize_t min, max;
+ struct vm_op_s *sep, *repeat_pat;
+ } repetitions;
+ struct {
+ struct vm_op_s *first, *second;
+ } multiple;
+ struct {
+ struct vm_op_s *replace_pat;
+ const char *replacement;
+ } replace;
+ struct {
+ struct vm_op_s *capture_pat;
+ char *name;
+ } capture;
+ struct vm_op_s *pat;
+ } args;
+} vm_op_t;
+
+
static match_t *free_match(match_t *m);
static match_t *match(const char *str, vm_op_t *op);
static void set_range(vm_op_t *op, ssize_t min, ssize_t max, vm_op_t *pat, vm_op_t *sep);
diff --git a/vm.h b/vm.h
deleted file mode 100644
index 2123c35..0000000
--- a/vm.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * vm.h - Source code for the BPEG virtual machine datatypes
- */
-
-enum VMOpcode {
- VM_EMPTY = 0,
- VM_ANYCHAR = 1,
- VM_STRING,
- VM_RANGE,
- VM_NOT,
- VM_UPTO,
- VM_UPTO_AND,
- VM_REPEAT,
- VM_BEFORE,
- VM_AFTER,
- VM_CAPTURE,
- VM_OTHERWISE,
- VM_CHAIN,
- VM_REPLACE,
- VM_REF,
-};
-
-typedef struct vm_op_s {
- enum VMOpcode op;
- const char *start, *end;
- ssize_t len;
- union {
- const char *s;
- struct {
- char low, high;
- } range;
- struct {
- ssize_t min, max;
- struct vm_op_s *sep, *repeat_pat;
- } repetitions;
- struct {
- struct vm_op_s *first, *second;
- } multiple;
- struct {
- struct vm_op_s *replace_pat;
- const char *replacement;
- } replace;
- struct {
- struct vm_op_s *capture_pat;
- char *name;
- } capture;
- struct vm_op_s *pat;
- } args;
-} vm_op_t;
-