about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- bpeg.c 19
-rw-r--r-- compiler.c 51
-rw-r--r-- file_loader.c 22
-rw-r--r-- file_loader.h 3
-rw-r--r-- grammar.c 4
-rw-r--r-- json.c 2
-rw-r--r-- types.h 9
-rw-r--r-- utils.c 10
-rw-r--r-- utils.h 1
-rw-r--r-- viz.c 2
-rw-r--r-- vm.c 38
11 files changed, 96 insertions, 65 deletions
diff --git a/bpeg.c b/bpeg.c
index e517079..530857a 100644
--- a/bpeg.c
+++ b/bpeg.c
@@ -59,7 +59,7 @@ static char *getflag(const char *flag, char *argv[], int *i)
static int print_errors(file_t *f, match_t *m)
{
int ret = 0;
- if (m->op->op == VM_CAPTURE && m->value.name && streq(m->value.name, "!")) {
+ if (m->op->op == VM_CAPTURE && m->op->args.capture.name && streq(m->op->args.capture.name, "!")) {
printf("\033[31;1m");
print_match(f, m, print_options);
printf("\033[0m\n");
@@ -154,11 +154,12 @@ int main(int argc, char *argv[])
} else if (streq(argv[i], "--list-files")) {
flags |= BPEG_LISTFILES;
} else if (FLAG("--replace") || FLAG("-r")) {
+ file_t *pat_file = spoof_file("<pattern>", "pattern");
+ vm_op_t *patref = bpeg_pattern(pat_file, pat_file->contents);
file_t *replace_file = spoof_file("<replace argument>", flag);
- vm_op_t *patref = bpeg_pattern(replace_file, "pattern");
- vm_op_t *rep = bpeg_replacement(replace_file, patref, flag);
+ vm_op_t *rep = bpeg_replacement(replace_file, patref, replace_file->contents);
check(rep, "Replacement failed to compile: %s", flag);
- add_def(g, replace_file, flag, "replacement", rep);
+ add_def(g, replace_file, replace_file->contents, "replacement", rep);
rule = "replace-all";
} else if (FLAG("--grammar") || FLAG("-g")) {
file_t *f = load_file(flag);
@@ -179,7 +180,7 @@ int main(int argc, char *argv[])
*eq = '\0';
char *src = ++eq;
file_t *def_file = spoof_file(def, src);
- vm_op_t *pat = bpeg_pattern(def_file, src);
+ vm_op_t *pat = bpeg_pattern(def_file, def_file->contents);
check(pat, "Failed to compile pattern: %s", flag);
add_def(g, def_file, src, def, pat);
} else if (FLAG("--define-string") || FLAG("-D")) {
@@ -189,19 +190,19 @@ int main(int argc, char *argv[])
*eq = '\0';
char *src = ++eq;
file_t *def_file = spoof_file(def, flag);
- vm_op_t *pat = bpeg_stringpattern(def_file, src);
+ vm_op_t *pat = bpeg_stringpattern(def_file, def_file->contents);
check(pat, "Failed to compile pattern: %s", flag);
add_def(g, def_file, src, def, pat);
} else if (FLAG("--pattern") || FLAG("-p")) {
check(npatterns == 0, "Cannot define multiple patterns");
file_t *arg_file = spoof_file("<pattern argument>", flag);
- vm_op_t *p = bpeg_pattern(arg_file, flag);
+ vm_op_t *p = bpeg_pattern(arg_file, arg_file->contents);
check(p, "Pattern failed to compile: %s", flag);
add_def(g, arg_file, flag, "pattern", p);
++npatterns;
} else if (FLAG("--pattern-string") || FLAG("-P")) {
file_t *arg_file = spoof_file("<pattern argument>", flag);
- vm_op_t *p = bpeg_stringpattern(arg_file, flag);
+ vm_op_t *p = bpeg_stringpattern(arg_file, arg_file->contents);
check(p, "Pattern failed to compile: %s", flag);
add_def(g, arg_file, flag, "pattern", p);
++npatterns;
@@ -224,7 +225,7 @@ int main(int argc, char *argv[])
} else if (argv[i][0] != '-') {
if (npatterns > 0) break;
file_t *arg_file = spoof_file("<pattern argument>", argv[i]);
- vm_op_t *p = bpeg_stringpattern(arg_file, argv[i]);
+ vm_op_t *p = bpeg_stringpattern(arg_file, arg_file->contents);
check(p, "Pattern failed to compile: %s", argv[i]);
add_def(g, arg_file, argv[i], "pattern", p);
++npatterns;
diff --git a/compiler.c b/compiler.c
index 34519b5..fb90bc9 100644
--- a/compiler.c
+++ b/compiler.c
@@ -142,24 +142,26 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str)
}
// Char literals
case '`': {
- char literal[2] = {*str, '\0'};
+ char c = *str;
++str;
- if (!literal[0] || literal[0] == '\n')
+ if (!c || c == '\n')
file_err(f, str, str, "There should be a character here after the '`'");
op->len = 1;
if (matchchar(&str, '-')) { // Range
char c2 = *str;
if (!c2 || c2 == '\n')
file_err(f, str, str, "There should be a character here to complete the character range.");
- if (c2 < literal[0])
+ if (c2 < c)
file_err(f, origin, str+1, "Character ranges must be low-to-high, but this is high-to-low.");
op->op = VM_RANGE;
- op->args.range.low = (unsigned char)literal[0];
+ op->args.range.low = (unsigned char)c;
op->args.range.high = (unsigned char)c2;
++str;
} else {
op->op = VM_STRING;
- op->args.s = strdup(literal);
+ char *s = xcalloc(sizeof(char), 2);
+ s[0] = c;
+ op->args.s = s;
}
break;
}
@@ -187,16 +189,17 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str)
op->args.range.low = e;
op->args.range.high = e2;
} else {
- char literal[2] = {(char)e, '\0'};
op->op = VM_STRING;
- op->args.s = strdup(literal);
+ char *s = xcalloc(sizeof(char), 2);
+ s[0] = (char)e;
+ op->args.s = s;
}
break;
}
// String literal
case '"': case '\'': case '\002': {
char endquote = c == '\002' ? '\003' : c;
- char *literal = (char*)str;
+ char *start = (char*)str;
for (; *str && *str != endquote; str++) {
if (*str == '\\') {
if (!str[1] || str[1] == '\n')
@@ -205,8 +208,9 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str)
++str;
}
}
- size_t len = (size_t)(str - literal);
- literal = strndup(literal, len);
+ size_t len = (size_t)(str - start);
+ char *literal = xcalloc(sizeof(char), len+1);
+ memcpy(literal, start, len);
len = unescape_string(literal, literal, len);
op->op = VM_STRING;
@@ -381,8 +385,10 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str)
char quote = *str;
const char *replacement;
+ size_t replace_len;
if (matchchar(&str, '}')) {
replacement = strdup("");
+ replace_len = 0;
} else {
if (!(matchchar(&str, '"') || matchchar(&str, '\'')))
file_err(f, str, str, "There should be a string literal as a replacement here.");
@@ -395,15 +401,18 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str)
++str;
}
}
- replacement = strndup(repstr, (size_t)(str-repstr));
+ replace_len = (size_t)(str-repstr);
+ replacement = xcalloc(sizeof(char), replace_len+1);
+ memcpy((void*)replacement, repstr, (size_t)(str-repstr));
if (!matchchar(&str, quote))
file_err(f, &repstr[-1], str, "This string doesn't have a closing quote.");
if (!matchchar(&str, '}'))
file_err(f, origin, str, "This replacement doesn't have a closing '}'");
}
op->op = VM_REPLACE;
- op->args.replace.replace_pat = pat;
- op->args.replace.replacement = replacement;
+ op->args.replace.pat = pat;
+ op->args.replace.text = replacement;
+ op->args.replace.len = replace_len;
if (pat != NULL) op->len = pat->len;
break;
}
@@ -489,7 +498,7 @@ vm_op_t *bpeg_stringpattern(file_t *f, const char *str)
strop->start = str;
strop->len = 0;
strop->op = VM_STRING;
- char *literal = (char*)str;
+ char *start = (char*)str;
vm_op_t *interp = NULL;
for (; *str; str++) {
if (*str == '\\') {
@@ -519,8 +528,9 @@ vm_op_t *bpeg_stringpattern(file_t *f, const char *str)
}
}
// End of string
- size_t len = (size_t)(str - literal);
- literal = strndup(literal, len);
+ size_t len = (size_t)(str - start);
+ char *literal = xcalloc(sizeof(char), len+1);
+ memcpy(literal, start, len);
len = unescape_string(literal, literal, len);
strop->len = (ssize_t)len;
strop->args.s = literal;
@@ -552,7 +562,7 @@ vm_op_t *bpeg_replacement(file_t *f, vm_op_t *pat, const char *replacement)
op->op = VM_REPLACE;
op->start = pat->start;
op->len = pat->len;
- op->args.replace.replace_pat = pat;
+ op->args.replace.pat = pat;
const char *p = replacement;
for (; *p; p++) {
if (*p == '\\') {
@@ -561,8 +571,11 @@ vm_op_t *bpeg_replacement(file_t *f, vm_op_t *pat, const char *replacement)
++p;
}
}
- replacement = strndup(replacement, (size_t)(p-replacement));
- op->args.replace.replacement = replacement;
+ size_t rlen = (size_t)(p-replacement);
+ char *rcpy = xcalloc(sizeof(char), rlen + 1);
+ memcpy(rcpy, replacement, rlen);
+ op->args.replace.text = rcpy;
+ op->args.replace.len = rlen;
return op;
}
diff --git a/file_loader.c b/file_loader.c
index 9262d04..ab677a1 100644
--- a/file_loader.c
+++ b/file_loader.c
@@ -38,8 +38,9 @@ static void populate_lines(file_t *f)
file_t *load_file(const char *filename)
{
if (filename == NULL) filename = "-";
- int fd = strcmp(filename, "-") != 0 ? open(filename, O_RDONLY) : STDIN_FILENO;
+ int fd = streq(filename, "-") ? STDIN_FILENO : open(filename, O_RDONLY);
if (fd < 0) return NULL;
+ size_t length;
file_t *f = new(file_t);
f->filename = strdup(filename);
@@ -52,24 +53,24 @@ file_t *load_file(const char *filename)
goto skip_mmap;
f->mmapped = 1;
- f->length = (size_t)sb.st_size;
+ length = (size_t)sb.st_size;
goto finished_loading;
skip_mmap:
f->mmapped = 0;
size_t capacity = 1000;
- f->length = 0;
+ length = 0;
f->contents = xcalloc(sizeof(char), capacity);
ssize_t just_read;
- while ((just_read=read(fd, &f->contents[f->length], capacity - f->length)) > 0) {
- f->length += (size_t)just_read;
- if (f->length >= capacity)
+ while ((just_read=read(fd, &f->contents[length], capacity - length)) > 0) {
+ length += (size_t)just_read;
+ if (length >= capacity)
f->contents = xrealloc(f->contents, sizeof(char)*(capacity *= 2) + 1);
}
close(fd);
finished_loading:
- f->end = &f->contents[f->length];
+ f->end = &f->contents[length];
populate_lines(f);
return f;
}
@@ -83,8 +84,7 @@ file_t *spoof_file(const char *filename, char *text)
file_t *f = new(file_t);
f->filename = strdup(filename);
f->contents = text;
- f->length = strlen(text);
- f->end = &f->contents[f->length];
+ f->end = &f->contents[strlen(text)];
populate_lines(f);
return f;
}
@@ -101,7 +101,7 @@ void destroy_file(file_t **f)
}
if ((*f)->contents) {
if ((*f)->mmapped) {
- munmap((*f)->contents, (*f)->length);
+ munmap((*f)->contents, (size_t)((*f)->end - (*f)->contents));
} else {
free((*f)->contents);
}
@@ -160,3 +160,5 @@ void fprint_line(FILE *dest, file_t *f, const char *start, const char *end, cons
for (; p < end; ++p) fputc('^', dest);
fprintf(dest, "\033[0m\n");
}
+
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
diff --git a/file_loader.h b/file_loader.h
index cda8dc3..d6d305a 100644
--- a/file_loader.h
+++ b/file_loader.h
@@ -9,7 +9,7 @@
typedef struct {
const char *filename;
char *contents, **lines, *end;
- size_t length, nlines;
+ size_t nlines;
unsigned int mmapped:1;
} file_t;
@@ -27,3 +27,4 @@ __attribute__((format (printf, 5, 6)))
void fprint_line(FILE *dest, file_t *f, const char *start, const char *end, const char *fmt, ...);
#endif
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
diff --git a/grammar.c b/grammar.c
index 5c9beee..08fe37b 100644
--- a/grammar.c
+++ b/grammar.c
@@ -58,7 +58,7 @@ vm_op_t *load_grammar(grammar_t *g, file_t *f)
if (*src && matchchar(&src, ';'))
src = after_spaces(src);
}
- if (src < &f->contents[f->length-1]) {
+ if (src < f->end) {
fprint_line(stderr, f, src, NULL, "Invalid BPEG pattern");
_exit(1);
}
@@ -108,3 +108,5 @@ void pop_backrefs(grammar_t *g, size_t count)
--g->backrefcount;
}
}
+
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
diff --git a/json.c b/json.c
index 2c9b50a..f1a766c 100644
--- a/json.c
+++ b/json.c
@@ -45,3 +45,5 @@ void json_match(const char *text, match_t *m, int verbose)
{
_json_match(text, m, 0, verbose);
}
+
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
diff --git a/types.h b/types.h
index aa61c38..38c14bb 100644
--- a/types.h
+++ b/types.h
@@ -63,8 +63,9 @@ typedef struct vm_op_s {
struct vm_op_s *first, *second;
} multiple;
struct {
- struct vm_op_s *replace_pat;
- const char *replacement;
+ struct vm_op_s *pat;
+ const char *text;
+ size_t len;
} replace;
struct {
struct vm_op_s *capture_pat;
@@ -81,10 +82,6 @@ typedef struct vm_op_s {
typedef struct match_s {
// Where the match starts and ends (end is after the last character)
const char *start, *end;
- union {
- const char *name;
- const char *replacement;
- } value;
struct match_s *child, *nextsibling;
vm_op_t *op;
} match_t;
diff --git a/utils.c b/utils.c
index 16a53a3..53bc3fc 100644
--- a/utils.c
+++ b/utils.c
@@ -181,4 +181,14 @@ void *memcheck(void *p)
return p;
}
+
+int memicmp(const void *v1, const void *v2, size_t n) // memcmp() that ignores case; 0 means the first n bytes match
+{
+    int result = 0; // stays 0 when n == 0 or every compared byte pair matches
+    const unsigned char *s1 = v1, *s2 = v2; // unsigned: tolower() is undefined for negative non-EOF values
+    while (n-- > 0 && (result = tolower(*s1++) - tolower(*s2++)) == 0)
+        ; // the comparison and pointer advance both happen in the loop condition
+    return result; // difference of the first lowercased byte pair that differs
+}
+
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
diff --git a/utils.h b/utils.h
index 61c817f..2912426 100644
--- a/utils.h
+++ b/utils.h
@@ -29,6 +29,7 @@ int matchchar(const char **str, char c);
__attribute__((nonnull))
size_t unescape_string(char *dest, const char *src, size_t bufsize);
void *memcheck(void *p);
+int memicmp(const void *s1, const void *s2, size_t n);
#endif
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
diff --git a/viz.c b/viz.c
index 5e4e4d8..311564b 100644
--- a/viz.c
+++ b/viz.c
@@ -146,3 +146,5 @@ void visualize_match(match_t *m)
_visualize_patterns(m);
printf("\033[?7h");
}
+
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
diff --git a/vm.c b/vm.c
index d4ba826..278f0bb 100644
--- a/vm.c
+++ b/vm.c
@@ -5,7 +5,6 @@
#include <ctype.h>
#include <stdlib.h>
#include <string.h>
-#include <strings.h>
#include "grammar.h"
#include "types.h"
@@ -41,6 +40,7 @@ static const char *opcode_names[] = {
[VM_NODENT] = "NODENT",
};
+// UTF8-compliant char iteration
static inline const char *next_char(file_t *f, const char *str)
{
char c = *str;
@@ -78,9 +78,9 @@ static size_t push_backrefs(grammar_t *g, match_t *m)
if (m == NULL) return 0;
if (m->op->op == VM_REF) return 0;
size_t count = 0;
- if (m->op->op == VM_CAPTURE && m->value.name) {
+ if (m->op->op == VM_CAPTURE && m->op->args.capture.name) {
++count;
- push_backref(g, m->value.name, m->child);
+ push_backref(g, m->op->args.capture.name, m->child);
}
if (m->child) count += push_backrefs(g, m->child);
if (m->nextsibling) count += push_backrefs(g, m->nextsibling);
@@ -115,8 +115,8 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un
}
case VM_STRING: {
if (&str[op->len] > f->end) return NULL;
- if ((flags & BPEG_IGNORECASE) ? strncasecmp(str, op->args.s, (size_t)op->len) != 0
- : strncmp(str, op->args.s, (size_t)op->len) != 0)
+ if ((flags & BPEG_IGNORECASE) ? memicmp(str, op->args.s, (size_t)op->len) != 0
+ : memcmp(str, op->args.s, (size_t)op->len) != 0)
return NULL;
match_t *m = new(match_t);
m->op = op;
@@ -273,8 +273,6 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un
m->end = p->end;
m->op = op;
m->child = p;
- if (op->args.capture.name)
- m->value.name = op->args.capture.name;
return m;
}
case VM_HIDE: {
@@ -321,7 +319,6 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un
.filename=f->filename,
.contents=(char*)m1->start, .end=(char*)m1->end,
.lines=f->lines, // I think this works, but am not 100% sure
- .length=(size_t)(m1->end - m1->start),
.nlines=1 + get_line_number(f, m1->end)-get_line_number(f, m1->start),
.mmapped=f->mmapped,
};
@@ -345,8 +342,8 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un
}
case VM_REPLACE: {
match_t *p = NULL;
- if (op->args.replace.replace_pat) {
- p = _match(g, f, str, op->args.replace.replace_pat, flags, rec);
+ if (op->args.replace.pat) {
+ p = _match(g, f, str, op->args.replace.pat, flags, rec);
if (p == NULL) return NULL;
}
match_t *m = new(match_t);
@@ -358,7 +355,6 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un
} else {
m->end = m->start;
}
- m->value.replacement = op->args.replace.replacement;
return m;
}
case VM_REF: {
@@ -397,7 +393,6 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un
m->end = best->end;
m->op = op;
m->child = best;
- m->value.name = op->args.s;
return m;
}
case VM_BACKREF: {
@@ -460,7 +455,8 @@ static match_t *get_capture_n(match_t *m, int *n)
*/
static match_t *get_capture_named(match_t *m, const char *name)
{
- if (m->op->op == VM_CAPTURE && m->value.name && streq(m->value.name, name))
+ if (m->op->op == VM_CAPTURE && m->op->args.capture.name
+ && streq(m->op->args.capture.name, name))
return m;
for (match_t *c = m->child; c; c = c->nextsibling) {
match_t *cap = get_capture_named(c, name);
@@ -519,7 +515,9 @@ static void _print_match(file_t *f, match_t *m, print_state_t *state, print_opti
state->color = hl;
printf("%s", state->color);
}
- for (const char *r = m->value.replacement; *r; ) {
+ const char *text = m->op->args.replace.text;
+ const char *end = &text[m->op->args.replace.len];
+ for (const char *r = text; r < end; ) {
if (*r == '@' && r[1] && r[1] != '@') {
++r;
match_t *cap = get_cap(m, &r);
@@ -606,7 +604,9 @@ static match_t *match_backref(const char *str, vm_op_t *op, match_t *cap, unsign
match_t **dest = &ret->child;
if (cap->op->op == VM_REPLACE) {
- for (const char *r = cap->value.replacement; *r; ) {
+ const char *text = cap->op->args.replace.text;
+ const char *end = &text[cap->op->args.replace.len];
+ for (const char *r = text; r < end; ) {
if (*r == '\\') {
++r;
if (*(str++) != unescapechar(r, &r)) {
@@ -660,8 +660,8 @@ static match_t *match_backref(const char *str, vm_op_t *op, match_t *cap, unsign
for (match_t *child = cap->child; child; child = child->nextsibling) {
if (child->start > prev) {
size_t len = (size_t)(child->start - prev);
- if ((flags & BPEG_IGNORECASE) ? strncasecmp(str, prev, len) != 0
- : strncmp(str, prev, len) != 0) {
+ if ((flags & BPEG_IGNORECASE) ? memicmp(str, prev, len) != 0
+ : memcmp(str, prev, len) != 0) {
destroy_match(&ret);
return NULL;
}
@@ -680,8 +680,8 @@ static match_t *match_backref(const char *str, vm_op_t *op, match_t *cap, unsign
}
if (cap->end > prev) {
size_t len = (size_t)(cap->end - prev);
- if ((flags & BPEG_IGNORECASE) ? strncasecmp(str, prev, len) != 0
- : strncmp(str, prev, len) != 0) {
+ if ((flags & BPEG_IGNORECASE) ? memicmp(str, prev, len) != 0
+ : memcmp(str, prev, len) != 0) {
destroy_match(&ret);
return NULL;
}