diff options
| -rw-r--r-- | bpeg.c | 19 | ||||
| -rw-r--r-- | compiler.c | 51 | ||||
| -rw-r--r-- | file_loader.c | 22 | ||||
| -rw-r--r-- | file_loader.h | 3 | ||||
| -rw-r--r-- | grammar.c | 4 | ||||
| -rw-r--r-- | json.c | 2 | ||||
| -rw-r--r-- | types.h | 9 | ||||
| -rw-r--r-- | utils.c | 10 | ||||
| -rw-r--r-- | utils.h | 1 | ||||
| -rw-r--r-- | viz.c | 2 | ||||
| -rw-r--r-- | vm.c | 38 |
11 files changed, 96 insertions, 65 deletions
@@ -59,7 +59,7 @@ static char *getflag(const char *flag, char *argv[], int *i) static int print_errors(file_t *f, match_t *m) { int ret = 0; - if (m->op->op == VM_CAPTURE && m->value.name && streq(m->value.name, "!")) { + if (m->op->op == VM_CAPTURE && m->op->args.capture.name && streq(m->op->args.capture.name, "!")) { printf("\033[31;1m"); print_match(f, m, print_options); printf("\033[0m\n"); @@ -154,11 +154,12 @@ int main(int argc, char *argv[]) } else if (streq(argv[i], "--list-files")) { flags |= BPEG_LISTFILES; } else if (FLAG("--replace") || FLAG("-r")) { + file_t *pat_file = spoof_file("<pattern>", "pattern"); + vm_op_t *patref = bpeg_pattern(pat_file, pat_file->contents); file_t *replace_file = spoof_file("<replace argument>", flag); - vm_op_t *patref = bpeg_pattern(replace_file, "pattern"); - vm_op_t *rep = bpeg_replacement(replace_file, patref, flag); + vm_op_t *rep = bpeg_replacement(replace_file, patref, replace_file->contents); check(rep, "Replacement failed to compile: %s", flag); - add_def(g, replace_file, flag, "replacement", rep); + add_def(g, replace_file, replace_file->contents, "replacement", rep); rule = "replace-all"; } else if (FLAG("--grammar") || FLAG("-g")) { file_t *f = load_file(flag); @@ -179,7 +180,7 @@ int main(int argc, char *argv[]) *eq = '\0'; char *src = ++eq; file_t *def_file = spoof_file(def, src); - vm_op_t *pat = bpeg_pattern(def_file, src); + vm_op_t *pat = bpeg_pattern(def_file, def_file->contents); check(pat, "Failed to compile pattern: %s", flag); add_def(g, def_file, src, def, pat); } else if (FLAG("--define-string") || FLAG("-D")) { @@ -189,19 +190,19 @@ int main(int argc, char *argv[]) *eq = '\0'; char *src = ++eq; file_t *def_file = spoof_file(def, flag); - vm_op_t *pat = bpeg_stringpattern(def_file, src); + vm_op_t *pat = bpeg_stringpattern(def_file, def_file->contents); check(pat, "Failed to compile pattern: %s", flag); add_def(g, def_file, src, def, pat); } else if (FLAG("--pattern") || FLAG("-p")) { check(npatterns == 0, "Cannot define multiple patterns"); file_t *arg_file = spoof_file("<pattern argument>", flag); - vm_op_t *p = bpeg_pattern(arg_file, flag); + vm_op_t *p = bpeg_pattern(arg_file, arg_file->contents); check(p, "Pattern failed to compile: %s", flag); add_def(g, arg_file, flag, "pattern", p); ++npatterns; } else if (FLAG("--pattern-string") || FLAG("-P")) { file_t *arg_file = spoof_file("<pattern argument>", flag); - vm_op_t *p = bpeg_stringpattern(arg_file, flag); + vm_op_t *p = bpeg_stringpattern(arg_file, arg_file->contents); check(p, "Pattern failed to compile: %s", flag); add_def(g, arg_file, flag, "pattern", p); ++npatterns; @@ -224,7 +225,7 @@ int main(int argc, char *argv[]) } else if (argv[i][0] != '-') { if (npatterns > 0) break; file_t *arg_file = spoof_file("<pattern argument>", argv[i]); - vm_op_t *p = bpeg_stringpattern(arg_file, argv[i]); + vm_op_t *p = bpeg_stringpattern(arg_file, arg_file->contents); check(p, "Pattern failed to compile: %s", argv[i]); add_def(g, arg_file, argv[i], "pattern", p); ++npatterns; @@ -142,24 +142,26 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str) } // Char literals case '`': { - char literal[2] = {*str, '\0'}; + char c = *str; ++str; - if (!literal[0] || literal[0] == '\n') + if (!c || c == '\n') file_err(f, str, str, "There should be a character here after the '`'"); op->len = 1; if (matchchar(&str, '-')) { // Range char c2 = *str; if (!c2 || c2 == '\n') file_err(f, str, str, "There should be a character here to complete the character range."); - if (c2 < literal[0]) + if (c2 < c) file_err(f, origin, str+1, "Character ranges must be low-to-high, but this is high-to-low."); op->op = VM_RANGE; - op->args.range.low = (unsigned char)literal[0]; + op->args.range.low = (unsigned char)c; op->args.range.high = (unsigned char)c2; ++str; } else { op->op = VM_STRING; - op->args.s = strdup(literal); + char *s = xcalloc(sizeof(char), 2); + s[0] = c; + op->args.s = s; } break; } @@ -187,16 +189,17 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str) op->args.range.low = e; op->args.range.high = e2; } else { - char literal[2] = {(char)e, '\0'}; op->op = VM_STRING; - op->args.s = strdup(literal); + char *s = xcalloc(sizeof(char), 2); + s[0] = (char)e; + op->args.s = s; } break; } // String literal case '"': case '\'': case '\002': { char endquote = c == '\002' ? '\003' : c; - char *literal = (char*)str; + char *start = (char*)str; for (; *str && *str != endquote; str++) { if (*str == '\\') { if (!str[1] || str[1] == '\n') @@ -205,8 +208,9 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str) ++str; } } - size_t len = (size_t)(str - literal); - literal = strndup(literal, len); + size_t len = (size_t)(str - start); + char *literal = xcalloc(sizeof(char), len+1); + memcpy(literal, start, len); len = unescape_string(literal, literal, len); op->op = VM_STRING; @@ -381,8 +385,10 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str) char quote = *str; const char *replacement; + size_t replace_len; if (matchchar(&str, '}')) { replacement = strdup(""); + replace_len = 0; } else { if (!(matchchar(&str, '"') || matchchar(&str, '\''))) file_err(f, str, str, "There should be a string literal as a replacement here."); @@ -395,15 +401,18 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str) ++str; } } - replacement = strndup(repstr, (size_t)(str-repstr)); + replace_len = (size_t)(str-repstr); + replacement = xcalloc(sizeof(char), replace_len+1); + memcpy((void*)replacement, repstr, (size_t)(str-repstr)); if (!matchchar(&str, quote)) file_err(f, &repstr[-1], str, "This string doesn't have a closing quote."); if (!matchchar(&str, '}')) file_err(f, origin, str, "This replacement doesn't have a closing '}'"); } op->op = VM_REPLACE; - op->args.replace.replace_pat = pat; - op->args.replace.replacement = replacement; + op->args.replace.pat = pat; + op->args.replace.text = replacement; + op->args.replace.len = replace_len; if (pat != NULL) op->len = pat->len; break; } @@ -489,7 +498,7 @@ vm_op_t *bpeg_stringpattern(file_t *f, const char *str) strop->start = str; strop->len = 0; strop->op = VM_STRING; - char *literal = (char*)str; + char *start = (char*)str; vm_op_t *interp = NULL; for (; *str; str++) { if (*str == '\\') { @@ -519,8 +528,9 @@ vm_op_t *bpeg_stringpattern(file_t *f, const char *str) } } // End of string - size_t len = (size_t)(str - literal); - literal = strndup(literal, len); + size_t len = (size_t)(str - start); + char *literal = xcalloc(sizeof(char), len+1); + memcpy(literal, start, len); len = unescape_string(literal, literal, len); strop->len = (ssize_t)len; strop->args.s = literal; @@ -552,7 +562,7 @@ vm_op_t *bpeg_replacement(file_t *f, vm_op_t *pat, const char *replacement) op->op = VM_REPLACE; op->start = pat->start; op->len = pat->len; - op->args.replace.replace_pat = pat; + op->args.replace.pat = pat; const char *p = replacement; for (; *p; p++) { if (*p == '\\') { @@ -561,8 +571,11 @@ vm_op_t *bpeg_replacement(file_t *f, vm_op_t *pat, const char *replacement) ++p; } } - replacement = strndup(replacement, (size_t)(p-replacement)); - op->args.replace.replacement = replacement; + size_t rlen = (size_t)(p-replacement); + char *rcpy = xcalloc(sizeof(char), rlen + 1); + memcpy(rcpy, replacement, rlen); + op->args.replace.text = rcpy; + op->args.replace.len = rlen; return op; } diff --git a/file_loader.c b/file_loader.c index 9262d04..ab677a1 100644 --- a/file_loader.c +++ b/file_loader.c @@ -38,8 +38,9 @@ static void populate_lines(file_t *f) file_t *load_file(const char *filename) { if (filename == NULL) filename = "-"; - int fd = strcmp(filename, "-") != 0 ? open(filename, O_RDONLY) : STDIN_FILENO; + int fd = streq(filename, "-") ? STDIN_FILENO : open(filename, O_RDONLY); if (fd < 0) return NULL; + size_t length; file_t *f = new(file_t); f->filename = strdup(filename); @@ -52,24 +53,24 @@ file_t *load_file(const char *filename) goto skip_mmap; f->mmapped = 1; - f->length = (size_t)sb.st_size; + length = (size_t)sb.st_size; goto finished_loading; skip_mmap: f->mmapped = 0; size_t capacity = 1000; - f->length = 0; + length = 0; f->contents = xcalloc(sizeof(char), capacity); ssize_t just_read; - while ((just_read=read(fd, &f->contents[f->length], capacity - f->length)) > 0) { - f->length += (size_t)just_read; - if (f->length >= capacity) + while ((just_read=read(fd, &f->contents[length], capacity - length)) > 0) { + length += (size_t)just_read; + if (length >= capacity) f->contents = xrealloc(f->contents, sizeof(char)*(capacity *= 2) + 1); } close(fd); finished_loading: - f->end = &f->contents[f->length]; + f->end = &f->contents[length]; populate_lines(f); return f; } @@ -83,8 +84,7 @@ file_t *spoof_file(const char *filename, char *text) file_t *f = new(file_t); f->filename = strdup(filename); f->contents = text; - f->length = strlen(text); - f->end = &f->contents[f->length]; + f->end = &f->contents[strlen(text)]; populate_lines(f); return f; } @@ -101,7 +101,7 @@ void destroy_file(file_t **f) } if ((*f)->contents) { if ((*f)->mmapped) { - munmap((*f)->contents, (*f)->length); + munmap((*f)->contents, (size_t)((*f)->end - (*f)->contents)); } else { free((*f)->contents); } @@ -160,3 +160,5 @@ void fprint_line(FILE *dest, file_t *f, const char *start, const char *end, cons for (; p < end; ++p) fputc('^', dest); fprintf(dest, "\033[0m\n"); } + +// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1 diff --git a/file_loader.h b/file_loader.h index cda8dc3..d6d305a 100644 --- a/file_loader.h +++ b/file_loader.h @@ -9,7 +9,7 @@ typedef struct { const char *filename; char *contents, **lines, *end; - size_t length, nlines; + size_t nlines; unsigned int mmapped:1; } file_t; @@ -27,3 +27,4 @@ __attribute__((format (printf, 5, 6))) void fprint_line(FILE *dest, file_t *f, const char *start, const char *end, const char *fmt, ...); #endif +// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1 @@ -58,7 +58,7 @@ vm_op_t *load_grammar(grammar_t *g, file_t *f) if (*src && matchchar(&src, ';')) src = after_spaces(src); } - if (src < &f->contents[f->length-1]) { + if (src < f->end) { fprint_line(stderr, f, src, NULL, "Invalid BPEG pattern"); _exit(1); } @@ -108,3 +108,5 @@ void pop_backrefs(grammar_t *g, size_t count) --g->backrefcount; } } + +// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1 @@ -45,3 +45,5 @@ void json_match(const char *text, match_t *m, int verbose) { _json_match(text, m, 0, verbose); } + +// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1 @@ -63,8 +63,9 @@ typedef struct vm_op_s { struct vm_op_s *first, *second; } multiple; struct { - struct vm_op_s *replace_pat; - const char *replacement; + struct vm_op_s *pat; + const char *text; + size_t len; } replace; struct { struct vm_op_s *capture_pat; @@ -81,10 +82,6 @@ typedef struct vm_op_s { typedef struct match_s { // Where the match starts and ends (end is after the last character) const char *start, *end; - union { - const char *name; - const char *replacement; - } value; struct match_s *child, *nextsibling; vm_op_t *op; } match_t; @@ -181,4 +181,14 @@ void *memcheck(void *p) return p; } + +int memicmp(const void *v1, const void *v2, size_t n) +{ + int result = 0; + const char *s1 = (const char*)v1, *s2 = (const char*)v2; + while (n-- > 0 && (result = tolower(*(s1++)) - tolower(*(s2++))) == 0) + ; + return result; +} + // vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1 @@ -29,6 +29,7 @@ int matchchar(const char **str, char c); __attribute__((nonnull)) size_t unescape_string(char *dest, const char *src, size_t bufsize); void *memcheck(void *p); +int memicmp(const void *s1, const void *s2, size_t n); #endif // vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1 @@ -146,3 +146,5 @@ void visualize_match(match_t *m) _visualize_patterns(m); printf("\033[?7h"); } + +// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1 @@ -5,7 +5,6 @@ #include <ctype.h> #include <stdlib.h> #include <string.h> -#include <strings.h> #include "grammar.h" #include "types.h" @@ -41,6 +40,7 @@ static const char *opcode_names[] = { [VM_NODENT] = "NODENT", }; +// UTF8-compliant char iteration static inline const char *next_char(file_t *f, const char *str) { char c = *str; @@ -78,9 +78,9 @@ static size_t push_backrefs(grammar_t *g, match_t *m) if (m == NULL) return 0; if (m->op->op == VM_REF) return 0; size_t count = 0; - if (m->op->op == VM_CAPTURE && m->value.name) { + if (m->op->op == VM_CAPTURE && m->op->args.capture.name) { ++count; - push_backref(g, m->value.name, m->child); + push_backref(g, m->op->args.capture.name, m->child); } if (m->child) count += push_backrefs(g, m->child); if (m->nextsibling) count += push_backrefs(g, m->nextsibling); @@ -115,8 +115,8 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un } case VM_STRING: { if (&str[op->len] > f->end) return NULL; - if ((flags & BPEG_IGNORECASE) ? strncasecmp(str, op->args.s, (size_t)op->len) != 0 - : strncmp(str, op->args.s, (size_t)op->len) != 0) + if ((flags & BPEG_IGNORECASE) ? memicmp(str, op->args.s, (size_t)op->len) != 0 + : memcmp(str, op->args.s, (size_t)op->len) != 0) return NULL; match_t *m = new(match_t); m->op = op; @@ -273,8 +273,6 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un m->end = p->end; m->op = op; m->child = p; - if (op->args.capture.name) - m->value.name = op->args.capture.name; return m; } case VM_HIDE: { @@ -321,7 +319,6 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un .filename=f->filename, .contents=(char*)m1->start, .end=(char*)m1->end, .lines=f->lines, // I think this works, but am not 100% sure - .length=(size_t)(m1->end - m1->start), .nlines=1 + get_line_number(f, m1->end)-get_line_number(f, m1->start), .mmapped=f->mmapped, }; @@ -345,8 +342,8 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un } case VM_REPLACE: { match_t *p = NULL; - if (op->args.replace.replace_pat) { - p = _match(g, f, str, op->args.replace.replace_pat, flags, rec); + if (op->args.replace.pat) { + p = _match(g, f, str, op->args.replace.pat, flags, rec); if (p == NULL) return NULL; } match_t *m = new(match_t); @@ -358,7 +355,6 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un } else { m->end = m->start; } - m->value.replacement = op->args.replace.replacement; return m; } case VM_REF: { @@ -397,7 +393,6 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un m->end = best->end; m->op = op; m->child = best; - m->value.name = op->args.s; return m; } case VM_BACKREF: { @@ -460,7 +455,8 @@ static match_t *get_capture_n(match_t *m, int *n) */ static match_t *get_capture_named(match_t *m, const char *name) { - if (m->op->op == VM_CAPTURE && m->value.name && streq(m->value.name, name)) + if (m->op->op == VM_CAPTURE && m->op->args.capture.name + && streq(m->op->args.capture.name, name)) return m; for (match_t *c = m->child; c; c = c->nextsibling) { match_t *cap = get_capture_named(c, name); @@ -519,7 +515,9 @@ static void _print_match(file_t *f, match_t *m, print_state_t *state, print_opti state->color = hl; printf("%s", state->color); } - for (const char *r = m->value.replacement; *r; ) { + const char *text = m->op->args.replace.text; + const char *end = &text[m->op->args.replace.len]; + for (const char *r = text; r < end; ) { if (*r == '@' && r[1] && r[1] != '@') { ++r; match_t *cap = get_cap(m, &r); @@ -606,7 +604,9 @@ static match_t *match_backref(const char *str, vm_op_t *op, match_t *cap, unsign match_t **dest = &ret->child; if (cap->op->op == VM_REPLACE) { - for (const char *r = cap->value.replacement; *r; ) { + const char *text = cap->op->args.replace.text; + const char *end = &text[cap->op->args.replace.len]; + for (const char *r = text; r < end; ) { if (*r == '\\') { ++r; if (*(str++) != unescapechar(r, &r)) { @@ -660,8 +660,8 @@ static match_t *match_backref(const char *str, vm_op_t *op, match_t *cap, unsign for (match_t *child = cap->child; child; child = child->nextsibling) { if (child->start > prev) { size_t len = (size_t)(child->start - prev); - if ((flags & BPEG_IGNORECASE) ? strncasecmp(str, prev, len) != 0 - : strncmp(str, prev, len) != 0) { + if ((flags & BPEG_IGNORECASE) ? memicmp(str, prev, len) != 0 + : memcmp(str, prev, len) != 0) { destroy_match(&ret); return NULL; } @@ -680,8 +680,8 @@ static match_t *match_backref(const char *str, vm_op_t *op, match_t *cap, unsign } if (cap->end > prev) { size_t len = (size_t)(cap->end - prev); - if ((flags & BPEG_IGNORECASE) ? strncasecmp(str, prev, len) != 0 - : strncmp(str, prev, len) != 0) { + if ((flags & BPEG_IGNORECASE) ? memicmp(str, prev, len) != 0 + : memcmp(str, prev, len) != 0) { destroy_match(&ret); return NULL; } |
