diff options
| -rw-r--r-- | Lua/lbp.c | 40 | ||||
| -rw-r--r-- | bp.c | 35 | ||||
| -rw-r--r-- | definitions.c | 8 | ||||
| -rw-r--r-- | pattern.c | 96 | ||||
| -rw-r--r-- | pattern.h | 17 | ||||
| -rw-r--r-- | types.h | 1 |
6 files changed, 111 insertions, 86 deletions
@@ -5,6 +5,7 @@ * bp.replace(str, pat, replacement, start_index) -> str with replacements */ +#include <stdlib.h> #include <string.h> #include "lua.h" @@ -23,6 +24,15 @@ static int MATCH_METATABLE = 0; +static inline void push_parse_error(lua_State *L, maybe_pat_t m) +{ + size_t err_len = (size_t)(m.value.error.end - m.value.error.start); + char *buf = calloc(err_len+1, sizeof(char)); + memcpy(buf, m.value.error.start, err_len); + luaL_error(L, "%s: \"%s\"", m.value.error.msg, buf); + free(buf); +} + static void push_matchstring(lua_State *L, file_t *f, match_t *m) { char *buf = NULL; @@ -124,17 +134,17 @@ static int Lmatch(lua_State *L) return 0; file_t *pat_file = spoof_file(NULL, "<pattern argument>", pat_text, patlen); - pat_t *pat = bp_pattern(pat_file, pat_file->start); - if (!pat) { - // destroy_file(&pat_file); - luaL_error(L, "Pattern failed to compile: %s", pat_text); + maybe_pat_t maybe_pat = bp_pattern(pat_file, pat_file->start); + if (!maybe_pat.success) { + push_parse_error(L, maybe_pat); + destroy_file(&pat_file); return 0; } file_t *text_file = spoof_file(NULL, "<text argument>", text+(index-1), textlen); match_t *m = NULL; int ret = 0; - if (next_match(&m, NULL, text_file, pat, NULL, false)) { + if (next_match(&m, NULL, text_file, maybe_pat.value.pat, NULL, false)) { // lua_createtable(L, 0, 1); @@ -173,18 +183,18 @@ static int Lreplace(lua_State *L) index = (lua_Integer)strlen(text)+1; file_t *pat_file = spoof_file(NULL, "<pattern argument>", pat_text, patlen); - pat_t *pat = bp_pattern(pat_file, pat_file->start); - if (!pat) { - // destroy_file(&pat_file); - luaL_error(L, "Pattern failed to compile: %s", pat_text); + maybe_pat_t maybe_pat = bp_pattern(pat_file, pat_file->start); + if (!maybe_pat.success) { + push_parse_error(L, maybe_pat); + destroy_file(&pat_file); return 0; } file_t *rep_file = spoof_file(NULL, "<replacement argument>", rep_text, replen); - pat = bp_replacement(rep_file, pat, rep_file->start); - if (!pat) { - // destroy_file(&pat_file); - // destroy_file(&rep_file); - luaL_error(L, "Replacement failed to compile: %s", rep_text); + maybe_pat = bp_replacement(rep_file, maybe_pat.value.pat, rep_file->start); + if (!maybe_pat.success) { + push_parse_error(L, maybe_pat); + destroy_file(&pat_file); + destroy_file(&rep_file); return 0; } @@ -200,7 +210,7 @@ static int Lreplace(lua_State *L) .lineformat = "", }; int replacements = 0; - for (match_t *m = NULL; next_match(&m, NULL, text_file, pat, NULL, false); ) { + for (match_t *m = NULL; next_match(&m, NULL, text_file, maybe_pat.value.pat, NULL, false); ) { print_match(out, &pr, m); ++replacements; } @@ -99,6 +99,16 @@ static inline void fprint_filename(FILE *out, const char *filename) } // +// If there was a parse error while building a pattern, print an error message and exit. +// +static inline pat_t *assert_pat(file_t *f, maybe_pat_t maybe_pat) +{ + if (!maybe_pat.success) + file_err(f, maybe_pat.value.error.start, maybe_pat.value.error.end, maybe_pat.value.error.msg); + return maybe_pat.value.pat; +} + +// // Look for a key/value flag at the first position in the given argument list. // If the flag is found, update `next` to point to the next place to check for a flag. // The contents of argv[0] may be modified for single-char flags. @@ -440,9 +450,7 @@ int main(int argc, char *argv[]) // TODO: spoof file as sprintf("pattern => '%s'", flag) // except that would require handling edge cases like quotation marks etc. file_t *replace_file = spoof_file(&loaded_files, "<replace argument>", flag, -1); - pattern = bp_replacement(replace_file, pattern, replace_file->start); - if (!pattern) - errx(EXIT_FAILURE, "Replacement failed to compile: %s", flag); + pattern = assert_pat(replace_file, bp_replacement(replace_file, pattern, replace_file->start)); if (options.context_before == USE_DEFAULT_CONTEXT) options.context_before = ALL_CONTEXT; if (options.context_after == USE_DEFAULT_CONTEXT) options.context_after = ALL_CONTEXT; } else if (FLAG("-g") || FLAG("--grammar")) { @@ -458,29 +466,17 @@ int main(int argc, char *argv[]) defs = load_grammar(defs, f); // Keep in memory for debug output } else if (FLAG("-p") || FLAG("--pattern")) { file_t *arg_file = spoof_file(&loaded_files, "<pattern argument>", flag, -1); - pat_t *p = bp_pattern(arg_file, arg_file->start); - if (!p) file_err(arg_file, arg_file->start, arg_file->end, "Failed to compile this part of the argument"); - if (p->type == BP_ERROR) - file_err(arg_file, p->args.error.start, p->args.error.end, p->args.error.msg); - if (after_spaces(p->end, true) < arg_file->end) file_err(arg_file, p->end, arg_file->end, "Failed to compile this part of the argument"); + pat_t *p = assert_pat(arg_file, bp_pattern(arg_file, arg_file->start)); pattern = chain_together(arg_file, pattern, p); } else if (FLAG("-w") || FLAG("--word")) { require(asprintf(&flag, "\\|%s\\|", flag), "Could not allocate memory"); file_t *arg_file = spoof_file(&loaded_files, "<word pattern>", flag, -1); delete(&flag); - pat_t *p = bp_stringpattern(arg_file, arg_file->start); - if (!p) errx(EXIT_FAILURE, "Pattern failed to compile: %s", flag); + pat_t *p = assert_pat(arg_file, bp_stringpattern(arg_file, arg_file->start)); pattern = chain_together(arg_file, pattern, p); } else if (FLAG("-s") || FLAG("--skip")) { file_t *arg_file = spoof_file(&loaded_files, "<skip argument>", flag, -1); - pat_t *s = bp_pattern(arg_file, arg_file->start); - if (!s) { - file_err(arg_file, arg_file->start, arg_file->end, - "Failed to compile the skip argument"); - } else if (after_spaces(s->end, true) < arg_file->end) { - file_err(arg_file, s->end, arg_file->end, - "Failed to compile part of the skip argument"); - } + pat_t *s = assert_pat(arg_file, bp_pattern(arg_file, arg_file->start)); options.skip = either_pat(arg_file, options.skip, s); } else if (FLAG("-C") || FLAG("--context")) { options.context_before = options.context_after = context_from_flag(flag); @@ -502,8 +498,7 @@ int main(int argc, char *argv[]) } else if (argv[0][0] != '-') { if (pattern != NULL) break; file_t *arg_file = spoof_file(&loaded_files, "<pattern argument>", argv[0], -1); - pat_t *p = bp_stringpattern(arg_file, arg_file->start); - if (!p) errx(EXIT_FAILURE, "Pattern failed to compile: %s", argv[0]); + pat_t *p = assert_pat(arg_file, bp_stringpattern(arg_file, arg_file->start)); pattern = chain_together(arg_file, pattern, p); ++argv; } else { diff --git a/definitions.c b/definitions.c index 5309c8b..ed7d71a 100644 --- a/definitions.c +++ b/definitions.c @@ -34,10 +34,10 @@ def_t *with_def(def_t *defs, size_t namelen, const char *name, pat_t *pat) def_t *load_grammar(def_t *defs, file_t *f) { const char *str = after_spaces(f->start, true); - pat_t *pat = bp_pattern(f, str); - if (!pat) file_err(f, str, f->end, "Could not parse this file"); - if (pat->end < f->end) file_err(f, pat->end, f->end, "Could not parse this part of the file"); - for (pat_t *p = pat; p && p->type == BP_DEFINITION; p = p->args.def.pat) + maybe_pat_t maybe_pat = bp_pattern(f, str); + if (!maybe_pat.success) + file_err(f, maybe_pat.value.error.start, maybe_pat.value.error.end, maybe_pat.value.error.msg); + for (pat_t *p = maybe_pat.value.pat; p && p->type == BP_DEFINITION; p = p->args.def.pat) defs = with_def(defs, p->args.def.namelen, p->args.def.name, p->args.def.def); return defs; } @@ -3,11 +3,11 @@ // #include <ctype.h> #include <err.h> +#include <setjmp.h> #include <stdbool.h> #include <stdlib.h> #include <string.h> #include <unistd.h> -#include <setjmp.h> #include "files.h" #include "pattern.h" @@ -23,10 +23,10 @@ static pat_t *bp_simplepattern(file_t *f, const char *str); // recursive function calls when a parse error occurs. bool is_in_try_catch = false; static jmp_buf err_jmp; -pat_t *err_pat = NULL; +static maybe_pat_t parse_error = {.success = false}; #define __TRY_PATTERN__ bool was_in_try_catch = is_in_try_catch; \ - if (!is_in_try_catch) { is_in_try_catch = true; if (setjmp(err_jmp)) return err_pat; } + if (!is_in_try_catch) { is_in_try_catch = true; if (setjmp(err_jmp)) return parse_error; } #define __END_TRY_PATTERN__ if (!was_in_try_catch) is_in_try_catch = false; static inline void parse_err(file_t *f, const char *start, const char *end, const char *msg) @@ -35,10 +35,10 @@ static inline void parse_err(file_t *f, const char *start, const char *end, cons fprintf(stderr, "Parse error: %s\n%.*s\n", msg, (int)(end-start), start); exit(1); } - err_pat = new_pat(f, start, end, 0, 0, BP_ERROR); - err_pat->args.error.start = start; - err_pat->args.error.end = end; - err_pat->args.error.msg = msg; + (void)f; + parse_error.value.error.start = start; + parse_error.value.error.end = end; + parse_error.value.error.msg = msg; longjmp(err_jmp, 1); } @@ -221,17 +221,21 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str) // Any char (dot) case '.': { if (*str == '.') { // ".." - pat_t *skip = NULL; str = next_char(str, f->end); - char skipper = *str; - if (matchchar(&str, '%', false) || matchchar(&str, '=', false)) { - skip = bp_simplepattern(f, str); - if (!skip) - file_err(f, str, str, "There should be a pattern to skip here after the '%c'", skipper); - str = skip->end; + enum pattype_e type = BP_UPTO; + pat_t *extra_arg = NULL; + if (matchchar(&str, '%', false)) { + extra_arg = bp_simplepattern(f, str); + if (!extra_arg) + parse_err(f, str, str, "There should be a pattern to skip here after the '%'"); + } else if (matchchar(&str, '=', false)) { + extra_arg = bp_simplepattern(f, str); + if (!extra_arg) + parse_err(f, str, str, "There should be a pattern here after the '='"); + type = BP_UPTO_STRICT; } - pat_t *upto = new_pat(f, start, str, 0, -1, skipper == '=' ? BP_UPTO_STRICT : BP_UPTO); - upto->args.multiple.second = skip; + pat_t *upto = new_pat(f, start, extra_arg ? extra_arg->end : str, 0, -1, type); + upto->args.multiple.second = extra_arg; return upto; } else { return new_pat(f, start, str, 1, UTF8_MAXCHARLEN, BP_ANYCHAR); @@ -249,10 +253,10 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str) if (*str == '-') { // Range const char *c2_loc = ++str; if (next_char(c1_loc, f->end) > c1_loc+1 || next_char(c2_loc, f->end) > c2_loc+1) - file_err(f, start, next_char(c2_loc, f->end), "Sorry, UTF-8 character ranges are not yet supported."); + parse_err(f, start, next_char(c2_loc, f->end), "Sorry, UTF-8 character ranges are not yet supported."); char c1 = *c1_loc, c2 = *c2_loc; if (!c2 || c2 == '\n') - file_err(f, str, str, "There should be a character here to complete the character range."); + parse_err(f, str, str, "There should be a character here to complete the character range."); if (c1 > c2) { // Swap order char tmp = c1; c1 = c2; @@ -276,7 +280,7 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str) // Escapes case '\\': { if (!*str || *str == '\n') - file_err(f, str, str, "There should be an escape sequence here after this backslash."); + parse_err(f, str, str, "There should be an escape sequence here after this backslash."); pat_t *all = NULL; do { // Comma-separated items: @@ -298,18 +302,18 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str) const char *opstart = str; unsigned char e_low = (unsigned char)unescapechar(str, &str); if (str == opstart) - file_err(f, start, str+1, "This isn't a valid escape sequence."); + parse_err(f, start, str+1, "This isn't a valid escape sequence."); unsigned char e_high = e_low; if (*str == '-') { // Escape range (e.g. \x00-\xFF) ++str; if (next_char(str, f->end) != str+1) - file_err(f, start, next_char(str, f->end), "Sorry, UTF8 escape sequences are not supported in ranges."); + parse_err(f, start, next_char(str, f->end), "Sorry, UTF8 escape sequences are not supported in ranges."); const char *seqstart = str; e_high = (unsigned char)unescapechar(str, &str); if (str == seqstart) - file_err(f, seqstart, str+1, "This value isn't a valid escape sequence"); + parse_err(f, seqstart, str+1, "This value isn't a valid escape sequence"); if (e_high < e_low) - file_err(f, start, str, "Escape ranges should be low-to-high, but this is high-to-low."); + parse_err(f, start, str, "Escape ranges should be low-to-high, but this is high-to-low."); } pat_t *esc = new_pat(f, start, str, 1, 1, BP_RANGE); esc->args.range.low = e_low; @@ -339,7 +343,7 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str) // Not <pat> case '!': { pat_t *p = bp_simplepattern(f, str); - if (!p) file_err(f, str, str, "There should be a pattern after this '!'"); + if (!p) parse_err(f, str, str, "There should be a pattern after this '!'"); pat_t *not = new_pat(f, start, p->end, 0, 0, BP_NOT); not->args.pat = p; return not; @@ -364,13 +368,13 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str) } pat_t *repeating = bp_simplepattern(f, str); if (!repeating) - file_err(f, str, str, "There should be a pattern after this repetition count."); + parse_err(f, str, str, "There should be a pattern after this repetition count."); str = repeating->end; pat_t *sep = NULL; if (matchchar(&str, '%', false)) { sep = bp_simplepattern(f, str); if (!sep) - file_err(f, str, str, "There should be a separator pattern after this '%%'"); + parse_err(f, str, str, "There should be a separator pattern after this '%%'"); str = sep->end; } else { str = repeating->end; @@ -381,7 +385,7 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str) case '<': { pat_t *behind = bp_simplepattern(f, str); if (!behind) - file_err(f, str, str, "There should be a pattern after this '<'"); + parse_err(f, str, str, "There should be a pattern after this '<'"); str = behind->end; str = behind->end; pat_t *pat = new_pat(f, start, str, 0, 0, BP_AFTER); @@ -392,7 +396,7 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str) case '>': { pat_t *ahead = bp_simplepattern(f, str); if (!ahead) - file_err(f, str, str, "There should be a pattern after this '>'"); + parse_err(f, str, str, "There should be a pattern after this '>'"); str = ahead->end; pat_t *pat = new_pat(f, start, str, 0, 0, BP_BEFORE); pat->args.pat = ahead; @@ -402,9 +406,9 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str) case '(': { pat_t *pat = bp_pattern_nl(f, str, true); if (!pat) - file_err(f, str, str, "There should be a valid pattern after this parenthesis."); + parse_err(f, str, str, "There should be a valid pattern after this parenthesis."); str = pat->end; - if (!matchchar(&str, ')', true)) file_err(f, str, str, "Missing paren: )"); + if (!matchchar(&str, ')', true)) parse_err(f, str, str, "Missing paren: )"); pat->start = start; pat->end = str; return pat; @@ -413,7 +417,7 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str) case '[': { pat_t *maybe = bp_pattern_nl(f, str, true); if (!maybe) - file_err(f, str, str, "There should be a valid pattern after this square bracket."); + parse_err(f, str, str, "There should be a valid pattern after this square bracket."); str = maybe->end; (void)matchchar(&str, ']', true); return new_range(f, start, str, 0, 1, maybe, NULL); @@ -423,13 +427,13 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str) size_t min = (size_t)(c == '*' ? 0 : 1); pat_t *repeating = bp_simplepattern(f, str); if (!repeating) - file_err(f, str, str, "There should be a valid pattern here after the '%c'", c); + parse_err(f, str, str, "There should be a valid pattern to repeat here"); str = repeating->end; pat_t *sep = NULL; if (matchchar(&str, '%', false)) { sep = bp_simplepattern(f, str); if (!sep) - file_err(f, str, str, "There should be a separator pattern after the '%%' here."); + parse_err(f, str, str, "There should be a separator pattern after the '%%' here."); str = sep->end; } return new_range(f, start, str, min, -1, repeating, sep); @@ -447,7 +451,7 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str) } pat_t *pat = bp_simplepattern(f, str); if (!pat) - file_err(f, str, str, "There should be a valid pattern here to capture after the '@'"); + parse_err(f, str, str, "There should be a valid pattern here to capture after the '@'"); pat_t *capture = new_pat(f, start, pat->end, pat->min_matchlen, pat->max_matchlen, BP_CAPTURE); capture->args.capture.capture_pat = pat; @@ -474,7 +478,7 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str) size_t namelen = (size_t)(str - start); if (matchchar(&str, ':', false)) { // Definitions pat_t *def = bp_pattern_nl(f, str, false); - if (!def) file_err(f, str, f->end, "Could not parse this definition."); + if (!def) parse_err(f, str, f->end, "Could not parse this definition."); str = def->end; (void)matchchar(&str, ';', false); // Optional semicolon str = after_spaces(str, true); @@ -499,7 +503,7 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str) // // Similar to bp_simplepattern, except that the pattern begins with an implicit, unclosable quote. // -pat_t *bp_stringpattern(file_t *f, const char *str) +maybe_pat_t bp_stringpattern(file_t *f, const char *str) { __TRY_PATTERN__ pat_t *ret = NULL; @@ -530,8 +534,9 @@ pat_t *bp_stringpattern(file_t *f, const char *str) (void)matchchar(&str, ';', false); } } + if (!ret) ret = new_pat(f, str, str, 0, 0, BP_STRING); __END_TRY_PATTERN__ - return ret; + return (maybe_pat_t){.success = true, .value.pat = ret}; } // @@ -555,7 +560,7 @@ static pat_t *bp_simplepattern(file_t *f, const char *str) pat_t *first = pat; pat_t *second = bp_simplepattern(f, str); if (!second) - file_err(f, str, str, "The '%s' operator expects a pattern before and after.", type == BP_MATCH ? "~" : "!~"); + parse_err(f, str, str, "There should be a valid pattern here"); pat = new_pat(f, str, second->end, first->min_matchlen, first->max_matchlen, type); pat->args.multiple.first = first; @@ -570,7 +575,7 @@ static pat_t *bp_simplepattern(file_t *f, const char *str) // Given a pattern and a replacement string, compile the two into a BP // replace pattern. // -pat_t *bp_replacement(file_t *f, pat_t *replacepat, const char *replacement) +maybe_pat_t bp_replacement(file_t *f, pat_t *replacepat, const char *replacement) { pat_t *pat = new_pat(f, replacepat->start, replacepat->end, replacepat->min_matchlen, replacepat->max_matchlen, BP_REPLACE); pat->args.replace.pat = replacepat; @@ -579,7 +584,7 @@ pat_t *bp_replacement(file_t *f, pat_t *replacepat, const char *replacement) for (; p < f->end; p++) { if (*p == '\\') { if (!p[1] || p[1] == '\n') - file_err(f, p, p, "There should be an escape sequence or pattern here after this backslash."); + parse_err(f, p, p, "There should be an escape sequence or pattern here after this backslash."); ++p; } } @@ -589,7 +594,7 @@ pat_t *bp_replacement(file_t *f, pat_t *replacepat, const char *replacement) memcpy(rcpy, replacement, rlen); pat->args.replace.text = rcpy; pat->args.replace.len = rlen; - return pat; + return (maybe_pat_t){.success = true, .value.pat = pat}; } static pat_t *bp_pattern_nl(file_t *f, const char *str, bool allow_nl) @@ -605,12 +610,17 @@ static pat_t *bp_pattern_nl(file_t *f, const char *str, bool allow_nl) // // Compile a string representing a BP pattern into a pattern object. // -pat_t *bp_pattern(file_t *f, const char *str) +maybe_pat_t bp_pattern(file_t *f, const char *str) { __TRY_PATTERN__ pat_t *ret = bp_pattern_nl(f, str, false); __END_TRY_PATTERN__ - return ret; + if (ret && after_spaces(ret->end, true) < f->end) + return (maybe_pat_t){.success = false, .value.error.start = ret->end, .value.error.end = f->end, .value.error.msg = "Failed to parse this part of the pattern"}; + else if (ret) + return (maybe_pat_t){.success = true, .value.pat = ret}; + else + return (maybe_pat_t){.success = false, .value.error.start = str, .value.error.end = f->end, .value.error.msg = "Failed to parse this pattern"}; } // vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 @@ -7,18 +7,29 @@ #include "files.h" #include "types.h" +typedef struct { + bool success; + union { + pat_t *pat; + struct { + const char *start, *end, *msg; + } error; + } value; +} maybe_pat_t; + __attribute__((returns_nonnull, nonnull(1,2))) pat_t *new_pat(file_t *f, const char *start, const char *end, size_t minlen, ssize_t maxlen, enum pattype_e type); __attribute__((nonnull)) -pat_t *bp_stringpattern(file_t *f, const char *str); +maybe_pat_t bp_stringpattern(file_t *f, const char *str); __attribute__((nonnull(1,2))) -pat_t *bp_replacement(file_t *f, pat_t *replacepat, const char *replacement); +maybe_pat_t bp_replacement(file_t *f, pat_t *replacepat, const char *replacement); __attribute__((nonnull(1))) pat_t *chain_together(file_t *f, pat_t *first, pat_t *second); __attribute__((nonnull(1))) pat_t *either_pat(file_t *f, pat_t *first, pat_t *second); __attribute__((nonnull)) -pat_t *bp_pattern(file_t *f, const char *str); +maybe_pat_t bp_pattern(file_t *f, const char *str); + #endif // vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 @@ -37,7 +37,6 @@ enum pattype_e { BP_WORD_BOUNDARY = 24, BP_DEFINITION = 25, BP_LEFTRECURSION = 26, - BP_ERROR = 27, }; struct match_s; // forward declared to resolve circular struct defs |
