Switched to using an optional type

This commit is contained in:
Bruce Hill 2021-09-23 14:38:46 -07:00
parent 86404d0ab3
commit 89c5888dd6
6 changed files with 111 additions and 86 deletions

View File

@ -5,6 +5,7 @@
* bp.replace(str, pat, replacement, start_index) -> str with replacements
*/
#include <stdlib.h>
#include <string.h>
#include "lua.h"
@ -23,6 +24,15 @@
static int MATCH_METATABLE = 0;
//
// Raise a Lua error describing a failed pattern parse. The message has the
// form `<msg>: "<offending text>"`, where the offending text is the span
// [error.start, error.end) recorded in `m`. This function does not return
// normally: luaL_error() jumps back into the Lua runtime.
//
static inline void push_parse_error(lua_State *L, maybe_pat_t m)
{
    size_t err_len = (size_t)(m.value.error.end - m.value.error.start);
    // Copy the offending span into a Lua string rather than a heap buffer:
    // luaL_error() never returns, so a malloc'd buffer could never be freed,
    // whereas a Lua string is owned (and eventually collected) by the runtime.
    lua_pushlstring(L, m.value.error.start, err_len);
    luaL_error(L, "%s: \"%s\"", m.value.error.msg, lua_tostring(L, -1));
}
static void push_matchstring(lua_State *L, file_t *f, match_t *m)
{
char *buf = NULL;
@ -124,17 +134,17 @@ static int Lmatch(lua_State *L)
return 0;
file_t *pat_file = spoof_file(NULL, "<pattern argument>", pat_text, patlen);
pat_t *pat = bp_pattern(pat_file, pat_file->start);
if (!pat) {
// destroy_file(&pat_file);
luaL_error(L, "Pattern failed to compile: %s", pat_text);
maybe_pat_t maybe_pat = bp_pattern(pat_file, pat_file->start);
if (!maybe_pat.success) {
push_parse_error(L, maybe_pat);
destroy_file(&pat_file);
return 0;
}
file_t *text_file = spoof_file(NULL, "<text argument>", text+(index-1), textlen);
match_t *m = NULL;
int ret = 0;
if (next_match(&m, NULL, text_file, pat, NULL, false)) {
if (next_match(&m, NULL, text_file, maybe_pat.value.pat, NULL, false)) {
// lua_createtable(L, 0, 1);
@ -173,18 +183,18 @@ static int Lreplace(lua_State *L)
index = (lua_Integer)strlen(text)+1;
file_t *pat_file = spoof_file(NULL, "<pattern argument>", pat_text, patlen);
pat_t *pat = bp_pattern(pat_file, pat_file->start);
if (!pat) {
// destroy_file(&pat_file);
luaL_error(L, "Pattern failed to compile: %s", pat_text);
maybe_pat_t maybe_pat = bp_pattern(pat_file, pat_file->start);
if (!maybe_pat.success) {
push_parse_error(L, maybe_pat);
destroy_file(&pat_file);
return 0;
}
file_t *rep_file = spoof_file(NULL, "<replacement argument>", rep_text, replen);
pat = bp_replacement(rep_file, pat, rep_file->start);
if (!pat) {
// destroy_file(&pat_file);
// destroy_file(&rep_file);
luaL_error(L, "Replacement failed to compile: %s", rep_text);
maybe_pat = bp_replacement(rep_file, maybe_pat.value.pat, rep_file->start);
if (!maybe_pat.success) {
push_parse_error(L, maybe_pat);
destroy_file(&pat_file);
destroy_file(&rep_file);
return 0;
}
@ -200,7 +210,7 @@ static int Lreplace(lua_State *L)
.lineformat = "",
};
int replacements = 0;
for (match_t *m = NULL; next_match(&m, NULL, text_file, pat, NULL, false); ) {
for (match_t *m = NULL; next_match(&m, NULL, text_file, maybe_pat.value.pat, NULL, false); ) {
print_match(out, &pr, m);
++replacements;
}

35
bp.c
View File

@ -98,6 +98,16 @@ static inline void fprint_filename(FILE *out, const char *filename)
else fprintf(out, "%s:\n", filename);
}
//
// Unwrap the result of a pattern-building call. If the result records a parse
// error, report it through file_err() (print an error message and exit) using
// the error's span and message; otherwise hand back the compiled pattern.
//
static inline pat_t *assert_pat(file_t *f, maybe_pat_t maybe_pat)
{
    if (maybe_pat.success)
        return maybe_pat.value.pat;
    file_err(f, maybe_pat.value.error.start, maybe_pat.value.error.end, maybe_pat.value.error.msg);
    return maybe_pat.value.pat;
}
//
// Look for a key/value flag at the first position in the given argument list.
// If the flag is found, update `next` to point to the next place to check for a flag.
@ -440,9 +450,7 @@ int main(int argc, char *argv[])
// TODO: spoof file as sprintf("pattern => '%s'", flag)
// except that would require handling edge cases like quotation marks etc.
file_t *replace_file = spoof_file(&loaded_files, "<replace argument>", flag, -1);
pattern = bp_replacement(replace_file, pattern, replace_file->start);
if (!pattern)
errx(EXIT_FAILURE, "Replacement failed to compile: %s", flag);
pattern = assert_pat(replace_file, bp_replacement(replace_file, pattern, replace_file->start));
if (options.context_before == USE_DEFAULT_CONTEXT) options.context_before = ALL_CONTEXT;
if (options.context_after == USE_DEFAULT_CONTEXT) options.context_after = ALL_CONTEXT;
} else if (FLAG("-g") || FLAG("--grammar")) {
@ -458,29 +466,17 @@ int main(int argc, char *argv[])
defs = load_grammar(defs, f); // Keep in memory for debug output
} else if (FLAG("-p") || FLAG("--pattern")) {
file_t *arg_file = spoof_file(&loaded_files, "<pattern argument>", flag, -1);
pat_t *p = bp_pattern(arg_file, arg_file->start);
if (!p) file_err(arg_file, arg_file->start, arg_file->end, "Failed to compile this part of the argument");
if (p->type == BP_ERROR)
file_err(arg_file, p->args.error.start, p->args.error.end, p->args.error.msg);
if (after_spaces(p->end, true) < arg_file->end) file_err(arg_file, p->end, arg_file->end, "Failed to compile this part of the argument");
pat_t *p = assert_pat(arg_file, bp_pattern(arg_file, arg_file->start));
pattern = chain_together(arg_file, pattern, p);
} else if (FLAG("-w") || FLAG("--word")) {
require(asprintf(&flag, "\\|%s\\|", flag), "Could not allocate memory");
file_t *arg_file = spoof_file(&loaded_files, "<word pattern>", flag, -1);
delete(&flag);
pat_t *p = bp_stringpattern(arg_file, arg_file->start);
if (!p) errx(EXIT_FAILURE, "Pattern failed to compile: %s", flag);
pat_t *p = assert_pat(arg_file, bp_stringpattern(arg_file, arg_file->start));
pattern = chain_together(arg_file, pattern, p);
} else if (FLAG("-s") || FLAG("--skip")) {
file_t *arg_file = spoof_file(&loaded_files, "<skip argument>", flag, -1);
pat_t *s = bp_pattern(arg_file, arg_file->start);
if (!s) {
file_err(arg_file, arg_file->start, arg_file->end,
"Failed to compile the skip argument");
} else if (after_spaces(s->end, true) < arg_file->end) {
file_err(arg_file, s->end, arg_file->end,
"Failed to compile part of the skip argument");
}
pat_t *s = assert_pat(arg_file, bp_pattern(arg_file, arg_file->start));
options.skip = either_pat(arg_file, options.skip, s);
} else if (FLAG("-C") || FLAG("--context")) {
options.context_before = options.context_after = context_from_flag(flag);
@ -502,8 +498,7 @@ int main(int argc, char *argv[])
} else if (argv[0][0] != '-') {
if (pattern != NULL) break;
file_t *arg_file = spoof_file(&loaded_files, "<pattern argument>", argv[0], -1);
pat_t *p = bp_stringpattern(arg_file, arg_file->start);
if (!p) errx(EXIT_FAILURE, "Pattern failed to compile: %s", argv[0]);
pat_t *p = assert_pat(arg_file, bp_stringpattern(arg_file, arg_file->start));
pattern = chain_together(arg_file, pattern, p);
++argv;
} else {

View File

@ -34,10 +34,10 @@ def_t *with_def(def_t *defs, size_t namelen, const char *name, pat_t *pat)
def_t *load_grammar(def_t *defs, file_t *f)
{
const char *str = after_spaces(f->start, true);
pat_t *pat = bp_pattern(f, str);
if (!pat) file_err(f, str, f->end, "Could not parse this file");
if (pat->end < f->end) file_err(f, pat->end, f->end, "Could not parse this part of the file");
for (pat_t *p = pat; p && p->type == BP_DEFINITION; p = p->args.def.pat)
// Parse the file contents starting at `str`; a failed parse is reported via
// file_err() using the span and message recorded in the result.
maybe_pat_t maybe_pat = bp_pattern(f, str);
if (!maybe_pat.success)
file_err(f, maybe_pat.value.error.start, maybe_pat.value.error.end, maybe_pat.value.error.msg);
// Follow the chain of BP_DEFINITION patterns (linked through args.def.pat),
// passing each definition's name and pattern to with_def().
for (pat_t *p = maybe_pat.value.pat; p && p->type == BP_DEFINITION; p = p->args.def.pat)
defs = with_def(defs, p->args.def.namelen, p->args.def.name, p->args.def.def);
return defs;
}

View File

@ -3,11 +3,11 @@
//
#include <ctype.h>
#include <err.h>
#include <setjmp.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <setjmp.h>
#include "files.h"
#include "pattern.h"
@ -23,10 +23,10 @@ static pat_t *bp_simplepattern(file_t *f, const char *str);
// recursive function calls when a parse error occurs.
bool is_in_try_catch = false;
static jmp_buf err_jmp;
pat_t *err_pat = NULL;
static maybe_pat_t parse_error = {.success = false};
// Begin a parse "try" region. Only the outermost region arms the jump buffer.
// When parse_err() long-jumps back here, the in-try flag is cleared before the
// recorded error is returned, so the next top-level parse arms a fresh jump
// buffer instead of jumping into a stack frame that has already returned.
#define __TRY_PATTERN__ bool was_in_try_catch = is_in_try_catch; \
    if (!is_in_try_catch) { \
        is_in_try_catch = true; \
        if (setjmp(err_jmp)) { is_in_try_catch = false; return parse_error; } \
    }
// End a parse "try" region, clearing the flag only if this region set it.
#define __END_TRY_PATTERN__ if (!was_in_try_catch) is_in_try_catch = false;
static inline void parse_err(file_t *f, const char *start, const char *end, const char *msg)
@ -35,10 +35,10 @@ static inline void parse_err(file_t *f, const char *start, const char *end, cons
fprintf(stderr, "Parse error: %s\n%.*s\n", msg, (int)(end-start), start);
exit(1);
}
err_pat = new_pat(f, start, end, 0, 0, BP_ERROR);
err_pat->args.error.start = start;
err_pat->args.error.end = end;
err_pat->args.error.msg = msg;
(void)f;
parse_error.value.error.start = start;
parse_error.value.error.end = end;
parse_error.value.error.msg = msg;
longjmp(err_jmp, 1);
}
@ -221,17 +221,21 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str)
// Any char (dot)
case '.': {
if (*str == '.') { // ".."
pat_t *skip = NULL;
str = next_char(str, f->end);
char skipper = *str;
if (matchchar(&str, '%', false) || matchchar(&str, '=', false)) {
skip = bp_simplepattern(f, str);
if (!skip)
file_err(f, str, str, "There should be a pattern to skip here after the '%c'", skipper);
str = skip->end;
enum pattype_e type = BP_UPTO;
pat_t *extra_arg = NULL;
if (matchchar(&str, '%', false)) {
extra_arg = bp_simplepattern(f, str);
if (!extra_arg)
parse_err(f, str, str, "There should be a pattern to skip here after the '%'");
} else if (matchchar(&str, '=', false)) {
extra_arg = bp_simplepattern(f, str);
if (!extra_arg)
parse_err(f, str, str, "There should be a pattern here after the '='");
type = BP_UPTO_STRICT;
}
pat_t *upto = new_pat(f, start, str, 0, -1, skipper == '=' ? BP_UPTO_STRICT : BP_UPTO);
upto->args.multiple.second = skip;
pat_t *upto = new_pat(f, start, extra_arg ? extra_arg->end : str, 0, -1, type);
upto->args.multiple.second = extra_arg;
return upto;
} else {
return new_pat(f, start, str, 1, UTF8_MAXCHARLEN, BP_ANYCHAR);
@ -249,10 +253,10 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str)
if (*str == '-') { // Range
const char *c2_loc = ++str;
if (next_char(c1_loc, f->end) > c1_loc+1 || next_char(c2_loc, f->end) > c2_loc+1)
file_err(f, start, next_char(c2_loc, f->end), "Sorry, UTF-8 character ranges are not yet supported.");
parse_err(f, start, next_char(c2_loc, f->end), "Sorry, UTF-8 character ranges are not yet supported.");
char c1 = *c1_loc, c2 = *c2_loc;
if (!c2 || c2 == '\n')
file_err(f, str, str, "There should be a character here to complete the character range.");
parse_err(f, str, str, "There should be a character here to complete the character range.");
if (c1 > c2) { // Swap order
char tmp = c1;
c1 = c2;
@ -276,7 +280,7 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str)
// Escapes
case '\\': {
if (!*str || *str == '\n')
file_err(f, str, str, "There should be an escape sequence here after this backslash.");
parse_err(f, str, str, "There should be an escape sequence here after this backslash.");
pat_t *all = NULL;
do { // Comma-separated items:
@ -298,18 +302,18 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str)
const char *opstart = str;
unsigned char e_low = (unsigned char)unescapechar(str, &str);
if (str == opstart)
file_err(f, start, str+1, "This isn't a valid escape sequence.");
parse_err(f, start, str+1, "This isn't a valid escape sequence.");
unsigned char e_high = e_low;
if (*str == '-') { // Escape range (e.g. \x00-\xFF)
++str;
if (next_char(str, f->end) != str+1)
file_err(f, start, next_char(str, f->end), "Sorry, UTF8 escape sequences are not supported in ranges.");
parse_err(f, start, next_char(str, f->end), "Sorry, UTF8 escape sequences are not supported in ranges.");
const char *seqstart = str;
e_high = (unsigned char)unescapechar(str, &str);
if (str == seqstart)
file_err(f, seqstart, str+1, "This value isn't a valid escape sequence");
parse_err(f, seqstart, str+1, "This value isn't a valid escape sequence");
if (e_high < e_low)
file_err(f, start, str, "Escape ranges should be low-to-high, but this is high-to-low.");
parse_err(f, start, str, "Escape ranges should be low-to-high, but this is high-to-low.");
}
pat_t *esc = new_pat(f, start, str, 1, 1, BP_RANGE);
esc->args.range.low = e_low;
@ -339,7 +343,7 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str)
// Not <pat>
case '!': {
pat_t *p = bp_simplepattern(f, str);
if (!p) file_err(f, str, str, "There should be a pattern after this '!'");
if (!p) parse_err(f, str, str, "There should be a pattern after this '!'");
pat_t *not = new_pat(f, start, p->end, 0, 0, BP_NOT);
not->args.pat = p;
return not;
@ -364,13 +368,13 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str)
}
pat_t *repeating = bp_simplepattern(f, str);
if (!repeating)
file_err(f, str, str, "There should be a pattern after this repetition count.");
parse_err(f, str, str, "There should be a pattern after this repetition count.");
str = repeating->end;
pat_t *sep = NULL;
if (matchchar(&str, '%', false)) {
sep = bp_simplepattern(f, str);
if (!sep)
file_err(f, str, str, "There should be a separator pattern after this '%%'");
parse_err(f, str, str, "There should be a separator pattern after this '%%'");
str = sep->end;
} else {
str = repeating->end;
@ -381,7 +385,7 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str)
case '<': {
pat_t *behind = bp_simplepattern(f, str);
if (!behind)
file_err(f, str, str, "There should be a pattern after this '<'");
parse_err(f, str, str, "There should be a pattern after this '<'");
str = behind->end;
str = behind->end;
pat_t *pat = new_pat(f, start, str, 0, 0, BP_AFTER);
@ -392,7 +396,7 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str)
case '>': {
pat_t *ahead = bp_simplepattern(f, str);
if (!ahead)
file_err(f, str, str, "There should be a pattern after this '>'");
parse_err(f, str, str, "There should be a pattern after this '>'");
str = ahead->end;
pat_t *pat = new_pat(f, start, str, 0, 0, BP_BEFORE);
pat->args.pat = ahead;
@ -402,9 +406,9 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str)
case '(': {
pat_t *pat = bp_pattern_nl(f, str, true);
if (!pat)
file_err(f, str, str, "There should be a valid pattern after this parenthesis.");
parse_err(f, str, str, "There should be a valid pattern after this parenthesis.");
str = pat->end;
if (!matchchar(&str, ')', true)) file_err(f, str, str, "Missing paren: )");
if (!matchchar(&str, ')', true)) parse_err(f, str, str, "Missing paren: )");
pat->start = start;
pat->end = str;
return pat;
@ -413,7 +417,7 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str)
case '[': {
pat_t *maybe = bp_pattern_nl(f, str, true);
if (!maybe)
file_err(f, str, str, "There should be a valid pattern after this square bracket.");
parse_err(f, str, str, "There should be a valid pattern after this square bracket.");
str = maybe->end;
(void)matchchar(&str, ']', true);
return new_range(f, start, str, 0, 1, maybe, NULL);
@ -423,13 +427,13 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str)
size_t min = (size_t)(c == '*' ? 0 : 1);
pat_t *repeating = bp_simplepattern(f, str);
if (!repeating)
file_err(f, str, str, "There should be a valid pattern here after the '%c'", c);
parse_err(f, str, str, "There should be a valid pattern to repeat here");
str = repeating->end;
pat_t *sep = NULL;
if (matchchar(&str, '%', false)) {
sep = bp_simplepattern(f, str);
if (!sep)
file_err(f, str, str, "There should be a separator pattern after the '%%' here.");
parse_err(f, str, str, "There should be a separator pattern after the '%%' here.");
str = sep->end;
}
return new_range(f, start, str, min, -1, repeating, sep);
@ -447,7 +451,7 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str)
}
pat_t *pat = bp_simplepattern(f, str);
if (!pat)
file_err(f, str, str, "There should be a valid pattern here to capture after the '@'");
parse_err(f, str, str, "There should be a valid pattern here to capture after the '@'");
pat_t *capture = new_pat(f, start, pat->end, pat->min_matchlen, pat->max_matchlen, BP_CAPTURE);
capture->args.capture.capture_pat = pat;
@ -474,7 +478,7 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str)
size_t namelen = (size_t)(str - start);
if (matchchar(&str, ':', false)) { // Definitions
pat_t *def = bp_pattern_nl(f, str, false);
if (!def) file_err(f, str, f->end, "Could not parse this definition.");
if (!def) parse_err(f, str, f->end, "Could not parse this definition.");
str = def->end;
(void)matchchar(&str, ';', false); // Optional semicolon
str = after_spaces(str, true);
@ -499,7 +503,7 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str)
//
// Similar to bp_simplepattern, except that the pattern begins with an implicit, unclosable quote.
//
pat_t *bp_stringpattern(file_t *f, const char *str)
maybe_pat_t bp_stringpattern(file_t *f, const char *str)
{
__TRY_PATTERN__
pat_t *ret = NULL;
@ -530,8 +534,9 @@ pat_t *bp_stringpattern(file_t *f, const char *str)
(void)matchchar(&str, ';', false);
}
}
if (!ret) ret = new_pat(f, str, str, 0, 0, BP_STRING);
__END_TRY_PATTERN__
return ret;
return (maybe_pat_t){.success = true, .value.pat = ret};
}
//
@ -555,7 +560,7 @@ static pat_t *bp_simplepattern(file_t *f, const char *str)
pat_t *first = pat;
pat_t *second = bp_simplepattern(f, str);
if (!second)
file_err(f, str, str, "The '%s' operator expects a pattern before and after.", type == BP_MATCH ? "~" : "!~");
parse_err(f, str, str, "There should be a valid pattern here");
pat = new_pat(f, str, second->end, first->min_matchlen, first->max_matchlen, type);
pat->args.multiple.first = first;
@ -570,7 +575,7 @@ static pat_t *bp_simplepattern(file_t *f, const char *str)
// Given a pattern and a replacement string, compile the two into a BP
// replace pattern.
//
pat_t *bp_replacement(file_t *f, pat_t *replacepat, const char *replacement)
maybe_pat_t bp_replacement(file_t *f, pat_t *replacepat, const char *replacement)
{
pat_t *pat = new_pat(f, replacepat->start, replacepat->end, replacepat->min_matchlen, replacepat->max_matchlen, BP_REPLACE);
pat->args.replace.pat = replacepat;
@ -579,7 +584,7 @@ pat_t *bp_replacement(file_t *f, pat_t *replacepat, const char *replacement)
for (; p < f->end; p++) {
if (*p == '\\') {
if (!p[1] || p[1] == '\n')
file_err(f, p, p, "There should be an escape sequence or pattern here after this backslash.");
parse_err(f, p, p, "There should be an escape sequence or pattern here after this backslash.");
++p;
}
}
@ -589,7 +594,7 @@ pat_t *bp_replacement(file_t *f, pat_t *replacepat, const char *replacement)
memcpy(rcpy, replacement, rlen);
pat->args.replace.text = rcpy;
pat->args.replace.len = rlen;
return pat;
return (maybe_pat_t){.success = true, .value.pat = pat};
}
static pat_t *bp_pattern_nl(file_t *f, const char *str, bool allow_nl)
@ -605,12 +610,17 @@ static pat_t *bp_pattern_nl(file_t *f, const char *str, bool allow_nl)
//
// Compile a string representing a BP pattern into a pattern object.
// The result has `.success = true` and `.value.pat` set when a pattern was
// parsed and it reaches the end of the file (ignoring whatever after_spaces()
// skips). Otherwise `.success = false` and `.value.error` holds the span that
// could not be parsed together with a message.
//
pat_t *bp_pattern(file_t *f, const char *str)
maybe_pat_t bp_pattern(file_t *f, const char *str)
{
__TRY_PATTERN__
pat_t *ret = bp_pattern_nl(f, str, false);
__END_TRY_PATTERN__
return ret;
if (ret && after_spaces(ret->end, true) < f->end)
return (maybe_pat_t){.success = false, .value.error.start = ret->end, .value.error.end = f->end, .value.error.msg = "Failed to parse this part of the pattern"};
else if (ret)
return (maybe_pat_t){.success = true, .value.pat = ret};
else
return (maybe_pat_t){.success = false, .value.error.start = str, .value.error.end = f->end, .value.error.msg = "Failed to parse this pattern"};
}
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0

View File

@ -7,18 +7,29 @@
#include "files.h"
#include "types.h"
// Result of building a pattern: either the compiled pattern, or a description
// of where and why parsing failed.
typedef struct {
    // Selects which member of `value` is meaningful.
    bool success;
    union {
        // The compiled pattern (when `success` is true).
        pat_t *pat;
        // The failing span and its message (when `success` is false).
        struct {
            const char *start;
            const char *end;
            const char *msg;
        } error;
    } value;
} maybe_pat_t;
__attribute__((returns_nonnull, nonnull(1,2)))
pat_t *new_pat(file_t *f, const char *start, const char *end, size_t minlen, ssize_t maxlen, enum pattype_e type);
__attribute__((nonnull))
pat_t *bp_stringpattern(file_t *f, const char *str);
maybe_pat_t bp_stringpattern(file_t *f, const char *str);
__attribute__((nonnull(1,2)))
pat_t *bp_replacement(file_t *f, pat_t *replacepat, const char *replacement);
maybe_pat_t bp_replacement(file_t *f, pat_t *replacepat, const char *replacement);
__attribute__((nonnull(1)))
pat_t *chain_together(file_t *f, pat_t *first, pat_t *second);
__attribute__((nonnull(1)))
pat_t *either_pat(file_t *f, pat_t *first, pat_t *second);
__attribute__((nonnull))
pat_t *bp_pattern(file_t *f, const char *str);
maybe_pat_t bp_pattern(file_t *f, const char *str);
#endif
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0

View File

@ -37,7 +37,6 @@ enum pattype_e {
BP_WORD_BOUNDARY = 24,
BP_DEFINITION = 25,
BP_LEFTRECURSION = 26,
BP_ERROR = 27,
};
struct match_s; // forward declared to resolve circular struct defs