Simplified code by disallowing escapes in strings and splitting string
patterns into chunks (so unescaped versions don't need to be created).
This commit is contained in:
Bruce Hill 2021-01-19 21:35:34 -08:00
parent b8bb6c25ec
commit 16c401fbbb
5 changed files with 23 additions and 87 deletions

View File

@ -147,7 +147,6 @@ void destroy_file(file_t **f)
for (allocated_pat_t *next; (*f)->pats; (*f)->pats = next) { for (allocated_pat_t *next; (*f)->pats; (*f)->pats = next) {
next = (*f)->pats->next; next = (*f)->pats->next;
destroy_pat(&(*f)->pats->pat);
xfree(&(*f)->pats); xfree(&(*f)->pats);
} }

View File

@ -225,6 +225,7 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str)
case '`': { case '`': {
pat_t *all = NULL; pat_t *all = NULL;
do { do {
const char *charloc = str;
c = *str; c = *str;
if (!c || c == '\n') if (!c || c == '\n')
file_err(f, str, str, "There should be a character here after the '`'"); file_err(f, str, str, "There should be a character here after the '`'");
@ -247,9 +248,7 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str)
++str; ++str;
} else { } else {
pat = new_pat(f, opstart, BP_STRING); pat = new_pat(f, opstart, BP_STRING);
char *s = xcalloc(sizeof(char), 2); pat->args.string = charloc;
s[0] = c;
pat->args.string = s;
} }
pat->len = 1; pat->len = 1;
@ -309,28 +308,14 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str)
case '"': case '\'': case '\002': { case '"': case '\'': case '\002': {
char endquote = c == '\002' ? '\003' : c; char endquote = c == '\002' ? '\003' : c;
char *litstart = (char*)str; char *litstart = (char*)str;
for (; *str && *str != endquote; str++) { for (; *str != endquote; ++str)
if (*str == '\\') { if (str >= f->end)
if (!str[1] || str[1] == '\n') file_err(f, start, str, "This string doesn't have a closing %c-quote.", endquote);
file_err(f, str, str+1, ++str;
"There should be an escape sequence after this backslash.");
++str;
}
}
size_t len = (size_t)(str - litstart);
char *literal = xcalloc(sizeof(char), len+1);
memcpy(literal, litstart, len);
// Note: an unescaped string is guaranteed to be no longer than the
// escaped string, so this is safe to do inplace.
len = unescape_string(literal, literal, len);
pat_t *pat = new_pat(f, start, BP_STRING); pat_t *pat = new_pat(f, start, BP_STRING);
pat->len = (ssize_t)len; pat->len = (ssize_t)((str - 1) - litstart);
pat->args.string = literal; pat->args.string = litstart;
if (!matchchar(&str, endquote))
file_err(f, start, str, "This string doesn't have a closing quote.");
pat->end = str; pat->end = str;
return pat; return pat;
} }
@ -502,43 +487,28 @@ pat_t *bp_stringpattern(file_t *f, const char *str)
while (*str) { while (*str) {
char *start = (char*)str; char *start = (char*)str;
pat_t *interp = NULL; pat_t *interp = NULL;
for (; *str; str++) { for (; str < f->end; str++) {
if (*str == '\\') { if (*str == '\\' && str+1 < f->end) {
if (!str[1] || str[1] == '\n') char e = unescapechar(&str[1], NULL);
file_err(f, str, str, "There should be an escape sequence or pattern here after this backslash."); // If there is not a special escape sequence (\n, \x0A, \N,
// etc.) or \\, then check for an interpolated value:
if (matchchar(&str, 'N')) { // \N (nodent) if (e != str[1] || e == '\\' || e == 'N') {
interp = new_pat(f, str-2, BP_NODENT); interp = bp_simplepattern(f, str);
check(interp, "Failed to match pattern %.*s", 2, str);
break; break;
} } else {
const char *after_escape;
char e = unescapechar(&str[1], &after_escape);
// If there is not a special escape sequence (\n, \x0A, etc.)
// or \\, \", \', \`, then check for an interpolated value:
// The special cases for single and double quotes aren't
// needed, but there's no known legitimate use case for
// interpolating a literal string, and users might escape
// quotes out of paranoia, and we want to support that. String
// literal interpolations can be done with \("...") anyways.
if (e == str[1] && e != '\'' && e != '"' && e != '\\' && e != '`') {
interp = bp_simplepattern(f, str + 1); interp = bp_simplepattern(f, str + 1);
if (interp) break; if (interp) break;
// If there is no interpolated value, this is just a plain ol' regular backslash
} }
str = after_escape - 1; // Otherwise treat as a literal character
} }
} }
// End of string // End of string
size_t len = (size_t)(str - start); ssize_t len = (ssize_t)(str - start);
char *literal = xcalloc(sizeof(char), len+1);
memcpy(literal, start, len);
// Note: an unescaped string is guaranteed to be no longer than the
// escaped string, so this is safe to do inplace.
len = unescape_string(literal, literal, len);
if (len > 0) { if (len > 0) {
pat_t *strop = new_pat(f, str, BP_STRING); pat_t *strop = new_pat(f, str, BP_STRING);
strop->len = (ssize_t)len; strop->len = len;
strop->args.string = literal; strop->args.string = start;
strop->end = str; strop->end = str;
ret = chain_together(f, ret, strop); ret = chain_together(f, ret, strop);
} }
@ -604,13 +574,4 @@ def_t *bp_definition(def_t *defs, file_t *f, const char *str)
return with_def(defs, namelen, name, defpat); return with_def(defs, namelen, name, defpat);
} }
//
// Release the memory referenced from inside a pattern struct. Only patterns
// of type BP_STRING reference any: their `args.string` is handed to xfree().
// Patterns of every other type are left untouched.
//
void destroy_pat(pat_t *pat)
{
    if (pat->type != BP_STRING) {
        return; // nothing to release for non-string patterns
    }
    xfree(&pat->args.string);
}
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1 // vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1

View File

@ -19,8 +19,6 @@ __attribute__((nonnull))
pat_t *bp_pattern(file_t *f, const char *str); pat_t *bp_pattern(file_t *f, const char *str);
__attribute__((nonnull)) __attribute__((nonnull))
def_t *bp_definition(def_t *defs, file_t *f, const char *str); def_t *bp_definition(def_t *defs, file_t *f, const char *str);
__attribute__((nonnull))
void destroy_pat(pat_t *pat);
#endif #endif
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1 // vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1

22
utils.c
View File

@ -114,30 +114,10 @@ char unescapechar(const char *escaped, const char **end)
} }
default: break; default: break;
} }
*end = &escaped[len]; if (end) *end = &escaped[len];
return (char)ret; return (char)ret;
} }
//
// Copy `src` into `dest`, replacing every backslash escape sequence with the
// character returned by unescapechar(). Copying stops at the end of `src` or
// once `bufsize` characters have been written, whichever comes first. A null
// byte is then appended, so `dest` must have room for `bufsize`+1 bytes.
// Returns the number of characters written, not counting the null byte.
//
size_t unescape_string(char *dest, const char *src, size_t bufsize)
{
    size_t written;
    for (written = 0; *src && written < bufsize; ++written) {
        if (*src != '\\') {
            // Ordinary character: copy it through unchanged.
            dest[written] = *src++;
            continue;
        }
        // Step over the backslash; unescapechar() then moves `src` past the
        // rest of the escape sequence via its second argument.
        ++src;
        dest[written] = unescapechar(src, &src);
    }
    dest[written] = '\0';
    return written;
}
// //
// Fail and exit if a memory value is NULL // Fail and exit if a memory value is NULL
// //

View File

@ -18,7 +18,7 @@
#define xcalloc(a,b) memcheck(calloc(a,b)) #define xcalloc(a,b) memcheck(calloc(a,b))
#define xrealloc(a,b) memcheck(realloc(a,b)) #define xrealloc(a,b) memcheck(realloc(a,b))
__attribute__((nonnull)) __attribute__((nonnull(1)))
char unescapechar(const char *escaped, const char **end); char unescapechar(const char *escaped, const char **end);
__attribute__((pure, nonnull)) __attribute__((pure, nonnull))
const char *after_name(const char *str); const char *after_name(const char *str);
@ -28,8 +28,6 @@ __attribute__((nonnull))
bool matchchar(const char **str, char c); bool matchchar(const char **str, char c);
__attribute__((nonnull)) __attribute__((nonnull))
bool matchstr(const char **str, const char *target); bool matchstr(const char **str, const char *target);
__attribute__((nonnull))
size_t unescape_string(char *dest, const char *src, size_t bufsize);
__attribute__((returns_nonnull)) __attribute__((returns_nonnull))
void *memcheck(/*@null@*/ /*@out@*/ void *p); void *memcheck(/*@null@*/ /*@out@*/ void *p);
__attribute__((nonnull)) __attribute__((nonnull))