From 16c401fbbb4f22a29afe3d63c8105cc8f33061d0 Mon Sep 17 00:00:00 2001 From: Bruce Hill Date: Tue, 19 Jan 2021 21:35:34 -0800 Subject: [PATCH] Simplified code by disallowing escapes in strings and splitting stringpatterns into chunks (so unescaped versions don't need to be created) --- files.c | 1 - pattern.c | 81 +++++++++++++++---------------------------------------- pattern.h | 2 -- utils.c | 22 +-------------- utils.h | 4 +-- 5 files changed, 23 insertions(+), 87 deletions(-) diff --git a/files.c b/files.c index c554371..0d4664a 100644 --- a/files.c +++ b/files.c @@ -147,7 +147,6 @@ void destroy_file(file_t **f) for (allocated_pat_t *next; (*f)->pats; (*f)->pats = next) { next = (*f)->pats->next; - destroy_pat(&(*f)->pats->pat); xfree(&(*f)->pats); } diff --git a/pattern.c b/pattern.c index 2a054e5..658f132 100644 --- a/pattern.c +++ b/pattern.c @@ -225,6 +225,7 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str) case '`': { pat_t *all = NULL; do { + const char *charloc = str; c = *str; if (!c || c == '\n') file_err(f, str, str, "There should be a character here after the '`'"); @@ -247,9 +248,7 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str) ++str; } else { pat = new_pat(f, opstart, BP_STRING); - char *s = xcalloc(sizeof(char), 2); - s[0] = c; - pat->args.string = s; + pat->args.string = charloc; } pat->len = 1; @@ -309,28 +308,14 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str) case '"': case '\'': case '\002': { char endquote = c == '\002' ? '\003' : c; char *litstart = (char*)str; - for (; *str && *str != endquote; str++) { - if (*str == '\\') { - if (!str[1] || str[1] == '\n') - file_err(f, str, str+1, - "There should be an escape sequence after this backslash."); - ++str; - } - } - size_t len = (size_t)(str - litstart); - char *literal = xcalloc(sizeof(char), len+1); - memcpy(literal, litstart, len); - // Note: an unescaped string is guaranteed to be no longer than the - // escaped string, so this is safe to do inplace. - len = unescape_string(literal, literal, len); + for (; *str != endquote; ++str) + if (str >= f->end) + file_err(f, start, str, "This string doesn't have a closing %c-quote.", endquote); + ++str; pat_t *pat = new_pat(f, start, BP_STRING); - pat->len = (ssize_t)len; - pat->args.string = literal; - - if (!matchchar(&str, endquote)) - file_err(f, start, str, "This string doesn't have a closing quote."); - + pat->len = (ssize_t)((str - 1) - litstart); + pat->args.string = litstart; pat->end = str; return pat; } @@ -502,43 +487,28 @@ pat_t *bp_stringpattern(file_t *f, const char *str) while (*str) { char *start = (char*)str; pat_t *interp = NULL; - for (; *str; str++) { - if (*str == '\\') { - if (!str[1] || str[1] == '\n') - file_err(f, str, str, "There should be an escape sequence or pattern here after this backslash."); - - if (matchchar(&str, 'N')) { // \N (nodent) - interp = new_pat(f, str-2, BP_NODENT); + for (; str < f->end; str++) { + if (*str == '\\' && str+1 < f->end) { + char e = unescapechar(&str[1], NULL); + // If there is not a special escape sequence (\n, \x0A, \N, + // etc.) or \\, then check for an interpolated value: + if (e != str[1] || e == '\\' || e == 'N') { + interp = bp_simplepattern(f, str); + check(interp, "Failed to match pattern %.*s", 2, str); break; - } - - const char *after_escape; - char e = unescapechar(&str[1], &after_escape); - // If there is not a special escape sequence (\n, \x0A, etc.) - // or \\, \", \', \`, then check for an interpolated value: - // The special cases for single and double quotes aren't - // needed, but there's no known legitimate use case for - // interpolating a literal string, and users might escape - // quotes out of paranoia, and we want to support that. String - // literal interpolations can be done with \("...") anyways. - if (e == str[1] && e != '\'' && e != '"' && e != '\\' && e != '`') { + } else { interp = bp_simplepattern(f, str + 1); if (interp) break; + // If there is no interpolated value, this is just a plain ol' regular backslash } - str = after_escape - 1; // Otherwise treat as a literal character } } // End of string - size_t len = (size_t)(str - start); - char *literal = xcalloc(sizeof(char), len+1); - memcpy(literal, start, len); - // Note: an unescaped string is guaranteed to be no longer than the - // escaped string, so this is safe to do inplace. - len = unescape_string(literal, literal, len); + ssize_t len = (ssize_t)(str - start); if (len > 0) { pat_t *strop = new_pat(f, str, BP_STRING); - strop->len = (ssize_t)len; - strop->args.string = literal; + strop->len = len; + strop->args.string = start; strop->end = str; ret = chain_together(f, ret, strop); } @@ -604,13 +574,4 @@ def_t *bp_definition(def_t *defs, file_t *f, const char *str) return with_def(defs, namelen, name, defpat); } -// -// Deallocate memory referenced inside a pattern struct -// -void destroy_pat(pat_t *pat) -{ - if (pat->type == BP_STRING) - xfree(&pat->args.string); -} - // vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1 diff --git a/pattern.h b/pattern.h index 497fe76..bcb9eac 100644 --- a/pattern.h +++ b/pattern.h @@ -19,8 +19,6 @@ __attribute__((nonnull)) pat_t *bp_pattern(file_t *f, const char *str); __attribute__((nonnull)) def_t *bp_definition(def_t *defs, file_t *f, const char *str); -__attribute__((nonnull)) -void destroy_pat(pat_t *pat); #endif // vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1 diff --git a/utils.c b/utils.c index 08a5b6f..63a06fe 100644 --- a/utils.c +++ b/utils.c @@ -114,30 +114,10 @@ char unescapechar(const char *escaped, const char **end) } default: break; } - *end = &escaped[len]; + if (end) *end = &escaped[len]; return (char)ret; } -// -// Write an unescaped version of `src` to `dest` (at most bufsize-1 chars, -// terminated by a null byte) -// -size_t unescape_string(char *dest, const char *src, size_t bufsize) -{ - size_t len = 0; - while (*src && len < bufsize) { - if (*src == '\\') { - ++src; - *(dest++) = unescapechar(src, &src); - } else { - *(dest++) = *(src++); - } - ++len; - } - *dest = '\0'; - return len; -} - // // Fail and exit if a memory value is NULL // diff --git a/utils.h b/utils.h index 4c3a065..17da86e 100644 --- a/utils.h +++ b/utils.h @@ -18,7 +18,7 @@ #define xcalloc(a,b) memcheck(calloc(a,b)) #define xrealloc(a,b) memcheck(realloc(a,b)) -__attribute__((nonnull)) +__attribute__((nonnull(1))) char unescapechar(const char *escaped, const char **end); __attribute__((pure, nonnull)) const char *after_name(const char *str); @@ -28,8 +28,6 @@ __attribute__((nonnull)) bool matchchar(const char **str, char c); __attribute__((nonnull)) bool matchstr(const char **str, const char *target); -__attribute__((nonnull)) -size_t unescape_string(char *dest, const char *src, size_t bufsize); __attribute__((returns_nonnull)) void *memcheck(/*@null@*/ /*@out@*/ void *p); __attribute__((nonnull))