Simplified the code by disallowing escape sequences in string literals and
splitting string patterns into chunks, so that unescaped copies of the strings no longer need to be created.
This commit is contained in:
parent
b8bb6c25ec
commit
16c401fbbb
1
files.c
1
files.c
@ -147,7 +147,6 @@ void destroy_file(file_t **f)
|
||||
|
||||
for (allocated_pat_t *next; (*f)->pats; (*f)->pats = next) {
|
||||
next = (*f)->pats->next;
|
||||
destroy_pat(&(*f)->pats->pat);
|
||||
xfree(&(*f)->pats);
|
||||
}
|
||||
|
||||
|
81
pattern.c
81
pattern.c
@ -225,6 +225,7 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str)
|
||||
case '`': {
|
||||
pat_t *all = NULL;
|
||||
do {
|
||||
const char *charloc = str;
|
||||
c = *str;
|
||||
if (!c || c == '\n')
|
||||
file_err(f, str, str, "There should be a character here after the '`'");
|
||||
@ -247,9 +248,7 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str)
|
||||
++str;
|
||||
} else {
|
||||
pat = new_pat(f, opstart, BP_STRING);
|
||||
char *s = xcalloc(sizeof(char), 2);
|
||||
s[0] = c;
|
||||
pat->args.string = s;
|
||||
pat->args.string = charloc;
|
||||
}
|
||||
|
||||
pat->len = 1;
|
||||
@ -309,28 +308,14 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str)
|
||||
case '"': case '\'': case '\002': {
|
||||
char endquote = c == '\002' ? '\003' : c;
|
||||
char *litstart = (char*)str;
|
||||
for (; *str && *str != endquote; str++) {
|
||||
if (*str == '\\') {
|
||||
if (!str[1] || str[1] == '\n')
|
||||
file_err(f, str, str+1,
|
||||
"There should be an escape sequence after this backslash.");
|
||||
++str;
|
||||
}
|
||||
}
|
||||
size_t len = (size_t)(str - litstart);
|
||||
char *literal = xcalloc(sizeof(char), len+1);
|
||||
memcpy(literal, litstart, len);
|
||||
// Note: an unescaped string is guaranteed to be no longer than the
|
||||
// escaped string, so this is safe to do inplace.
|
||||
len = unescape_string(literal, literal, len);
|
||||
for (; *str != endquote; ++str)
|
||||
if (str >= f->end)
|
||||
file_err(f, start, str, "This string doesn't have a closing %c-quote.", endquote);
|
||||
++str;
|
||||
|
||||
pat_t *pat = new_pat(f, start, BP_STRING);
|
||||
pat->len = (ssize_t)len;
|
||||
pat->args.string = literal;
|
||||
|
||||
if (!matchchar(&str, endquote))
|
||||
file_err(f, start, str, "This string doesn't have a closing quote.");
|
||||
|
||||
pat->len = (ssize_t)((str - 1) - litstart);
|
||||
pat->args.string = litstart;
|
||||
pat->end = str;
|
||||
return pat;
|
||||
}
|
||||
@ -502,43 +487,28 @@ pat_t *bp_stringpattern(file_t *f, const char *str)
|
||||
while (*str) {
|
||||
char *start = (char*)str;
|
||||
pat_t *interp = NULL;
|
||||
for (; *str; str++) {
|
||||
if (*str == '\\') {
|
||||
if (!str[1] || str[1] == '\n')
|
||||
file_err(f, str, str, "There should be an escape sequence or pattern here after this backslash.");
|
||||
|
||||
if (matchchar(&str, 'N')) { // \N (nodent)
|
||||
interp = new_pat(f, str-2, BP_NODENT);
|
||||
for (; str < f->end; str++) {
|
||||
if (*str == '\\' && str+1 < f->end) {
|
||||
char e = unescapechar(&str[1], NULL);
|
||||
// If there is not a special escape sequence (\n, \x0A, \N,
|
||||
// etc.) or \\, then check for an interpolated value:
|
||||
if (e != str[1] || e == '\\' || e == 'N') {
|
||||
interp = bp_simplepattern(f, str);
|
||||
check(interp, "Failed to match pattern %.*s", 2, str);
|
||||
break;
|
||||
}
|
||||
|
||||
const char *after_escape;
|
||||
char e = unescapechar(&str[1], &after_escape);
|
||||
// If there is not a special escape sequence (\n, \x0A, etc.)
|
||||
// or \\, \", \', \`, then check for an interpolated value:
|
||||
// The special cases for single and double quotes aren't
|
||||
// needed, but there's no known legitimate use case for
|
||||
// interpolating a literal string, and users might escape
|
||||
// quotes out of paranoia, and we want to support that. String
|
||||
// literal interpolations can be done with \("...") anyways.
|
||||
if (e == str[1] && e != '\'' && e != '"' && e != '\\' && e != '`') {
|
||||
} else {
|
||||
interp = bp_simplepattern(f, str + 1);
|
||||
if (interp) break;
|
||||
// If there is no interpolated value, this is just a plain ol' regular backslash
|
||||
}
|
||||
str = after_escape - 1; // Otherwise treat as a literal character
|
||||
}
|
||||
}
|
||||
// End of string
|
||||
size_t len = (size_t)(str - start);
|
||||
char *literal = xcalloc(sizeof(char), len+1);
|
||||
memcpy(literal, start, len);
|
||||
// Note: an unescaped string is guaranteed to be no longer than the
|
||||
// escaped string, so this is safe to do inplace.
|
||||
len = unescape_string(literal, literal, len);
|
||||
ssize_t len = (ssize_t)(str - start);
|
||||
if (len > 0) {
|
||||
pat_t *strop = new_pat(f, str, BP_STRING);
|
||||
strop->len = (ssize_t)len;
|
||||
strop->args.string = literal;
|
||||
strop->len = len;
|
||||
strop->args.string = start;
|
||||
strop->end = str;
|
||||
ret = chain_together(f, ret, strop);
|
||||
}
|
||||
@ -604,13 +574,4 @@ def_t *bp_definition(def_t *defs, file_t *f, const char *str)
|
||||
return with_def(defs, namelen, name, defpat);
|
||||
}
|
||||
|
||||
//
|
||||
// Deallocate memory referenced inside a pattern struct
|
||||
//
|
||||
void destroy_pat(pat_t *pat)
|
||||
{
|
||||
if (pat->type == BP_STRING)
|
||||
xfree(&pat->args.string);
|
||||
}
|
||||
|
||||
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
|
||||
|
@ -19,8 +19,6 @@ __attribute__((nonnull))
|
||||
pat_t *bp_pattern(file_t *f, const char *str);
|
||||
__attribute__((nonnull))
|
||||
def_t *bp_definition(def_t *defs, file_t *f, const char *str);
|
||||
__attribute__((nonnull))
|
||||
void destroy_pat(pat_t *pat);
|
||||
|
||||
#endif
|
||||
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
|
||||
|
22
utils.c
22
utils.c
@ -114,30 +114,10 @@ char unescapechar(const char *escaped, const char **end)
|
||||
}
|
||||
default: break;
|
||||
}
|
||||
*end = &escaped[len];
|
||||
if (end) *end = &escaped[len];
|
||||
return (char)ret;
|
||||
}
|
||||
|
||||
//
// Write an unescaped version of `src` to `dest`.
// At most `bufsize` characters are written, followed by a terminating null
// byte, so `dest` must have room for `bufsize`+1 bytes.
// Returns the number of characters written, not counting the terminator.
// Each backslash in `src` is skipped and the text after it is decoded by
// unescapechar(); every other character is copied through unchanged.
// NOTE(review): existing callers appear to pass the same buffer as both
// `dest` and `src` (in-place unescaping) -- confirm before adding `restrict`.
//
size_t unescape_string(char *dest, const char *src, size_t bufsize)
{
    size_t len = 0;
    while (*src && len < bufsize) {
        // Only treat a backslash as an escape when something follows it.
        // A backslash directly before the terminator is copied literally, so
        // unescapechar() is never asked to decode the terminator itself.
        // NOTE(review): unescapechar() looks like it would then advance `src`
        // past the end of the string -- confirm.
        if (*src == '\\' && src[1]) {
            ++src;
            *(dest++) = unescapechar(src, &src);
        } else {
            *(dest++) = *(src++);
        }
        ++len;
    }
    *dest = '\0';
    return len;
}
|
||||
|
||||
//
|
||||
// Fail and exit if a memory value is NULL
|
||||
//
|
||||
|
4
utils.h
4
utils.h
@ -18,7 +18,7 @@
|
||||
#define xcalloc(a,b) memcheck(calloc(a,b))
|
||||
#define xrealloc(a,b) memcheck(realloc(a,b))
|
||||
|
||||
__attribute__((nonnull))
|
||||
__attribute__((nonnull(1)))
|
||||
char unescapechar(const char *escaped, const char **end);
|
||||
__attribute__((pure, nonnull))
|
||||
const char *after_name(const char *str);
|
||||
@ -28,8 +28,6 @@ __attribute__((nonnull))
|
||||
bool matchchar(const char **str, char c);
|
||||
__attribute__((nonnull))
|
||||
bool matchstr(const char **str, const char *target);
|
||||
__attribute__((nonnull))
|
||||
size_t unescape_string(char *dest, const char *src, size_t bufsize);
|
||||
__attribute__((returns_nonnull))
|
||||
void *memcheck(/*@null@*/ /*@out@*/ void *p);
|
||||
__attribute__((nonnull))
|
||||
|
Loading…
Reference in New Issue
Block a user