Hardening utils (added *end param to avoid going past the end of
unterminated strings)
This commit is contained in:
parent
46fa856529
commit
a61efe2cf0
10
match.c
10
match.c
@ -911,10 +911,10 @@ void fprint_match(FILE *out, const char *file_start, match_t *m, print_options_t
|
||||
int n = (int)strtol(next, (char**)&next, 10);
|
||||
cap = get_numbered_capture(m->children[0], n);
|
||||
} else {
|
||||
const char *name = next, *end = after_name(next);
|
||||
if (end > name) {
|
||||
cap = get_named_capture(m->children[0], name, (size_t)(end - name));
|
||||
next = end;
|
||||
const char *name = next, *name_end = after_name(next, end);
|
||||
if (name_end) {
|
||||
cap = get_named_capture(m->children[0], name, (size_t)(name_end - name));
|
||||
next = name_end;
|
||||
if (next < m->end && *next == ';') ++next;
|
||||
}
|
||||
}
|
||||
@ -941,7 +941,7 @@ void fprint_match(FILE *out, const char *file_start, match_t *m, print_options_t
|
||||
fputc(*p, out);
|
||||
continue;
|
||||
}
|
||||
fputc_safe(out, unescapechar(r, &r), opts);
|
||||
fputc_safe(out, unescapechar(r, &r, end), opts);
|
||||
} else {
|
||||
fputc_safe(out, *r, opts);
|
||||
++r;
|
||||
|
68
pattern.c
68
pattern.c
@ -90,7 +90,7 @@ static pat_t *new_range(const char *start, const char *end, size_t min, ssize_t
|
||||
__attribute__((nonnull))
|
||||
static pat_t *expand_chain(pat_t *first, const char *end, bool allow_nl)
|
||||
{
|
||||
const char *str = after_spaces(first->end, allow_nl);
|
||||
const char *str = after_spaces(first->end, allow_nl, end);
|
||||
pat_t *second = bp_simplepattern(str, end);
|
||||
if (second == NULL) return first;
|
||||
second = expand_chain(second, end, allow_nl);
|
||||
@ -104,11 +104,11 @@ __attribute__((nonnull))
|
||||
static pat_t *expand_replacements(pat_t *replace_pat, const char *end, bool allow_nl)
|
||||
{
|
||||
const char *str = replace_pat->end;
|
||||
while (matchstr(&str, "=>", allow_nl)) {
|
||||
while (matchstr(&str, "=>", allow_nl, end)) {
|
||||
const char *repstr;
|
||||
size_t replen;
|
||||
if (matchchar(&str, '"', allow_nl) || matchchar(&str, '\'', allow_nl)
|
||||
|| matchchar(&str, '{', allow_nl) || matchchar(&str, '\002', allow_nl)) {
|
||||
if (matchchar(&str, '"', allow_nl, end) || matchchar(&str, '\'', allow_nl, end)
|
||||
|| matchchar(&str, '{', allow_nl, end) || matchchar(&str, '\002', allow_nl, end)) {
|
||||
char closequote = str[-1] == '{' ? '}' : (str[-1] == '\002' ? '\003' : str[-1]);
|
||||
repstr = str;
|
||||
for (; str < end && *str != closequote; str = next_char(str, end)) {
|
||||
@ -120,7 +120,7 @@ static pat_t *expand_replacements(pat_t *replace_pat, const char *end, bool allo
|
||||
}
|
||||
}
|
||||
replen = (size_t)(str-repstr);
|
||||
(void)matchchar(&str, closequote, true);
|
||||
(void)matchchar(&str, closequote, true, end);
|
||||
} else {
|
||||
repstr = "";
|
||||
replen = 0;
|
||||
@ -147,11 +147,11 @@ static pat_t *expand_choices(pat_t *first, const char *end, bool allow_nl)
|
||||
first = expand_chain(first, end, allow_nl);
|
||||
first = expand_replacements(first, end, allow_nl);
|
||||
const char *str = first->end;
|
||||
if (!matchchar(&str, '/', allow_nl)) return first;
|
||||
str = after_spaces(str, allow_nl);
|
||||
if (!matchchar(&str, '/', allow_nl, end)) return first;
|
||||
str = after_spaces(str, allow_nl, end);
|
||||
pat_t *second = bp_simplepattern(str, end);
|
||||
if (second) str = second->end;
|
||||
if (matchstr(&str, "=>", allow_nl))
|
||||
if (matchstr(&str, "=>", allow_nl, end))
|
||||
second = expand_replacements(second ? second : new_pat(BP_STRING, str-2, str-2, 0, 0), end, allow_nl);
|
||||
if (!second)
|
||||
parse_err(str, str, "There should be a pattern here after a '/'");
|
||||
@ -225,18 +225,18 @@ __attribute__((nonnull))
|
||||
static pat_t *_bp_definition(const char *start, const char *end)
|
||||
{
|
||||
if (start >= end || !(isalpha(*start) || *start == '_')) return NULL;
|
||||
const char *str = after_name(start);
|
||||
const char *str = after_name(start, end);
|
||||
size_t namelen = (size_t)(str - start);
|
||||
if (!matchchar(&str, ':', false)) return NULL;
|
||||
if (!matchchar(&str, ':', false, end)) return NULL;
|
||||
pat_t *def = bp_pattern_nl(str, end, false);
|
||||
if (!def) parse_err(str, end, "Could not parse this definition.");
|
||||
str = def->end;
|
||||
(void)matchchar(&str, ';', false); // Optional semicolon
|
||||
(void)matchchar(&str, ';', false, end); // Optional semicolon
|
||||
pat_t *ret = new_pat(BP_DEFINITIONS, start, str, 0, -1);
|
||||
ret->args.def.name = start;
|
||||
ret->args.def.namelen = namelen;
|
||||
ret->args.def.meaning = def;
|
||||
ret->args.def.next_def = _bp_definition(after_spaces(str, true), end);
|
||||
ret->args.def.next_def = _bp_definition(after_spaces(str, true, end), end);
|
||||
if (ret->args.def.next_def)
|
||||
ret->end = ret->args.def.next_def->end;
|
||||
return ret;
|
||||
@ -248,7 +248,7 @@ static pat_t *_bp_definition(const char *start, const char *end)
|
||||
__attribute__((nonnull))
|
||||
static pat_t *_bp_simplepattern(const char *str, const char *end)
|
||||
{
|
||||
str = after_spaces(str, false);
|
||||
str = after_spaces(str, false, end);
|
||||
if (!*str) return NULL;
|
||||
const char *start = str;
|
||||
char c = *str;
|
||||
@ -260,11 +260,11 @@ static pat_t *_bp_simplepattern(const char *str, const char *end)
|
||||
str = next_char(str, end);
|
||||
enum pattype_e type = BP_UPTO;
|
||||
pat_t *extra_arg = NULL;
|
||||
if (matchchar(&str, '%', false)) {
|
||||
if (matchchar(&str, '%', false, end)) {
|
||||
extra_arg = bp_simplepattern(str, end);
|
||||
if (!extra_arg)
|
||||
parse_err(str, str, "There should be a pattern to skip here after the '%'");
|
||||
} else if (matchchar(&str, '=', false)) {
|
||||
} else if (matchchar(&str, '=', false, end)) {
|
||||
extra_arg = bp_simplepattern(str, end);
|
||||
if (!extra_arg)
|
||||
parse_err(str, str, "There should be a pattern here after the '='");
|
||||
@ -336,7 +336,7 @@ static pat_t *_bp_simplepattern(const char *str, const char *end)
|
||||
}
|
||||
|
||||
const char *opstart = str;
|
||||
unsigned char e_low = (unsigned char)unescapechar(str, &str);
|
||||
unsigned char e_low = (unsigned char)unescapechar(str, &str, end);
|
||||
if (str == opstart)
|
||||
parse_err(start, str+1, "This isn't a valid escape sequence.");
|
||||
unsigned char e_high = e_low;
|
||||
@ -345,7 +345,7 @@ static pat_t *_bp_simplepattern(const char *str, const char *end)
|
||||
if (next_char(str, end) != str+1)
|
||||
parse_err(start, next_char(str, end), "Sorry, UTF8 escape sequences are not supported in ranges.");
|
||||
const char *seqstart = str;
|
||||
e_high = (unsigned char)unescapechar(str, &str);
|
||||
e_high = (unsigned char)unescapechar(str, &str, end);
|
||||
if (str == seqstart)
|
||||
parse_err(seqstart, str+1, "This value isn't a valid escape sequence");
|
||||
if (e_high < e_low)
|
||||
@ -391,13 +391,13 @@ static pat_t *_bp_simplepattern(const char *str, const char *end)
|
||||
ssize_t max = -1;
|
||||
--str;
|
||||
long n1 = strtol(str, (char**)&str, 10);
|
||||
if (matchchar(&str, '-', false)) {
|
||||
str = after_spaces(str, false);
|
||||
if (matchchar(&str, '-', false, end)) {
|
||||
str = after_spaces(str, false, end);
|
||||
const char *numstart = str;
|
||||
long n2 = strtol(str, (char**)&str, 10);
|
||||
if (str == numstart) min = 0, max = (ssize_t)n1;
|
||||
else min = (size_t)n1, max = (ssize_t)n2;
|
||||
} else if (matchchar(&str, '+', false)) {
|
||||
} else if (matchchar(&str, '+', false, end)) {
|
||||
min = (size_t)n1, max = -1;
|
||||
} else {
|
||||
min = (size_t)n1, max = (ssize_t)n1;
|
||||
@ -407,7 +407,7 @@ static pat_t *_bp_simplepattern(const char *str, const char *end)
|
||||
parse_err(str, str, "There should be a pattern after this repetition count.");
|
||||
str = repeating->end;
|
||||
pat_t *sep = NULL;
|
||||
if (matchchar(&str, '%', false)) {
|
||||
if (matchchar(&str, '%', false, end)) {
|
||||
sep = bp_simplepattern(str, end);
|
||||
if (!sep)
|
||||
parse_err(str, str, "There should be a separator pattern after this '%%'");
|
||||
@ -444,7 +444,7 @@ static pat_t *_bp_simplepattern(const char *str, const char *end)
|
||||
if (!pat)
|
||||
parse_err(str, str, "There should be a valid pattern after this parenthesis.");
|
||||
str = pat->end;
|
||||
if (!matchchar(&str, ')', true)) parse_err(str, str, "Missing paren: )");
|
||||
if (!matchchar(&str, ')', true, end)) parse_err(str, str, "Missing paren: )");
|
||||
pat->start = start;
|
||||
pat->end = str;
|
||||
return pat;
|
||||
@ -455,7 +455,7 @@ static pat_t *_bp_simplepattern(const char *str, const char *end)
|
||||
if (!maybe)
|
||||
parse_err(str, str, "There should be a valid pattern after this square bracket.");
|
||||
str = maybe->end;
|
||||
(void)matchchar(&str, ']', true);
|
||||
(void)matchchar(&str, ']', true, end);
|
||||
return new_range(start, str, 0, 1, maybe, NULL);
|
||||
}
|
||||
// Repeating
|
||||
@ -466,7 +466,7 @@ static pat_t *_bp_simplepattern(const char *str, const char *end)
|
||||
parse_err(str, str, "There should be a valid pattern to repeat here");
|
||||
str = repeating->end;
|
||||
pat_t *sep = NULL;
|
||||
if (matchchar(&str, '%', false)) {
|
||||
if (matchchar(&str, '%', false, end)) {
|
||||
sep = bp_simplepattern(str, end);
|
||||
if (!sep)
|
||||
parse_err(str, str, "There should be a separator pattern after the '%%' here.");
|
||||
@ -478,9 +478,9 @@ static pat_t *_bp_simplepattern(const char *str, const char *end)
|
||||
case '@': {
|
||||
const char *name = NULL;
|
||||
size_t namelen = 0;
|
||||
const char *a = after_name(str);
|
||||
const char *a = after_name(str, end);
|
||||
const char *eq = a;
|
||||
if (a > str && !matchstr(&eq, "=>", false) && matchchar(&eq, '=', false)) {
|
||||
if (a > str && !matchstr(&eq, "=>", false, end) && matchchar(&eq, '=', false, end)) {
|
||||
name = str;
|
||||
namelen = (size_t)(a-str);
|
||||
str = eq;
|
||||
@ -512,7 +512,7 @@ static pat_t *_bp_simplepattern(const char *str, const char *end)
|
||||
if (def) return def;
|
||||
// Reference
|
||||
if (!isalpha(c) && c != '_') return NULL;
|
||||
str = after_name(start);
|
||||
str = after_name(start, end);
|
||||
size_t namelen = (size_t)(str - start);
|
||||
pat_t *ref = new_pat(BP_REF, start, str, 0, -1);
|
||||
ref->args.ref.name = start;
|
||||
@ -553,7 +553,7 @@ maybe_pat_t bp_stringpattern(const char *str, const char *end)
|
||||
ret = chain_together(ret, interp);
|
||||
str = interp->end;
|
||||
// Allow an optional ';' to terminate the interpolated pattern
|
||||
(void)matchchar(&str, ';', false);
|
||||
(void)matchchar(&str, ';', false, end);
|
||||
}
|
||||
}
|
||||
if (!ret) ret = new_pat(BP_STRING, str, str, 0, 0);
|
||||
@ -573,9 +573,9 @@ static pat_t *bp_simplepattern(const char *str, const char *end)
|
||||
// Expand postfix operators (if any)
|
||||
while (str < end) {
|
||||
enum pattype_e type;
|
||||
if (matchchar(&str, '~', false))
|
||||
if (matchchar(&str, '~', false, end))
|
||||
type = BP_MATCH;
|
||||
else if (matchstr(&str, "!~", false))
|
||||
else if (matchstr(&str, "!~", false, end))
|
||||
type = BP_NOT_MATCH;
|
||||
else break;
|
||||
|
||||
@ -621,10 +621,10 @@ maybe_pat_t bp_replacement(pat_t *replacepat, const char *replacement, const cha
|
||||
|
||||
static pat_t *bp_pattern_nl(const char *str, const char *end, bool allow_nl)
|
||||
{
|
||||
str = after_spaces(str, allow_nl);
|
||||
str = after_spaces(str, allow_nl, end);
|
||||
pat_t *pat = bp_simplepattern(str, end);
|
||||
if (pat != NULL) pat = expand_choices(pat, end, allow_nl);
|
||||
if (matchstr(&str, "=>", allow_nl))
|
||||
if (matchstr(&str, "=>", allow_nl, end))
|
||||
pat = expand_replacements(pat ? pat : new_pat(BP_STRING, str-2, str-2, 0, 0), end, allow_nl);
|
||||
return pat;
|
||||
}
|
||||
@ -644,11 +644,11 @@ pat_t *bp_raw_literal(const char *str, size_t len)
|
||||
//
|
||||
maybe_pat_t bp_pattern(const char *str, const char *end)
|
||||
{
|
||||
str = after_spaces(str, true);
|
||||
str = after_spaces(str, true, end);
|
||||
__TRY_PATTERN__
|
||||
pat_t *ret = bp_pattern_nl(str, end, false);
|
||||
__END_TRY_PATTERN__
|
||||
if (ret && after_spaces(ret->end, true) < end)
|
||||
if (ret && after_spaces(ret->end, true, end) < end)
|
||||
return (maybe_pat_t){.success = false, .value.error.start = ret->end, .value.error.end = end, .value.error.msg = "Failed to parse this part of the pattern"};
|
||||
else if (ret)
|
||||
return (maybe_pat_t){.success = true, .value.pat = ret};
|
||||
|
48
utils.c
48
utils.c
@ -15,10 +15,11 @@
|
||||
// Helper function to skip past all spaces (and comments)
|
||||
// Returns a pointer to the first non-space character.
|
||||
//
|
||||
const char *after_spaces(const char *str, bool skip_nl)
|
||||
const char *after_spaces(const char *str, bool skip_nl, const char *end)
|
||||
{
|
||||
// Skip whitespace and comments:
|
||||
skip_whitespace:
|
||||
if (str >= end) return str;
|
||||
switch (*str) {
|
||||
case '\r': case '\n':
|
||||
if (!skip_nl) break;
|
||||
@ -28,7 +29,7 @@ const char *after_spaces(const char *str, bool skip_nl)
|
||||
goto skip_whitespace;
|
||||
}
|
||||
case '#': {
|
||||
while (*str && *str != '\n') ++str;
|
||||
while (str < end && *str != '\n') ++str;
|
||||
goto skip_whitespace;
|
||||
}
|
||||
default: break;
|
||||
@ -40,14 +41,15 @@ const char *after_spaces(const char *str, bool skip_nl)
|
||||
// Return a pointer to the first character after a valid BP name, or NULL if none is
|
||||
// found.
|
||||
//
|
||||
const char *after_name(const char *str)
|
||||
const char *after_name(const char *str, const char *end)
|
||||
{
|
||||
if (str >= end) return NULL;
|
||||
if (*str == '|') return &str[1];
|
||||
if (*str == '^' || *str == '_' || *str == '$') {
|
||||
return (str[1] == *str) ? &str[2] : &str[1];
|
||||
return (&str[1] < end && str[1] == *str) ? &str[2] : &str[1];
|
||||
}
|
||||
if (!isalpha(*str)) return NULL;
|
||||
for (++str; *str; ++str) {
|
||||
for (++str; str < end; ++str) {
|
||||
if (!(isalnum(*str) || *str == '-'))
|
||||
break;
|
||||
}
|
||||
@ -57,9 +59,10 @@ const char *after_name(const char *str)
|
||||
//
|
||||
// Check if a character is found and if so, move past it.
|
||||
//
|
||||
bool matchchar(const char **str, char c, bool skip_nl)
|
||||
bool matchchar(const char **str, char c, bool skip_nl, const char *end)
|
||||
{
|
||||
const char *next = after_spaces(*str, skip_nl);
|
||||
const char *next = after_spaces(*str, skip_nl, end);
|
||||
if (next >= end) return false;
|
||||
if (*next == c) {
|
||||
*str = next + 1;
|
||||
return true;
|
||||
@ -70,9 +73,10 @@ bool matchchar(const char **str, char c, bool skip_nl)
|
||||
//
|
||||
// Check if a string is found and if so, move past it.
|
||||
//
|
||||
bool matchstr(const char **str, const char *target, bool skip_nl)
|
||||
bool matchstr(const char **str, const char *target, bool skip_nl, const char *end)
|
||||
{
|
||||
const char *next = after_spaces(*str, skip_nl);
|
||||
const char *next = after_spaces(*str, skip_nl, end);
|
||||
if (next + strlen(target) > end) return false;
|
||||
if (strncmp(next, target, strlen(target)) == 0) {
|
||||
*str = &next[strlen(target)];
|
||||
return true;
|
||||
@ -85,10 +89,13 @@ bool matchstr(const char **str, const char *target, bool skip_nl)
|
||||
// character that was escaped.
|
||||
// Set *after = the first character past the end of the escape sequence.
|
||||
//
|
||||
char unescapechar(const char *escaped, const char **end)
|
||||
char unescapechar(const char *escaped, const char **after, const char *end)
|
||||
{
|
||||
size_t len = 1;
|
||||
unsigned char ret = (unsigned char)*escaped;
|
||||
size_t len = 0;
|
||||
unsigned char ret = '\\';
|
||||
if (escaped >= end) goto finished;
|
||||
ret = (unsigned char)*escaped;
|
||||
++len;
|
||||
switch (*escaped) {
|
||||
case 'a': ret = '\a'; break; case 'b': ret = '\b'; break;
|
||||
case 'n': ret = '\n'; break; case 'r': ret = '\r'; break;
|
||||
@ -101,7 +108,10 @@ char unescapechar(const char *escaped, const char **end)
|
||||
['a']=0xa, ['b']=0xb, ['c']=0xc, ['d']=0xd, ['e']=0xe, ['f']=0xf,
|
||||
['A']=0xa, ['B']=0xb, ['C']=0xc, ['D']=0xd, ['E']=0xe, ['F']=0xf,
|
||||
};
|
||||
if (hextable[(int)escaped[1]] && hextable[(int)escaped[2]]) {
|
||||
if (escaped + 2 >= end) {
|
||||
len = 0;
|
||||
goto finished;
|
||||
} else if (hextable[(int)escaped[1]] && hextable[(int)escaped[2]]) {
|
||||
ret = (hextable[(int)escaped[1]] << 4) | (hextable[(int)escaped[2]] & 0xF);
|
||||
len = 3;
|
||||
}
|
||||
@ -109,7 +119,10 @@ char unescapechar(const char *escaped, const char **end)
|
||||
}
|
||||
case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': { // Octal
|
||||
ret = (unsigned char)(escaped[0] - '0');
|
||||
if ('0' <= escaped[1] && escaped[1] <= '7') {
|
||||
if (escaped + 2 >= end) {
|
||||
len = 0;
|
||||
goto finished;
|
||||
} else if ('0' <= escaped[1] && escaped[1] <= '7') {
|
||||
++len;
|
||||
ret = (ret << 3) | (escaped[1] - '0');
|
||||
if ('0' <= escaped[2] && escaped[2] <= '7') {
|
||||
@ -120,10 +133,11 @@ char unescapechar(const char *escaped, const char **end)
|
||||
break;
|
||||
}
|
||||
default:
|
||||
if (end) *end = escaped;
|
||||
return '\\';
|
||||
len = 0;
|
||||
goto finished;
|
||||
}
|
||||
if (end) *end = &escaped[len];
|
||||
finished:
|
||||
if (after) *after = &escaped[len];
|
||||
return (char)ret;
|
||||
}
|
||||
|
||||
|
12
utils.h
12
utils.h
@ -12,8 +12,6 @@
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "match.h"
|
||||
|
||||
#define S1(x) #x
|
||||
#define S2(x) S1(x)
|
||||
#define __LOCATION__ __FILE__ ":" S2(__LINE__)
|
||||
@ -48,15 +46,15 @@ DEFINE_CHECK_TYPE(_Bool, bool, b, b);
|
||||
#define streq(a, b) (strcmp(a, b) == 0)
|
||||
|
||||
__attribute__((nonnull(1)))
|
||||
char unescapechar(const char *escaped, const char **end);
|
||||
char unescapechar(const char *escaped, const char **after, const char *end);
|
||||
__attribute__((pure, nonnull))
|
||||
const char *after_name(const char *str);
|
||||
const char *after_name(const char *str, const char *end);
|
||||
__attribute__((pure, nonnull, returns_nonnull))
|
||||
const char *after_spaces(const char *str, bool skip_nl);
|
||||
const char *after_spaces(const char *str, bool skip_nl, const char *end);
|
||||
__attribute__((nonnull))
|
||||
bool matchchar(const char **str, char c, bool skip_nl);
|
||||
bool matchchar(const char **str, char c, bool skip_nl, const char *end);
|
||||
__attribute__((nonnull))
|
||||
bool matchstr(const char **str, const char *target, bool skip_nl);
|
||||
bool matchstr(const char **str, const char *target, bool skip_nl, const char *end);
|
||||
__attribute__((nonnull))
|
||||
void delete(void *p);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user