diff options
| -rw-r--r-- | match.c | 10 | ||||
| -rw-r--r-- | pattern.c | 68 | ||||
| -rw-r--r-- | utils.c | 48 | ||||
| -rw-r--r-- | utils.h | 12 |
4 files changed, 75 insertions, 63 deletions
@@ -911,10 +911,10 @@ void fprint_match(FILE *out, const char *file_start, match_t *m, print_options_t int n = (int)strtol(next, (char**)&next, 10); cap = get_numbered_capture(m->children[0], n); } else { - const char *name = next, *end = after_name(next); - if (end > name) { - cap = get_named_capture(m->children[0], name, (size_t)(end - name)); - next = end; + const char *name = next, *name_end = after_name(next, end); + if (name_end) { + cap = get_named_capture(m->children[0], name, (size_t)(name_end - name)); + next = name_end; if (next < m->end && *next == ';') ++next; } } @@ -941,7 +941,7 @@ void fprint_match(FILE *out, const char *file_start, match_t *m, print_options_t fputc(*p, out); continue; } - fputc_safe(out, unescapechar(r, &r), opts); + fputc_safe(out, unescapechar(r, &r, end), opts); } else { fputc_safe(out, *r, opts); ++r; @@ -90,7 +90,7 @@ static pat_t *new_range(const char *start, const char *end, size_t min, ssize_t __attribute__((nonnull)) static pat_t *expand_chain(pat_t *first, const char *end, bool allow_nl) { - const char *str = after_spaces(first->end, allow_nl); + const char *str = after_spaces(first->end, allow_nl, end); pat_t *second = bp_simplepattern(str, end); if (second == NULL) return first; second = expand_chain(second, end, allow_nl); @@ -104,11 +104,11 @@ __attribute__((nonnull)) static pat_t *expand_replacements(pat_t *replace_pat, const char *end, bool allow_nl) { const char *str = replace_pat->end; - while (matchstr(&str, "=>", allow_nl)) { + while (matchstr(&str, "=>", allow_nl, end)) { const char *repstr; size_t replen; - if (matchchar(&str, '"', allow_nl) || matchchar(&str, '\'', allow_nl) - || matchchar(&str, '{', allow_nl) || matchchar(&str, '\002', allow_nl)) { + if (matchchar(&str, '"', allow_nl, end) || matchchar(&str, '\'', allow_nl, end) + || matchchar(&str, '{', allow_nl, end) || matchchar(&str, '\002', allow_nl, end)) { char closequote = str[-1] == '{' ? '}' : (str[-1] == '\002' ? '\003' : str[-1]); repstr = str; for (; str < end && *str != closequote; str = next_char(str, end)) { @@ -120,7 +120,7 @@ static pat_t *expand_replacements(pat_t *replace_pat, const char *end, bool allo } } replen = (size_t)(str-repstr); - (void)matchchar(&str, closequote, true); + (void)matchchar(&str, closequote, true, end); } else { repstr = ""; replen = 0; @@ -147,11 +147,11 @@ static pat_t *expand_choices(pat_t *first, const char *end, bool allow_nl) first = expand_chain(first, end, allow_nl); first = expand_replacements(first, end, allow_nl); const char *str = first->end; - if (!matchchar(&str, '/', allow_nl)) return first; - str = after_spaces(str, allow_nl); + if (!matchchar(&str, '/', allow_nl, end)) return first; + str = after_spaces(str, allow_nl, end); pat_t *second = bp_simplepattern(str, end); if (second) str = second->end; - if (matchstr(&str, "=>", allow_nl)) + if (matchstr(&str, "=>", allow_nl, end)) second = expand_replacements(second ? second : new_pat(BP_STRING, str-2, str-2, 0, 0), end, allow_nl); if (!second) parse_err(str, str, "There should be a pattern here after a '/'"); @@ -225,18 +225,18 @@ __attribute__((nonnull)) static pat_t *_bp_definition(const char *start, const char *end) { if (start >= end || !(isalpha(*start) || *start == '_')) return NULL; - const char *str = after_name(start); + const char *str = after_name(start, end); size_t namelen = (size_t)(str - start); - if (!matchchar(&str, ':', false)) return NULL; + if (!matchchar(&str, ':', false, end)) return NULL; pat_t *def = bp_pattern_nl(str, end, false); if (!def) parse_err(str, end, "Could not parse this definition."); str = def->end; - (void)matchchar(&str, ';', false); // Optional semicolon + (void)matchchar(&str, ';', false, end); // Optional semicolon pat_t *ret = new_pat(BP_DEFINITIONS, start, str, 0, -1); ret->args.def.name = start; ret->args.def.namelen = namelen; ret->args.def.meaning = def; - ret->args.def.next_def = _bp_definition(after_spaces(str, true), end); + ret->args.def.next_def = _bp_definition(after_spaces(str, true, end), end); if (ret->args.def.next_def) ret->end = ret->args.def.next_def->end; return ret; @@ -248,7 +248,7 @@ static pat_t *_bp_definition(const char *start, const char *end) __attribute__((nonnull)) static pat_t *_bp_simplepattern(const char *str, const char *end) { - str = after_spaces(str, false); + str = after_spaces(str, false, end); if (!*str) return NULL; const char *start = str; char c = *str; @@ -260,11 +260,11 @@ static pat_t *_bp_simplepattern(const char *str, const char *end) str = next_char(str, end); enum pattype_e type = BP_UPTO; pat_t *extra_arg = NULL; - if (matchchar(&str, '%', false)) { + if (matchchar(&str, '%', false, end)) { extra_arg = bp_simplepattern(str, end); if (!extra_arg) parse_err(str, str, "There should be a pattern to skip here after the '%'"); - } else if (matchchar(&str, '=', false)) { + } else if (matchchar(&str, '=', false, end)) { extra_arg = bp_simplepattern(str, end); if (!extra_arg) parse_err(str, str, "There should be a pattern here after the '='"); @@ -336,7 +336,7 @@ static pat_t *_bp_simplepattern(const char *str, const char *end) } const char *opstart = str; - unsigned char e_low = (unsigned char)unescapechar(str, &str); + unsigned char e_low = (unsigned char)unescapechar(str, &str, end); if (str == opstart) parse_err(start, str+1, "This isn't a valid escape sequence."); unsigned char e_high = e_low; @@ -345,7 +345,7 @@ static pat_t *_bp_simplepattern(const char *str, const char *end) if (next_char(str, end) != str+1) parse_err(start, next_char(str, end), "Sorry, UTF8 escape sequences are not supported in ranges."); const char *seqstart = str; - e_high = (unsigned char)unescapechar(str, &str); + e_high = (unsigned char)unescapechar(str, &str, end); if (str == seqstart) parse_err(seqstart, str+1, "This value isn't a valid escape sequence"); if (e_high < e_low) @@ -391,13 +391,13 @@ static pat_t *_bp_simplepattern(const char *str, const char *end) ssize_t max = -1; --str; long n1 = strtol(str, (char**)&str, 10); - if (matchchar(&str, '-', false)) { - str = after_spaces(str, false); + if (matchchar(&str, '-', false, end)) { + str = after_spaces(str, false, end); const char *numstart = str; long n2 = strtol(str, (char**)&str, 10); if (str == numstart) min = 0, max = (ssize_t)n1; else min = (size_t)n1, max = (ssize_t)n2; - } else if (matchchar(&str, '+', false)) { + } else if (matchchar(&str, '+', false, end)) { min = (size_t)n1, max = -1; } else { min = (size_t)n1, max = (ssize_t)n1; @@ -407,7 +407,7 @@ static pat_t *_bp_simplepattern(const char *str, const char *end) parse_err(str, str, "There should be a pattern after this repetition count."); str = repeating->end; pat_t *sep = NULL; - if (matchchar(&str, '%', false)) { + if (matchchar(&str, '%', false, end)) { sep = bp_simplepattern(str, end); if (!sep) parse_err(str, str, "There should be a separator pattern after this '%%'"); @@ -444,7 +444,7 @@ static pat_t *_bp_simplepattern(const char *str, const char *end) if (!pat) parse_err(str, str, "There should be a valid pattern after this parenthesis."); str = pat->end; - if (!matchchar(&str, ')', true)) parse_err(str, str, "Missing paren: )"); + if (!matchchar(&str, ')', true, end)) parse_err(str, str, "Missing paren: )"); pat->start = start; pat->end = str; return pat; @@ -455,7 +455,7 @@ static pat_t *_bp_simplepattern(const char *str, const char *end) if (!maybe) parse_err(str, str, "There should be a valid pattern after this square bracket."); str = maybe->end; - (void)matchchar(&str, ']', true); + (void)matchchar(&str, ']', true, end); return new_range(start, str, 0, 1, maybe, NULL); } // Repeating @@ -466,7 +466,7 @@ static pat_t *_bp_simplepattern(const char *str, const char *end) parse_err(str, str, "There should be a valid pattern to repeat here"); str = repeating->end; pat_t *sep = NULL; - if (matchchar(&str, '%', false)) { + if (matchchar(&str, '%', false, end)) { sep = bp_simplepattern(str, end); if (!sep) parse_err(str, str, "There should be a separator pattern after the '%%' here."); @@ -478,9 +478,9 @@ static pat_t *_bp_simplepattern(const char *str, const char *end) case '@': { const char *name = NULL; size_t namelen = 0; - const char *a = after_name(str); + const char *a = after_name(str, end); const char *eq = a; - if (a > str && !matchstr(&eq, "=>", false) && matchchar(&eq, '=', false)) { + if (a > str && !matchstr(&eq, "=>", false, end) && matchchar(&eq, '=', false, end)) { name = str; namelen = (size_t)(a-str); str = eq; @@ -512,7 +512,7 @@ static pat_t *_bp_simplepattern(const char *str, const char *end) if (def) return def; // Reference if (!isalpha(c) && c != '_') return NULL; - str = after_name(start); + str = after_name(start, end); size_t namelen = (size_t)(str - start); pat_t *ref = new_pat(BP_REF, start, str, 0, -1); ref->args.ref.name = start; @@ -553,7 +553,7 @@ maybe_pat_t bp_stringpattern(const char *str, const char *end) ret = chain_together(ret, interp); str = interp->end; // allow terminating seq - (void)matchchar(&str, ';', false); + (void)matchchar(&str, ';', false, end); } } if (!ret) ret = new_pat(BP_STRING, str, str, 0, 0); @@ -573,9 +573,9 @@ static pat_t *bp_simplepattern(const char *str, const char *end) // Expand postfix operators (if any) while (str < end) { enum pattype_e type; - if (matchchar(&str, '~', false)) + if (matchchar(&str, '~', false, end)) type = BP_MATCH; - else if (matchstr(&str, "!~", false)) + else if (matchstr(&str, "!~", false, end)) type = BP_NOT_MATCH; else break; @@ -621,10 +621,10 @@ maybe_pat_t bp_replacement(pat_t *replacepat, const char *replacement, const cha static pat_t *bp_pattern_nl(const char *str, const char *end, bool allow_nl) { - str = after_spaces(str, allow_nl); + str = after_spaces(str, allow_nl, end); pat_t *pat = bp_simplepattern(str, end); if (pat != NULL) pat = expand_choices(pat, end, allow_nl); - if (matchstr(&str, "=>", allow_nl)) + if (matchstr(&str, "=>", allow_nl, end)) pat = expand_replacements(pat ? pat : new_pat(BP_STRING, str-2, str-2, 0, 0), end, allow_nl); return pat; } @@ -644,11 +644,11 @@ pat_t *bp_raw_literal(const char *str, size_t len) // maybe_pat_t bp_pattern(const char *str, const char *end) { - str = after_spaces(str, true); + str = after_spaces(str, true, end); __TRY_PATTERN__ pat_t *ret = bp_pattern_nl(str, end, false); __END_TRY_PATTERN__ - if (ret && after_spaces(ret->end, true) < end) + if (ret && after_spaces(ret->end, true, end) < end) return (maybe_pat_t){.success = false, .value.error.start = ret->end, .value.error.end = end, .value.error.msg = "Failed to parse this part of the pattern"}; else if (ret) return (maybe_pat_t){.success = true, .value.pat = ret}; @@ -15,10 +15,11 @@ // Helper function to skip past all spaces (and comments) // Returns a pointer to the first non-space character. // -const char *after_spaces(const char *str, bool skip_nl) +const char *after_spaces(const char *str, bool skip_nl, const char *end) { // Skip whitespace and comments: skip_whitespace: + if (str >= end) return str; switch (*str) { case '\r': case '\n': if (!skip_nl) break; @@ -28,7 +29,7 @@ const char *after_spaces(const char *str, bool skip_nl) goto skip_whitespace; } case '#': { - while (*str && *str != '\n') ++str; + while (str < end && *str != '\n') ++str; goto skip_whitespace; } default: break; @@ -40,14 +41,15 @@ const char *after_spaces(const char *str, bool skip_nl) // Return the first character after a valid BP name, or NULL if none is // found. // -const char *after_name(const char *str) +const char *after_name(const char *str, const char *end) { + if (str >= end) return NULL; if (*str == '|') return &str[1]; if (*str == '^' || *str == '_' || *str == '$') { - return (str[1] == *str) ? &str[2] : &str[1]; + return (&str[1] < end && str[1] == *str) ? &str[2] : &str[1]; } if (!isalpha(*str)) return NULL; - for (++str; *str; ++str) { + for (++str; str < end; ++str) { if (!(isalnum(*str) || *str == '-')) break; } @@ -57,9 +59,10 @@ const char *after_name(const char *str) // // Check if a character is found and if so, move past it. // -bool matchchar(const char **str, char c, bool skip_nl) +bool matchchar(const char **str, char c, bool skip_nl, const char *end) { - const char *next = after_spaces(*str, skip_nl); + const char *next = after_spaces(*str, skip_nl, end); + if (next >= end) return false; if (*next == c) { *str = next + 1; return true; @@ -70,9 +73,10 @@ bool matchchar(const char **str, char c, bool skip_nl) // // Check if a string is found and if so, move past it. // -bool matchstr(const char **str, const char *target, bool skip_nl) +bool matchstr(const char **str, const char *target, bool skip_nl, const char *end) { - const char *next = after_spaces(*str, skip_nl); + const char *next = after_spaces(*str, skip_nl, end); + if (next + strlen(target) > end) return false; if (strncmp(next, target, strlen(target)) == 0) { *str = &next[strlen(target)]; return true; @@ -85,10 +89,13 @@ bool matchstr(const char **str, const char *target, bool skip_nl) // character that was escaped. // Set *end = the first character past the end of the escape sequence. // -char unescapechar(const char *escaped, const char **end) +char unescapechar(const char *escaped, const char **after, const char *end) { - size_t len = 1; - unsigned char ret = (unsigned char)*escaped; + size_t len = 0; + unsigned char ret = '\\'; + if (escaped >= end) goto finished; + ret = (unsigned char)*escaped; + ++len; switch (*escaped) { case 'a': ret = '\a'; break; case 'b': ret = '\b'; break; case 'n': ret = '\n'; break; case 'r': ret = '\r'; break; @@ -101,7 +108,10 @@ char unescapechar(const char *escaped, const char **end) ['a']=0xa, ['b']=0xb, ['c']=0xc, ['d']=0xd, ['e']=0xe, ['f']=0xf, ['A']=0xa, ['B']=0xb, ['C']=0xc, ['D']=0xd, ['E']=0xe, ['F']=0xf, }; - if (hextable[(int)escaped[1]] && hextable[(int)escaped[2]]) { + if (escaped + 2 >= end) { + len = 0; + goto finished; + } else if (hextable[(int)escaped[1]] && hextable[(int)escaped[2]]) { ret = (hextable[(int)escaped[1]] << 4) | (hextable[(int)escaped[2]] & 0xF); len = 3; } @@ -109,7 +119,10 @@ char unescapechar(const char *escaped, const char **end) } case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': { // Octal ret = (unsigned char)(escaped[0] - '0'); - if ('0' <= escaped[1] && escaped[1] <= '7') { + if (escaped + 2 >= end) { + len = 0; + goto finished; + } else if ('0' <= escaped[1] && escaped[1] <= '7') { ++len; ret = (ret << 3) | (escaped[1] - '0'); if ('0' <= escaped[2] && escaped[2] <= '7') { @@ -120,10 +133,11 @@ char unescapechar(const char *escaped, const char **end) break; } default: - if (end) *end = escaped; - return '\\'; + len = 0; + goto finished; } - if (end) *end = &escaped[len]; + finished: + if (after) *after = &escaped[len]; return (char)ret; } @@ -12,8 +12,6 @@ #include <string.h> #include <unistd.h> -#include "match.h" - #define S1(x) #x #define S2(x) S1(x) #define __LOCATION__ __FILE__ ":" S2(__LINE__) @@ -48,15 +46,15 @@ DEFINE_CHECK_TYPE(_Bool, bool, b, b); #define streq(a, b) (strcmp(a, b) == 0) __attribute__((nonnull(1))) -char unescapechar(const char *escaped, const char **end); +char unescapechar(const char *escaped, const char **after, const char *end); __attribute__((pure, nonnull)) -const char *after_name(const char *str); +const char *after_name(const char *str, const char *end); __attribute__((pure, nonnull, returns_nonnull)) -const char *after_spaces(const char *str, bool skip_nl); +const char *after_spaces(const char *str, bool skip_nl, const char *end); __attribute__((nonnull)) -bool matchchar(const char **str, char c, bool skip_nl); +bool matchchar(const char **str, char c, bool skip_nl, const char *end); __attribute__((nonnull)) -bool matchstr(const char **str, const char *target, bool skip_nl); +bool matchstr(const char **str, const char *target, bool skip_nl, const char *end); __attribute__((nonnull)) void delete(void *p); |
