From 1727d9b75c93051c3f46d3154256c8f3a78911a6 Mon Sep 17 00:00:00 2001 From: Bruce Hill Date: Thu, 23 Sep 2021 17:46:46 -0700 Subject: Use strncasecmp and memmem instead of custom logic. --- match.c | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) (limited to 'match.c') diff --git a/match.c b/match.c index c4faeb7..f8205d4 100644 --- a/match.c +++ b/match.c @@ -2,6 +2,7 @@ // match.c - Code for the BP virtual machine that performs the matching. // +#include <ctype.h> #include #include #include @@ -257,22 +258,23 @@ static match_t *_next_match(def_t *defs, cache_t *cache, file_t *f, const char * pat_t *first = first_pat(defs, pat); // Performance optimization: if the pattern starts with a string literal, - // we can just rely on the highly optimized strstr()/strcasestr() - // implementations to skip past areas where we know we won't find a match. - if (!skip && first->type == BP_STRING) { - for (size_t i = 0; i < first->min_matchlen; i++) - if (first->args.string[i] == '\0') - goto pattern_search; - char *tmp = strndup(first->args.string, first->min_matchlen); - char *found = (ignorecase ? strcasestr : strstr)(str, tmp); - if (found) - str = found; - else - str += strlen(str); // Use += strlen here instead of f->end to handle files with NULL bytes - free(tmp); - } - - pattern_search: + // we can just rely on the highly optimized memmem() implementation to skip + // past areas where we know we won't find a match. + if (!skip && first->type == BP_STRING && first->min_matchlen > 0) { + if (ignorecase) { + char c1 = first->args.string[0]; + char *upper = memchr(str, toupper(c1), (size_t)(str - f->end)); + char *lower = isalpha(c1) ? memchr(str, tolower(c1), (size_t)(str - f->end)) : NULL; + if (upper && lower) + str = upper < lower ? upper : lower; + else if (upper) str = upper; + else if (lower) str = lower; + } else { + char *found = memmem(str, (size_t)(str - f->end), first->args.string, first->min_matchlen); + str = found ? 
found : f->end; + } + } + if (str > f->end) return NULL; do { @@ -341,7 +343,7 @@ static match_t *match(def_t *defs, cache_t *cache, file_t *f, const char *str, p } case BP_STRING: { if (&str[pat->min_matchlen] > f->end) return NULL; - if (pat->min_matchlen > 0 && (ignorecase ? memicmp : memcmp)(str, pat->args.string, pat->min_matchlen) != 0) + if (pat->min_matchlen > 0 && (ignorecase ? strncasecmp : strncmp)(str, pat->args.string, pat->min_matchlen) != 0) return NULL; return new_match(defs, pat, str, str + pat->min_matchlen, NULL); } -- cgit v1.2.3 From 6ab22ad6a90b96f159a0d78499a49fd62989cb15 Mon Sep 17 00:00:00 2001 From: Bruce Hill Date: Thu, 23 Sep 2021 17:50:16 -0700 Subject: Bugfix --- match.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'match.c') diff --git a/match.c b/match.c index f8205d4..ac8543e 100644 --- a/match.c +++ b/match.c @@ -270,7 +270,7 @@ static match_t *_next_match(def_t *defs, cache_t *cache, file_t *f, const char * else if (upper) str = upper; else if (lower) str = lower; } else { - char *found = memmem(str, (size_t)(str - f->end), first->args.string, first->min_matchlen); + char *found = memmem(str, (size_t)(f->end - str), first->args.string, first->min_matchlen); str = found ? found : f->end; } } -- cgit v1.2.3