aboutsummaryrefslogtreecommitdiff
path: root/match.c
diff options
context:
space:
mode:
authorBruce Hill <bruce@bruce-hill.com>2021-05-20 15:27:24 -0700
committerBruce Hill <bruce@bruce-hill.com>2021-05-20 15:27:24 -0700
commit655ed121289c0befa2c87f7a6a2db3409f54094c (patch)
treea575edd664c5feb418389fb2fb22c6d627b04927 /match.c
parenta0028e96055669ab83dd6ff9e14b0e728a121d7d (diff)
Mostly working version
Diffstat (limited to 'match.c')
-rw-r--r--match.c56
1 files changed, 18 insertions, 38 deletions
diff --git a/match.c b/match.c
index 9f31975..6791025 100644
--- a/match.c
+++ b/match.c
@@ -13,6 +13,7 @@
#include "match.h"
#include "types.h"
#include "utils.h"
+#include "utf8.h"
#ifdef DEBUG_HEAP
// Doubly-linked list operations:
@@ -38,8 +39,6 @@ static match_t *in_use_matches = NULL;
static inline pat_t *deref(def_t *defs, pat_t *pat);
__attribute__((returns_nonnull))
static match_t *new_match(pat_t *pat, const char *start, const char *end, match_t *child);
-__attribute__((nonnull, pure))
-static inline const char *next_char(file_t *f, const char *str);
__attribute__((nonnull))
static const char *match_backref(const char *str, match_t *cap, bool ignorecase);
__attribute__((nonnull))
@@ -61,25 +60,6 @@ static inline pat_t *deref(def_t *defs, pat_t *pat)
}
return pat;
}
-//
-// Return the location of the next character or UTF8 codepoint.
-// (i.e. skip forward one codepoint at a time, not one byte at a time)
-//
-static inline const char *next_char(file_t *f, const char *str)
-{
- char c = *str;
- ++str;
- if (__builtin_expect(!(c & 0x80), 1))
- return str;
-
- if (__builtin_expect(str < f->end && !!(*str & 0x80), 1))
- ++str;
- if (c > '\xDF' && __builtin_expect(str < f->end && !!(*str & 0x80), 1))
- ++str;
- if (c > '\xEF' && __builtin_expect(str < f->end && !!(*str & 0x80), 1))
- ++str;
- return str;
-}
//
// Attempt to match text against a previously captured value.
@@ -200,10 +180,10 @@ static match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, bool
return (str == f->end || *str == '\n') ? new_match(pat, str, str, NULL) : NULL;
}
case BP_STRING: {
- if (&str[pat->len] > f->end) return NULL;
- if (pat->len > 0 && (ignorecase ? memicmp : memcmp)(str, pat->args.string, (size_t)pat->len) != 0)
+ if (&str[pat->min_matchlen] > f->end) return NULL;
+ if (pat->min_matchlen > 0 && (ignorecase ? memicmp : memcmp)(str, pat->args.string, (size_t)pat->min_matchlen) != 0)
return NULL;
- return new_match(pat, str, str + pat->len, NULL);
+ return new_match(pat, str, str + pat->min_matchlen, NULL);
}
case BP_RANGE: {
if (str >= f->end) return NULL;
@@ -315,28 +295,27 @@ static match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, bool
return m;
}
case BP_AFTER: {
- ssize_t backtrack = pat->args.pat->len;
- if (backtrack == -1)
- errx(EXIT_FAILURE, "'<' is only allowed for fixed-length operations");
- if (str - backtrack < f->contents) return NULL;
- match_t *before = match(defs, f, str - backtrack, pat->args.pat, ignorecase);
- if (before == NULL) return NULL;
- return new_match(pat, str, str, before);
+ pat_t *back = deref(defs, pat->args.pat);
+ for (const char *pos = &str[-back->min_matchlen];
+ pos >= f->contents && (back->max_matchlen == -1 || pos >= &str[-back->max_matchlen]);
+ pos = prev_char(f, pos)) {
+ match_t *m = match(defs, f, pos, back, ignorecase);
+ if (m) return new_match(pat, str, str, m);
+ if (pos == f->contents) break;
+ }
+ return NULL;
}
case BP_BEFORE: {
match_t *after = match(defs, f, str, pat->args.pat, ignorecase);
- if (after == NULL) return NULL;
- return new_match(pat, str, str, after);
+ return after ? new_match(pat, str, str, after) : NULL;
}
case BP_CAPTURE: {
match_t *p = match(defs, f, str, pat->args.pat, ignorecase);
- if (p == NULL) return NULL;
- return new_match(pat, str, p->end, p);
+ return p ? new_match(pat, str, p->end, p) : NULL;
}
case BP_OTHERWISE: {
match_t *m = match(defs, f, str, pat->args.multiple.first, ignorecase);
- if (m == NULL) m = match(defs, f, str, pat->args.multiple.second, ignorecase);
- return m;
+ return m ? match(defs, f, str, pat->args.multiple.second, ignorecase) : NULL;
}
case BP_CHAIN: {
match_t *m1 = match(defs, f, str, pat->args.multiple.first, ignorecase);
@@ -400,7 +379,8 @@ static match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, bool
.type = BP_LEFTRECURSION,
.start = ref->start,
.end = ref->end,
- .len = 0,
+ .min_matchlen = 0,
+ .max_matchlen = -1,
.args.leftrec = {
.match = NULL,
.visits = 0,