From 655ed121289c0befa2c87f7a6a2db3409f54094c Mon Sep 17 00:00:00 2001 From: Bruce Hill Date: Thu, 20 May 2021 15:27:24 -0700 Subject: Mostly working version --- Makefile | 2 +- definitions.c | 2 +- grammars/builtins.bp | 3 +- match.c | 56 +++++++--------------- pattern.c | 130 ++++++++++++++++++++++++++------------------------- pattern.h | 2 +- types.h | 7 ++- utf8.h | 42 +++++++++++++++++ 8 files changed, 136 insertions(+), 108 deletions(-) create mode 100644 utf8.h diff --git a/Makefile b/Makefile index bb740e8..fb13b81 100644 --- a/Makefile +++ b/Makefile @@ -14,7 +14,7 @@ OBJFILES=$(CFILES:.c=.o) all: $(NAME) bp.1 -%.o: %.c %.h types.h +%.o: %.c %.h types.h utf8.h $(CC) -c $(ALL_FLAGS) -o $@ $< $(NAME): $(OBJFILES) bp.c diff --git a/definitions.c b/definitions.c index 76293f2..55be12b 100644 --- a/definitions.c +++ b/definitions.c @@ -71,7 +71,7 @@ def_t *lookup(def_t *defs, size_t namelen, const char *name) def_t *with_backref(def_t *defs, file_t *f, size_t namelen, const char *name, match_t *m) { // TODO: maybe calculate length? (nontrivial because of replacements) - pat_t *backref = new_pat(f, m->start, m->end, -1, BP_BACKREF); + pat_t *backref = new_pat(f, m->start, m->end, 0, -1, BP_BACKREF); backref->args.backref = m; return with_def(defs, namelen, name, backref); } diff --git a/grammars/builtins.bp b/grammars/builtins.bp index 42a41ac..cf642e2 100644 --- a/grammars/builtins.bp +++ b/grammars/builtins.bp @@ -24,8 +24,7 @@ id: left-id-edge !`0-9 !(keyword left-id-edge) +id-char id-char: `a-z,A-Z,_,0-9 var: id keyword: !"" # No keywords defined by default -left-word-edge: ^ / <(\x00-x7f!~(^^word-char)) / <((\xc0-xdf \x80-xbf)!~(^^word-char)) - / <((\xe0-xef 2\x80-xbf)!~(^^word-char)) / <((\xf0-xf7 3\x80-xbf)!~(^^word-char)) +left-word-edge: !end && !!(*str & 0x80), 1)) - ++str; - if (c > '\xDF' && __builtin_expect(str < f->end && !!(*str & 0x80), 1)) - ++str; - if (c > '\xEF' && __builtin_expect(str < f->end && !!(*str & 0x80), 1)) - ++str; - return str; -} // // Attempt to match text against a previously captured value. @@ -200,10 +180,10 @@ static match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, bool return (str == f->end || *str == '\n') ? new_match(pat, str, str, NULL) : NULL; } case BP_STRING: { - if (&str[pat->len] > f->end) return NULL; - if (pat->len > 0 && (ignorecase ? memicmp : memcmp)(str, pat->args.string, (size_t)pat->len) != 0) + if (&str[pat->min_matchlen] > f->end) return NULL; + if (pat->min_matchlen > 0 && (ignorecase ? memicmp : memcmp)(str, pat->args.string, (size_t)pat->min_matchlen) != 0) return NULL; - return new_match(pat, str, str + pat->len, NULL); + return new_match(pat, str, str + pat->min_matchlen, NULL); } case BP_RANGE: { if (str >= f->end) return NULL; @@ -315,28 +295,27 @@ static match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, bool return m; } case BP_AFTER: { - ssize_t backtrack = pat->args.pat->len; - if (backtrack == -1) - errx(EXIT_FAILURE, "'<' is only allowed for fixed-length operations"); - if (str - backtrack < f->contents) return NULL; - match_t *before = match(defs, f, str - backtrack, pat->args.pat, ignorecase); - if (before == NULL) return NULL; - return new_match(pat, str, str, before); + pat_t *back = deref(defs, pat->args.pat); + for (const char *pos = &str[-back->min_matchlen]; + pos >= f->contents && (back->max_matchlen == -1 || pos >= &str[-back->max_matchlen]); + pos = prev_char(f, pos)) { + match_t *m = match(defs, f, pos, back, ignorecase); + if (m) return new_match(pat, str, str, m); + if (pos == f->contents) break; + } + return NULL; } case BP_BEFORE: { match_t *after = match(defs, f, str, pat->args.pat, ignorecase); - if (after == NULL) return NULL; - return new_match(pat, str, str, after); + return after ? new_match(pat, str, str, after) : NULL; } case BP_CAPTURE: { match_t *p = match(defs, f, str, pat->args.pat, ignorecase); - if (p == NULL) return NULL; - return new_match(pat, str, p->end, p); + return p ? new_match(pat, str, p->end, p) : NULL; } case BP_OTHERWISE: { match_t *m = match(defs, f, str, pat->args.multiple.first, ignorecase); - if (m == NULL) m = match(defs, f, str, pat->args.multiple.second, ignorecase); - return m; + return m ? match(defs, f, str, pat->args.multiple.second, ignorecase) : NULL; } case BP_CHAIN: { match_t *m1 = match(defs, f, str, pat->args.multiple.first, ignorecase); @@ -400,7 +379,8 @@ static match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, bool .type = BP_LEFTRECURSION, .start = ref->start, .end = ref->end, - .len = 0, + .min_matchlen = 0, + .max_matchlen = -1, .args.leftrec = { .match = NULL, .visits = 0, diff --git a/pattern.c b/pattern.c index 7108ab1..be0bb48 100644 --- a/pattern.c +++ b/pattern.c @@ -13,6 +13,7 @@ #include "files.h" #include "pattern.h" #include "utils.h" +#include "utf8.h" __attribute__((nonnull(1,2))) static pat_t *expand_replacements(file_t *f, const char *str, pat_t *replace_pat); @@ -31,7 +32,7 @@ static pat_t *bp_simplepattern(file_t *f, const char *str); // Allocate a new pattern for this file (ensuring it will be automatically // freed when the file is freed) // -pat_t *new_pat(file_t *f, const char *start, const char *end, ssize_t len, enum pattype_e type) +pat_t *new_pat(file_t *f, const char *start, const char *end, size_t minlen, ssize_t maxlen, enum pattype_e type) { allocated_pat_t *tracker = new(allocated_pat_t); tracker->next = f->pats; @@ -39,7 +40,8 @@ pat_t *new_pat(file_t *f, const char *start, const char *end, ssize_t len, enum tracker->pat.type = type; tracker->pat.start = start; tracker->pat.end = end; - tracker->pat.len = len; + tracker->pat.min_matchlen = minlen; + tracker->pat.max_matchlen = maxlen; return &tracker->pat; } @@ -48,13 +50,10 @@ pat_t *new_pat(file_t *f, const char *start, const char *end, ssize_t len, enum // static pat_t *new_range(file_t *f, const char *start, const char *end, size_t min, ssize_t max, pat_t *repeating, pat_t *sep) { - pat_t *range = new_pat(f, start, end, -1, BP_REPEAT); - if ((ssize_t)min == max && repeating->len >= 0) { - if (sep == NULL || max == 0) - range->len = repeating->len * max; - else if (sep->len >= 0) - range->len = repeating->len * max + sep->len * (max - 1); - } + size_t minlen = min*repeating->min_matchlen + (min > 0 ? min-1 : 0)*(sep ? sep->min_matchlen : 0); + ssize_t maxlen = (max == -1 || UNBOUNDED(repeating) || (max != 0 && max != 1 && sep && UNBOUNDED(sep))) ? -1 + : max*repeating->max_matchlen + (ssize_t)(max > 0 ? min-1 : 0)*(ssize_t)(sep ? sep->min_matchlen : 0); + pat_t *range = new_pat(f, start, end, minlen, maxlen, BP_REPEAT); range->args.repetitions.min = min; range->args.repetitions.max = max; range->args.repetitions.repeat_pat = repeating; @@ -93,13 +92,14 @@ static pat_t *expand_replacements(file_t *f, const char *str, pat_t *replace_pat if (!str[1] || str[1] == '\n') file_err(f, str, str+1, "There should be an escape sequence after this backslash."); - ++str; + str = next_char(f, str); } } (void)matchchar(&str, quote); - if (replace_pat == NULL) replace_pat = new_pat(f, start, start, 0, BP_STRING); - pat_t *pat = new_pat(f, replace_pat->start, str, replace_pat->len, BP_REPLACE); + if (replace_pat == NULL) replace_pat = new_pat(f, start, start, 0, 0, BP_STRING); + pat_t *pat = new_pat(f, replace_pat->start, str, replace_pat->min_matchlen, + replace_pat->max_matchlen, BP_REPLACE); pat->args.replace.pat = replace_pat; pat->args.replace.text = repstr; pat->args.replace.len = (size_t)(str-repstr-1); @@ -134,8 +134,9 @@ pat_t *chain_together(file_t *f, pat_t *first, pat_t *second) { if (first == NULL) return second; if (second == NULL) return first; - ssize_t len = (first->len >= 0 && second->len >= 0) ? first->len + second->len : -1; - pat_t *chain = new_pat(f, first->start, second->end, len, BP_CHAIN); + size_t minlen = first->min_matchlen + second->min_matchlen; + ssize_t maxlen = (UNBOUNDED(first) || UNBOUNDED(second)) ? -1 : first->max_matchlen + second->max_matchlen; + pat_t *chain = new_pat(f, first->start, second->end, minlen, maxlen, BP_CHAIN); chain->args.multiple.first = first; chain->args.multiple.second = second; @@ -144,6 +145,7 @@ pat_t *chain_together(file_t *f, pat_t *first, pat_t *second) for (pat_t *p = first; p; ) { if (p->type == BP_UPTO) { p->args.multiple.first = second; + p->min_matchlen = second->min_matchlen; break; } else if (p->type == BP_CAPTURE) { p = p->args.capture.capture_pat; @@ -164,8 +166,10 @@ pat_t *either_pat(file_t *f, pat_t *first, pat_t *second) { if (first == NULL) return second; if (second == NULL) return first; - ssize_t len = first->len == second->len ? first->len : -1; - pat_t *either = new_pat(f, first->start, second->end, len, BP_OTHERWISE); + size_t minlen = first->min_matchlen < second->min_matchlen ? first->min_matchlen : second->min_matchlen; + ssize_t maxlen = (UNBOUNDED(first) || UNBOUNDED(second)) ? -1 : + (first->max_matchlen > second->max_matchlen ? first->max_matchlen : second->max_matchlen); + pat_t *either = new_pat(f, first->start, second->end, minlen, maxlen, BP_OTHERWISE); either->args.multiple.first = first; either->args.multiple.second = second; return either; @@ -197,7 +201,7 @@ static pat_t *bp_simplepattern(file_t *f, const char *str) if (!second) file_err(f, str, str, "The '%s' operator expects a pattern before and after.", type == BP_MATCH ? "~" : "!~"); - pat = new_pat(f, str, second->end, first->len, type); + pat = new_pat(f, str, second->end, first->min_matchlen, first->max_matchlen, type); pat->args.multiple.first = first; pat->args.multiple.second = second; str = pat->end; @@ -216,54 +220,56 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str) if (!*str) return NULL; const char *start = str; char c = *str; - ++str; + str = next_char(f, str); switch (c) { // Any char (dot) case '.': { if (*str == '.') { // ".." pat_t *skip = NULL; - ++str; + str = next_char(f, str); if (matchchar(&str, '%')) { skip = bp_simplepattern(f, str); if (!skip) file_err(f, str, str, "There should be a pattern to skip here after the '%%'"); str = skip->end; } - pat_t *upto = new_pat(f, start, str, -1, BP_UPTO); + pat_t *upto = new_pat(f, start, str, 0, -1, BP_UPTO); upto->args.multiple.second = skip; return upto; } else { - return new_pat(f, start, str, 1, BP_ANYCHAR); + return new_pat(f, start, str, 1, UTF8_MAXCHARLEN, BP_ANYCHAR); } } // Char literals case '`': { pat_t *all = NULL; do { - const char *charloc = str; - c = *str; - if (!c || c == '\n') + if (str >= f->end || !*str || *str == '\n') file_err(f, str, str, "There should be a character here after the '`'"); - const char *opstart = str-1; - ++str; + const char *c1_loc = str; + str = next_char(f, c1_loc); if (matchchar(&str, '-')) { // Range + if (&str[-1] - c1_loc > 1 || next_char(f, str) > str+1) + file_err(f, start, next_char(f, str), "Sorry, UTF-8 character ranges are not yet supported."); + char c1 = *c1_loc; char c2 = *str; if (!c2 || c2 == '\n') file_err(f, str, str, "There should be a character here to complete the character range."); - if (c > c2) { // Swap order - char tmp = c; - c = c2; + if (c1 > c2) { // Swap order + char tmp = c1; + c1 = c2; c2 = tmp; } - ++str; - pat_t *pat = new_pat(f, opstart, str, 1, BP_RANGE); - pat->args.range.low = (unsigned char)c; + str = next_char(f, str); + pat_t *pat = new_pat(f, start, str, 1, 1, BP_RANGE); + pat->args.range.low = (unsigned char)c1; pat->args.range.high = (unsigned char)c2; all = either_pat(f, all, pat); } else { - pat_t *pat = new_pat(f, opstart, str, 1, BP_STRING); - pat->args.string = charloc; + size_t len = (size_t)(str - start - 1); + pat_t *pat = new_pat(f, start, str, len, (ssize_t)len, BP_STRING); + pat->args.string = c1_loc; all = either_pat(f, all, pat); } } while (matchchar(&str, ',')); @@ -276,24 +282,26 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str) file_err(f, str, str, "There should be an escape sequence here after this backslash."); if (matchchar(&str, 'N')) // \N (nodent) - return new_pat(f, start, str, -1, BP_NODENT); + return new_pat(f, start, str, 1, -1, BP_NODENT); const char *opstart = str; unsigned char e = (unsigned char)unescapechar(str, &str); if (*str == '-') { // Escape range (e.g. \x00-\xFF) - ++str; + if (next_char(f, str) != str+1) + file_err(f, start, next_char(f, str), "Sorry, UTF8 escape sequences are not supported."); + str = next_char(f, str); const char *seqstart = str; unsigned char e2 = (unsigned char)unescapechar(str, &str); if (str == seqstart) file_err(f, seqstart, str+1, "This value isn't a valid escape sequence"); if (e2 < e) file_err(f, start, str, "Escape ranges should be low-to-high, but this is high-to-low."); - pat_t *esc = new_pat(f, opstart, str, 1, BP_RANGE); + pat_t *esc = new_pat(f, opstart, str, 1, 1, BP_RANGE); esc->args.range.low = e; esc->args.range.high = e2; return esc; } else { - pat_t *esc = new_pat(f, opstart, str, 1, BP_STRING); + pat_t *esc = new_pat(f, opstart, str, 1, 1, BP_STRING); char *s = xcalloc(sizeof(char), 2); s[0] = (char)e; esc->args.string = s; @@ -305,19 +313,19 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str) char endquote = c == '{' ? '}' : (c == '\002' ? '\003' : c); char *litstart = (char*)str; while (str < f->end && *str != endquote) - ++str; - ssize_t len = (ssize_t)(str - litstart); - ++str; + str = next_char(f, str); + size_t len = (size_t)(str - litstart); + str = next_char(f, str); - pat_t *pat = new_pat(f, start, str, len, BP_STRING); + pat_t *pat = new_pat(f, start, str, len, (ssize_t)len, BP_STRING); pat->args.string = litstart; if (c == '{') { // Surround with `|` word boundaries - pat_t *left = new_pat(f, start, start+1, -1, BP_REF); + pat_t *left = new_pat(f, start, start+1, 0, -1, BP_REF); left->args.ref.name = "left-word-edge"; left->args.ref.len = strlen(left->args.ref.name); - pat_t *right = new_pat(f, str-1, str, -1, BP_REF); + pat_t *right = new_pat(f, str-1, str, 0, -1, BP_REF); right->args.ref.name = "right-word-edge"; right->args.ref.len = strlen(right->args.ref.name); @@ -329,7 +337,7 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str) case '!': { pat_t *p = bp_simplepattern(f, str); if (!p) file_err(f, str, str, "There should be a pattern after this '!'"); - pat_t *not = new_pat(f, start, p->end, 0, BP_NOT); + pat_t *not = new_pat(f, start, p->end, 0, 0, BP_NOT); not->args.pat = p; return not; } @@ -372,12 +380,8 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str) if (!behind) file_err(f, str, str, "There should be a pattern after this '<'"); str = behind->end; - if (behind->len == -1) - file_err(f, start, behind->end, - "Sorry, variable-length lookbehind patterns like this are not supported.\n" - "Please use a fixed-length lookbehind pattern instead."); str = behind->end; - pat_t *pat = new_pat(f, start, str, 0, BP_AFTER); + pat_t *pat = new_pat(f, start, str, 0, 0, BP_AFTER); pat->args.pat = behind; return pat; } @@ -387,7 +391,7 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str) if (!ahead) file_err(f, str, str, "There should be a pattern after this '>'"); str = ahead->end; - pat_t *pat = new_pat(f, start, str, 0, BP_BEFORE); + pat_t *pat = new_pat(f, start, str, 0, 0, BP_BEFORE); pat->args.pat = ahead; return pat; } @@ -395,10 +399,10 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str) case '(': { if (matchstr(&str, "!)")) { pat_t *pat = bp_simplepattern(f, str); - if (!pat) pat = new_pat(f, str, str, 0, BP_STRING); + if (!pat) pat = new_pat(f, str, str, 0, 0, BP_STRING); pat = expand_replacements(f, pat->end, pat); - pat_t *error = new_pat(f, start, pat->end, pat->len, BP_ERROR); + pat_t *error = new_pat(f, start, pat->end, pat->min_matchlen, pat->max_matchlen, BP_ERROR); error->args.pat = pat; return error; } @@ -453,7 +457,7 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str) if (!pat) file_err(f, str, str, "There should be a valid pattern here to capture after the '@'"); - pat_t *capture = new_pat(f, start, pat->end, pat->len, BP_CAPTURE); + pat_t *capture = new_pat(f, start, pat->end, pat->min_matchlen, pat->max_matchlen, BP_CAPTURE); capture->args.capture.capture_pat = pat; capture->args.capture.name = name; capture->args.capture.namelen = namelen; @@ -466,14 +470,14 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str) // Start of file/line: case '^': { if (matchchar(&str, '^')) - return new_pat(f, start, str, 0, BP_START_OF_FILE); - return new_pat(f, start, str, 0, BP_START_OF_LINE); + return new_pat(f, start, str, 0, 0, BP_START_OF_FILE); + return new_pat(f, start, str, 0, 0, BP_START_OF_LINE); } // End of file/line: case '$': { if (matchchar(&str, '$')) - return new_pat(f, start, str, 0, BP_END_OF_FILE); - return new_pat(f, start, str, 0, BP_END_OF_LINE); + return new_pat(f, start, str, 0, 0, BP_END_OF_FILE); + return new_pat(f, start, str, 0, 0, BP_END_OF_LINE); } // Whitespace: case '_': { @@ -481,7 +485,7 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str) if (matchchar(&str, '_')) // double __ (whitespace with newlines) ++namelen; if (matchchar(&str, ':')) return NULL; // Don't match definitions - pat_t *ref = new_pat(f, start, str, -1, BP_REF); + pat_t *ref = new_pat(f, start, str, 0, -1, BP_REF); ref->args.ref.name = start; ref->args.ref.len = namelen; return ref; @@ -494,7 +498,7 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str) str = after_name(str); if (matchchar(&str, ':')) // Don't match definitions return NULL; - pat_t *ref = new_pat(f, start, str, -1, BP_REF); + pat_t *ref = new_pat(f, start, str, 0, -1, BP_REF); ref->args.ref.name = refname; ref->args.ref.len = (size_t)(str - refname); return ref; @@ -519,9 +523,9 @@ pat_t *bp_stringpattern(file_t *f, const char *str) } } // End of string - ssize_t len = (ssize_t)(str - start); + size_t len = (size_t)(str - start); if (len > 0) { - pat_t *str_chunk = new_pat(f, start, str, len, BP_STRING); + pat_t *str_chunk = new_pat(f, start, str, len, (ssize_t)len, BP_STRING); str_chunk->args.string = start; ret = chain_together(f, ret, str_chunk); } @@ -541,7 +545,7 @@ pat_t *bp_stringpattern(file_t *f, const char *str) // pat_t *bp_replacement(file_t *f, pat_t *replacepat, const char *replacement) { - pat_t *pat = new_pat(f, replacepat->start, replacepat->end, replacepat->len, BP_REPLACE); + pat_t *pat = new_pat(f, replacepat->start, replacepat->end, replacepat->min_matchlen, replacepat->max_matchlen, BP_REPLACE); pat->args.replace.pat = replacepat; const char *p = replacement; for (; *p; p++) { diff --git a/pattern.h b/pattern.h index b38486d..e9bebf7 100644 --- a/pattern.h +++ b/pattern.h @@ -8,7 +8,7 @@ #include "types.h" __attribute__((returns_nonnull, nonnull(1,2))) -pat_t *new_pat(file_t *f, const char *start, const char *end, ssize_t len, enum pattype_e type); +pat_t *new_pat(file_t *f, const char *start, const char *end, size_t minlen, ssize_t maxlen, enum pattype_e type); __attribute__((nonnull(1,2))) pat_t *bp_stringpattern(file_t *f, const char *str); __attribute__((nonnull(1,2))) diff --git a/types.h b/types.h index dda89de..96e0727 100644 --- a/types.h +++ b/types.h @@ -9,6 +9,8 @@ #include "files.h" +#define UNBOUNDED(pat) ((pat)->max_matchlen == -1) + // BP virtual machine pattern types enum pattype_e { BP_ANYCHAR = 1, @@ -44,8 +46,9 @@ struct match_s; // forward declared to resolve circular struct defs typedef struct pat_s { enum pattype_e type; const char *start, *end; - // Length of the match, if constant, otherwise -1 - ssize_t len; + // The bounds of the match length (used for backtracking) + size_t min_matchlen; + ssize_t max_matchlen; // -1 means unbounded length union { const char *string; struct { diff --git a/utf8.h b/utf8.h new file mode 100644 index 0000000..7734dfb --- /dev/null +++ b/utf8.h @@ -0,0 +1,42 @@ +// UTF8 helper functions +#ifndef UTF8__H +#define UTF8__H + +#define UTF8_MAXCHARLEN 4 +// +// Return the location of the next character or UTF8 codepoint. +// (i.e. skip forward one codepoint at a time, not one byte at a time) +// +__attribute__((nonnull, pure)) +static inline const char *next_char(file_t *f, const char *str) +{ + if (__builtin_expect(str+1 <= f->end && (str[0] & 0x80) == 0x0, 1)) + return str+1; + if (__builtin_expect(str+2 <= f->end && (str[0] & 0xe0) == 0xc0, 1)) + return str+2; + if (__builtin_expect(str+3 <= f->end && (str[0] & 0xf0) == 0xe0, 1)) + return str+3; + if (__builtin_expect(str+4 <= f->end && (str[0] & 0xf8) == 0xf0, 1)) + return str+4; + return __builtin_expect(str+1 <= f->end, 1) ? str+1 : f->end; +} + +// +// Return the location of the previous character or UTF8 codepoint. +// (i.e. skip backwards one codepoint at a time, not one byte at a time) +// +__attribute__((nonnull, pure)) +static inline const char *prev_char(file_t *f, const char *str) +{ + if (__builtin_expect(str-1 >= f->contents && (str[-1] & 0x80) == 0x0, 1)) + return str-1; + if (__builtin_expect(str-2 >= f->contents && (str[-2] & 0xe0) == 0xc0, 1)) + return str-2; + if (__builtin_expect(str-3 >= f->contents && (str[-3] & 0xf0) == 0xe0, 1)) + return str-3; + if (__builtin_expect(str-4 >= f->contents && (str[-4] & 0xf8) == 0xf0, 1)) + return str-4; + return __builtin_expect(str-1 >= f->contents, 1) ? str-1 : f->contents; +} +#endif +// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1 -- cgit v1.2.3