From da6c8857d6bad131635a846e8177e7c00a4c224e Mon Sep 17 00:00:00 2001
From: Bruce Hill
Date: Mon, 31 May 2021 12:38:42 -0700
Subject: Moved utf8 code into a C file, fixed some potential null deref issues

---
 utf8.h | 41 ++++++-----------------------------------
 1 file changed, 6 insertions(+), 35 deletions(-)

(limited to 'utf8.h')

diff --git a/utf8.h b/utf8.h
index 8d7d969..ae2df2a 100644
--- a/utf8.h
+++ b/utf8.h
@@ -1,46 +1,17 @@
 //
 // utf8.h - UTF8 helper functions
 //
+#include "files.h"
+
 #ifndef UTF8__H
 #define UTF8__H
 
-#include "files.h"
-
 #define UTF8_MAXCHARLEN 4
 
-//
-// Return the location of the next character or UTF8 codepoint.
-// (i.e. skip forward one codepoint at a time, not one byte at a time)
-//
-__attribute__((nonnull, pure))
-inline const char *next_char(file_t *f, const char *str)
-{
-    if (__builtin_expect(str+1 <= f->end && (str[0] & 0x80) == 0x0, 1))
-        return str+1;
-    if (__builtin_expect(str+2 <= f->end && (str[0] & 0xe0) == 0xc0, 1))
-        return str+2;
-    if (__builtin_expect(str+3 <= f->end && (str[0] & 0xf0) == 0xe0, 1))
-        return str+3;
-    if (__builtin_expect(str+4 <= f->end && (str[0] & 0xf8) == 0xf0, 1))
-        return str+4;
-    return __builtin_expect(str+1 <= f->end, 1) ? str+1 : f->end;
-}
-//
-// Return the location of the previous character or UTF8 codepoint.
-// (i.e. skip backwards one codepoint at a time, not one byte at a time)
-//
 __attribute__((nonnull, pure))
-inline const char *prev_char(file_t *f, const char *str)
-{
-    if (__builtin_expect(str-1 >= f->start && (str[-1] & 0x80) == 0x0, 1))
-        return str-1;
-    if (__builtin_expect(str-2 >= f->start && (str[-2] & 0xe0) == 0xc0, 1))
-        return str-2;
-    if (__builtin_expect(str-3 >= f->start && (str[-3] & 0xf0) == 0xe0, 1))
-        return str-3;
-    if (__builtin_expect(str-4 >= f->start && (str[-4] & 0xf8) == 0xf0, 1))
-        return str-4;
-    return __builtin_expect(str-1 >= f->start, 1) ? str-1 : f->start;
-}
+const char *next_char(file_t *f, const char *str);
+__attribute__((nonnull, pure))
+const char *prev_char(file_t *f, const char *str);
+
 #endif
 // vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
-- 
cgit v1.2.3