diff options
| author | Bruce Hill <bruce@bruce-hill.com> | 2021-05-20 15:27:24 -0700 |
|---|---|---|
| committer | Bruce Hill <bruce@bruce-hill.com> | 2021-05-20 15:27:24 -0700 |
| commit | 655ed121289c0befa2c87f7a6a2db3409f54094c (patch) | |
| tree | a575edd664c5feb418389fb2fb22c6d627b04927 /utf8.h | |
| parent | a0028e96055669ab83dd6ff9e14b0e728a121d7d (diff) | |
Mostly working version
Diffstat (limited to 'utf8.h')
| -rw-r--r-- | utf8.h | 42 |
1 files changed, 42 insertions, 0 deletions
@@ -0,0 +1,42 @@ +// UTF8 helper functions +#ifndef UTF8__H +#define UTF8__H + +#define UTF8_MAXCHARLEN 4 +// +// Return the location of the next character or UTF8 codepoint. +// (i.e. skip forward one codepoint at a time, not one byte at a time) +// +__attribute__((nonnull, pure)) +static inline const char *next_char(file_t *f, const char *str) +{ + if (__builtin_expect(str+1 <= f->end && (str[0] & 0x80) == 0x0, 1)) + return str+1; + if (__builtin_expect(str+2 <= f->end && (str[0] & 0xe0) == 0xc0, 1)) + return str+2; + if (__builtin_expect(str+3 <= f->end && (str[0] & 0xf0) == 0xe0, 1)) + return str+3; + if (__builtin_expect(str+4 <= f->end && (str[0] & 0xf8) == 0xf0, 1)) + return str+4; + return __builtin_expect(str+1 <= f->end, 1) ? str+1 : f->end; +} + +// +// Return the location of the previous character or UTF8 codepoint. +// (i.e. skip backwards one codepoint at a time, not one byte at a time) +// +__attribute__((nonnull, pure)) +static inline const char *prev_char(file_t *f, const char *str) +{ + if (__builtin_expect(str-1 >= f->contents && (str[-1] & 0x80) == 0x0, 1)) + return str-1; + if (__builtin_expect(str-2 >= f->contents && (str[-2] & 0xe0) == 0xc0, 1)) + return str-2; + if (__builtin_expect(str-3 >= f->contents && (str[-3] & 0xf0) == 0xe0, 1)) + return str-3; + if (__builtin_expect(str-4 >= f->contents && (str[-4] & 0xf8) == 0xf0, 1)) + return str-4; + return __builtin_expect(str-1 >= f->contents, 1) ? str-1 : f->contents; +} +#endif +// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1 |
