From da6c8857d6bad131635a846e8177e7c00a4c224e Mon Sep 17 00:00:00 2001
From: Bruce Hill <bruce@bruce-hill.com>
Date: Mon, 31 May 2021 12:38:42 -0700
Subject: Moved utf8 code into a C file, fixed some potential null deref issues

---
 utf8.c | 40 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)
 create mode 100644 utf8.c

(limited to 'utf8.c')

diff --git a/utf8.c b/utf8.c
new file mode 100644
index 0000000..25e0048
--- /dev/null
+++ b/utf8.c
@@ -0,0 +1,40 @@
+//
+// utf8.c - UTF8 helper functions
+//
+#include "files.h"
+#include "utf8.h"
+
+//
+// Return the location of the next character or UTF8 codepoint, i.e. skip
+// forward one codepoint at a time, not one byte at a time. The result never
+// passes f->end. Only the lead byte is inspected; an unrecognized lead byte or
+// a sequence that would run past f->end advances by a single byte instead.
+//
+const char *next_char(file_t *f, const char *str)
+{
+    if (str >= f->end) return f->end;
+    // Compare the remaining distance instead of forming str+N, which is
+    // undefined behavior once it points beyond one-past-the-end of the buffer.
+    if ((str[0] & 0xe0) == 0xc0 && f->end - str >= 2) return str+2;
+    if ((str[0] & 0xf0) == 0xe0 && f->end - str >= 3) return str+3;
+    if ((str[0] & 0xf8) == 0xf0 && f->end - str >= 4) return str+4;
+    return str+1;
+}
+
+//
+// Return the location of the previous character or UTF8 codepoint, i.e. skip
+// backwards one codepoint at a time, not one byte at a time. The result never
+// precedes f->start. If none of the up-to-four preceding bytes is a lead byte
+// of the matching sequence length, this steps back a single byte instead.
+//
+const char *prev_char(file_t *f, const char *str)
+{
+    if (str <= f->start) return f->start;
+    if ((str[-1] & 0x80) == 0x0) return str-1;
+    // Check the distance first: a pointer before f->start is undefined behavior.
+    if (str - f->start >= 2 && (str[-2] & 0xe0) == 0xc0) return str-2;
+    if (str - f->start >= 3 && (str[-3] & 0xf0) == 0xe0) return str-3;
+    if (str - f->start >= 4 && (str[-4] & 0xf8) == 0xf0) return str-4;
+    return str-1;
+}
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
-- 
cgit v1.2.3