Moved utf8 code into a C file, fixed some potential null deref issues
This commit is contained in:
parent
0443fbb063
commit
da6c8857d6
4
Makefile
4
Makefile
@ -2,14 +2,14 @@ NAME=bp
|
||||
CC=cc
|
||||
PREFIX=/usr/local
|
||||
SYSCONFDIR=/etc
|
||||
CFLAGS=-std=c99 -Werror -D_XOPEN_SOURCE=700 -D_GNU_SOURCE -D_POSIX_C_SOURCE=200809L
|
||||
CFLAGS=-std=c99 -Werror -D_XOPEN_SOURCE=700 -D_GNU_SOURCE -D_POSIX_C_SOURCE=200809L -flto
|
||||
CWARN=-Wall -Wpedantic -Wextra -Wsign-conversion -Wtype-limits -Wunused-result -Wnull-dereference
|
||||
EXTRA=
|
||||
G=
|
||||
O=-O3
|
||||
ALL_FLAGS=$(CFLAGS) -DBP_NAME="\"$(NAME)\"" $(EXTRA) $(CWARN) $(G) $(O)
|
||||
|
||||
CFILES=pattern.c definitions.c utils.c match.c files.c print.c json.c
|
||||
CFILES=pattern.c definitions.c utils.c match.c files.c print.c json.c utf8.c
|
||||
OBJFILES=$(CFILES:.c=.o)
|
||||
|
||||
all: $(NAME) bp.1
|
||||
|
5
match.c
5
match.c
@ -36,6 +36,7 @@ static match_t *unused_matches = NULL;
|
||||
static match_t *in_use_matches = NULL;
|
||||
#endif
|
||||
|
||||
__attribute__((nonnull(1)))
|
||||
static inline pat_t *deref(def_t *defs, pat_t *pat);
|
||||
__attribute__((returns_nonnull))
|
||||
static match_t *new_match(pat_t *pat, const char *start, const char *end, match_t *child);
|
||||
@ -52,10 +53,9 @@ static match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, bool
|
||||
// If the given pattern is a reference, look it up and return the referenced
|
||||
// pattern. This is used for an optimization to avoid repeated lookups.
|
||||
//
|
||||
__attribute__((nonnull, returns_nonnull))
|
||||
static inline pat_t *deref(def_t *defs, pat_t *pat)
|
||||
{
|
||||
if (pat->type == BP_REF) {
|
||||
if (pat && pat->type == BP_REF) {
|
||||
def_t *def = lookup(defs, pat->args.ref.len, pat->args.ref.name);
|
||||
if (def) pat = def->pat;
|
||||
}
|
||||
@ -297,6 +297,7 @@ static match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, bool
|
||||
}
|
||||
case BP_AFTER: {
|
||||
pat_t *back = deref(defs, pat->args.pat);
|
||||
if (!back) return NULL;
|
||||
|
||||
// We only care about the region from the backtrack pos up to the
|
||||
// current pos, so mock it out as a file slice.
|
||||
|
4
print.c
4
print.c
@ -314,13 +314,13 @@ static void _print_match(FILE *out, printer_t *pr, match_t *m)
|
||||
// the replacement text contains newlines, this may get weird.
|
||||
const char *line_start = get_line(
|
||||
pr->file, get_line_number(pr->file, m->start));
|
||||
char denter = *line_start;
|
||||
char denter = line_start ? *line_start : '\t';
|
||||
fputc('\n', out);
|
||||
++line;
|
||||
pr->needs_line_number = 1;
|
||||
print_line_number(out, pr, 0, pr->use_color ? color_replace : NULL);
|
||||
if (denter == ' ' || denter == '\t') {
|
||||
for (const char *p = line_start; *p == denter && p < m->start; ++p)
|
||||
for (const char *p = line_start; p && *p == denter && p < m->start; ++p)
|
||||
fputc(denter, out);
|
||||
}
|
||||
continue;
|
||||
|
40
utf8.c
Normal file
40
utf8.c
Normal file
@ -0,0 +1,40 @@
|
||||
//
|
||||
// utf8.c - UTF8 helper functions
|
||||
//
|
||||
#include "files.h"
|
||||
#include "utf8.h"
|
||||
|
||||
//
|
||||
// Return the location of the next character or UTF8 codepoint.
|
||||
// (i.e. skip forward one codepoint at a time, not one byte at a time)
|
||||
//
|
||||
const char *next_char(file_t *f, const char *str)
|
||||
{
|
||||
if (__builtin_expect(str+1 <= f->end && (str[0] & 0x80) == 0x0, 1))
|
||||
return str+1;
|
||||
if (__builtin_expect(str+2 <= f->end && (str[0] & 0xe0) == 0xc0, 1))
|
||||
return str+2;
|
||||
if (__builtin_expect(str+3 <= f->end && (str[0] & 0xf0) == 0xe0, 1))
|
||||
return str+3;
|
||||
if (__builtin_expect(str+4 <= f->end && (str[0] & 0xf8) == 0xf0, 1))
|
||||
return str+4;
|
||||
return __builtin_expect(str+1 <= f->end, 1) ? str+1 : f->end;
|
||||
}
|
||||
|
||||
//
|
||||
// Return the location of the previous character or UTF8 codepoint.
|
||||
// (i.e. skip backwards one codepoint at a time, not one byte at a time)
|
||||
//
|
||||
const char *prev_char(file_t *f, const char *str)
|
||||
{
|
||||
if (__builtin_expect(str-1 >= f->start && (str[-1] & 0x80) == 0x0, 1))
|
||||
return str-1;
|
||||
if (__builtin_expect(str-2 >= f->start && (str[-2] & 0xe0) == 0xc0, 1))
|
||||
return str-2;
|
||||
if (__builtin_expect(str-3 >= f->start && (str[-3] & 0xf0) == 0xe0, 1))
|
||||
return str-3;
|
||||
if (__builtin_expect(str-4 >= f->start && (str[-4] & 0xf8) == 0xf0, 1))
|
||||
return str-4;
|
||||
return __builtin_expect(str-1 >= f->start, 1) ? str-1 : f->start;
|
||||
}
|
||||
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
|
41
utf8.h
41
utf8.h
@ -1,46 +1,17 @@
|
||||
//
|
||||
// utf8.h - UTF8 helper functions
|
||||
//
|
||||
#include "files.h"
|
||||
|
||||
#ifndef UTF8__H
|
||||
#define UTF8__H
|
||||
|
||||
#include "files.h"
|
||||
|
||||
#define UTF8_MAXCHARLEN 4
|
||||
//
|
||||
// Return the location of the next character or UTF8 codepoint.
|
||||
// (i.e. skip forward one codepoint at a time, not one byte at a time)
|
||||
//
|
||||
__attribute__((nonnull, pure))
|
||||
inline const char *next_char(file_t *f, const char *str)
{
    // The sequence length is taken from the lead byte at `str`; the bytes
    // that follow are not checked to be continuation bytes. The result is
    // never past `f->end`.
    //
    // The remaining room is measured with `f->end - str` rather than by
    // comparing `str+N <= f->end`: forming a pointer more than one past the
    // end of the buffer is undefined behavior even if it is never read.
    if (__builtin_expect(f->end - str < 1, 0))
        return f->end;
    if (__builtin_expect((str[0] & 0x80) == 0x00, 1))
        return str+1; // 0xxxxxxx: single byte (ASCII)
    if (f->end - str >= 2 && (str[0] & 0xe0) == 0xc0)
        return str+2; // 110xxxxx: two byte sequence
    if (f->end - str >= 3 && (str[0] & 0xf0) == 0xe0)
        return str+3; // 1110xxxx: three byte sequence
    if (f->end - str >= 4 && (str[0] & 0xf8) == 0xf0)
        return str+4; // 11110xxx: four byte sequence
    // Stray continuation byte, invalid lead byte, or truncated sequence:
    // step over one byte so that callers always make progress.
    return str+1;
}
|
||||
|
||||
//
|
||||
// Return the location of the previous character or UTF8 codepoint.
|
||||
// (i.e. skip backwards one codepoint at a time, not one byte at a time)
|
||||
//
|
||||
__attribute__((nonnull, pure))
|
||||
inline const char *prev_char(file_t *f, const char *str)
{
    // Looks for a lead byte 1, 2, 3, or 4 bytes before `str` whose announced
    // length matches that distance; the bytes in between are not checked to
    // be continuation bytes. The result is never before `f->start`.
    //
    // The available room is measured with `str - f->start` rather than by
    // comparing `str-N >= f->start`: forming a pointer before the start of
    // the buffer is undefined behavior even if it is never read.
    if (__builtin_expect(str - f->start < 1, 0))
        return f->start;
    if (__builtin_expect((str[-1] & 0x80) == 0x00, 1))
        return str-1; // 0xxxxxxx: single byte (ASCII)
    if (str - f->start >= 2 && (str[-2] & 0xe0) == 0xc0)
        return str-2; // 110xxxxx: lead byte of a two byte sequence
    if (str - f->start >= 3 && (str[-3] & 0xf0) == 0xe0)
        return str-3; // 1110xxxx: lead byte of a three byte sequence
    if (str - f->start >= 4 && (str[-4] & 0xf8) == 0xf0)
        return str-4; // 11110xxx: lead byte of a four byte sequence
    // No matching lead byte was found (malformed input): step back one byte
    // so that callers always make progress.
    return str-1;
}
|
||||
const char *next_char(file_t *f, const char *str);
|
||||
__attribute__((nonnull, pure))
|
||||
const char *prev_char(file_t *f, const char *str);
|
||||
|
||||
#endif
|
||||
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
|
||||
|
Loading…
Reference in New Issue
Block a user