Renamed file_t.contents -> file_t.start, added file_t.memory for
canonical tracking of memory for mmap, simplified json printing,
and added support for initiating searches on a specific line (and/or
column).
This commit is contained in:
Bruce Hill 2021-05-20 18:31:28 -07:00
parent cc6e25a06f
commit 219f62ed3f
7 changed files with 86 additions and 56 deletions

26
bp.c
View File

@ -169,11 +169,9 @@ static int print_matches_as_json(def_t *defs, file_t *f, pat_t *pattern)
for (match_t *m = NULL; (m = next_match(defs, f, m, pattern, options.skip, options.ignorecase)); ) {
if (++matches > 1)
printf(",\n");
printf("{\"filename\":\"%s\",", f->filename);
printf("\"tree\":{\"rule\":\"text\",\"start\":%d,\"end\":%ld,\"children\":[",
0, f->end - f->contents);
json_match(f->contents, m, options.verbose);
printf("]}}");
printf("{\"filename\":\"%s\",\"match\":", f->filename);
json_match(f->start, m, options.verbose);
printf("}");
}
return matches;
}
@ -204,8 +202,8 @@ static void cleanup(void)
if (modifying_file && backup_file) {
rewind(modifying_file);
ftruncate(fileno(modifying_file), 0);
fwrite(backup_file->contents, 1,
(size_t)(backup_file->end - backup_file->contents),
fwrite(backup_file->start, 1,
(size_t)(backup_file->end - backup_file->start),
modifying_file);
fclose(modifying_file);
modifying_file = NULL;
@ -288,7 +286,7 @@ static int inplace_modify_file(def_t *defs, file_t *f, pat_t *pattern)
file_t *inmem_copy = NULL;
// Ensure the file is resident in memory:
if (f->mmapped) {
inmem_copy = spoof_file(NULL, f->filename, f->contents, (ssize_t)(f->end - f->contents));
inmem_copy = spoof_file(NULL, f->filename, f->start, (ssize_t)(f->end - f->start));
f = inmem_copy;
}
@ -535,7 +533,7 @@ int main(int argc, char *argv[])
// TODO: spoof file as sprintf("pattern => '%s'", flag)
// except that would require handling edge cases like quotation marks etc.
file_t *replace_file = spoof_file(&loaded_files, "<replace argument>", flag, -1);
pattern = bp_replacement(replace_file, pattern, replace_file->contents);
pattern = bp_replacement(replace_file, pattern, replace_file->start);
if (!pattern)
errx(EXIT_FAILURE, "Replacement failed to compile: %s", flag);
} else if (FLAG("-g") || FLAG("--grammar")) {
@ -551,7 +549,7 @@ int main(int argc, char *argv[])
defs = load_grammar(defs, f); // Keep in memory for debug output
} else if (FLAG("-p") || FLAG("--pattern")) {
file_t *arg_file = spoof_file(&loaded_files, "<pattern argument>", flag, -1);
for (const char *str = arg_file->contents; str < arg_file->end; ) {
for (const char *str = arg_file->start; str < arg_file->end; ) {
def_t *d = bp_definition(defs, arg_file, str);
if (d) {
defs = d;
@ -567,9 +565,9 @@ int main(int argc, char *argv[])
}
} else if (FLAG("-s") || FLAG("--skip")) {
file_t *arg_file = spoof_file(&loaded_files, "<skip argument>", flag, -1);
pat_t *s = bp_pattern(arg_file, arg_file->contents);
pat_t *s = bp_pattern(arg_file, arg_file->start);
if (!s) {
fprint_line(stdout, arg_file, arg_file->contents, arg_file->end,
fprint_line(stdout, arg_file, arg_file->start, arg_file->end,
"Failed to compile the skip argument");
} else if (after_spaces(s->end) < arg_file->end) {
fprint_line(stdout, arg_file, s->end, arg_file->end,
@ -593,7 +591,7 @@ int main(int argc, char *argv[])
} else if (argv[0][0] != '-') {
if (pattern != NULL) break;
file_t *arg_file = spoof_file(&loaded_files, "<pattern argument>", argv[0], -1);
pat_t *p = bp_stringpattern(arg_file, arg_file->contents);
pat_t *p = bp_stringpattern(arg_file, arg_file->start);
if (!p)
errx(EXIT_FAILURE, "Pattern failed to compile: %s", argv[0]);
pattern = chain_together(arg_file, pattern, p);
@ -641,7 +639,7 @@ int main(int argc, char *argv[])
// pattern the args specified, and use `pattern` as the thing being matched.
defs = with_def(defs, strlen("pattern"), "pattern", pattern);
file_t *patref_file = spoof_file(&loaded_files, "<pattern ref>", "pattern", -1);
pattern = bp_pattern(patref_file, patref_file->contents);
pattern = bp_pattern(patref_file, patref_file->start);
int found = 0;
if (options.mode == MODE_JSON) printf("[");

View File

@ -30,7 +30,7 @@ def_t *with_def(def_t *defs, size_t namelen, const char *name, pat_t *pat)
//
def_t *load_grammar(def_t *defs, file_t *f)
{
const char *src = f->contents;
const char *src = f->start;
src = after_spaces(src);
while (src < f->end) {
const char *name = src;

75
files.c
View File

@ -18,7 +18,7 @@
#include "utils.h"
__attribute__((nonnull))
static void populate_lines(file_t *f);
static void populate_lines(file_t *f, size_t len);
__attribute__((pure, nonnull))
static size_t get_char_number(file_t *f, const char *p);
@ -26,14 +26,14 @@ static size_t get_char_number(file_t *f, const char *p);
// In the file object, populate the `lines` array with pointers to the
// beginning of each line.
//
static void populate_lines(file_t *f)
static void populate_lines(file_t *f, size_t len)
{
// Calculate line numbers:
size_t linecap = 10;
f->lines = xcalloc(sizeof(const char*), linecap);
f->nlines = 0;
char *p = f->contents;
for (size_t n = 0; p && p < f->end; ++n) {
char *p = f->memory;
for (size_t n = 0; p && p < &f->memory[len]; ++n) {
++f->nlines;
if (n >= linecap)
f->lines = xrealloc(f->lines, sizeof(const char*)*(linecap *= 2));
@ -64,7 +64,26 @@ file_t *load_filef(file_t **files, const char *fmt, ...)
file_t *load_file(file_t **files, const char *filename)
{
int fd = filename[0] == '\0' ? STDIN_FILENO : open(filename, O_RDONLY);
if (fd < 0) return NULL;
if (fd < 0) {
// Check for <file>:<line>[:<col>]
if (strchr(filename, ':')) {
char tmp[PATH_MAX] = {0};
strcpy(tmp, filename);
char *colon = strchr(tmp, ':');
*colon = '\0';
file_t *f = load_file(files, tmp);
if (!f) return f;
long line = strtol(colon+1, &colon, 10);
f->start = (char*)get_line(f, (size_t)line);
if (*colon == ':') {
long offset = strtol(colon+1, &colon, 10);
f->start += offset;
}
if (f->start > f->end) f->start = f->end;
return f;
}
return NULL;
}
size_t length;
file_t *f = new(file_t);
f->filename = memcheck(strdup(filename));
@ -73,8 +92,8 @@ file_t *load_file(file_t **files, const char *filename)
if (fstat(fd, &sb) == -1)
goto skip_mmap;
f->contents = mmap(NULL, (size_t)sb.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
if (f->contents == MAP_FAILED)
f->memory = mmap(NULL, (size_t)sb.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
if (f->memory == MAP_FAILED)
goto skip_mmap;
f->mmapped = true;
@ -85,12 +104,12 @@ file_t *load_file(file_t **files, const char *filename)
f->mmapped = false;
size_t capacity = 1000;
length = 0;
f->contents = xcalloc(sizeof(char), capacity);
f->memory = xcalloc(sizeof(char), capacity);
ssize_t just_read;
while ((just_read=read(fd, &f->contents[length], capacity - length)) > 0) {
while ((just_read=read(fd, &f->memory[length], capacity - length)) > 0) {
length += (size_t)just_read;
if (length >= capacity)
f->contents = xrealloc(f->contents, sizeof(char)*(capacity *= 2) + 1);
f->memory = xrealloc(f->memory, sizeof(char)*(capacity *= 2) + 1);
}
finished_loading:
@ -98,8 +117,9 @@ file_t *load_file(file_t **files, const char *filename)
if (close(fd) != 0)
err(EXIT_FAILURE, "Failed to close file");
}
f->end = &f->contents[length];
populate_lines(f);
f->start = &f->memory[0];
f->end = &f->memory[length];
populate_lines(f, length);
if (files != NULL) {
f->next = *files;
*files = f;
@ -107,6 +127,16 @@ file_t *load_file(file_t **files, const char *filename)
return f;
}
//
// Make `slice` a view onto part of `src`: every field is copied from `src`
// (a shallow copy, so pointer members still refer to the same objects as in
// `src`), and then the start/end bounds are replaced with the given region.
//
void slice_file(file_t *slice, file_t *src, const char *start, const char *end)
{
    // Whole-struct copy of the source file's fields.
    *slice = *src;
    // Narrow the visible region; the struct stores non-const pointers.
    slice->end = (char*)end;
    slice->start = (char*)start;
}
//
// Create a virtual file from a string.
//
@ -116,10 +146,11 @@ file_t *spoof_file(file_t **files, const char *filename, const char *text, ssize
file_t *f = new(file_t);
size_t len = _len == -1 ? strlen(text) : (size_t)_len;
f->filename = memcheck(strdup(filename));
f->contents = xcalloc(len+1, sizeof(char));
memcpy(f->contents, text, len);
f->end = &f->contents[len];
populate_lines(f);
f->memory = xcalloc(len+1, sizeof(char));
memcpy(f->memory, text, len);
f->start = &f->memory[0];
f->end = &f->memory[len];
populate_lines(f, len);
if (files != NULL) {
f->next = *files;
*files = f;
@ -141,13 +172,13 @@ void destroy_file(file_t **f)
xfree(&((*f)->lines));
}
if ((*f)->contents) {
if ((*f)->memory) {
if ((*f)->mmapped) {
if (munmap((*f)->contents, (size_t)((*f)->end - (*f)->contents)) != 0)
if (munmap((*f)->memory, (size_t)((*f)->end - (*f)->memory)) != 0)
err(EXIT_FAILURE, "Failure to un-memory-map some memory");
(*f)->contents = NULL;
(*f)->memory = NULL;
} else {
xfree(&((*f)->contents));
xfree(&((*f)->memory));
}
}
@ -203,9 +234,9 @@ const char *get_line(file_t *f, size_t line_number)
//
void fprint_line(FILE *dest, file_t *f, const char *start, const char *end, const char *fmt, ...)
{
if (start < f->contents) start = f->contents;
if (start < f->start) start = f->start;
if (start > f->end) start = f->end;
if (end < f->contents) end = f->contents;
if (end < f->start) end = f->start;
if (end > f->end) end = f->end;
size_t linenum = get_line_number(f, start);
const char *line = get_line(f, linenum);

View File

@ -15,7 +15,7 @@ struct allocated_pat_s; // declared in types.h
typedef struct file_s {
struct file_s *next;
const char *filename;
char *contents, **lines, *end;
char *memory, **lines, *start, *end;
size_t nlines;
struct allocated_pat_s *pats;
bool mmapped:1;
@ -25,6 +25,8 @@ __attribute__((nonnull(2)))
file_t *load_file(file_t **files, const char *filename);
__attribute__((format(printf,2,3)))
file_t *load_filef(file_t **files, const char *fmt, ...);
__attribute__((nonnull))
void slice_file(file_t *slice, file_t *src, const char *start, const char *end);
__attribute__((nonnull(3), returns_nonnull))
file_t *spoof_file(file_t **files, const char *filename, const char *text, ssize_t len);
__attribute__((nonnull))

21
match.c
View File

@ -127,7 +127,7 @@ match_t *next_match(def_t *defs, file_t *f, match_t *prev, pat_t *pat, pat_t *sk
str = prev->end > prev->start ? prev->end : prev->end + 1;
recycle_if_unused(&prev);
} else {
str = f->contents;
str = f->start;
}
bool only_start = pat->type == BP_START_OF_FILE || (pat->type == BP_CHAIN && pat->args.multiple.first->type == BP_START_OF_FILE);
while (str <= f->end) {
@ -168,10 +168,10 @@ static match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, bool
return (str < f->end && *str != '\n') ? new_match(pat, str, next_char(f, str), NULL) : NULL;
}
case BP_START_OF_FILE: {
return (str == f->contents) ? new_match(pat, str, str, NULL) : NULL;
return (str == f->start) ? new_match(pat, str, str, NULL) : NULL;
}
case BP_START_OF_LINE: {
return (str == f->contents || str[-1] == '\n') ? new_match(pat, str, str, NULL) : NULL;
return (str == f->start || str[-1] == '\n') ? new_match(pat, str, str, NULL) : NULL;
}
case BP_END_OF_FILE: {
return (str == f->end) ? new_match(pat, str, str, NULL) : NULL;
@ -302,19 +302,18 @@ static match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, bool
// TODO: this breaks ^/^^/$/$$, but that can probably be ignored
// because you rarely need to check those in a backtrack.
file_t slice;
memcpy(&slice, f, sizeof(file_t));
slice.end = (char*)str;
slice_file(&slice, f, f->start, str);
for (const char *pos = &str[-(long)back->min_matchlen];
pos >= f->contents && (back->max_matchlen == -1 || pos >= &str[-(long)back->max_matchlen]);
pos >= f->start && (back->max_matchlen == -1 || pos >= &str[-(long)back->max_matchlen]);
pos = prev_char(f, pos)) {
slice.contents = (char*)pos;
slice.start = (char*)pos;
match_t *m = match(defs, &slice, pos, back, ignorecase);
// Match should not go past str (i.e. (<"AB" "B") should match "ABB", but not "AB")
if (m && m->end != str)
recycle_if_unused(&m);
else if (m)
return new_match(pat, str, str, m);
if (pos == f->contents) break;
if (pos == f->start) break;
// To prevent extreme performance degradation, don't keep
// walking backwards endlessly over newlines.
if (back->max_matchlen == -1 && *pos == '\n') break;
@ -361,9 +360,7 @@ static match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, bool
// <p1>==<p2> matches iff the text of <p1> matches <p2>
// <p1>!=<p2> matches iff the text of <p1> does not match <p2>
file_t slice;
memcpy(&slice, f, sizeof(file_t));
slice.contents = (char*)m1->start;
slice.end = (char*)m1->end;
slice_file(&slice, f, m1->start, m1->end);
match_t *m2 = next_match(defs, &slice, NULL, pat->args.multiple.second, NULL, ignorecase);
if ((!m2 && pat->type == BP_MATCH) || (m2 && pat->type == BP_NOT_MATCH)) {
recycle_if_unused(&m2);
@ -451,7 +448,7 @@ static match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, bool
size_t linenum = get_line_number(f, str);
const char *p = get_line(f, linenum);
if (p < f->contents) p=f->contents; // Can happen with recursive matching
if (p < f->start) p = f->start; // Can happen with recursive matching
// Current indentation:
char denter = *p;

View File

@ -371,7 +371,7 @@ void print_match(FILE *out, printer_t *pr, match_t *m)
current_color = color_normal;
bool first = (pr->pos == NULL);
if (first) { // First match printed:
pr->pos = pr->file->contents;
pr->pos = pr->file->start;
pr->needs_line_number = 1;
}
if (m) {

12
utf8.h
View File

@ -4,6 +4,8 @@
#ifndef UTF8__H
#define UTF8__H
#include "files.h"
#define UTF8_MAXCHARLEN 4
//
// Return the location of the next character or UTF8 codepoint.
@ -30,15 +32,15 @@ static inline const char *next_char(file_t *f, const char *str)
__attribute__((nonnull, pure))
static inline const char *prev_char(file_t *f, const char *str)
{
if (__builtin_expect(str-1 >= f->contents && (str[-1] & 0x80) == 0x0, 1))
if (__builtin_expect(str-1 >= f->start && (str[-1] & 0x80) == 0x0, 1))
return str-1;
if (__builtin_expect(str-2 >= f->contents && (str[-2] & 0xe0) == 0xc0, 1))
if (__builtin_expect(str-2 >= f->start && (str[-2] & 0xe0) == 0xc0, 1))
return str-2;
if (__builtin_expect(str-3 >= f->contents && (str[-3] & 0xf0) == 0xe0, 1))
if (__builtin_expect(str-3 >= f->start && (str[-3] & 0xf0) == 0xe0, 1))
return str-3;
if (__builtin_expect(str-4 >= f->contents && (str[-4] & 0xf8) == 0xf0, 1))
if (__builtin_expect(str-4 >= f->start && (str[-4] & 0xf8) == 0xf0, 1))
return str-4;
return __builtin_expect(str-1 >= f->contents, 1) ? str-1 : f->contents;
return __builtin_expect(str-1 >= f->start, 1) ? str-1 : f->start;
}
#endif
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1