Renamed file_t.contents to file_t.start, and added file_t.memory as the canonical pointer to a file's underlying buffer (mmap'd or heap-allocated).
Also simplified JSON printing, and added support for starting a search at a specific line (and/or column) using the <file>:<line>[:<col>] filename syntax.
This commit is contained in:
parent
cc6e25a06f
commit
219f62ed3f
26
bp.c
26
bp.c
@ -169,11 +169,9 @@ static int print_matches_as_json(def_t *defs, file_t *f, pat_t *pattern)
|
||||
for (match_t *m = NULL; (m = next_match(defs, f, m, pattern, options.skip, options.ignorecase)); ) {
|
||||
if (++matches > 1)
|
||||
printf(",\n");
|
||||
printf("{\"filename\":\"%s\",", f->filename);
|
||||
printf("\"tree\":{\"rule\":\"text\",\"start\":%d,\"end\":%ld,\"children\":[",
|
||||
0, f->end - f->contents);
|
||||
json_match(f->contents, m, options.verbose);
|
||||
printf("]}}");
|
||||
printf("{\"filename\":\"%s\",\"match\":", f->filename);
|
||||
json_match(f->start, m, options.verbose);
|
||||
printf("}");
|
||||
}
|
||||
return matches;
|
||||
}
|
||||
@ -204,8 +202,8 @@ static void cleanup(void)
|
||||
if (modifying_file && backup_file) {
|
||||
rewind(modifying_file);
|
||||
ftruncate(fileno(modifying_file), 0);
|
||||
fwrite(backup_file->contents, 1,
|
||||
(size_t)(backup_file->end - backup_file->contents),
|
||||
fwrite(backup_file->start, 1,
|
||||
(size_t)(backup_file->end - backup_file->start),
|
||||
modifying_file);
|
||||
fclose(modifying_file);
|
||||
modifying_file = NULL;
|
||||
@ -288,7 +286,7 @@ static int inplace_modify_file(def_t *defs, file_t *f, pat_t *pattern)
|
||||
file_t *inmem_copy = NULL;
|
||||
// Ensure the file is resident in memory:
|
||||
if (f->mmapped) {
|
||||
inmem_copy = spoof_file(NULL, f->filename, f->contents, (ssize_t)(f->end - f->contents));
|
||||
inmem_copy = spoof_file(NULL, f->filename, f->start, (ssize_t)(f->end - f->start));
|
||||
f = inmem_copy;
|
||||
}
|
||||
|
||||
@ -535,7 +533,7 @@ int main(int argc, char *argv[])
|
||||
// TODO: spoof file as sprintf("pattern => '%s'", flag)
|
||||
// except that would require handling edge cases like quotation marks etc.
|
||||
file_t *replace_file = spoof_file(&loaded_files, "<replace argument>", flag, -1);
|
||||
pattern = bp_replacement(replace_file, pattern, replace_file->contents);
|
||||
pattern = bp_replacement(replace_file, pattern, replace_file->start);
|
||||
if (!pattern)
|
||||
errx(EXIT_FAILURE, "Replacement failed to compile: %s", flag);
|
||||
} else if (FLAG("-g") || FLAG("--grammar")) {
|
||||
@ -551,7 +549,7 @@ int main(int argc, char *argv[])
|
||||
defs = load_grammar(defs, f); // Keep in memory for debug output
|
||||
} else if (FLAG("-p") || FLAG("--pattern")) {
|
||||
file_t *arg_file = spoof_file(&loaded_files, "<pattern argument>", flag, -1);
|
||||
for (const char *str = arg_file->contents; str < arg_file->end; ) {
|
||||
for (const char *str = arg_file->start; str < arg_file->end; ) {
|
||||
def_t *d = bp_definition(defs, arg_file, str);
|
||||
if (d) {
|
||||
defs = d;
|
||||
@ -567,9 +565,9 @@ int main(int argc, char *argv[])
|
||||
}
|
||||
} else if (FLAG("-s") || FLAG("--skip")) {
|
||||
file_t *arg_file = spoof_file(&loaded_files, "<skip argument>", flag, -1);
|
||||
pat_t *s = bp_pattern(arg_file, arg_file->contents);
|
||||
pat_t *s = bp_pattern(arg_file, arg_file->start);
|
||||
if (!s) {
|
||||
fprint_line(stdout, arg_file, arg_file->contents, arg_file->end,
|
||||
fprint_line(stdout, arg_file, arg_file->start, arg_file->end,
|
||||
"Failed to compile the skip argument");
|
||||
} else if (after_spaces(s->end) < arg_file->end) {
|
||||
fprint_line(stdout, arg_file, s->end, arg_file->end,
|
||||
@ -593,7 +591,7 @@ int main(int argc, char *argv[])
|
||||
} else if (argv[0][0] != '-') {
|
||||
if (pattern != NULL) break;
|
||||
file_t *arg_file = spoof_file(&loaded_files, "<pattern argument>", argv[0], -1);
|
||||
pat_t *p = bp_stringpattern(arg_file, arg_file->contents);
|
||||
pat_t *p = bp_stringpattern(arg_file, arg_file->start);
|
||||
if (!p)
|
||||
errx(EXIT_FAILURE, "Pattern failed to compile: %s", argv[0]);
|
||||
pattern = chain_together(arg_file, pattern, p);
|
||||
@ -641,7 +639,7 @@ int main(int argc, char *argv[])
|
||||
// pattern the args specified, and use `pattern` as the thing being matched.
|
||||
defs = with_def(defs, strlen("pattern"), "pattern", pattern);
|
||||
file_t *patref_file = spoof_file(&loaded_files, "<pattern ref>", "pattern", -1);
|
||||
pattern = bp_pattern(patref_file, patref_file->contents);
|
||||
pattern = bp_pattern(patref_file, patref_file->start);
|
||||
|
||||
int found = 0;
|
||||
if (options.mode == MODE_JSON) printf("[");
|
||||
|
@ -30,7 +30,7 @@ def_t *with_def(def_t *defs, size_t namelen, const char *name, pat_t *pat)
|
||||
//
|
||||
def_t *load_grammar(def_t *defs, file_t *f)
|
||||
{
|
||||
const char *src = f->contents;
|
||||
const char *src = f->start;
|
||||
src = after_spaces(src);
|
||||
while (src < f->end) {
|
||||
const char *name = src;
|
||||
|
75
files.c
75
files.c
@ -18,7 +18,7 @@
|
||||
#include "utils.h"
|
||||
|
||||
__attribute__((nonnull))
|
||||
static void populate_lines(file_t *f);
|
||||
static void populate_lines(file_t *f, size_t len);
|
||||
__attribute__((pure, nonnull))
|
||||
static size_t get_char_number(file_t *f, const char *p);
|
||||
|
||||
@ -26,14 +26,14 @@ static size_t get_char_number(file_t *f, const char *p);
|
||||
// In the file object, populate the `lines` array with pointers to the
|
||||
// beginning of each line.
|
||||
//
|
||||
static void populate_lines(file_t *f)
|
||||
static void populate_lines(file_t *f, size_t len)
|
||||
{
|
||||
// Calculate line numbers:
|
||||
size_t linecap = 10;
|
||||
f->lines = xcalloc(sizeof(const char*), linecap);
|
||||
f->nlines = 0;
|
||||
char *p = f->contents;
|
||||
for (size_t n = 0; p && p < f->end; ++n) {
|
||||
char *p = f->memory;
|
||||
for (size_t n = 0; p && p < &f->memory[len]; ++n) {
|
||||
++f->nlines;
|
||||
if (n >= linecap)
|
||||
f->lines = xrealloc(f->lines, sizeof(const char*)*(linecap *= 2));
|
||||
@ -64,7 +64,26 @@ file_t *load_filef(file_t **files, const char *fmt, ...)
|
||||
file_t *load_file(file_t **files, const char *filename)
|
||||
{
|
||||
int fd = filename[0] == '\0' ? STDIN_FILENO : open(filename, O_RDONLY);
|
||||
if (fd < 0) return NULL;
|
||||
if (fd < 0) {
|
||||
// Check for <file>:<line>[:<col>]
|
||||
if (strchr(filename, ':')) {
|
||||
char tmp[PATH_MAX] = {0};
|
||||
strcpy(tmp, filename);
|
||||
char *colon = strchr(tmp, ':');
|
||||
*colon = '\0';
|
||||
file_t *f = load_file(files, tmp);
|
||||
if (!f) return f;
|
||||
long line = strtol(colon+1, &colon, 10);
|
||||
f->start = (char*)get_line(f, (size_t)line);
|
||||
if (*colon == ':') {
|
||||
long offset = strtol(colon+1, &colon, 10);
|
||||
f->start += offset;
|
||||
}
|
||||
if (f->start > f->end) f->start = f->end;
|
||||
return f;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
size_t length;
|
||||
file_t *f = new(file_t);
|
||||
f->filename = memcheck(strdup(filename));
|
||||
@ -73,8 +92,8 @@ file_t *load_file(file_t **files, const char *filename)
|
||||
if (fstat(fd, &sb) == -1)
|
||||
goto skip_mmap;
|
||||
|
||||
f->contents = mmap(NULL, (size_t)sb.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
|
||||
if (f->contents == MAP_FAILED)
|
||||
f->memory = mmap(NULL, (size_t)sb.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
|
||||
if (f->memory == MAP_FAILED)
|
||||
goto skip_mmap;
|
||||
|
||||
f->mmapped = true;
|
||||
@ -85,12 +104,12 @@ file_t *load_file(file_t **files, const char *filename)
|
||||
f->mmapped = false;
|
||||
size_t capacity = 1000;
|
||||
length = 0;
|
||||
f->contents = xcalloc(sizeof(char), capacity);
|
||||
f->memory = xcalloc(sizeof(char), capacity);
|
||||
ssize_t just_read;
|
||||
while ((just_read=read(fd, &f->contents[length], capacity - length)) > 0) {
|
||||
while ((just_read=read(fd, &f->memory[length], capacity - length)) > 0) {
|
||||
length += (size_t)just_read;
|
||||
if (length >= capacity)
|
||||
f->contents = xrealloc(f->contents, sizeof(char)*(capacity *= 2) + 1);
|
||||
f->memory = xrealloc(f->memory, sizeof(char)*(capacity *= 2) + 1);
|
||||
}
|
||||
|
||||
finished_loading:
|
||||
@ -98,8 +117,9 @@ file_t *load_file(file_t **files, const char *filename)
|
||||
if (close(fd) != 0)
|
||||
err(EXIT_FAILURE, "Failed to close file");
|
||||
}
|
||||
f->end = &f->contents[length];
|
||||
populate_lines(f);
|
||||
f->start = &f->memory[0];
|
||||
f->end = &f->memory[length];
|
||||
populate_lines(f, length);
|
||||
if (files != NULL) {
|
||||
f->next = *files;
|
||||
*files = f;
|
||||
@ -107,6 +127,16 @@ file_t *load_file(file_t **files, const char *filename)
|
||||
return f;
|
||||
}
|
||||
|
||||
//
|
||||
// Set a file struct to represent a region of a different file.
|
||||
//
|
||||
void slice_file(file_t *slice, file_t *src, const char *start, const char *end)
|
||||
{
|
||||
// Shallow copy: every field of `src` (filename, memory, lines, nlines,
// pats, next, mmapped, ...) is duplicated bit-for-bit, so `slice` aliases
// the same buffers as `src` rather than owning copies of them.
// NOTE(review): since `memory` and `lines` are shared, a slice presumably
// must never be handed to destroy_file() -- confirm against callers.
memcpy(slice, src, sizeof(file_t));
|
||||
// Override the copied bounds with the requested region. The casts only
// drop `const` so the pointers fit the `char *` fields of file_t.
slice->start = (char*)start;
|
||||
slice->end = (char*)end;
|
||||
}
|
||||
|
||||
//
|
||||
// Create a virtual file from a string.
|
||||
//
|
||||
@ -116,10 +146,11 @@ file_t *spoof_file(file_t **files, const char *filename, const char *text, ssize
|
||||
file_t *f = new(file_t);
|
||||
size_t len = _len == -1 ? strlen(text) : (size_t)_len;
|
||||
f->filename = memcheck(strdup(filename));
|
||||
f->contents = xcalloc(len+1, sizeof(char));
|
||||
memcpy(f->contents, text, len);
|
||||
f->end = &f->contents[len];
|
||||
populate_lines(f);
|
||||
f->memory = xcalloc(len+1, sizeof(char));
|
||||
memcpy(f->memory, text, len);
|
||||
f->start = &f->memory[0];
|
||||
f->end = &f->memory[len];
|
||||
populate_lines(f, len);
|
||||
if (files != NULL) {
|
||||
f->next = *files;
|
||||
*files = f;
|
||||
@ -141,13 +172,13 @@ void destroy_file(file_t **f)
|
||||
xfree(&((*f)->lines));
|
||||
}
|
||||
|
||||
if ((*f)->contents) {
|
||||
if ((*f)->memory) {
|
||||
if ((*f)->mmapped) {
|
||||
if (munmap((*f)->contents, (size_t)((*f)->end - (*f)->contents)) != 0)
|
||||
if (munmap((*f)->memory, (size_t)((*f)->end - (*f)->memory)) != 0)
|
||||
err(EXIT_FAILURE, "Failure to un-memory-map some memory");
|
||||
(*f)->contents = NULL;
|
||||
(*f)->memory = NULL;
|
||||
} else {
|
||||
xfree(&((*f)->contents));
|
||||
xfree(&((*f)->memory));
|
||||
}
|
||||
}
|
||||
|
||||
@ -203,9 +234,9 @@ const char *get_line(file_t *f, size_t line_number)
|
||||
//
|
||||
void fprint_line(FILE *dest, file_t *f, const char *start, const char *end, const char *fmt, ...)
|
||||
{
|
||||
if (start < f->contents) start = f->contents;
|
||||
if (start < f->start) start = f->start;
|
||||
if (start > f->end) start = f->end;
|
||||
if (end < f->contents) end = f->contents;
|
||||
if (end < f->start) end = f->start;
|
||||
if (end > f->end) end = f->end;
|
||||
size_t linenum = get_line_number(f, start);
|
||||
const char *line = get_line(f, linenum);
|
||||
|
4
files.h
4
files.h
@ -15,7 +15,7 @@ struct allocated_pat_s; // declared in types.h
|
||||
typedef struct file_s {
|
||||
struct file_s *next;
|
||||
const char *filename;
|
||||
char *contents, **lines, *end;
|
||||
char *memory, **lines, *start, *end;
|
||||
size_t nlines;
|
||||
struct allocated_pat_s *pats;
|
||||
bool mmapped:1;
|
||||
@ -25,6 +25,8 @@ __attribute__((nonnull(2)))
|
||||
file_t *load_file(file_t **files, const char *filename);
|
||||
__attribute__((format(printf,2,3)))
|
||||
file_t *load_filef(file_t **files, const char *fmt, ...);
|
||||
__attribute__((nonnull))
|
||||
void slice_file(file_t *slice, file_t *src, const char *start, const char *end);
|
||||
__attribute__((nonnull(3), returns_nonnull))
|
||||
file_t *spoof_file(file_t **files, const char *filename, const char *text, ssize_t len);
|
||||
__attribute__((nonnull))
|
||||
|
21
match.c
21
match.c
@ -127,7 +127,7 @@ match_t *next_match(def_t *defs, file_t *f, match_t *prev, pat_t *pat, pat_t *sk
|
||||
str = prev->end > prev->start ? prev->end : prev->end + 1;
|
||||
recycle_if_unused(&prev);
|
||||
} else {
|
||||
str = f->contents;
|
||||
str = f->start;
|
||||
}
|
||||
bool only_start = pat->type == BP_START_OF_FILE || (pat->type == BP_CHAIN && pat->args.multiple.first->type == BP_START_OF_FILE);
|
||||
while (str <= f->end) {
|
||||
@ -168,10 +168,10 @@ static match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, bool
|
||||
return (str < f->end && *str != '\n') ? new_match(pat, str, next_char(f, str), NULL) : NULL;
|
||||
}
|
||||
case BP_START_OF_FILE: {
|
||||
return (str == f->contents) ? new_match(pat, str, str, NULL) : NULL;
|
||||
return (str == f->start) ? new_match(pat, str, str, NULL) : NULL;
|
||||
}
|
||||
case BP_START_OF_LINE: {
|
||||
return (str == f->contents || str[-1] == '\n') ? new_match(pat, str, str, NULL) : NULL;
|
||||
return (str == f->start || str[-1] == '\n') ? new_match(pat, str, str, NULL) : NULL;
|
||||
}
|
||||
case BP_END_OF_FILE: {
|
||||
return (str == f->end) ? new_match(pat, str, str, NULL) : NULL;
|
||||
@ -302,19 +302,18 @@ static match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, bool
|
||||
// TODO: this breaks ^/^^/$/$$, but that can probably be ignored
|
||||
// because you rarely need to check those in a backtrack.
|
||||
file_t slice;
|
||||
memcpy(&slice, f, sizeof(file_t));
|
||||
slice.end = (char*)str;
|
||||
slice_file(&slice, f, f->start, str);
|
||||
for (const char *pos = &str[-(long)back->min_matchlen];
|
||||
pos >= f->contents && (back->max_matchlen == -1 || pos >= &str[-(long)back->max_matchlen]);
|
||||
pos >= f->start && (back->max_matchlen == -1 || pos >= &str[-(long)back->max_matchlen]);
|
||||
pos = prev_char(f, pos)) {
|
||||
slice.contents = (char*)pos;
|
||||
slice.start = (char*)pos;
|
||||
match_t *m = match(defs, &slice, pos, back, ignorecase);
|
||||
// Match should not go past str (i.e. (<"AB" "B") should match "ABB", but not "AB")
|
||||
if (m && m->end != str)
|
||||
recycle_if_unused(&m);
|
||||
else if (m)
|
||||
return new_match(pat, str, str, m);
|
||||
if (pos == f->contents) break;
|
||||
if (pos == f->start) break;
|
||||
// To prevent extreme performance degradation, don't keep
|
||||
// walking backwards endlessly over newlines.
|
||||
if (back->max_matchlen == -1 && *pos == '\n') break;
|
||||
@ -361,9 +360,7 @@ static match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, bool
|
||||
// <p1>==<p2> matches iff the text of <p1> matches <p2>
|
||||
// <p1>!=<p2> matches iff the text of <p1> does not match <p2>
|
||||
file_t slice;
|
||||
memcpy(&slice, f, sizeof(file_t));
|
||||
slice.contents = (char*)m1->start;
|
||||
slice.end = (char*)m1->end;
|
||||
slice_file(&slice, f, m1->start, m1->end);
|
||||
match_t *m2 = next_match(defs, &slice, NULL, pat->args.multiple.second, NULL, ignorecase);
|
||||
if ((!m2 && pat->type == BP_MATCH) || (m2 && pat->type == BP_NOT_MATCH)) {
|
||||
recycle_if_unused(&m2);
|
||||
@ -451,7 +448,7 @@ static match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, bool
|
||||
|
||||
size_t linenum = get_line_number(f, str);
|
||||
const char *p = get_line(f, linenum);
|
||||
if (p < f->contents) p=f->contents; // Can happen with recursive matching
|
||||
if (p < f->start) p = f->start; // Can happen with recursive matching
|
||||
|
||||
// Current indentation:
|
||||
char denter = *p;
|
||||
|
2
print.c
2
print.c
@ -371,7 +371,7 @@ void print_match(FILE *out, printer_t *pr, match_t *m)
|
||||
current_color = color_normal;
|
||||
bool first = (pr->pos == NULL);
|
||||
if (first) { // First match printed:
|
||||
pr->pos = pr->file->contents;
|
||||
pr->pos = pr->file->start;
|
||||
pr->needs_line_number = 1;
|
||||
}
|
||||
if (m) {
|
||||
|
12
utf8.h
12
utf8.h
@ -4,6 +4,8 @@
|
||||
#ifndef UTF8__H
|
||||
#define UTF8__H
|
||||
|
||||
#include "files.h"
|
||||
|
||||
#define UTF8_MAXCHARLEN 4
|
||||
//
|
||||
// Return the location of the next character or UTF8 codepoint.
|
||||
@ -30,15 +32,15 @@ static inline const char *next_char(file_t *f, const char *str)
|
||||
__attribute__((nonnull, pure))
|
||||
static inline const char *prev_char(file_t *f, const char *str)
|
||||
{
|
||||
if (__builtin_expect(str-1 >= f->contents && (str[-1] & 0x80) == 0x0, 1))
|
||||
if (__builtin_expect(str-1 >= f->start && (str[-1] & 0x80) == 0x0, 1))
|
||||
return str-1;
|
||||
if (__builtin_expect(str-2 >= f->contents && (str[-2] & 0xe0) == 0xc0, 1))
|
||||
if (__builtin_expect(str-2 >= f->start && (str[-2] & 0xe0) == 0xc0, 1))
|
||||
return str-2;
|
||||
if (__builtin_expect(str-3 >= f->contents && (str[-3] & 0xf0) == 0xe0, 1))
|
||||
if (__builtin_expect(str-3 >= f->start && (str[-3] & 0xf0) == 0xe0, 1))
|
||||
return str-3;
|
||||
if (__builtin_expect(str-4 >= f->contents && (str[-4] & 0xf8) == 0xf0, 1))
|
||||
if (__builtin_expect(str-4 >= f->start && (str[-4] & 0xf8) == 0xf0, 1))
|
||||
return str-4;
|
||||
return __builtin_expect(str-1 >= f->contents, 1) ? str-1 : f->contents;
|
||||
return __builtin_expect(str-1 >= f->start, 1) ? str-1 : f->start;
|
||||
}
|
||||
#endif
|
||||
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
|
||||
|
Loading…
Reference in New Issue
Block a user