From 219f62ed3f51a1094e75d4bf8823db7608e70b0e Mon Sep 17 00:00:00 2001 From: Bruce Hill Date: Thu, 20 May 2021 18:31:28 -0700 Subject: Renamed file_t.contents -> file_t.start, added file_t.memory for canonical tracking of memory for mmap, simplified json printing, and added support for initiating searches on a specific line (and/or column). --- bp.c | 26 ++++++++++----------- definitions.c | 2 +- files.c | 75 +++++++++++++++++++++++++++++++++++++++++------------------ files.h | 4 +++- match.c | 21 +++++++---------- print.c | 2 +- utf8.h | 12 ++++++---- 7 files changed, 86 insertions(+), 56 deletions(-) diff --git a/bp.c b/bp.c index f04c8de..c9998d7 100644 --- a/bp.c +++ b/bp.c @@ -169,11 +169,9 @@ static int print_matches_as_json(def_t *defs, file_t *f, pat_t *pattern) for (match_t *m = NULL; (m = next_match(defs, f, m, pattern, options.skip, options.ignorecase)); ) { if (++matches > 1) printf(",\n"); - printf("{\"filename\":\"%s\",", f->filename); - printf("\"tree\":{\"rule\":\"text\",\"start\":%d,\"end\":%ld,\"children\":[", - 0, f->end - f->contents); - json_match(f->contents, m, options.verbose); - printf("]}}"); + printf("{\"filename\":\"%s\",\"match\":", f->filename); + json_match(f->start, m, options.verbose); + printf("}"); } return matches; } @@ -204,8 +202,8 @@ static void cleanup(void) if (modifying_file && backup_file) { rewind(modifying_file); ftruncate(fileno(modifying_file), 0); - fwrite(backup_file->contents, 1, - (size_t)(backup_file->end - backup_file->contents), + fwrite(backup_file->start, 1, + (size_t)(backup_file->end - backup_file->start), modifying_file); fclose(modifying_file); modifying_file = NULL; @@ -288,7 +286,7 @@ static int inplace_modify_file(def_t *defs, file_t *f, pat_t *pattern) file_t *inmem_copy = NULL; // Ensure the file is resident in memory: if (f->mmapped) { - inmem_copy = spoof_file(NULL, f->filename, f->contents, (ssize_t)(f->end - f->contents)); + inmem_copy = spoof_file(NULL, f->filename, f->start, (ssize_t)(f->end - 
f->start)); f = inmem_copy; } @@ -535,7 +533,7 @@ int main(int argc, char *argv[]) // TODO: spoof file as sprintf("pattern => '%s'", flag) // except that would require handling edge cases like quotation marks etc. file_t *replace_file = spoof_file(&loaded_files, "", flag, -1); - pattern = bp_replacement(replace_file, pattern, replace_file->contents); + pattern = bp_replacement(replace_file, pattern, replace_file->start); if (!pattern) errx(EXIT_FAILURE, "Replacement failed to compile: %s", flag); } else if (FLAG("-g") || FLAG("--grammar")) { @@ -551,7 +549,7 @@ int main(int argc, char *argv[]) defs = load_grammar(defs, f); // Keep in memory for debug output } else if (FLAG("-p") || FLAG("--pattern")) { file_t *arg_file = spoof_file(&loaded_files, "", flag, -1); - for (const char *str = arg_file->contents; str < arg_file->end; ) { + for (const char *str = arg_file->start; str < arg_file->end; ) { def_t *d = bp_definition(defs, arg_file, str); if (d) { defs = d; @@ -567,9 +565,9 @@ int main(int argc, char *argv[]) } } else if (FLAG("-s") || FLAG("--skip")) { file_t *arg_file = spoof_file(&loaded_files, "", flag, -1); - pat_t *s = bp_pattern(arg_file, arg_file->contents); + pat_t *s = bp_pattern(arg_file, arg_file->start); if (!s) { - fprint_line(stdout, arg_file, arg_file->contents, arg_file->end, + fprint_line(stdout, arg_file, arg_file->start, arg_file->end, "Failed to compile the skip argument"); } else if (after_spaces(s->end) < arg_file->end) { fprint_line(stdout, arg_file, s->end, arg_file->end, @@ -593,7 +591,7 @@ int main(int argc, char *argv[]) } else if (argv[0][0] != '-') { if (pattern != NULL) break; file_t *arg_file = spoof_file(&loaded_files, "", argv[0], -1); - pat_t *p = bp_stringpattern(arg_file, arg_file->contents); + pat_t *p = bp_stringpattern(arg_file, arg_file->start); if (!p) errx(EXIT_FAILURE, "Pattern failed to compile: %s", argv[0]); pattern = chain_together(arg_file, pattern, p); @@ -641,7 +639,7 @@ int main(int argc, char *argv[]) // 
pattern the args specified, and use `pattern` as the thing being matched. defs = with_def(defs, strlen("pattern"), "pattern", pattern); file_t *patref_file = spoof_file(&loaded_files, "", "pattern", -1); - pattern = bp_pattern(patref_file, patref_file->contents); + pattern = bp_pattern(patref_file, patref_file->start); int found = 0; if (options.mode == MODE_JSON) printf("["); diff --git a/definitions.c b/definitions.c index 55be12b..3e5f0e2 100644 --- a/definitions.c +++ b/definitions.c @@ -30,7 +30,7 @@ def_t *with_def(def_t *defs, size_t namelen, const char *name, pat_t *pat) // def_t *load_grammar(def_t *defs, file_t *f) { - const char *src = f->contents; + const char *src = f->start; src = after_spaces(src); while (src < f->end) { const char *name = src; diff --git a/files.c b/files.c index 4cb00cc..735d928 100644 --- a/files.c +++ b/files.c @@ -18,7 +18,7 @@ #include "utils.h" __attribute__((nonnull)) -static void populate_lines(file_t *f); +static void populate_lines(file_t *f, size_t len); __attribute__((pure, nonnull)) static size_t get_char_number(file_t *f, const char *p); @@ -26,14 +26,14 @@ static size_t get_char_number(file_t *f, const char *p); // In the file object, populate the `lines` array with pointers to the // beginning of each line. // -static void populate_lines(file_t *f) +static void populate_lines(file_t *f, size_t len) { // Calculate line numbers: size_t linecap = 10; f->lines = xcalloc(sizeof(const char*), linecap); f->nlines = 0; - char *p = f->contents; - for (size_t n = 0; p && p < f->end; ++n) { + char *p = f->memory; + for (size_t n = 0; p && p < &f->memory[len]; ++n) { ++f->nlines; if (n >= linecap) f->lines = xrealloc(f->lines, sizeof(const char*)*(linecap *= 2)); @@ -64,7 +64,26 @@ file_t *load_filef(file_t **files, const char *fmt, ...) file_t *load_file(file_t **files, const char *filename) { int fd = filename[0] == '\0' ? 
STDIN_FILENO : open(filename, O_RDONLY); - if (fd < 0) return NULL; + if (fd < 0) { + // Check for <file>:<line>[:<column>] + if (strchr(filename, ':')) { + char tmp[PATH_MAX] = {0}; + strcpy(tmp, filename); + char *colon = strchr(tmp, ':'); + *colon = '\0'; + file_t *f = load_file(files, tmp); + if (!f) return f; + long line = strtol(colon+1, &colon, 10); + f->start = (char*)get_line(f, (size_t)line); + if (*colon == ':') { + long offset = strtol(colon+1, &colon, 10); + f->start += offset; + } + if (f->start > f->end) f->start = f->end; + return f; + } + return NULL; + } size_t length; file_t *f = new(file_t); f->filename = memcheck(strdup(filename)); @@ -73,8 +92,8 @@ file_t *load_file(file_t **files, const char *filename) if (fstat(fd, &sb) == -1) goto skip_mmap; - f->contents = mmap(NULL, (size_t)sb.st_size, PROT_READ, MAP_PRIVATE, fd, 0); - if (f->contents == MAP_FAILED) + f->memory = mmap(NULL, (size_t)sb.st_size, PROT_READ, MAP_PRIVATE, fd, 0); + if (f->memory == MAP_FAILED) goto skip_mmap; f->mmapped = true; @@ -85,12 +104,12 @@ file_t *load_file(file_t **files, const char *filename) f->mmapped = false; size_t capacity = 1000; length = 0; - f->contents = xcalloc(sizeof(char), capacity); + f->memory = xcalloc(sizeof(char), capacity); ssize_t just_read; - while ((just_read=read(fd, &f->contents[length], capacity - length)) > 0) { + while ((just_read=read(fd, &f->memory[length], capacity - length)) > 0) { length += (size_t)just_read; if (length >= capacity) - f->contents = xrealloc(f->contents, sizeof(char)*(capacity *= 2) + 1); + f->memory = xrealloc(f->memory, sizeof(char)*(capacity *= 2) + 1); } finished_loading: @@ -98,8 +117,9 @@ file_t *load_file(file_t **files, const char *filename) if (close(fd) != 0) err(EXIT_FAILURE, "Failed to close file"); } - f->end = &f->contents[length]; - populate_lines(f); + f->start = &f->memory[0]; + f->end = &f->memory[length]; + populate_lines(f, length); if (files != NULL) { f->next = *files; *files = f; @@ -107,6 +127,16 @@ file_t 
*load_file(file_t **files, const char *filename) return f; } +// +// Set a file struct to represent a region of a different file. +// +void slice_file(file_t *slice, file_t *src, const char *start, const char *end) +{ + memcpy(slice, src, sizeof(file_t)); + slice->start = (char*)start; + slice->end = (char*)end; +} + // // Create a virtual file from a string. // @@ -116,10 +146,11 @@ file_t *spoof_file(file_t **files, const char *filename, const char *text, ssize file_t *f = new(file_t); size_t len = _len == -1 ? strlen(text) : (size_t)_len; f->filename = memcheck(strdup(filename)); - f->contents = xcalloc(len+1, sizeof(char)); - memcpy(f->contents, text, len); - f->end = &f->contents[len]; - populate_lines(f); + f->memory = xcalloc(len+1, sizeof(char)); + memcpy(f->memory, text, len); + f->start = &f->memory[0]; + f->end = &f->memory[len]; + populate_lines(f, len); if (files != NULL) { f->next = *files; *files = f; @@ -141,13 +172,13 @@ void destroy_file(file_t **f) xfree(&((*f)->lines)); } - if ((*f)->contents) { + if ((*f)->memory) { if ((*f)->mmapped) { - if (munmap((*f)->contents, (size_t)((*f)->end - (*f)->contents)) != 0) + if (munmap((*f)->memory, (size_t)((*f)->end - (*f)->memory)) != 0) err(EXIT_FAILURE, "Failure to un-memory-map some memory"); - (*f)->contents = NULL; + (*f)->memory = NULL; } else { - xfree(&((*f)->contents)); + xfree(&((*f)->memory)); } } @@ -203,9 +234,9 @@ const char *get_line(file_t *f, size_t line_number) // void fprint_line(FILE *dest, file_t *f, const char *start, const char *end, const char *fmt, ...) 
{ - if (start < f->contents) start = f->contents; + if (start < f->start) start = f->start; if (start > f->end) start = f->end; - if (end < f->contents) end = f->contents; + if (end < f->start) end = f->start; if (end > f->end) end = f->end; size_t linenum = get_line_number(f, start); const char *line = get_line(f, linenum); diff --git a/files.h b/files.h index 447a52f..c3ee37d 100644 --- a/files.h +++ b/files.h @@ -15,7 +15,7 @@ struct allocated_pat_s; // declared in types.h typedef struct file_s { struct file_s *next; const char *filename; - char *contents, **lines, *end; + char *memory, **lines, *start, *end; size_t nlines; struct allocated_pat_s *pats; bool mmapped:1; @@ -25,6 +25,8 @@ __attribute__((nonnull(2))) file_t *load_file(file_t **files, const char *filename); __attribute__((format(printf,2,3))) file_t *load_filef(file_t **files, const char *fmt, ...); +__attribute__((nonnull)) +void slice_file(file_t *slice, file_t *src, const char *start, const char *end); __attribute__((nonnull(3), returns_nonnull)) file_t *spoof_file(file_t **files, const char *filename, const char *text, ssize_t len); __attribute__((nonnull)) diff --git a/match.c b/match.c index 7525a45..287315e 100644 --- a/match.c +++ b/match.c @@ -127,7 +127,7 @@ match_t *next_match(def_t *defs, file_t *f, match_t *prev, pat_t *pat, pat_t *sk str = prev->end > prev->start ? prev->end : prev->end + 1; recycle_if_unused(&prev); } else { - str = f->contents; + str = f->start; } bool only_start = pat->type == BP_START_OF_FILE || (pat->type == BP_CHAIN && pat->args.multiple.first->type == BP_START_OF_FILE); while (str <= f->end) { @@ -168,10 +168,10 @@ static match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, bool return (str < f->end && *str != '\n') ? new_match(pat, str, next_char(f, str), NULL) : NULL; } case BP_START_OF_FILE: { - return (str == f->contents) ? new_match(pat, str, str, NULL) : NULL; + return (str == f->start) ? 
new_match(pat, str, str, NULL) : NULL; } case BP_START_OF_LINE: { - return (str == f->contents || str[-1] == '\n') ? new_match(pat, str, str, NULL) : NULL; + return (str == f->start || str[-1] == '\n') ? new_match(pat, str, str, NULL) : NULL; } case BP_END_OF_FILE: { return (str == f->end) ? new_match(pat, str, str, NULL) : NULL; @@ -302,19 +302,18 @@ static match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, bool // TODO: this breaks ^/^^/$/$$, but that can probably be ignored // because you rarely need to check those in a backtrack. file_t slice; - memcpy(&slice, f, sizeof(file_t)); - slice.end = (char*)str; + slice_file(&slice, f, f->start, str); for (const char *pos = &str[-(long)back->min_matchlen]; - pos >= f->contents && (back->max_matchlen == -1 || pos >= &str[-(long)back->max_matchlen]); + pos >= f->start && (back->max_matchlen == -1 || pos >= &str[-(long)back->max_matchlen]); pos = prev_char(f, pos)) { - slice.contents = (char*)pos; + slice.start = (char*)pos; match_t *m = match(defs, &slice, pos, back, ignorecase); // Match should not go past str (i.e. (<"AB" "B") should match "ABB", but not "AB") if (m && m->end != str) recycle_if_unused(&m); else if (m) return new_match(pat, str, str, m); - if (pos == f->contents) break; + if (pos == f->start) break; // To prevent extreme performance degradation, don't keep // walking backwards endlessly over newlines. 
if (back->max_matchlen == -1 && *pos == '\n') break; @@ -361,9 +360,7 @@ static match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, bool // <pat1>==<pat2> matches iff the text of <pat1> matches <pat2> // <pat1>!=<pat2> matches iff the text of <pat1> does not match <pat2> file_t slice; - memcpy(&slice, f, sizeof(file_t)); - slice.contents = (char*)m1->start; - slice.end = (char*)m1->end; + slice_file(&slice, f, m1->start, m1->end); match_t *m2 = next_match(defs, &slice, NULL, pat->args.multiple.second, NULL, ignorecase); if ((!m2 && pat->type == BP_MATCH) || (m2 && pat->type == BP_NOT_MATCH)) { recycle_if_unused(&m2); @@ -451,7 +448,7 @@ static match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, bool size_t linenum = get_line_number(f, str); const char *p = get_line(f, linenum); - if (p < f->contents) p=f->contents; // Can happen with recursive matching + if (p < f->start) p = f->start; // Can happen with recursive matching // Current indentation: char denter = *p; diff --git a/print.c b/print.c index 3dd8d6c..20be733 100644 --- a/print.c +++ b/print.c @@ -371,7 +371,7 @@ void print_match(FILE *out, printer_t *pr, match_t *m) current_color = color_normal; bool first = (pr->pos == NULL); if (first) { // First match printed: - pr->pos = pr->file->contents; + pr->pos = pr->file->start; pr->needs_line_number = 1; } if (m) { diff --git a/utf8.h b/utf8.h index 95e4358..fb353d9 100644 --- a/utf8.h +++ b/utf8.h @@ -4,6 +4,8 @@ #ifndef UTF8__H #define UTF8__H +#include "files.h" + #define UTF8_MAXCHARLEN 4 // // Return the location of the next character or UTF8 codepoint. 
@@ -30,15 +32,15 @@ static inline const char *next_char(file_t *f, const char *str) __attribute__((nonnull, pure)) static inline const char *prev_char(file_t *f, const char *str) { - if (__builtin_expect(str-1 >= f->contents && (str[-1] & 0x80) == 0x0, 1)) + if (__builtin_expect(str-1 >= f->start && (str[-1] & 0x80) == 0x0, 1)) return str-1; - if (__builtin_expect(str-2 >= f->contents && (str[-2] & 0xe0) == 0xc0, 1)) + if (__builtin_expect(str-2 >= f->start && (str[-2] & 0xe0) == 0xc0, 1)) return str-2; - if (__builtin_expect(str-3 >= f->contents && (str[-3] & 0xf0) == 0xe0, 1)) + if (__builtin_expect(str-3 >= f->start && (str[-3] & 0xf0) == 0xe0, 1)) return str-3; - if (__builtin_expect(str-4 >= f->contents && (str[-4] & 0xf8) == 0xf0, 1)) + if (__builtin_expect(str-4 >= f->start && (str[-4] & 0xf8) == 0xf0, 1)) return str-4; - return __builtin_expect(str-1 >= f->contents, 1) ? str-1 : f->contents; + return __builtin_expect(str-1 >= f->start, 1) ? str-1 : f->start; } #endif // vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1 -- cgit v1.2.3