bp/files.c

387 lines
11 KiB
C

//
// files.c - Implementation of some file loading functionality.
//
#include <ctype.h>
#include <err.h>
#include <fcntl.h>
#include <limits.h>
#include <stdarg.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>
#include "files.h"
#include "match.h"
#include "pattern.h"
#include "utils.h"
//
// In the file object, populate the `lines` array with pointers to the
// beginning of each line, and set `nlines` to the number of lines found.
//
// Lines are delimited by '\n'. The scan is bounded by `f->end` and never reads
// beyond it, so the text does not have to be NUL-terminated (memory-mapped
// file contents are not), and embedded NUL bytes are treated as ordinary
// characters inside a line.
//
__attribute__((nonnull))
static void populate_lines(file_t *f)
{
    size_t linecap = 10;
    f->lines = new(const char*[linecap]);
    f->nlines = 0;
    char *p = f->start;
    for (size_t n = 0; p && p < f->end; ++n) {
        ++f->nlines;
        if (n >= linecap)
            f->lines = grow(f->lines, linecap *= 2);
        f->lines[n] = p;
        // Advance to just past the next newline, or to the end of the text
        // when this is the final line and it has no trailing newline.
        char *nl = memchr(p, '\n', (size_t)(f->end - p));
        p = nl ? nl+1 : f->end;
    }
}
//
// Read an entire file into memory, using a printf-style formatting string to
// construct the filename.
//
// files: optional list head, passed through to load_file().
// fmt, ...: printf-style format and arguments that produce the filename.
//
// Exits the program (via errx) if the filename cannot be formatted or does not
// fit in PATH_MAX-1 characters. Otherwise returns whatever load_file() returns.
//
file_t *load_filef(file_t **files, const char *fmt, ...)
{
    char filename[PATH_MAX+1] = {'\0'};
    va_list args;
    va_start(args, fmt);
    int len = vsnprintf(filename, PATH_MAX, fmt, args);
    va_end(args);
    if (len < 0)
        errx(EXIT_FAILURE, "Failed to format file name");
    // vsnprintf() returns the length the full string would have had, so any
    // value >= the buffer size passed in means the output was truncated.
    if (len >= (int)PATH_MAX)
        errx(EXIT_FAILURE, "File name is too large");
    return load_file(files, filename);
}
//
// Read an entire file into memory.
//
// files: if non-NULL, the newly loaded file is pushed onto the front of this
//        linked list.
// filename: path of the file to load. An empty string means standard input.
//
// If `filename` cannot be opened but contains a ':', it is interpreted as
// "<file>:<line>": the part before the last colon is loaded and the returned
// file's start/end are narrowed to that (1-indexed) line. If the file has no
// such line, the returned file covers an empty region at the end of the text.
//
// The contents are memory-mapped when possible; otherwise they are read into
// a heap buffer that is kept NUL-terminated.
//
// Returns the loaded file, or NULL if nothing could be opened.
//
file_t *load_file(file_t **files, const char *filename)
{
    int fd = filename[0] == '\0' ? STDIN_FILENO : open(filename, O_RDONLY);
    if (fd < 0) {
        // Check for <file>:<line>
        const char *sep = strrchr(filename, ':');
        if (sep == NULL) return NULL;

        // Validate the line number before loading anything, so that a bad
        // suffix does not leave a half-configured file behind.
        char *num_end = NULL;
        long line = strtol(sep+1, &num_end, 10);
        if (num_end == sep+1 || line <= 0) return NULL;

        // Copy the part before the colon, refusing names that cannot fit
        // (such a path could not be opened anyway).
        char tmp[PATH_MAX] = {0};
        size_t prefix_len = (size_t)(sep - filename);
        if (prefix_len >= sizeof(tmp)) return NULL;
        memcpy(tmp, filename, prefix_len);

        file_t *f = load_file(files, tmp);
        if (!f) return f;

        // NOTE(review): destroy_file() derives the munmap() length from
        // f->end, so narrowing f->end on a memory-mapped file makes that
        // length smaller than what was mapped -- TODO track the mapped length
        // separately.
        const char *line_start = get_line(f, (size_t)line);
        if (line_start == NULL) {
            // No such line: expose an empty region instead of NULL pointers.
            f->start = f->end;
            return f;
        }
        const char *next_line = get_line(f, (size_t)line+1);
        f->start = (char*)line_start;
        // The last line has no successor; it simply runs to the end of the file.
        if (next_line != NULL)
            f->end = (char*)next_line;
        return f;
    }

    file_t *f = new(file_t);
    f->filename = checked_strdup(filename);

    // Preferred path: map the file into memory.
    struct stat sb;
    if (fstat(fd, &sb) == -1)
        goto read_file;

    f->mmapped = mmap(NULL, (size_t)sb.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
    if (f->mmapped == MAP_FAILED) {
        f->mmapped = NULL;
        goto read_file;
    }
    f->start = f->mmapped;
    f->end = &f->mmapped[sb.st_size];
    goto finished_loading;

  read_file:
    {
        // Fallback path: read() everything into a growing heap buffer.
        size_t capacity = 1000, length = 0;
        f->allocated = new(char[capacity]);
        ssize_t just_read;
        // One byte is always held back for the NUL terminator written below.
        while ((just_read=read(fd, &f->allocated[length], (capacity-1) - length)) > 0) {
            length += (size_t)just_read;
            if (length >= capacity-1)
                f->allocated = grow(f->allocated, capacity *= 2);
        }
        f->allocated[length] = '\0';
        f->start = f->allocated;
        f->end = &f->allocated[length];
    }

  finished_loading:
    if (fd != STDIN_FILENO)
        require(close(fd), "Failed to close file");

    populate_lines(f);
    if (files != NULL) {
        f->next = *files;
        *files = f;
    }
    return f;
}
//
// Make `slice` describe the region [start, end) of the file `src`.
//
// `slice` is cleared first, so every field not listed below ends up zeroed.
// The filename and line table are copied as pointers from `src`; the data
// they point to is not duplicated.
//
void slice_file(file_t *slice, file_t *src, const char *start, const char *end)
{
    memset(slice, 0, sizeof *slice);

    // Line bookkeeping and name come straight from the source file.
    slice->filename = src->filename;
    slice->nlines = src->nlines;
    slice->lines = src->lines;

    // The region this slice exposes.
    slice->end = (char*)end;
    slice->start = (char*)start;
}
//
// Create a virtual file from a string.
//
// files: if non-NULL, the new file is pushed onto the front of this list.
// filename: name to report for the file; NULL is treated as "".
// text: the contents to copy into the virtual file.
// _len: number of bytes of `text` to copy, or a negative value (conventionally
//       -1) to use strlen(text).
//
// Returns the new file, whose contents are a private NUL-terminated copy of
// the text.
//
file_t *spoof_file(file_t **files, const char *filename, const char *text, ssize_t _len)
{
    if (filename == NULL) filename = "";
    file_t *f = new(file_t);
    // Any negative length means "measure it"; converting a negative value
    // straight to size_t would request an enormous copy.
    size_t len = _len < 0 ? strlen(text) : (size_t)_len;
    f->filename = checked_strdup(filename);
    f->allocated = new(char[len+1]);
    memcpy(f->allocated, text, len);
    // Terminate explicitly rather than relying on the allocator's fill.
    f->allocated[len] = '\0';
    f->start = &f->allocated[0];
    f->end = &f->allocated[len];
    populate_lines(f);
    if (files != NULL) {
        f->next = *files;
        *files = f;
    }
    return f;
}
//
// Free a file and all memory contained inside its members, then set the input
// pointer to NULL.
//
// Calling this with a NULL pointer, or with a pointer to a NULL file (for
// example a file that has already been destroyed), does nothing.
//
void destroy_file(file_t **at_f)
{
    if (at_f == NULL || *at_f == NULL) return;
    file_t *f = *at_f;
    if (f->filename)
        delete(&f->filename);

    if (f->lines)
        delete(&f->lines);

    if (f->allocated)
        delete(&f->allocated);

    if (f->mmapped) {
        // NOTE(review): the mapping length is recomputed from f->end, but
        // load_file() moves f->end when given "<file>:<line>", so this may not
        // equal the length that was passed to mmap() -- TODO confirm/fix by
        // storing the mapped length.
        require(munmap(f->mmapped, (size_t)(f->end - f->mmapped)),
                "Failure to un-memory-map some memory");
        f->mmapped = NULL;
    }

    cache_destroy(f);

    // Free the list of patterns attached to this file, one node at a time.
    for (pat_t *next; f->pats; f->pats = next) {
        next = f->pats->next;
        delete(&f->pats);
    }

    delete(at_f);
}
//
// Given a pointer, determine which line number it points to.
//
// Returns the 1-indexed number of the last line that begins at or before `p`.
// Returns 0 if the file has no lines or `p` lies before the first line.
//
size_t get_line_number(file_t *f, const char *p)
{
    // Binary search over the half-open range [lo, hi) for the first line that
    // begins after `p`. Its index equals the number of lines beginning at or
    // before `p`, which is the 1-indexed line number. Keeping `hi` exclusive
    // avoids ever computing `mid - 1`, which would wrap around when mid == 0.
    size_t lo = 0, hi = f->nlines;
    while (lo < hi) {
        size_t mid = lo + (hi - lo) / 2;
        if (f->lines[mid] <= p)
            lo = mid + 1;
        else
            hi = mid;
    }
    return lo;
}
//
// Look up the start of a line by its 1-indexed line number.
//
// Returns a pointer to the first character of that line, or NULL when
// `line_number` is 0 or greater than the number of lines in the file.
//
const char *get_line(file_t *f, size_t line_number)
{
    if (line_number >= 1 && line_number <= f->nlines)
        return f->lines[line_number - 1];
    return NULL;
}
//
// Print the filename/line number, followed by the given message, followed by
// the line itself.
//
// dest: stream to print to.
// f: file that `start` and `end` point into.
// start, end: the region to highlight; both are clamped to the file's bounds,
//             and `end` is clamped to the end of the line containing `start`.
// fmt, ...: printf-style message printed after the "filename:line:" header.
//
// After the message, the line containing `start` is printed with the region
// highlighted, followed by a row of '^' markers underneath the region. If no
// line can be found for `start`, only the header and message are printed.
//
void fprint_line(FILE *dest, file_t *f, const char *start, const char *end, const char *fmt, ...)
{
    if (start < f->start) start = f->start;
    if (start > f->end) start = f->end;
    if (end < f->start) end = f->start;
    if (end > f->end) end = f->end;
    size_t linenum = get_line_number(f, start);
    const char *line = get_line(f, linenum);
    // size_t must be printed with %zu; %lu is only correct where size_t
    // happens to be unsigned long.
    fprintf(dest, "\033[1m%s:%zu:\033[0m ", f->filename[0] ? f->filename : "stdin", linenum);
    va_list args;
    va_start(args, fmt);
    (void)vfprintf(dest, fmt, args);
    va_end(args);
    (void)fputc('\n', dest);

    // No line to show (e.g. the file has no lines at all).
    if (line == NULL) return;

    const char *eol = linenum == f->nlines ? strchr(line, '\0') : strchr(line, '\n');
    // strchr() stops at a NUL byte, so it can miss the newline; fall back to
    // the NUL it stopped at.
    if (eol == NULL) eol = strchr(line, '\0');
    // Keep line <= start <= end <= eol so that none of the precisions below
    // can go negative (a negative precision makes %.*s print the whole string).
    if (eol < start) eol = start;
    if (end == NULL || end > eol) end = eol;
    if (end < start) end = start;
    fprintf(dest, "\033[2m%5zu\033(0\x78\033(B\033[0m%.*s\033[41;30m%.*s\033[0m%.*s\n",
            linenum,
            (int)(start - line), line,
            (int)(end - start), start,
            (int)(eol - end), end);
    // Pad by the width of the gutter printed above: a 5-column line number
    // plus the one-column separator.
    fprintf(dest, "      \033[34;1m");
    const char *p = line;
    for (; p < start; ++p) (void)fputc(*p == '\t' ? '\t' : ' ', dest);
    // Always draw at least one marker, even for an empty region.
    if (start == end) ++end;
    for (; p < end; ++p)
        if (*p == '\t')
            // Some janky hacks: 8 ^'s, backtrack 8 spaces, move forward a tab stop, clear any ^'s that overshot
            fprintf(dest, "^^^^^^^^\033[8D\033[I\033[K");
        else
            (void)fputc('^', dest);
    fprintf(dest, "\033[0m\n");
}
//
// Compute a hash for a (string position, pattern) pair by combining the
// address of the position with twice the pattern's id.
//
static inline size_t hash(const char *str, pat_t *pat)
{
    const size_t address = (size_t)str;
    return address + 2*pat->id;
}
//
// Check if we have memoized a pattern match at the given position for the
// given definitions. If a result has been memoized, set *result to the
// memoized value and return true, otherwise return false.
//
// A memoized "no match here" is reported as a hit with *result set to NULL.
// On a miss, *result is left untouched.
//
bool cache_get(file_t *f, def_t *defs, const char *str, pat_t *pat, match_t **result)
{
    // Nothing has been cached for this file yet.
    if (!f->cache.matches) return false;
    size_t h = hash(str, pat) & (f->cache.size-1);
    for (match_t *c = f->cache.matches[h]; c; c = c->cache.next) {
        if (c->pat == pat && c->defs_id == (defs?defs->id:0) && c->start == str) {
            // If c->end == NULL, that means no match occurs here
            *result = c->end == NULL ? NULL : c;
            return true;
        }
    }
    return false;
}
//
// Unlink a match from the file's cache and drop the cache's reference to it.
// A match that is not currently in the cache (no `home` slot) is left alone.
//
static void cache_remove(file_t *f, match_t *m)
{
    match_t **slot = m->cache.home;
    if (slot == NULL) return;

    // Splice the entry out: whatever pointed at `m` now points at its successor.
    match_t *successor = m->cache.next;
    *slot = successor;
    if (successor != NULL)
        successor->cache.home = slot;
    m->cache.next = NULL;
    m->cache.home = NULL;

    // Release the reference the cache was holding.
    m->refcount -= 1;
    if (m->refcount == 0)
        recycle_if_unused(&m);
    f->cache.occupancy -= 1;
}
//
// Save a match in the cache.
//
// Passing m == NULL records that `pat` does not match at `str`; this is stored
// as a placeholder match whose `end` is NULL.
//
// When adding the entry would put more than 3 entries per bucket (on average)
// in the table, the table is doubled in size (starting at 16 buckets) and all
// existing entries are rehashed. Once the table has reached MAX_CACHE_SIZE, up
// to two entries are instead evicted from the tail of the bucket the new entry
// will go into.
//
void cache_save(file_t *f, def_t *defs, const char *str, pat_t *pat, match_t *m)
{
    if (m == NULL)
        m = new_match(defs, pat, str, NULL, NULL);

    if (f->cache.occupancy+1 > 3*f->cache.size) {
        if (f->cache.size != MAX_CACHE_SIZE) {
            // Grow: allocate a bigger table and move every entry across.
            match_t **prev_table = f->cache.matches;
            size_t prev_size = f->cache.size;
            f->cache.size = prev_size == 0 ? 16 : 2*prev_size;
            f->cache.matches = new(match_t*[f->cache.size]);

            if (prev_table != NULL) {
                for (size_t i = 0; i < prev_size; i++) {
                    match_t *entry;
                    // Keep popping the head of the old bucket until it is empty.
                    while ((entry = prev_table[i]) != NULL) {
                        // Detach from the old chain.
                        *entry->cache.home = entry->cache.next;
                        if (entry->cache.next)
                            entry->cache.next->cache.home = entry->cache.home;

                        // Push onto the front of its bucket in the new table.
                        size_t slot = hash(entry->start, entry->pat) & (f->cache.size-1);
                        match_t **bucket = &(f->cache.matches[slot]);
                        entry->cache.home = bucket;
                        entry->cache.next = *bucket;
                        if (*bucket)
                            (*bucket)->cache.home = &entry->cache.next;
                        *bucket = entry;
                    }
                }
                free(prev_table);
            }
        } else {
            // Table cannot grow any further: make room by evicting from the
            // end of the destination bucket's chain.
            size_t slot = hash(m->start, m->pat) & (f->cache.size-1);
            int quota = 2;
            while (f->cache.matches[slot] && quota > 0) {
                match_t *tail = f->cache.matches[slot];
                while (tail->cache.next)
                    tail = tail->cache.next;
                cache_remove(f, tail);
                quota--;
            }
        }
    }

    // Insert the new entry at the front of its bucket; the cache holds one
    // reference to it.
    size_t slot = hash(m->start, m->pat) & (f->cache.size-1);
    match_t **bucket = &(f->cache.matches[slot]);
    m->cache.home = bucket;
    m->cache.next = *bucket;
    if (*bucket)
        (*bucket)->cache.home = &m->cache.next;
    *bucket = m;
    ++m->refcount;
    ++f->cache.occupancy;
}
//
// Remove every cached item that is not entirely inside [start, end]: that is,
// any item that begins before `start` or ends after `end`. A memoized
// non-match (which has no end) is judged by its start position alone.
// (This is used to remove useless items from the cache)
//
void cache_prune(file_t *f, const char *start, const char *end)
{
    if (f->cache.matches == NULL) return;

    for (size_t i = 0; i < f->cache.size; i++) {
        match_t *m = f->cache.matches[i];
        while (m != NULL) {
            // Grab the successor first: removing `m` clears its links.
            match_t *following = m->cache.next;
            const char *m_end = m->end ? m->end : m->start;
            if (m->start < start || m_end > end)
                cache_remove(f, m);
            m = following;
        }
    }
}
//
// Empty the cache and release its bucket table, leaving the file with no
// cache (zero size, zero occupancy).
//
void cache_destroy(file_t *f)
{
    if (f->cache.matches == NULL) return;

    for (size_t i = 0; i < f->cache.size; i++) {
        // Removing the head promotes its successor, so repeat until the
        // bucket is empty.
        for (match_t *head; (head = f->cache.matches[i]) != NULL; )
            cache_remove(f, head);
    }
    f->cache.occupancy = 0;
    delete(&f->cache.matches);
    f->cache.size = 0;
}
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1