From 77b33d6a3cdc2655fa0319a2c5a077eb709cb6aa Mon Sep 17 00:00:00 2001 From: Bruce Hill Date: Fri, 15 Jan 2021 19:27:25 -0800 Subject: Renaming files: printing->print, file_loader->files --- Makefile | 2 +- bp.c | 4 +- compiler.h | 2 +- file_loader.c | 230 ----------------------------------- file_loader.h | 38 ------ files.c | 230 +++++++++++++++++++++++++++++++++++ files.h | 38 ++++++ grammar.c | 2 +- grammar.h | 2 +- print.c | 384 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ print.h | 26 ++++ printing.c | 384 ---------------------------------------------------------- printing.h | 26 ---- types.h | 2 +- 14 files changed, 685 insertions(+), 685 deletions(-) delete mode 100644 file_loader.c delete mode 100644 file_loader.h create mode 100644 files.c create mode 100644 files.h create mode 100644 print.c create mode 100644 print.h delete mode 100644 printing.c delete mode 100644 printing.h diff --git a/Makefile b/Makefile index a0cf193..829fc41 100644 --- a/Makefile +++ b/Makefile @@ -9,7 +9,7 @@ G= O=-O3 ALL_FLAGS=$(CFLAGS) -DBP_NAME="\"$(NAME)\"" $(EXTRA) $(CWARN) $(G) $(O) -CFILES=compiler.c grammar.c utils.c vm.c file_loader.c printing.c json.c +CFILES=compiler.c grammar.c utils.c vm.c files.c print.c json.c OBJFILES=$(CFILES:.c=.o) all: $(NAME) diff --git a/bp.c b/bp.c index 380a43d..23f3f2d 100644 --- a/bp.c +++ b/bp.c @@ -13,10 +13,10 @@ #include #include "compiler.h" -#include "file_loader.h" +#include "files.h" #include "grammar.h" #include "json.h" -#include "printing.h" +#include "print.h" #include "utils.h" #include "vm.h" diff --git a/compiler.h b/compiler.h index 6761644..3038d09 100644 --- a/compiler.h +++ b/compiler.h @@ -4,7 +4,7 @@ #ifndef COMPILER__H #define COMPILER__H -#include "file_loader.h" +#include "files.h" #include "types.h" __attribute__((nonnull)) diff --git a/file_loader.c b/file_loader.c deleted file mode 100644 index 0d92241..0000000 --- a/file_loader.c +++ /dev/null @@ -1,230 +0,0 @@ -// -// file_loader.c - Implementation of some file loading functionality. -// - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "file_loader.h" -#include "utils.h" - -__attribute__((nonnull)) -static void populate_lines(file_t *f); - -// -// In the file object, populate the `lines` array with pointers to the -// beginning of each line. -// -static void populate_lines(file_t *f) -{ - // Calculate line numbers: - size_t linecap = 10; - f->lines = xcalloc(sizeof(const char*), linecap); - f->nlines = 0; - char *p = f->contents; - for (size_t n = 0; p && p < f->end; ++n) { - ++f->nlines; - if (n >= linecap) - f->lines = xrealloc(f->lines, sizeof(const char*)*(linecap *= 2)); - f->lines[n] = p; - p = strchr(p, '\n'); - if (p) ++p; - } -} - -// -// Read an entire file into memory. -// -file_t *load_file(file_t **files, const char *fmt, ...) -{ - char filename[PATH_MAX+1] = {0}; - va_list args; - va_start(args, fmt); - check(vsnprintf(filename, PATH_MAX, fmt, args) <= PATH_MAX, - "File name is too large"); - va_end(args); - - int fd = filename[0] == '\0' ? STDIN_FILENO : open(filename, O_RDONLY); - if (fd < 0) return NULL; - size_t length; - file_t *f = new(file_t); - f->filename = strdup(filename); - - struct stat sb; - if (fstat(fd, &sb) == -1) - goto skip_mmap; - - f->contents = mmap(NULL, (size_t)sb.st_size, PROT_READ, MAP_PRIVATE, fd, 0); - if (f->contents == MAP_FAILED) - goto skip_mmap; - - f->mmapped = 1; - length = (size_t)sb.st_size; - goto finished_loading; - - skip_mmap: - f->mmapped = 0; - size_t capacity = 1000; - length = 0; - f->contents = xcalloc(sizeof(char), capacity); - ssize_t just_read; - while ((just_read=read(fd, &f->contents[length], capacity - length)) > 0) { - length += (size_t)just_read; - if (length >= capacity) - f->contents = xrealloc(f->contents, sizeof(char)*(capacity *= 2) + 1); - } - if (fd != STDIN_FILENO) close(fd); - - finished_loading: - f->end = &f->contents[length]; - populate_lines(f); - if (files != NULL) { - f->next = *files; - *files = f; - } - return f; -} - -// -// Create a virtual file from a string. -// -file_t *spoof_file(file_t **files, const char *filename, const char *text) -{ - if (filename == NULL) filename = ""; - file_t *f = new(file_t); - f->filename = strdup(filename); - f->contents = strdup(text); - f->end = &f->contents[strlen(text)]; - populate_lines(f); - if (files != NULL) { - f->next = *files; - *files = f; - } - return f; -} - -// -// Ensure that the file's contents are held in memory, rather than being memory -// mapped IO. -// -void intern_file(file_t *f) -{ - if (!f->mmapped) return; - size_t size = (size_t)(f->end - f->contents); - char *buf = xcalloc(sizeof(char), size + 1); - memcpy(buf, f->contents, size); - munmap(f->contents, size); - f->contents = buf; - f->end = buf + size; - f->mmapped = 0; - xfree(&f->lines); - populate_lines(f); -} - -// -// Free a file and all memory contained inside its members, then set the input -// pointer to NULL. -// -void destroy_file(file_t **f) -{ - if ((*f)->filename) { - xfree(&((*f)->filename)); - } - - if ((*f)->lines) { - xfree(&((*f)->lines)); - } - - if ((*f)->contents) { - if ((*f)->mmapped) { - munmap((*f)->contents, (size_t)((*f)->end - (*f)->contents)); - (*f)->contents = NULL; - } else { - xfree(&((*f)->contents)); - } - } - - for (allocated_pat_t *next; (*f)->pats; (*f)->pats = next) { - next = (*f)->pats->next; - destroy_pat(&(*f)->pats->pat); - xfree(&(*f)->pats); - } - - xfree(f); -} - -// -// Given a pointer, determine which line number it points to. -// -size_t get_line_number(file_t *f, const char *p) -{ - // TODO: binary search - for (size_t n = 1; n < f->nlines; n++) { - if (f->lines[n] > p) - return n; - } - return f->nlines; -} - -// -// Given a pointer, determine which character offset within the line it points to. -// -size_t get_char_number(file_t *f, const char *p) -{ - size_t linenum = get_line_number(f, p); - return 1 + (size_t)(p - f->lines[linenum-1]); -} - -// -// Return a pointer to the line with the specified line number. -// -const char *get_line(file_t *f, size_t line_number) -{ - if (line_number == 0 || line_number > f->nlines) return NULL; - return f->lines[line_number - 1]; -} - -// -// Print the filename/line number, followed by the given message, followed by -// the line itself. -// -void fprint_line(FILE *dest, file_t *f, const char *start, const char *end, const char *fmt, ...) -{ - if (start < f->contents) start = f->contents; - if (start > f->end) start = f->end; - if (end < f->contents) end = f->contents; - if (end > f->end) end = f->end; - size_t linenum = get_line_number(f, start); - const char *line = get_line(f, linenum); - size_t charnum = get_char_number(f, start); - fprintf(dest, "\033[1m%s:%ld:\033[0m ", f->filename, linenum); - - va_list args; - va_start(args, fmt); - vfprintf(dest, fmt, args); - va_end(args); - fputc('\n', dest); - - const char *eol = linenum == f->nlines ? strchr(line, '\0') : strchr(line, '\n'); - if (end == NULL || end > eol) end = eol; - fprintf(dest, "\033[2m% 5ld |\033[0m %.*s\033[41;30m%.*s\033[0m%.*s\n", - linenum, - (int)charnum - 1, line, - (int)(end - &line[charnum-1]), &line[charnum-1], - (int)(eol - end), end); - fprintf(dest, " \033[34;1m"); - const char *p = line - 1; - for (; p < start; ++p) fputc(' ', dest); - if (start == end) ++end; - for (; p < end; ++p) fputc('^', dest); - fprintf(dest, "\033[0m\n"); -} - -// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1 diff --git a/file_loader.h b/file_loader.h deleted file mode 100644 index 78f3757..0000000 --- a/file_loader.h +++ /dev/null @@ -1,38 +0,0 @@ -// -// file_loader.h - Definitions of an API for loading files. -// -#ifndef FILE_LOADER__H -#define FILE_LOADER__H - -#include - -struct allocated_pat_s; // declared in types.h - -typedef struct file_s { - struct file_s *next; - const char *filename; - char *contents, **lines, *end; - size_t nlines; - struct allocated_pat_s *pats; - unsigned int mmapped:1; -} file_t; - -__attribute__((format(printf,2,3))) -file_t *load_file(file_t **files, const char *fmt, ...); -__attribute__((nonnull(3), returns_nonnull)) -file_t *spoof_file(file_t **files, const char *filename, const char *text); -__attribute__((nonnull)) -void intern_file(file_t *f); -__attribute__((nonnull)) -void destroy_file(file_t **f); -__attribute__((pure, nonnull)) -size_t get_line_number(file_t *f, const char *p); -__attribute__((pure, nonnull)) -size_t get_char_number(file_t *f, const char *p); -__attribute__((pure, nonnull)) -const char *get_line(file_t *f, size_t line_number); -__attribute__((nonnull(1,2,3), format(printf,5,6))) -void fprint_line(FILE *dest, file_t *f, const char *start, const char *end, const char *fmt, ...); - -#endif -// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1 diff --git a/files.c b/files.c new file mode 100644 index 0000000..e4ca380 --- /dev/null +++ b/files.c @@ -0,0 +1,230 @@ +// +// files.c - Implementation of some file loading functionality. +// + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "files.h" +#include "utils.h" + +__attribute__((nonnull)) +static void populate_lines(file_t *f); + +// +// In the file object, populate the `lines` array with pointers to the +// beginning of each line. +// +static void populate_lines(file_t *f) +{ + // Calculate line numbers: + size_t linecap = 10; + f->lines = xcalloc(sizeof(const char*), linecap); + f->nlines = 0; + char *p = f->contents; + for (size_t n = 0; p && p < f->end; ++n) { + ++f->nlines; + if (n >= linecap) + f->lines = xrealloc(f->lines, sizeof(const char*)*(linecap *= 2)); + f->lines[n] = p; + p = strchr(p, '\n'); + if (p) ++p; + } +} + +// +// Read an entire file into memory. +// +file_t *load_file(file_t **files, const char *fmt, ...) +{ + char filename[PATH_MAX+1] = {0}; + va_list args; + va_start(args, fmt); + check(vsnprintf(filename, PATH_MAX, fmt, args) <= PATH_MAX, + "File name is too large"); + va_end(args); + + int fd = filename[0] == '\0' ? STDIN_FILENO : open(filename, O_RDONLY); + if (fd < 0) return NULL; + size_t length; + file_t *f = new(file_t); + f->filename = strdup(filename); + + struct stat sb; + if (fstat(fd, &sb) == -1) + goto skip_mmap; + + f->contents = mmap(NULL, (size_t)sb.st_size, PROT_READ, MAP_PRIVATE, fd, 0); + if (f->contents == MAP_FAILED) + goto skip_mmap; + + f->mmapped = 1; + length = (size_t)sb.st_size; + goto finished_loading; + + skip_mmap: + f->mmapped = 0; + size_t capacity = 1000; + length = 0; + f->contents = xcalloc(sizeof(char), capacity); + ssize_t just_read; + while ((just_read=read(fd, &f->contents[length], capacity - length)) > 0) { + length += (size_t)just_read; + if (length >= capacity) + f->contents = xrealloc(f->contents, sizeof(char)*(capacity *= 2) + 1); + } + if (fd != STDIN_FILENO) close(fd); + + finished_loading: + f->end = &f->contents[length]; + populate_lines(f); + if (files != NULL) { + f->next = *files; + *files = f; + } + return f; +} + +// +// Create a virtual file from a string. +// +file_t *spoof_file(file_t **files, const char *filename, const char *text) +{ + if (filename == NULL) filename = ""; + file_t *f = new(file_t); + f->filename = strdup(filename); + f->contents = strdup(text); + f->end = &f->contents[strlen(text)]; + populate_lines(f); + if (files != NULL) { + f->next = *files; + *files = f; + } + return f; +} + +// +// Ensure that the file's contents are held in memory, rather than being memory +// mapped IO. +// +void intern_file(file_t *f) +{ + if (!f->mmapped) return; + size_t size = (size_t)(f->end - f->contents); + char *buf = xcalloc(sizeof(char), size + 1); + memcpy(buf, f->contents, size); + munmap(f->contents, size); + f->contents = buf; + f->end = buf + size; + f->mmapped = 0; + xfree(&f->lines); + populate_lines(f); +} + +// +// Free a file and all memory contained inside its members, then set the input +// pointer to NULL. +// +void destroy_file(file_t **f) +{ + if ((*f)->filename) { + xfree(&((*f)->filename)); + } + + if ((*f)->lines) { + xfree(&((*f)->lines)); + } + + if ((*f)->contents) { + if ((*f)->mmapped) { + munmap((*f)->contents, (size_t)((*f)->end - (*f)->contents)); + (*f)->contents = NULL; + } else { + xfree(&((*f)->contents)); + } + } + + for (allocated_pat_t *next; (*f)->pats; (*f)->pats = next) { + next = (*f)->pats->next; + destroy_pat(&(*f)->pats->pat); + xfree(&(*f)->pats); + } + + xfree(f); +} + +// +// Given a pointer, determine which line number it points to. +// +size_t get_line_number(file_t *f, const char *p) +{ + // TODO: binary search + for (size_t n = 1; n < f->nlines; n++) { + if (f->lines[n] > p) + return n; + } + return f->nlines; +} + +// +// Given a pointer, determine which character offset within the line it points to. +// +size_t get_char_number(file_t *f, const char *p) +{ + size_t linenum = get_line_number(f, p); + return 1 + (size_t)(p - f->lines[linenum-1]); +} + +// +// Return a pointer to the line with the specified line number. +// +const char *get_line(file_t *f, size_t line_number) +{ + if (line_number == 0 || line_number > f->nlines) return NULL; + return f->lines[line_number - 1]; +} + +// +// Print the filename/line number, followed by the given message, followed by +// the line itself. +// +void fprint_line(FILE *dest, file_t *f, const char *start, const char *end, const char *fmt, ...) +{ + if (start < f->contents) start = f->contents; + if (start > f->end) start = f->end; + if (end < f->contents) end = f->contents; + if (end > f->end) end = f->end; + size_t linenum = get_line_number(f, start); + const char *line = get_line(f, linenum); + size_t charnum = get_char_number(f, start); + fprintf(dest, "\033[1m%s:%ld:\033[0m ", f->filename, linenum); + + va_list args; + va_start(args, fmt); + vfprintf(dest, fmt, args); + va_end(args); + fputc('\n', dest); + + const char *eol = linenum == f->nlines ? strchr(line, '\0') : strchr(line, '\n'); + if (end == NULL || end > eol) end = eol; + fprintf(dest, "\033[2m% 5ld |\033[0m %.*s\033[41;30m%.*s\033[0m%.*s\n", + linenum, + (int)charnum - 1, line, + (int)(end - &line[charnum-1]), &line[charnum-1], + (int)(eol - end), end); + fprintf(dest, " \033[34;1m"); + const char *p = line - 1; + for (; p < start; ++p) fputc(' ', dest); + if (start == end) ++end; + for (; p < end; ++p) fputc('^', dest); + fprintf(dest, "\033[0m\n"); +} + +// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1 diff --git a/files.h b/files.h new file mode 100644 index 0000000..cee341c --- /dev/null +++ b/files.h @@ -0,0 +1,38 @@ +// +// files.h - Definitions of an API for loading files. +// +#ifndef FILES__H +#define FILES__H + +#include + +struct allocated_pat_s; // declared in types.h + +typedef struct file_s { + struct file_s *next; + const char *filename; + char *contents, **lines, *end; + size_t nlines; + struct allocated_pat_s *pats; + unsigned int mmapped:1; +} file_t; + +__attribute__((format(printf,2,3))) +file_t *load_file(file_t **files, const char *fmt, ...); +__attribute__((nonnull(3), returns_nonnull)) +file_t *spoof_file(file_t **files, const char *filename, const char *text); +__attribute__((nonnull)) +void intern_file(file_t *f); +__attribute__((nonnull)) +void destroy_file(file_t **f); +__attribute__((pure, nonnull)) +size_t get_line_number(file_t *f, const char *p); +__attribute__((pure, nonnull)) +size_t get_char_number(file_t *f, const char *p); +__attribute__((pure, nonnull)) +const char *get_line(file_t *f, size_t line_number); +__attribute__((nonnull(1,2,3), format(printf,5,6))) +void fprint_line(FILE *dest, file_t *f, const char *start, const char *end, const char *fmt, ...); + +#endif +// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1 diff --git a/grammar.c b/grammar.c index 102eca8..66103eb 100644 --- a/grammar.c +++ b/grammar.c @@ -6,7 +6,7 @@ #include #include "compiler.h" -#include "file_loader.h" +#include "files.h" #include "grammar.h" #include "utils.h" diff --git a/grammar.h b/grammar.h index b8e93ff..397a3c4 100644 --- a/grammar.h +++ b/grammar.h @@ -4,7 +4,7 @@ #ifndef GRAMMAR__H #define GRAMMAR__H -#include "file_loader.h" +#include "files.h" #include "types.h" __attribute__((nonnull(2,4,5), returns_nonnull)) diff --git a/print.c b/print.c new file mode 100644 index 0000000..6feee3f --- /dev/null +++ b/print.c @@ -0,0 +1,384 @@ +// +// print.c - Code for printing and visualizing matches. +// + +#include +#include +#include + +#include "print.h" +#include "types.h" +#include "utils.h" +#include "vm.h" + +typedef struct match_node_s { + match_t *m; + struct match_node_s *next; +} match_node_t; + +static const char *color_match = "\033[0;31;1m"; +static const char *color_replace = "\033[0;34;1m"; +static const char *color_normal = "\033[0m"; + +__attribute__((nonnull, pure)) +static int height_of_match(match_t *m); +__attribute__((nonnull)) +static void _visualize_matches(match_node_t *firstmatch, int depth, const char *text, size_t textlen); +__attribute__((nonnull(1,2))) +static inline void print_line_number(FILE *out, printer_t *pr, size_t line_number, const char *color); + +// +// Return the height of a match object (i.e. the number of descendents of the +// structure). +// +static int height_of_match(match_t *m) +{ + int height = 0; + for (match_t *c = m->child; c; c = c->nextsibling) { + int childheight = height_of_match(c); + if (childheight > height) height = childheight; + } + return 1 + height; +} + +// +// Print a visual explanation for the as-yet-unprinted matches provided. +// +static void _visualize_matches(match_node_t *firstmatch, int depth, const char *text, size_t textlen) +{ + const char *V = "│"; // Vertical bar + const char *H = "─"; // Horizontal bar + const char *color = (depth % 2 == 0) ? "34" : "33"; + + match_t *viz = firstmatch->m; + // This is a heuristic: print matches first if they have more submatches. + // In general, this helps reduce the height of the final output by allowing + // for more rows that show the same rule matching in multiple places. + // TODO: there may be a better heuristic that optimizes for this factor + // while also printing earlier matches first when it doesn't affect overall + // output height. + for (match_node_t *p = firstmatch; p; p = p->next) + if (height_of_match(p->m) > height_of_match(viz)) + viz = p->m; + const char *viz_type = viz->pat->start; + size_t viz_typelen = (size_t)(viz->pat->end - viz->pat->start); + + // Backrefs use added dim quote marks to indicate that the pattern is a + // literal string being matched. (Backrefs have start/end inside the text + // input, instead of something the user typed in) + if (viz_type >= text && viz_type <= &text[textlen]) + printf("\033[%ldG\033[0;2m\"\033[%s;1m", 2*textlen+3, color); + else + printf("\033[%ldG\033[%s;1m", 2*textlen+3, color); + + for (size_t i = 0; i < viz_typelen; i++) { + switch (viz_type[i]) { + case '\n': printf("↵"); break; + default: printf("%c", viz_type[i]); break; + } + } + + if (viz_type >= text && viz_type <= &text[textlen]) + printf("\033[0;2m\""); + + printf("\033[0m"); + + match_node_t *children = NULL; + match_node_t **nextchild = &children; + +#define RIGHT_TYPE(m) (m->m->pat->end == m->m->pat->start + viz_typelen && strncmp(m->m->pat->start, viz_type, viz_typelen) == 0) + // Print nonzero-width first: + for (match_node_t *m = firstmatch; m; m = m->next) { + if (RIGHT_TYPE(m)) { + for (match_t *c = m->m->child; c; c = c->nextsibling) { + *nextchild = new(match_node_t); + (*nextchild)->m = c; + nextchild = &((*nextchild)->next); + } + if (m->m->end == m->m->start) continue; + printf("\033[%ldG\033[0;2m%s\033[0;7;%sm", 1+2*(m->m->start - text), V, color); + for (const char *c = m->m->start; c < m->m->end; ++c) { + // TODO: newline + if (c > m->m->start) printf(" "); + // TODO: utf8 + //while ((*c & 0xC0) != 0x80) printf("%c", *(c++)); + printf("%c", *c); + } + printf("\033[0;2m%s\033[0m", V); + } else { + *nextchild = new(match_node_t); + (*nextchild)->m = m->m; + nextchild = &((*nextchild)->next); + printf("\033[%ldG\033[0;2m%s", 1+2*(m->m->start - text), V); + for (ssize_t i = (ssize_t)(2*(m->m->end - m->m->start)-1); i > 0; i--) + printf(" "); + if (m->m->end > m->m->start) + printf("\033[0;2m%s", V); + printf("\033[0m"); + } + } + + // Print stars for zero-width: + for (match_node_t *m = firstmatch; m; m = m->next) { + if (m->m->end > m->m->start) continue; + if (RIGHT_TYPE(m)) { + printf("\033[%ldG\033[7;%sm▒\033[0m", 1+2*(m->m->start - text), color); + } else { + printf("\033[%ldG\033[0;2m%s\033[0m", 1+2*(m->m->start - text), V); + } + } + + printf("\n"); + + for (match_node_t *m = firstmatch; m; m = m->next) { + if (m->m->end == m->m->start) { + if (!RIGHT_TYPE(m)) + printf("\033[%ldG\033[0;2m%s", 1 + 2*(m->m->start - text), V); + } else { + const char *l = "└"; + const char *r = "┘"; + for (match_node_t *c = children; c; c = c->next) { + if (c->m->start == m->m->start || c->m->end == m->m->start) l = V; + if (c->m->start == m->m->end || c->m->end == m->m->end) r = V; + } + printf("\033[%ldG\033[0;2m%s", 1 + 2*(m->m->start - text), l); + const char *h = RIGHT_TYPE(m) ? H : " "; + for (ssize_t n = (ssize_t)(2*(m->m->end - m->m->start) - 1); n > 0; n--) + printf("%s", h); + printf("%s\033[0m", r); + } + } +#undef RIGHT_TYPE + + printf("\n"); + + if (children) + _visualize_matches(children, depth+1, text, textlen); + + for (match_node_t *c = children, *next = NULL; c; c = next) { + next = c->next; + xfree(&c); + } +} + +// +// Print a visualization of a match object. +// +void visualize_match(match_t *m) +{ + printf("\033[?7l"); // Disable line wrapping + match_node_t first = {.m = m}; + _visualize_matches(&first, 0, m->start, (size_t)(m->end - m->start)); + printf("\033[?7h"); // Re-enable line wrapping +} + +// +// Print a line number, if it needs to be printed. +// line number of 0 means "just print an empty space for the number" +// +__attribute__((nonnull(1,2))) +static inline void print_line_number(FILE *out, printer_t *pr, size_t line_number, const char *color) +{ + if (!pr->print_line_numbers) return; + if (!pr->needs_line_number) return; + if (line_number == 0) { + if (color) fprintf(out, "\033[0;2m \033(0\x78\033(B%s", color); + else fprintf(out, " |"); + } else { + if (color) fprintf(out, "\033[0;2m% 5ld\033(0\x78\033(B%s", line_number, color); + else fprintf(out, "% 5ld|", line_number); + } + pr->needs_line_number = 0; +} + +// +// Print a range of text from a file, adding line numbers if necessary. +// +__attribute__((nonnull(1,2,3,4))) +static void print_between(FILE *out, printer_t *pr, const char *start, const char *end, const char *color) +{ + file_t *f = pr->file; + while (start < end) { + size_t line_num = get_line_number(f, start); + print_line_number(out, pr, line_num, color); + const char *eol = get_line(pr->file, line_num + 1); + if (!eol || eol > end) eol = end; + if (color) fprintf(out, "%s", color); + fprintf(out, "%.*s", (int)(eol - start), start); + if (eol[-1] == '\n') + pr->needs_line_number = 1; + start = eol; + } + pr->pos = end; +} + +// +// Return a pointer to the first character of context information before `pos`, +// according to the context settings in `pr` +// +static const char *context_before(printer_t *pr, const char *pos) +{ + if (pr->context_lines == -1) { + return pr->pos; + } else if (pr->context_lines > 0) { + size_t n = get_line_number(pr->file, pos); + if (n >= (size_t)((pr->context_lines - 1) + 1)) + n -= (size_t)(pr->context_lines - 1); + else + n = 1; + const char *sol = get_line(pr->file, n); + if (sol == NULL || sol < pr->pos) sol = pr->pos; + return sol; + } else { + return pos; + } +} + +// +// Return a pointer to the last character of context information after `pos`, +// according to the context settings in `pr` +// +static const char *context_after(printer_t *pr, const char *pos) +{ + if (pr->context_lines == -1) { + return pr->file->end; + } else if (pr->context_lines > 0) { + size_t n = get_line_number(pr->file, pos) + (size_t)(pr->context_lines - 1); + const char *eol = get_line(pr->file, n+1); + return eol ? eol : pr->file->end; + } else { + return pos; + } +} + +// +// Print the text of a match (no context). +// +void _print_match(FILE *out, printer_t *pr, match_t *m) +{ + pr->pos = m->start; + if (m->pat->type == VM_REPLACE) { + if (m->skip_replacement) { + _print_match(out, pr, m->child); + return; + } + size_t line_start = get_line_number(pr->file, m->start); + size_t line_end = get_line_number(pr->file, m->end); + size_t line = line_start; + + if (pr->use_color) fprintf(out, "%s", color_replace); + const char *text = m->pat->args.replace.text; + const char *end = &text[m->pat->args.replace.len]; + + // TODO: clean up the line numbering code + for (const char *r = text; r < end; ) { + print_line_number(out, pr, line > line_end ? 0 : line, pr->use_color ? color_replace : NULL); + + // Capture substitution + if (*r == '@' && r[1] && r[1] != '@') { + ++r; + match_t *cap = get_capture(m, &r); + if (cap != NULL) { + _print_match(out, pr, cap); + if (pr->use_color) fprintf(out, "%s", color_replace); + continue; + } else { + --r; + } + } + + if (*r == '\\') { + ++r; + unsigned char c = unescapechar(r, &r); + fputc(c, out); + if (c == '\n') { + ++line; + pr->needs_line_number = 1; + } + continue; + } else if (*r == '\n') { + fputc('\n', out); + ++line; + pr->needs_line_number = 1; + ++r; + continue; + } else { + fputc(*r, out); + ++r; + continue; + } + } + print_line_number(out, pr, line > line_end ? 0 : line, pr->use_color ? color_normal : NULL); + } else { + const char *prev = m->start; + for (match_t *child = m->child; child; child = child->nextsibling) { + // Skip children from e.g. zero-width matches like >@foo + if (!(prev <= child->start && child->start <= m->end && + prev <= child->end && child->end <= m->end)) + continue; + if (child->start > prev) + print_between(out, pr, prev, child->start, pr->use_color ? color_match : NULL); + _print_match(out, pr, child); + prev = child->end; + } + if (m->end > prev) + print_between(out, pr, prev, m->end, pr->use_color ? color_match : NULL); + } + pr->pos = m->end; +} + +// +// Print the text of a match and any context. +// +void print_match(FILE *out, printer_t *pr, match_t *m) +{ + int first = (pr->pos == NULL); + if (first) { // First match printed: + pr->pos = pr->file->contents; + pr->needs_line_number = 1; + } + if (m) { + const char *before_m = context_before(pr, m->start); + if (!first) { + const char *after_last = context_after(pr, pr->pos); + if (after_last >= before_m) { + // Overlapping ranges: + before_m = pr->pos; + } else { + // Non-overlapping ranges: + print_between(out, pr, pr->pos, after_last, pr->use_color ? color_normal : NULL); + if (pr->context_lines > 1) + fprintf(out, "\n"); // Gap between chunks + } + } + print_between(out, pr, before_m, m->start, pr->use_color ? color_normal : NULL); + _print_match(out, pr, m); + } else { + // After the last match is printed, print the trailing context: + const char *after_last = context_after(pr, pr->pos); + print_between(out, pr, pr->pos, after_last, pr->use_color ? color_normal : NULL); + // Guarantee trailing newline + if (pr->pos > pr->file->contents && pr->pos[-1] != '\n') fprintf(out, "\n"); + } + if (pr->use_color) fprintf(out, "%s", color_normal); +} + +// +// Print any errors that are present in the given match object. +// +int print_errors(printer_t *pr, match_t *m) +{ + int ret = 0; + if (m->pat->type == VM_CAPTURE && m->pat->args.capture.name && streq(m->pat->args.capture.name, "!")) { + printf("\033[31;1m"); + print_match(stdout, pr, m); + printf("\033[0m\n"); + fprint_line(stdout, pr->file, m->start, m->end, " "); + return 1; + } + if (m->child) ret += print_errors(pr, m->child); + if (m->nextsibling) ret += print_errors(pr, m->nextsibling); + return ret; +} + +// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1 diff --git a/print.h b/print.h new file mode 100644 index 0000000..09a30f6 --- /dev/null +++ b/print.h @@ -0,0 +1,26 @@ +// +// Header file for print.c (printing/visualizing matches) +// +#ifndef PRINT__H +#define PRINT__H + +#include "types.h" + +typedef struct { + file_t *file; + const char *pos; + int context_lines; + unsigned int needs_line_number:1; + unsigned int use_color:1; + unsigned int print_line_numbers:1; +} printer_t; + +__attribute__((nonnull)) +void visualize_match(match_t *m); +__attribute__((nonnull(1,2))) +void print_match(FILE *out, printer_t *pr, match_t *m); +__attribute__((nonnull)) +int print_errors(printer_t *pr, match_t *m); + +#endif +// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1 diff --git a/printing.c b/printing.c deleted file mode 100644 index 346ee73..0000000 --- a/printing.c +++ /dev/null @@ -1,384 +0,0 @@ -// -// printing.c - Code for printing and visualizing matches. -// - -#include -#include -#include - -#include "printing.h" -#include "types.h" -#include "utils.h" -#include "vm.h" - -typedef struct match_node_s { - match_t *m; - struct match_node_s *next; -} match_node_t; - -static const char *color_match = "\033[0;31;1m"; -static const char *color_replace = "\033[0;34;1m"; -static const char *color_normal = "\033[0m"; - -__attribute__((nonnull, pure)) -static int height_of_match(match_t *m); -__attribute__((nonnull)) -static void _visualize_matches(match_node_t *firstmatch, int depth, const char *text, size_t textlen); -__attribute__((nonnull(1,2))) -static inline void print_line_number(FILE *out, printer_t *pr, size_t line_number, const char *color); - -// -// Return the height of a match object (i.e. the number of descendents of the -// structure). -// -static int height_of_match(match_t *m) -{ - int height = 0; - for (match_t *c = m->child; c; c = c->nextsibling) { - int childheight = height_of_match(c); - if (childheight > height) height = childheight; - } - return 1 + height; -} - -// -// Print a visual explanation for the as-yet-unprinted matches provided. -// -static void _visualize_matches(match_node_t *firstmatch, int depth, const char *text, size_t textlen) -{ - const char *V = "│"; // Vertical bar - const char *H = "─"; // Horizontal bar - const char *color = (depth % 2 == 0) ? "34" : "33"; - - match_t *viz = firstmatch->m; - // This is a heuristic: print matches first if they have more submatches. - // In general, this helps reduce the height of the final output by allowing - // for more rows that show the same rule matching in multiple places. - // TODO: there may be a better heuristic that optimizes for this factor - // while also printing earlier matches first when it doesn't affect overall - // output height. - for (match_node_t *p = firstmatch; p; p = p->next) - if (height_of_match(p->m) > height_of_match(viz)) - viz = p->m; - const char *viz_type = viz->pat->start; - size_t viz_typelen = (size_t)(viz->pat->end - viz->pat->start); - - // Backrefs use added dim quote marks to indicate that the pattern is a - // literal string being matched. (Backrefs have start/end inside the text - // input, instead of something the user typed in) - if (viz_type >= text && viz_type <= &text[textlen]) - printf("\033[%ldG\033[0;2m\"\033[%s;1m", 2*textlen+3, color); - else - printf("\033[%ldG\033[%s;1m", 2*textlen+3, color); - - for (size_t i = 0; i < viz_typelen; i++) { - switch (viz_type[i]) { - case '\n': printf("↵"); break; - default: printf("%c", viz_type[i]); break; - } - } - - if (viz_type >= text && viz_type <= &text[textlen]) - printf("\033[0;2m\""); - - printf("\033[0m"); - - match_node_t *children = NULL; - match_node_t **nextchild = &children; - -#define RIGHT_TYPE(m) (m->m->pat->end == m->m->pat->start + viz_typelen && strncmp(m->m->pat->start, viz_type, viz_typelen) == 0) - // Print nonzero-width first: - for (match_node_t *m = firstmatch; m; m = m->next) { - if (RIGHT_TYPE(m)) { - for (match_t *c = m->m->child; c; c = c->nextsibling) { - *nextchild = new(match_node_t); - (*nextchild)->m = c; - nextchild = &((*nextchild)->next); - } - if (m->m->end == m->m->start) continue; - printf("\033[%ldG\033[0;2m%s\033[0;7;%sm", 1+2*(m->m->start - text), V, color); - for (const char *c = m->m->start; c < m->m->end; ++c) { - // TODO: newline - if (c > m->m->start) printf(" "); - // TODO: utf8 - //while ((*c & 0xC0) != 0x80) printf("%c", *(c++)); - printf("%c", *c); - } - printf("\033[0;2m%s\033[0m", V); - } else { - *nextchild = new(match_node_t); - (*nextchild)->m = m->m; - nextchild = &((*nextchild)->next); - printf("\033[%ldG\033[0;2m%s", 1+2*(m->m->start - text), V); - for (ssize_t i = (ssize_t)(2*(m->m->end - m->m->start)-1); i > 0; i--) - printf(" "); - if (m->m->end > m->m->start) - printf("\033[0;2m%s", V); - printf("\033[0m"); - } - } - - // Print stars for zero-width: - for (match_node_t *m = firstmatch; m; m = m->next) { - if (m->m->end > m->m->start) continue; - if (RIGHT_TYPE(m)) { - printf("\033[%ldG\033[7;%sm▒\033[0m", 1+2*(m->m->start - text), color); - } else { - printf("\033[%ldG\033[0;2m%s\033[0m", 1+2*(m->m->start - text), V); - } - } - - printf("\n"); - - for (match_node_t *m = firstmatch; m; m = m->next) { - if (m->m->end == m->m->start) { - if (!RIGHT_TYPE(m)) - printf("\033[%ldG\033[0;2m%s", 1 + 2*(m->m->start - text), V); - } else { - const char *l = "└"; - const char *r = "┘"; - for (match_node_t *c = children; c; c = c->next) { - if (c->m->start == m->m->start || c->m->end == m->m->start) l = V; - if (c->m->start == m->m->end || c->m->end == m->m->end) r = V; - } - printf("\033[%ldG\033[0;2m%s", 1 + 2*(m->m->start - text), l); - const char *h = RIGHT_TYPE(m) ? H : " "; - for (ssize_t n = (ssize_t)(2*(m->m->end - m->m->start) - 1); n > 0; n--) - printf("%s", h); - printf("%s\033[0m", r); - } - } -#undef RIGHT_TYPE - - printf("\n"); - - if (children) - _visualize_matches(children, depth+1, text, textlen); - - for (match_node_t *c = children, *next = NULL; c; c = next) { - next = c->next; - xfree(&c); - } -} - -// -// Print a visualization of a match object. -// -void visualize_match(match_t *m) -{ - printf("\033[?7l"); // Disable line wrapping - match_node_t first = {.m = m}; - _visualize_matches(&first, 0, m->start, (size_t)(m->end - m->start)); - printf("\033[?7h"); // Re-enable line wrapping -} - -// -// Print a line number, if it needs to be printed. -// line number of 0 means "just print an empty space for the number" -// -__attribute__((nonnull(1,2))) -static inline void print_line_number(FILE *out, printer_t *pr, size_t line_number, const char *color) -{ - if (!pr->print_line_numbers) return; - if (!pr->needs_line_number) return; - if (line_number == 0) { - if (color) fprintf(out, "\033[0;2m \033(0\x78\033(B%s", color); - else fprintf(out, " |"); - } else { - if (color) fprintf(out, "\033[0;2m% 5ld\033(0\x78\033(B%s", line_number, color); - else fprintf(out, "% 5ld|", line_number); - } - pr->needs_line_number = 0; -} - -// -// Print a range of text from a file, adding line numbers if necessary. -// -__attribute__((nonnull(1,2,3,4))) -static void print_between(FILE *out, printer_t *pr, const char *start, const char *end, const char *color) -{ - file_t *f = pr->file; - while (start < end) { - size_t line_num = get_line_number(f, start); - print_line_number(out, pr, line_num, color); - const char *eol = get_line(pr->file, line_num + 1); - if (!eol || eol > end) eol = end; - if (color) fprintf(out, "%s", color); - fprintf(out, "%.*s", (int)(eol - start), start); - if (eol[-1] == '\n') - pr->needs_line_number = 1; - start = eol; - } - pr->pos = end; -} - -// -// Return a pointer to the first character of context information before `pos`, -// according to the context settings in `pr` -// -static const char *context_before(printer_t *pr, const char *pos) -{ - if (pr->context_lines == -1) { - return pr->pos; - } else if (pr->context_lines > 0) { - size_t n = get_line_number(pr->file, pos); - if (n >= (size_t)((pr->context_lines - 1) + 1)) - n -= (size_t)(pr->context_lines - 1); - else - n = 1; - const char *sol = get_line(pr->file, n); - if (sol == NULL || sol < pr->pos) sol = pr->pos; - return sol; - } else { - return pos; - } -} - -// -// Return a pointer to the last character of context information after `pos`, -// according to the context settings in `pr` -// -static const char *context_after(printer_t *pr, const char *pos) -{ - if (pr->context_lines == -1) { - return pr->file->end; - } else if (pr->context_lines > 0) { - size_t n = get_line_number(pr->file, pos) + (size_t)(pr->context_lines - 1); - const char *eol = get_line(pr->file, n+1); - return eol ? eol : pr->file->end; - } else { - return pos; - } -} - -// -// Print the text of a match (no context). -// -void _print_match(FILE *out, printer_t *pr, match_t *m) -{ - pr->pos = m->start; - if (m->pat->type == VM_REPLACE) { - if (m->skip_replacement) { - _print_match(out, pr, m->child); - return; - } - size_t line_start = get_line_number(pr->file, m->start); - size_t line_end = get_line_number(pr->file, m->end); - size_t line = line_start; - - if (pr->use_color) fprintf(out, "%s", color_replace); - const char *text = m->pat->args.replace.text; - const char *end = &text[m->pat->args.replace.len]; - - // TODO: clean up the line numbering code - for (const char *r = text; r < end; ) { - print_line_number(out, pr, line > line_end ? 0 : line, pr->use_color ? color_replace : NULL); - - // Capture substitution - if (*r == '@' && r[1] && r[1] != '@') { - ++r; - match_t *cap = get_capture(m, &r); - if (cap != NULL) { - _print_match(out, pr, cap); - if (pr->use_color) fprintf(out, "%s", color_replace); - continue; - } else { - --r; - } - } - - if (*r == '\\') { - ++r; - unsigned char c = unescapechar(r, &r); - fputc(c, out); - if (c == '\n') { - ++line; - pr->needs_line_number = 1; - } - continue; - } else if (*r == '\n') { - fputc('\n', out); - ++line; - pr->needs_line_number = 1; - ++r; - continue; - } else { - fputc(*r, out); - ++r; - continue; - } - } - print_line_number(out, pr, line > line_end ? 0 : line, pr->use_color ? color_normal : NULL); - } else { - const char *prev = m->start; - for (match_t *child = m->child; child; child = child->nextsibling) { - // Skip children from e.g. zero-width matches like >@foo - if (!(prev <= child->start && child->start <= m->end && - prev <= child->end && child->end <= m->end)) - continue; - if (child->start > prev) - print_between(out, pr, prev, child->start, pr->use_color ? color_match : NULL); - _print_match(out, pr, child); - prev = child->end; - } - if (m->end > prev) - print_between(out, pr, prev, m->end, pr->use_color ? color_match : NULL); - } - pr->pos = m->end; -} - -// -// Print the text of a match and any context. -// -void print_match(FILE *out, printer_t *pr, match_t *m) -{ - int first = (pr->pos == NULL); - if (first) { // First match printed: - pr->pos = pr->file->contents; - pr->needs_line_number = 1; - } - if (m) { - const char *before_m = context_before(pr, m->start); - if (!first) { - const char *after_last = context_after(pr, pr->pos); - if (after_last >= before_m) { - // Overlapping ranges: - before_m = pr->pos; - } else { - // Non-overlapping ranges: - print_between(out, pr, pr->pos, after_last, pr->use_color ? color_normal : NULL); - if (pr->context_lines > 1) - fprintf(out, "\n"); // Gap between chunks - } - } - print_between(out, pr, before_m, m->start, pr->use_color ? color_normal : NULL); - _print_match(out, pr, m); - } else { - // After the last match is printed, print the trailing context: - const char *after_last = context_after(pr, pr->pos); - print_between(out, pr, pr->pos, after_last, pr->use_color ? color_normal : NULL); - // Guarantee trailing newline - if (pr->pos > pr->file->contents && pr->pos[-1] != '\n') fprintf(out, "\n"); - } - if (pr->use_color) fprintf(out, "%s", color_normal); -} - -// -// Print any errors that are present in the given match object. -// -int print_errors(printer_t *pr, match_t *m) -{ - int ret = 0; - if (m->pat->type == VM_CAPTURE && m->pat->args.capture.name && streq(m->pat->args.capture.name, "!")) { - printf("\033[31;1m"); - print_match(stdout, pr, m); - printf("\033[0m\n"); - fprint_line(stdout, pr->file, m->start, m->end, " "); - return 1; - } - if (m->child) ret += print_errors(pr, m->child); - if (m->nextsibling) ret += print_errors(pr, m->nextsibling); - return ret; -} - -// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1 diff --git a/printing.h b/printing.h deleted file mode 100644 index 822285e..0000000 --- a/printing.h +++ /dev/null @@ -1,26 +0,0 @@ -// -// Header file for printing.c (printing/visualizing matches) -// -#ifndef PRINTING__H -#define PRINTING__H - -#include "types.h" - -typedef struct { - file_t *file; - const char *pos; - int context_lines; - unsigned int needs_line_number:1; - unsigned int use_color:1; - unsigned int print_line_numbers:1; -} printer_t; - -__attribute__((nonnull)) -void visualize_match(match_t *m); -__attribute__((nonnull(1,2))) -void print_match(FILE *out, printer_t *pr, match_t *m); -__attribute__((nonnull)) -int print_errors(printer_t *pr, match_t *m); - -#endif -// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1 diff --git a/types.h b/types.h index ab5694c..cfdd9d2 100644 --- a/types.h +++ b/types.h @@ -6,7 +6,7 @@ #include -#include "file_loader.h" +#include "files.h" // BP virtual machine pattern types enum pattype_e { -- cgit v1.2.3