From 326a0b960c6eeac4278c15a57e421326464372a6 Mon Sep 17 00:00:00 2001 From: Bruce Hill Date: Sat, 9 Apr 2022 14:15:07 -0400 Subject: Moving print logic out of match.c and renaming explain -> printmatch --- Makefile | 2 +- README.md | 2 +- bp.c | 2 +- explain.c | 173 --------------------------------------- explain.h | 13 --- match.c | 87 -------------------- match.h | 9 -- printmatch.c | 263 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ printmatch.h | 22 +++++ 9 files changed, 288 insertions(+), 285 deletions(-) delete mode 100644 explain.c delete mode 100644 explain.h create mode 100644 printmatch.c create mode 100644 printmatch.h diff --git a/Makefile b/Makefile index f54e8af..abbdb19 100644 --- a/Makefile +++ b/Makefile @@ -22,7 +22,7 @@ G= O=-O3 ALL_FLAGS=$(CFLAGS) $(OSFLAGS) -DBP_NAME="\"$(NAME)\"" $(EXTRA) $(CWARN) $(G) $(O) -CFILES=pattern.c utils.c match.c files.c explain.c json.c utf8.c +CFILES=pattern.c utils.c match.c files.c printmatch.c json.c utf8.c OBJFILES=$(CFILES:.c=.o) all: $(NAME) bp.1 diff --git a/README.md b/README.md index fb8d39b..20ee2ad 100644 --- a/README.md +++ b/README.md @@ -140,11 +140,11 @@ so use at your own risk! These grammar files are only approximations of syntax. File | Description -------------------------------|----------------------------------------------------- [bp.c](bp.c) | The main program. -[explain.c](explain.c) | Printing a visual explanation of a match. [files.c](files.c) | Loading files into memory. [json.c](json.c) | JSON output of matches. [match.c](match.c) | Pattern matching code (find occurrences of a bp pattern within an input string). [pattern.c](pattern.c) | Pattern compiling code (compile a bp pattern from an input string). +[printmatch.c](printmatch.c) | Printing a visual explanation of a match. [utf8.c](utf8.c) | UTF-8 helper code. [utils.c](utils.c) | Miscellaneous helper functions. 
diff --git a/bp.c b/bp.c index 5146d5a..dbd72bd 100644 --- a/bp.c +++ b/bp.c @@ -20,11 +20,11 @@ #include #include -#include "explain.h" #include "files.h" #include "json.h" #include "match.h" #include "pattern.h" +#include "printmatch.h" #include "utils.h" #ifndef BP_NAME diff --git a/explain.c b/explain.c deleted file mode 100644 index 41939c3..0000000 --- a/explain.c +++ /dev/null @@ -1,173 +0,0 @@ -// -// explain.c - Debug visualization of pattern matches. -// - -#include -#include - -#include "match.h" -#include "utils.h" - -typedef struct match_node_s { - match_t *m; - struct match_node_s *next; -} match_node_t; - -__attribute__((nonnull, pure)) -static int height_of_match(match_t *m); -__attribute__((nonnull)) -static void _explain_matches(match_node_t *firstmatch, int depth, const char *text, size_t textlen); - -// -// Return the height of a match object (i.e. the number of descendents of the -// structure). -// -static int height_of_match(match_t *m) -{ - int height = 0; - for (int i = 0; m->children && m->children[i]; i++) { - match_t *child = m->children[i]; - int childheight = height_of_match(child); - if (childheight > height) height = childheight; - } - return 1 + height; -} - -// -// Print a visual explanation for the as-yet-unprinted matches provided. -// -static void _explain_matches(match_node_t *firstmatch, int depth, const char *text, size_t textlen) -{ - const char *V = "│"; // Vertical bar - const char *H = "─"; // Horizontal bar - const char *color = (depth % 2 == 0) ? "34" : "33"; - - match_t *viz = firstmatch->m; - // This is a heuristic: print matches first if they have more submatches. - // In general, this helps reduce the height of the final output by allowing - // for more rows that show the same rule matching in multiple places. - // TODO: there may be a better heuristic that optimizes for this factor - // while also printing earlier matches first when it doesn't affect overall - // output height. 
- for (match_node_t *p = firstmatch; p; p = p->next) - if (height_of_match(p->m) > height_of_match(viz)) - viz = p->m; - const char *viz_type = viz->pat->start; - size_t viz_typelen = (size_t)(viz->pat->end - viz->pat->start); - - // Backrefs use added dim quote marks to indicate that the pattern is a - // literal string being matched. (Backrefs have start/end inside the text - // input, instead of something the user typed in) - if (viz_type >= text && viz_type <= &text[textlen]) - printf("\033[%luG\033[0;2m\"\033[%s;1m", 2*textlen+3, color); - else - printf("\033[%luG\033[%s;1m", 2*textlen+3, color); - - for (size_t i = 0; i < viz_typelen; i++) { - switch (viz_type[i]) { - case '\n': printf("↵"); break; - case '\t': printf("⇥"); break; - default: printf("%c", viz_type[i]); break; - } - } - - if (viz_type >= text && viz_type <= &text[textlen]) - printf("\033[0;2m\""); - - printf("\033[m"); - - match_node_t *children = NULL; - match_node_t **nextchild = &children; - -#define RIGHT_TYPE(m) (m->m->pat->end == m->m->pat->start + viz_typelen && strncmp(m->m->pat->start, viz_type, viz_typelen) == 0) - // Print nonzero-width first: - for (match_node_t *m = firstmatch; m; m = m->next) { - if (RIGHT_TYPE(m)) { - for (int i = 0; m->m->children && m->m->children[i]; i++) { - *nextchild = new(match_node_t); - (*nextchild)->m = m->m->children[i]; - nextchild = &((*nextchild)->next); - } - if (m->m->end == m->m->start) continue; - printf("\033[%ldG\033[0;2m%s\033[0;7;%sm", 1+2*(m->m->start - text), V, color); - for (const char *c = m->m->start; c < m->m->end; ++c) { - // TODO: newline - if (c > m->m->start) printf(" "); - // TODO: utf8 - //while ((*c & 0xC0) != 0x80) printf("%c", *(c++)); - if (*c == '\n') - printf("↵"); - else if (*c == '\t') - printf("⇥"); - else - printf("%c", *c); - } - printf("\033[0;2m%s\033[m", V); - } else { - *nextchild = new(match_node_t); - (*nextchild)->m = m->m; - nextchild = &((*nextchild)->next); - printf("\033[%ldG\033[0;2m%s", 1+2*(m->m->start - 
text), V); - for (ssize_t i = (ssize_t)(2*(m->m->end - m->m->start)-1); i > 0; i--) - printf(" "); - if (m->m->end > m->m->start) - printf("\033[0;2m%s", V); - printf("\033[m"); - } - } - - // Print stars for zero-width: - for (match_node_t *m = firstmatch; m; m = m->next) { - if (m->m->end > m->m->start) continue; - if (RIGHT_TYPE(m)) { - printf("\033[%ldG\033[7;%sm▒\033[m", 1+2*(m->m->start - text), color); - } else { - printf("\033[%ldG\033[0;2m%s\033[m", 1+2*(m->m->start - text), V); - } - } - - printf("\n"); - - for (match_node_t *m = firstmatch; m; m = m->next) { - if (m->m->end == m->m->start) { - if (!RIGHT_TYPE(m)) - printf("\033[%ldG\033[0;2m%s", 1 + 2*(m->m->start - text), V); - } else { - const char *l = "└"; - const char *r = "┘"; - for (match_node_t *c = children; c; c = c->next) { - if (c->m->start == m->m->start || c->m->end == m->m->start) l = V; - if (c->m->start == m->m->end || c->m->end == m->m->end) r = V; - } - printf("\033[%ldG\033[0;2m%s", 1 + 2*(m->m->start - text), l); - const char *h = RIGHT_TYPE(m) ? H : " "; - for (ssize_t n = (ssize_t)(2*(m->m->end - m->m->start) - 1); n > 0; n--) - printf("%s", h); - printf("%s\033[m", r); - } - } -#undef RIGHT_TYPE - - printf("\n"); - - if (children) - _explain_matches(children, depth+1, text, textlen); - - for (match_node_t *c = children, *next = NULL; c; c = next) { - next = c->next; - delete(&c); - } -} - -// -// Print a visualization of a match object. 
-// -void explain_match(match_t *m) -{ - printf("\033[?7l"); // Disable line wrapping - match_node_t first = {.m = m}; - _explain_matches(&first, 0, m->start, (size_t)(m->end - m->start)); - printf("\033[?7h"); // Re-enable line wrapping -} - -// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 diff --git a/explain.h b/explain.h deleted file mode 100644 index fbb2abf..0000000 --- a/explain.h +++ /dev/null @@ -1,13 +0,0 @@ -// -// Debug visualization of matches -// -#ifndef EXPLAIN__H -#define EXPLAIN__H - -#include "match.h" - -__attribute__((nonnull)) -void explain_match(match_t *m); - -#endif -// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 diff --git a/match.c b/match.c index 461148d..80296e9 100644 --- a/match.c +++ b/match.c @@ -857,91 +857,4 @@ match_t *get_named_capture(match_t *m, const char *name, size_t namelen) return NULL; } -static inline void fputc_safe(FILE *out, char c, print_options_t *opts) -{ - (void)fputc(c, out); - if (c == '\n' && opts && opts->on_nl) { - opts->on_nl(out); - if (opts->replace_color) fprintf(out, "%s", opts->replace_color); - } -} - -void fprint_match(FILE *out, const char *file_start, match_t *m, print_options_t *opts) -{ - if (m->pat->type == BP_REPLACE) { - const char *text = m->pat->args.replace.text; - const char *end = &text[m->pat->args.replace.len]; - if (opts && opts->replace_color) fprintf(out, "%s", opts->replace_color); - - // TODO: clean up the line numbering code - for (const char *r = text; r < end; ) { - // Capture substitution - if (*r == '@' && r+1 < end && r[1] != '@') { - const char *next = r+1; - // Retrieve the capture value: - match_t *cap = NULL; - if (isdigit(*next)) { - int n = (int)strtol(next, (char**)&next, 10); - cap = get_numbered_capture(m->children[0], n); - } else { - const char *name = next, *name_end = after_name(next, end); - if (name_end) { - cap = get_named_capture(m->children[0], name, (size_t)(name_end - name)); - next = name_end; - if (next < m->end && *next == ';') ++next; - } - } - - if (cap != 
NULL) { - fprint_match(out, file_start, cap, opts); - if (opts && opts->replace_color) fprintf(out, "%s", opts->replace_color); - r = next; - continue; - } - } - - if (*r == '\\') { - ++r; - if (*r == 'N') { // \N (nodent) - ++r; - // Mildly hacky: nodents here are based on the *first line* - // of the match. If the match spans multiple lines, or if - // the replacement text contains newlines, this may get weird. - const char *line_start = m->start; - while (line_start > file_start && line_start[-1] != '\n') --line_start; - fputc_safe(out, '\n', opts); - for (const char *p = line_start; p < m->start && (*p == ' ' || *p == '\t'); ++p) - fputc(*p, out); - continue; - } - fputc_safe(out, unescapechar(r, &r, end), opts); - } else { - fputc_safe(out, *r, opts); - ++r; - } - } - } else { - if (opts && opts->match_color) fprintf(out, "%s", opts->match_color); - const char *prev = m->start; - for (int i = 0; m->children && m->children[i]; i++) { - match_t *child = m->children[i]; - // Skip children from e.g. 
zero-width matches like >@foo - if (!(prev <= child->start && child->start <= m->end && - prev <= child->end && child->end <= m->end)) - continue; - if (child->start > prev) { - if (opts && opts->fprint_between) opts->fprint_between(out, prev, child->start, opts->match_color); - else fwrite(prev, sizeof(char), (size_t)(child->start - prev), out); - } - fprint_match(out, file_start, child, opts); - if (opts && opts->match_color) fprintf(out, "%s", opts->match_color); - prev = child->end; - } - if (m->end > prev) { - if (opts && opts->fprint_between) opts->fprint_between(out, prev, m->end, opts->match_color); - else fwrite(prev, sizeof(char), (size_t)(m->end - prev), out); - } - } -} - // vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 diff --git a/match.h b/match.h index 93f2ade..7439b27 100644 --- a/match.h +++ b/match.h @@ -24,12 +24,6 @@ typedef struct match_s { struct match_s *_children[3]; } match_t; -typedef struct { - const char *normal_color, *match_color, *replace_color; - void (*fprint_between)(FILE *out, const char *start, const char *end, const char *normal_color); - void (*on_nl)(FILE *out); -} print_options_t; - __attribute__((nonnull)) void recycle_match(match_t **at_m); size_t free_all_matches(void); @@ -40,9 +34,6 @@ __attribute__((nonnull)) match_t *get_numbered_capture(match_t *m, int n); __attribute__((nonnull, pure)) match_t *get_named_capture(match_t *m, const char *name, size_t namelen); -__attribute__((nonnull(1,2,3))) -//void fprint_match(FILE *out, const char *file_start, match_t *m, const char *colors[3]); -void fprint_match(FILE *out, const char *file_start, match_t *m, print_options_t *opts); #endif // vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 diff --git a/printmatch.c b/printmatch.c new file mode 100644 index 0000000..5b7e6e7 --- /dev/null +++ b/printmatch.c @@ -0,0 +1,263 @@ +// +// printmatch.c - Debug visualization of pattern matches. 
+//
+
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "match.h"
+#include "printmatch.h"
+#include "utils.h"
+
+// Singly linked list of matches that still have to be drawn.
+typedef struct match_node_s {
+    match_t *m;
+    struct match_node_s *next;
+} match_node_t;
+
+__attribute__((nonnull, pure))
+static int height_of_match(match_t *m);
+__attribute__((nonnull))
+static void _explain_matches(match_node_t *firstmatch, int depth, const char *text, size_t textlen);
+
+//
+// Return the height of a match tree: 1 for a match without children,
+// otherwise 1 plus the height of its tallest child.
+//
+static int height_of_match(match_t *m)
+{
+    int height = 0;
+    for (int i = 0; m->children && m->children[i]; i++) {
+        int childheight = height_of_match(m->children[i]);
+        if (childheight > height) height = childheight;
+    }
+    return 1 + height;
+}
+
+//
+// Print a single byte to stdout, substituting the glyphs "↵" and "⇥" for
+// newline and tab respectively.
+//
+static void print_visible_char(char c)
+{
+    switch (c) {
+    case '\n': printf("↵"); break;
+    case '\t': printf("⇥"); break;
+    default: printf("%c", c); break;
+    }
+}
+
+//
+// Print a visual explanation for the as-yet-unprinted matches provided.
+//
+// Each call picks one pattern (the pattern of the tallest match in the list),
+// prints one row showing the text of every match of that pattern followed by
+// one row of brackets, and then recurses (with depth+1) on the children of
+// those matches plus every match that was not of the chosen pattern.
+//
+// `text`/`textlen` describe the text of the top-level match; they are only
+// used for layout: the byte at offset k is drawn at terminal column 1+2*k and
+// the pattern label is drawn at column 2*textlen+3.
+//
+static void _explain_matches(match_node_t *firstmatch, int depth, const char *text, size_t textlen)
+{
+    const char *V = "│"; // Vertical bar
+    const char *H = "─"; // Horizontal bar
+    const char *color = (depth % 2 == 0) ? "34" : "33";
+
+    // This is a heuristic: print matches first if they have more submatches.
+    // In general, this helps reduce the height of the final output by allowing
+    // for more rows that show the same rule matching in multiple places.
+    // TODO: there may be a better heuristic that optimizes for this factor
+    // while also printing earlier matches first when it doesn't affect overall
+    // output height.
+    match_t *viz = firstmatch->m;
+    int viz_height = height_of_match(viz);
+    for (match_node_t *p = firstmatch->next; p; p = p->next) {
+        int height = height_of_match(p->m);
+        if (height > viz_height) { // Strictly greater: the earliest match wins ties
+            viz = p->m;
+            viz_height = height;
+        }
+    }
+    const char *viz_type = viz->pat->start;
+    size_t viz_typelen = (size_t)(viz->pat->end - viz->pat->start);
+
+    // Backrefs use added dim quote marks to indicate that the pattern is a
+    // literal string being matched. (Backrefs have start/end inside the text
+    // input, instead of something the user typed in)
+    int is_backref = (viz_type >= text && viz_type <= &text[textlen]);
+    if (is_backref)
+        printf("\033[%zuG\033[0;2m\"\033[%s;1m", 2*textlen+3, color);
+    else
+        printf("\033[%zuG\033[%s;1m", 2*textlen+3, color);
+
+    for (size_t i = 0; i < viz_typelen; i++)
+        print_visible_char(viz_type[i]);
+
+    if (is_backref)
+        printf("\033[0;2m\"");
+
+    printf("\033[m");
+
+    // Matches to be drawn by the recursive call, in the order encountered:
+    match_node_t *children = NULL;
+    match_node_t **nextchild = &children;
+
+// True if the node's pattern has the same source text as the chosen pattern.
+#define RIGHT_TYPE(node) ((node)->m->pat->end == (node)->m->pat->start + viz_typelen \
+                          && strncmp((node)->m->pat->start, viz_type, viz_typelen) == 0)
+    // Print nonzero-width first:
+    for (match_node_t *m = firstmatch; m; m = m->next) {
+        if (RIGHT_TYPE(m)) {
+            for (int i = 0; m->m->children && m->m->children[i]; i++) {
+                *nextchild = new(match_node_t);
+                (*nextchild)->m = m->m->children[i];
+                nextchild = &((*nextchild)->next);
+            }
+            if (m->m->end == m->m->start) continue;
+            printf("\033[%tdG\033[0;2m%s\033[0;7;%sm", 1+2*(m->m->start - text), V, color);
+            for (const char *c = m->m->start; c < m->m->end; ++c) {
+                // TODO: newline
+                if (c > m->m->start) printf(" ");
+                // TODO: utf8
+                //while ((*c & 0xC0) != 0x80) printf("%c", *(c++));
+                print_visible_char(*c);
+            }
+            printf("\033[0;2m%s\033[m", V);
+        } else {
+            // Not the chosen pattern: defer this match to a later row and
+            // only draw the vertical bars that connect down to it.
+            *nextchild = new(match_node_t);
+            (*nextchild)->m = m->m;
+            nextchild = &((*nextchild)->next);
+            printf("\033[%tdG\033[0;2m%s", 1+2*(m->m->start - text), V);
+            for (ssize_t i = (ssize_t)(2*(m->m->end - m->m->start)-1); i > 0; i--)
+                printf(" ");
+            if (m->m->end > m->m->start)
+                printf("\033[0;2m%s", V);
+            printf("\033[m");
+        }
+    }
+
+    // Print stars for zero-width:
+    for (match_node_t *m = firstmatch; m; m = m->next) {
+        if (m->m->end > m->m->start) continue;
+        if (RIGHT_TYPE(m)) {
+            printf("\033[%tdG\033[7;%sm▒\033[m", 1+2*(m->m->start - text), color);
+        } else {
+            printf("\033[%tdG\033[0;2m%s\033[m", 1+2*(m->m->start - text), V);
+        }
+    }
+
+    printf("\n");
+
+    // Second row: brackets underneath each match drawn above.
+    for (match_node_t *m = firstmatch; m; m = m->next) {
+        if (m->m->end == m->m->start) {
+            if (!RIGHT_TYPE(m))
+                printf("\033[%tdG\033[0;2m%s", 1 + 2*(m->m->start - text), V);
+        } else {
+            // Use a corner unless a pending child starts or ends at the same
+            // position, in which case the line has to continue downwards.
+            const char *l = "└";
+            const char *r = "┘";
+            for (match_node_t *c = children; c; c = c->next) {
+                if (c->m->start == m->m->start || c->m->end == m->m->start) l = V;
+                if (c->m->start == m->m->end || c->m->end == m->m->end) r = V;
+            }
+            printf("\033[%tdG\033[0;2m%s", 1 + 2*(m->m->start - text), l);
+            const char *h = RIGHT_TYPE(m) ? H : " ";
+            for (ssize_t n = (ssize_t)(2*(m->m->end - m->m->start) - 1); n > 0; n--)
+                printf("%s", h);
+            printf("%s\033[m", r);
+        }
+    }
+#undef RIGHT_TYPE
+
+    printf("\n");
+
+    if (children)
+        _explain_matches(children, depth+1, text, textlen);
+
+    for (match_node_t *c = children, *next = NULL; c; c = next) {
+        next = c->next;
+        delete(&c);
+    }
+}
+
+//
+// Print a visualization of a match object.
+//
+void explain_match(match_t *m)
+{
+    printf("\033[?7l"); // Disable line wrapping
+    match_node_t first = {.m = m};
+    _explain_matches(&first, 0, m->start, (size_t)(m->end - m->start));
+    printf("\033[?7h"); // Re-enable line wrapping
+}
+
+//
+// Write one character of replacement text. After a newline, call the
+// `on_nl` callback (if any) and then re-emit the replacement color (if any).
+//
+static inline void fputc_safe(FILE *out, char c, print_options_t *opts)
+{
+    (void)fputc(c, out);
+    if (c == '\n' && opts && opts->on_nl) {
+        opts->on_nl(out);
+        if (opts->replace_color) fprintf(out, "%s", opts->replace_color);
+    }
+}
+
+//
+// Print a match to `out`.
+//
+// For a BP_REPLACE match, the replacement text is printed instead of the
+// matched text: "@<number>" and "@<name>" (optionally followed by ';') are
+// substituted with the corresponding capture when one is found, "\N" prints
+// a newline followed by the leading spaces/tabs of the line the match starts
+// on, and other backslash escapes are decoded with unescapechar().
+// For any other match, the matched text is printed, recursing into child
+// matches so that nested replacements are applied.
+//
+// `file_start` is the lower bound used when scanning backwards for the start
+// of the match's line. `opts` may be NULL.
+//
+void fprint_match(FILE *out, const char *file_start, match_t *m, print_options_t *opts)
+{
+    if (m->pat->type == BP_REPLACE) {
+        const char *text = m->pat->args.replace.text;
+        const char *end = &text[m->pat->args.replace.len];
+        if (opts && opts->replace_color) fprintf(out, "%s", opts->replace_color);
+
+        // TODO: clean up the line numbering code
+        for (const char *r = text; r < end; ) {
+            // Capture substitution
+            if (*r == '@' && r+1 < end && r[1] != '@') {
+                const char *next = r+1;
+                // Retrieve the capture value:
+                match_t *cap = NULL;
+                if (isdigit((unsigned char)*next)) {
+                    char *num_end = NULL;
+                    int n = (int)strtol(next, &num_end, 10);
+                    next = num_end;
+                    cap = get_numbered_capture(m->children[0], n);
+                } else {
+                    const char *name = next, *name_end = after_name(next, end);
+                    if (name_end) {
+                        cap = get_named_capture(m->children[0], name, (size_t)(name_end - name));
+                        next = name_end;
+                        // `next` points into the replacement text, so it must
+                        // be bounded by `end`, not by the end of the match.
+                        if (next < end && *next == ';') ++next;
+                    }
+                }
+
+                if (cap != NULL) {
+                    fprint_match(out, file_start, cap, opts);
+                    if (opts && opts->replace_color) fprintf(out, "%s", opts->replace_color);
+                    r = next;
+                    continue;
+                }
+            }
+
+            if (*r == '\\') {
+                ++r;
+                // Don't read past the end of the replacement text when it
+                // ends with a lone backslash.
+                if (r < end && *r == 'N') { // \N (nodent)
+                    ++r;
+                    // Mildly hacky: nodents here are based on the *first line*
+                    // of the match. If the match spans multiple lines, or if
+                    // the replacement text contains newlines, this may get weird.
+                    const char *line_start = m->start;
+                    while (line_start > file_start && line_start[-1] != '\n') --line_start;
+                    fputc_safe(out, '\n', opts);
+                    for (const char *p = line_start; p < m->start && (*p == ' ' || *p == '\t'); ++p)
+                        fputc(*p, out);
+                    continue;
+                }
+                fputc_safe(out, unescapechar(r, &r, end), opts);
+            } else {
+                fputc_safe(out, *r, opts);
+                ++r;
+            }
+        }
+    } else {
+        if (opts && opts->match_color) fprintf(out, "%s", opts->match_color);
+        const char *prev = m->start;
+        for (int i = 0; m->children && m->children[i]; i++) {
+            match_t *child = m->children[i];
+            // Skip children from e.g. zero-width matches like >@foo
+            if (!(prev <= child->start && child->start <= m->end &&
+                  prev <= child->end && child->end <= m->end))
+                continue;
+            if (child->start > prev) {
+                if (opts && opts->fprint_between) opts->fprint_between(out, prev, child->start, opts->match_color);
+                else fwrite(prev, sizeof(char), (size_t)(child->start - prev), out);
+            }
+            fprint_match(out, file_start, child, opts);
+            if (opts && opts->match_color) fprintf(out, "%s", opts->match_color);
+            prev = child->end;
+        }
+        if (m->end > prev) {
+            if (opts && opts->fprint_between) opts->fprint_between(out, prev, m->end, opts->match_color);
+            else fwrite(prev, sizeof(char), (size_t)(m->end - prev), out);
+        }
+    }
+}
+
+
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/printmatch.h b/printmatch.h
new file mode 100644
index 0000000..f6a894d
--- /dev/null
+++ b/printmatch.h
@@ -0,0 +1,36 @@
+//
+// printmatch.h - Printing matches and debug visualization of matches
+//
+#ifndef PRINTMATCH__H
+#define PRINTMATCH__H
+
+#include <stdio.h>
+
+#include "match.h"
+
+//
+// Options for fprint_match(). Each of match_color, replace_color,
+// fprint_between and on_nl may be NULL.
+//   match_color:    written before the text of a non-replacement match.
+//   replace_color:  written before replacement text, and again after on_nl.
+//   fprint_between: if set, used instead of fwrite() to print the text
+//                   between child matches; it is passed match_color.
+//   on_nl:          if set, called after each newline in replacement text.
+//   normal_color:   not read by fprint_match() itself.
+//
+typedef struct {
+    const char *normal_color, *match_color, *replace_color;
+    void (*fprint_between)(FILE *out, const char *start, const char *end, const char *normal_color);
+    void (*on_nl)(FILE *out);
+} print_options_t;
+
+// Print a match (applying any replacements) to `out`; `opts` may be NULL.
+__attribute__((nonnull(1,2,3)))
+void fprint_match(FILE *out, const char *file_start, match_t *m, print_options_t *opts);
+
+// Print a visualization of a match to stdout.
+__attribute__((nonnull))
+void explain_match(match_t *m);
+
+#endif
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
-- 
cgit v1.2.3