diff --git a/Makefile b/Makefile index bce9d00..24f9ca0 100644 --- a/Makefile +++ b/Makefile @@ -4,25 +4,25 @@ PREFIX=/usr/local SYSCONFDIR=/etc CFLAGS=-std=c99 -Werror -D_XOPEN_SOURCE=700 -D_POSIX_C_SOURCE=200809L -flto CWARN=-Wall -Wextra -# -Wpedantic -Wsign-conversion -Wtype-limits -Wunused-result -Wnull-dereference \ -# -Waggregate-return -Walloc-zero -Walloca -Warith-conversion -Wcast-align -Wcast-align=strict \ -# -Wdangling-else -Wdate-time -Wdisabled-optimization -Wdouble-promotion -Wduplicated-branches \ -# -Wduplicated-cond -Wexpansion-to-defined -Wfloat-conversion -Wfloat-equal -Wformat-nonliteral \ -# -Wformat-security -Wformat-signedness -Wframe-address -Winline -Winvalid-pch -Wjump-misses-init \ -# -Wlogical-op -Wlong-long -Wmissing-format-attribute -Wmissing-include-dirs -Wmissing-noreturn \ -# -Wnull-dereference -Woverlength-strings -Wpacked -Wpacked-not-aligned -Wpointer-arith \ -# -Wredundant-decls -Wshadow -Wshadow=compatible-local -Wshadow=global -Wshadow=local \ -# -Wsign-conversion -Wstack-protector -Wsuggest-attribute=const -Wswitch-default -Wswitch-enum \ -# -Wsync-nand -Wtrampolines -Wundef -Wunsuffixed-float-constants -Wunused -Wunused-but-set-variable \ -# -Wunused-const-variable -Wunused-local-typedefs -Wunused-macros -Wvariadic-macros -Wvector-operation-performance \ -# -Wvla -Wwrite-strings + # -Wpedantic -Wsign-conversion -Wtype-limits -Wunused-result -Wnull-dereference \ + # -Waggregate-return -Walloc-zero -Walloca -Warith-conversion -Wcast-align -Wcast-align=strict \ + # -Wdangling-else -Wdate-time -Wdisabled-optimization -Wdouble-promotion -Wduplicated-branches \ + # -Wduplicated-cond -Wexpansion-to-defined -Wfloat-conversion -Wfloat-equal -Wformat-nonliteral \ + # -Wformat-security -Wformat-signedness -Wframe-address -Winline -Winvalid-pch -Wjump-misses-init \ + # -Wlogical-op -Wlong-long -Wmissing-format-attribute -Wmissing-include-dirs -Wmissing-noreturn \ + # -Wnull-dereference -Woverlength-strings -Wpacked 
-Wpacked-not-aligned -Wpointer-arith \ + # -Wredundant-decls -Wshadow -Wshadow=compatible-local -Wshadow=global -Wshadow=local \ + # -Wsign-conversion -Wstack-protector -Wsuggest-attribute=const -Wswitch-default -Wswitch-enum \ + # -Wsync-nand -Wtrampolines -Wundef -Wunsuffixed-float-constants -Wunused -Wunused-but-set-variable \ + # -Wunused-const-variable -Wunused-local-typedefs -Wunused-macros -Wvariadic-macros -Wvector-operation-performance \ + # -Wvla -Wwrite-strings OSFLAGS != case $$(uname -s) in *BSD|Darwin) echo '-D_BSD_SOURCE';; Linux) echo '-D_GNU_SOURCE';; *) echo '-D_DEFAULT_SOURCE';; esac EXTRA= G= O=-O3 ALL_FLAGS=$(CFLAGS) $(OSFLAGS) -DBP_NAME="\"$(NAME)\"" $(EXTRA) $(CWARN) $(G) $(O) -CFILES=pattern.c definitions.c utils.c match.c files.c print.c json.c utf8.c +CFILES=pattern.c definitions.c utils.c match.c files.c print.c matchviz.c json.c utf8.c OBJFILES=$(CFILES:.c=.o) all: $(NAME) bp.1 diff --git a/bp.c b/bp.c index 5a0da1b..8bc9283 100644 --- a/bp.c +++ b/bp.c @@ -24,6 +24,7 @@ #include "files.h" #include "json.h" #include "match.h" +#include "matchviz.h" #include "pattern.h" #include "print.h" #include "utils.h" diff --git a/match.c b/match.c index 514d8ca..2ab3a90 100644 --- a/match.c +++ b/match.c @@ -288,7 +288,7 @@ static match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, bool // Temporarily add a rule that the backref name matches the // exact string of the original match (no replacements) ssize_t len = (ssize_t)(m1->end - m1->start); - pat_t *backref = new_pat(f, m1->start, m1->end, len, len, BP_STRING); + pat_t *backref = new_pat(f, m1->start, m1->end, (size_t)len, len, BP_STRING); backref->args.string = m1->start; defs2 = with_def(defs, pat->args.ref.len, pat->args.ref.name, backref); } diff --git a/matchviz.c b/matchviz.c new file mode 100644 index 0000000..ebf0ad9 --- /dev/null +++ b/matchviz.c @@ -0,0 +1,171 @@ +// +// matchviz.c - Debug visualization of pattern matches. 
+// + +#include <stdio.h> +#include <string.h> + +#include "matchviz.h" +#include "types.h" +#include "utils.h" + +typedef struct match_node_s { + match_t *m; + struct match_node_s *next; +} match_node_t; + +__attribute__((nonnull, pure)) +static int height_of_match(match_t *m); +__attribute__((nonnull)) +static void _visualize_matches(match_node_t *firstmatch, int depth, const char *text, size_t textlen); + +// +// Return the height of a match object (i.e. the number of levels in the tree +// of submatches rooted at it, so a match with no children has height 1). +// +static int height_of_match(match_t *m) +{ + int height = 0; + for (match_t *c = m->child; c; c = c->nextsibling) { + int childheight = height_of_match(c); + if (childheight > height) height = childheight; + } + return 1 + height; +} + +// +// Print a visual explanation for the as-yet-unprinted matches provided. +// +static void _visualize_matches(match_node_t *firstmatch, int depth, const char *text, size_t textlen) +{ + const char *V = "│"; // Vertical bar + const char *H = "─"; // Horizontal bar + const char *color = (depth % 2 == 0) ? "34" : "33"; + + match_t *viz = firstmatch->m; + // This is a heuristic: print matches first if their submatches nest deeper. + // In general, this helps reduce the height of the final output by allowing + // for more rows that show the same rule matching in multiple places. + // TODO: there may be a better heuristic that optimizes for this factor + // while also printing earlier matches first when it doesn't affect overall + // output height. + for (match_node_t *p = firstmatch; p; p = p->next) + if (height_of_match(p->m) > height_of_match(viz)) + viz = p->m; + const char *viz_type = viz->pat->start; + size_t viz_typelen = (size_t)(viz->pat->end - viz->pat->start); + + // Backrefs use added dim quote marks to indicate that the pattern is a + // literal string being matched. 
(Backrefs have start/end inside the text + // input, instead of something the user typed in) + if (viz_type >= text && viz_type <= &text[textlen]) + printf("\033[%luG\033[0;2m\"\033[%s;1m", 2*textlen+3, color); + else + printf("\033[%luG\033[%s;1m", 2*textlen+3, color); + + for (size_t i = 0; i < viz_typelen; i++) { + switch (viz_type[i]) { + case '\n': printf("↵"); break; + case '\t': printf("⇥"); break; + default: printf("%c", viz_type[i]); break; + } + } + + if (viz_type >= text && viz_type <= &text[textlen]) + printf("\033[0;2m\""); + + printf("\033[0m"); + + match_node_t *children = NULL; + match_node_t **nextchild = &children; + +#define RIGHT_TYPE(m) (m->m->pat->end == m->m->pat->start + viz_typelen && strncmp(m->m->pat->start, viz_type, viz_typelen) == 0) + // Print nonzero-width first: + for (match_node_t *m = firstmatch; m; m = m->next) { + if (RIGHT_TYPE(m)) { + for (match_t *c = m->m->child; c; c = c->nextsibling) { + *nextchild = new(match_node_t); + (*nextchild)->m = c; + nextchild = &((*nextchild)->next); + } + if (m->m->end == m->m->start) continue; + printf("\033[%ldG\033[0;2m%s\033[0;7;%sm", 1+2*(m->m->start - text), V, color); + for (const char *c = m->m->start; c < m->m->end; ++c) { + // TODO: newline + if (c > m->m->start) printf(" "); + // TODO: utf8 + //while ((*c & 0xC0) != 0x80) printf("%c", *(c++)); + if (*c == '\n') + printf("↵"); + else if (*c == '\t') + printf("⇥"); + else + printf("%c", *c); + } + printf("\033[0;2m%s\033[0m", V); + } else { + *nextchild = new(match_node_t); + (*nextchild)->m = m->m; + nextchild = &((*nextchild)->next); + printf("\033[%ldG\033[0;2m%s", 1+2*(m->m->start - text), V); + for (ssize_t i = (ssize_t)(2*(m->m->end - m->m->start)-1); i > 0; i--) + printf(" "); + if (m->m->end > m->m->start) + printf("\033[0;2m%s", V); + printf("\033[0m"); + } + } + + // Print stars for zero-width: + for (match_node_t *m = firstmatch; m; m = m->next) { + if (m->m->end > m->m->start) continue; + if (RIGHT_TYPE(m)) { + 
printf("\033[%ldG\033[7;%sm▒\033[0m", 1+2*(m->m->start - text), color); + } else { + printf("\033[%ldG\033[0;2m%s\033[0m", 1+2*(m->m->start - text), V); + } + } + + printf("\n"); + + for (match_node_t *m = firstmatch; m; m = m->next) { + if (m->m->end == m->m->start) { + if (!RIGHT_TYPE(m)) + printf("\033[%ldG\033[0;2m%s", 1 + 2*(m->m->start - text), V); + } else { + const char *l = "└"; + const char *r = "┘"; + for (match_node_t *c = children; c; c = c->next) { + if (c->m->start == m->m->start || c->m->end == m->m->start) l = V; + if (c->m->start == m->m->end || c->m->end == m->m->end) r = V; + } + printf("\033[%ldG\033[0;2m%s", 1 + 2*(m->m->start - text), l); + const char *h = RIGHT_TYPE(m) ? H : " "; + for (ssize_t n = (ssize_t)(2*(m->m->end - m->m->start) - 1); n > 0; n--) + printf("%s", h); + printf("%s\033[0m", r); + } + } +#undef RIGHT_TYPE + + printf("\n"); + + if (children) + _visualize_matches(children, depth+1, text, textlen); + + for (match_node_t *c = children, *next = NULL; c; c = next) { + next = c->next; + xfree(&c); + } +} + +// +// Print a visualization of a match object. 
+// +void visualize_match(match_t *m) +{ + printf("\033[?7l"); // Disable line wrapping + match_node_t first = {.m = m}; + _visualize_matches(&first, 0, m->start, (size_t)(m->end - m->start)); + printf("\033[?7h"); // Re-enable line wrapping +} diff --git a/matchviz.h b/matchviz.h new file mode 100644 index 0000000..eb87774 --- /dev/null +++ b/matchviz.h @@ -0,0 +1,13 @@ +// +// Debug visualization of matches +// +#ifndef DEBUGVIZ__H +#define DEBUGVIZ__H + +#include "types.h" + +__attribute__((nonnull)) +void visualize_match(match_t *m); + +#endif +// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1 diff --git a/print.c b/print.c index 8638876..eb5f67d 100644 --- a/print.c +++ b/print.c @@ -12,174 +12,11 @@ #include "types.h" #include "utils.h" -typedef struct match_node_s { - match_t *m; - struct match_node_s *next; -} match_node_t; - static const char *color_match = "\033[0;31;1m"; static const char *color_replace = "\033[0;34;1m"; static const char *color_normal = "\033[0m"; static const char *current_color = NULL; -__attribute__((nonnull, pure)) -static int height_of_match(match_t *m); -__attribute__((nonnull)) -static void _visualize_matches(match_node_t *firstmatch, int depth, const char *text, size_t textlen); -__attribute__((nonnull(1,2))) -static inline void print_line_number(FILE *out, printer_t *pr, size_t line_number, const char *color); - -// -// Return the height of a match object (i.e. the number of descendents of the -// structure). -// -static int height_of_match(match_t *m) -{ - int height = 0; - for (match_t *c = m->child; c; c = c->nextsibling) { - int childheight = height_of_match(c); - if (childheight > height) height = childheight; - } - return 1 + height; -} - -// -// Print a visual explanation for the as-yet-unprinted matches provided. 
-// -static void _visualize_matches(match_node_t *firstmatch, int depth, const char *text, size_t textlen) -{ - const char *V = "│"; // Vertical bar - const char *H = "─"; // Horizontal bar - const char *color = (depth % 2 == 0) ? "34" : "33"; - - match_t *viz = firstmatch->m; - // This is a heuristic: print matches first if they have more submatches. - // In general, this helps reduce the height of the final output by allowing - // for more rows that show the same rule matching in multiple places. - // TODO: there may be a better heuristic that optimizes for this factor - // while also printing earlier matches first when it doesn't affect overall - // output height. - for (match_node_t *p = firstmatch; p; p = p->next) - if (height_of_match(p->m) > height_of_match(viz)) - viz = p->m; - const char *viz_type = viz->pat->start; - size_t viz_typelen = (size_t)(viz->pat->end - viz->pat->start); - - // Backrefs use added dim quote marks to indicate that the pattern is a - // literal string being matched. 
(Backrefs have start/end inside the text - // input, instead of something the user typed in) - if (viz_type >= text && viz_type <= &text[textlen]) - printf("\033[%luG\033[0;2m\"\033[%s;1m", 2*textlen+3, color); - else - printf("\033[%luG\033[%s;1m", 2*textlen+3, color); - - for (size_t i = 0; i < viz_typelen; i++) { - switch (viz_type[i]) { - case '\n': printf("↵"); break; - case '\t': printf("⇥"); break; - default: printf("%c", viz_type[i]); break; - } - } - - if (viz_type >= text && viz_type <= &text[textlen]) - printf("\033[0;2m\""); - - printf("\033[0m"); - - match_node_t *children = NULL; - match_node_t **nextchild = &children; - -#define RIGHT_TYPE(m) (m->m->pat->end == m->m->pat->start + viz_typelen && strncmp(m->m->pat->start, viz_type, viz_typelen) == 0) - // Print nonzero-width first: - for (match_node_t *m = firstmatch; m; m = m->next) { - if (RIGHT_TYPE(m)) { - for (match_t *c = m->m->child; c; c = c->nextsibling) { - *nextchild = new(match_node_t); - (*nextchild)->m = c; - nextchild = &((*nextchild)->next); - } - if (m->m->end == m->m->start) continue; - printf("\033[%ldG\033[0;2m%s\033[0;7;%sm", 1+2*(m->m->start - text), V, color); - for (const char *c = m->m->start; c < m->m->end; ++c) { - // TODO: newline - if (c > m->m->start) printf(" "); - // TODO: utf8 - //while ((*c & 0xC0) != 0x80) printf("%c", *(c++)); - if (*c == '\n') - printf("↵"); - else if (*c == '\t') - printf("⇥"); - else - printf("%c", *c); - } - printf("\033[0;2m%s\033[0m", V); - } else { - *nextchild = new(match_node_t); - (*nextchild)->m = m->m; - nextchild = &((*nextchild)->next); - printf("\033[%ldG\033[0;2m%s", 1+2*(m->m->start - text), V); - for (ssize_t i = (ssize_t)(2*(m->m->end - m->m->start)-1); i > 0; i--) - printf(" "); - if (m->m->end > m->m->start) - printf("\033[0;2m%s", V); - printf("\033[0m"); - } - } - - // Print stars for zero-width: - for (match_node_t *m = firstmatch; m; m = m->next) { - if (m->m->end > m->m->start) continue; - if (RIGHT_TYPE(m)) { - 
printf("\033[%ldG\033[7;%sm▒\033[0m", 1+2*(m->m->start - text), color); - } else { - printf("\033[%ldG\033[0;2m%s\033[0m", 1+2*(m->m->start - text), V); - } - } - - printf("\n"); - - for (match_node_t *m = firstmatch; m; m = m->next) { - if (m->m->end == m->m->start) { - if (!RIGHT_TYPE(m)) - printf("\033[%ldG\033[0;2m%s", 1 + 2*(m->m->start - text), V); - } else { - const char *l = "└"; - const char *r = "┘"; - for (match_node_t *c = children; c; c = c->next) { - if (c->m->start == m->m->start || c->m->end == m->m->start) l = V; - if (c->m->start == m->m->end || c->m->end == m->m->end) r = V; - } - printf("\033[%ldG\033[0;2m%s", 1 + 2*(m->m->start - text), l); - const char *h = RIGHT_TYPE(m) ? H : " "; - for (ssize_t n = (ssize_t)(2*(m->m->end - m->m->start) - 1); n > 0; n--) - printf("%s", h); - printf("%s\033[0m", r); - } - } -#undef RIGHT_TYPE - - printf("\n"); - - if (children) - _visualize_matches(children, depth+1, text, textlen); - - for (match_node_t *c = children, *next = NULL; c; c = next) { - next = c->next; - xfree(&c); - } -} - -// -// Print a visualization of a match object. -// -void visualize_match(match_t *m) -{ - printf("\033[?7l"); // Disable line wrapping - match_node_t first = {.m = m}; - _visualize_matches(&first, 0, m->start, (size_t)(m->end - m->start)); - printf("\033[?7h"); // Re-enable line wrapping -} - // // Print a line number, if it needs to be printed. // line number of 0 means "just print an empty space for the number" diff --git a/print.h b/print.h index 9a0439b..5ad645c 100644 --- a/print.h +++ b/print.h @@ -17,8 +17,6 @@ typedef struct { bool print_line_numbers:1; } printer_t; -__attribute__((nonnull)) -void visualize_match(match_t *m); __attribute__((nonnull(1,2))) void print_match(FILE *out, printer_t *pr, match_t *m); __attribute__((nonnull))