Factored debug visualization into its own file

This commit is contained in:
Bruce Hill 2021-07-17 14:05:10 -07:00
parent 0f05961578
commit 378e94090f
7 changed files with 199 additions and 179 deletions

View File

@ -4,25 +4,25 @@ PREFIX=/usr/local
SYSCONFDIR=/etc
CFLAGS=-std=c99 -Werror -D_XOPEN_SOURCE=700 -D_POSIX_C_SOURCE=200809L -flto
CWARN=-Wall -Wextra
# -Wpedantic -Wsign-conversion -Wtype-limits -Wunused-result -Wnull-dereference \
# -Waggregate-return -Walloc-zero -Walloca -Warith-conversion -Wcast-align -Wcast-align=strict \
# -Wdangling-else -Wdate-time -Wdisabled-optimization -Wdouble-promotion -Wduplicated-branches \
# -Wduplicated-cond -Wexpansion-to-defined -Wfloat-conversion -Wfloat-equal -Wformat-nonliteral \
# -Wformat-security -Wformat-signedness -Wframe-address -Winline -Winvalid-pch -Wjump-misses-init \
# -Wlogical-op -Wlong-long -Wmissing-format-attribute -Wmissing-include-dirs -Wmissing-noreturn \
# -Wnull-dereference -Woverlength-strings -Wpacked -Wpacked-not-aligned -Wpointer-arith \
# -Wredundant-decls -Wshadow -Wshadow=compatible-local -Wshadow=global -Wshadow=local \
# -Wsign-conversion -Wstack-protector -Wsuggest-attribute=const -Wswitch-default -Wswitch-enum \
# -Wsync-nand -Wtrampolines -Wundef -Wunsuffixed-float-constants -Wunused -Wunused-but-set-variable \
# -Wunused-const-variable -Wunused-local-typedefs -Wunused-macros -Wvariadic-macros -Wvector-operation-performance \
# -Wvla -Wwrite-strings
# -Wpedantic -Wsign-conversion -Wtype-limits -Wunused-result -Wnull-dereference \
# -Waggregate-return -Walloc-zero -Walloca -Warith-conversion -Wcast-align -Wcast-align=strict \
# -Wdangling-else -Wdate-time -Wdisabled-optimization -Wdouble-promotion -Wduplicated-branches \
# -Wduplicated-cond -Wexpansion-to-defined -Wfloat-conversion -Wfloat-equal -Wformat-nonliteral \
# -Wformat-security -Wformat-signedness -Wframe-address -Winline -Winvalid-pch -Wjump-misses-init \
# -Wlogical-op -Wlong-long -Wmissing-format-attribute -Wmissing-include-dirs -Wmissing-noreturn \
# -Wnull-dereference -Woverlength-strings -Wpacked -Wpacked-not-aligned -Wpointer-arith \
# -Wredundant-decls -Wshadow -Wshadow=compatible-local -Wshadow=global -Wshadow=local \
# -Wsign-conversion -Wstack-protector -Wsuggest-attribute=const -Wswitch-default -Wswitch-enum \
# -Wsync-nand -Wtrampolines -Wundef -Wunsuffixed-float-constants -Wunused -Wunused-but-set-variable \
# -Wunused-const-variable -Wunused-local-typedefs -Wunused-macros -Wvariadic-macros -Wvector-operation-performance \
# -Wvla -Wwrite-strings
OSFLAGS != case $$(uname -s) in *BSD|Darwin) echo '-D_BSD_SOURCE';; Linux) echo '-D_GNU_SOURCE';; *) echo '-D_DEFAULT_SOURCE';; esac
EXTRA=
G=
O=-O3
ALL_FLAGS=$(CFLAGS) $(OSFLAGS) -DBP_NAME="\"$(NAME)\"" $(EXTRA) $(CWARN) $(G) $(O)
CFILES=pattern.c definitions.c utils.c match.c files.c print.c json.c utf8.c
CFILES=pattern.c definitions.c utils.c match.c files.c print.c matchviz.c json.c utf8.c
OBJFILES=$(CFILES:.c=.o)
all: $(NAME) bp.1

1
bp.c
View File

@ -24,6 +24,7 @@
#include "files.h"
#include "json.h"
#include "match.h"
#include "matchviz.h"
#include "pattern.h"
#include "print.h"
#include "utils.h"

View File

@ -288,7 +288,7 @@ static match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, bool
// Temporarily add a rule that the backref name matches the
// exact string of the original match (no replacements)
ssize_t len = (ssize_t)(m1->end - m1->start);
pat_t *backref = new_pat(f, m1->start, m1->end, len, len, BP_STRING);
pat_t *backref = new_pat(f, m1->start, m1->end, (size_t)len, len, BP_STRING);
backref->args.string = m1->start;
defs2 = with_def(defs, pat->args.ref.len, pat->args.ref.name, backref);
}

171
matchviz.c Normal file
View File

@ -0,0 +1,171 @@
//
// matchviz.c - Debug visualization of pattern matches.
//
#include <stdio.h>
#include <string.h>
#include "matchviz.h"
#include "types.h"
#include "utils.h"
typedef struct match_node_s {
match_t *m;
struct match_node_s *next;
} match_node_t;
__attribute__((nonnull, pure))
static int height_of_match(match_t *m);
__attribute__((nonnull))
static void _visualize_matches(match_node_t *firstmatch, int depth, const char *text, size_t textlen);
//
// Return the height of a match tree: 1 for a match with no children,
// otherwise 1 plus the height of its tallest child.
//
static int height_of_match(match_t *m)
{
    int tallest = 0;
    match_t *kid = m->child;
    while (kid) {
        const int kid_height = height_of_match(kid);
        tallest = (kid_height > tallest) ? kid_height : tallest;
        kid = kid->nextsibling;
    }
    return tallest + 1;
}
//
// Print one byte of pattern/input text using exactly one terminal column.
// Newline and tab are replaced by visible single-column glyphs: the layout
// below positions things by column arithmetic, so a byte that printed
// nothing (or moved the cursor) would misalign everything after it.
//
static void viz_putc(char c)
{
    switch (c) {
    case '\n': fputs("↵", stdout); break;
    case '\t': fputs("⇥", stdout); break;
    default: putchar(c); break;
    }
}
//
// Print a visual explanation for the as-yet-unprinted matches provided.
//
// Each call prints two rows to stdout and then recurses on whatever is left:
//   1. a row showing the matched text of every match in the list that was
//      produced by the chosen pattern, with a label for that pattern printed
//      to the right of the text area, and
//   2. a connector row of horizontal/vertical bars leading to the next level.
//
// Parameters:
//   firstmatch - head of a linked list of matches still to be drawn
//   depth      - recursion depth; only used to alternate the label color
//   text       - start of the text being visualized; byte offset `n` from
//                `text` is drawn at terminal column 1+2*n
//   textlen    - length of that text in bytes (the label is printed at
//                column 2*textlen+3)
//
static void _visualize_matches(match_node_t *firstmatch, int depth, const char *text, size_t textlen)
{
    // These must be non-empty, single-column glyphs: an empty printf format
    // is rejected by -Wformat-zero-length (an error under -Wall -Werror), and
    // the column math assumes each bar occupies one cell.
    const char *V = "│"; // Vertical bar
    const char *H = "─"; // Horizontal bar
    const char *color = (depth % 2 == 0) ? "34" : "33";

    // This is a heuristic: print matches first if they have more submatches.
    // In general, this helps reduce the height of the final output by allowing
    // for more rows that show the same rule matching in multiple places.
    // TODO: there may be a better heuristic that optimizes for this factor
    // while also printing earlier matches first when it doesn't affect overall
    // output height.
    match_t *viz = firstmatch->m;
    int viz_height = height_of_match(viz); // computed once per candidate, not once per comparison
    for (match_node_t *p = firstmatch->next; p; p = p->next) {
        int h = height_of_match(p->m);
        if (h > viz_height) {
            viz = p->m;
            viz_height = h;
        }
    }
    const char *viz_type = viz->pat->start;
    size_t viz_typelen = (size_t)(viz->pat->end - viz->pat->start);

    // Backrefs use added dim quote marks to indicate that the pattern is a
    // literal string being matched. (Backrefs have start/end inside the text
    // input, instead of something the user typed in)
    const int is_backref = (viz_type >= text && viz_type <= &text[textlen]);

    // Pattern label, to the right of the text area. (%zu: the column is a size_t.)
    printf("\033[%zuG", 2*textlen+3);
    if (is_backref)
        printf("\033[0;2m\"");
    printf("\033[%s;1m", color);
    for (size_t i = 0; i < viz_typelen; i++)
        viz_putc(viz_type[i]);
    if (is_backref)
        printf("\033[0;2m\"");
    printf("\033[0m");

    // Matches to be drawn by the recursive call: children of the matches
    // drawn on this row, plus matches of other patterns that are deferred.
    // NOTE(review): relies on new() returning zeroed memory so the last
    // node's `next` is NULL -- confirm in utils.h.
    match_node_t *children = NULL;
    match_node_t **nextchild = &children;

// True if the node's pattern has the same source text as the chosen pattern.
#define RIGHT_TYPE(node) ((node)->m->pat->end == (node)->m->pat->start + viz_typelen \
        && strncmp((node)->m->pat->start, viz_type, viz_typelen) == 0)

    // Print nonzero-width first:
    for (match_node_t *m = firstmatch; m; m = m->next) {
        match_t *cur = m->m;
        if (RIGHT_TYPE(m)) {
            for (match_t *c = cur->child; c; c = c->nextsibling) {
                *nextchild = new(match_node_t);
                (*nextchild)->m = c;
                nextchild = &((*nextchild)->next);
            }
            if (cur->end == cur->start) continue;
            // %td: the column is computed from a pointer difference (ptrdiff_t)
            printf("\033[%tdG\033[0;2m%s\033[0;7;%sm", 1+2*(cur->start - text), V, color);
            for (const char *c = cur->start; c < cur->end; ++c) {
                // One separator column between bytes keeps the 2-columns-per-byte layout
                if (c > cur->start) putchar(' ');
                // TODO: utf8 (multi-byte sequences are currently printed byte-by-byte)
                viz_putc(*c);
            }
            printf("\033[0;2m%s\033[0m", V);
        } else {
            // Not drawn on this row: defer the match itself to a later row
            // and just draw dim bars marking its extent.
            *nextchild = new(match_node_t);
            (*nextchild)->m = cur;
            nextchild = &((*nextchild)->next);
            printf("\033[%tdG\033[0;2m%s", 1+2*(cur->start - text), V);
            for (ssize_t i = (ssize_t)(2*(cur->end - cur->start)-1); i > 0; i--)
                putchar(' ');
            if (cur->end > cur->start)
                printf("\033[0;2m%s", V);
            printf("\033[0m");
        }
    }

    // Print stars for zero-width (drawn last so they are not overwritten by
    // the wider matches printed above):
    for (match_node_t *m = firstmatch; m; m = m->next) {
        match_t *cur = m->m;
        if (cur->end > cur->start) continue;
        if (RIGHT_TYPE(m)) {
            printf("\033[%tdG\033[7;%sm▒\033[0m", 1+2*(cur->start - text), color);
        } else {
            printf("\033[%tdG\033[0;2m%s\033[0m", 1+2*(cur->start - text), V);
        }
    }
    printf("\n");

    // Connector row between this level and the next one:
    for (match_node_t *m = firstmatch; m; m = m->next) {
        match_t *cur = m->m;
        if (cur->end == cur->start) {
            // Reset afterwards so the dim attribute doesn't leak into
            // whatever is printed next.
            if (!RIGHT_TYPE(m))
                printf("\033[%tdG\033[0;2m%s\033[0m", 1 + 2*(cur->start - text), V);
        } else {
            // Only draw an edge bar where some pending match starts or ends
            // at that edge, so the bar connects to something on the next row.
            const char *l = "";
            const char *r = "";
            for (match_node_t *c = children; c; c = c->next) {
                if (c->m->start == cur->start || c->m->end == cur->start) l = V;
                if (c->m->start == cur->end || c->m->end == cur->end) r = V;
            }
            printf("\033[%tdG\033[0;2m%s", 1 + 2*(cur->start - text), l);
            const char *h = RIGHT_TYPE(m) ? H : " ";
            for (ssize_t n = (ssize_t)(2*(cur->end - cur->start) - 1); n > 0; n--)
                fputs(h, stdout);
            printf("%s\033[0m", r);
        }
    }
#undef RIGHT_TYPE

    printf("\n");

    if (children)
        _visualize_matches(children, depth+1, text, textlen);

    // The list nodes (not the matches they point to) are owned by this call.
    for (match_node_t *c = children, *next = NULL; c; c = next) {
        next = c->next;
        xfree(&c);
    }
}
//
// Print a visualization of a match object to stdout. The matched text
// (m->start up to m->end) is the text that gets visualized.
//
void visualize_match(match_t *m)
{
    // Single-element list holding the root match; `next` is zero-initialized.
    match_node_t root = {.m = m};
    const size_t textlen = (size_t)(m->end - m->start);

    fputs("\033[?7l", stdout); // Disable line wrapping
    _visualize_matches(&root, 0, m->start, textlen);
    fputs("\033[?7h", stdout); // Re-enable line wrapping
}

13
matchviz.h Normal file
View File

@ -0,0 +1,13 @@
//
// Debug visualization of matches
//
#ifndef DEBUGVIZ__H
#define DEBUGVIZ__H
#include "types.h"
// Print a visualization of a match object (implemented in matchviz.c).
// The nonnull attribute declares that `m` must not be NULL.
__attribute__((nonnull))
void visualize_match(match_t *m);
#endif
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1

163
print.c
View File

@ -12,174 +12,11 @@
#include "types.h"
#include "utils.h"
typedef struct match_node_s {
match_t *m;
struct match_node_s *next;
} match_node_t;
static const char *color_match = "\033[0;31;1m";
static const char *color_replace = "\033[0;34;1m";
static const char *color_normal = "\033[0m";
static const char *current_color = NULL;
__attribute__((nonnull, pure))
static int height_of_match(match_t *m);
__attribute__((nonnull))
static void _visualize_matches(match_node_t *firstmatch, int depth, const char *text, size_t textlen);
__attribute__((nonnull(1,2)))
static inline void print_line_number(FILE *out, printer_t *pr, size_t line_number, const char *color);
//
// Return the height of a match tree: 1 for a match with no children,
// otherwise 1 plus the height of its tallest child.
//
static int height_of_match(match_t *m)
{
    int tallest = 0;
    match_t *kid = m->child;
    while (kid) {
        const int kid_height = height_of_match(kid);
        tallest = (kid_height > tallest) ? kid_height : tallest;
        kid = kid->nextsibling;
    }
    return tallest + 1;
}
//
// Print one byte of pattern/input text using exactly one terminal column.
// Newline and tab are replaced by visible single-column glyphs: the layout
// below positions things by column arithmetic, so a byte that printed
// nothing (or moved the cursor) would misalign everything after it.
//
static void viz_putc(char c)
{
    switch (c) {
    case '\n': fputs("↵", stdout); break;
    case '\t': fputs("⇥", stdout); break;
    default: putchar(c); break;
    }
}
//
// Print a visual explanation for the as-yet-unprinted matches provided.
//
// Each call prints two rows to stdout and then recurses on whatever is left:
//   1. a row showing the matched text of every match in the list that was
//      produced by the chosen pattern, with a label for that pattern printed
//      to the right of the text area, and
//   2. a connector row of horizontal/vertical bars leading to the next level.
//
// Parameters:
//   firstmatch - head of a linked list of matches still to be drawn
//   depth      - recursion depth; only used to alternate the label color
//   text       - start of the text being visualized; byte offset `n` from
//                `text` is drawn at terminal column 1+2*n
//   textlen    - length of that text in bytes (the label is printed at
//                column 2*textlen+3)
//
static void _visualize_matches(match_node_t *firstmatch, int depth, const char *text, size_t textlen)
{
    // These must be non-empty, single-column glyphs: an empty printf format
    // is rejected by -Wformat-zero-length (an error under -Wall -Werror), and
    // the column math assumes each bar occupies one cell.
    const char *V = "│"; // Vertical bar
    const char *H = "─"; // Horizontal bar
    const char *color = (depth % 2 == 0) ? "34" : "33";

    // This is a heuristic: print matches first if they have more submatches.
    // In general, this helps reduce the height of the final output by allowing
    // for more rows that show the same rule matching in multiple places.
    // TODO: there may be a better heuristic that optimizes for this factor
    // while also printing earlier matches first when it doesn't affect overall
    // output height.
    match_t *viz = firstmatch->m;
    int viz_height = height_of_match(viz); // computed once per candidate, not once per comparison
    for (match_node_t *p = firstmatch->next; p; p = p->next) {
        int h = height_of_match(p->m);
        if (h > viz_height) {
            viz = p->m;
            viz_height = h;
        }
    }
    const char *viz_type = viz->pat->start;
    size_t viz_typelen = (size_t)(viz->pat->end - viz->pat->start);

    // Backrefs use added dim quote marks to indicate that the pattern is a
    // literal string being matched. (Backrefs have start/end inside the text
    // input, instead of something the user typed in)
    const int is_backref = (viz_type >= text && viz_type <= &text[textlen]);

    // Pattern label, to the right of the text area. (%zu: the column is a size_t.)
    printf("\033[%zuG", 2*textlen+3);
    if (is_backref)
        printf("\033[0;2m\"");
    printf("\033[%s;1m", color);
    for (size_t i = 0; i < viz_typelen; i++)
        viz_putc(viz_type[i]);
    if (is_backref)
        printf("\033[0;2m\"");
    printf("\033[0m");

    // Matches to be drawn by the recursive call: children of the matches
    // drawn on this row, plus matches of other patterns that are deferred.
    // NOTE(review): relies on new() returning zeroed memory so the last
    // node's `next` is NULL -- confirm in utils.h.
    match_node_t *children = NULL;
    match_node_t **nextchild = &children;

// True if the node's pattern has the same source text as the chosen pattern.
#define RIGHT_TYPE(node) ((node)->m->pat->end == (node)->m->pat->start + viz_typelen \
        && strncmp((node)->m->pat->start, viz_type, viz_typelen) == 0)

    // Print nonzero-width first:
    for (match_node_t *m = firstmatch; m; m = m->next) {
        match_t *cur = m->m;
        if (RIGHT_TYPE(m)) {
            for (match_t *c = cur->child; c; c = c->nextsibling) {
                *nextchild = new(match_node_t);
                (*nextchild)->m = c;
                nextchild = &((*nextchild)->next);
            }
            if (cur->end == cur->start) continue;
            // %td: the column is computed from a pointer difference (ptrdiff_t)
            printf("\033[%tdG\033[0;2m%s\033[0;7;%sm", 1+2*(cur->start - text), V, color);
            for (const char *c = cur->start; c < cur->end; ++c) {
                // One separator column between bytes keeps the 2-columns-per-byte layout
                if (c > cur->start) putchar(' ');
                // TODO: utf8 (multi-byte sequences are currently printed byte-by-byte)
                viz_putc(*c);
            }
            printf("\033[0;2m%s\033[0m", V);
        } else {
            // Not drawn on this row: defer the match itself to a later row
            // and just draw dim bars marking its extent.
            *nextchild = new(match_node_t);
            (*nextchild)->m = cur;
            nextchild = &((*nextchild)->next);
            printf("\033[%tdG\033[0;2m%s", 1+2*(cur->start - text), V);
            for (ssize_t i = (ssize_t)(2*(cur->end - cur->start)-1); i > 0; i--)
                putchar(' ');
            if (cur->end > cur->start)
                printf("\033[0;2m%s", V);
            printf("\033[0m");
        }
    }

    // Print stars for zero-width (drawn last so they are not overwritten by
    // the wider matches printed above):
    for (match_node_t *m = firstmatch; m; m = m->next) {
        match_t *cur = m->m;
        if (cur->end > cur->start) continue;
        if (RIGHT_TYPE(m)) {
            printf("\033[%tdG\033[7;%sm▒\033[0m", 1+2*(cur->start - text), color);
        } else {
            printf("\033[%tdG\033[0;2m%s\033[0m", 1+2*(cur->start - text), V);
        }
    }
    printf("\n");

    // Connector row between this level and the next one:
    for (match_node_t *m = firstmatch; m; m = m->next) {
        match_t *cur = m->m;
        if (cur->end == cur->start) {
            // Reset afterwards so the dim attribute doesn't leak into
            // whatever is printed next.
            if (!RIGHT_TYPE(m))
                printf("\033[%tdG\033[0;2m%s\033[0m", 1 + 2*(cur->start - text), V);
        } else {
            // Only draw an edge bar where some pending match starts or ends
            // at that edge, so the bar connects to something on the next row.
            const char *l = "";
            const char *r = "";
            for (match_node_t *c = children; c; c = c->next) {
                if (c->m->start == cur->start || c->m->end == cur->start) l = V;
                if (c->m->start == cur->end || c->m->end == cur->end) r = V;
            }
            printf("\033[%tdG\033[0;2m%s", 1 + 2*(cur->start - text), l);
            const char *h = RIGHT_TYPE(m) ? H : " ";
            for (ssize_t n = (ssize_t)(2*(cur->end - cur->start) - 1); n > 0; n--)
                fputs(h, stdout);
            printf("%s\033[0m", r);
        }
    }
#undef RIGHT_TYPE

    printf("\n");

    if (children)
        _visualize_matches(children, depth+1, text, textlen);

    // The list nodes (not the matches they point to) are owned by this call.
    for (match_node_t *c = children, *next = NULL; c; c = next) {
        next = c->next;
        xfree(&c);
    }
}
//
// Print a visualization of a match object to stdout. The matched text
// (m->start up to m->end) is the text that gets visualized.
//
void visualize_match(match_t *m)
{
    // Single-element list holding the root match; `next` is zero-initialized.
    match_node_t root = {.m = m};
    const size_t textlen = (size_t)(m->end - m->start);

    fputs("\033[?7l", stdout); // Disable line wrapping
    _visualize_matches(&root, 0, m->start, textlen);
    fputs("\033[?7h", stdout); // Re-enable line wrapping
}
//
// Print a line number, if it needs to be printed.
// line number of 0 means "just print an empty space for the number"

View File

@ -17,8 +17,6 @@ typedef struct {
bool print_line_numbers:1;
} printer_t;
__attribute__((nonnull))
void visualize_match(match_t *m);
__attribute__((nonnull(1,2)))
void print_match(FILE *out, printer_t *pr, match_t *m);
__attribute__((nonnull))