2 // printmatch.c - Debug visualization of pattern matches.
10 #include "printmatch.h"
// Node of a singly linked list of matches queued for visualization.
// Each node links to the following node through `next`; the code in this file
// also accesses an `m` member holding the node's `bp_match_t *`.
13 typedef struct match_node_s {
15 struct match_node_s *next;
// Forward declarations of the file-local (static) helpers defined below.
// Both are marked nonnull; height_of_match is additionally marked pure.
18 __attribute__((nonnull, pure)) static int height_of_match(bp_match_t *m);
19 __attribute__((nonnull)) static void _explain_matches(match_node_t *firstmatch, int depth, const char *text,
23 // Return the height of a match object (i.e. the number of descendents of the
// The computation is recursive: every entry of the NULL-terminated
// `m->children` array is measured and the largest child height is kept in
// `height`.
26 static int height_of_match(bp_match_t *m) {
// `m->children` may be NULL (no children); otherwise iteration stops at the
// first NULL entry.
28 for (int i = 0; m->children && m->children[i]; i++) {
29 bp_match_t *child = m->children[i];
30 int childheight = height_of_match(child);
// Track the maximum height over all children.
31 if (childheight > height) height = childheight;
37 // Print a visual explanation for the as-yet-unprinted matches provided.
// Parameters:
//   firstmatch - head of the linked list of matches to draw
//   depth      - recursion depth; even depths use color "34", odd depths "33"
//   text       - start of the text being visualized; each match is drawn at
//                column 1 + 2 * (match start - text)
//   textlen    - length of `text`; the pattern label is printed at column
//                2 * textlen + 3
// The child matches collected while drawing are passed to a recursive call
// with depth + 1.
39 static void _explain_matches(match_node_t *firstmatch, int depth, const char *text, size_t textlen) {
40 const char *V = "│"; // Vertical bar
41 const char *H = "─"; // Horizontal bar
42 const char *color = (depth % 2 == 0) ? "34" : "33";
44 bp_match_t *viz = firstmatch->m;
45 // This is a heuristic: print matches first if they have more submatches.
46 // In general, this helps reduce the height of the final output by allowing
47 // for more rows that show the same rule matching in multiple places.
48 // TODO: there may be a better heuristic that optimizes for this factor
49 // while also printing earlier matches first when it doesn't affect overall
51 for (match_node_t *p = firstmatch; p; p = p->next)
52 if (height_of_match(p->m) > height_of_match(viz)) viz = p->m;
// The label for this row is the source text of the chosen pattern.
53 const char *viz_type = viz->pat->start;
54 size_t viz_typelen = (size_t)(viz->pat->end - viz->pat->start);
56 // Backrefs use added dim quote marks to indicate that the pattern is a
57 // literal string being matched. (Backrefs have start/end inside the text
58 // input, instead of something the user typed in)
59 if (viz_type >= text && viz_type <= &text[textlen])
60 printf("\033[%zuG\033[0;2m\"\033[%s;1m", 2 * textlen + 3, color);
61 else if (viz->pat->type == BP_STRING && (viz->end - viz->start) == (long)viz_typelen)
62 printf("\033[%zuG\033[%s;1m\"", 2 * textlen + 3, color);
63 else printf("\033[%zuG\033[%s;1m", 2 * textlen + 3, color);
// Print the label, substituting visible glyphs for newline and tab.
65 for (size_t i = 0; i < viz_typelen; i++) {
66 switch (viz_type[i]) {
67 case '\n': printf("↵"); break;
68 case '\t': printf("⇥"); break;
69 default: printf("%c", viz_type[i]); break;
73 if (viz_type >= text && viz_type <= &text[textlen]) printf("\033[0;2m\"");
74 else if (viz->pat->type == BP_STRING && (viz->end - viz->start) == (long)viz_typelen) printf("\"");
// `children` collects the matches for the next call; `nextchild` always
// points at the link where the next node gets appended.
78 match_node_t *children = NULL;
79 match_node_t **nextchild = &children;
// RIGHT_TYPE(m): true when the node's pattern source text has the same length
// and bytes as the pattern chosen for this row.
81 #define RIGHT_TYPE(m) \
82 (m->m->pat->end == m->m->pat->start + viz_typelen && strncmp(m->m->pat->start, viz_type, viz_typelen) == 0)
83 // Print nonzero-width first:
84 for (match_node_t *m = firstmatch; m; m = m->next) {
86 // Instead of printing each subchain on its own line, flatten them all out at once:
87 if (m->m->pat->type == BP_CHAIN) {
88 bp_match_t *tmp = m->m;
89 while (tmp->pat->type == BP_CHAIN) {
90 *nextchild = new (match_node_t);
91 (*nextchild)->m = tmp->children[0];
92 nextchild = &((*nextchild)->next);
93 tmp = tmp->children[1];
95 *nextchild = new (match_node_t);
96 (*nextchild)->m = tmp;
97 nextchild = &((*nextchild)->next);
99 for (int i = 0; m->m->children && m->m->children[i]; i++) {
100 *nextchild = new (match_node_t);
101 (*nextchild)->m = m->m->children[i];
102 nextchild = &((*nextchild)->next);
105 if (m->m->end == m->m->start) continue;
106 printf("\033[%zdG\033[0;2m%s\033[0;7;%sm", 1 + 2 * (m->m->start - text), V, color);
107 for (const char *c = m->m->start; c < m->m->end; ++c) {
109 if (c > m->m->start) printf(" ");
111 // while ((*c & 0xC0) != 0x80) printf("%c", *(c++));
112 if (*c == '\n') printf("↵");
113 else if (*c == '\t') printf("⇥");
114 else printf("%c", *c);
116 printf("\033[0;2m%s\033[m", V);
118 *nextchild = new (match_node_t);
119 (*nextchild)->m = m->m;
120 nextchild = &((*nextchild)->next);
121 printf("\033[%zdG\033[0;2m%s", 1 + 2 * (m->m->start - text), V);
122 for (ssize_t i = (ssize_t)(2 * (m->m->end - m->m->start) - 1); i > 0; i--)
124 if (m->m->end > m->m->start) printf("\033[0;2m%s", V);
129 // Print stars for zero-width:
130 for (match_node_t *m = firstmatch; m; m = m->next) {
131 if (m->m->end > m->m->start) continue;
// Zero-width matches are marked with a shaded block in reverse video.
133 printf("\033[%zdG\033[7;%sm▒\033[m", 1 + 2 * (m->m->start - text), color);
135 printf("\033[%zdG\033[0;2m%s\033[m", 1 + 2 * (m->m->start - text), V);
141 for (match_node_t *m = firstmatch; m; m = m->next) {
142 if (m->m->end == m->m->start) {
143 if (!RIGHT_TYPE(m)) printf("\033[%zdG\033[0;2m%s", 1 + 2 * (m->m->start - text), V);
// Corner pieces default to └ and ┘; a side becomes a vertical bar instead
// when a child match starts or ends at that edge.
145 const char *l = "└";
146 const char *r = "┘";
147 for (match_node_t *c = children; c; c = c->next) {
148 if (c->m->start == m->m->start || c->m->end == m->m->start) l = V;
149 if (c->m->start == m->m->end || c->m->end == m->m->end) r = V;
151 printf("\033[%zdG\033[0;2m%s", 1 + 2 * (m->m->start - text), l);
152 const char *h = RIGHT_TYPE(m) ? H : " ";
153 for (ssize_t n = (ssize_t)(2 * (m->m->end - m->m->start) - 1); n > 0; n--)
155 printf("%s\033[m", r);
// Draw the collected children one level deeper.
162 if (children) _explain_matches(children, depth + 1, text, textlen);
164 for (match_node_t *c = children, *next = NULL; c; c = next) {
171 // Print a visualization of a match object.
// Wraps `m` in a single-node list and draws it starting at depth 0, using the
// matched span from m->start to m->end as the reference text. Terminal line
// wrapping is switched off while drawing and switched back on afterwards.
174 void explain_match(bp_match_t *m) {
175 printf("\033[?7l"); // Disable line wrapping
176 match_node_t first = {.m = m};
177 _explain_matches(&first, 0, m->start, (size_t)(m->end - m->start));
178 printf("\033[?7h"); // Re-enable line wrapping
// Write the single character `c` to `out`, tracking the number of bytes
// written in `printed`. When `c` is a newline and `opts->on_nl` is set, the
// replacement color escape (if one is configured) is emitted again after it.
// `opts` may be NULL.
181 static inline int fputc_safe(FILE *out, char c, print_options_t *opts) {
// fputc() returns the character written (or EOF), not a byte count, so its
// result is converted to 1 on success / 0 on failure before accumulating.
182 int printed = (fputc(c, out) == EOF) ? 0 : 1;
183 if (c == '\n' && opts && opts->on_nl) {
185 if (opts->replace_color) printed += fprintf(out, "%s", opts->replace_color);
// Print the match `m` to `out`, accumulating the number of bytes written in
// `printed`.
//  - For BP_REPLACE patterns the replacement text is emitted. An `@` that is
//    not followed by another `@` introduces a capture reference, either
//    numbered or named; the referenced capture is printed by a recursive call.
//  - For every other pattern the matched text is emitted, recursing into child
//    matches that lie inside the match and printing the text between them.
// `file_start` bounds the backwards scan for the start of the match's line.
// `opts` may be NULL; when set, its colors and `fprint_between` callback are
// used for the output.
191 int fprint_match(FILE *out, const char *file_start, bp_match_t *m, print_options_t *opts) {
193 if (m->pat->type == BP_REPLACE) {
194 auto rep = When(m->pat, BP_REPLACE);
195 const char *text = rep->text;
196 const char *end = &text[rep->len];
197 if (opts && opts->replace_color) printed += fprintf(out, "%s", opts->replace_color);
199 // TODO: clean up the line numbering code
200 for (const char *r = text; r < end;) {
201 // Capture substitution
202 if (*r == '@' && r + 1 < end && r[1] != '@') {
203 const char *next = r + 1;
204 // Retrieve the capture value:
205 bp_match_t *cap = NULL;
// <ctype.h> functions require a value representable as unsigned char.
206 if (isdigit((unsigned char)*next)) {
207 int n = (int)strtol(next, (char **)&next, 10);
208 cap = get_numbered_capture(m->children[0], n);
210 const char *name = next, *name_end = after_name(next, end);
212 cap = get_named_capture(m->children[0], name, (size_t)(name_end - name));
// `next` points into the replacement text, so it is bounds-checked against
// the end of that text rather than the end of the matched input.
214 if (next < end && *next == ';') ++next;
219 printed += fprint_match(out, file_start, cap, opts);
220 if (opts && opts->replace_color) printed += fprintf(out, "%s", opts->replace_color);
226 // Bugfix: if pattern matches at the start of a line and first printed character is from
227 // the replacement string, we need to be sure that the line number gets printed.
228 // Regression test: `seq 3 | bp '{..$}' -r '#@0'` (should not print "#" before line numbers)
230 if (opts && opts->fprint_between) {
231 printed += opts->fprint_between(out, m->start, m->start, opts->match_color);
232 if (opts->replace_color) printed += fprintf(out, "%s", opts->replace_color);
238 if (*r == 'N') { // \N (nodent)
240 // Mildly hacky: nodents here are based on the *first line*
241 // of the match. If the match spans multiple lines, or if
242 // the replacement text contains newlines, this may get weird.
243 const char *line_start = m->start;
244 while (line_start > file_start && line_start[-1] != '\n')
246 printed += fputc_safe(out, '\n', opts);
// Copy the leading spaces/tabs of the match's line after the newline.
// fputc() returns the character written (or EOF), so count one byte per
// successful write instead of adding the character's value.
247 for (const char *p = line_start; p < m->start && (*p == ' ' || *p == '\t'); ++p)
248 printed += (fputc(*p, out) != EOF);
251 printed += fputc_safe(out, unescapechar(r, &r, end), opts);
253 printed += fputc_safe(out, *r, opts);
258 if (opts && opts->match_color) printed += fprintf(out, "%s", opts->match_color);
// `prev` marks where printing of the matched text resumes.
259 const char *prev = m->start;
260 for (int i = 0; m->children && m->children[i]; i++) {
261 bp_match_t *child = m->children[i];
262 // Skip children from e.g. zero-width matches like >@foo
263 if (!(prev <= child->start && child->start <= m->end && prev <= child->end && child->end <= m->end))
// Emit the text between the previous position and this child.
265 if (child->start > prev) {
266 if (opts && opts->fprint_between)
267 printed += opts->fprint_between(out, prev, child->start, opts->match_color);
268 else printed += fwrite(prev, sizeof(char), (size_t)(child->start - prev), out);
270 printed += fprint_match(out, file_start, child, opts);
271 if (opts && opts->match_color) printed += fprintf(out, "%s", opts->match_color);
// Emit whatever remains from `prev` up to the end of the match.
275 if (opts && opts->fprint_between) printed += opts->fprint_between(out, prev, m->end, opts->match_color);
276 else printed += fwrite(prev, sizeof(char), (size_t)(m->end - prev), out);
282 // vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0