code / bp

Lines4.3K C3.3K Markdown541 YAML273 make110 Shell77 Lua54
(282 lines)
//
// printmatch.c - Debug visualization of pattern matches.
//
#include <ctype.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>

#include "match.h"
#include "printmatch.h"
#include "utils.h"
13 typedef struct match_node_s {
14 bp_match_t *m;
15 struct match_node_s *next;
16 } match_node_t;
18 __attribute__((nonnull, pure)) static int height_of_match(bp_match_t *m);
19 __attribute__((nonnull)) static void _explain_matches(match_node_t *firstmatch, int depth, const char *text,
20 size_t textlen);
22 //
23 // Return the height of a match object (i.e. the number of descendents of the
24 // structure).
25 //
26 static int height_of_match(bp_match_t *m) {
27 int height = 0;
28 for (int i = 0; m->children && m->children[i]; i++) {
29 bp_match_t *child = m->children[i];
30 int childheight = height_of_match(child);
31 if (childheight > height) height = childheight;
33 return 1 + height;
36 //
37 // Print a visual explanation for the as-yet-unprinted matches provided.
38 //
39 static void _explain_matches(match_node_t *firstmatch, int depth, const char *text, size_t textlen) {
40 const char *V = "│"; // Vertical bar
41 const char *H = "─"; // Horizontal bar
42 const char *color = (depth % 2 == 0) ? "34" : "33";
44 bp_match_t *viz = firstmatch->m;
45 // This is a heuristic: print matches first if they have more submatches.
46 // In general, this helps reduce the height of the final output by allowing
47 // for more rows that show the same rule matching in multiple places.
48 // TODO: there may be a better heuristic that optimizes for this factor
49 // while also printing earlier matches first when it doesn't affect overall
50 // output height.
51 for (match_node_t *p = firstmatch; p; p = p->next)
52 if (height_of_match(p->m) > height_of_match(viz)) viz = p->m;
53 const char *viz_type = viz->pat->start;
54 size_t viz_typelen = (size_t)(viz->pat->end - viz->pat->start);
56 // Backrefs use added dim quote marks to indicate that the pattern is a
57 // literal string being matched. (Backrefs have start/end inside the text
58 // input, instead of something the user typed in)
59 if (viz_type >= text && viz_type <= &text[textlen])
60 printf("\033[%zuG\033[0;2m\"\033[%s;1m", 2 * textlen + 3, color);
61 else if (viz->pat->type == BP_STRING && (viz->end - viz->start) == (long)viz_typelen)
62 printf("\033[%zuG\033[%s;1m\"", 2 * textlen + 3, color);
63 else printf("\033[%zuG\033[%s;1m", 2 * textlen + 3, color);
65 for (size_t i = 0; i < viz_typelen; i++) {
66 switch (viz_type[i]) {
67 case '\n': printf("↵"); break;
68 case '\t': printf("⇥"); break;
69 default: printf("%c", viz_type[i]); break;
73 if (viz_type >= text && viz_type <= &text[textlen]) printf("\033[0;2m\"");
74 else if (viz->pat->type == BP_STRING && (viz->end - viz->start) == (long)viz_typelen) printf("\"");
76 printf("\033[m");
78 match_node_t *children = NULL;
79 match_node_t **nextchild = &children;
81 #define RIGHT_TYPE(m) \
82 (m->m->pat->end == m->m->pat->start + viz_typelen && strncmp(m->m->pat->start, viz_type, viz_typelen) == 0)
83 // Print nonzero-width first:
84 for (match_node_t *m = firstmatch; m; m = m->next) {
85 if (RIGHT_TYPE(m)) {
86 // Instead of printing each subchain on its own line, flatten them all out at once:
87 if (m->m->pat->type == BP_CHAIN) {
88 bp_match_t *tmp = m->m;
89 while (tmp->pat->type == BP_CHAIN) {
90 *nextchild = new (match_node_t);
91 (*nextchild)->m = tmp->children[0];
92 nextchild = &((*nextchild)->next);
93 tmp = tmp->children[1];
95 *nextchild = new (match_node_t);
96 (*nextchild)->m = tmp;
97 nextchild = &((*nextchild)->next);
98 } else {
99 for (int i = 0; m->m->children && m->m->children[i]; i++) {
100 *nextchild = new (match_node_t);
101 (*nextchild)->m = m->m->children[i];
102 nextchild = &((*nextchild)->next);
105 if (m->m->end == m->m->start) continue;
106 printf("\033[%zdG\033[0;2m%s\033[0;7;%sm", 1 + 2 * (m->m->start - text), V, color);
107 for (const char *c = m->m->start; c < m->m->end; ++c) {
108 // TODO: newline
109 if (c > m->m->start) printf(" ");
110 // TODO: utf8
111 // while ((*c & 0xC0) != 0x80) printf("%c", *(c++));
112 if (*c == '\n') printf("↵");
113 else if (*c == '\t') printf("⇥");
114 else printf("%c", *c);
116 printf("\033[0;2m%s\033[m", V);
117 } else {
118 *nextchild = new (match_node_t);
119 (*nextchild)->m = m->m;
120 nextchild = &((*nextchild)->next);
121 printf("\033[%zdG\033[0;2m%s", 1 + 2 * (m->m->start - text), V);
122 for (ssize_t i = (ssize_t)(2 * (m->m->end - m->m->start) - 1); i > 0; i--)
123 printf(" ");
124 if (m->m->end > m->m->start) printf("\033[0;2m%s", V);
125 printf("\033[m");
129 // Print stars for zero-width:
130 for (match_node_t *m = firstmatch; m; m = m->next) {
131 if (m->m->end > m->m->start) continue;
132 if (RIGHT_TYPE(m)) {
133 printf("\033[%zdG\033[7;%smâ–’\033[m", 1 + 2 * (m->m->start - text), color);
134 } else {
135 printf("\033[%zdG\033[0;2m%s\033[m", 1 + 2 * (m->m->start - text), V);
139 printf("\n");
141 for (match_node_t *m = firstmatch; m; m = m->next) {
142 if (m->m->end == m->m->start) {
143 if (!RIGHT_TYPE(m)) printf("\033[%zdG\033[0;2m%s", 1 + 2 * (m->m->start - text), V);
144 } else {
145 const char *l = "â””";
146 const char *r = "┘";
147 for (match_node_t *c = children; c; c = c->next) {
148 if (c->m->start == m->m->start || c->m->end == m->m->start) l = V;
149 if (c->m->start == m->m->end || c->m->end == m->m->end) r = V;
151 printf("\033[%zdG\033[0;2m%s", 1 + 2 * (m->m->start - text), l);
152 const char *h = RIGHT_TYPE(m) ? H : " ";
153 for (ssize_t n = (ssize_t)(2 * (m->m->end - m->m->start) - 1); n > 0; n--)
154 printf("%s", h);
155 printf("%s\033[m", r);
158 #undef RIGHT_TYPE
160 printf("\n");
162 if (children) _explain_matches(children, depth + 1, text, textlen);
164 for (match_node_t *c = children, *next = NULL; c; c = next) {
165 next = c->next;
166 delete (&c);
171 // Print a visualization of a match object.
173 public
174 void explain_match(bp_match_t *m) {
175 printf("\033[?7l"); // Disable line wrapping
176 match_node_t first = {.m = m};
177 _explain_matches(&first, 0, m->start, (size_t)(m->end - m->start));
178 printf("\033[?7h"); // Re-enable line wrapping
181 static inline int fputc_safe(FILE *out, char c, print_options_t *opts) {
182 int printed = fputc(c, out);
183 if (c == '\n' && opts && opts->on_nl) {
184 opts->on_nl(out);
185 if (opts->replace_color) printed += fprintf(out, "%s", opts->replace_color);
187 return printed;
190 public
191 int fprint_match(FILE *out, const char *file_start, bp_match_t *m, print_options_t *opts) {
192 int printed = 0;
193 if (m->pat->type == BP_REPLACE) {
194 auto rep = When(m->pat, BP_REPLACE);
195 const char *text = rep->text;
196 const char *end = &text[rep->len];
197 if (opts && opts->replace_color) printed += fprintf(out, "%s", opts->replace_color);
199 // TODO: clean up the line numbering code
200 for (const char *r = text; r < end;) {
201 // Capture substitution
202 if (*r == '@' && r + 1 < end && r[1] != '@') {
203 const char *next = r + 1;
204 // Retrieve the capture value:
205 bp_match_t *cap = NULL;
206 if (isdigit(*next)) {
207 int n = (int)strtol(next, (char **)&next, 10);
208 cap = get_numbered_capture(m->children[0], n);
209 } else {
210 const char *name = next, *name_end = after_name(next, end);
211 if (name_end) {
212 cap = get_named_capture(m->children[0], name, (size_t)(name_end - name));
213 next = name_end;
214 if (next < m->end && *next == ';') ++next;
218 if (cap != NULL) {
219 printed += fprint_match(out, file_start, cap, opts);
220 if (opts && opts->replace_color) printed += fprintf(out, "%s", opts->replace_color);
221 r = next;
222 continue;
226 // Bugfix: if pattern matches at the start of a line and first printed character is from
227 // the replacement string, we need to be sure that the line number gets printed.
228 // Regression test: `seq 3 | bp '{..$}' -r '#@0'` (should not print "#" before line numbers)
229 if (r == text) {
230 if (opts && opts->fprint_between) {
231 printed += opts->fprint_between(out, m->start, m->start, opts->match_color);
232 if (opts->replace_color) printed += fprintf(out, "%s", opts->replace_color);
236 if (*r == '\\') {
237 ++r;
238 if (*r == 'N') { // \N (nodent)
239 ++r;
240 // Mildly hacky: nodents here are based on the *first line*
241 // of the match. If the match spans multiple lines, or if
242 // the replacement text contains newlines, this may get weird.
243 const char *line_start = m->start;
244 while (line_start > file_start && line_start[-1] != '\n')
245 --line_start;
246 printed += fputc_safe(out, '\n', opts);
247 for (const char *p = line_start; p < m->start && (*p == ' ' || *p == '\t'); ++p)
248 printed += fputc(*p, out);
249 continue;
251 printed += fputc_safe(out, unescapechar(r, &r, end), opts);
252 } else {
253 printed += fputc_safe(out, *r, opts);
254 ++r;
257 } else {
258 if (opts && opts->match_color) printed += fprintf(out, "%s", opts->match_color);
259 const char *prev = m->start;
260 for (int i = 0; m->children && m->children[i]; i++) {
261 bp_match_t *child = m->children[i];
262 // Skip children from e.g. zero-width matches like >@foo
263 if (!(prev <= child->start && child->start <= m->end && prev <= child->end && child->end <= m->end))
264 continue;
265 if (child->start > prev) {
266 if (opts && opts->fprint_between)
267 printed += opts->fprint_between(out, prev, child->start, opts->match_color);
268 else printed += fwrite(prev, sizeof(char), (size_t)(child->start - prev), out);
270 printed += fprint_match(out, file_start, child, opts);
271 if (opts && opts->match_color) printed += fprintf(out, "%s", opts->match_color);
272 prev = child->end;
274 if (m->end > prev) {
275 if (opts && opts->fprint_between) printed += opts->fprint_between(out, prev, m->end, opts->match_color);
276 else printed += fwrite(prev, sizeof(char), (size_t)(m->end - prev), out);
279 return printed;
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0