// (Navigation text from the repository web viewer, not part of the source: about, summary, refs, log, tree, commit, diff)

//
// printmatch.c - Debug visualization of pattern matches.
//

// NOTE(review): the system header names were lost in extraction; these three
// are restored from usage (isdigit, printf/FILE, strncmp) - confirm.
#include <ctype.h>
#include <stdio.h>
#include <string.h>

#include "match.h"
#include "printmatch.h"
#include "utils.h"

typedef struct matchnodes { bpmatcht *m; struct matchnodes *next; } matchnodet;

attribute((nonnull, pure)) static int heightofmatch(bpmatcht *m); attribute((nonnull)) static void explainmatches(matchnodet *firstmatch, int depth, const char *text, size_t textlen);

// // Return the height of a match object (i.e. the number of descendents of the // structure). // static int heightofmatch(bpmatcht *m) { int height = 0; for (int i = 0; m->children && m->children[i]; i++) { bpmatcht *child = m->children[i]; int childheight = heightofmatch(child); if (childheight > height) height = childheight; } return 1 + height; }

//
// Append a newly allocated list node referring to `m` at `*tail`, and return
// the address of the new node's `next` field (i.e. the new tail slot).
// NOTE(review): `next` is never assigned here, so this relies on new()
// returning zeroed memory - confirm against utils.h.
//
static match_node_t **append_match_node(match_node_t **tail, bp_match_t *m) {
    match_node_t *node = new (match_node_t);
    node->m = m;
    *tail = node;
    return &node->next;
}

//
// Print a visual explanation for the as-yet-unprinted matches provided.
//
// One call prints two terminal rows for a single pattern (the "viz" pattern):
//   1. For every match in the list, positioned at column 1 + 2 * (offset of
//      the match within `text`): the matched text highlighted if the match's
//      pattern source equals the viz pattern source, otherwise a dim
//      placeholder. The viz pattern's source is printed as a label at column
//      2 * textlen + 3.
//   2. A row of connectors underneath.
// Matches that were not of the viz pattern, plus the children of those that
// were, are collected into a new list and explained recursively with
// depth + 1. `depth` only selects the color (34 for even, 33 for odd).
//
static void _explain_matches(match_node_t *firstmatch, int depth, const char *text, size_t textlen) {
    const char *V = "│"; // Vertical bar
    const char *H = "─"; // Horizontal bar
    const char *color = (depth % 2 == 0) ? "34" : "33";

    // This is a heuristic: print matches first if they have more submatches.
    // In general, this helps reduce the height of the final output by allowing
    // for more rows that show the same rule matching in multiple places.
    // TODO: there may be a better heuristic that optimizes for this factor
    // while also printing earlier matches first when it doesn't affect overall
    // output height.
    bp_match_t *viz = firstmatch->m;
    int viz_height = height_of_match(viz);
    for (match_node_t *p = firstmatch->next; p; p = p->next) {
        int height = height_of_match(p->m);
        if (height > viz_height) {
            viz = p->m;
            viz_height = height;
        }
    }
    const char *viz_type = viz->pat->start;
    size_t viz_typelen = (size_t)(viz->pat->end - viz->pat->start);

    // Backrefs use added dim quote marks to indicate that the pattern is a
    // literal string being matched. (Backrefs have start/end inside the text
    // input, instead of something the user typed in)
    const int is_backref = (viz_type >= text && viz_type <= &text[textlen]);
    // String patterns whose match has the same length as the pattern source
    // get a plain quote mark on each side of the label.
    const int is_literal = !is_backref && viz->pat->type == BP_STRING && (viz->end - viz->start) == (long)viz_typelen;

    if (is_backref) printf("\033[%zuG\033[0;2m\"\033[%s;1m", 2 * textlen + 3, color);
    else if (is_literal) printf("\033[%zuG\033[%s;1m\"", 2 * textlen + 3, color);
    else printf("\033[%zuG\033[%s;1m", 2 * textlen + 3, color);

    for (size_t i = 0; i < viz_typelen; i++) {
        switch (viz_type[i]) {
        case '\n': printf("↵"); break;
        case '\t': printf("⇥"); break;
        default: printf("%c", viz_type[i]); break;
        }
    }

    if (is_backref) printf("\033[0;2m\"");
    else if (is_literal) printf("\"");

    printf("\033[m");

    match_node_t *children = NULL;
    match_node_t **nextchild = &children;

// True when the node's pattern source text is identical to the viz pattern's.
#define RIGHT_TYPE(node)                                                                                               \
    ((node)->m->pat->end == (node)->m->pat->start + viz_typelen                                                        \
     && strncmp((node)->m->pat->start, viz_type, viz_typelen) == 0)

    // Print nonzero-width first:
    for (match_node_t *node = firstmatch; node; node = node->next) {
        bp_match_t *m = node->m;
        if (RIGHT_TYPE(node)) {
            // Instead of printing each subchain on its own line, flatten them all out at once:
            if (m->pat->type == BP_CHAIN) {
                bp_match_t *tmp = m;
                while (tmp->pat->type == BP_CHAIN) {
                    nextchild = append_match_node(nextchild, tmp->children[0]);
                    tmp = tmp->children[1];
                }
                nextchild = append_match_node(nextchild, tmp);
            } else {
                for (int i = 0; m->children && m->children[i]; i++)
                    nextchild = append_match_node(nextchild, m->children[i]);
            }
            if (m->end == m->start) continue;
            // Pointer differences are ptrdiff_t, hence %td.
            printf("\033[%tdG\033[0;2m%s\033[0;7;%sm", 1 + 2 * (m->start - text), V, color);
            for (const char *c = m->start; c < m->end; ++c) {
                // TODO: newline
                if (c > m->start) printf(" ");
                // TODO: utf8 (multi-byte sequences are currently printed byte by byte)
                if (*c == '\n') printf("↵");
                else if (*c == '\t') printf("⇥");
                else printf("%c", *c);
            }
            printf("\033[0;2m%s\033[m", V);
        } else {
            // Not the pattern being shown on this row: defer it to a later
            // row and leave a dim, blank placeholder of the same width.
            nextchild = append_match_node(nextchild, m);
            printf("\033[%tdG\033[0;2m%s", 1 + 2 * (m->start - text), V);
            for (ssize_t i = (ssize_t)(2 * (m->end - m->start) - 1); i > 0; i--)
                printf(" ");
            if (m->end > m->start) printf("\033[0;2m%s", V);
            printf("\033[m");
        }
    }

    // Print stars for zero-width:
    for (match_node_t *node = firstmatch; node; node = node->next) {
        bp_match_t *m = node->m;
        if (m->end > m->start) continue;
        if (RIGHT_TYPE(node)) {
            printf("\033[%tdG\033[7;%sm▒\033[m", 1 + 2 * (m->start - text), color);
        } else {
            printf("\033[%tdG\033[0;2m%s\033[m", 1 + 2 * (m->start - text), V);
        }
    }

    printf("\n");

    // Second row: connectors under each match.
    for (match_node_t *node = firstmatch; node; node = node->next) {
        bp_match_t *m = node->m;
        if (m->end == m->start) {
            if (!RIGHT_TYPE(node)) printf("\033[%tdG\033[0;2m%s", 1 + 2 * (m->start - text), V);
        } else {
            // Use a corner unless some pending child starts or ends at the
            // same position, in which case the vertical bar continues down.
            const char *l = "└";
            const char *r = "┘";
            for (match_node_t *c = children; c; c = c->next) {
                if (c->m->start == m->start || c->m->end == m->start) l = V;
                if (c->m->start == m->end || c->m->end == m->end) r = V;
            }
            printf("\033[%tdG\033[0;2m%s", 1 + 2 * (m->start - text), l);
            const char *h = RIGHT_TYPE(node) ? H : " ";
            for (ssize_t n = (ssize_t)(2 * (m->end - m->start) - 1); n > 0; n--)
                printf("%s", h);
            printf("%s\033[m", r);
        }
    }

#undef RIGHT_TYPE

    printf("\n");

    if (children) _explain_matches(children, depth + 1, text, textlen);

    // Release the list nodes allocated above (the matches themselves are not freed here).
    for (match_node_t *c = children, *next = NULL; c; c = next) {
        next = c->next;
        delete (&c);
    }
}

// // Print a visualization of a match object. // public void explainmatch(bpmatcht *m) { printf(“\033[?7l”); // Disable line wrapping matchnode_t first = {.m = m}; explainmatches(&first, 0, m->start, (size_t)(m->end - m->start)); printf(“\033[?7h”); // Re-enable line wrapping }

static inline int fputcsafe(FILE *out, char c, printoptionst *opts) { int printed = fputc(c, out); if (c == ‘\n’ && opts && opts->onnl) { opts->onnl(out); if (opts->replacecolor) printed += fprintf(out, “%s”, opts->replace_color); } return printed; }

public int fprintmatch(FILE *out, const char *filestart, bpmatcht *m, printoptionst *opts) { int printed = 0; if (m->pat->type == BPREPLACE) { auto rep = When(m->pat, BPREPLACE); const char *text = rep->text; const char *end = &text[rep->len]; if (opts && opts->replacecolor) printed += fprintf(out, “%s”, opts->replacecolor);

    // TODO: clean up the line numbering code
    for (const char *r = text; r < end;) {
        // Capture substitution
        if (*r == '@' && r + 1 < end && r[1] != '@') {
            const char *next = r + 1;
            // Retrieve the capture value:
            bp_match_t *cap = NULL;
            if (isdigit(*next)) {
                int n = (int)strtol(next, (char **)&next, 10);
                cap = get_numbered_capture(m->children[0], n);
            } else {
                const char *name = next, *name_end = after_name(next, end);
                if (name_end) {
                    cap = get_named_capture(m->children[0], name, (size_t)(name_end - name));
                    next = name_end;
                    if (next < m->end && *next == ';') ++next;
                }
            }

            if (cap != NULL) {
                printed += fprint_match(out, file_start, cap, opts);
                if (opts && opts->replace_color) printed += fprintf(out, "%s", opts->replace_color);
                r = next;
                continue;
            }
        }

        // Bugfix: if pattern matches at the start of a line and first printed character is from
        // the replacement string, we need to be sure that the line number gets printed.
        // Regression test: `seq 3 | bp '{..$}' -r '#@0'` (should not print "#" before line numbers)
        if (r == text) {
            if (opts && opts->fprint_between) {
                printed += opts->fprint_between(out, m->start, m->start, opts->match_color);
                if (opts->replace_color) printed += fprintf(out, "%s", opts->replace_color);
            }
        }

        if (*r == '\\') {
            ++r;
            if (*r == 'N') { // \N (nodent)
                ++r;
                // Mildly hacky: nodents here are based on the *first line*
                // of the match. If the match spans multiple lines, or if
                // the replacement text contains newlines, this may get weird.
                const char *line_start = m->start;
                while (line_start > file_start && line_start[-1] != '\n')
                    --line_start;
                printed += fputc_safe(out, '\n', opts);
                for (const char *p = line_start; p < m->start && (*p == ' ' || *p == '\t'); ++p)
                    printed += fputc(*p, out);
                continue;
            }
            printed += fputc_safe(out, unescapechar(r, &r, end), opts);
        } else {
            printed += fputc_safe(out, *r, opts);
            ++r;
        }
    }
} else {
    if (opts && opts->match_color) printed += fprintf(out, "%s", opts->match_color);
    const char *prev = m->start;
    for (int i = 0; m->children && m->children[i]; i++) {
        bp_match_t *child = m->children[i];
        // Skip children from e.g. zero-width matches like >@foo
        if (!(prev <= child->start && child->start <= m->end && prev <= child->end && child->end <= m->end))
            continue;
        if (child->start > prev) {
            if (opts && opts->fprint_between)
                printed += opts->fprint_between(out, prev, child->start, opts->match_color);
            else printed += fwrite(prev, sizeof(char), (size_t)(child->start - prev), out);
        }
        printed += fprint_match(out, file_start, child, opts);
        if (opts && opts->match_color) printed += fprintf(out, "%s", opts->match_color);
        prev = child->end;
    }
    if (m->end > prev) {
        if (opts && opts->fprint_between) printed += opts->fprint_between(out, prev, m->end, opts->match_color);
        else printed += fwrite(prev, sizeof(char), (size_t)(m->end - prev), out);
    }
}
return printed;

}

// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,:0