WIP
This commit is contained in:
parent
c81601729e
commit
04eb92153f
@ -39,7 +39,7 @@ clean:
|
||||
lbp.o: lbp.c
|
||||
$(CC) -c $(ALL_FLAGS) -o $@ $<
|
||||
|
||||
bp.so: lbp.o ../print.o ../files.o ../pattern.o ../utils.o ../utf8.o ../match.o ../definitions.o
|
||||
bp.so: lbp.o ../pattern.o ../utils.o ../utf8.o ../match.o ../definitions.o
|
||||
$(MAKESO) -o $@ $^
|
||||
|
||||
test: bp.so
|
||||
|
60
Lua/lbp.c
60
Lua/lbp.c
@ -11,8 +11,6 @@
|
||||
#include "lua.h"
|
||||
#include "lauxlib.h"
|
||||
|
||||
#include "../print.h"
|
||||
#include "../files.h"
|
||||
#include "../pattern.h"
|
||||
#include "../match.h"
|
||||
|
||||
@ -33,19 +31,12 @@ static inline void push_parse_error(lua_State *L, maybe_pat_t m)
|
||||
free(buf);
|
||||
}
|
||||
|
||||
static void push_matchstring(lua_State *L, file_t *f, match_t *m)
|
||||
static void push_matchstring(lua_State *L, match_t *m)
|
||||
{
|
||||
char *buf = NULL;
|
||||
size_t size = 0;
|
||||
FILE *out = open_memstream(&buf, &size);
|
||||
printer_t pr = {
|
||||
.file = f,
|
||||
.context_before = NO_CONTEXT,
|
||||
.context_after = NO_CONTEXT,
|
||||
.use_color = 0,
|
||||
.lineformat = "",
|
||||
};
|
||||
print_match(out, &pr, m);
|
||||
fprint_match(out, m->start, m);
|
||||
fflush(out);
|
||||
lua_pushlstring(L, buf, size);
|
||||
fclose(out);
|
||||
@ -54,7 +45,7 @@ static void push_matchstring(lua_State *L, file_t *f, match_t *m)
|
||||
static int Ltostring(lua_State *L)
|
||||
{
|
||||
match_t **m = (match_t**)lua_touserdata(L, 1);
|
||||
push_matchstring(L, NULL, *m);
|
||||
push_matchstring(L, *m);
|
||||
return 1;
|
||||
}
|
||||
|
||||
@ -80,7 +71,7 @@ static int Lindex(lua_State *L)
|
||||
if (type == LUA_TNUMBER) {
|
||||
int n = luaL_checkinteger(L, 2);
|
||||
if (n == 0) {
|
||||
push_matchstring(L, NULL, *m);
|
||||
push_matchstring(L, *m);
|
||||
return 1;
|
||||
} else if (n > 0) {
|
||||
ret = get_numbered_capture(*m, n);
|
||||
@ -133,18 +124,15 @@ static int Lmatch(lua_State *L)
|
||||
if (index > (lua_Integer)strlen(text)+1)
|
||||
return 0;
|
||||
|
||||
file_t *pat_file = spoof_file(NULL, "<pattern argument>", pat_text, patlen);
|
||||
maybe_pat_t maybe_pat = bp_pattern(pat_file->start, pat_file->end);
|
||||
maybe_pat_t maybe_pat = bp_pattern(pat_text, pat_text + patlen);
|
||||
if (!maybe_pat.success) {
|
||||
push_parse_error(L, maybe_pat);
|
||||
destroy_file(&pat_file);
|
||||
return 0;
|
||||
}
|
||||
|
||||
file_t *text_file = spoof_file(NULL, "<text argument>", text+(index-1), textlen);
|
||||
match_t *m = NULL;
|
||||
int ret = 0;
|
||||
if (next_match(&m, NULL, text_file, maybe_pat.value.pat, NULL, false)) {
|
||||
if (next_match(&m, NULL, text, &text[textlen], maybe_pat.value.pat, NULL, false)) {
|
||||
|
||||
// lua_createtable(L, 0, 1);
|
||||
|
||||
@ -158,17 +146,13 @@ static int Lmatch(lua_State *L)
|
||||
// int n = 1;
|
||||
// assign_captures(L, text_file, m, &n);
|
||||
|
||||
lua_pushinteger(L, (int)(m->start - text_file->start) + index);
|
||||
lua_pushinteger(L, (int)(m->start - text) + index);
|
||||
lua_pushinteger(L, (int)(m->end - m->start));
|
||||
|
||||
// stop_matching(&m);
|
||||
ret = 3;
|
||||
}
|
||||
|
||||
// destroy_file(&pat_file);
|
||||
// destroy_file(&text_file);
|
||||
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -182,48 +166,34 @@ static int Lreplace(lua_State *L)
|
||||
if (index > (lua_Integer)strlen(text)+1)
|
||||
index = (lua_Integer)strlen(text)+1;
|
||||
|
||||
file_t *pat_file = spoof_file(NULL, "<pattern argument>", pat_text, patlen);
|
||||
maybe_pat_t maybe_pat = bp_pattern(pat_file->start, pat_file->end);
|
||||
maybe_pat_t maybe_pat = bp_pattern(pat_text, pat_text + patlen);
|
||||
if (!maybe_pat.success) {
|
||||
push_parse_error(L, maybe_pat);
|
||||
destroy_file(&pat_file);
|
||||
return 0;
|
||||
}
|
||||
file_t *rep_file = spoof_file(NULL, "<replacement argument>", rep_text, replen);
|
||||
maybe_pat = bp_replacement(maybe_pat.value.pat, rep_file->start, rep_file->end);
|
||||
maybe_pat = bp_replacement(maybe_pat.value.pat, rep_text, rep_text + replen);
|
||||
if (!maybe_pat.success) {
|
||||
push_parse_error(L, maybe_pat);
|
||||
destroy_file(&pat_file);
|
||||
destroy_file(&rep_file);
|
||||
return 0;
|
||||
}
|
||||
|
||||
file_t *text_file = spoof_file(NULL, "<text argument>", text+(index-1), textlen);
|
||||
char *buf = NULL;
|
||||
size_t size = 0;
|
||||
FILE *out = open_memstream(&buf, &size);
|
||||
printer_t pr = {
|
||||
.file = text_file,
|
||||
.context_before = ALL_CONTEXT,
|
||||
.context_after = ALL_CONTEXT,
|
||||
.use_color = 0,
|
||||
.lineformat = "",
|
||||
};
|
||||
int replacements = 0;
|
||||
for (match_t *m = NULL; next_match(&m, NULL, text_file, maybe_pat.value.pat, NULL, false); ) {
|
||||
print_match(out, &pr, m);
|
||||
const char *prev = text;
|
||||
for (match_t *m = NULL; next_match(&m, NULL, text, &text[textlen], maybe_pat.value.pat, NULL, false); ) {
|
||||
fwrite(prev, sizeof(char), (size_t)(m->start - prev), out);
|
||||
fprint_match(out, text, m);
|
||||
prev = m->end;
|
||||
++replacements;
|
||||
}
|
||||
print_match(out, &pr, NULL);
|
||||
fwrite(prev, sizeof(char), (size_t)(&text[textlen] - prev), out);
|
||||
fflush(out);
|
||||
lua_pushlstring(L, buf, size);
|
||||
lua_pushinteger(L, replacements);
|
||||
fclose(out);
|
||||
|
||||
// destroy_file(&pat_file);
|
||||
// destroy_file(&rep_file);
|
||||
// destroy_file(&text_file);
|
||||
|
||||
return 2;
|
||||
}
|
||||
|
||||
|
77
bp.c
77
bp.c
@ -180,7 +180,7 @@ static int is_text_file(const char *filename)
|
||||
static int print_matches_as_json(def_t *defs, file_t *f, pat_t *pattern)
|
||||
{
|
||||
int nmatches = 0;
|
||||
for (match_t *m = NULL; next_match(&m, defs, f, pattern, options.skip, options.ignorecase); ) {
|
||||
for (match_t *m = NULL; next_match(&m, defs, f->start, f->end, pattern, options.skip, options.ignorecase); ) {
|
||||
if (++nmatches > 1)
|
||||
printf(",\n");
|
||||
printf("{\"filename\":\"%s\",\"match\":", f->filename);
|
||||
@ -196,7 +196,7 @@ static int print_matches_as_json(def_t *defs, file_t *f, pat_t *pattern)
|
||||
static int explain_matches(def_t *defs, file_t *f, pat_t *pattern)
|
||||
{
|
||||
int nmatches = 0;
|
||||
for (match_t *m = NULL; next_match(&m, defs, f, pattern, options.skip, options.ignorecase); ) {
|
||||
for (match_t *m = NULL; next_match(&m, defs, f->start, f->end, pattern, options.skip, options.ignorecase); ) {
|
||||
if (++nmatches == 1) {
|
||||
if (options.print_filenames)
|
||||
fprint_filename(stdout, f->filename);
|
||||
@ -234,6 +234,36 @@ static void sig_handler(int sig)
|
||||
if (kill(0, sig)) _exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
void fprint_between(FILE *out, file_t *f, const char *prev, const char *next)
|
||||
{
|
||||
// if (options.context_before == NO_CONTEXT && options.context_after == NO_CONTEXT)
|
||||
// return;
|
||||
if (options.context_before == ALL_CONTEXT || options.context_after == ALL_CONTEXT) {
|
||||
fwrite(prev ? prev : f->start, sizeof(char), (size_t)((next ? next : f->end) - (prev ? prev : f->start)), out);
|
||||
return;
|
||||
}
|
||||
const char *after_prev = prev, *before_next = next;
|
||||
if (prev && options.context_after >= 0) {
|
||||
size_t after_prev_line = get_line_number(f, prev) + (size_t)options.context_after + 1;
|
||||
after_prev = get_line(f, after_prev_line > f->nlines ? f->nlines : after_prev_line);
|
||||
}
|
||||
if (next && options.context_before >= 0) {
|
||||
size_t before_next_line = get_line_number(f, next);
|
||||
before_next_line = options.context_before >= (int)before_next_line ? 1 : before_next_line - (size_t)options.context_before;
|
||||
before_next = get_line(f, before_next_line);
|
||||
}
|
||||
if (!prev) {
|
||||
fwrite(before_next, sizeof(char), (size_t)(next - before_next), out);
|
||||
} else if (!next) {
|
||||
fwrite(prev, sizeof(char), (size_t)(after_prev - prev), out);
|
||||
} else if (after_prev >= before_next) {
|
||||
fwrite(prev, sizeof(char), (size_t)(next - prev), out);
|
||||
} else {
|
||||
fwrite(prev, sizeof(char), (size_t)(after_prev - prev), out);
|
||||
fwrite(before_next, sizeof(char), (size_t)(next - before_next), out);
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
// Print all the matches in a file.
|
||||
//
|
||||
@ -241,29 +271,18 @@ static int print_matches(FILE *out, def_t *defs, file_t *f, pat_t *pattern)
|
||||
{
|
||||
static int printed_filenames = 0;
|
||||
int matches = 0;
|
||||
printer_t pr = {
|
||||
.file = f,
|
||||
.context_before = options.context_before,
|
||||
.context_after = options.context_after,
|
||||
.use_color = options.format == FORMAT_FANCY,
|
||||
.lineformat = LINE_FORMATS[options.format],
|
||||
};
|
||||
|
||||
for (match_t *m = NULL; next_match(&m, defs, f, pattern, options.skip, options.ignorecase); ) {
|
||||
const char *prev = NULL;
|
||||
for (match_t *m = NULL; next_match(&m, defs, f->start, f->end, pattern, options.skip, options.ignorecase); ) {
|
||||
if (++matches == 1 && options.print_filenames) {
|
||||
if (printed_filenames++ > 0) printf("\n");
|
||||
fprint_filename(out, f->filename);
|
||||
}
|
||||
print_match(out, &pr, m);
|
||||
fprint_between(out, f, prev, m->start);
|
||||
fprint_match(out, f->start, m);
|
||||
prev = m->end;
|
||||
}
|
||||
|
||||
if (matches > 0 || (f->filename[0] == '\0' && options.context_before == ALL_CONTEXT)) {
|
||||
// Print trailing context lines:
|
||||
print_match(out, &pr, NULL);
|
||||
// Guarantee trailing newline:
|
||||
if (!pr.needs_line_number) fprintf(out, "\n");
|
||||
}
|
||||
|
||||
if (matches > 0)
|
||||
fprint_between(out, f, prev, NULL);
|
||||
return matches;
|
||||
}
|
||||
|
||||
@ -285,7 +304,7 @@ static int process_file(def_t *defs, const char *filename, pat_t *pattern)
|
||||
matches += explain_matches(defs, f, pattern);
|
||||
} else if (options.mode == MODE_LISTFILES) {
|
||||
match_t *m = NULL;
|
||||
if (next_match(&m, defs, f, pattern, options.skip, options.ignorecase)) {
|
||||
if (next_match(&m, defs, f->start, f->end, pattern, options.skip, options.ignorecase)) {
|
||||
printf("%s\n", f->filename);
|
||||
matches += 1;
|
||||
}
|
||||
@ -294,7 +313,7 @@ static int process_file(def_t *defs, const char *filename, pat_t *pattern)
|
||||
matches += print_matches_as_json(defs, f, pattern);
|
||||
} else if (options.mode == MODE_INPLACE) {
|
||||
match_t *m = NULL;
|
||||
bool found = next_match(&m, defs, f, pattern, options.skip, options.ignorecase);
|
||||
bool found = next_match(&m, defs, f->start, f->end, pattern, options.skip, options.ignorecase);
|
||||
stop_matching(&m);
|
||||
if (!found) return 0;
|
||||
|
||||
@ -393,6 +412,20 @@ static int process_git_files(def_t *defs, pat_t *pattern, int argc, char *argv[]
|
||||
return found;
|
||||
}
|
||||
|
||||
//
|
||||
// Load the given grammar (semicolon-separated definitions)
|
||||
// and return the first rule defined.
|
||||
//
|
||||
static def_t *load_grammar(def_t *defs, file_t *f)
|
||||
{
|
||||
maybe_pat_t maybe_pat = bp_pattern(f->start, f->end);
|
||||
if (!maybe_pat.success)
|
||||
file_err(f, maybe_pat.value.error.start, maybe_pat.value.error.end, maybe_pat.value.error.msg);
|
||||
for (pat_t *p = maybe_pat.value.pat; p && p->type == BP_DEFINITION; p = p->args.def.pat)
|
||||
defs = with_def(defs, p->args.def.namelen, p->args.def.name, p->args.def.def);
|
||||
return defs;
|
||||
}
|
||||
|
||||
//
|
||||
// Convert a context string to an integer
|
||||
//
|
||||
|
@ -27,20 +27,6 @@ def_t *with_def(def_t *defs, size_t namelen, const char *name, pat_t *pat)
|
||||
return def;
|
||||
}
|
||||
|
||||
//
|
||||
// Load the given grammar (semicolon-separated definitions)
|
||||
// and return the first rule defined.
|
||||
//
|
||||
def_t *load_grammar(def_t *defs, file_t *f)
|
||||
{
|
||||
maybe_pat_t maybe_pat = bp_pattern(f->start, f->end);
|
||||
if (!maybe_pat.success)
|
||||
file_err(f, maybe_pat.value.error.start, maybe_pat.value.error.end, maybe_pat.value.error.msg);
|
||||
for (pat_t *p = maybe_pat.value.pat; p && p->type == BP_DEFINITION; p = p->args.def.pat)
|
||||
defs = with_def(defs, p->args.def.namelen, p->args.def.name, p->args.def.def);
|
||||
return defs;
|
||||
}
|
||||
|
||||
//
|
||||
// Look up a backreference or grammar definition by name
|
||||
//
|
||||
|
@ -4,7 +4,6 @@
|
||||
#ifndef DEFINITIONS__H
|
||||
#define DEFINITIONS__H
|
||||
|
||||
#include "files.h"
|
||||
#include "pattern.h"
|
||||
|
||||
//
|
||||
@ -20,8 +19,6 @@ typedef struct def_s {
|
||||
|
||||
__attribute__((nonnull(3,4), returns_nonnull))
|
||||
def_t *with_def(def_t *defs, size_t namelen, const char *name, pat_t *pat);
|
||||
__attribute__((nonnull(2)))
|
||||
def_t *load_grammar(def_t *defs, file_t *f);
|
||||
__attribute__((pure, nonnull(3)))
|
||||
def_t *lookup(def_t *defs, size_t namelen, const char *name);
|
||||
__attribute__((nonnull(1)))
|
||||
|
4
files.c
4
files.c
@ -36,7 +36,7 @@ static void populate_lines(file_t *f)
|
||||
f->lines = grow(f->lines, linecap *= 2);
|
||||
f->lines[n] = p;
|
||||
do {
|
||||
char *nl = strchr(p, '\n');
|
||||
char *nl = memchr(p, '\n', (size_t)(f->end - p));
|
||||
if (nl) {
|
||||
p = nl+1;
|
||||
break;
|
||||
@ -234,7 +234,7 @@ void fprint_line(FILE *dest, file_t *f, const char *start, const char *end, cons
|
||||
va_end(args);
|
||||
(void)fputc('\n', dest);
|
||||
|
||||
const char *eol = linenum == f->nlines ? strchr(line, '\0') : strchr(line, '\n');
|
||||
const char *eol = linenum == f->nlines ? f->end : strchr(line, '\n');
|
||||
if (end == NULL || end > eol) end = eol;
|
||||
fprintf(dest, "\033[2m%5lu\033(0\x78\033(B\033[m%.*s\033[41;30m%.*s\033[m%.*s\n",
|
||||
linenum,
|
||||
|
319
match.c
319
match.c
@ -2,6 +2,7 @@
|
||||
// match.c - Code for the BP virtual machine that performs the matching.
|
||||
//
|
||||
|
||||
#include <ctype.h>
|
||||
#include <err.h>
|
||||
#include <limits.h>
|
||||
#include <stdbool.h>
|
||||
@ -17,11 +18,20 @@
|
||||
|
||||
#define MAX_CACHE_SIZE (1<<14)
|
||||
|
||||
// Cache datastructure
|
||||
typedef struct {
|
||||
size_t size, occupancy;
|
||||
match_t **matches;
|
||||
} cache_t;
|
||||
|
||||
// Data structure for various ambient state for matching
|
||||
typedef struct {
|
||||
def_t *defs;
|
||||
cache_t *cache;
|
||||
const char *start, *end;
|
||||
bool ignorecase;
|
||||
} match_context_t;
|
||||
|
||||
// New match objects are either recycled from unused match objects or allocated
|
||||
// from the heap. While it is in use, the match object is stored in the
|
||||
// `in_use_matches` linked list. Once it is no longer needed, it is moved to
|
||||
@ -33,8 +43,8 @@ static match_t *in_use_matches = NULL;
|
||||
|
||||
#define MATCHES(...) (match_t*[]){__VA_ARGS__, NULL}
|
||||
|
||||
__attribute__((hot, nonnull(2,3,4,5)))
|
||||
static match_t *match(def_t *defs, cache_t *cache, file_t *f, const char *str, pat_t *pat, bool ignorecase);
|
||||
__attribute__((hot, nonnull(1,2,3)))
|
||||
static match_t *match(match_context_t *ctx, const char *str, pat_t *pat);
|
||||
|
||||
// Store a value and update its refcount
|
||||
static inline void add_owner(match_t** owner, match_t* owned)
|
||||
@ -239,22 +249,22 @@ static pat_t *first_pat(def_t *defs, pat_t *pat)
|
||||
//
|
||||
// Find the next match after prev (or the first match if prev is NULL)
|
||||
//
|
||||
__attribute__((nonnull(3,5)))
|
||||
static match_t *_next_match(def_t *defs, cache_t *cache, file_t *f, const char *str, pat_t *pat, pat_t *skip, bool ignorecase)
|
||||
__attribute__((nonnull(1,2,3)))
|
||||
static match_t *_next_match(match_context_t *ctx, const char *str, pat_t *pat, pat_t *skip)
|
||||
{
|
||||
// Prune the unnecessary entries from the cache (those not between start/end)
|
||||
if (cache->matches) {
|
||||
for (size_t i = 0; i < cache->size; i++) {
|
||||
for (match_t *m = cache->matches[i], *next = NULL; m; m = next) {
|
||||
if (ctx->cache->matches) {
|
||||
for (size_t i = 0; i < ctx->cache->size; i++) {
|
||||
for (match_t *m = ctx->cache->matches[i], *next = NULL; m; m = next) {
|
||||
next = m->cache.next;
|
||||
if (m->start < f->start || (m->end ? m->end : m->start) > f->end)
|
||||
cache_remove(cache, m);
|
||||
if (m->start < ctx->start || (m->end ? m->end : m->start) > ctx->end)
|
||||
cache_remove(ctx->cache, m);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pat = deref(defs, pat);
|
||||
pat_t *first = first_pat(defs, pat);
|
||||
pat = deref(ctx->defs, pat);
|
||||
pat_t *first = first_pat(ctx->defs, pat);
|
||||
|
||||
// Performance optimization: if the pattern starts with a string literal,
|
||||
// we can just rely on the highly optimized strstr()/strcasestr()
|
||||
@ -264,27 +274,27 @@ static match_t *_next_match(def_t *defs, cache_t *cache, file_t *f, const char *
|
||||
if (first->args.string[i] == '\0')
|
||||
goto pattern_search;
|
||||
char *tmp = strndup(first->args.string, first->min_matchlen);
|
||||
char *found = (ignorecase ? strcasestr : strstr)(str, tmp);
|
||||
char *found = (ctx->ignorecase ? strcasestr : strstr)(str, tmp);
|
||||
if (found)
|
||||
str = found;
|
||||
else
|
||||
str += strlen(str); // Use += strlen here instead of f->end to handle files with NULL bytes
|
||||
str += strlen(str); // Use += strlen here instead of ctx->end to handle files with NULL bytes
|
||||
free(tmp);
|
||||
}
|
||||
|
||||
pattern_search:
|
||||
if (str > f->end) return NULL;
|
||||
if (str > ctx->end) return NULL;
|
||||
|
||||
do {
|
||||
match_t *m = match(defs, cache, f, str, pat, ignorecase);
|
||||
match_t *m = match(ctx, str, pat);
|
||||
if (m) return m;
|
||||
if (first->type == BP_START_OF_FILE) return NULL;
|
||||
match_t *s;
|
||||
if (skip && (s = match(defs, cache, f, str, skip, ignorecase))) {
|
||||
if (skip && (s = match(ctx, str, skip))) {
|
||||
str = s->end > str ? s->end : str + 1;
|
||||
recycle_if_unused(&s);
|
||||
} else str = next_char(str, f->end);
|
||||
} while (str < f->end);
|
||||
} else str = next_char(str, ctx->end);
|
||||
} while (str < ctx->end);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@ -293,13 +303,14 @@ static match_t *_next_match(def_t *defs, cache_t *cache, file_t *f, const char *
|
||||
// match object, or NULL if no match is found.
|
||||
// The returned value should be free()'d to avoid memory leaking.
|
||||
//
|
||||
static match_t *match(def_t *defs, cache_t *cache, file_t *f, const char *str, pat_t *pat, bool ignorecase)
|
||||
static match_t *match(match_context_t *ctx, const char *str, pat_t *pat)
|
||||
{
|
||||
switch (pat->type) {
|
||||
case BP_DEFINITION: {
|
||||
def_t *defs2 = with_def(defs, pat->args.def.namelen, pat->args.def.name, pat->args.def.def);
|
||||
match_t *m = match(defs2, cache, f, str, pat->args.def.pat ? pat->args.def.pat : pat->args.def.def, ignorecase);
|
||||
defs = free_defs(defs2, defs);
|
||||
match_context_t ctx2 = *ctx;
|
||||
ctx2.defs = with_def(ctx->defs, pat->args.def.namelen, pat->args.def.name, pat->args.def.def);
|
||||
match_t *m = match(&ctx2, str, pat->args.def.pat ? pat->args.def.pat : pat->args.def.def);
|
||||
free_defs(ctx2.defs, ctx->defs);
|
||||
return m;
|
||||
}
|
||||
case BP_LEFTRECURSION: {
|
||||
@ -312,59 +323,60 @@ static match_t *match(def_t *defs, cache_t *cache, file_t *f, const char *str, p
|
||||
++pat->args.leftrec.visits;
|
||||
return pat->args.leftrec.match;
|
||||
} else {
|
||||
return match(defs, cache, f, str, pat->args.leftrec.fallback, ignorecase);
|
||||
return match(ctx, str, pat->args.leftrec.fallback);
|
||||
}
|
||||
}
|
||||
case BP_ANYCHAR: {
|
||||
return (str < f->end && *str != '\n') ? new_match(defs, pat, str, next_char(str, f->end), NULL) : NULL;
|
||||
return (str < ctx->end && *str != '\n') ? new_match(ctx->defs, pat, str, next_char(str, ctx->end), NULL) : NULL;
|
||||
}
|
||||
case BP_ID_START: {
|
||||
return (str < f->end && isidstart(str, f->end)) ? new_match(defs, pat, str, next_char(str, f->end), NULL) : NULL;
|
||||
return (str < ctx->end && isidstart(str, ctx->end)) ? new_match(ctx->defs, pat, str, next_char(str, ctx->end), NULL) : NULL;
|
||||
}
|
||||
case BP_ID_CONTINUE: {
|
||||
return (str < f->end && isidcontinue(str, f->end)) ? new_match(defs, pat, str, next_char(str, f->end), NULL) : NULL;
|
||||
return (str < ctx->end && isidcontinue(str, ctx->end)) ? new_match(ctx->defs, pat, str, next_char(str, ctx->end), NULL) : NULL;
|
||||
}
|
||||
case BP_START_OF_FILE: {
|
||||
return (str == f->start) ? new_match(defs, pat, str, str, NULL) : NULL;
|
||||
return (str == ctx->start) ? new_match(ctx->defs, pat, str, str, NULL) : NULL;
|
||||
}
|
||||
case BP_START_OF_LINE: {
|
||||
return (str == f->start || str[-1] == '\n') ? new_match(defs, pat, str, str, NULL) : NULL;
|
||||
return (str == ctx->start || str[-1] == '\n') ? new_match(ctx->defs, pat, str, str, NULL) : NULL;
|
||||
}
|
||||
case BP_END_OF_FILE: {
|
||||
return (str == f->end || (str == f->end-1 && *str == '\n')) ? new_match(defs, pat, str, str, NULL) : NULL;
|
||||
return (str == ctx->end || (str == ctx->end-1 && *str == '\n')) ? new_match(ctx->defs, pat, str, str, NULL) : NULL;
|
||||
}
|
||||
case BP_END_OF_LINE: {
|
||||
return (str == f->end || *str == '\n') ? new_match(defs, pat, str, str, NULL) : NULL;
|
||||
return (str == ctx->end || *str == '\n') ? new_match(ctx->defs, pat, str, str, NULL) : NULL;
|
||||
}
|
||||
case BP_WORD_BOUNDARY: {
|
||||
return (str == f->start || isidcontinue(str, f->end) != isidcontinue(prev_char(f->start, str), f->end)) ? new_match(defs, pat, str, str, NULL) : NULL;
|
||||
return (str == ctx->start || isidcontinue(str, ctx->end) != isidcontinue(prev_char(ctx->start, str), ctx->end)) ?
|
||||
new_match(ctx->defs, pat, str, str, NULL) : NULL;
|
||||
}
|
||||
case BP_STRING: {
|
||||
if (&str[pat->min_matchlen] > f->end) return NULL;
|
||||
if (pat->min_matchlen > 0 && (ignorecase ? memicmp : memcmp)(str, pat->args.string, pat->min_matchlen) != 0)
|
||||
if (&str[pat->min_matchlen] > ctx->end) return NULL;
|
||||
if (pat->min_matchlen > 0 && (ctx->ignorecase ? memicmp : memcmp)(str, pat->args.string, pat->min_matchlen) != 0)
|
||||
return NULL;
|
||||
return new_match(defs, pat, str, str + pat->min_matchlen, NULL);
|
||||
return new_match(ctx->defs, pat, str, str + pat->min_matchlen, NULL);
|
||||
}
|
||||
case BP_RANGE: {
|
||||
if (str >= f->end) return NULL;
|
||||
if (str >= ctx->end) return NULL;
|
||||
if ((unsigned char)*str < pat->args.range.low || (unsigned char)*str > pat->args.range.high)
|
||||
return NULL;
|
||||
return new_match(defs, pat, str, str+1, NULL);
|
||||
return new_match(ctx->defs, pat, str, str+1, NULL);
|
||||
}
|
||||
case BP_NOT: {
|
||||
match_t *m = match(defs, cache, f, str, pat->args.pat, ignorecase);
|
||||
match_t *m = match(ctx, str, pat->args.pat);
|
||||
if (m != NULL) {
|
||||
recycle_if_unused(&m);
|
||||
return NULL;
|
||||
}
|
||||
return new_match(defs, pat, str, str, NULL);
|
||||
return new_match(ctx->defs, pat, str, str, NULL);
|
||||
}
|
||||
case BP_UPTO: case BP_UPTO_STRICT: {
|
||||
match_t *m = new_match(defs, pat, str, str, NULL);
|
||||
pat_t *target = deref(defs, pat->args.multiple.first),
|
||||
*skip = deref(defs, pat->args.multiple.second);
|
||||
match_t *m = new_match(ctx->defs, pat, str, str, NULL);
|
||||
pat_t *target = deref(ctx->defs, pat->args.multiple.first),
|
||||
*skip = deref(ctx->defs, pat->args.multiple.second);
|
||||
if (!target && !skip) {
|
||||
while (str < f->end && *str != '\n') ++str;
|
||||
while (str < ctx->end && *str != '\n') ++str;
|
||||
m->end = str;
|
||||
return m;
|
||||
}
|
||||
@ -373,18 +385,18 @@ static match_t *match(def_t *defs, cache_t *cache, file_t *f, const char *str, p
|
||||
for (const char *prev = NULL; prev < str; ) {
|
||||
prev = str;
|
||||
if (target) {
|
||||
match_t *p = match(defs, cache, f, str, target, ignorecase);
|
||||
match_t *p = match(ctx, str, target);
|
||||
if (p != NULL) {
|
||||
recycle_if_unused(&p);
|
||||
m->end = str;
|
||||
return m;
|
||||
}
|
||||
} else if (str == f->end) {
|
||||
} else if (str == ctx->end) {
|
||||
m->end = str;
|
||||
return m;
|
||||
}
|
||||
if (skip) {
|
||||
match_t *s = match(defs, cache, f, str, skip, ignorecase);
|
||||
match_t *s = match(ctx, str, skip);
|
||||
if (s != NULL) {
|
||||
str = s->end;
|
||||
if (nchildren+2 >= child_cap) {
|
||||
@ -398,29 +410,29 @@ static match_t *match(def_t *defs, cache_t *cache, file_t *f, const char *str, p
|
||||
// This isn't in the for() structure because there needs to
|
||||
// be at least once chance to match the pattern, even if
|
||||
// we're at the end of the string already (e.g. "..$").
|
||||
if (str < f->end && *str != '\n' && pat->type != BP_UPTO_STRICT)
|
||||
str = next_char(str, f->end);
|
||||
if (str < ctx->end && *str != '\n' && pat->type != BP_UPTO_STRICT)
|
||||
str = next_char(str, ctx->end);
|
||||
}
|
||||
recycle_if_unused(&m);
|
||||
return NULL;
|
||||
}
|
||||
case BP_REPEAT: {
|
||||
match_t *m = new_match(defs, pat, str, str, NULL);
|
||||
match_t *m = new_match(ctx->defs, pat, str, str, NULL);
|
||||
size_t reps = 0;
|
||||
ssize_t max = pat->args.repetitions.max;
|
||||
pat_t *repeating = deref(defs, pat->args.repetitions.repeat_pat);
|
||||
pat_t *sep = deref(defs, pat->args.repetitions.sep);
|
||||
pat_t *repeating = deref(ctx->defs, pat->args.repetitions.repeat_pat);
|
||||
pat_t *sep = deref(ctx->defs, pat->args.repetitions.sep);
|
||||
size_t child_cap = 0, nchildren = 0;
|
||||
for (reps = 0; max == -1 || reps < (size_t)max; ++reps) {
|
||||
const char *start = str;
|
||||
// Separator
|
||||
match_t *msep = NULL;
|
||||
if (sep != NULL && reps > 0) {
|
||||
msep = match(defs, cache, f, str, sep, ignorecase);
|
||||
msep = match(ctx, str, sep);
|
||||
if (msep == NULL) break;
|
||||
str = msep->end;
|
||||
}
|
||||
match_t *mp = match(defs, cache, f, str, repeating, ignorecase);
|
||||
match_t *mp = match(ctx, str, repeating);
|
||||
if (mp == NULL) {
|
||||
str = start;
|
||||
if (msep) recycle_if_unused(&msep);
|
||||
@ -465,51 +477,52 @@ static match_t *match(def_t *defs, cache_t *cache, file_t *f, const char *str, p
|
||||
return m;
|
||||
}
|
||||
case BP_AFTER: {
|
||||
pat_t *back = deref(defs, pat->args.pat);
|
||||
pat_t *back = deref(ctx->defs, pat->args.pat);
|
||||
if (!back) return NULL;
|
||||
|
||||
// We only care about the region from the backtrack pos up to the
|
||||
// current pos, so mock it out as a file slice.
|
||||
// TODO: this breaks ^/^^/$/$$, but that can probably be ignored
|
||||
// because you rarely need to check those in a backtrack.
|
||||
cache_t slice_cache = {0};
|
||||
file_t slice;
|
||||
slice_file(&slice, f, f->start, str);
|
||||
match_context_t slice_ctx = *ctx;
|
||||
slice_ctx.cache = &(cache_t){0};
|
||||
slice_ctx.start = ctx->start;
|
||||
slice_ctx.end = str;
|
||||
for (const char *pos = &str[-(long)back->min_matchlen];
|
||||
pos >= f->start && (back->max_matchlen == -1 || pos >= &str[-(int)back->max_matchlen]);
|
||||
pos = prev_char(f->start, pos)) {
|
||||
cache_destroy(&slice_cache);
|
||||
slice.start = (char*)pos;
|
||||
match_t *m = match(defs, &slice_cache, &slice, pos, back, ignorecase);
|
||||
pos >= ctx->start && (back->max_matchlen == -1 || pos >= &str[-(int)back->max_matchlen]);
|
||||
pos = prev_char(ctx->start, pos)) {
|
||||
cache_destroy(slice_ctx.cache);
|
||||
slice_ctx.start = (char*)pos;
|
||||
match_t *m = match(&slice_ctx, pos, back);
|
||||
// Match should not go past str (i.e. (<"AB" "B") should match "ABB", but not "AB")
|
||||
if (m && m->end != str)
|
||||
recycle_if_unused(&m);
|
||||
else if (m) {
|
||||
cache_destroy(&slice_cache);
|
||||
return new_match(defs, pat, str, str, MATCHES(m));
|
||||
cache_destroy(slice_ctx.cache);
|
||||
return new_match(ctx->defs, pat, str, str, MATCHES(m));
|
||||
}
|
||||
if (pos == f->start) break;
|
||||
if (pos == ctx->start) break;
|
||||
// To prevent extreme performance degradation, don't keep
|
||||
// walking backwards endlessly over newlines.
|
||||
if (back->max_matchlen == -1 && *pos == '\n') break;
|
||||
}
|
||||
cache_destroy(&slice_cache);
|
||||
cache_destroy(slice_ctx.cache);
|
||||
return NULL;
|
||||
}
|
||||
case BP_BEFORE: {
|
||||
match_t *after = match(defs, cache, f, str, pat->args.pat, ignorecase);
|
||||
return after ? new_match(defs, pat, str, str, MATCHES(after)) : NULL;
|
||||
match_t *after = match(ctx, str, pat->args.pat);
|
||||
return after ? new_match(ctx->defs, pat, str, str, MATCHES(after)) : NULL;
|
||||
}
|
||||
case BP_CAPTURE: {
|
||||
match_t *p = match(defs, cache, f, str, pat->args.pat, ignorecase);
|
||||
return p ? new_match(defs, pat, str, p->end, MATCHES(p)) : NULL;
|
||||
match_t *p = match(ctx, str, pat->args.pat);
|
||||
return p ? new_match(ctx->defs, pat, str, p->end, MATCHES(p)) : NULL;
|
||||
}
|
||||
case BP_OTHERWISE: {
|
||||
match_t *m = match(defs, cache, f, str, pat->args.multiple.first, ignorecase);
|
||||
return m ? m : match(defs, cache, f, str, pat->args.multiple.second, ignorecase);
|
||||
match_t *m = match(ctx, str, pat->args.multiple.first);
|
||||
return m ? m : match(ctx, str, pat->args.multiple.second);
|
||||
}
|
||||
case BP_CHAIN: {
|
||||
match_t *m1 = match(defs, cache, f, str, pat->args.multiple.first, ignorecase);
|
||||
match_t *m1 = match(ctx, str, pat->args.multiple.first);
|
||||
if (m1 == NULL) return NULL;
|
||||
|
||||
match_t *m2;
|
||||
@ -518,17 +531,18 @@ static match_t *match(def_t *defs, cache_t *cache, file_t *f, const char *str, p
|
||||
// Temporarily add a rule that the backref name matches the
|
||||
// exact string of the original match (no replacements)
|
||||
pat_t *backref = bp_raw_literal(m1->start, (size_t)(m1->end - m1->start));
|
||||
def_t *defs2 = with_def(defs, m1->pat->args.capture.namelen, m1->pat->args.capture.name, backref);
|
||||
match_context_t ctx2 = *ctx;
|
||||
ctx2.defs = with_def(ctx->defs, m1->pat->args.capture.namelen, m1->pat->args.capture.name, backref);
|
||||
++m1->refcount; {
|
||||
m2 = match(defs2, cache, f, m1->end, pat->args.multiple.second, ignorecase);
|
||||
m2 = match(&ctx2, m1->end, pat->args.multiple.second);
|
||||
if (!m2) { // No need to keep the backref in memory if it didn't match
|
||||
free_pat(backref);
|
||||
backref = NULL;
|
||||
}
|
||||
defs = free_defs(defs2, defs);
|
||||
free_defs(ctx2.defs, ctx->defs);
|
||||
} --m1->refcount;
|
||||
} else {
|
||||
m2 = match(defs, cache, f, m1->end, pat->args.multiple.second, ignorecase);
|
||||
m2 = match(ctx, m1->end, pat->args.multiple.second);
|
||||
}
|
||||
|
||||
if (m2 == NULL) {
|
||||
@ -536,42 +550,43 @@ static match_t *match(def_t *defs, cache_t *cache, file_t *f, const char *str, p
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return new_match(defs, pat, str, m2->end, MATCHES(m1, m2));
|
||||
return new_match(ctx->defs, pat, str, m2->end, MATCHES(m1, m2));
|
||||
}
|
||||
case BP_MATCH: case BP_NOT_MATCH: {
|
||||
match_t *m1 = match(defs, cache, f, str, pat->args.multiple.first, ignorecase);
|
||||
match_t *m1 = match(ctx, str, pat->args.multiple.first);
|
||||
if (m1 == NULL) return NULL;
|
||||
|
||||
// <p1>~<p2> matches iff the text of <p1> matches <p2>
|
||||
// <p1>!~<p2> matches iff the text of <p1> does not match <p2>
|
||||
cache_t slice_cache = {0};
|
||||
file_t slice;
|
||||
slice_file(&slice, f, m1->start, m1->end);
|
||||
match_t *m2 = _next_match(defs, &slice_cache, &slice, slice.start, pat->args.multiple.second, NULL, ignorecase);
|
||||
match_context_t slice_ctx = *ctx;
|
||||
slice_ctx.cache = &(cache_t){0};
|
||||
slice_ctx.start = m1->start;
|
||||
slice_ctx.end = m1->end;
|
||||
match_t *m2 = _next_match(&slice_ctx, slice_ctx.start, pat->args.multiple.second, NULL);
|
||||
if ((!m2 && pat->type == BP_MATCH) || (m2 && pat->type == BP_NOT_MATCH)) {
|
||||
cache_destroy(&slice_cache);
|
||||
cache_destroy(slice_ctx.cache);
|
||||
if (m2) recycle_if_unused(&m2);
|
||||
recycle_if_unused(&m1);
|
||||
return NULL;
|
||||
}
|
||||
match_t *ret = new_match(defs, pat, m1->start, m1->end, (pat->type == BP_MATCH) ? MATCHES(m1, m2) : MATCHES(m1));
|
||||
cache_destroy(&slice_cache);
|
||||
match_t *ret = new_match(ctx->defs, pat, m1->start, m1->end, (pat->type == BP_MATCH) ? MATCHES(m1, m2) : MATCHES(m1));
|
||||
cache_destroy(slice_ctx.cache);
|
||||
return ret;
|
||||
}
|
||||
case BP_REPLACE: {
|
||||
match_t *p = NULL;
|
||||
if (pat->args.replace.pat) {
|
||||
p = match(defs, cache, f, str, pat->args.replace.pat, ignorecase);
|
||||
p = match(ctx, str, pat->args.replace.pat);
|
||||
if (p == NULL) return NULL;
|
||||
}
|
||||
return new_match(defs, pat, str, p ? p->end : str, MATCHES(p));
|
||||
return new_match(ctx->defs, pat, str, p ? p->end : str, MATCHES(p));
|
||||
}
|
||||
case BP_REF: {
|
||||
match_t *cached;
|
||||
if (cache_get(cache, defs, str, pat, &cached))
|
||||
if (cache_get(ctx->cache, ctx->defs, str, pat, &cached))
|
||||
return cached;
|
||||
|
||||
def_t *def = lookup(defs, pat->args.ref.len, pat->args.ref.name);
|
||||
def_t *def = lookup(ctx->defs, pat->args.ref.len, pat->args.ref.name);
|
||||
if (def == NULL)
|
||||
errx(EXIT_FAILURE, "Unknown identifier: '%.*s'", (int)pat->args.ref.len, pat->args.ref.name);
|
||||
pat_t *ref = def->pat;
|
||||
@ -589,17 +604,18 @@ static match_t *match(def_t *defs, cache_t *cache, file_t *f, const char *str, p
|
||||
.fallback = ref,
|
||||
},
|
||||
};
|
||||
def_t defs2 = {
|
||||
match_context_t ctx2 = *ctx;
|
||||
ctx2.defs = &(def_t){
|
||||
.namelen = def->namelen,
|
||||
.name = def->name,
|
||||
.pat = &rec_op,
|
||||
.next = defs,
|
||||
.next = ctx->defs,
|
||||
};
|
||||
|
||||
const char *prev = str;
|
||||
match_t *m = match(&defs2, cache, f, str, ref, ignorecase);
|
||||
match_t *m = match(&ctx2, str, ref);
|
||||
if (m == NULL) {
|
||||
cache_save(cache, defs, str, pat, NULL);
|
||||
cache_save(ctx->cache, ctx->defs, str, pat, NULL);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@ -608,7 +624,7 @@ static match_t *match(def_t *defs, cache_t *cache, file_t *f, const char *str, p
|
||||
remove_ownership(&rec_op.args.leftrec.match);
|
||||
add_owner(&rec_op.args.leftrec.match, m);
|
||||
prev = m->end;
|
||||
match_t *m2 = match(&defs2, cache, f, str, ref, ignorecase);
|
||||
match_t *m2 = match(&ctx2, str, ref);
|
||||
if (m2 == NULL) break;
|
||||
if (m2->end <= prev) {
|
||||
recycle_if_unused(&m2);
|
||||
@ -622,8 +638,8 @@ static match_t *match(def_t *defs, cache_t *cache, file_t *f, const char *str, p
|
||||
// leftrec.match is GC'd. It also helps with visualization of match
|
||||
// results.
|
||||
// OPTIMIZE: remove this if necessary
|
||||
match_t *wrap = new_match(defs, pat, m->start, m->end, MATCHES(m));
|
||||
cache_save(cache, defs, str, pat, wrap);
|
||||
match_t *wrap = new_match(ctx->defs, pat, m->start, m->end, MATCHES(m));
|
||||
cache_save(ctx->cache, ctx->defs, str, pat, wrap);
|
||||
|
||||
if (rec_op.args.leftrec.match)
|
||||
remove_ownership(&rec_op.args.leftrec.match);
|
||||
@ -635,21 +651,21 @@ static match_t *match(def_t *defs, cache_t *cache, file_t *f, const char *str, p
|
||||
const char *start = str;
|
||||
|
||||
const char *p = str;
|
||||
while (p > f->start && p[-1] != '\n') --p;
|
||||
while (p > ctx->start && p[-1] != '\n') --p;
|
||||
|
||||
// Current indentation:
|
||||
char denter = *p;
|
||||
int dents = 0;
|
||||
if (denter == ' ' || denter == '\t') {
|
||||
for (; *p == denter && p < f->end; ++p) ++dents;
|
||||
for (; *p == denter && p < ctx->end; ++p) ++dents;
|
||||
}
|
||||
|
||||
// Subsequent indentation:
|
||||
while (*str == '\n' || *str == '\n') ++str;
|
||||
for (int i = 0; i < dents; i++)
|
||||
if (&str[i] >= f->end || str[i] != denter) return NULL;
|
||||
if (&str[i] >= ctx->end || str[i] != denter) return NULL;
|
||||
|
||||
return new_match(defs, pat, start, &str[dents], NULL);
|
||||
return new_match(ctx->defs, pat, start, &str[dents], NULL);
|
||||
}
|
||||
default: {
|
||||
errx(EXIT_FAILURE, "Unknown pattern type: %u", pat->type);
|
||||
@ -750,26 +766,33 @@ size_t free_all_matches(void)
|
||||
// Iterate over matches.
|
||||
// Usage: for (match_t *m = NULL; next_match(&m, ...); ) {...}
|
||||
//
|
||||
bool next_match(match_t **m, def_t *defs, file_t *f, pat_t *pat, pat_t *skip, bool ignorecase)
|
||||
bool next_match(match_t **m, def_t *defs, const char *start, const char *end, pat_t *pat, pat_t *skip, bool ignorecase)
|
||||
{
|
||||
static cache_t cache = {0};
|
||||
if (!f || !pat) { // Cleanup for stop_matching()
|
||||
if (!pat) { // Cleanup for stop_matching()
|
||||
recycle_if_unused(m);
|
||||
cache_destroy(&cache);
|
||||
return false;
|
||||
}
|
||||
|
||||
const char *start;
|
||||
const char *pos;
|
||||
if (*m) {
|
||||
// Make sure forward progress is occurring, even after zero-width matches:
|
||||
start = ((*m)->end > (*m)->start) ? (*m)->end : (*m)->end+1;
|
||||
pos = ((*m)->end > (*m)->start) ? (*m)->end : (*m)->end+1;
|
||||
recycle_if_unused(m);
|
||||
} else {
|
||||
start = f->start;
|
||||
pos = start;
|
||||
cache_destroy(&cache);
|
||||
}
|
||||
|
||||
*m = (start <= f->end) ? _next_match(defs, &cache, f, start, pat, skip, ignorecase) : NULL;
|
||||
match_context_t ctx = {
|
||||
.defs = defs,
|
||||
.cache = &cache,
|
||||
.start = start,
|
||||
.end = end,
|
||||
.ignorecase = ignorecase,
|
||||
};
|
||||
*m = (pos <= end) ? _next_match(&ctx, pos, pat, skip) : NULL;
|
||||
if (!*m) cache_destroy(&cache);
|
||||
return *m != NULL;
|
||||
}
|
||||
@ -820,4 +843,88 @@ match_t *get_named_capture(match_t *m, const char *name, size_t namelen)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void fprint_match(FILE *out, const char *file_start, match_t *m)
|
||||
{
|
||||
if (m->pat->type == BP_REPLACE) {
|
||||
const char *text = m->pat->args.replace.text;
|
||||
const char *end = &text[m->pat->args.replace.len];
|
||||
|
||||
// TODO: clean up the line numbering code
|
||||
for (const char *r = text; r < end; ) {
|
||||
// Capture substitution
|
||||
if (*r == '@' && r+1 < end && r[1] != '@') {
|
||||
++r;
|
||||
|
||||
// Retrieve the capture value:
|
||||
match_t *cap = NULL;
|
||||
if (isdigit(*r)) {
|
||||
int n = (int)strtol(r, (char**)&r, 10);
|
||||
cap = get_numbered_capture(m->children[0], n);
|
||||
} else {
|
||||
const char *name = r, *end = after_name(r);
|
||||
if (end > name) {
|
||||
cap = get_named_capture(m->children[0], name, (size_t)(end - name));
|
||||
r = end;
|
||||
if (r < m->end && *r == ';') ++r;
|
||||
}
|
||||
}
|
||||
|
||||
if (cap != NULL) {
|
||||
fprint_match(out, file_start, cap);
|
||||
continue;
|
||||
} else {
|
||||
--r;
|
||||
}
|
||||
}
|
||||
|
||||
if (*r == '\\') {
|
||||
++r;
|
||||
if (*r == 'N') { // \N (nodent)
|
||||
++r;
|
||||
// Mildly hacky: nodents here are based on the *first line*
|
||||
// of the match. If the match spans multiple lines, or if
|
||||
// the replacement text contains newlines, this may get weird.
|
||||
const char *line_start = m->start;
|
||||
while (line_start > file_start && line_start[-1] != '\n') --line_start;
|
||||
char denter = *line_start == '\t' ? '\t' : ' ';
|
||||
fputc('\n', out);
|
||||
if (denter == ' ' || denter == '\t') {
|
||||
for (const char *p = line_start; p && *p == denter && p < m->start; ++p)
|
||||
fputc(denter, out);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
const char *start = r;
|
||||
char c = unescapechar(r, &r);
|
||||
if (r > start) (void)fputc(c, out);
|
||||
else (void)fputc('\\', out);
|
||||
continue;
|
||||
} else if (*r == '\n') {
|
||||
(void)fputc('\n', out);
|
||||
++r;
|
||||
continue;
|
||||
} else {
|
||||
(void)fputc(*r, out);
|
||||
++r;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
const char *prev = m->start;
|
||||
for (int i = 0; m->children && m->children[i]; i++) {
|
||||
match_t *child = m->children[i];
|
||||
// Skip children from e.g. zero-width matches like >@foo
|
||||
if (!(prev <= child->start && child->start <= m->end &&
|
||||
prev <= child->end && child->end <= m->end))
|
||||
continue;
|
||||
if (child->start > prev)
|
||||
fwrite(prev, sizeof(char), (size_t)(child->start - prev), out);
|
||||
fprint_match(out, file_start, child);
|
||||
prev = child->end;
|
||||
}
|
||||
if (m->end > prev)
|
||||
fwrite(prev, sizeof(char), (size_t)(m->end - prev), out);
|
||||
}
|
||||
}
|
||||
|
||||
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
|
||||
|
7
match.h
7
match.h
@ -7,7 +7,6 @@
|
||||
#include <stdbool.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "files.h"
|
||||
#include "pattern.h"
|
||||
#include "definitions.h"
|
||||
|
||||
@ -40,12 +39,14 @@ __attribute__((nonnull))
|
||||
void recycle_if_unused(match_t **at_m);
|
||||
size_t free_all_matches(void);
|
||||
size_t recycle_all_matches(void);
|
||||
bool next_match(match_t **m, def_t *defs, file_t *f, pat_t *pat, pat_t *skip, bool ignorecase);
|
||||
#define stop_matching(m) next_match(m, NULL, NULL, NULL, NULL, 0)
|
||||
bool next_match(match_t **m, def_t *defs, const char *start, const char *end, pat_t *pat, pat_t *skip, bool ignorecase);
|
||||
#define stop_matching(m) next_match(m, NULL, NULL, NULL, NULL, NULL, 0)
|
||||
__attribute__((nonnull))
|
||||
match_t *get_numbered_capture(match_t *m, int n);
|
||||
__attribute__((nonnull, pure))
|
||||
match_t *get_named_capture(match_t *m, const char *name, size_t namelen);
|
||||
__attribute__((nonnull))
|
||||
void fprint_match(FILE *out, const char *file_start, match_t *m);
|
||||
|
||||
#endif
|
||||
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
|
||||
|
Loading…
Reference in New Issue
Block a user