From cc84c3d7916640b75ca4dc0785f9b1f417df1664 Mon Sep 17 00:00:00 2001 From: Bruce Hill Date: Mon, 19 Jul 2021 19:57:59 -0700 Subject: Made escape sequence handling stricter: no longer supporting arbitrary characters, only special escapes like \n, hex sequences like \x0a, octal sequences like \012, and backslashes \\ --- bp.1 | 4 ++-- bp.1.md | 4 ++-- pattern.c | 4 +++- print.c | 13 ++++++++----- utils.c | 7 +++++-- 5 files changed, 20 insertions(+), 12 deletions(-) diff --git a/bp.1 b/bp.1 index 3fa6b45..77966c0 100644 --- a/bp.1 +++ b/bp.1 @@ -95,8 +95,8 @@ with one or two patterns. \f[B]bp\f[R] is designed around this fact. The default mode for bp patterns is \[lq]string pattern mode\[rq]. In string pattern mode, all characters are interpreted literally except -for the backslash (\f[B]\[rs]\f[R]), which may be followed by a bp -pattern (see the \f[B]PATTERNS\f[R] section below). +for the backslash (\f[B]\[rs]\f[R]), which may be followed by an escape +or a bp pattern (see the \f[B]PATTERNS\f[R] section below). Optionally, the bp pattern may be terminated by a semicolon (\f[B];\f[R]). .SH PATTERNS diff --git a/bp.1.md b/bp.1.md index 6991b6a..60a51a9 100644 --- a/bp.1.md +++ b/bp.1.md @@ -90,8 +90,8 @@ literal strings, or strings that are primarily plain strings, with one or two patterns. `bp` is designed around this fact. The default mode for bp patterns is "string pattern mode". In string pattern mode, all characters are interpreted literally except for the backslash (`\`), which may be followed by -a bp pattern (see the **PATTERNS** section below). Optionally, the bp pattern -may be terminated by a semicolon (`;`). +an escape or a bp pattern (see the **PATTERNS** section below). Optionally, the +bp pattern may be terminated by a semicolon (`;`). # PATTERNS diff --git a/pattern.c b/pattern.c index 91b9351..32b1376 100644 --- a/pattern.c +++ b/pattern.c @@ -306,9 +306,11 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str) const char *opstart = str; unsigned char e = (unsigned char)unescapechar(str, &str); + if (str == opstart) + file_err(f, start, str, "This isn't a valid escape sequence."); if (matchchar(&str, '-')) { // Escape range (e.g. \x00-\xFF) if (next_char(f, str) != str+1) - file_err(f, start, next_char(f, str), "Sorry, UTF8 escape sequences are not supported."); + file_err(f, start, next_char(f, str), "Sorry, UTF8 escape sequences are not supported in ranges."); const char *seqstart = str; unsigned char e2 = (unsigned char)unescapechar(str, &str); if (str == seqstart) diff --git a/print.c b/print.c index eb5f67d..a66893b 100644 --- a/print.c +++ b/print.c @@ -162,12 +162,15 @@ static void _print_match(FILE *out, printer_t *pr, match_t *m) } continue; } + const char *start = r; char c = unescapechar(r, &r); - (void)fputc(c, out); - if (c == '\n') { - ++line; - pr->needs_line_number = 1; - } + if (r > start) { + (void)fputc(c, out); + if (c == '\n') { + ++line; + pr->needs_line_number = 1; + } + } else (void)fputc('\\', out); continue; } else if (*r == '\n') { (void)fputc('\n', out); diff --git a/utils.c b/utils.c index fd13918..93f6ebd 100644 --- a/utils.c +++ b/utils.c @@ -88,7 +88,7 @@ char unescapechar(const char *escaped, const char **end) case 'a': ret = '\a'; break; case 'b': ret = '\b'; break; case 'n': ret = '\n'; break; case 'r': ret = '\r'; break; case 't': ret = '\t'; break; case 'v': ret = '\v'; break; - case 'e': ret = '\033'; break; + case 'e': ret = '\033'; break; case '\\': ret = '\\'; break; case 'x': { // Hex static const unsigned char hextable[255] = { ['0']=0x10, ['1']=0x1, ['2']=0x2, ['3']=0x3, ['4']=0x4, @@ -114,7 +114,10 @@ char unescapechar(const char *escaped, const char **end) } break; } - default: break; + default: { + if (end) *end = escaped; + return (char)0; + } } if (end) *end = &escaped[len]; return (char)ret; -- cgit v1.2.3