From cc84c3d7916640b75ca4dc0785f9b1f417df1664 Mon Sep 17 00:00:00 2001
From: Bruce Hill <bruce@bruce-hill.com>
Date: Mon, 19 Jul 2021 19:57:59 -0700
Subject: Made escape sequence handling stricter: no longer supporting
 arbitrary characters, only special escapes like \n, hex sequences like \x0a,
 octal sequences like \012, and backslashes \\

---
 bp.1      |  4 ++--
 bp.1.md   |  4 ++--
 pattern.c |  4 +++-
 print.c   | 13 ++++++++-----
 utils.c   |  7 +++++--
 5 files changed, 20 insertions(+), 12 deletions(-)

diff --git a/bp.1 b/bp.1
index 3fa6b45..77966c0 100644
--- a/bp.1
+++ b/bp.1
@@ -95,8 +95,8 @@ with one or two patterns.
 \f[B]bp\f[R] is designed around this fact.
 The default mode for bp patterns is \[lq]string pattern mode\[rq].
 In string pattern mode, all characters are interpreted literally except
-for the backslash (\f[B]\[rs]\f[R]), which may be followed by a bp
-pattern (see the \f[B]PATTERNS\f[R] section below).
+for the backslash (\f[B]\[rs]\f[R]), which may be followed by an escape
+sequence or a bp pattern (see the \f[B]PATTERNS\f[R] section below).
 Optionally, the bp pattern may be terminated by a semicolon
 (\f[B];\f[R]).
 .SH PATTERNS
diff --git a/bp.1.md b/bp.1.md
index 6991b6a..60a51a9 100644
--- a/bp.1.md
+++ b/bp.1.md
@@ -90,8 +90,8 @@ literal strings, or strings that are primarily plain strings, with one or two
 patterns. `bp` is designed around this fact. The default mode for bp patterns
 is "string pattern mode". In string pattern mode, all characters are
 interpreted literally except for the backslash (`\`), which may be followed by
-a bp pattern (see the **PATTERNS** section below). Optionally, the bp pattern
-may be terminated by a semicolon (`;`).
+an escape sequence or a bp pattern (see the **PATTERNS** section below).
+Optionally, the bp pattern may be terminated by a semicolon (`;`).
 
 
 # PATTERNS
diff --git a/pattern.c b/pattern.c
index 91b9351..32b1376 100644
--- a/pattern.c
+++ b/pattern.c
@@ -306,9 +306,11 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str)
 
                 const char *opstart = str;
                 unsigned char e = (unsigned char)unescapechar(str, &str);
+                if (str == opstart)
+                    file_err(f, start, str, "This isn't a valid escape sequence.");
                 if (matchchar(&str, '-')) { // Escape range (e.g. \x00-\xFF)
                     if (next_char(f, str) != str+1)
-                        file_err(f, start, next_char(f, str), "Sorry, UTF8 escape sequences are not supported.");
+                        file_err(f, start, next_char(f, str), "Sorry, UTF8 escape sequences are not supported in ranges.");
                     const char *seqstart = str;
                     unsigned char e2 = (unsigned char)unescapechar(str, &str);
                     if (str == seqstart)
diff --git a/print.c b/print.c
index eb5f67d..a66893b 100644
--- a/print.c
+++ b/print.c
@@ -162,12 +162,15 @@ static void _print_match(FILE *out, printer_t *pr, match_t *m)
                     }
                     continue;
                 }
+                const char *start = r;
                 char c = unescapechar(r, &r);
-                (void)fputc(c, out);
-                if (c == '\n') {
-                    ++line;
-                    pr->needs_line_number = 1;
-                }
+                if (r > start) {
+                    (void)fputc(c, out);
+                    if (c == '\n') {
+                        ++line;
+                        pr->needs_line_number = 1;
+                    }
+                } else (void)fputc('\\', out);
                 continue;
             } else if (*r == '\n') {
                 (void)fputc('\n', out);
diff --git a/utils.c b/utils.c
index fd13918..93f6ebd 100644
--- a/utils.c
+++ b/utils.c
@@ -88,7 +88,7 @@ char unescapechar(const char *escaped, const char **end)
         case 'a': ret = '\a'; break; case 'b': ret = '\b'; break;
         case 'n': ret = '\n'; break; case 'r': ret = '\r'; break;
         case 't': ret = '\t'; break; case 'v': ret = '\v'; break;
-        case 'e': ret = '\033'; break;
+        case 'e': ret = '\033'; break; case '\\': ret = '\\'; break;
         case 'x': { // Hex
             static const unsigned char hextable[255] = {
                 ['0']=0x10, ['1']=0x1, ['2']=0x2, ['3']=0x3, ['4']=0x4,
@@ -114,7 +114,10 @@ char unescapechar(const char *escaped, const char **end)
             }
             break;
         }
-        default: break;
+        default: {
+            if (end) *end = escaped;
+            return (char)0;
+        }
     }
     if (end) *end = &escaped[len];
     return (char)ret;
-- 
cgit v1.2.3