From ad85fb1da590f988cb64f270cf2239c06405b2a7 Mon Sep 17 00:00:00 2001 From: Bruce Hill Date: Sat, 17 Jul 2021 14:19:55 -0700 Subject: Added support for multiple escape sequences: \n,r,t --- bp.1 | 16 +++++++++----- bp.1.md | 9 +++++--- pattern.c | 75 ++++++++++++++++++++++++++++++++++----------------------------- 3 files changed, 57 insertions(+), 43 deletions(-) diff --git a/bp.1 b/bp.1 index 6ef59e2..6b65b0d 100644 --- a/bp.1 +++ b/bp.1 @@ -1,4 +1,4 @@ -.\" Automatically generated by Pandoc 2.11.3 +.\" Automatically generated by Pandoc 2.14.0.2 .\" .TH "BP" "1" "May 17 2021" "" "" .hy @@ -160,16 +160,16 @@ Escape sequences are not allowed. The literal character \f[I]c\f[R] (e.g.\ \f[B]\[ga]\[at]\f[R] matches the \[lq]\[at]\[rq] character) .TP -\f[B]\[ga]\f[R]\f[I]c1\f[R]\f[B],\f[R]\f[I]c2\f[R] -The literal character \f[I]c1\f[R] or \f[I]c2\f[R] -(e.g.\ \f[B]\[ga]a,e,i,o,u\f[R]) -.TP \f[B]\[ga]\f[R]\f[I]c1\f[R]\f[B]-\f[R]\f[I]c2\f[R] The character range \f[I]c1\f[R] to \f[I]c2\f[R] (e.g.\ \f[B]\[ga]a-z\f[R]). Multiple ranges can be combined with a comma (e.g.\ \f[B]\[ga]a-z,A-Z\f[R]). .TP +\f[B]\[ga]\f[R]\f[I]c1\f[R]\f[B],\f[R]\f[I]c2\f[R] +Any one of the given characters or character ranges \f[I]c1\f[R] or +\f[I]c2\f[R] (e.g.\ \f[B]\[ga]a,e,i,o,u,0-9\f[R]) +.TP \f[B]\[rs]\f[R]\f[I]esc\f[R] An escape sequence (e.g.\ \f[B]\[rs]n\f[R], \f[B]\[rs]x1F\f[R], \f[B]\[rs]033\f[R], etc.) 
@@ -178,6 +178,10 @@ An escape sequence (e.g.\ \f[B]\[rs]n\f[R], \f[B]\[rs]x1F\f[R], An escape sequence range from \f[I]esc1\f[R] to \f[I]esc2\f[R] (e.g.\ \f[B]\[rs]x00-x1F\f[R]) .TP +\f[B]\[rs]\f[R]\f[I]esc1\f[R]\f[B],\f[R]\f[I]esc2\f[R] +Any one of the given escape sequences or ranges \f[I]esc1\f[R] or +\f[I]esc2\f[R] (e.g.\ \f[B]\[rs]r,n,x01-x04\f[R]) +.TP \f[B]\[rs]N\f[R] A special case escape that matches a \[lq]nodent\[rq]: one or more newlines followed by the same indentation that occurs on the current @@ -205,7 +209,7 @@ At least \f[I]N\f[R] or more repetitions of \f[I]pat\f[R] .TP \f[B]*\f[R] \f[I]pat\f[R] Some \f[I]pat\f[R]s (zero or more, e.g.\ \f[B]* \[dq]x\[dq]\f[R] matches -\f[B]\[dq]\[lq]\f[R], \f[B]\[rq]x\[lq]\f[R], \f[B]\[rq]xx\[dq]\f[R], +\f[B]\[lq]\[lq]\f[R], \f[B]\[rq]x\[rq]\f[R], \f[B]\[lq]xx\[rq]\f[R], etc.) .TP \f[B]+\f[R] \f[I]pat\f[R] diff --git a/bp.1.md b/bp.1.md index a716512..d8b95bc 100644 --- a/bp.1.md +++ b/bp.1.md @@ -146,19 +146,22 @@ sequences are not allowed. `` ` ``*c* : The literal character *c* (e.g. `` `@ `` matches the "@" character) -`` ` ``*c1*`,`*c2* -: The literal character *c1* or *c2* (e.g. `` `a,e,i,o,u ``) - `` ` ``*c1*`-`*c2* : The character range *c1* to *c2* (e.g. `` `a-z ``). Multiple ranges can be combined with a comma (e.g. `` `a-z,A-Z ``). +`` ` ``*c1*`,`*c2* +: Any one of the given characters or character ranges *c1* or *c2* (e.g. `` `a,e,i,o,u,0-9 ``) + `\`*esc* : An escape sequence (e.g. `\n`, `\x1F`, `\033`, etc.) `\`*esc1*`-`*esc2* : An escape sequence range from *esc1* to *esc2* (e.g. `\x00-x1F`) +`\`*esc1*`,`*esc2* +: Any one of the given escape sequences or ranges *esc1* or *esc2* (e.g. `\r,n,x01-x04`) + `\N` : A special case escape that matches a "nodent": one or more newlines followed by the same indentation that occurs on the current line. 
diff --git a/pattern.c b/pattern.c index 6340047..faaacb1 100644 --- a/pattern.c +++ b/pattern.c @@ -250,7 +250,7 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str) // Char literals case '`': { pat_t *all = NULL; - do { + do { // Comma-separated items: if (str >= f->end || !*str || *str == '\n') file_err(f, str, str, "There should be a character here after the '`'"); @@ -288,39 +288,46 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str) if (!*str || *str == '\n') file_err(f, str, str, "There should be an escape sequence here after this backslash."); - if (matchchar(&str, 'N')) // \N (nodent) - return new_pat(f, start, str, 1, -1, BP_NODENT); - - const char *opstart = str; - unsigned char e = (unsigned char)unescapechar(str, &str); - if (matchchar(&str, '-')) { // Escape range (e.g. \x00-\xFF) - if (next_char(f, str) != str+1) - file_err(f, start, next_char(f, str), "Sorry, UTF8 escape sequences are not supported."); - const char *seqstart = str; - unsigned char e2 = (unsigned char)unescapechar(str, &str); - if (str == seqstart) - file_err(f, seqstart, str+1, "This value isn't a valid escape sequence"); - if (e2 < e) - file_err(f, start, str, "Escape ranges should be low-to-high, but this is high-to-low."); - pat_t *esc = new_pat(f, opstart, str, 1, 1, BP_RANGE); - esc->args.range.low = e; - esc->args.range.high = e2; - return esc; - } else if (str > opstart) { - pat_t *esc = new_pat(f, start, str, 1, 1, BP_STRING); - char *s = xcalloc(sizeof(char), 2); - s[0] = (char)e; - esc->args.string = s; - return esc; - } else { - const char *next = next_char(f, opstart); - size_t len = (size_t)(next-opstart); - pat_t *esc = new_pat(f, start, next, len, (ssize_t)len, BP_STRING); - char *s = xcalloc(sizeof(char), 1+len); - memcpy(s, opstart, len); - esc->args.string = s; - return esc; - } + pat_t *all = NULL; + do { // Comma-separated items: + if (matchchar(&str, 'N')) { // \N (nodent) + all = either_pat(f, all, new_pat(f, start, str, 1, -1, BP_NODENT)); 
+ continue; + } + + const char *opstart = str; + unsigned char e = (unsigned char)unescapechar(str, &str); + if (matchchar(&str, '-')) { // Escape range (e.g. \x00-\xFF) + if (next_char(f, str) != str+1) + file_err(f, start, next_char(f, str), "Sorry, UTF8 escape sequences are not supported."); + const char *seqstart = str; + unsigned char e2 = (unsigned char)unescapechar(str, &str); + if (str == seqstart) + file_err(f, seqstart, str+1, "This value isn't a valid escape sequence"); + if (e2 < e) + file_err(f, start, str, "Escape ranges should be low-to-high, but this is high-to-low."); + pat_t *esc = new_pat(f, opstart, str, 1, 1, BP_RANGE); + esc->args.range.low = e; + esc->args.range.high = e2; + all = either_pat(f, all, esc); + } else if (str > opstart) { + pat_t *esc = new_pat(f, start, str, 1, 1, BP_STRING); + char *s = xcalloc(sizeof(char), 2); + s[0] = (char)e; + esc->args.string = s; + all = either_pat(f, all, esc); + } else { + const char *next = next_char(f, opstart); + size_t len = (size_t)(next-opstart); + pat_t *esc = new_pat(f, start, next, len, (ssize_t)len, BP_STRING); + char *s = xcalloc(sizeof(char), 1+len); + memcpy(s, opstart, len); + esc->args.string = s; + all = either_pat(f, all, esc); + } + } while (matchchar(&str, ',')); + + return all; } // String literal case '"': case '\'': case '{': case '\002': { -- cgit v1.2.3