diff --git a/README.md b/README.md index cbd55ee..1bc1d8f 100644 --- a/README.md +++ b/README.md @@ -44,6 +44,7 @@ Pattern | Meaning `_` | Zero or more whitespace characters (excluding newlines) `` `c `` | The literal character `c` `` `a-z `` | The character range `a` through `z` +`` `a,b `` | The character `a` or the character `b` `\n`, `\033`, `\x0A`, etc. | An escape sequence character `\x00-xFF` | An escape sequence range (byte `0x00` through `0xFF` here) `!pat` | `pat` does not match at the current position diff --git a/bp.1 b/bp.1 index 48076ec..11429e2 100644 --- a/bp.1 +++ b/bp.1 @@ -111,6 +111,10 @@ The literal \fBcharacter-\fI\fR .B `\fI\fB-\fI\fR The \fBcharacter-range-\fI\fB-to-\fI\fR +.B `\fI\fB,\fI\fR +The literal \fBcharacter-\fI\fB-or-\fI\fR (can include arbitrarily many +comma-separated characters or character ranges). + .B \\\\\fI\fR The \fBescape-sequence-\fI\fR (\fB\\n\fR, \fB\\x1F\fR, \fB\\033\fR, etc.) diff --git a/compiler.c b/compiler.c index fb90bc9..937407d 100644 --- a/compiler.c +++ b/compiler.c @@ -142,27 +142,56 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str) } // Char literals case '`': { - char c = *str; - ++str; - if (!c || c == '\n') - file_err(f, str, str, "There should be a character here after the '`'"); - op->len = 1; - if (matchchar(&str, '-')) { // Range - char c2 = *str; - if (!c2 || c2 == '\n') - file_err(f, str, str, "There should be a character here to complete the character range."); - if (c2 < c) - file_err(f, origin, str+1, "Character ranges must be low-to-high, but this is high-to-low."); - op->op = VM_RANGE; - op->args.range.low = (unsigned char)c; - op->args.range.high = (unsigned char)c2; + vm_op_t *all = NULL; + do { + char c = *str; + if (!c || c == '\n') + file_err(f, str, str, "There should be a character here after the '`'"); + + if (op == NULL) + op = new(vm_op_t); + + op->start = str-1; + op->len = 1; ++str; - } else { - op->op = VM_STRING; - char *s = xcalloc(sizeof(char), 2); - s[0] = c; - op->args.s = s; - } + if (matchchar(&str, '-')) { // Range + char c2 = *str; + if (!c2 || c2 == '\n') + file_err(f, str, str, "There should be a character here to complete the character range."); + op->op = VM_RANGE; + if (c < c2) { + op->args.range.low = (unsigned char)c; + op->args.range.high = (unsigned char)c2; + } else { + op->args.range.low = (unsigned char)c2; + op->args.range.high = (unsigned char)c; + } + ++str; + } else { + op->op = VM_STRING; + char *s = xcalloc(sizeof(char), 2); + s[0] = c; + op->args.s = s; + } + + op->end = str; + + if (all == NULL) { + all = op; + } else { + vm_op_t *either = new(vm_op_t); + either->op = VM_OTHERWISE; + either->start = all->start; + either->end = op->end; + either->args.multiple.first = all; + either->args.multiple.second = op; + either->len = 1; + all = either; + } + op = NULL; + } while (matchchar(&str, ',')); + + op = all; break; } // Escapes