diff options
| author | Bruce Hill <bruce@bruce-hill.com> | 2020-12-19 18:53:51 -0800 |
|---|---|---|
| committer | Bruce Hill <bruce@bruce-hill.com> | 2020-12-19 18:53:51 -0800 |
| commit | c28e6472594637c7c2d4ed826c376fa84737777e (patch) | |
| tree | 81c7e88a9fb829489a29f7f9f34800c954e1f770 | |
| parent | c477bfdbd5d6464cde70806d9a6704c54ede3e88 (diff) | |
Added `a,b,c support
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | README.md | 1 |
| -rw-r--r-- | bp.1 | 4 |
| -rw-r--r-- | compiler.c | 69 |
3 files changed, 54 insertions, 20 deletions
@@ -44,6 +44,7 @@ Pattern | Meaning `_` | Zero or more whitespace characters (excluding newlines) `` `c `` | The literal character `c` `` `a-z `` | The character range `a` through `z` +`` `a,b `` | The character `a` or the character `b` `\n`, `\033`, `\x0A`, etc. | An escape sequence character `\x00-xFF` | An escape sequence range (byte `0x00` through `0xFF` here) `!pat` | `pat` does not match at the current position @@ -111,6 +111,10 @@ The literal \fBcharacter-\fI<c>\fR .B `\fI<c1>\fB-\fI<c2>\fR The \fBcharacter-range-\fI<c1>\fB-to-\fI<c2>\fR +.B `\fI<c1>\fB,\fI<c2>\fR +The literal \fBcharacter-\fI<c1>\fB-or-\fI<c2>\fR (can include arbitrarily many +comma-separated characters or character ranges). + .B \\\\\fI<esc>\fR The \fBescape-sequence-\fI<esc>\fR (\fB\\n\fR, \fB\\x1F\fR, \fB\\033\fR, etc.) @@ -142,27 +142,56 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str) } // Char literals case '`': { - char c = *str; - ++str; - if (!c || c == '\n') - file_err(f, str, str, "There should be a character here after the '`'"); - op->len = 1; - if (matchchar(&str, '-')) { // Range - char c2 = *str; - if (!c2 || c2 == '\n') - file_err(f, str, str, "There should be a character here to complete the character range."); - if (c2 < c) - file_err(f, origin, str+1, "Character ranges must be low-to-high, but this is high-to-low."); - op->op = VM_RANGE; - op->args.range.low = (unsigned char)c; - op->args.range.high = (unsigned char)c2; + vm_op_t *all = NULL; + do { + char c = *str; + if (!c || c == '\n') + file_err(f, str, str, "There should be a character here after the '`'"); + + if (op == NULL) + op = new(vm_op_t); + + op->start = str-1; + op->len = 1; ++str; - } else { - op->op = VM_STRING; - char *s = xcalloc(sizeof(char), 2); - s[0] = c; - op->args.s = s; - } + if (matchchar(&str, '-')) { // Range + char c2 = *str; + if (!c2 || c2 == '\n') + file_err(f, str, str, "There should be a character here to complete the character range."); + op->op = VM_RANGE; + if (c < c2) { + 
op->args.range.low = (unsigned char)c; + op->args.range.high = (unsigned char)c2; + } else { + op->args.range.low = (unsigned char)c2; + op->args.range.high = (unsigned char)c; + } + ++str; + } else { + op->op = VM_STRING; + char *s = xcalloc(sizeof(char), 2); + s[0] = c; + op->args.s = s; + } + + op->end = str; + + if (all == NULL) { + all = op; + } else { + vm_op_t *either = new(vm_op_t); + either->op = VM_OTHERWISE; + either->start = all->start; + either->end = op->end; + either->args.multiple.first = all; + either->args.multiple.second = op; + either->len = 1; + all = either; + } + op = NULL; + } while (matchchar(&str, ',')); + + op = all; break; } // Escapes |
