Updated '|' to mean word boundary and '\N' to mean nodent

This commit is contained in:
Bruce Hill 2020-12-14 21:28:00 -08:00
parent 8b50fc5d1d
commit 3753bc1be0
4 changed files with 22 additions and 11 deletions

View File

@ -162,6 +162,12 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str)
case '\\': { case '\\': {
if (!*str || *str == '\n') if (!*str || *str == '\n')
file_err(f, str, str, "There should be an escape sequence here after this backslash."); file_err(f, str, str, "There should be an escape sequence here after this backslash.");
if (matchchar(&str, 'N')) { // \N (nodent)
op->op = VM_NODENT;
break;
}
op->len = 1; op->len = 1;
unsigned char e = unescapechar(str, &str); unsigned char e = unescapechar(str, &str);
if (*str == '-') { // Escape range (e.g. \x00-\xFF) if (*str == '-') { // Escape range (e.g. \x00-\xFF)
@ -397,7 +403,7 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str)
break; break;
} }
// Special rules: // Special rules:
case '_': case '^': case '$': { case '_': case '^': case '$': case '|': {
if (matchchar(&str, c)) { // double __, ^^, $$ if (matchchar(&str, c)) { // double __, ^^, $$
char tmp[3] = {c, c, '\0'}; char tmp[3] = {c, c, '\0'};
op->args.s = strdup(tmp); op->args.s = strdup(tmp);
@ -415,10 +421,6 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str)
op->op = VM_REF; op->op = VM_REF;
break; break;
} }
case '|': {
op->op = VM_NODENT;
break;
}
default: { default: {
// Reference // Reference
if (isalpha(c)) { if (isalpha(c)) {
@ -488,6 +490,13 @@ vm_op_t *bpeg_stringpattern(file_t *f, const char *str)
if (*str == '\\') { if (*str == '\\') {
if (!str[1] || str[1] == '\n') if (!str[1] || str[1] == '\n')
file_err(f, str, str, "There should be an escape sequence or pattern here after this backslash."); file_err(f, str, str, "There should be an escape sequence or pattern here after this backslash.");
if (matchchar(&str, 'N')) { // \N (nodent)
interp = calloc(sizeof(vm_op_t), 1);
interp->op = VM_NODENT;
break;
}
const char *after_escape; const char *after_escape;
unsigned char e = unescapechar(&str[1], &after_escape); unsigned char e = unescapechar(&str[1], &after_escape);
if (e != str[1]) { if (e != str[1]) {

View File

@ -39,7 +39,7 @@ vm_op_t *load_grammar(grammar_t *g, file_t *f)
while (*src) { while (*src) {
const char *name = src; const char *name = src;
const char *name_end = after_name(name); const char *name_end = after_name(name);
check(name_end > name, "Invalid name for definition"); check(name_end > name, "Invalid name for definition: %s", name);
name = strndup(name, (size_t)(name_end-name)); name = strndup(name, (size_t)(name_end-name));
src = after_spaces(name_end); src = after_spaces(name_end);
check(matchchar(&src, ':'), "Expected ':' in definition"); check(matchchar(&src, ':'), "Expected ':' in definition");

View File

@ -7,11 +7,12 @@ Def: @name=id _ `: __ (
/ @!={...>(`;/id_`:/$) => "Invalid definition: @0"}) / @!={...>(`;/id_`:/$) => "Invalid definition: @0"})
# This is used for command line arguments: # This is used for command line arguments:
String-pattern: ... % (`\ (escape-sequence / pat [`;])) String-pattern: ... % (Nodent / Escape / `\ pat [`;])
pat: simple-pat !(__("!="/"==")) / suffixed-pat pat: simple-pat !(__("!="/"==")) / suffixed-pat
simple-pat: Upto-and / Dot / String / Char-range / Char / Escape-range / Escape / No simple-pat: Upto-and / Dot / String / Char-range / Char / Nodent / Escape-range
/ Nodent / Repeat / Optional / After / Before / Capture / Replace / Ref / parens / Escape / Repeat / Optional / After / Before / Capture / Replace
/ Ref / parens
suffixed-pat: ( suffixed-pat: (
Eq-pat Eq-pat
@ -39,7 +40,7 @@ escape-sequence: (
/ `x 2 (`0-9/`a-f/`A-F) / `x 2 (`0-9/`a-f/`A-F)
) )
No: `! (_@pat / @!={=>"Expected a pattern after the exclamation mark"}) No: `! (_@pat / @!={=>"Expected a pattern after the exclamation mark"})
Nodent: `| Nodent: `\ `N
Upto-and: 2-3`. [_@first=simple-pat] [__`%__@second=simple-pat] Upto-and: 2-3`. [_@first=simple-pat] [__`%__@second=simple-pat]
Repeat: ( Repeat: (
@min='' `* @max='' @min='' `* @max=''
@ -70,7 +71,7 @@ $: !.
^^: !<$. ^^: !<$.
^: !<. ^: !<.
id: "^^" / "^" / "__" / "_" / "$$" / "$" / (`a-z/`A-Z) *(`a-z/`A-Z/`0-9/`-) id: "^^" / "^" / "__" / "_" / "$$" / "$" / "|" / (`a-z/`A-Z) *(`a-z/`A-Z/`0-9/`-)
line-comment: `# .. $ line-comment: `# .. $
block-comment: "#("..")#" % block-comment block-comment: "#("..")#" % block-comment

View File

@ -44,6 +44,7 @@ const char *after_spaces(const char *str)
*/ */
const char *after_name(const char *str) const char *after_name(const char *str)
{ {
if (*str == '|') return &str[1];
if (*str == '^' || *str == '_' || *str == '$') { if (*str == '^' || *str == '_' || *str == '$') {
return (str[1] == *str) ? &str[2] : &str[1]; return (str[1] == *str) ? &str[2] : &str[1];
} }