aboutsummaryrefslogtreecommitdiff
path: root/types.h
blob: dd7de77088275025eeeb4e2db80b3f214a02f239 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
//
// types.h - Datatypes used by BP
//
#ifndef TYPES__H
#define TYPES__H

#include <stdbool.h>
#include <sys/types.h>

#define UNBOUNDED(pat) ((pat)->max_matchlen == -1)

// BP virtual machine pattern types
enum pattype_e {
    BP_ANYCHAR       = 1,
    BP_ID_START      = 2,
    BP_ID_CONTINUE   = 3,
    BP_STRING        = 4,
    BP_RANGE         = 5,
    BP_NOT           = 6,
    BP_UPTO          = 7,
    BP_UPTO_STRICT   = 8,
    BP_REPEAT        = 9,
    BP_BEFORE        = 10,
    BP_AFTER         = 11,
    BP_CAPTURE       = 12,
    BP_OTHERWISE     = 13,
    BP_CHAIN         = 14,
    BP_MATCH         = 15,
    BP_NOT_MATCH     = 16,
    BP_REPLACE       = 17,
    BP_REF           = 18,
    BP_NODENT        = 19,
    BP_START_OF_FILE = 20,
    BP_START_OF_LINE = 21,
    BP_END_OF_FILE   = 22,
    BP_END_OF_LINE   = 23,
    BP_WORD_BOUNDARY = 24,
    BP_DEFINITION    = 25,
    BP_LEFTRECURSION = 26,
    BP_ERROR         = 27,
};

struct match_s; // forward declared to resolve circular struct defs

//
// A struct reperesenting a BP virtual machine operation
//
typedef struct pat_s {
    struct pat_s *next;
    enum pattype_e type;
    const char *start, *end;
    // The bounds of the match length (used for backtracking)
    size_t min_matchlen;
    ssize_t max_matchlen; // -1 means unbounded length
    union {
        const char *string;
        struct {
            const char *name;
            size_t len;
        } ref;
        struct {
            const char *name;
            size_t namelen;
            struct pat_s *def, *pat;
        } def;
        struct {
            unsigned char low, high;
        } range;
        struct {
            size_t min;
            ssize_t max;
            struct pat_s *sep, *repeat_pat;
        } repetitions;
        // TODO: use a linked list instead of a binary tree
        struct {
            struct pat_s *first, *second;
        } multiple;
        struct {
            struct pat_s *pat;
            const char *text;
            size_t len;
        } replace;
        struct {
            struct pat_s *capture_pat;
            const char *name;
            size_t namelen;
        } capture;
        struct match_s *backref;
        struct {
            struct match_s *match;
            unsigned int visits;
            const char *at;
            struct pat_s *fallback;
        } leftrec;
        struct pat_s *pat;
    } args;
    size_t id;
} pat_t;

typedef struct {
    struct match_s **home, *next;
} match_dll_t;

// #define MATCH_FROM(node, name) ((match_t*)((char*)node + (size_t)(&((match_t*)0)->name)))

//
// Pattern matching result object
//
typedef struct match_s {
    // Where the match starts and ends (end is after the last character)
    const char *start, *end;
    pat_t *pat;
    // Intrusive linked list nodes for garbage collection and cache buckets:
    match_dll_t gc, cache;
    size_t defs_id;
    int refcount;
    struct match_s **children;
    struct match_s *_children[3];
} match_t;

//
// Pattern matching rule definition(s)
//
typedef struct def_s {
    size_t id;
    size_t namelen;
    const char *name;
    pat_t *pat;
    struct def_s *next;
} def_t;

#endif
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1