aboutsummaryrefslogtreecommitdiff
path: root/src/stdlib/simpleparse.c
blob: 3929c32ba06b40aa3c3a929a4dc87417133d8010 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
// A simple text parsing primitive:
//     const char *line = "foo.txt:15";
//     const char *filename; int line;
//     if (strparse(line, &filename, ":", &line)) { success...}
// or:
//     FILE *f = ...;
//     if (fparse(f, &filename, ":", &line)) { success... }

#include <ctype.h>
#include <gc.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "simpleparse.h"
#include "util.h"

static bool _match_word(const char **str, const char *target) {
    size_t len = strlen(target);
    if (strncasecmp(*str, target, len) == 0 && !isalnum((*str)[len]) && (*str)[len] != '_') {
        *str += len;
        return true;
    }
    return false;
}

public
const char *simpleparse(const char *str, int n, parse_type_e types[n], void *destinations[n]) {
    for (int i = 0; i < n; i++) {
        switch (types[i]) {
        case PARSE_SOME_OF: {
            if (destinations[i]) str += strspn(str, (char *)destinations[i]);
            break;
        }
        case PARSE_LITERAL: {
            const char *target = (const char *)destinations[i];
            if (target) {
                if (strncmp(str, target, strlen(target)) != 0) return str;
                str += strlen(target);
            }
            break;
        }
        case PARSE_STRING: {
            size_t len;
            static const char matching_pair[256] = {[(int)'('] = ')', [(int)'{'] = '}',   [(int)'['] = ']',
                                                    [(int)'"'] = '"', [(int)'\''] = '\'', [(int)'`'] = '`',
                                                    [(int)'<'] = '>'};
            if (i > 0 && i + 1 < n && types[i - 1] == PARSE_LITERAL && types[i + 1] == PARSE_LITERAL
                && destinations[i - 1] && destinations[i + 1] && strlen((char *)destinations[i - 1]) == 1
                && strlen((char *)destinations[i + 1]) == 1
                && *(char *)destinations[i + 1] == matching_pair[(int)*(char *)destinations[i - 1]]) {
                len = 0;
                char special_characters[4] = {'\\', *(char *)destinations[i - 1], *(char *)destinations[i + 1], 0};
                for (int depth = 1; depth > 0;) {
                    len += strcspn(str + len, special_characters);
                    if (str[len] == '\0') {
                        return str;
                    } else if (str[len] == '\\'
                               && (special_characters[1] == '"' || special_characters[1] == '\''
                                   || special_characters[1] == '`')) {
                        if (str[len + 1] == '\0') return str;
                        len += 2;
                    } else if (str[len] == special_characters[2]) { // Check for closing quotes before opening quotes
                        depth -= 1;
                        if (depth > 0) len += 1;
                    } else if (str[len] == special_characters[1]) {
                        depth += 1;
                        if (depth > 999999) return str;
                        len += 1;
                    }
                }
            } else if (i + 1 < n && types[i + 1] == PARSE_LITERAL) {
                const char *terminator = (const char *)destinations[i + 1];
                if (terminator) {
                    const char *end = strstr(str, terminator);
                    if (!end) return str;
                    len = (size_t)((ptrdiff_t)end - (ptrdiff_t)str);
                } else {
                    len = strlen(str);
                }
            } else if (i + 1 < n && types[i + 1] == PARSE_SOME_OF) {
                len = destinations[i + 1] ? strcspn(str, (char *)destinations[i + 1]) : strlen(str);
                ;
            } else {
                len = strlen(str);
            }
            if (destinations[i]) {
                char *matched = GC_MALLOC_ATOMIC(len + 1);
                memcpy(matched, str, len);
                matched[len] = '\0';
                *(const char **)destinations[i] = matched;
            }
            str += len;
            break;
        }
        case PARSE_DOUBLE: {
            char *end = NULL;
            double val = strtod(str, &end);
            if (end == str) return str;
            if (destinations[i]) *(double *)destinations[i] = val;
            str = end;
            break;
        }
        case PARSE_LONG: {
            char *end = NULL;
            long val = strtol(str, &end, 10);
            if (end == str) return str;
            if (destinations[i]) *(long *)destinations[i] = val;
            str = end;
            break;
        }
        case PARSE_BOOL: {
            if (_match_word(&str, "true") || _match_word(&str, "yes") || _match_word(&str, "on")
                || _match_word(&str, "1")) {
                if (destinations[i]) *(bool *)destinations[i] = true;
            } else if (_match_word(&str, "false") || _match_word(&str, "no") || _match_word(&str, "off")
                       || _match_word(&str, "0")) {
                if (destinations[i]) *(bool *)destinations[i] = false;
            } else {
                return str;
            }
            break;
        }
        default: break;
        }
    }
    return NULL;
}

// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0