aboutsummaryrefslogtreecommitdiff
path: root/src/stdlib/simpleparse.c
blob: 1ee6403619adabb96f3fc6574fe724867e71ceb1 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
// A simple text parsing primitive:
//     const char *text = "foo.txt:15";
//     const char *filename; long line;
//     if (strparse(text, &filename, ":", &line)) { success...}
// or:
//     FILE *f = ...;
//     if (fparse(f, &filename, ":", &line)) { success... }

#include <ctype.h>
#include <gc.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>

#include "simpleparse.h"
#include "util.h"

// Try to consume the keyword `target` from the front of `*str`, comparing
// case-insensitively. The keyword only counts as a match when it is not
// immediately followed by an alphanumeric character or an underscore, so "on"
// matches "on " and "ON" but not "once" or "on_off".
// Returns true and advances `*str` past the keyword on a match; otherwise
// returns false and leaves `*str` untouched.
static bool _match_word(const char **str, const char *target) {
    size_t len = strlen(target);
    if (strncasecmp(*str, target, len) != 0)
        return false;

    // The prefix matched, so (*str)[len] is still inside the string (at worst
    // it is the NUL terminator). <ctype.h> functions need a value that is
    // representable as unsigned char; handing them a negative plain char is
    // undefined behavior, so convert before classifying.
    unsigned char next = (unsigned char)(*str)[len];
    if (isalnum(next) || next == '_')
        return false;

    *str += len;
    return true;
}

// Measure the text enclosed by a pair of delimiters.
// `str` must point just past an opening delimiter `open` that has already been
// consumed. Nested `open`/`close` pairs are balanced. When the delimiter is a
// quote character (", ' or `), a backslash escapes the character after it.
// On success, stores in `*len_out` the number of bytes that precede the
// matching `close` and returns true. Returns false if the input ends before
// the matching delimiter is found, if a quote ends in a dangling backslash,
// or if the nesting depth exceeds MAX_NESTING_DEPTH.
static bool measure_delimited(const char *str, char open, char close, size_t *len_out) {
    enum { MAX_NESTING_DEPTH = 999999 };
    const bool quoted = (open == '"' || open == '\'' || open == '`');
    const char special_characters[4] = {'\\', open, close, '\0'};
    size_t len = 0;
    int depth = 1;
    while (depth > 0) {
        len += strcspn(str + len, special_characters);
        const char c = str[len];
        if (c == '\0') {
            return false; // Ran out of input before the closing delimiter
        } else if (c == '\\') {
            if (!quoted) {
                // Inside brackets a backslash is an ordinary character. It still
                // has to be stepped over, otherwise strcspn() would stop on it
                // again and the loop would never make progress.
                len += 1;
            } else if (str[len+1] == '\0') {
                return false; // Dangling backslash at the end of the input
            } else {
                len += 2; // Skip the backslash and the character it escapes
            }
        } else if (c == close) {
            // Closers are checked before openers because quotes use the same
            // character for both.
            depth -= 1;
            if (depth > 0) len += 1;
        } else { // c == open, the only other character strcspn() stops on
            depth += 1;
            if (depth > MAX_NESTING_DEPTH) return false;
            len += 1;
        }
    }
    *len_out = len;
    return true;
}

// Parse `str` according to a sequence of `n` pattern items. Item `i` has the
// kind `types[i]` and the argument `destinations[i]`:
//   PARSE_SOME_OF  argument is a set of characters (const char*); skips zero
//                  or more leading characters from that set.
//   PARSE_LITERAL  argument is a string (const char*) that must appear next.
//   PARSE_STRING   argument is a `const char**` that receives a newly
//                  allocated (GC_MALLOC_ATOMIC) copy of the captured text.
//                  The capture extends up to the next item's literal or
//                  character set, or to the end of the input. When the string
//                  is surrounded by single-character literals that form a
//                  pair such as ( ), { }, [ ], < > or matching quotes, the
//                  capture is balanced/escaped (see measure_delimited()).
//   PARSE_DOUBLE   argument is a `double*`; parsed with strtod().
//   PARSE_LONG     argument is a `long*`; parsed with strtol() in base 10.
//   PARSE_BOOL     argument is a `bool*`; accepts true/yes/on/1 and
//                  false/no/off/0, case-insensitively, as whole words.
// A NULL argument means "match but do not store" (or, for PARSE_SOME_OF and
// PARSE_LITERAL, "nothing to match"). Items of any other kind are ignored.
//
// Returns NULL when every item matched. Otherwise returns a pointer to the
// position in the input where the failing item started; destinations of the
// items that matched before it have already been written.
public const char *simpleparse(const char *str, int n, parse_type_e types[n], void *destinations[n])
{
    // Closing delimiter for each recognized opening delimiter (0 = none).
    static const char matching_pair[256] = {['(']=')', ['{']='}', ['[']=']',
        ['"']='"', ['\'']='\'', ['`']='`', ['<']='>'};

    for (int i = 0; i < n; i++) {
        switch (types[i]) {
        case PARSE_SOME_OF: {
            const char *allowed = destinations[i];
            if (allowed) str += strspn(str, allowed);
            break;
        }
        case PARSE_LITERAL: {
            const char *target = destinations[i];
            if (target) {
                size_t target_len = strlen(target);
                if (strncmp(str, target, target_len) != 0)
                    return str;
                str += target_len;
            }
            break;
        }
        case PARSE_STRING: {
            const bool has_next = i + 1 < n;
            const char *before = (i > 0 && types[i-1] == PARSE_LITERAL) ? destinations[i-1] : NULL;
            const char *after = (has_next && types[i+1] == PARSE_LITERAL) ? destinations[i+1] : NULL;
            size_t len = 0;
            // Index the table through unsigned char: a plain char may be
            // negative, which would read before the start of the array.
            if (before && after && strlen(before) == 1 && strlen(after) == 1
                && after[0] == matching_pair[(unsigned char)before[0]]) {
                if (!measure_delimited(str, before[0], after[0], &len))
                    return str;
            } else if (has_next && types[i+1] == PARSE_LITERAL) {
                if (after) {
                    const char *end = strstr(str, after);
                    if (!end) return str;
                    len = (size_t)(end - str);
                } else {
                    len = strlen(str);
                }
            } else if (has_next && types[i+1] == PARSE_SOME_OF) {
                const char *stop_set = destinations[i+1];
                len = stop_set ? strcspn(str, stop_set) : strlen(str);
            } else {
                len = strlen(str);
            }
            if (destinations[i]) {
                char *matched = GC_MALLOC_ATOMIC(len+1);
                if (!matched) return str; // Allocation failed: report as a parse failure
                memcpy(matched, str, len);
                matched[len] = '\0';
                *(const char**)destinations[i] = matched;
            }
            str += len;
            break;
        }
        case PARSE_DOUBLE: {
            char *end = NULL;
            double val = strtod(str, &end);
            if (end == str) return str; // No characters were converted
            if (destinations[i]) *(double*)destinations[i] = val;
            str = end;
            break;
        }
        case PARSE_LONG: {
            char *end = NULL;
            long val = strtol(str, &end, 10);
            if (end == str) return str; // No characters were converted
            if (destinations[i]) *(long*)destinations[i] = val;
            str = end;
            break;
        }
        case PARSE_BOOL: {
            if (_match_word(&str, "true") || _match_word(&str, "yes") || _match_word(&str, "on") || _match_word(&str, "1")) {
                if (destinations[i]) *(bool*)destinations[i] = true;
            } else if (_match_word(&str, "false") || _match_word(&str, "no") || _match_word(&str, "off") || _match_word(&str, "0")) {
                if (destinations[i]) *(bool*)destinations[i] = false;
            } else {
                return str;
            }
            break;
        }
        default: break;
        }
    }
    return NULL;
}

// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0