code / tomo

Lines41.3K C23.7K Markdown9.7K YAML5.0K Tomo2.3K
7 others 763
Python231 Shell230 make212 INI47 Text21 SVG16 Lua6
(130 lines)
1 // A simple text parsing primitive:
2 // const char *text = "foo.txt:15";
3 // const char *filename; int line;
4 // if (strparse(text, &filename, ":", &line)) { success...}
5 // or:
6 // FILE *f = ...;
7 // if (fparse(f, &filename, ":", &line)) { success... }
9 #include <ctype.h>
10 #include <gc.h>
11 #include <stdbool.h>
12 #include <stdint.h>
13 #include <stdio.h>
14 #include <stdlib.h>
15 #include <string.h>
17 #include "simpleparse.h"
18 #include "util.h"
// Case-insensitively match the whole word `target` at the start of `*str`.
//
// The match only counts when the character right after the word is neither
// alphanumeric nor an underscore, so "on" matches "on," but not "once" or
// "on_off".
//
// Returns true and advances `*str` past the word on a match; otherwise returns
// false and leaves `*str` untouched.
static bool _match_word(const char **str, const char *target) {
    size_t len = strlen(target);
    if (strncasecmp(*str, target, len) != 0) return false;

    // The <ctype.h> classifiers are only defined for values representable as
    // unsigned char (or EOF), so convert before calling isalnum(). A plain
    // `char` holding a non-ASCII byte may be negative.
    unsigned char next = (unsigned char)(*str)[len];
    if (isalnum(next) || next == '_') return false;

    *str += len;
    return true;
}
29 public
30 const char *simpleparse(const char *str, int n, parse_type_e types[n], void *destinations[n]) {
31 for (int i = 0; i < n; i++) {
32 switch (types[i]) {
33 case PARSE_SOME_OF: {
34 if (destinations[i]) str += strspn(str, (char *)destinations[i]);
35 break;
37 case PARSE_LITERAL: {
38 const char *target = (const char *)destinations[i];
39 if (target) {
40 if (strncmp(str, target, strlen(target)) != 0) return str;
41 str += strlen(target);
43 break;
45 case PARSE_STRING: {
46 size_t len;
47 static const char matching_pair[256] = {[(int)'('] = ')', [(int)'{'] = '}', [(int)'['] = ']',
48 [(int)'"'] = '"', [(int)'\''] = '\'', [(int)'`'] = '`',
49 [(int)'<'] = '>'};
50 if (i > 0 && i + 1 < n && types[i - 1] == PARSE_LITERAL && types[i + 1] == PARSE_LITERAL
51 && destinations[i - 1] && destinations[i + 1] && strlen((char *)destinations[i - 1]) == 1
52 && strlen((char *)destinations[i + 1]) == 1
53 && *(char *)destinations[i + 1] == matching_pair[(int)*(char *)destinations[i - 1]]) {
54 len = 0;
55 char special_characters[4] = {'\\', *(char *)destinations[i - 1], *(char *)destinations[i + 1], 0};
56 for (int depth = 1; depth > 0;) {
57 len += strcspn(str + len, special_characters);
58 if (str[len] == '\0') {
59 return str;
60 } else if (str[len] == '\\'
61 && (special_characters[1] == '"' || special_characters[1] == '\''
62 || special_characters[1] == '`')) {
63 if (str[len + 1] == '\0') return str;
64 len += 2;
65 } else if (str[len] == special_characters[2]) { // Check for closing quotes before opening quotes
66 depth -= 1;
67 if (depth > 0) len += 1;
68 } else if (str[len] == special_characters[1]) {
69 depth += 1;
70 if (depth > 999999) return str;
71 len += 1;
74 } else if (i + 1 < n && types[i + 1] == PARSE_LITERAL) {
75 const char *terminator = (const char *)destinations[i + 1];
76 if (terminator) {
77 const char *end = strstr(str, terminator);
78 if (!end) return str;
79 len = (size_t)((ptrdiff_t)end - (ptrdiff_t)str);
80 } else {
81 len = strlen(str);
83 } else if (i + 1 < n && types[i + 1] == PARSE_SOME_OF) {
84 len = destinations[i + 1] ? strcspn(str, (char *)destinations[i + 1]) : strlen(str);
86 } else {
87 len = strlen(str);
89 if (destinations[i]) {
90 char *matched = GC_MALLOC_ATOMIC(len + 1);
91 memcpy(matched, str, len);
92 matched[len] = '\0';
93 *(const char **)destinations[i] = matched;
95 str += len;
96 break;
98 case PARSE_DOUBLE: {
99 char *end = NULL;
100 double val = strtod(str, &end);
101 if (end == str) return str;
102 if (destinations[i]) *(double *)destinations[i] = val;
103 str = end;
104 break;
106 case PARSE_LONG: {
107 char *end = NULL;
108 long val = strtol(str, &end, 10);
109 if (end == str) return str;
110 if (destinations[i]) *(long *)destinations[i] = val;
111 str = end;
112 break;
114 case PARSE_BOOL: {
115 if (_match_word(&str, "true") || _match_word(&str, "yes") || _match_word(&str, "on")
116 || _match_word(&str, "1")) {
117 if (destinations[i]) *(bool *)destinations[i] = true;
118 } else if (_match_word(&str, "false") || _match_word(&str, "no") || _match_word(&str, "off")
119 || _match_word(&str, "0")) {
120 if (destinations[i]) *(bool *)destinations[i] = false;
121 } else {
122 return str;
124 break;
126 default: break;
129 return NULL;