aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBruce Hill <bruce@bruce-hill.com>2025-09-24 20:22:00 -0400
committerBruce Hill <bruce@bruce-hill.com>2025-09-24 20:22:00 -0400
commit3d5944a732e34b6dd01921dee991dee54af47e18 (patch)
tree97d17a4e7feb97d367060a184907a6978352d5ec
parent20c11b29b3a63c221cac942a17bf9abcf8b9bafe (diff)
Autoformatting with clang-format
-rw-r--r--.clang-format274
-rw-r--r--.clangd3
-rw-r--r--Lua/lbp.c157
-rw-r--r--bp.c288
-rw-r--r--files.c127
-rw-r--r--files.h25
-rw-r--r--match.c352
-rw-r--r--match.h12
-rw-r--r--pattern.c389
-rw-r--r--pattern.h138
-rw-r--r--printmatch.c108
-rw-r--r--printmatch.h7
-rw-r--r--utf8.c505
-rw-r--r--utf8.h12
-rw-r--r--utils.c81
-rw-r--r--utils.h45
16 files changed, 1383 insertions, 1140 deletions
diff --git a/.clang-format b/.clang-format
new file mode 100644
index 0000000..82e3ce3
--- /dev/null
+++ b/.clang-format
@@ -0,0 +1,274 @@
+---
+Language: Cpp
+AccessModifierOffset: -2
+AlignAfterOpenBracket: Align
+AlignArrayOfStructures: None
+AlignConsecutiveAssignments:
+ Enabled: false
+ AcrossEmptyLines: false
+ AcrossComments: false
+ AlignCompound: false
+ AlignFunctionPointers: false
+ PadOperators: true
+AlignConsecutiveBitFields:
+ Enabled: false
+ AcrossEmptyLines: false
+ AcrossComments: false
+ AlignCompound: false
+ AlignFunctionPointers: false
+ PadOperators: false
+AlignConsecutiveDeclarations:
+ Enabled: false
+ AcrossEmptyLines: false
+ AcrossComments: false
+ AlignCompound: false
+ AlignFunctionPointers: false
+ PadOperators: false
+AlignConsecutiveMacros:
+ Enabled: false
+ AcrossEmptyLines: false
+ AcrossComments: false
+ AlignCompound: false
+ AlignFunctionPointers: false
+ PadOperators: false
+AlignConsecutiveShortCaseStatements:
+ Enabled: false
+ AcrossEmptyLines: false
+ AcrossComments: false
+ AlignCaseArrows: false
+ AlignCaseColons: false
+AlignConsecutiveTableGenBreakingDAGArgColons:
+ Enabled: false
+ AcrossEmptyLines: false
+ AcrossComments: false
+ AlignCompound: false
+ AlignFunctionPointers: false
+ PadOperators: false
+AlignConsecutiveTableGenCondOperatorColons:
+ Enabled: false
+ AcrossEmptyLines: false
+ AcrossComments: false
+ AlignCompound: false
+ AlignFunctionPointers: false
+ PadOperators: false
+AlignConsecutiveTableGenDefinitionColons:
+ Enabled: false
+ AcrossEmptyLines: false
+ AcrossComments: false
+ AlignCompound: false
+ AlignFunctionPointers: false
+ PadOperators: false
+AlignEscapedNewlines: Right
+AlignOperands: Align
+AlignTrailingComments:
+ Kind: Never
+ OverEmptyLines: 0
+AllowAllArgumentsOnNextLine: true
+AllowAllParametersOfDeclarationOnNextLine: true
+AllowBreakBeforeNoexceptSpecifier: Never
+AllowShortBlocksOnASingleLine: Never
+AllowShortCaseExpressionOnASingleLine: true
+AllowShortCaseLabelsOnASingleLine: true
+AllowShortCompoundRequirementOnASingleLine: true
+AllowShortEnumsOnASingleLine: true
+AllowShortFunctionsOnASingleLine: All
+AllowShortIfStatementsOnASingleLine: AllIfsAndElse
+AllowShortLambdasOnASingleLine: All
+AllowShortLoopsOnASingleLine: false
+AlwaysBreakAfterDefinitionReturnType: None
+AlwaysBreakBeforeMultilineStrings: false
+AttributeMacros:
+ - __capability
+BinPackArguments: true
+BinPackParameters: true
+BitFieldColonSpacing: Both
+BraceWrapping:
+ AfterCaseLabel: false
+ AfterClass: false
+ AfterControlStatement: Never
+ AfterEnum: false
+ AfterExternBlock: false
+ AfterFunction: false
+ AfterNamespace: false
+ AfterObjCDeclaration: false
+ AfterStruct: false
+ AfterUnion: false
+ BeforeCatch: false
+ BeforeElse: false
+ BeforeLambdaBody: false
+ BeforeWhile: false
+ IndentBraces: false
+ SplitEmptyFunction: true
+ SplitEmptyRecord: true
+ SplitEmptyNamespace: true
+BreakAdjacentStringLiterals: true
+BreakAfterAttributes: Leave
+BreakAfterJavaFieldAnnotations: false
+BreakAfterReturnType: None
+BreakArrays: true
+BreakBeforeBinaryOperators: NonAssignment
+BreakBeforeConceptDeclarations: Always
+BreakBeforeBraces: Attach
+BreakBeforeInlineASMColon: OnlyMultiline
+BreakBeforeTernaryOperators: true
+BreakConstructorInitializers: BeforeColon
+BreakFunctionDefinitionParameters: false
+BreakInheritanceList: BeforeColon
+BreakStringLiterals: true
+BreakTemplateDeclarations: MultiLine
+ColumnLimit: 120
+CommentPragmas: '^ IWYU pragma:'
+CompactNamespaces: false
+ConstructorInitializerIndentWidth: 4
+ContinuationIndentWidth: 4
+Cpp11BracedListStyle: true
+DerivePointerAlignment: false
+DisableFormat: false
+EmptyLineAfterAccessModifier: Never
+EmptyLineBeforeAccessModifier: LogicalBlock
+ExperimentalAutoDetectBinPacking: false
+FixNamespaceComments: true
+ForEachMacros:
+ - foreach
+ - Q_FOREACH
+ - BOOST_FOREACH
+IfMacros:
+ - KJ_IF_MAYBE
+IncludeBlocks: Preserve
+IncludeCategories:
+ - Regex: '^"(llvm|llvm-c|clang|clang-c)/'
+ Priority: 2
+ SortPriority: 0
+ CaseSensitive: false
+ - Regex: '^(<|"(gtest|gmock|isl|json)/)'
+ Priority: 3
+ SortPriority: 0
+ CaseSensitive: false
+ - Regex: '.*'
+ Priority: 1
+ SortPriority: 0
+ CaseSensitive: false
+IncludeIsMainRegex: '(Test)?$'
+IncludeIsMainSourceRegex: ''
+IndentAccessModifiers: false
+IndentCaseBlocks: false
+IndentCaseLabels: false
+IndentExternBlock: AfterExternBlock
+IndentGotoLabels: true
+IndentPPDirectives: None
+IndentRequiresClause: true
+IndentWidth: 4
+IndentWrappedFunctionNames: false
+InsertBraces: false
+InsertNewlineAtEOF: false
+InsertTrailingCommas: None
+IntegerLiteralSeparator:
+ Binary: 0
+ BinaryMinDigits: 0
+ Decimal: 0
+ DecimalMinDigits: 0
+ Hex: 0
+ HexMinDigits: 0
+JavaScriptQuotes: Leave
+JavaScriptWrapImports: true
+KeepEmptyLines:
+ AtEndOfFile: false
+ AtStartOfBlock: true
+ AtStartOfFile: true
+LambdaBodyIndentation: Signature
+LineEnding: DeriveLF
+MacroBlockBegin: ''
+MacroBlockEnd: ''
+MainIncludeChar: Quote
+MaxEmptyLinesToKeep: 1
+NamespaceIndentation: None
+ObjCBinPackProtocolList: Auto
+ObjCBlockIndentWidth: 2
+ObjCBreakBeforeNestedBlockParam: true
+ObjCSpaceAfterProperty: false
+ObjCSpaceBeforeProtocolList: true
+PackConstructorInitializers: BinPack
+PenaltyBreakAssignment: 2
+PenaltyBreakBeforeFirstCallParameter: 19
+PenaltyBreakComment: 300
+PenaltyBreakFirstLessLess: 120
+PenaltyBreakOpenParenthesis: 0
+PenaltyBreakScopeResolution: 500
+PenaltyBreakString: 1000
+PenaltyBreakTemplateDeclaration: 10
+PenaltyExcessCharacter: 1000000
+PenaltyIndentedWhitespace: 0
+PenaltyReturnTypeOnItsOwnLine: 60
+PointerAlignment: Right
+PPIndentWidth: -1
+QualifierAlignment: Leave
+ReferenceAlignment: Pointer
+ReflowComments: true
+RemoveBracesLLVM: false
+RemoveParentheses: Leave
+RemoveSemicolon: false
+RequiresClausePosition: OwnLine
+RequiresExpressionIndentation: OuterScope
+SeparateDefinitionBlocks: Leave
+ShortNamespaceLines: 1
+SkipMacroDefinitionBody: false
+SortIncludes: CaseSensitive
+SortJavaStaticImport: Before
+SortUsingDeclarations: LexicographicNumeric
+SpaceAfterCStyleCast: false
+SpaceAfterLogicalNot: false
+SpaceAfterTemplateKeyword: true
+SpaceAroundPointerQualifiers: Default
+SpaceBeforeAssignmentOperators: true
+SpaceBeforeCaseColon: false
+SpaceBeforeCpp11BracedList: false
+SpaceBeforeCtorInitializerColon: true
+SpaceBeforeInheritanceColon: true
+SpaceBeforeJsonColon: false
+SpaceBeforeParens: ControlStatements
+SpaceBeforeParensOptions:
+ AfterControlStatements: true
+ AfterForeachMacros: true
+ AfterFunctionDefinitionName: false
+ AfterFunctionDeclarationName: false
+ AfterIfMacros: true
+ AfterOverloadedOperator: false
+ AfterPlacementOperator: true
+ AfterRequiresInClause: false
+ AfterRequiresInExpression: false
+ BeforeNonEmptyParentheses: false
+SpaceBeforeRangeBasedForLoopColon: true
+SpaceBeforeSquareBrackets: false
+SpaceInEmptyBlock: false
+SpacesBeforeTrailingComments: 1
+SpacesInAngles: Never
+SpacesInContainerLiterals: true
+SpacesInLineCommentPrefix:
+ Minimum: 1
+ Maximum: -1
+SpacesInParens: Never
+SpacesInParensOptions:
+ ExceptDoubleParentheses: false
+ InCStyleCasts: false
+ InConditionalStatements: false
+ InEmptyParentheses: false
+ Other: false
+SpacesInSquareBrackets: false
+Standard: Latest
+StatementAttributeLikeMacros:
+ - Q_EMIT
+StatementMacros:
+ - Q_UNUSED
+ - QT_REQUIRE_VERSION
+TableGenBreakInsideDAGArg: DontBreak
+TabWidth: 4
+UseTab: Never
+VerilogBreakBetweenInstancePorts: true
+WhitespaceSensitiveMacros:
+ - BOOST_PP_STRINGIZE
+ - CF_SWIFT_NAME
+ - NS_SWIFT_NAME
+ - PP_STRINGIZE
+ - STRINGIZE
+...
+
diff --git a/.clangd b/.clangd
new file mode 100644
index 0000000..6cf2eba
--- /dev/null
+++ b/.clangd
@@ -0,0 +1,3 @@
+CompileFlags:
+ Add: [-xc]
+Checks: '-bugprone-suspicious-missing-comma'
diff --git a/Lua/lbp.c b/Lua/lbp.c
index 0f59653..3ea18e8 100644
--- a/Lua/lbp.c
+++ b/Lua/lbp.c
@@ -1,14 +1,14 @@
/*
-* lbp.c - bp library for lua
-* API:
-* bp.match(pat, str, [start_index]) -> nil or match_table
-* bp.replace(pat, replacement, str, [start_index]) -> str with replacements, num_replacements
-* for match_table in bp.matches(pat, str, [start_index]) do ... end
-* bp.compile(pat) -> pattern object
-* pat:match(str, [start_index])
-* pat:replace(replacement, str, [start_index])
-* for match in pat:matches(str, [start_index]) do ... end
-*/
+ * lbp.c - bp library for lua
+ * API:
+ * bp.match(pat, str, [start_index]) -> nil or match_table
+ * bp.replace(pat, replacement, str, [start_index]) -> str with replacements, num_replacements
+ * for match_table in bp.matches(pat, str, [start_index]) do ... end
+ * bp.compile(pat) -> pattern object
+ * pat:match(str, [start_index])
+ * pat:replace(replacement, str, [start_index])
+ * for match in pat:matches(str, [start_index]) do ... end
+ */
#include <fcntl.h>
#include <stdlib.h>
@@ -16,11 +16,11 @@
#include <sys/mman.h>
#include <sys/stat.h>
-#include "lua.h"
#include "lauxlib.h"
+#include "lua.h"
-#include "../pattern.h"
#include "../match.h"
+#include "../pattern.h"
#include "../printmatch.h"
#include "../utils.h"
@@ -34,25 +34,22 @@ static void push_match(lua_State *L, bp_match_t *m, const char *start);
lua_State *cur_state = NULL;
-static void match_error(char **msg)
-{
+static void match_error(char **msg) {
lua_pushstring(cur_state, *msg);
free(*msg);
*msg = NULL;
lua_error(cur_state);
}
-static inline void raise_parse_error(lua_State *L, maybe_pat_t m)
-{
+static inline void raise_parse_error(lua_State *L, maybe_pat_t m) {
size_t err_len = (size_t)(m.value.error.end - m.value.error.start);
- char *buf = calloc(err_len+1, sizeof(char));
+ char *buf = calloc(err_len + 1, sizeof(char));
memcpy(buf, m.value.error.start, err_len);
luaL_error(L, "%s: \"%s\"", m.value.error.msg, buf);
free(buf);
}
-static int Lcompile(lua_State *L)
-{
+static int Lcompile(lua_State *L) {
size_t patlen;
const char *pat_text = luaL_checklstring(L, 1, &patlen);
maybe_pat_t maybe_pat = bp_pattern(pat_text, pat_text + patlen);
@@ -60,18 +57,17 @@ static int Lcompile(lua_State *L)
raise_parse_error(L, maybe_pat);
return 0;
}
- bp_pat_t **pat_storage = (bp_pat_t**)lua_newuserdatauv(L, sizeof(bp_pat_t*), 1);
+ bp_pat_t **pat_storage = (bp_pat_t **)lua_newuserdatauv(L, sizeof(bp_pat_t *), 1);
*pat_storage = maybe_pat.value.pat;
lua_pushvalue(L, 1);
lua_setiuservalue(L, -2, 1);
- lua_pushlightuserdata(L, (void*)&PAT_METATABLE);
+ lua_pushlightuserdata(L, (void *)&PAT_METATABLE);
lua_gettable(L, LUA_REGISTRYINDEX);
lua_setmetatable(L, -2);
return 1;
}
-static void push_matchstring(lua_State *L, bp_match_t *m)
-{
+static void push_matchstring(lua_State *L, bp_match_t *m) {
char *buf = NULL;
size_t size = 0;
FILE *out = open_memstream(&buf, &size);
@@ -81,8 +77,7 @@ static void push_matchstring(lua_State *L, bp_match_t *m)
fclose(out);
}
-static bp_match_t *get_first_capture(bp_match_t *m)
-{
+static bp_match_t *get_first_capture(bp_match_t *m) {
if (m->pat->type == BP_TAGGED) {
return m;
} else if (m->pat->type == BP_CAPTURE && !When(m->pat, BP_CAPTURE)->name) {
@@ -96,8 +91,7 @@ static bp_match_t *get_first_capture(bp_match_t *m)
return NULL;
}
-static void set_capture_fields(lua_State *L, bp_match_t *m, int *n, const char *start)
-{
+static void set_capture_fields(lua_State *L, bp_match_t *m, int *n, const char *start) {
if (m->pat->type == BP_CAPTURE) {
bp_match_t *cap = get_first_capture(m->children[0]);
if (!cap) cap = m->children[0];
@@ -119,10 +113,9 @@ static void set_capture_fields(lua_State *L, bp_match_t *m, int *n, const char *
}
}
-static void push_match(lua_State *L, bp_match_t *m, const char *start)
-{
+static void push_match(lua_State *L, bp_match_t *m, const char *start) {
lua_createtable(L, 1, 2);
- lua_pushlightuserdata(L, (void*)&MATCH_METATABLE);
+ lua_pushlightuserdata(L, (void *)&MATCH_METATABLE);
lua_gettable(L, LUA_REGISTRYINDEX);
lua_setmetatable(L, -2);
push_matchstring(L, m);
@@ -144,11 +137,9 @@ static void push_match(lua_State *L, bp_match_t *m, const char *start)
lua_setfield(L, -2, "after");
}
-static int Lmatch(lua_State *L)
-{
+static int Lmatch(lua_State *L) {
if (lua_isstring(L, 1)) {
- if (Lcompile(L) != 1)
- return 0;
+ if (Lcompile(L) != 1) return 0;
lua_replace(L, 1);
}
bp_pat_t **at_pat = lua_touserdata(L, 1);
@@ -162,19 +153,17 @@ static int Lmatch(lua_State *L)
lua_getfield(L, 3, "start");
lua_getfield(L, 3, "after");
index = luaL_optinteger(L, -1, 1);
- if (lua_rawequal(L, -1, -2))
- ++index;
+ if (lua_rawequal(L, -1, -2)) ++index;
} else {
index = luaL_optinteger(L, 3, 1);
}
- if (index > (lua_Integer)strlen(text)+1)
- return 0;
+ if (index > (lua_Integer)strlen(text) + 1) return 0;
bp_match_t *m = NULL;
int ret = 0;
cur_state = L;
bp_errhand_t old = bp_set_error_handler(match_error);
- if (next_match(&m, text+index-1, &text[textlen], pat, builtins, NULL, false)) {
+ if (next_match(&m, text + index - 1, &text[textlen], pat, builtins, NULL, false)) {
push_match(L, m, text);
stop_matching(&m);
ret = 1;
@@ -183,11 +172,9 @@ static int Lmatch(lua_State *L)
return ret;
}
-static int Lreplace(lua_State *L)
-{
+static int Lreplace(lua_State *L) {
if (lua_isstring(L, 1)) {
- if (Lcompile(L) != 1)
- return 0;
+ if (Lcompile(L) != 1) return 0;
lua_replace(L, 1);
}
bp_pat_t **at_pat = lua_touserdata(L, 1);
@@ -198,8 +185,7 @@ static int Lreplace(lua_State *L)
const char *rep_text = luaL_checklstring(L, 2, &replen);
const char *text = luaL_checklstring(L, 3, &textlen);
lua_Integer index = luaL_optinteger(L, 4, 1);
- if (index > (lua_Integer)strlen(text)+1)
- index = (lua_Integer)strlen(text)+1;
+ if (index > (lua_Integer)strlen(text) + 1) index = (lua_Integer)strlen(text) + 1;
maybe_pat_t maybe_replacement = bp_replacement(pat, rep_text, rep_text + replen);
if (!maybe_replacement.success) {
@@ -215,7 +201,7 @@ static int Lreplace(lua_State *L)
bp_pat_t *rep_pat = maybe_replacement.value.pat;
cur_state = L;
bp_errhand_t old = bp_set_error_handler(match_error);
- for (bp_match_t *m = NULL; next_match(&m, text, &text[textlen], rep_pat, builtins, NULL, false); ) {
+ for (bp_match_t *m = NULL; next_match(&m, text, &text[textlen], rep_pat, builtins, NULL, false);) {
fwrite(prev, sizeof(char), (size_t)(m->start - prev), out);
fprint_match(out, text, m, NULL);
prev = m->end;
@@ -233,8 +219,7 @@ static int Lreplace(lua_State *L)
return 2;
}
-static int iter(lua_State *L)
-{
+static int iter(lua_State *L) {
lua_geti(L, 1, 1);
lua_geti(L, 1, 2);
lua_replace(L, 1);
@@ -242,14 +227,12 @@ static int iter(lua_State *L)
return Lmatch(L);
}
-static int Lmatches(lua_State *L)
-{
+static int Lmatches(lua_State *L) {
int nargs = lua_gettop(L);
lua_pushcfunction(L, iter); // iter
lua_createtable(L, 2, 0); // state: {pat, str}
if (lua_isstring(L, 1)) {
- if (Lcompile(L) != 1)
- return 0;
+ if (Lcompile(L) != 1) return 0;
} else {
lua_pushvalue(L, 1);
}
@@ -262,20 +245,17 @@ static int Lmatches(lua_State *L)
return 3;
}
-static int Lmatch_tostring(lua_State *L)
-{
+static int Lmatch_tostring(lua_State *L) {
lua_geti(L, 1, 0);
return 1;
}
-static int Lpat_source(lua_State *L)
-{
+static int Lpat_source(lua_State *L) {
lua_getiuservalue(L, 1, 1);
return 1;
}
-static int Lpat_tostring(lua_State *L)
-{
+static int Lpat_tostring(lua_State *L) {
luaL_Buffer b;
luaL_buffinit(L, &b);
luaL_addstring(&b, "Pattern [[");
@@ -286,8 +266,7 @@ static int Lpat_tostring(lua_State *L)
return 1;
}
-static int Lpat_gc(lua_State *L)
-{
+static int Lpat_gc(lua_State *L) {
(void)L;
bp_pat_t **at_pat = lua_touserdata(L, 1);
bp_pat_t *pat = *at_pat;
@@ -296,8 +275,7 @@ static int Lpat_gc(lua_State *L)
return 0;
}
-static int Lpat_join(lua_State *L, const char *joiner)
-{
+static int Lpat_join(lua_State *L, const char *joiner) {
if (!lua_isstring(L, 1)) {
lua_pushcfunction(L, Lpat_source);
lua_pushvalue(L, 1);
@@ -330,62 +308,39 @@ static int Lpat_join(lua_State *L, const char *joiner)
return 1;
}
-static int Lpat_concat(lua_State *L)
-{
- return Lpat_join(L, " ");
-}
+static int Lpat_concat(lua_State *L) { return Lpat_join(L, " "); }
-static int Lpat_div(lua_State *L)
-{
- return Lpat_join(L, " / ");
-}
+static int Lpat_div(lua_State *L) { return Lpat_join(L, " / "); }
-static const luaL_Reg match_metamethods[] = {
- {"__tostring", Lmatch_tostring},
- {NULL, NULL}
-};
+static const luaL_Reg match_metamethods[] = {{"__tostring", Lmatch_tostring}, {NULL, NULL}};
static const luaL_Reg pat_methods[] = {
- {"match", Lmatch},
- {"replace", Lreplace},
- {"matches", Lmatches},
- {"getsource", Lpat_source},
- {NULL, NULL}
-};
-
-static const luaL_Reg pat_metamethods[] = {
- {"__gc", Lpat_gc},
- {"__concat", Lpat_concat},
- {"__div", Lpat_div},
- {"__tostring", Lpat_tostring},
- {"__index", NULL}, // placeholder for pat_methods
- {NULL, NULL}
-};
+ {"match", Lmatch}, {"replace", Lreplace}, {"matches", Lmatches}, {"getsource", Lpat_source}, {NULL, NULL}};
+
+static const luaL_Reg pat_metamethods[] = {{"__gc", Lpat_gc}, {"__concat", Lpat_concat},
+ {"__div", Lpat_div}, {"__tostring", Lpat_tostring},
+ {"__index", NULL}, // placeholder for pat_methods
+ {NULL, NULL}};
static const luaL_Reg bp_methods[] = {
- {"match", Lmatch},
- {"replace", Lreplace},
- {"compile", Lcompile},
- {"matches", Lmatches},
- {NULL, NULL}
-};
-
-public LUALIB_API int luaopen_bp(lua_State *L)
-{
- maybe_pat_t maybe_pat = bp_pattern(builtins_source, builtins_source+strlen(builtins_source));
+ {"match", Lmatch}, {"replace", Lreplace}, {"compile", Lcompile}, {"matches", Lmatches}, {NULL, NULL}};
+
+public
+LUALIB_API int luaopen_bp(lua_State *L) {
+ maybe_pat_t maybe_pat = bp_pattern(builtins_source, builtins_source + strlen(builtins_source));
if (!maybe_pat.success) {
raise_parse_error(L, maybe_pat);
return 0;
}
builtins = maybe_pat.value.pat;
- lua_pushlightuserdata(L, (void*)&PAT_METATABLE);
+ lua_pushlightuserdata(L, (void *)&PAT_METATABLE);
luaL_newlib(L, pat_metamethods);
luaL_newlib(L, pat_methods);
lua_setfield(L, -2, "__index");
lua_settable(L, LUA_REGISTRYINDEX);
- lua_pushlightuserdata(L, (void*)&MATCH_METATABLE);
+ lua_pushlightuserdata(L, (void *)&MATCH_METATABLE);
luaL_newlib(L, match_metamethods);
lua_settable(L, LUA_REGISTRYINDEX);
diff --git a/bp.c b/bp.c
index 236aab0..d9cab52 100644
--- a/bp.c
+++ b/bp.c
@@ -30,27 +30,27 @@
#define BP_NAME "bp"
#endif
-static const char *description = BP_NAME" - a Parsing Expression Grammar command line tool";
-static const char *usage = (
- "Usage:\n"
- " "BP_NAME" [flags] <pattern> [<files>...]\n\n"
- "Flags:\n"
- " -A --context-after <n> set number of lines of context to print after the match\n"
- " -B --context-before <n> set number of lines of context to print before the match\n"
- " -C --context <context> set number of lines of context to print before and after the match\n"
- " -G --git in a git repository, treat filenames as patterns for `git ls-files`\n"
- " -I --inplace modify a file in-place\n"
- " -c --case use case sensitivity\n"
- " -e --explain explain the matches\n"
- " -f --format fancy|plain|bare|file:line set the output format\n"
- " -g --grammar <grammar-file> use the specified file as a grammar\n"
- " -h --help print the usage and quit\n"
- " -i --ignore-case preform matching case-insensitively\n"
- " -l --list-files list filenames only\n"
- " -r --replace <replacement> replace the input pattern with the given replacement\n"
- " -s --skip <skip-pattern> skip over the given pattern when looking for matches\n"
- " -v --verbose print verbose debugging info\n"
- " -w --word <string-pat> find words matching the given string pattern\n");
+static const char *description = BP_NAME " - a Parsing Expression Grammar command line tool";
+static const char *usage =
+ ("Usage:\n"
+ " " BP_NAME " [flags] <pattern> [<files>...]\n\n"
+ "Flags:\n"
+ " -A --context-after <n> set number of lines of context to print after the match\n"
+ " -B --context-before <n> set number of lines of context to print before the match\n"
+ " -C --context <context> set number of lines of context to print before and after the match\n"
+ " -G --git in a git repository, treat filenames as patterns for `git ls-files`\n"
+ " -I --inplace modify a file in-place\n"
+ " -c --case use case sensitivity\n"
+ " -e --explain explain the matches\n"
+ " -f --format fancy|plain|bare|file:line set the output format\n"
+ " -g --grammar <grammar-file> use the specified file as a grammar\n"
+ " -h --help print the usage and quit\n"
+ " -i --ignore-case preform matching case-insensitively\n"
+ " -l --list-files list filenames only\n"
+ " -r --replace <replacement> replace the input pattern with the given replacement\n"
+ " -s --skip <skip-pattern> skip over the given pattern when looking for matches\n"
+ " -v --verbose print verbose debugging info\n"
+ " -w --word <string-pat> find words matching the given string pattern\n");
// Used as a heuristic to check if a file is binary or text:
#define CHECK_FIRST_N_BYTES 256
@@ -91,8 +91,7 @@ static file_t *backup_file;
//
// Helper function to reduce code duplication
//
-static inline void fprint_filename(FILE *out, const char *filename)
-{
+static inline void fprint_filename(FILE *out, const char *filename) {
if (!filename[0]) return;
if (options.format == FORMAT_FANCY) fprintf(out, "\033[0;1;4;33m%s\033[m\n", filename);
else fprintf(out, "%s:\n", filename);
@@ -101,59 +100,51 @@ static inline void fprint_filename(FILE *out, const char *filename)
//
// If there was a parse error while building a pattern, print an error message and exit.
//
-static inline bp_pat_t *assert_pat(const char *start, const char *end, maybe_pat_t maybe_pat)
-{
+static inline bp_pat_t *assert_pat(const char *start, const char *end, maybe_pat_t maybe_pat) {
if (!end) end = start + strlen(start);
if (!maybe_pat.success) {
- const char *err_start = maybe_pat.value.error.start,
- *err_end = maybe_pat.value.error.end,
- *err_msg = maybe_pat.value.error.msg;
+ const char *err_start = maybe_pat.value.error.start, *err_end = maybe_pat.value.error.end,
+ *err_msg = maybe_pat.value.error.msg;
const char *nl = memrchr(start, '\n', (size_t)(err_start - start));
- const char *sol = nl ? nl+1 : start;
+ const char *sol = nl ? nl + 1 : start;
nl = memchr(err_start, '\n', (size_t)(end - err_start));
const char *eol = nl ? nl : end;
if (eol < err_end) err_end = eol;
fprintf(stderr, "\033[31;1m%s\033[0m\n", err_msg);
- fprintf(stderr, "%.*s\033[41;30m%.*s\033[m%.*s\n",
- (int)(err_start - sol), sol,
- (int)(err_end - err_start), err_start,
- (int)(eol - err_end), err_end);
+ fprintf(stderr, "%.*s\033[41;30m%.*s\033[m%.*s\n", (int)(err_start - sol), sol, (int)(err_end - err_start),
+ err_start, (int)(eol - err_end), err_end);
fprintf(stderr, "\033[34;1m");
const char *p = sol;
- for (; p < err_start; ++p) (void)fputc(*p == '\t' ? '\t' : ' ', stderr);
+ for (; p < err_start; ++p)
+ (void)fputc(*p == '\t' ? '\t' : ' ', stderr);
if (err_start == err_end) ++err_end;
for (; p < err_end; ++p)
if (*p == '\t')
// Some janky hacks: 8 ^'s, backtrack 8 spaces, move forward a tab stop, clear any ^'s that overshot
fprintf(stderr, "^^^^^^^^\033[8D\033[I\033[K");
- else
- (void)fputc('^', stderr);
+ else (void)fputc('^', stderr);
fprintf(stderr, "\033[m\n");
exit(EXIT_FAILURE);
}
return maybe_pat.value.pat;
}
-
//
// Look for a key/value flag at the first position in the given argument list.
// If the flag is found, update `next` to point to the next place to check for a flag.
// The contents of argv[0] may be modified for single-char flags.
// Return the flag's value.
//
-__attribute__((nonnull))
-static char *get_flag(char *argv[], const char *flag, char ***next)
-{
+__attribute__((nonnull)) static char *get_flag(char *argv[], const char *flag, char ***next) {
size_t n = strlen(flag);
if (strncmp(argv[0], flag, n) != 0) return NULL;
if (argv[0][n] == '=') { // --foo=baz, -f=baz
*next = &argv[1];
- return &argv[0][n+1];
+ return &argv[0][n + 1];
} else if (argv[0][n] == '\0') { // --foo baz, -f baz
- if (!argv[1])
- errx(EXIT_FAILURE, "Expected argument after '%s'\n\n%s", flag, usage);
+ if (!argv[1]) errx(EXIT_FAILURE, "Expected argument after '%s'\n\n%s", flag, usage);
*next = &argv[2];
return argv[1];
} else if (flag[0] == '-' && flag[1] != '-' && flag[2] == '\0') { // -f...
@@ -169,16 +160,14 @@ static char *get_flag(char *argv[], const char *flag, char ***next)
// The contents of argv[0] may be modified for single-char flags.
// Return a boolean for whether or not the flag was found.
//
-__attribute__((nonnull))
-static bool get_boolflag(char *argv[], const char *flag, char ***next)
-{
+__attribute__((nonnull)) static bool get_boolflag(char *argv[], const char *flag, char ***next) {
size_t n = strlen(flag);
if (strncmp(argv[0], flag, n) != 0) return false;
if (argv[0][n] == '\0') { // --foo, -f
*next = &argv[1];
return true;
} else if (flag[0] == '-' && flag[1] != '-' && flag[2] == '\0') { // -f...
- memmove(&argv[0][1], &argv[0][2], 1+strlen(&argv[0][2])); // Shift the flags down
+ memmove(&argv[0][1], &argv[0][2], 1 + strlen(&argv[0][2])); // Shift the flags down
*next = argv;
return true;
}
@@ -189,32 +178,27 @@ static bool get_boolflag(char *argv[], const char *flag, char ***next)
// Scan the first few dozen bytes of a file and return 1 if the contents all
// look like printable text characters, otherwise return 0.
//
-static int is_text_file(const char *filename)
-{
+static int is_text_file(const char *filename) {
int fd = open(filename, O_RDONLY);
if (fd < 0) return 0;
char buf[CHECK_FIRST_N_BYTES];
- ssize_t len = read(fd, buf, sizeof(buf)/sizeof(char));
+ ssize_t len = read(fd, buf, sizeof(buf) / sizeof(char));
(void)close(fd);
if (len < 0) return 0;
for (ssize_t i = 0; i < len; i++)
- if (isascii(buf[i]) && !(isprint(buf[i]) || isspace(buf[i])))
- return 0;
+ if (isascii(buf[i]) && !(isprint(buf[i]) || isspace(buf[i]))) return 0;
return 1;
}
//
// Print matches in a visual explanation style
//
-static int explain_matches(file_t *f, bp_pat_t *pattern, bp_pat_t *defs)
-{
+static int explain_matches(file_t *f, bp_pat_t *pattern, bp_pat_t *defs) {
int nmatches = 0;
- for (bp_match_t *m = NULL; next_match(&m, f->start, f->end, pattern, defs, options.skip, options.ignorecase); ) {
+ for (bp_match_t *m = NULL; next_match(&m, f->start, f->end, pattern, defs, options.skip, options.ignorecase);) {
if (++nmatches == 1) {
- if (options.print_filenames)
- fprint_filename(stdout, f->filename);
- } else
- printf("\n\n");
+ if (options.print_filenames) fprint_filename(stdout, f->filename);
+ } else printf("\n\n");
explain_match(m);
}
return nmatches;
@@ -224,14 +208,11 @@ static int explain_matches(file_t *f, bp_pat_t *pattern, bp_pat_t *defs)
// Cleanup function to ensure no temp files are left around if the program
// exits unexpectedly.
//
-static void cleanup(void)
-{
+static void cleanup(void) {
if (modifying_file && backup_file) {
rewind(modifying_file);
ftruncate(fileno(modifying_file), 0);
- (void)fwrite(backup_file->start, 1,
- (size_t)(backup_file->end - backup_file->start),
- modifying_file);
+ (void)fwrite(backup_file->start, 1, (size_t)(backup_file->end - backup_file->start), modifying_file);
fclose(modifying_file);
modifying_file = NULL;
}
@@ -241,23 +222,22 @@ static void cleanup(void)
//
// Signal handler to ensure cleanup happens.
//
-static void sig_handler(int sig)
-{
+static void sig_handler(int sig) {
cleanup();
if (kill(0, sig)) _exit(EXIT_FAILURE);
}
-int fprint_linenum(FILE *out, file_t *f, int linenum, const char *normal_color)
-{
+int fprint_linenum(FILE *out, file_t *f, int linenum, const char *normal_color) {
int printed = 0;
switch (options.format) {
- case FORMAT_FANCY: case FORMAT_PLAIN: {
+ case FORMAT_FANCY:
+ case FORMAT_PLAIN: {
int space = 0;
- for (int i = (int)f->nlines; i > 0; i /= 10) ++space;
+ for (int i = (int)f->nlines; i > 0; i /= 10)
+ ++space;
if (options.format == FORMAT_FANCY)
printed += fprintf(out, "\033[0;2m%*d\033(0\x78\033(B%s", space, linenum, normal_color ? normal_color : "");
- else
- printed += fprintf(out, "%*d|", space, linenum);
+ else printed += fprintf(out, "%*d|", space, linenum);
break;
}
case FORMAT_FILE_LINE: {
@@ -271,8 +251,7 @@ int fprint_linenum(FILE *out, file_t *f, int linenum, const char *normal_color)
static file_t *printing_file = NULL;
static int last_line_num = -1;
-static int _fprint_between(FILE *out, const char *start, const char *end, const char *normal_color)
-{
+static int _fprint_between(FILE *out, const char *start, const char *end, const char *normal_color) {
int printed = 0;
do {
// Cheeky lookbehind to see if line number should be printed
@@ -295,8 +274,7 @@ static int _fprint_between(FILE *out, const char *start, const char *end, const
return printed;
}
-static void fprint_context(FILE *out, file_t *f, const char *prev, const char *next)
-{
+static void fprint_context(FILE *out, file_t *f, const char *prev, const char *next) {
if (options.context_before == ALL_CONTEXT || options.context_after == ALL_CONTEXT) {
_fprint_between(out, prev ? prev : f->start, next ? next : f->end, "\033[m");
return;
@@ -304,15 +282,18 @@ static void fprint_context(FILE *out, file_t *f, const char *prev, const char *n
const char *before_next = next;
if (next && options.context_before >= 0) {
size_t line_before_next = get_line_number(printing_file, next);
- line_before_next = options.context_before >= (int)line_before_next ? 1 : line_before_next - (size_t)options.context_before;
+ line_before_next =
+ options.context_before >= (int)line_before_next ? 1 : line_before_next - (size_t)options.context_before;
before_next = get_line(printing_file, line_before_next);
if (prev && before_next < prev) before_next = prev;
}
const char *after_prev = prev;
if (prev && options.context_after >= 0) {
size_t line_after_prev = get_line_number(printing_file, prev) + (size_t)options.context_after + 1;
- after_prev = line_after_prev > printing_file->nlines ?
- printing_file->end : get_line(printing_file, line_after_prev > printing_file->nlines ? printing_file->nlines : line_after_prev);
+ after_prev = line_after_prev > printing_file->nlines
+ ? printing_file->end
+ : get_line(printing_file,
+ line_after_prev > printing_file->nlines ? printing_file->nlines : line_after_prev);
if (next && after_prev > next) after_prev = next;
}
if (next && prev && after_prev >= before_next) {
@@ -323,11 +304,12 @@ static void fprint_context(FILE *out, file_t *f, const char *prev, const char *n
}
}
-static void on_nl(FILE *out)
-{
+static void on_nl(FILE *out) {
switch (options.format) {
- case FORMAT_FANCY: case FORMAT_PLAIN:
- for (int i = (int)printing_file->nlines; i > 0; i /= 10) fputc('.', out);
+ case FORMAT_FANCY:
+ case FORMAT_PLAIN:
+ for (int i = (int)printing_file->nlines; i > 0; i /= 10)
+ fputc('.', out);
fprintf(out, "%s", options.format == FORMAT_FANCY ? "\033[0;2m\033(0\x78\033(B\033[m" : "|");
break;
default: break;
@@ -337,8 +319,7 @@ static void on_nl(FILE *out)
//
// Print all the matches in a file.
//
-static int print_matches(FILE *out, file_t *f, bp_pat_t *pattern, bp_pat_t *defs)
-{
+static int print_matches(FILE *out, file_t *f, bp_pat_t *pattern, bp_pat_t *defs) {
static int printed_filenames = 0;
int matches = 0;
const char *prev = NULL;
@@ -352,7 +333,7 @@ static int print_matches(FILE *out, file_t *f, bp_pat_t *pattern, bp_pat_t *defs
print_opts.replace_color = "\033[0;34;1m";
print_opts.normal_color = "\033[m";
}
- for (bp_match_t *m = NULL; next_match(&m, f->start, f->end, pattern, defs, options.skip, options.ignorecase); ) {
+ for (bp_match_t *m = NULL; next_match(&m, f->start, f->end, pattern, defs, options.skip, options.ignorecase);) {
if (++matches == 1 && options.print_filenames) {
if (printed_filenames++ > 0) printf("\n");
fprint_filename(out, f->filename);
@@ -381,9 +362,7 @@ static int print_matches(FILE *out, file_t *f, bp_pat_t *pattern, bp_pat_t *defs
// For a given filename, open the file and attempt to match the given pattern
// against it, printing any results according to the flags.
//
-__attribute__((nonnull))
-static int process_file(const char *filename, bp_pat_t *pattern, bp_pat_t *defs)
-{
+__attribute__((nonnull)) static int process_file(const char *filename, bp_pat_t *pattern, bp_pat_t *defs) {
file_t *f = load_file(NULL, filename);
if (f == NULL) {
fprintf(stderr, "Could not open file: %s\n%s\n", filename, strerror(errno));
@@ -416,15 +395,17 @@ static int process_file(const char *filename, bp_pat_t *pattern, bp_pat_t *defs)
// Set these temporary values in case the program crashes while in the
// middle of inplace modifying a file. If that happens, these variables
// are used to restore the original file contents.
- modifying_file = out; backup_file = f;
+ modifying_file = out;
+ backup_file = f;
{
matches += print_matches(out, f, pattern, defs);
}
- modifying_file = NULL; backup_file = NULL;
+ modifying_file = NULL;
+ backup_file = NULL;
fclose(out);
if (matches > 0)
- printf(getenv("NO_COLOR") ? "%s: %d replacement%s\n" : "\x1b[33;1m%s:\x1b[m %d replacement%s\n",
- filename, matches, matches == 1 ? "" : "s");
+ printf(getenv("NO_COLOR") ? "%s: %d replacement%s\n" : "\x1b[33;1m%s:\x1b[m %d replacement%s\n", filename,
+ matches, matches == 1 ? "" : "s");
} else {
matches += print_matches(stdout, f, pattern, defs);
}
@@ -440,12 +421,10 @@ static int process_file(const char *filename, bp_pat_t *pattern, bp_pat_t *defs)
//
// Recursively process all non-dotfile files in the given directory.
//
-__attribute__((nonnull))
-static int process_dir(const char *dirname, bp_pat_t *pattern, bp_pat_t *defs)
-{
+__attribute__((nonnull)) static int process_dir(const char *dirname, bp_pat_t *pattern, bp_pat_t *defs) {
int matches = 0;
glob_t globbuf;
- char globpath[PATH_MAX+1] = {'\0'};
+ char globpath[PATH_MAX + 1] = {'\0'};
if (snprintf(globpath, PATH_MAX, "%s/*", dirname) > (int)PATH_MAX)
errx(EXIT_FAILURE, "Filename is too long: %s/*", dirname);
int status = glob(globpath, 0, NULL, &globbuf);
@@ -455,12 +434,9 @@ static int process_dir(const char *dirname, bp_pat_t *pattern, bp_pat_t *defs)
struct stat statbuf;
for (size_t i = 0; i < globbuf.gl_pathc; i++) {
if (lstat(globbuf.gl_pathv[i], &statbuf) != 0) continue;
- if (S_ISLNK(statbuf.st_mode))
- continue; // Skip symbolic links
- else if (S_ISDIR(statbuf.st_mode))
- matches += process_dir(globbuf.gl_pathv[i], pattern, defs);
- else if (is_text_file(globbuf.gl_pathv[i]))
- matches += process_file(globbuf.gl_pathv[i], pattern, defs);
+ if (S_ISLNK(statbuf.st_mode)) continue; // Skip symbolic links
+ else if (S_ISDIR(statbuf.st_mode)) matches += process_dir(globbuf.gl_pathv[i], pattern, defs);
+ else if (is_text_file(globbuf.gl_pathv[i])) matches += process_file(globbuf.gl_pathv[i], pattern, defs);
}
}
globfree(&globbuf);
@@ -470,22 +446,21 @@ static int process_dir(const char *dirname, bp_pat_t *pattern, bp_pat_t *defs)
//
// Process git files using `git ls-files ...`
//
-__attribute__((nonnull(1)))
-static int process_git_files(bp_pat_t *pattern, bp_pat_t *defs, int argc, char *argv[])
-{
+__attribute__((nonnull(1))) static int process_git_files(bp_pat_t *pattern, bp_pat_t *defs, int argc, char *argv[]) {
int fds[2];
require(pipe(fds), "Failed to create pipe");
pid_t child = require(fork(), "Failed to fork");
if (child == 0) {
- const char **git_args = new(char*[3+argc+1]);
+ const char **git_args = new (char * [3 + argc + 1]);
int g = 0;
git_args[g++] = "git";
git_args[g++] = "ls-files";
git_args[g++] = "-z";
- while (*argv) git_args[g++] = *(argv++);
+ while (*argv)
+ git_args[g++] = *(argv++);
require(dup2(fds[STDOUT_FILENO], STDOUT_FILENO), "Failed to hook up pipe to stdout");
require(close(fds[STDIN_FILENO]), "Failed to close read end of pipe");
- (void)execvp("git", (char**)git_args);
+ (void)execvp("git", (char **)git_args);
_exit(EXIT_FAILURE);
}
require(close(fds[STDOUT_FILENO]), "Failed to close write end of pipe");
@@ -495,12 +470,12 @@ static int process_git_files(bp_pat_t *pattern, bp_pat_t *defs, int argc, char *
int found = 0;
while (getdelim(&path, &path_size, '\0', fp) > 0)
found += process_file(path, pattern, defs);
- if (path) delete(&path);
+ if (path) delete (&path);
require(fclose(fp), "Failed to close read end of pipe");
int status;
- while (waitpid(child, &status, 0) != child) continue;
- if (!((WIFEXITED(status) == 1) && (WEXITSTATUS(status) == 0)))
- errx(EXIT_FAILURE, "`git ls-files -z` failed.");
+ while (waitpid(child, &status, 0) != child)
+ continue;
+ if (!((WIFEXITED(status) == 1) && (WEXITSTATUS(status) == 0))) errx(EXIT_FAILURE, "`git ls-files -z` failed.");
return found;
}
@@ -508,16 +483,14 @@ static int process_git_files(bp_pat_t *pattern, bp_pat_t *defs, int argc, char *
// Load the given grammar (semicolon-separated definitions)
// and return the first rule defined.
//
-static bp_pat_t *load_grammar(bp_pat_t *defs, file_t *f)
-{
+static bp_pat_t *load_grammar(bp_pat_t *defs, file_t *f) {
return chain_together(defs, assert_pat(f->start, f->end, bp_pattern(f->start, f->end)));
}
//
// Convert a context string to an integer
//
-static int context_from_flag(const char *flag)
-{
+static int context_from_flag(const char *flag) {
if (streq(flag, "all")) return ALL_CONTEXT;
if (streq(flag, "none")) return NO_CONTEXT;
return (int)strtol(flag, NULL, 10);
@@ -526,11 +499,9 @@ static int context_from_flag(const char *flag)
//
// Check if any letters are uppercase
//
-static bool any_uppercase(const char *str)
-{
+static bool any_uppercase(const char *str) {
for (; *str; ++str) {
- if (isupper(*str))
- return true;
+ if (isupper(*str)) return true;
}
return false;
}
@@ -538,8 +509,7 @@ static bool any_uppercase(const char *str)
#define FLAG(f) (flag = get_flag(argv, f, &argv))
#define BOOLFLAG(f) get_boolflag(argv, f, &argv)
-int main(int argc, char *argv[])
-{
+int main(int argc, char *argv[]) {
char *flag = NULL;
bp_pat_t *defs = NULL;
@@ -547,9 +517,9 @@ int main(int argc, char *argv[])
bp_pat_t *pattern = NULL;
// Load builtins:
- file_t *builtins_file = load_file(&loaded_files, "/etc/"BP_NAME"/builtins.bp");
+ file_t *builtins_file = load_file(&loaded_files, "/etc/" BP_NAME "/builtins.bp");
if (builtins_file) defs = load_grammar(defs, builtins_file);
- file_t *local_file = load_filef(&loaded_files, "%s/.config/"BP_NAME"/builtins.bp", getenv("HOME"));
+ file_t *local_file = load_filef(&loaded_files, "%s/.config/" BP_NAME "/builtins.bp", getenv("HOME"));
if (local_file) defs = load_grammar(defs, local_file);
bool explicit_case_sensitivity = false;
@@ -580,58 +550,50 @@ int main(int argc, char *argv[])
explicit_case_sensitivity = true;
} else if (BOOLFLAG("-l") || BOOLFLAG("--list-files")) {
options.mode = MODE_LISTFILES;
- } else if (FLAG("-r") || FLAG("--replace")) {
- if (!pattern)
- errx(EXIT_FAILURE, "No pattern has been defined for replacement to operate on");
+ } else if (FLAG("-r") || FLAG("--replace")) {
+ if (!pattern) errx(EXIT_FAILURE, "No pattern has been defined for replacement to operate on");
// TODO: spoof file as sprintf("pattern => '%s'", flag)
// except that would require handling edge cases like quotation marks etc.
- pattern = assert_pat(flag, NULL, bp_replacement(pattern, flag, flag+strlen(flag)));
+ pattern = assert_pat(flag, NULL, bp_replacement(pattern, flag, flag + strlen(flag)));
if (options.context_before == USE_DEFAULT_CONTEXT) options.context_before = ALL_CONTEXT;
if (options.context_after == USE_DEFAULT_CONTEXT) options.context_after = ALL_CONTEXT;
- } else if (FLAG("-g") || FLAG("--grammar")) {
+ } else if (FLAG("-g") || FLAG("--grammar")) {
file_t *f = NULL;
- if (strlen(flag) > 3 && strncmp(&flag[strlen(flag)-3], ".bp", 3) == 0)
- f = load_file(&loaded_files, flag);
- if (f == NULL)
- f = load_filef(&loaded_files, "%s/.config/"BP_NAME"/%s.bp", getenv("HOME"), flag);
- if (f == NULL)
- f = load_filef(&loaded_files, "/etc/"BP_NAME"/%s.bp", flag);
- if (f == NULL)
- errx(EXIT_FAILURE, "Couldn't find grammar: %s", flag);
+ if (strlen(flag) > 3 && strncmp(&flag[strlen(flag) - 3], ".bp", 3) == 0) f = load_file(&loaded_files, flag);
+ if (f == NULL) f = load_filef(&loaded_files, "%s/.config/" BP_NAME "/%s.bp", getenv("HOME"), flag);
+ if (f == NULL) f = load_filef(&loaded_files, "/etc/" BP_NAME "/%s.bp", flag);
+ if (f == NULL) errx(EXIT_FAILURE, "Couldn't find grammar: %s", flag);
defs = load_grammar(defs, f); // Keep in memory for debug output
- } else if (FLAG("-w") || FLAG("--word")) {
+ } else if (FLAG("-w") || FLAG("--word")) {
require(asprintf(&flag, "{|}%s{|}", flag), "Could not allocate memory");
file_t *arg_file = spoof_file(&loaded_files, "<word pattern>", flag, -1);
- if (!explicit_case_sensitivity)
- options.ignorecase = !any_uppercase(flag);
- delete(&flag);
+ if (!explicit_case_sensitivity) options.ignorecase = !any_uppercase(flag);
+ delete (&flag);
bp_pat_t *p = assert_pat(arg_file->start, arg_file->end, bp_stringpattern(arg_file->start, arg_file->end));
pattern = chain_together(pattern, p);
- } else if (FLAG("-s") || FLAG("--skip")) {
- bp_pat_t *s = assert_pat(flag, NULL, bp_pattern(flag, flag+strlen(flag)));
+ } else if (FLAG("-s") || FLAG("--skip")) {
+ bp_pat_t *s = assert_pat(flag, NULL, bp_pattern(flag, flag + strlen(flag)));
options.skip = either_pat(options.skip, s);
- } else if (FLAG("-C") || FLAG("--context")) {
+ } else if (FLAG("-C") || FLAG("--context")) {
options.context_before = options.context_after = context_from_flag(flag);
- } else if (FLAG("-B") || FLAG("--before-context")) {
+ } else if (FLAG("-B") || FLAG("--before-context")) {
options.context_before = context_from_flag(flag);
- } else if (FLAG("-A") || FLAG("--after-context")) {
+ } else if (FLAG("-A") || FLAG("--after-context")) {
options.context_after = context_from_flag(flag);
- } else if (FLAG("-f") || FLAG("--format")) {
+ } else if (FLAG("-f") || FLAG("--format")) {
if (streq(flag, "fancy")) options.format = FORMAT_FANCY;
else if (streq(flag, "plain")) options.format = FORMAT_PLAIN;
else if (streq(flag, "bare")) options.format = FORMAT_BARE;
else if (streq(flag, "file:line")) {
options.format = FORMAT_FILE_LINE;
options.print_filenames = 0;
- } else if (!streq(flag, "auto"))
- errx(EXIT_FAILURE, "Unknown --format option: %s", flag);
+ } else if (!streq(flag, "auto")) errx(EXIT_FAILURE, "Unknown --format option: %s", flag);
} else if (argv[0][0] != '-' || strncmp(argv[0], "->", 2) == 0) {
// As a special case, support `bp '->foo'` as a way to search for
// pointer field accesses without needing to escape anything.
if (pattern != NULL) break;
- bp_pat_t *p = assert_pat(argv[0], NULL, bp_stringpattern(argv[0], argv[0]+strlen(argv[0])));
- if (!explicit_case_sensitivity)
- options.ignorecase = !any_uppercase(argv[0]);
+ bp_pat_t *p = assert_pat(argv[0], NULL, bp_stringpattern(argv[0], argv[0] + strlen(argv[0])));
+ if (!explicit_case_sensitivity) options.ignorecase = !any_uppercase(argv[0]);
pattern = chain_together(pattern, p);
++argv;
} else {
@@ -639,10 +601,10 @@ int main(int argc, char *argv[])
}
}
- if (pattern == NULL)
- errx(EXIT_FAILURE, "No pattern provided.\n\n%s", usage);
+ if (pattern == NULL) errx(EXIT_FAILURE, "No pattern provided.\n\n%s", usage);
- for (argc = 0; argv[argc]; ++argc) ; // update argc
+ for (argc = 0; argv[argc]; ++argc)
+ ; // update argc
if (options.context_before == USE_DEFAULT_CONTEXT) options.context_before = 0;
if (options.context_after == USE_DEFAULT_CONTEXT) options.context_after = 0;
@@ -654,7 +616,7 @@ int main(int argc, char *argv[])
// be sure to clean it up before exiting.
int signals[] = {SIGTERM, SIGINT, SIGXCPU, SIGXFSZ, SIGVTALRM, SIGPROF, SIGSEGV, SIGTSTP};
struct sigaction sa = {.sa_handler = &sig_handler, .sa_flags = (int)(SA_NODEFER | SA_RESETHAND)};
- for (size_t i = 0; i < sizeof(signals)/sizeof(signals[0]); i++)
+ for (size_t i = 0; i < sizeof(signals) / sizeof(signals[0]); i++)
require(sigaction(signals[i], &sa, NULL), "Failed to set signal handler");
// Handle exit() calls gracefully:
@@ -668,8 +630,7 @@ int main(int argc, char *argv[])
// Default to git mode if there's a .git directory and no files were specified:
struct stat gitdir;
- if (argc == 0 && stat(".git", &gitdir) == 0 && S_ISDIR(gitdir.st_mode))
- options.git_mode = true;
+ if (argc == 0 && stat(".git", &gitdir) == 0 && S_ISDIR(gitdir.st_mode)) options.git_mode = true;
int found = 0;
if (!isatty(STDIN_FILENO) && !argv[0]) {
@@ -682,13 +643,14 @@ int main(int argc, char *argv[])
} else if (argv[0]) {
// Files passed in as command line args:
struct stat statbuf;
- if (!argv[1] && !(stat(argv[0], &statbuf) == 0 && S_ISDIR(statbuf.st_mode))) // Don't print filename for single-file matching
+ if (!argv[1]
+ && !(stat(argv[0], &statbuf) == 0
+ && S_ISDIR(statbuf.st_mode))) // Don't print filename for single-file matching
options.print_filenames = false;
- for ( ; argv[0]; argv++) {
+ for (; argv[0]; argv++) {
if (stat(argv[0], &statbuf) == 0 && S_ISDIR(statbuf.st_mode)) // Symlinks are okay if manually specified
found += process_dir(argv[0], pattern, defs);
- else
- found += process_file(argv[0], pattern, defs);
+ else found += process_file(argv[0], pattern, defs);
}
} else {
// No files, no piped in input, so use files in current dir, recursively
diff --git a/files.c b/files.c
index f7910a5..b328056 100644
--- a/files.c
+++ b/files.c
@@ -11,29 +11,24 @@
#include <sys/stat.h>
#include "files.h"
-#include "match.h"
-#include "pattern.h"
#include "utils.h"
//
// In the file object, populate the `lines` array with pointers to the
// beginning of each line.
//
-__attribute__((nonnull))
-static void populate_lines(file_t *f)
-{
+__attribute__((nonnull)) static void populate_lines(file_t *f) {
// Calculate line numbers:
size_t linecap = 10;
- f->lines = new(const char*[linecap]);
+ f->lines = new (const char *[linecap]);
f->nlines = 0;
char *p = f->start;
for (size_t n = 0; p && p <= f->end; ++n) {
++f->nlines;
- if (n >= linecap)
- f->lines = grow(f->lines, linecap *= 2);
+ if (n >= linecap) f->lines = grow(f->lines, linecap *= 2);
f->lines[n] = p;
char *nl = memchr(p, '\n', (size_t)(f->end - p));
- if (nl && nl < f->end) p = nl+1;
+ if (nl && nl < f->end) p = nl + 1;
else break;
}
}
@@ -42,13 +37,12 @@ static void populate_lines(file_t *f)
// Read an entire file into memory, using a printf-style formatting string to
// construct the filename.
//
-public file_t *load_filef(file_t **files, const char *fmt, ...)
-{
- char filename[PATH_MAX+1] = {'\0'};
+public
+file_t *load_filef(file_t **files, const char *fmt, ...) {
+ char filename[PATH_MAX + 1] = {'\0'};
va_list args;
va_start(args, fmt);
- if (vsnprintf(filename, PATH_MAX, fmt, args) > (int)PATH_MAX)
- errx(EXIT_FAILURE, "File name is too large");
+ if (vsnprintf(filename, PATH_MAX, fmt, args) > (int)PATH_MAX) errx(EXIT_FAILURE, "File name is too large");
va_end(args);
return load_file(files, filename);
}
@@ -56,8 +50,8 @@ public file_t *load_filef(file_t **files, const char *fmt, ...)
//
// Read an entire file into memory.
//
-public file_t *load_file(file_t **files, const char *filename)
-{
+public
+file_t *load_file(file_t **files, const char *filename) {
int fd = filename[0] == '\0' ? STDIN_FILENO : open(filename, O_RDONLY);
if (fd < 0) {
// Check for <file>:<line>
@@ -68,9 +62,9 @@ public file_t *load_file(file_t **files, const char *filename)
*colon = '\0';
file_t *f = load_file(files, tmp);
if (!f) return f;
- long line = strtol(colon+1, &colon, 10);
- f->start = (char*)get_line(f, (size_t)line);
- f->end = (char*)get_line(f, (size_t)line+1);
+ long line = strtol(colon + 1, &colon, 10);
+ f->start = (char *)get_line(f, (size_t)line);
+ f->end = (char *)get_line(f, (size_t)line + 1);
return f;
}
return NULL;
@@ -78,13 +72,12 @@ public file_t *load_file(file_t **files, const char *filename)
filename = checked_strdup(filename);
for (const char *slashes = strstr(filename, "//"); slashes; slashes = strstr(slashes, "//"))
- memmove((char*)slashes, slashes+1, strlen(slashes+1)+1);
- file_t *f = new(file_t);
+ memmove((char *)slashes, slashes + 1, strlen(slashes + 1) + 1);
+ file_t *f = new (file_t);
f->filename = filename;
struct stat sb;
- if (fstat(fd, &sb) == -1)
- goto read_file;
+ if (fstat(fd, &sb) == -1) goto read_file;
f->mmapped = mmap(NULL, (size_t)sb.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
if (f->mmapped == MAP_FAILED) {
@@ -95,24 +88,21 @@ public file_t *load_file(file_t **files, const char *filename)
f->end = &f->mmapped[sb.st_size];
goto finished_loading;
- read_file:
- {
- size_t capacity = 1000, length = 0;
- f->allocated = new(char[capacity]);
- ssize_t just_read;
- while ((just_read=read(fd, &f->allocated[length], (capacity-1) - length)) > 0) {
- length += (size_t)just_read;
- if (length >= capacity-1)
- f->allocated = grow(f->allocated, capacity *= 2);
- }
- f->allocated[length] = '\0';
- f->start = f->allocated;
- f->end = &f->allocated[length];
+read_file: {
+ size_t capacity = 1000, length = 0;
+ f->allocated = new (char[capacity]);
+ ssize_t just_read;
+ while ((just_read = read(fd, &f->allocated[length], (capacity - 1) - length)) > 0) {
+ length += (size_t)just_read;
+ if (length >= capacity - 1) f->allocated = grow(f->allocated, capacity *= 2);
}
+ f->allocated[length] = '\0';
+ f->start = f->allocated;
+ f->end = &f->allocated[length];
+}
- finished_loading:
- if (fd != STDIN_FILENO)
- require(close(fd), "Failed to close file");
+finished_loading:
+ if (fd != STDIN_FILENO) require(close(fd), "Failed to close file");
populate_lines(f);
if (files != NULL) {
@@ -125,26 +115,26 @@ public file_t *load_file(file_t **files, const char *filename)
//
// Set a file struct to represent a region of a different file.
//
-public void slice_file(file_t *slice, file_t *src, const char *start, const char *end)
-{
+public
+void slice_file(file_t *slice, file_t *src, const char *start, const char *end) {
memset(slice, 0, sizeof(file_t));
slice->filename = src->filename;
slice->lines = src->lines;
slice->nlines = src->nlines;
- slice->start = (char*)start;
- slice->end = (char*)end;
+ slice->start = (char *)start;
+ slice->end = (char *)end;
}
//
// Create a virtual file from a string.
//
-public file_t *spoof_file(file_t **files, const char *filename, const char *text, ssize_t _len)
-{
+public
+file_t *spoof_file(file_t **files, const char *filename, const char *text, ssize_t _len) {
if (filename == NULL) filename = "";
- file_t *f = new(file_t);
+ file_t *f = new (file_t);
size_t len = _len == -1 ? strlen(text) : (size_t)_len;
f->filename = checked_strdup(filename);
- f->allocated = new(char[len+1]);
+ f->allocated = new (char[len + 1]);
memcpy(f->allocated, text, len);
f->start = &f->allocated[0];
f->end = &f->allocated[len];
@@ -160,43 +150,36 @@ public file_t *spoof_file(file_t **files, const char *filename, const char *text
// Free a file and all memory contained inside its members, then set the input
// pointer to NULL.
//
-public void destroy_file(file_t **at_f)
-{
- file_t *f = (file_t*)*at_f;
- if (f->filename)
- delete(&f->filename);
+public
+void destroy_file(file_t **at_f) {
+ file_t *f = (file_t *)*at_f;
+ if (f->filename) delete (&f->filename);
- if (f->lines)
- delete(&f->lines);
+ if (f->lines) delete (&f->lines);
- if (f->allocated)
- delete(&f->allocated);
+ if (f->allocated) delete (&f->allocated);
if (f->mmapped) {
- require(munmap(f->mmapped, (size_t)(f->end - f->mmapped)),
- "Failure to un-memory-map some memory");
+ require(munmap(f->mmapped, (size_t)(f->end - f->mmapped)), "Failure to un-memory-map some memory");
f->mmapped = NULL;
}
- delete(at_f);
+ delete (at_f);
}
//
// Given a pointer, determine which line number it points to.
//
-public size_t get_line_number(file_t *f, const char *p)
-{
+public
+size_t get_line_number(file_t *f, const char *p) {
if (f->nlines == 0) return 0;
// Binary search:
- size_t lo = 0, hi = f->nlines-1;
+ size_t lo = 0, hi = f->nlines - 1;
while (lo <= hi) {
size_t mid = (lo + hi) / 2;
- if (f->lines[mid] == p)
- return mid + 1;
- else if (f->lines[mid] < p)
- lo = mid + 1;
- else if (f->lines[mid] > p)
- hi = mid - 1;
+ if (f->lines[mid] == p) return mid + 1;
+ else if (f->lines[mid] < p) lo = mid + 1;
+ else if (f->lines[mid] > p) hi = mid - 1;
}
return lo; // Return the line number whose line starts closest before p
}
@@ -204,8 +187,8 @@ public size_t get_line_number(file_t *f, const char *p)
//
// Given a pointer, determine which line column it points to.
//
-public size_t get_line_column(file_t *f, const char *p)
-{
+public
+size_t get_line_column(file_t *f, const char *p) {
size_t line_no = get_line_number(f, p);
return 1 + (size_t)(p - f->lines[line_no]);
}
@@ -213,8 +196,8 @@ public size_t get_line_column(file_t *f, const char *p)
//
// Return a pointer to the line with the specified line number.
//
-public const char *get_line(file_t *f, size_t line_number)
-{
+public
+const char *get_line(file_t *f, size_t line_number) {
if (line_number == 0 || line_number > f->nlines) return NULL;
return f->lines[line_number - 1];
}
diff --git a/files.h b/files.h
index 2b2b64c..e224dc5 100644
--- a/files.h
+++ b/files.h
@@ -14,21 +14,14 @@ typedef struct file_s {
size_t nlines;
} file_t;
-__attribute__((nonnull(2)))
-file_t *load_file(file_t **files, const char *filename);
-__attribute__((format(printf,2,3)))
-file_t *load_filef(file_t **files, const char *fmt, ...);
-__attribute__((nonnull))
-void slice_file(file_t *slice, file_t *src, const char *start, const char *end);
-__attribute__((nonnull(3), returns_nonnull))
-file_t *spoof_file(file_t **files, const char *filename, const char *text, ssize_t len);
-__attribute__((nonnull))
-void destroy_file(file_t **f);
-__attribute__((pure, nonnull))
-size_t get_line_number(file_t *f, const char *p);
-__attribute__((pure, nonnull))
-size_t get_line_column(file_t *f, const char *p);
-__attribute__((pure, nonnull))
-const char *get_line(file_t *f, size_t line_number);
+__attribute__((nonnull(2))) file_t *load_file(file_t **files, const char *filename);
+__attribute__((format(printf, 2, 3))) file_t *load_filef(file_t **files, const char *fmt, ...);
+__attribute__((nonnull)) void slice_file(file_t *slice, file_t *src, const char *start, const char *end);
+__attribute__((nonnull(3), returns_nonnull)) file_t *spoof_file(file_t **files, const char *filename, const char *text,
+ ssize_t len);
+__attribute__((nonnull)) void destroy_file(file_t **f);
+__attribute__((pure, nonnull)) size_t get_line_number(file_t *f, const char *p);
+__attribute__((pure, nonnull)) size_t get_line_column(file_t *f, const char *p);
+__attribute__((pure, nonnull)) const char *get_line(file_t *f, size_t line_number);
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/match.c b/match.c
index e714a45..8175c0c 100644
--- a/match.c
+++ b/match.c
@@ -13,10 +13,10 @@
#include "match.h"
#include "pattern.h"
-#include "utils.h"
#include "utf8.h"
+#include "utils.h"
-#define MAX_CACHE_SIZE (1<<14)
+#define MAX_CACHE_SIZE (1 << 14)
// Cache entries for results of matching a pattern at a string position
typedef struct cache_entry_s {
@@ -51,31 +51,27 @@ typedef struct match_ctx_s {
static bp_match_t *unused_matches = NULL;
static bp_match_t *in_use_matches = NULL;
-static void default_error_handler(char **msg) {
- errx(EXIT_FAILURE, "%s", *msg);
-}
+static void default_error_handler(char **msg) { errx(EXIT_FAILURE, "%s", *msg); }
static bp_errhand_t error_handler = default_error_handler;
-public bp_errhand_t bp_set_error_handler(bp_errhand_t new_handler)
-{
+public
+bp_errhand_t bp_set_error_handler(bp_errhand_t new_handler) {
bp_errhand_t old_handler = error_handler;
error_handler = new_handler;
return old_handler;
}
-#define MATCHES(...) (bp_match_t*[]){__VA_ARGS__, NULL}
+#define MATCHES(...) \
+ (bp_match_t *[]) { __VA_ARGS__, NULL }
-__attribute__((hot, nonnull(1,2,3)))
-static bp_match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat);
-__attribute__((returns_nonnull))
-static bp_match_t *new_match(bp_pat_t *pat, const char *start, const char *end, bp_match_t *children[]);
+__attribute__((hot, nonnull(1, 2, 3))) static bp_match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat);
+__attribute__((returns_nonnull)) static bp_match_t *new_match(bp_pat_t *pat, const char *start, const char *end,
+ bp_match_t *children[]);
char *error_message = NULL;
-__attribute__((format(printf,2,3)))
-static inline void match_error(match_ctx_t *ctx, const char *fmt, ...)
-{
+__attribute__((format(printf, 2, 3))) static inline void match_error(match_ctx_t *ctx, const char *fmt, ...) {
va_list args;
va_start(args, fmt);
if (error_message) free(error_message);
@@ -84,8 +80,7 @@ static inline void match_error(match_ctx_t *ctx, const char *fmt, ...)
longjmp(ctx->error_jump, 1);
}
-static bp_match_t *clone_match(bp_match_t *m)
-{
+static bp_match_t *clone_match(bp_match_t *m) {
if (!m) return NULL;
bp_match_t *ret = new_match(m->pat, m->start, m->end, NULL);
if (m->children) {
@@ -95,9 +90,10 @@ static bp_match_t *clone_match(bp_match_t *m)
ret->children = ret->_children;
}
for (int i = 0; m->children[i]; i++) {
- if (nchildren+1 >= child_cap) {
+ if (nchildren + 1 >= child_cap) {
ret->children = grow(ret->children, child_cap += 5);
- for (size_t j = nchildren; j < child_cap; j++) ret->children[j] = NULL;
+ for (size_t j = nchildren; j < child_cap; j++)
+ ret->children[j] = NULL;
}
ret->children[nchildren++] = clone_match(m->children[i]);
}
@@ -106,10 +102,8 @@ static bp_match_t *clone_match(bp_match_t *m)
}
// Prepend to a doubly linked list
-static inline void gc_list_prepend(bp_match_t **head, bp_match_t *m)
-{
- if (m->gc.home)
- errx(1, "Node already has a home");
+static inline void gc_list_prepend(bp_match_t **head, bp_match_t *m) {
+ if (m->gc.home) errx(1, "Node already has a home");
m->gc.home = head;
m->gc.next = *head;
if (*head) (*head)->gc.home = &m->gc.next;
@@ -117,10 +111,8 @@ static inline void gc_list_prepend(bp_match_t **head, bp_match_t *m)
}
// Remove from a doubly linked list
-static inline void gc_list_remove(bp_match_t *m)
-{
- if (!m->gc.home)
- errx(1, "Attempt to remove something that isn't in a list");
+static inline void gc_list_remove(bp_match_t *m) {
+ if (!m->gc.home) errx(1, "Attempt to remove something that isn't in a list");
*m->gc.home = m->gc.next;
if (m->gc.next) m->gc.next->gc.home = m->gc.home;
m->gc.home = NULL;
@@ -130,20 +122,16 @@ static inline void gc_list_remove(bp_match_t *m)
//
// Hash a string position/pattern.
//
-static inline size_t hash(const char *str, size_t pat_id)
-{
- return (size_t)str + 2*pat_id;
-}
+static inline size_t hash(const char *str, size_t pat_id) { return (size_t)str + 2 * pat_id; }
//
// Check if we have cached a failure to match a given pattern at the given position.
//
-static bool has_cached_failure(match_ctx_t *ctx, const char *str, bp_pat_t *pat)
-{
+static bool has_cached_failure(match_ctx_t *ctx, const char *str, bp_pat_t *pat) {
if (!ctx->cache->fails) return false;
- for (cache_entry_t *fail = &ctx->cache->fails[hash(str, pat->id) & (ctx->cache->size-1)]; fail; fail = fail->next_probe) {
- if (fail->pat == pat && fail->start == str)
- return true;
+ for (cache_entry_t *fail = &ctx->cache->fails[hash(str, pat->id) & (ctx->cache->size - 1)]; fail;
+ fail = fail->next_probe) {
+ if (fail->pat == pat && fail->start == str) return true;
}
return false;
}
@@ -151,9 +139,8 @@ static bool has_cached_failure(match_ctx_t *ctx, const char *str, bp_pat_t *pat)
//
// Insert into the hash table using a chained scatter table approach.
//
-static void _hash_insert(cache_t *cache, const char *str, bp_pat_t *pat)
-{
- size_t h = hash(str, pat->id) & (cache->size-1);
+static void _hash_insert(cache_t *cache, const char *str, bp_pat_t *pat) {
+ size_t h = hash(str, pat->id) & (cache->size - 1);
if (cache->fails[h].pat == NULL) { // No collision
cache->fails[h].pat = pat;
cache->fails[h].start = str;
@@ -162,14 +149,14 @@ static void _hash_insert(cache_t *cache, const char *str, bp_pat_t *pat)
return;
}
- if (cache->fails[h].pat == pat && cache->fails[h].start == str)
- return; // Duplicate entry, just leave it be
+ if (cache->fails[h].pat == pat && cache->fails[h].start == str) return; // Duplicate entry, just leave it be
// Shuffle the colliding entry along to a free space:
- while (cache->fails[cache->next_free].pat) ++cache->next_free;
+ while (cache->fails[cache->next_free].pat)
+ ++cache->next_free;
cache_entry_t *free_slot = &cache->fails[cache->next_free];
*free_slot = cache->fails[h];
- size_t h_orig = hash(free_slot->start, free_slot->pat->id) & (cache->size-1);
+ size_t h_orig = hash(free_slot->start, free_slot->pat->id) & (cache->size - 1);
// Put the new entry in its desired slot
cache->fails[h].pat = pat;
@@ -179,7 +166,8 @@ static void _hash_insert(cache_t *cache, const char *str, bp_pat_t *pat)
if (h_orig != h) { // Maintain the chain that points to the colliding entry
cache_entry_t *prev = &cache->fails[h_orig]; // Start of the chain
- while (prev->next_probe != &cache->fails[h]) prev = prev->next_probe;
+ while (prev->next_probe != &cache->fails[h])
+ prev = prev->next_probe;
prev->next_probe = free_slot;
}
}
@@ -187,23 +175,21 @@ static void _hash_insert(cache_t *cache, const char *str, bp_pat_t *pat)
//
// Save a match in the cache.
//
-static void cache_failure(match_ctx_t *ctx, const char *str, bp_pat_t *pat)
-{
+static void cache_failure(match_ctx_t *ctx, const char *str, bp_pat_t *pat) {
cache_t *cache = ctx->cache;
// Grow the hash if needed (>99% utilization):
- if (cache->occupancy+1 > (cache->size*99)/100) {
+ if (cache->occupancy + 1 > (cache->size * 99) / 100) {
cache_entry_t *old_fails = cache->fails;
size_t old_size = cache->size;
- cache->size = old_size == 0 ? 16 : 2*old_size;
- cache->fails = new(cache_entry_t[cache->size]);
+ cache->size = old_size == 0 ? 16 : 2 * old_size;
+ cache->fails = new (cache_entry_t[cache->size]);
cache->next_free = 0;
// Rehash:
for (size_t i = 0; i < old_size; i++) {
- if (old_fails[i].pat)
- _hash_insert(cache, old_fails[i].start, old_fails[i].pat);
+ if (old_fails[i].pat) _hash_insert(cache, old_fails[i].start, old_fails[i].pat);
}
- if (old_fails) delete(&old_fails);
+ if (old_fails) delete (&old_fails);
}
_hash_insert(cache, str, pat);
@@ -212,18 +198,16 @@ static void cache_failure(match_ctx_t *ctx, const char *str, bp_pat_t *pat)
//
// Clear and deallocate the cache.
//
-void cache_destroy(match_ctx_t *ctx)
-{
+void cache_destroy(match_ctx_t *ctx) {
cache_t *cache = ctx->cache;
- if (cache->fails) delete(&cache->fails);
+ if (cache->fails) delete (&cache->fails);
memset(cache, 0, sizeof(cache_t));
}
//
// Look up a pattern definition by name from a definition pattern.
//
-static bp_pat_t *_lookup_def(match_ctx_t *ctx, bp_pat_t *defs, const char *name, size_t namelen)
-{
+static bp_pat_t *_lookup_def(match_ctx_t *ctx, bp_pat_t *defs, const char *name, size_t namelen) {
while (defs != NULL) {
if (defs->type == BP_CHAIN) {
auto chain = When(defs, BP_CHAIN);
@@ -232,8 +216,7 @@ static bp_pat_t *_lookup_def(match_ctx_t *ctx, bp_pat_t *defs, const char *name,
defs = chain->first;
} else if (defs->type == BP_DEFINITIONS) {
auto def = When(defs, BP_DEFINITIONS);
- if (namelen == def->namelen && strncmp(def->name, name, namelen) == 0)
- return def->meaning;
+ if (namelen == def->namelen && strncmp(def->name, name, namelen) == 0) return def->meaning;
defs = def->next_def;
} else {
match_error(ctx, "Invalid pattern type in definitions");
@@ -246,9 +229,7 @@ static bp_pat_t *_lookup_def(match_ctx_t *ctx, bp_pat_t *defs, const char *name,
//
// Look up a pattern definition by name from a context.
//
-__attribute__((nonnull(2)))
-bp_pat_t *lookup_ctx(match_ctx_t *ctx, const char *name, size_t namelen)
-{
+__attribute__((nonnull(2))) bp_pat_t *lookup_ctx(match_ctx_t *ctx, const char *name, size_t namelen) {
for (; ctx; ctx = ctx->parent_ctx) {
bp_pat_t *def = _lookup_def(ctx, ctx->defs, name, namelen);
if (def) return def;
@@ -260,9 +241,7 @@ bp_pat_t *lookup_ctx(match_ctx_t *ctx, const char *name, size_t namelen)
// If the given pattern is a reference, look it up and return the referenced
// pattern. This is used for an optimization to avoid repeated lookups.
//
-__attribute__((nonnull(1)))
-static inline bp_pat_t *deref(match_ctx_t *ctx, bp_pat_t *pat)
-{
+__attribute__((nonnull(1))) static inline bp_pat_t *deref(match_ctx_t *ctx, bp_pat_t *pat) {
if (pat && pat->type == BP_REF) {
auto ref = When(pat, BP_REF);
bp_pat_t *def = lookup_ctx(ctx, ref->name, ref->len);
@@ -276,33 +255,26 @@ static inline bp_pat_t *deref(match_ctx_t *ctx, bp_pat_t *pat)
// match for the whole pattern to match (if any). Ideally, this would be a
// string literal that can be quickly scanned for.
//
-static bp_pat_t *get_prerequisite(match_ctx_t *ctx, bp_pat_t *pat)
-{
+static bp_pat_t *get_prerequisite(match_ctx_t *ctx, bp_pat_t *pat) {
int derefs = 0;
- for (bp_pat_t *p = pat; p; ) {
+ for (bp_pat_t *p = pat; p;) {
switch (p->type) {
- case BP_BEFORE:
- p = When(p, BP_BEFORE)->pat; break;
+ case BP_BEFORE: p = When(p, BP_BEFORE)->pat; break;
case BP_REPEAT:
- if (When(p, BP_REPEAT)->min == 0)
- return p;
- p = When(p, BP_REPEAT)->repeat_pat; break;
- case BP_CAPTURE:
- p = When(p, BP_CAPTURE)->pat; break;
- case BP_TAGGED:
- p = When(p, BP_TAGGED)->pat; break;
+ if (When(p, BP_REPEAT)->min == 0) return p;
+ p = When(p, BP_REPEAT)->repeat_pat;
+ break;
+ case BP_CAPTURE: p = When(p, BP_CAPTURE)->pat; break;
+ case BP_TAGGED: p = When(p, BP_TAGGED)->pat; break;
case BP_CHAIN: {
auto chain = When(p, BP_CHAIN);
// If pattern is something like (|"foo"|), then use "foo" as the first thing to scan for
p = chain->first->max_matchlen == 0 ? chain->second : chain->first;
break;
}
- case BP_MATCH:
- p = When(p, BP_MATCH)->pat; break;
- case BP_NOT_MATCH:
- p = When(p, BP_NOT_MATCH)->pat; break;
- case BP_REPLACE:
- p = When(p, BP_REPLACE)->pat; break;
+ case BP_MATCH: p = When(p, BP_MATCH)->pat; break;
+ case BP_NOT_MATCH: p = When(p, BP_NOT_MATCH)->pat; break;
+ case BP_REPLACE: p = When(p, BP_REPLACE)->pat; break;
case BP_REF: {
if (++derefs > 10) return p; // In case of left recursion
bp_pat_t *p2 = deref(ctx, p);
@@ -319,31 +291,28 @@ static bp_pat_t *get_prerequisite(match_ctx_t *ctx, bp_pat_t *pat)
//
// Find the next match after prev (or the first match if prev is NULL)
//
-__attribute__((nonnull(1,2,3)))
-static bp_match_t *_next_match(match_ctx_t *ctx, const char *str, bp_pat_t *pat, bp_pat_t *skip)
-{
+__attribute__((nonnull(1, 2, 3))) static bp_match_t *_next_match(match_ctx_t *ctx, const char *str, bp_pat_t *pat,
+ bp_pat_t *skip) {
// Clear the cache so it's not full of old cache values from different parts of the file:
cache_destroy(ctx);
bp_pat_t *first = get_prerequisite(ctx, pat);
// Don't bother looping if this can only match at the start/end:
- if (first->type == BP_START_OF_FILE)
- return match(ctx, str, pat);
- else if (first->type == BP_END_OF_FILE)
- return match(ctx, ctx->end, pat);
+ if (first->type == BP_START_OF_FILE) return match(ctx, str, pat);
+ else if (first->type == BP_END_OF_FILE) return match(ctx, ctx->end, pat);
// Performance optimization: if the pattern starts with a string literal,
// we can just rely on the highly optimized memmem() implementation to skip
// past areas where we know we won't find a match.
if (!skip && first->type == BP_STRING && first->min_matchlen > 0) {
- char *found = ctx->ignorecase ?
- strcasestr(str, When(first, BP_STRING)->string)
- : memmem(str, (size_t)(ctx->end - str), When(first, BP_STRING)->string, first->min_matchlen);
+ char *found = ctx->ignorecase
+ ? strcasestr(str, When(first, BP_STRING)->string)
+ : memmem(str, (size_t)(ctx->end - str), When(first, BP_STRING)->string, first->min_matchlen);
str = found ? found : ctx->end;
} else if (!skip && str > ctx->start && (first->type == BP_START_OF_LINE || first->type == BP_END_OF_LINE)) {
char *found = memchr(str, '\n', (size_t)(ctx->end - str));
- str = found ? (first->type == BP_START_OF_LINE ? found+1 : found) : ctx->end;
+ str = found ? (first->type == BP_START_OF_LINE ? found + 1 : found) : ctx->end;
}
do {
@@ -363,8 +332,7 @@ static bp_match_t *_next_match(match_ctx_t *ctx, const char *str, bp_pat_t *pat,
// match object, or NULL if no match is found.
// The returned value should be free()'d to avoid memory leaking.
//
-static bp_match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat)
-{
+static bp_match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat) {
switch (pat->type) {
case BP_DEFINITIONS: {
match_ctx_t ctx2 = *ctx;
@@ -393,10 +361,12 @@ static bp_match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat)
return (str < ctx->end && *str != '\n') ? new_match(pat, str, next_char(str, ctx->end), NULL) : NULL;
}
case BP_ID_START: {
- return (str < ctx->end && isidstart(str, ctx->end)) ? new_match(pat, str, next_char(str, ctx->end), NULL) : NULL;
+ return (str < ctx->end && isidstart(str, ctx->end)) ? new_match(pat, str, next_char(str, ctx->end), NULL)
+ : NULL;
}
case BP_ID_CONTINUE: {
- return (str < ctx->end && isidcontinue(str, ctx->end)) ? new_match(pat, str, next_char(str, ctx->end), NULL) : NULL;
+ return (str < ctx->end && isidcontinue(str, ctx->end)) ? new_match(pat, str, next_char(str, ctx->end), NULL)
+ : NULL;
}
case BP_START_OF_FILE: {
return (str == ctx->start) ? new_match(pat, str, str, NULL) : NULL;
@@ -405,27 +375,28 @@ static bp_match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat)
return (str == ctx->start || str[-1] == '\n') ? new_match(pat, str, str, NULL) : NULL;
}
case BP_END_OF_FILE: {
- return (str == ctx->end || (str == ctx->end-1 && *str == '\n')) ? new_match(pat, str, str, NULL) : NULL;
+ return (str == ctx->end || (str == ctx->end - 1 && *str == '\n')) ? new_match(pat, str, str, NULL) : NULL;
}
case BP_END_OF_LINE: {
return (str == ctx->end || *str == '\n') ? new_match(pat, str, str, NULL) : NULL;
}
case BP_WORD_BOUNDARY: {
- return (str == ctx->start || isidcontinue(str, ctx->end) != isidcontinue(prev_char(ctx->start, str), ctx->end)) ?
- new_match(pat, str, str, NULL) : NULL;
+ return (str == ctx->start || isidcontinue(str, ctx->end) != isidcontinue(prev_char(ctx->start, str), ctx->end))
+ ? new_match(pat, str, str, NULL)
+ : NULL;
}
case BP_STRING: {
if (&str[pat->min_matchlen] > ctx->end) return NULL;
- if (pat->min_matchlen > 0 && (ctx->ignorecase ? strncasecmp : strncmp)(str, When(pat, BP_STRING)->string, pat->min_matchlen) != 0)
+ if (pat->min_matchlen > 0
+ && (ctx->ignorecase ? strncasecmp : strncmp)(str, When(pat, BP_STRING)->string, pat->min_matchlen) != 0)
return NULL;
return new_match(pat, str, str + pat->min_matchlen, NULL);
}
case BP_RANGE: {
if (str >= ctx->end) return NULL;
auto range = When(pat, BP_RANGE);
- if ((unsigned char)*str < range->low || (unsigned char)*str > range->high)
- return NULL;
- return new_match(pat, str, str+1, NULL);
+ if ((unsigned char)*str < range->low || (unsigned char)*str > range->high) return NULL;
+ return new_match(pat, str, str + 1, NULL);
}
case BP_NOT: {
bp_match_t *m = match(ctx, str, When(pat, BP_NOT)->pat);
@@ -435,18 +406,21 @@ static bp_match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat)
}
return new_match(pat, str, str, NULL);
}
- case BP_UPTO: case BP_UPTO_STRICT: {
+ case BP_UPTO:
+ case BP_UPTO_STRICT: {
bp_match_t *m = new_match(pat, str, str, NULL);
- bp_pat_t *target = deref(ctx, pat->type == BP_UPTO ? When(pat, BP_UPTO)->target : When(pat, BP_UPTO_STRICT)->target),
- *skip = deref(ctx, pat->type == BP_UPTO ? When(pat, BP_UPTO)->skip : When(pat, BP_UPTO_STRICT)->skip);
+ bp_pat_t *target =
+ deref(ctx, pat->type == BP_UPTO ? When(pat, BP_UPTO)->target : When(pat, BP_UPTO_STRICT)->target),
+ *skip = deref(ctx, pat->type == BP_UPTO ? When(pat, BP_UPTO)->skip : When(pat, BP_UPTO_STRICT)->skip);
if (!target && !skip) {
- while (str < ctx->end && *str != '\n') ++str;
+ while (str < ctx->end && *str != '\n')
+ ++str;
m->end = str;
return m;
}
size_t child_cap = 0, nchildren = 0;
- for (const char *prev = NULL; prev < str; ) {
+ for (const char *prev = NULL; prev < str;) {
prev = str;
if (target) {
bp_match_t *p = match(ctx, str, target);
@@ -463,9 +437,10 @@ static bp_match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat)
bp_match_t *s = match(ctx, str, skip);
if (s != NULL) {
str = s->end;
- if (nchildren+2 >= child_cap) {
+ if (nchildren + 2 >= child_cap) {
m->children = grow(m->children, child_cap += 5);
- for (size_t i = nchildren; i < child_cap; i++) m->children[i] = NULL;
+ for (size_t i = nchildren; i < child_cap; i++)
+ m->children[i] = NULL;
}
m->children[nchildren++] = s;
continue;
@@ -474,8 +449,7 @@ static bp_match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat)
// This isn't in the for() structure because there needs to
// be at least once chance to match the pattern, even if
// we're at the end of the string already (e.g. "..$").
- if (str < ctx->end && *str != '\n' && pat->type != BP_UPTO_STRICT)
- str = next_char(str, ctx->end);
+ if (str < ctx->end && *str != '\n' && pat->type != BP_UPTO_STRICT) str = next_char(str, ctx->end);
}
recycle_match(&m);
return NULL;
@@ -511,23 +485,23 @@ static bp_match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat)
// of looping infinitely.
if (msep) recycle_match(&msep);
recycle_match(&mp);
- if (repeat->max == -1)
- reps = ~(size_t)0;
- else
- reps = (size_t)repeat->max;
+ if (repeat->max == -1) reps = ~(size_t)0;
+ else reps = (size_t)repeat->max;
break;
}
if (msep) {
- if (nchildren+2 >= child_cap) {
+ if (nchildren + 2 >= child_cap) {
m->children = grow(m->children, child_cap += 5);
- for (size_t i = nchildren; i < child_cap; i++) m->children[i] = NULL;
+ for (size_t i = nchildren; i < child_cap; i++)
+ m->children[i] = NULL;
}
m->children[nchildren++] = msep;
}
- if (nchildren+2 >= child_cap) {
+ if (nchildren + 2 >= child_cap) {
m->children = grow(m->children, child_cap += 5);
- for (size_t i = nchildren; i < child_cap; i++) m->children[i] = NULL;
+ for (size_t i = nchildren; i < child_cap; i++)
+ m->children[i] = NULL;
}
m->children[nchildren++] = mp;
str = mp->end;
@@ -556,11 +530,10 @@ static bp_match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat)
pos >= ctx->start && (back->max_matchlen == -1 || pos >= &str[-(int)back->max_matchlen]);
pos = prev_char(ctx->start, pos)) {
cache_destroy(&slice_ctx);
- slice_ctx.start = (char*)pos;
+ slice_ctx.start = (char *)pos;
bp_match_t *m = match(&slice_ctx, pos, back);
// Match should not go past str (i.e. (<"AB" "B") should match "ABB", but not "AB")
- if (m && m->end != str)
- recycle_match(&m);
+ if (m && m->end != str) recycle_match(&m);
else if (m) {
cache_destroy(&slice_ctx);
return new_match(pat, str, str, MATCHES(m));
@@ -577,10 +550,10 @@ static bp_match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat)
bp_match_t *after = match(ctx, str, When(pat, BP_BEFORE)->pat);
return after ? new_match(pat, str, str, MATCHES(after)) : NULL;
}
- case BP_CAPTURE: case BP_TAGGED: {
+ case BP_CAPTURE:
+ case BP_TAGGED: {
bp_pat_t *to_match = pat->type == BP_CAPTURE ? When(pat, BP_CAPTURE)->pat : When(pat, BP_TAGGED)->pat;
- if (!to_match)
- return new_match(pat, str, str, NULL);
+ if (!to_match) return new_match(pat, str, str, NULL);
bp_match_t *p = match(ctx, str, to_match);
return p ? new_match(pat, str, p->end, MATCHES(p)) : NULL;
}
@@ -611,7 +584,8 @@ static bp_match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat)
bp_pat_t *backref;
if (m1->children && m1->children[0]->pat->type == BP_CURDENT) {
const char *linestart = m1->start;
- while (linestart > ctx->start && linestart[-1] != '\n') --linestart;
+ while (linestart > ctx->start && linestart[-1] != '\n')
+ --linestart;
// Current indentation:
char denter = *linestart;
@@ -629,12 +603,14 @@ static bp_match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat)
ctx2.parent_ctx = ctx;
ctx2.defs = &(bp_pat_t){
.type = BP_DEFINITIONS,
- .start = m1->pat->start, .end = m1->pat->end,
- .__tagged.BP_DEFINITIONS = {
- .name = When(m1->pat, BP_CAPTURE)->name,
- .namelen = When(m1->pat, BP_CAPTURE)->namelen,
- .meaning = backref,
- },
+ .start = m1->pat->start,
+ .end = m1->pat->end,
+ .__tagged.BP_DEFINITIONS =
+ {
+ .name = When(m1->pat, BP_CAPTURE)->name,
+ .namelen = When(m1->pat, BP_CAPTURE)->namelen,
+ .meaning = backref,
+ },
};
m2 = match(&ctx2, m1->end, chain->second);
if (!m2) // No need to keep the backref in memory if it didn't match
@@ -651,7 +627,8 @@ static bp_match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat)
return new_match(pat, str, m2->end, MATCHES(m1, m2));
}
- case BP_MATCH: case BP_NOT_MATCH: {
+ case BP_MATCH:
+ case BP_NOT_MATCH: {
bp_pat_t *target = pat->type == BP_MATCH ? When(pat, BP_MATCH)->pat : When(pat, BP_NOT_MATCH)->pat;
bp_match_t *m1 = match(ctx, str, target);
if (m1 == NULL) return NULL;
@@ -687,8 +664,7 @@ static bp_match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat)
return new_match(pat, str, p ? p->end : str, MATCHES(p));
}
case BP_REF: {
- if (has_cached_failure(ctx, str, pat))
- return NULL;
+ if (has_cached_failure(ctx, str, pat)) return NULL;
auto ref_pat = When(pat, BP_REF);
bp_pat_t *ref = lookup_ctx(ctx, ref_pat->name, ref_pat->len);
@@ -697,27 +673,31 @@ static bp_match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat)
return NULL;
}
- if (ref->type == BP_LEFTRECURSION)
- return match(ctx, str, ref);
+ if (ref->type == BP_LEFTRECURSION) return match(ctx, str, ref);
bp_pat_t rec_op = {
.type = BP_LEFTRECURSION,
- .start = ref->start, .end = ref->end,
- .min_matchlen = 0, .max_matchlen = -1,
- .__tagged.BP_LEFTRECURSION = {
- .match = NULL,
- .visited = false,
- .at = str,
- .fallback = pat,
- .ctx = (void*)ctx,
- },
+ .start = ref->start,
+ .end = ref->end,
+ .min_matchlen = 0,
+ .max_matchlen = -1,
+ .__tagged.BP_LEFTRECURSION =
+ {
+ .match = NULL,
+ .visited = false,
+ .at = str,
+ .fallback = pat,
+ .ctx = (void *)ctx,
+ },
};
match_ctx_t ctx2 = *ctx;
ctx2.parent_ctx = ctx;
ctx2.defs = &(bp_pat_t){
.type = BP_DEFINITIONS,
- .start = pat->start, .end = pat->end,
- .__tagged.BP_DEFINITIONS = {
+ .start = pat->start,
+ .end = pat->end,
+ .__tagged.BP_DEFINITIONS =
+ {
.name = ref_pat->name,
.namelen = ref_pat->len,
.meaning = &rec_op,
@@ -758,17 +738,20 @@ static bp_match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat)
const char *start = str;
const char *p = str;
- while (p > ctx->start && p[-1] != '\n') --p;
+ while (p > ctx->start && p[-1] != '\n')
+ --p;
// Current indentation:
char denter = *p;
int dents = 0;
if (denter == ' ' || denter == '\t') {
- for (; *p == denter && p < ctx->end; ++p) ++dents;
+ for (; *p == denter && p < ctx->end; ++p)
+ ++dents;
}
// Subsequent indentation:
- while (*str == '\n' || *str == '\n') ++str;
+ while (*str == '\n' || *str == '\n')
+ ++str;
for (int i = 0; i < dents; i++)
if (&str[i] >= ctx->end || str[i] != denter) return NULL;
@@ -787,15 +770,14 @@ static bp_match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat)
//
// Return a match object which can be used (may be allocated or recycled).
//
-bp_match_t *new_match(bp_pat_t *pat, const char *start, const char *end, bp_match_t *children[])
-{
+bp_match_t *new_match(bp_pat_t *pat, const char *start, const char *end, bp_match_t *children[]) {
bp_match_t *m;
if (unused_matches) {
m = unused_matches;
gc_list_remove(m);
memset(m, 0, sizeof(bp_match_t));
} else {
- m = new(bp_match_t);
+ m = new (bp_match_t);
}
// Keep track of the object:
gc_list_prepend(&in_use_matches, m);
@@ -816,14 +798,13 @@ bp_match_t *new_match(bp_pat_t *pat, const char *start, const char *end, bp_matc
// If the given match is not currently a child member of another match (or
// otherwise reserved) then put it back in the pool of unused match objects.
//
-public void recycle_match(bp_match_t **at_m)
-{
+public
+void recycle_match(bp_match_t **at_m) {
bp_match_t *m = *at_m;
if (m->children) {
for (int i = 0; m->children[i]; i++)
recycle_match(&m->children[i]);
- if (m->children != m->_children)
- delete(&m->children);
+ if (m->children != m->_children) delete (&m->children);
}
gc_list_remove(m);
@@ -835,13 +816,12 @@ public void recycle_match(bp_match_t **at_m)
//
// Force all match objects into the pool of unused match objects.
//
-public size_t recycle_all_matches(void)
-{
+public
+size_t recycle_all_matches(void) {
size_t count = 0;
for (bp_match_t *m; (m = in_use_matches); ++count) {
gc_list_remove(m);
- if (m->children && m->children != m->_children)
- delete(&m->children);
+ if (m->children && m->children != m->_children) delete (&m->children);
gc_list_prepend(&unused_matches, m);
}
return count;
@@ -850,13 +830,13 @@ public size_t recycle_all_matches(void)
//
// Free all match objects in memory.
//
-public size_t free_all_matches(void)
-{
+public
+size_t free_all_matches(void) {
size_t count = 0;
recycle_all_matches();
for (bp_match_t *m; (m = unused_matches); ++count) {
gc_list_remove(m);
- delete(&m);
+ delete (&m);
}
return count;
}
@@ -865,12 +845,13 @@ public size_t free_all_matches(void)
// Iterate over matches.
// Usage: for (bp_match_t *m = NULL; next_match(&m, ...); ) {...}
//
-public bool next_match(bp_match_t **m, const char *start, const char *end, bp_pat_t *pat, bp_pat_t *defs, bp_pat_t *skip, bool ignorecase)
-{
+public
+bool next_match(bp_match_t **m, const char *start, const char *end, bp_pat_t *pat, bp_pat_t *defs, bp_pat_t *skip,
+ bool ignorecase) {
const char *pos;
if (*m) {
// Make sure forward progress is occurring, even after zero-width matches:
- pos = ((*m)->end > (*m)->start) ? (*m)->end : (*m)->end+1;
+ pos = ((*m)->end > (*m)->start) ? (*m)->end : (*m)->end + 1;
recycle_match(m);
} else {
pos = start;
@@ -895,8 +876,7 @@ public bool next_match(bp_match_t **m, const char *start, const char *end, bp_pa
recycle_all_matches();
cache_destroy(&ctx);
*m = NULL;
- if (error_handler)
- error_handler(&error_message);
+ if (error_handler) error_handler(&error_message);
if (error_message) {
free(error_message);
@@ -909,9 +889,7 @@ public bool next_match(bp_match_t **m, const char *start, const char *end, bp_pa
//
// Helper function to track state while doing a depth-first search.
//
-__attribute__((nonnull))
-static bp_match_t *_get_numbered_capture(bp_match_t *m, int *n)
-{
+__attribute__((nonnull)) static bp_match_t *_get_numbered_capture(bp_match_t *m, int *n) {
if ((m->pat->type == BP_CAPTURE && When(m->pat, BP_CAPTURE)->namelen == 0) || m->pat->type == BP_TAGGED) {
if (*n == 1) {
return m;
@@ -921,8 +899,7 @@ static bp_match_t *_get_numbered_capture(bp_match_t *m, int *n)
}
}
- if (m->pat->type == BP_CAPTURE || m->pat->type == BP_TAGGED)
- return NULL;
+ if (m->pat->type == BP_CAPTURE || m->pat->type == BP_TAGGED) return NULL;
if (m->children) {
for (int i = 0; m->children[i]; i++) {
@@ -936,8 +913,8 @@ static bp_match_t *_get_numbered_capture(bp_match_t *m, int *n)
//
// Get a specific numbered pattern capture.
//
-public bp_match_t *get_numbered_capture(bp_match_t *m, int n)
-{
+public
+bp_match_t *get_numbered_capture(bp_match_t *m, int n) {
if (n <= 0) return m;
if (m->pat->type == BP_TAGGED || m->pat->type == BP_CAPTURE) {
if (n == 1 && m->pat->type == BP_CAPTURE && When(m->pat, BP_CAPTURE)->namelen == 0) return m;
@@ -956,15 +933,12 @@ public bp_match_t *get_numbered_capture(bp_match_t *m, int n)
//
// Helper function for get_named_capture()
//
-bp_match_t *_get_named_capture(bp_match_t *m, const char *name, size_t namelen)
-{
- if (m->pat->type == BP_CAPTURE && When(m->pat, BP_CAPTURE)->name
- && When(m->pat, BP_CAPTURE)->namelen == namelen
+bp_match_t *_get_named_capture(bp_match_t *m, const char *name, size_t namelen) {
+ if (m->pat->type == BP_CAPTURE && When(m->pat, BP_CAPTURE)->name && When(m->pat, BP_CAPTURE)->namelen == namelen
&& strncmp(When(m->pat, BP_CAPTURE)->name, name, When(m->pat, BP_CAPTURE)->namelen) == 0)
return m;
- if (m->pat->type == BP_TAGGED || m->pat->type == BP_CAPTURE)
- return NULL;
+ if (m->pat->type == BP_TAGGED || m->pat->type == BP_CAPTURE) return NULL;
if (m->children) {
for (int i = 0; m->children[i]; i++) {
@@ -978,10 +952,10 @@ bp_match_t *_get_named_capture(bp_match_t *m, const char *name, size_t namelen)
//
// Get a capture with a specific name.
//
-public bp_match_t *get_named_capture(bp_match_t *m, const char *name, ssize_t _namelen)
-{
+public
+bp_match_t *get_named_capture(bp_match_t *m, const char *name, ssize_t _namelen) {
size_t namelen = _namelen < 0 ? strlen(name) : (size_t)_namelen;
- if (m->pat->type == BP_TAGGED) {// || (m->pat->type == BP_CAPTURE && m->pat->args.capture.namelen > 0)) {
+ if (m->pat->type == BP_TAGGED) { // || (m->pat->type == BP_CAPTURE && m->pat->args.capture.namelen > 0)) {
if (m->children) {
for (int i = 0; m->children[i]; i++) {
bp_match_t *cap = _get_named_capture(m->children[i], name, namelen);
diff --git a/match.h b/match.h
index 98de0c0..29cafa6 100644
--- a/match.h
+++ b/match.h
@@ -27,16 +27,14 @@ struct bp_match_s {
typedef void (*bp_errhand_t)(char **err_msg);
-__attribute__((nonnull))
-void recycle_match(bp_match_t **at_m);
+__attribute__((nonnull)) void recycle_match(bp_match_t **at_m);
size_t free_all_matches(void);
size_t recycle_all_matches(void);
-bool next_match(bp_match_t **m, const char *start, const char *end, bp_pat_t *pat, bp_pat_t *defs, bp_pat_t *skip, bool ignorecase);
+bool next_match(bp_match_t **m, const char *start, const char *end, bp_pat_t *pat, bp_pat_t *defs, bp_pat_t *skip,
+ bool ignorecase);
#define stop_matching(m) next_match(m, NULL, NULL, NULL, NULL, NULL, 0)
bp_errhand_t bp_set_error_handler(bp_errhand_t handler);
-__attribute__((nonnull))
-bp_match_t *get_numbered_capture(bp_match_t *m, int n);
-__attribute__((nonnull, pure))
-bp_match_t *get_named_capture(bp_match_t *m, const char *name, ssize_t namelen);
+__attribute__((nonnull)) bp_match_t *get_numbered_capture(bp_match_t *m, int n);
+__attribute__((nonnull, pure)) bp_match_t *get_named_capture(bp_match_t *m, const char *name, ssize_t namelen);
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/pattern.c b/pattern.c
index 5778012..5c75d9b 100644
--- a/pattern.c
+++ b/pattern.c
@@ -5,25 +5,28 @@
#include <err.h>
#include <setjmp.h>
#include <stdbool.h>
-#include <stdlib.h>
#include <stdio.h>
+#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "pattern.h"
-#include "utils.h"
#include "utf8.h"
+#include "utils.h"
-#define Pattern(_tag, _start, _end, _min, _max, ...) allocate_pat((bp_pat_t){.type=_tag, .start=_start, .end=_end, \
- .min_matchlen=_min, .max_matchlen=_max, .__tagged._tag={__VA_ARGS__}})
+#define Pattern(_tag, _start, _end, _min, _max, ...) \
+ allocate_pat((bp_pat_t){.type = _tag, \
+ .start = _start, \
+ .end = _end, \
+ .min_matchlen = _min, \
+ .max_matchlen = _max, \
+ .__tagged._tag = {__VA_ARGS__}})
#define UNBOUNDED(pat) ((pat)->max_matchlen == -1)
static bp_pat_t *allocated_pats = NULL;
-__attribute__((nonnull))
-static bp_pat_t *bp_pattern_nl(const char *str, const char *end, bool allow_nl);
-__attribute__((nonnull))
-static bp_pat_t *bp_simplepattern(const char *str, const char *end);
+__attribute__((nonnull)) static bp_pat_t *bp_pattern_nl(const char *str, const char *end, bool allow_nl);
+__attribute__((nonnull)) static bp_pat_t *bp_simplepattern(const char *str, const char *end);
// For error-handling purposes, use setjmp/longjmp to break out of deeply
// recursive function calls when a parse error occurs.
@@ -31,14 +34,18 @@ bool is_in_try_catch = false;
static jmp_buf err_jmp;
static maybe_pat_t parse_error = {.success = false};
-#define __TRY_PATTERN__ bool was_in_try_catch = is_in_try_catch; \
- if (!is_in_try_catch) { is_in_try_catch = true; if (setjmp(err_jmp)) return parse_error; }
-#define __END_TRY_PATTERN__ if (!was_in_try_catch) is_in_try_catch = false;
+#define __TRY_PATTERN__ \
+ bool was_in_try_catch = is_in_try_catch; \
+ if (!is_in_try_catch) { \
+ is_in_try_catch = true; \
+ if (setjmp(err_jmp)) return parse_error; \
+ }
+#define __END_TRY_PATTERN__ \
+ if (!was_in_try_catch) is_in_try_catch = false;
-static inline void parse_err(const char *start, const char *end, const char *msg)
-{
+static inline void parse_err(const char *start, const char *end, const char *msg) {
if (!is_in_try_catch) {
- fprintf(stderr, "Parse error: %s\n%.*s\n", msg, (int)(end-start), start);
+ fprintf(stderr, "Parse error: %s\n%.*s\n", msg, (int)(end - start), start);
exit(1);
}
parse_error.value.error.start = start;
@@ -51,10 +58,10 @@ static inline void parse_err(const char *start, const char *end, const char *msg
// Allocate a new pattern for this file (ensuring it will be automatically
// freed when the file is freed)
//
-public bp_pat_t *allocate_pat(bp_pat_t pat)
-{
+public
+bp_pat_t *allocate_pat(bp_pat_t pat) {
static size_t next_pat_id = 1;
- bp_pat_t *allocated = new(bp_pat_t);
+ bp_pat_t *allocated = new (bp_pat_t);
*allocated = pat;
allocated->home = &allocated_pats;
allocated->next = allocated_pats;
@@ -67,23 +74,21 @@ public bp_pat_t *allocate_pat(bp_pat_t pat)
//
// Helper function to initialize a range object.
//
-__attribute__((nonnull(1,2,5)))
-static bp_pat_t *new_range(const char *start, const char *end, size_t min, ssize_t max, bp_pat_t *repeating, bp_pat_t *sep)
-{
- size_t minlen = min*repeating->min_matchlen + (min > 0 ? min-1 : 0)*(sep ? sep->min_matchlen : 0);
- ssize_t maxlen = (max == -1 || UNBOUNDED(repeating) || (max != 0 && max != 1 && sep && UNBOUNDED(sep))) ? (ssize_t)-1
- : max*repeating->max_matchlen + (ssize_t)(max > 0 ? min-1 : 0)*(ssize_t)(sep ? sep->min_matchlen : 0);
- return Pattern(BP_REPEAT, start, end, minlen, maxlen,
- .min=min, .max=max, .repeat_pat=repeating, .sep=sep);
+__attribute__((nonnull(1, 2, 5))) static bp_pat_t *new_range(const char *start, const char *end, size_t min,
+ ssize_t max, bp_pat_t *repeating, bp_pat_t *sep) {
+ size_t minlen = min * repeating->min_matchlen + (min > 0 ? min - 1 : 0) * (sep ? sep->min_matchlen : 0);
+ ssize_t maxlen =
+ (max == -1 || UNBOUNDED(repeating) || (max != 0 && max != 1 && sep && UNBOUNDED(sep)))
+ ? (ssize_t)-1
+ : max * repeating->max_matchlen + (ssize_t)(max > 0 ? min - 1 : 0) * (ssize_t)(sep ? sep->min_matchlen : 0);
+ return Pattern(BP_REPEAT, start, end, minlen, maxlen, .min = min, .max = max, .repeat_pat = repeating, .sep = sep);
}
//
// Take a pattern and expand it into a chain of patterns if it's followed by
// any patterns (e.g. "`x `y"), otherwise return the original input.
//
-__attribute__((nonnull))
-static bp_pat_t *expand_chain(bp_pat_t *first, const char *end, bool allow_nl)
-{
+__attribute__((nonnull)) static bp_pat_t *expand_chain(bp_pat_t *first, const char *end, bool allow_nl) {
const char *str = after_spaces(first->end, allow_nl, end);
bp_pat_t *second = bp_simplepattern(str, end);
if (second == NULL) return first;
@@ -94,9 +99,7 @@ static bp_pat_t *expand_chain(bp_pat_t *first, const char *end, bool allow_nl)
//
// Match trailing => replacements (with optional pattern beforehand)
//
-__attribute__((nonnull))
-static bp_pat_t *expand_replacements(bp_pat_t *replace_pat, const char *end, bool allow_nl)
-{
+__attribute__((nonnull)) static bp_pat_t *expand_replacements(bp_pat_t *replace_pat, const char *end, bool allow_nl) {
const char *str = replace_pat->end;
while (matchstr(&str, "=>", allow_nl, end)) {
const char *repstr;
@@ -108,21 +111,19 @@ static bp_pat_t *expand_replacements(bp_pat_t *replace_pat, const char *end, boo
for (; str < end && *str != closequote; str = next_char(str, end)) {
if (*str == '\\') {
if (!str[1] || str[1] == '\n')
- parse_err(str, str+1,
- "There should be an escape sequence after this backslash.");
+ parse_err(str, str + 1, "There should be an escape sequence after this backslash.");
str = next_char(str, end);
}
}
- replen = (size_t)(str-repstr);
+ replen = (size_t)(str - repstr);
(void)matchchar(&str, closequote, true, end);
} else {
repstr = "";
replen = 0;
}
- replace_pat = Pattern(BP_REPLACE, replace_pat->start, str,
- replace_pat->min_matchlen, replace_pat->max_matchlen,
- .pat=replace_pat, .text=repstr, .len=replen);
+ replace_pat = Pattern(BP_REPLACE, replace_pat->start, str, replace_pat->min_matchlen, replace_pat->max_matchlen,
+ .pat = replace_pat, .text = repstr, .len = replen);
}
return replace_pat;
}
@@ -132,9 +133,7 @@ static bp_pat_t *expand_replacements(bp_pat_t *replace_pat, const char *end, boo
// chain of choices if it's followed by any "/"-separated patterns (e.g.
// "`x/`y"), otherwise return the original input.
//
-__attribute__((nonnull))
-static bp_pat_t *expand_choices(bp_pat_t *first, const char *end, bool allow_nl)
-{
+__attribute__((nonnull)) static bp_pat_t *expand_choices(bp_pat_t *first, const char *end, bool allow_nl) {
first = expand_chain(first, end, allow_nl);
first = expand_replacements(first, end, allow_nl);
const char *str = first->end;
@@ -143,9 +142,8 @@ static bp_pat_t *expand_choices(bp_pat_t *first, const char *end, bool allow_nl)
bp_pat_t *second = bp_simplepattern(str, end);
if (second) str = second->end;
if (matchstr(&str, "=>", allow_nl, end))
- second = expand_replacements(second ? second : Pattern(BP_STRING, str-2, str-2, 0, 0), end, allow_nl);
- if (!second)
- parse_err(str, str, "There should be a pattern here after a '/'");
+ second = expand_replacements(second ? second : Pattern(BP_STRING, str - 2, str - 2, 0, 0), end, allow_nl);
+ if (!second) parse_err(str, str, "There should be a pattern here after a '/'");
second = expand_choices(second, end, allow_nl);
return either_pat(first, second);
}
@@ -154,8 +152,8 @@ static bp_pat_t *expand_choices(bp_pat_t *first, const char *end, bool allow_nl)
// Given two patterns, return a new pattern for the first pattern followed by
// the second. If either pattern is NULL, return the other.
//
-public bp_pat_t *chain_together(bp_pat_t *first, bp_pat_t *second)
-{
+public
+bp_pat_t *chain_together(bp_pat_t *first, bp_pat_t *second) {
if (first == NULL) return second;
if (second == NULL) return first;
@@ -163,34 +161,34 @@ public bp_pat_t *chain_together(bp_pat_t *first, bp_pat_t *second)
if (second->type == BP_STRING && second->max_matchlen == 0) return first;
if (first->type == BP_DEFINITIONS && second->type == BP_DEFINITIONS) {
- return Pattern(BP_CHAIN, first->start, second->end, second->min_matchlen, second->max_matchlen, .first=first, .second=second);
+ return Pattern(BP_CHAIN, first->start, second->end, second->min_matchlen, second->max_matchlen, .first = first,
+ .second = second);
}
size_t minlen = first->min_matchlen + second->min_matchlen;
ssize_t maxlen = (UNBOUNDED(first) || UNBOUNDED(second)) ? (ssize_t)-1 : first->max_matchlen + second->max_matchlen;
- return Pattern(BP_CHAIN, first->start, second->end, minlen, maxlen, .first=first, .second=second);
+ return Pattern(BP_CHAIN, first->start, second->end, minlen, maxlen, .first = first, .second = second);
}
//
// Given two patterns, return a new pattern for matching either the first
// pattern or the second. If either pattern is NULL, return the other.
//
-public bp_pat_t *either_pat(bp_pat_t *first, bp_pat_t *second)
-{
+public
+bp_pat_t *either_pat(bp_pat_t *first, bp_pat_t *second) {
if (first == NULL) return second;
if (second == NULL) return first;
size_t minlen = first->min_matchlen < second->min_matchlen ? first->min_matchlen : second->min_matchlen;
- ssize_t maxlen = (UNBOUNDED(first) || UNBOUNDED(second)) ? (ssize_t)-1 :
- (first->max_matchlen > second->max_matchlen ? first->max_matchlen : second->max_matchlen);
- return Pattern(BP_OTHERWISE, first->start, second->end, minlen, maxlen, .first=first, .second=second);
+ ssize_t maxlen = (UNBOUNDED(first) || UNBOUNDED(second))
+ ? (ssize_t)-1
+ : (first->max_matchlen > second->max_matchlen ? first->max_matchlen : second->max_matchlen);
+ return Pattern(BP_OTHERWISE, first->start, second->end, minlen, maxlen, .first = first, .second = second);
}
//
// Parse a definition
//
-__attribute__((nonnull))
-static bp_pat_t *_bp_definition(const char *start, const char *end)
-{
+__attribute__((nonnull)) static bp_pat_t *_bp_definition(const char *start, const char *end) {
if (start >= end || !(isalpha(*start) || *start == '_')) return NULL;
const char *str = after_name(start, end);
size_t namelen = (size_t)(str - start);
@@ -201,20 +199,19 @@ static bp_pat_t *_bp_definition(const char *start, const char *end)
str = def->end;
(void)matchchar(&str, ';', false, end); // Optional semicolon
if (is_tagged) { // `id:: foo` means define a rule named `id` that gives captures an `id` tag
- def = Pattern(BP_TAGGED, def->start, def->end, def->min_matchlen, def->max_matchlen,
- .pat=def, .name=start, .namelen=namelen);
+ def = Pattern(BP_TAGGED, def->start, def->end, def->min_matchlen, def->max_matchlen, .pat = def, .name = start,
+ .namelen = namelen);
}
bp_pat_t *next_def = _bp_definition(after_spaces(str, true, end), end);
- return Pattern(BP_DEFINITIONS, start, next_def ? next_def->end : str, 0, -1,
- .name=start, .namelen=namelen, .meaning=def, .next_def=next_def);
+ return Pattern(BP_DEFINITIONS, start, next_def ? next_def->end : str, 0, -1, .name = start, .namelen = namelen,
+ .meaning = def, .next_def = next_def);
}
//
// Compile a string of BP code into a BP pattern object.
//
-__attribute__((nonnull))
-static bp_pat_t *_bp_simplepattern(const char *str, const char *end, bool inside_stringpattern)
-{
+__attribute__((nonnull)) static bp_pat_t *_bp_simplepattern(const char *str, const char *end,
+ bool inside_stringpattern) {
str = after_spaces(str, false, end);
if (!*str) return NULL;
const char *start = str;
@@ -227,22 +224,18 @@ static bp_pat_t *_bp_simplepattern(const char *str, const char *end, bool inside
// In other words, `...foo` parses as `(.)(..foo)` instead of `(..(.)) (foo)`
// This is so that `...` can mean "at least one character upto" instead of "upto any character",
// which is tautologically the same as matching any single character.
- if (*str == '.' && (str+1 >= end || str[1] != '.')) { // ".."
+ if (*str == '.' && (str + 1 >= end || str[1] != '.')) { // ".."
str = next_char(str, end);
enum bp_pattype_e type = BP_UPTO;
bp_pat_t *extra_arg = NULL;
if (matchchar(&str, '%', false, end)) {
extra_arg = bp_simplepattern(str, end);
- if (extra_arg)
- str = extra_arg->end;
- else
- parse_err(str, str, "There should be a pattern to skip here after the '%'");
+ if (extra_arg) str = extra_arg->end;
+ else parse_err(str, str, "There should be a pattern to skip here after the '%'");
} else if (matchchar(&str, '=', false, end)) {
extra_arg = bp_simplepattern(str, end);
- if (extra_arg)
- str = extra_arg->end;
- else
- parse_err(str, str, "There should be a pattern here after the '='");
+ if (extra_arg) str = extra_arg->end;
+ else parse_err(str, str, "There should be a pattern here after the '='");
type = BP_UPTO_STRICT;
}
bp_pat_t *target;
@@ -255,9 +248,8 @@ static bp_pat_t *_bp_simplepattern(const char *str, const char *end, bool inside
while (target && target->type == BP_STRING && target->max_matchlen == 0)
target = bp_simplepattern(target->end, end);
}
- return type == BP_UPTO ?
- Pattern(BP_UPTO, start, str, 0, -1, .target=target, .skip=extra_arg)
- : Pattern(BP_UPTO_STRICT, start, str, 0, -1, .target=target, .skip=extra_arg);
+ return type == BP_UPTO ? Pattern(BP_UPTO, start, str, 0, -1, .target = target, .skip = extra_arg)
+ : Pattern(BP_UPTO_STRICT, start, str, 0, -1, .target = target, .skip = extra_arg);
} else {
return Pattern(BP_ANYCHAR, start, str, 1, UTF8_MAXCHARLEN);
}
@@ -273,7 +265,7 @@ static bp_pat_t *_bp_simplepattern(const char *str, const char *end, bool inside
str = next_char(c1_loc, end);
if (*str == '-') { // Range
const char *c2_loc = ++str;
- if (next_char(c1_loc, end) > c1_loc+1 || next_char(c2_loc, end) > c2_loc+1)
+ if (next_char(c1_loc, end) > c1_loc + 1 || next_char(c2_loc, end) > c2_loc + 1)
parse_err(start, next_char(c2_loc, end), "Sorry, UTF-8 character ranges are not yet supported.");
char c1 = *c1_loc, c2 = *c2_loc;
if (!c2 || c2 == '\n')
@@ -284,11 +276,12 @@ static bp_pat_t *_bp_simplepattern(const char *str, const char *end, bool inside
c2 = tmp;
}
str = next_char(c2_loc, end);
- bp_pat_t *pat = Pattern(BP_RANGE, start == c1_loc - 1 ? start : c1_loc, str, 1, 1, .low=c1, .high=c2);
+ bp_pat_t *pat =
+ Pattern(BP_RANGE, start == c1_loc - 1 ? start : c1_loc, str, 1, 1, .low = c1, .high = c2);
all = either_pat(all, pat);
} else {
size_t len = (size_t)(str - c1_loc);
- bp_pat_t *pat = Pattern(BP_STRING, start, str, len, (ssize_t)len, .string=strndup(c1_loc, len));
+ bp_pat_t *pat = Pattern(BP_STRING, start, str, len, (ssize_t)len, .string = strndup(c1_loc, len));
all = either_pat(all, pat);
}
} while (*str++ == ',');
@@ -297,12 +290,11 @@ static bp_pat_t *_bp_simplepattern(const char *str, const char *end, bool inside
}
// Escapes
case '\\': {
- if (!*str || *str == '\n')
- parse_err(str, str, "There should be an escape sequence here after this backslash.");
+ if (!*str || *str == '\n') parse_err(str, str, "There should be an escape sequence here after this backslash.");
bp_pat_t *all = NULL;
do { // Comma-separated items:
- const char *itemstart = str-1;
+ const char *itemstart = str - 1;
if (*str == 'N') { // \N (nodent)
all = either_pat(all, Pattern(BP_NODENT, itemstart, ++str, 1, -1));
continue;
@@ -322,21 +314,19 @@ static bp_pat_t *_bp_simplepattern(const char *str, const char *end, bool inside
const char *opstart = str;
unsigned char e_low = (unsigned char)unescapechar(str, &str, end);
- if (str == opstart)
- parse_err(start, str+1, "This isn't a valid escape sequence.");
+ if (str == opstart) parse_err(start, str + 1, "This isn't a valid escape sequence.");
unsigned char e_high = e_low;
if (*str == '-') { // Escape range (e.g. \x00-\xFF)
++str;
- if (next_char(str, end) != str+1)
+ if (next_char(str, end) != str + 1)
parse_err(start, next_char(str, end), "Sorry, UTF8 escape sequences are not supported in ranges.");
const char *seqstart = str;
e_high = (unsigned char)unescapechar(str, &str, end);
- if (str == seqstart)
- parse_err(seqstart, str+1, "This value isn't a valid escape sequence");
+ if (str == seqstart) parse_err(seqstart, str + 1, "This value isn't a valid escape sequence");
if (e_high < e_low)
parse_err(start, str, "Escape ranges should be low-to-high, but this is high-to-low.");
}
- bp_pat_t *esc = Pattern(BP_RANGE, start, str, 1, 1, .low=e_low, .high=e_high);
+ bp_pat_t *esc = Pattern(BP_RANGE, start, str, 1, 1, .low = e_low, .high = e_high);
all = either_pat(all, esc);
} while (*str == ',' && str++ < end);
@@ -347,33 +337,44 @@ static bp_pat_t *_bp_simplepattern(const char *str, const char *end, bool inside
return Pattern(BP_WORD_BOUNDARY, start, str, 0, 0);
}
// String literal
- case '"': case '\'': case '\002': case '}': {
+ case '"':
+ case '\'':
+ case '\002':
+ case '}': {
char endquote = c == '\002' ? '\003' : (c == '}' ? '{' : c);
- char *litstart = (char*)str;
+ char *litstart = (char *)str;
while (str < end && *str != endquote)
str = next_char(str, end);
size_t len = (size_t)(str - litstart);
str = next_char(str, end);
if (c == '}') ++start; // Don't include the "}" in the pattern source range
- return Pattern(BP_STRING, start, str, len, (ssize_t)len, .string=strndup(litstart, len));
+ return Pattern(BP_STRING, start, str, len, (ssize_t)len, .string = strndup(litstart, len));
}
// Not <pat>
case '!': {
bp_pat_t *p = bp_simplepattern(str, end);
if (!p) parse_err(str, str, "There should be a pattern after this '!'");
- return Pattern(BP_NOT, start, p->end, 0, 0, .pat=p);
+ return Pattern(BP_NOT, start, p->end, 0, 0, .pat = p);
}
// Number of repetitions: <N>(-<N> / - / + / "")
- case '0': case '1': case '2': case '3': case '4': case '5':
- case '6': case '7': case '8': case '9': {
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9': {
size_t min = 0;
ssize_t max = -1;
--str;
- long n1 = strtol(str, (char**)&str, 10);
+ long n1 = strtol(str, (char **)&str, 10);
if (matchchar(&str, '-', false, end)) {
str = after_spaces(str, false, end);
const char *numstart = str;
- long n2 = strtol(str, (char**)&str, 10);
+ long n2 = strtol(str, (char **)&str, 10);
if (str == numstart) min = 0, max = (ssize_t)n1;
else min = (size_t)n1, max = (ssize_t)n2;
} else if (matchchar(&str, '+', false, end)) {
@@ -382,14 +383,12 @@ static bp_pat_t *_bp_simplepattern(const char *str, const char *end, bool inside
min = (size_t)n1, max = (ssize_t)n1;
}
bp_pat_t *repeating = bp_simplepattern(str, end);
- if (!repeating)
- parse_err(str, str, "There should be a pattern after this repetition count.");
+ if (!repeating) parse_err(str, str, "There should be a pattern after this repetition count.");
str = repeating->end;
bp_pat_t *sep = NULL;
if (matchchar(&str, '%', false, end)) {
sep = bp_simplepattern(str, end);
- if (!sep)
- parse_err(str, str, "There should be a separator pattern after this '%%'");
+ if (!sep) parse_err(str, str, "There should be a separator pattern after this '%%'");
str = sep->end;
} else {
str = repeating->end;
@@ -399,22 +398,19 @@ static bp_pat_t *_bp_simplepattern(const char *str, const char *end, bool inside
// Lookbehind
case '<': {
bp_pat_t *behind = bp_simplepattern(str, end);
- if (!behind)
- parse_err(str, str, "There should be a pattern after this '<'");
- return Pattern(BP_AFTER, start, behind->end, 0, 0, .pat=behind);
+ if (!behind) parse_err(str, str, "There should be a pattern after this '<'");
+ return Pattern(BP_AFTER, start, behind->end, 0, 0, .pat = behind);
}
// Lookahead
case '>': {
bp_pat_t *ahead = bp_simplepattern(str, end);
- if (!ahead)
- parse_err(str, str, "There should be a pattern after this '>'");
- return Pattern(BP_BEFORE, start, ahead->end, 0, 0, .pat=ahead);
+ if (!ahead) parse_err(str, str, "There should be a pattern after this '>'");
+ return Pattern(BP_BEFORE, start, ahead->end, 0, 0, .pat = ahead);
}
// Parentheses
case '(': {
bp_pat_t *pat = bp_pattern_nl(str, end, true);
- if (!pat)
- parse_err(str, str, "There should be a valid pattern after this parenthesis.");
+ if (!pat) parse_err(str, str, "There should be a valid pattern after this parenthesis.");
str = pat->end;
if (!matchchar(&str, ')', true, end)) parse_err(str, str, "Missing paren: )");
pat->start = start;
@@ -424,24 +420,22 @@ static bp_pat_t *_bp_simplepattern(const char *str, const char *end, bool inside
// Square brackets
case '[': {
bp_pat_t *maybe = bp_pattern_nl(str, end, true);
- if (!maybe)
- parse_err(str, str, "There should be a valid pattern after this square bracket.");
+ if (!maybe) parse_err(str, str, "There should be a valid pattern after this square bracket.");
str = maybe->end;
(void)matchchar(&str, ']', true, end);
return new_range(start, str, 0, 1, maybe, NULL);
}
// Repeating
- case '*': case '+': {
+ case '*':
+ case '+': {
size_t min = (size_t)(c == '*' ? 0 : 1);
bp_pat_t *repeating = bp_simplepattern(str, end);
- if (!repeating)
- parse_err(str, str, "There should be a valid pattern to repeat here");
+ if (!repeating) parse_err(str, str, "There should be a valid pattern to repeat here");
str = repeating->end;
bp_pat_t *sep = NULL;
if (matchchar(&str, '%', false, end)) {
sep = bp_simplepattern(str, end);
- if (!sep)
- parse_err(str, str, "There should be a separator pattern after the '%%' here.");
+ if (!sep) parse_err(str, str, "There should be a separator pattern after the '%%' here.");
str = sep->end;
}
return new_range(start, str, min, -1, repeating, sep);
@@ -451,16 +445,15 @@ static bp_pat_t *_bp_simplepattern(const char *str, const char *end, bool inside
if (matchchar(&str, ':', false, end)) { // Tagged capture @:Foo=pat
const char *name = str;
str = after_name(name, end);
- if (str <= name)
- parse_err(start, str, "There should be an identifier after this '@:'");
+ if (str <= name) parse_err(start, str, "There should be an identifier after this '@:'");
size_t namelen = (size_t)(str - name);
bp_pat_t *p = NULL;
if (matchchar(&str, '=', false, end)) {
p = bp_simplepattern(str, end);
if (p) str = p->end;
}
- return Pattern(BP_TAGGED, start, str, p ? p->min_matchlen : 0, p ? p->max_matchlen : 0,
- .pat=p, .name=name, .namelen=namelen);
+ return Pattern(BP_TAGGED, start, str, p ? p->min_matchlen : 0, p ? p->max_matchlen : 0, .pat = p,
+ .name = name, .namelen = namelen);
}
const char *name = NULL;
@@ -470,31 +463,28 @@ static bp_pat_t *_bp_simplepattern(const char *str, const char *end, bool inside
bool backreffable = false;
if (a > str && matchchar(&eq, ':', false, end)) {
name = str;
- namelen = (size_t)(a-str);
+ namelen = (size_t)(a - str);
str = eq;
backreffable = true;
} else if (a > str && !matchstr(&eq, "=>", false, end) && matchchar(&eq, '=', false, end)) {
name = str;
- namelen = (size_t)(a-str);
+ namelen = (size_t)(a - str);
str = eq;
}
bp_pat_t *pat = bp_simplepattern(str, end);
- if (!pat)
- parse_err(str, str, "There should be a valid pattern here to capture after the '@'");
+ if (!pat) parse_err(str, str, "There should be a valid pattern here to capture after the '@'");
- return Pattern(BP_CAPTURE, start, pat->end, pat->min_matchlen, pat->max_matchlen,
- .pat = pat, .name = name, .namelen = namelen, .backreffable = backreffable);
+ return Pattern(BP_CAPTURE, start, pat->end, pat->min_matchlen, pat->max_matchlen, .pat = pat, .name = name,
+ .namelen = namelen, .backreffable = backreffable);
}
// Start of file/line
case '^': {
- if (*str == '^')
- return Pattern(BP_START_OF_FILE, start, ++str, 0, 0);
+ if (*str == '^') return Pattern(BP_START_OF_FILE, start, ++str, 0, 0);
return Pattern(BP_START_OF_LINE, start, str, 0, 0);
}
// End of file/line:
case '$': {
- if (*str == '$')
- return Pattern(BP_END_OF_FILE, start, ++str, 0, 0);
+ if (*str == '$') return Pattern(BP_END_OF_FILE, start, ++str, 0, 0);
return Pattern(BP_END_OF_LINE, start, str, 0, 0);
}
default: {
@@ -504,7 +494,7 @@ static bp_pat_t *_bp_simplepattern(const char *str, const char *end, bool inside
if (!isalpha(c) && c != '_') return NULL;
str = after_name(start, end);
size_t namelen = (size_t)(str - start);
- return Pattern(BP_REF, start, str, 0, -1, .name=start, .len=namelen);
+ return Pattern(BP_REF, start, str, 0, -1, .name = start, .len = namelen);
}
}
}
@@ -513,20 +503,19 @@ static bp_pat_t *_bp_simplepattern(const char *str, const char *end, bool inside
// Similar to bp_simplepattern, except that the pattern begins with an implicit
// '}' open quote that can be closed with '{'
//
-public maybe_pat_t bp_stringpattern(const char *str, const char *end)
-{
+public
+maybe_pat_t bp_stringpattern(const char *str, const char *end) {
__TRY_PATTERN__
if (!end) end = str + strlen(str);
- char *start = (char*)str;
+ char *start = (char *)str;
while (str < end && *str != '{')
str = next_char(str, end);
size_t len = (size_t)(str - start);
- bp_pat_t *pat = len > 0 ? Pattern(BP_STRING, start, str, len, (ssize_t)len, .string=strndup(start, len)) : NULL;
+ bp_pat_t *pat = len > 0 ? Pattern(BP_STRING, start, str, len, (ssize_t)len, .string = strndup(start, len)) : NULL;
str += 1;
if (str < end) {
bp_pat_t *interp = bp_pattern_nl(str, end, true);
- if (interp)
- pat = chain_together(pat, interp);
+ if (interp) pat = chain_together(pat, interp);
pat->end = end;
}
__END_TRY_PATTERN__
@@ -536,8 +525,7 @@ public maybe_pat_t bp_stringpattern(const char *str, const char *end)
//
// Wrapper for _bp_simplepattern() that expands any postfix operators (~, !~)
//
-static bp_pat_t *bp_simplepattern(const char *str, const char *end)
-{
+static bp_pat_t *bp_simplepattern(const char *str, const char *end) {
const char *start = str;
bp_pat_t *pat = _bp_simplepattern(str, end, false);
if (pat == NULL) return pat;
@@ -546,20 +534,18 @@ static bp_pat_t *bp_simplepattern(const char *str, const char *end)
// Expand postfix operators (if any)
while (str < end) {
enum bp_pattype_e type;
- if (matchchar(&str, '~', false, end))
- type = BP_MATCH;
- else if (matchstr(&str, "!~", false, end))
- type = BP_NOT_MATCH;
+ if (matchchar(&str, '~', false, end)) type = BP_MATCH;
+ else if (matchstr(&str, "!~", false, end)) type = BP_NOT_MATCH;
else break;
bp_pat_t *first = pat;
bp_pat_t *second = bp_simplepattern(str, end);
- if (!second)
- parse_err(str, str, "There should be a valid pattern here");
+ if (!second) parse_err(str, str, "There should be a valid pattern here");
- pat = type == BP_MATCH ?
- Pattern(BP_MATCH, start, second->end, first->min_matchlen, first->max_matchlen, .pat=first, .must_match=second)
- : Pattern(BP_NOT_MATCH, start, second->end, first->min_matchlen, first->max_matchlen, .pat=first, .must_not_match=second);
+ pat = type == BP_MATCH ? Pattern(BP_MATCH, start, second->end, first->min_matchlen, first->max_matchlen,
+ .pat = first, .must_match = second)
+ : Pattern(BP_NOT_MATCH, start, second->end, first->min_matchlen, first->max_matchlen,
+ .pat = first, .must_not_match = second);
str = pat->end;
}
@@ -570,8 +556,8 @@ static bp_pat_t *bp_simplepattern(const char *str, const char *end)
// Given a pattern and a replacement string, compile the two into a BP
// replace pattern.
//
-public maybe_pat_t bp_replacement(bp_pat_t *replacepat, const char *replacement, const char *end)
-{
+public
+maybe_pat_t bp_replacement(bp_pat_t *replacepat, const char *replacement, const char *end) {
const char *p = replacement;
if (!end) end = replacement + strlen(replacement);
__TRY_PATTERN__
@@ -583,84 +569,97 @@ public maybe_pat_t bp_replacement(bp_pat_t *replacepat, const char *replacement,
}
}
__END_TRY_PATTERN__
- size_t rlen = (size_t)(p-replacement);
- char *rcpy = new(char[rlen + 1]);
+ size_t rlen = (size_t)(p - replacement);
+ char *rcpy = new (char[rlen + 1]);
memcpy(rcpy, replacement, rlen);
- bp_pat_t *pat = Pattern(BP_REPLACE, replacepat->start, replacepat->end, replacepat->min_matchlen, replacepat->max_matchlen,
- .pat=replacepat, .text=rcpy, .len=rlen);
+ bp_pat_t *pat = Pattern(BP_REPLACE, replacepat->start, replacepat->end, replacepat->min_matchlen,
+ replacepat->max_matchlen, .pat = replacepat, .text = rcpy, .len = rlen);
return (maybe_pat_t){.success = true, .value.pat = pat};
}
-static bp_pat_t *bp_pattern_nl(const char *str, const char *end, bool allow_nl)
-{
+static bp_pat_t *bp_pattern_nl(const char *str, const char *end, bool allow_nl) {
str = after_spaces(str, allow_nl, end);
bp_pat_t *pat = bp_simplepattern(str, end);
if (pat != NULL) pat = expand_choices(pat, end, allow_nl);
if (matchstr(&str, "=>", allow_nl, end))
- pat = expand_replacements(pat ? pat : Pattern(BP_STRING, str-2, str-2, 0, 0), end, allow_nl);
+ pat = expand_replacements(pat ? pat : Pattern(BP_STRING, str - 2, str - 2, 0, 0), end, allow_nl);
return pat;
}
//
// Return a new back reference to an existing match.
//
-public bp_pat_t *bp_raw_literal(const char *str, size_t len)
-{
- return Pattern(BP_STRING, str, &str[len], len, (ssize_t)len, .string=strndup(str, len));
+public
+bp_pat_t *bp_raw_literal(const char *str, size_t len) {
+ return Pattern(BP_STRING, str, &str[len], len, (ssize_t)len, .string = strndup(str, len));
}
//
// Compile a string representing a BP pattern into a pattern object.
//
-public maybe_pat_t bp_pattern(const char *str, const char *end)
-{
+public
+maybe_pat_t bp_pattern(const char *str, const char *end) {
str = after_spaces(str, true, end);
if (!end) end = str + strlen(str);
__TRY_PATTERN__
bp_pat_t *ret = bp_pattern_nl(str, end, false);
__END_TRY_PATTERN__
if (ret && after_spaces(ret->end, true, end) < end)
- return (maybe_pat_t){.success = false, .value.error.start = ret->end, .value.error.end = end, .value.error.msg = "Failed to parse this part of the pattern"};
- else if (ret)
- return (maybe_pat_t){.success = true, .value.pat = ret};
+ return (maybe_pat_t){.success = false,
+ .value.error.start = ret->end,
+ .value.error.end = end,
+ .value.error.msg = "Failed to parse this part of the pattern"};
+ else if (ret) return (maybe_pat_t){.success = true, .value.pat = ret};
else
- return (maybe_pat_t){.success = false, .value.error.start = str, .value.error.end = end, .value.error.msg = "Failed to parse this pattern"};
+ return (maybe_pat_t){.success = false,
+ .value.error.start = str,
+ .value.error.end = end,
+ .value.error.msg = "Failed to parse this pattern"};
}
-public void free_all_pats(void)
-{
+public
+void free_all_pats(void) {
while (allocated_pats) {
bp_pat_t *tofree = allocated_pats;
allocated_pats = tofree->next;
- delete(&tofree);
+ delete (&tofree);
}
}
-public void delete_pat(bp_pat_t **at_pat, bool recursive)
-{
+public
+void delete_pat(bp_pat_t **at_pat, bool recursive) {
bp_pat_t *pat = *at_pat;
if (!pat) return;
-#define T(tag, ...) case tag: { auto _data = When(pat, tag); __VA_ARGS__; break; }
+#define T(tag, ...) \
+ case tag: { \
+ auto _data = When(pat, tag); \
+ __VA_ARGS__; \
+ break; \
+ }
#define F(field) delete_pat(&_data->field, true)
if (recursive) {
switch (pat->type) {
- T(BP_DEFINITIONS, F(meaning), F(next_def))
- T(BP_REPEAT, F(sep), F(repeat_pat))
- T(BP_CHAIN, F(first), F(second))
- T(BP_UPTO, F(target), F(skip))
- T(BP_UPTO_STRICT, F(target), F(skip))
- T(BP_OTHERWISE, F(first), F(second))
- T(BP_MATCH, F(pat), F(must_match))
- T(BP_NOT_MATCH, F(pat), F(must_not_match))
- T(BP_REPLACE, F(pat))
- T(BP_CAPTURE, F(pat))
- T(BP_TAGGED, F(pat))
- T(BP_NOT, F(pat))
- T(BP_AFTER, F(pat))
- T(BP_BEFORE, F(pat))
- T(BP_LEFTRECURSION, F(fallback))
- T(BP_STRING, if (_data->string) { free((char*)_data->string); _data->string = NULL; })
+ T(BP_DEFINITIONS, F(meaning), F(next_def))
+ T(BP_REPEAT, F(sep), F(repeat_pat))
+ T(BP_CHAIN, F(first), F(second))
+ T(BP_UPTO, F(target), F(skip))
+ T(BP_UPTO_STRICT, F(target), F(skip))
+ T(BP_OTHERWISE, F(first), F(second))
+ T(BP_MATCH, F(pat), F(must_match))
+ T(BP_NOT_MATCH, F(pat), F(must_not_match))
+ T(BP_REPLACE, F(pat))
+ T(BP_CAPTURE, F(pat))
+ T(BP_TAGGED, F(pat))
+ T(BP_NOT, F(pat))
+ T(BP_AFTER, F(pat))
+ T(BP_BEFORE, F(pat))
+ T(BP_LEFTRECURSION, F(fallback))
+ T(
+ BP_STRING, if (_data->string) {
+ free((char *)_data->string);
+ _data->string = NULL;
+ })
default: break;
}
}
@@ -669,14 +668,20 @@ public void delete_pat(bp_pat_t **at_pat, bool recursive)
if (pat->home) *(pat->home) = pat->next;
if (pat->next) pat->next->home = pat->home;
- delete(at_pat);
+ delete (at_pat);
}
-int fprint_pattern(FILE *stream, bp_pat_t *pat)
-{
+int fprint_pattern(FILE *stream, bp_pat_t *pat) {
if (!pat) return fputs("(null)", stream);
switch (pat->type) {
-#define CASE(name, ...) case BP_ ## name: { __auto_type data = pat->__tagged.BP_##name; (void)data; int _printed = fputs(#name, stream); __VA_ARGS__; return _printed; }
+#define CASE(name, ...) \
+ case BP_##name: { \
+ __auto_type data = pat->__tagged.BP_##name; \
+ (void)data; \
+ int _printed = fputs(#name, stream); \
+ __VA_ARGS__; \
+ return _printed; \
+ }
#define FMT(...) _printed += fprintf(stream, __VA_ARGS__)
#define PAT(p) _printed += fprint_pattern(stream, p)
CASE(ERROR)
@@ -691,7 +696,8 @@ int fprint_pattern(FILE *stream, bp_pat_t *pat)
CASE(REPEAT, FMT("(%u-%d, ", data.min, data.max); PAT(data.repeat_pat); FMT(", sep="); PAT(data.sep); FMT(")"))
CASE(BEFORE, FMT("("); PAT(data.pat); FMT(")"))
CASE(AFTER, FMT("("); PAT(data.pat); FMT(")"))
- CASE(CAPTURE, FMT("("); PAT(data.pat); FMT(", name=%.*s, backref=%s)", data.namelen, data.name, data.backreffable ? "yes" : "no"))
+ CASE(CAPTURE, FMT("("); PAT(data.pat);
+ FMT(", name=%.*s, backref=%s)", data.namelen, data.name, data.backreffable ? "yes" : "no"))
CASE(OTHERWISE, FMT("("); PAT(data.first); FMT(", "); PAT(data.second); FMT(")"))
CASE(CHAIN, FMT("("); PAT(data.first); FMT(", "); PAT(data.second); FMT(")"))
CASE(MATCH, FMT("("); PAT(data.pat); FMT(", matches="); PAT(data.must_match); FMT(")"))
@@ -706,7 +712,8 @@ int fprint_pattern(FILE *stream, bp_pat_t *pat)
CASE(END_OF_LINE)
CASE(WORD_BOUNDARY)
CASE(DEFINITIONS, FMT("(%.*s=", data.namelen, data.name); PAT(data.meaning); FMT("); "); PAT(data.next_def))
- CASE(TAGGED, FMT("(%.*s=", data.namelen, data.name); PAT(data.pat); FMT(" backref=%s)", data.backreffable ? "yes" : "no"))
+ CASE(TAGGED, FMT("(%.*s=", data.namelen, data.name); PAT(data.pat);
+ FMT(" backref=%s)", data.backreffable ? "yes" : "no"))
#undef PAT
#undef FMT
#undef P
diff --git a/pattern.h b/pattern.h
index a2ca785..f0baba4 100644
--- a/pattern.h
+++ b/pattern.h
@@ -3,42 +3,42 @@
//
#pragma once
+#include <err.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/types.h>
-#include <err.h>
// BP virtual machine pattern types
enum bp_pattype_e {
- BP_ERROR = 0,
- BP_ANYCHAR = 1,
- BP_ID_START = 2,
- BP_ID_CONTINUE = 3,
- BP_STRING = 4,
- BP_RANGE = 5,
- BP_NOT = 6,
- BP_UPTO = 7,
- BP_UPTO_STRICT = 8,
- BP_REPEAT = 9,
- BP_BEFORE = 10,
- BP_AFTER = 11,
- BP_CAPTURE = 12,
- BP_OTHERWISE = 13,
- BP_CHAIN = 14,
- BP_MATCH = 15,
- BP_NOT_MATCH = 16,
- BP_REPLACE = 17,
- BP_REF = 18,
- BP_NODENT = 19,
- BP_CURDENT = 20,
+ BP_ERROR = 0,
+ BP_ANYCHAR = 1,
+ BP_ID_START = 2,
+ BP_ID_CONTINUE = 3,
+ BP_STRING = 4,
+ BP_RANGE = 5,
+ BP_NOT = 6,
+ BP_UPTO = 7,
+ BP_UPTO_STRICT = 8,
+ BP_REPEAT = 9,
+ BP_BEFORE = 10,
+ BP_AFTER = 11,
+ BP_CAPTURE = 12,
+ BP_OTHERWISE = 13,
+ BP_CHAIN = 14,
+ BP_MATCH = 15,
+ BP_NOT_MATCH = 16,
+ BP_REPLACE = 17,
+ BP_REF = 18,
+ BP_NODENT = 19,
+ BP_CURDENT = 20,
BP_START_OF_FILE = 21,
BP_START_OF_LINE = 22,
- BP_END_OF_FILE = 23,
- BP_END_OF_LINE = 24,
+ BP_END_OF_FILE = 23,
+ BP_END_OF_LINE = 24,
BP_WORD_BOUNDARY = 25,
- BP_DEFINITIONS = 26,
- BP_TAGGED = 27,
+ BP_DEFINITIONS = 26,
+ BP_TAGGED = 27,
BP_LEFTRECURSION = 28,
};
@@ -58,21 +58,39 @@ struct bp_pat_s {
struct {
const char *start, *end, *msg;
} BP_ERROR;
- struct {} BP_ANYCHAR;
- struct {} BP_ID_START;
- struct {} BP_ID_CONTINUE;
- struct {const char *string; size_t len; } BP_STRING;
- struct {unsigned char low, high; } BP_RANGE;
- struct {bp_pat_t *pat;} BP_NOT;
- struct {bp_pat_t *target, *skip;} BP_UPTO;
- struct {bp_pat_t *target, *skip;} BP_UPTO_STRICT;
+ struct {
+ } BP_ANYCHAR;
+ struct {
+ } BP_ID_START;
+ struct {
+ } BP_ID_CONTINUE;
+ struct {
+ const char *string;
+ size_t len;
+ } BP_STRING;
+ struct {
+ unsigned char low, high;
+ } BP_RANGE;
+ struct {
+ bp_pat_t *pat;
+ } BP_NOT;
+ struct {
+ bp_pat_t *target, *skip;
+ } BP_UPTO;
+ struct {
+ bp_pat_t *target, *skip;
+ } BP_UPTO_STRICT;
struct {
uint32_t min;
int32_t max;
bp_pat_t *sep, *repeat_pat;
} BP_REPEAT;
- struct {bp_pat_t *pat;} BP_BEFORE;
- struct {bp_pat_t *pat;} BP_AFTER;
+ struct {
+ bp_pat_t *pat;
+ } BP_BEFORE;
+ struct {
+ bp_pat_t *pat;
+ } BP_AFTER;
struct {
bp_pat_t *pat;
const char *name;
@@ -85,8 +103,12 @@ struct bp_pat_s {
struct {
bp_pat_t *first, *second;
} BP_CHAIN;
- struct {bp_pat_t *pat, *must_match;} BP_MATCH;
- struct {bp_pat_t *pat, *must_not_match;} BP_NOT_MATCH;
+ struct {
+ bp_pat_t *pat, *must_match;
+ } BP_MATCH;
+ struct {
+ bp_pat_t *pat, *must_not_match;
+ } BP_NOT_MATCH;
struct {
bp_pat_t *pat;
const char *text;
@@ -96,13 +118,20 @@ struct bp_pat_s {
const char *name;
uint32_t len;
} BP_REF;
- struct {} BP_NODENT;
- struct {} BP_CURDENT;
- struct {} BP_START_OF_FILE;
- struct {} BP_START_OF_LINE;
- struct {} BP_END_OF_FILE;
- struct {} BP_END_OF_LINE;
- struct {} BP_WORD_BOUNDARY;
+ struct {
+ } BP_NODENT;
+ struct {
+ } BP_CURDENT;
+ struct {
+ } BP_START_OF_FILE;
+ struct {
+ } BP_START_OF_LINE;
+ struct {
+ } BP_END_OF_FILE;
+ struct {
+ } BP_END_OF_LINE;
+ struct {
+ } BP_WORD_BOUNDARY;
struct {
const char *name;
uint32_t namelen;
@@ -142,21 +171,16 @@ typedef struct {
} value;
} maybe_pat_t;
-__attribute__((returns_nonnull))
-bp_pat_t *allocate_pat(bp_pat_t pat);
-__attribute__((nonnull, returns_nonnull))
-bp_pat_t *bp_raw_literal(const char *str, size_t len);
-__attribute__((nonnull(1)))
-maybe_pat_t bp_stringpattern(const char *str, const char *end);
-__attribute__((nonnull(1,2)))
-maybe_pat_t bp_replacement(bp_pat_t *replacepat, const char *replacement, const char *end);
+__attribute__((returns_nonnull)) bp_pat_t *allocate_pat(bp_pat_t pat);
+__attribute__((nonnull, returns_nonnull)) bp_pat_t *bp_raw_literal(const char *str, size_t len);
+__attribute__((nonnull(1))) maybe_pat_t bp_stringpattern(const char *str, const char *end);
+__attribute__((nonnull(1, 2))) maybe_pat_t bp_replacement(bp_pat_t *replacepat, const char *replacement,
+ const char *end);
bp_pat_t *chain_together(bp_pat_t *first, bp_pat_t *second);
bp_pat_t *either_pat(bp_pat_t *first, bp_pat_t *second);
-__attribute__((nonnull(1)))
-maybe_pat_t bp_pattern(const char *str, const char *end);
+__attribute__((nonnull(1))) maybe_pat_t bp_pattern(const char *str, const char *end);
void free_all_pats(void);
-__attribute__((nonnull))
-void delete_pat(bp_pat_t **at_pat, bool recursive);
+__attribute__((nonnull)) void delete_pat(bp_pat_t **at_pat, bool recursive);
int fprint_pattern(FILE *stream, bp_pat_t *pat);
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/printmatch.c b/printmatch.c
index 9dccb10..24d301c 100644
--- a/printmatch.c
+++ b/printmatch.c
@@ -2,9 +2,9 @@
// printmatch.c - Debug visualization of pattern matches.
//
+#include <ctype.h>
#include <stdio.h>
#include <string.h>
-#include <ctype.h>
#include "match.h"
#include "printmatch.h"
@@ -15,17 +15,15 @@ typedef struct match_node_s {
struct match_node_s *next;
} match_node_t;
-__attribute__((nonnull, pure))
-static int height_of_match(bp_match_t *m);
-__attribute__((nonnull))
-static void _explain_matches(match_node_t *firstmatch, int depth, const char *text, size_t textlen);
+__attribute__((nonnull, pure)) static int height_of_match(bp_match_t *m);
+__attribute__((nonnull)) static void _explain_matches(match_node_t *firstmatch, int depth, const char *text,
+ size_t textlen);
//
// Return the height of a match object (i.e. the number of descendents of the
// structure).
//
-static int height_of_match(bp_match_t *m)
-{
+static int height_of_match(bp_match_t *m) {
int height = 0;
for (int i = 0; m->children && m->children[i]; i++) {
bp_match_t *child = m->children[i];
@@ -38,8 +36,7 @@ static int height_of_match(bp_match_t *m)
//
// Print a visual explanation for the as-yet-unprinted matches provided.
//
-static void _explain_matches(match_node_t *firstmatch, int depth, const char *text, size_t textlen)
-{
+static void _explain_matches(match_node_t *firstmatch, int depth, const char *text, size_t textlen) {
const char *V = "│"; // Vertical bar
const char *H = "─"; // Horizontal bar
const char *color = (depth % 2 == 0) ? "34" : "33";
@@ -52,8 +49,7 @@ static void _explain_matches(match_node_t *firstmatch, int depth, const char *te
// while also printing earlier matches first when it doesn't affect overall
// output height.
for (match_node_t *p = firstmatch; p; p = p->next)
- if (height_of_match(p->m) > height_of_match(viz))
- viz = p->m;
+ if (height_of_match(p->m) > height_of_match(viz)) viz = p->m;
const char *viz_type = viz->pat->start;
size_t viz_typelen = (size_t)(viz->pat->end - viz->pat->start);
@@ -61,11 +57,10 @@ static void _explain_matches(match_node_t *firstmatch, int depth, const char *te
// literal string being matched. (Backrefs have start/end inside the text
// input, instead of something the user typed in)
if (viz_type >= text && viz_type <= &text[textlen])
- printf("\033[%zuG\033[0;2m\"\033[%s;1m", 2*textlen+3, color);
+ printf("\033[%zuG\033[0;2m\"\033[%s;1m", 2 * textlen + 3, color);
else if (viz->pat->type == BP_STRING && (viz->end - viz->start) == (long)viz_typelen)
- printf("\033[%zuG\033[%s;1m\"", 2*textlen+3, color);
- else
- printf("\033[%zuG\033[%s;1m", 2*textlen+3, color);
+ printf("\033[%zuG\033[%s;1m\"", 2 * textlen + 3, color);
+ else printf("\033[%zuG\033[%s;1m", 2 * textlen + 3, color);
for (size_t i = 0; i < viz_typelen; i++) {
switch (viz_type[i]) {
@@ -75,17 +70,16 @@ static void _explain_matches(match_node_t *firstmatch, int depth, const char *te
}
}
- if (viz_type >= text && viz_type <= &text[textlen])
- printf("\033[0;2m\"");
- else if (viz->pat->type == BP_STRING && (viz->end - viz->start) == (long)viz_typelen)
- printf("\"");
+ if (viz_type >= text && viz_type <= &text[textlen]) printf("\033[0;2m\"");
+ else if (viz->pat->type == BP_STRING && (viz->end - viz->start) == (long)viz_typelen) printf("\"");
printf("\033[m");
match_node_t *children = NULL;
match_node_t **nextchild = &children;
-#define RIGHT_TYPE(m) (m->m->pat->end == m->m->pat->start + viz_typelen && strncmp(m->m->pat->start, viz_type, viz_typelen) == 0)
+#define RIGHT_TYPE(m) \
+ (m->m->pat->end == m->m->pat->start + viz_typelen && strncmp(m->m->pat->start, viz_type, viz_typelen) == 0)
// Print nonzero-width first:
for (match_node_t *m = firstmatch; m; m = m->next) {
if (RIGHT_TYPE(m)) {
@@ -93,45 +87,41 @@ static void _explain_matches(match_node_t *firstmatch, int depth, const char *te
if (m->m->pat->type == BP_CHAIN) {
bp_match_t *tmp = m->m;
while (tmp->pat->type == BP_CHAIN) {
- *nextchild = new(match_node_t);
+ *nextchild = new (match_node_t);
(*nextchild)->m = tmp->children[0];
nextchild = &((*nextchild)->next);
tmp = tmp->children[1];
}
- *nextchild = new(match_node_t);
+ *nextchild = new (match_node_t);
(*nextchild)->m = tmp;
nextchild = &((*nextchild)->next);
} else {
for (int i = 0; m->m->children && m->m->children[i]; i++) {
- *nextchild = new(match_node_t);
+ *nextchild = new (match_node_t);
(*nextchild)->m = m->m->children[i];
nextchild = &((*nextchild)->next);
}
}
if (m->m->end == m->m->start) continue;
- printf("\033[%zdG\033[0;2m%s\033[0;7;%sm", 1+2*(m->m->start - text), V, color);
+ printf("\033[%zdG\033[0;2m%s\033[0;7;%sm", 1 + 2 * (m->m->start - text), V, color);
for (const char *c = m->m->start; c < m->m->end; ++c) {
// TODO: newline
if (c > m->m->start) printf(" ");
// TODO: utf8
- //while ((*c & 0xC0) != 0x80) printf("%c", *(c++));
- if (*c == '\n')
- printf("↵");
- else if (*c == '\t')
- printf("⇥");
- else
- printf("%c", *c);
+ // while ((*c & 0xC0) != 0x80) printf("%c", *(c++));
+ if (*c == '\n') printf("↵");
+ else if (*c == '\t') printf("⇥");
+ else printf("%c", *c);
}
printf("\033[0;2m%s\033[m", V);
} else {
- *nextchild = new(match_node_t);
+ *nextchild = new (match_node_t);
(*nextchild)->m = m->m;
nextchild = &((*nextchild)->next);
- printf("\033[%zdG\033[0;2m%s", 1+2*(m->m->start - text), V);
- for (ssize_t i = (ssize_t)(2*(m->m->end - m->m->start)-1); i > 0; i--)
+ printf("\033[%zdG\033[0;2m%s", 1 + 2 * (m->m->start - text), V);
+ for (ssize_t i = (ssize_t)(2 * (m->m->end - m->m->start) - 1); i > 0; i--)
printf(" ");
- if (m->m->end > m->m->start)
- printf("\033[0;2m%s", V);
+ if (m->m->end > m->m->start) printf("\033[0;2m%s", V);
printf("\033[m");
}
}
@@ -140,9 +130,9 @@ static void _explain_matches(match_node_t *firstmatch, int depth, const char *te
for (match_node_t *m = firstmatch; m; m = m->next) {
if (m->m->end > m->m->start) continue;
if (RIGHT_TYPE(m)) {
- printf("\033[%zdG\033[7;%sm▒\033[m", 1+2*(m->m->start - text), color);
+ printf("\033[%zdG\033[7;%sm▒\033[m", 1 + 2 * (m->m->start - text), color);
} else {
- printf("\033[%zdG\033[0;2m%s\033[m", 1+2*(m->m->start - text), V);
+ printf("\033[%zdG\033[0;2m%s\033[m", 1 + 2 * (m->m->start - text), V);
}
}
@@ -150,18 +140,17 @@ static void _explain_matches(match_node_t *firstmatch, int depth, const char *te
for (match_node_t *m = firstmatch; m; m = m->next) {
if (m->m->end == m->m->start) {
- if (!RIGHT_TYPE(m))
- printf("\033[%zdG\033[0;2m%s", 1 + 2*(m->m->start - text), V);
+ if (!RIGHT_TYPE(m)) printf("\033[%zdG\033[0;2m%s", 1 + 2 * (m->m->start - text), V);
} else {
const char *l = "└";
const char *r = "┘";
for (match_node_t *c = children; c; c = c->next) {
if (c->m->start == m->m->start || c->m->end == m->m->start) l = V;
- if (c->m->start == m->m->end || c->m->end == m->m->end) r = V;
+ if (c->m->start == m->m->end || c->m->end == m->m->end) r = V;
}
- printf("\033[%zdG\033[0;2m%s", 1 + 2*(m->m->start - text), l);
+ printf("\033[%zdG\033[0;2m%s", 1 + 2 * (m->m->start - text), l);
const char *h = RIGHT_TYPE(m) ? H : " ";
- for (ssize_t n = (ssize_t)(2*(m->m->end - m->m->start) - 1); n > 0; n--)
+ for (ssize_t n = (ssize_t)(2 * (m->m->end - m->m->start) - 1); n > 0; n--)
printf("%s", h);
printf("%s\033[m", r);
}
@@ -170,28 +159,26 @@ static void _explain_matches(match_node_t *firstmatch, int depth, const char *te
printf("\n");
- if (children)
- _explain_matches(children, depth+1, text, textlen);
+ if (children) _explain_matches(children, depth + 1, text, textlen);
for (match_node_t *c = children, *next = NULL; c; c = next) {
next = c->next;
- delete(&c);
+ delete (&c);
}
}
//
// Print a visualization of a match object.
//
-public void explain_match(bp_match_t *m)
-{
+public
+void explain_match(bp_match_t *m) {
printf("\033[?7l"); // Disable line wrapping
match_node_t first = {.m = m};
_explain_matches(&first, 0, m->start, (size_t)(m->end - m->start));
printf("\033[?7h"); // Re-enable line wrapping
}
-static inline int fputc_safe(FILE *out, char c, print_options_t *opts)
-{
+static inline int fputc_safe(FILE *out, char c, print_options_t *opts) {
int printed = fputc(c, out);
if (c == '\n' && opts && opts->on_nl) {
opts->on_nl(out);
@@ -200,8 +187,8 @@ static inline int fputc_safe(FILE *out, char c, print_options_t *opts)
return printed;
}
-public int fprint_match(FILE *out, const char *file_start, bp_match_t *m, print_options_t *opts)
-{
+public
+int fprint_match(FILE *out, const char *file_start, bp_match_t *m, print_options_t *opts) {
int printed = 0;
if (m->pat->type == BP_REPLACE) {
auto rep = When(m->pat, BP_REPLACE);
@@ -210,14 +197,14 @@ public int fprint_match(FILE *out, const char *file_start, bp_match_t *m, print_
if (opts && opts->replace_color) printed += fprintf(out, "%s", opts->replace_color);
// TODO: clean up the line numbering code
- for (const char *r = text; r < end; ) {
+ for (const char *r = text; r < end;) {
// Capture substitution
- if (*r == '@' && r+1 < end && r[1] != '@') {
- const char *next = r+1;
+ if (*r == '@' && r + 1 < end && r[1] != '@') {
+ const char *next = r + 1;
// Retrieve the capture value:
bp_match_t *cap = NULL;
if (isdigit(*next)) {
- int n = (int)strtol(next, (char**)&next, 10);
+ int n = (int)strtol(next, (char **)&next, 10);
cap = get_numbered_capture(m->children[0], n);
} else {
const char *name = next, *name_end = after_name(next, end);
@@ -254,7 +241,8 @@ public int fprint_match(FILE *out, const char *file_start, bp_match_t *m, print_
// of the match. If the match spans multiple lines, or if
// the replacement text contains newlines, this may get weird.
const char *line_start = m->start;
- while (line_start > file_start && line_start[-1] != '\n') --line_start;
+ while (line_start > file_start && line_start[-1] != '\n')
+ --line_start;
printed += fputc_safe(out, '\n', opts);
for (const char *p = line_start; p < m->start && (*p == ' ' || *p == '\t'); ++p)
printed += fputc(*p, out);
@@ -272,11 +260,11 @@ public int fprint_match(FILE *out, const char *file_start, bp_match_t *m, print_
for (int i = 0; m->children && m->children[i]; i++) {
bp_match_t *child = m->children[i];
// Skip children from e.g. zero-width matches like >@foo
- if (!(prev <= child->start && child->start <= m->end &&
- prev <= child->end && child->end <= m->end))
+ if (!(prev <= child->start && child->start <= m->end && prev <= child->end && child->end <= m->end))
continue;
if (child->start > prev) {
- if (opts && opts->fprint_between) printed += opts->fprint_between(out, prev, child->start, opts->match_color);
+ if (opts && opts->fprint_between)
+ printed += opts->fprint_between(out, prev, child->start, opts->match_color);
else printed += fwrite(prev, sizeof(char), (size_t)(child->start - prev), out);
}
printed += fprint_match(out, file_start, child, opts);
diff --git a/printmatch.h b/printmatch.h
index fa6082c..969fdf3 100644
--- a/printmatch.h
+++ b/printmatch.h
@@ -10,10 +10,9 @@ typedef struct {
int (*fprint_between)(FILE *out, const char *start, const char *end, const char *normal_color);
void (*on_nl)(FILE *out);
} print_options_t;
-__attribute__((nonnull(1,2,3)))
-int fprint_match(FILE *out, const char *file_start, bp_match_t *m, print_options_t *opts);
+__attribute__((nonnull(1, 2, 3))) int fprint_match(FILE *out, const char *file_start, bp_match_t *m,
+ print_options_t *opts);
-__attribute__((nonnull))
-void explain_match(bp_match_t *m);
+__attribute__((nonnull)) void explain_match(bp_match_t *m);
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/utf8.c b/utf8.c
index c5fc14b..062e4f1 100644
--- a/utf8.c
+++ b/utf8.c
@@ -2,284 +2,363 @@
// utf8.c - UTF8 helper functions
//
#include <ctype.h>
-#include <stdint.h>
#include <stdbool.h>
+#include <stdint.h>
#include <unistd.h>
#include "utf8.h"
#include "utils.h"
-#define ARRAY_LEN(a) (sizeof(a)/sizeof((a)[0]))
+#define ARRAY_LEN(a) (sizeof(a) / sizeof((a)[0]))
#define likely(x) __builtin_expect((x), 1)
#define unlikely(x) __builtin_expect((x), 0)
static const uint32_t XID_Start[][2] = {
- {0x0041,0x005A}, {0x0061,0x007A}, {0x00AA,0x00AA}, {0x00B5,0x00B5}, {0x00BA,0x00BA}, {0x00C0,0x00D6}, {0x00D8,0x00F6}, {0x00F8,0x01BA},
- {0x01BB,0x01BB}, {0x01BC,0x01BF}, {0x01C0,0x01C3}, {0x01C4,0x0293}, {0x0294,0x0294}, {0x0295,0x02AF}, {0x02B0,0x02C1}, {0x02C6,0x02D1},
- {0x02E0,0x02E4}, {0x02EC,0x02EC}, {0x02EE,0x02EE}, {0x0370,0x0373}, {0x0374,0x0374}, {0x0376,0x0377}, {0x037B,0x037D}, {0x037F,0x037F},
- {0x0386,0x0386}, {0x0388,0x038A}, {0x038C,0x038C}, {0x038E,0x03A1}, {0x03A3,0x03F5}, {0x03F7,0x0481}, {0x048A,0x052F}, {0x0531,0x0556},
- {0x0559,0x0559}, {0x0560,0x0588}, {0x05D0,0x05EA}, {0x05EF,0x05F2}, {0x0620,0x063F}, {0x0640,0x0640}, {0x0641,0x064A}, {0x066E,0x066F},
- {0x0671,0x06D3}, {0x06D5,0x06D5}, {0x06E5,0x06E6}, {0x06EE,0x06EF}, {0x06FA,0x06FC}, {0x06FF,0x06FF}, {0x0710,0x0710}, {0x0712,0x072F},
- {0x074D,0x07A5}, {0x07B1,0x07B1}, {0x07CA,0x07EA}, {0x07F4,0x07F5}, {0x07FA,0x07FA}, {0x0800,0x0815}, {0x081A,0x081A}, {0x0824,0x0824},
- {0x0828,0x0828}, {0x0840,0x0858}, {0x0860,0x086A}, {0x08A0,0x08B4}, {0x08B6,0x08C7}, {0x0904,0x0939}, {0x093D,0x093D}, {0x0950,0x0950},
- {0x0958,0x0961}, {0x0971,0x0971}, {0x0972,0x0980}, {0x0985,0x098C}, {0x098F,0x0990}, {0x0993,0x09A8}, {0x09AA,0x09B0}, {0x09B2,0x09B2},
- {0x09B6,0x09B9}, {0x09BD,0x09BD}, {0x09CE,0x09CE}, {0x09DC,0x09DD}, {0x09DF,0x09E1}, {0x09F0,0x09F1}, {0x09FC,0x09FC}, {0x0A05,0x0A0A},
- {0x0A0F,0x0A10}, {0x0A13,0x0A28}, {0x0A2A,0x0A30}, {0x0A32,0x0A33}, {0x0A35,0x0A36}, {0x0A38,0x0A39}, {0x0A59,0x0A5C}, {0x0A5E,0x0A5E},
- {0x0A72,0x0A74}, {0x0A85,0x0A8D}, {0x0A8F,0x0A91}, {0x0A93,0x0AA8}, {0x0AAA,0x0AB0}, {0x0AB2,0x0AB3}, {0x0AB5,0x0AB9}, {0x0ABD,0x0ABD},
- {0x0AD0,0x0AD0}, {0x0AE0,0x0AE1}, {0x0AF9,0x0AF9}, {0x0B05,0x0B0C}, {0x0B0F,0x0B10}, {0x0B13,0x0B28}, {0x0B2A,0x0B30}, {0x0B32,0x0B33},
- {0x0B35,0x0B39}, {0x0B3D,0x0B3D}, {0x0B5C,0x0B5D}, {0x0B5F,0x0B61}, {0x0B71,0x0B71}, {0x0B83,0x0B83}, {0x0B85,0x0B8A}, {0x0B8E,0x0B90},
- {0x0B92,0x0B95}, {0x0B99,0x0B9A}, {0x0B9C,0x0B9C}, {0x0B9E,0x0B9F}, {0x0BA3,0x0BA4}, {0x0BA8,0x0BAA}, {0x0BAE,0x0BB9}, {0x0BD0,0x0BD0},
- {0x0C05,0x0C0C}, {0x0C0E,0x0C10}, {0x0C12,0x0C28}, {0x0C2A,0x0C39}, {0x0C3D,0x0C3D}, {0x0C58,0x0C5A}, {0x0C60,0x0C61}, {0x0C80,0x0C80},
- {0x0C85,0x0C8C}, {0x0C8E,0x0C90}, {0x0C92,0x0CA8}, {0x0CAA,0x0CB3}, {0x0CB5,0x0CB9}, {0x0CBD,0x0CBD}, {0x0CDE,0x0CDE}, {0x0CE0,0x0CE1},
- {0x0CF1,0x0CF2}, {0x0D04,0x0D0C}, {0x0D0E,0x0D10}, {0x0D12,0x0D3A}, {0x0D3D,0x0D3D}, {0x0D4E,0x0D4E}, {0x0D54,0x0D56}, {0x0D5F,0x0D61},
- {0x0D7A,0x0D7F}, {0x0D85,0x0D96}, {0x0D9A,0x0DB1}, {0x0DB3,0x0DBB}, {0x0DBD,0x0DBD}, {0x0DC0,0x0DC6}, {0x0E01,0x0E30}, {0x0E32,0x0E32},
- {0x0E40,0x0E45}, {0x0E46,0x0E46}, {0x0E81,0x0E82}, {0x0E84,0x0E84}, {0x0E86,0x0E8A}, {0x0E8C,0x0EA3}, {0x0EA5,0x0EA5}, {0x0EA7,0x0EB0},
- {0x0EB2,0x0EB2}, {0x0EBD,0x0EBD}, {0x0EC0,0x0EC4}, {0x0EC6,0x0EC6}, {0x0EDC,0x0EDF}, {0x0F00,0x0F00}, {0x0F40,0x0F47}, {0x0F49,0x0F6C},
- {0x0F88,0x0F8C}, {0x1000,0x102A}, {0x103F,0x103F}, {0x1050,0x1055}, {0x105A,0x105D}, {0x1061,0x1061}, {0x1065,0x1066}, {0x106E,0x1070},
- {0x1075,0x1081}, {0x108E,0x108E}, {0x10A0,0x10C5}, {0x10C7,0x10C7}, {0x10CD,0x10CD}, {0x10D0,0x10FA}, {0x10FC,0x10FC}, {0x10FD,0x10FF},
- {0x1100,0x1248}, {0x124A,0x124D}, {0x1250,0x1256}, {0x1258,0x1258}, {0x125A,0x125D}, {0x1260,0x1288}, {0x128A,0x128D}, {0x1290,0x12B0},
- {0x12B2,0x12B5}, {0x12B8,0x12BE}, {0x12C0,0x12C0}, {0x12C2,0x12C5}, {0x12C8,0x12D6}, {0x12D8,0x1310}, {0x1312,0x1315}, {0x1318,0x135A},
- {0x1380,0x138F}, {0x13A0,0x13F5}, {0x13F8,0x13FD}, {0x1401,0x166C}, {0x166F,0x167F}, {0x1681,0x169A}, {0x16A0,0x16EA}, {0x16EE,0x16F0},
- {0x16F1,0x16F8}, {0x1700,0x170C}, {0x170E,0x1711}, {0x1720,0x1731}, {0x1740,0x1751}, {0x1760,0x176C}, {0x176E,0x1770}, {0x1780,0x17B3},
- {0x17D7,0x17D7}, {0x17DC,0x17DC}, {0x1820,0x1842}, {0x1843,0x1843}, {0x1844,0x1878}, {0x1880,0x1884}, {0x1885,0x1886}, {0x1887,0x18A8},
- {0x18AA,0x18AA}, {0x18B0,0x18F5}, {0x1900,0x191E}, {0x1950,0x196D}, {0x1970,0x1974}, {0x1980,0x19AB}, {0x19B0,0x19C9}, {0x1A00,0x1A16},
- {0x1A20,0x1A54}, {0x1AA7,0x1AA7}, {0x1B05,0x1B33}, {0x1B45,0x1B4B}, {0x1B83,0x1BA0}, {0x1BAE,0x1BAF}, {0x1BBA,0x1BE5}, {0x1C00,0x1C23},
- {0x1C4D,0x1C4F}, {0x1C5A,0x1C77}, {0x1C78,0x1C7D}, {0x1C80,0x1C88}, {0x1C90,0x1CBA}, {0x1CBD,0x1CBF}, {0x1CE9,0x1CEC}, {0x1CEE,0x1CF3},
- {0x1CF5,0x1CF6}, {0x1CFA,0x1CFA}, {0x1D00,0x1D2B}, {0x1D2C,0x1D6A}, {0x1D6B,0x1D77}, {0x1D78,0x1D78}, {0x1D79,0x1D9A}, {0x1D9B,0x1DBF},
- {0x1E00,0x1F15}, {0x1F18,0x1F1D}, {0x1F20,0x1F45}, {0x1F48,0x1F4D}, {0x1F50,0x1F57}, {0x1F59,0x1F59}, {0x1F5B,0x1F5B}, {0x1F5D,0x1F5D},
- {0x1F5F,0x1F7D}, {0x1F80,0x1FB4}, {0x1FB6,0x1FBC}, {0x1FBE,0x1FBE}, {0x1FC2,0x1FC4}, {0x1FC6,0x1FCC}, {0x1FD0,0x1FD3}, {0x1FD6,0x1FDB},
- {0x1FE0,0x1FEC}, {0x1FF2,0x1FF4}, {0x1FF6,0x1FFC}, {0x2071,0x2071}, {0x207F,0x207F}, {0x2090,0x209C}, {0x2102,0x2102}, {0x2107,0x2107},
- {0x210A,0x2113}, {0x2115,0x2115}, {0x2118,0x2118}, {0x2119,0x211D}, {0x2124,0x2124}, {0x2126,0x2126}, {0x2128,0x2128}, {0x212A,0x212D},
- {0x212E,0x212E}, {0x212F,0x2134}, {0x2135,0x2138}, {0x2139,0x2139}, {0x213C,0x213F}, {0x2145,0x2149}, {0x214E,0x214E}, {0x2160,0x2182},
- {0x2183,0x2184}, {0x2185,0x2188}, {0x2C00,0x2C2E}, {0x2C30,0x2C5E}, {0x2C60,0x2C7B}, {0x2C7C,0x2C7D}, {0x2C7E,0x2CE4}, {0x2CEB,0x2CEE},
- {0x2CF2,0x2CF3}, {0x2D00,0x2D25}, {0x2D27,0x2D27}, {0x2D2D,0x2D2D}, {0x2D30,0x2D67}, {0x2D6F,0x2D6F}, {0x2D80,0x2D96}, {0x2DA0,0x2DA6},
- {0x2DA8,0x2DAE}, {0x2DB0,0x2DB6}, {0x2DB8,0x2DBE}, {0x2DC0,0x2DC6}, {0x2DC8,0x2DCE}, {0x2DD0,0x2DD6}, {0x2DD8,0x2DDE}, {0x3005,0x3005},
- {0x3006,0x3006}, {0x3007,0x3007}, {0x3021,0x3029}, {0x3031,0x3035}, {0x3038,0x303A}, {0x303B,0x303B}, {0x303C,0x303C}, {0x3041,0x3096},
- {0x309D,0x309E}, {0x309F,0x309F}, {0x30A1,0x30FA}, {0x30FC,0x30FE}, {0x30FF,0x30FF}, {0x3105,0x312F}, {0x3131,0x318E}, {0x31A0,0x31BF},
- {0x31F0,0x31FF}, {0x3400,0x4DBF}, {0x4E00,0x9FFC}, {0xA000,0xA014}, {0xA015,0xA015}, {0xA016,0xA48C}, {0xA4D0,0xA4F7}, {0xA4F8,0xA4FD},
- {0xA500,0xA60B}, {0xA60C,0xA60C}, {0xA610,0xA61F}, {0xA62A,0xA62B}, {0xA640,0xA66D}, {0xA66E,0xA66E}, {0xA67F,0xA67F}, {0xA680,0xA69B},
- {0xA69C,0xA69D}, {0xA6A0,0xA6E5}, {0xA6E6,0xA6EF}, {0xA717,0xA71F}, {0xA722,0xA76F}, {0xA770,0xA770}, {0xA771,0xA787}, {0xA788,0xA788},
- {0xA78B,0xA78E}, {0xA78F,0xA78F}, {0xA790,0xA7BF}, {0xA7C2,0xA7CA}, {0xA7F5,0xA7F6}, {0xA7F7,0xA7F7}, {0xA7F8,0xA7F9}, {0xA7FA,0xA7FA},
- {0xA7FB,0xA801}, {0xA803,0xA805}, {0xA807,0xA80A}, {0xA80C,0xA822}, {0xA840,0xA873}, {0xA882,0xA8B3}, {0xA8F2,0xA8F7}, {0xA8FB,0xA8FB},
- {0xA8FD,0xA8FE}, {0xA90A,0xA925}, {0xA930,0xA946}, {0xA960,0xA97C}, {0xA984,0xA9B2}, {0xA9CF,0xA9CF}, {0xA9E0,0xA9E4}, {0xA9E6,0xA9E6},
- {0xA9E7,0xA9EF}, {0xA9FA,0xA9FE}, {0xAA00,0xAA28}, {0xAA40,0xAA42}, {0xAA44,0xAA4B}, {0xAA60,0xAA6F}, {0xAA70,0xAA70}, {0xAA71,0xAA76},
- {0xAA7A,0xAA7A}, {0xAA7E,0xAAAF}, {0xAAB1,0xAAB1}, {0xAAB5,0xAAB6}, {0xAAB9,0xAABD}, {0xAAC0,0xAAC0}, {0xAAC2,0xAAC2}, {0xAADB,0xAADC},
- {0xAADD,0xAADD}, {0xAAE0,0xAAEA}, {0xAAF2,0xAAF2}, {0xAAF3,0xAAF4}, {0xAB01,0xAB06}, {0xAB09,0xAB0E}, {0xAB11,0xAB16}, {0xAB20,0xAB26},
- {0xAB28,0xAB2E}, {0xAB30,0xAB5A}, {0xAB5C,0xAB5F}, {0xAB60,0xAB68}, {0xAB69,0xAB69}, {0xAB70,0xABBF}, {0xABC0,0xABE2}, {0xAC00,0xD7A3},
- {0xD7B0,0xD7C6}, {0xD7CB,0xD7FB}, {0xF900,0xFA6D}, {0xFA70,0xFAD9}, {0xFB00,0xFB06}, {0xFB13,0xFB17}, {0xFB1D,0xFB1D}, {0xFB1F,0xFB28},
- {0xFB2A,0xFB36}, {0xFB38,0xFB3C}, {0xFB3E,0xFB3E}, {0xFB40,0xFB41}, {0xFB43,0xFB44}, {0xFB46,0xFBB1}, {0xFBD3,0xFC5D}, {0xFC64,0xFD3D},
- {0xFD50,0xFD8F}, {0xFD92,0xFDC7}, {0xFDF0,0xFDF9}, {0xFE71,0xFE71}, {0xFE73,0xFE73}, {0xFE77,0xFE77}, {0xFE79,0xFE79}, {0xFE7B,0xFE7B},
- {0xFE7D,0xFE7D}, {0xFE7F,0xFEFC}, {0xFF21,0xFF3A}, {0xFF41,0xFF5A}, {0xFF66,0xFF6F}, {0xFF70,0xFF70}, {0xFF71,0xFF9D}, {0xFFA0,0xFFBE},
- {0xFFC2,0xFFC7}, {0xFFCA,0xFFCF}, {0xFFD2,0xFFD7}, {0xFFDA,0xFFDC}, {0x10000,0x1000B}, {0x1000D,0x10026}, {0x10028,0x1003A}, {0x1003C,0x1003D},
- {0x1003F,0x1004D}, {0x10050,0x1005D}, {0x10080,0x100FA}, {0x10140,0x10174}, {0x10280,0x1029C}, {0x102A0,0x102D0}, {0x10300,0x1031F}, {0x1032D,0x10340},
- {0x10341,0x10341}, {0x10342,0x10349}, {0x1034A,0x1034A}, {0x10350,0x10375}, {0x10380,0x1039D}, {0x103A0,0x103C3}, {0x103C8,0x103CF}, {0x103D1,0x103D5},
- {0x10400,0x1044F}, {0x10450,0x1049D}, {0x104B0,0x104D3}, {0x104D8,0x104FB}, {0x10500,0x10527}, {0x10530,0x10563}, {0x10600,0x10736}, {0x10740,0x10755},
- {0x10760,0x10767}, {0x10800,0x10805}, {0x10808,0x10808}, {0x1080A,0x10835}, {0x10837,0x10838}, {0x1083C,0x1083C}, {0x1083F,0x10855}, {0x10860,0x10876},
- {0x10880,0x1089E}, {0x108E0,0x108F2}, {0x108F4,0x108F5}, {0x10900,0x10915}, {0x10920,0x10939}, {0x10980,0x109B7}, {0x109BE,0x109BF}, {0x10A00,0x10A00},
- {0x10A10,0x10A13}, {0x10A15,0x10A17}, {0x10A19,0x10A35}, {0x10A60,0x10A7C}, {0x10A80,0x10A9C}, {0x10AC0,0x10AC7}, {0x10AC9,0x10AE4}, {0x10B00,0x10B35},
- {0x10B40,0x10B55}, {0x10B60,0x10B72}, {0x10B80,0x10B91}, {0x10C00,0x10C48}, {0x10C80,0x10CB2}, {0x10CC0,0x10CF2}, {0x10D00,0x10D23}, {0x10E80,0x10EA9},
- {0x10EB0,0x10EB1}, {0x10F00,0x10F1C}, {0x10F27,0x10F27}, {0x10F30,0x10F45}, {0x10FB0,0x10FC4}, {0x10FE0,0x10FF6}, {0x11003,0x11037}, {0x11083,0x110AF},
- {0x110D0,0x110E8}, {0x11103,0x11126}, {0x11144,0x11144}, {0x11147,0x11147}, {0x11150,0x11172}, {0x11176,0x11176}, {0x11183,0x111B2}, {0x111C1,0x111C4},
- {0x111DA,0x111DA}, {0x111DC,0x111DC}, {0x11200,0x11211}, {0x11213,0x1122B}, {0x11280,0x11286}, {0x11288,0x11288}, {0x1128A,0x1128D}, {0x1128F,0x1129D},
- {0x1129F,0x112A8}, {0x112B0,0x112DE}, {0x11305,0x1130C}, {0x1130F,0x11310}, {0x11313,0x11328}, {0x1132A,0x11330}, {0x11332,0x11333}, {0x11335,0x11339},
- {0x1133D,0x1133D}, {0x11350,0x11350}, {0x1135D,0x11361}, {0x11400,0x11434}, {0x11447,0x1144A}, {0x1145F,0x11461}, {0x11480,0x114AF}, {0x114C4,0x114C5},
- {0x114C7,0x114C7}, {0x11580,0x115AE}, {0x115D8,0x115DB}, {0x11600,0x1162F}, {0x11644,0x11644}, {0x11680,0x116AA}, {0x116B8,0x116B8}, {0x11700,0x1171A},
- {0x11800,0x1182B}, {0x118A0,0x118DF}, {0x118FF,0x11906}, {0x11909,0x11909}, {0x1190C,0x11913}, {0x11915,0x11916}, {0x11918,0x1192F}, {0x1193F,0x1193F},
- {0x11941,0x11941}, {0x119A0,0x119A7}, {0x119AA,0x119D0}, {0x119E1,0x119E1}, {0x119E3,0x119E3}, {0x11A00,0x11A00}, {0x11A0B,0x11A32}, {0x11A3A,0x11A3A},
- {0x11A50,0x11A50}, {0x11A5C,0x11A89}, {0x11A9D,0x11A9D}, {0x11AC0,0x11AF8}, {0x11C00,0x11C08}, {0x11C0A,0x11C2E}, {0x11C40,0x11C40}, {0x11C72,0x11C8F},
- {0x11D00,0x11D06}, {0x11D08,0x11D09}, {0x11D0B,0x11D30}, {0x11D46,0x11D46}, {0x11D60,0x11D65}, {0x11D67,0x11D68}, {0x11D6A,0x11D89}, {0x11D98,0x11D98},
- {0x11EE0,0x11EF2}, {0x11FB0,0x11FB0}, {0x12000,0x12399}, {0x12400,0x1246E}, {0x12480,0x12543}, {0x13000,0x1342E}, {0x14400,0x14646}, {0x16800,0x16A38},
- {0x16A40,0x16A5E}, {0x16AD0,0x16AED}, {0x16B00,0x16B2F}, {0x16B40,0x16B43}, {0x16B63,0x16B77}, {0x16B7D,0x16B8F}, {0x16E40,0x16E7F}, {0x16F00,0x16F4A},
- {0x16F50,0x16F50}, {0x16F93,0x16F9F}, {0x16FE0,0x16FE1}, {0x16FE3,0x16FE3}, {0x17000,0x187F7}, {0x18800,0x18CD5}, {0x18D00,0x18D08}, {0x1B000,0x1B11E},
- {0x1B150,0x1B152}, {0x1B164,0x1B167}, {0x1B170,0x1B2FB}, {0x1BC00,0x1BC6A}, {0x1BC70,0x1BC7C}, {0x1BC80,0x1BC88}, {0x1BC90,0x1BC99}, {0x1D400,0x1D454},
- {0x1D456,0x1D49C}, {0x1D49E,0x1D49F}, {0x1D4A2,0x1D4A2}, {0x1D4A5,0x1D4A6}, {0x1D4A9,0x1D4AC}, {0x1D4AE,0x1D4B9}, {0x1D4BB,0x1D4BB}, {0x1D4BD,0x1D4C3},
- {0x1D4C5,0x1D505}, {0x1D507,0x1D50A}, {0x1D50D,0x1D514}, {0x1D516,0x1D51C}, {0x1D51E,0x1D539}, {0x1D53B,0x1D53E}, {0x1D540,0x1D544}, {0x1D546,0x1D546},
- {0x1D54A,0x1D550}, {0x1D552,0x1D6A5}, {0x1D6A8,0x1D6C0}, {0x1D6C2,0x1D6DA}, {0x1D6DC,0x1D6FA}, {0x1D6FC,0x1D714}, {0x1D716,0x1D734}, {0x1D736,0x1D74E},
- {0x1D750,0x1D76E}, {0x1D770,0x1D788}, {0x1D78A,0x1D7A8}, {0x1D7AA,0x1D7C2}, {0x1D7C4,0x1D7CB}, {0x1E100,0x1E12C}, {0x1E137,0x1E13D}, {0x1E14E,0x1E14E},
- {0x1E2C0,0x1E2EB}, {0x1E800,0x1E8C4}, {0x1E900,0x1E943}, {0x1E94B,0x1E94B}, {0x1EE00,0x1EE03}, {0x1EE05,0x1EE1F}, {0x1EE21,0x1EE22}, {0x1EE24,0x1EE24},
- {0x1EE27,0x1EE27}, {0x1EE29,0x1EE32}, {0x1EE34,0x1EE37}, {0x1EE39,0x1EE39}, {0x1EE3B,0x1EE3B}, {0x1EE42,0x1EE42}, {0x1EE47,0x1EE47}, {0x1EE49,0x1EE49},
- {0x1EE4B,0x1EE4B}, {0x1EE4D,0x1EE4F}, {0x1EE51,0x1EE52}, {0x1EE54,0x1EE54}, {0x1EE57,0x1EE57}, {0x1EE59,0x1EE59}, {0x1EE5B,0x1EE5B}, {0x1EE5D,0x1EE5D},
- {0x1EE5F,0x1EE5F}, {0x1EE61,0x1EE62}, {0x1EE64,0x1EE64}, {0x1EE67,0x1EE6A}, {0x1EE6C,0x1EE72}, {0x1EE74,0x1EE77}, {0x1EE79,0x1EE7C}, {0x1EE7E,0x1EE7E},
- {0x1EE80,0x1EE89}, {0x1EE8B,0x1EE9B}, {0x1EEA1,0x1EEA3}, {0x1EEA5,0x1EEA9}, {0x1EEAB,0x1EEBB}, {0x20000,0x2A6DD}, {0x2A700,0x2B734}, {0x2B740,0x2B81D},
- {0x2B820,0x2CEA1}, {0x2CEB0,0x2EBE0}, {0x2F800,0x2FA1D}, {0x30000,0x3134A},
-};
+ {0x0041, 0x005A}, {0x0061, 0x007A}, {0x00AA, 0x00AA}, {0x00B5, 0x00B5}, {0x00BA, 0x00BA},
+ {0x00C0, 0x00D6}, {0x00D8, 0x00F6}, {0x00F8, 0x01BA}, {0x01BB, 0x01BB}, {0x01BC, 0x01BF},
+ {0x01C0, 0x01C3}, {0x01C4, 0x0293}, {0x0294, 0x0294}, {0x0295, 0x02AF}, {0x02B0, 0x02C1},
+ {0x02C6, 0x02D1}, {0x02E0, 0x02E4}, {0x02EC, 0x02EC}, {0x02EE, 0x02EE}, {0x0370, 0x0373},
+ {0x0374, 0x0374}, {0x0376, 0x0377}, {0x037B, 0x037D}, {0x037F, 0x037F}, {0x0386, 0x0386},
+ {0x0388, 0x038A}, {0x038C, 0x038C}, {0x038E, 0x03A1}, {0x03A3, 0x03F5}, {0x03F7, 0x0481},
+ {0x048A, 0x052F}, {0x0531, 0x0556}, {0x0559, 0x0559}, {0x0560, 0x0588}, {0x05D0, 0x05EA},
+ {0x05EF, 0x05F2}, {0x0620, 0x063F}, {0x0640, 0x0640}, {0x0641, 0x064A}, {0x066E, 0x066F},
+ {0x0671, 0x06D3}, {0x06D5, 0x06D5}, {0x06E5, 0x06E6}, {0x06EE, 0x06EF}, {0x06FA, 0x06FC},
+ {0x06FF, 0x06FF}, {0x0710, 0x0710}, {0x0712, 0x072F}, {0x074D, 0x07A5}, {0x07B1, 0x07B1},
+ {0x07CA, 0x07EA}, {0x07F4, 0x07F5}, {0x07FA, 0x07FA}, {0x0800, 0x0815}, {0x081A, 0x081A},
+ {0x0824, 0x0824}, {0x0828, 0x0828}, {0x0840, 0x0858}, {0x0860, 0x086A}, {0x08A0, 0x08B4},
+ {0x08B6, 0x08C7}, {0x0904, 0x0939}, {0x093D, 0x093D}, {0x0950, 0x0950}, {0x0958, 0x0961},
+ {0x0971, 0x0971}, {0x0972, 0x0980}, {0x0985, 0x098C}, {0x098F, 0x0990}, {0x0993, 0x09A8},
+ {0x09AA, 0x09B0}, {0x09B2, 0x09B2}, {0x09B6, 0x09B9}, {0x09BD, 0x09BD}, {0x09CE, 0x09CE},
+ {0x09DC, 0x09DD}, {0x09DF, 0x09E1}, {0x09F0, 0x09F1}, {0x09FC, 0x09FC}, {0x0A05, 0x0A0A},
+ {0x0A0F, 0x0A10}, {0x0A13, 0x0A28}, {0x0A2A, 0x0A30}, {0x0A32, 0x0A33}, {0x0A35, 0x0A36},
+ {0x0A38, 0x0A39}, {0x0A59, 0x0A5C}, {0x0A5E, 0x0A5E}, {0x0A72, 0x0A74}, {0x0A85, 0x0A8D},
+ {0x0A8F, 0x0A91}, {0x0A93, 0x0AA8}, {0x0AAA, 0x0AB0}, {0x0AB2, 0x0AB3}, {0x0AB5, 0x0AB9},
+ {0x0ABD, 0x0ABD}, {0x0AD0, 0x0AD0}, {0x0AE0, 0x0AE1}, {0x0AF9, 0x0AF9}, {0x0B05, 0x0B0C},
+ {0x0B0F, 0x0B10}, {0x0B13, 0x0B28}, {0x0B2A, 0x0B30}, {0x0B32, 0x0B33}, {0x0B35, 0x0B39},
+ {0x0B3D, 0x0B3D}, {0x0B5C, 0x0B5D}, {0x0B5F, 0x0B61}, {0x0B71, 0x0B71}, {0x0B83, 0x0B83},
+ {0x0B85, 0x0B8A}, {0x0B8E, 0x0B90}, {0x0B92, 0x0B95}, {0x0B99, 0x0B9A}, {0x0B9C, 0x0B9C},
+ {0x0B9E, 0x0B9F}, {0x0BA3, 0x0BA4}, {0x0BA8, 0x0BAA}, {0x0BAE, 0x0BB9}, {0x0BD0, 0x0BD0},
+ {0x0C05, 0x0C0C}, {0x0C0E, 0x0C10}, {0x0C12, 0x0C28}, {0x0C2A, 0x0C39}, {0x0C3D, 0x0C3D},
+ {0x0C58, 0x0C5A}, {0x0C60, 0x0C61}, {0x0C80, 0x0C80}, {0x0C85, 0x0C8C}, {0x0C8E, 0x0C90},
+ {0x0C92, 0x0CA8}, {0x0CAA, 0x0CB3}, {0x0CB5, 0x0CB9}, {0x0CBD, 0x0CBD}, {0x0CDE, 0x0CDE},
+ {0x0CE0, 0x0CE1}, {0x0CF1, 0x0CF2}, {0x0D04, 0x0D0C}, {0x0D0E, 0x0D10}, {0x0D12, 0x0D3A},
+ {0x0D3D, 0x0D3D}, {0x0D4E, 0x0D4E}, {0x0D54, 0x0D56}, {0x0D5F, 0x0D61}, {0x0D7A, 0x0D7F},
+ {0x0D85, 0x0D96}, {0x0D9A, 0x0DB1}, {0x0DB3, 0x0DBB}, {0x0DBD, 0x0DBD}, {0x0DC0, 0x0DC6},
+ {0x0E01, 0x0E30}, {0x0E32, 0x0E32}, {0x0E40, 0x0E45}, {0x0E46, 0x0E46}, {0x0E81, 0x0E82},
+ {0x0E84, 0x0E84}, {0x0E86, 0x0E8A}, {0x0E8C, 0x0EA3}, {0x0EA5, 0x0EA5}, {0x0EA7, 0x0EB0},
+ {0x0EB2, 0x0EB2}, {0x0EBD, 0x0EBD}, {0x0EC0, 0x0EC4}, {0x0EC6, 0x0EC6}, {0x0EDC, 0x0EDF},
+ {0x0F00, 0x0F00}, {0x0F40, 0x0F47}, {0x0F49, 0x0F6C}, {0x0F88, 0x0F8C}, {0x1000, 0x102A},
+ {0x103F, 0x103F}, {0x1050, 0x1055}, {0x105A, 0x105D}, {0x1061, 0x1061}, {0x1065, 0x1066},
+ {0x106E, 0x1070}, {0x1075, 0x1081}, {0x108E, 0x108E}, {0x10A0, 0x10C5}, {0x10C7, 0x10C7},
+ {0x10CD, 0x10CD}, {0x10D0, 0x10FA}, {0x10FC, 0x10FC}, {0x10FD, 0x10FF}, {0x1100, 0x1248},
+ {0x124A, 0x124D}, {0x1250, 0x1256}, {0x1258, 0x1258}, {0x125A, 0x125D}, {0x1260, 0x1288},
+ {0x128A, 0x128D}, {0x1290, 0x12B0}, {0x12B2, 0x12B5}, {0x12B8, 0x12BE}, {0x12C0, 0x12C0},
+ {0x12C2, 0x12C5}, {0x12C8, 0x12D6}, {0x12D8, 0x1310}, {0x1312, 0x1315}, {0x1318, 0x135A},
+ {0x1380, 0x138F}, {0x13A0, 0x13F5}, {0x13F8, 0x13FD}, {0x1401, 0x166C}, {0x166F, 0x167F},
+ {0x1681, 0x169A}, {0x16A0, 0x16EA}, {0x16EE, 0x16F0}, {0x16F1, 0x16F8}, {0x1700, 0x170C},
+ {0x170E, 0x1711}, {0x1720, 0x1731}, {0x1740, 0x1751}, {0x1760, 0x176C}, {0x176E, 0x1770},
+ {0x1780, 0x17B3}, {0x17D7, 0x17D7}, {0x17DC, 0x17DC}, {0x1820, 0x1842}, {0x1843, 0x1843},
+ {0x1844, 0x1878}, {0x1880, 0x1884}, {0x1885, 0x1886}, {0x1887, 0x18A8}, {0x18AA, 0x18AA},
+ {0x18B0, 0x18F5}, {0x1900, 0x191E}, {0x1950, 0x196D}, {0x1970, 0x1974}, {0x1980, 0x19AB},
+ {0x19B0, 0x19C9}, {0x1A00, 0x1A16}, {0x1A20, 0x1A54}, {0x1AA7, 0x1AA7}, {0x1B05, 0x1B33},
+ {0x1B45, 0x1B4B}, {0x1B83, 0x1BA0}, {0x1BAE, 0x1BAF}, {0x1BBA, 0x1BE5}, {0x1C00, 0x1C23},
+ {0x1C4D, 0x1C4F}, {0x1C5A, 0x1C77}, {0x1C78, 0x1C7D}, {0x1C80, 0x1C88}, {0x1C90, 0x1CBA},
+ {0x1CBD, 0x1CBF}, {0x1CE9, 0x1CEC}, {0x1CEE, 0x1CF3}, {0x1CF5, 0x1CF6}, {0x1CFA, 0x1CFA},
+ {0x1D00, 0x1D2B}, {0x1D2C, 0x1D6A}, {0x1D6B, 0x1D77}, {0x1D78, 0x1D78}, {0x1D79, 0x1D9A},
+ {0x1D9B, 0x1DBF}, {0x1E00, 0x1F15}, {0x1F18, 0x1F1D}, {0x1F20, 0x1F45}, {0x1F48, 0x1F4D},
+ {0x1F50, 0x1F57}, {0x1F59, 0x1F59}, {0x1F5B, 0x1F5B}, {0x1F5D, 0x1F5D}, {0x1F5F, 0x1F7D},
+ {0x1F80, 0x1FB4}, {0x1FB6, 0x1FBC}, {0x1FBE, 0x1FBE}, {0x1FC2, 0x1FC4}, {0x1FC6, 0x1FCC},
+ {0x1FD0, 0x1FD3}, {0x1FD6, 0x1FDB}, {0x1FE0, 0x1FEC}, {0x1FF2, 0x1FF4}, {0x1FF6, 0x1FFC},
+ {0x2071, 0x2071}, {0x207F, 0x207F}, {0x2090, 0x209C}, {0x2102, 0x2102}, {0x2107, 0x2107},
+ {0x210A, 0x2113}, {0x2115, 0x2115}, {0x2118, 0x2118}, {0x2119, 0x211D}, {0x2124, 0x2124},
+ {0x2126, 0x2126}, {0x2128, 0x2128}, {0x212A, 0x212D}, {0x212E, 0x212E}, {0x212F, 0x2134},
+ {0x2135, 0x2138}, {0x2139, 0x2139}, {0x213C, 0x213F}, {0x2145, 0x2149}, {0x214E, 0x214E},
+ {0x2160, 0x2182}, {0x2183, 0x2184}, {0x2185, 0x2188}, {0x2C00, 0x2C2E}, {0x2C30, 0x2C5E},
+ {0x2C60, 0x2C7B}, {0x2C7C, 0x2C7D}, {0x2C7E, 0x2CE4}, {0x2CEB, 0x2CEE}, {0x2CF2, 0x2CF3},
+ {0x2D00, 0x2D25}, {0x2D27, 0x2D27}, {0x2D2D, 0x2D2D}, {0x2D30, 0x2D67}, {0x2D6F, 0x2D6F},
+ {0x2D80, 0x2D96}, {0x2DA0, 0x2DA6}, {0x2DA8, 0x2DAE}, {0x2DB0, 0x2DB6}, {0x2DB8, 0x2DBE},
+ {0x2DC0, 0x2DC6}, {0x2DC8, 0x2DCE}, {0x2DD0, 0x2DD6}, {0x2DD8, 0x2DDE}, {0x3005, 0x3005},
+ {0x3006, 0x3006}, {0x3007, 0x3007}, {0x3021, 0x3029}, {0x3031, 0x3035}, {0x3038, 0x303A},
+ {0x303B, 0x303B}, {0x303C, 0x303C}, {0x3041, 0x3096}, {0x309D, 0x309E}, {0x309F, 0x309F},
+ {0x30A1, 0x30FA}, {0x30FC, 0x30FE}, {0x30FF, 0x30FF}, {0x3105, 0x312F}, {0x3131, 0x318E},
+ {0x31A0, 0x31BF}, {0x31F0, 0x31FF}, {0x3400, 0x4DBF}, {0x4E00, 0x9FFC}, {0xA000, 0xA014},
+ {0xA015, 0xA015}, {0xA016, 0xA48C}, {0xA4D0, 0xA4F7}, {0xA4F8, 0xA4FD}, {0xA500, 0xA60B},
+ {0xA60C, 0xA60C}, {0xA610, 0xA61F}, {0xA62A, 0xA62B}, {0xA640, 0xA66D}, {0xA66E, 0xA66E},
+ {0xA67F, 0xA67F}, {0xA680, 0xA69B}, {0xA69C, 0xA69D}, {0xA6A0, 0xA6E5}, {0xA6E6, 0xA6EF},
+ {0xA717, 0xA71F}, {0xA722, 0xA76F}, {0xA770, 0xA770}, {0xA771, 0xA787}, {0xA788, 0xA788},
+ {0xA78B, 0xA78E}, {0xA78F, 0xA78F}, {0xA790, 0xA7BF}, {0xA7C2, 0xA7CA}, {0xA7F5, 0xA7F6},
+ {0xA7F7, 0xA7F7}, {0xA7F8, 0xA7F9}, {0xA7FA, 0xA7FA}, {0xA7FB, 0xA801}, {0xA803, 0xA805},
+ {0xA807, 0xA80A}, {0xA80C, 0xA822}, {0xA840, 0xA873}, {0xA882, 0xA8B3}, {0xA8F2, 0xA8F7},
+ {0xA8FB, 0xA8FB}, {0xA8FD, 0xA8FE}, {0xA90A, 0xA925}, {0xA930, 0xA946}, {0xA960, 0xA97C},
+ {0xA984, 0xA9B2}, {0xA9CF, 0xA9CF}, {0xA9E0, 0xA9E4}, {0xA9E6, 0xA9E6}, {0xA9E7, 0xA9EF},
+ {0xA9FA, 0xA9FE}, {0xAA00, 0xAA28}, {0xAA40, 0xAA42}, {0xAA44, 0xAA4B}, {0xAA60, 0xAA6F},
+ {0xAA70, 0xAA70}, {0xAA71, 0xAA76}, {0xAA7A, 0xAA7A}, {0xAA7E, 0xAAAF}, {0xAAB1, 0xAAB1},
+ {0xAAB5, 0xAAB6}, {0xAAB9, 0xAABD}, {0xAAC0, 0xAAC0}, {0xAAC2, 0xAAC2}, {0xAADB, 0xAADC},
+ {0xAADD, 0xAADD}, {0xAAE0, 0xAAEA}, {0xAAF2, 0xAAF2}, {0xAAF3, 0xAAF4}, {0xAB01, 0xAB06},
+ {0xAB09, 0xAB0E}, {0xAB11, 0xAB16}, {0xAB20, 0xAB26}, {0xAB28, 0xAB2E}, {0xAB30, 0xAB5A},
+ {0xAB5C, 0xAB5F}, {0xAB60, 0xAB68}, {0xAB69, 0xAB69}, {0xAB70, 0xABBF}, {0xABC0, 0xABE2},
+ {0xAC00, 0xD7A3}, {0xD7B0, 0xD7C6}, {0xD7CB, 0xD7FB}, {0xF900, 0xFA6D}, {0xFA70, 0xFAD9},
+ {0xFB00, 0xFB06}, {0xFB13, 0xFB17}, {0xFB1D, 0xFB1D}, {0xFB1F, 0xFB28}, {0xFB2A, 0xFB36},
+ {0xFB38, 0xFB3C}, {0xFB3E, 0xFB3E}, {0xFB40, 0xFB41}, {0xFB43, 0xFB44}, {0xFB46, 0xFBB1},
+ {0xFBD3, 0xFC5D}, {0xFC64, 0xFD3D}, {0xFD50, 0xFD8F}, {0xFD92, 0xFDC7}, {0xFDF0, 0xFDF9},
+ {0xFE71, 0xFE71}, {0xFE73, 0xFE73}, {0xFE77, 0xFE77}, {0xFE79, 0xFE79}, {0xFE7B, 0xFE7B},
+ {0xFE7D, 0xFE7D}, {0xFE7F, 0xFEFC}, {0xFF21, 0xFF3A}, {0xFF41, 0xFF5A}, {0xFF66, 0xFF6F},
+ {0xFF70, 0xFF70}, {0xFF71, 0xFF9D}, {0xFFA0, 0xFFBE}, {0xFFC2, 0xFFC7}, {0xFFCA, 0xFFCF},
+ {0xFFD2, 0xFFD7}, {0xFFDA, 0xFFDC}, {0x10000, 0x1000B}, {0x1000D, 0x10026}, {0x10028, 0x1003A},
+ {0x1003C, 0x1003D}, {0x1003F, 0x1004D}, {0x10050, 0x1005D}, {0x10080, 0x100FA}, {0x10140, 0x10174},
+ {0x10280, 0x1029C}, {0x102A0, 0x102D0}, {0x10300, 0x1031F}, {0x1032D, 0x10340}, {0x10341, 0x10341},
+ {0x10342, 0x10349}, {0x1034A, 0x1034A}, {0x10350, 0x10375}, {0x10380, 0x1039D}, {0x103A0, 0x103C3},
+ {0x103C8, 0x103CF}, {0x103D1, 0x103D5}, {0x10400, 0x1044F}, {0x10450, 0x1049D}, {0x104B0, 0x104D3},
+ {0x104D8, 0x104FB}, {0x10500, 0x10527}, {0x10530, 0x10563}, {0x10600, 0x10736}, {0x10740, 0x10755},
+ {0x10760, 0x10767}, {0x10800, 0x10805}, {0x10808, 0x10808}, {0x1080A, 0x10835}, {0x10837, 0x10838},
+ {0x1083C, 0x1083C}, {0x1083F, 0x10855}, {0x10860, 0x10876}, {0x10880, 0x1089E}, {0x108E0, 0x108F2},
+ {0x108F4, 0x108F5}, {0x10900, 0x10915}, {0x10920, 0x10939}, {0x10980, 0x109B7}, {0x109BE, 0x109BF},
+ {0x10A00, 0x10A00}, {0x10A10, 0x10A13}, {0x10A15, 0x10A17}, {0x10A19, 0x10A35}, {0x10A60, 0x10A7C},
+ {0x10A80, 0x10A9C}, {0x10AC0, 0x10AC7}, {0x10AC9, 0x10AE4}, {0x10B00, 0x10B35}, {0x10B40, 0x10B55},
+ {0x10B60, 0x10B72}, {0x10B80, 0x10B91}, {0x10C00, 0x10C48}, {0x10C80, 0x10CB2}, {0x10CC0, 0x10CF2},
+ {0x10D00, 0x10D23}, {0x10E80, 0x10EA9}, {0x10EB0, 0x10EB1}, {0x10F00, 0x10F1C}, {0x10F27, 0x10F27},
+ {0x10F30, 0x10F45}, {0x10FB0, 0x10FC4}, {0x10FE0, 0x10FF6}, {0x11003, 0x11037}, {0x11083, 0x110AF},
+ {0x110D0, 0x110E8}, {0x11103, 0x11126}, {0x11144, 0x11144}, {0x11147, 0x11147}, {0x11150, 0x11172},
+ {0x11176, 0x11176}, {0x11183, 0x111B2}, {0x111C1, 0x111C4}, {0x111DA, 0x111DA}, {0x111DC, 0x111DC},
+ {0x11200, 0x11211}, {0x11213, 0x1122B}, {0x11280, 0x11286}, {0x11288, 0x11288}, {0x1128A, 0x1128D},
+ {0x1128F, 0x1129D}, {0x1129F, 0x112A8}, {0x112B0, 0x112DE}, {0x11305, 0x1130C}, {0x1130F, 0x11310},
+ {0x11313, 0x11328}, {0x1132A, 0x11330}, {0x11332, 0x11333}, {0x11335, 0x11339}, {0x1133D, 0x1133D},
+ {0x11350, 0x11350}, {0x1135D, 0x11361}, {0x11400, 0x11434}, {0x11447, 0x1144A}, {0x1145F, 0x11461},
+ {0x11480, 0x114AF}, {0x114C4, 0x114C5}, {0x114C7, 0x114C7}, {0x11580, 0x115AE}, {0x115D8, 0x115DB},
+ {0x11600, 0x1162F}, {0x11644, 0x11644}, {0x11680, 0x116AA}, {0x116B8, 0x116B8}, {0x11700, 0x1171A},
+ {0x11800, 0x1182B}, {0x118A0, 0x118DF}, {0x118FF, 0x11906}, {0x11909, 0x11909}, {0x1190C, 0x11913},
+ {0x11915, 0x11916}, {0x11918, 0x1192F}, {0x1193F, 0x1193F}, {0x11941, 0x11941}, {0x119A0, 0x119A7},
+ {0x119AA, 0x119D0}, {0x119E1, 0x119E1}, {0x119E3, 0x119E3}, {0x11A00, 0x11A00}, {0x11A0B, 0x11A32},
+ {0x11A3A, 0x11A3A}, {0x11A50, 0x11A50}, {0x11A5C, 0x11A89}, {0x11A9D, 0x11A9D}, {0x11AC0, 0x11AF8},
+ {0x11C00, 0x11C08}, {0x11C0A, 0x11C2E}, {0x11C40, 0x11C40}, {0x11C72, 0x11C8F}, {0x11D00, 0x11D06},
+ {0x11D08, 0x11D09}, {0x11D0B, 0x11D30}, {0x11D46, 0x11D46}, {0x11D60, 0x11D65}, {0x11D67, 0x11D68},
+ {0x11D6A, 0x11D89}, {0x11D98, 0x11D98}, {0x11EE0, 0x11EF2}, {0x11FB0, 0x11FB0}, {0x12000, 0x12399},
+ {0x12400, 0x1246E}, {0x12480, 0x12543}, {0x13000, 0x1342E}, {0x14400, 0x14646}, {0x16800, 0x16A38},
+ {0x16A40, 0x16A5E}, {0x16AD0, 0x16AED}, {0x16B00, 0x16B2F}, {0x16B40, 0x16B43}, {0x16B63, 0x16B77},
+ {0x16B7D, 0x16B8F}, {0x16E40, 0x16E7F}, {0x16F00, 0x16F4A}, {0x16F50, 0x16F50}, {0x16F93, 0x16F9F},
+ {0x16FE0, 0x16FE1}, {0x16FE3, 0x16FE3}, {0x17000, 0x187F7}, {0x18800, 0x18CD5}, {0x18D00, 0x18D08},
+ {0x1B000, 0x1B11E}, {0x1B150, 0x1B152}, {0x1B164, 0x1B167}, {0x1B170, 0x1B2FB}, {0x1BC00, 0x1BC6A},
+ {0x1BC70, 0x1BC7C}, {0x1BC80, 0x1BC88}, {0x1BC90, 0x1BC99}, {0x1D400, 0x1D454}, {0x1D456, 0x1D49C},
+ {0x1D49E, 0x1D49F}, {0x1D4A2, 0x1D4A2}, {0x1D4A5, 0x1D4A6}, {0x1D4A9, 0x1D4AC}, {0x1D4AE, 0x1D4B9},
+ {0x1D4BB, 0x1D4BB}, {0x1D4BD, 0x1D4C3}, {0x1D4C5, 0x1D505}, {0x1D507, 0x1D50A}, {0x1D50D, 0x1D514},
+ {0x1D516, 0x1D51C}, {0x1D51E, 0x1D539}, {0x1D53B, 0x1D53E}, {0x1D540, 0x1D544}, {0x1D546, 0x1D546},
+ {0x1D54A, 0x1D550}, {0x1D552, 0x1D6A5}, {0x1D6A8, 0x1D6C0}, {0x1D6C2, 0x1D6DA}, {0x1D6DC, 0x1D6FA},
+ {0x1D6FC, 0x1D714}, {0x1D716, 0x1D734}, {0x1D736, 0x1D74E}, {0x1D750, 0x1D76E}, {0x1D770, 0x1D788},
+ {0x1D78A, 0x1D7A8}, {0x1D7AA, 0x1D7C2}, {0x1D7C4, 0x1D7CB}, {0x1E100, 0x1E12C}, {0x1E137, 0x1E13D},
+ {0x1E14E, 0x1E14E}, {0x1E2C0, 0x1E2EB}, {0x1E800, 0x1E8C4}, {0x1E900, 0x1E943}, {0x1E94B, 0x1E94B},
+ {0x1EE00, 0x1EE03}, {0x1EE05, 0x1EE1F}, {0x1EE21, 0x1EE22}, {0x1EE24, 0x1EE24}, {0x1EE27, 0x1EE27},
+ {0x1EE29, 0x1EE32}, {0x1EE34, 0x1EE37}, {0x1EE39, 0x1EE39}, {0x1EE3B, 0x1EE3B}, {0x1EE42, 0x1EE42},
+ {0x1EE47, 0x1EE47}, {0x1EE49, 0x1EE49}, {0x1EE4B, 0x1EE4B}, {0x1EE4D, 0x1EE4F}, {0x1EE51, 0x1EE52},
+ {0x1EE54, 0x1EE54}, {0x1EE57, 0x1EE57}, {0x1EE59, 0x1EE59}, {0x1EE5B, 0x1EE5B}, {0x1EE5D, 0x1EE5D},
+ {0x1EE5F, 0x1EE5F}, {0x1EE61, 0x1EE62}, {0x1EE64, 0x1EE64}, {0x1EE67, 0x1EE6A}, {0x1EE6C, 0x1EE72},
+ {0x1EE74, 0x1EE77}, {0x1EE79, 0x1EE7C}, {0x1EE7E, 0x1EE7E}, {0x1EE80, 0x1EE89}, {0x1EE8B, 0x1EE9B},
+ {0x1EEA1, 0x1EEA3}, {0x1EEA5, 0x1EEA9}, {0x1EEAB, 0x1EEBB}, {0x20000, 0x2A6DD}, {0x2A700, 0x2B734},
+ {0x2B740, 0x2B81D}, {0x2B820, 0x2CEA1}, {0x2CEB0, 0x2EBE0}, {0x2F800, 0x2FA1D}, {0x30000, 0x3134A},
+};
static const uint32_t XID_Continue_only[][2] = {
- {0x0030,0x0039}, {0x005F,0x005F}, {0x00B7,0x00B7}, {0x0300,0x036F}, {0x0387,0x0387}, {0x0483,0x0487}, {0x0591,0x05BD}, {0x05BF,0x05BF},
- {0x05C1,0x05C2}, {0x05C4,0x05C5}, {0x05C7,0x05C7}, {0x0610,0x061A}, {0x064B,0x065F}, {0x0660,0x0669}, {0x0670,0x0670}, {0x06D6,0x06DC},
- {0x06DF,0x06E4}, {0x06E7,0x06E8}, {0x06EA,0x06ED}, {0x06F0,0x06F9}, {0x0711,0x0711}, {0x0730,0x074A}, {0x07A6,0x07B0}, {0x07C0,0x07C9},
- {0x07EB,0x07F3}, {0x07FD,0x07FD}, {0x0816,0x0819}, {0x081B,0x0823}, {0x0825,0x0827}, {0x0829,0x082D}, {0x0859,0x085B}, {0x08D3,0x08E1},
- {0x08E3,0x0902}, {0x0903,0x0903}, {0x093A,0x093A}, {0x093B,0x093B}, {0x093C,0x093C}, {0x093E,0x0940}, {0x0941,0x0948}, {0x0949,0x094C},
- {0x094D,0x094D}, {0x094E,0x094F}, {0x0951,0x0957}, {0x0962,0x0963}, {0x0966,0x096F}, {0x0981,0x0981}, {0x0982,0x0983}, {0x09BC,0x09BC},
- {0x09BE,0x09C0}, {0x09C1,0x09C4}, {0x09C7,0x09C8}, {0x09CB,0x09CC}, {0x09CD,0x09CD}, {0x09D7,0x09D7}, {0x09E2,0x09E3}, {0x09E6,0x09EF},
- {0x09FE,0x09FE}, {0x0A01,0x0A02}, {0x0A03,0x0A03}, {0x0A3C,0x0A3C}, {0x0A3E,0x0A40}, {0x0A41,0x0A42}, {0x0A47,0x0A48}, {0x0A4B,0x0A4D},
- {0x0A51,0x0A51}, {0x0A66,0x0A6F}, {0x0A70,0x0A71}, {0x0A75,0x0A75}, {0x0A81,0x0A82}, {0x0A83,0x0A83}, {0x0ABC,0x0ABC}, {0x0ABE,0x0AC0},
- {0x0AC1,0x0AC5}, {0x0AC7,0x0AC8}, {0x0AC9,0x0AC9}, {0x0ACB,0x0ACC}, {0x0ACD,0x0ACD}, {0x0AE2,0x0AE3}, {0x0AE6,0x0AEF}, {0x0AFA,0x0AFF},
- {0x0B01,0x0B01}, {0x0B02,0x0B03}, {0x0B3C,0x0B3C}, {0x0B3E,0x0B3E}, {0x0B3F,0x0B3F}, {0x0B40,0x0B40}, {0x0B41,0x0B44}, {0x0B47,0x0B48},
- {0x0B4B,0x0B4C}, {0x0B4D,0x0B4D}, {0x0B55,0x0B56}, {0x0B57,0x0B57}, {0x0B62,0x0B63}, {0x0B66,0x0B6F}, {0x0B82,0x0B82}, {0x0BBE,0x0BBF},
- {0x0BC0,0x0BC0}, {0x0BC1,0x0BC2}, {0x0BC6,0x0BC8}, {0x0BCA,0x0BCC}, {0x0BCD,0x0BCD}, {0x0BD7,0x0BD7}, {0x0BE6,0x0BEF}, {0x0C00,0x0C00},
- {0x0C01,0x0C03}, {0x0C04,0x0C04}, {0x0C3E,0x0C40}, {0x0C41,0x0C44}, {0x0C46,0x0C48}, {0x0C4A,0x0C4D}, {0x0C55,0x0C56}, {0x0C62,0x0C63},
- {0x0C66,0x0C6F}, {0x0C81,0x0C81}, {0x0C82,0x0C83}, {0x0CBC,0x0CBC}, {0x0CBE,0x0CBE}, {0x0CBF,0x0CBF}, {0x0CC0,0x0CC4}, {0x0CC6,0x0CC6},
- {0x0CC7,0x0CC8}, {0x0CCA,0x0CCB}, {0x0CCC,0x0CCD}, {0x0CD5,0x0CD6}, {0x0CE2,0x0CE3}, {0x0CE6,0x0CEF}, {0x0D00,0x0D01}, {0x0D02,0x0D03},
- {0x0D3B,0x0D3C}, {0x0D3E,0x0D40}, {0x0D41,0x0D44}, {0x0D46,0x0D48}, {0x0D4A,0x0D4C}, {0x0D4D,0x0D4D}, {0x0D57,0x0D57}, {0x0D62,0x0D63},
- {0x0D66,0x0D6F}, {0x0D81,0x0D81}, {0x0D82,0x0D83}, {0x0DCA,0x0DCA}, {0x0DCF,0x0DD1}, {0x0DD2,0x0DD4}, {0x0DD6,0x0DD6}, {0x0DD8,0x0DDF},
- {0x0DE6,0x0DEF}, {0x0DF2,0x0DF3}, {0x0E32,0x0E33}, {0x0E34,0x0E3A}, {0x0E47,0x0E4E}, {0x0E50,0x0E59}, {0x0EB2,0x0EB3}, {0x0EB4,0x0EBC},
- {0x0EC8,0x0ECD}, {0x0ED0,0x0ED9}, {0x0F18,0x0F19}, {0x0F20,0x0F29}, {0x0F35,0x0F35}, {0x0F37,0x0F37}, {0x0F39,0x0F39}, {0x0F3E,0x0F3F},
- {0x0F71,0x0F7E}, {0x0F7F,0x0F7F}, {0x0F80,0x0F84}, {0x0F86,0x0F87}, {0x0F8D,0x0F97}, {0x0F99,0x0FBC}, {0x0FC6,0x0FC6}, {0x102B,0x102C},
- {0x102D,0x1030}, {0x1031,0x1031}, {0x1032,0x1037}, {0x1038,0x1038}, {0x1039,0x103A}, {0x103B,0x103C}, {0x103D,0x103E}, {0x1040,0x1049},
- {0x1056,0x1057}, {0x1058,0x1059}, {0x105E,0x1060}, {0x1062,0x1064}, {0x1067,0x106D}, {0x1071,0x1074}, {0x1082,0x1082}, {0x1083,0x1084},
- {0x1085,0x1086}, {0x1087,0x108C}, {0x108D,0x108D}, {0x108F,0x108F}, {0x1090,0x1099}, {0x109A,0x109C}, {0x109D,0x109D}, {0x135D,0x135F},
- {0x1369,0x1371}, {0x1712,0x1714}, {0x1732,0x1734}, {0x1752,0x1753}, {0x1772,0x1773}, {0x17B4,0x17B5}, {0x17B6,0x17B6}, {0x17B7,0x17BD},
- {0x17BE,0x17C5}, {0x17C6,0x17C6}, {0x17C7,0x17C8}, {0x17C9,0x17D3}, {0x17DD,0x17DD}, {0x17E0,0x17E9}, {0x180B,0x180D}, {0x1810,0x1819},
- {0x18A9,0x18A9}, {0x1920,0x1922}, {0x1923,0x1926}, {0x1927,0x1928}, {0x1929,0x192B}, {0x1930,0x1931}, {0x1932,0x1932}, {0x1933,0x1938},
- {0x1939,0x193B}, {0x1946,0x194F}, {0x19D0,0x19D9}, {0x19DA,0x19DA}, {0x1A17,0x1A18}, {0x1A19,0x1A1A}, {0x1A1B,0x1A1B}, {0x1A55,0x1A55},
- {0x1A56,0x1A56}, {0x1A57,0x1A57}, {0x1A58,0x1A5E}, {0x1A60,0x1A60}, {0x1A61,0x1A61}, {0x1A62,0x1A62}, {0x1A63,0x1A64}, {0x1A65,0x1A6C},
- {0x1A6D,0x1A72}, {0x1A73,0x1A7C}, {0x1A7F,0x1A7F}, {0x1A80,0x1A89}, {0x1A90,0x1A99}, {0x1AB0,0x1ABD}, {0x1ABF,0x1AC0}, {0x1B00,0x1B03},
- {0x1B04,0x1B04}, {0x1B34,0x1B34}, {0x1B35,0x1B35}, {0x1B36,0x1B3A}, {0x1B3B,0x1B3B}, {0x1B3C,0x1B3C}, {0x1B3D,0x1B41}, {0x1B42,0x1B42},
- {0x1B43,0x1B44}, {0x1B50,0x1B59}, {0x1B6B,0x1B73}, {0x1B80,0x1B81}, {0x1B82,0x1B82}, {0x1BA1,0x1BA1}, {0x1BA2,0x1BA5}, {0x1BA6,0x1BA7},
- {0x1BA8,0x1BA9}, {0x1BAA,0x1BAA}, {0x1BAB,0x1BAD}, {0x1BB0,0x1BB9}, {0x1BE6,0x1BE6}, {0x1BE7,0x1BE7}, {0x1BE8,0x1BE9}, {0x1BEA,0x1BEC},
- {0x1BED,0x1BED}, {0x1BEE,0x1BEE}, {0x1BEF,0x1BF1}, {0x1BF2,0x1BF3}, {0x1C24,0x1C2B}, {0x1C2C,0x1C33}, {0x1C34,0x1C35}, {0x1C36,0x1C37},
- {0x1C40,0x1C49}, {0x1C50,0x1C59}, {0x1CD0,0x1CD2}, {0x1CD4,0x1CE0}, {0x1CE1,0x1CE1}, {0x1CE2,0x1CE8}, {0x1CED,0x1CED}, {0x1CF4,0x1CF4},
- {0x1CF7,0x1CF7}, {0x1CF8,0x1CF9}, {0x1DC0,0x1DF9}, {0x1DFB,0x1DFF}, {0x203F,0x2040}, {0x2054,0x2054}, {0x20D0,0x20DC}, {0x20E1,0x20E1},
- {0x20E5,0x20F0}, {0x2CEF,0x2CF1}, {0x2D7F,0x2D7F}, {0x2DE0,0x2DFF}, {0x302A,0x302D}, {0x302E,0x302F}, {0x3099,0x309A}, {0xA620,0xA629},
- {0xA66F,0xA66F}, {0xA674,0xA67D}, {0xA69E,0xA69F}, {0xA6F0,0xA6F1}, {0xA802,0xA802}, {0xA806,0xA806}, {0xA80B,0xA80B}, {0xA823,0xA824},
- {0xA825,0xA826}, {0xA827,0xA827}, {0xA82C,0xA82C}, {0xA880,0xA881}, {0xA8B4,0xA8C3}, {0xA8C4,0xA8C5}, {0xA8D0,0xA8D9}, {0xA8E0,0xA8F1},
- {0xA8FF,0xA8FF}, {0xA900,0xA909}, {0xA926,0xA92D}, {0xA947,0xA951}, {0xA952,0xA953}, {0xA980,0xA982}, {0xA983,0xA983}, {0xA9B3,0xA9B3},
- {0xA9B4,0xA9B5}, {0xA9B6,0xA9B9}, {0xA9BA,0xA9BB}, {0xA9BC,0xA9BD}, {0xA9BE,0xA9C0}, {0xA9D0,0xA9D9}, {0xA9E5,0xA9E5}, {0xA9F0,0xA9F9},
- {0xAA29,0xAA2E}, {0xAA2F,0xAA30}, {0xAA31,0xAA32}, {0xAA33,0xAA34}, {0xAA35,0xAA36}, {0xAA43,0xAA43}, {0xAA4C,0xAA4C}, {0xAA4D,0xAA4D},
- {0xAA50,0xAA59}, {0xAA7B,0xAA7B}, {0xAA7C,0xAA7C}, {0xAA7D,0xAA7D}, {0xAAB0,0xAAB0}, {0xAAB2,0xAAB4}, {0xAAB7,0xAAB8}, {0xAABE,0xAABF},
- {0xAAC1,0xAAC1}, {0xAAEB,0xAAEB}, {0xAAEC,0xAAED}, {0xAAEE,0xAAEF}, {0xAAF5,0xAAF5}, {0xAAF6,0xAAF6}, {0xABE3,0xABE4}, {0xABE5,0xABE5},
- {0xABE6,0xABE7}, {0xABE8,0xABE8}, {0xABE9,0xABEA}, {0xABEC,0xABEC}, {0xABED,0xABED}, {0xABF0,0xABF9}, {0xFB1E,0xFB1E}, {0xFE00,0xFE0F},
- {0xFE20,0xFE2F}, {0xFE33,0xFE34}, {0xFE4D,0xFE4F}, {0xFF10,0xFF19}, {0xFF3F,0xFF3F}, {0xFF9E,0xFF9F}, {0x101FD,0x101FD}, {0x102E0,0x102E0},
- {0x10376,0x1037A}, {0x104A0,0x104A9}, {0x10A01,0x10A03}, {0x10A05,0x10A06}, {0x10A0C,0x10A0F}, {0x10A38,0x10A3A}, {0x10A3F,0x10A3F}, {0x10AE5,0x10AE6},
- {0x10D24,0x10D27}, {0x10D30,0x10D39}, {0x10EAB,0x10EAC}, {0x10F46,0x10F50}, {0x11000,0x11000}, {0x11001,0x11001}, {0x11002,0x11002}, {0x11038,0x11046},
- {0x11066,0x1106F}, {0x1107F,0x11081}, {0x11082,0x11082}, {0x110B0,0x110B2}, {0x110B3,0x110B6}, {0x110B7,0x110B8}, {0x110B9,0x110BA}, {0x110F0,0x110F9},
- {0x11100,0x11102}, {0x11127,0x1112B}, {0x1112C,0x1112C}, {0x1112D,0x11134}, {0x11136,0x1113F}, {0x11145,0x11146}, {0x11173,0x11173}, {0x11180,0x11181},
- {0x11182,0x11182}, {0x111B3,0x111B5}, {0x111B6,0x111BE}, {0x111BF,0x111C0}, {0x111C9,0x111CC}, {0x111CE,0x111CE}, {0x111CF,0x111CF}, {0x111D0,0x111D9},
- {0x1122C,0x1122E}, {0x1122F,0x11231}, {0x11232,0x11233}, {0x11234,0x11234}, {0x11235,0x11235}, {0x11236,0x11237}, {0x1123E,0x1123E}, {0x112DF,0x112DF},
- {0x112E0,0x112E2}, {0x112E3,0x112EA}, {0x112F0,0x112F9}, {0x11300,0x11301}, {0x11302,0x11303}, {0x1133B,0x1133C}, {0x1133E,0x1133F}, {0x11340,0x11340},
- {0x11341,0x11344}, {0x11347,0x11348}, {0x1134B,0x1134D}, {0x11357,0x11357}, {0x11362,0x11363}, {0x11366,0x1136C}, {0x11370,0x11374}, {0x11435,0x11437},
- {0x11438,0x1143F}, {0x11440,0x11441}, {0x11442,0x11444}, {0x11445,0x11445}, {0x11446,0x11446}, {0x11450,0x11459}, {0x1145E,0x1145E}, {0x114B0,0x114B2},
- {0x114B3,0x114B8}, {0x114B9,0x114B9}, {0x114BA,0x114BA}, {0x114BB,0x114BE}, {0x114BF,0x114C0}, {0x114C1,0x114C1}, {0x114C2,0x114C3}, {0x114D0,0x114D9},
- {0x115AF,0x115B1}, {0x115B2,0x115B5}, {0x115B8,0x115BB}, {0x115BC,0x115BD}, {0x115BE,0x115BE}, {0x115BF,0x115C0}, {0x115DC,0x115DD}, {0x11630,0x11632},
- {0x11633,0x1163A}, {0x1163B,0x1163C}, {0x1163D,0x1163D}, {0x1163E,0x1163E}, {0x1163F,0x11640}, {0x11650,0x11659}, {0x116AB,0x116AB}, {0x116AC,0x116AC},
- {0x116AD,0x116AD}, {0x116AE,0x116AF}, {0x116B0,0x116B5}, {0x116B6,0x116B6}, {0x116B7,0x116B7}, {0x116C0,0x116C9}, {0x1171D,0x1171F}, {0x11720,0x11721},
- {0x11722,0x11725}, {0x11726,0x11726}, {0x11727,0x1172B}, {0x11730,0x11739}, {0x1182C,0x1182E}, {0x1182F,0x11837}, {0x11838,0x11838}, {0x11839,0x1183A},
- {0x118E0,0x118E9}, {0x11930,0x11935}, {0x11937,0x11938}, {0x1193B,0x1193C}, {0x1193D,0x1193D}, {0x1193E,0x1193E}, {0x11940,0x11940}, {0x11942,0x11942},
- {0x11943,0x11943}, {0x11950,0x11959}, {0x119D1,0x119D3}, {0x119D4,0x119D7}, {0x119DA,0x119DB}, {0x119DC,0x119DF}, {0x119E0,0x119E0}, {0x119E4,0x119E4},
- {0x11A01,0x11A0A}, {0x11A33,0x11A38}, {0x11A39,0x11A39}, {0x11A3B,0x11A3E}, {0x11A47,0x11A47}, {0x11A51,0x11A56}, {0x11A57,0x11A58}, {0x11A59,0x11A5B},
- {0x11A8A,0x11A96}, {0x11A97,0x11A97}, {0x11A98,0x11A99}, {0x11C2F,0x11C2F}, {0x11C30,0x11C36}, {0x11C38,0x11C3D}, {0x11C3E,0x11C3E}, {0x11C3F,0x11C3F},
- {0x11C50,0x11C59}, {0x11C92,0x11CA7}, {0x11CA9,0x11CA9}, {0x11CAA,0x11CB0}, {0x11CB1,0x11CB1}, {0x11CB2,0x11CB3}, {0x11CB4,0x11CB4}, {0x11CB5,0x11CB6},
- {0x11D31,0x11D36}, {0x11D3A,0x11D3A}, {0x11D3C,0x11D3D}, {0x11D3F,0x11D45}, {0x11D47,0x11D47}, {0x11D50,0x11D59}, {0x11D8A,0x11D8E}, {0x11D90,0x11D91},
- {0x11D93,0x11D94}, {0x11D95,0x11D95}, {0x11D96,0x11D96}, {0x11D97,0x11D97}, {0x11DA0,0x11DA9}, {0x11EF3,0x11EF4}, {0x11EF5,0x11EF6}, {0x16A60,0x16A69},
- {0x16AF0,0x16AF4}, {0x16B30,0x16B36}, {0x16B50,0x16B59}, {0x16F4F,0x16F4F}, {0x16F51,0x16F87}, {0x16F8F,0x16F92}, {0x16FE4,0x16FE4}, {0x16FF0,0x16FF1},
- {0x1BC9D,0x1BC9E}, {0x1D165,0x1D166}, {0x1D167,0x1D169}, {0x1D16D,0x1D172}, {0x1D17B,0x1D182}, {0x1D185,0x1D18B}, {0x1D1AA,0x1D1AD}, {0x1D242,0x1D244},
- {0x1D7CE,0x1D7FF}, {0x1DA00,0x1DA36}, {0x1DA3B,0x1DA6C}, {0x1DA75,0x1DA75}, {0x1DA84,0x1DA84}, {0x1DA9B,0x1DA9F}, {0x1DAA1,0x1DAAF}, {0x1E000,0x1E006},
- {0x1E008,0x1E018}, {0x1E01B,0x1E021}, {0x1E023,0x1E024}, {0x1E026,0x1E02A}, {0x1E130,0x1E136}, {0x1E140,0x1E149}, {0x1E2EC,0x1E2EF}, {0x1E2F0,0x1E2F9},
- {0x1E8D0,0x1E8D6}, {0x1E944,0x1E94A}, {0x1E950,0x1E959}, {0x1FBF0,0x1FBF9}, {0xE0100,0xE01EF},
-};
+ {0x0030, 0x0039}, {0x005F, 0x005F}, {0x00B7, 0x00B7}, {0x0300, 0x036F}, {0x0387, 0x0387},
+ {0x0483, 0x0487}, {0x0591, 0x05BD}, {0x05BF, 0x05BF}, {0x05C1, 0x05C2}, {0x05C4, 0x05C5},
+ {0x05C7, 0x05C7}, {0x0610, 0x061A}, {0x064B, 0x065F}, {0x0660, 0x0669}, {0x0670, 0x0670},
+ {0x06D6, 0x06DC}, {0x06DF, 0x06E4}, {0x06E7, 0x06E8}, {0x06EA, 0x06ED}, {0x06F0, 0x06F9},
+ {0x0711, 0x0711}, {0x0730, 0x074A}, {0x07A6, 0x07B0}, {0x07C0, 0x07C9}, {0x07EB, 0x07F3},
+ {0x07FD, 0x07FD}, {0x0816, 0x0819}, {0x081B, 0x0823}, {0x0825, 0x0827}, {0x0829, 0x082D},
+ {0x0859, 0x085B}, {0x08D3, 0x08E1}, {0x08E3, 0x0902}, {0x0903, 0x0903}, {0x093A, 0x093A},
+ {0x093B, 0x093B}, {0x093C, 0x093C}, {0x093E, 0x0940}, {0x0941, 0x0948}, {0x0949, 0x094C},
+ {0x094D, 0x094D}, {0x094E, 0x094F}, {0x0951, 0x0957}, {0x0962, 0x0963}, {0x0966, 0x096F},
+ {0x0981, 0x0981}, {0x0982, 0x0983}, {0x09BC, 0x09BC}, {0x09BE, 0x09C0}, {0x09C1, 0x09C4},
+ {0x09C7, 0x09C8}, {0x09CB, 0x09CC}, {0x09CD, 0x09CD}, {0x09D7, 0x09D7}, {0x09E2, 0x09E3},
+ {0x09E6, 0x09EF}, {0x09FE, 0x09FE}, {0x0A01, 0x0A02}, {0x0A03, 0x0A03}, {0x0A3C, 0x0A3C},
+ {0x0A3E, 0x0A40}, {0x0A41, 0x0A42}, {0x0A47, 0x0A48}, {0x0A4B, 0x0A4D}, {0x0A51, 0x0A51},
+ {0x0A66, 0x0A6F}, {0x0A70, 0x0A71}, {0x0A75, 0x0A75}, {0x0A81, 0x0A82}, {0x0A83, 0x0A83},
+ {0x0ABC, 0x0ABC}, {0x0ABE, 0x0AC0}, {0x0AC1, 0x0AC5}, {0x0AC7, 0x0AC8}, {0x0AC9, 0x0AC9},
+ {0x0ACB, 0x0ACC}, {0x0ACD, 0x0ACD}, {0x0AE2, 0x0AE3}, {0x0AE6, 0x0AEF}, {0x0AFA, 0x0AFF},
+ {0x0B01, 0x0B01}, {0x0B02, 0x0B03}, {0x0B3C, 0x0B3C}, {0x0B3E, 0x0B3E}, {0x0B3F, 0x0B3F},
+ {0x0B40, 0x0B40}, {0x0B41, 0x0B44}, {0x0B47, 0x0B48}, {0x0B4B, 0x0B4C}, {0x0B4D, 0x0B4D},
+ {0x0B55, 0x0B56}, {0x0B57, 0x0B57}, {0x0B62, 0x0B63}, {0x0B66, 0x0B6F}, {0x0B82, 0x0B82},
+ {0x0BBE, 0x0BBF}, {0x0BC0, 0x0BC0}, {0x0BC1, 0x0BC2}, {0x0BC6, 0x0BC8}, {0x0BCA, 0x0BCC},
+ {0x0BCD, 0x0BCD}, {0x0BD7, 0x0BD7}, {0x0BE6, 0x0BEF}, {0x0C00, 0x0C00}, {0x0C01, 0x0C03},
+ {0x0C04, 0x0C04}, {0x0C3E, 0x0C40}, {0x0C41, 0x0C44}, {0x0C46, 0x0C48}, {0x0C4A, 0x0C4D},
+ {0x0C55, 0x0C56}, {0x0C62, 0x0C63}, {0x0C66, 0x0C6F}, {0x0C81, 0x0C81}, {0x0C82, 0x0C83},
+ {0x0CBC, 0x0CBC}, {0x0CBE, 0x0CBE}, {0x0CBF, 0x0CBF}, {0x0CC0, 0x0CC4}, {0x0CC6, 0x0CC6},
+ {0x0CC7, 0x0CC8}, {0x0CCA, 0x0CCB}, {0x0CCC, 0x0CCD}, {0x0CD5, 0x0CD6}, {0x0CE2, 0x0CE3},
+ {0x0CE6, 0x0CEF}, {0x0D00, 0x0D01}, {0x0D02, 0x0D03}, {0x0D3B, 0x0D3C}, {0x0D3E, 0x0D40},
+ {0x0D41, 0x0D44}, {0x0D46, 0x0D48}, {0x0D4A, 0x0D4C}, {0x0D4D, 0x0D4D}, {0x0D57, 0x0D57},
+ {0x0D62, 0x0D63}, {0x0D66, 0x0D6F}, {0x0D81, 0x0D81}, {0x0D82, 0x0D83}, {0x0DCA, 0x0DCA},
+ {0x0DCF, 0x0DD1}, {0x0DD2, 0x0DD4}, {0x0DD6, 0x0DD6}, {0x0DD8, 0x0DDF}, {0x0DE6, 0x0DEF},
+ {0x0DF2, 0x0DF3}, {0x0E32, 0x0E33}, {0x0E34, 0x0E3A}, {0x0E47, 0x0E4E}, {0x0E50, 0x0E59},
+ {0x0EB2, 0x0EB3}, {0x0EB4, 0x0EBC}, {0x0EC8, 0x0ECD}, {0x0ED0, 0x0ED9}, {0x0F18, 0x0F19},
+ {0x0F20, 0x0F29}, {0x0F35, 0x0F35}, {0x0F37, 0x0F37}, {0x0F39, 0x0F39}, {0x0F3E, 0x0F3F},
+ {0x0F71, 0x0F7E}, {0x0F7F, 0x0F7F}, {0x0F80, 0x0F84}, {0x0F86, 0x0F87}, {0x0F8D, 0x0F97},
+ {0x0F99, 0x0FBC}, {0x0FC6, 0x0FC6}, {0x102B, 0x102C}, {0x102D, 0x1030}, {0x1031, 0x1031},
+ {0x1032, 0x1037}, {0x1038, 0x1038}, {0x1039, 0x103A}, {0x103B, 0x103C}, {0x103D, 0x103E},
+ {0x1040, 0x1049}, {0x1056, 0x1057}, {0x1058, 0x1059}, {0x105E, 0x1060}, {0x1062, 0x1064},
+ {0x1067, 0x106D}, {0x1071, 0x1074}, {0x1082, 0x1082}, {0x1083, 0x1084}, {0x1085, 0x1086},
+ {0x1087, 0x108C}, {0x108D, 0x108D}, {0x108F, 0x108F}, {0x1090, 0x1099}, {0x109A, 0x109C},
+ {0x109D, 0x109D}, {0x135D, 0x135F}, {0x1369, 0x1371}, {0x1712, 0x1714}, {0x1732, 0x1734},
+ {0x1752, 0x1753}, {0x1772, 0x1773}, {0x17B4, 0x17B5}, {0x17B6, 0x17B6}, {0x17B7, 0x17BD},
+ {0x17BE, 0x17C5}, {0x17C6, 0x17C6}, {0x17C7, 0x17C8}, {0x17C9, 0x17D3}, {0x17DD, 0x17DD},
+ {0x17E0, 0x17E9}, {0x180B, 0x180D}, {0x1810, 0x1819}, {0x18A9, 0x18A9}, {0x1920, 0x1922},
+ {0x1923, 0x1926}, {0x1927, 0x1928}, {0x1929, 0x192B}, {0x1930, 0x1931}, {0x1932, 0x1932},
+ {0x1933, 0x1938}, {0x1939, 0x193B}, {0x1946, 0x194F}, {0x19D0, 0x19D9}, {0x19DA, 0x19DA},
+ {0x1A17, 0x1A18}, {0x1A19, 0x1A1A}, {0x1A1B, 0x1A1B}, {0x1A55, 0x1A55}, {0x1A56, 0x1A56},
+ {0x1A57, 0x1A57}, {0x1A58, 0x1A5E}, {0x1A60, 0x1A60}, {0x1A61, 0x1A61}, {0x1A62, 0x1A62},
+ {0x1A63, 0x1A64}, {0x1A65, 0x1A6C}, {0x1A6D, 0x1A72}, {0x1A73, 0x1A7C}, {0x1A7F, 0x1A7F},
+ {0x1A80, 0x1A89}, {0x1A90, 0x1A99}, {0x1AB0, 0x1ABD}, {0x1ABF, 0x1AC0}, {0x1B00, 0x1B03},
+ {0x1B04, 0x1B04}, {0x1B34, 0x1B34}, {0x1B35, 0x1B35}, {0x1B36, 0x1B3A}, {0x1B3B, 0x1B3B},
+ {0x1B3C, 0x1B3C}, {0x1B3D, 0x1B41}, {0x1B42, 0x1B42}, {0x1B43, 0x1B44}, {0x1B50, 0x1B59},
+ {0x1B6B, 0x1B73}, {0x1B80, 0x1B81}, {0x1B82, 0x1B82}, {0x1BA1, 0x1BA1}, {0x1BA2, 0x1BA5},
+ {0x1BA6, 0x1BA7}, {0x1BA8, 0x1BA9}, {0x1BAA, 0x1BAA}, {0x1BAB, 0x1BAD}, {0x1BB0, 0x1BB9},
+ {0x1BE6, 0x1BE6}, {0x1BE7, 0x1BE7}, {0x1BE8, 0x1BE9}, {0x1BEA, 0x1BEC}, {0x1BED, 0x1BED},
+ {0x1BEE, 0x1BEE}, {0x1BEF, 0x1BF1}, {0x1BF2, 0x1BF3}, {0x1C24, 0x1C2B}, {0x1C2C, 0x1C33},
+ {0x1C34, 0x1C35}, {0x1C36, 0x1C37}, {0x1C40, 0x1C49}, {0x1C50, 0x1C59}, {0x1CD0, 0x1CD2},
+ {0x1CD4, 0x1CE0}, {0x1CE1, 0x1CE1}, {0x1CE2, 0x1CE8}, {0x1CED, 0x1CED}, {0x1CF4, 0x1CF4},
+ {0x1CF7, 0x1CF7}, {0x1CF8, 0x1CF9}, {0x1DC0, 0x1DF9}, {0x1DFB, 0x1DFF}, {0x203F, 0x2040},
+ {0x2054, 0x2054}, {0x20D0, 0x20DC}, {0x20E1, 0x20E1}, {0x20E5, 0x20F0}, {0x2CEF, 0x2CF1},
+ {0x2D7F, 0x2D7F}, {0x2DE0, 0x2DFF}, {0x302A, 0x302D}, {0x302E, 0x302F}, {0x3099, 0x309A},
+ {0xA620, 0xA629}, {0xA66F, 0xA66F}, {0xA674, 0xA67D}, {0xA69E, 0xA69F}, {0xA6F0, 0xA6F1},
+ {0xA802, 0xA802}, {0xA806, 0xA806}, {0xA80B, 0xA80B}, {0xA823, 0xA824}, {0xA825, 0xA826},
+ {0xA827, 0xA827}, {0xA82C, 0xA82C}, {0xA880, 0xA881}, {0xA8B4, 0xA8C3}, {0xA8C4, 0xA8C5},
+ {0xA8D0, 0xA8D9}, {0xA8E0, 0xA8F1}, {0xA8FF, 0xA8FF}, {0xA900, 0xA909}, {0xA926, 0xA92D},
+ {0xA947, 0xA951}, {0xA952, 0xA953}, {0xA980, 0xA982}, {0xA983, 0xA983}, {0xA9B3, 0xA9B3},
+ {0xA9B4, 0xA9B5}, {0xA9B6, 0xA9B9}, {0xA9BA, 0xA9BB}, {0xA9BC, 0xA9BD}, {0xA9BE, 0xA9C0},
+ {0xA9D0, 0xA9D9}, {0xA9E5, 0xA9E5}, {0xA9F0, 0xA9F9}, {0xAA29, 0xAA2E}, {0xAA2F, 0xAA30},
+ {0xAA31, 0xAA32}, {0xAA33, 0xAA34}, {0xAA35, 0xAA36}, {0xAA43, 0xAA43}, {0xAA4C, 0xAA4C},
+ {0xAA4D, 0xAA4D}, {0xAA50, 0xAA59}, {0xAA7B, 0xAA7B}, {0xAA7C, 0xAA7C}, {0xAA7D, 0xAA7D},
+ {0xAAB0, 0xAAB0}, {0xAAB2, 0xAAB4}, {0xAAB7, 0xAAB8}, {0xAABE, 0xAABF}, {0xAAC1, 0xAAC1},
+ {0xAAEB, 0xAAEB}, {0xAAEC, 0xAAED}, {0xAAEE, 0xAAEF}, {0xAAF5, 0xAAF5}, {0xAAF6, 0xAAF6},
+ {0xABE3, 0xABE4}, {0xABE5, 0xABE5}, {0xABE6, 0xABE7}, {0xABE8, 0xABE8}, {0xABE9, 0xABEA},
+ {0xABEC, 0xABEC}, {0xABED, 0xABED}, {0xABF0, 0xABF9}, {0xFB1E, 0xFB1E}, {0xFE00, 0xFE0F},
+ {0xFE20, 0xFE2F}, {0xFE33, 0xFE34}, {0xFE4D, 0xFE4F}, {0xFF10, 0xFF19}, {0xFF3F, 0xFF3F},
+ {0xFF9E, 0xFF9F}, {0x101FD, 0x101FD}, {0x102E0, 0x102E0}, {0x10376, 0x1037A}, {0x104A0, 0x104A9},
+ {0x10A01, 0x10A03}, {0x10A05, 0x10A06}, {0x10A0C, 0x10A0F}, {0x10A38, 0x10A3A}, {0x10A3F, 0x10A3F},
+ {0x10AE5, 0x10AE6}, {0x10D24, 0x10D27}, {0x10D30, 0x10D39}, {0x10EAB, 0x10EAC}, {0x10F46, 0x10F50},
+ {0x11000, 0x11000}, {0x11001, 0x11001}, {0x11002, 0x11002}, {0x11038, 0x11046}, {0x11066, 0x1106F},
+ {0x1107F, 0x11081}, {0x11082, 0x11082}, {0x110B0, 0x110B2}, {0x110B3, 0x110B6}, {0x110B7, 0x110B8},
+ {0x110B9, 0x110BA}, {0x110F0, 0x110F9}, {0x11100, 0x11102}, {0x11127, 0x1112B}, {0x1112C, 0x1112C},
+ {0x1112D, 0x11134}, {0x11136, 0x1113F}, {0x11145, 0x11146}, {0x11173, 0x11173}, {0x11180, 0x11181},
+ {0x11182, 0x11182}, {0x111B3, 0x111B5}, {0x111B6, 0x111BE}, {0x111BF, 0x111C0}, {0x111C9, 0x111CC},
+ {0x111CE, 0x111CE}, {0x111CF, 0x111CF}, {0x111D0, 0x111D9}, {0x1122C, 0x1122E}, {0x1122F, 0x11231},
+ {0x11232, 0x11233}, {0x11234, 0x11234}, {0x11235, 0x11235}, {0x11236, 0x11237}, {0x1123E, 0x1123E},
+ {0x112DF, 0x112DF}, {0x112E0, 0x112E2}, {0x112E3, 0x112EA}, {0x112F0, 0x112F9}, {0x11300, 0x11301},
+ {0x11302, 0x11303}, {0x1133B, 0x1133C}, {0x1133E, 0x1133F}, {0x11340, 0x11340}, {0x11341, 0x11344},
+ {0x11347, 0x11348}, {0x1134B, 0x1134D}, {0x11357, 0x11357}, {0x11362, 0x11363}, {0x11366, 0x1136C},
+ {0x11370, 0x11374}, {0x11435, 0x11437}, {0x11438, 0x1143F}, {0x11440, 0x11441}, {0x11442, 0x11444},
+ {0x11445, 0x11445}, {0x11446, 0x11446}, {0x11450, 0x11459}, {0x1145E, 0x1145E}, {0x114B0, 0x114B2},
+ {0x114B3, 0x114B8}, {0x114B9, 0x114B9}, {0x114BA, 0x114BA}, {0x114BB, 0x114BE}, {0x114BF, 0x114C0},
+ {0x114C1, 0x114C1}, {0x114C2, 0x114C3}, {0x114D0, 0x114D9}, {0x115AF, 0x115B1}, {0x115B2, 0x115B5},
+ {0x115B8, 0x115BB}, {0x115BC, 0x115BD}, {0x115BE, 0x115BE}, {0x115BF, 0x115C0}, {0x115DC, 0x115DD},
+ {0x11630, 0x11632}, {0x11633, 0x1163A}, {0x1163B, 0x1163C}, {0x1163D, 0x1163D}, {0x1163E, 0x1163E},
+ {0x1163F, 0x11640}, {0x11650, 0x11659}, {0x116AB, 0x116AB}, {0x116AC, 0x116AC}, {0x116AD, 0x116AD},
+ {0x116AE, 0x116AF}, {0x116B0, 0x116B5}, {0x116B6, 0x116B6}, {0x116B7, 0x116B7}, {0x116C0, 0x116C9},
+ {0x1171D, 0x1171F}, {0x11720, 0x11721}, {0x11722, 0x11725}, {0x11726, 0x11726}, {0x11727, 0x1172B},
+ {0x11730, 0x11739}, {0x1182C, 0x1182E}, {0x1182F, 0x11837}, {0x11838, 0x11838}, {0x11839, 0x1183A},
+ {0x118E0, 0x118E9}, {0x11930, 0x11935}, {0x11937, 0x11938}, {0x1193B, 0x1193C}, {0x1193D, 0x1193D},
+ {0x1193E, 0x1193E}, {0x11940, 0x11940}, {0x11942, 0x11942}, {0x11943, 0x11943}, {0x11950, 0x11959},
+ {0x119D1, 0x119D3}, {0x119D4, 0x119D7}, {0x119DA, 0x119DB}, {0x119DC, 0x119DF}, {0x119E0, 0x119E0},
+ {0x119E4, 0x119E4}, {0x11A01, 0x11A0A}, {0x11A33, 0x11A38}, {0x11A39, 0x11A39}, {0x11A3B, 0x11A3E},
+ {0x11A47, 0x11A47}, {0x11A51, 0x11A56}, {0x11A57, 0x11A58}, {0x11A59, 0x11A5B}, {0x11A8A, 0x11A96},
+ {0x11A97, 0x11A97}, {0x11A98, 0x11A99}, {0x11C2F, 0x11C2F}, {0x11C30, 0x11C36}, {0x11C38, 0x11C3D},
+ {0x11C3E, 0x11C3E}, {0x11C3F, 0x11C3F}, {0x11C50, 0x11C59}, {0x11C92, 0x11CA7}, {0x11CA9, 0x11CA9},
+ {0x11CAA, 0x11CB0}, {0x11CB1, 0x11CB1}, {0x11CB2, 0x11CB3}, {0x11CB4, 0x11CB4}, {0x11CB5, 0x11CB6},
+ {0x11D31, 0x11D36}, {0x11D3A, 0x11D3A}, {0x11D3C, 0x11D3D}, {0x11D3F, 0x11D45}, {0x11D47, 0x11D47},
+ {0x11D50, 0x11D59}, {0x11D8A, 0x11D8E}, {0x11D90, 0x11D91}, {0x11D93, 0x11D94}, {0x11D95, 0x11D95},
+ {0x11D96, 0x11D96}, {0x11D97, 0x11D97}, {0x11DA0, 0x11DA9}, {0x11EF3, 0x11EF4}, {0x11EF5, 0x11EF6},
+ {0x16A60, 0x16A69}, {0x16AF0, 0x16AF4}, {0x16B30, 0x16B36}, {0x16B50, 0x16B59}, {0x16F4F, 0x16F4F},
+ {0x16F51, 0x16F87}, {0x16F8F, 0x16F92}, {0x16FE4, 0x16FE4}, {0x16FF0, 0x16FF1}, {0x1BC9D, 0x1BC9E},
+ {0x1D165, 0x1D166}, {0x1D167, 0x1D169}, {0x1D16D, 0x1D172}, {0x1D17B, 0x1D182}, {0x1D185, 0x1D18B},
+ {0x1D1AA, 0x1D1AD}, {0x1D242, 0x1D244}, {0x1D7CE, 0x1D7FF}, {0x1DA00, 0x1DA36}, {0x1DA3B, 0x1DA6C},
+ {0x1DA75, 0x1DA75}, {0x1DA84, 0x1DA84}, {0x1DA9B, 0x1DA9F}, {0x1DAA1, 0x1DAAF}, {0x1E000, 0x1E006},
+ {0x1E008, 0x1E018}, {0x1E01B, 0x1E021}, {0x1E023, 0x1E024}, {0x1E026, 0x1E02A}, {0x1E130, 0x1E136},
+ {0x1E140, 0x1E149}, {0x1E2EC, 0x1E2EF}, {0x1E2F0, 0x1E2F9}, {0x1E8D0, 0x1E8D6}, {0x1E944, 0x1E94A},
+ {0x1E950, 0x1E959}, {0x1FBF0, 0x1FBF9}, {0xE0100, 0xE01EF},
+};
//
// Return the location of the next character or UTF8 codepoint.
// (i.e. skip forward one codepoint at a time, not one byte at a time)
//
-public const char *next_char(const char *str, const char *end)
-{
- if (likely(str+1 <= end) && likely((str[0] & 0x80) == 0x0))
- return str+1;
- if (likely(str+2 <= end) && (str[0] & 0xe0) == 0xc0)
- return str+2;
- if (likely(str+3 <= end) && (str[0] & 0xf0) == 0xe0)
- return str+3;
- if (likely(str+4 <= end) && (str[0] & 0xf8) == 0xf0)
- return str+4;
- return likely(str+1 <= end) ? str+1 : end;
+public
+const char *next_char(const char *str, const char *end) {
+ if (likely(str + 1 <= end) && likely((str[0] & 0x80) == 0x0)) return str + 1;
+ if (likely(str + 2 <= end) && (str[0] & 0xe0) == 0xc0) return str + 2;
+ if (likely(str + 3 <= end) && (str[0] & 0xf0) == 0xe0) return str + 3;
+ if (likely(str + 4 <= end) && (str[0] & 0xf8) == 0xf0) return str + 4;
+ return likely(str + 1 <= end) ? str + 1 : end;
}
//
// Return the location of the previous character or UTF8 codepoint.
// (i.e. skip backwards one codepoint at a time, not one byte at a time)
//
-public const char *prev_char(const char *start, const char *str)
-{
- if (likely(str-1 >= start) && likely((str[-1] & 0x80) == 0x0))
- return str-1;
- if (likely(str-2 >= start) && (str[-2] & 0xe0) == 0xc0)
- return str-2;
- if (likely(str-3 >= start) && (str[-3] & 0xf0) == 0xe0)
- return str-3;
- if (likely(str-4 >= start) && (str[-4] & 0xf8) == 0xf0)
- return str-4;
- return likely(str-1 >= start) ? str-1 : start;
+public
+const char *prev_char(const char *start, const char *str) {
+ if (likely(str - 1 >= start) && likely((str[-1] & 0x80) == 0x0)) return str - 1;
+ if (likely(str - 2 >= start) && (str[-2] & 0xe0) == 0xc0) return str - 2;
+ if (likely(str - 3 >= start) && (str[-3] & 0xf0) == 0xe0) return str - 3;
+ if (likely(str - 4 >= start) && (str[-4] & 0xf8) == 0xf0) return str - 4;
+ return likely(str - 1 >= start) ? str - 1 : start;
}
-static uint32_t get_codepoint(const char *str, const char *end)
-{
- if (unlikely(str >= end))
- return (uint32_t)-1;
+static uint32_t get_codepoint(const char *str, const char *end) {
+ if (unlikely(str >= end)) return (uint32_t)-1;
unsigned char c1 = (unsigned char)str[0];
int seqlen;
uint32_t codepoint;
if (likely((c1 & 0x80) == 0)) {
- return (uint32_t) c1;
+ return (uint32_t)c1;
} else if ((c1 & 0xE0) == 0xC0) {
- codepoint = (uint32_t) (c1 & 0x1F);
+ codepoint = (uint32_t)(c1 & 0x1F);
seqlen = 2;
} else if ((c1 & 0xF0) == 0xE0) {
- codepoint = (uint32_t) (c1 & 0x0F);
+ codepoint = (uint32_t)(c1 & 0x0F);
seqlen = 3;
} else if ((c1 & 0xF8) == 0xF0) {
- codepoint = (uint32_t) (c1 & 0x07);
+ codepoint = (uint32_t)(c1 & 0x07);
seqlen = 4;
} else {
return (uint32_t)-1;
}
for (int i = 1; i < seqlen; ++i) {
- if (unlikely((&str[i] >= end) || (str[i] & 0xC0) != 0x80))
- return (uint32_t)-1;
+ if (unlikely((&str[i] >= end) || (str[i] & 0xC0) != 0x80)) return (uint32_t)-1;
codepoint = ((codepoint << 6) | (uint32_t)(str[i] & 0x3F));
}
- return codepoint;
+ return codepoint;
}
-static bool find_in_ranges(uint32_t codepoint, const uint32_t ranges[][2], size_t nranges)
-{
+static bool find_in_ranges(uint32_t codepoint, const uint32_t ranges[][2], size_t nranges) {
// Binary search:
int lo = 0, hi = nranges - 1;
while (lo <= hi) {
int mid = (lo + hi) / 2;
- if (ranges[mid][0] <= codepoint && codepoint <= ranges[mid][1])
- return true;
- else if (codepoint > ranges[mid][1])
- lo = mid + 1;
- else if (codepoint < ranges[mid][0])
- hi = mid - 1;
+ if (ranges[mid][0] <= codepoint && codepoint <= ranges[mid][1]) return true;
+ else if (codepoint > ranges[mid][1]) lo = mid + 1;
+ else if (codepoint < ranges[mid][0]) hi = mid - 1;
}
return false;
}
-public bool isidstart(const char *str, const char *end)
-{
+public
+bool isidstart(const char *str, const char *end) {
if (unlikely(str >= end)) return false;
else if (isalpha(*str) || *str == '_') return true;
else if (likely((*str & 0x80) == 0)) return false;
uint32_t codepoint = get_codepoint(str, end);
- return codepoint != (uint32_t)-1
- && find_in_ranges(codepoint, XID_Start, ARRAY_LEN(XID_Start));
+ return codepoint != (uint32_t)-1 && find_in_ranges(codepoint, XID_Start, ARRAY_LEN(XID_Start));
}
-public bool isidcontinue(const char *str, const char *end)
-{
+public
+bool isidcontinue(const char *str, const char *end) {
if (unlikely(str >= end)) return false;
else if (isalnum(*str) || *str == '_') return true;
else if (likely((*str & 0x80) == 0)) return false;
uint32_t codepoint = get_codepoint(str, end);
return codepoint != (uint32_t)-1
- && (find_in_ranges(codepoint, XID_Start, ARRAY_LEN(XID_Start))
- || find_in_ranges(codepoint, XID_Continue_only, ARRAY_LEN(XID_Continue_only)));
+ && (find_in_ranges(codepoint, XID_Start, ARRAY_LEN(XID_Start))
+ || find_in_ranges(codepoint, XID_Continue_only, ARRAY_LEN(XID_Continue_only)));
}
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/utf8.h b/utf8.h
index f5c251f..076658a 100644
--- a/utf8.h
+++ b/utf8.h
@@ -7,13 +7,9 @@
#define UTF8_MAXCHARLEN 4
-__attribute__((nonnull, pure))
-const char *next_char(const char *str, const char *end);
-__attribute__((nonnull, pure))
-const char *prev_char(const char *start, const char *str);
-__attribute__((nonnull, pure))
-bool isidstart(const char *str, const char *end);
-__attribute__((nonnull, pure))
-bool isidcontinue(const char *str, const char *end);
+__attribute__((nonnull, pure)) const char *next_char(const char *str, const char *end);
+__attribute__((nonnull, pure)) const char *prev_char(const char *start, const char *str);
+__attribute__((nonnull, pure)) bool isidstart(const char *str, const char *end);
+__attribute__((nonnull, pure)) bool isidcontinue(const char *str, const char *end);
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/utils.c b/utils.c
index 78b0a25..ab74bce 100644
--- a/utils.c
+++ b/utils.c
@@ -15,21 +15,24 @@
// Helper function to skip past all spaces (and comments)
// Returns a pointer to the first non-space character.
//
-public const char *after_spaces(const char *str, bool skip_nl, const char *end)
-{
+public
+const char *after_spaces(const char *str, bool skip_nl, const char *end) {
// Skip whitespace and comments:
- skip_whitespace:
+skip_whitespace:
if (str >= end) return str;
switch (*str) {
- case '\r': case '\n':
+ case '\r':
+ case '\n':
if (!skip_nl) break;
- __attribute__ ((fallthrough));
- case ' ': case '\t': {
+ __attribute__((fallthrough));
+ case ' ':
+ case '\t': {
++str;
goto skip_whitespace;
}
case '#': {
- while (str < end && *str != '\n') ++str;
+ while (str < end && *str != '\n')
+ ++str;
goto skip_whitespace;
}
default: break;
@@ -41,8 +44,8 @@ public const char *after_spaces(const char *str, bool skip_nl, const char *end)
// Return the first character after a valid BP name, or NULL if none is
// found.
//
-public const char *after_name(const char *str, const char *end)
-{
+public
+const char *after_name(const char *str, const char *end) {
if (str >= end) return end;
if (*str == '|') return &str[1];
if (*str == '^' || *str == '_' || *str == '$') {
@@ -50,8 +53,7 @@ public const char *after_name(const char *str, const char *end)
}
if (!isalpha(*str)) return NULL;
for (++str; str < end; ++str) {
- if (!(isalnum(*str) || *str == '-'))
- break;
+ if (!(isalnum(*str) || *str == '-')) break;
}
return str;
}
@@ -59,8 +61,8 @@ public const char *after_name(const char *str, const char *end)
//
// Check if a character is found and if so, move past it.
//
-public bool matchchar(const char **str, char c, bool skip_nl, const char *end)
-{
+public
+bool matchchar(const char **str, char c, bool skip_nl, const char *end) {
const char *next = after_spaces(*str, skip_nl, end);
if (next >= end) return false;
if (*next == c) {
@@ -73,8 +75,8 @@ public bool matchchar(const char **str, char c, bool skip_nl, const char *end)
//
// Check if a string is found and if so, move past it.
//
-public bool matchstr(const char **str, const char *target, bool skip_nl, const char *end)
-{
+public
+bool matchstr(const char **str, const char *target, bool skip_nl, const char *end) {
const char *next = after_spaces(*str, skip_nl, end);
if (next + strlen(target) > end) return false;
if (strncmp(next, target, strlen(target)) == 0) {
@@ -89,24 +91,27 @@ public bool matchstr(const char **str, const char *target, bool skip_nl, const c
// character that was escaped.
// Set *after = the first character past the end of the escape sequence.
//
-public char unescapechar(const char *escaped, const char **after, const char *end)
-{
+public
+char unescapechar(const char *escaped, const char **after, const char *end) {
size_t len = 0;
unsigned char ret = '\\';
if (escaped >= end) goto finished;
ret = (unsigned char)*escaped;
++len;
switch (*escaped) {
- case 'a': ret = '\a'; break; case 'b': ret = '\b'; break;
- case 'n': ret = '\n'; break; case 'r': ret = '\r'; break;
- case 't': ret = '\t'; break; case 'v': ret = '\v'; break;
- case 'e': ret = '\033'; break; case '\\': ret = '\\'; break;
+ case 'a': ret = '\a'; break;
+ case 'b': ret = '\b'; break;
+ case 'n': ret = '\n'; break;
+ case 'r': ret = '\r'; break;
+ case 't': ret = '\t'; break;
+ case 'v': ret = '\v'; break;
+ case 'e': ret = '\033'; break;
+ case '\\': ret = '\\'; break;
case 'x': { // Hex
static const unsigned char hextable[255] = {
- ['0']=0x10, ['1']=0x1, ['2']=0x2, ['3']=0x3, ['4']=0x4,
- ['5']=0x5, ['6']=0x6, ['7']=0x7, ['8']=0x8, ['9']=0x9,
- ['a']=0xa, ['b']=0xb, ['c']=0xc, ['d']=0xd, ['e']=0xe, ['f']=0xf,
- ['A']=0xa, ['B']=0xb, ['C']=0xc, ['D']=0xd, ['E']=0xe, ['F']=0xf,
+ ['0'] = 0x10, ['1'] = 0x1, ['2'] = 0x2, ['3'] = 0x3, ['4'] = 0x4, ['5'] = 0x5, ['6'] = 0x6, ['7'] = 0x7,
+ ['8'] = 0x8, ['9'] = 0x9, ['a'] = 0xa, ['b'] = 0xb, ['c'] = 0xc, ['d'] = 0xd, ['e'] = 0xe, ['f'] = 0xf,
+ ['A'] = 0xa, ['B'] = 0xb, ['C'] = 0xc, ['D'] = 0xd, ['E'] = 0xe, ['F'] = 0xf,
};
if (escaped + 2 >= end) {
len = 0;
@@ -117,7 +122,14 @@ public char unescapechar(const char *escaped, const char **after, const char *en
}
break;
}
- case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': { // Octal
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7': { // Octal
ret = (unsigned char)(escaped[0] - '0');
if (escaped + 2 >= end) {
len = 0;
@@ -132,11 +144,9 @@ public char unescapechar(const char *escaped, const char **after, const char *en
}
break;
}
- default:
- len = 0;
- goto finished;
+ default: len = 0; goto finished;
}
- finished:
+finished:
if (after) *after = &escaped[len];
return (char)ret;
}
@@ -144,12 +154,11 @@ public char unescapechar(const char *escaped, const char **after, const char *en
//
// Free the memory referred to by the pointer that `p` points at, then set
// that pointer to NULL for safety. Exits with an error if it is already NULL.
//
-public void delete(void *p)
-{
- if (*(void**)p == NULL)
- errx(EXIT_FAILURE, "attempt to free(NULL)");
- free(*(void**)p);
- *((void**)p) = NULL;
+public
+void delete(void *p) {
+ if (*(void **)p == NULL) errx(EXIT_FAILURE, "attempt to free(NULL)");
+ free(*(void **)p);
+ *((void **)p) = NULL;
}
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/utils.h b/utils.h
index 9779f78..aa2ccc4 100644
--- a/utils.h
+++ b/utils.h
@@ -6,9 +6,8 @@
#include <err.h>
#include <stdarg.h>
#include <stdbool.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
+#include <stdlib.h> // IWYU pragma: export
+#include <string.h> // IWYU pragma: export
#include <unistd.h>
#ifndef auto
@@ -18,35 +17,35 @@
#define S1(x) #x
#define S2(x) S1(x)
-#define require(e, msg) ({\
- __typeof__(e) __expr = e; \
- if (_Generic(__expr, int: (ssize_t)__expr < 0, ssize_t: (ssize_t)__expr < 0, default: !__expr)) errx(1, __FILE__":"S2(__LINE__)": " msg); \
- __expr; \
-})
+#define require(e, msg) \
+ ({ \
+ __typeof__(e) __expr = e; \
+ if (_Generic(__expr, int: (ssize_t)__expr < 0, ssize_t: (ssize_t)__expr < 0, default: !__expr)) \
+ errx(1, __FILE__ ":" S2(__LINE__) ": " msg); \
+ __expr; \
+ })
-#define When(x, _tag) ((x)->type == _tag ? &(x)->__tagged._tag : (errx(1, __FILE__ ":%d This was supposed to be a " # _tag "\n", __LINE__), &(x)->__tagged._tag))
+#define When(x, _tag) \
+ ((x)->type == _tag \
+ ? &(x)->__tagged._tag \
+ : (errx(1, __FILE__ ":%d This was supposed to be a " #_tag "\n", __LINE__), &(x)->__tagged._tag))
#ifndef public
-#define public __attribute__ ((visibility ("default")))
+#define public __attribute__((visibility("default")))
#endif
#define new(t) require(calloc(1, sizeof(t)), "`new(" #t ")` allocation failure")
#define checked_strdup(s) require(strdup(s), "`checked_strdup(" #s ")` allocation failure")
-#define grow(arr,n) require(realloc(arr,sizeof(arr[0])*(n)), "`grow(" #arr ", " #n ")` allocation failure")
+#define grow(arr, n) require(realloc(arr, sizeof(arr[0]) * (n)), "`grow(" #arr ", " #n ")` allocation failure")
#define streq(a, b) (strcmp(a, b) == 0)
-__attribute__((nonnull(1)))
-char unescapechar(const char *escaped, const char **after, const char *end);
-__attribute__((pure, nonnull))
-const char *after_name(const char *str, const char *end);
-__attribute__((pure, nonnull, returns_nonnull))
-const char *after_spaces(const char *str, bool skip_nl, const char *end);
-__attribute__((nonnull))
-bool matchchar(const char **str, char c, bool skip_nl, const char *end);
-__attribute__((nonnull))
-bool matchstr(const char **str, const char *target, bool skip_nl, const char *end);
-__attribute__((nonnull))
-void delete(void *p);
+__attribute__((nonnull(1))) char unescapechar(const char *escaped, const char **after, const char *end);
+__attribute__((pure, nonnull)) const char *after_name(const char *str, const char *end);
+__attribute__((pure, nonnull, returns_nonnull)) const char *after_spaces(const char *str, bool skip_nl,
+ const char *end);
+__attribute__((nonnull)) bool matchchar(const char **str, char c, bool skip_nl, const char *end);
+__attribute__((nonnull)) bool matchstr(const char **str, const char *target, bool skip_nl, const char *end);
+__attribute__((nonnull)) void delete(void *p);
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0