diff options
| author | Bruce Hill <bruce@bruce-hill.com> | 2025-09-24 20:22:00 -0400 |
|---|---|---|
| committer | Bruce Hill <bruce@bruce-hill.com> | 2025-09-24 20:22:00 -0400 |
| commit | 3d5944a732e34b6dd01921dee991dee54af47e18 (patch) | |
| tree | 97d17a4e7feb97d367060a184907a6978352d5ec | |
| parent | 20c11b29b3a63c221cac942a17bf9abcf8b9bafe (diff) | |
Autoformatting with clang-format
| -rw-r--r-- | .clang-format | 274 | ||||
| -rw-r--r-- | .clangd | 3 | ||||
| -rw-r--r-- | Lua/lbp.c | 157 | ||||
| -rw-r--r-- | bp.c | 288 | ||||
| -rw-r--r-- | files.c | 127 | ||||
| -rw-r--r-- | files.h | 25 | ||||
| -rw-r--r-- | match.c | 352 | ||||
| -rw-r--r-- | match.h | 12 | ||||
| -rw-r--r-- | pattern.c | 389 | ||||
| -rw-r--r-- | pattern.h | 138 | ||||
| -rw-r--r-- | printmatch.c | 108 | ||||
| -rw-r--r-- | printmatch.h | 7 | ||||
| -rw-r--r-- | utf8.c | 505 | ||||
| -rw-r--r-- | utf8.h | 12 | ||||
| -rw-r--r-- | utils.c | 81 | ||||
| -rw-r--r-- | utils.h | 45 |
16 files changed, 1383 insertions, 1140 deletions
diff --git a/.clang-format b/.clang-format new file mode 100644 index 0000000..82e3ce3 --- /dev/null +++ b/.clang-format @@ -0,0 +1,274 @@ +--- +Language: Cpp +AccessModifierOffset: -2 +AlignAfterOpenBracket: Align +AlignArrayOfStructures: None +AlignConsecutiveAssignments: + Enabled: false + AcrossEmptyLines: false + AcrossComments: false + AlignCompound: false + AlignFunctionPointers: false + PadOperators: true +AlignConsecutiveBitFields: + Enabled: false + AcrossEmptyLines: false + AcrossComments: false + AlignCompound: false + AlignFunctionPointers: false + PadOperators: false +AlignConsecutiveDeclarations: + Enabled: false + AcrossEmptyLines: false + AcrossComments: false + AlignCompound: false + AlignFunctionPointers: false + PadOperators: false +AlignConsecutiveMacros: + Enabled: false + AcrossEmptyLines: false + AcrossComments: false + AlignCompound: false + AlignFunctionPointers: false + PadOperators: false +AlignConsecutiveShortCaseStatements: + Enabled: false + AcrossEmptyLines: false + AcrossComments: false + AlignCaseArrows: false + AlignCaseColons: false +AlignConsecutiveTableGenBreakingDAGArgColons: + Enabled: false + AcrossEmptyLines: false + AcrossComments: false + AlignCompound: false + AlignFunctionPointers: false + PadOperators: false +AlignConsecutiveTableGenCondOperatorColons: + Enabled: false + AcrossEmptyLines: false + AcrossComments: false + AlignCompound: false + AlignFunctionPointers: false + PadOperators: false +AlignConsecutiveTableGenDefinitionColons: + Enabled: false + AcrossEmptyLines: false + AcrossComments: false + AlignCompound: false + AlignFunctionPointers: false + PadOperators: false +AlignEscapedNewlines: Right +AlignOperands: Align +AlignTrailingComments: + Kind: Never + OverEmptyLines: 0 +AllowAllArgumentsOnNextLine: true +AllowAllParametersOfDeclarationOnNextLine: true +AllowBreakBeforeNoexceptSpecifier: Never +AllowShortBlocksOnASingleLine: Never +AllowShortCaseExpressionOnASingleLine: true +AllowShortCaseLabelsOnASingleLine: true +AllowShortCompoundRequirementOnASingleLine: true +AllowShortEnumsOnASingleLine: true +AllowShortFunctionsOnASingleLine: All +AllowShortIfStatementsOnASingleLine: AllIfsAndElse +AllowShortLambdasOnASingleLine: All +AllowShortLoopsOnASingleLine: false +AlwaysBreakAfterDefinitionReturnType: None +AlwaysBreakBeforeMultilineStrings: false +AttributeMacros: + - __capability +BinPackArguments: true +BinPackParameters: true +BitFieldColonSpacing: Both +BraceWrapping: + AfterCaseLabel: false + AfterClass: false + AfterControlStatement: Never + AfterEnum: false + AfterExternBlock: false + AfterFunction: false + AfterNamespace: false + AfterObjCDeclaration: false + AfterStruct: false + AfterUnion: false + BeforeCatch: false + BeforeElse: false + BeforeLambdaBody: false + BeforeWhile: false + IndentBraces: false + SplitEmptyFunction: true + SplitEmptyRecord: true + SplitEmptyNamespace: true +BreakAdjacentStringLiterals: true +BreakAfterAttributes: Leave +BreakAfterJavaFieldAnnotations: false +BreakAfterReturnType: None +BreakArrays: true +BreakBeforeBinaryOperators: NonAssignment +BreakBeforeConceptDeclarations: Always +BreakBeforeBraces: Attach +BreakBeforeInlineASMColon: OnlyMultiline +BreakBeforeTernaryOperators: true +BreakConstructorInitializers: BeforeColon +BreakFunctionDefinitionParameters: false +BreakInheritanceList: BeforeColon +BreakStringLiterals: true +BreakTemplateDeclarations: MultiLine +ColumnLimit: 120 +CommentPragmas: '^ IWYU pragma:' +CompactNamespaces: false +ConstructorInitializerIndentWidth: 4 +ContinuationIndentWidth: 4 +Cpp11BracedListStyle: true +DerivePointerAlignment: false +DisableFormat: false +EmptyLineAfterAccessModifier: Never +EmptyLineBeforeAccessModifier: LogicalBlock +ExperimentalAutoDetectBinPacking: false +FixNamespaceComments: true +ForEachMacros: + - foreach + - Q_FOREACH + - BOOST_FOREACH +IfMacros: + - KJ_IF_MAYBE +IncludeBlocks: Preserve +IncludeCategories: + - Regex: '^"(llvm|llvm-c|clang|clang-c)/' + Priority: 2 + SortPriority: 0 + CaseSensitive: false + - Regex: '^(<|"(gtest|gmock|isl|json)/)' + Priority: 3 + SortPriority: 0 + CaseSensitive: false + - Regex: '.*' + Priority: 1 + SortPriority: 0 + CaseSensitive: false +IncludeIsMainRegex: '(Test)?$' +IncludeIsMainSourceRegex: '' +IndentAccessModifiers: false +IndentCaseBlocks: false +IndentCaseLabels: false +IndentExternBlock: AfterExternBlock +IndentGotoLabels: true +IndentPPDirectives: None +IndentRequiresClause: true +IndentWidth: 4 +IndentWrappedFunctionNames: false +InsertBraces: false +InsertNewlineAtEOF: false +InsertTrailingCommas: None +IntegerLiteralSeparator: + Binary: 0 + BinaryMinDigits: 0 + Decimal: 0 + DecimalMinDigits: 0 + Hex: 0 + HexMinDigits: 0 +JavaScriptQuotes: Leave +JavaScriptWrapImports: true +KeepEmptyLines: + AtEndOfFile: false + AtStartOfBlock: true + AtStartOfFile: true +LambdaBodyIndentation: Signature +LineEnding: DeriveLF +MacroBlockBegin: '' +MacroBlockEnd: '' +MainIncludeChar: Quote +MaxEmptyLinesToKeep: 1 +NamespaceIndentation: None +ObjCBinPackProtocolList: Auto +ObjCBlockIndentWidth: 2 +ObjCBreakBeforeNestedBlockParam: true +ObjCSpaceAfterProperty: false +ObjCSpaceBeforeProtocolList: true +PackConstructorInitializers: BinPack +PenaltyBreakAssignment: 2 +PenaltyBreakBeforeFirstCallParameter: 19 +PenaltyBreakComment: 300 +PenaltyBreakFirstLessLess: 120 +PenaltyBreakOpenParenthesis: 0 +PenaltyBreakScopeResolution: 500 +PenaltyBreakString: 1000 +PenaltyBreakTemplateDeclaration: 10 +PenaltyExcessCharacter: 1000000 +PenaltyIndentedWhitespace: 0 +PenaltyReturnTypeOnItsOwnLine: 60 +PointerAlignment: Right +PPIndentWidth: -1 +QualifierAlignment: Leave +ReferenceAlignment: Pointer +ReflowComments: true +RemoveBracesLLVM: false +RemoveParentheses: Leave +RemoveSemicolon: false +RequiresClausePosition: OwnLine +RequiresExpressionIndentation: OuterScope +SeparateDefinitionBlocks: Leave +ShortNamespaceLines: 1 +SkipMacroDefinitionBody: false +SortIncludes: CaseSensitive +SortJavaStaticImport: Before +SortUsingDeclarations: LexicographicNumeric +SpaceAfterCStyleCast: false +SpaceAfterLogicalNot: false +SpaceAfterTemplateKeyword: true +SpaceAroundPointerQualifiers: Default +SpaceBeforeAssignmentOperators: true +SpaceBeforeCaseColon: false +SpaceBeforeCpp11BracedList: false +SpaceBeforeCtorInitializerColon: true +SpaceBeforeInheritanceColon: true +SpaceBeforeJsonColon: false +SpaceBeforeParens: ControlStatements +SpaceBeforeParensOptions: + AfterControlStatements: true + AfterForeachMacros: true + AfterFunctionDefinitionName: false + AfterFunctionDeclarationName: false + AfterIfMacros: true + AfterOverloadedOperator: false + AfterPlacementOperator: true + AfterRequiresInClause: false + AfterRequiresInExpression: false + BeforeNonEmptyParentheses: false +SpaceBeforeRangeBasedForLoopColon: true +SpaceBeforeSquareBrackets: false +SpaceInEmptyBlock: false +SpacesBeforeTrailingComments: 1 +SpacesInAngles: Never +SpacesInContainerLiterals: true +SpacesInLineCommentPrefix: + Minimum: 1 + Maximum: -1 +SpacesInParens: Never +SpacesInParensOptions: + ExceptDoubleParentheses: false + InCStyleCasts: false + InConditionalStatements: false + InEmptyParentheses: false + Other: false +SpacesInSquareBrackets: false +Standard: Latest +StatementAttributeLikeMacros: + - Q_EMIT +StatementMacros: + - Q_UNUSED + - QT_REQUIRE_VERSION +TableGenBreakInsideDAGArg: DontBreak +TabWidth: 4 +UseTab: Never +VerilogBreakBetweenInstancePorts: true +WhitespaceSensitiveMacros: + - BOOST_PP_STRINGIZE + - CF_SWIFT_NAME + - NS_SWIFT_NAME + - PP_STRINGIZE + - STRINGIZE +... + @@ -0,0 +1,3 @@ +CompileFlags: + Add: [-xc] +Checks: '-bugprone-suspicious-missing-comma' @@ -1,14 +1,14 @@ /* -* lbp.c - bp library for lua -* API: -* bp.match(pat, str, [start_index]) -> nil or match_table -* bp.replace(pat, replacement, str, [start_index]) -> str with replacements, num_replacements -* for match_table in bp.matches(pat, str, [start_index]) do ... end -* bp.compile(pat) -> pattern object -* pat:match(str, [start_index]) -* pat:replace(replacement, str, [start_index]) -* for match in pat:matches(str, [start_index]) do ... end -*/ + * lbp.c - bp library for lua + * API: + * bp.match(pat, str, [start_index]) -> nil or match_table + * bp.replace(pat, replacement, str, [start_index]) -> str with replacements, num_replacements + * for match_table in bp.matches(pat, str, [start_index]) do ... end + * bp.compile(pat) -> pattern object + * pat:match(str, [start_index]) + * pat:replace(replacement, str, [start_index]) + * for match in pat:matches(str, [start_index]) do ... end + */ #include <fcntl.h> #include <stdlib.h> @@ -16,11 +16,11 @@ #include <sys/mman.h> #include <sys/stat.h> -#include "lua.h" #include "lauxlib.h" +#include "lua.h" -#include "../pattern.h" #include "../match.h" +#include "../pattern.h" #include "../printmatch.h" #include "../utils.h" @@ -34,25 +34,22 @@ static void push_match(lua_State *L, bp_match_t *m, const char *start); lua_State *cur_state = NULL; -static void match_error(char **msg) -{ +static void match_error(char **msg) { lua_pushstring(cur_state, *msg); free(*msg); *msg = NULL; lua_error(cur_state); } -static inline void raise_parse_error(lua_State *L, maybe_pat_t m) -{ +static inline void raise_parse_error(lua_State *L, maybe_pat_t m) { size_t err_len = (size_t)(m.value.error.end - m.value.error.start); - char *buf = calloc(err_len+1, sizeof(char)); + char *buf = calloc(err_len + 1, sizeof(char)); memcpy(buf, m.value.error.start, err_len); luaL_error(L, "%s: \"%s\"", m.value.error.msg, buf); free(buf); } -static int Lcompile(lua_State *L) -{ +static int Lcompile(lua_State *L) { size_t patlen; const char *pat_text = luaL_checklstring(L, 1, &patlen); maybe_pat_t maybe_pat = bp_pattern(pat_text, pat_text + patlen); @@ -60,18 +57,17 @@ static int Lcompile(lua_State *L) raise_parse_error(L, maybe_pat); return 0; } - bp_pat_t **pat_storage = (bp_pat_t**)lua_newuserdatauv(L, sizeof(bp_pat_t*), 1); + bp_pat_t **pat_storage = (bp_pat_t **)lua_newuserdatauv(L, sizeof(bp_pat_t *), 1); *pat_storage = maybe_pat.value.pat; lua_pushvalue(L, 1); lua_setiuservalue(L, -2, 1); - lua_pushlightuserdata(L, (void*)&PAT_METATABLE); + lua_pushlightuserdata(L, (void *)&PAT_METATABLE); lua_gettable(L, LUA_REGISTRYINDEX); lua_setmetatable(L, -2); return 1; } -static void push_matchstring(lua_State *L, bp_match_t *m) -{ +static void push_matchstring(lua_State *L, bp_match_t *m) { char *buf = NULL; size_t size = 0; FILE *out = open_memstream(&buf, &size); @@ -81,8 +77,7 @@ static void push_matchstring(lua_State *L, bp_match_t *m) fclose(out); } -static bp_match_t *get_first_capture(bp_match_t *m) -{ +static bp_match_t *get_first_capture(bp_match_t *m) { if (m->pat->type == BP_TAGGED) { return m; } else if (m->pat->type == BP_CAPTURE && !When(m->pat, BP_CAPTURE)->name) { @@ -96,8 +91,7 @@ static bp_match_t *get_first_capture(bp_match_t *m) return NULL; } -static void set_capture_fields(lua_State *L, bp_match_t *m, int *n, const char *start) -{ +static void set_capture_fields(lua_State *L, bp_match_t *m, int *n, const char *start) { if (m->pat->type == BP_CAPTURE) { bp_match_t *cap = get_first_capture(m->children[0]); if (!cap) cap = m->children[0]; @@ -119,10 +113,9 @@ static void set_capture_fields(lua_State *L, bp_match_t *m, int *n, const char * } } -static void push_match(lua_State *L, bp_match_t *m, const char *start) -{ +static void push_match(lua_State *L, bp_match_t *m, const char *start) { lua_createtable(L, 1, 2); - lua_pushlightuserdata(L, (void*)&MATCH_METATABLE); + lua_pushlightuserdata(L, (void *)&MATCH_METATABLE); lua_gettable(L, LUA_REGISTRYINDEX); lua_setmetatable(L, -2); push_matchstring(L, m); @@ -144,11 +137,9 @@ static void push_match(lua_State *L, bp_match_t *m, const char *start) lua_setfield(L, -2, "after"); } -static int Lmatch(lua_State *L) -{ +static int Lmatch(lua_State *L) { if (lua_isstring(L, 1)) { - if (Lcompile(L) != 1) - return 0; + if (Lcompile(L) != 1) return 0; lua_replace(L, 1); } bp_pat_t **at_pat = lua_touserdata(L, 1); @@ -162,19 +153,17 @@ static int Lmatch(lua_State *L) lua_getfield(L, 3, "start"); lua_getfield(L, 3, "after"); index = luaL_optinteger(L, -1, 1); - if (lua_rawequal(L, -1, -2)) - ++index; + if (lua_rawequal(L, -1, -2)) ++index; } else { index = luaL_optinteger(L, 3, 1); } - if (index > (lua_Integer)strlen(text)+1) - return 0; + if (index > (lua_Integer)strlen(text) + 1) return 0; bp_match_t *m = NULL; int ret = 0; cur_state = L; bp_errhand_t old = bp_set_error_handler(match_error); - if (next_match(&m, text+index-1, &text[textlen], pat, builtins, NULL, false)) { + if (next_match(&m, text + index - 1, &text[textlen], pat, builtins, NULL, false)) { push_match(L, m, text); stop_matching(&m); ret = 1; @@ -183,11 +172,9 @@ static int Lmatch(lua_State *L) return ret; } -static int Lreplace(lua_State *L) -{ +static int Lreplace(lua_State *L) { if (lua_isstring(L, 1)) { - if (Lcompile(L) != 1) - return 0; + if (Lcompile(L) != 1) return 0; lua_replace(L, 1); } bp_pat_t **at_pat = lua_touserdata(L, 1); @@ -198,8 +185,7 @@ static int Lreplace(lua_State *L) const char *rep_text = luaL_checklstring(L, 2, &replen); const char *text = luaL_checklstring(L, 3, &textlen); lua_Integer index = luaL_optinteger(L, 4, 1); - if (index > (lua_Integer)strlen(text)+1) - index = (lua_Integer)strlen(text)+1; + if (index > (lua_Integer)strlen(text) + 1) index = (lua_Integer)strlen(text) + 1; maybe_pat_t maybe_replacement = bp_replacement(pat, rep_text, rep_text + replen); if (!maybe_replacement.success) { @@ -215,7 +201,7 @@ static int Lreplace(lua_State *L) bp_pat_t *rep_pat = maybe_replacement.value.pat; cur_state = L; bp_errhand_t old = bp_set_error_handler(match_error); - for (bp_match_t *m = NULL; next_match(&m, text, &text[textlen], rep_pat, builtins, NULL, false); ) { + for (bp_match_t *m = NULL; next_match(&m, text, &text[textlen], rep_pat, builtins, NULL, false);) { fwrite(prev, sizeof(char), (size_t)(m->start - prev), out); fprint_match(out, text, m, NULL); prev = m->end; @@ -233,8 +219,7 @@ static int Lreplace(lua_State *L) return 2; } -static int iter(lua_State *L) -{ +static int iter(lua_State *L) { lua_geti(L, 1, 1); lua_geti(L, 1, 2); lua_replace(L, 1); @@ -242,14 +227,12 @@ static int iter(lua_State *L) return Lmatch(L); } -static int Lmatches(lua_State *L) -{ +static int Lmatches(lua_State *L) { int nargs = lua_gettop(L); lua_pushcfunction(L, iter); // iter lua_createtable(L, 2, 0); // state: {pat, str} if (lua_isstring(L, 1)) { - if (Lcompile(L) != 1) - return 0; + if (Lcompile(L) != 1) return 0; } else { lua_pushvalue(L, 1); } @@ -262,20 +245,17 @@ static int Lmatches(lua_State *L) return 3; } -static int Lmatch_tostring(lua_State *L) -{ +static int Lmatch_tostring(lua_State *L) { lua_geti(L, 1, 0); return 1; } -static int Lpat_source(lua_State *L) -{ +static int Lpat_source(lua_State *L) { lua_getiuservalue(L, 1, 1); return 1; } -static int Lpat_tostring(lua_State *L) -{ +static int Lpat_tostring(lua_State *L) { luaL_Buffer b; luaL_buffinit(L, &b); luaL_addstring(&b, "Pattern [["); @@ -286,8 +266,7 @@ static int Lpat_tostring(lua_State *L) return 1; } -static int Lpat_gc(lua_State *L) -{ +static int Lpat_gc(lua_State *L) { (void)L; bp_pat_t **at_pat = lua_touserdata(L, 1); bp_pat_t *pat = *at_pat; @@ -296,8 +275,7 @@ static int Lpat_gc(lua_State *L) return 0; } -static int Lpat_join(lua_State *L, const char *joiner) -{ +static int Lpat_join(lua_State *L, const char *joiner) { if (!lua_isstring(L, 1)) { lua_pushcfunction(L, Lpat_source); lua_pushvalue(L, 1); @@ -330,62 +308,39 @@ static int Lpat_join(lua_State *L, const char *joiner) return 1; } -static int Lpat_concat(lua_State *L) -{ - return Lpat_join(L, " "); -} +static int Lpat_concat(lua_State *L) { return Lpat_join(L, " "); } -static int Lpat_div(lua_State *L) -{ - return Lpat_join(L, " / "); -} +static int Lpat_div(lua_State *L) { return Lpat_join(L, " / "); } -static const luaL_Reg match_metamethods[] = { - {"__tostring", Lmatch_tostring}, - {NULL, NULL} -}; +static const luaL_Reg match_metamethods[] = {{"__tostring", Lmatch_tostring}, {NULL, NULL}}; static const luaL_Reg pat_methods[] = { - {"match", Lmatch}, - {"replace", Lreplace}, - {"matches", Lmatches}, - {"getsource", Lpat_source}, - {NULL, NULL} -}; - -static const luaL_Reg pat_metamethods[] = { - {"__gc", Lpat_gc}, - {"__concat", Lpat_concat}, - {"__div", Lpat_div}, - {"__tostring", Lpat_tostring}, - {"__index", NULL}, // placeholder for pat_methods - {NULL, NULL} -}; + {"match", Lmatch}, {"replace", Lreplace}, {"matches", Lmatches}, {"getsource", Lpat_source}, {NULL, NULL}}; + +static const luaL_Reg pat_metamethods[] = {{"__gc", Lpat_gc}, {"__concat", Lpat_concat}, + {"__div", Lpat_div}, {"__tostring", Lpat_tostring}, + {"__index", NULL}, // placeholder for pat_methods + {NULL, NULL}}; static const luaL_Reg bp_methods[] = { - {"match", Lmatch}, - {"replace", Lreplace}, - {"compile", Lcompile}, - {"matches", Lmatches}, - {NULL, NULL} -}; - -public LUALIB_API int luaopen_bp(lua_State *L) -{ - maybe_pat_t maybe_pat = bp_pattern(builtins_source, builtins_source+strlen(builtins_source)); + {"match", Lmatch}, {"replace", Lreplace}, {"compile", Lcompile}, {"matches", Lmatches}, {NULL, NULL}}; + +public +LUALIB_API int luaopen_bp(lua_State *L) { + maybe_pat_t maybe_pat = bp_pattern(builtins_source, builtins_source + strlen(builtins_source)); if (!maybe_pat.success) { raise_parse_error(L, maybe_pat); return 0; } builtins = maybe_pat.value.pat; - lua_pushlightuserdata(L, (void*)&PAT_METATABLE); + lua_pushlightuserdata(L, (void *)&PAT_METATABLE); luaL_newlib(L, pat_metamethods); luaL_newlib(L, pat_methods); lua_setfield(L, -2, "__index"); lua_settable(L, LUA_REGISTRYINDEX); - lua_pushlightuserdata(L, (void*)&MATCH_METATABLE); + lua_pushlightuserdata(L, (void *)&MATCH_METATABLE); luaL_newlib(L, match_metamethods); lua_settable(L, LUA_REGISTRYINDEX); @@ -30,27 +30,27 @@ #define BP_NAME "bp" #endif -static const char *description = BP_NAME" - a Parsing Expression Grammar command line tool"; -static const char *usage = ( - "Usage:\n" - " "BP_NAME" [flags] <pattern> [<files>...]\n\n" - "Flags:\n" - " -A --context-after <n> set number of lines of context to print after the match\n" - " -B --context-before <n> set number of lines of context to print before the match\n" - " -C --context <context> set number of lines of context to print before and after the match\n" - " -G --git in a git repository, treat filenames as patterns for `git ls-files`\n" - " -I --inplace modify a file in-place\n" - " -c --case use case sensitivity\n" - " -e --explain explain the matches\n" - " -f --format fancy|plain|bare|file:line set the output format\n" - " -g --grammar <grammar-file> use the specified file as a grammar\n" - " -h --help print the usage and quit\n" - " -i --ignore-case preform matching case-insensitively\n" - " -l --list-files list filenames only\n" - " -r --replace <replacement> replace the input pattern with the given replacement\n" - " -s --skip <skip-pattern> skip over the given pattern when looking for matches\n" - " -v --verbose print verbose debugging info\n" - " -w --word <string-pat> find words matching the given string pattern\n"); +static const char *description = BP_NAME " - a Parsing Expression Grammar command line tool"; +static const char *usage = + ("Usage:\n" + " " BP_NAME " [flags] <pattern> [<files>...]\n\n" + "Flags:\n" + " -A --context-after <n> set number of lines of context to print after the match\n" + " -B --context-before <n> set number of lines of context to print before the match\n" + " -C --context <context> set number of lines of context to print before and after the match\n" + " -G --git in a git repository, treat filenames as patterns for `git ls-files`\n" + " -I --inplace modify a file in-place\n" + " -c --case use case sensitivity\n" + " -e --explain explain the matches\n" + " -f --format fancy|plain|bare|file:line set the output format\n" + " -g --grammar <grammar-file> use the specified file as a grammar\n" + " -h --help print the usage and quit\n" + " -i --ignore-case preform matching case-insensitively\n" + " -l --list-files list filenames only\n" + " -r --replace <replacement> replace the input pattern with the given replacement\n" + " -s --skip <skip-pattern> skip over the given pattern when looking for matches\n" + " -v --verbose print verbose debugging info\n" + " -w --word <string-pat> find words matching the given string pattern\n"); // Used as a heuristic to check if a file is binary or text: #define CHECK_FIRST_N_BYTES 256 @@ -91,8 +91,7 @@ static file_t *backup_file; // // Helper function to reduce code duplication // -static inline void fprint_filename(FILE *out, const char *filename) -{ +static inline void fprint_filename(FILE *out, const char *filename) { if (!filename[0]) return; if (options.format == FORMAT_FANCY) fprintf(out, "\033[0;1;4;33m%s\033[m\n", filename); else fprintf(out, "%s:\n", filename); @@ -101,59 +100,51 @@ static inline void fprint_filename(FILE *out, const char *filename) // // If there was a parse error while building a pattern, print an error message and exit. // -static inline bp_pat_t *assert_pat(const char *start, const char *end, maybe_pat_t maybe_pat) -{ +static inline bp_pat_t *assert_pat(const char *start, const char *end, maybe_pat_t maybe_pat) { if (!end) end = start + strlen(start); if (!maybe_pat.success) { - const char *err_start = maybe_pat.value.error.start, - *err_end = maybe_pat.value.error.end, - *err_msg = maybe_pat.value.error.msg; + const char *err_start = maybe_pat.value.error.start, *err_end = maybe_pat.value.error.end, + *err_msg = maybe_pat.value.error.msg; const char *nl = memrchr(start, '\n', (size_t)(err_start - start)); - const char *sol = nl ? nl+1 : start; + const char *sol = nl ? nl + 1 : start; nl = memchr(err_start, '\n', (size_t)(end - err_start)); const char *eol = nl ? nl : end; if (eol < err_end) err_end = eol; fprintf(stderr, "\033[31;1m%s\033[0m\n", err_msg); - fprintf(stderr, "%.*s\033[41;30m%.*s\033[m%.*s\n", - (int)(err_start - sol), sol, - (int)(err_end - err_start), err_start, - (int)(eol - err_end), err_end); + fprintf(stderr, "%.*s\033[41;30m%.*s\033[m%.*s\n", (int)(err_start - sol), sol, (int)(err_end - err_start), + err_start, (int)(eol - err_end), err_end); fprintf(stderr, "\033[34;1m"); const char *p = sol; - for (; p < err_start; ++p) (void)fputc(*p == '\t' ? '\t' : ' ', stderr); + for (; p < err_start; ++p) + (void)fputc(*p == '\t' ? '\t' : ' ', stderr); if (err_start == err_end) ++err_end; for (; p < err_end; ++p) if (*p == '\t') // Some janky hacks: 8 ^'s, backtrack 8 spaces, move forward a tab stop, clear any ^'s that overshot fprintf(stderr, "^^^^^^^^\033[8D\033[I\033[K"); - else - (void)fputc('^', stderr); + else (void)fputc('^', stderr); fprintf(stderr, "\033[m\n"); exit(EXIT_FAILURE); } return maybe_pat.value.pat; } - // // Look for a key/value flag at the first position in the given argument list. // If the flag is found, update `next` to point to the next place to check for a flag. // The contents of argv[0] may be modified for single-char flags. // Return the flag's value. // -__attribute__((nonnull)) -static char *get_flag(char *argv[], const char *flag, char ***next) -{ +__attribute__((nonnull)) static char *get_flag(char *argv[], const char *flag, char ***next) { size_t n = strlen(flag); if (strncmp(argv[0], flag, n) != 0) return NULL; if (argv[0][n] == '=') { // --foo=baz, -f=baz *next = &argv[1]; - return &argv[0][n+1]; + return &argv[0][n + 1]; } else if (argv[0][n] == '\0') { // --foo baz, -f baz - if (!argv[1]) - errx(EXIT_FAILURE, "Expected argument after '%s'\n\n%s", flag, usage); + if (!argv[1]) errx(EXIT_FAILURE, "Expected argument after '%s'\n\n%s", flag, usage); *next = &argv[2]; return argv[1]; } else if (flag[0] == '-' && flag[1] != '-' && flag[2] == '\0') { // -f... @@ -169,16 +160,14 @@ static char *get_flag(char *argv[], const char *flag, char ***next) // The contents of argv[0] may be modified for single-char flags. // Return a boolean for whether or not the flag was found. // -__attribute__((nonnull)) -static bool get_boolflag(char *argv[], const char *flag, char ***next) -{ +__attribute__((nonnull)) static bool get_boolflag(char *argv[], const char *flag, char ***next) { size_t n = strlen(flag); if (strncmp(argv[0], flag, n) != 0) return false; if (argv[0][n] == '\0') { // --foo, -f *next = &argv[1]; return true; } else if (flag[0] == '-' && flag[1] != '-' && flag[2] == '\0') { // -f... - memmove(&argv[0][1], &argv[0][2], 1+strlen(&argv[0][2])); // Shift the flags down + memmove(&argv[0][1], &argv[0][2], 1 + strlen(&argv[0][2])); // Shift the flags down *next = argv; return true; } @@ -189,32 +178,27 @@ static bool get_boolflag(char *argv[], const char *flag, char ***next) // Scan the first few dozen bytes of a file and return 1 if the contents all // look like printable text characters, otherwise return 0. // -static int is_text_file(const char *filename) -{ +static int is_text_file(const char *filename) { int fd = open(filename, O_RDONLY); if (fd < 0) return 0; char buf[CHECK_FIRST_N_BYTES]; - ssize_t len = read(fd, buf, sizeof(buf)/sizeof(char)); + ssize_t len = read(fd, buf, sizeof(buf) / sizeof(char)); (void)close(fd); if (len < 0) return 0; for (ssize_t i = 0; i < len; i++) - if (isascii(buf[i]) && !(isprint(buf[i]) || isspace(buf[i]))) - return 0; + if (isascii(buf[i]) && !(isprint(buf[i]) || isspace(buf[i]))) return 0; return 1; } // // Print matches in a visual explanation style // -static int explain_matches(file_t *f, bp_pat_t *pattern, bp_pat_t *defs) -{ +static int explain_matches(file_t *f, bp_pat_t *pattern, bp_pat_t *defs) { int nmatches = 0; - for (bp_match_t *m = NULL; next_match(&m, f->start, f->end, pattern, defs, options.skip, options.ignorecase); ) { + for (bp_match_t *m = NULL; next_match(&m, f->start, f->end, pattern, defs, options.skip, options.ignorecase);) { if (++nmatches == 1) { - if (options.print_filenames) - fprint_filename(stdout, f->filename); - } else - printf("\n\n"); + if (options.print_filenames) fprint_filename(stdout, f->filename); + } else printf("\n\n"); explain_match(m); } return nmatches; @@ -224,14 +208,11 @@ static int explain_matches(file_t *f, bp_pat_t *pattern, bp_pat_t *defs) // Cleanup function to ensure no temp files are left around if the program // exits unexpectedly. // -static void cleanup(void) -{ +static void cleanup(void) { if (modifying_file && backup_file) { rewind(modifying_file); ftruncate(fileno(modifying_file), 0); - (void)fwrite(backup_file->start, 1, - (size_t)(backup_file->end - backup_file->start), - modifying_file); + (void)fwrite(backup_file->start, 1, (size_t)(backup_file->end - backup_file->start), modifying_file); fclose(modifying_file); modifying_file = NULL; } @@ -241,23 +222,22 @@ static void cleanup(void) // // Signal handler to ensure cleanup happens. // -static void sig_handler(int sig) -{ +static void sig_handler(int sig) { cleanup(); if (kill(0, sig)) _exit(EXIT_FAILURE); } -int fprint_linenum(FILE *out, file_t *f, int linenum, const char *normal_color) -{ +int fprint_linenum(FILE *out, file_t *f, int linenum, const char *normal_color) { int printed = 0; switch (options.format) { - case FORMAT_FANCY: case FORMAT_PLAIN: { + case FORMAT_FANCY: + case FORMAT_PLAIN: { int space = 0; - for (int i = (int)f->nlines; i > 0; i /= 10) ++space; + for (int i = (int)f->nlines; i > 0; i /= 10) + ++space; if (options.format == FORMAT_FANCY) printed += fprintf(out, "\033[0;2m%*d\033(0\x78\033(B%s", space, linenum, normal_color ? normal_color : ""); - else - printed += fprintf(out, "%*d|", space, linenum); + else printed += fprintf(out, "%*d|", space, linenum); break; } case FORMAT_FILE_LINE: { @@ -271,8 +251,7 @@ int fprint_linenum(FILE *out, file_t *f, int linenum, const char *normal_color) static file_t *printing_file = NULL; static int last_line_num = -1; -static int _fprint_between(FILE *out, const char *start, const char *end, const char *normal_color) -{ +static int _fprint_between(FILE *out, const char *start, const char *end, const char *normal_color) { int printed = 0; do { // Cheeky lookbehind to see if line number should be printed @@ -295,8 +274,7 @@ static int _fprint_between(FILE *out, const char *start, const char *end, const return printed; } -static void fprint_context(FILE *out, file_t *f, const char *prev, const char *next) -{ +static void fprint_context(FILE *out, file_t *f, const char *prev, const char *next) { if (options.context_before == ALL_CONTEXT || options.context_after == ALL_CONTEXT) { _fprint_between(out, prev ? prev : f->start, next ? next : f->end, "\033[m"); return; @@ -304,15 +282,18 @@ static void fprint_context(FILE *out, file_t *f, const char *prev, const char *n const char *before_next = next; if (next && options.context_before >= 0) { size_t line_before_next = get_line_number(printing_file, next); - line_before_next = options.context_before >= (int)line_before_next ? 1 : line_before_next - (size_t)options.context_before; + line_before_next = + options.context_before >= (int)line_before_next ? 1 : line_before_next - (size_t)options.context_before; before_next = get_line(printing_file, line_before_next); if (prev && before_next < prev) before_next = prev; } const char *after_prev = prev; if (prev && options.context_after >= 0) { size_t line_after_prev = get_line_number(printing_file, prev) + (size_t)options.context_after + 1; - after_prev = line_after_prev > printing_file->nlines ? - printing_file->end : get_line(printing_file, line_after_prev > printing_file->nlines ? printing_file->nlines : line_after_prev); + after_prev = line_after_prev > printing_file->nlines + ? printing_file->end + : get_line(printing_file, + line_after_prev > printing_file->nlines ? printing_file->nlines : line_after_prev); if (next && after_prev > next) after_prev = next; } if (next && prev && after_prev >= before_next) { @@ -323,11 +304,12 @@ static void fprint_context(FILE *out, file_t *f, const char *prev, const char *n } } -static void on_nl(FILE *out) -{ +static void on_nl(FILE *out) { switch (options.format) { - case FORMAT_FANCY: case FORMAT_PLAIN: - for (int i = (int)printing_file->nlines; i > 0; i /= 10) fputc('.', out); + case FORMAT_FANCY: + case FORMAT_PLAIN: + for (int i = (int)printing_file->nlines; i > 0; i /= 10) + fputc('.', out); fprintf(out, "%s", options.format == FORMAT_FANCY ? "\033[0;2m\033(0\x78\033(B\033[m" : "|"); break; default: break; @@ -337,8 +319,7 @@ static void on_nl(FILE *out) // // Print all the matches in a file. // -static int print_matches(FILE *out, file_t *f, bp_pat_t *pattern, bp_pat_t *defs) -{ +static int print_matches(FILE *out, file_t *f, bp_pat_t *pattern, bp_pat_t *defs) { static int printed_filenames = 0; int matches = 0; const char *prev = NULL; @@ -352,7 +333,7 @@ static int print_matches(FILE *out, file_t *f, bp_pat_t *pattern, bp_pat_t *defs print_opts.replace_color = "\033[0;34;1m"; print_opts.normal_color = "\033[m"; } - for (bp_match_t *m = NULL; next_match(&m, f->start, f->end, pattern, defs, options.skip, options.ignorecase); ) { + for (bp_match_t *m = NULL; next_match(&m, f->start, f->end, pattern, defs, options.skip, options.ignorecase);) { if (++matches == 1 && options.print_filenames) { if (printed_filenames++ > 0) printf("\n"); fprint_filename(out, f->filename); @@ -381,9 +362,7 @@ static int print_matches(FILE *out, file_t *f, bp_pat_t *pattern, bp_pat_t *defs // For a given filename, open the file and attempt to match the given pattern // against it, printing any results according to the flags. // -__attribute__((nonnull)) -static int process_file(const char *filename, bp_pat_t *pattern, bp_pat_t *defs) -{ +__attribute__((nonnull)) static int process_file(const char *filename, bp_pat_t *pattern, bp_pat_t *defs) { file_t *f = load_file(NULL, filename); if (f == NULL) { fprintf(stderr, "Could not open file: %s\n%s\n", filename, strerror(errno)); @@ -416,15 +395,17 @@ static int process_file(const char *filename, bp_pat_t *pattern, bp_pat_t *defs) // Set these temporary values in case the program crashes while in the // middle of inplace modifying a file. If that happens, these variables // are used to restore the original file contents. - modifying_file = out; backup_file = f; + modifying_file = out; + backup_file = f; { matches += print_matches(out, f, pattern, defs); } - modifying_file = NULL; backup_file = NULL; + modifying_file = NULL; + backup_file = NULL; fclose(out); if (matches > 0) - printf(getenv("NO_COLOR") ? "%s: %d replacement%s\n" : "\x1b[33;1m%s:\x1b[m %d replacement%s\n", - filename, matches, matches == 1 ? "" : "s"); + printf(getenv("NO_COLOR") ? "%s: %d replacement%s\n" : "\x1b[33;1m%s:\x1b[m %d replacement%s\n", filename, + matches, matches == 1 ? "" : "s"); } else { matches += print_matches(stdout, f, pattern, defs); } @@ -440,12 +421,10 @@ static int process_file(const char *filename, bp_pat_t *pattern, bp_pat_t *defs) // // Recursively process all non-dotfile files in the given directory. // -__attribute__((nonnull)) -static int process_dir(const char *dirname, bp_pat_t *pattern, bp_pat_t *defs) -{ +__attribute__((nonnull)) static int process_dir(const char *dirname, bp_pat_t *pattern, bp_pat_t *defs) { int matches = 0; glob_t globbuf; - char globpath[PATH_MAX+1] = {'\0'}; + char globpath[PATH_MAX + 1] = {'\0'}; if (snprintf(globpath, PATH_MAX, "%s/*", dirname) > (int)PATH_MAX) errx(EXIT_FAILURE, "Filename is too long: %s/*", dirname); int status = glob(globpath, 0, NULL, &globbuf); @@ -455,12 +434,9 @@ static int process_dir(const char *dirname, bp_pat_t *pattern, bp_pat_t *defs) struct stat statbuf; for (size_t i = 0; i < globbuf.gl_pathc; i++) { if (lstat(globbuf.gl_pathv[i], &statbuf) != 0) continue; - if (S_ISLNK(statbuf.st_mode)) - continue; // Skip symbolic links - else if (S_ISDIR(statbuf.st_mode)) - matches += process_dir(globbuf.gl_pathv[i], pattern, defs); - else if (is_text_file(globbuf.gl_pathv[i])) - matches += process_file(globbuf.gl_pathv[i], pattern, defs); + if (S_ISLNK(statbuf.st_mode)) continue; // Skip symbolic links + else if (S_ISDIR(statbuf.st_mode)) matches += process_dir(globbuf.gl_pathv[i], pattern, defs); + else if (is_text_file(globbuf.gl_pathv[i])) matches += process_file(globbuf.gl_pathv[i], pattern, defs); } } globfree(&globbuf); @@ -470,22 +446,21 @@ static int process_dir(const char *dirname, bp_pat_t *pattern, bp_pat_t *defs) // // Process git files using `git ls-files ...` // -__attribute__((nonnull(1))) -static int process_git_files(bp_pat_t *pattern, bp_pat_t *defs, int argc, char *argv[]) -{ +__attribute__((nonnull(1))) static int process_git_files(bp_pat_t *pattern, bp_pat_t *defs, int argc, char *argv[]) { int fds[2]; require(pipe(fds), "Failed to create pipe"); pid_t child = require(fork(), "Failed to fork"); if (child == 0) { - const char **git_args = new(char*[3+argc+1]); + const char **git_args = new (char * [3 + argc + 1]); int g = 0; git_args[g++] = "git"; git_args[g++] = "ls-files"; git_args[g++] = "-z"; - while (*argv) git_args[g++] = *(argv++); + while (*argv) + git_args[g++] = *(argv++); require(dup2(fds[STDOUT_FILENO], STDOUT_FILENO), "Failed to hook up pipe to stdout"); require(close(fds[STDIN_FILENO]), "Failed to close read end of pipe"); - (void)execvp("git", (char**)git_args); + (void)execvp("git", (char **)git_args); _exit(EXIT_FAILURE); } require(close(fds[STDOUT_FILENO]), "Failed to close write end of pipe"); @@ -495,12 +470,12 @@ static int process_git_files(bp_pat_t *pattern, bp_pat_t *defs, int argc, char * int found = 0; while (getdelim(&path, &path_size, '\0', fp) > 0) found += process_file(path, pattern, defs); - if (path) delete(&path); + if (path) delete (&path); require(fclose(fp), "Failed to close read end of pipe"); int status; - while (waitpid(child, &status, 0) != child) continue; - if (!((WIFEXITED(status) == 1) && (WEXITSTATUS(status) == 0))) - errx(EXIT_FAILURE, "`git ls-files -z` failed."); + while (waitpid(child, &status, 0) != child) + continue; + if (!((WIFEXITED(status) == 1) && (WEXITSTATUS(status) == 0))) errx(EXIT_FAILURE, "`git ls-files -z` failed."); return found; } @@ -508,16 +483,14 @@ static int process_git_files(bp_pat_t *pattern, bp_pat_t *defs, int argc, char * // Load the given grammar (semicolon-separated definitions) // and return the first rule defined. // -static bp_pat_t *load_grammar(bp_pat_t *defs, file_t *f) -{ +static bp_pat_t *load_grammar(bp_pat_t *defs, file_t *f) { return chain_together(defs, assert_pat(f->start, f->end, bp_pattern(f->start, f->end))); } // // Convert a context string to an integer // -static int context_from_flag(const char *flag) -{ +static int context_from_flag(const char *flag) { if (streq(flag, "all")) return ALL_CONTEXT; if (streq(flag, "none")) return NO_CONTEXT; return (int)strtol(flag, NULL, 10); @@ -526,11 +499,9 @@ static int context_from_flag(const char *flag) // // Check if any letters are uppercase // -static bool any_uppercase(const char *str) -{ +static bool any_uppercase(const char *str) { for (; *str; ++str) { - if (isupper(*str)) - return true; + if (isupper(*str)) return true; } return false; } @@ -538,8 +509,7 @@ static bool any_uppercase(const char *str) #define FLAG(f) (flag = get_flag(argv, f, &argv)) #define BOOLFLAG(f) get_boolflag(argv, f, &argv) -int main(int argc, char *argv[]) -{ +int main(int argc, char *argv[]) { char *flag = NULL; bp_pat_t *defs = NULL; @@ -547,9 +517,9 @@ int main(int argc, char *argv[]) bp_pat_t *pattern = NULL; // Load builtins: - file_t *builtins_file = load_file(&loaded_files, "/etc/"BP_NAME"/builtins.bp"); + file_t *builtins_file = load_file(&loaded_files, "/etc/" BP_NAME "/builtins.bp"); if (builtins_file) defs = load_grammar(defs, builtins_file); - file_t *local_file = load_filef(&loaded_files, "%s/.config/"BP_NAME"/builtins.bp", getenv("HOME")); + file_t *local_file = load_filef(&loaded_files, "%s/.config/" BP_NAME "/builtins.bp", getenv("HOME")); if (local_file) defs = load_grammar(defs, local_file); bool explicit_case_sensitivity = false; @@ -580,58 +550,50 @@ int main(int argc, char *argv[]) explicit_case_sensitivity = true; } else if (BOOLFLAG("-l") || BOOLFLAG("--list-files")) { options.mode = MODE_LISTFILES; - } else if (FLAG("-r") || FLAG("--replace")) { - if (!pattern) - errx(EXIT_FAILURE, "No pattern has been defined for replacement to operate on"); + } else if (FLAG("-r") || FLAG("--replace")) { + if (!pattern) errx(EXIT_FAILURE, "No pattern has been defined for replacement to operate on"); // TODO: spoof file as sprintf("pattern => '%s'", flag) // except that would require handling edge cases like quotation marks etc. - pattern = assert_pat(flag, NULL, bp_replacement(pattern, flag, flag+strlen(flag))); + pattern = assert_pat(flag, NULL, bp_replacement(pattern, flag, flag + strlen(flag))); if (options.context_before == USE_DEFAULT_CONTEXT) options.context_before = ALL_CONTEXT; if (options.context_after == USE_DEFAULT_CONTEXT) options.context_after = ALL_CONTEXT; - } else if (FLAG("-g") || FLAG("--grammar")) { + } else if (FLAG("-g") || FLAG("--grammar")) { file_t *f = NULL; - if (strlen(flag) > 3 && strncmp(&flag[strlen(flag)-3], ".bp", 3) == 0) - f = load_file(&loaded_files, flag); - if (f == NULL) - f = load_filef(&loaded_files, "%s/.config/"BP_NAME"/%s.bp", getenv("HOME"), flag); - if (f == NULL) - f = load_filef(&loaded_files, "/etc/"BP_NAME"/%s.bp", flag); - if (f == NULL) - errx(EXIT_FAILURE, "Couldn't find grammar: %s", flag); + if (strlen(flag) > 3 && strncmp(&flag[strlen(flag) - 3], ".bp", 3) == 0) f = load_file(&loaded_files, flag); + if (f == NULL) f = load_filef(&loaded_files, "%s/.config/" BP_NAME "/%s.bp", getenv("HOME"), flag); + if (f == NULL) f = load_filef(&loaded_files, "/etc/" BP_NAME "/%s.bp", flag); + if (f == NULL) errx(EXIT_FAILURE, "Couldn't find grammar: %s", flag); defs = load_grammar(defs, f); // Keep in memory for debug output - } else if (FLAG("-w") || FLAG("--word")) { + } else if (FLAG("-w") || FLAG("--word")) { require(asprintf(&flag, "{|}%s{|}", flag), "Could not allocate memory"); file_t *arg_file = spoof_file(&loaded_files, "<word pattern>", flag, -1); - if (!explicit_case_sensitivity) - options.ignorecase = !any_uppercase(flag); - delete(&flag); + if (!explicit_case_sensitivity) options.ignorecase = !any_uppercase(flag); + delete (&flag); bp_pat_t *p = assert_pat(arg_file->start, arg_file->end, bp_stringpattern(arg_file->start, arg_file->end)); pattern = chain_together(pattern, p); - } else if (FLAG("-s") || FLAG("--skip")) { - bp_pat_t *s = assert_pat(flag, NULL, bp_pattern(flag, flag+strlen(flag))); + } else if (FLAG("-s") || FLAG("--skip")) { + bp_pat_t *s = assert_pat(flag, NULL, bp_pattern(flag, flag + strlen(flag))); options.skip = either_pat(options.skip, s); - } else if (FLAG("-C") || FLAG("--context")) { + } else if (FLAG("-C") || FLAG("--context")) { options.context_before = options.context_after = context_from_flag(flag); - } else if (FLAG("-B") || FLAG("--before-context")) { + } else if (FLAG("-B") || FLAG("--before-context")) { options.context_before = context_from_flag(flag); - } else if (FLAG("-A") || FLAG("--after-context")) { + } else if (FLAG("-A") || FLAG("--after-context")) { options.context_after = context_from_flag(flag); - } else if (FLAG("-f") || FLAG("--format")) { + } else if (FLAG("-f") || FLAG("--format")) { if (streq(flag, "fancy")) options.format = FORMAT_FANCY; else if (streq(flag, "plain")) options.format = FORMAT_PLAIN; else if (streq(flag, "bare")) options.format = FORMAT_BARE; else if (streq(flag, "file:line")) { options.format = FORMAT_FILE_LINE; options.print_filenames = 0; - } else if (!streq(flag, "auto")) - errx(EXIT_FAILURE, "Unknown --format option: %s", flag); + } else if (!streq(flag, "auto")) errx(EXIT_FAILURE, "Unknown --format option: %s", flag); } else if (argv[0][0] != '-' || strncmp(argv[0], "->", 2) == 0) { // As a special case, support `bp '->foo'` as a way to search for // pointer field accesses without needing to escape anything. if (pattern != NULL) break; - bp_pat_t *p = assert_pat(argv[0], NULL, bp_stringpattern(argv[0], argv[0]+strlen(argv[0]))); - if (!explicit_case_sensitivity) - options.ignorecase = !any_uppercase(argv[0]); + bp_pat_t *p = assert_pat(argv[0], NULL, bp_stringpattern(argv[0], argv[0] + strlen(argv[0]))); + if (!explicit_case_sensitivity) options.ignorecase = !any_uppercase(argv[0]); pattern = chain_together(pattern, p); ++argv; } else { @@ -639,10 +601,10 @@ int main(int argc, char *argv[]) } } - if (pattern == NULL) - errx(EXIT_FAILURE, "No pattern provided.\n\n%s", usage); + if (pattern == NULL) errx(EXIT_FAILURE, "No pattern provided.\n\n%s", usage); - for (argc = 0; argv[argc]; ++argc) ; // update argc + for (argc = 0; argv[argc]; ++argc) + ; // update argc if (options.context_before == USE_DEFAULT_CONTEXT) options.context_before = 0; if (options.context_after == USE_DEFAULT_CONTEXT) options.context_after = 0; @@ -654,7 +616,7 @@ int main(int argc, char *argv[]) // be sure to clean it up before exiting. int signals[] = {SIGTERM, SIGINT, SIGXCPU, SIGXFSZ, SIGVTALRM, SIGPROF, SIGSEGV, SIGTSTP}; struct sigaction sa = {.sa_handler = &sig_handler, .sa_flags = (int)(SA_NODEFER | SA_RESETHAND)}; - for (size_t i = 0; i < sizeof(signals)/sizeof(signals[0]); i++) + for (size_t i = 0; i < sizeof(signals) / sizeof(signals[0]); i++) require(sigaction(signals[i], &sa, NULL), "Failed to set signal handler"); // Handle exit() calls gracefully: @@ -668,8 +630,7 @@ int main(int argc, char *argv[]) // Default to git mode if there's a .git directory and no files were specified: struct stat gitdir; - if (argc == 0 && stat(".git", &gitdir) == 0 && S_ISDIR(gitdir.st_mode)) - options.git_mode = true; + if (argc == 0 && stat(".git", &gitdir) == 0 && S_ISDIR(gitdir.st_mode)) options.git_mode = true; int found = 0; if (!isatty(STDIN_FILENO) && !argv[0]) { @@ -682,13 +643,14 @@ int main(int argc, char *argv[]) } else if (argv[0]) { // Files passed in as command line args: struct stat statbuf; - if (!argv[1] && !(stat(argv[0], &statbuf) == 0 && S_ISDIR(statbuf.st_mode))) // Don't print filename for single-file matching + if (!argv[1] + && !(stat(argv[0], &statbuf) == 0 + && S_ISDIR(statbuf.st_mode))) // Don't print filename for single-file matching options.print_filenames = false; - for ( ; argv[0]; argv++) { + for (; argv[0]; argv++) { if (stat(argv[0], &statbuf) == 0 && S_ISDIR(statbuf.st_mode)) // Symlinks are okay if manually specified found += process_dir(argv[0], pattern, defs); - else - found += process_file(argv[0], pattern, defs); + else found += process_file(argv[0], pattern, defs); } } else { // No files, no piped in input, so use files in current dir, recursively @@ -11,29 +11,24 @@ #include <sys/stat.h> #include "files.h" -#include "match.h" -#include "pattern.h" #include "utils.h" // // In the file object, populate the `lines` array with pointers to the // beginning of each line. // -__attribute__((nonnull)) -static void populate_lines(file_t *f) -{ +__attribute__((nonnull)) static void populate_lines(file_t *f) { // Calculate line numbers: size_t linecap = 10; - f->lines = new(const char*[linecap]); + f->lines = new (const char *[linecap]); f->nlines = 0; char *p = f->start; for (size_t n = 0; p && p <= f->end; ++n) { ++f->nlines; - if (n >= linecap) - f->lines = grow(f->lines, linecap *= 2); + if (n >= linecap) f->lines = grow(f->lines, linecap *= 2); f->lines[n] = p; char *nl = memchr(p, '\n', (size_t)(f->end - p)); - if (nl && nl < f->end) p = nl+1; + if (nl && nl < f->end) p = nl + 1; else break; } } @@ -42,13 +37,12 @@ static void populate_lines(file_t *f) // Read an entire file into memory, using a printf-style formatting string to // construct the filename. // -public file_t *load_filef(file_t **files, const char *fmt, ...) -{ - char filename[PATH_MAX+1] = {'\0'}; +public +file_t *load_filef(file_t **files, const char *fmt, ...) { + char filename[PATH_MAX + 1] = {'\0'}; va_list args; va_start(args, fmt); - if (vsnprintf(filename, PATH_MAX, fmt, args) > (int)PATH_MAX) - errx(EXIT_FAILURE, "File name is too large"); + if (vsnprintf(filename, PATH_MAX, fmt, args) > (int)PATH_MAX) errx(EXIT_FAILURE, "File name is too large"); va_end(args); return load_file(files, filename); } @@ -56,8 +50,8 @@ public file_t *load_filef(file_t **files, const char *fmt, ...) // // Read an entire file into memory. // -public file_t *load_file(file_t **files, const char *filename) -{ +public +file_t *load_file(file_t **files, const char *filename) { int fd = filename[0] == '\0' ? STDIN_FILENO : open(filename, O_RDONLY); if (fd < 0) { // Check for <file>:<line> @@ -68,9 +62,9 @@ public file_t *load_file(file_t **files, const char *filename) *colon = '\0'; file_t *f = load_file(files, tmp); if (!f) return f; - long line = strtol(colon+1, &colon, 10); - f->start = (char*)get_line(f, (size_t)line); - f->end = (char*)get_line(f, (size_t)line+1); + long line = strtol(colon + 1, &colon, 10); + f->start = (char *)get_line(f, (size_t)line); + f->end = (char *)get_line(f, (size_t)line + 1); return f; } return NULL; @@ -78,13 +72,12 @@ public file_t *load_file(file_t **files, const char *filename) filename = checked_strdup(filename); for (const char *slashes = strstr(filename, "//"); slashes; slashes = strstr(slashes, "//")) - memmove((char*)slashes, slashes+1, strlen(slashes+1)+1); - file_t *f = new(file_t); + memmove((char *)slashes, slashes + 1, strlen(slashes + 1) + 1); + file_t *f = new (file_t); f->filename = filename; struct stat sb; - if (fstat(fd, &sb) == -1) - goto read_file; + if (fstat(fd, &sb) == -1) goto read_file; f->mmapped = mmap(NULL, (size_t)sb.st_size, PROT_READ, MAP_PRIVATE, fd, 0); if (f->mmapped == MAP_FAILED) { @@ -95,24 +88,21 @@ public file_t *load_file(file_t **files, const char *filename) f->end = &f->mmapped[sb.st_size]; goto finished_loading; - read_file: - { - size_t capacity = 1000, length = 0; - f->allocated = new(char[capacity]); - ssize_t just_read; - while ((just_read=read(fd, &f->allocated[length], (capacity-1) - length)) > 0) { - length += (size_t)just_read; - if (length >= capacity-1) - f->allocated = grow(f->allocated, capacity *= 2); - } - f->allocated[length] = '\0'; - f->start = f->allocated; - f->end = &f->allocated[length]; +read_file: { + size_t capacity = 1000, length = 0; + f->allocated = new (char[capacity]); + ssize_t just_read; + while ((just_read = read(fd, &f->allocated[length], (capacity - 1) - length)) > 0) { + length += (size_t)just_read; + if (length >= capacity - 1) f->allocated = grow(f->allocated, capacity *= 2); } + f->allocated[length] = '\0'; + f->start = f->allocated; + f->end = &f->allocated[length]; +} - finished_loading: - if (fd != STDIN_FILENO) - require(close(fd), "Failed to close file"); +finished_loading: + if (fd != STDIN_FILENO) require(close(fd), "Failed to close file"); populate_lines(f); if (files != NULL) { @@ -125,26 +115,26 @@ public file_t *load_file(file_t **files, const char *filename) // // Set a file struct to represent a region of a different file. // -public void slice_file(file_t *slice, file_t *src, const char *start, const char *end) -{ +public +void slice_file(file_t *slice, file_t *src, const char *start, const char *end) { memset(slice, 0, sizeof(file_t)); slice->filename = src->filename; slice->lines = src->lines; slice->nlines = src->nlines; - slice->start = (char*)start; - slice->end = (char*)end; + slice->start = (char *)start; + slice->end = (char *)end; } // // Create a virtual file from a string. // -public file_t *spoof_file(file_t **files, const char *filename, const char *text, ssize_t _len) -{ +public +file_t *spoof_file(file_t **files, const char *filename, const char *text, ssize_t _len) { if (filename == NULL) filename = ""; - file_t *f = new(file_t); + file_t *f = new (file_t); size_t len = _len == -1 ? strlen(text) : (size_t)_len; f->filename = checked_strdup(filename); - f->allocated = new(char[len+1]); + f->allocated = new (char[len + 1]); memcpy(f->allocated, text, len); f->start = &f->allocated[0]; f->end = &f->allocated[len]; @@ -160,43 +150,36 @@ public file_t *spoof_file(file_t **files, const char *filename, const char *text // Free a file and all memory contained inside its members, then set the input // pointer to NULL. // -public void destroy_file(file_t **at_f) -{ - file_t *f = (file_t*)*at_f; - if (f->filename) - delete(&f->filename); +public +void destroy_file(file_t **at_f) { + file_t *f = (file_t *)*at_f; + if (f->filename) delete (&f->filename); - if (f->lines) - delete(&f->lines); + if (f->lines) delete (&f->lines); - if (f->allocated) - delete(&f->allocated); + if (f->allocated) delete (&f->allocated); if (f->mmapped) { - require(munmap(f->mmapped, (size_t)(f->end - f->mmapped)), - "Failure to un-memory-map some memory"); + require(munmap(f->mmapped, (size_t)(f->end - f->mmapped)), "Failure to un-memory-map some memory"); f->mmapped = NULL; } - delete(at_f); + delete (at_f); } // // Given a pointer, determine which line number it points to. // -public size_t get_line_number(file_t *f, const char *p) -{ +public +size_t get_line_number(file_t *f, const char *p) { if (f->nlines == 0) return 0; // Binary search: - size_t lo = 0, hi = f->nlines-1; + size_t lo = 0, hi = f->nlines - 1; while (lo <= hi) { size_t mid = (lo + hi) / 2; - if (f->lines[mid] == p) - return mid + 1; - else if (f->lines[mid] < p) - lo = mid + 1; - else if (f->lines[mid] > p) - hi = mid - 1; + if (f->lines[mid] == p) return mid + 1; + else if (f->lines[mid] < p) lo = mid + 1; + else if (f->lines[mid] > p) hi = mid - 1; } return lo; // Return the line number whose line starts closest before p } @@ -204,8 +187,8 @@ public size_t get_line_number(file_t *f, const char *p) // // Given a pointer, determine which line column it points to. // -public size_t get_line_column(file_t *f, const char *p) -{ +public +size_t get_line_column(file_t *f, const char *p) { size_t line_no = get_line_number(f, p); return 1 + (size_t)(p - f->lines[line_no]); } @@ -213,8 +196,8 @@ public size_t get_line_column(file_t *f, const char *p) // // Return a pointer to the line with the specified line number. // -public const char *get_line(file_t *f, size_t line_number) -{ +public +const char *get_line(file_t *f, size_t line_number) { if (line_number == 0 || line_number > f->nlines) return NULL; return f->lines[line_number - 1]; } @@ -14,21 +14,14 @@ typedef struct file_s { size_t nlines; } file_t; -__attribute__((nonnull(2))) -file_t *load_file(file_t **files, const char *filename); -__attribute__((format(printf,2,3))) -file_t *load_filef(file_t **files, const char *fmt, ...); -__attribute__((nonnull)) -void slice_file(file_t *slice, file_t *src, const char *start, const char *end); -__attribute__((nonnull(3), returns_nonnull)) -file_t *spoof_file(file_t **files, const char *filename, const char *text, ssize_t len); -__attribute__((nonnull)) -void destroy_file(file_t **f); -__attribute__((pure, nonnull)) -size_t get_line_number(file_t *f, const char *p); -__attribute__((pure, nonnull)) -size_t get_line_column(file_t *f, const char *p); -__attribute__((pure, nonnull)) -const char *get_line(file_t *f, size_t line_number); +__attribute__((nonnull(2))) file_t *load_file(file_t **files, const char *filename); +__attribute__((format(printf, 2, 3))) file_t *load_filef(file_t **files, const char *fmt, ...); +__attribute__((nonnull)) void slice_file(file_t *slice, file_t *src, const char *start, const char *end); +__attribute__((nonnull(3), returns_nonnull)) file_t *spoof_file(file_t **files, const char *filename, const char *text, + ssize_t len); +__attribute__((nonnull)) void destroy_file(file_t **f); +__attribute__((pure, nonnull)) size_t get_line_number(file_t *f, const char *p); +__attribute__((pure, nonnull)) size_t get_line_column(file_t *f, const char *p); +__attribute__((pure, nonnull)) const char *get_line(file_t *f, size_t line_number); // vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 @@ -13,10 +13,10 @@ #include "match.h" #include "pattern.h" -#include "utils.h" #include "utf8.h" +#include "utils.h" -#define MAX_CACHE_SIZE (1<<14) +#define MAX_CACHE_SIZE (1 << 14) // Cache entries for results of matching a pattern at a string position typedef struct cache_entry_s { @@ -51,31 +51,27 @@ typedef struct match_ctx_s { static bp_match_t *unused_matches = NULL; static bp_match_t *in_use_matches = NULL; -static void default_error_handler(char **msg) { - errx(EXIT_FAILURE, "%s", *msg); -} +static void default_error_handler(char **msg) { errx(EXIT_FAILURE, "%s", *msg); } static bp_errhand_t error_handler = default_error_handler; -public bp_errhand_t bp_set_error_handler(bp_errhand_t new_handler) -{ +public +bp_errhand_t bp_set_error_handler(bp_errhand_t new_handler) { bp_errhand_t old_handler = error_handler; error_handler = new_handler; return old_handler; } -#define MATCHES(...) (bp_match_t*[]){__VA_ARGS__, NULL} +#define MATCHES(...) \ + (bp_match_t *[]) { __VA_ARGS__, NULL } -__attribute__((hot, nonnull(1,2,3))) -static bp_match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat); -__attribute__((returns_nonnull)) -static bp_match_t *new_match(bp_pat_t *pat, const char *start, const char *end, bp_match_t *children[]); +__attribute__((hot, nonnull(1, 2, 3))) static bp_match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat); +__attribute__((returns_nonnull)) static bp_match_t *new_match(bp_pat_t *pat, const char *start, const char *end, + bp_match_t *children[]); char *error_message = NULL; -__attribute__((format(printf,2,3))) -static inline void match_error(match_ctx_t *ctx, const char *fmt, ...) -{ +__attribute__((format(printf, 2, 3))) static inline void match_error(match_ctx_t *ctx, const char *fmt, ...) { va_list args; va_start(args, fmt); if (error_message) free(error_message); @@ -84,8 +80,7 @@ static inline void match_error(match_ctx_t *ctx, const char *fmt, ...) longjmp(ctx->error_jump, 1); } -static bp_match_t *clone_match(bp_match_t *m) -{ +static bp_match_t *clone_match(bp_match_t *m) { if (!m) return NULL; bp_match_t *ret = new_match(m->pat, m->start, m->end, NULL); if (m->children) { @@ -95,9 +90,10 @@ static bp_match_t *clone_match(bp_match_t *m) ret->children = ret->_children; } for (int i = 0; m->children[i]; i++) { - if (nchildren+1 >= child_cap) { + if (nchildren + 1 >= child_cap) { ret->children = grow(ret->children, child_cap += 5); - for (size_t j = nchildren; j < child_cap; j++) ret->children[j] = NULL; + for (size_t j = nchildren; j < child_cap; j++) + ret->children[j] = NULL; } ret->children[nchildren++] = clone_match(m->children[i]); } @@ -106,10 +102,8 @@ static bp_match_t *clone_match(bp_match_t *m) } // Prepend to a doubly linked list -static inline void gc_list_prepend(bp_match_t **head, bp_match_t *m) -{ - if (m->gc.home) - errx(1, "Node already has a home"); +static inline void gc_list_prepend(bp_match_t **head, bp_match_t *m) { + if (m->gc.home) errx(1, "Node already has a home"); m->gc.home = head; m->gc.next = *head; if (*head) (*head)->gc.home = &m->gc.next; @@ -117,10 +111,8 @@ static inline void gc_list_prepend(bp_match_t **head, bp_match_t *m) } // Remove from a doubly linked list -static inline void gc_list_remove(bp_match_t *m) -{ - if (!m->gc.home) - errx(1, "Attempt to remove something that isn't in a list"); +static inline void gc_list_remove(bp_match_t *m) { + if (!m->gc.home) errx(1, "Attempt to remove something that isn't in a list"); *m->gc.home = m->gc.next; if (m->gc.next) m->gc.next->gc.home = m->gc.home; m->gc.home = NULL; @@ -130,20 +122,16 @@ static inline void gc_list_remove(bp_match_t *m) // // Hash a string position/pattern. // -static inline size_t hash(const char *str, size_t pat_id) -{ - return (size_t)str + 2*pat_id; -} +static inline size_t hash(const char *str, size_t pat_id) { return (size_t)str + 2 * pat_id; } // // Check if we have cached a failure to match a given pattern at the given position. // -static bool has_cached_failure(match_ctx_t *ctx, const char *str, bp_pat_t *pat) -{ +static bool has_cached_failure(match_ctx_t *ctx, const char *str, bp_pat_t *pat) { if (!ctx->cache->fails) return false; - for (cache_entry_t *fail = &ctx->cache->fails[hash(str, pat->id) & (ctx->cache->size-1)]; fail; fail = fail->next_probe) { - if (fail->pat == pat && fail->start == str) - return true; + for (cache_entry_t *fail = &ctx->cache->fails[hash(str, pat->id) & (ctx->cache->size - 1)]; fail; + fail = fail->next_probe) { + if (fail->pat == pat && fail->start == str) return true; } return false; } @@ -151,9 +139,8 @@ static bool has_cached_failure(match_ctx_t *ctx, const char *str, bp_pat_t *pat) // // Insert into the hash table using a chained scatter table approach. // -static void _hash_insert(cache_t *cache, const char *str, bp_pat_t *pat) -{ - size_t h = hash(str, pat->id) & (cache->size-1); +static void _hash_insert(cache_t *cache, const char *str, bp_pat_t *pat) { + size_t h = hash(str, pat->id) & (cache->size - 1); if (cache->fails[h].pat == NULL) { // No collision cache->fails[h].pat = pat; cache->fails[h].start = str; @@ -162,14 +149,14 @@ static void _hash_insert(cache_t *cache, const char *str, bp_pat_t *pat) return; } - if (cache->fails[h].pat == pat && cache->fails[h].start == str) - return; // Duplicate entry, just leave it be + if (cache->fails[h].pat == pat && cache->fails[h].start == str) return; // Duplicate entry, just leave it be // Shuffle the colliding entry along to a free space: - while (cache->fails[cache->next_free].pat) ++cache->next_free; + while (cache->fails[cache->next_free].pat) + ++cache->next_free; cache_entry_t *free_slot = &cache->fails[cache->next_free]; *free_slot = cache->fails[h]; - size_t h_orig = hash(free_slot->start, free_slot->pat->id) & (cache->size-1); + size_t h_orig = hash(free_slot->start, free_slot->pat->id) & (cache->size - 1); // Put the new entry in its desired slot cache->fails[h].pat = pat; @@ -179,7 +166,8 @@ static void _hash_insert(cache_t *cache, const char *str, bp_pat_t *pat) if (h_orig != h) { // Maintain the chain that points to the colliding entry cache_entry_t *prev = &cache->fails[h_orig]; // Start of the chain - while (prev->next_probe != &cache->fails[h]) prev = prev->next_probe; + while (prev->next_probe != &cache->fails[h]) + prev = prev->next_probe; prev->next_probe = free_slot; } } @@ -187,23 +175,21 @@ static void _hash_insert(cache_t *cache, const char *str, bp_pat_t *pat) // // Save a match in the cache. // -static void cache_failure(match_ctx_t *ctx, const char *str, bp_pat_t *pat) -{ +static void cache_failure(match_ctx_t *ctx, const char *str, bp_pat_t *pat) { cache_t *cache = ctx->cache; // Grow the hash if needed (>99% utilization): - if (cache->occupancy+1 > (cache->size*99)/100) { + if (cache->occupancy + 1 > (cache->size * 99) / 100) { cache_entry_t *old_fails = cache->fails; size_t old_size = cache->size; - cache->size = old_size == 0 ? 16 : 2*old_size; - cache->fails = new(cache_entry_t[cache->size]); + cache->size = old_size == 0 ? 16 : 2 * old_size; + cache->fails = new (cache_entry_t[cache->size]); cache->next_free = 0; // Rehash: for (size_t i = 0; i < old_size; i++) { - if (old_fails[i].pat) - _hash_insert(cache, old_fails[i].start, old_fails[i].pat); + if (old_fails[i].pat) _hash_insert(cache, old_fails[i].start, old_fails[i].pat); } - if (old_fails) delete(&old_fails); + if (old_fails) delete (&old_fails); } _hash_insert(cache, str, pat); @@ -212,18 +198,16 @@ static void cache_failure(match_ctx_t *ctx, const char *str, bp_pat_t *pat) // // Clear and deallocate the cache. // -void cache_destroy(match_ctx_t *ctx) -{ +void cache_destroy(match_ctx_t *ctx) { cache_t *cache = ctx->cache; - if (cache->fails) delete(&cache->fails); + if (cache->fails) delete (&cache->fails); memset(cache, 0, sizeof(cache_t)); } // // Look up a pattern definition by name from a definition pattern. // -static bp_pat_t *_lookup_def(match_ctx_t *ctx, bp_pat_t *defs, const char *name, size_t namelen) -{ +static bp_pat_t *_lookup_def(match_ctx_t *ctx, bp_pat_t *defs, const char *name, size_t namelen) { while (defs != NULL) { if (defs->type == BP_CHAIN) { auto chain = When(defs, BP_CHAIN); @@ -232,8 +216,7 @@ static bp_pat_t *_lookup_def(match_ctx_t *ctx, bp_pat_t *defs, const char *name, defs = chain->first; } else if (defs->type == BP_DEFINITIONS) { auto def = When(defs, BP_DEFINITIONS); - if (namelen == def->namelen && strncmp(def->name, name, namelen) == 0) - return def->meaning; + if (namelen == def->namelen && strncmp(def->name, name, namelen) == 0) return def->meaning; defs = def->next_def; } else { match_error(ctx, "Invalid pattern type in definitions"); @@ -246,9 +229,7 @@ static bp_pat_t *_lookup_def(match_ctx_t *ctx, bp_pat_t *defs, const char *name, // // Look up a pattern definition by name from a context. // -__attribute__((nonnull(2))) -bp_pat_t *lookup_ctx(match_ctx_t *ctx, const char *name, size_t namelen) -{ +__attribute__((nonnull(2))) bp_pat_t *lookup_ctx(match_ctx_t *ctx, const char *name, size_t namelen) { for (; ctx; ctx = ctx->parent_ctx) { bp_pat_t *def = _lookup_def(ctx, ctx->defs, name, namelen); if (def) return def; @@ -260,9 +241,7 @@ bp_pat_t *lookup_ctx(match_ctx_t *ctx, const char *name, size_t namelen) // If the given pattern is a reference, look it up and return the referenced // pattern. This is used for an optimization to avoid repeated lookups. // -__attribute__((nonnull(1))) -static inline bp_pat_t *deref(match_ctx_t *ctx, bp_pat_t *pat) -{ +__attribute__((nonnull(1))) static inline bp_pat_t *deref(match_ctx_t *ctx, bp_pat_t *pat) { if (pat && pat->type == BP_REF) { auto ref = When(pat, BP_REF); bp_pat_t *def = lookup_ctx(ctx, ref->name, ref->len); @@ -276,33 +255,26 @@ static inline bp_pat_t *deref(match_ctx_t *ctx, bp_pat_t *pat) // match for the whole pattern to match (if any). Ideally, this would be a // string literal that can be quickly scanned for. // -static bp_pat_t *get_prerequisite(match_ctx_t *ctx, bp_pat_t *pat) -{ +static bp_pat_t *get_prerequisite(match_ctx_t *ctx, bp_pat_t *pat) { int derefs = 0; - for (bp_pat_t *p = pat; p; ) { + for (bp_pat_t *p = pat; p;) { switch (p->type) { - case BP_BEFORE: - p = When(p, BP_BEFORE)->pat; break; + case BP_BEFORE: p = When(p, BP_BEFORE)->pat; break; case BP_REPEAT: - if (When(p, BP_REPEAT)->min == 0) - return p; - p = When(p, BP_REPEAT)->repeat_pat; break; - case BP_CAPTURE: - p = When(p, BP_CAPTURE)->pat; break; - case BP_TAGGED: - p = When(p, BP_TAGGED)->pat; break; + if (When(p, BP_REPEAT)->min == 0) return p; + p = When(p, BP_REPEAT)->repeat_pat; + break; + case BP_CAPTURE: p = When(p, BP_CAPTURE)->pat; break; + case BP_TAGGED: p = When(p, BP_TAGGED)->pat; break; case BP_CHAIN: { auto chain = When(p, BP_CHAIN); // If pattern is something like (|"foo"|), then use "foo" as the first thing to scan for p = chain->first->max_matchlen == 0 ? chain->second : chain->first; break; } - case BP_MATCH: - p = When(p, BP_MATCH)->pat; break; - case BP_NOT_MATCH: - p = When(p, BP_NOT_MATCH)->pat; break; - case BP_REPLACE: - p = When(p, BP_REPLACE)->pat; break; + case BP_MATCH: p = When(p, BP_MATCH)->pat; break; + case BP_NOT_MATCH: p = When(p, BP_NOT_MATCH)->pat; break; + case BP_REPLACE: p = When(p, BP_REPLACE)->pat; break; case BP_REF: { if (++derefs > 10) return p; // In case of left recursion bp_pat_t *p2 = deref(ctx, p); @@ -319,31 +291,28 @@ static bp_pat_t *get_prerequisite(match_ctx_t *ctx, bp_pat_t *pat) // // Find the next match after prev (or the first match if prev is NULL) // -__attribute__((nonnull(1,2,3))) -static bp_match_t *_next_match(match_ctx_t *ctx, const char *str, bp_pat_t *pat, bp_pat_t *skip) -{ +__attribute__((nonnull(1, 2, 3))) static bp_match_t *_next_match(match_ctx_t *ctx, const char *str, bp_pat_t *pat, + bp_pat_t *skip) { // Clear the cache so it's not full of old cache values from different parts of the file: cache_destroy(ctx); bp_pat_t *first = get_prerequisite(ctx, pat); // Don't bother looping if this can only match at the start/end: - if (first->type == BP_START_OF_FILE) - return match(ctx, str, pat); - else if (first->type == BP_END_OF_FILE) - return match(ctx, ctx->end, pat); + if (first->type == BP_START_OF_FILE) return match(ctx, str, pat); + else if (first->type == BP_END_OF_FILE) return match(ctx, ctx->end, pat); // Performance optimization: if the pattern starts with a string literal, // we can just rely on the highly optimized memmem() implementation to skip // past areas where we know we won't find a match. if (!skip && first->type == BP_STRING && first->min_matchlen > 0) { - char *found = ctx->ignorecase ? - strcasestr(str, When(first, BP_STRING)->string) - : memmem(str, (size_t)(ctx->end - str), When(first, BP_STRING)->string, first->min_matchlen); + char *found = ctx->ignorecase + ? strcasestr(str, When(first, BP_STRING)->string) + : memmem(str, (size_t)(ctx->end - str), When(first, BP_STRING)->string, first->min_matchlen); str = found ? found : ctx->end; } else if (!skip && str > ctx->start && (first->type == BP_START_OF_LINE || first->type == BP_END_OF_LINE)) { char *found = memchr(str, '\n', (size_t)(ctx->end - str)); - str = found ? (first->type == BP_START_OF_LINE ? found+1 : found) : ctx->end; + str = found ? (first->type == BP_START_OF_LINE ? found + 1 : found) : ctx->end; } do { @@ -363,8 +332,7 @@ static bp_match_t *_next_match(match_ctx_t *ctx, const char *str, bp_pat_t *pat, // match object, or NULL if no match is found. // The returned value should be free()'d to avoid memory leaking. // -static bp_match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat) -{ +static bp_match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat) { switch (pat->type) { case BP_DEFINITIONS: { match_ctx_t ctx2 = *ctx; @@ -393,10 +361,12 @@ static bp_match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat) return (str < ctx->end && *str != '\n') ? new_match(pat, str, next_char(str, ctx->end), NULL) : NULL; } case BP_ID_START: { - return (str < ctx->end && isidstart(str, ctx->end)) ? new_match(pat, str, next_char(str, ctx->end), NULL) : NULL; + return (str < ctx->end && isidstart(str, ctx->end)) ? new_match(pat, str, next_char(str, ctx->end), NULL) + : NULL; } case BP_ID_CONTINUE: { - return (str < ctx->end && isidcontinue(str, ctx->end)) ? new_match(pat, str, next_char(str, ctx->end), NULL) : NULL; + return (str < ctx->end && isidcontinue(str, ctx->end)) ? new_match(pat, str, next_char(str, ctx->end), NULL) + : NULL; } case BP_START_OF_FILE: { return (str == ctx->start) ? new_match(pat, str, str, NULL) : NULL; @@ -405,27 +375,28 @@ static bp_match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat) return (str == ctx->start || str[-1] == '\n') ? new_match(pat, str, str, NULL) : NULL; } case BP_END_OF_FILE: { - return (str == ctx->end || (str == ctx->end-1 && *str == '\n')) ? new_match(pat, str, str, NULL) : NULL; + return (str == ctx->end || (str == ctx->end - 1 && *str == '\n')) ? new_match(pat, str, str, NULL) : NULL; } case BP_END_OF_LINE: { return (str == ctx->end || *str == '\n') ? new_match(pat, str, str, NULL) : NULL; } case BP_WORD_BOUNDARY: { - return (str == ctx->start || isidcontinue(str, ctx->end) != isidcontinue(prev_char(ctx->start, str), ctx->end)) ? - new_match(pat, str, str, NULL) : NULL; + return (str == ctx->start || isidcontinue(str, ctx->end) != isidcontinue(prev_char(ctx->start, str), ctx->end)) + ? new_match(pat, str, str, NULL) + : NULL; } case BP_STRING: { if (&str[pat->min_matchlen] > ctx->end) return NULL; - if (pat->min_matchlen > 0 && (ctx->ignorecase ? strncasecmp : strncmp)(str, When(pat, BP_STRING)->string, pat->min_matchlen) != 0) + if (pat->min_matchlen > 0 + && (ctx->ignorecase ? strncasecmp : strncmp)(str, When(pat, BP_STRING)->string, pat->min_matchlen) != 0) return NULL; return new_match(pat, str, str + pat->min_matchlen, NULL); } case BP_RANGE: { if (str >= ctx->end) return NULL; auto range = When(pat, BP_RANGE); - if ((unsigned char)*str < range->low || (unsigned char)*str > range->high) - return NULL; - return new_match(pat, str, str+1, NULL); + if ((unsigned char)*str < range->low || (unsigned char)*str > range->high) return NULL; + return new_match(pat, str, str + 1, NULL); } case BP_NOT: { bp_match_t *m = match(ctx, str, When(pat, BP_NOT)->pat); @@ -435,18 +406,21 @@ static bp_match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat) } return new_match(pat, str, str, NULL); } - case BP_UPTO: case BP_UPTO_STRICT: { + case BP_UPTO: + case BP_UPTO_STRICT: { bp_match_t *m = new_match(pat, str, str, NULL); - bp_pat_t *target = deref(ctx, pat->type == BP_UPTO ? When(pat, BP_UPTO)->target : When(pat, BP_UPTO_STRICT)->target), - *skip = deref(ctx, pat->type == BP_UPTO ? When(pat, BP_UPTO)->skip : When(pat, BP_UPTO_STRICT)->skip); + bp_pat_t *target = + deref(ctx, pat->type == BP_UPTO ? When(pat, BP_UPTO)->target : When(pat, BP_UPTO_STRICT)->target), + *skip = deref(ctx, pat->type == BP_UPTO ? When(pat, BP_UPTO)->skip : When(pat, BP_UPTO_STRICT)->skip); if (!target && !skip) { - while (str < ctx->end && *str != '\n') ++str; + while (str < ctx->end && *str != '\n') + ++str; m->end = str; return m; } size_t child_cap = 0, nchildren = 0; - for (const char *prev = NULL; prev < str; ) { + for (const char *prev = NULL; prev < str;) { prev = str; if (target) { bp_match_t *p = match(ctx, str, target); @@ -463,9 +437,10 @@ static bp_match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat) bp_match_t *s = match(ctx, str, skip); if (s != NULL) { str = s->end; - if (nchildren+2 >= child_cap) { + if (nchildren + 2 >= child_cap) { m->children = grow(m->children, child_cap += 5); - for (size_t i = nchildren; i < child_cap; i++) m->children[i] = NULL; + for (size_t i = nchildren; i < child_cap; i++) + m->children[i] = NULL; } m->children[nchildren++] = s; continue; @@ -474,8 +449,7 @@ static bp_match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat) // This isn't in the for() structure because there needs to // be at least once chance to match the pattern, even if // we're at the end of the string already (e.g. "..$"). - if (str < ctx->end && *str != '\n' && pat->type != BP_UPTO_STRICT) - str = next_char(str, ctx->end); + if (str < ctx->end && *str != '\n' && pat->type != BP_UPTO_STRICT) str = next_char(str, ctx->end); } recycle_match(&m); return NULL; @@ -511,23 +485,23 @@ static bp_match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat) // of looping infinitely. if (msep) recycle_match(&msep); recycle_match(&mp); - if (repeat->max == -1) - reps = ~(size_t)0; - else - reps = (size_t)repeat->max; + if (repeat->max == -1) reps = ~(size_t)0; + else reps = (size_t)repeat->max; break; } if (msep) { - if (nchildren+2 >= child_cap) { + if (nchildren + 2 >= child_cap) { m->children = grow(m->children, child_cap += 5); - for (size_t i = nchildren; i < child_cap; i++) m->children[i] = NULL; + for (size_t i = nchildren; i < child_cap; i++) + m->children[i] = NULL; } m->children[nchildren++] = msep; } - if (nchildren+2 >= child_cap) { + if (nchildren + 2 >= child_cap) { m->children = grow(m->children, child_cap += 5); - for (size_t i = nchildren; i < child_cap; i++) m->children[i] = NULL; + for (size_t i = nchildren; i < child_cap; i++) + m->children[i] = NULL; } m->children[nchildren++] = mp; str = mp->end; @@ -556,11 +530,10 @@ static bp_match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat) pos >= ctx->start && (back->max_matchlen == -1 || pos >= &str[-(int)back->max_matchlen]); pos = prev_char(ctx->start, pos)) { cache_destroy(&slice_ctx); - slice_ctx.start = (char*)pos; + slice_ctx.start = (char *)pos; bp_match_t *m = match(&slice_ctx, pos, back); // Match should not go past str (i.e. (<"AB" "B") should match "ABB", but not "AB") - if (m && m->end != str) - recycle_match(&m); + if (m && m->end != str) recycle_match(&m); else if (m) { cache_destroy(&slice_ctx); return new_match(pat, str, str, MATCHES(m)); @@ -577,10 +550,10 @@ static bp_match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat) bp_match_t *after = match(ctx, str, When(pat, BP_BEFORE)->pat); return after ? new_match(pat, str, str, MATCHES(after)) : NULL; } - case BP_CAPTURE: case BP_TAGGED: { + case BP_CAPTURE: + case BP_TAGGED: { bp_pat_t *to_match = pat->type == BP_CAPTURE ? When(pat, BP_CAPTURE)->pat : When(pat, BP_TAGGED)->pat; - if (!to_match) - return new_match(pat, str, str, NULL); + if (!to_match) return new_match(pat, str, str, NULL); bp_match_t *p = match(ctx, str, to_match); return p ? new_match(pat, str, p->end, MATCHES(p)) : NULL; } @@ -611,7 +584,8 @@ static bp_match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat) bp_pat_t *backref; if (m1->children && m1->children[0]->pat->type == BP_CURDENT) { const char *linestart = m1->start; - while (linestart > ctx->start && linestart[-1] != '\n') --linestart; + while (linestart > ctx->start && linestart[-1] != '\n') + --linestart; // Current indentation: char denter = *linestart; @@ -629,12 +603,14 @@ static bp_match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat) ctx2.parent_ctx = ctx; ctx2.defs = &(bp_pat_t){ .type = BP_DEFINITIONS, - .start = m1->pat->start, .end = m1->pat->end, - .__tagged.BP_DEFINITIONS = { - .name = When(m1->pat, BP_CAPTURE)->name, - .namelen = When(m1->pat, BP_CAPTURE)->namelen, - .meaning = backref, - }, + .start = m1->pat->start, + .end = m1->pat->end, + .__tagged.BP_DEFINITIONS = + { + .name = When(m1->pat, BP_CAPTURE)->name, + .namelen = When(m1->pat, BP_CAPTURE)->namelen, + .meaning = backref, + }, }; m2 = match(&ctx2, m1->end, chain->second); if (!m2) // No need to keep the backref in memory if it didn't match @@ -651,7 +627,8 @@ static bp_match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat) return new_match(pat, str, m2->end, MATCHES(m1, m2)); } - case BP_MATCH: case BP_NOT_MATCH: { + case BP_MATCH: + case BP_NOT_MATCH: { bp_pat_t *target = pat->type == BP_MATCH ? When(pat, BP_MATCH)->pat : When(pat, BP_NOT_MATCH)->pat; bp_match_t *m1 = match(ctx, str, target); if (m1 == NULL) return NULL; @@ -687,8 +664,7 @@ static bp_match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat) return new_match(pat, str, p ? p->end : str, MATCHES(p)); } case BP_REF: { - if (has_cached_failure(ctx, str, pat)) - return NULL; + if (has_cached_failure(ctx, str, pat)) return NULL; auto ref_pat = When(pat, BP_REF); bp_pat_t *ref = lookup_ctx(ctx, ref_pat->name, ref_pat->len); @@ -697,27 +673,31 @@ static bp_match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat) return NULL; } - if (ref->type == BP_LEFTRECURSION) - return match(ctx, str, ref); + if (ref->type == BP_LEFTRECURSION) return match(ctx, str, ref); bp_pat_t rec_op = { .type = BP_LEFTRECURSION, - .start = ref->start, .end = ref->end, - .min_matchlen = 0, .max_matchlen = -1, - .__tagged.BP_LEFTRECURSION = { - .match = NULL, - .visited = false, - .at = str, - .fallback = pat, - .ctx = (void*)ctx, - }, + .start = ref->start, + .end = ref->end, + .min_matchlen = 0, + .max_matchlen = -1, + .__tagged.BP_LEFTRECURSION = + { + .match = NULL, + .visited = false, + .at = str, + .fallback = pat, + .ctx = (void *)ctx, + }, }; match_ctx_t ctx2 = *ctx; ctx2.parent_ctx = ctx; ctx2.defs = &(bp_pat_t){ .type = BP_DEFINITIONS, - .start = pat->start, .end = pat->end, - .__tagged.BP_DEFINITIONS = { + .start = pat->start, + .end = pat->end, + .__tagged.BP_DEFINITIONS = + { .name = ref_pat->name, .namelen = ref_pat->len, .meaning = &rec_op, @@ -758,17 +738,20 @@ static bp_match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat) const char *start = str; const char *p = str; - while (p > ctx->start && p[-1] != '\n') --p; + while (p > ctx->start && p[-1] != '\n') + --p; // Current indentation: char denter = *p; int dents = 0; if (denter == ' ' || denter == '\t') { - for (; *p == denter && p < ctx->end; ++p) ++dents; + for (; *p == denter && p < ctx->end; ++p) + ++dents; } // Subsequent indentation: - while (*str == '\n' || *str == '\n') ++str; + while (*str == '\n' || *str == '\n') + ++str; for (int i = 0; i < dents; i++) if (&str[i] >= ctx->end || str[i] != denter) return NULL; @@ -787,15 +770,14 @@ static bp_match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat) // // Return a match object which can be used (may be allocated or recycled). // -bp_match_t *new_match(bp_pat_t *pat, const char *start, const char *end, bp_match_t *children[]) -{ +bp_match_t *new_match(bp_pat_t *pat, const char *start, const char *end, bp_match_t *children[]) { bp_match_t *m; if (unused_matches) { m = unused_matches; gc_list_remove(m); memset(m, 0, sizeof(bp_match_t)); } else { - m = new(bp_match_t); + m = new (bp_match_t); } // Keep track of the object: gc_list_prepend(&in_use_matches, m); @@ -816,14 +798,13 @@ bp_match_t *new_match(bp_pat_t *pat, const char *start, const char *end, bp_matc // If the given match is not currently a child member of another match (or // otherwise reserved) then put it back in the pool of unused match objects. // -public void recycle_match(bp_match_t **at_m) -{ +public +void recycle_match(bp_match_t **at_m) { bp_match_t *m = *at_m; if (m->children) { for (int i = 0; m->children[i]; i++) recycle_match(&m->children[i]); - if (m->children != m->_children) - delete(&m->children); + if (m->children != m->_children) delete (&m->children); } gc_list_remove(m); @@ -835,13 +816,12 @@ public void recycle_match(bp_match_t **at_m) // // Force all match objects into the pool of unused match objects. // -public size_t recycle_all_matches(void) -{ +public +size_t recycle_all_matches(void) { size_t count = 0; for (bp_match_t *m; (m = in_use_matches); ++count) { gc_list_remove(m); - if (m->children && m->children != m->_children) - delete(&m->children); + if (m->children && m->children != m->_children) delete (&m->children); gc_list_prepend(&unused_matches, m); } return count; @@ -850,13 +830,13 @@ public size_t recycle_all_matches(void) // // Free all match objects in memory. // -public size_t free_all_matches(void) -{ +public +size_t free_all_matches(void) { size_t count = 0; recycle_all_matches(); for (bp_match_t *m; (m = unused_matches); ++count) { gc_list_remove(m); - delete(&m); + delete (&m); } return count; } @@ -865,12 +845,13 @@ public size_t free_all_matches(void) // Iterate over matches. // Usage: for (bp_match_t *m = NULL; next_match(&m, ...); ) {...} // -public bool next_match(bp_match_t **m, const char *start, const char *end, bp_pat_t *pat, bp_pat_t *defs, bp_pat_t *skip, bool ignorecase) -{ +public +bool next_match(bp_match_t **m, const char *start, const char *end, bp_pat_t *pat, bp_pat_t *defs, bp_pat_t *skip, + bool ignorecase) { const char *pos; if (*m) { // Make sure forward progress is occurring, even after zero-width matches: - pos = ((*m)->end > (*m)->start) ? (*m)->end : (*m)->end+1; + pos = ((*m)->end > (*m)->start) ? (*m)->end : (*m)->end + 1; recycle_match(m); } else { pos = start; @@ -895,8 +876,7 @@ public bool next_match(bp_match_t **m, const char *start, const char *end, bp_pa recycle_all_matches(); cache_destroy(&ctx); *m = NULL; - if (error_handler) - error_handler(&error_message); + if (error_handler) error_handler(&error_message); if (error_message) { free(error_message); @@ -909,9 +889,7 @@ public bool next_match(bp_match_t **m, const char *start, const char *end, bp_pa // // Helper function to track state while doing a depth-first search. // -__attribute__((nonnull)) -static bp_match_t *_get_numbered_capture(bp_match_t *m, int *n) -{ +__attribute__((nonnull)) static bp_match_t *_get_numbered_capture(bp_match_t *m, int *n) { if ((m->pat->type == BP_CAPTURE && When(m->pat, BP_CAPTURE)->namelen == 0) || m->pat->type == BP_TAGGED) { if (*n == 1) { return m; @@ -921,8 +899,7 @@ static bp_match_t *_get_numbered_capture(bp_match_t *m, int *n) } } - if (m->pat->type == BP_CAPTURE || m->pat->type == BP_TAGGED) - return NULL; + if (m->pat->type == BP_CAPTURE || m->pat->type == BP_TAGGED) return NULL; if (m->children) { for (int i = 0; m->children[i]; i++) { @@ -936,8 +913,8 @@ static bp_match_t *_get_numbered_capture(bp_match_t *m, int *n) // // Get a specific numbered pattern capture. // -public bp_match_t *get_numbered_capture(bp_match_t *m, int n) -{ +public +bp_match_t *get_numbered_capture(bp_match_t *m, int n) { if (n <= 0) return m; if (m->pat->type == BP_TAGGED || m->pat->type == BP_CAPTURE) { if (n == 1 && m->pat->type == BP_CAPTURE && When(m->pat, BP_CAPTURE)->namelen == 0) return m; @@ -956,15 +933,12 @@ public bp_match_t *get_numbered_capture(bp_match_t *m, int n) // // Helper function for get_named_capture() // -bp_match_t *_get_named_capture(bp_match_t *m, const char *name, size_t namelen) -{ - if (m->pat->type == BP_CAPTURE && When(m->pat, BP_CAPTURE)->name - && When(m->pat, BP_CAPTURE)->namelen == namelen +bp_match_t *_get_named_capture(bp_match_t *m, const char *name, size_t namelen) { + if (m->pat->type == BP_CAPTURE && When(m->pat, BP_CAPTURE)->name && When(m->pat, BP_CAPTURE)->namelen == namelen && strncmp(When(m->pat, BP_CAPTURE)->name, name, When(m->pat, BP_CAPTURE)->namelen) == 0) return m; - if (m->pat->type == BP_TAGGED || m->pat->type == BP_CAPTURE) - return NULL; + if (m->pat->type == BP_TAGGED || m->pat->type == BP_CAPTURE) return NULL; if (m->children) { for (int i = 0; m->children[i]; i++) { @@ -978,10 +952,10 @@ bp_match_t *_get_named_capture(bp_match_t *m, const char *name, size_t namelen) // // Get a capture with a specific name. // -public bp_match_t *get_named_capture(bp_match_t *m, const char *name, ssize_t _namelen) -{ +public +bp_match_t *get_named_capture(bp_match_t *m, const char *name, ssize_t _namelen) { size_t namelen = _namelen < 0 ? strlen(name) : (size_t)_namelen; - if (m->pat->type == BP_TAGGED) {// || (m->pat->type == BP_CAPTURE && m->pat->args.capture.namelen > 0)) { + if (m->pat->type == BP_TAGGED) { // || (m->pat->type == BP_CAPTURE && m->pat->args.capture.namelen > 0)) { if (m->children) { for (int i = 0; m->children[i]; i++) { bp_match_t *cap = _get_named_capture(m->children[i], name, namelen); @@ -27,16 +27,14 @@ struct bp_match_s { typedef void (*bp_errhand_t)(char **err_msg); -__attribute__((nonnull)) -void recycle_match(bp_match_t **at_m); +__attribute__((nonnull)) void recycle_match(bp_match_t **at_m); size_t free_all_matches(void); size_t recycle_all_matches(void); -bool next_match(bp_match_t **m, const char *start, const char *end, bp_pat_t *pat, bp_pat_t *defs, bp_pat_t *skip, bool ignorecase); +bool next_match(bp_match_t **m, const char *start, const char *end, bp_pat_t *pat, bp_pat_t *defs, bp_pat_t *skip, + bool ignorecase); #define stop_matching(m) next_match(m, NULL, NULL, NULL, NULL, NULL, 0) bp_errhand_t bp_set_error_handler(bp_errhand_t handler); -__attribute__((nonnull)) -bp_match_t *get_numbered_capture(bp_match_t *m, int n); -__attribute__((nonnull, pure)) -bp_match_t *get_named_capture(bp_match_t *m, const char *name, ssize_t namelen); +__attribute__((nonnull)) bp_match_t *get_numbered_capture(bp_match_t *m, int n); +__attribute__((nonnull, pure)) bp_match_t *get_named_capture(bp_match_t *m, const char *name, ssize_t namelen); // vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 @@ -5,25 +5,28 @@ #include <err.h> #include <setjmp.h> #include <stdbool.h> -#include <stdlib.h> #include <stdio.h> +#include <stdlib.h> #include <string.h> #include <unistd.h> #include "pattern.h" -#include "utils.h" #include "utf8.h" +#include "utils.h" -#define Pattern(_tag, _start, _end, _min, _max, ...) allocate_pat((bp_pat_t){.type=_tag, .start=_start, .end=_end, \ - .min_matchlen=_min, .max_matchlen=_max, .__tagged._tag={__VA_ARGS__}}) +#define Pattern(_tag, _start, _end, _min, _max, ...) \ + allocate_pat((bp_pat_t){.type = _tag, \ + .start = _start, \ + .end = _end, \ + .min_matchlen = _min, \ + .max_matchlen = _max, \ + .__tagged._tag = {__VA_ARGS__}}) #define UNBOUNDED(pat) ((pat)->max_matchlen == -1) static bp_pat_t *allocated_pats = NULL; -__attribute__((nonnull)) -static bp_pat_t *bp_pattern_nl(const char *str, const char *end, bool allow_nl); -__attribute__((nonnull)) -static bp_pat_t *bp_simplepattern(const char *str, const char *end); +__attribute__((nonnull)) static bp_pat_t *bp_pattern_nl(const char *str, const char *end, bool allow_nl); +__attribute__((nonnull)) static bp_pat_t *bp_simplepattern(const char *str, const char *end); // For error-handling purposes, use setjmp/longjmp to break out of deeply // recursive function calls when a parse error occurs. @@ -31,14 +34,18 @@ bool is_in_try_catch = false; static jmp_buf err_jmp; static maybe_pat_t parse_error = {.success = false}; -#define __TRY_PATTERN__ bool was_in_try_catch = is_in_try_catch; \ - if (!is_in_try_catch) { is_in_try_catch = true; if (setjmp(err_jmp)) return parse_error; } -#define __END_TRY_PATTERN__ if (!was_in_try_catch) is_in_try_catch = false; +#define __TRY_PATTERN__ \ + bool was_in_try_catch = is_in_try_catch; \ + if (!is_in_try_catch) { \ + is_in_try_catch = true; \ + if (setjmp(err_jmp)) return parse_error; \ + } +#define __END_TRY_PATTERN__ \ + if (!was_in_try_catch) is_in_try_catch = false; -static inline void parse_err(const char *start, const char *end, const char *msg) -{ +static inline void parse_err(const char *start, const char *end, const char *msg) { if (!is_in_try_catch) { - fprintf(stderr, "Parse error: %s\n%.*s\n", msg, (int)(end-start), start); + fprintf(stderr, "Parse error: %s\n%.*s\n", msg, (int)(end - start), start); exit(1); } parse_error.value.error.start = start; @@ -51,10 +58,10 @@ static inline void parse_err(const char *start, const char *end, const char *msg // Allocate a new pattern for this file (ensuring it will be automatically // freed when the file is freed) // -public bp_pat_t *allocate_pat(bp_pat_t pat) -{ +public +bp_pat_t *allocate_pat(bp_pat_t pat) { static size_t next_pat_id = 1; - bp_pat_t *allocated = new(bp_pat_t); + bp_pat_t *allocated = new (bp_pat_t); *allocated = pat; allocated->home = &allocated_pats; allocated->next = allocated_pats; @@ -67,23 +74,21 @@ public bp_pat_t *allocate_pat(bp_pat_t pat) // // Helper function to initialize a range object. // -__attribute__((nonnull(1,2,5))) -static bp_pat_t *new_range(const char *start, const char *end, size_t min, ssize_t max, bp_pat_t *repeating, bp_pat_t *sep) -{ - size_t minlen = min*repeating->min_matchlen + (min > 0 ? min-1 : 0)*(sep ? sep->min_matchlen : 0); - ssize_t maxlen = (max == -1 || UNBOUNDED(repeating) || (max != 0 && max != 1 && sep && UNBOUNDED(sep))) ? (ssize_t)-1 - : max*repeating->max_matchlen + (ssize_t)(max > 0 ? min-1 : 0)*(ssize_t)(sep ? sep->min_matchlen : 0); - return Pattern(BP_REPEAT, start, end, minlen, maxlen, - .min=min, .max=max, .repeat_pat=repeating, .sep=sep); +__attribute__((nonnull(1, 2, 5))) static bp_pat_t *new_range(const char *start, const char *end, size_t min, + ssize_t max, bp_pat_t *repeating, bp_pat_t *sep) { + size_t minlen = min * repeating->min_matchlen + (min > 0 ? min - 1 : 0) * (sep ? sep->min_matchlen : 0); + ssize_t maxlen = + (max == -1 || UNBOUNDED(repeating) || (max != 0 && max != 1 && sep && UNBOUNDED(sep))) + ? (ssize_t)-1 + : max * repeating->max_matchlen + (ssize_t)(max > 0 ? min - 1 : 0) * (ssize_t)(sep ? sep->min_matchlen : 0); + return Pattern(BP_REPEAT, start, end, minlen, maxlen, .min = min, .max = max, .repeat_pat = repeating, .sep = sep); } // // Take a pattern and expand it into a chain of patterns if it's followed by // any patterns (e.g. "`x `y"), otherwise return the original input. // -__attribute__((nonnull)) -static bp_pat_t *expand_chain(bp_pat_t *first, const char *end, bool allow_nl) -{ +__attribute__((nonnull)) static bp_pat_t *expand_chain(bp_pat_t *first, const char *end, bool allow_nl) { const char *str = after_spaces(first->end, allow_nl, end); bp_pat_t *second = bp_simplepattern(str, end); if (second == NULL) return first; @@ -94,9 +99,7 @@ static bp_pat_t *expand_chain(bp_pat_t *first, const char *end, bool allow_nl) // // Match trailing => replacements (with optional pattern beforehand) // -__attribute__((nonnull)) -static bp_pat_t *expand_replacements(bp_pat_t *replace_pat, const char *end, bool allow_nl) -{ +__attribute__((nonnull)) static bp_pat_t *expand_replacements(bp_pat_t *replace_pat, const char *end, bool allow_nl) { const char *str = replace_pat->end; while (matchstr(&str, "=>", allow_nl, end)) { const char *repstr; @@ -108,21 +111,19 @@ static bp_pat_t *expand_replacements(bp_pat_t *replace_pat, const char *end, boo for (; str < end && *str != closequote; str = next_char(str, end)) { if (*str == '\\') { if (!str[1] || str[1] == '\n') - parse_err(str, str+1, - "There should be an escape sequence after this backslash."); + parse_err(str, str + 1, "There should be an escape sequence after this backslash."); str = next_char(str, end); } } - replen = (size_t)(str-repstr); + replen = (size_t)(str - repstr); (void)matchchar(&str, closequote, true, end); } else { repstr = ""; replen = 0; } - replace_pat = Pattern(BP_REPLACE, replace_pat->start, str, - replace_pat->min_matchlen, replace_pat->max_matchlen, - .pat=replace_pat, .text=repstr, .len=replen); + replace_pat = Pattern(BP_REPLACE, replace_pat->start, str, replace_pat->min_matchlen, replace_pat->max_matchlen, + .pat = replace_pat, .text = repstr, .len = replen); } return replace_pat; } @@ -132,9 +133,7 @@ static bp_pat_t *expand_replacements(bp_pat_t *replace_pat, const char *end, boo // chain of choices if it's followed by any "/"-separated patterns (e.g. // "`x/`y"), otherwise return the original input. // -__attribute__((nonnull)) -static bp_pat_t *expand_choices(bp_pat_t *first, const char *end, bool allow_nl) -{ +__attribute__((nonnull)) static bp_pat_t *expand_choices(bp_pat_t *first, const char *end, bool allow_nl) { first = expand_chain(first, end, allow_nl); first = expand_replacements(first, end, allow_nl); const char *str = first->end; @@ -143,9 +142,8 @@ static bp_pat_t *expand_choices(bp_pat_t *first, const char *end, bool allow_nl) bp_pat_t *second = bp_simplepattern(str, end); if (second) str = second->end; if (matchstr(&str, "=>", allow_nl, end)) - second = expand_replacements(second ? second : Pattern(BP_STRING, str-2, str-2, 0, 0), end, allow_nl); - if (!second) - parse_err(str, str, "There should be a pattern here after a '/'"); + second = expand_replacements(second ? second : Pattern(BP_STRING, str - 2, str - 2, 0, 0), end, allow_nl); + if (!second) parse_err(str, str, "There should be a pattern here after a '/'"); second = expand_choices(second, end, allow_nl); return either_pat(first, second); } @@ -154,8 +152,8 @@ static bp_pat_t *expand_choices(bp_pat_t *first, const char *end, bool allow_nl) // Given two patterns, return a new pattern for the first pattern followed by // the second. If either pattern is NULL, return the other. // -public bp_pat_t *chain_together(bp_pat_t *first, bp_pat_t *second) -{ +public +bp_pat_t *chain_together(bp_pat_t *first, bp_pat_t *second) { if (first == NULL) return second; if (second == NULL) return first; @@ -163,34 +161,34 @@ public bp_pat_t *chain_together(bp_pat_t *first, bp_pat_t *second) if (second->type == BP_STRING && second->max_matchlen == 0) return first; if (first->type == BP_DEFINITIONS && second->type == BP_DEFINITIONS) { - return Pattern(BP_CHAIN, first->start, second->end, second->min_matchlen, second->max_matchlen, .first=first, .second=second); + return Pattern(BP_CHAIN, first->start, second->end, second->min_matchlen, second->max_matchlen, .first = first, + .second = second); } size_t minlen = first->min_matchlen + second->min_matchlen; ssize_t maxlen = (UNBOUNDED(first) || UNBOUNDED(second)) ? (ssize_t)-1 : first->max_matchlen + second->max_matchlen; - return Pattern(BP_CHAIN, first->start, second->end, minlen, maxlen, .first=first, .second=second); + return Pattern(BP_CHAIN, first->start, second->end, minlen, maxlen, .first = first, .second = second); } // // Given two patterns, return a new pattern for matching either the first // pattern or the second. If either pattern is NULL, return the other. // -public bp_pat_t *either_pat(bp_pat_t *first, bp_pat_t *second) -{ +public +bp_pat_t *either_pat(bp_pat_t *first, bp_pat_t *second) { if (first == NULL) return second; if (second == NULL) return first; size_t minlen = first->min_matchlen < second->min_matchlen ? first->min_matchlen : second->min_matchlen; - ssize_t maxlen = (UNBOUNDED(first) || UNBOUNDED(second)) ? (ssize_t)-1 : - (first->max_matchlen > second->max_matchlen ? first->max_matchlen : second->max_matchlen); - return Pattern(BP_OTHERWISE, first->start, second->end, minlen, maxlen, .first=first, .second=second); + ssize_t maxlen = (UNBOUNDED(first) || UNBOUNDED(second)) + ? (ssize_t)-1 + : (first->max_matchlen > second->max_matchlen ? first->max_matchlen : second->max_matchlen); + return Pattern(BP_OTHERWISE, first->start, second->end, minlen, maxlen, .first = first, .second = second); } // // Parse a definition // -__attribute__((nonnull)) -static bp_pat_t *_bp_definition(const char *start, const char *end) -{ +__attribute__((nonnull)) static bp_pat_t *_bp_definition(const char *start, const char *end) { if (start >= end || !(isalpha(*start) || *start == '_')) return NULL; const char *str = after_name(start, end); size_t namelen = (size_t)(str - start); @@ -201,20 +199,19 @@ static bp_pat_t *_bp_definition(const char *start, const char *end) str = def->end; (void)matchchar(&str, ';', false, end); // Optional semicolon if (is_tagged) { // `id:: foo` means define a rule named `id` that gives captures an `id` tag - def = Pattern(BP_TAGGED, def->start, def->end, def->min_matchlen, def->max_matchlen, - .pat=def, .name=start, .namelen=namelen); + def = Pattern(BP_TAGGED, def->start, def->end, def->min_matchlen, def->max_matchlen, .pat = def, .name = start, + .namelen = namelen); } bp_pat_t *next_def = _bp_definition(after_spaces(str, true, end), end); - return Pattern(BP_DEFINITIONS, start, next_def ? next_def->end : str, 0, -1, - .name=start, .namelen=namelen, .meaning=def, .next_def=next_def); + return Pattern(BP_DEFINITIONS, start, next_def ? next_def->end : str, 0, -1, .name = start, .namelen = namelen, + .meaning = def, .next_def = next_def); } // // Compile a string of BP code into a BP pattern object. // -__attribute__((nonnull)) -static bp_pat_t *_bp_simplepattern(const char *str, const char *end, bool inside_stringpattern) -{ +__attribute__((nonnull)) static bp_pat_t *_bp_simplepattern(const char *str, const char *end, + bool inside_stringpattern) { str = after_spaces(str, false, end); if (!*str) return NULL; const char *start = str; @@ -227,22 +224,18 @@ static bp_pat_t *_bp_simplepattern(const char *str, const char *end, bool inside // In other words, `...foo` parses as `(.)(..foo)` instead of `(..(.)) (foo)` // This is so that `...` can mean "at least one character upto" instead of "upto any character", // which is tautologically the same as matching any single character. - if (*str == '.' && (str+1 >= end || str[1] != '.')) { // ".." + if (*str == '.' && (str + 1 >= end || str[1] != '.')) { // ".." str = next_char(str, end); enum bp_pattype_e type = BP_UPTO; bp_pat_t *extra_arg = NULL; if (matchchar(&str, '%', false, end)) { extra_arg = bp_simplepattern(str, end); - if (extra_arg) - str = extra_arg->end; - else - parse_err(str, str, "There should be a pattern to skip here after the '%'"); + if (extra_arg) str = extra_arg->end; + else parse_err(str, str, "There should be a pattern to skip here after the '%'"); } else if (matchchar(&str, '=', false, end)) { extra_arg = bp_simplepattern(str, end); - if (extra_arg) - str = extra_arg->end; - else - parse_err(str, str, "There should be a pattern here after the '='"); + if (extra_arg) str = extra_arg->end; + else parse_err(str, str, "There should be a pattern here after the '='"); type = BP_UPTO_STRICT; } bp_pat_t *target; @@ -255,9 +248,8 @@ static bp_pat_t *_bp_simplepattern(const char *str, const char *end, bool inside while (target && target->type == BP_STRING && target->max_matchlen == 0) target = bp_simplepattern(target->end, end); } - return type == BP_UPTO ? - Pattern(BP_UPTO, start, str, 0, -1, .target=target, .skip=extra_arg) - : Pattern(BP_UPTO_STRICT, start, str, 0, -1, .target=target, .skip=extra_arg); + return type == BP_UPTO ? Pattern(BP_UPTO, start, str, 0, -1, .target = target, .skip = extra_arg) + : Pattern(BP_UPTO_STRICT, start, str, 0, -1, .target = target, .skip = extra_arg); } else { return Pattern(BP_ANYCHAR, start, str, 1, UTF8_MAXCHARLEN); } @@ -273,7 +265,7 @@ static bp_pat_t *_bp_simplepattern(const char *str, const char *end, bool inside str = next_char(c1_loc, end); if (*str == '-') { // Range const char *c2_loc = ++str; - if (next_char(c1_loc, end) > c1_loc+1 || next_char(c2_loc, end) > c2_loc+1) + if (next_char(c1_loc, end) > c1_loc + 1 || next_char(c2_loc, end) > c2_loc + 1) parse_err(start, next_char(c2_loc, end), "Sorry, UTF-8 character ranges are not yet supported."); char c1 = *c1_loc, c2 = *c2_loc; if (!c2 || c2 == '\n') @@ -284,11 +276,12 @@ static bp_pat_t *_bp_simplepattern(const char *str, const char *end, bool inside c2 = tmp; } str = next_char(c2_loc, end); - bp_pat_t *pat = Pattern(BP_RANGE, start == c1_loc - 1 ? start : c1_loc, str, 1, 1, .low=c1, .high=c2); + bp_pat_t *pat = + Pattern(BP_RANGE, start == c1_loc - 1 ? start : c1_loc, str, 1, 1, .low = c1, .high = c2); all = either_pat(all, pat); } else { size_t len = (size_t)(str - c1_loc); - bp_pat_t *pat = Pattern(BP_STRING, start, str, len, (ssize_t)len, .string=strndup(c1_loc, len)); + bp_pat_t *pat = Pattern(BP_STRING, start, str, len, (ssize_t)len, .string = strndup(c1_loc, len)); all = either_pat(all, pat); } } while (*str++ == ','); @@ -297,12 +290,11 @@ static bp_pat_t *_bp_simplepattern(const char *str, const char *end, bool inside } // Escapes case '\\': { - if (!*str || *str == '\n') - parse_err(str, str, "There should be an escape sequence here after this backslash."); + if (!*str || *str == '\n') parse_err(str, str, "There should be an escape sequence here after this backslash."); bp_pat_t *all = NULL; do { // Comma-separated items: - const char *itemstart = str-1; + const char *itemstart = str - 1; if (*str == 'N') { // \N (nodent) all = either_pat(all, Pattern(BP_NODENT, itemstart, ++str, 1, -1)); continue; @@ -322,21 +314,19 @@ static bp_pat_t *_bp_simplepattern(const char *str, const char *end, bool inside const char *opstart = str; unsigned char e_low = (unsigned char)unescapechar(str, &str, end); - if (str == opstart) - parse_err(start, str+1, "This isn't a valid escape sequence."); + if (str == opstart) parse_err(start, str + 1, "This isn't a valid escape sequence."); unsigned char e_high = e_low; if (*str == '-') { // Escape range (e.g. \x00-\xFF) ++str; - if (next_char(str, end) != str+1) + if (next_char(str, end) != str + 1) parse_err(start, next_char(str, end), "Sorry, UTF8 escape sequences are not supported in ranges."); const char *seqstart = str; e_high = (unsigned char)unescapechar(str, &str, end); - if (str == seqstart) - parse_err(seqstart, str+1, "This value isn't a valid escape sequence"); + if (str == seqstart) parse_err(seqstart, str + 1, "This value isn't a valid escape sequence"); if (e_high < e_low) parse_err(start, str, "Escape ranges should be low-to-high, but this is high-to-low."); } - bp_pat_t *esc = Pattern(BP_RANGE, start, str, 1, 1, .low=e_low, .high=e_high); + bp_pat_t *esc = Pattern(BP_RANGE, start, str, 1, 1, .low = e_low, .high = e_high); all = either_pat(all, esc); } while (*str == ',' && str++ < end); @@ -347,33 +337,44 @@ static bp_pat_t *_bp_simplepattern(const char *str, const char *end, bool inside return Pattern(BP_WORD_BOUNDARY, start, str, 0, 0); } // String literal - case '"': case '\'': case '\002': case '}': { + case '"': + case '\'': + case '\002': + case '}': { char endquote = c == '\002' ? '\003' : (c == '}' ? '{' : c); - char *litstart = (char*)str; + char *litstart = (char *)str; while (str < end && *str != endquote) str = next_char(str, end); size_t len = (size_t)(str - litstart); str = next_char(str, end); if (c == '}') ++start; // Don't include the "}" in the pattern source range - return Pattern(BP_STRING, start, str, len, (ssize_t)len, .string=strndup(litstart, len)); + return Pattern(BP_STRING, start, str, len, (ssize_t)len, .string = strndup(litstart, len)); } // Not <pat> case '!': { bp_pat_t *p = bp_simplepattern(str, end); if (!p) parse_err(str, str, "There should be a pattern after this '!'"); - return Pattern(BP_NOT, start, p->end, 0, 0, .pat=p); + return Pattern(BP_NOT, start, p->end, 0, 0, .pat = p); } // Number of repetitions: <N>(-<N> / - / + / "") - case '0': case '1': case '2': case '3': case '4': case '5': - case '6': case '7': case '8': case '9': { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': { size_t min = 0; ssize_t max = -1; --str; - long n1 = strtol(str, (char**)&str, 10); + long n1 = strtol(str, (char **)&str, 10); if (matchchar(&str, '-', false, end)) { str = after_spaces(str, false, end); const char *numstart = str; - long n2 = strtol(str, (char**)&str, 10); + long n2 = strtol(str, (char **)&str, 10); if (str == numstart) min = 0, max = (ssize_t)n1; else min = (size_t)n1, max = (ssize_t)n2; } else if (matchchar(&str, '+', false, end)) { @@ -382,14 +383,12 @@ static bp_pat_t *_bp_simplepattern(const char *str, const char *end, bool inside min = (size_t)n1, max = (ssize_t)n1; } bp_pat_t *repeating = bp_simplepattern(str, end); - if (!repeating) - parse_err(str, str, "There should be a pattern after this repetition count."); + if (!repeating) parse_err(str, str, "There should be a pattern after this repetition count."); str = repeating->end; bp_pat_t *sep = NULL; if (matchchar(&str, '%', false, end)) { sep = bp_simplepattern(str, end); - if (!sep) - parse_err(str, str, "There should be a separator pattern after this '%%'"); + if (!sep) parse_err(str, str, "There should be a separator pattern after this '%%'"); str = sep->end; } else { str = repeating->end; @@ -399,22 +398,19 @@ static bp_pat_t *_bp_simplepattern(const char *str, const char *end, bool inside // Lookbehind case '<': { bp_pat_t *behind = bp_simplepattern(str, end); - if (!behind) - parse_err(str, str, "There should be a pattern after this '<'"); - return Pattern(BP_AFTER, start, behind->end, 0, 0, .pat=behind); + if (!behind) parse_err(str, str, "There should be a pattern after this '<'"); + return Pattern(BP_AFTER, start, behind->end, 0, 0, .pat = behind); } // Lookahead case '>': { bp_pat_t *ahead = bp_simplepattern(str, end); - if (!ahead) - parse_err(str, str, "There should be a pattern after this '>'"); - return Pattern(BP_BEFORE, start, ahead->end, 0, 0, .pat=ahead); + if (!ahead) parse_err(str, str, "There should be a pattern after this '>'"); + return Pattern(BP_BEFORE, start, ahead->end, 0, 0, .pat = ahead); } // Parentheses case '(': { bp_pat_t *pat = bp_pattern_nl(str, end, true); - if (!pat) - parse_err(str, str, "There should be a valid pattern after this parenthesis."); + if (!pat) parse_err(str, str, "There should be a valid pattern after this parenthesis."); str = pat->end; if (!matchchar(&str, ')', true, end)) parse_err(str, str, "Missing paren: )"); pat->start = start; @@ -424,24 +420,22 @@ static bp_pat_t *_bp_simplepattern(const char *str, const char *end, bool inside // Square brackets case '[': { bp_pat_t *maybe = bp_pattern_nl(str, end, true); - if (!maybe) - parse_err(str, str, "There should be a valid pattern after this square bracket."); + if (!maybe) parse_err(str, str, "There should be a valid pattern after this square bracket."); str = maybe->end; (void)matchchar(&str, ']', true, end); return new_range(start, str, 0, 1, maybe, NULL); } // Repeating - case '*': case '+': { + case '*': + case '+': { size_t min = (size_t)(c == '*' ? 0 : 1); bp_pat_t *repeating = bp_simplepattern(str, end); - if (!repeating) - parse_err(str, str, "There should be a valid pattern to repeat here"); + if (!repeating) parse_err(str, str, "There should be a valid pattern to repeat here"); str = repeating->end; bp_pat_t *sep = NULL; if (matchchar(&str, '%', false, end)) { sep = bp_simplepattern(str, end); - if (!sep) - parse_err(str, str, "There should be a separator pattern after the '%%' here."); + if (!sep) parse_err(str, str, "There should be a separator pattern after the '%%' here."); str = sep->end; } return new_range(start, str, min, -1, repeating, sep); @@ -451,16 +445,15 @@ static bp_pat_t *_bp_simplepattern(const char *str, const char *end, bool inside if (matchchar(&str, ':', false, end)) { // Tagged capture @:Foo=pat const char *name = str; str = after_name(name, end); - if (str <= name) - parse_err(start, str, "There should be an identifier after this '@:'"); + if (str <= name) parse_err(start, str, "There should be an identifier after this '@:'"); size_t namelen = (size_t)(str - name); bp_pat_t *p = NULL; if (matchchar(&str, '=', false, end)) { p = bp_simplepattern(str, end); if (p) str = p->end; } - return Pattern(BP_TAGGED, start, str, p ? p->min_matchlen : 0, p ? p->max_matchlen : 0, - .pat=p, .name=name, .namelen=namelen); + return Pattern(BP_TAGGED, start, str, p ? p->min_matchlen : 0, p ? p->max_matchlen : 0, .pat = p, + .name = name, .namelen = namelen); } const char *name = NULL; @@ -470,31 +463,28 @@ static bp_pat_t *_bp_simplepattern(const char *str, const char *end, bool inside bool backreffable = false; if (a > str && matchchar(&eq, ':', false, end)) { name = str; - namelen = (size_t)(a-str); + namelen = (size_t)(a - str); str = eq; backreffable = true; } else if (a > str && !matchstr(&eq, "=>", false, end) && matchchar(&eq, '=', false, end)) { name = str; - namelen = (size_t)(a-str); + namelen = (size_t)(a - str); str = eq; } bp_pat_t *pat = bp_simplepattern(str, end); - if (!pat) - parse_err(str, str, "There should be a valid pattern here to capture after the '@'"); + if (!pat) parse_err(str, str, "There should be a valid pattern here to capture after the '@'"); - return Pattern(BP_CAPTURE, start, pat->end, pat->min_matchlen, pat->max_matchlen, - .pat = pat, .name = name, .namelen = namelen, .backreffable = backreffable); + return Pattern(BP_CAPTURE, start, pat->end, pat->min_matchlen, pat->max_matchlen, .pat = pat, .name = name, + .namelen = namelen, .backreffable = backreffable); } // Start of file/line case '^': { - if (*str == '^') - return Pattern(BP_START_OF_FILE, start, ++str, 0, 0); + if (*str == '^') return Pattern(BP_START_OF_FILE, start, ++str, 0, 0); return Pattern(BP_START_OF_LINE, start, str, 0, 0); } // End of file/line: case '$': { - if (*str == '$') - return Pattern(BP_END_OF_FILE, start, ++str, 0, 0); + if (*str == '$') return Pattern(BP_END_OF_FILE, start, ++str, 0, 0); return Pattern(BP_END_OF_LINE, start, str, 0, 0); } default: { @@ -504,7 +494,7 @@ static bp_pat_t *_bp_simplepattern(const char *str, const char *end, bool inside if (!isalpha(c) && c != '_') return NULL; str = after_name(start, end); size_t namelen = (size_t)(str - start); - return Pattern(BP_REF, start, str, 0, -1, .name=start, .len=namelen); + return Pattern(BP_REF, start, str, 0, -1, .name = start, .len = namelen); } } } @@ -513,20 +503,19 @@ static bp_pat_t *_bp_simplepattern(const char *str, const char *end, bool inside // Similar to bp_simplepattern, except that the pattern begins with an implicit // '}' open quote that can be closed with '{' // -public maybe_pat_t bp_stringpattern(const char *str, const char *end) -{ +public +maybe_pat_t bp_stringpattern(const char *str, const char *end) { __TRY_PATTERN__ if (!end) end = str + strlen(str); - char *start = (char*)str; + char *start = (char *)str; while (str < end && *str != '{') str = next_char(str, end); size_t len = (size_t)(str - start); - bp_pat_t *pat = len > 0 ? Pattern(BP_STRING, start, str, len, (ssize_t)len, .string=strndup(start, len)) : NULL; + bp_pat_t *pat = len > 0 ? Pattern(BP_STRING, start, str, len, (ssize_t)len, .string = strndup(start, len)) : NULL; str += 1; if (str < end) { bp_pat_t *interp = bp_pattern_nl(str, end, true); - if (interp) - pat = chain_together(pat, interp); + if (interp) pat = chain_together(pat, interp); pat->end = end; } __END_TRY_PATTERN__ @@ -536,8 +525,7 @@ public maybe_pat_t bp_stringpattern(const char *str, const char *end) // // Wrapper for _bp_simplepattern() that expands any postfix operators (~, !~) // -static bp_pat_t *bp_simplepattern(const char *str, const char *end) -{ +static bp_pat_t *bp_simplepattern(const char *str, const char *end) { const char *start = str; bp_pat_t *pat = _bp_simplepattern(str, end, false); if (pat == NULL) return pat; @@ -546,20 +534,18 @@ static bp_pat_t *bp_simplepattern(const char *str, const char *end) // Expand postfix operators (if any) while (str < end) { enum bp_pattype_e type; - if (matchchar(&str, '~', false, end)) - type = BP_MATCH; - else if (matchstr(&str, "!~", false, end)) - type = BP_NOT_MATCH; + if (matchchar(&str, '~', false, end)) type = BP_MATCH; + else if (matchstr(&str, "!~", false, end)) type = BP_NOT_MATCH; else break; bp_pat_t *first = pat; bp_pat_t *second = bp_simplepattern(str, end); - if (!second) - parse_err(str, str, "There should be a valid pattern here"); + if (!second) parse_err(str, str, "There should be a valid pattern here"); - pat = type == BP_MATCH ? - Pattern(BP_MATCH, start, second->end, first->min_matchlen, first->max_matchlen, .pat=first, .must_match=second) - : Pattern(BP_NOT_MATCH, start, second->end, first->min_matchlen, first->max_matchlen, .pat=first, .must_not_match=second); + pat = type == BP_MATCH ? Pattern(BP_MATCH, start, second->end, first->min_matchlen, first->max_matchlen, + .pat = first, .must_match = second) + : Pattern(BP_NOT_MATCH, start, second->end, first->min_matchlen, first->max_matchlen, + .pat = first, .must_not_match = second); str = pat->end; } @@ -570,8 +556,8 @@ static bp_pat_t *bp_simplepattern(const char *str, const char *end) // Given a pattern and a replacement string, compile the two into a BP // replace pattern. // -public maybe_pat_t bp_replacement(bp_pat_t *replacepat, const char *replacement, const char *end) -{ +public +maybe_pat_t bp_replacement(bp_pat_t *replacepat, const char *replacement, const char *end) { const char *p = replacement; if (!end) end = replacement + strlen(replacement); __TRY_PATTERN__ @@ -583,84 +569,97 @@ public maybe_pat_t bp_replacement(bp_pat_t *replacepat, const char *replacement, } } __END_TRY_PATTERN__ - size_t rlen = (size_t)(p-replacement); - char *rcpy = new(char[rlen + 1]); + size_t rlen = (size_t)(p - replacement); + char *rcpy = new (char[rlen + 1]); memcpy(rcpy, replacement, rlen); - bp_pat_t *pat = Pattern(BP_REPLACE, replacepat->start, replacepat->end, replacepat->min_matchlen, replacepat->max_matchlen, - .pat=replacepat, .text=rcpy, .len=rlen); + bp_pat_t *pat = Pattern(BP_REPLACE, replacepat->start, replacepat->end, replacepat->min_matchlen, + replacepat->max_matchlen, .pat = replacepat, .text = rcpy, .len = rlen); return (maybe_pat_t){.success = true, .value.pat = pat}; } -static bp_pat_t *bp_pattern_nl(const char *str, const char *end, bool allow_nl) -{ +static bp_pat_t *bp_pattern_nl(const char *str, const char *end, bool allow_nl) { str = after_spaces(str, allow_nl, end); bp_pat_t *pat = bp_simplepattern(str, end); if (pat != NULL) pat = expand_choices(pat, end, allow_nl); if (matchstr(&str, "=>", allow_nl, end)) - pat = expand_replacements(pat ? pat : Pattern(BP_STRING, str-2, str-2, 0, 0), end, allow_nl); + pat = expand_replacements(pat ? pat : Pattern(BP_STRING, str - 2, str - 2, 0, 0), end, allow_nl); return pat; } // // Return a new back reference to an existing match. // -public bp_pat_t *bp_raw_literal(const char *str, size_t len) -{ - return Pattern(BP_STRING, str, &str[len], len, (ssize_t)len, .string=strndup(str, len)); +public +bp_pat_t *bp_raw_literal(const char *str, size_t len) { + return Pattern(BP_STRING, str, &str[len], len, (ssize_t)len, .string = strndup(str, len)); } // // Compile a string representing a BP pattern into a pattern object. // -public maybe_pat_t bp_pattern(const char *str, const char *end) -{ +public +maybe_pat_t bp_pattern(const char *str, const char *end) { str = after_spaces(str, true, end); if (!end) end = str + strlen(str); __TRY_PATTERN__ bp_pat_t *ret = bp_pattern_nl(str, end, false); __END_TRY_PATTERN__ if (ret && after_spaces(ret->end, true, end) < end) - return (maybe_pat_t){.success = false, .value.error.start = ret->end, .value.error.end = end, .value.error.msg = "Failed to parse this part of the pattern"}; - else if (ret) - return (maybe_pat_t){.success = true, .value.pat = ret}; + return (maybe_pat_t){.success = false, + .value.error.start = ret->end, + .value.error.end = end, + .value.error.msg = "Failed to parse this part of the pattern"}; + else if (ret) return (maybe_pat_t){.success = true, .value.pat = ret}; else - return (maybe_pat_t){.success = false, .value.error.start = str, .value.error.end = end, .value.error.msg = "Failed to parse this pattern"}; + return (maybe_pat_t){.success = false, + .value.error.start = str, + .value.error.end = end, + .value.error.msg = "Failed to parse this pattern"}; } -public void free_all_pats(void) -{ +public +void free_all_pats(void) { while (allocated_pats) { bp_pat_t *tofree = allocated_pats; allocated_pats = tofree->next; - delete(&tofree); + delete (&tofree); } } -public void delete_pat(bp_pat_t **at_pat, bool recursive) -{ +public +void delete_pat(bp_pat_t **at_pat, bool recursive) { bp_pat_t *pat = *at_pat; if (!pat) return; -#define T(tag, ...) case tag: { auto _data = When(pat, tag); __VA_ARGS__; break; } +#define T(tag, ...) \ + case tag: { \ + auto _data = When(pat, tag); \ + __VA_ARGS__; \ + break; \ + } #define F(field) delete_pat(&_data->field, true) if (recursive) { switch (pat->type) { - T(BP_DEFINITIONS, F(meaning), F(next_def)) - T(BP_REPEAT, F(sep), F(repeat_pat)) - T(BP_CHAIN, F(first), F(second)) - T(BP_UPTO, F(target), F(skip)) - T(BP_UPTO_STRICT, F(target), F(skip)) - T(BP_OTHERWISE, F(first), F(second)) - T(BP_MATCH, F(pat), F(must_match)) - T(BP_NOT_MATCH, F(pat), F(must_not_match)) - T(BP_REPLACE, F(pat)) - T(BP_CAPTURE, F(pat)) - T(BP_TAGGED, F(pat)) - T(BP_NOT, F(pat)) - T(BP_AFTER, F(pat)) - T(BP_BEFORE, F(pat)) - T(BP_LEFTRECURSION, F(fallback)) - T(BP_STRING, if (_data->string) { free((char*)_data->string); _data->string = NULL; }) + T(BP_DEFINITIONS, F(meaning), F(next_def)) + T(BP_REPEAT, F(sep), F(repeat_pat)) + T(BP_CHAIN, F(first), F(second)) + T(BP_UPTO, F(target), F(skip)) + T(BP_UPTO_STRICT, F(target), F(skip)) + T(BP_OTHERWISE, F(first), F(second)) + T(BP_MATCH, F(pat), F(must_match)) + T(BP_NOT_MATCH, F(pat), F(must_not_match)) + T(BP_REPLACE, F(pat)) + T(BP_CAPTURE, F(pat)) + T(BP_TAGGED, F(pat)) + T(BP_NOT, F(pat)) + T(BP_AFTER, F(pat)) + T(BP_BEFORE, F(pat)) + T(BP_LEFTRECURSION, F(fallback)) + T( + BP_STRING, if (_data->string) { + free((char *)_data->string); + _data->string = NULL; + }) default: break; } } @@ -669,14 +668,20 @@ public void delete_pat(bp_pat_t **at_pat, bool recursive) if (pat->home) *(pat->home) = pat->next; if (pat->next) pat->next->home = pat->home; - delete(at_pat); + delete (at_pat); } -int fprint_pattern(FILE *stream, bp_pat_t *pat) -{ +int fprint_pattern(FILE *stream, bp_pat_t *pat) { if (!pat) return fputs("(null)", stream); switch (pat->type) { -#define CASE(name, ...) case BP_ ## name: { __auto_type data = pat->__tagged.BP_##name; (void)data; int _printed = fputs(#name, stream); __VA_ARGS__; return _printed; } +#define CASE(name, ...) \ + case BP_##name: { \ + __auto_type data = pat->__tagged.BP_##name; \ + (void)data; \ + int _printed = fputs(#name, stream); \ + __VA_ARGS__; \ + return _printed; \ + } #define FMT(...) _printed += fprintf(stream, __VA_ARGS__) #define PAT(p) _printed += fprint_pattern(stream, p) CASE(ERROR) @@ -691,7 +696,8 @@ int fprint_pattern(FILE *stream, bp_pat_t *pat) CASE(REPEAT, FMT("(%u-%d, ", data.min, data.max); PAT(data.repeat_pat); FMT(", sep="); PAT(data.sep); FMT(")")) CASE(BEFORE, FMT("("); PAT(data.pat); FMT(")")) CASE(AFTER, FMT("("); PAT(data.pat); FMT(")")) - CASE(CAPTURE, FMT("("); PAT(data.pat); FMT(", name=%.*s, backref=%s)", data.namelen, data.name, data.backreffable ? "yes" : "no")) + CASE(CAPTURE, FMT("("); PAT(data.pat); + FMT(", name=%.*s, backref=%s)", data.namelen, data.name, data.backreffable ? "yes" : "no")) CASE(OTHERWISE, FMT("("); PAT(data.first); FMT(", "); PAT(data.second); FMT(")")) CASE(CHAIN, FMT("("); PAT(data.first); FMT(", "); PAT(data.second); FMT(")")) CASE(MATCH, FMT("("); PAT(data.pat); FMT(", matches="); PAT(data.must_match); FMT(")")) @@ -706,7 +712,8 @@ int fprint_pattern(FILE *stream, bp_pat_t *pat) CASE(END_OF_LINE) CASE(WORD_BOUNDARY) CASE(DEFINITIONS, FMT("(%.*s=", data.namelen, data.name); PAT(data.meaning); FMT("); "); PAT(data.next_def)) - CASE(TAGGED, FMT("(%.*s=", data.namelen, data.name); PAT(data.pat); FMT(" backref=%s)", data.backreffable ? "yes" : "no")) + CASE(TAGGED, FMT("(%.*s=", data.namelen, data.name); PAT(data.pat); + FMT(" backref=%s)", data.backreffable ? "yes" : "no")) #undef PAT #undef FMT #undef P @@ -3,42 +3,42 @@ // #pragma once +#include <err.h> #include <stdbool.h> #include <stdint.h> #include <stdio.h> #include <sys/types.h> -#include <err.h> // BP virtual machine pattern types enum bp_pattype_e { - BP_ERROR = 0, - BP_ANYCHAR = 1, - BP_ID_START = 2, - BP_ID_CONTINUE = 3, - BP_STRING = 4, - BP_RANGE = 5, - BP_NOT = 6, - BP_UPTO = 7, - BP_UPTO_STRICT = 8, - BP_REPEAT = 9, - BP_BEFORE = 10, - BP_AFTER = 11, - BP_CAPTURE = 12, - BP_OTHERWISE = 13, - BP_CHAIN = 14, - BP_MATCH = 15, - BP_NOT_MATCH = 16, - BP_REPLACE = 17, - BP_REF = 18, - BP_NODENT = 19, - BP_CURDENT = 20, + BP_ERROR = 0, + BP_ANYCHAR = 1, + BP_ID_START = 2, + BP_ID_CONTINUE = 3, + BP_STRING = 4, + BP_RANGE = 5, + BP_NOT = 6, + BP_UPTO = 7, + BP_UPTO_STRICT = 8, + BP_REPEAT = 9, + BP_BEFORE = 10, + BP_AFTER = 11, + BP_CAPTURE = 12, + BP_OTHERWISE = 13, + BP_CHAIN = 14, + BP_MATCH = 15, + BP_NOT_MATCH = 16, + BP_REPLACE = 17, + BP_REF = 18, + BP_NODENT = 19, + BP_CURDENT = 20, BP_START_OF_FILE = 21, BP_START_OF_LINE = 22, - BP_END_OF_FILE = 23, - BP_END_OF_LINE = 24, + BP_END_OF_FILE = 23, + BP_END_OF_LINE = 24, BP_WORD_BOUNDARY = 25, - BP_DEFINITIONS = 26, - BP_TAGGED = 27, + BP_DEFINITIONS = 26, + BP_TAGGED = 27, BP_LEFTRECURSION = 28, }; @@ -58,21 +58,39 @@ struct bp_pat_s { struct { const char *start, *end, *msg; } BP_ERROR; - struct {} BP_ANYCHAR; - struct {} BP_ID_START; - struct {} BP_ID_CONTINUE; - struct {const char *string; size_t len; } BP_STRING; - struct {unsigned char low, high; } BP_RANGE; - struct {bp_pat_t *pat;} BP_NOT; - struct {bp_pat_t *target, *skip;} BP_UPTO; - struct {bp_pat_t *target, *skip;} BP_UPTO_STRICT; + struct { + } BP_ANYCHAR; + struct { + } BP_ID_START; + struct { + } BP_ID_CONTINUE; + struct { + const char *string; + size_t len; + } BP_STRING; + struct { + unsigned char low, high; + } BP_RANGE; + struct { + bp_pat_t *pat; + } BP_NOT; + struct { + bp_pat_t *target, *skip; + } BP_UPTO; + struct { + bp_pat_t *target, *skip; + } BP_UPTO_STRICT; struct { uint32_t min; int32_t max; bp_pat_t *sep, *repeat_pat; } BP_REPEAT; - struct {bp_pat_t *pat;} BP_BEFORE; - struct {bp_pat_t *pat;} BP_AFTER; + struct { + bp_pat_t *pat; + } BP_BEFORE; + struct { + bp_pat_t *pat; + } BP_AFTER; struct { bp_pat_t *pat; const char *name; @@ -85,8 +103,12 @@ struct bp_pat_s { struct { bp_pat_t *first, *second; } BP_CHAIN; - struct {bp_pat_t *pat, *must_match;} BP_MATCH; - struct {bp_pat_t *pat, *must_not_match;} BP_NOT_MATCH; + struct { + bp_pat_t *pat, *must_match; + } BP_MATCH; + struct { + bp_pat_t *pat, *must_not_match; + } BP_NOT_MATCH; struct { bp_pat_t *pat; const char *text; @@ -96,13 +118,20 @@ struct bp_pat_s { const char *name; uint32_t len; } BP_REF; - struct {} BP_NODENT; - struct {} BP_CURDENT; - struct {} BP_START_OF_FILE; - struct {} BP_START_OF_LINE; - struct {} BP_END_OF_FILE; - struct {} BP_END_OF_LINE; - struct {} BP_WORD_BOUNDARY; + struct { + } BP_NODENT; + struct { + } BP_CURDENT; + struct { + } BP_START_OF_FILE; + struct { + } BP_START_OF_LINE; + struct { + } BP_END_OF_FILE; + struct { + } BP_END_OF_LINE; + struct { + } BP_WORD_BOUNDARY; struct { const char *name; uint32_t namelen; @@ -142,21 +171,16 @@ typedef struct { } value; } maybe_pat_t; -__attribute__((returns_nonnull)) -bp_pat_t *allocate_pat(bp_pat_t pat); -__attribute__((nonnull, returns_nonnull)) -bp_pat_t *bp_raw_literal(const char *str, size_t len); -__attribute__((nonnull(1))) -maybe_pat_t bp_stringpattern(const char *str, const char *end); -__attribute__((nonnull(1,2))) -maybe_pat_t bp_replacement(bp_pat_t *replacepat, const char *replacement, const char *end); +__attribute__((returns_nonnull)) bp_pat_t *allocate_pat(bp_pat_t pat); +__attribute__((nonnull, returns_nonnull)) bp_pat_t *bp_raw_literal(const char *str, size_t len); +__attribute__((nonnull(1))) maybe_pat_t bp_stringpattern(const char *str, const char *end); +__attribute__((nonnull(1, 2))) maybe_pat_t bp_replacement(bp_pat_t *replacepat, const char *replacement, + const char *end); bp_pat_t *chain_together(bp_pat_t *first, bp_pat_t *second); bp_pat_t *either_pat(bp_pat_t *first, bp_pat_t *second); -__attribute__((nonnull(1))) -maybe_pat_t bp_pattern(const char *str, const char *end); +__attribute__((nonnull(1))) maybe_pat_t bp_pattern(const char *str, const char *end); void free_all_pats(void); -__attribute__((nonnull)) -void delete_pat(bp_pat_t **at_pat, bool recursive); +__attribute__((nonnull)) void delete_pat(bp_pat_t **at_pat, bool recursive); int fprint_pattern(FILE *stream, bp_pat_t *pat); // vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 diff --git a/printmatch.c b/printmatch.c index 9dccb10..24d301c 100644 --- a/printmatch.c +++ b/printmatch.c @@ -2,9 +2,9 @@ // printmatch.c - Debug visualization of pattern matches. // +#include <ctype.h> #include <stdio.h> #include <string.h> -#include <ctype.h> #include "match.h" #include "printmatch.h" @@ -15,17 +15,15 @@ typedef struct match_node_s { struct match_node_s *next; } match_node_t; -__attribute__((nonnull, pure)) -static int height_of_match(bp_match_t *m); -__attribute__((nonnull)) -static void _explain_matches(match_node_t *firstmatch, int depth, const char *text, size_t textlen); +__attribute__((nonnull, pure)) static int height_of_match(bp_match_t *m); +__attribute__((nonnull)) static void _explain_matches(match_node_t *firstmatch, int depth, const char *text, + size_t textlen); // // Return the height of a match object (i.e. the number of descendents of the // structure). // -static int height_of_match(bp_match_t *m) -{ +static int height_of_match(bp_match_t *m) { int height = 0; for (int i = 0; m->children && m->children[i]; i++) { bp_match_t *child = m->children[i]; @@ -38,8 +36,7 @@ static int height_of_match(bp_match_t *m) // // Print a visual explanation for the as-yet-unprinted matches provided. // -static void _explain_matches(match_node_t *firstmatch, int depth, const char *text, size_t textlen) -{ +static void _explain_matches(match_node_t *firstmatch, int depth, const char *text, size_t textlen) { const char *V = "│"; // Vertical bar const char *H = "─"; // Horizontal bar const char *color = (depth % 2 == 0) ? "34" : "33"; @@ -52,8 +49,7 @@ static void _explain_matches(match_node_t *firstmatch, int depth, const char *te // while also printing earlier matches first when it doesn't affect overall // output height. for (match_node_t *p = firstmatch; p; p = p->next) - if (height_of_match(p->m) > height_of_match(viz)) - viz = p->m; + if (height_of_match(p->m) > height_of_match(viz)) viz = p->m; const char *viz_type = viz->pat->start; size_t viz_typelen = (size_t)(viz->pat->end - viz->pat->start); @@ -61,11 +57,10 @@ static void _explain_matches(match_node_t *firstmatch, int depth, const char *te // literal string being matched. (Backrefs have start/end inside the text // input, instead of something the user typed in) if (viz_type >= text && viz_type <= &text[textlen]) - printf("\033[%zuG\033[0;2m\"\033[%s;1m", 2*textlen+3, color); + printf("\033[%zuG\033[0;2m\"\033[%s;1m", 2 * textlen + 3, color); else if (viz->pat->type == BP_STRING && (viz->end - viz->start) == (long)viz_typelen) - printf("\033[%zuG\033[%s;1m\"", 2*textlen+3, color); - else - printf("\033[%zuG\033[%s;1m", 2*textlen+3, color); + printf("\033[%zuG\033[%s;1m\"", 2 * textlen + 3, color); + else printf("\033[%zuG\033[%s;1m", 2 * textlen + 3, color); for (size_t i = 0; i < viz_typelen; i++) { switch (viz_type[i]) { @@ -75,17 +70,16 @@ static void _explain_matches(match_node_t *firstmatch, int depth, const char *te } } - if (viz_type >= text && viz_type <= &text[textlen]) - printf("\033[0;2m\""); - else if (viz->pat->type == BP_STRING && (viz->end - viz->start) == (long)viz_typelen) - printf("\""); + if (viz_type >= text && viz_type <= &text[textlen]) printf("\033[0;2m\""); + else if (viz->pat->type == BP_STRING && (viz->end - viz->start) == (long)viz_typelen) printf("\""); printf("\033[m"); match_node_t *children = NULL; match_node_t **nextchild = &children; -#define RIGHT_TYPE(m) (m->m->pat->end == m->m->pat->start + viz_typelen && strncmp(m->m->pat->start, viz_type, viz_typelen) == 0) +#define RIGHT_TYPE(m) \ + (m->m->pat->end == m->m->pat->start + viz_typelen && strncmp(m->m->pat->start, viz_type, viz_typelen) == 0) // Print nonzero-width first: for (match_node_t *m = firstmatch; m; m = m->next) { if (RIGHT_TYPE(m)) { @@ -93,45 +87,41 @@ static void _explain_matches(match_node_t *firstmatch, int depth, const char *te if (m->m->pat->type == BP_CHAIN) { bp_match_t *tmp = m->m; while (tmp->pat->type == BP_CHAIN) { - *nextchild = new(match_node_t); + *nextchild = new (match_node_t); (*nextchild)->m = tmp->children[0]; nextchild = &((*nextchild)->next); tmp = tmp->children[1]; } - *nextchild = new(match_node_t); + *nextchild = new (match_node_t); (*nextchild)->m = tmp; nextchild = &((*nextchild)->next); } else { for (int i = 0; m->m->children && m->m->children[i]; i++) { - *nextchild = new(match_node_t); + *nextchild = new (match_node_t); (*nextchild)->m = m->m->children[i]; nextchild = &((*nextchild)->next); } } if (m->m->end == m->m->start) continue; - printf("\033[%zdG\033[0;2m%s\033[0;7;%sm", 1+2*(m->m->start - text), V, color); + printf("\033[%zdG\033[0;2m%s\033[0;7;%sm", 1 + 2 * (m->m->start - text), V, color); for (const char *c = m->m->start; c < m->m->end; ++c) { // TODO: newline if (c > m->m->start) printf(" "); // TODO: utf8 - //while ((*c & 0xC0) != 0x80) printf("%c", *(c++)); - if (*c == '\n') - printf("↵"); - else if (*c == '\t') - printf("⇥"); - else - printf("%c", *c); + // while ((*c & 0xC0) != 0x80) printf("%c", *(c++)); + if (*c == '\n') printf("↵"); + else if (*c == '\t') printf("⇥"); + else printf("%c", *c); } printf("\033[0;2m%s\033[m", V); } else { - *nextchild = new(match_node_t); + *nextchild = new (match_node_t); (*nextchild)->m = m->m; nextchild = &((*nextchild)->next); - printf("\033[%zdG\033[0;2m%s", 1+2*(m->m->start - text), V); - for (ssize_t i = (ssize_t)(2*(m->m->end - m->m->start)-1); i > 0; i--) + printf("\033[%zdG\033[0;2m%s", 1 + 2 * (m->m->start - text), V); + for (ssize_t i = (ssize_t)(2 * (m->m->end - m->m->start) - 1); i > 0; i--) printf(" "); - if (m->m->end > m->m->start) - printf("\033[0;2m%s", V); + if (m->m->end > m->m->start) printf("\033[0;2m%s", V); printf("\033[m"); } } @@ -140,9 +130,9 @@ static void _explain_matches(match_node_t *firstmatch, int depth, const char *te for (match_node_t *m = firstmatch; m; m = m->next) { if (m->m->end > m->m->start) continue; if (RIGHT_TYPE(m)) { - printf("\033[%zdG\033[7;%sm▒\033[m", 1+2*(m->m->start - text), color); + printf("\033[%zdG\033[7;%sm▒\033[m", 1 + 2 * (m->m->start - text), color); } else { - printf("\033[%zdG\033[0;2m%s\033[m", 1+2*(m->m->start - text), V); + printf("\033[%zdG\033[0;2m%s\033[m", 1 + 2 * (m->m->start - text), V); } } @@ -150,18 +140,17 @@ static void _explain_matches(match_node_t *firstmatch, int depth, const char *te for (match_node_t *m = firstmatch; m; m = m->next) { if (m->m->end == m->m->start) { - if (!RIGHT_TYPE(m)) - printf("\033[%zdG\033[0;2m%s", 1 + 2*(m->m->start - text), V); + if (!RIGHT_TYPE(m)) printf("\033[%zdG\033[0;2m%s", 1 + 2 * (m->m->start - text), V); } else { const char *l = "└"; const char *r = "┘"; for (match_node_t *c = children; c; c = c->next) { if (c->m->start == m->m->start || c->m->end == m->m->start) l = V; - if (c->m->start == m->m->end || c->m->end == m->m->end) r = V; + if (c->m->start == m->m->end || c->m->end == m->m->end) r = V; } - printf("\033[%zdG\033[0;2m%s", 1 + 2*(m->m->start - text), l); + printf("\033[%zdG\033[0;2m%s", 1 + 2 * (m->m->start - text), l); const char *h = RIGHT_TYPE(m) ? H : " "; - for (ssize_t n = (ssize_t)(2*(m->m->end - m->m->start) - 1); n > 0; n--) + for (ssize_t n = (ssize_t)(2 * (m->m->end - m->m->start) - 1); n > 0; n--) printf("%s", h); printf("%s\033[m", r); } @@ -170,28 +159,26 @@ static void _explain_matches(match_node_t *firstmatch, int depth, const char *te printf("\n"); - if (children) - _explain_matches(children, depth+1, text, textlen); + if (children) _explain_matches(children, depth + 1, text, textlen); for (match_node_t *c = children, *next = NULL; c; c = next) { next = c->next; - delete(&c); + delete (&c); } } // // Print a visualization of a match object. // -public void explain_match(bp_match_t *m) -{ +public +void explain_match(bp_match_t *m) { printf("\033[?7l"); // Disable line wrapping match_node_t first = {.m = m}; _explain_matches(&first, 0, m->start, (size_t)(m->end - m->start)); printf("\033[?7h"); // Re-enable line wrapping } -static inline int fputc_safe(FILE *out, char c, print_options_t *opts) -{ +static inline int fputc_safe(FILE *out, char c, print_options_t *opts) { int printed = fputc(c, out); if (c == '\n' && opts && opts->on_nl) { opts->on_nl(out); @@ -200,8 +187,8 @@ static inline int fputc_safe(FILE *out, char c, print_options_t *opts) return printed; } -public int fprint_match(FILE *out, const char *file_start, bp_match_t *m, print_options_t *opts) -{ +public +int fprint_match(FILE *out, const char *file_start, bp_match_t *m, print_options_t *opts) { int printed = 0; if (m->pat->type == BP_REPLACE) { auto rep = When(m->pat, BP_REPLACE); @@ -210,14 +197,14 @@ public int fprint_match(FILE *out, const char *file_start, bp_match_t *m, print_ if (opts && opts->replace_color) printed += fprintf(out, "%s", opts->replace_color); // TODO: clean up the line numbering code - for (const char *r = text; r < end; ) { + for (const char *r = text; r < end;) { // Capture substitution - if (*r == '@' && r+1 < end && r[1] != '@') { - const char *next = r+1; + if (*r == '@' && r + 1 < end && r[1] != '@') { + const char *next = r + 1; // Retrieve the capture value: bp_match_t *cap = NULL; if (isdigit(*next)) { - int n = (int)strtol(next, (char**)&next, 10); + int n = (int)strtol(next, (char **)&next, 10); cap = get_numbered_capture(m->children[0], n); } else { const char *name = next, *name_end = after_name(next, end); @@ -254,7 +241,8 @@ public int fprint_match(FILE *out, const char *file_start, bp_match_t *m, print_ // of the match. If the match spans multiple lines, or if // the replacement text contains newlines, this may get weird. const char *line_start = m->start; - while (line_start > file_start && line_start[-1] != '\n') --line_start; + while (line_start > file_start && line_start[-1] != '\n') + --line_start; printed += fputc_safe(out, '\n', opts); for (const char *p = line_start; p < m->start && (*p == ' ' || *p == '\t'); ++p) printed += fputc(*p, out); @@ -272,11 +260,11 @@ public int fprint_match(FILE *out, const char *file_start, bp_match_t *m, print_ for (int i = 0; m->children && m->children[i]; i++) { bp_match_t *child = m->children[i]; // Skip children from e.g. zero-width matches like >@foo - if (!(prev <= child->start && child->start <= m->end && - prev <= child->end && child->end <= m->end)) + if (!(prev <= child->start && child->start <= m->end && prev <= child->end && child->end <= m->end)) continue; if (child->start > prev) { - if (opts && opts->fprint_between) printed += opts->fprint_between(out, prev, child->start, opts->match_color); + if (opts && opts->fprint_between) + printed += opts->fprint_between(out, prev, child->start, opts->match_color); else printed += fwrite(prev, sizeof(char), (size_t)(child->start - prev), out); } printed += fprint_match(out, file_start, child, opts); diff --git a/printmatch.h b/printmatch.h index fa6082c..969fdf3 100644 --- a/printmatch.h +++ b/printmatch.h @@ -10,10 +10,9 @@ typedef struct { int (*fprint_between)(FILE *out, const char *start, const char *end, const char *normal_color); void (*on_nl)(FILE *out); } print_options_t; -__attribute__((nonnull(1,2,3))) -int fprint_match(FILE *out, const char *file_start, bp_match_t *m, print_options_t *opts); +__attribute__((nonnull(1, 2, 3))) int fprint_match(FILE *out, const char *file_start, bp_match_t *m, + print_options_t *opts); -__attribute__((nonnull)) -void explain_match(bp_match_t *m); +__attribute__((nonnull)) void explain_match(bp_match_t *m); // vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 @@ -2,284 +2,363 @@ // utf8.c - UTF8 helper functions // #include <ctype.h> -#include <stdint.h> #include <stdbool.h> +#include <stdint.h> #include <unistd.h> #include "utf8.h" #include "utils.h" -#define ARRAY_LEN(a) (sizeof(a)/sizeof((a)[0])) +#define ARRAY_LEN(a) (sizeof(a) / sizeof((a)[0])) #define likely(x) __builtin_expect((x), 1) #define unlikely(x) __builtin_expect((x), 0) static const uint32_t XID_Start[][2] = { - {0x0041,0x005A}, {0x0061,0x007A}, {0x00AA,0x00AA}, {0x00B5,0x00B5}, {0x00BA,0x00BA}, {0x00C0,0x00D6}, {0x00D8,0x00F6}, {0x00F8,0x01BA}, - {0x01BB,0x01BB}, {0x01BC,0x01BF}, {0x01C0,0x01C3}, {0x01C4,0x0293}, {0x0294,0x0294}, {0x0295,0x02AF}, {0x02B0,0x02C1}, {0x02C6,0x02D1}, - {0x02E0,0x02E4}, {0x02EC,0x02EC}, {0x02EE,0x02EE}, {0x0370,0x0373}, {0x0374,0x0374}, {0x0376,0x0377}, {0x037B,0x037D}, {0x037F,0x037F}, - {0x0386,0x0386}, {0x0388,0x038A}, {0x038C,0x038C}, {0x038E,0x03A1}, {0x03A3,0x03F5}, {0x03F7,0x0481}, {0x048A,0x052F}, {0x0531,0x0556}, - {0x0559,0x0559}, {0x0560,0x0588}, {0x05D0,0x05EA}, {0x05EF,0x05F2}, {0x0620,0x063F}, {0x0640,0x0640}, {0x0641,0x064A}, {0x066E,0x066F}, - {0x0671,0x06D3}, {0x06D5,0x06D5}, {0x06E5,0x06E6}, {0x06EE,0x06EF}, {0x06FA,0x06FC}, {0x06FF,0x06FF}, {0x0710,0x0710}, {0x0712,0x072F}, - {0x074D,0x07A5}, {0x07B1,0x07B1}, {0x07CA,0x07EA}, {0x07F4,0x07F5}, {0x07FA,0x07FA}, {0x0800,0x0815}, {0x081A,0x081A}, {0x0824,0x0824}, - {0x0828,0x0828}, {0x0840,0x0858}, {0x0860,0x086A}, {0x08A0,0x08B4}, {0x08B6,0x08C7}, {0x0904,0x0939}, {0x093D,0x093D}, {0x0950,0x0950}, - {0x0958,0x0961}, {0x0971,0x0971}, {0x0972,0x0980}, {0x0985,0x098C}, {0x098F,0x0990}, {0x0993,0x09A8}, {0x09AA,0x09B0}, {0x09B2,0x09B2}, - {0x09B6,0x09B9}, {0x09BD,0x09BD}, {0x09CE,0x09CE}, {0x09DC,0x09DD}, {0x09DF,0x09E1}, {0x09F0,0x09F1}, {0x09FC,0x09FC}, {0x0A05,0x0A0A}, - {0x0A0F,0x0A10}, {0x0A13,0x0A28}, {0x0A2A,0x0A30}, {0x0A32,0x0A33}, {0x0A35,0x0A36}, {0x0A38,0x0A39}, {0x0A59,0x0A5C}, {0x0A5E,0x0A5E}, - {0x0A72,0x0A74}, {0x0A85,0x0A8D}, {0x0A8F,0x0A91}, {0x0A93,0x0AA8}, {0x0AAA,0x0AB0}, {0x0AB2,0x0AB3}, {0x0AB5,0x0AB9}, {0x0ABD,0x0ABD}, - {0x0AD0,0x0AD0}, {0x0AE0,0x0AE1}, {0x0AF9,0x0AF9}, {0x0B05,0x0B0C}, {0x0B0F,0x0B10}, {0x0B13,0x0B28}, {0x0B2A,0x0B30}, {0x0B32,0x0B33}, - {0x0B35,0x0B39}, {0x0B3D,0x0B3D}, {0x0B5C,0x0B5D}, {0x0B5F,0x0B61}, {0x0B71,0x0B71}, {0x0B83,0x0B83}, {0x0B85,0x0B8A}, {0x0B8E,0x0B90}, - {0x0B92,0x0B95}, {0x0B99,0x0B9A}, {0x0B9C,0x0B9C}, {0x0B9E,0x0B9F}, {0x0BA3,0x0BA4}, {0x0BA8,0x0BAA}, {0x0BAE,0x0BB9}, {0x0BD0,0x0BD0}, - {0x0C05,0x0C0C}, {0x0C0E,0x0C10}, {0x0C12,0x0C28}, {0x0C2A,0x0C39}, {0x0C3D,0x0C3D}, {0x0C58,0x0C5A}, {0x0C60,0x0C61}, {0x0C80,0x0C80}, - {0x0C85,0x0C8C}, {0x0C8E,0x0C90}, {0x0C92,0x0CA8}, {0x0CAA,0x0CB3}, {0x0CB5,0x0CB9}, {0x0CBD,0x0CBD}, {0x0CDE,0x0CDE}, {0x0CE0,0x0CE1}, - {0x0CF1,0x0CF2}, {0x0D04,0x0D0C}, {0x0D0E,0x0D10}, {0x0D12,0x0D3A}, {0x0D3D,0x0D3D}, {0x0D4E,0x0D4E}, {0x0D54,0x0D56}, {0x0D5F,0x0D61}, - {0x0D7A,0x0D7F}, {0x0D85,0x0D96}, {0x0D9A,0x0DB1}, {0x0DB3,0x0DBB}, {0x0DBD,0x0DBD}, {0x0DC0,0x0DC6}, {0x0E01,0x0E30}, {0x0E32,0x0E32}, - {0x0E40,0x0E45}, {0x0E46,0x0E46}, {0x0E81,0x0E82}, {0x0E84,0x0E84}, {0x0E86,0x0E8A}, {0x0E8C,0x0EA3}, {0x0EA5,0x0EA5}, {0x0EA7,0x0EB0}, - {0x0EB2,0x0EB2}, {0x0EBD,0x0EBD}, {0x0EC0,0x0EC4}, {0x0EC6,0x0EC6}, {0x0EDC,0x0EDF}, {0x0F00,0x0F00}, {0x0F40,0x0F47}, {0x0F49,0x0F6C}, - {0x0F88,0x0F8C}, {0x1000,0x102A}, {0x103F,0x103F}, {0x1050,0x1055}, {0x105A,0x105D}, {0x1061,0x1061}, {0x1065,0x1066}, {0x106E,0x1070}, - {0x1075,0x1081}, {0x108E,0x108E}, {0x10A0,0x10C5}, {0x10C7,0x10C7}, {0x10CD,0x10CD}, {0x10D0,0x10FA}, {0x10FC,0x10FC}, {0x10FD,0x10FF}, - {0x1100,0x1248}, {0x124A,0x124D}, {0x1250,0x1256}, {0x1258,0x1258}, {0x125A,0x125D}, {0x1260,0x1288}, {0x128A,0x128D}, {0x1290,0x12B0}, - {0x12B2,0x12B5}, {0x12B8,0x12BE}, {0x12C0,0x12C0}, {0x12C2,0x12C5}, {0x12C8,0x12D6}, {0x12D8,0x1310}, {0x1312,0x1315}, {0x1318,0x135A}, - {0x1380,0x138F}, {0x13A0,0x13F5}, {0x13F8,0x13FD}, {0x1401,0x166C}, {0x166F,0x167F}, {0x1681,0x169A}, {0x16A0,0x16EA}, {0x16EE,0x16F0}, - {0x16F1,0x16F8}, {0x1700,0x170C}, {0x170E,0x1711}, {0x1720,0x1731}, {0x1740,0x1751}, {0x1760,0x176C}, {0x176E,0x1770}, {0x1780,0x17B3}, - {0x17D7,0x17D7}, {0x17DC,0x17DC}, {0x1820,0x1842}, {0x1843,0x1843}, {0x1844,0x1878}, {0x1880,0x1884}, {0x1885,0x1886}, {0x1887,0x18A8}, - {0x18AA,0x18AA}, {0x18B0,0x18F5}, {0x1900,0x191E}, {0x1950,0x196D}, {0x1970,0x1974}, {0x1980,0x19AB}, {0x19B0,0x19C9}, {0x1A00,0x1A16}, - {0x1A20,0x1A54}, {0x1AA7,0x1AA7}, {0x1B05,0x1B33}, {0x1B45,0x1B4B}, {0x1B83,0x1BA0}, {0x1BAE,0x1BAF}, {0x1BBA,0x1BE5}, {0x1C00,0x1C23}, - {0x1C4D,0x1C4F}, {0x1C5A,0x1C77}, {0x1C78,0x1C7D}, {0x1C80,0x1C88}, {0x1C90,0x1CBA}, {0x1CBD,0x1CBF}, {0x1CE9,0x1CEC}, {0x1CEE,0x1CF3}, - {0x1CF5,0x1CF6}, {0x1CFA,0x1CFA}, {0x1D00,0x1D2B}, {0x1D2C,0x1D6A}, {0x1D6B,0x1D77}, {0x1D78,0x1D78}, {0x1D79,0x1D9A}, {0x1D9B,0x1DBF}, - {0x1E00,0x1F15}, {0x1F18,0x1F1D}, {0x1F20,0x1F45}, {0x1F48,0x1F4D}, {0x1F50,0x1F57}, {0x1F59,0x1F59}, {0x1F5B,0x1F5B}, {0x1F5D,0x1F5D}, - {0x1F5F,0x1F7D}, {0x1F80,0x1FB4}, {0x1FB6,0x1FBC}, {0x1FBE,0x1FBE}, {0x1FC2,0x1FC4}, {0x1FC6,0x1FCC}, {0x1FD0,0x1FD3}, {0x1FD6,0x1FDB}, - {0x1FE0,0x1FEC}, {0x1FF2,0x1FF4}, {0x1FF6,0x1FFC}, {0x2071,0x2071}, {0x207F,0x207F}, {0x2090,0x209C}, {0x2102,0x2102}, {0x2107,0x2107}, - {0x210A,0x2113}, {0x2115,0x2115}, {0x2118,0x2118}, {0x2119,0x211D}, {0x2124,0x2124}, {0x2126,0x2126}, {0x2128,0x2128}, {0x212A,0x212D}, - {0x212E,0x212E}, {0x212F,0x2134}, {0x2135,0x2138}, {0x2139,0x2139}, {0x213C,0x213F}, {0x2145,0x2149}, {0x214E,0x214E}, {0x2160,0x2182}, - {0x2183,0x2184}, {0x2185,0x2188}, {0x2C00,0x2C2E}, {0x2C30,0x2C5E}, {0x2C60,0x2C7B}, {0x2C7C,0x2C7D}, {0x2C7E,0x2CE4}, {0x2CEB,0x2CEE}, - {0x2CF2,0x2CF3}, {0x2D00,0x2D25}, {0x2D27,0x2D27}, {0x2D2D,0x2D2D}, {0x2D30,0x2D67}, {0x2D6F,0x2D6F}, {0x2D80,0x2D96}, {0x2DA0,0x2DA6}, - {0x2DA8,0x2DAE}, {0x2DB0,0x2DB6}, {0x2DB8,0x2DBE}, {0x2DC0,0x2DC6}, {0x2DC8,0x2DCE}, {0x2DD0,0x2DD6}, {0x2DD8,0x2DDE}, {0x3005,0x3005}, - {0x3006,0x3006}, {0x3007,0x3007}, {0x3021,0x3029}, {0x3031,0x3035}, {0x3038,0x303A}, {0x303B,0x303B}, {0x303C,0x303C}, {0x3041,0x3096}, - {0x309D,0x309E}, {0x309F,0x309F}, {0x30A1,0x30FA}, {0x30FC,0x30FE}, {0x30FF,0x30FF}, {0x3105,0x312F}, {0x3131,0x318E}, {0x31A0,0x31BF}, - {0x31F0,0x31FF}, {0x3400,0x4DBF}, {0x4E00,0x9FFC}, {0xA000,0xA014}, {0xA015,0xA015}, {0xA016,0xA48C}, {0xA4D0,0xA4F7}, {0xA4F8,0xA4FD}, - {0xA500,0xA60B}, {0xA60C,0xA60C}, {0xA610,0xA61F}, {0xA62A,0xA62B}, {0xA640,0xA66D}, {0xA66E,0xA66E}, {0xA67F,0xA67F}, {0xA680,0xA69B}, - {0xA69C,0xA69D}, {0xA6A0,0xA6E5}, {0xA6E6,0xA6EF}, {0xA717,0xA71F}, {0xA722,0xA76F}, {0xA770,0xA770}, {0xA771,0xA787}, {0xA788,0xA788}, - {0xA78B,0xA78E}, {0xA78F,0xA78F}, {0xA790,0xA7BF}, {0xA7C2,0xA7CA}, {0xA7F5,0xA7F6}, {0xA7F7,0xA7F7}, {0xA7F8,0xA7F9}, {0xA7FA,0xA7FA}, - {0xA7FB,0xA801}, {0xA803,0xA805}, {0xA807,0xA80A}, {0xA80C,0xA822}, {0xA840,0xA873}, {0xA882,0xA8B3}, {0xA8F2,0xA8F7}, {0xA8FB,0xA8FB}, - {0xA8FD,0xA8FE}, {0xA90A,0xA925}, {0xA930,0xA946}, {0xA960,0xA97C}, {0xA984,0xA9B2}, {0xA9CF,0xA9CF}, {0xA9E0,0xA9E4}, {0xA9E6,0xA9E6}, - {0xA9E7,0xA9EF}, {0xA9FA,0xA9FE}, {0xAA00,0xAA28}, {0xAA40,0xAA42}, {0xAA44,0xAA4B}, {0xAA60,0xAA6F}, {0xAA70,0xAA70}, {0xAA71,0xAA76}, - {0xAA7A,0xAA7A}, {0xAA7E,0xAAAF}, {0xAAB1,0xAAB1}, {0xAAB5,0xAAB6}, {0xAAB9,0xAABD}, {0xAAC0,0xAAC0}, {0xAAC2,0xAAC2}, {0xAADB,0xAADC}, - {0xAADD,0xAADD}, {0xAAE0,0xAAEA}, {0xAAF2,0xAAF2}, {0xAAF3,0xAAF4}, {0xAB01,0xAB06}, {0xAB09,0xAB0E}, {0xAB11,0xAB16}, {0xAB20,0xAB26}, - {0xAB28,0xAB2E}, {0xAB30,0xAB5A}, {0xAB5C,0xAB5F}, {0xAB60,0xAB68}, {0xAB69,0xAB69}, {0xAB70,0xABBF}, {0xABC0,0xABE2}, {0xAC00,0xD7A3}, - {0xD7B0,0xD7C6}, {0xD7CB,0xD7FB}, {0xF900,0xFA6D}, {0xFA70,0xFAD9}, {0xFB00,0xFB06}, {0xFB13,0xFB17}, {0xFB1D,0xFB1D}, {0xFB1F,0xFB28}, - {0xFB2A,0xFB36}, {0xFB38,0xFB3C}, {0xFB3E,0xFB3E}, {0xFB40,0xFB41}, {0xFB43,0xFB44}, {0xFB46,0xFBB1}, {0xFBD3,0xFC5D}, {0xFC64,0xFD3D}, - {0xFD50,0xFD8F}, {0xFD92,0xFDC7}, {0xFDF0,0xFDF9}, {0xFE71,0xFE71}, {0xFE73,0xFE73}, {0xFE77,0xFE77}, {0xFE79,0xFE79}, {0xFE7B,0xFE7B}, - {0xFE7D,0xFE7D}, {0xFE7F,0xFEFC}, {0xFF21,0xFF3A}, {0xFF41,0xFF5A}, {0xFF66,0xFF6F}, {0xFF70,0xFF70}, {0xFF71,0xFF9D}, {0xFFA0,0xFFBE}, - {0xFFC2,0xFFC7}, {0xFFCA,0xFFCF}, {0xFFD2,0xFFD7}, {0xFFDA,0xFFDC}, {0x10000,0x1000B}, {0x1000D,0x10026}, {0x10028,0x1003A}, {0x1003C,0x1003D}, - {0x1003F,0x1004D}, {0x10050,0x1005D}, {0x10080,0x100FA}, {0x10140,0x10174}, {0x10280,0x1029C}, {0x102A0,0x102D0}, {0x10300,0x1031F}, {0x1032D,0x10340}, - {0x10341,0x10341}, {0x10342,0x10349}, {0x1034A,0x1034A}, {0x10350,0x10375}, {0x10380,0x1039D}, {0x103A0,0x103C3}, {0x103C8,0x103CF}, {0x103D1,0x103D5}, - {0x10400,0x1044F}, {0x10450,0x1049D}, {0x104B0,0x104D3}, {0x104D8,0x104FB}, {0x10500,0x10527}, {0x10530,0x10563}, {0x10600,0x10736}, {0x10740,0x10755}, - {0x10760,0x10767}, {0x10800,0x10805}, {0x10808,0x10808}, {0x1080A,0x10835}, {0x10837,0x10838}, {0x1083C,0x1083C}, {0x1083F,0x10855}, {0x10860,0x10876}, - {0x10880,0x1089E}, {0x108E0,0x108F2}, {0x108F4,0x108F5}, {0x10900,0x10915}, {0x10920,0x10939}, {0x10980,0x109B7}, {0x109BE,0x109BF}, {0x10A00,0x10A00}, - {0x10A10,0x10A13}, {0x10A15,0x10A17}, {0x10A19,0x10A35}, {0x10A60,0x10A7C}, {0x10A80,0x10A9C}, {0x10AC0,0x10AC7}, {0x10AC9,0x10AE4}, {0x10B00,0x10B35}, - {0x10B40,0x10B55}, {0x10B60,0x10B72}, {0x10B80,0x10B91}, {0x10C00,0x10C48}, {0x10C80,0x10CB2}, {0x10CC0,0x10CF2}, {0x10D00,0x10D23}, {0x10E80,0x10EA9}, - {0x10EB0,0x10EB1}, {0x10F00,0x10F1C}, {0x10F27,0x10F27}, {0x10F30,0x10F45}, {0x10FB0,0x10FC4}, {0x10FE0,0x10FF6}, {0x11003,0x11037}, {0x11083,0x110AF}, - {0x110D0,0x110E8}, {0x11103,0x11126}, {0x11144,0x11144}, {0x11147,0x11147}, {0x11150,0x11172}, {0x11176,0x11176}, {0x11183,0x111B2}, {0x111C1,0x111C4}, - {0x111DA,0x111DA}, {0x111DC,0x111DC}, {0x11200,0x11211}, {0x11213,0x1122B}, {0x11280,0x11286}, {0x11288,0x11288}, {0x1128A,0x1128D}, {0x1128F,0x1129D}, - {0x1129F,0x112A8}, {0x112B0,0x112DE}, {0x11305,0x1130C}, {0x1130F,0x11310}, {0x11313,0x11328}, {0x1132A,0x11330}, {0x11332,0x11333}, {0x11335,0x11339}, - {0x1133D,0x1133D}, {0x11350,0x11350}, {0x1135D,0x11361}, {0x11400,0x11434}, {0x11447,0x1144A}, {0x1145F,0x11461}, {0x11480,0x114AF}, {0x114C4,0x114C5}, - {0x114C7,0x114C7}, {0x11580,0x115AE}, {0x115D8,0x115DB}, {0x11600,0x1162F}, {0x11644,0x11644}, {0x11680,0x116AA}, {0x116B8,0x116B8}, {0x11700,0x1171A}, - {0x11800,0x1182B}, {0x118A0,0x118DF}, {0x118FF,0x11906}, {0x11909,0x11909}, {0x1190C,0x11913}, {0x11915,0x11916}, {0x11918,0x1192F}, {0x1193F,0x1193F}, - {0x11941,0x11941}, {0x119A0,0x119A7}, {0x119AA,0x119D0}, {0x119E1,0x119E1}, {0x119E3,0x119E3}, {0x11A00,0x11A00}, {0x11A0B,0x11A32}, {0x11A3A,0x11A3A}, - {0x11A50,0x11A50}, {0x11A5C,0x11A89}, {0x11A9D,0x11A9D}, {0x11AC0,0x11AF8}, {0x11C00,0x11C08}, {0x11C0A,0x11C2E}, {0x11C40,0x11C40}, {0x11C72,0x11C8F}, - {0x11D00,0x11D06}, {0x11D08,0x11D09}, {0x11D0B,0x11D30}, {0x11D46,0x11D46}, {0x11D60,0x11D65}, {0x11D67,0x11D68}, {0x11D6A,0x11D89}, {0x11D98,0x11D98}, - {0x11EE0,0x11EF2}, {0x11FB0,0x11FB0}, {0x12000,0x12399}, {0x12400,0x1246E}, {0x12480,0x12543}, {0x13000,0x1342E}, {0x14400,0x14646}, {0x16800,0x16A38}, - {0x16A40,0x16A5E}, {0x16AD0,0x16AED}, {0x16B00,0x16B2F}, {0x16B40,0x16B43}, {0x16B63,0x16B77}, {0x16B7D,0x16B8F}, {0x16E40,0x16E7F}, {0x16F00,0x16F4A}, - {0x16F50,0x16F50}, {0x16F93,0x16F9F}, {0x16FE0,0x16FE1}, {0x16FE3,0x16FE3}, {0x17000,0x187F7}, {0x18800,0x18CD5}, {0x18D00,0x18D08}, {0x1B000,0x1B11E}, - {0x1B150,0x1B152}, {0x1B164,0x1B167}, {0x1B170,0x1B2FB}, {0x1BC00,0x1BC6A}, {0x1BC70,0x1BC7C}, {0x1BC80,0x1BC88}, {0x1BC90,0x1BC99}, {0x1D400,0x1D454}, - {0x1D456,0x1D49C}, {0x1D49E,0x1D49F}, {0x1D4A2,0x1D4A2}, {0x1D4A5,0x1D4A6}, {0x1D4A9,0x1D4AC}, {0x1D4AE,0x1D4B9}, {0x1D4BB,0x1D4BB}, {0x1D4BD,0x1D4C3}, - {0x1D4C5,0x1D505}, {0x1D507,0x1D50A}, {0x1D50D,0x1D514}, {0x1D516,0x1D51C}, {0x1D51E,0x1D539}, {0x1D53B,0x1D53E}, {0x1D540,0x1D544}, {0x1D546,0x1D546}, - {0x1D54A,0x1D550}, {0x1D552,0x1D6A5}, {0x1D6A8,0x1D6C0}, {0x1D6C2,0x1D6DA}, {0x1D6DC,0x1D6FA}, {0x1D6FC,0x1D714}, {0x1D716,0x1D734}, {0x1D736,0x1D74E}, - {0x1D750,0x1D76E}, {0x1D770,0x1D788}, {0x1D78A,0x1D7A8}, {0x1D7AA,0x1D7C2}, {0x1D7C4,0x1D7CB}, {0x1E100,0x1E12C}, {0x1E137,0x1E13D}, {0x1E14E,0x1E14E}, - {0x1E2C0,0x1E2EB}, {0x1E800,0x1E8C4}, {0x1E900,0x1E943}, {0x1E94B,0x1E94B}, {0x1EE00,0x1EE03}, {0x1EE05,0x1EE1F}, {0x1EE21,0x1EE22}, {0x1EE24,0x1EE24}, - {0x1EE27,0x1EE27}, {0x1EE29,0x1EE32}, {0x1EE34,0x1EE37}, {0x1EE39,0x1EE39}, {0x1EE3B,0x1EE3B}, {0x1EE42,0x1EE42}, {0x1EE47,0x1EE47}, {0x1EE49,0x1EE49}, - {0x1EE4B,0x1EE4B}, {0x1EE4D,0x1EE4F}, {0x1EE51,0x1EE52}, {0x1EE54,0x1EE54}, {0x1EE57,0x1EE57}, {0x1EE59,0x1EE59}, {0x1EE5B,0x1EE5B}, {0x1EE5D,0x1EE5D}, - {0x1EE5F,0x1EE5F}, {0x1EE61,0x1EE62}, {0x1EE64,0x1EE64}, {0x1EE67,0x1EE6A}, {0x1EE6C,0x1EE72}, {0x1EE74,0x1EE77}, {0x1EE79,0x1EE7C}, {0x1EE7E,0x1EE7E}, - {0x1EE80,0x1EE89}, {0x1EE8B,0x1EE9B}, {0x1EEA1,0x1EEA3}, {0x1EEA5,0x1EEA9}, {0x1EEAB,0x1EEBB}, {0x20000,0x2A6DD}, {0x2A700,0x2B734}, {0x2B740,0x2B81D}, - {0x2B820,0x2CEA1}, {0x2CEB0,0x2EBE0}, {0x2F800,0x2FA1D}, {0x30000,0x3134A}, -}; + {0x0041, 0x005A}, {0x0061, 0x007A}, {0x00AA, 0x00AA}, {0x00B5, 0x00B5}, {0x00BA, 0x00BA}, + {0x00C0, 0x00D6}, {0x00D8, 0x00F6}, {0x00F8, 0x01BA}, {0x01BB, 0x01BB}, {0x01BC, 0x01BF}, + {0x01C0, 0x01C3}, {0x01C4, 0x0293}, {0x0294, 0x0294}, {0x0295, 0x02AF}, {0x02B0, 0x02C1}, + {0x02C6, 0x02D1}, {0x02E0, 0x02E4}, {0x02EC, 0x02EC}, {0x02EE, 0x02EE}, {0x0370, 0x0373}, + {0x0374, 0x0374}, {0x0376, 0x0377}, {0x037B, 0x037D}, {0x037F, 0x037F}, {0x0386, 0x0386}, + {0x0388, 0x038A}, {0x038C, 0x038C}, {0x038E, 0x03A1}, {0x03A3, 0x03F5}, {0x03F7, 0x0481}, + {0x048A, 0x052F}, {0x0531, 0x0556}, {0x0559, 0x0559}, {0x0560, 0x0588}, {0x05D0, 0x05EA}, + {0x05EF, 0x05F2}, {0x0620, 0x063F}, {0x0640, 0x0640}, {0x0641, 0x064A}, {0x066E, 0x066F}, + {0x0671, 0x06D3}, {0x06D5, 0x06D5}, {0x06E5, 0x06E6}, {0x06EE, 0x06EF}, {0x06FA, 0x06FC}, + {0x06FF, 0x06FF}, {0x0710, 0x0710}, {0x0712, 0x072F}, {0x074D, 0x07A5}, {0x07B1, 0x07B1}, + {0x07CA, 0x07EA}, {0x07F4, 0x07F5}, {0x07FA, 0x07FA}, {0x0800, 0x0815}, {0x081A, 0x081A}, + {0x0824, 0x0824}, {0x0828, 0x0828}, {0x0840, 0x0858}, {0x0860, 0x086A}, {0x08A0, 0x08B4}, + {0x08B6, 0x08C7}, {0x0904, 0x0939}, {0x093D, 0x093D}, {0x0950, 0x0950}, {0x0958, 0x0961}, + {0x0971, 0x0971}, {0x0972, 0x0980}, {0x0985, 0x098C}, {0x098F, 0x0990}, {0x0993, 0x09A8}, + {0x09AA, 0x09B0}, {0x09B2, 0x09B2}, {0x09B6, 0x09B9}, {0x09BD, 0x09BD}, {0x09CE, 0x09CE}, + {0x09DC, 0x09DD}, {0x09DF, 0x09E1}, {0x09F0, 0x09F1}, {0x09FC, 0x09FC}, {0x0A05, 0x0A0A}, + {0x0A0F, 0x0A10}, {0x0A13, 0x0A28}, {0x0A2A, 0x0A30}, {0x0A32, 0x0A33}, {0x0A35, 0x0A36}, + {0x0A38, 0x0A39}, {0x0A59, 0x0A5C}, {0x0A5E, 0x0A5E}, {0x0A72, 0x0A74}, {0x0A85, 0x0A8D}, + {0x0A8F, 0x0A91}, {0x0A93, 0x0AA8}, {0x0AAA, 0x0AB0}, {0x0AB2, 0x0AB3}, {0x0AB5, 0x0AB9}, + {0x0ABD, 0x0ABD}, {0x0AD0, 0x0AD0}, {0x0AE0, 0x0AE1}, {0x0AF9, 0x0AF9}, {0x0B05, 0x0B0C}, + {0x0B0F, 0x0B10}, {0x0B13, 0x0B28}, {0x0B2A, 0x0B30}, {0x0B32, 0x0B33}, {0x0B35, 0x0B39}, + {0x0B3D, 0x0B3D}, {0x0B5C, 0x0B5D}, {0x0B5F, 0x0B61}, {0x0B71, 0x0B71}, {0x0B83, 0x0B83}, + {0x0B85, 0x0B8A}, {0x0B8E, 0x0B90}, {0x0B92, 0x0B95}, {0x0B99, 0x0B9A}, {0x0B9C, 0x0B9C}, + {0x0B9E, 0x0B9F}, {0x0BA3, 0x0BA4}, {0x0BA8, 0x0BAA}, {0x0BAE, 0x0BB9}, {0x0BD0, 0x0BD0}, + {0x0C05, 0x0C0C}, {0x0C0E, 0x0C10}, {0x0C12, 0x0C28}, {0x0C2A, 0x0C39}, {0x0C3D, 0x0C3D}, + {0x0C58, 0x0C5A}, {0x0C60, 0x0C61}, {0x0C80, 0x0C80}, {0x0C85, 0x0C8C}, {0x0C8E, 0x0C90}, + {0x0C92, 0x0CA8}, {0x0CAA, 0x0CB3}, {0x0CB5, 0x0CB9}, {0x0CBD, 0x0CBD}, {0x0CDE, 0x0CDE}, + {0x0CE0, 0x0CE1}, {0x0CF1, 0x0CF2}, {0x0D04, 0x0D0C}, {0x0D0E, 0x0D10}, {0x0D12, 0x0D3A}, + {0x0D3D, 0x0D3D}, {0x0D4E, 0x0D4E}, {0x0D54, 0x0D56}, {0x0D5F, 0x0D61}, {0x0D7A, 0x0D7F}, + {0x0D85, 0x0D96}, {0x0D9A, 0x0DB1}, {0x0DB3, 0x0DBB}, {0x0DBD, 0x0DBD}, {0x0DC0, 0x0DC6}, + {0x0E01, 0x0E30}, {0x0E32, 0x0E32}, {0x0E40, 0x0E45}, {0x0E46, 0x0E46}, {0x0E81, 0x0E82}, + {0x0E84, 0x0E84}, {0x0E86, 0x0E8A}, {0x0E8C, 0x0EA3}, {0x0EA5, 0x0EA5}, {0x0EA7, 0x0EB0}, + {0x0EB2, 0x0EB2}, {0x0EBD, 0x0EBD}, {0x0EC0, 0x0EC4}, {0x0EC6, 0x0EC6}, {0x0EDC, 0x0EDF}, + {0x0F00, 0x0F00}, {0x0F40, 0x0F47}, {0x0F49, 0x0F6C}, {0x0F88, 0x0F8C}, {0x1000, 0x102A}, + {0x103F, 0x103F}, {0x1050, 0x1055}, {0x105A, 0x105D}, {0x1061, 0x1061}, {0x1065, 0x1066}, + {0x106E, 0x1070}, {0x1075, 0x1081}, {0x108E, 0x108E}, {0x10A0, 0x10C5}, {0x10C7, 0x10C7}, + {0x10CD, 0x10CD}, {0x10D0, 0x10FA}, {0x10FC, 0x10FC}, {0x10FD, 0x10FF}, {0x1100, 0x1248}, + {0x124A, 0x124D}, {0x1250, 0x1256}, {0x1258, 0x1258}, {0x125A, 0x125D}, {0x1260, 0x1288}, + {0x128A, 0x128D}, {0x1290, 0x12B0}, {0x12B2, 0x12B5}, {0x12B8, 0x12BE}, {0x12C0, 0x12C0}, + {0x12C2, 0x12C5}, {0x12C8, 0x12D6}, {0x12D8, 0x1310}, {0x1312, 0x1315}, {0x1318, 0x135A}, + {0x1380, 0x138F}, {0x13A0, 0x13F5}, {0x13F8, 0x13FD}, {0x1401, 0x166C}, {0x166F, 0x167F}, + {0x1681, 0x169A}, {0x16A0, 0x16EA}, {0x16EE, 0x16F0}, {0x16F1, 0x16F8}, {0x1700, 0x170C}, + {0x170E, 0x1711}, {0x1720, 0x1731}, {0x1740, 0x1751}, {0x1760, 0x176C}, {0x176E, 0x1770}, + {0x1780, 0x17B3}, {0x17D7, 0x17D7}, {0x17DC, 0x17DC}, {0x1820, 0x1842}, {0x1843, 0x1843}, + {0x1844, 0x1878}, {0x1880, 0x1884}, {0x1885, 0x1886}, {0x1887, 0x18A8}, {0x18AA, 0x18AA}, + {0x18B0, 0x18F5}, {0x1900, 0x191E}, {0x1950, 0x196D}, {0x1970, 0x1974}, {0x1980, 0x19AB}, + {0x19B0, 0x19C9}, {0x1A00, 0x1A16}, {0x1A20, 0x1A54}, {0x1AA7, 0x1AA7}, {0x1B05, 0x1B33}, + {0x1B45, 0x1B4B}, {0x1B83, 0x1BA0}, {0x1BAE, 0x1BAF}, {0x1BBA, 0x1BE5}, {0x1C00, 0x1C23}, + {0x1C4D, 0x1C4F}, {0x1C5A, 0x1C77}, {0x1C78, 0x1C7D}, {0x1C80, 0x1C88}, {0x1C90, 0x1CBA}, + {0x1CBD, 0x1CBF}, {0x1CE9, 0x1CEC}, {0x1CEE, 0x1CF3}, {0x1CF5, 0x1CF6}, {0x1CFA, 0x1CFA}, + {0x1D00, 0x1D2B}, {0x1D2C, 0x1D6A}, {0x1D6B, 0x1D77}, {0x1D78, 0x1D78}, {0x1D79, 0x1D9A}, + {0x1D9B, 0x1DBF}, {0x1E00, 0x1F15}, {0x1F18, 0x1F1D}, {0x1F20, 0x1F45}, {0x1F48, 0x1F4D}, + {0x1F50, 0x1F57}, {0x1F59, 0x1F59}, {0x1F5B, 0x1F5B}, {0x1F5D, 0x1F5D}, {0x1F5F, 0x1F7D}, + {0x1F80, 0x1FB4}, {0x1FB6, 0x1FBC}, {0x1FBE, 0x1FBE}, {0x1FC2, 0x1FC4}, {0x1FC6, 0x1FCC}, + {0x1FD0, 0x1FD3}, {0x1FD6, 0x1FDB}, {0x1FE0, 0x1FEC}, {0x1FF2, 0x1FF4}, {0x1FF6, 0x1FFC}, + {0x2071, 0x2071}, {0x207F, 0x207F}, {0x2090, 0x209C}, {0x2102, 0x2102}, {0x2107, 0x2107}, + {0x210A, 0x2113}, {0x2115, 0x2115}, {0x2118, 0x2118}, {0x2119, 0x211D}, {0x2124, 0x2124}, + {0x2126, 0x2126}, {0x2128, 0x2128}, {0x212A, 0x212D}, {0x212E, 0x212E}, {0x212F, 0x2134}, + {0x2135, 0x2138}, {0x2139, 0x2139}, {0x213C, 0x213F}, {0x2145, 0x2149}, {0x214E, 0x214E}, + {0x2160, 0x2182}, {0x2183, 0x2184}, {0x2185, 0x2188}, {0x2C00, 0x2C2E}, {0x2C30, 0x2C5E}, + {0x2C60, 0x2C7B}, {0x2C7C, 0x2C7D}, {0x2C7E, 0x2CE4}, {0x2CEB, 0x2CEE}, {0x2CF2, 0x2CF3}, + {0x2D00, 0x2D25}, {0x2D27, 0x2D27}, {0x2D2D, 0x2D2D}, {0x2D30, 0x2D67}, {0x2D6F, 0x2D6F}, + {0x2D80, 0x2D96}, {0x2DA0, 0x2DA6}, {0x2DA8, 0x2DAE}, {0x2DB0, 0x2DB6}, {0x2DB8, 0x2DBE}, + {0x2DC0, 0x2DC6}, {0x2DC8, 0x2DCE}, {0x2DD0, 0x2DD6}, {0x2DD8, 0x2DDE}, {0x3005, 0x3005}, + {0x3006, 0x3006}, {0x3007, 0x3007}, {0x3021, 0x3029}, {0x3031, 0x3035}, {0x3038, 0x303A}, + {0x303B, 0x303B}, {0x303C, 0x303C}, {0x3041, 0x3096}, {0x309D, 0x309E}, {0x309F, 0x309F}, + {0x30A1, 0x30FA}, {0x30FC, 0x30FE}, {0x30FF, 0x30FF}, {0x3105, 0x312F}, {0x3131, 0x318E}, + {0x31A0, 0x31BF}, {0x31F0, 0x31FF}, {0x3400, 0x4DBF}, {0x4E00, 0x9FFC}, {0xA000, 0xA014}, + {0xA015, 0xA015}, {0xA016, 0xA48C}, {0xA4D0, 0xA4F7}, {0xA4F8, 0xA4FD}, {0xA500, 0xA60B}, + {0xA60C, 0xA60C}, {0xA610, 0xA61F}, {0xA62A, 0xA62B}, {0xA640, 0xA66D}, {0xA66E, 0xA66E}, + {0xA67F, 0xA67F}, {0xA680, 0xA69B}, {0xA69C, 0xA69D}, {0xA6A0, 0xA6E5}, {0xA6E6, 0xA6EF}, + {0xA717, 0xA71F}, {0xA722, 0xA76F}, {0xA770, 0xA770}, {0xA771, 0xA787}, {0xA788, 0xA788}, + {0xA78B, 0xA78E}, {0xA78F, 0xA78F}, {0xA790, 0xA7BF}, {0xA7C2, 0xA7CA}, {0xA7F5, 0xA7F6}, + {0xA7F7, 0xA7F7}, {0xA7F8, 0xA7F9}, {0xA7FA, 0xA7FA}, {0xA7FB, 0xA801}, {0xA803, 0xA805}, + {0xA807, 0xA80A}, {0xA80C, 0xA822}, {0xA840, 0xA873}, {0xA882, 0xA8B3}, {0xA8F2, 0xA8F7}, + {0xA8FB, 0xA8FB}, {0xA8FD, 0xA8FE}, {0xA90A, 0xA925}, {0xA930, 0xA946}, {0xA960, 0xA97C}, + {0xA984, 0xA9B2}, {0xA9CF, 0xA9CF}, {0xA9E0, 0xA9E4}, {0xA9E6, 0xA9E6}, {0xA9E7, 0xA9EF}, + {0xA9FA, 0xA9FE}, {0xAA00, 0xAA28}, {0xAA40, 0xAA42}, {0xAA44, 0xAA4B}, {0xAA60, 0xAA6F}, + {0xAA70, 0xAA70}, {0xAA71, 0xAA76}, {0xAA7A, 0xAA7A}, {0xAA7E, 0xAAAF}, {0xAAB1, 0xAAB1}, + {0xAAB5, 0xAAB6}, {0xAAB9, 0xAABD}, {0xAAC0, 0xAAC0}, {0xAAC2, 0xAAC2}, {0xAADB, 0xAADC}, + {0xAADD, 0xAADD}, {0xAAE0, 0xAAEA}, {0xAAF2, 0xAAF2}, {0xAAF3, 0xAAF4}, {0xAB01, 0xAB06}, + {0xAB09, 0xAB0E}, {0xAB11, 0xAB16}, {0xAB20, 0xAB26}, {0xAB28, 0xAB2E}, {0xAB30, 0xAB5A}, + {0xAB5C, 0xAB5F}, {0xAB60, 0xAB68}, {0xAB69, 0xAB69}, {0xAB70, 0xABBF}, {0xABC0, 0xABE2}, + {0xAC00, 0xD7A3}, {0xD7B0, 0xD7C6}, {0xD7CB, 0xD7FB}, {0xF900, 0xFA6D}, {0xFA70, 0xFAD9}, + {0xFB00, 0xFB06}, {0xFB13, 0xFB17}, {0xFB1D, 0xFB1D}, {0xFB1F, 0xFB28}, {0xFB2A, 0xFB36}, + {0xFB38, 0xFB3C}, {0xFB3E, 0xFB3E}, {0xFB40, 0xFB41}, {0xFB43, 0xFB44}, {0xFB46, 0xFBB1}, + {0xFBD3, 0xFC5D}, {0xFC64, 0xFD3D}, {0xFD50, 0xFD8F}, {0xFD92, 0xFDC7}, {0xFDF0, 0xFDF9}, + {0xFE71, 0xFE71}, {0xFE73, 0xFE73}, {0xFE77, 0xFE77}, {0xFE79, 0xFE79}, {0xFE7B, 0xFE7B}, + {0xFE7D, 0xFE7D}, {0xFE7F, 0xFEFC}, {0xFF21, 0xFF3A}, {0xFF41, 0xFF5A}, {0xFF66, 0xFF6F}, + {0xFF70, 0xFF70}, {0xFF71, 0xFF9D}, {0xFFA0, 0xFFBE}, {0xFFC2, 0xFFC7}, {0xFFCA, 0xFFCF}, + {0xFFD2, 0xFFD7}, {0xFFDA, 0xFFDC}, {0x10000, 0x1000B}, {0x1000D, 0x10026}, {0x10028, 0x1003A}, + {0x1003C, 0x1003D}, {0x1003F, 0x1004D}, {0x10050, 0x1005D}, {0x10080, 0x100FA}, {0x10140, 0x10174}, + {0x10280, 0x1029C}, {0x102A0, 0x102D0}, {0x10300, 0x1031F}, {0x1032D, 0x10340}, {0x10341, 0x10341}, + {0x10342, 0x10349}, {0x1034A, 0x1034A}, {0x10350, 0x10375}, {0x10380, 0x1039D}, {0x103A0, 0x103C3}, + {0x103C8, 0x103CF}, {0x103D1, 0x103D5}, {0x10400, 0x1044F}, {0x10450, 0x1049D}, {0x104B0, 0x104D3}, + {0x104D8, 0x104FB}, {0x10500, 0x10527}, {0x10530, 0x10563}, {0x10600, 0x10736}, {0x10740, 0x10755}, + {0x10760, 0x10767}, {0x10800, 0x10805}, {0x10808, 0x10808}, {0x1080A, 0x10835}, {0x10837, 0x10838}, + {0x1083C, 0x1083C}, {0x1083F, 0x10855}, {0x10860, 0x10876}, {0x10880, 0x1089E}, {0x108E0, 0x108F2}, + {0x108F4, 0x108F5}, {0x10900, 0x10915}, {0x10920, 0x10939}, {0x10980, 0x109B7}, {0x109BE, 0x109BF}, + {0x10A00, 0x10A00}, {0x10A10, 0x10A13}, {0x10A15, 0x10A17}, {0x10A19, 0x10A35}, {0x10A60, 0x10A7C}, + {0x10A80, 0x10A9C}, {0x10AC0, 0x10AC7}, {0x10AC9, 0x10AE4}, {0x10B00, 0x10B35}, {0x10B40, 0x10B55}, + {0x10B60, 0x10B72}, {0x10B80, 0x10B91}, {0x10C00, 0x10C48}, {0x10C80, 0x10CB2}, {0x10CC0, 0x10CF2}, + {0x10D00, 0x10D23}, {0x10E80, 0x10EA9}, {0x10EB0, 0x10EB1}, {0x10F00, 0x10F1C}, {0x10F27, 0x10F27}, + {0x10F30, 0x10F45}, {0x10FB0, 0x10FC4}, {0x10FE0, 0x10FF6}, {0x11003, 0x11037}, {0x11083, 0x110AF}, + {0x110D0, 0x110E8}, {0x11103, 0x11126}, {0x11144, 0x11144}, {0x11147, 0x11147}, {0x11150, 0x11172}, + {0x11176, 0x11176}, {0x11183, 0x111B2}, {0x111C1, 0x111C4}, {0x111DA, 0x111DA}, {0x111DC, 0x111DC}, + {0x11200, 0x11211}, {0x11213, 0x1122B}, {0x11280, 0x11286}, {0x11288, 0x11288}, {0x1128A, 0x1128D}, + {0x1128F, 0x1129D}, {0x1129F, 0x112A8}, {0x112B0, 0x112DE}, {0x11305, 0x1130C}, {0x1130F, 0x11310}, + {0x11313, 0x11328}, {0x1132A, 0x11330}, {0x11332, 0x11333}, {0x11335, 0x11339}, {0x1133D, 0x1133D}, + {0x11350, 0x11350}, {0x1135D, 0x11361}, {0x11400, 0x11434}, {0x11447, 0x1144A}, {0x1145F, 0x11461}, + {0x11480, 0x114AF}, {0x114C4, 0x114C5}, {0x114C7, 0x114C7}, {0x11580, 0x115AE}, {0x115D8, 0x115DB}, + {0x11600, 0x1162F}, {0x11644, 0x11644}, {0x11680, 0x116AA}, {0x116B8, 0x116B8}, {0x11700, 0x1171A}, + {0x11800, 0x1182B}, {0x118A0, 0x118DF}, {0x118FF, 0x11906}, {0x11909, 0x11909}, {0x1190C, 0x11913}, + {0x11915, 0x11916}, {0x11918, 0x1192F}, {0x1193F, 0x1193F}, {0x11941, 0x11941}, {0x119A0, 0x119A7}, + {0x119AA, 0x119D0}, {0x119E1, 0x119E1}, {0x119E3, 0x119E3}, {0x11A00, 0x11A00}, {0x11A0B, 0x11A32}, + {0x11A3A, 0x11A3A}, {0x11A50, 0x11A50}, {0x11A5C, 0x11A89}, {0x11A9D, 0x11A9D}, {0x11AC0, 0x11AF8}, + {0x11C00, 0x11C08}, {0x11C0A, 0x11C2E}, {0x11C40, 0x11C40}, {0x11C72, 0x11C8F}, {0x11D00, 0x11D06}, + {0x11D08, 0x11D09}, {0x11D0B, 0x11D30}, {0x11D46, 0x11D46}, {0x11D60, 0x11D65}, {0x11D67, 0x11D68}, + {0x11D6A, 0x11D89}, {0x11D98, 0x11D98}, {0x11EE0, 0x11EF2}, {0x11FB0, 0x11FB0}, {0x12000, 0x12399}, + {0x12400, 0x1246E}, {0x12480, 0x12543}, {0x13000, 0x1342E}, {0x14400, 0x14646}, {0x16800, 0x16A38}, + {0x16A40, 0x16A5E}, {0x16AD0, 0x16AED}, {0x16B00, 0x16B2F}, {0x16B40, 0x16B43}, {0x16B63, 0x16B77}, + {0x16B7D, 0x16B8F}, {0x16E40, 0x16E7F}, {0x16F00, 0x16F4A}, {0x16F50, 0x16F50}, {0x16F93, 0x16F9F}, + {0x16FE0, 0x16FE1}, {0x16FE3, 0x16FE3}, {0x17000, 0x187F7}, {0x18800, 0x18CD5}, {0x18D00, 0x18D08}, + {0x1B000, 0x1B11E}, {0x1B150, 0x1B152}, {0x1B164, 0x1B167}, {0x1B170, 0x1B2FB}, {0x1BC00, 0x1BC6A}, + {0x1BC70, 0x1BC7C}, {0x1BC80, 0x1BC88}, {0x1BC90, 0x1BC99}, {0x1D400, 0x1D454}, {0x1D456, 0x1D49C}, + {0x1D49E, 0x1D49F}, {0x1D4A2, 0x1D4A2}, {0x1D4A5, 0x1D4A6}, {0x1D4A9, 0x1D4AC}, {0x1D4AE, 0x1D4B9}, + {0x1D4BB, 0x1D4BB}, {0x1D4BD, 0x1D4C3}, {0x1D4C5, 0x1D505}, {0x1D507, 0x1D50A}, {0x1D50D, 0x1D514}, + {0x1D516, 0x1D51C}, {0x1D51E, 0x1D539}, {0x1D53B, 0x1D53E}, {0x1D540, 0x1D544}, {0x1D546, 0x1D546}, + {0x1D54A, 0x1D550}, {0x1D552, 0x1D6A5}, {0x1D6A8, 0x1D6C0}, {0x1D6C2, 0x1D6DA}, {0x1D6DC, 0x1D6FA}, + {0x1D6FC, 0x1D714}, {0x1D716, 0x1D734}, {0x1D736, 0x1D74E}, {0x1D750, 0x1D76E}, {0x1D770, 0x1D788}, + {0x1D78A, 0x1D7A8}, {0x1D7AA, 0x1D7C2}, {0x1D7C4, 0x1D7CB}, {0x1E100, 0x1E12C}, {0x1E137, 0x1E13D}, + {0x1E14E, 0x1E14E}, {0x1E2C0, 0x1E2EB}, {0x1E800, 0x1E8C4}, {0x1E900, 0x1E943}, {0x1E94B, 0x1E94B}, + {0x1EE00, 0x1EE03}, {0x1EE05, 0x1EE1F}, {0x1EE21, 0x1EE22}, {0x1EE24, 0x1EE24}, {0x1EE27, 0x1EE27}, + {0x1EE29, 0x1EE32}, {0x1EE34, 0x1EE37}, {0x1EE39, 0x1EE39}, {0x1EE3B, 0x1EE3B}, {0x1EE42, 0x1EE42}, + {0x1EE47, 0x1EE47}, {0x1EE49, 0x1EE49}, {0x1EE4B, 0x1EE4B}, {0x1EE4D, 0x1EE4F}, {0x1EE51, 0x1EE52}, + {0x1EE54, 0x1EE54}, {0x1EE57, 0x1EE57}, {0x1EE59, 0x1EE59}, {0x1EE5B, 0x1EE5B}, {0x1EE5D, 0x1EE5D}, + {0x1EE5F, 0x1EE5F}, {0x1EE61, 0x1EE62}, {0x1EE64, 0x1EE64}, {0x1EE67, 0x1EE6A}, {0x1EE6C, 0x1EE72}, + {0x1EE74, 0x1EE77}, {0x1EE79, 0x1EE7C}, {0x1EE7E, 0x1EE7E}, {0x1EE80, 0x1EE89}, {0x1EE8B, 0x1EE9B}, + {0x1EEA1, 0x1EEA3}, {0x1EEA5, 0x1EEA9}, {0x1EEAB, 0x1EEBB}, {0x20000, 0x2A6DD}, {0x2A700, 0x2B734}, + {0x2B740, 0x2B81D}, {0x2B820, 0x2CEA1}, {0x2CEB0, 0x2EBE0}, {0x2F800, 0x2FA1D}, {0x30000, 0x3134A}, +}; static const uint32_t XID_Continue_only[][2] = { - {0x0030,0x0039}, {0x005F,0x005F}, {0x00B7,0x00B7}, {0x0300,0x036F}, {0x0387,0x0387}, {0x0483,0x0487}, {0x0591,0x05BD}, {0x05BF,0x05BF}, - {0x05C1,0x05C2}, {0x05C4,0x05C5}, {0x05C7,0x05C7}, {0x0610,0x061A}, {0x064B,0x065F}, {0x0660,0x0669}, {0x0670,0x0670}, {0x06D6,0x06DC}, - {0x06DF,0x06E4}, {0x06E7,0x06E8}, {0x06EA,0x06ED}, {0x06F0,0x06F9}, {0x0711,0x0711}, {0x0730,0x074A}, {0x07A6,0x07B0}, {0x07C0,0x07C9}, - {0x07EB,0x07F3}, {0x07FD,0x07FD}, {0x0816,0x0819}, {0x081B,0x0823}, {0x0825,0x0827}, {0x0829,0x082D}, {0x0859,0x085B}, {0x08D3,0x08E1}, - {0x08E3,0x0902}, {0x0903,0x0903}, {0x093A,0x093A}, {0x093B,0x093B}, {0x093C,0x093C}, {0x093E,0x0940}, {0x0941,0x0948}, {0x0949,0x094C}, - {0x094D,0x094D}, {0x094E,0x094F}, {0x0951,0x0957}, {0x0962,0x0963}, {0x0966,0x096F}, {0x0981,0x0981}, {0x0982,0x0983}, {0x09BC,0x09BC}, - {0x09BE,0x09C0}, {0x09C1,0x09C4}, {0x09C7,0x09C8}, {0x09CB,0x09CC}, {0x09CD,0x09CD}, {0x09D7,0x09D7}, {0x09E2,0x09E3}, {0x09E6,0x09EF}, - {0x09FE,0x09FE}, {0x0A01,0x0A02}, {0x0A03,0x0A03}, {0x0A3C,0x0A3C}, {0x0A3E,0x0A40}, {0x0A41,0x0A42}, {0x0A47,0x0A48}, {0x0A4B,0x0A4D}, - {0x0A51,0x0A51}, {0x0A66,0x0A6F}, {0x0A70,0x0A71}, {0x0A75,0x0A75}, {0x0A81,0x0A82}, {0x0A83,0x0A83}, {0x0ABC,0x0ABC}, {0x0ABE,0x0AC0}, - {0x0AC1,0x0AC5}, {0x0AC7,0x0AC8}, {0x0AC9,0x0AC9}, {0x0ACB,0x0ACC}, {0x0ACD,0x0ACD}, {0x0AE2,0x0AE3}, {0x0AE6,0x0AEF}, {0x0AFA,0x0AFF}, - {0x0B01,0x0B01}, {0x0B02,0x0B03}, {0x0B3C,0x0B3C}, {0x0B3E,0x0B3E}, {0x0B3F,0x0B3F}, {0x0B40,0x0B40}, {0x0B41,0x0B44}, {0x0B47,0x0B48}, - {0x0B4B,0x0B4C}, {0x0B4D,0x0B4D}, {0x0B55,0x0B56}, {0x0B57,0x0B57}, {0x0B62,0x0B63}, {0x0B66,0x0B6F}, {0x0B82,0x0B82}, {0x0BBE,0x0BBF}, - {0x0BC0,0x0BC0}, {0x0BC1,0x0BC2}, {0x0BC6,0x0BC8}, {0x0BCA,0x0BCC}, {0x0BCD,0x0BCD}, {0x0BD7,0x0BD7}, {0x0BE6,0x0BEF}, {0x0C00,0x0C00}, - {0x0C01,0x0C03}, {0x0C04,0x0C04}, {0x0C3E,0x0C40}, {0x0C41,0x0C44}, {0x0C46,0x0C48}, {0x0C4A,0x0C4D}, {0x0C55,0x0C56}, {0x0C62,0x0C63}, - {0x0C66,0x0C6F}, {0x0C81,0x0C81}, {0x0C82,0x0C83}, {0x0CBC,0x0CBC}, {0x0CBE,0x0CBE}, {0x0CBF,0x0CBF}, {0x0CC0,0x0CC4}, {0x0CC6,0x0CC6}, - {0x0CC7,0x0CC8}, {0x0CCA,0x0CCB}, {0x0CCC,0x0CCD}, {0x0CD5,0x0CD6}, {0x0CE2,0x0CE3}, {0x0CE6,0x0CEF}, {0x0D00,0x0D01}, {0x0D02,0x0D03}, - {0x0D3B,0x0D3C}, {0x0D3E,0x0D40}, {0x0D41,0x0D44}, {0x0D46,0x0D48}, {0x0D4A,0x0D4C}, {0x0D4D,0x0D4D}, {0x0D57,0x0D57}, {0x0D62,0x0D63}, - {0x0D66,0x0D6F}, {0x0D81,0x0D81}, {0x0D82,0x0D83}, {0x0DCA,0x0DCA}, {0x0DCF,0x0DD1}, {0x0DD2,0x0DD4}, {0x0DD6,0x0DD6}, {0x0DD8,0x0DDF}, - {0x0DE6,0x0DEF}, {0x0DF2,0x0DF3}, {0x0E32,0x0E33}, {0x0E34,0x0E3A}, {0x0E47,0x0E4E}, {0x0E50,0x0E59}, {0x0EB2,0x0EB3}, {0x0EB4,0x0EBC}, - {0x0EC8,0x0ECD}, {0x0ED0,0x0ED9}, {0x0F18,0x0F19}, {0x0F20,0x0F29}, {0x0F35,0x0F35}, {0x0F37,0x0F37}, {0x0F39,0x0F39}, {0x0F3E,0x0F3F}, - {0x0F71,0x0F7E}, {0x0F7F,0x0F7F}, {0x0F80,0x0F84}, {0x0F86,0x0F87}, {0x0F8D,0x0F97}, {0x0F99,0x0FBC}, {0x0FC6,0x0FC6}, {0x102B,0x102C}, - {0x102D,0x1030}, {0x1031,0x1031}, {0x1032,0x1037}, {0x1038,0x1038}, {0x1039,0x103A}, {0x103B,0x103C}, {0x103D,0x103E}, {0x1040,0x1049}, - {0x1056,0x1057}, {0x1058,0x1059}, {0x105E,0x1060}, {0x1062,0x1064}, {0x1067,0x106D}, {0x1071,0x1074}, {0x1082,0x1082}, {0x1083,0x1084}, - {0x1085,0x1086}, {0x1087,0x108C}, {0x108D,0x108D}, {0x108F,0x108F}, {0x1090,0x1099}, {0x109A,0x109C}, {0x109D,0x109D}, {0x135D,0x135F}, - {0x1369,0x1371}, {0x1712,0x1714}, {0x1732,0x1734}, {0x1752,0x1753}, {0x1772,0x1773}, {0x17B4,0x17B5}, {0x17B6,0x17B6}, {0x17B7,0x17BD}, - {0x17BE,0x17C5}, {0x17C6,0x17C6}, {0x17C7,0x17C8}, {0x17C9,0x17D3}, {0x17DD,0x17DD}, {0x17E0,0x17E9}, {0x180B,0x180D}, {0x1810,0x1819}, - {0x18A9,0x18A9}, {0x1920,0x1922}, {0x1923,0x1926}, {0x1927,0x1928}, {0x1929,0x192B}, {0x1930,0x1931}, {0x1932,0x1932}, {0x1933,0x1938}, - {0x1939,0x193B}, {0x1946,0x194F}, {0x19D0,0x19D9}, {0x19DA,0x19DA}, {0x1A17,0x1A18}, {0x1A19,0x1A1A}, {0x1A1B,0x1A1B}, {0x1A55,0x1A55}, - {0x1A56,0x1A56}, {0x1A57,0x1A57}, {0x1A58,0x1A5E}, {0x1A60,0x1A60}, {0x1A61,0x1A61}, {0x1A62,0x1A62}, {0x1A63,0x1A64}, {0x1A65,0x1A6C}, - {0x1A6D,0x1A72}, {0x1A73,0x1A7C}, {0x1A7F,0x1A7F}, {0x1A80,0x1A89}, {0x1A90,0x1A99}, {0x1AB0,0x1ABD}, {0x1ABF,0x1AC0}, {0x1B00,0x1B03}, - {0x1B04,0x1B04}, {0x1B34,0x1B34}, {0x1B35,0x1B35}, {0x1B36,0x1B3A}, {0x1B3B,0x1B3B}, {0x1B3C,0x1B3C}, {0x1B3D,0x1B41}, {0x1B42,0x1B42}, - {0x1B43,0x1B44}, {0x1B50,0x1B59}, {0x1B6B,0x1B73}, {0x1B80,0x1B81}, {0x1B82,0x1B82}, {0x1BA1,0x1BA1}, {0x1BA2,0x1BA5}, {0x1BA6,0x1BA7}, - {0x1BA8,0x1BA9}, {0x1BAA,0x1BAA}, {0x1BAB,0x1BAD}, {0x1BB0,0x1BB9}, {0x1BE6,0x1BE6}, {0x1BE7,0x1BE7}, {0x1BE8,0x1BE9}, {0x1BEA,0x1BEC}, - {0x1BED,0x1BED}, {0x1BEE,0x1BEE}, {0x1BEF,0x1BF1}, {0x1BF2,0x1BF3}, {0x1C24,0x1C2B}, {0x1C2C,0x1C33}, {0x1C34,0x1C35}, {0x1C36,0x1C37}, - {0x1C40,0x1C49}, {0x1C50,0x1C59}, {0x1CD0,0x1CD2}, {0x1CD4,0x1CE0}, {0x1CE1,0x1CE1}, {0x1CE2,0x1CE8}, {0x1CED,0x1CED}, {0x1CF4,0x1CF4}, - {0x1CF7,0x1CF7}, {0x1CF8,0x1CF9}, {0x1DC0,0x1DF9}, {0x1DFB,0x1DFF}, {0x203F,0x2040}, {0x2054,0x2054}, {0x20D0,0x20DC}, {0x20E1,0x20E1}, - {0x20E5,0x20F0}, {0x2CEF,0x2CF1}, {0x2D7F,0x2D7F}, {0x2DE0,0x2DFF}, {0x302A,0x302D}, {0x302E,0x302F}, {0x3099,0x309A}, {0xA620,0xA629}, - {0xA66F,0xA66F}, {0xA674,0xA67D}, {0xA69E,0xA69F}, {0xA6F0,0xA6F1}, {0xA802,0xA802}, {0xA806,0xA806}, {0xA80B,0xA80B}, {0xA823,0xA824}, - {0xA825,0xA826}, {0xA827,0xA827}, {0xA82C,0xA82C}, {0xA880,0xA881}, {0xA8B4,0xA8C3}, {0xA8C4,0xA8C5}, {0xA8D0,0xA8D9}, {0xA8E0,0xA8F1}, - {0xA8FF,0xA8FF}, {0xA900,0xA909}, {0xA926,0xA92D}, {0xA947,0xA951}, {0xA952,0xA953}, {0xA980,0xA982}, {0xA983,0xA983}, {0xA9B3,0xA9B3}, - {0xA9B4,0xA9B5}, {0xA9B6,0xA9B9}, {0xA9BA,0xA9BB}, {0xA9BC,0xA9BD}, {0xA9BE,0xA9C0}, {0xA9D0,0xA9D9}, {0xA9E5,0xA9E5}, {0xA9F0,0xA9F9}, - {0xAA29,0xAA2E}, {0xAA2F,0xAA30}, {0xAA31,0xAA32}, {0xAA33,0xAA34}, {0xAA35,0xAA36}, {0xAA43,0xAA43}, {0xAA4C,0xAA4C}, {0xAA4D,0xAA4D}, - {0xAA50,0xAA59}, {0xAA7B,0xAA7B}, {0xAA7C,0xAA7C}, {0xAA7D,0xAA7D}, {0xAAB0,0xAAB0}, {0xAAB2,0xAAB4}, {0xAAB7,0xAAB8}, {0xAABE,0xAABF}, - {0xAAC1,0xAAC1}, {0xAAEB,0xAAEB}, {0xAAEC,0xAAED}, {0xAAEE,0xAAEF}, {0xAAF5,0xAAF5}, {0xAAF6,0xAAF6}, {0xABE3,0xABE4}, {0xABE5,0xABE5}, - {0xABE6,0xABE7}, {0xABE8,0xABE8}, {0xABE9,0xABEA}, {0xABEC,0xABEC}, {0xABED,0xABED}, {0xABF0,0xABF9}, {0xFB1E,0xFB1E}, {0xFE00,0xFE0F}, - {0xFE20,0xFE2F}, {0xFE33,0xFE34}, {0xFE4D,0xFE4F}, {0xFF10,0xFF19}, {0xFF3F,0xFF3F}, {0xFF9E,0xFF9F}, {0x101FD,0x101FD}, {0x102E0,0x102E0}, - {0x10376,0x1037A}, {0x104A0,0x104A9}, {0x10A01,0x10A03}, {0x10A05,0x10A06}, {0x10A0C,0x10A0F}, {0x10A38,0x10A3A}, {0x10A3F,0x10A3F}, {0x10AE5,0x10AE6}, - {0x10D24,0x10D27}, {0x10D30,0x10D39}, {0x10EAB,0x10EAC}, {0x10F46,0x10F50}, {0x11000,0x11000}, {0x11001,0x11001}, {0x11002,0x11002}, {0x11038,0x11046}, - {0x11066,0x1106F}, {0x1107F,0x11081}, {0x11082,0x11082}, {0x110B0,0x110B2}, {0x110B3,0x110B6}, {0x110B7,0x110B8}, {0x110B9,0x110BA}, {0x110F0,0x110F9}, - {0x11100,0x11102}, {0x11127,0x1112B}, {0x1112C,0x1112C}, {0x1112D,0x11134}, {0x11136,0x1113F}, {0x11145,0x11146}, {0x11173,0x11173}, {0x11180,0x11181}, - {0x11182,0x11182}, {0x111B3,0x111B5}, {0x111B6,0x111BE}, {0x111BF,0x111C0}, {0x111C9,0x111CC}, {0x111CE,0x111CE}, {0x111CF,0x111CF}, {0x111D0,0x111D9}, - {0x1122C,0x1122E}, {0x1122F,0x11231}, {0x11232,0x11233}, {0x11234,0x11234}, {0x11235,0x11235}, {0x11236,0x11237}, {0x1123E,0x1123E}, {0x112DF,0x112DF}, - {0x112E0,0x112E2}, {0x112E3,0x112EA}, {0x112F0,0x112F9}, {0x11300,0x11301}, {0x11302,0x11303}, {0x1133B,0x1133C}, {0x1133E,0x1133F}, {0x11340,0x11340}, - {0x11341,0x11344}, {0x11347,0x11348}, {0x1134B,0x1134D}, {0x11357,0x11357}, {0x11362,0x11363}, {0x11366,0x1136C}, {0x11370,0x11374}, {0x11435,0x11437}, - {0x11438,0x1143F}, {0x11440,0x11441}, {0x11442,0x11444}, {0x11445,0x11445}, {0x11446,0x11446}, {0x11450,0x11459}, {0x1145E,0x1145E}, {0x114B0,0x114B2}, - {0x114B3,0x114B8}, {0x114B9,0x114B9}, {0x114BA,0x114BA}, {0x114BB,0x114BE}, {0x114BF,0x114C0}, {0x114C1,0x114C1}, {0x114C2,0x114C3}, {0x114D0,0x114D9}, - {0x115AF,0x115B1}, {0x115B2,0x115B5}, {0x115B8,0x115BB}, {0x115BC,0x115BD}, {0x115BE,0x115BE}, {0x115BF,0x115C0}, {0x115DC,0x115DD}, {0x11630,0x11632}, - {0x11633,0x1163A}, {0x1163B,0x1163C}, {0x1163D,0x1163D}, {0x1163E,0x1163E}, {0x1163F,0x11640}, {0x11650,0x11659}, {0x116AB,0x116AB}, {0x116AC,0x116AC}, - {0x116AD,0x116AD}, {0x116AE,0x116AF}, {0x116B0,0x116B5}, {0x116B6,0x116B6}, {0x116B7,0x116B7}, {0x116C0,0x116C9}, {0x1171D,0x1171F}, {0x11720,0x11721}, - {0x11722,0x11725}, {0x11726,0x11726}, {0x11727,0x1172B}, {0x11730,0x11739}, {0x1182C,0x1182E}, {0x1182F,0x11837}, {0x11838,0x11838}, {0x11839,0x1183A}, - {0x118E0,0x118E9}, {0x11930,0x11935}, {0x11937,0x11938}, {0x1193B,0x1193C}, {0x1193D,0x1193D}, {0x1193E,0x1193E}, {0x11940,0x11940}, {0x11942,0x11942}, - {0x11943,0x11943}, {0x11950,0x11959}, {0x119D1,0x119D3}, {0x119D4,0x119D7}, {0x119DA,0x119DB}, {0x119DC,0x119DF}, {0x119E0,0x119E0}, {0x119E4,0x119E4}, - {0x11A01,0x11A0A}, {0x11A33,0x11A38}, {0x11A39,0x11A39}, {0x11A3B,0x11A3E}, {0x11A47,0x11A47}, {0x11A51,0x11A56}, {0x11A57,0x11A58}, {0x11A59,0x11A5B}, - {0x11A8A,0x11A96}, {0x11A97,0x11A97}, {0x11A98,0x11A99}, {0x11C2F,0x11C2F}, {0x11C30,0x11C36}, {0x11C38,0x11C3D}, {0x11C3E,0x11C3E}, {0x11C3F,0x11C3F}, - {0x11C50,0x11C59}, {0x11C92,0x11CA7}, {0x11CA9,0x11CA9}, {0x11CAA,0x11CB0}, {0x11CB1,0x11CB1}, {0x11CB2,0x11CB3}, {0x11CB4,0x11CB4}, {0x11CB5,0x11CB6}, - {0x11D31,0x11D36}, {0x11D3A,0x11D3A}, {0x11D3C,0x11D3D}, {0x11D3F,0x11D45}, {0x11D47,0x11D47}, {0x11D50,0x11D59}, {0x11D8A,0x11D8E}, {0x11D90,0x11D91}, - {0x11D93,0x11D94}, {0x11D95,0x11D95}, {0x11D96,0x11D96}, {0x11D97,0x11D97}, {0x11DA0,0x11DA9}, {0x11EF3,0x11EF4}, {0x11EF5,0x11EF6}, {0x16A60,0x16A69}, - {0x16AF0,0x16AF4}, {0x16B30,0x16B36}, {0x16B50,0x16B59}, {0x16F4F,0x16F4F}, {0x16F51,0x16F87}, {0x16F8F,0x16F92}, {0x16FE4,0x16FE4}, {0x16FF0,0x16FF1}, - {0x1BC9D,0x1BC9E}, {0x1D165,0x1D166}, {0x1D167,0x1D169}, {0x1D16D,0x1D172}, {0x1D17B,0x1D182}, {0x1D185,0x1D18B}, {0x1D1AA,0x1D1AD}, {0x1D242,0x1D244}, - {0x1D7CE,0x1D7FF}, {0x1DA00,0x1DA36}, {0x1DA3B,0x1DA6C}, {0x1DA75,0x1DA75}, {0x1DA84,0x1DA84}, {0x1DA9B,0x1DA9F}, {0x1DAA1,0x1DAAF}, {0x1E000,0x1E006}, - {0x1E008,0x1E018}, {0x1E01B,0x1E021}, {0x1E023,0x1E024}, {0x1E026,0x1E02A}, {0x1E130,0x1E136}, {0x1E140,0x1E149}, {0x1E2EC,0x1E2EF}, {0x1E2F0,0x1E2F9}, - {0x1E8D0,0x1E8D6}, {0x1E944,0x1E94A}, {0x1E950,0x1E959}, {0x1FBF0,0x1FBF9}, {0xE0100,0xE01EF}, -}; + {0x0030, 0x0039}, {0x005F, 0x005F}, {0x00B7, 0x00B7}, {0x0300, 0x036F}, {0x0387, 0x0387}, + {0x0483, 0x0487}, {0x0591, 0x05BD}, {0x05BF, 0x05BF}, {0x05C1, 0x05C2}, {0x05C4, 0x05C5}, + {0x05C7, 0x05C7}, {0x0610, 0x061A}, {0x064B, 0x065F}, {0x0660, 0x0669}, {0x0670, 0x0670}, + {0x06D6, 0x06DC}, {0x06DF, 0x06E4}, {0x06E7, 0x06E8}, {0x06EA, 0x06ED}, {0x06F0, 0x06F9}, + {0x0711, 0x0711}, {0x0730, 0x074A}, {0x07A6, 0x07B0}, {0x07C0, 0x07C9}, {0x07EB, 0x07F3}, + {0x07FD, 0x07FD}, {0x0816, 0x0819}, {0x081B, 0x0823}, {0x0825, 0x0827}, {0x0829, 0x082D}, + {0x0859, 0x085B}, {0x08D3, 0x08E1}, {0x08E3, 0x0902}, {0x0903, 0x0903}, {0x093A, 0x093A}, + {0x093B, 0x093B}, {0x093C, 0x093C}, {0x093E, 0x0940}, {0x0941, 0x0948}, {0x0949, 0x094C}, + {0x094D, 0x094D}, {0x094E, 0x094F}, {0x0951, 0x0957}, {0x0962, 0x0963}, {0x0966, 0x096F}, + {0x0981, 0x0981}, {0x0982, 0x0983}, {0x09BC, 0x09BC}, {0x09BE, 0x09C0}, {0x09C1, 0x09C4}, + {0x09C7, 0x09C8}, {0x09CB, 0x09CC}, {0x09CD, 0x09CD}, {0x09D7, 0x09D7}, {0x09E2, 0x09E3}, + {0x09E6, 0x09EF}, {0x09FE, 0x09FE}, {0x0A01, 0x0A02}, {0x0A03, 0x0A03}, {0x0A3C, 0x0A3C}, + {0x0A3E, 0x0A40}, {0x0A41, 0x0A42}, {0x0A47, 0x0A48}, {0x0A4B, 0x0A4D}, {0x0A51, 0x0A51}, + {0x0A66, 0x0A6F}, {0x0A70, 0x0A71}, {0x0A75, 0x0A75}, {0x0A81, 0x0A82}, {0x0A83, 0x0A83}, + {0x0ABC, 0x0ABC}, {0x0ABE, 0x0AC0}, {0x0AC1, 0x0AC5}, {0x0AC7, 0x0AC8}, {0x0AC9, 0x0AC9}, + {0x0ACB, 0x0ACC}, {0x0ACD, 0x0ACD}, {0x0AE2, 0x0AE3}, {0x0AE6, 0x0AEF}, {0x0AFA, 0x0AFF}, + {0x0B01, 0x0B01}, {0x0B02, 0x0B03}, {0x0B3C, 0x0B3C}, {0x0B3E, 0x0B3E}, {0x0B3F, 0x0B3F}, + {0x0B40, 0x0B40}, {0x0B41, 0x0B44}, {0x0B47, 0x0B48}, {0x0B4B, 0x0B4C}, {0x0B4D, 0x0B4D}, + {0x0B55, 0x0B56}, {0x0B57, 0x0B57}, {0x0B62, 0x0B63}, {0x0B66, 0x0B6F}, {0x0B82, 0x0B82}, + {0x0BBE, 0x0BBF}, {0x0BC0, 0x0BC0}, {0x0BC1, 0x0BC2}, {0x0BC6, 0x0BC8}, {0x0BCA, 0x0BCC}, + {0x0BCD, 0x0BCD}, {0x0BD7, 0x0BD7}, {0x0BE6, 0x0BEF}, {0x0C00, 0x0C00}, {0x0C01, 0x0C03}, + {0x0C04, 0x0C04}, {0x0C3E, 0x0C40}, {0x0C41, 0x0C44}, {0x0C46, 0x0C48}, {0x0C4A, 0x0C4D}, + {0x0C55, 0x0C56}, {0x0C62, 0x0C63}, {0x0C66, 0x0C6F}, {0x0C81, 0x0C81}, {0x0C82, 0x0C83}, + {0x0CBC, 0x0CBC}, {0x0CBE, 0x0CBE}, {0x0CBF, 0x0CBF}, {0x0CC0, 0x0CC4}, {0x0CC6, 0x0CC6}, + {0x0CC7, 0x0CC8}, {0x0CCA, 0x0CCB}, {0x0CCC, 0x0CCD}, {0x0CD5, 0x0CD6}, {0x0CE2, 0x0CE3}, + {0x0CE6, 0x0CEF}, {0x0D00, 0x0D01}, {0x0D02, 0x0D03}, {0x0D3B, 0x0D3C}, {0x0D3E, 0x0D40}, + {0x0D41, 0x0D44}, {0x0D46, 0x0D48}, {0x0D4A, 0x0D4C}, {0x0D4D, 0x0D4D}, {0x0D57, 0x0D57}, + {0x0D62, 0x0D63}, {0x0D66, 0x0D6F}, {0x0D81, 0x0D81}, {0x0D82, 0x0D83}, {0x0DCA, 0x0DCA}, + {0x0DCF, 0x0DD1}, {0x0DD2, 0x0DD4}, {0x0DD6, 0x0DD6}, {0x0DD8, 0x0DDF}, {0x0DE6, 0x0DEF}, + {0x0DF2, 0x0DF3}, {0x0E32, 0x0E33}, {0x0E34, 0x0E3A}, {0x0E47, 0x0E4E}, {0x0E50, 0x0E59}, + {0x0EB2, 0x0EB3}, {0x0EB4, 0x0EBC}, {0x0EC8, 0x0ECD}, {0x0ED0, 0x0ED9}, {0x0F18, 0x0F19}, + {0x0F20, 0x0F29}, {0x0F35, 0x0F35}, {0x0F37, 0x0F37}, {0x0F39, 0x0F39}, {0x0F3E, 0x0F3F}, + {0x0F71, 0x0F7E}, {0x0F7F, 0x0F7F}, {0x0F80, 0x0F84}, {0x0F86, 0x0F87}, {0x0F8D, 0x0F97}, + {0x0F99, 0x0FBC}, {0x0FC6, 0x0FC6}, {0x102B, 0x102C}, {0x102D, 0x1030}, {0x1031, 0x1031}, + {0x1032, 0x1037}, {0x1038, 0x1038}, {0x1039, 0x103A}, {0x103B, 0x103C}, {0x103D, 0x103E}, + {0x1040, 0x1049}, {0x1056, 0x1057}, {0x1058, 0x1059}, {0x105E, 0x1060}, {0x1062, 0x1064}, + {0x1067, 0x106D}, {0x1071, 0x1074}, {0x1082, 0x1082}, {0x1083, 0x1084}, {0x1085, 0x1086}, + {0x1087, 0x108C}, {0x108D, 0x108D}, {0x108F, 0x108F}, {0x1090, 0x1099}, {0x109A, 0x109C}, + {0x109D, 0x109D}, {0x135D, 0x135F}, {0x1369, 0x1371}, {0x1712, 0x1714}, {0x1732, 0x1734}, + {0x1752, 0x1753}, {0x1772, 0x1773}, {0x17B4, 0x17B5}, {0x17B6, 0x17B6}, {0x17B7, 0x17BD}, + {0x17BE, 0x17C5}, {0x17C6, 0x17C6}, {0x17C7, 0x17C8}, {0x17C9, 0x17D3}, {0x17DD, 0x17DD}, + {0x17E0, 0x17E9}, {0x180B, 0x180D}, {0x1810, 0x1819}, {0x18A9, 0x18A9}, {0x1920, 0x1922}, + {0x1923, 0x1926}, {0x1927, 0x1928}, {0x1929, 0x192B}, {0x1930, 0x1931}, {0x1932, 0x1932}, + {0x1933, 0x1938}, {0x1939, 0x193B}, {0x1946, 0x194F}, {0x19D0, 0x19D9}, {0x19DA, 0x19DA}, + {0x1A17, 0x1A18}, {0x1A19, 0x1A1A}, {0x1A1B, 0x1A1B}, {0x1A55, 0x1A55}, {0x1A56, 0x1A56}, + {0x1A57, 0x1A57}, {0x1A58, 0x1A5E}, {0x1A60, 0x1A60}, {0x1A61, 0x1A61}, {0x1A62, 0x1A62}, + {0x1A63, 0x1A64}, {0x1A65, 0x1A6C}, {0x1A6D, 0x1A72}, {0x1A73, 0x1A7C}, {0x1A7F, 0x1A7F}, + {0x1A80, 0x1A89}, {0x1A90, 0x1A99}, {0x1AB0, 0x1ABD}, {0x1ABF, 0x1AC0}, {0x1B00, 0x1B03}, + {0x1B04, 0x1B04}, {0x1B34, 0x1B34}, {0x1B35, 0x1B35}, {0x1B36, 0x1B3A}, {0x1B3B, 0x1B3B}, + {0x1B3C, 0x1B3C}, {0x1B3D, 0x1B41}, {0x1B42, 0x1B42}, {0x1B43, 0x1B44}, {0x1B50, 0x1B59}, + {0x1B6B, 0x1B73}, {0x1B80, 0x1B81}, {0x1B82, 0x1B82}, {0x1BA1, 0x1BA1}, {0x1BA2, 0x1BA5}, + {0x1BA6, 0x1BA7}, {0x1BA8, 0x1BA9}, {0x1BAA, 0x1BAA}, {0x1BAB, 0x1BAD}, {0x1BB0, 0x1BB9}, + {0x1BE6, 0x1BE6}, {0x1BE7, 0x1BE7}, {0x1BE8, 0x1BE9}, {0x1BEA, 0x1BEC}, {0x1BED, 0x1BED}, + {0x1BEE, 0x1BEE}, {0x1BEF, 0x1BF1}, {0x1BF2, 0x1BF3}, {0x1C24, 0x1C2B}, {0x1C2C, 0x1C33}, + {0x1C34, 0x1C35}, {0x1C36, 0x1C37}, {0x1C40, 0x1C49}, {0x1C50, 0x1C59}, {0x1CD0, 0x1CD2}, + {0x1CD4, 0x1CE0}, {0x1CE1, 0x1CE1}, {0x1CE2, 0x1CE8}, {0x1CED, 0x1CED}, {0x1CF4, 0x1CF4}, + {0x1CF7, 0x1CF7}, {0x1CF8, 0x1CF9}, {0x1DC0, 0x1DF9}, {0x1DFB, 0x1DFF}, {0x203F, 0x2040}, + {0x2054, 0x2054}, {0x20D0, 0x20DC}, {0x20E1, 0x20E1}, {0x20E5, 0x20F0}, {0x2CEF, 0x2CF1}, + {0x2D7F, 0x2D7F}, {0x2DE0, 0x2DFF}, {0x302A, 0x302D}, {0x302E, 0x302F}, {0x3099, 0x309A}, + {0xA620, 0xA629}, {0xA66F, 0xA66F}, {0xA674, 0xA67D}, {0xA69E, 0xA69F}, {0xA6F0, 0xA6F1}, + {0xA802, 0xA802}, {0xA806, 0xA806}, {0xA80B, 0xA80B}, {0xA823, 0xA824}, {0xA825, 0xA826}, + {0xA827, 0xA827}, {0xA82C, 0xA82C}, {0xA880, 0xA881}, {0xA8B4, 0xA8C3}, {0xA8C4, 0xA8C5}, + {0xA8D0, 0xA8D9}, {0xA8E0, 0xA8F1}, {0xA8FF, 0xA8FF}, {0xA900, 0xA909}, {0xA926, 0xA92D}, + {0xA947, 0xA951}, {0xA952, 0xA953}, {0xA980, 0xA982}, {0xA983, 0xA983}, {0xA9B3, 0xA9B3}, + {0xA9B4, 0xA9B5}, {0xA9B6, 0xA9B9}, {0xA9BA, 0xA9BB}, {0xA9BC, 0xA9BD}, {0xA9BE, 0xA9C0}, + {0xA9D0, 0xA9D9}, {0xA9E5, 0xA9E5}, {0xA9F0, 0xA9F9}, {0xAA29, 0xAA2E}, {0xAA2F, 0xAA30}, + {0xAA31, 0xAA32}, {0xAA33, 0xAA34}, {0xAA35, 0xAA36}, {0xAA43, 0xAA43}, {0xAA4C, 0xAA4C}, + {0xAA4D, 0xAA4D}, {0xAA50, 0xAA59}, {0xAA7B, 0xAA7B}, {0xAA7C, 0xAA7C}, {0xAA7D, 0xAA7D}, + {0xAAB0, 0xAAB0}, {0xAAB2, 0xAAB4}, {0xAAB7, 0xAAB8}, {0xAABE, 0xAABF}, {0xAAC1, 0xAAC1}, + {0xAAEB, 0xAAEB}, {0xAAEC, 0xAAED}, {0xAAEE, 0xAAEF}, {0xAAF5, 0xAAF5}, {0xAAF6, 0xAAF6}, + {0xABE3, 0xABE4}, {0xABE5, 0xABE5}, {0xABE6, 0xABE7}, {0xABE8, 0xABE8}, {0xABE9, 0xABEA}, + {0xABEC, 0xABEC}, {0xABED, 0xABED}, {0xABF0, 0xABF9}, {0xFB1E, 0xFB1E}, {0xFE00, 0xFE0F}, + {0xFE20, 0xFE2F}, {0xFE33, 0xFE34}, {0xFE4D, 0xFE4F}, {0xFF10, 0xFF19}, {0xFF3F, 0xFF3F}, + {0xFF9E, 0xFF9F}, {0x101FD, 0x101FD}, {0x102E0, 0x102E0}, {0x10376, 0x1037A}, {0x104A0, 0x104A9}, + {0x10A01, 0x10A03}, {0x10A05, 0x10A06}, {0x10A0C, 0x10A0F}, {0x10A38, 0x10A3A}, {0x10A3F, 0x10A3F}, + {0x10AE5, 0x10AE6}, {0x10D24, 0x10D27}, {0x10D30, 0x10D39}, {0x10EAB, 0x10EAC}, {0x10F46, 0x10F50}, + {0x11000, 0x11000}, {0x11001, 0x11001}, {0x11002, 0x11002}, {0x11038, 0x11046}, {0x11066, 0x1106F}, + {0x1107F, 0x11081}, {0x11082, 0x11082}, {0x110B0, 0x110B2}, {0x110B3, 0x110B6}, {0x110B7, 0x110B8}, + {0x110B9, 0x110BA}, {0x110F0, 0x110F9}, {0x11100, 0x11102}, {0x11127, 0x1112B}, {0x1112C, 0x1112C}, + {0x1112D, 0x11134}, {0x11136, 0x1113F}, {0x11145, 0x11146}, {0x11173, 0x11173}, {0x11180, 0x11181}, + {0x11182, 0x11182}, {0x111B3, 0x111B5}, {0x111B6, 0x111BE}, {0x111BF, 0x111C0}, {0x111C9, 0x111CC}, + {0x111CE, 0x111CE}, {0x111CF, 0x111CF}, {0x111D0, 0x111D9}, {0x1122C, 0x1122E}, {0x1122F, 0x11231}, + {0x11232, 0x11233}, {0x11234, 0x11234}, {0x11235, 0x11235}, {0x11236, 0x11237}, {0x1123E, 0x1123E}, + {0x112DF, 0x112DF}, {0x112E0, 0x112E2}, {0x112E3, 0x112EA}, {0x112F0, 0x112F9}, {0x11300, 0x11301}, + {0x11302, 0x11303}, {0x1133B, 0x1133C}, {0x1133E, 0x1133F}, {0x11340, 0x11340}, {0x11341, 0x11344}, + {0x11347, 0x11348}, {0x1134B, 0x1134D}, {0x11357, 0x11357}, {0x11362, 0x11363}, {0x11366, 0x1136C}, + {0x11370, 0x11374}, {0x11435, 0x11437}, {0x11438, 0x1143F}, {0x11440, 0x11441}, {0x11442, 0x11444}, + {0x11445, 0x11445}, {0x11446, 0x11446}, {0x11450, 0x11459}, {0x1145E, 0x1145E}, {0x114B0, 0x114B2}, + {0x114B3, 0x114B8}, {0x114B9, 0x114B9}, {0x114BA, 0x114BA}, {0x114BB, 0x114BE}, {0x114BF, 0x114C0}, + {0x114C1, 0x114C1}, {0x114C2, 0x114C3}, {0x114D0, 0x114D9}, {0x115AF, 0x115B1}, {0x115B2, 0x115B5}, + {0x115B8, 0x115BB}, {0x115BC, 0x115BD}, {0x115BE, 0x115BE}, {0x115BF, 0x115C0}, {0x115DC, 0x115DD}, + {0x11630, 0x11632}, {0x11633, 0x1163A}, {0x1163B, 0x1163C}, {0x1163D, 0x1163D}, {0x1163E, 0x1163E}, + {0x1163F, 0x11640}, {0x11650, 0x11659}, {0x116AB, 0x116AB}, {0x116AC, 0x116AC}, {0x116AD, 0x116AD}, + {0x116AE, 0x116AF}, {0x116B0, 0x116B5}, {0x116B6, 0x116B6}, {0x116B7, 0x116B7}, {0x116C0, 0x116C9}, + {0x1171D, 0x1171F}, {0x11720, 0x11721}, {0x11722, 0x11725}, {0x11726, 0x11726}, {0x11727, 0x1172B}, + {0x11730, 0x11739}, {0x1182C, 0x1182E}, {0x1182F, 0x11837}, {0x11838, 0x11838}, {0x11839, 0x1183A}, + {0x118E0, 0x118E9}, {0x11930, 0x11935}, {0x11937, 0x11938}, {0x1193B, 0x1193C}, {0x1193D, 0x1193D}, + {0x1193E, 0x1193E}, {0x11940, 0x11940}, {0x11942, 0x11942}, {0x11943, 0x11943}, {0x11950, 0x11959}, + {0x119D1, 0x119D3}, {0x119D4, 0x119D7}, {0x119DA, 0x119DB}, {0x119DC, 0x119DF}, {0x119E0, 0x119E0}, + {0x119E4, 0x119E4}, {0x11A01, 0x11A0A}, {0x11A33, 0x11A38}, {0x11A39, 0x11A39}, {0x11A3B, 0x11A3E}, + {0x11A47, 0x11A47}, {0x11A51, 0x11A56}, {0x11A57, 0x11A58}, {0x11A59, 0x11A5B}, {0x11A8A, 0x11A96}, + {0x11A97, 0x11A97}, {0x11A98, 0x11A99}, {0x11C2F, 0x11C2F}, {0x11C30, 0x11C36}, {0x11C38, 0x11C3D}, + {0x11C3E, 0x11C3E}, {0x11C3F, 0x11C3F}, {0x11C50, 0x11C59}, {0x11C92, 0x11CA7}, {0x11CA9, 0x11CA9}, + {0x11CAA, 0x11CB0}, {0x11CB1, 0x11CB1}, {0x11CB2, 0x11CB3}, {0x11CB4, 0x11CB4}, {0x11CB5, 0x11CB6}, + {0x11D31, 0x11D36}, {0x11D3A, 0x11D3A}, {0x11D3C, 0x11D3D}, {0x11D3F, 0x11D45}, {0x11D47, 0x11D47}, + {0x11D50, 0x11D59}, {0x11D8A, 0x11D8E}, {0x11D90, 0x11D91}, {0x11D93, 0x11D94}, {0x11D95, 0x11D95}, + {0x11D96, 0x11D96}, {0x11D97, 0x11D97}, {0x11DA0, 0x11DA9}, {0x11EF3, 0x11EF4}, {0x11EF5, 0x11EF6}, + {0x16A60, 0x16A69}, {0x16AF0, 0x16AF4}, {0x16B30, 0x16B36}, {0x16B50, 0x16B59}, {0x16F4F, 0x16F4F}, + {0x16F51, 0x16F87}, {0x16F8F, 0x16F92}, {0x16FE4, 0x16FE4}, {0x16FF0, 0x16FF1}, {0x1BC9D, 0x1BC9E}, + {0x1D165, 0x1D166}, {0x1D167, 0x1D169}, {0x1D16D, 0x1D172}, {0x1D17B, 0x1D182}, {0x1D185, 0x1D18B}, + {0x1D1AA, 0x1D1AD}, {0x1D242, 0x1D244}, {0x1D7CE, 0x1D7FF}, {0x1DA00, 0x1DA36}, {0x1DA3B, 0x1DA6C}, + {0x1DA75, 0x1DA75}, {0x1DA84, 0x1DA84}, {0x1DA9B, 0x1DA9F}, {0x1DAA1, 0x1DAAF}, {0x1E000, 0x1E006}, + {0x1E008, 0x1E018}, {0x1E01B, 0x1E021}, {0x1E023, 0x1E024}, {0x1E026, 0x1E02A}, {0x1E130, 0x1E136}, + {0x1E140, 0x1E149}, {0x1E2EC, 0x1E2EF}, {0x1E2F0, 0x1E2F9}, {0x1E8D0, 0x1E8D6}, {0x1E944, 0x1E94A}, + {0x1E950, 0x1E959}, {0x1FBF0, 0x1FBF9}, {0xE0100, 0xE01EF}, +}; // // Return the location of the next character or UTF8 codepoint. // (i.e. skip forward one codepoint at a time, not one byte at a time) // -public const char *next_char(const char *str, const char *end) -{ - if (likely(str+1 <= end) && likely((str[0] & 0x80) == 0x0)) - return str+1; - if (likely(str+2 <= end) && (str[0] & 0xe0) == 0xc0) - return str+2; - if (likely(str+3 <= end) && (str[0] & 0xf0) == 0xe0) - return str+3; - if (likely(str+4 <= end) && (str[0] & 0xf8) == 0xf0) - return str+4; - return likely(str+1 <= end) ? str+1 : end; +public +const char *next_char(const char *str, const char *end) { + if (likely(str + 1 <= end) && likely((str[0] & 0x80) == 0x0)) return str + 1; + if (likely(str + 2 <= end) && (str[0] & 0xe0) == 0xc0) return str + 2; + if (likely(str + 3 <= end) && (str[0] & 0xf0) == 0xe0) return str + 3; + if (likely(str + 4 <= end) && (str[0] & 0xf8) == 0xf0) return str + 4; + return likely(str + 1 <= end) ? str + 1 : end; } // // Return the location of the previous character or UTF8 codepoint. // (i.e. skip backwards one codepoint at a time, not one byte at a time) // -public const char *prev_char(const char *start, const char *str) -{ - if (likely(str-1 >= start) && likely((str[-1] & 0x80) == 0x0)) - return str-1; - if (likely(str-2 >= start) && (str[-2] & 0xe0) == 0xc0) - return str-2; - if (likely(str-3 >= start) && (str[-3] & 0xf0) == 0xe0) - return str-3; - if (likely(str-4 >= start) && (str[-4] & 0xf8) == 0xf0) - return str-4; - return likely(str-1 >= start) ? str-1 : start; +public +const char *prev_char(const char *start, const char *str) { + if (likely(str - 1 >= start) && likely((str[-1] & 0x80) == 0x0)) return str - 1; + if (likely(str - 2 >= start) && (str[-2] & 0xe0) == 0xc0) return str - 2; + if (likely(str - 3 >= start) && (str[-3] & 0xf0) == 0xe0) return str - 3; + if (likely(str - 4 >= start) && (str[-4] & 0xf8) == 0xf0) return str - 4; + return likely(str - 1 >= start) ? str - 1 : start; } -static uint32_t get_codepoint(const char *str, const char *end) -{ - if (unlikely(str >= end)) - return (uint32_t)-1; +static uint32_t get_codepoint(const char *str, const char *end) { + if (unlikely(str >= end)) return (uint32_t)-1; unsigned char c1 = (unsigned char)str[0]; int seqlen; uint32_t codepoint; if (likely((c1 & 0x80) == 0)) { - return (uint32_t) c1; + return (uint32_t)c1; } else if ((c1 & 0xE0) == 0xC0) { - codepoint = (uint32_t) (c1 & 0x1F); + codepoint = (uint32_t)(c1 & 0x1F); seqlen = 2; } else if ((c1 & 0xF0) == 0xE0) { - codepoint = (uint32_t) (c1 & 0x0F); + codepoint = (uint32_t)(c1 & 0x0F); seqlen = 3; } else if ((c1 & 0xF8) == 0xF0) { - codepoint = (uint32_t) (c1 & 0x07); + codepoint = (uint32_t)(c1 & 0x07); seqlen = 4; } else { return (uint32_t)-1; } for (int i = 1; i < seqlen; ++i) { - if (unlikely((&str[i] >= end) || (str[i] & 0xC0) != 0x80)) - return (uint32_t)-1; + if (unlikely((&str[i] >= end) || (str[i] & 0xC0) != 0x80)) return (uint32_t)-1; codepoint = ((codepoint << 6) | (uint32_t)(str[i] & 0x3F)); } - return codepoint; + return codepoint; } -static bool find_in_ranges(uint32_t codepoint, const uint32_t ranges[][2], size_t nranges) -{ +static bool find_in_ranges(uint32_t codepoint, const uint32_t ranges[][2], size_t nranges) { // Binary search: int lo = 0, hi = nranges - 1; while (lo <= hi) { int mid = (lo + hi) / 2; - if (ranges[mid][0] <= codepoint && codepoint <= ranges[mid][1]) - return true; - else if (codepoint > ranges[mid][1]) - lo = mid + 1; - else if (codepoint < ranges[mid][0]) - hi = mid - 1; + if (ranges[mid][0] <= codepoint && codepoint <= ranges[mid][1]) return true; + else if (codepoint > ranges[mid][1]) lo = mid + 1; + else if (codepoint < ranges[mid][0]) hi = mid - 1; } return false; } -public bool isidstart(const char *str, const char *end) -{ +public +bool isidstart(const char *str, const char *end) { if (unlikely(str >= end)) return false; else if (isalpha(*str) || *str == '_') return true; else if (likely((*str & 0x80) == 0)) return false; uint32_t codepoint = get_codepoint(str, end); - return codepoint != (uint32_t)-1 - && find_in_ranges(codepoint, XID_Start, ARRAY_LEN(XID_Start)); + return codepoint != (uint32_t)-1 && find_in_ranges(codepoint, XID_Start, ARRAY_LEN(XID_Start)); } -public bool isidcontinue(const char *str, const char *end) -{ +public +bool isidcontinue(const char *str, const char *end) { if (unlikely(str >= end)) return false; else if (isalnum(*str) || *str == '_') return true; else if (likely((*str & 0x80) == 0)) return false; uint32_t codepoint = get_codepoint(str, end); return codepoint != (uint32_t)-1 - && (find_in_ranges(codepoint, XID_Start, ARRAY_LEN(XID_Start)) - || find_in_ranges(codepoint, XID_Continue_only, ARRAY_LEN(XID_Continue_only))); + && (find_in_ranges(codepoint, XID_Start, ARRAY_LEN(XID_Start)) + || find_in_ranges(codepoint, XID_Continue_only, ARRAY_LEN(XID_Continue_only))); } // vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 @@ -7,13 +7,9 @@ #define UTF8_MAXCHARLEN 4 -__attribute__((nonnull, pure)) -const char *next_char(const char *str, const char *end); -__attribute__((nonnull, pure)) -const char *prev_char(const char *start, const char *str); -__attribute__((nonnull, pure)) -bool isidstart(const char *str, const char *end); -__attribute__((nonnull, pure)) -bool isidcontinue(const char *str, const char *end); +__attribute__((nonnull, pure)) const char *next_char(const char *str, const char *end); +__attribute__((nonnull, pure)) const char *prev_char(const char *start, const char *str); +__attribute__((nonnull, pure)) bool isidstart(const char *str, const char *end); +__attribute__((nonnull, pure)) bool isidcontinue(const char *str, const char *end); // vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 @@ -15,21 +15,24 @@ // Helper function to skip past all spaces (and comments) // Returns a pointer to the first non-space character. // -public const char *after_spaces(const char *str, bool skip_nl, const char *end) -{ +public +const char *after_spaces(const char *str, bool skip_nl, const char *end) { // Skip whitespace and comments: - skip_whitespace: +skip_whitespace: if (str >= end) return str; switch (*str) { - case '\r': case '\n': + case '\r': + case '\n': if (!skip_nl) break; - __attribute__ ((fallthrough)); - case ' ': case '\t': { + __attribute__((fallthrough)); + case ' ': + case '\t': { ++str; goto skip_whitespace; } case '#': { - while (str < end && *str != '\n') ++str; + while (str < end && *str != '\n') + ++str; goto skip_whitespace; } default: break; @@ -41,8 +44,8 @@ public const char *after_spaces(const char *str, bool skip_nl, const char *end) // Return the first character after a valid BP name, or NULL if none is // found. // -public const char *after_name(const char *str, const char *end) -{ +public +const char *after_name(const char *str, const char *end) { if (str >= end) return end; if (*str == '|') return &str[1]; if (*str == '^' || *str == '_' || *str == '$') { @@ -50,8 +53,7 @@ public const char *after_name(const char *str, const char *end) } if (!isalpha(*str)) return NULL; for (++str; str < end; ++str) { - if (!(isalnum(*str) || *str == '-')) - break; + if (!(isalnum(*str) || *str == '-')) break; } return str; } @@ -59,8 +61,8 @@ public const char *after_name(const char *str, const char *end) // // Check if a character is found and if so, move past it. // -public bool matchchar(const char **str, char c, bool skip_nl, const char *end) -{ +public +bool matchchar(const char **str, char c, bool skip_nl, const char *end) { const char *next = after_spaces(*str, skip_nl, end); if (next >= end) return false; if (*next == c) { @@ -73,8 +75,8 @@ public bool matchchar(const char **str, char c, bool skip_nl, const char *end) // // Check if a string is found and if so, move past it. // -public bool matchstr(const char **str, const char *target, bool skip_nl, const char *end) -{ +public +bool matchstr(const char **str, const char *target, bool skip_nl, const char *end) { const char *next = after_spaces(*str, skip_nl, end); if (next + strlen(target) > end) return false; if (strncmp(next, target, strlen(target)) == 0) { @@ -89,24 +91,27 @@ public bool matchstr(const char **str, const char *target, bool skip_nl, const c // character that was escaped. // Set *end = the first character past the end of the escape sequence. // -public char unescapechar(const char *escaped, const char **after, const char *end) -{ +public +char unescapechar(const char *escaped, const char **after, const char *end) { size_t len = 0; unsigned char ret = '\\'; if (escaped >= end) goto finished; ret = (unsigned char)*escaped; ++len; switch (*escaped) { - case 'a': ret = '\a'; break; case 'b': ret = '\b'; break; - case 'n': ret = '\n'; break; case 'r': ret = '\r'; break; - case 't': ret = '\t'; break; case 'v': ret = '\v'; break; - case 'e': ret = '\033'; break; case '\\': ret = '\\'; break; + case 'a': ret = '\a'; break; + case 'b': ret = '\b'; break; + case 'n': ret = '\n'; break; + case 'r': ret = '\r'; break; + case 't': ret = '\t'; break; + case 'v': ret = '\v'; break; + case 'e': ret = '\033'; break; + case '\\': ret = '\\'; break; case 'x': { // Hex static const unsigned char hextable[255] = { - ['0']=0x10, ['1']=0x1, ['2']=0x2, ['3']=0x3, ['4']=0x4, - ['5']=0x5, ['6']=0x6, ['7']=0x7, ['8']=0x8, ['9']=0x9, - ['a']=0xa, ['b']=0xb, ['c']=0xc, ['d']=0xd, ['e']=0xe, ['f']=0xf, - ['A']=0xa, ['B']=0xb, ['C']=0xc, ['D']=0xd, ['E']=0xe, ['F']=0xf, + ['0'] = 0x10, ['1'] = 0x1, ['2'] = 0x2, ['3'] = 0x3, ['4'] = 0x4, ['5'] = 0x5, ['6'] = 0x6, ['7'] = 0x7, + ['8'] = 0x8, ['9'] = 0x9, ['a'] = 0xa, ['b'] = 0xb, ['c'] = 0xc, ['d'] = 0xd, ['e'] = 0xe, ['f'] = 0xf, + ['A'] = 0xa, ['B'] = 0xb, ['C'] = 0xc, ['D'] = 0xd, ['E'] = 0xe, ['F'] = 0xf, }; if (escaped + 2 >= end) { len = 0; @@ -117,7 +122,14 @@ public char unescapechar(const char *escaped, const char **after, const char *en } break; } - case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': { // Octal + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': { // Octal ret = (unsigned char)(escaped[0] - '0'); if (escaped + 2 >= end) { len = 0; @@ -132,11 +144,9 @@ public char unescapechar(const char *escaped, const char **after, const char *en } break; } - default: - len = 0; - goto finished; + default: len = 0; goto finished; } - finished: +finished: if (after) *after = &escaped[len]; return (char)ret; } @@ -144,12 +154,11 @@ public char unescapechar(const char *escaped, const char **after, const char *en // // Free memory, but also set the pointer to NULL for safety // -public void delete(void *p) -{ - if (*(void**)p == NULL) - errx(EXIT_FAILURE, "attempt to free(NULL)"); - free(*(void**)p); - *((void**)p) = NULL; +public +void delete(void *p) { + if (*(void **)p == NULL) errx(EXIT_FAILURE, "attempt to free(NULL)"); + free(*(void **)p); + *((void **)p) = NULL; } // vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 @@ -6,9 +6,8 @@ #include <err.h> #include <stdarg.h> #include <stdbool.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> +#include <stdlib.h> // IWYU pragma: export +#include <string.h> // IWYU pragma: export #include <unistd.h> #ifndef auto @@ -18,35 +17,35 @@ #define S1(x) #x #define S2(x) S1(x) -#define require(e, msg) ({\ - __typeof__(e) __expr = e; \ - if (_Generic(__expr, int: (ssize_t)__expr < 0, ssize_t: (ssize_t)__expr < 0, default: !__expr)) errx(1, __FILE__":"S2(__LINE__)": " msg); \ - __expr; \ -}) +#define require(e, msg) \ + ({ \ + __typeof__(e) __expr = e; \ + if (_Generic(__expr, int: (ssize_t)__expr < 0, ssize_t: (ssize_t)__expr < 0, default: !__expr)) \ + errx(1, __FILE__ ":" S2(__LINE__) ": " msg); \ + __expr; \ + }) -#define When(x, _tag) ((x)->type == _tag ? &(x)->__tagged._tag : (errx(1, __FILE__ ":%d This was supposed to be a " # _tag "\n", __LINE__), &(x)->__tagged._tag)) +#define When(x, _tag) \ + ((x)->type == _tag \ + ? &(x)->__tagged._tag \ + : (errx(1, __FILE__ ":%d This was supposed to be a " #_tag "\n", __LINE__), &(x)->__tagged._tag)) #ifndef public -#define public __attribute__ ((visibility ("default"))) +#define public __attribute__((visibility("default"))) #endif #define new(t) require(calloc(1, sizeof(t)), "`new(" #t ")` allocation failure") #define checked_strdup(s) require(strdup(s), "`checked_strdup(" #s ")` allocation failure") -#define grow(arr,n) require(realloc(arr,sizeof(arr[0])*(n)), "`grow(" #arr ", " #n ")` allocation failure") +#define grow(arr, n) require(realloc(arr, sizeof(arr[0]) * (n)), "`grow(" #arr ", " #n ")` allocation failure") #define streq(a, b) (strcmp(a, b) == 0) -__attribute__((nonnull(1))) -char unescapechar(const char *escaped, const char **after, const char *end); -__attribute__((pure, nonnull)) -const char *after_name(const char *str, const char *end); -__attribute__((pure, nonnull, returns_nonnull)) -const char *after_spaces(const char *str, bool skip_nl, const char *end); -__attribute__((nonnull)) -bool matchchar(const char **str, char c, bool skip_nl, const char *end); -__attribute__((nonnull)) -bool matchstr(const char **str, const char *target, bool skip_nl, const char *end); -__attribute__((nonnull)) -void delete(void *p); +__attribute__((nonnull(1))) char unescapechar(const char *escaped, const char **after, const char *end); +__attribute__((pure, nonnull)) const char *after_name(const char *str, const char *end); +__attribute__((pure, nonnull, returns_nonnull)) const char *after_spaces(const char *str, bool skip_nl, + const char *end); +__attribute__((nonnull)) bool matchchar(const char **str, char c, bool skip_nl, const char *end); +__attribute__((nonnull)) bool matchstr(const char **str, const char *target, bool skip_nl, const char *end); +__attribute__((nonnull)) void delete(void *p); // vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 |
