Cleaned up lpeg matching stuff to use lpeg.Carg(1) instead of
lpeg.userdata, and switched indent/dedent/nodent to use a number to
track indentation instead of a stack.
This commit is contained in:
Bruce Hill 2018-05-26 19:24:22 -07:00
parent 0c7c06beab
commit 6ce32bdd25
3 changed files with 66 additions and 77 deletions

View File

@ -28,8 +28,8 @@ end
re = require('re') re = require('re')
lpeg = require('lpeg') lpeg = require('lpeg')
lpeg.setmaxstack(10000) lpeg.setmaxstack(10000)
local P, R, V, S, Cg, C, Cp, B local P, R, V, S, Cg, C, Cp, B, Cmt, Carg
P, R, V, S, Cg, C, Cp, B = lpeg.P, lpeg.R, lpeg.V, lpeg.S, lpeg.Cg, lpeg.C, lpeg.Cp, lpeg.B P, R, V, S, Cg, C, Cp, B, Cmt, Carg = lpeg.P, lpeg.R, lpeg.V, lpeg.S, lpeg.Cg, lpeg.C, lpeg.Cp, lpeg.B, lpeg.Cmt, lpeg.Carg
local utils = require('utils') local utils = require('utils')
local new_uuid = require('uuid') local new_uuid = require('uuid')
local immutable = require('immutable') local immutable = require('immutable')
@ -183,42 +183,38 @@ do
_with_0.operator = _with_0.operator_char ^ 1 _with_0.operator = _with_0.operator_char ^ 1
_with_0.utf8_char = (R("\194\223") * R("\128\191") + R("\224\239") * R("\128\191") * R("\128\191") + R("\240\244") * R("\128\191") * R("\128\191") * R("\128\191")) _with_0.utf8_char = (R("\194\223") * R("\128\191") + R("\224\239") * R("\128\191") * R("\128\191") + R("\240\244") * R("\128\191") * R("\128\191") * R("\128\191"))
_with_0.ident_char = R("az", "AZ", "09") + P("_") + _with_0.utf8_char _with_0.ident_char = R("az", "AZ", "09") + P("_") + _with_0.utf8_char
_with_0.indent = P(function(self, start) _with_0.indent = Cmt(Carg(1), function(self, start, userdata)
local nodent = lpeg.userdata.indent_stack[#lpeg.userdata.indent_stack] if #self:match("^[ ]*", start) == userdata.indent + 4 then
local indented = nodent .. " " userdata.indent = userdata.indent + 4
if self:sub(start, start + #indented - 1) == indented then return start + userdata.indent
insert(lpeg.userdata.indent_stack, indented)
return start + #indented
end end
end) end)
_with_0.dedent = P(function(self, start) _with_0.dedent = Cmt(Carg(1), function(self, start, userdata)
local nodent = lpeg.userdata.indent_stack[#lpeg.userdata.indent_stack] if #self:match("^[ ]*", start) <= userdata.indent - 4 then
local spaces = self:match("^[ ]*", start) userdata.indent = userdata.indent - 4
if #spaces <= #nodent - 4 then
remove(lpeg.userdata.indent_stack)
return start return start
end end
end) end)
_with_0.nodent = P(function(self, start) _with_0.nodent = Cmt(Carg(1), function(self, start, userdata)
local nodent = lpeg.userdata.indent_stack[#lpeg.userdata.indent_stack] if #self:match("^[ ]*", start) >= userdata.indent then
if self:sub(start, start + #nodent - 1) == nodent then return start + userdata.indent
return start + #nodent
end end
end) end)
_with_0.error = function(src, end_pos, start_pos, err_msg) _with_0.userdata = Carg(1)
local seen_errors = lpeg.userdata.errors _with_0.error = function(src, end_pos, start_pos, err_msg, userdata)
local seen_errors = userdata.errors
if seen_errors[start_pos] then if seen_errors[start_pos] then
return true return true
end end
local err_pos = start_pos local err_pos = start_pos
local text_loc = lpeg.userdata.source:sub(err_pos, err_pos) local text_loc = userdata.source:sub(err_pos, err_pos)
local line_no = text_loc:get_line_number() local line_no = text_loc:get_line_number()
src = FILE_CACHE[text_loc.filename] src = FILE_CACHE[text_loc.filename]
local prev_line = line_no == 1 and "" or src:sub(LINE_STARTS[src][line_no - 1] or 1, LINE_STARTS[src][line_no] - 2) local prev_line = line_no == 1 and "" or src:sub(LINE_STARTS[src][line_no - 1] or 1, LINE_STARTS[src][line_no] - 2)
local err_line = src:sub(LINE_STARTS[src][line_no], (LINE_STARTS[src][line_no + 1] or 0) - 2) local err_line = src:sub(LINE_STARTS[src][line_no], (LINE_STARTS[src][line_no + 1] or 0) - 2)
local next_line = src:sub(LINE_STARTS[src][line_no + 1] or -1, (LINE_STARTS[src][line_no + 2] or 0) - 2) local next_line = src:sub(LINE_STARTS[src][line_no + 1] or -1, (LINE_STARTS[src][line_no + 2] or 0) - 2)
local pointer = ("-"):rep(err_pos - LINE_STARTS[src][line_no]) .. "^" local pointer = ("-"):rep(err_pos - LINE_STARTS[src][line_no]) .. "^"
err_msg = (err_msg or "Parse error") .. " at " .. tostring(lpeg.userdata.source.filename) .. ":" .. tostring(line_no) .. ":\n" err_msg = (err_msg or "Parse error") .. " at " .. tostring(userdata.source.filename) .. ":" .. tostring(line_no) .. ":\n"
if #prev_line > 0 then if #prev_line > 0 then
err_msg = err_msg .. ("\n" .. prev_line) err_msg = err_msg .. ("\n" .. prev_line)
end end
@ -234,8 +230,8 @@ end
setmetatable(NOMSU_DEFS, { setmetatable(NOMSU_DEFS, {
__index = function(self, key) __index = function(self, key)
local make_node local make_node
make_node = function(start, value, stop) make_node = function(start, value, stop, userdata)
local source = lpeg.userdata.source:sub(start, stop) local source = userdata.source:sub(start, stop)
local tree local tree
if Types[key].is_multi then if Types[key].is_multi then
tree = Types[key](Tuple(unpack(value)), source) tree = Types[key](Tuple(unpack(value)), source)
@ -255,7 +251,7 @@ do
anon_def <- ({ident} (" "*) ":" anon_def <- ({ident} (" "*) ":"
{((%nl " "+ [^%nl]*)+) / ([^%nl]*)}) -> "%1 <- %2" {((%nl " "+ [^%nl]*)+) / ([^%nl]*)}) -> "%1 <- %2"
captured_def <- ({ident} (" "*) "(" {ident} ")" (" "*) ":" captured_def <- ({ident} (" "*) "(" {ident} ")" (" "*) ":"
{((%nl " "+ [^%nl]*)+) / ([^%nl]*)}) -> "%1 <- (({} %3 {}) -> %2)" {((%nl " "+ [^%nl]*)+) / ([^%nl]*)}) -> "%1 <- (({} %3 {} %%userdata) -> %2)"
ident <- [a-zA-Z_][a-zA-Z0-9_]* ident <- [a-zA-Z_][a-zA-Z0-9_]*
comment <- "--" [^%nl]* comment <- "--" [^%nl]*
]]) ]])
@ -322,17 +318,14 @@ do
end end
local userdata = { local userdata = {
source_code = nomsu_code, source_code = nomsu_code,
indent_stack = { indent = 0,
""
},
errors = { }, errors = { },
source = nomsu_code.source source = nomsu_code.source
} }
local old_userdata local tree = NOMSU_PATTERN:match(tostring(nomsu_code), nil, userdata)
old_userdata, lpeg.userdata = lpeg.userdata, userdata if not (tree) then
local tree = NOMSU_PATTERN:match(tostring(nomsu_code)) error("In file " .. tostring(colored.blue(filename)) .. " failed to parse:\n" .. tostring(colored.onyellow(colored.black(nomsu_code))))
lpeg.userdata = old_userdata end
assert(tree, "In file " .. tostring(colored.blue(filename)) .. " failed to parse:\n" .. tostring(colored.onyellow(colored.black(nomsu_code))))
if next(userdata.errors) then if next(userdata.errors) then
local keys = utils.keys(userdata.errors) local keys = utils.keys(userdata.errors)
table.sort(keys) table.sort(keys)

View File

@ -33,7 +33,7 @@ if jit
re = require 're' re = require 're'
lpeg = require 'lpeg' lpeg = require 'lpeg'
lpeg.setmaxstack 10000 lpeg.setmaxstack 10000
{:P,:R,:V,:S,:Cg,:C,:Cp,:B} = lpeg {:P,:R,:V,:S,:Cg,:C,:Cp,:B,:Cmt,:Carg} = lpeg
utils = require 'utils' utils = require 'utils'
new_uuid = require 'uuid' new_uuid = require 'uuid'
immutable = require 'immutable' immutable = require 'immutable'
@ -156,42 +156,39 @@ NOMSU_DEFS = with {}
-- If the line begins with #indent+4 spaces, the pattern matches *those* spaces -- If the line begins with #indent+4 spaces, the pattern matches *those* spaces
-- and adds them to the stack (not any more). -- and adds them to the stack (not any more).
.indent = P (start)=> .indent = Cmt Carg(1), (start, userdata)=>
nodent = lpeg.userdata.indent_stack[#lpeg.userdata.indent_stack] if #@match("^[ ]*", start) == userdata.indent + 4
indented = nodent.." " userdata.indent += 4
if @sub(start, start+#indented-1) == indented return start + userdata.indent
insert(lpeg.userdata.indent_stack, indented)
return start + #indented
-- If the number of leading space characters is <= the number of space on the top of the -- If the number of leading space characters is <= the number of space on the top of the
-- stack minus 4, this pattern matches and pops off the top of the stack exactly once. -- stack minus 4, this pattern matches and pops off the top of the stack exactly once.
.dedent = P (start)=> .dedent = Cmt Carg(1), (start, userdata)=>
nodent = lpeg.userdata.indent_stack[#lpeg.userdata.indent_stack] if #@match("^[ ]*", start) <= userdata.indent - 4
spaces = @match("^[ ]*", start) userdata.indent -= 4
if #spaces <= #nodent-4
remove(lpeg.userdata.indent_stack)
return start return start
-- If the number of leading space characters is >= the number on the top of the -- If the number of leading space characters is >= the number on the top of the
-- stack, this pattern matches and does not modify the stack. -- stack, this pattern matches and does not modify the stack.
.nodent = P (start)=> .nodent = Cmt Carg(1), (start, userdata)=>
nodent = lpeg.userdata.indent_stack[#lpeg.userdata.indent_stack] if #@match("^[ ]*", start) >= userdata.indent
if @sub(start, start+#nodent-1) == nodent return start + userdata.indent
return start + #nodent
.error = (src,end_pos,start_pos,err_msg)-> .userdata = Carg(1)
seen_errors = lpeg.userdata.errors
.error = (src,end_pos,start_pos,err_msg,userdata)->
seen_errors = userdata.errors
if seen_errors[start_pos] if seen_errors[start_pos]
return true return true
err_pos = start_pos err_pos = start_pos
--if src\sub(err_pos,err_pos)\match("[\r\n]") --if src\sub(err_pos,err_pos)\match("[\r\n]")
-- err_pos += #src\match("[ \t\n\r]*", err_pos) -- err_pos += #src\match("[ \t\n\r]*", err_pos)
text_loc = lpeg.userdata.source\sub(err_pos,err_pos) text_loc = userdata.source\sub(err_pos,err_pos)
line_no = text_loc\get_line_number! line_no = text_loc\get_line_number!
src = FILE_CACHE[text_loc.filename] src = FILE_CACHE[text_loc.filename]
prev_line = line_no == 1 and "" or src\sub(LINE_STARTS[src][line_no-1] or 1, LINE_STARTS[src][line_no]-2) prev_line = line_no == 1 and "" or src\sub(LINE_STARTS[src][line_no-1] or 1, LINE_STARTS[src][line_no]-2)
err_line = src\sub(LINE_STARTS[src][line_no], (LINE_STARTS[src][line_no+1] or 0)-2) err_line = src\sub(LINE_STARTS[src][line_no], (LINE_STARTS[src][line_no+1] or 0)-2)
next_line = src\sub(LINE_STARTS[src][line_no+1] or -1, (LINE_STARTS[src][line_no+2] or 0)-2) next_line = src\sub(LINE_STARTS[src][line_no+1] or -1, (LINE_STARTS[src][line_no+2] or 0)-2)
pointer = ("-")\rep(err_pos-LINE_STARTS[src][line_no]) .. "^" pointer = ("-")\rep(err_pos-LINE_STARTS[src][line_no]) .. "^"
err_msg = (err_msg or "Parse error").." at #{lpeg.userdata.source.filename}:#{line_no}:\n" err_msg = (err_msg or "Parse error").." at #{userdata.source.filename}:#{line_no}:\n"
if #prev_line > 0 then err_msg ..= "\n"..prev_line if #prev_line > 0 then err_msg ..= "\n"..prev_line
err_msg ..= "\n#{err_line}\n#{pointer}" err_msg ..= "\n#{err_line}\n#{pointer}"
if #next_line > 0 then err_msg ..= "\n"..next_line if #next_line > 0 then err_msg ..= "\n"..next_line
@ -200,8 +197,8 @@ NOMSU_DEFS = with {}
return true return true
setmetatable(NOMSU_DEFS, {__index:(key)=> setmetatable(NOMSU_DEFS, {__index:(key)=>
make_node = (start, value, stop)-> make_node = (start, value, stop, userdata)->
source = lpeg.userdata.source\sub(start, stop) source = userdata.source\sub(start, stop)
tree = if Types[key].is_multi tree = if Types[key].is_multi
Types[key](Tuple(unpack(value)), source) Types[key](Tuple(unpack(value)), source)
else Types[key](value, source) else Types[key](value, source)
@ -219,7 +216,7 @@ NOMSU_PATTERN = do
anon_def <- ({ident} (" "*) ":" anon_def <- ({ident} (" "*) ":"
{((%nl " "+ [^%nl]*)+) / ([^%nl]*)}) -> "%1 <- %2" {((%nl " "+ [^%nl]*)+) / ([^%nl]*)}) -> "%1 <- %2"
captured_def <- ({ident} (" "*) "(" {ident} ")" (" "*) ":" captured_def <- ({ident} (" "*) "(" {ident} ")" (" "*) ":"
{((%nl " "+ [^%nl]*)+) / ([^%nl]*)}) -> "%1 <- (({} %3 {}) -> %2)" {((%nl " "+ [^%nl]*)+) / ([^%nl]*)}) -> "%1 <- (({} %3 {} %%userdata) -> %2)"
ident <- [a-zA-Z_][a-zA-Z0-9_]* ident <- [a-zA-Z_][a-zA-Z0-9_]*
comment <- "--" [^%nl]* comment <- "--" [^%nl]*
]] ]]
@ -329,16 +326,15 @@ class NomsuCompiler
filename = "<nomsu chunk ##{_nomsu_chunk_counter}>.nom" filename = "<nomsu chunk ##{_nomsu_chunk_counter}>.nom"
FILE_CACHE[filename] = nomsu_code FILE_CACHE[filename] = nomsu_code
nomsu_code = Nomsu(filename, nomsu_code) nomsu_code = Nomsu(filename, nomsu_code)
userdata = { userdata = {
source_code:nomsu_code, indent_stack: {""}, errors: {}, source_code:nomsu_code, indent: 0, errors: {},
source: nomsu_code.source, source: nomsu_code.source,
} }
tree = NOMSU_PATTERN\match(tostring(nomsu_code), nil, userdata)
old_userdata, lpeg.userdata = lpeg.userdata, userdata
tree = NOMSU_PATTERN\match(tostring(nomsu_code))
lpeg.userdata = old_userdata
assert tree, "In file #{colored.blue filename} failed to parse:\n#{colored.onyellow colored.black nomsu_code}" unless tree
error "In file #{colored.blue filename} failed to parse:\n#{colored.onyellow colored.black nomsu_code}"
if next(userdata.errors) if next(userdata.errors)
keys = utils.keys(userdata.errors) keys = utils.keys(userdata.errors)

View File

@ -3,16 +3,16 @@ file:
(ignored_line %nl)* (ignored_line %nl)*
(block / action / expression)? (block / action / expression)?
(%nl ignored_line)* (%nl ignored_line)*
(!. / (({} (.* -> "Parse error")) => error)) (!. / (({} (.* -> "Parse error") %userdata) => error))
shebang: "#!" [^%nl]* (!. / %nl) shebang: "#!" [^%nl]* (!. / %nl)
inline_block (Block): inline_block (Block):
{| inline_statement (%ws* ";" %ws* inline_statement)+ |} {| inline_statement (%ws* ";" %ws* inline_statement)+ |}
block (Block): block (Block):
{| statement (nodent (statement / (({} ([^%nl]* -> "Error while parsing block line")) => error)))+ |} {| statement (nodent (statement / (({} ([^%nl]* -> "Error while parsing block line") %userdata) => error)))+ |}
statement: (action / expression) (eol / (({} ([^%nl]* -> "Error while parsing line")) => error)) statement: (action / expression) (eol / (({} ([^%nl]* -> "Error while parsing line") %userdata) => error))
inline_statement: inline_action / inline_expression inline_statement: inline_action / inline_expression
noindex_inline_expression: noindex_inline_expression:
@ -20,8 +20,8 @@ noindex_inline_expression:
/ ( "(" / ( "("
%ws* (inline_block / inline_action / inline_expression) %ws* %ws* (inline_block / inline_action / inline_expression) %ws*
(")" (")"
/ (({} ((!. / &%nl) -> 'Expected to find a ) before the end of the line')) => error) / (({} ((!. / &%nl) -> 'Expected to find a ) before the end of the line') %userdata) => error)
/ (({} ([^%nl]* -> 'Error while parsing subexpression')) => error) / (({} ([^%nl]* -> 'Error while parsing subexpression') %userdata) => error)
) )
) )
inline_expression: inline_expression:
@ -30,7 +30,7 @@ indented_expression:
indented_text / indented_nomsu / indented_list / indented_dict indented_text / indented_nomsu / indented_list / indented_dict
/ ("(..)"? indent / ("(..)"? indent
(block / action / expression) (block / action / expression)
(dedent / (({} (non_dedent_error -> "Error while parsing indented expression")) => error)) (dedent / (({} (non_dedent_error -> "Error while parsing indented expression") %userdata) => error))
) )
expression: expression:
inline_expression / (":" %ws* (inline_block / inline_action / inline_expression) eol) / indented_expression inline_expression / (":" %ws* (inline_block / inline_action / inline_expression) eol) / indented_expression
@ -60,7 +60,7 @@ inline_text (Text):
'"' {| '"' {|
({~ (('\"' -> '"') / ('\\' -> '\') / %escaped_char / [^%nl\"])+ ~} ({~ (('\"' -> '"') / ('\\' -> '\') / %escaped_char / [^%nl\"])+ ~}
/ inline_text_interpolation)* / inline_text_interpolation)*
|} ('"' / (({} ([^%nl]*->'Failed to find a closing " mark on the same line')) => error)) |} ('"' / (({} ([^%nl]*->'Failed to find a closing " mark on the same line') %userdata) => error))
-- Have to use "%indent" instead of "indent" etc. to avoid messing up text lines that start with "#" -- Have to use "%indent" instead of "indent" etc. to avoid messing up text lines that start with "#"
indented_text (Text): indented_text (Text):
@ -69,15 +69,15 @@ indented_text (Text):
({~ ({~
(("\\" -> "\") / (("\" nodent "..") -> "")/ (%nl+ {~ %nodent -> "" ~}) / [^%nl\] / (!text_interpolation "\"))+ (("\\" -> "\") / (("\" nodent "..") -> "")/ (%nl+ {~ %nodent -> "" ~}) / [^%nl\] / (!text_interpolation "\"))+
~} / text_interpolation)* ~} / text_interpolation)*
|} (((!.) &%dedent) / (&(%nl %dedent)) / (({} (non_dedent_error -> "Error while parsing Text")) => error)) |} (((!.) %dedent) / (&(%nl %dedent)) / (({} (non_dedent_error -> "Error while parsing Text") %userdata) => error))
inline_text_interpolation: inline_text_interpolation:
"\" ( "\" (
variable / inline_list / inline_dict / inline_text variable / inline_list / inline_dict / inline_text
/ ("(" / ("("
%ws* (inline_block / inline_action / inline_expression) %ws* %ws* (inline_block / inline_action / inline_expression) %ws*
(")" (")"
/ (({} (&%nl -> 'Expected to find a ")" before the end of the line')) => error) / (({} (&%nl -> 'Expected to find a ")" before the end of the line') %userdata) => error)
/ (({} ([^%nl]* -> 'Error while parsing text interpolation')) => error)) / (({} ([^%nl]* -> 'Error while parsing text interpolation') %userdata) => error))
) )
) )
text_interpolation: text_interpolation:
@ -93,11 +93,11 @@ variable (Var): "%" { ((!"'" %operator) / plain_word)* }
inline_list (List): inline_list (List):
!('[..]') !('[..]')
"[" %ws* {| (inline_list_item (comma inline_list_item)* comma?)? |} %ws* "[" %ws* {| (inline_list_item (comma inline_list_item)* comma?)? |} %ws*
("]" / (({} ([^%nl]*->"Failed to find a closing ] on the same line")) => error)) ("]" / (({} ([^%nl]*->"Failed to find a closing ] on the same line") %userdata) => error))
indented_list (List): indented_list (List):
"[..]" indent "[..]" indent
{| list_line (nodent list_line)* |} {| list_line (nodent list_line)* |}
(dedent / (({} (non_dedent_error -> "Error while parsing list")) => error)) (dedent / (({} (non_dedent_error -> "Error while parsing list") %userdata) => error))
list_line: list_line:
((action / expression) !comma) ((action / expression) !comma)
/ (inline_list_item (comma list_line?)?) / (inline_list_item (comma list_line?)?)
@ -107,12 +107,12 @@ inline_dict (Dict):
!('{..}') !('{..}')
"{" %ws* {| (inline_dict_entry (comma inline_dict_entry)*)? |} %ws* "{" %ws* {| (inline_dict_entry (comma inline_dict_entry)*)? |} %ws*
("}" ("}"
/ (({} (%ws* comma? (!. / &%nl)->"Failed to find a closing } on the same line")) => error) / (({} (%ws* comma? (!. / &%nl)->"Failed to find a closing } on the same line") %userdata) => error)
/ (({} ([^%nl]*->"Error while parsing dictionary")) => error)) / (({} ([^%nl]*->"Error while parsing dictionary") %userdata) => error))
indented_dict (Dict): indented_dict (Dict):
"{..}" indent "{..}" indent
{| dict_line (nodent dict_line)* |} {| dict_line (nodent dict_line)* |}
(dedent / (({} (non_dedent_error -> "Error while parsing dict")) => error)) (dedent / (({} (non_dedent_error -> "Error while parsing dict") %userdata) => error))
dict_line: dict_line:
(dict_entry !comma) / (inline_dict_entry (comma dict_line?)?) (dict_entry !comma) / (inline_dict_entry (comma dict_line?)?)
dict_entry(DictEntry): dict_entry(DictEntry):
@ -129,7 +129,7 @@ eol: %ws* eol_comment? (!. / &%nl)
ignored_line: (%nodent comment) / (%ws* (!. / &%nl)) ignored_line: (%nodent comment) / (%ws* (!. / &%nl))
indent: eol (%nl ignored_line)* %nl %indent (comment (%nl ignored_line)* nodent)? indent: eol (%nl ignored_line)* %nl %indent (comment (%nl ignored_line)* nodent)?
nodent: eol (%nl ignored_line)* %nl %nodent nodent: eol (%nl ignored_line)* %nl %nodent
dedent: eol (%nl ignored_line)* (((!.) &%dedent) / (&(%nl %dedent))) dedent: eol (%nl ignored_line)* (((!.) %dedent) / (&(%nl %dedent)))
non_dedent_error: (!dedent .)* eol (%nl ignored_line)* (!. / &%nl) non_dedent_error: (!dedent .)* eol (%nl ignored_line)* (!. / &%nl)
comma: %ws* "," %ws* comma: %ws* "," %ws*
dotdot: nodent ".." dotdot: nodent ".."