Cleaned up lpeg matching stuff to use lpeg.Carg(1) instead of
lpeg.userdata, and switched indent/dedent/nodent to use a number to
track indentation instead of a stack.
This commit is contained in:
Bruce Hill 2018-05-26 19:24:22 -07:00
parent 0c7c06beab
commit 6ce32bdd25
3 changed files with 66 additions and 77 deletions

View File

@ -28,8 +28,8 @@ end
re = require('re')
lpeg = require('lpeg')
lpeg.setmaxstack(10000)
local P, R, V, S, Cg, C, Cp, B
P, R, V, S, Cg, C, Cp, B = lpeg.P, lpeg.R, lpeg.V, lpeg.S, lpeg.Cg, lpeg.C, lpeg.Cp, lpeg.B
local P, R, V, S, Cg, C, Cp, B, Cmt, Carg
P, R, V, S, Cg, C, Cp, B, Cmt, Carg = lpeg.P, lpeg.R, lpeg.V, lpeg.S, lpeg.Cg, lpeg.C, lpeg.Cp, lpeg.B, lpeg.Cmt, lpeg.Carg
local utils = require('utils')
local new_uuid = require('uuid')
local immutable = require('immutable')
@ -183,42 +183,38 @@ do
_with_0.operator = _with_0.operator_char ^ 1
_with_0.utf8_char = (R("\194\223") * R("\128\191") + R("\224\239") * R("\128\191") * R("\128\191") + R("\240\244") * R("\128\191") * R("\128\191") * R("\128\191"))
_with_0.ident_char = R("az", "AZ", "09") + P("_") + _with_0.utf8_char
_with_0.indent = P(function(self, start)
local nodent = lpeg.userdata.indent_stack[#lpeg.userdata.indent_stack]
local indented = nodent .. " "
if self:sub(start, start + #indented - 1) == indented then
insert(lpeg.userdata.indent_stack, indented)
return start + #indented
_with_0.indent = Cmt(Carg(1), function(self, start, userdata)
if #self:match("^[ ]*", start) == userdata.indent + 4 then
userdata.indent = userdata.indent + 4
return start + userdata.indent
end
end)
_with_0.dedent = P(function(self, start)
local nodent = lpeg.userdata.indent_stack[#lpeg.userdata.indent_stack]
local spaces = self:match("^[ ]*", start)
if #spaces <= #nodent - 4 then
remove(lpeg.userdata.indent_stack)
_with_0.dedent = Cmt(Carg(1), function(self, start, userdata)
if #self:match("^[ ]*", start) <= userdata.indent - 4 then
userdata.indent = userdata.indent - 4
return start
end
end)
_with_0.nodent = P(function(self, start)
local nodent = lpeg.userdata.indent_stack[#lpeg.userdata.indent_stack]
if self:sub(start, start + #nodent - 1) == nodent then
return start + #nodent
_with_0.nodent = Cmt(Carg(1), function(self, start, userdata)
if #self:match("^[ ]*", start) >= userdata.indent then
return start + userdata.indent
end
end)
_with_0.error = function(src, end_pos, start_pos, err_msg)
local seen_errors = lpeg.userdata.errors
_with_0.userdata = Carg(1)
_with_0.error = function(src, end_pos, start_pos, err_msg, userdata)
local seen_errors = userdata.errors
if seen_errors[start_pos] then
return true
end
local err_pos = start_pos
local text_loc = lpeg.userdata.source:sub(err_pos, err_pos)
local text_loc = userdata.source:sub(err_pos, err_pos)
local line_no = text_loc:get_line_number()
src = FILE_CACHE[text_loc.filename]
local prev_line = line_no == 1 and "" or src:sub(LINE_STARTS[src][line_no - 1] or 1, LINE_STARTS[src][line_no] - 2)
local err_line = src:sub(LINE_STARTS[src][line_no], (LINE_STARTS[src][line_no + 1] or 0) - 2)
local next_line = src:sub(LINE_STARTS[src][line_no + 1] or -1, (LINE_STARTS[src][line_no + 2] or 0) - 2)
local pointer = ("-"):rep(err_pos - LINE_STARTS[src][line_no]) .. "^"
err_msg = (err_msg or "Parse error") .. " at " .. tostring(lpeg.userdata.source.filename) .. ":" .. tostring(line_no) .. ":\n"
err_msg = (err_msg or "Parse error") .. " at " .. tostring(userdata.source.filename) .. ":" .. tostring(line_no) .. ":\n"
if #prev_line > 0 then
err_msg = err_msg .. ("\n" .. prev_line)
end
@ -234,8 +230,8 @@ end
setmetatable(NOMSU_DEFS, {
__index = function(self, key)
local make_node
make_node = function(start, value, stop)
local source = lpeg.userdata.source:sub(start, stop)
make_node = function(start, value, stop, userdata)
local source = userdata.source:sub(start, stop)
local tree
if Types[key].is_multi then
tree = Types[key](Tuple(unpack(value)), source)
@ -255,7 +251,7 @@ do
anon_def <- ({ident} (" "*) ":"
{((%nl " "+ [^%nl]*)+) / ([^%nl]*)}) -> "%1 <- %2"
captured_def <- ({ident} (" "*) "(" {ident} ")" (" "*) ":"
{((%nl " "+ [^%nl]*)+) / ([^%nl]*)}) -> "%1 <- (({} %3 {}) -> %2)"
{((%nl " "+ [^%nl]*)+) / ([^%nl]*)}) -> "%1 <- (({} %3 {} %%userdata) -> %2)"
ident <- [a-zA-Z_][a-zA-Z0-9_]*
comment <- "--" [^%nl]*
]])
@ -322,17 +318,14 @@ do
end
local userdata = {
source_code = nomsu_code,
indent_stack = {
""
},
indent = 0,
errors = { },
source = nomsu_code.source
}
local old_userdata
old_userdata, lpeg.userdata = lpeg.userdata, userdata
local tree = NOMSU_PATTERN:match(tostring(nomsu_code))
lpeg.userdata = old_userdata
assert(tree, "In file " .. tostring(colored.blue(filename)) .. " failed to parse:\n" .. tostring(colored.onyellow(colored.black(nomsu_code))))
local tree = NOMSU_PATTERN:match(tostring(nomsu_code), nil, userdata)
if not (tree) then
error("In file " .. tostring(colored.blue(filename)) .. " failed to parse:\n" .. tostring(colored.onyellow(colored.black(nomsu_code))))
end
if next(userdata.errors) then
local keys = utils.keys(userdata.errors)
table.sort(keys)

View File

@ -33,7 +33,7 @@ if jit
re = require 're'
lpeg = require 'lpeg'
lpeg.setmaxstack 10000
{:P,:R,:V,:S,:Cg,:C,:Cp,:B} = lpeg
{:P,:R,:V,:S,:Cg,:C,:Cp,:B,:Cmt,:Carg} = lpeg
utils = require 'utils'
new_uuid = require 'uuid'
immutable = require 'immutable'
@ -156,42 +156,39 @@ NOMSU_DEFS = with {}
-- If the line begins with exactly userdata.indent+4 spaces, the pattern matches
-- *those* spaces (not any more) and increases userdata.indent by 4.
.indent = P (start)=>
nodent = lpeg.userdata.indent_stack[#lpeg.userdata.indent_stack]
indented = nodent.." "
if @sub(start, start+#indented-1) == indented
insert(lpeg.userdata.indent_stack, indented)
return start + #indented
.indent = Cmt Carg(1), (start, userdata)=>
if #@match("^[ ]*", start) == userdata.indent + 4
userdata.indent += 4
return start + userdata.indent
-- If the number of leading space characters is <= userdata.indent minus 4, this
-- pattern matches (without consuming input) and decreases userdata.indent by 4 exactly once.
.dedent = P (start)=>
nodent = lpeg.userdata.indent_stack[#lpeg.userdata.indent_stack]
spaces = @match("^[ ]*", start)
if #spaces <= #nodent-4
remove(lpeg.userdata.indent_stack)
.dedent = Cmt Carg(1), (start, userdata)=>
if #@match("^[ ]*", start) <= userdata.indent - 4
userdata.indent -= 4
return start
-- If the number of leading space characters is >= userdata.indent, this pattern
-- matches userdata.indent spaces and does not modify userdata.indent.
.nodent = P (start)=>
nodent = lpeg.userdata.indent_stack[#lpeg.userdata.indent_stack]
if @sub(start, start+#nodent-1) == nodent
return start + #nodent
.nodent = Cmt Carg(1), (start, userdata)=>
if #@match("^[ ]*", start) >= userdata.indent
return start + userdata.indent
.error = (src,end_pos,start_pos,err_msg)->
seen_errors = lpeg.userdata.errors
.userdata = Carg(1)
.error = (src,end_pos,start_pos,err_msg,userdata)->
seen_errors = userdata.errors
if seen_errors[start_pos]
return true
err_pos = start_pos
--if src\sub(err_pos,err_pos)\match("[\r\n]")
-- err_pos += #src\match("[ \t\n\r]*", err_pos)
text_loc = lpeg.userdata.source\sub(err_pos,err_pos)
text_loc = userdata.source\sub(err_pos,err_pos)
line_no = text_loc\get_line_number!
src = FILE_CACHE[text_loc.filename]
prev_line = line_no == 1 and "" or src\sub(LINE_STARTS[src][line_no-1] or 1, LINE_STARTS[src][line_no]-2)
err_line = src\sub(LINE_STARTS[src][line_no], (LINE_STARTS[src][line_no+1] or 0)-2)
next_line = src\sub(LINE_STARTS[src][line_no+1] or -1, (LINE_STARTS[src][line_no+2] or 0)-2)
pointer = ("-")\rep(err_pos-LINE_STARTS[src][line_no]) .. "^"
err_msg = (err_msg or "Parse error").." at #{lpeg.userdata.source.filename}:#{line_no}:\n"
err_msg = (err_msg or "Parse error").." at #{userdata.source.filename}:#{line_no}:\n"
if #prev_line > 0 then err_msg ..= "\n"..prev_line
err_msg ..= "\n#{err_line}\n#{pointer}"
if #next_line > 0 then err_msg ..= "\n"..next_line
@ -200,8 +197,8 @@ NOMSU_DEFS = with {}
return true
setmetatable(NOMSU_DEFS, {__index:(key)=>
make_node = (start, value, stop)->
source = lpeg.userdata.source\sub(start, stop)
make_node = (start, value, stop, userdata)->
source = userdata.source\sub(start, stop)
tree = if Types[key].is_multi
Types[key](Tuple(unpack(value)), source)
else Types[key](value, source)
@ -219,7 +216,7 @@ NOMSU_PATTERN = do
anon_def <- ({ident} (" "*) ":"
{((%nl " "+ [^%nl]*)+) / ([^%nl]*)}) -> "%1 <- %2"
captured_def <- ({ident} (" "*) "(" {ident} ")" (" "*) ":"
{((%nl " "+ [^%nl]*)+) / ([^%nl]*)}) -> "%1 <- (({} %3 {}) -> %2)"
{((%nl " "+ [^%nl]*)+) / ([^%nl]*)}) -> "%1 <- (({} %3 {} %%userdata) -> %2)"
ident <- [a-zA-Z_][a-zA-Z0-9_]*
comment <- "--" [^%nl]*
]]
@ -329,16 +326,15 @@ class NomsuCompiler
filename = "<nomsu chunk ##{_nomsu_chunk_counter}>.nom"
FILE_CACHE[filename] = nomsu_code
nomsu_code = Nomsu(filename, nomsu_code)
userdata = {
source_code:nomsu_code, indent_stack: {""}, errors: {},
source_code:nomsu_code, indent: 0, errors: {},
source: nomsu_code.source,
}
tree = NOMSU_PATTERN\match(tostring(nomsu_code), nil, userdata)
old_userdata, lpeg.userdata = lpeg.userdata, userdata
tree = NOMSU_PATTERN\match(tostring(nomsu_code))
lpeg.userdata = old_userdata
assert tree, "In file #{colored.blue filename} failed to parse:\n#{colored.onyellow colored.black nomsu_code}"
unless tree
error "In file #{colored.blue filename} failed to parse:\n#{colored.onyellow colored.black nomsu_code}"
if next(userdata.errors)
keys = utils.keys(userdata.errors)

View File

@ -3,16 +3,16 @@ file:
(ignored_line %nl)*
(block / action / expression)?
(%nl ignored_line)*
(!. / (({} (.* -> "Parse error")) => error))
(!. / (({} (.* -> "Parse error") %userdata) => error))
shebang: "#!" [^%nl]* (!. / %nl)
inline_block (Block):
{| inline_statement (%ws* ";" %ws* inline_statement)+ |}
block (Block):
{| statement (nodent (statement / (({} ([^%nl]* -> "Error while parsing block line")) => error)))+ |}
{| statement (nodent (statement / (({} ([^%nl]* -> "Error while parsing block line") %userdata) => error)))+ |}
statement: (action / expression) (eol / (({} ([^%nl]* -> "Error while parsing line")) => error))
statement: (action / expression) (eol / (({} ([^%nl]* -> "Error while parsing line") %userdata) => error))
inline_statement: inline_action / inline_expression
noindex_inline_expression:
@ -20,8 +20,8 @@ noindex_inline_expression:
/ ( "("
%ws* (inline_block / inline_action / inline_expression) %ws*
(")"
/ (({} ((!. / &%nl) -> 'Expected to find a ) before the end of the line')) => error)
/ (({} ([^%nl]* -> 'Error while parsing subexpression')) => error)
/ (({} ((!. / &%nl) -> 'Expected to find a ) before the end of the line') %userdata) => error)
/ (({} ([^%nl]* -> 'Error while parsing subexpression') %userdata) => error)
)
)
inline_expression:
@ -30,7 +30,7 @@ indented_expression:
indented_text / indented_nomsu / indented_list / indented_dict
/ ("(..)"? indent
(block / action / expression)
(dedent / (({} (non_dedent_error -> "Error while parsing indented expression")) => error))
(dedent / (({} (non_dedent_error -> "Error while parsing indented expression") %userdata) => error))
)
expression:
inline_expression / (":" %ws* (inline_block / inline_action / inline_expression) eol) / indented_expression
@ -60,7 +60,7 @@ inline_text (Text):
'"' {|
({~ (('\"' -> '"') / ('\\' -> '\') / %escaped_char / [^%nl\"])+ ~}
/ inline_text_interpolation)*
|} ('"' / (({} ([^%nl]*->'Failed to find a closing " mark on the same line')) => error))
|} ('"' / (({} ([^%nl]*->'Failed to find a closing " mark on the same line') %userdata) => error))
-- Have to use "%indent" instead of "indent" etc. to avoid messing up text lines that start with "#"
indented_text (Text):
@ -69,15 +69,15 @@ indented_text (Text):
({~
(("\\" -> "\") / (("\" nodent "..") -> "")/ (%nl+ {~ %nodent -> "" ~}) / [^%nl\] / (!text_interpolation "\"))+
~} / text_interpolation)*
|} (((!.) &%dedent) / (&(%nl %dedent)) / (({} (non_dedent_error -> "Error while parsing Text")) => error))
|} (((!.) %dedent) / (&(%nl %dedent)) / (({} (non_dedent_error -> "Error while parsing Text") %userdata) => error))
inline_text_interpolation:
"\" (
variable / inline_list / inline_dict / inline_text
/ ("("
%ws* (inline_block / inline_action / inline_expression) %ws*
(")"
/ (({} (&%nl -> 'Expected to find a ")" before the end of the line')) => error)
/ (({} ([^%nl]* -> 'Error while parsing text interpolation')) => error))
/ (({} (&%nl -> 'Expected to find a ")" before the end of the line') %userdata) => error)
/ (({} ([^%nl]* -> 'Error while parsing text interpolation') %userdata) => error))
)
)
text_interpolation:
@ -93,11 +93,11 @@ variable (Var): "%" { ((!"'" %operator) / plain_word)* }
inline_list (List):
!('[..]')
"[" %ws* {| (inline_list_item (comma inline_list_item)* comma?)? |} %ws*
("]" / (({} ([^%nl]*->"Failed to find a closing ] on the same line")) => error))
("]" / (({} ([^%nl]*->"Failed to find a closing ] on the same line") %userdata) => error))
indented_list (List):
"[..]" indent
{| list_line (nodent list_line)* |}
(dedent / (({} (non_dedent_error -> "Error while parsing list")) => error))
(dedent / (({} (non_dedent_error -> "Error while parsing list") %userdata) => error))
list_line:
((action / expression) !comma)
/ (inline_list_item (comma list_line?)?)
@ -107,12 +107,12 @@ inline_dict (Dict):
!('{..}')
"{" %ws* {| (inline_dict_entry (comma inline_dict_entry)*)? |} %ws*
("}"
/ (({} (%ws* comma? (!. / &%nl)->"Failed to find a closing } on the same line")) => error)
/ (({} ([^%nl]*->"Error while parsing dictionary")) => error))
/ (({} (%ws* comma? (!. / &%nl)->"Failed to find a closing } on the same line") %userdata) => error)
/ (({} ([^%nl]*->"Error while parsing dictionary") %userdata) => error))
indented_dict (Dict):
"{..}" indent
{| dict_line (nodent dict_line)* |}
(dedent / (({} (non_dedent_error -> "Error while parsing dict")) => error))
(dedent / (({} (non_dedent_error -> "Error while parsing dict") %userdata) => error))
dict_line:
(dict_entry !comma) / (inline_dict_entry (comma dict_line?)?)
dict_entry(DictEntry):
@ -129,7 +129,7 @@ eol: %ws* eol_comment? (!. / &%nl)
ignored_line: (%nodent comment) / (%ws* (!. / &%nl))
indent: eol (%nl ignored_line)* %nl %indent (comment (%nl ignored_line)* nodent)?
nodent: eol (%nl ignored_line)* %nl %nodent
dedent: eol (%nl ignored_line)* (((!.) &%dedent) / (&(%nl %dedent)))
dedent: eol (%nl ignored_line)* (((!.) %dedent) / (&(%nl %dedent)))
non_dedent_error: (!dedent .)* eol (%nl ignored_line)* (!. / &%nl)
comma: %ws* "," %ws*
dotdot: nodent ".."