Moved parsing into a different file
This commit is contained in:
parent
16d127abb5
commit
a22de3fdbb
164
parser.lua
Normal file
164
parser.lua
Normal file
@ -0,0 +1,164 @@
|
||||
local lpeg = require('lpeg')
|
||||
local re = require('re')
|
||||
lpeg.setmaxstack(10000)
|
||||
local P, R, V, S, Cg, C, Cp, B, Cmt, Carg
|
||||
P, R, V, S, Cg, C, Cp, B, Cmt, Carg = lpeg.P, lpeg.R, lpeg.V, lpeg.S, lpeg.Cg, lpeg.C, lpeg.Cp, lpeg.B, lpeg.Cmt, lpeg.Carg
|
||||
local utils = require('utils')
|
||||
local match, sub, rep, gsub, format, byte, find
|
||||
do
|
||||
local _obj_0 = string
|
||||
match, sub, rep, gsub, format, byte, match, find = _obj_0.match, _obj_0.sub, _obj_0.rep, _obj_0.gsub, _obj_0.format, _obj_0.byte, _obj_0.match, _obj_0.find
|
||||
end
|
||||
local AST = require("nomsu_tree")
|
||||
local NOMSU_DEFS
|
||||
do
|
||||
local _with_0 = { }
|
||||
_with_0.nl = P("\r") ^ -1 * P("\n")
|
||||
_with_0.ws = S(" \t")
|
||||
_with_0.tonumber = tonumber
|
||||
local string_escapes = {
|
||||
n = "\n",
|
||||
t = "\t",
|
||||
b = "\b",
|
||||
a = "\a",
|
||||
v = "\v",
|
||||
f = "\f",
|
||||
r = "\r"
|
||||
}
|
||||
local digit, hex = R('09'), R('09', 'af', 'AF')
|
||||
_with_0.escaped_char = (P("\\") * S("xX") * C(hex * hex)) / function(self)
|
||||
return string.char(tonumber(self, 16))
|
||||
end
|
||||
_with_0.escaped_char = _with_0.escaped_char + ((P("\\") * C(digit * (digit ^ -2))) / function(self)
|
||||
return string.char(tonumber(self))
|
||||
end)
|
||||
_with_0.escaped_char = _with_0.escaped_char + ((P("\\") * C(S("ntbavfr"))) / string_escapes)
|
||||
_with_0.operator_char = S("'`~!@$^&*-+=|<>?/")
|
||||
_with_0.utf8_char = (R("\194\223") * R("\128\191") + R("\224\239") * R("\128\191") * R("\128\191") + R("\240\244") * R("\128\191") * R("\128\191") * R("\128\191"))
|
||||
_with_0.ident_char = R("az", "AZ", "09") + P("_") + _with_0.utf8_char
|
||||
_with_0.indent = Cmt(Carg(1), function(self, start, userdata)
|
||||
local indented = userdata.indent .. ' '
|
||||
if sub(self, start, start + #indented - 1) == indented then
|
||||
userdata.indent = indented
|
||||
return start + #indented
|
||||
end
|
||||
end)
|
||||
_with_0.dedent = Cmt(Carg(1), function(self, start, userdata)
|
||||
local dedented = sub(userdata.indent, 1, -5)
|
||||
if #match(self, "^[ ]*", start) <= #dedented then
|
||||
userdata.indent = dedented
|
||||
return start
|
||||
end
|
||||
end)
|
||||
_with_0.nodent = Cmt(Carg(1), function(self, start, userdata)
|
||||
if sub(self, start, start + #userdata.indent - 1) == userdata.indent then
|
||||
return start + #userdata.indent
|
||||
end
|
||||
end)
|
||||
_with_0.userdata = Carg(1)
|
||||
_with_0.error = function(src, end_pos, start_pos, err_msg, userdata)
|
||||
local seen_errors = userdata.errors
|
||||
if seen_errors[start_pos] then
|
||||
return true
|
||||
end
|
||||
if utils.size(seen_errors) >= 10 then
|
||||
seen_errors[start_pos + 1] = colored.bright(colored.yellow(colored.onred("Too many errors, canceling parsing...")))
|
||||
return #src + 1
|
||||
end
|
||||
local err_pos = start_pos
|
||||
local line_no = pos_to_line(src, err_pos)
|
||||
src = FILE_CACHE[userdata.source.filename]
|
||||
local line_starts = LINE_STARTS[src]
|
||||
local prev_line = line_no == 1 and "" or src:sub(line_starts[line_no - 1] or 1, line_starts[line_no] - 2)
|
||||
local err_line = src:sub(line_starts[line_no], (line_starts[line_no + 1] or 0) - 2)
|
||||
local next_line = src:sub(line_starts[line_no + 1] or -1, (line_starts[line_no + 2] or 0) - 2)
|
||||
local i = err_pos - line_starts[line_no]
|
||||
local pointer = ("-"):rep(i) .. "^"
|
||||
err_msg = colored.bright(colored.yellow(colored.onred((err_msg or "Parse error") .. " at " .. tostring(userdata.source.filename) .. ":" .. tostring(line_no) .. ":")))
|
||||
if #prev_line > 0 then
|
||||
err_msg = err_msg .. ("\n" .. colored.dim(prev_line))
|
||||
end
|
||||
err_line = colored.white(err_line:sub(1, i)) .. colored.bright(colored.red(err_line:sub(i + 1, i + 1))) .. colored.dim(err_line:sub(i + 2, -1))
|
||||
err_msg = err_msg .. "\n" .. tostring(err_line) .. "\n" .. tostring(colored.red(pointer))
|
||||
if #next_line > 0 then
|
||||
err_msg = err_msg .. ("\n" .. colored.dim(next_line))
|
||||
end
|
||||
seen_errors[start_pos] = err_msg
|
||||
return true
|
||||
end
|
||||
NOMSU_DEFS = _with_0
|
||||
end
|
||||
setmetatable(NOMSU_DEFS, {
|
||||
__index = function(self, key)
|
||||
local make_node
|
||||
make_node = function(start, value, stop, userdata)
|
||||
if userdata.source then
|
||||
do
|
||||
local _with_0 = userdata.source
|
||||
value.source = Source(_with_0.filename, _with_0.start + start - 1, _with_0.start + stop - 1)
|
||||
end
|
||||
end
|
||||
setmetatable(value, AST[key])
|
||||
if value.__init then
|
||||
value:__init()
|
||||
end
|
||||
for i = 1, #value do
|
||||
assert(value[i])
|
||||
end
|
||||
return value
|
||||
end
|
||||
self[key] = make_node
|
||||
return make_node
|
||||
end
|
||||
})
|
||||
local NOMSU_PATTERN
|
||||
do
|
||||
local peg_tidier = re.compile([[ file <- {~ %nl* (def/comment) (%nl+ (def/comment))* %nl* ~}
|
||||
def <- anon_def / captured_def
|
||||
anon_def <- ({ident} (" "*) ":"
|
||||
{((%nl " "+ [^%nl]*)+) / ([^%nl]*)}) -> "%1 <- %2"
|
||||
captured_def <- ({ident} (" "*) "(" {ident} ")" (" "*) ":"
|
||||
{((%nl " "+ [^%nl]*)+) / ([^%nl]*)}) -> "%1 <- (({} %3 {} %%userdata) -> %2)"
|
||||
ident <- [a-zA-Z_][a-zA-Z0-9_]*
|
||||
comment <- "--" [^%nl]*
|
||||
]])
|
||||
local nomsu_peg = peg_tidier:match(FILE_CACHE["nomsu.peg"])
|
||||
NOMSU_PATTERN = re.compile(nomsu_peg, NOMSU_DEFS)
|
||||
end
|
||||
local parse
|
||||
parse = function(nomsu_code, source)
|
||||
if source == nil then
|
||||
source = nil
|
||||
end
|
||||
nomsu_code = tostring(nomsu_code)
|
||||
local userdata = {
|
||||
indent = "",
|
||||
errors = { },
|
||||
source = source
|
||||
}
|
||||
local tree = NOMSU_PATTERN:match(nomsu_code, nil, userdata)
|
||||
if not (tree) then
|
||||
error("In file " .. tostring(colored.blue(tostring(source or "<unknown>"))) .. " failed to parse:\n" .. tostring(colored.onyellow(colored.black(nomsu_code))))
|
||||
end
|
||||
if type(tree) == 'number' then
|
||||
tree = nil
|
||||
end
|
||||
if next(userdata.errors) then
|
||||
local keys = utils.keys(userdata.errors)
|
||||
table.sort(keys)
|
||||
local errors
|
||||
do
|
||||
local _accum_0 = { }
|
||||
local _len_0 = 1
|
||||
for _index_0 = 1, #keys do
|
||||
local k = keys[_index_0]
|
||||
_accum_0[_len_0] = userdata.errors[k]
|
||||
_len_0 = _len_0 + 1
|
||||
end
|
||||
errors = _accum_0
|
||||
end
|
||||
error(table.concat(errors, "\n\n"), 0)
|
||||
end
|
||||
return tree
|
||||
end
|
||||
return parse
|
123
parser.moon
Normal file
123
parser.moon
Normal file
@ -0,0 +1,123 @@
|
||||
lpeg = require 'lpeg'
|
||||
re = require 're'
|
||||
lpeg.setmaxstack 10000
|
||||
{:P,:R,:V,:S,:Cg,:C,:Cp,:B,:Cmt,:Carg} = lpeg
|
||||
utils = require 'utils'
|
||||
{:match, :sub, :rep, :gsub, :format, :byte, :match, :find} = string
|
||||
{:NomsuCode, :LuaCode, :Source} = require "code_obj"
|
||||
AST = require "nomsu_tree"
|
||||
|
||||
NOMSU_DEFS = with {}
|
||||
-- Newline supports either windows-style CR+LF or unix-style LF
|
||||
.nl = P("\r")^-1 * P("\n")
|
||||
.ws = S(" \t")
|
||||
.tonumber = tonumber
|
||||
string_escapes = n:"\n", t:"\t", b:"\b", a:"\a", v:"\v", f:"\f", r:"\r"
|
||||
digit, hex = R('09'), R('09','af','AF')
|
||||
.escaped_char = (P("\\")*S("xX")*C(hex*hex)) / => string.char(tonumber(@, 16))
|
||||
.escaped_char += (P("\\")*C(digit*(digit^-2))) / => string.char(tonumber @)
|
||||
.escaped_char += (P("\\")*C(S("ntbavfr"))) / string_escapes
|
||||
.operator_char = S("'`~!@$^&*-+=|<>?/")
|
||||
.utf8_char = (
|
||||
R("\194\223")*R("\128\191") +
|
||||
R("\224\239")*R("\128\191")*R("\128\191") +
|
||||
R("\240\244")*R("\128\191")*R("\128\191")*R("\128\191"))
|
||||
.ident_char = R("az","AZ","09") + P("_") + .utf8_char
|
||||
|
||||
-- If the line begins with #indent+4 spaces, the pattern matches *those* spaces
|
||||
-- and adds them to the current indent (not any more).
|
||||
.indent = Cmt Carg(1), (start, userdata)=>
|
||||
indented = userdata.indent..' '
|
||||
if sub(@, start, start+#indented-1) == indented
|
||||
userdata.indent = indented
|
||||
return start + #indented
|
||||
-- If the number of leading space characters is <= the number of spaces in the current
|
||||
-- indent minus 4, this pattern matches and decrements the current indent exactly once.
|
||||
.dedent = Cmt Carg(1), (start, userdata)=>
|
||||
dedented = sub(userdata.indent, 1, -5)
|
||||
if #match(@, "^[ ]*", start) <= #dedented
|
||||
userdata.indent = dedented
|
||||
return start
|
||||
-- If the number of leading space characters is >= the number of spaces in the current
|
||||
-- indent, this pattern matches and does not modify the indent.
|
||||
.nodent = Cmt Carg(1), (start, userdata)=>
|
||||
if sub(@, start, start+#userdata.indent-1) == userdata.indent
|
||||
return start + #userdata.indent
|
||||
|
||||
.userdata = Carg(1)
|
||||
|
||||
.error = (src,end_pos,start_pos,err_msg,userdata)->
|
||||
seen_errors = userdata.errors
|
||||
if seen_errors[start_pos]
|
||||
return true
|
||||
if utils.size(seen_errors) >= 10
|
||||
seen_errors[start_pos+1] = colored.bright colored.yellow colored.onred "Too many errors, canceling parsing..."
|
||||
return #src+1
|
||||
err_pos = start_pos
|
||||
line_no = pos_to_line(src, err_pos)
|
||||
src = FILE_CACHE[userdata.source.filename]
|
||||
line_starts = LINE_STARTS[src]
|
||||
prev_line = line_no == 1 and "" or src\sub(line_starts[line_no-1] or 1, line_starts[line_no]-2)
|
||||
err_line = src\sub(line_starts[line_no], (line_starts[line_no+1] or 0)-2)
|
||||
next_line = src\sub(line_starts[line_no+1] or -1, (line_starts[line_no+2] or 0)-2)
|
||||
i = err_pos-line_starts[line_no]
|
||||
pointer = ("-")\rep(i) .. "^"
|
||||
err_msg = colored.bright colored.yellow colored.onred (err_msg or "Parse error").." at #{userdata.source.filename}:#{line_no}:"
|
||||
if #prev_line > 0 then err_msg ..= "\n"..colored.dim(prev_line)
|
||||
err_line = colored.white(err_line\sub(1, i))..colored.bright(colored.red(err_line\sub(i+1,i+1)))..colored.dim(err_line\sub(i+2,-1))
|
||||
err_msg ..= "\n#{err_line}\n#{colored.red pointer}"
|
||||
if #next_line > 0 then err_msg ..= "\n"..colored.dim(next_line)
|
||||
--error(err_msg)
|
||||
seen_errors[start_pos] = err_msg
|
||||
return true
|
||||
|
||||
setmetatable(NOMSU_DEFS, {__index:(key)=>
|
||||
make_node = (start, value, stop, userdata)->
|
||||
if userdata.source
|
||||
with userdata.source
|
||||
value.source = Source(.filename, .start + start-1, .start + stop-1)
|
||||
setmetatable(value, AST[key])
|
||||
if value.__init then value\__init!
|
||||
for i=1,#value do assert(value[i])
|
||||
return value
|
||||
|
||||
self[key] = make_node
|
||||
return make_node
|
||||
})
|
||||
|
||||
NOMSU_PATTERN = do
|
||||
-- Just for cleanliness, I put the language spec in its own file using a slightly modified
|
||||
-- version of the lpeg.re syntax.
|
||||
peg_tidier = re.compile [[
|
||||
file <- {~ %nl* (def/comment) (%nl+ (def/comment))* %nl* ~}
|
||||
def <- anon_def / captured_def
|
||||
anon_def <- ({ident} (" "*) ":"
|
||||
{((%nl " "+ [^%nl]*)+) / ([^%nl]*)}) -> "%1 <- %2"
|
||||
captured_def <- ({ident} (" "*) "(" {ident} ")" (" "*) ":"
|
||||
{((%nl " "+ [^%nl]*)+) / ([^%nl]*)}) -> "%1 <- (({} %3 {} %%userdata) -> %2)"
|
||||
ident <- [a-zA-Z_][a-zA-Z0-9_]*
|
||||
comment <- "--" [^%nl]*
|
||||
]]
|
||||
nomsu_peg = peg_tidier\match(FILE_CACHE["nomsu.peg"])
|
||||
re.compile(nomsu_peg, NOMSU_DEFS)
|
||||
|
||||
parse = (nomsu_code, source=nil)->
|
||||
nomsu_code = tostring(nomsu_code)
|
||||
userdata = {
|
||||
indent: "", errors: {}, :source
|
||||
}
|
||||
tree = NOMSU_PATTERN\match(nomsu_code, nil, userdata)
|
||||
unless tree
|
||||
error "In file #{colored.blue tostring(source or "<unknown>")} failed to parse:\n#{colored.onyellow colored.black nomsu_code}"
|
||||
if type(tree) == 'number'
|
||||
tree = nil
|
||||
|
||||
if next(userdata.errors)
|
||||
keys = utils.keys(userdata.errors)
|
||||
table.sort(keys)
|
||||
errors = [userdata.errors[k] for k in *keys]
|
||||
error(table.concat(errors, "\n\n"), 0)
|
||||
|
||||
return tree
|
||||
|
||||
return parse
|
Loading…
Reference in New Issue
Block a user