-- This file contains the parser, which converts Nomsu text into abstract syntax trees
lpeg = require 'lpeg'
re = require 're'
-- Deeply nested Nomsu code can exceed lpeg's default backtrack stack
lpeg.setmaxstack 10000
{:P,:R,:S,:C,:Cmt,:Carg} = lpeg
{:match, :sub} = string
{:insert, :remove} = table
files = require 'files'
{:NomsuCode, :LuaCode, :Source} = require "code_obj"
AST = require "nomsu_tree"
-- Named patterns and callback functions made available to the .peg grammar
-- files (referenced there as %nl, %ws, %error, etc.).
NOMSU_DEFS = with {}
    -- Newline supports either windows-style CR+LF or unix-style LF
    .nl = P("\r")^-1 * P("\n")
    .ws = S(" \t")
    .tonumber = tonumber
    .table = -> {}
    .unpack = unpack or table.unpack
    string_escapes = n:"\n", t:"\t", b:"\b", a:"\a", v:"\v", f:"\f", r:"\r"
    digit, hex = R('09'), R('09','af','AF')
    -- \xNN / \XNN hexadecimal escapes
    .escaped_char = (P("\\")*S("xX")*C(hex*hex)) / => string.char(tonumber(@, 16))
    -- \N, \NN, or \NNN decimal escapes
    .escaped_char += (P("\\")*C(digit*(digit^-2))) / => string.char(tonumber @)
    -- Single-letter escapes like \n, \t
    .escaped_char += (P("\\")*C(S("ntbavfr"))) / string_escapes
    .operator_char = S("'`~!@$^&*-+=|<>?/")
    -- Multi-byte UTF-8 sequences (2-, 3-, and 4-byte forms)
    .utf8_char = (
        R("\194\223")*R("\128\191") +
        R("\224\239")*R("\128\191")*R("\128\191") +
        R("\240\244")*R("\128\191")*R("\128\191")*R("\128\191"))
    .ident_char = R("az","AZ","09") + P("_") + .utf8_char

    -- The per-parse userdata table ({errors, source, comments}) is passed to
    -- lpeg as extra argument #1 (see Parser.parse below)
    .userdata = Carg(1)

    -- Grammar callback: record a comment's text keyed by its start position.
    -- Returns true so the enclosing Cmt match succeeds.
    .add_comment = (src,end_pos,start_pos,comment,userdata)->
        userdata.comments[start_pos] = comment
        return true

    -- Grammar callback: record a colorized, multi-line error message for the
    -- given position (at most one per position), then let parsing continue.
    -- After 10 distinct errors, returns #src+1 to skip to the end of input
    -- and abort further matching.
    -- NOTE(review): `colored` is assumed to be a global provided elsewhere
    -- (it is not required in this file) -- confirm.
    .error = (src,end_pos,start_pos,err_msg,userdata)->
        seen_errors = userdata.errors
        if seen_errors[start_pos]
            return true
        num_errors = 0
        for _ in pairs(seen_errors) do num_errors += 1
        if num_errors >= 10
            -- Stored at start_pos+1 so it sorts just after the real error at
            -- this position when messages are collected
            seen_errors[start_pos+1] = colored.bright colored.yellow colored.onred "Too many errors, canceling parsing..."
            return #src+1
        err_pos = start_pos
        line_no = files.get_line_number(src, err_pos)
        --src = files.read(userdata.source.filename)
        prev_line = line_no == 1 and "" or files.get_line(src, line_no-1)
        err_line = files.get_line(src, line_no)
        next_line = files.get_line(src, line_no+1)
        -- Column offset of the error within its line
        i = err_pos-files.get_line_starts(src)[line_no]
        pointer = ("-")\rep(i) .. "^"
        err_msg = colored.bright colored.yellow colored.onred (err_msg or "Parse error").." at #{userdata.source.filename}:#{line_no}:"
        if #prev_line > 0 then err_msg ..= "\n"..colored.dim(prev_line)
        -- Highlight the exact offending character in red
        err_line = colored.white(err_line\sub(1, i))..colored.bright(colored.red(err_line\sub(i+1,i+1)))..colored.dim(err_line\sub(i+2,-1))
        err_msg ..= "\n#{err_line}\n#{colored.red pointer}"
        if #next_line > 0 then err_msg ..= "\n"..colored.dim(next_line)
        seen_errors[start_pos] = err_msg
        return true
-- Any name the grammar asks for that isn't defined above is treated as an AST
-- node type: lazily build (and cache) a constructor that tags the captured
-- table with its source span and AST metatable.
setmetatable(NOMSU_DEFS, {__index:(key)=>
    make_node = (start, value, stop, userdata)->
        if userdata.source
            with userdata.source
                -- Positions from lpeg are relative to the parsed string; shift
                -- them to be absolute within the original source
                value.source = Source(.filename, .start + start-1, .start + stop-1)
        setmetatable(value, AST[key])
        if value.__init then value\__init!
        return value
    -- Cache so subsequent lookups skip __index entirely
    self[key] = make_node
    return make_node
})
-- Public module table: current syntax version plus one compiled lpeg pattern
-- per supported version.
Parser = {version:2, patterns:{}}
do
    -- Just for cleanliness, I put the language spec in its own file using a slightly modified
    -- version of the lpeg.re syntax. This grammar rewrites that spec into
    -- standard lpeg.re syntax (adding capture/userdata plumbing).
    peg_tidier = re.compile [[
    file <- %nl* {~ (def/comment) (%nl+ (def/comment))* %nl* ~}
    def <- anon_def / captured_def
    anon_def <- ({ident} (" "*) ":"
        {~ ((%nl " "+ def_line?)+) / def_line ~}) -> "%1 <- %2"
    captured_def <- ({ident} (" "*) "(" {ident} ")" (" "*) ":"
        {~ ((%nl " "+ def_line?)+) / def_line ~}) -> "%1 <- (({} {| %3 |} {} %%userdata) -> %2)"
    def_line <- (err / [^%nl])+
    err <- ("(!!" { (!("!!)") .)* } "!!)") -> "(({} (%1) %%userdata) => error)"
    ident <- [a-zA-Z_][a-zA-Z0-9_]*
    comment <- "--" [^%nl]*
    ]]

    -- Load and compile the grammar for every supported syntax version,
    -- looking first in the working directory, then along package.nomsupath
    for version=1,Parser.version
        peg_file = io.open("nomsu.#{version}.peg")
        if not peg_file and package.nomsupath
            for path in package.nomsupath\gmatch("[^;]+")
                peg_file = io.open(path.."/nomsu.#{version}.peg")
                break if peg_file
        assert(peg_file, "could not find nomsu .peg file")
        nomsu_peg = peg_tidier\match(peg_file\read('*a'))
        peg_file\close!
        Parser.patterns[version] = re.compile(nomsu_peg, NOMSU_DEFS)
-- Parse Nomsu code into a syntax tree.
--   nomsu_code: a string or NomsuCode object
--   source: optional Source describing where the code came from
--           (defaults to nomsu_code.source)
--   version: optional syntax version string; otherwise read from a
--            "#!...nomsu -V N" shebang, falling back to Parser.version
-- Returns the tree (with .comments attached to nodes), nil if parsing was
-- aborted, or raises an error describing any parse failures.
Parser.parse = (nomsu_code, source=nil, version=nil)->
    source or= nomsu_code.source
    nomsu_code = tostring(nomsu_code)
    version or= nomsu_code\match("^#![^\n]*nomsu[ ]+-V[ ]*([0-9.]+)")
    -- Only the major version selects a grammar (e.g. "2.3" -> 2)
    syntax_version = version and tonumber(version\match("^[0-9]+")) or Parser.version
    userdata = {
        errors: {}, :source, comments: {}
    }
    tree = Parser.patterns[syntax_version]\match(nomsu_code, nil, userdata)
    unless tree
        error "In file #{colored.blue tostring(source or "<unknown>")} failed to parse:\n#{colored.onyellow colored.black nomsu_code}"
    -- A numeric result means the %error handler bailed out (too many errors)
    if type(tree) == 'number'
        return nil

    if next(userdata.errors)
        -- Report all recorded errors in source order
        keys = [k for k,v in pairs(userdata.errors)]
        table.sort(keys)
        errors = [userdata.errors[k] for k in *keys]
        error("Errors occurred while parsing (v#{syntax_version}):\n\n"..table.concat(errors, "\n\n"), 0)

    comments = [{comment:c, pos:p} for p,c in pairs(userdata.comments)]
    -- Sort in descending order so we can pop the first comments off the end one at a time
    table.sort comments, (a,b)-> a.pos > b.pos
    comment_i = 1
    -- Attach each comment to the deepest tree node whose source span covers it
    walk_tree = (t)->
        export comment_i
        comment_buff = {}
        while comments[#comments] and comments[#comments].pos <= t.source.start
            table.insert(comment_buff, table.remove(comments))
        for x in *t
            if AST.is_syntax_tree x
                walk_tree x
        while comments[#comments] and comments[#comments].pos <= t.source.stop
            table.insert(comment_buff, table.remove(comments))
        t.comments = comment_buff if #comment_buff > 0
    walk_tree tree

    return tree
-- Returns true if s is entirely made of operator characters, false otherwise.
Parser.is_operator = (s)->
    return not not (NOMSU_DEFS.operator_char^1 * -1)\match(s)
-- Returns true if s is entirely made of identifier characters, false otherwise.
Parser.is_identifier = (s)->
    return not not (NOMSU_DEFS.ident_char^1 * -1)\match(s)
-- Escaper for inline (single-line, double-quoted) Nomsu strings: backslash-
-- escapes quotes, backslashes, and control characters; passes UTF-8 through.
inline_escaper = re.compile "{~ (%utf8_char / ('\"' -> '\\\"') / ('\n' -> '\\n') / ('\t' -> '\\t') / ('\b' -> '\\b') / ('\a' -> '\\a') / ('\v' -> '\\v') / ('\f' -> '\\f') / ('\r' -> '\\r') / ('\\' -> '\\\\') / ([^ -~] -> escape) / .)* ~}", {utf8_char: NOMSU_DEFS.utf8_char, escape:(=> ("\\%03d")\format(@byte!))}

-- Escape a string for use in an inline Nomsu string literal
Parser.inline_escape = (s)->
    return inline_escaper\match(s)

-- Escaper for multi-line strings: only backslashes and non-printable,
-- non-whitespace bytes need escaping; newlines/tabs survive literally.
escaper = re.compile "{~ (%utf8_char / ('\\' -> '\\\\') / [\n\r\t -~] / (. -> escape))* ~}", {utf8_char: NOMSU_DEFS.utf8_char, escape:(=> ("\\%03d")\format(@byte!))}

-- Escape a string for use in an indented (multi-line) Nomsu string literal
Parser.escape = (s)->
    return escaper\match(s)

return Parser