Added unicode support and cleaned up the parser a bit.

This commit is contained in:
Bruce Hill 2017-12-08 15:37:36 -08:00
parent 5903820699
commit 8da2b1d64c
2 changed files with 94 additions and 53 deletions

View File

@ -33,8 +33,8 @@ if _VERSION == "Lua 5.1" then
end end
end end
lpeg.setmaxstack(10000) lpeg.setmaxstack(10000)
local P, V, S, Cg, C, Cp, B, Cmt local P, R, V, S, Cg, C, Cp, B, Cmt
P, V, S, Cg, C, Cp, B, Cmt = lpeg.P, lpeg.V, lpeg.S, lpeg.Cg, lpeg.C, lpeg.Cp, lpeg.B, lpeg.Cmt P, R, V, S, Cg, C, Cp, B, Cmt = lpeg.P, lpeg.R, lpeg.V, lpeg.S, lpeg.Cg, lpeg.C, lpeg.Cp, lpeg.B, lpeg.Cmt
local STRING_ESCAPES = { local STRING_ESCAPES = {
n = "\n", n = "\n",
t = "\t", t = "\t",
@ -71,7 +71,7 @@ local nomsu = [=[ file <- ({{| shebang?
(ignored_line %nl)* (ignored_line %nl)*
statements (nodent statements)* statements (nodent statements)*
(%nl ignored_line)* %nl? (%nl ignored_line)* %nl?
(({.+} ("" -> "Unexpected end of file")) => error)? |} }) -> File (({.+} ("" -> "Parse error")) => error)? |} }) -> File
shebang <- "#!" [^%nl]* %nl shebang <- "#!" [^%nl]* %nl
@ -114,7 +114,7 @@ local nomsu = [=[ file <- ({{| shebang?
(expression (dotdot / tok_gap))* word ((dotdot / tok_gap) (expression / word))* (expression (dotdot / tok_gap))* word ((dotdot / tok_gap) (expression / word))*
|} }) -> FunctionCall |} }) -> FunctionCall
word <- ({ { (%wordbreaker+) / (!number %wordchar+) } }) -> Word word <- ({ { %operator / (!number %plain_word) } }) -> Word
inline_string <- ({ '"' {| inline_string <- ({ '"' {|
({~ (("\\" -> "\") / ('\"' -> '"') / ("\n" -> " ({~ (("\\" -> "\") / ('\"' -> '"') / ("\n" -> "
@ -130,9 +130,9 @@ local nomsu = [=[ file <- ({{| shebang?
number <- ({ (("-"? (([0-9]+ "." [0-9]+) / ("." [0-9]+) / ([0-9]+)))-> tonumber) }) -> Number number <- ({ (("-"? (([0-9]+ "." [0-9]+) / ("." [0-9]+) / ([0-9]+)))-> tonumber) }) -> Number
-- Variables can be nameless (i.e. just %) and can't contain wordbreakers like apostrophe -- Variables can be nameless (i.e. just %) and can't contain operators like apostrophe
-- which is a hack to allow %'s to parse as "%" and "' s" separately -- which is a hack to allow %'s to parse as "%" and "' s" separately
variable <- ({ ("%" { %wordchar* }) }) -> Var variable <- ({ ("%" { %plain_word? }) }) -> Var
inline_list <- ({ {| inline_list <- ({ {|
("[" %ws? ((inline_list_item comma)* inline_list_item comma?)? %ws? "]") ("[" %ws? ((inline_list_item comma)* inline_list_item comma?)? %ws? "]")
@ -153,24 +153,27 @@ local nomsu = [=[ file <- ({{| shebang?
indent <- eol (%nl ignored_line)* %nl %indented ((block_comment/line_comment) (%nl ignored_line)* nodent)? indent <- eol (%nl ignored_line)* %nl %indented ((block_comment/line_comment) (%nl ignored_line)* nodent)?
nodent <- eol (%nl ignored_line)* %nl %nodented nodent <- eol (%nl ignored_line)* %nl %nodented
dedent <- eol (%nl ignored_line)* (((!.) &%dedented) / (&(%nl %dedented))) dedent <- eol (%nl ignored_line)* (((!.) &%dedented) / (&(%nl %dedented)))
tok_gap <- %ws / %prev_edge / &("[" / "\" / [.,:;{("#%] / &%wordbreaker) tok_gap <- %ws / %prev_edge / &("[" / "\" / [.,:;{("#%] / &%operator)
comma <- %ws? "," %ws? comma <- %ws? "," %ws?
semicolon <- %ws? ";" %ws? semicolon <- %ws? ";" %ws?
dotdot <- nodent ".." %ws? dotdot <- nodent ".." %ws?
]=] ]=]
local CURRENT_FILE = nil local CURRENT_FILE = nil
local whitespace = S(" \t") ^ 1 local whitespace = S(" \t") ^ 1
local wordbreaker = ("'~`!@$^&*-+=|<>?/") local operator = S("'~`!@$^&*-+=|<>?/") ^ 1
local utf8_continuation = R("\128\191")
local utf8_char = (R("\194\223") * utf8_continuation + R("\224\239") * utf8_continuation * utf8_continuation + R("\240\244") * utf8_continuation * utf8_continuation * utf8_continuation)
local plain_word = (R('az', 'AZ', '09') + S("_") + utf8_char) ^ 1
local defs = { local defs = {
ws = whitespace, ws = whitespace,
nl = P("\n"), nl = P("\n"),
tonumber = tonumber, tonumber = tonumber,
wordbreaker = S(wordbreaker), operator = operator,
wordchar = P(1) - S(' \t\n\r%#:;,.{}[]()"\\' .. wordbreaker), plain_word = plain_word,
indented = Cmt(S(" \t") ^ 0 * (#(P(1) - S(" \t\n") + (-P(1)))), check_indent), indented = Cmt(S(" \t") ^ 0 * (#(P(1) - S(" \t\n") + (-P(1)))), check_indent),
nodented = Cmt(S(" \t") ^ 0 * (#(P(1) - S(" \t\n") + (-P(1)))), check_nodent), nodented = Cmt(S(" \t") ^ 0 * (#(P(1) - S(" \t\n") + (-P(1)))), check_nodent),
dedented = Cmt(S(" \t") ^ 0 * (#(P(1) - S(" \t\n") + (-P(1)))), check_dedent), dedented = Cmt(S(" \t") ^ 0 * (#(P(1) - S(" \t\n") + (-P(1)))), check_dedent),
prev_edge = B(S(" \t\n.,:;}])\"\\" .. wordbreaker)), prev_edge = B(S(" \t\n.,:;}])\"\\'~`!@$^&*-+=|<>?/")),
line_no = function(src, pos) line_no = function(src, pos)
local line_no = 1 local line_no = 1
for _ in src:sub(1, pos):gmatch("\n") do for _ in src:sub(1, pos):gmatch("\n") do
@ -263,6 +266,9 @@ do
elseif type(signature) == 'table' and type(signature[1]) == 'string' then elseif type(signature) == 'table' and type(signature[1]) == 'string' then
signature = self:get_stubs(signature) signature = self:get_stubs(signature)
end end
if self.debug then
self:write(colored.magenta("Defined rule " .. tostring(repr(signature))))
end
assert(type(thunk) == 'function', "Bad thunk: " .. tostring(repr(thunk))) assert(type(thunk) == 'function', "Bad thunk: " .. tostring(repr(thunk)))
local canonical_args = nil local canonical_args = nil
local canonical_escaped_args = nil local canonical_escaped_args = nil
@ -276,7 +282,9 @@ do
def_number = self.__class.def_number, def_number = self.__class.def_number,
defs = self.defs defs = self.defs
} }
local where_defs_go = ((getmetatable(self.defs) or { }).__newindex) or self.defs local where_defs_go = (getmetatable(self.defs) or {
__newindex = self.defs
}).__newindex
for _index_0 = 1, #signature do for _index_0 = 1, #signature do
local _des_0 = signature[_index_0] local _des_0 = signature[_index_0]
local stub, arg_names, escaped_args local stub, arg_names, escaped_args
@ -455,7 +463,10 @@ do
self:writeln(tostring(colored.bright("WITH ARGS:")) .. " " .. tostring(colored.dim(repr(args)))) self:writeln(tostring(colored.bright("WITH ARGS:")) .. " " .. tostring(colored.dim(repr(args))))
end end
insert(self.callstack, "#macro") insert(self.callstack, "#macro")
local old_tree
old_tree, self.defs["#macro_tree"] = self.defs["#macro_tree"], tree
local expr, statement = self:call(tree.stub, tree.line_no, unpack(args)) local expr, statement = self:call(tree.stub, tree.line_no, unpack(args))
self.defs["#macro_tree"] = old_tree
remove(self.callstack) remove(self.callstack)
return expr, statement return expr, statement
end, end,
@ -591,7 +602,7 @@ do
self:writeln(colored.bright("PARSED TO TREE:")) self:writeln(colored.bright("PARSED TO TREE:"))
self:print_tree(statement) self:print_tree(statement)
end end
local ok, expr, statements = pcall(self.tree_to_lua, self, statement) local ok, expr, statements = pcall(self.tree_to_lua, self, statement, filename)
if not ok then if not ok then
self:errorln(tostring(colored.red("Error occurred in statement:")) .. "\n" .. tostring(colored.bright(colored.yellow(statement.src)))) self:errorln(tostring(colored.red("Error occurred in statement:")) .. "\n" .. tostring(colored.bright(colored.yellow(statement.src))))
error(expr) error(expr)
@ -642,8 +653,8 @@ return ret;
end);]]):format(concat(buffer, "\n")) end);]]):format(concat(buffer, "\n"))
return return_value, lua_code, vars return return_value, lua_code, vars
end, end,
tree_to_value = function(self, tree, vars) tree_to_value = function(self, tree, vars, filename)
local code = "return (function(nomsu, vars)\nreturn " .. tostring(self:tree_to_lua(tree)) .. ";\nend);" local code = "return (function(nomsu, vars)\nreturn " .. tostring(self:tree_to_lua(tree, filename)) .. ";\nend);"
if self.debug then if self.debug then
self:writeln(tostring(colored.bright("RUNNING LUA TO GET VALUE:")) .. "\n" .. tostring(colored.blue(colored.bright(code)))) self:writeln(tostring(colored.bright("RUNNING LUA TO GET VALUE:")) .. "\n" .. tostring(colored.blue(colored.bright(code))))
end end
@ -838,7 +849,7 @@ end);]]):format(concat(buffer, "\n"))
return error("Unsupported value_to_nomsu type: " .. tostring(type(value))) return error("Unsupported value_to_nomsu type: " .. tostring(type(value)))
end end
end, end,
tree_to_lua = function(self, tree) tree_to_lua = function(self, tree, filename)
assert(tree, "No tree provided.") assert(tree, "No tree provided.")
if not tree.type then if not tree.type then
self:errorln(debug.traceback()) self:errorln(debug.traceback())
@ -848,13 +859,13 @@ end);]]):format(concat(buffer, "\n"))
if "File" == _exp_0 then if "File" == _exp_0 then
return error("Should not be converting File to lua through this function.") return error("Should not be converting File to lua through this function.")
elseif "Nomsu" == _exp_0 then elseif "Nomsu" == _exp_0 then
return "nomsu:parse(" .. tostring(repr(tree.value.src)) .. ", " .. tostring(repr(CURRENT_FILE)) .. ").value[1]", nil return "nomsu:parse(" .. tostring(repr(tree.value.src)) .. ", " .. tostring(repr(tree.line_no)) .. ").value[1]", nil
elseif "Thunk" == _exp_0 then elseif "Thunk" == _exp_0 then
local lua_bits = { } local lua_bits = { }
local _list_0 = tree.value local _list_0 = tree.value
for _index_0 = 1, #_list_0 do for _index_0 = 1, #_list_0 do
local arg = _list_0[_index_0] local arg = _list_0[_index_0]
local expr, statement = self:tree_to_lua(arg) local expr, statement = self:tree_to_lua(arg, filename)
if statement then if statement then
insert(lua_bits, statement) insert(lua_bits, statement)
end end
@ -916,10 +927,11 @@ end)]]):format(concat(lua_bits, "\n"))
if escaped_args[arg_names[arg_num]] then if escaped_args[arg_names[arg_num]] then
arg = { arg = {
type = "Nomsu", type = "Nomsu",
value = arg value = arg,
line_no = tree.line_no
} }
end end
local expr, statement = self:tree_to_lua(arg) local expr, statement = self:tree_to_lua(arg, filename)
if statement then if statement then
self:error("Cannot use [[" .. tostring(arg.src) .. "]] as a function argument, since it's not an expression.") self:error("Cannot use [[" .. tostring(arg.src) .. "]] as a function argument, since it's not an expression.")
end end
@ -953,7 +965,7 @@ end)]]):format(concat(lua_bits, "\n"))
insert(concat_parts, repr(string_buffer)) insert(concat_parts, repr(string_buffer))
string_buffer = "" string_buffer = ""
end end
local expr, statement = self:tree_to_lua(bit) local expr, statement = self:tree_to_lua(bit, filename)
if self.debug then if self.debug then
self:writeln((colored.bright("INTERP:"))) self:writeln((colored.bright("INTERP:")))
self:print_tree(bit) self:print_tree(bit)
@ -984,7 +996,7 @@ end)]]):format(concat(lua_bits, "\n"))
local _list_0 = tree.value local _list_0 = tree.value
for _index_0 = 1, #_list_0 do for _index_0 = 1, #_list_0 do
local item = _list_0[_index_0] local item = _list_0[_index_0]
local expr, statement = self:tree_to_lua(item) local expr, statement = self:tree_to_lua(item, filename)
if statement then if statement then
self:error("Cannot use [[" .. tostring(item.src) .. "]] as a list item, since it's not an expression.") self:error("Cannot use [[" .. tostring(item.src) .. "]] as a list item, since it's not an expression.")
end end
@ -1088,8 +1100,11 @@ end)]]):format(concat(lua_bits, "\n"))
self:error("Nothing to get stub from") self:error("Nothing to get stub from")
end end
if type(x) == 'string' then if type(x) == 'string' then
x = x:gsub("\n%s*%.%.", " "):gsub("([" .. tostring(wordbreaker) .. "]+)", " %1 "):gsub("%s+", " ") x = x:gsub("\n%s*%.%.", " ")
x = x:gsub("^%s*", ""):gsub("%s*$", "") x = lpeg.Cs((operator / (function(op)
return " " .. tostring(op) .. " "
end) + 1) ^ 0):match(x)
x = x:gsub("%s+", " "):gsub("^%s*", ""):gsub("%s*$", "")
local stub = x:gsub("%%%S+", "%%"):gsub("\\", "") local stub = x:gsub("%%%S+", "%%"):gsub("\\", "")
local arg_names local arg_names
do do
@ -1196,7 +1211,21 @@ end)]]):format(concat(lua_bits, "\n"))
end)()) end)())
for i = #self.callstack, 1, -1 do for i = #self.callstack, 1, -1 do
if self.callstack[i] ~= "#macro" then if self.callstack[i] ~= "#macro" then
error_msg = error_msg .. "\n " .. tostring(("%-" .. tostring(maxlen) .. "s"):format(self.callstack[i][2])) .. "| " .. tostring(self.callstack[i][1]) local line_no = self.callstack[i][2]
if line_no then
local nums
do
local _accum_0 = { }
local _len_0 = 1
for n in line_no:gmatch(":([0-9]+)") do
_accum_0[_len_0] = tonumber(n)
_len_0 = _len_0 + 1
end
nums = _accum_0
end
line_no = line_no:gsub(":.*$", ":" .. tostring(utils.sum(nums) - #nums + 1))
end
error_msg = error_msg .. "\n " .. tostring(("%-" .. tostring(maxlen) .. "s"):format(line_no)) .. "| " .. tostring(self.callstack[i][1])
end end
end end
error_msg = error_msg .. "\n <top level>" error_msg = error_msg .. "\n <top level>"
@ -1215,7 +1244,7 @@ end)]]):format(concat(lua_bits, "\n"))
end, end,
initialize_core = function(self) initialize_core = function(self)
local nomsu_string_as_lua local nomsu_string_as_lua
nomsu_string_as_lua = function(self, code, tree) nomsu_string_as_lua = function(self, code)
local concat_parts = { } local concat_parts = { }
local _list_0 = code.value local _list_0 = code.value
for _index_0 = 1, #_list_0 do for _index_0 = 1, #_list_0 do
@ -1223,9 +1252,9 @@ end)]]):format(concat(lua_bits, "\n"))
if type(bit) == "string" then if type(bit) == "string" then
insert(concat_parts, bit) insert(concat_parts, bit)
elseif type(bit) == "table" and bit.type == "FunctionCall" and bit.src == "__src__" then elseif type(bit) == "table" and bit.type == "FunctionCall" and bit.src == "__src__" then
insert(concat_parts, repr(tree.src)) insert(concat_parts, repr(self.defs["#macro_tree"].src))
else else
local expr, statement = self:tree_to_lua(bit) local expr, statement = self:tree_to_lua(bit, filename)
if statement then if statement then
self:error("Cannot use [[" .. tostring(bit.src) .. "]] as a string interpolation value, since it's not an expression.") self:error("Cannot use [[" .. tostring(bit.src) .. "]] as a string interpolation value, since it's not an expression.")
end end

View File

@ -39,7 +39,7 @@ if _VERSION == "Lua 5.1"
-- Add compiler options for optimization level (compile-fast vs. run-fast, etc.) -- Add compiler options for optimization level (compile-fast vs. run-fast, etc.)
lpeg.setmaxstack 10000 -- whoa lpeg.setmaxstack 10000 -- whoa
{:P,:V,:S,:Cg,:C,:Cp,:B,:Cmt} = lpeg {:P,:R,:V,:S,:Cg,:C,:Cp,:B,:Cmt} = lpeg
STRING_ESCAPES = n:"\n", t:"\t", b:"\b", a:"\a", v:"\v", f:"\f", r:"\r" STRING_ESCAPES = n:"\n", t:"\t", b:"\b", a:"\a", v:"\v", f:"\f", r:"\r"
-- NOTE: this treats tabs as equivalent to 1 space -- NOTE: this treats tabs as equivalent to 1 space
@ -64,7 +64,7 @@ nomsu = [=[
(ignored_line %nl)* (ignored_line %nl)*
statements (nodent statements)* statements (nodent statements)*
(%nl ignored_line)* %nl? (%nl ignored_line)* %nl?
(({.+} ("" -> "Unexpected end of file")) => error)? |} }) -> File (({.+} ("" -> "Parse error")) => error)? |} }) -> File
shebang <- "#!" [^%nl]* %nl shebang <- "#!" [^%nl]* %nl
@ -107,7 +107,7 @@ nomsu = [=[
(expression (dotdot / tok_gap))* word ((dotdot / tok_gap) (expression / word))* (expression (dotdot / tok_gap))* word ((dotdot / tok_gap) (expression / word))*
|} }) -> FunctionCall |} }) -> FunctionCall
word <- ({ { (%wordbreaker+) / (!number %wordchar+) } }) -> Word word <- ({ { %operator / (!number %plain_word) } }) -> Word
inline_string <- ({ '"' {| inline_string <- ({ '"' {|
({~ (("\\" -> "\") / ('\"' -> '"') / ("\n" -> " ({~ (("\\" -> "\") / ('\"' -> '"') / ("\n" -> "
@ -123,9 +123,9 @@ nomsu = [=[
number <- ({ (("-"? (([0-9]+ "." [0-9]+) / ("." [0-9]+) / ([0-9]+)))-> tonumber) }) -> Number number <- ({ (("-"? (([0-9]+ "." [0-9]+) / ("." [0-9]+) / ([0-9]+)))-> tonumber) }) -> Number
-- Variables can be nameless (i.e. just %) and can't contain wordbreakers like apostrophe -- Variables can be nameless (i.e. just %) and can't contain operators like apostrophe
-- which is a hack to allow %'s to parse as "%" and "' s" separately -- which is a hack to allow %'s to parse as "%" and "' s" separately
variable <- ({ ("%" { %wordchar* }) }) -> Var variable <- ({ ("%" { %plain_word? }) }) -> Var
inline_list <- ({ {| inline_list <- ({ {|
("[" %ws? ((inline_list_item comma)* inline_list_item comma?)? %ws? "]") ("[" %ws? ((inline_list_item comma)* inline_list_item comma?)? %ws? "]")
@ -146,7 +146,7 @@ nomsu = [=[
indent <- eol (%nl ignored_line)* %nl %indented ((block_comment/line_comment) (%nl ignored_line)* nodent)? indent <- eol (%nl ignored_line)* %nl %indented ((block_comment/line_comment) (%nl ignored_line)* nodent)?
nodent <- eol (%nl ignored_line)* %nl %nodented nodent <- eol (%nl ignored_line)* %nl %nodented
dedent <- eol (%nl ignored_line)* (((!.) &%dedented) / (&(%nl %dedented))) dedent <- eol (%nl ignored_line)* (((!.) &%dedented) / (&(%nl %dedented)))
tok_gap <- %ws / %prev_edge / &("[" / "\" / [.,:;{("#%] / &%wordbreaker) tok_gap <- %ws / %prev_edge / &("[" / "\" / [.,:;{("#%] / &%operator)
comma <- %ws? "," %ws? comma <- %ws? "," %ws?
semicolon <- %ws? ";" %ws? semicolon <- %ws? ";" %ws?
dotdot <- nodent ".." %ws? dotdot <- nodent ".." %ws?
@ -154,14 +154,19 @@ nomsu = [=[
CURRENT_FILE = nil CURRENT_FILE = nil
whitespace = S(" \t")^1 whitespace = S(" \t")^1
wordbreaker = ("'~`!@$^&*-+=|<>?/") operator = S("'~`!@$^&*-+=|<>?/")^1
utf8_continuation = R("\128\191")
utf8_char = (
R("\194\223")*utf8_continuation +
R("\224\239")*utf8_continuation*utf8_continuation +
R("\240\244")*utf8_continuation*utf8_continuation*utf8_continuation)
plain_word = (R('az','AZ','09') + S("_") + utf8_char)^1
defs = defs =
ws:whitespace, nl: P("\n"), :tonumber, wordbreaker:S(wordbreaker) ws:whitespace, nl: P("\n"), :tonumber, :operator, :plain_word
wordchar: P(1)-S(' \t\n\r%#:;,.{}[]()"\\'..wordbreaker)
indented: Cmt(S(" \t")^0 * (#(P(1)-S(" \t\n") + (-P(1)))), check_indent) indented: Cmt(S(" \t")^0 * (#(P(1)-S(" \t\n") + (-P(1)))), check_indent)
nodented: Cmt(S(" \t")^0 * (#(P(1)-S(" \t\n") + (-P(1)))), check_nodent) nodented: Cmt(S(" \t")^0 * (#(P(1)-S(" \t\n") + (-P(1)))), check_nodent)
dedented: Cmt(S(" \t")^0 * (#(P(1)-S(" \t\n") + (-P(1)))), check_dedent) dedented: Cmt(S(" \t")^0 * (#(P(1)-S(" \t\n") + (-P(1)))), check_dedent)
prev_edge: B(S(" \t\n.,:;}])\"\\"..wordbreaker)) prev_edge: B(S(" \t\n.,:;}])\"\\'~`!@$^&*-+=|<>?/")) -- Includes "operator"
line_no: (src, pos)-> line_no: (src, pos)->
line_no = 1 line_no = 1
for _ in src\sub(1,pos)\gmatch("\n") do line_no += 1 for _ in src\sub(1,pos)\gmatch("\n") do line_no += 1
@ -227,13 +232,15 @@ class NomsuCompiler
signature = @get_stubs {signature} signature = @get_stubs {signature}
elseif type(signature) == 'table' and type(signature[1]) == 'string' elseif type(signature) == 'table' and type(signature[1]) == 'string'
signature = @get_stubs signature signature = @get_stubs signature
if @debug
@write colored.magenta "Defined rule #{repr signature}"
assert type(thunk) == 'function', "Bad thunk: #{repr thunk}" assert type(thunk) == 'function', "Bad thunk: #{repr thunk}"
canonical_args = nil canonical_args = nil
canonical_escaped_args = nil canonical_escaped_args = nil
aliases = {} aliases = {}
@@def_number += 1 @@def_number += 1
def = {:thunk, :src, :is_macro, aliases:{}, def_number:@@def_number, defs:@defs} def = {:thunk, :src, :is_macro, aliases:{}, def_number:@@def_number, defs:@defs}
where_defs_go = ((getmetatable(@defs) or {}).__newindex) or @defs where_defs_go = (getmetatable(@defs) or {__newindex:@defs}).__newindex
for {stub, arg_names, escaped_args} in *signature for {stub, arg_names, escaped_args} in *signature
assert stub, "NO STUB FOUND: #{repr signature}" assert stub, "NO STUB FOUND: #{repr signature}"
if @debug then @writeln "#{colored.bright "DEFINING RULE:"} #{colored.underscore colored.magenta repr(stub)} #{colored.bright "WITH ARGS"} #{colored.dim repr(arg_names)}" if @debug then @writeln "#{colored.bright "DEFINING RULE:"} #{colored.underscore colored.magenta repr(stub)} #{colored.bright "WITH ARGS"} #{colored.dim repr(arg_names)}"
@ -410,7 +417,7 @@ class NomsuCompiler
@writeln "#{colored.bright "RUNNING NOMSU:"}\n#{colored.bright colored.yellow statement.src}" @writeln "#{colored.bright "RUNNING NOMSU:"}\n#{colored.bright colored.yellow statement.src}"
@writeln colored.bright("PARSED TO TREE:") @writeln colored.bright("PARSED TO TREE:")
@print_tree statement @print_tree statement
ok,expr,statements = pcall(@tree_to_lua, self, statement) ok,expr,statements = pcall(@tree_to_lua, self, statement, filename)
if not ok if not ok
@errorln "#{colored.red "Error occurred in statement:"}\n#{colored.bright colored.yellow statement.src}" @errorln "#{colored.red "Error occurred in statement:"}\n#{colored.bright colored.yellow statement.src}"
error(expr) error(expr)
@ -451,8 +458,8 @@ return ret;
end);]])\format(concat(buffer, "\n")) end);]])\format(concat(buffer, "\n"))
return return_value, lua_code, vars return return_value, lua_code, vars
tree_to_value: (tree, vars)=> tree_to_value: (tree, vars, filename)=>
code = "return (function(nomsu, vars)\nreturn #{@tree_to_lua(tree)};\nend);" code = "return (function(nomsu, vars)\nreturn #{@tree_to_lua(tree, filename)};\nend);"
if @debug if @debug
@writeln "#{colored.bright "RUNNING LUA TO GET VALUE:"}\n#{colored.blue colored.bright(code)}" @writeln "#{colored.bright "RUNNING LUA TO GET VALUE:"}\n#{colored.blue colored.bright(code)}"
lua_thunk, err = load(code) lua_thunk, err = load(code)
@ -579,7 +586,7 @@ end);]])\format(concat(buffer, "\n"))
else else
error("Unsupported value_to_nomsu type: #{type(value)}") error("Unsupported value_to_nomsu type: #{type(value)}")
tree_to_lua: (tree)=> tree_to_lua: (tree, filename)=>
-- Return <lua code for value>, <additional lua code> -- Return <lua code for value>, <additional lua code>
assert tree, "No tree provided." assert tree, "No tree provided."
if not tree.type if not tree.type
@ -590,12 +597,12 @@ end);]])\format(concat(buffer, "\n"))
error("Should not be converting File to lua through this function.") error("Should not be converting File to lua through this function.")
when "Nomsu" when "Nomsu"
return "nomsu:parse(#{repr tree.value.src}, #{repr CURRENT_FILE}).value[1]", nil return "nomsu:parse(#{repr tree.value.src}, #{repr tree.line_no}).value[1]", nil
when "Thunk" when "Thunk"
lua_bits = {} lua_bits = {}
for arg in *tree.value for arg in *tree.value
expr,statement = @tree_to_lua arg expr,statement = @tree_to_lua arg, filename
if statement then insert lua_bits, statement if statement then insert lua_bits, statement
if expr then insert lua_bits, "ret = #{expr};" if expr then insert lua_bits, "ret = #{expr};"
return ([[ return ([[
@ -625,8 +632,8 @@ end)]])\format(concat(lua_bits, "\n"))
for arg in *tree.value for arg in *tree.value
if arg.type == 'Word' then continue if arg.type == 'Word' then continue
if escaped_args[arg_names[arg_num]] if escaped_args[arg_names[arg_num]]
arg = {type:"Nomsu", value:arg} arg = {type:"Nomsu", value:arg, line_no:tree.line_no}
expr,statement = @tree_to_lua arg expr,statement = @tree_to_lua arg, filename
if statement if statement
@error "Cannot use [[#{arg.src}]] as a function argument, since it's not an expression." @error "Cannot use [[#{arg.src}]] as a function argument, since it's not an expression."
insert args, expr insert args, expr
@ -646,7 +653,7 @@ end)]])\format(concat(lua_bits, "\n"))
if string_buffer ~= "" if string_buffer ~= ""
insert concat_parts, repr(string_buffer) insert concat_parts, repr(string_buffer)
string_buffer = "" string_buffer = ""
expr, statement = @tree_to_lua bit expr, statement = @tree_to_lua bit, filename
if @debug if @debug
@writeln (colored.bright "INTERP:") @writeln (colored.bright "INTERP:")
@print_tree bit @print_tree bit
@ -667,7 +674,7 @@ end)]])\format(concat(lua_bits, "\n"))
when "List" when "List"
items = {} items = {}
for item in *tree.value for item in *tree.value
expr,statement = @tree_to_lua item expr,statement = @tree_to_lua item, filename
if statement if statement
@error "Cannot use [[#{item.src}]] as a list item, since it's not an expression." @error "Cannot use [[#{item.src}]] as a list item, since it's not an expression."
insert items, expr insert items, expr
@ -759,8 +766,9 @@ end)]])\format(concat(lua_bits, "\n"))
-- (e.g. "say %msg") or function call (e.g. FunctionCall({Word("say"), Var("msg"))) -- (e.g. "say %msg") or function call (e.g. FunctionCall({Word("say"), Var("msg")))
if type(x) == 'string' if type(x) == 'string'
-- Standardize format to stuff separated by spaces -- Standardize format to stuff separated by spaces
x = x\gsub("\n%s*%.%.", " ")\gsub("([#{wordbreaker}]+)", " %1 ")\gsub("%s+"," ") x = x\gsub("\n%s*%.%.", " ")
x = x\gsub("^%s*","")\gsub("%s*$","") x = lpeg.Cs((operator / ((op)->" #{op} ") + 1)^0)\match(x)
x = x\gsub("%s+"," ")\gsub("^%s*","")\gsub("%s*$","")
stub = x\gsub("%%%S+","%%")\gsub("\\","") stub = x\gsub("%%%S+","%%")\gsub("\\","")
arg_names = [arg for arg in x\gmatch("%%([^%s]*)")] arg_names = [arg for arg in x\gmatch("%%([^%s]*)")]
escaped_args = utils.set [arg for arg in x\gmatch("\\%%([^%s]*)")] escaped_args = utils.set [arg for arg in x\gmatch("\\%%([^%s]*)")]
@ -797,7 +805,11 @@ end)]])\format(concat(lua_bits, "\n"))
maxlen = utils.max([#c[2] for c in *@callstack when c != "#macro"]) maxlen = utils.max([#c[2] for c in *@callstack when c != "#macro"])
for i=#@callstack,1,-1 for i=#@callstack,1,-1
if @callstack[i] != "#macro" if @callstack[i] != "#macro"
error_msg ..= "\n #{"%-#{maxlen}s"\format @callstack[i][2]}| #{@callstack[i][1]}" line_no = @callstack[i][2]
if line_no
nums = [tonumber(n) for n in line_no\gmatch(":([0-9]+)")]
line_no = line_no\gsub(":.*$", ":#{utils.sum(nums) - #nums + 1}")
error_msg ..= "\n #{"%-#{maxlen}s"\format line_no}| #{@callstack[i][1]}"
error_msg ..= "\n <top level>" error_msg ..= "\n <top level>"
@callstack = {} @callstack = {}
error error_msg, 3 error error_msg, 3
@ -818,7 +830,7 @@ end)]])\format(concat(lua_bits, "\n"))
elseif type(bit) == "table" and bit.type == "FunctionCall" and bit.src == "__src__" elseif type(bit) == "table" and bit.type == "FunctionCall" and bit.src == "__src__"
insert concat_parts, repr(@defs["#macro_tree"].src) insert concat_parts, repr(@defs["#macro_tree"].src)
else else
expr, statement = @tree_to_lua bit expr, statement = @tree_to_lua bit, filename
if statement if statement
@error "Cannot use [[#{bit.src}]] as a string interpolation value, since it's not an expression." @error "Cannot use [[#{bit.src}]] as a string interpolation value, since it's not an expression."
insert concat_parts, expr insert concat_parts, expr