#!/usr/bin/env moon
re = require 're'
lpeg = require 'lpeg'
utils = require 'utils'

-- TODO:
-- improve indentation of generated lua code
-- provide way to run precompiled nomsu -> lua code
-- better scoping?
-- first-class rules
-- better error reporting
-- add line numbers of function calls
-- versions of rules with auto-supplied arguments
-- type checking?

-- Unit of indentation used when pretty-printing parse trees (see _yield_tree).
INDENT = " "
lpeg.setmaxstack 10000 -- whoa
{:P,:V,:S,:Cg,:C,:Cp,:B,:Cmt} = lpeg

-- Compiler for Nomsu source text: parses it with an LPeg grammar, translates the
-- parse tree into Lua source, and runs each top-level statement as it is compiled.
class NomsuCompiler
    -- Create a compiler. If `parent` is given, rule lookups that miss in this
    -- compiler's @defs fall through to parent.defs.
    new:(parent)=>
        @write = (...)=> io.write(...)
        @defs = setmetatable({}, {__index:parent and parent.defs})
        @callstack = {}
        @debug = false
        @initialize_core!
        @utils = utils
        @loaded_files = {}

    -- Write the arguments through @write, followed by a newline.
    writeln:(...)=>
        @write(...)
        @write("\n")

    -- Call the (non-macro) rule registered under the invocation `fn_name`.
    -- The varargs are matched positionally against the rule's argument names and
    -- handed to the rule as a name -> value table. Returns whatever the rule returns;
    -- errors raised by the rule are re-raised after the callstack has been popped.
    call: (fn_name,...)=>
        fn_info = @defs[fn_name]
        if fn_info == nil
            @error "Attempt to call undefined function: #{fn_name}"
        if fn_info.is_macro
            @error "Attempt to call macro at runtime: #{fn_name}\nThis can be caused by using a macro in a function that is defined before the macro."
        unless @check_permission(fn_name)
            @error "You do not have the authority to call: #{fn_name}"
        table.insert @callstack, fn_name
        {:fn, :arg_names} = fn_info
        args = {name, select(i,...) for i,name in ipairs(arg_names[fn_name])}
        if @debug
            @writeln "Calling #{fn_name} with args: #{utils.repr(args)}"
        -- pcall so that the callstack is popped even when the rule fails
        ok,ret = pcall(fn, self, args)
        table.remove @callstack
        if not ok
            error(ret)
        return ret

    -- Returns true if `fn_name` may be called from the current callstack: either the
    -- rule has no `whiteset`, or some caller on the callstack is a key of it.
    check_permission: (fn_name)=>
        fn_info = @defs[fn_name]
        if fn_info == nil
            @error "Undefined function: #{fn_name}"
        if fn_info.whiteset == nil then return true
        for caller in *@callstack
            if fn_info.whiteset[caller]
                return true
        return false

    -- Register `fn` as a runtime rule under every invocation derived from `spec`
    -- (a string or list of strings, see get_invocations). `src` is stored as-is.
    def: (spec, fn, src)=>
        if @debug
            @writeln "Defining rule: #{spec}"
        invocations,arg_names = @get_invocations spec
        fn_info = {:fn, :arg_names, :invocations, :src, is_macro:false}
        for invocation in *invocations
            @defs[invocation] = fn_info

    -- Extract invocation strings from a parse tree `def`.
    -- A String tree is evaluated and its value returned directly; a List tree yields
    -- one entry per item (String items are evaluated, FunctionCall items are rebuilt
    -- from their Word values and Var source text). Any other tree type is an error.
    get_invocations_from_definition:(def, vars)=>
        if def.type == "String"
            return @tree_to_value(def, vars)

        if def.type != "List"
            @error "Trying to get invocations from #{def.type}, but expected List or String."

        invocations = {}
        for item in *def.value
            if item.type == "String"
                table.insert invocations, @tree_to_value(item, vars)
                continue
            if item.type != "FunctionCall"
                @error "Invalid list item: #{item.type}, expected FunctionCall or String"
            name_bits = {}
            for token in *item.value
                if token.type == "Word"
                    table.insert name_bits, token.value
                elseif token.type == "Var"
                    table.insert name_bits, token.src
                else
                    @error "Unexpected token type in definition: #{token.type} (expected Word or Var)"
            table.insert invocations, table.concat(name_bits, " ")
        return invocations

    -- Normalize one or more rule specs into invocation keys.
    -- `text` is a string or a list of strings. In each spec every "%name" is reduced
    -- to a bare "%", a space is inserted before each apostrophe, and whitespace runs
    -- are collapsed. Returns (list of invocations, table invocation -> argument names).
    -- All specs in the list must use the same set of argument names.
    get_invocations:(text)=>
        if not text
            @error "No text provided!"
        if type(text) == 'function'
            error "Function passed to get_invocations"
        if type(text) == 'string' then text = {text}
        invocations = {}
        arg_names = {}
        prev_arg_names = nil
        for _text in *text
            invocation = _text\gsub("'"," '")\gsub("%%%S+","%%")\gsub("%s+"," ")
            _arg_names = [arg for arg in _text\gmatch("%%(%S[^%s']*)")]
            table.insert(invocations, invocation)
            if prev_arg_names
                if not utils.equivalent(utils.set(prev_arg_names), utils.set(_arg_names))
                    @error("Conflicting argument names #{utils.repr(prev_arg_names)} and #{utils.repr(_arg_names)} for #{utils.repr(text)}")
            else
                prev_arg_names = _arg_names
            arg_names[invocation] = _arg_names
        return invocations, arg_names

    -- Register `lua_gen_fn` as a compile-time macro under every invocation derived
    -- from `spec`. Macros are invoked by run_macro and must return Lua source text.
    defmacro: (spec, lua_gen_fn, src)=>
        if @debug
            @writeln("DEFINING MACRO: #{spec}#{src or ""}")
        invocations,arg_names = @get_invocations spec
        fn_info = {fn:lua_gen_fn, :arg_names, :invocations, :src, is_macro:true}
        for invocation in *invocations
            @defs[invocation] = fn_info

    -- Compile and execute `text`, returning the value produced by its last statement.
    run: (text, filename)=>
        if @debug
            @writeln "RUNNING TEXT:\n#{text}"
        -- This will execute each chunk as it goes along
        code, retval = @compile(text, filename)
        if @debug
            @writeln "\nGENERATED LUA CODE:\n#{code}"
            @writeln "\nPRODUCED RETURN VALUE:\n#{retval}"
        return retval

    -- Return a Lua expression (as a string) that evaluates to `obj`.
    -- Supports numbers, strings and (recursively) tables; functions and every other
    -- type raise an error.
    serialize: (obj)=>
        switch type(obj)
            when "function"
                -- Planned form once supported:
                --   "assert(load("..utils.repr(string.dump(obj)).."))"
                error("Function serialization is not yet implemented.")
            when "table"
                if utils.is_list obj
                    "{#{table.concat([@serialize(i) for i in *obj], ", ")}}"
                else
                    "{#{table.concat(["[#{@serialize(k)}]= #{@serialize(v)}" for k,v in pairs(obj)], ", ")}}"
            when "number"
                utils.repr(obj)
            when "string"
                utils.repr(obj)
            else
                error "Serialization not implemented for: #{type(obj)}"

    -- Evaluate a Lua expression string (e.g. one produced by serialize) with
    -- `compiler` bound to this compiler and `vars` bound to an empty table.
    deserialize: (str)=>
        lua_thunk, err = load("return (function(compiler,vars) return "..str.." end)")
        if not lua_thunk
            error("Failed to compile generated code:\n#{str}\n\n#{err}")
        return (lua_thunk!)(self, {})

    -- Parse Nomsu source `str` into a tree of {type, src, value, errors} tokens.
    -- `filename` is only used in parse error messages. Raises on any parse failure.
    parse: (str, filename)=>
        if @debug
            @writeln("PARSING:\n#{str}")

        -- Width of the leading whitespace of `line`: a space counts 1, a tab counts 4.
        get_line_indentation = (line)->
            indent_amounts = {[" "]:1, ["\t"]:4}
            sum = 0
            for c in line\match("[\t ]*")\gmatch "[\t ]"
                sum += indent_amounts[c]
            return sum

        -- Stack of currently open indentation widths. The three match-time checks
        -- below succeed (by returning the end position) only when the candidate
        -- line is deeper than / shallower than / level with the top of the stack.
        indent_stack = {0}
        check_indent = (subject,end_pos,spaces)->
            num_spaces = get_line_indentation(spaces)
            if num_spaces > indent_stack[#indent_stack]
                table.insert(indent_stack, num_spaces)
                return end_pos
        check_dedent = (subject,end_pos,spaces)->
            num_spaces = get_line_indentation(spaces)
            if num_spaces < indent_stack[#indent_stack]
                table.remove(indent_stack)
                return end_pos
        check_nodent = (subject,end_pos,spaces)->
            num_spaces = get_line_indentation(spaces)
            if num_spaces == indent_stack[#indent_stack]
                return end_pos

        -- NOTE: `--` starts a comment that runs to end of line inside an `re` grammar,
        -- so the line breaks in this string are significant.
        lingo = [=[
            file <- ({ {| shebang? {:body: block :} %nl* (({.+} ("" -> "Unexpected end of file")) => error)? |} }) -> File
            shebang <- "#!" [^%nl]* %nl

            block <- ({ {| (ignored_line %nl)* line_of_statements (nodent line_of_statements)* (%nl ignored_line)* |} }) -> Block
            inline_block <- ({ {| inline_line_of_statements |} }) -> Block

            line_of_statements <- statement (%ws? ";" %ws? statement)*
            inline_line_of_statements <- inline_statement (%ws? ";" %ws? inline_statement)*

            statement <- ({ functioncall / expression }) -> Statement
            inline_statement <- ({ inline_functioncall / expression }) -> Statement

            expression <- (
                longstring / string / number / variable / list / thunk / block_functioncall
                / ("(" %ws? (inline_thunk / inline_functioncall) %ws? ")"))

            -- Function calls need at least one word in them
            functioncall <- ({ {| (expression (dotdot / tok_gap))* word ((dotdot / tok_gap) (expression / word))* |} }) -> FunctionCall
            inline_functioncall <- ({ {| (expression tok_gap)* word (tok_gap (expression / word))* |} }) -> FunctionCall
            block_functioncall <- "(..)" indent functioncall (dedent / (({.+} ("" -> "Error while parsing block function call")) => error))

            word <- ({ !number {%wordchar (!"'" %wordchar)*} }) -> Word

            thunk <- ({ ":" ((indent block (dedent / (({.+} ("" -> "Error while parsing thunk")) => error))) / (%ws? inline_block)) }) -> Thunk
            inline_thunk <- ({ ":" %ws? inline_block }) -> Thunk

            string <- ({ (!longstring) '"' {(("\" [^%nl]) / [^"%nl])*} '"' }) -> String

            longstring <- ({ '".."' %ws? {| (longstring_line (indent longstring_line (nodent longstring_line)* (dedent / longstring_error))?) /(indent longstring_line (nodent longstring_line)* (dedent / longstring_error)) |} }) -> Longstring
            longstring_line <- "|" {| ({("\\" / (!string_interpolation [^%nl]))+} / string_interpolation)* |}
            longstring_error <- (({.+} ("" -> "Error while parsing Longstring")) => error)
            string_interpolation <- "\" %ws? (((inline_functioncall / expression) dotdot?) / dotdot) %ws? "\"

            number <- ({ {"-"? (([0-9]+ "." [0-9]+) / ("." [0-9]+) / ([0-9]+)) } }) -> Number

            -- Hack to allow %foo's to parse as "%foo" and "'s" separately
            variable <- ({ ("%" {%wordchar (!"'" %wordchar)*}) }) -> Var

            list <- ({ {| ("[..]" indent list_line (nodent list_line)* (dedent / (({.+} ("" -> "Error while parsing list")) => error))) /("[" %ws? (list_line %ws?)? "]") |} }) -> List
            list_line <- list_bit (%ws? "," tok_gap list_bit)* (%ws? ",")?
            list_bit <- inline_functioncall / expression

            block_comment <- "#.." [^%nl]* indent [^%nl]* (%nl ((%ws? (!. / &%nl)) / (!%dedented [^%nl]*)))*
            line_comment <- "#" [^%nl]*

            eol <- %ws? line_comment? (!. / &%nl)
            ignored_line <- (%nodented (block_comment / line_comment)) / (%ws? (!. / &%nl))
            indent <- eol (%nl ignored_line)* %nl %indented
            nodent <- eol (%nl ignored_line)* %nl %nodented
            dedent <- eol (%nl ignored_line)* (((!.) &%dedented) / (&(%nl %dedented)))
            tok_gap <- %ws / %prev_edge / &("[" / [.,:;{("#%'])
            dotdot <- nodent ".." %ws?
        ]=]

        whitespace = S(" \t")^1

        defs =
            ws: whitespace
            nl: P("\n")
            wordchar: P(1)-S(' \t\n\r%#:;,.{}[]()"\\')
            indented: Cmt(S(" \t")^0 * (#(P(1)-S(" \t\n") + (-P(1)))), check_indent)
            nodented: Cmt(S(" \t")^0 * (#(P(1)-S(" \t\n") + (-P(1)))), check_nodent)
            dedented: Cmt(S(" \t")^0 * (#(P(1)-S(" \t\n") + (-P(1)))), check_dedent)
            prev_edge: B(S(" \t\n.,:;}])\""))
            -- Match-time handler used by the `=> error` rules in the grammar.
            -- `errors` is the unparsed remainder of `src`; `err_msg` is the rule's
            -- message. Always raises, showing the offending line and its neighbours.
            error: (src,pos,errors,err_msg)->
                line_no = 1
                for _ in src\sub(1,-#errors)\gmatch("\n") do line_no += 1
                err_pos = #src - #errors + 1
                if errors\sub(1,1) == "\n"
                    -- Indentation error
                    err_pos += #errors\match("[ \t]*", 2)
                start_of_err_line = err_pos
                while src\sub(start_of_err_line, start_of_err_line) != "\n" and start_of_err_line > 1
                    start_of_err_line -= 1
                start_of_prev_line = start_of_err_line - 1
                while src\sub(start_of_prev_line, start_of_prev_line) != "\n" and start_of_prev_line > 1
                    start_of_prev_line -= 1
                local prev_line,err_line,next_line
                prev_line,err_line,next_line = src\match("([^\n]*)\n([^\n]*)\n([^\n]*)", start_of_prev_line+1)
                pointer = ("-")\rep(err_pos - start_of_err_line + 0) .. "^"
                error("\n#{err_msg or "Parse error"} in #{filename} on line #{line_no}:\n\n#{prev_line}\n#{err_line}\n#{pointer}\n#{next_line}\n")

        -- Any grammar action not defined above (File, Block, Word, ...) becomes a
        -- constructor that wraps its captures in a token tagged with the action name.
        setmetatable(defs, {
            __index: (t,key)->
                fn = (src, value, errors)->
                    token = {type: key, :src, :value, :errors}
                    return token
                t[key] = fn
                return fn
        })
        lingo = re.compile(lingo, defs)
        -- Carriage returns are stripped and a trailing newline is appended before matching
        tree = lingo\match(str\gsub("\r","").."\n")
        -- Check for failure before printing, so a failed parse reports this message
        -- instead of crashing inside print_tree.
        assert tree, "Failed to parse: #{str}"
        if @debug
            @writeln("\nPARSE TREE:")
            @print_tree(tree)
        return tree

    -- Compile `tree` as a Lua expression and evaluate it immediately with `vars`
    -- (default: an empty table) as its variable table. Returns the resulting value.
    tree_to_value: (tree, vars)=>
        code = " return (function(compiler, vars)\nreturn #{@tree_to_lua(tree)}\nend)"
        lua_thunk, err = load(code)
        if not lua_thunk
            error("Failed to compile generated code:\n#{code}\n\n#{err}")
        return (lua_thunk!)(self, vars or {})

    -- Translate a parse tree into Lua source.
    -- Returns (lua_code, return_value). For a File tree every top-level statement is
    -- also executed as soon as it is compiled, and return_value is the result of the
    -- last one; for every other tree type return_value is nil.
    tree_to_lua: (tree)=>
        assert tree, "No tree provided."
        if not tree.type
            @error "Invalid tree: #{utils.repr(tree)}"
        buffer = {}
        return_value = nil

        -- Compile a subtree, keeping only the generated code (first return value)
        to_lua = (t)->
            ret = @tree_to_lua(t)
            return ret

        add = (code)-> table.insert(buffer, code)

        switch tree.type
            when "File"
                add [[return (function(compiler, vars) local ret]]
                vars = {}
                for statement in *tree.value.body.value
                    ok,code = pcall(to_lua, statement, "Statement")
                    if not ok
                        @writeln "Error occurred in statement:\n#{statement.src}"
                        error(code)
                    -- Run each statement as soon as it is compiled, so that rules and
                    -- macros it defines are available to the statements that follow.
                    lua_code = " return (function(compiler, vars)\n#{code}\nend)"
                    lua_thunk, err = load(lua_code)
                    if not lua_thunk
                        error("Failed to compile generated code:\n#{code}\n\n#{err}\n\nProduced by statement:\n#{utils.repr(statement)}")
                    value = lua_thunk!
                    ok,return_value = pcall(value, self, vars)
                    if not ok
                        @writeln "Error occurred in statement:\n#{statement.src}"
                        error(return_value)
                    add code
                add [[ return ret end) ]]

            when "Block"
                for statement in *tree.value
                    add to_lua(statement)

            when "Thunk"
                assert tree.value.type == "Block", "Non-block value in Thunk"
                add [[ (function(compiler, vars) local ret ]]..to_lua(tree.value).."\n"..[[ return ret end) ]]

            when "Statement"
                -- This case here is to prevent "ret =" from getting prepended when the macro might not want it
                if tree.value.type == "FunctionCall"
                    name = @fn_name_from_tree(tree.value)
                    if @defs[name] and @defs[name].is_macro
                        add @run_macro(tree.value, "Statement")
                    else
                        add "ret = "..(to_lua(tree.value)\match("%s*(.*)"))
                else
                    add "ret = "..(to_lua(tree.value)\match("%s*(.*)"))

            when "FunctionCall"
                name = @fn_name_from_tree(tree)
                if @defs[name] and @defs[name].is_macro
                    add @run_macro(tree, "Expression")
                else
                    -- Words only contribute to the rule name; everything else is an argument
                    args = [to_lua(a) for a in *tree.value when a.type != "Word"]
                    table.insert args, 1, utils.repr(name)
                    add @@comma_separated_items("compiler:call(", args, ")")

            when "String"
                escapes = n:"\n", t:"\t", b:"\b", a:"\a", v:"\v", f:"\f", r:"\r"
                unescaped = tree.value\gsub("\\(.)", ((c)-> escapes[c] or c))
                add utils.repr(unescaped)

            when "Longstring"
                -- Literal text is accumulated in string_buffer; each interpolation
                -- flushes it and contributes its own concatenation operand.
                concat_parts = {}
                string_buffer = ""
                for i,line in ipairs(tree.value)
                    if i > 1 then string_buffer ..= "\n"
                    for bit in *line
                        if type(bit) == "string"
                            string_buffer ..= bit\gsub("\\\\","\\")
                        else
                            if string_buffer ~= ""
                                table.insert concat_parts, utils.repr(string_buffer)
                                string_buffer = ""
                            table.insert concat_parts, "compiler.utils.repr_if_not_string(#{to_lua(bit)})"

                if string_buffer ~= ""
                    table.insert concat_parts, utils.repr(string_buffer)

                if #concat_parts == 0
                    add "''"
                elseif #concat_parts == 1
                    add concat_parts[1]
                else
                    add "(#{table.concat(concat_parts, "..")})"

            when "Number"
                add tree.value

            when "List"
                if #tree.value == 0
                    add "{}"
                elseif #tree.value == 1
                    add "{#{to_lua(tree.value[1])}}"
                else
                    add @@comma_separated_items("{", [to_lua(item) for item in *tree.value], "}")

            when "Var"
                add "vars[#{utils.repr(tree.value)}]"

            else
                @error("Unknown/unimplemented thingy: #{tree.type}")

        -- TODO: make indentation clean
        buffer = table.concat(buffer, "\n")
        return buffer, return_value

    -- Class-level helper: join `items` with ", " between `open` and `close`, starting
    -- a new chunk whenever the running width reaches 80 characters. The chunks are
    -- yielded to utils.accumulate with "\n" as its first argument.
    @comma_separated_items: (open, items, close)=>
        utils.accumulate "\n", ->
            buffer = open
            so_far = 0
            for i,item in ipairs(items)
                if i < #items then item ..= ", "
                if so_far + #item >= 80 and #buffer > 0
                    coroutine.yield buffer
                    so_far -= #buffer
                    buffer = item
                else
                    so_far += #item
                    buffer ..= item
            buffer ..= close
            coroutine.yield buffer

    -- Build the invocation key for a FunctionCall tree: Word tokens keep their text,
    -- every other token becomes "%", and the pieces are joined with single spaces.
    fn_name_from_tree: (tree)=>
        assert(tree.type == "FunctionCall", "Attempt to get fn name from non-functioncall tree: #{tree.type}")
        name_bits = {}
        for token in *tree.value
            table.insert name_bits, if token.type == "Word" then token.value else "%"
        table.concat(name_bits, " ")

    -- Convert a Var tree into a string usable as a Lua identifier: "var" followed by
    -- the variable name with "_" doubled and every other non-alphanumeric character
    -- replaced by "_" plus its byte value in hex.
    var_to_lua_identifier: (var)=>
        if var.type != "Var"
            -- Report the offending tree's own type (this used to index an undefined
            -- `label`, which crashed before the message could be produced).
            @error("Tried to convert something that wasn't a Var into a lua identifier: it was not a Var, it was: "..tostring(var.type))
        escaped = var.value\gsub "%W", (verboten)->
            if verboten == "_" then "__" else ("_%x")\format(verboten\byte!)
        return "var"..escaped

    -- Expand the macro named by the FunctionCall `tree` and return the Lua source it
    -- generates. The macro receives the unevaluated argument trees keyed by argument
    -- name, plus `kind` ("Expression" or "Statement"). For statements, "ret = " is
    -- prepended unless the macro returns a truthy second value.
    run_macro: (tree, kind="Expression")=>
        name = @fn_name_from_tree(tree)
        unless @defs[name] and @defs[name].is_macro
            @error("Macro not found: #{name}")
        unless @check_permission(name)
            @error "You do not have the authority to call: #{name}"
        {:fn, :arg_names} = @defs[name]
        arg_trees = [a for a in *tree.value when a.type != "Word"]
        args = {arg_name, arg_trees[i] for i,arg_name in ipairs(arg_names[name])}
        table.insert @callstack, name
        ret, manual_mode = fn(self, args, kind)
        table.remove @callstack
        if not ret
            @error("No return value for macro: #{name}")
        if kind == "Statement" and not manual_mode
            ret = "ret = "..ret
        return ret

    -- Coroutine body used by print_tree/stringify_tree: yields one line of text per
    -- node of `tree`, indented by `indent_level` copies of INDENT.
    _yield_tree: (tree, indent_level=0)=>
        ind = (s) -> INDENT\rep(indent_level)..s
        switch tree.type
            when "File"
                coroutine.yield(ind"File:")
                @_yield_tree(tree.value.body, indent_level+1)

            when "Errors"
                coroutine.yield(ind"Error:\n#{tree.value}")

            when "Block"
                for chunk in *tree.value
                    @_yield_tree(chunk, indent_level)

            when "Thunk"
                coroutine.yield(ind"Thunk:")
                @_yield_tree(tree.value, indent_level+1)

            when "Statement"
                @_yield_tree(tree.value, indent_level)

            when "FunctionCall"
                name = @fn_name_from_tree(tree)
                args = [a for a in *tree.value when a.type != "Word"]
                if #args == 0
                    coroutine.yield(ind"Call [#{name}]!")
                else
                    coroutine.yield(ind"Call [#{name}]:")
                    for a in *args
                        @_yield_tree(a, indent_level+1)

            when "String"
                -- TODO: Better implement
                coroutine.yield(ind(utils.repr(tree.value)))

            when "Longstring"
                -- TODO: Better implement
                coroutine.yield(ind(utils.repr(tree.value)))

            when "Number"
                coroutine.yield(ind(tree.value))

            when "List"
                if #tree.value == 0
                    coroutine.yield(ind(""))
                else
                    coroutine.yield(ind"List:")
                    for item in *tree.value
                        @_yield_tree(item, indent_level+1)

            when "Var"
                coroutine.yield ind"Var[#{utils.repr(tree.value)}]"

            else
                error("Unknown/unimplemented thingy: #{tree.type}")
        return nil -- to prevent tail calls

    -- Write a human-readable rendering of `tree`, one node per line.
    print_tree:(tree)=>
        for line in coroutine.wrap(-> @_yield_tree(tree))
            @writeln(line)

    -- Same rendering as print_tree, returned as a single newline-joined string.
    stringify_tree:(tree)=>
        result = {}
        for line in coroutine.wrap(-> @_yield_tree(tree))
            table.insert(result, line)
        return table.concat result, "\n"

    -- Parse and compile `src` (executing it along the way, see tree_to_lua).
    -- If `output_file` is given, the generated Lua is also written to that path.
    -- Returns (lua_code, return_value).
    compile: (src, filename, output_file=nil)=>
        if @debug
            @writeln "COMPILING:\n#{src}"
        tree = @parse(src, filename)
        assert tree, "Tree failed to compile: #{src}"
        code, retval = @tree_to_lua(tree)
        if output_file
            output, err = io.open(output_file, "w")
            if not output
                error("Could not open output file #{output_file}: #{err}")
            output\write(code)
            -- Close the handle so the generated code is flushed to disk
            output\close!
        return code, retval

    -- Report an error through @writeln together with the current callstack (innermost
    -- call first), reset the callstack, then raise a Lua error with no message.
    error: (...)=>
        @writeln "ERROR!"
        @writeln(...)
        @writeln("Callstack:")
        for i=#@callstack,1,-1
            @writeln " #{@callstack[i]}"
        @writeln " "
        @callstack = {}
        error!

    -- Run parser tests contained in `src`. Test cases are separated by blank lines;
    -- within a case, "===" separates the Nomsu source from the expected
    -- stringify_tree output. The `expected` parameter is overwritten per case.
    test: (src, filename, expected)=>
        i = 1
        while i != nil
            start,stop = src\find("\n\n", i)
            -- When no further blank line exists, `start` is nil and this takes the rest
            test = src\sub(i,start)
            i = stop
            start,stop = test\find"==="
            if not start or not stop then @error("WHERE'S THE ===? in:\n#{test}")
            test_src, expected = test\sub(1,start-1), test\sub(stop+1,-1)
            expected = expected\match'[\n]*(.*[^\n])'
            tree = @parse(test_src, filename)
            got = @stringify_tree(tree.value.body)
            if got != expected
                @error"TEST FAILED!\nSource:\n#{test_src}\nExpected:\n#{expected}\n\nGot:\n#{got}"

    -- Sets up some core functionality: the "lua block"/"lua expr" macros and the
    -- "require"/"run file" rules.
    initialize_core: =>
        -- Inside these two macros, reading any variable from `inner_vars` yields the
        -- Lua source text `vars["name"]` rather than a value, so variables that are
        -- interpolated into the Lua code string become runtime lookups.
        @defmacro [[lua block %lua_code]], (vars, kind)=>
            if kind == "Expression" then error("Expected to be in statement.")
            inner_vars = setmetatable({}, {__index:(_,key)-> "vars[#{utils.repr(key)}]"})
            -- Second return value requests "manual mode": no "ret = " is prepended
            return "do\n"..@tree_to_value(vars.lua_code, inner_vars).."\nend", true

        @defmacro [[lua expr %lua_code]], (vars, kind)=>
            inner_vars = setmetatable({}, {__index:(_,key)-> "vars[#{utils.repr(key)}]"})
            return @tree_to_value(vars.lua_code, inner_vars)

        -- Run a file once and cache its (truthy) result in @loaded_files
        @def "require %filename", (vars)=>
            if not @loaded_files[vars.filename]
                file = io.open(vars.filename)
                if not file
                    @error "File does not exist: #{vars.filename}"
                contents = file\read('*a')
                file\close!
                @loaded_files[vars.filename] = @run(contents, vars.filename)
            return @loaded_files[vars.filename]

        -- Run a file unconditionally
        @def "run file %filename", (vars)=>
            file = io.open(vars.filename)
            if not file
                @error "File does not exist: #{vars.filename}"
            contents = file\read('*a')
            file\close!
            return @run(contents, vars.filename)

-- Run on the command line via "./nomsu.moon input_file.nom" to execute
-- and "./nomsu.moon input_file.nom output_file.lua" to compile (use "-" to compile to stdout)
if arg and arg[1]
    c = NomsuCompiler()
    input_file, open_err = io.open(arg[1])
    if not input_file
        error("Could not open input file #{arg[1]}: #{open_err}")
    input = input_file\read("*a")
    input_file\close!
    -- If run via "./nomsu.moon file.nom -", then silence output and print generated
    -- source code instead.
    _write = c.write
    if arg[2] == "-"
        c.write = ->
    code, retval = c\compile(input, arg[1])
    c.write = _write -- put it back
    if arg[2]
        to_stdout = arg[2] == "-"
        output = if to_stdout
            io.output()
        else
            assert(io.open(arg[2], 'w'))

        output\write [[ local load = function() ]]
        output\write(code)
        output\write [[ end local NomsuCompiler = require('nomsu') local c = NomsuCompiler() return load()(c, {}) ]]
        -- Leave stdout open for the host program, but make sure files hit the disk
        if to_stdout
            output\flush!
        else
            output\close!

elseif arg
    -- REPL: read lines until a blank line (or EOF), run them as one chunk, and stop
    -- when an empty chunk is entered.
    c = NomsuCompiler()
    c\run('require "lib/core.nom"')
    while true
        buff = ""
        while true
            io.write(">> ")
            line = io.read("*L")
            if line == "\n" or not line
                break
            buff ..= line
        if #buff == 0
            break
        ok, ret = pcall(-> c\run(buff))
        if ok and ret != nil
            print "= "..utils.repr(ret)

return NomsuCompiler