#!/usr/bin/env moon
-- This file contains the source code of the Nomsu compiler.
-- Nomsu is a programming language that cross-compiles to Lua. It was designed to be good
-- at natural-language-like code that is highly self-modifying and flexible.
-- The only dependency is LPEG, which can be installed using "luarocks install lpeg"
-- File usage:
--    Either, in a lua/moonscript file:
--        Nomsu = require "nomsu"
--        nomsu = Nomsu()
--        nomsu:run(your_nomsu_code)
--    Or from the command line:
--        lua nomsu.lua [input_file [output_file or -]]
re = require 're'
lpeg = require 'lpeg'
utils = require 'utils'
repr = utils.repr
{:insert, :remove, :concat} = table
--pcall = (fn,...)-> true, fn(...)

-- TODO:
-- use actual variables instead of a vars table
-- have macros return (statements, expression)
-- consider non-linear codegen, like with moonscript's comprehensions, rather than doing thunks
-- improve indentation of generated lua code
-- provide way to run precompiled nomsu -> lua code from nomsu
-- better scoping?
-- better error reporting
-- add line numbers of function calls
-- type checking?

lpeg.setmaxstack 10000 -- whoa
{:P,:V,:S,:Cg,:C,:Cp,:B,:Cmt} = lpeg

-- Maps the character following a backslash to the character it stands for
-- (used by NomsuCompiler.unescape_string).
STRING_ESCAPES = n:"\n", t:"\t", b:"\b", a:"\a", v:"\v", f:"\f", r:"\r"

-- Name of the file currently being parsed. It is set by NomsuCompiler\parse so that the
-- grammar's `error` callback (which only receives match data) can mention it.
current_filename = nil

-- Stack of indentation widths for the blocks currently open during a parse.
-- NOTE: this treats tabs as equivalent to 1 space
indent_stack = {0}

-- Match-time callbacks (used with lpeg.Cmt below). Each receives the leading whitespace of a
-- line in `spaces` and succeeds (by returning the end position) only when that whitespace is
-- deeper than / shallower than / equal to the indentation on top of the stack. Returning
-- nothing makes the match fail.
check_indent = (subject,end_pos,spaces)->
    if #spaces > indent_stack[#indent_stack]
        insert(indent_stack, #spaces)
        return end_pos

check_dedent = (subject,end_pos,spaces)->
    if #spaces < indent_stack[#indent_stack]
        remove(indent_stack)
        return end_pos

check_nodent = (subject,end_pos,spaces)->
    if #spaces == indent_stack[#indent_stack]
        return end_pos

-- TYPES:
-- Number 1, "String", %Var, [List], (Block), \(Nomsu), FunctionCall, File

-- NOTE(review): in `indented_string`, the text substituted between consecutive lines is a
-- single space as written here; confirm that a newline was not intended.
nomsu = [=[
    file <- ({ {| shebang?
        (ignored_line %nl)*
        statements (nodent statements)*
        (%nl ignored_line)* %nl?
        (({.+} ("" -> "Unexpected end of file")) => error)? |} }) -> File

    shebang <- "#!" [^%nl]* %nl

    inline_statements <- inline_statement (semicolon inline_statement)*
    noeol_statements <- (inline_statement semicolon)* noeol_statement
    statements <- (inline_statement semicolon)* statement

    statement <- functioncall / expression
    noeol_statement <- noeol_functioncall / noeol_expression
    inline_statement <- inline_functioncall / inline_expression

    inline_block <- ({ {| "(" inline_statements ")" |} }) -> Block
    eol_block <- ({ {| ":" %ws? noeol_statements eol |} }) -> Block
    indented_block <- ({ {| (":" / "(..)") indent
            statements (nodent statements)*
        (dedent / (({.+} ("" -> "Error while parsing block")) => error))
    |} }) -> Block

    inline_nomsu <- ({ ("\" inline_expression) }) -> Nomsu
    eol_nomsu <- ({ ("\" noeol_expression) }) -> Nomsu
    indented_nomsu <- ({ ("\" expression) }) -> Nomsu

    inline_expression <- number / variable / inline_string / inline_list / inline_block / inline_nomsu
    noeol_expression <- indented_string / indented_block / indented_nomsu / indented_list / inline_expression
    expression <- eol_block / eol_nomsu / noeol_expression

    -- Function calls need at least one word in them
    inline_functioncall <- ({ {|
        (inline_expression tok_gap)* word (tok_gap (inline_expression / word))*
    |} }) -> FunctionCall
    noeol_functioncall <- ({ {|
        (noeol_expression tok_gap)* word (tok_gap (noeol_expression / word))*
    |} }) -> FunctionCall
    functioncall <- ({ {|
        (expression (dotdot / tok_gap))* word ((dotdot / tok_gap) (expression / word))*
    |} }) -> FunctionCall

    word <- ({ !number {%wordchar (!"'" %wordchar)*} }) -> Word

    inline_string <- ({ '"' {|
        ({~ (("\\" -> "\") / ('\"' -> '"') / (!string_interpolation [^%nl"]))+ ~}
        / string_interpolation)* |} '"' }) -> String
    indented_string <- ({ '".."' indent {|
            indented_string_line (nodent {~ "" -> " " ~} indented_string_line)*
        |} (dedent / (({.+} ("" -> "Error while parsing String")) => error))
    }) -> String
    indented_string_line <- "|" ({~ (("\\" -> "\") / (!string_interpolation [^%nl]))+ ~} / string_interpolation)*
    string_interpolation <- "\" (inline_block / indented_block / dotdot)

    number <- ({ (("-"? (([0-9]+ "." [0-9]+) / ("." [0-9]+) / ([0-9]+)))-> tonumber) }) -> Number

    -- Variables can be nameless (i.e. just %) and can't contain apostrophes
    -- which is a hack to allow %foo's to parse as "%foo" and "'s" separately
    variable <- ({ ("%" { (!"'" %wordchar)* }) }) -> Var

    inline_list <- ({ {|
        ("[" %ws? ((inline_list_item comma)* inline_list_item comma?)? %ws? "]")
    |} }) -> List
    indented_list <- ({ {|
        ("[..]" indent
            list_line (nodent list_line)*
        (dedent / (({.+} ("" -> "Error while parsing list")) => error)))
    |} }) -> List
    list_line <- (inline_list_item comma)* ((inline_list_item %ws? ",") / (functioncall / expression))
    inline_list_item <- inline_functioncall / inline_expression

    block_comment <- "#.." [^%nl]* indent [^%nl]* (%nl ((%ws? (!. / &%nl)) / (!%dedented [^%nl]*)))*
    line_comment <- "#" [^%nl]*

    eol <- %ws? line_comment? (!. / &%nl)
    ignored_line <- (%nodented (block_comment / line_comment)) / (%ws? (!. / &%nl))
    indent <- eol (%nl ignored_line)* %nl %indented
    nodent <- eol (%nl ignored_line)* %nl %nodented
    dedent <- eol (%nl ignored_line)* (((!.) &%dedented) / (&(%nl %dedented)))
    tok_gap <- %ws / %prev_edge / &("[" / "\" / [.,:;{("#%'])
    comma <- %ws? "," %ws?
    semicolon <- %ws? ";" %ws?
    dotdot <- nodent ".." %ws?
]=]

whitespace = S(" \t")^1

-- Named patterns and callbacks referenced from the grammar above via %name and `=> name`.
defs =
    ws:whitespace, nl: P("\n"), :tonumber
    wordchar: P(1)-S(' \t\n\r%#:;,.{}[]()"\\')
    indented: Cmt(S(" \t")^0 * (#(P(1)-S(" \t\n") + (-P(1)))), check_indent)
    nodented: Cmt(S(" \t")^0 * (#(P(1)-S(" \t\n") + (-P(1)))), check_nodent)
    dedented: Cmt(S(" \t")^0 * (#(P(1)-S(" \t\n") + (-P(1)))), check_dedent)
    prev_edge: B(S(" \t\n.,:;}])\"\\"))
    -- Match-time error reporter. `errors` is the unparsed remainder of `src` and `err_msg` is
    -- the message captured by the grammar. Always raises a Lua error that shows the offending
    -- line with a pointer, together with the line before and the line after it.
    error: (src,pos,errors,err_msg)->
        line_no = 1
        for _ in src\sub(1,-#errors)\gmatch("\n")
            line_no += 1
        err_pos = #src - #errors + 1
        if errors\sub(1,1) == "\n"
            -- Indentation error: point past the newline's leading whitespace
            err_pos += #errors\match("[ \t]*", 2)
        start_of_err_line = err_pos
        while src\sub(start_of_err_line, start_of_err_line) != "\n" and start_of_err_line > 1
            start_of_err_line -= 1
        start_of_prev_line = start_of_err_line - 1
        while src\sub(start_of_prev_line, start_of_prev_line) != "\n" and start_of_prev_line > 1
            start_of_prev_line -= 1
        local prev_line,err_line,next_line
        prev_line,err_line,next_line = src\match("([^\n]*)\n([^\n]*)\n([^\n]*)", start_of_prev_line+1)
        pointer = ("-")\rep(err_pos - start_of_err_line + 0) .. "^"
        -- The original interpolated an undefined global `filename` here (always "nil");
        -- use the name recorded by NomsuCompiler\parse instead.
        file_label = current_filename or "<unknown file>"
        error("\n#{err_msg or "Parse error"} in #{file_label} on line #{line_no}:\n\n#{prev_line}\n#{err_line}\n#{pointer}\n#{next_line}\n")

-- Any other name used after `->` in the grammar (File, Block, Word, ...) is resolved lazily
-- to a constructor that builds a parse-tree node tagged with that name; the constructor is
-- cached on `defs` so it is only created once per node type.
setmetatable(defs, {
    __index: (t,key)->
        constructor = (src, value, errors)-> {type: key, :src, :value, :errors}
        t[key] = constructor
        return constructor
})
nomsu = re.compile(nomsu, defs)

-- Compiles Nomsu source into Lua source and runs it.
class NomsuCompiler
    -- parent: optional compiler whose definitions are visible to (but not modified by) this one.
    new:(parent)=>
        @write = (...)=> io.write(...)
        @defs = setmetatable({}, {__index:parent and parent.defs})
        @callstack = {}
        @debug = false
        @utils = utils
        @repr = (...)=> repr(...)
        @loaded_files = {}
        @initialize_core!

    -- Writes the arguments through @write, followed by a newline.
    writeln:(...)=>
        @write(...)
        @write("\n")

    -- Registers `thunk` as the implementation of the rule described by `invocation`
    -- (anything accepted by get_stub). Reports an error on duplicate argument names.
    -- Returns the new definition table.
    def: (invocation, thunk, src)=>
        stub, arg_names = @get_stub invocation
        assert stub, "NO STUB FOUND: #{repr invocation}"
        if @debug then @writeln "Defining rule: #{repr stub} with args #{repr arg_names}"
        for i=1,#arg_names-1
            for j=i+1,#arg_names
                if arg_names[i] == arg_names[j] then @error "Duplicate argument in function #{stub}: '#{arg_names[i]}'"
        fn_def = {:thunk, :invocation, :arg_names, :src, is_macro:false}
        @defs[stub] = fn_def
        return fn_def

    -- Same as def, but flags the definition as a macro. Returns the definition table.
    defmacro: (invocation, thunk, src)=>
        with @def(invocation, thunk, src)
            .is_macro = true

    -- Calls the rule registered under `stub`. The extra arguments are bound, in order, to the
    -- rule's argument names, and the thunk is invoked as thunk(self, args). Returns whatever
    -- the thunk returns.
    call: (stub,...)=>
        def = @defs[stub]
        if def == nil
            @error "Attempt to call undefined function: #{stub}"
        -- This is a little bit hacky, but having this check is handy for catching mistakes
        -- I use a hash sign in "#macro" so it's guaranteed to not be a valid function name
        if def.is_macro and @callstack[#@callstack] != "#macro"
            @error "Attempt to call macro at runtime: #{stub}\nThis can be caused by using a macro in a function that is defined before the macro."
        unless @check_permission(def)
            @error "You do not have the authority to call: #{stub}"
        {:thunk, :arg_names} = def
        args = {name, select(i,...) for i,name in ipairs(arg_names)}
        if @debug
            @writeln "Calling #{repr stub} with args: #{repr(args)}"
        insert @callstack, stub
        -- TODO: optimize, but still allow multiple return values?
        rets = {thunk(self,args)}
        remove @callstack
        return unpack(rets)

    -- Expands the macro invoked by the FunctionCall node `tree`. "#macro" is pushed on the
    -- callstack for the duration so that `call` accepts the macro.
    -- Returns the macro's (expression code, statement code).
    run_macro: (tree, kind="Expression")=>
        stub,args = @get_stub tree
        insert @callstack, "#macro"
        expr, statement = @call(stub, unpack(args))
        remove @callstack
        return expr, statement

    -- Returns true if the given definition (or the definition registered under the given stub
    -- string) may be called from the current callstack: either it has no `whiteset`, or some
    -- caller on the stack is a key of its `whiteset`.
    check_permission: (fn_def)=>
        -- A string is a stub name to look up. (The original tested the metatable against an
        -- undefined `functiondef_mt`, which only worked because that global is nil.)
        if type(fn_def) == 'string'
            fn_name = fn_def
            fn_def = @defs[fn_name]
            if fn_def == nil
                @error "Undefined function: #{fn_name}"
        whiteset = fn_def.whiteset
        if whiteset == nil then return true
        -- TODO: maybe optimize this by making the callstack a Counter and using a
        -- move-to-front optimization on the whitelist to check most likely candidates sooner
        for caller in *@callstack
            if whiteset[caller] then return true
        return false

    -- Parses Nomsu source text into a parse tree. `filename` is only used in the messages of
    -- parse errors. Carriage returns are stripped before parsing.
    parse: (str, filename)=>
        if @debug
            @writeln("PARSING:\n#{str}")
        str = str\gsub("\r","")
        export indent_stack
        old_indent_stack, indent_stack = indent_stack, {0}
        old_filename = current_filename
        current_filename = filename
        tree = nomsu\match(str)
        indent_stack = old_indent_stack -- Put it back, just in case.
        current_filename = old_filename
        assert tree, "Failed to parse: #{str}"
        if @debug
            @writeln "PARSE TREE:"
            @print_tree tree, " "
        return tree

    -- Parses `src` and then compiles and executes it one top-level statement at a time.
    -- Returns the value of the last statement that produced an expression, plus Lua source
    -- for a function(nomsu, vars) that replays the whole file.
    run: (src, filename)=>
        tree = @parse(src, filename)
        assert tree, "Tree failed to compile: #{src}"
        assert tree.type == "File", "Attempt to run non-file: #{tree.type}"

        buffer = {}
        vars = {}
        return_value = nil
        for statement in *tree.value
            if @debug
                @writeln "RUNNING TREE:"
                @print_tree statement
            ok,expr,statements = pcall(@tree_to_lua, self, statement)
            if not ok
                @writeln "Error occurred in statement:\n#{statement.src}"
                @error(expr)
            -- Each piece goes on its own line so that generated code which ends in a Lua
            -- line comment cannot swallow the code after it.
            code_for_statement = ([[
                return (function(nomsu, vars)
                    %s
                    return %s
                end)]])\format(statements or "", expr or "")
            if @debug
                @writeln "RUNNING LUA:\n#{code_for_statement}"
            lua_thunk, err = load(code_for_statement)
            if not lua_thunk
                error("Failed to compile generated code:\n#{code_for_statement}\n\n#{err}\n\nProduced by statement:\n#{statement.src}")
            run_statement = lua_thunk!
            ok,ret = pcall(run_statement, self, vars)
            if not ok
                @writeln "Error occurred in statement:\n#{statement.src}"
                -- Report the error that was actually raised. (The original reported the
                -- previous statement's value when this statement had no expression code.)
                @error(repr ret)
            if expr then return_value = ret
            insert buffer, "#{statements or ''}\n#{expr and "ret = #{expr}" or ''}"

        lua_code = ([[
            return function(nomsu, vars)
                local ret
                %s
                return ret
            end]])\format(concat(buffer, "\n"))
        return return_value, lua_code

    -- Compiles `tree` as a Lua expression and evaluates it immediately with the given vars
    -- table (an empty table when none is given). Returns the resulting value(s).
    tree_to_value: (tree, vars)=>
        code = " return (function(nomsu, vars)\nreturn #{@tree_to_lua(tree)}\nend)"
        lua_thunk, err = load(code)
        if not lua_thunk
            error("Failed to compile generated code:\n#{code}\n\n#{err}")
        return (lua_thunk!)(self, vars or {})

    -- Converts a parse tree node into Lua source.
    -- Returns: <lua expression code>, <lua statement code>. Only macros produce statement
    -- code (see run_macro); every other node type returns nil for it.
    tree_to_lua: (tree)=>
        assert tree, "No tree provided."
        if not tree.type
            @writeln debug.traceback()
            @error "Invalid tree: #{repr(tree)}"
        switch tree.type
            when "File"
                error("Should not be converting File to lua through this function.")

            when "Nomsu"
                return repr(tree.value), nil

            when "Thunk" -- This is not created by the parser, it's just a helper
                lua_bits = {}
                for arg in *tree.value.value
                    expr,statement = @tree_to_lua arg
                    if statement then insert lua_bits, statement
                    if expr then insert lua_bits, "ret = #{expr}"
                return ([[
                    (function(nomsu, vars)
                        local ret
                        %s
                        return ret
                    end)]])\format(concat lua_bits, "\n")

            when "Block"
                if #tree.value == 0
                    return "nil",nil
                if #tree.value == 1
                    -- A block holding a single pure expression needs no wrapper function
                    expr,statement = @tree_to_lua tree.value[1]
                    if not statement
                        return expr, nil
                thunk_lua = @tree_to_lua {type:"Thunk", value:tree, src:tree.src}
                return ("%s(nomsu, vars)")\format(thunk_lua), nil

            when "FunctionCall"
                stub = @get_stub(tree)
                if @defs[stub] and @defs[stub].is_macro
                    return @run_macro(tree, "Expression")
                args = {repr(stub)}
                for arg in *tree.value
                    if arg.type == 'Word' then continue
                    expr,statement = @tree_to_lua arg
                    if statement
                        @error "Cannot use [[#{arg.src}]] as a function argument, since it's not an expression."
                    insert args, expr
                return @@comma_separated_items("nomsu:call(", args, ")"), nil

            when "String"
                concat_parts = {}
                string_buffer = ""
                for bit in *tree.value
                    if type(bit) == "string"
                        -- Merge adjacent literal pieces into a single Lua string literal
                        string_buffer ..= bit
                        continue
                    if string_buffer ~= ""
                        insert concat_parts, repr(string_buffer)
                        string_buffer = ""
                    expr, statement = @tree_to_lua bit
                    if statement
                        @error "Cannot use [[#{bit.src}]] as a string interpolation value, since it's not an expression."
                    insert concat_parts, "nomsu.utils.repr_if_not_string(#{expr})"

                if string_buffer ~= ""
                    insert concat_parts, repr(string_buffer)

                if #concat_parts == 0
                    return "''", nil
                return "(#{concat(concat_parts, "..")})", nil

            when "List"
                items = {}
                for item in *tree.value
                    expr,statement = @tree_to_lua item
                    if statement
                        @error "Cannot use [[#{item.src}]] as a list item, since it's not an expression."
                    insert items, expr
                return @@comma_separated_items("{", items, "}"), nil

            when "Number"
                return repr(tree.value)

            when "Var"
                return "vars[#{repr tree.value}]"

            else
                @error("Unknown/unimplemented thingy: #{tree.type}")

    -- Coroutine body: yields (node, depth) for `tree` and then, recursively, for everything
    -- under its .value. Must be run inside a coroutine (see print_tree / tree_to_str).
    walk_tree: (tree, depth=0)=>
        coroutine.yield(tree, depth)
        if type(tree) != 'table' or not tree.type
            return
        switch tree.type
            when "List", "File", "Nomsu", "Block", "FunctionCall", "String"
                for v in *tree.value
                    @walk_tree(v, depth+1)
            else
                @walk_tree(tree.value, depth+1)
        return nil

    -- Writes an indented outline of `tree`: the type name for tree nodes, repr() for leaves.
    print_tree: (tree)=>
        for node,depth in coroutine.wrap(-> @walk_tree tree)
            if type(node) != 'table' or not node.type
                @writeln((" ")\rep(depth)..repr(node))
            else
                @writeln("#{(" ")\rep(depth)}#{node.type}:")

    -- Same outline as print_tree, but returned as a single string.
    tree_to_str: (tree)=>
        bits = {}
        for node,depth in coroutine.wrap(-> @walk_tree tree)
            if type(node) != 'table' or not node.type
                insert bits, ((" ")\rep(depth)..repr(node))
            else
                insert bits, ("#{(" ")\rep(depth)}#{node.type}:")
        return concat(bits, "\n")

    -- Class method: replaces backslash escapes using STRING_ESCAPES; an unknown escaped
    -- character stands for itself.
    @unescape_string: (str)=>
        str\gsub("\\(.)", ((c)-> STRING_ESCAPES[c] or c))

    -- Class method: joins `items` with ", " between `open` and `close`, starting a new line
    -- whenever 80 or more characters have accumulated on the current one.
    @comma_separated_items: (open, items, close)=>
        bits = {open}
        so_far = 0
        for i,item in ipairs(items)
            if i < #items then item ..= ", "
            insert bits, item
            so_far += #item
            if so_far >= 80
                insert bits, "\n"
                so_far = 0
        insert bits, close
        return concat(bits)

    -- Returns `tree` with every Var node whose name is a key of `vars` replaced by the
    -- corresponding value. Nodes are copied only when something underneath them changed.
    replaced_vars: (tree, vars)=>
        -- TODO: consider making a pure function version of this that copies instead of modifying
        if type(tree) != 'table' then return tree
        switch tree.type
            when "Var"
                if vars[tree.value]
                    tree = vars[tree.value]
            when "File", "Nomsu", "Thunk", "Block", "List", "FunctionCall", "String"
                new_value = @replaced_vars tree.value, vars
                if new_value != tree.value
                    tree = {k,v for k,v in pairs(tree)}
                    tree.value = new_value
            when nil
                -- Raw table, probably from one of the .value of a multi-value tree (e.g. List)
                new_values = {}
                any_different = false
                for k,v in pairs tree
                    new_values[k] = @replaced_vars v, vars
                    any_different or= (new_values[k] != tree[k])
                if any_different
                    tree = new_values
        return tree

    -- Returns a single stub ("say %"), and list of args ({msg}) from a single rule def
    -- (e.g. "say %msg") or function call (e.g. FunctionCall({Word("say"), Var("msg")}))
    get_stub: (x)=>
        if not x
            @error "Nothing to get stub from"
        if type(x) == 'string'
            stub = x\gsub("'"," '")\gsub("%%%S+","%%")\gsub("%s+"," ")
            args = [arg for arg in x\gmatch("%%([^%s']*)")]
            return stub, args
        switch x.type
            when "String"
                return @get_stub(x.value)
            when "FunctionCall"
                stub, args = {}, {}
                for token in *x.value
                    switch token.type
                        when "Word"
                            insert stub, token.value
                        when "Var"
                            insert stub, "%"
                            insert args, token.value
                        else
                            -- Any other node is an argument passed as a whole subtree
                            insert stub, "%"
                            insert args, token
                return concat(stub," "), args
            when "Block"
                @writeln debug.traceback!
                @error "Please pass in a single line from a block, not the whole thing:\n#{@tree_to_str x}"

    -- Converts arbitrary nomsu vars to valid lua identifiers by replacing illegal
    -- characters with escape sequences ("_" becomes "__", anything else non-alphanumeric
    -- becomes "_" followed by its byte value in hex). Accepts a Var node or a plain string.
    var_to_lua_identifier: (var)=>
        if type(var) == 'table' and var.type == "Var"
            var = var.value
        escape = (verboten)->
            if verboten == "_" then "__" else ("_%x")\format(verboten\byte!)
        -- Keep only gsub's first result (the string); drop the substitution count
        identifier = var\gsub("%W", escape)
        return identifier

    -- Writes the given message (if any) and the current callstack, clears the callstack and
    -- then raises a Lua error. Note that the raised error carries no value; the message is
    -- only written through @write.
    error: (...)=>
        @writeln "ERROR!"
        if select(1, ...)
            @writeln(...)
        @writeln("Callstack:")
        for i=#@callstack,1,-1
            @writeln " #{@callstack[i]}"
        @writeln " "
        @callstack = {}
        error!

    -- Sets up some core functionality: the "lua code %" / "lua value %" macros and the
    -- "require %" / "run file %" rules.
    initialize_core: =>
        lua_code = (vars)=>
            inner_vars = vars-- setmetatable({}, {__index:(_,key)-> "vars[#{repr(key)}]"})
            lua = @tree_to_value(vars.code, inner_vars)
            return nil, lua
        @defmacro "lua code %code", lua_code

        lua_value = (vars)=>
            inner_vars = vars--setmetatable({}, {__index:(_,key)-> "vars[#{repr(key)}]"})
            lua = @tree_to_value(vars.code, inner_vars)
            return lua, nil
        @defmacro "lua value %code", lua_value

        -- Runs a file at most once per compiler and caches its result (or `true` when the
        -- file produced no truthy value).
        _require = (vars)=>
            if not @loaded_files[vars.filename]
                file = io.open(vars.filename)
                if not file
                    @error "File does not exist: #{vars.filename}"
                code = file\read('*a')
                file\close! -- the original never closed this handle
                @loaded_files[vars.filename] = (@run(code, vars.filename)) or true
            return @loaded_files[vars.filename]
        @def "require %filename", _require

        run_file = (vars)=>
            file = io.open(vars.filename)
            if not file
                @error "File does not exist: #{vars.filename}"
            code = file\read('*a')
            file\close! -- the original never closed this handle
            return @run(code, vars.filename)
        @def "run file %filename", run_file

if arg and arg[1]
    --ProFi = require 'ProFi'
    --ProFi\start()
    c = NomsuCompiler()
    -- assert() turns a missing or unreadable input file into a readable error message
    -- instead of an attempt to index nil.
    input_file = assert(io.open(arg[1]))
    input = input_file\read("*a")
    input_file\close!
    -- If run via "./nomsu.moon file.nom -", then silence output and print generated
    -- source code instead.
    _write = c.write
    if arg[2] == "-"
        c.write = ->
    retval, code = c\run(input, arg[1])
    c.write = _write -- put it back
    if arg[2]
        output = if arg[2] == "-" then io.output! else assert(io.open(arg[2], 'w'))
        output\write ([[
            local NomsuCompiler = require('nomsu')
            local c = NomsuCompiler()
            local run = %s
            return run(c, {})
            ]])\format(code)
        -- Flush and release the file we opened ourselves (stdout is left alone)
        if arg[2] != "-" then output\close!
    --ProFi\stop()
    --ProFi\writeReport( 'MyProfilingReport.txt' )

elseif arg
    -- REPL:
    c = NomsuCompiler()
    c\run('require "lib/core.nom"')
    while true
        buff = ""
        -- Gather lines until a blank line or end of input
        while true
            io.write(">> ")
            line = io.read("*L")
            if line == "\n" or not line
                break
            buff ..= line
        if #buff == 0
            break
        ok, ret = pcall(-> c\run(buff))
        if ok and ret != nil
            print "= "..repr(ret)

return NomsuCompiler