diff --git a/Makefile b/Makefile index 388f411..10e788b 100644 --- a/Makefile +++ b/Makefile @@ -11,10 +11,10 @@ UNINSTALL_VERSION= # ========= You shouldn't need to mess with any of these variables below ================ MOON_FILES= code_obj.moon error_handling.moon files.moon nomsu.moon nomsu_compiler.moon \ - syntax_tree.moon parser.moon containers.moon bitops.moon \ + syntax_tree.moon containers.moon bitops.moon \ parser2.moon pretty_errors.moon string2.moon LUA_FILES= code_obj.lua consolecolors.lua error_handling.lua files.lua nomsu.lua nomsu_compiler.lua \ - syntax_tree.lua parser.lua containers.lua bitops.lua utils.lua \ + syntax_tree.lua containers.lua bitops.lua utils.lua \ parser2.lua pretty_errors.lua string2.lua CORE_NOM_FILES= $(wildcard core/*.nom) CORE_LUA_FILES= $(patsubst %.nom,%.lua,$(CORE_NOM_FILES)) diff --git a/nomsu_compiler.lua b/nomsu_compiler.lua index e655759..639e891 100644 --- a/nomsu_compiler.lua +++ b/nomsu_compiler.lua @@ -1,4 +1,6 @@ local lpeg = require('lpeg') +local R, P, S +R, P, S = lpeg.R, lpeg.P, lpeg.S local re = require('re') local utils = require('utils') local Files = require('files') @@ -34,7 +36,6 @@ do NomsuCode, LuaCode, Source = _obj_0.NomsuCode, _obj_0.LuaCode, _obj_0.Source end local AST = require("syntax_tree") -local Parser = require("parser") local make_parser = require("parser2") SOURCE_MAP = { } table.map = function(t, fn) @@ -62,6 +63,37 @@ table.copy = function(t) return _tbl_0 end)(), getmetatable(t)) end +local utf8_char_patt = (R("\194\223") * R("\128\191") + R("\224\239") * R("\128\191") * R("\128\191") + R("\240\244") * R("\128\191") * R("\128\191") * R("\128\191")) +local operator_patt = S("'`~!@$^&*+=|<>?/-") ^ 1 * -1 +local identifier_patt = (R("az", "AZ", "09") + P("_") + utf8_char_patt) ^ 1 * -1 +local is_operator +is_operator = function(s) + return not not operator_patt:match(s) +end +local is_identifier +is_identifier = function(s) + return not not identifier_patt:match(s) +end +local inline_escaper = re.compile("{~ (%utf8_char / ('\"' -> '\\\"') / ('\n' -> '\\n') / ('\t' -> '\\t') / ('\b' -> '\\b') / ('\a' -> '\\a') / ('\v' -> '\\v') / ('\f' -> '\\f') / ('\r' -> '\\r') / ('\\' -> '\\\\') / ([^ -~] -> escape) / .)* ~}", { + utf8_char = utf8_char_patt, + escape = (function(self) + return ("\\%03d"):format(self:byte()) + end) +}) +local inline_escape +inline_escape = function(s) + return inline_escaper:match(s) +end +local escaper = re.compile("{~ (%utf8_char / ('\\' -> '\\\\') / [\n\r\t -~] / (. -> escape))* ~}", { + utf8_char = utf8_char_patt, + escape = (function(self) + return ("\\%03d"):format(self:byte()) + end) +}) +local escape +escape = function(s) + return escaper:match(s) +end local make_tree make_tree = function(tree, userdata) local cls = AST[tree.type] @@ -851,7 +883,7 @@ do end for i, bit in ipairs(tree) do if type(bit) == "string" then - local clump_words = (type(tree[i - 1]) == 'string' and Parser.is_operator(bit) ~= Parser.is_operator(tree[i - 1])) + local clump_words = (type(tree[i - 1]) == 'string' and is_operator(bit) ~= is_operator(tree[i - 1])) if i > 1 and not clump_words then nomsu:append(" ") end @@ -902,8 +934,8 @@ do add_text = function(nomsu, tree) for i, bit in ipairs(tree) do if type(bit) == 'string' then - local escaped = Parser.inline_escape(bit) - nomsu:append(Parser.inline_escape(bit)) + local escaped = inline_escape(bit) + nomsu:append(inline_escape(bit)) elseif bit.type == "Text" then add_text(nomsu, bit) else @@ -939,7 +971,7 @@ do elseif "DictEntry" == _exp_0 then local key, value = tree[1], tree[2] local nomsu - if key.type == "Text" and #key == 1 and Parser.is_identifier(key[1]) then + if key.type == "Text" and #key == 1 and is_identifier(key[1]) then nomsu = NomsuCode(key.source, key[1]) else nomsu = recurse(key) @@ -967,7 +999,7 @@ do nomsu:append(".") end local bit_nomsu - if i > 1 and bit.type == "Text" and #bit == 1 and type(bit[1]) == 'string' and Parser.is_identifier(bit[1]) then + if i > 1 and bit.type == "Text" and #bit == 1 and type(bit[1]) == 'string' and is_identifier(bit[1]) then bit_nomsu = bit[1] else bit_nomsu = recurse(bit, nomsu) @@ -1177,7 +1209,7 @@ do next_space = "" end if type(bit) == "string" then - if not (type(tree[i - 1]) == 'string' and Parser.is_operator(tree[i - 1]) ~= Parser.is_operator(bit)) then + if not (type(tree[i - 1]) == 'string' and is_operator(tree[i - 1]) ~= is_operator(bit)) then nomsu:append(next_space) end nomsu:append(bit) @@ -1223,7 +1255,7 @@ do add_text = function(nomsu, tree) for i, bit in ipairs(tree) do if type(bit) == 'string' then - bit = Parser.escape(bit) + bit = escape(bit) local bit_lines = Files.get_lines(bit) for j, line in ipairs(bit_lines) do if j > 1 then @@ -1298,7 +1330,7 @@ do elseif "DictEntry" == _exp_0 then local key, value = tree[1], tree[2] local nomsu - if key.type == "Text" and #key == 1 and Parser.is_identifier(key[1]) then + if key.type == "Text" and #key == 1 and is_identifier(key[1]) then nomsu = NomsuCode(key.source, key[1]) else nomsu = self:tree_to_inline_nomsu(key) diff --git a/nomsu_compiler.moon b/nomsu_compiler.moon index fb3dd4c..5e1510b 100644 --- a/nomsu_compiler.moon +++ b/nomsu_compiler.moon @@ -10,6 +10,7 @@ -- Or from the command line: -- lua nomsu.lua your_file.nom lpeg = require 'lpeg' +{:R,:P,:S} = lpeg re = require 're' utils = require 'utils' Files = require 'files' @@ -23,7 +24,6 @@ unpack or= table.unpack {:match, :sub, :gsub, :format, :byte, :find} = string {:NomsuCode, :LuaCode, :Source} = require "code_obj" AST = require "syntax_tree" -Parser = require("parser") make_parser = require("parser2") -- Mapping from source string (e.g. "@core/metaprogramming.nom[1:100]") to a mapping -- from lua line number to nomsu line number @@ -34,8 +34,30 @@ table.map = (t, fn)-> setmetatable([fn(v) for _,v in ipairs(t)], getmetatable(t) table.fork = (t, values)-> setmetatable(values or {}, {__index:t}) table.copy = (t)-> setmetatable({k,v for k,v in pairs(t)}, getmetatable(t)) +-- Parsing helper functions +utf8_char_patt = ( + R("\194\223")*R("\128\191") + + R("\224\239")*R("\128\191")*R("\128\191") + + R("\240\244")*R("\128\191")*R("\128\191")*R("\128\191")) +operator_patt = S("'`~!@$^&*+=|<>?/-")^1 * -1 +identifier_patt = (R("az","AZ","09") + P("_") + utf8_char_patt)^1 * -1 + +is_operator = (s)-> + return not not operator_patt\match(s) + +is_identifier = (s)-> + return not not identifier_patt\match(s) + +inline_escaper = re.compile("{~ (%utf8_char / ('\"' -> '\\\"') / ('\n' -> '\\n') / ('\t' -> '\\t') / ('\b' -> '\\b') / ('\a' -> '\\a') / ('\v' -> '\\v') / ('\f' -> '\\f') / ('\r' -> '\\r') / ('\\' -> '\\\\') / ([^ -~] -> escape) / .)* ~}", {utf8_char: utf8_char_patt, escape:(=> ("\\%03d")\format(@byte!))}) +inline_escape = (s)-> + return inline_escaper\match(s) + +escaper = re.compile("{~ (%utf8_char / ('\\' -> '\\\\') / [\n\r\t -~] / (. -> escape))* ~}", + {utf8_char: utf8_char_patt, escape:(=> ("\\%03d")\format(@byte!))}) +escape = (s)-> + return escaper\match(s) + -- TODO: --- consider non-linear codegen, rather than doing thunks for things like comprehensions -- Re-implement nomsu-to-lua comment translation? make_tree = (tree, userdata)-> @@ -549,7 +571,7 @@ with NomsuCompiler nomsu\append @tree_to_inline_nomsu(tree.target), "::" for i,bit in ipairs tree if type(bit) == "string" - clump_words = (type(tree[i-1]) == 'string' and Parser.is_operator(bit) != Parser.is_operator(tree[i-1])) + clump_words = (type(tree[i-1]) == 'string' and is_operator(bit) != is_operator(tree[i-1])) nomsu\append " " if i > 1 and not clump_words nomsu\append bit else @@ -582,8 +604,8 @@ with NomsuCompiler add_text = (nomsu, tree)-> for i, bit in ipairs tree if type(bit) == 'string' - escaped = Parser.inline_escape(bit) - nomsu\append Parser.inline_escape(bit) + escaped = inline_escape(bit) + nomsu\append inline_escape(bit) elseif bit.type == "Text" add_text(nomsu, bit) else @@ -609,7 +631,7 @@ with NomsuCompiler when "DictEntry" key, value = tree[1], tree[2] - nomsu = if key.type == "Text" and #key == 1 and Parser.is_identifier(key[1]) + nomsu = if key.type == "Text" and #key == 1 and is_identifier(key[1]) NomsuCode(key.source, key[1]) else recurse(key) nomsu\parenthesize! if key.type == "Action" or key.type == "Block" @@ -627,7 +649,7 @@ with NomsuCompiler for i, bit in ipairs tree nomsu\append "." if i > 1 local bit_nomsu - bit_nomsu = if i > 1 and bit.type == "Text" and #bit == 1 and type(bit[1]) == 'string' and Parser.is_identifier(bit[1]) + bit_nomsu = if i > 1 and bit.type == "Text" and #bit == 1 and type(bit[1]) == 'string' and is_identifier(bit[1]) bit[1] else recurse(bit, nomsu) assert bit.type != "Block" @@ -745,7 +767,7 @@ with NomsuCompiler next_space = "" if type(bit) == "string" - unless type(tree[i-1]) == 'string' and Parser.is_operator(tree[i-1]) != Parser.is_operator(bit) + unless type(tree[i-1]) == 'string' and is_operator(tree[i-1]) != is_operator(bit) nomsu\append(next_space) nomsu\append bit next_space = ' ' @@ -788,7 +810,7 @@ with NomsuCompiler add_text = (nomsu, tree)-> for i, bit in ipairs tree if type(bit) == 'string' - bit = Parser.escape(bit) + bit = escape(bit) bit_lines = Files.get_lines(bit) for j, line in ipairs bit_lines if j > 1 @@ -844,7 +866,7 @@ with NomsuCompiler when "DictEntry" key, value = tree[1], tree[2] - nomsu = if key.type == "Text" and #key == 1 and Parser.is_identifier(key[1]) + nomsu = if key.type == "Text" and #key == 1 and is_identifier(key[1]) NomsuCode(key.source, key[1]) else @tree_to_inline_nomsu(key) nomsu\parenthesize! if key.type == "Action" or key.type == "Block" diff --git a/parser.lua b/parser.lua deleted file mode 100644 index 5eed3be..0000000 --- a/parser.lua +++ /dev/null @@ -1,143 +0,0 @@ -local lpeg = require('lpeg') -local re = require('re') -lpeg.setmaxstack(20000) -local P, R, S, C, Cmt, Carg -P, R, S, C, Cmt, Carg = lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.Cmt, lpeg.Carg -local match, sub -do - local _obj_0 = string - match, sub = _obj_0.match, _obj_0.sub -end -local insert, remove -do - local _obj_0 = table - insert, remove = _obj_0.insert, _obj_0.remove -end -local files = require('files') -local NomsuCode, LuaCode, Source -do - local _obj_0 = require("code_obj") - NomsuCode, LuaCode, Source = _obj_0.NomsuCode, _obj_0.LuaCode, _obj_0.Source -end -local AST = require("syntax_tree") -local NOMSU_DEFS -do - local _with_0 = { } - _with_0.nl = P("\r") ^ -1 * P("\n") - _with_0.ws = S(" \t") - _with_0.tonumber = tonumber - _with_0.table = function() - return { } - end - _with_0.unpack = unpack or table.unpack - local string_escapes = { - n = "\n", - t = "\t", - b = "\b", - a = "\a", - v = "\v", - f = "\f", - r = "\r" - } - local digit, hex = R('09'), R('09', 'af', 'AF') - _with_0.escaped_char = (P("\\") * S("xX") * C(hex * hex)) / function(self) - return string.char(tonumber(self, 16)) - end - _with_0.escaped_char = _with_0.escaped_char + ((P("\\") * C(digit * (digit ^ -2))) / function(self) - return string.char(tonumber(self)) - end) - _with_0.escaped_char = _with_0.escaped_char + ((P("\\") * C(S("ntbavfr"))) / string_escapes) - _with_0.operator_char = S("'`~!@$^&*+=|<>?/-") - _with_0.utf8_char = (R("\194\223") * R("\128\191") + R("\224\239") * R("\128\191") * R("\128\191") + R("\240\244") * R("\128\191") * R("\128\191") * R("\128\191")) - _with_0.ident_char = R("az", "AZ", "09") + P("_") + _with_0.utf8_char - _with_0.userdata = Carg(1) - _with_0.add_comment = function(src, end_pos, start_pos, comment, userdata) - userdata.comments[start_pos] = comment - return true - end - _with_0.error = function(src, end_pos, start_pos, err_msg, userdata) - local seen_errors = userdata.errors - if seen_errors[start_pos] then - return true - end - local num_errors = 0 - for _ in pairs(seen_errors) do - num_errors = num_errors + 1 - end - if num_errors >= 10 then - seen_errors[start_pos + 1] = colored.bright(colored.yellow(colored.onred("Too many errors, canceling parsing..."))) - return #src + 1 - end - local err_pos = start_pos - local line_no = files.get_line_number(src, err_pos) - local prev_line = line_no == 1 and nil or files.get_line(src, line_no - 1) - local err_line = files.get_line(src, line_no) - local next_line = files.get_line(src, line_no + 1) - local i = err_pos - files.get_line_starts(src)[line_no] - local j = i + (end_pos - start_pos) - local pointer = ("-"):rep(i) .. "^" - err_msg = colored.bright(colored.yellow(colored.onred((err_msg or "Parse error") .. " at " .. tostring(userdata.source.filename) .. ":" .. tostring(line_no) .. ":"))) - if prev_line then - err_msg = err_msg .. ("\n" .. colored.dim(prev_line)) - end - if err_line then - err_line = colored.white(err_line:sub(1, i)) .. colored.bright(colored.red(err_line:sub(i + 1, j + 1))) .. colored.dim(err_line:sub(j + 2, -1)) - err_msg = err_msg .. "\n" .. tostring(err_line) .. "\n" .. tostring(colored.red(pointer)) - end - if next_line then - err_msg = err_msg .. ("\n" .. colored.dim(next_line)) - end - seen_errors[start_pos] = err_msg - return true - end - NOMSU_DEFS = _with_0 -end -setmetatable(NOMSU_DEFS, { - __index = function(self, key) - local make_node - make_node = function(start, value, stop, userdata) - if userdata.source then - do - local _with_0 = userdata.source - value.source = Source(_with_0.filename, _with_0.start + start - 1, _with_0.start + stop - 1) - end - end - setmetatable(value, AST[key]) - if value.__init then - value:__init() - end - return value - end - self[key] = make_node - return make_node - end -}) -local Parser = { - version = 4, - patterns = { } -} -Parser.is_operator = function(s) - return not not (NOMSU_DEFS.operator_char ^ 1 * -1):match(s) -end -Parser.is_identifier = function(s) - return not not (NOMSU_DEFS.ident_char ^ 1 * -1):match(s) -end -local inline_escaper = re.compile("{~ (%utf8_char / ('\"' -> '\\\"') / ('\n' -> '\\n') / ('\t' -> '\\t') / ('\b' -> '\\b') / ('\a' -> '\\a') / ('\v' -> '\\v') / ('\f' -> '\\f') / ('\r' -> '\\r') / ('\\' -> '\\\\') / ([^ -~] -> escape) / .)* ~}", { - utf8_char = NOMSU_DEFS.utf8_char, - escape = (function(self) - return ("\\%03d"):format(self:byte()) - end) -}) -Parser.inline_escape = function(s) - return inline_escaper:match(s) -end -local escaper = re.compile("{~ (%utf8_char / ('\\' -> '\\\\') / [\n\r\t -~] / (. -> escape))* ~}", { - utf8_char = NOMSU_DEFS.utf8_char, - escape = (function(self) - return ("\\%03d"):format(self:byte()) - end) -}) -Parser.escape = function(s) - return escaper:match(s) -end -return Parser diff --git a/parser.moon b/parser.moon deleted file mode 100644 index 2adfe1b..0000000 --- a/parser.moon +++ /dev/null @@ -1,92 +0,0 @@ --- This file contains the parser, which converts Nomsu text into abstract syntax trees -lpeg = require 'lpeg' -re = require 're' -lpeg.setmaxstack 20000 -{:P,:R,:S,:C,:Cmt,:Carg} = lpeg -{:match, :sub} = string -{:insert, :remove} = table -files = require 'files' -{:NomsuCode, :LuaCode, :Source} = require "code_obj" -AST = require "syntax_tree" - -NOMSU_DEFS = with {} - -- Newline supports either windows-style CR+LF or unix-style LF - .nl = P("\r")^-1 * P("\n") - .ws = S(" \t") - .tonumber = tonumber - .table = -> {} - .unpack = unpack or table.unpack - string_escapes = n:"\n", t:"\t", b:"\b", a:"\a", v:"\v", f:"\f", r:"\r" - digit, hex = R('09'), R('09','af','AF') - .escaped_char = (P("\\")*S("xX")*C(hex*hex)) / => string.char(tonumber(@, 16)) - .escaped_char += (P("\\")*C(digit*(digit^-2))) / => string.char(tonumber @) - .escaped_char += (P("\\")*C(S("ntbavfr"))) / string_escapes - .operator_char = S("'`~!@$^&*+=|<>?/-") - .utf8_char = ( - R("\194\223")*R("\128\191") + - R("\224\239")*R("\128\191")*R("\128\191") + - R("\240\244")*R("\128\191")*R("\128\191")*R("\128\191")) - .ident_char = R("az","AZ","09") + P("_") + .utf8_char - - .userdata = Carg(1) - - .add_comment = (src,end_pos,start_pos,comment,userdata)-> - userdata.comments[start_pos] = comment - return true - - .error = (src,end_pos,start_pos,err_msg,userdata)-> - seen_errors = userdata.errors - if seen_errors[start_pos] - return true - num_errors = 0 - for _ in pairs(seen_errors) do num_errors += 1 - if num_errors >= 10 - seen_errors[start_pos+1] = colored.bright colored.yellow colored.onred "Too many errors, canceling parsing..." - return #src+1 - err_pos = start_pos - line_no = files.get_line_number(src, err_pos) - --src = files.read(userdata.source.filename) - prev_line = line_no == 1 and nil or files.get_line(src, line_no-1) - err_line = files.get_line(src, line_no) - next_line = files.get_line(src, line_no+1) - i = err_pos-files.get_line_starts(src)[line_no] - j = i + (end_pos-start_pos) - pointer = ("-")\rep(i) .. "^" - err_msg = colored.bright colored.yellow colored.onred (err_msg or "Parse error").." at #{userdata.source.filename}:#{line_no}:" - if prev_line then err_msg ..= "\n"..colored.dim(prev_line) - if err_line - err_line = colored.white(err_line\sub(1, i))..colored.bright(colored.red(err_line\sub(i+1,j+1)))..colored.dim(err_line\sub(j+2,-1)) - err_msg ..= "\n#{err_line}\n#{colored.red pointer}" - if next_line then err_msg ..= "\n"..colored.dim(next_line) - seen_errors[start_pos] = err_msg - return true - -setmetatable(NOMSU_DEFS, {__index:(key)=> - make_node = (start, value, stop, userdata)-> - if userdata.source - with userdata.source - value.source = Source(.filename, .start + start-1, .start + stop-1) - setmetatable(value, AST[key]) - if value.__init then value\__init! - return value - - self[key] = make_node - return make_node -}) - -Parser = {version:4, patterns:{}} - -Parser.is_operator = (s)-> - return not not (NOMSU_DEFS.operator_char^1 * -1)\match(s) - -Parser.is_identifier = (s)-> - return not not (NOMSU_DEFS.ident_char^1 * -1)\match(s) - -inline_escaper = re.compile "{~ (%utf8_char / ('\"' -> '\\\"') / ('\n' -> '\\n') / ('\t' -> '\\t') / ('\b' -> '\\b') / ('\a' -> '\\a') / ('\v' -> '\\v') / ('\f' -> '\\f') / ('\r' -> '\\r') / ('\\' -> '\\\\') / ([^ -~] -> escape) / .)* ~}", {utf8_char: NOMSU_DEFS.utf8_char, escape:(=> ("\\%03d")\format(@byte!))} -Parser.inline_escape = (s)-> - return inline_escaper\match(s) -escaper = re.compile "{~ (%utf8_char / ('\\' -> '\\\\') / [\n\r\t -~] / (. -> escape))* ~}", {utf8_char: NOMSU_DEFS.utf8_char, escape:(=> ("\\%03d")\format(@byte!))} -Parser.escape = (s)-> - return escaper\match(s) - -return Parser