diff --git a/Makefile b/Makefile index 3b96414..388f411 100644 --- a/Makefile +++ b/Makefile @@ -11,9 +11,11 @@ UNINSTALL_VERSION= # ========= You shouldn't need to mess with any of these variables below ================ MOON_FILES= code_obj.moon error_handling.moon files.moon nomsu.moon nomsu_compiler.moon \ - syntax_tree.moon parser.moon containers.moon bitops.moon + syntax_tree.moon parser.moon containers.moon bitops.moon \ + parser2.moon pretty_errors.moon string2.moon LUA_FILES= code_obj.lua consolecolors.lua error_handling.lua files.lua nomsu.lua nomsu_compiler.lua \ - syntax_tree.lua parser.lua containers.lua bitops.lua utils.lua + syntax_tree.lua parser.lua containers.lua bitops.lua utils.lua \ + parser2.lua pretty_errors.lua string2.lua CORE_NOM_FILES= $(wildcard core/*.nom) CORE_LUA_FILES= $(patsubst %.nom,%.lua,$(CORE_NOM_FILES)) LIB_NOM_FILES= $(wildcard lib/*.nom) diff --git a/containers.lua b/containers.lua index 865098d..1b3fd94 100644 --- a/containers.lua +++ b/containers.lua @@ -264,83 +264,69 @@ for i, entry in ipairs(Dict({ })) do assert(i == 1 and entry.key == "x" and entry.value == 99, "ipairs compatibility issue") end -local Text do local reverse, upper, lower, find, byte, match, gmatch, gsub, sub, format, rep do local _obj_0 = string reverse, upper, lower, find, byte, match, gmatch, gsub, sub, format, rep = _obj_0.reverse, _obj_0.upper, _obj_0.lower, _obj_0.find, _obj_0.byte, _obj_0.match, _obj_0.gmatch, _obj_0.gsub, _obj_0.sub, _obj_0.format, _obj_0.rep end - local as_lua_id - as_lua_id = function(str) - str = gsub(str, "^\3*$", "%1\3") - str = gsub(str, "x([0-9A-F][0-9A-F])", "x78%1") - str = gsub(str, "%W", function(c) - if c == ' ' then - return '_' - else - return format("x%02X", byte(c)) - end - end) - str = gsub(str, "^_*%d", "_%1") - return str - end - local line_matcher = re.compile([[ - lines <- {| line (%nl line)* |} - line <- {(!%nl .)*} - ]], { - nl = lpeg.P("\r") ^ -1 * lpeg.P("\n") - }) + local string2 = require('string2') + local lines, line, line_at, as_lua_id + lines, line, line_at, as_lua_id = string2.lines, string2.line, string2.line_at, string2.as_lua_id local text_methods = { - reversed = function(self) - return reverse(tostring(self)) - end, - uppercase = function(self) - return upper(tostring(self)) - end, - lowercase = function(self) - return lower(tostring(self)) - end, - as_lua_id = function(self) - return as_lua_id(tostring(self)) - end, - formatted_with_1 = function(self, args) - return format(tostring(self), unpack(args)) - end, - byte_1 = function(self, i) - return byte(tostring(self), i) - end, - position_of_1 = function(self) - return find(tostring(self)) - end, - position_of_1_after_2 = function(self, i) - return find(tostring(self), i) - end, + formatted_with_1 = format, + byte_1 = byte, + position_of_1 = find, + position_of_1_after_2 = find, bytes_1_to_2 = function(self, start, stop) return List({ byte(tostring(self), start, stop) }) end, + [as_lua_id("with 1 -> 2")] = gsub, bytes = function(self) return List({ - byte(tostring(self), 1, #self) + byte(tostring(self), 1, -1) }) end, - capitalized = function(self) - return gsub(tostring(self), '%l', upper, 1) - end, lines = function(self) - return List(line_matcher:match(self)) + return List(lines(self)) + end, + line_1 = line, + wrap_to_1 = function(self, maxlen) + local _lines = { } + local _list_0 = self:lines() + for _index_0 = 1, #_list_0 do + local line = _list_0[_index_0] + while #line > maxlen do + local chunk = line:sub(1, maxlen) + local split = chunk:find(' ', maxlen - 8) or maxlen + chunk = line:sub(1, split) + line = line:sub(split + 1, -1) + _lines[#_lines + 1] = chunk + end + _lines[#_lines + 1] = line + end + return table.concat(_lines, "\n") + end, + line_at_1 = function(self, i) + return (line_at(self, i)) + end, + line_number_of_1 = function(self, i) + return select(2, line_at(self, i)) + end, + line_position_of_1 = function(self, i) + return select(3, line_at(self, i)) end, matches_1 = function(self, patt) - return match(tostring(self), patt) and true or false + return match(self, patt) and true or false end, [as_lua_id("* 1")] = function(self, n) return rep(self, n) end, matching_1 = function(self, patt) local result = { } - local stepper, x, i = gmatch(tostring(self), patt) + local stepper, x, i = gmatch(self, patt) while true do local tmp = List({ stepper(x, i) @@ -352,23 +338,10 @@ do result[#result + 1] = tmp end return List(result) - end, - [as_lua_id("with 1 -> 2")] = function(self, patt, sub) - return gsub(tostring(self), patt, sub) - end, - _coalesce = function(self) - if rawlen(self) > 1 then - local s = table.concat(self) - for i = rawlen(self), 2, -1 do - self[i] = nil - end - self[1] = s - end - return self end } setmetatable(text_methods, { - __index = string + __index = string2 }) getmetatable("").__index = function(self, i) if type(i) == 'number' then @@ -382,6 +355,5 @@ do end return { List = List, - Dict = Dict, - Text = Text + Dict = Dict } diff --git a/containers.moon b/containers.moon index 243e129..eaf007a 100644 --- a/containers.moon +++ b/containers.moon @@ -106,68 +106,45 @@ Dict = (t)-> setmetatable(t, _dict_mt) for i,entry in ipairs(Dict({x:99})) assert(i == 1 and entry.key == "x" and entry.value == 99, "ipairs compatibility issue") -local Text do {:reverse, :upper, :lower, :find, :byte, :match, :gmatch, :gsub, :sub, :format, :rep} = string - - -- Convert an arbitrary text into a valid Lua identifier. This function is injective, - -- but not idempotent, i.e. if (x != y) then (as_lua_id(x) != as_lua_id(y)), - -- but as_lua_id(x) is not necessarily equal to as_lua_id(as_lua_id(x)) - as_lua_id = (str)-> - -- Empty strings are not valid lua identifiers, so treat them like "\3", - -- and treat "\3" as "\3\3", etc. to preserve injectivity. - str = gsub str, "^\3*$", "%1\3" - -- Escape 'x' when it precedes something that looks like an uppercase hex sequence. - -- This way, all Lua IDs can be unambiguously reverse-engineered, but normal usage - -- of 'x' won't produce ugly Lua IDs. - -- i.e. "x" -> "x", "oxen" -> "oxen", but "Hex2Dec" -> "Hex782Dec" and "He-ec" -> "Hex2Dec" - str = gsub str, "x([0-9A-F][0-9A-F])", "x78%1" - -- Map spaces to underscores, and everything else non-alphanumeric to hex escape sequences - str = gsub str, "%W", (c)-> - if c == ' ' then '_' - else format("x%02X", byte(c)) - -- Lua IDs can't start with numbers, so map "1" -> "_1", "_1" -> "__1", etc. - str = gsub str, "^_*%d", "_%1" - return str - - line_matcher = re.compile([[ - lines <- {| line (%nl line)* |} - line <- {(!%nl .)*} - ]], nl:lpeg.P("\r")^-1 * lpeg.P("\n")) - + string2 = require 'string2' + {:lines, :line, :line_at, :as_lua_id} = string2 text_methods = - reversed:=>reverse(tostring @) - uppercase:=>upper(tostring @) - lowercase:=>lower(tostring @) - as_lua_id:=>as_lua_id(tostring @) - formatted_with_1:(args)=>format(tostring(@), unpack(args)) - byte_1:(i)=>byte(tostring(@), i) - position_of_1:=>find(tostring @), - position_of_1_after_2:(i)=> find(tostring(@), i) + formatted_with_1:format, byte_1:byte, position_of_1:find, position_of_1_after_2:find, bytes_1_to_2: (start, stop)=> List{byte(tostring(@), start, stop)} - bytes: => List{byte(tostring(@), 1, #@)}, - capitalized: => gsub(tostring(@), '%l', upper, 1) - lines: => List(line_matcher\match(@)) - matches_1: (patt)=> match(tostring(@), patt) and true or false + [as_lua_id "with 1 -> 2"]: gsub + bytes: => List{byte(tostring(@), 1, -1)}, + lines: => List(lines(@)) + line_1: line + wrap_to_1: (maxlen)=> + _lines = {} + for line in *@lines! + while #line > maxlen + chunk = line\sub(1, maxlen) + split = chunk\find(' ', maxlen-8) or maxlen + chunk = line\sub(1, split) + line = line\sub(split+1, -1) + _lines[#_lines+1] = chunk + _lines[#_lines+1] = line + return table.concat(_lines, "\n") + + line_at_1: (i)=> (line_at(@, i)) + line_number_of_1: (i)=> select(2, line_at(@, i)) + line_position_of_1: (i)=> select(3, line_at(@, i)) + matches_1: (patt)=> match(@, patt) and true or false [as_lua_id "* 1"]: (n)=> rep(@, n) matching_1: (patt)=> result = {} - stepper,x,i = gmatch(tostring(@), patt) + stepper,x,i = gmatch(@, patt) while true tmp = List{stepper(x,i)} break if #tmp == 0 i = tmp[1] result[#result+1] = tmp return List(result) - [as_lua_id "with 1 -> 2"]: (patt, sub)=> gsub(tostring(@), patt, sub) - _coalesce: => - if rawlen(@) > 1 - s = table.concat(@) - for i=rawlen(@), 2, -1 do @[i] = nil - @[1] = s - return @ - setmetatable(text_methods, {__index:string}) + setmetatable(text_methods, {__index:string2}) getmetatable("").__index = (i)=> -- Use [] for accessing text characters, or s[{3,4}] for s:sub(3,4) @@ -175,4 +152,4 @@ do elseif type(i) == 'table' then return sub(@, i[1], i[2]) else return text_methods[i] -return {:List, :Dict, :Text} +return {:List, :Dict} diff --git a/nomsu.4.peg b/nomsu.4.peg new file mode 100644 index 0000000..ed119c5 --- /dev/null +++ b/nomsu.4.peg @@ -0,0 +1,208 @@ +-- Nomsu version 4 +file: + {:curr_indent: ' '* :} + (((action / expression / inline_block / indented_block) eol !.) + / file_chunks / empty_block) + {:curr_indent: %nil :} + !. + +shebang: "#!" (!"nomsu" [^%nl])* "nomsu" ws+ "-V" ws* {:version: [0-9.]+ :} [^%nl]* + +file_chunks (FileChunks): + {:curr_indent: ' '* :} + shebang? comment? blank_lines? + (top_block (section_division nl_nodent top_block)*) + blank_lines? + ws* unexpected_chunk? + {:curr_indent: %nil :} + +top_block (Block): + {:curr_indent: ' '* :} + comment? blank_lines? statement (nl_nodent statement?)* + {:curr_indent: %nil :} + +empty_block (Block): + {:curr_indent: ' '* :} + comment? blank_lines? + {:curr_indent: %nil :} + +nodent: (unexpected_indent [^%nl]* / =curr_indent) +indent: =curr_indent (" ") +blank_lines: %nl ((nodent comment / ws*) %nl)* +eol: ws* eol_comment? (!. / &%nl) + +nl_nodent: blank_lines nodent +nl_indent: blank_lines {:curr_indent: indent :} (comment nl_nodent)* + +comment (Comment): + "#" {~ [^%nl]* (%nl+ (indent -> '') [^%nl]*)* ~} +eol_comment (Comment): + "#" {[^%nl]*} + +unexpected_code: ws* _unexpected_code +_unexpected_code (Error): + {:error: {~ [^%nl]+ -> "Couldn't parse this code" ~} :} +unexpected_chunk (Error): + {:error: {~ .+ -> "Couldn't parse this code" ~} :} +unexpected_indent (Error): + {:error: {~ (=curr_indent ws+) -> "Messed up indentation" ~} :} + {:hint: {~ '' -> 'Either make sure this line is aligned with the one above it, or make sure the previous line ends with something that uses indentation, like ":" or "(..)"' ~} :} +missing_paren_err (Error): + {:error: {~ eol -> 'Line ended without finding a closing )-parenthesis' ~} :} + {:hint: {~ '' -> 'Put a ")" here' ~} :} +missing_quote_err (Error): + {:error: {~ eol -> 'Line ended before finding a closing double quotation mark' ~} :} + {:hint: {~ "" -> "Put a quotation mark here" ~} :} +missing_bracket_error (Error): + {:error: {~ eol -> "Line ended before finding a closing ]-bracket" ~} :} + {:hint: {~ '' -> 'Put a "]" here' ~} :} +missing_brace_error (Error): + {:error: {~ eol -> "Line ended before finding a closing }-brace" ~} :} + {:hint: {~ '' -> 'Put a "}" here' ~} :} + +section_division: ("~")^+3 eol + +inline_block: + "(" ws* inline_block ws* ")" / raw_inline_block +raw_inline_block (Block): + (!"::") ":" ws* ((inline_statement (ws* ";" ws* inline_statement)*) / !(eol nl_indent)) +indented_block (Block): + ":" eol nl_indent statement (nl_nodent statement?)* + (%nl (ws* %nl)* nodent (comment / eol / unexpected_code))* + {:curr_indent: %nil :} + +statement: + (action / expression) (eol / unexpected_code) + +inline_statement: (inline_action / inline_expression) + +noindex_inline_expression: + number / variable / inline_text / inline_list / inline_dict / inline_nomsu + / ( "(" + ws* (inline_action / inline_expression) ws* + (ws* ',' ws* (inline_action / inline_expression) ws*)* + (")" / missing_paren_err / unexpected_code) + ) +inline_expression: index_chain / noindex_inline_expression +indented_expression: + cool_indented_text / indented_text / indented_nomsu / indented_list / indented_dict / ({| + "(..)" nl_indent + (action / expression) (eol / unexpected_code) + (%nl (ws* %nl)* nodent (comment / eol / unexpected_code))* + {:curr_indent: %nil :} + |} -> unpack) +expression: + inline_expression / indented_expression + +inline_nomsu (EscapedNomsu): "\" (inline_expression / inline_block) +indented_nomsu (EscapedNomsu): + "\" (noindex_inline_expression / inline_block / indented_expression / indented_block) + +index_chain (IndexChain): + noindex_inline_expression ("." (text_word / noindex_inline_expression))+ + +-- Actions need either at least 1 word, or at least 2 tokens +inline_action (Action): + !section_division + ({:target: inline_arg :} ws* "::" ws*)? + ( (inline_arg (ws* (inline_arg / word))+) + / (word (ws* (inline_arg / word))*)) + (ws* inline_block)? +inline_arg: inline_expression / inline_block +action (Action): + !section_division + ({:target: arg :} (nl_nodent "..")? ws* "::" (nl_nodent "..")? ws*)? + ( (arg ((nl_nodent "..")? ws* (arg / word))+) + / (word ((nl_nodent "..")? ws* (arg / word))*)) +arg: expression / inline_block / indented_block + +word: !number { operator_char+ / ident_char+ } + +text_word (Text): word + +inline_text (Text): + !(cool_indented_text / indented_text) + ('"' _inline_text* ('"' / missing_quote_err / unexpected_code)) +_inline_text: + {~ (('\"' -> '"') / ('\\' -> '\') / escaped_char / [^%nl\"])+ ~} + / inline_text_interpolation +inline_text_interpolation: + "\" ( + variable / inline_list / inline_dict / inline_text + / ("(" + ws* (inline_action / inline_expression) ws* + (ws* ',' ws* (inline_action / inline_expression) ws*)* + (")" / missing_paren_err / unexpected_code)) + ) + +indented_text (Text): + '".."' eol %nl {%nl+}? {:curr_indent: indent :} + (indented_plain_text / text_interpolation / {~ %nl+ (=curr_indent -> "") ~})* + unexpected_code? + {:curr_indent: %nil :} +cool_indented_text (Text): + ({| + '"' _inline_text* '\' %nl {:curr_indent: indent :} '..' + (indented_plain_text / text_interpolation / {~ %nl+ (=curr_indent -> "") ~})* + unexpected_code? + |} -> unpack) + ({(%nl &%nl)+}? %nl =curr_indent '..' _inline_text* '"')? +-- Tracking text-lines-within-indented-text as separate objects allows for better debugging line info +indented_plain_text (Text): + {~ (("\\" -> "\") / (("\" blank_lines =curr_indent "..") -> "") / (!text_interpolation "\") / [^%nl\]+)+ + (%nl+ (=curr_indent -> ""))* ~} +text_interpolation: + inline_text_interpolation / ("\" indented_expression (blank_lines =curr_indent "..")?) + +number (Number): (("-"? (([0-9]+ "." [0-9]+) / ("." [0-9]+) / "0x" [0-9a-fA-F]+ / ([0-9]+)))-> tonumber) + +-- Variables can be nameless (i.e. just %) and can only contain identifier chars. +-- This ensures you don't get weird parsings of `%x+%y` or `%'s thing`. +variable (Var): "%" {ident_char*} + +inline_list (List): + !('[..]') + "[" ws* + (inline_list_item (ws* ',' ws* inline_list_item)* (ws* ',')?)? ws* + ("]" / (","? (missing_bracket_error / unexpected_code))) +indented_list (List): + "[..]" eol nl_indent + list_line (nl_nodent list_line?)* + (%nl (ws* %nl)* nodent (comment / eol / unexpected_code))* + (","? unexpected_code)? +list_line: + (inline_list_item ws* "," ws*)+ eol + / (inline_list_item ws* "," ws*)* (action / expression) eol +inline_list_item: inline_action / inline_expression + +inline_dict (Dict): + !('{..}') + "{" ws* + (inline_dict_entry (ws* ',' ws* inline_dict_entry)*)? ws* + ("}" / (","? (missing_brace_error / unexpected_code))) +indented_dict (Dict): + "{..}" eol nl_indent + dict_line (nl_nodent dict_line?)* + (%nl (ws* %nl)* nodent (comment / eol / unexpected_code))* + (","? unexpected_code)? +dict_line: + (inline_dict_entry ws* "," ws*)+ eol + / (inline_dict_entry ws* "," ws*)* dict_entry eol +dict_entry(DictEntry): + dict_key (ws* ":" ws* (action / expression))? +inline_dict_entry(DictEntry): + dict_key (ws* ":" ws* (inline_action / inline_expression)?)? +dict_key: + text_word / inline_expression + +operator_char: ['`~!@$^&*+=|<>?/-] +ident_char: [a-zA-Z0-9_] / %utf8_char +ws: [ %tab] + +escaped_char: + ("\"->'') ( + (([xX]->'') ((({[0-9a-fA-F]^2} %number_16) -> tonumber) -> tochar)) + / ((([0-9] [0-9]^-2) -> tonumber) -> tochar) + / ("a"->ascii_7) / ("b"->ascii_8) / ("t"->ascii_9) / ("n"->ascii_10) + / ("v"->ascii_11) / ("f"->ascii_12) / ("r"->ascii_13) + ) diff --git a/nomsu.lua b/nomsu.lua index 24ca589..0a0dce1 100644 --- a/nomsu.lua +++ b/nomsu.lua @@ -59,7 +59,7 @@ else end local usage = [=[Nomsu Compiler -Usage: (nomsu | lua nomsu.lua | moon nomsu.moon) [-V version] [-O optimization level] [-v] [-c] [-s] [-t] [-I file] [--help | -h] [--version] [--no-core] [file [nomsu args...]] +Usage: (nomsu | lua nomsu.lua | moon nomsu.moon) [-V version] [-O optimization level] [-v] [-c] [-s] [-I file] [--help | -h] [--version] [--no-core] [file [nomsu args...]] OPTIONS -O Run the compiler with the given optimization level (>0: use precompiled .lua versions of Nomsu files, when available). @@ -98,29 +98,24 @@ if not arg or debug.getinfo(2).func == require then end local file_queue = { } local sep = "\3" -local parser = re.compile([[ args <- {| (flag %sep)* (({~ file ~} -> add_file) {:primary_file: '' -> true :} %sep)? +local parser = re.compile([[ args <- {| (flag %sep)* (({~ file ~} -> add_file) {:primary_file: %true :} %sep)? {:nomsu_args: {| ({(!%sep .)*} %sep)* |} :} %sep? |} !. flag <- - {:optimization: "-O" (%sep? (([0-9]+) -> tonumber))? :} + {:optimization: "-O" (%sep? %number)? :} / ("-I" %sep? ({~ file ~} -> add_file)) - / ("-e" %sep? (({} {~ file ~}) -> add_exec_string) {:exec_strings: '' -> true :}) - / ({:check_syntax: ("-s" -> true):}) - / ({:compile: ("-c" -> true):}) - / ({:compile: ("-c" -> true):}) - / {:verbose: ("-v" -> true) :} - / {:help: (("-h" / "--help") -> true) :} - / {:version: ("--version" -> true) :} - / {:no_core: ("--no-core" -> true) :} + / ("-e" %sep? (({} {~ file ~}) -> add_exec_string) {:exec_strings: %true :}) + / ({:check_syntax: "-s" %true:}) + / ({:compile: "-c" %true:}) + / {:verbose: "-v" %true :} + / {:help: ("-h" / "--help") %true :} + / {:version: "--version" %true :} + / {:no_core: "--no-core" %true :} / {:debugger: ("-d" %sep? {(!%sep .)*}) :} / {:requested_version: "-V" (%sep? {([0-9.])+})? :} file <- ("-" -> "stdin") / {(!%sep .)+} ]], { - ["true"] = (function() - return true - end), - tonumber = (function(self) - return tonumber(self) - end), + ["true"] = lpeg.Cc(true), + number = lpeg.R("09") ^ 1 / tonumber, sep = lpeg.P(sep), add_file = function(f) return table.insert(file_queue, f) @@ -255,7 +250,7 @@ run = function() _continue_0 = true break end - nomsu:parse(file, source) + local tree = nomsu:parse(file, source) print("Parse succeeded: " .. tostring(filename)) end if args.compile then diff --git a/nomsu.moon b/nomsu.moon index 7a8ff34..0c989e6 100755 --- a/nomsu.moon +++ b/nomsu.moon @@ -20,7 +20,7 @@ else usage = [=[ Nomsu Compiler -Usage: (nomsu | lua nomsu.lua | moon nomsu.moon) [-V version] [-O optimization level] [-v] [-c] [-s] [-t] [-I file] [--help | -h] [--version] [--no-core] [file [nomsu args...]] +Usage: (nomsu | lua nomsu.lua | moon nomsu.moon) [-V version] [-O optimization level] [-v] [-c] [-s] [-I file] [--help | -h] [--version] [--no-core] [file [nomsu args...]] OPTIONS -O Run the compiler with the given optimization level (>0: use precompiled .lua versions of Nomsu files, when available). @@ -57,24 +57,23 @@ if not arg or debug.getinfo(2).func == require file_queue = {} sep = "\3" parser = re.compile([[ - args <- {| (flag %sep)* (({~ file ~} -> add_file) {:primary_file: '' -> true :} %sep)? + args <- {| (flag %sep)* (({~ file ~} -> add_file) {:primary_file: %true :} %sep)? {:nomsu_args: {| ({(!%sep .)*} %sep)* |} :} %sep? |} !. flag <- - {:optimization: "-O" (%sep? (([0-9]+) -> tonumber))? :} + {:optimization: "-O" (%sep? %number)? :} / ("-I" %sep? ({~ file ~} -> add_file)) - / ("-e" %sep? (({} {~ file ~}) -> add_exec_string) {:exec_strings: '' -> true :}) - / ({:check_syntax: ("-s" -> true):}) - / ({:compile: ("-c" -> true):}) - / ({:compile: ("-c" -> true):}) - / {:verbose: ("-v" -> true) :} - / {:help: (("-h" / "--help") -> true) :} - / {:version: ("--version" -> true) :} - / {:no_core: ("--no-core" -> true) :} + / ("-e" %sep? (({} {~ file ~}) -> add_exec_string) {:exec_strings: %true :}) + / ({:check_syntax: "-s" %true:}) + / ({:compile: "-c" %true:}) + / {:verbose: "-v" %true :} + / {:help: ("-h" / "--help") %true :} + / {:version: "--version" %true :} + / {:no_core: "--no-core" %true :} / {:debugger: ("-d" %sep? {(!%sep .)*}) :} / {:requested_version: "-V" (%sep? {([0-9.])+})? :} file <- ("-" -> "stdin") / {(!%sep .)+} ]], { - true:(-> true), tonumber:(=>tonumber(@)), sep:lpeg.P(sep) + true:lpeg.Cc(true), number:lpeg.R("09")^1/tonumber, sep:lpeg.P(sep) add_file: (f)-> table.insert(file_queue, f) add_exec_string: (pos, s)-> name = "command line arg @#{pos}.nom" @@ -165,7 +164,7 @@ run = -> -- Check syntax file, source = get_file_and_source(filename) continue unless file - nomsu\parse(file, source) + tree = nomsu\parse(file, source) print("Parse succeeded: #{filename}") if args.compile diff --git a/nomsu_compiler.lua b/nomsu_compiler.lua index e6462b6..510de16 100644 --- a/nomsu_compiler.lua +++ b/nomsu_compiler.lua @@ -35,6 +35,7 @@ do end local AST = require("syntax_tree") local Parser = require("parser") +local make_parser = require("parser2") SOURCE_MAP = { } table.map = function(t, fn) return setmetatable((function() @@ -61,6 +62,69 @@ table.copy = function(t) return _tbl_0 end)(), getmetatable(t)) end +local Parsers = { } +local max_parser_version = 0 +for version = 1, 999 do + local _continue_0 = false + repeat + if not (version == 4) then + _continue_0 = true + break + end + local peg_file = io.open("nomsu." .. tostring(version) .. ".peg") + if not peg_file and package.nomsupath then + for path in package.nomsupath:gmatch("[^;]+") do + peg_file = io.open(path .. "/nomsu." .. tostring(version) .. ".peg") + if peg_file then + break + end + end + end + if not (peg_file) then + break + end + max_parser_version = version + local make_tree + make_tree = function(tree, userdata) + local cls = AST[tree.type] + tree.source = Source(userdata.filename, tree.start, tree.stop) + tree.start, tree.stop = nil, nil + tree.type = nil + do + local _accum_0 = { } + local _len_0 = 1 + for _index_0 = 1, #tree do + local t = tree[_index_0] + if AST.is_syntax_tree(t, "Comment") then + _accum_0[_len_0] = t + _len_0 = _len_0 + 1 + end + end + tree.comments = _accum_0 + end + if #tree.comments == 0 then + tree.comments = nil + end + for i = #tree, 1, -1 do + if AST.is_syntax_tree(tree[i], "Comment") then + table.remove(tree, i) + end + end + tree = setmetatable(tree, cls) + cls.source_code_for_tree[tree] = userdata.source + if tree.__init then + tree:__init() + end + return tree + end + Parsers[version] = make_parser(peg_file:read("*a"), make_tree) + peg_file:close() + _continue_0 = true + until true + if not _continue_0 then + break + end +end local MAX_LINE = 80 local NomsuCompiler = setmetatable({ name = "Nomsu" @@ -79,12 +143,68 @@ local NomsuCompiler = setmetatable({ return self.name end }) +local _anon_chunk = 0 do NomsuCompiler.NOMSU_COMPILER_VERSION = 7 - NomsuCompiler.NOMSU_SYNTAX_VERSION = Parser.version + NomsuCompiler.NOMSU_SYNTAX_VERSION = max_parser_version NomsuCompiler.nomsu = NomsuCompiler - NomsuCompiler.parse = function(self, ...) - return Parser.parse(...) + NomsuCompiler.parse = function(self, nomsu_code, source, version) + if source == nil then + source = nil + end + if version == nil then + version = nil + end + source = source or nomsu_code.source + nomsu_code = tostring(nomsu_code) + if not (source) then + source = Source("anonymous chunk #" .. tostring(_anon_chunk), 1, #nomsu_code) + _anon_chunk = _anon_chunk + 1 + end + version = version or nomsu_code:match("^#![^\n]*nomsu[ ]+-V[ ]*([0-9.]+)") + local syntax_version = version and tonumber(version:match("^[0-9]+")) or max_parser_version + local parse = Parsers[syntax_version] or Parsers[max_parser_version] + local tree = parse(nomsu_code, source.filename) + local pretty_error = require("pretty_errors") + local find_errors + find_errors = function(t) + if t.type == "Error" then + return pretty_error({ + error = t.error, + hint = t.hint, + source = t:get_source_code(), + start = t.source.start, + stop = t.source.stop + }) + end + local errs = "" + for k, v in pairs(t) do + local _continue_0 = false + repeat + if not (AST.is_syntax_tree(v)) then + _continue_0 = true + break + end + local err = find_errors(v) + if #err > 0 then + if #errs > 0 then + errs = errs .. "\n\n" + end + errs = errs .. err + end + _continue_0 = true + until true + if not _continue_0 then + break + end + end + return errs + end + local errs = find_errors(tree) + if #errs > 0 then + error(errs, 0) + end + return tree end NomsuCompiler.can_optimize = function() return false @@ -699,6 +819,10 @@ do return LuaCode.Value(tree.source, (tree[1]):as_lua_id()) elseif "FileChunks" == _exp_0 then return error("Cannot convert FileChunks to a single block of lua, since each chunk's " .. "compilation depends on the earlier chunks") + elseif "Comment" == _exp_0 then + return LuaCode(tree.source, "") + elseif "Error" == _exp_0 then + return error("Cannot compile errors") else return error("Unknown type: " .. tostring(tree.type)) end @@ -726,6 +850,10 @@ do local _exp_0 = tree.type if "FileChunks" == _exp_0 then return error("Cannot inline a FileChunks") + elseif "Comment" == _exp_0 then + return NomsuCode(tree.source, "") + elseif "Error" == _exp_0 then + return error("Cannot compile errors") elseif "Action" == _exp_0 then local nomsu = NomsuCode(tree.source) if tree.target then @@ -950,15 +1078,8 @@ do end local space = MAX_LINE - pos local inline - local check - check = function(prefix, nomsu, tree) - if type(tree) == 'number' then - require('ldt').breakpoint() - end - return coroutine.yield(prefix, nomsu, tree) - end for prefix, nomsu, tree in coroutine.wrap(function() - inline = self:tree_to_inline_nomsu(t, false, check) + inline = self:tree_to_inline_nomsu(t, false, coroutine.yield) end) do local len = #tostring(nomsu) if prefix + len > MAX_LINE then @@ -1197,7 +1318,7 @@ do end nomsu:append(": ", recurse(value, #tostring(nomsu))) return nomsu - elseif "IndexChain" == _exp_0 or "Number" == _exp_0 or "Var" == _exp_0 then + elseif "IndexChain" == _exp_0 or "Number" == _exp_0 or "Var" == _exp_0 or "Comment" == _exp_0 or "Error" == _exp_0 then return self:tree_to_inline_nomsu(tree) else return error("Unknown type: " .. tostring(tree.type)) diff --git a/nomsu_compiler.moon b/nomsu_compiler.moon index f388c2d..4300327 100644 --- a/nomsu_compiler.moon +++ b/nomsu_compiler.moon @@ -24,6 +24,7 @@ unpack or= table.unpack {:NomsuCode, :LuaCode, :Source} = require "code_obj" AST = require "syntax_tree" Parser = require("parser") +make_parser = require("parser2") -- Mapping from source string (e.g. "@core/metaprogramming.nom[1:100]") to a mapping -- from lua line number to nomsu line number export SOURCE_MAP @@ -37,15 +38,74 @@ table.copy = (t)-> setmetatable({k,v for k,v in pairs(t)}, getmetatable(t)) -- consider non-linear codegen, rather than doing thunks for things like comprehensions -- Re-implement nomsu-to-lua comment translation? +Parsers = {} +max_parser_version = 0 +for version=1,999 + continue unless version == 4 -- TODO: remove + peg_file = io.open("nomsu.#{version}.peg") + if not peg_file and package.nomsupath + for path in package.nomsupath\gmatch("[^;]+") + peg_file = io.open(path.."/nomsu.#{version}.peg") + break if peg_file + break unless peg_file + max_parser_version = version + make_tree = (tree, userdata)-> + cls = AST[tree.type] + tree.source = Source(userdata.filename, tree.start, tree.stop) + tree.start, tree.stop = nil, nil + tree.type = nil + tree.comments = [t for t in *tree when AST.is_syntax_tree(t, "Comment")] + if #tree.comments == 0 then tree.comments = nil + for i=#tree,1,-1 + if AST.is_syntax_tree(tree[i], "Comment") + table.remove(tree, i) + tree = setmetatable(tree, cls) + cls.source_code_for_tree[tree] = userdata.source + if tree.__init then tree\__init! + return tree + Parsers[version] = make_parser(peg_file\read("*a"), make_tree) + peg_file\close! + MAX_LINE = 80 -- For beautification purposes, try not to make lines much longer than this value NomsuCompiler = setmetatable {name:"Nomsu"}, __index: (k)=> if _self = rawget(@, "self") then _self[k] else nil __tostring: => @name +_anon_chunk = 0 with NomsuCompiler .NOMSU_COMPILER_VERSION = 7 - .NOMSU_SYNTAX_VERSION = Parser.version + .NOMSU_SYNTAX_VERSION = max_parser_version .nomsu = NomsuCompiler - .parse = (...)=> Parser.parse(...) + .parse = (nomsu_code, source=nil, version=nil)=> + source or= nomsu_code.source + nomsu_code = tostring(nomsu_code) + unless source + source = Source("anonymous chunk ##{_anon_chunk}", 1, #nomsu_code) + _anon_chunk += 1 + version or= nomsu_code\match("^#![^\n]*nomsu[ ]+-V[ ]*([0-9.]+)") + syntax_version = version and tonumber(version\match("^[0-9]+")) or max_parser_version + parse = Parsers[syntax_version] or Parsers[max_parser_version] + tree = parse(nomsu_code, source.filename) + pretty_error = require("pretty_errors") + -- TODO: truncate + find_errors = (t)-> + if t.type == "Error" + return pretty_error{ + error:t.error, hint:t.hint, source:t\get_source_code! + start:t.source.start, stop:t.source.stop + } + errs = "" + for k,v in pairs(t) + continue unless AST.is_syntax_tree(v) + err = find_errors(v) + if #err > 0 + if #errs > 0 then errs ..="\n\n" + errs ..= err + return errs + + errs = find_errors(tree) + if #errs > 0 + error(errs, 0) + return tree .can_optimize = -> false -- Discretionary/convenience stuff @@ -457,6 +517,13 @@ with NomsuCompiler when "FileChunks" error("Cannot convert FileChunks to a single block of lua, since each chunk's ".. "compilation depends on the earlier chunks") + + when "Comment" + -- TODO: implement? + return LuaCode(tree.source, "") + + when "Error" + error("Cannot compile errors") else error("Unknown type: #{tree.type}") @@ -467,6 +534,13 @@ with NomsuCompiler switch tree.type when "FileChunks" error("Cannot inline a FileChunks") + + when "Comment" + -- TODO: implement? + return NomsuCode(tree.source, "") + + when "Error" + error("Cannot compile errors") when "Action" nomsu = NomsuCode(tree.source) @@ -601,10 +675,7 @@ with NomsuCompiler if type(pos) != 'number' then pos = #tostring(pos)\match("[ ]*([^\n]*)$") space = MAX_LINE - pos local inline - check = (prefix,nomsu,tree)-> - if type(tree) == 'number' then require('ldt').breakpoint! - coroutine.yield(prefix,nomsu,tree) - for prefix, nomsu, tree in coroutine.wrap(-> inline = @tree_to_inline_nomsu(t, false, check)) + for prefix, nomsu, tree in coroutine.wrap(-> inline = @tree_to_inline_nomsu(t, false, coroutine.yield)) len = #tostring(nomsu) break if prefix+len > MAX_LINE break if tree.type == "Block" and (#tree > 1 or len > 20) @@ -779,7 +850,7 @@ with NomsuCompiler nomsu\append ": ", recurse(value, #tostring(nomsu)) return nomsu - when "IndexChain", "Number", "Var" + when "IndexChain", "Number", "Var", "Comment", "Error" return @tree_to_inline_nomsu tree else diff --git a/parser.lua b/parser.lua index 4662f5e..8a94314 100644 --- a/parser.lua +++ b/parser.lua @@ -47,7 +47,7 @@ do return string.char(tonumber(self)) end) _with_0.escaped_char = _with_0.escaped_char + ((P("\\") * C(S("ntbavfr"))) / string_escapes) - _with_0.operator_char = S("'`~!@$^&*-+=|<>?/") + _with_0.operator_char = S("'`~!@$^&*+=|<>?/-") _with_0.utf8_char = (R("\194\223") * R("\128\191") + R("\224\239") * R("\128\191") * R("\128\191") + R("\240\244") * R("\128\191") * R("\128\191") * R("\128\191")) _with_0.ident_char = R("az", "AZ", "09") + P("_") + _with_0.utf8_char _with_0.userdata = Carg(1) diff --git a/parser.moon b/parser.moon index 0e68b4d..f2e5b17 100644 --- a/parser.moon +++ b/parser.moon @@ -21,7 +21,7 @@ NOMSU_DEFS = with {} .escaped_char = (P("\\")*S("xX")*C(hex*hex)) / => string.char(tonumber(@, 16)) .escaped_char += (P("\\")*C(digit*(digit^-2))) / => string.char(tonumber @) .escaped_char += (P("\\")*C(S("ntbavfr"))) / string_escapes - .operator_char = S("'`~!@$^&*-+=|<>?/") + .operator_char = S("'`~!@$^&*+=|<>?/-") .utf8_char = ( R("\194\223")*R("\128\191") + R("\224\239")*R("\128\191")*R("\128\191") + diff --git a/parser2.moon b/parser2.moon new file mode 100644 index 0000000..9a2e2ff --- /dev/null +++ b/parser2.moon @@ -0,0 +1,64 @@ +-- This file contains the parser, which converts text into abstract syntax trees +lpeg = require 'lpeg' +re = require 're' +lpeg.setmaxstack 20000 +{:P,:R,:S,:C,:Cmt,:Carg,:Cc} = lpeg +{:repr} = require 'utils' + +DEFS = with {} + -- Newline supports either windows-style CR+LF or unix-style LF + .nl = P("\r")^-1 * P("\n") + .tab = P("\t") + .tonumber = tonumber + .tochar = string.char + .unpack = unpack or table.unpack + .nil = Cc(nil) + .userdata = Carg(1) + .utf8_char = ( + R("\194\223")*R("\128\191") + + R("\224\239")*R("\128\191")*R("\128\191") + + R("\240\244")*R("\128\191")*R("\128\191")*R("\128\191")) + .Tree = (t, userdata)-> userdata.make_tree(t, userdata) + +setmetatable(DEFS, {__index:(key)=> + if i = key\match("^ascii_(%d+)$") + c = string.char(tonumber(i)) + self[key] = c + return c + elseif i = key\match("^number_(%d+)$") + p = Cc(tonumber(i)) + self[key] = p + return p +}) + +-- Just for cleanliness, I put the language spec in its own file using a slightly modified +-- version of the lpeg.re syntax. +peg_tidier = re.compile [[ +file <- %nl* {~ (def/comment) (%nl+ (def/comment))* %nl* ~} +def <- anon_def / captured_def +anon_def <- + ({ident} (" "*) ":" {[^%nl]* (%nl+ " "+ [^%nl]*)*}) + -> "%1 <- %2" +captured_def <- + ({ident} (" "*) "(" {ident} ")" (" "*) ":" {[^%nl]* (%nl+ " "+ [^%nl]*)*}) + -> "%1 <- ({| {:start:{}:} %3 {:stop:{}:} {:type: (''->'%2') :} |} %%userdata) -> Tree" +ident <- [a-zA-Z_][a-zA-Z0-9_]* +comment <- "--" [^%nl]* +]] + +make_parser = (peg, make_tree=nil)-> + peg = assert(peg_tidier\match(peg)) + peg = assert(re.compile(peg, DEFS)) + return (input, filename='???')-> + input = tostring(input) + tree_mt = {__index: {source:input, filename:filename}} + userdata = { + make_tree: make_tree or ((t)->setmetatable(t, tree_mt)) + :filename, source:input + } + tree = peg\match(input, nil, userdata) + if not tree + error "File #{filename} failed to parse:\n#{input}" + return tree + +return make_parser diff --git a/pretty_errors.lua b/pretty_errors.lua new file mode 100644 index 0000000..f2a608e --- /dev/null +++ b/pretty_errors.lua @@ -0,0 +1,60 @@ +require("containers") +local string2 = require('string2') +local box +box = function(text) + local max_line = 0 + for line in text:gmatch("[^\n]+") do + max_line = math.max(max_line, #(line:gsub("\027%[[0-9;]*m", ""))) + end + local ret = ("\n" .. text):gsub("\n([^\n]*)", function(line) + local line_len = #(line:gsub("\027%[[0-9;]*m", "")) + return "\n" .. tostring(line) .. tostring((" "):rep(max_line - line_len)) .. " \027[0m" + end) + return ret:sub(2, -1), max_line +end +local format_error +format_error = function(err) + local context = err.context or 2 + local err_line, err_linenum, err_linepos = string2.line_at(err.source, err.start) + local err_size = math.min((err.stop - err.start), (#err_line - err_linepos) + 1) + local nl_indicator = (err_linepos > #err_line) and " " or "" + local fmt_str = " %" .. tostring(#tostring(err_linenum + context)) .. "d|" + local pointer + if err_size >= 2 then + pointer = (" "):rep(err_linepos + #fmt_str:format(0) - 1) .. "╚" .. tostring(("═"):rep(err_size - 2)) .. "╝" + else + pointer = (" "):rep(err_linepos + #fmt_str:format(0) - 1) .. "⬆" + end + local err_msg = "\027[33;41;1mParse error at " .. tostring(err.filename) .. ":" .. tostring(err_linenum) .. "\027[0m" + for i = err_linenum - context, err_linenum - 1 do + do + local line = string2.line(err.source, i) + if line then + err_msg = err_msg .. "\n\027[2m" .. tostring(fmt_str:format(i)) .. "\027[0m" .. tostring(line) .. "\027[0m" + end + end + end + if err_line then + local box_width = 60 + local before = err_line:sub(1, err_linepos - 1) + local during = err_line:sub(err_linepos, err_linepos + err_size - 1) + local after = err_line:sub(err_linepos + err_size, -1) + err_line = "\027[0m" .. tostring(before) .. "\027[41;30m" .. tostring(during) .. tostring(nl_indicator) .. "\027[0m" .. tostring(after) + err_msg = err_msg .. "\n\027[2m" .. tostring(fmt_str:format(err_linenum)) .. tostring(err_line) .. "\027[0m\n" .. tostring(pointer) + local err_text = "\027[47;31;1m" .. tostring((" " .. err.error):wrap_to_1(box_width):gsub("\n", "\n\027[47;31;1m ")) + if err.hint then + err_text = err_text .. "\n\027[47;30m" .. tostring((" Suggestion: " .. tostring(err.hint)):wrap_to_1(box_width):gsub("\n", "\n\027[47;30m ")) + end + err_msg = err_msg .. ("\n\027[33;1m " .. box(err_text):gsub("\n", "\n ")) + end + for i = err_linenum + 1, err_linenum + context do + do + local line = string2.line(err.source, i) + if line then + err_msg = err_msg .. "\n\027[2m" .. tostring(fmt_str:format(i)) .. "\027[0m" .. tostring(line) .. "\027[0m" + end + end + end + return err_msg +end +return format_error diff --git a/pretty_errors.moon b/pretty_errors.moon new file mode 100644 index 0000000..3adada5 --- /dev/null +++ b/pretty_errors.moon @@ -0,0 +1,46 @@ +-- This file has code for converting errors to user-friendly format, with colors, +-- line numbers, code excerpts, and so on. +require "containers" +string2 = require 'string2' + +box = (text)-> + max_line = 0 + for line in text\gmatch("[^\n]+") + max_line = math.max(max_line, #(line\gsub("\027%[[0-9;]*m",""))) + ret = ("\n"..text)\gsub "\n([^\n]*)", (line)-> + line_len = #(line\gsub("\027%[[0-9;]*m","")) + return "\n#{line}#{(" ")\rep(max_line-line_len)} \027[0m" + return ret\sub(2,-1), max_line + +format_error = (err)-> + context = err.context or 2 + err_line, err_linenum, err_linepos = string2.line_at(err.source, err.start) + -- TODO: better handle multi-line errors + err_size = math.min((err.stop - err.start), (#err_line-err_linepos) + 1) + nl_indicator = (err_linepos > #err_line) and " " or "" + fmt_str = " %#{#tostring(err_linenum+context)}d|" + pointer = if err_size >= 2 + (" ")\rep(err_linepos+#fmt_str\format(0)-1).."╚#{("═")\rep(err_size-2)}╝" + else + (" ")\rep(err_linepos+#fmt_str\format(0)-1).."⬆" + err_msg = "\027[33;41;1mParse error at #{err.filename}:#{err_linenum}\027[0m" + for i=err_linenum-context,err_linenum-1 + if line = string2.line(err.source, i) + err_msg ..= "\n\027[2m#{fmt_str\format(i)}\027[0m#{line}\027[0m" + if err_line + box_width = 60 + before = err_line\sub(1, err_linepos-1) + during = err_line\sub(err_linepos,err_linepos+err_size-1) + after = err_line\sub(err_linepos+err_size, -1) + err_line = "\027[0m#{before}\027[41;30m#{during}#{nl_indicator}\027[0m#{after}" + err_msg ..= "\n\027[2m#{fmt_str\format(err_linenum)}#{err_line}\027[0m\n#{pointer}" + err_text = "\027[47;31;1m#{(" "..err.error)\wrap_to_1(box_width)\gsub("\n", "\n\027[47;31;1m ")}" + if err.hint + err_text ..= "\n\027[47;30m#{(" Suggestion: #{err.hint}")\wrap_to_1(box_width)\gsub("\n", "\n\027[47;30m ")}" + err_msg ..= "\n\027[33;1m "..box(err_text)\gsub("\n", "\n ") + for i=err_linenum+1,err_linenum+context + if line = string2.line(err.source, i) + err_msg ..= "\n\027[2m#{fmt_str\format(i)}\027[0m#{line}\027[0m" + return err_msg + +return format_error diff --git a/string2.lua b/string2.lua new file mode 100644 index 0000000..387fd65 --- /dev/null +++ b/string2.lua @@ -0,0 +1,134 @@ +local reverse, upper, lower, find, byte, match, gmatch, gsub, sub, format, rep, char +do + local _obj_0 = string + reverse, upper, lower, find, byte, match, gmatch, gsub, sub, format, rep, char = _obj_0.reverse, _obj_0.upper, _obj_0.lower, _obj_0.find, _obj_0.byte, _obj_0.match, _obj_0.gmatch, _obj_0.gsub, _obj_0.sub, _obj_0.format, _obj_0.rep, _obj_0.char +end +local isplit +isplit = function(self, sep) + if sep == nil then + sep = '%s+' + end + local step + step = function(self, i) + local start = self.pos + if not (start) then + return + end + i = i + 1 + local nl = find(self.str, self.sep, start) + self.pos = nl and (nl + 1) or nil + local line = sub(self.str, start, nl and (nl - 1) or #self.str) + return i, line, start, (nl and (nl - 1) or #self.str) + end + return step, { + str = self, + pos = 1, + sep = sep + }, 0 +end +local string2 = { + isplit = isplit, + uppercase = upper, + lowercase = lower, + reversed = reverse, + capitalized = function(self) + return gsub(self, '%l', upper, 1) + end, + byte = byte, + bytes = function(self, i, j) + return { + byte(self, i or 1, j or -1) + } + end, + split = function(self, sep) + local _accum_0 = { } + local _len_0 = 1 + for i, chunk in isplit(self, sep) do + _accum_0[_len_0] = chunk + _len_0 = _len_0 + 1 + end + return _accum_0 + end, + lines = function(self) + local _accum_0 = { } + local _len_0 = 1 + for i, line in isplit(self, '\n') do + _accum_0[_len_0] = line + _len_0 = _len_0 + 1 + end + return _accum_0 + end, + line = function(self, line_num) + for i, line, start in isplit(self, '\n') do + if i == line_num then + return line + end + end + end, + line_at = function(self, pos) + assert(type(pos) == 'number', "Invalid string position") + if pos < 1 or pos > #self then + return + end + for i, line, start, stop in isplit(self, '\n') do + if stop >= pos then + return line, i, (pos - start + 1) + end + end + end, + wrap = function(self, maxlen, buffer) + if maxlen == nil then + maxlen = 80 + end + if buffer == nil then + buffer = 8 + end + local lines = { } + local _list_0 = self:lines() + for _index_0 = 1, #_list_0 do + local line = _list_0[_index_0] + while #line > maxlen do + local chunk = line:sub(1, maxlen) + local split = chunk:find(' ', maxlen - buffer, true) or maxlen + chunk = line:sub(1, split) + line = line:sub(split + 1, -1) + lines[#lines + 1] = chunk + end + lines[#lines + 1] = line + end + return table.concat(lines, "\n") + end, + as_lua_id = function(str) + local orig = str + str = gsub(str, "^ *$", "%1 ") + str = gsub(str, "x([0-9A-F][0-9A-F])", "x78%1") + str = gsub(str, "%W", function(c) + if c == ' ' then + return '_' + else + return format("x%02X", byte(c)) + end + end) + str = gsub(str, "^_*%d", "_%1") + if str.from_lua_id then + local re_orig = str:from_lua_id() + if re_orig ~= orig then + require('ldt').breakpoint() + end + end + return str + end, + from_lua_id = function(str) + str = gsub(str, "^_(_*%d.*)", "%1") + str = gsub(str, "_", " ") + str = gsub(str, "x([0-9A-F][0-9A-F])", function(hex) + return char(tonumber(hex, 16)) + end) + str = gsub(str, "^ ([ ]*)$", "%1") + return str + end +} +for k, v in pairs(string) do + string2[k] = string2[k] or v +end +return string2 diff --git a/string2.moon b/string2.moon new file mode 100644 index 0000000..735a2cb --- /dev/null +++ b/string2.moon @@ -0,0 +1,79 @@ +-- Expand the capabilities of the built-in strings +{:reverse, :upper, :lower, :find, :byte, :match, :gmatch, :gsub, :sub, :format, :rep, :char} = string + +isplit = (sep='%s+')=> + step = (i)=> + start = @pos + return unless start + i += 1 + nl = find(@str, @sep, start) + @pos = nl and (nl+1) or nil + line = sub(@str, start, nl and (nl-1) or #@str) + return i, line, start, (nl and (nl-1) or #@str) + return step, {str:@, pos:1, :sep}, 0 + +string2 = { + :isplit, uppercase:upper, lowercase:lower, reversed:reverse + capitalized: => gsub(@, '%l', upper, 1) + byte: byte, bytes: (i, j)=> {byte(@, i or 1, j or -1)} + split: (sep)=> [chunk for i,chunk in isplit(@, sep)] + lines: => [line for i,line in isplit(@, '\n')] + line: (line_num)=> + for i, line, start in isplit(@, '\n') + return line if i == line_num + + line_at: (pos)=> + assert(type(pos) == 'number', "Invalid string position") + return if pos < 1 or pos > #@ + for i, line, start, stop in isplit(@, '\n') + if stop >= pos + return line, i, (pos-start+1) + + wrap: (maxlen=80, buffer=8)=> + lines = {} + for line in *@lines! + while #line > maxlen + chunk = line\sub(1, maxlen) + split = chunk\find(' ', maxlen-buffer, true) or maxlen + chunk = line\sub(1, split) + line = line\sub(split+1, -1) + lines[#lines+1] = chunk + lines[#lines+1] = line + return table.concat(lines, "\n") + + -- Convert an arbitrary text into a valid Lua identifier. This function is injective, + -- but not idempotent. In logic terms: (x != y) => (as_lua_id(x) != as_lua_id(y)), + -- but not (as_lua_id(a) == b) => (as_lua_id(b) == b). + as_lua_id: (str)-> + orig = str + -- Empty strings are not valid lua identifiers, so treat them like " ", + -- and treat " " as " ", etc. to preserve injectivity. + str = gsub str, "^ *$", "%1 " + -- Escape 'x' (\x78) when it precedes something that looks like an uppercase hex sequence. + -- This way, all Lua IDs can be unambiguously reverse-engineered, but normal usage + -- of 'x' won't produce ugly Lua IDs. + -- i.e. "x" -> "x", "oxen" -> "oxen", but "Hex2Dec" -> "Hex782Dec" and "He-ec" -> "Hex2Dec" + str = gsub str, "x([0-9A-F][0-9A-F])", "x78%1" + -- Map spaces to underscores, and everything else non-alphanumeric to hex escape sequences + str = gsub str, "%W", (c)-> + if c == ' ' then '_' + else format("x%02X", byte(c)) + -- Lua IDs can't start with numbers, so map "1" -> "_1", "_1" -> "__1", etc. + str = gsub str, "^_*%d", "_%1" + if str.from_lua_id + re_orig = str\from_lua_id! + require('ldt').breakpoint! if re_orig != orig + return str + + -- from_lua_id(as_lua_id(str)) == str, but behavior is unspecified for inputs that + -- did not come from as_lua_id() + from_lua_id: (str)-> + str = gsub(str, "^_(_*%d.*)", "%1") + str = gsub(str, "_", " ") + str = gsub(str, "x([0-9A-F][0-9A-F])", (hex)-> char(tonumber(hex, 16))) + str = gsub(str, "^ ([ ]*)$", "%1") + return str +} +for k,v in pairs(string) do string2[k] or= v + +return string2 diff --git a/syntax_tree.lua b/syntax_tree.lua index 8fa15b7..b4cf45e 100644 --- a/syntax_tree.lua +++ b/syntax_tree.lua @@ -26,7 +26,9 @@ local types = { "DictEntry", "IndexChain", "Action", - "FileChunks" + "FileChunks", + "Error", + "Comment" } for _index_0 = 1, #types do local name = types[_index_0] @@ -49,6 +51,10 @@ for _index_0 = 1, #types do return Source:is_instance(x) and tostring(x) or nil end))) end + cls.source_code_for_tree = { } + cls.get_source_code = function(self) + return self.source_code_for_tree[self] + end cls.map = function(self, fn) local replacement = fn(self) if replacement == false then diff --git a/syntax_tree.moon b/syntax_tree.moon index 96230a1..2f46d9e 100644 --- a/syntax_tree.moon +++ b/syntax_tree.moon @@ -10,7 +10,7 @@ AST.is_syntax_tree = (n, t=nil)-> type(n) == 'table' and getmetatable(n) and AST[n.type] == getmetatable(n) and (t == nil or n.type == t) types = {"Number", "Var", "Block", "EscapedNomsu", "Text", "List", "Dict", "DictEntry", - "IndexChain", "Action", "FileChunks"} + "IndexChain", "Action", "FileChunks", "Error", "Comment"} for name in *types cls = {} with cls @@ -21,6 +21,8 @@ for name in *types .is_instance = (x)=> getmetatable(x) == @ .__tostring = => "#{@type}#{repr @, ((x)-> Source\is_instance(x) and tostring(x) or nil)}" .__repr = => "#{@type}#{repr @, ((x)-> Source\is_instance(x) and tostring(x) or nil)}" + .source_code_for_tree = {} + .get_source_code = => @source_code_for_tree[@] .map = (fn)=> replacement = fn(@) if replacement == false then return nil