From a7d912a33ae1f6857e5abbda1997bcf1fff2a83d Mon Sep 17 00:00:00 2001 From: Bruce Hill Date: Tue, 18 Dec 2018 19:30:01 -0800 Subject: [PATCH] Initial pass on updating syntax. --- core/metaprogramming.nom | 42 ++++------- core/operators.nom | 2 +- examples/how_do_i.nom | 2 +- lib/object.nom | 4 +- nomsu.1.peg | 110 +++++++++++++-------------- nomsu.2.peg | 124 +++++++++++++++---------------- nomsu.3.peg | 132 ++++++++++++++++----------------- nomsu.4.peg | 148 ++++++++++++++++++------------------- nomsu.5.peg | 156 +++++++++++++++++++-------------------- nomsu_compiler.lua | 136 ++++++++++++++++++---------------- nomsu_compiler.moon | 126 ++++++++++++++----------------- nomsu_decompiler.lua | 142 +++++++++++++++++++++-------------- nomsu_decompiler.moon | 117 ++++++++++++++++++----------- parser.lua | 32 +++++--- parser.moon | 35 ++++++--- syntax_tree.lua | 12 +-- syntax_tree.moon | 12 +-- 17 files changed, 702 insertions(+), 630 deletions(-) diff --git a/core/metaprogramming.nom b/core/metaprogramming.nom index 21de3df..6d9ced4 100644 --- a/core/metaprogramming.nom +++ b/core/metaprogramming.nom @@ -51,16 +51,6 @@ lua> " compile.action["->"] = compile.action["1 ->"] compile.action["for"] = compile.action["1 ->"]" -lua> " - compile.action["what 1 compiles to"] = function(compile, \$action) - local lua = LuaCode("compile.action[", \$action.stub:as_lua(), "](") - local lua_args = table.map(\$action:get_args(), compile) - table.insert(lua_args, 1, "compile") - lua:concat_add(lua_args, ", ") - lua:add(")") - return lua - end" - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ test: @@ -90,7 +80,7 @@ lua> " if \$body.type == "Text" then \$body = SyntaxTree{source=\$body.source, type="Action", "Lua", \$body} end - return LuaCode("compile.action[", \$action.stub:as_lua(), + return LuaCode("compile.action[", \$action:get_stub():as_lua(), "] = ", \(\($args -> $body) as lua)) end" @@ -107,13 +97,13 @@ lua> " for i=2,#\$actions do local alias = \$actions[i] local \$alias_args = List{\(\$compile), unpack(alias:get_args())} - lua:add("\\ncompile.action[", alias.stub:as_lua(), "] = ") + lua:add("\\ncompile.action[", alias:get_stub():as_lua(), "] = ") if \$alias_args == \$args then - lua:add("compile.action[", \$actions[1].stub:as_lua(), "]") + lua:add("compile.action[", \$actions[1]:get_stub():as_lua(), "]") else lua:add("function(") lua:concat_add(table.map(\$alias_args, compile), ", ") - lua:add(") return compile.action[", \$actions[1].stub:as_lua(), "](") + lua:add(") return compile.action[", \$actions[1]:get_stub():as_lua(), "](") lua:concat_add(\$compiled_args, ", ") lua:add(") end") end @@ -138,10 +128,10 @@ test: local lua = LuaCode() if \$action.type == "MethodCall" then - lua:add(compile(\$action[1]), ".", \$action[2].stub:as_lua_id()) + lua:add(compile(\$action[1]), ".", \$action[2]:get_stub():as_lua_id()) elseif \$action.type == "Action" then - lua:add(\$action.stub:as_lua_id()) - lua:add_free_vars({\$action.stub:as_lua_id()}) + lua:add(\$action:get_stub():as_lua_id()) + lua:add_free_vars({\$action:get_stub():as_lua_id()}) else compile_error_at(\$action, "Expected an action or method call here") end @@ -152,17 +142,17 @@ test: lua> " local lua = \(\($actions.1 means $body) as lua) local first_def = (\$actions[1].type == "MethodCall" - and LuaCode(compile(\$actions[1][1]), ".", \$actions[1].stub:as_lua_id()) - or LuaCode(\$actions[1].stub:as_lua_id())) + and LuaCode(compile(\$actions[1][1]), ".", \$actions[1]:get_stub():as_lua_id()) + or LuaCode(\$actions[1]:get_stub():as_lua_id())) local \$args = List(\$actions[1]:get_args()) for i=2,#\$actions do local alias = \$actions[i] local \$alias_args = List(alias:get_args()) lua:add("\\n") if alias.type == "MethodCall" then - lua:add(compile(alias[1]), ".", alias.stub:as_lua_id()) + lua:add(compile(alias[1]), ".", alias:get_stub():as_lua_id()) else - lua:add(alias.stub:as_lua_id()) + lua:add(alias:get_stub():as_lua_id()) lua:add_free_vars({alias_name}) end if \$args == \$alias_args then @@ -185,19 +175,19 @@ test: (externally $action means $body) compiles to: lua> " local lua = \(\($action means $body) as lua) - lua:remove_free_vars({\$action.stub:as_lua_id()}) + lua:remove_free_vars({\$action:get_stub():as_lua_id()}) return lua" (externally $actions all mean $body) compiles to: lua> " local lua = \(\($actions all mean $body) as lua) - lua:remove_free_vars(table.map(\$actions, function(a) return a.stub:as_lua_id() end)) + lua:remove_free_vars(table.map(\$actions, function(a) return a:get_stub():as_lua_id() end)) return lua" test: assume (((say $)'s meaning) == (=lua "say")) -($action's meaning) compiles to (Lua ($action.stub|as lua id)) +($action's meaning) compiles to (Lua (($action|get stub)|as lua id)) test: (swap $x and $y) parses as (..) do: @@ -330,7 +320,7 @@ externally ($tree with vars $replacements) means (..) externally (match $tree with $patt) means: lua> " if \$patt.type == "Var" then return Dict{[\$patt[1]]=\$tree} end - if \$patt.type == "Action" and \$patt.stub ~= \$tree.stub then return nil end + if \$patt.type == "Action" and \$patt:get_stub() ~= \$tree:get_stub() then return nil end if #\$patt ~= #\$tree then return nil end local matches = Dict{} for \($i)=1,#\$patt do @@ -412,7 +402,7 @@ test: (Nomsu compiler version) compiles to "NOMSU_COMPILER_VERSION" (core version) compiles to "NOMSU_CORE_VERSION" (lib version) compiles to "NOMSU_LIB_VERSION" -(command line args) compiles to "command_line_args" +(command line args) compiles to "COMMAND_LINE_ARGS" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/core/operators.nom b/core/operators.nom index 91a09ab..1bb9f0b 100644 --- a/core/operators.nom +++ b/core/operators.nom @@ -68,7 +68,7 @@ test: if \$var.type == 'Var' then lua:add_free_vars({var_lua:text()}) end - lua:add(' = ', \($value as lua expr), ';') + lua:add(' = ', \($value as lua expr)) end return lua" diff --git a/examples/how_do_i.nom b/examples/how_do_i.nom index 00fc7a2..a296849 100644 --- a/examples/how_do_i.nom +++ b/examples/how_do_i.nom @@ -63,7 +63,7 @@ say " ..start the next line with two periods, like that. Similarly, you can put a long interpolated indented value like: \(..) - 100 + 200 + 300 + 400 + 500 + 600 + 700 + 800 + 900 + 1000 + 2000 + 3000 + 4000 + 5000 + 6000 + 7000 + 8000 + 9000 ..between a backslash and two periods." say "Single-line text can contain escape sequences like \", \\, \000, and \n" diff --git a/lib/object.nom b/lib/object.nom index a77af3f..6f16410 100644 --- a/lib/object.nom +++ b/lib/object.nom @@ -60,7 +60,7 @@ test: local fn_name = \$actions[1].stub:as_lua_id() local \$args = List(\$actions[1]:get_args()) table.insert(\$args, 1, \(\$me)) - local lua = LuaCode("class.", fn_name, " = ", \(what ($args -> $body) compiles to)) + local lua = LuaCode("class.", fn_name, " = ", \(\($args -> $body) as lua)) for i=2,#\$actions do local alias = \$actions[i] local alias_name = alias.stub:as_lua_id() @@ -70,7 +70,7 @@ test: if \$args == \$alias_args then lua:add("class.", fn_name) else - lua:add(\(what ($alias_args -> $actions.1) compiles to)) + lua:add(\(\($alias_args -> $actions.1) as lua)) end end return lua" diff --git a/nomsu.1.peg b/nomsu.1.peg index 5d881b3..88983bb 100644 --- a/nomsu.1.peg +++ b/nomsu.1.peg @@ -1,5 +1,5 @@ -- Nomsu version 1 -file (FileChunks): +file (FileChunks) <- {:curr_indent: ' '* :} ("#!" (!"nomsu" [^%nl])* "nomsu" ws+ "-V" ws* {:version: [0-9.]+ :} [^%nl]*)? comment? blank_lines? @@ -8,108 +8,108 @@ file (FileChunks): {:curr_indent: %nil :} !. -nodent: (unexpected_indent [^%nl]* / =curr_indent) -indent: =curr_indent " " -blank_lines: %nl ((nodent comment / ws*) %nl)* -eol: ws* eol_comment? (!. / &%nl) +nodent <- (unexpected_indent [^%nl]* / =curr_indent) +indent <- =curr_indent " " +blank_lines <- %nl ((nodent comment / ws*) %nl)* +eol <- ws* eol_comment? (!. / &%nl) -nl_nodent: blank_lines nodent -nl_indent: blank_lines {:curr_indent: indent :} (comment nl_nodent)* +nl_nodent <- blank_lines nodent +nl_indent <- blank_lines {:curr_indent: indent :} (comment nl_nodent)* -comment (Comment): +comment (Comment) <- "#" {~ [^%nl]* (%nl+ (indent -> '') [^%nl]*)* ~} -eol_comment (Comment): +eol_comment (Comment) <- "#" {[^%nl]*} -unexpected_code: ws* _unexpected_code -_unexpected_code (Error): +unexpected_code <- ws* _unexpected_code +_unexpected_code (Error) <- {:error: {~ [^%nl]+ -> "Couldn't parse this code" ~} :} -unexpected_indent (Error): +unexpected_indent (Error) <- {:error: {~ (=curr_indent ws+) -> "Messed up indentation" ~} :} {:hint: {~ '' -> 'Either make sure this line is aligned with the one above it, or make sure the previous line ends with something that uses indentation, like ":" or "(..)"' ~} :} -missing_paren_err (Error): +missing_paren_err (Error) <- {:error: {~ eol -> 'Line ended without finding a closing )-parenthesis' ~} :} {:hint: {~ '' -> 'Put a ")" here' ~} :} -missing_quote_err (Error): +missing_quote_err (Error) <- {:error: {~ eol -> 'Line ended before finding a closing double quotation mark' ~} :} {:hint: {~ "" -> "Put a quotation mark here" ~} :} -missing_bracket_error (Error): +missing_bracket_error (Error) <- {:error: {~ eol -> "Line ended before finding a closing ]-bracket" ~} :} {:hint: {~ '' -> 'Put a "]" here' ~} :} -missing_brace_error (Error): +missing_brace_error (Error) <- {:error: {~ eol -> "Line ended before finding a closing }-brace" ~} :} {:hint: {~ '' -> 'Put a "}" here' ~} :} -missing_block_expr_error (Error): +missing_block_expr_error (Error) <- {:error: '' -> "Missing expression after the ':'" :} -chunk: block / action / expression -chunk_delimeter: ("~")^+3 eol +chunk <- block / action / expression +chunk_delimeter <- ("~")^+3 eol -inline_block (Block): +inline_block (Block) <- inline_statement (ws* ";" ws* inline_statement)+ -block (Block): +block (Block) <- statement (nl_nodent statement)+ (%nl (ws* %nl)* nodent (comment / eol / unexpected_code))* {:curr_indent: %nil :} -statement: (action / expression) (eol / unexpected_code) -inline_statement: (inline_action / inline_expression) +statement <- (action / expression) (eol / unexpected_code) +inline_statement <- (inline_action / inline_expression) -noindex_inline_expression: +noindex_inline_expression <- number / variable / inline_text / inline_list / inline_dict / inline_nomsu / ( "(" ws* (inline_block / inline_action / inline_expression) ws* (ws* ',' ws* (inline_block / inline_action / inline_expression) ws*)* (")" / missing_paren_err / unexpected_code) ) -inline_expression: index_chain / noindex_inline_expression -indented_expression: +inline_expression <- index_chain / noindex_inline_expression +indented_expression <- indented_text / indented_nomsu / indented_list / indented_dict / ({| ("(..)")? nl_indent (block / action / expression) (eol / unexpected_code) (%nl (ws* %nl)* nodent (comment / eol / unexpected_code))* {:curr_indent: %nil :} |} -> unpack) -expression: +expression <- inline_expression / (":" ws* (inline_block / inline_action / inline_expression / missing_block_expr_error)) / indented_expression -inline_nomsu (EscapedNomsu): "\" inline_expression -indented_nomsu (EscapedNomsu): +inline_nomsu (EscapedNomsu) <- "\" inline_expression +indented_nomsu (EscapedNomsu) <- "\" ( noindex_inline_expression / (":" ws* (inline_block / inline_action / inline_expression / missing_block_expr_error)) / indented_expression) -index_chain (IndexChain): +index_chain (IndexChain) <- noindex_inline_expression ("." (text_word / noindex_inline_expression))+ -- Actions need either at least 1 word, or at least 2 tokens -inline_action (Action): +inline_action (Action) <- !chunk_delimeter ( (inline_expression (ws* (inline_expression / word))+) / (word (ws* (inline_expression / word))*)) (ws* ":" ws* (inline_block / inline_action / inline_expression / missing_block_expr_error))? -action (Action): +action (Action) <- !chunk_delimeter ( (expression ((nl_nodent "..")? ws* (expression / word))+) / (word ((nl_nodent "..")? ws* (expression / word))*)) -word: !number { operator_char+ / ident_char+ } +word <- !number { operator_char+ / ident_char+ } -text_word (Text): word +text_word (Text) <- word -inline_text (Text): +inline_text (Text) <- !(indented_text) '"' ({~ (('\"' -> '"') / ('\\' -> '\') / escaped_char / [^%nl\"])+ ~} / inline_text_interpolation)* ('"' / missing_quote_err / unexpected_code) -inline_text_interpolation: +inline_text_interpolation <- "\" ( variable / inline_list / inline_dict / inline_text / ("(" @@ -118,63 +118,63 @@ inline_text_interpolation: (")" / missing_paren_err / unexpected_code)) ) -indented_text (Text): +indented_text (Text) <- '".."' eol %nl {%nl+}? {:curr_indent: indent :} (indented_plain_text / text_interpolation / {~ %nl+ (=curr_indent -> "") ~})* unexpected_code? {:curr_indent: %nil :} -indented_plain_text (Text): +indented_plain_text (Text) <- {~ (("\\" -> "\") / (("\" blank_lines =curr_indent "..") -> "") / (!text_interpolation "\") / [^%nl\]+)+ (%nl+ (=curr_indent -> ""))* ~} -text_interpolation: +text_interpolation <- inline_text_interpolation / ("\" indented_expression blank_lines =curr_indent "..") -number (Number): (("-"? (([0-9]+ "." [0-9]+) / ("." [0-9]+) / ([0-9]+)))-> tonumber) +number (Number) <- (("-"? (([0-9]+ "." [0-9]+) / ("." [0-9]+) / ([0-9]+)))-> tonumber) -- Variables can be nameless (i.e. just %) and can't contain operators like apostrophe -- which is a hack to allow %'s to parse as "%" and "' s" separately -variable (Var): "%" {(ident_char+ ((!"'" operator_char+) / ident_char+)*)?} +variable (Var) <- "%" {(ident_char+ ((!"'" operator_char+) / ident_char+)*)?} -inline_list (List): +inline_list (List) <- !('[..]') "[" ws* (inline_list_item (ws* ',' ws* inline_list_item)* (ws* ',')?)? ws* ("]" / (","? (missing_bracket_error / unexpected_code))) -indented_list (List): +indented_list (List) <- "[..]" eol nl_indent list_line (nl_nodent list_line)* (%nl (ws* %nl)* nodent (comment / eol / unexpected_code))* (","? unexpected_code)? -list_line: +list_line <- (inline_list_item ws* "," ws*)+ eol / (inline_list_item ws* "," ws*)* (action / expression) eol -inline_list_item: inline_block / inline_action / inline_expression +inline_list_item <- inline_block / inline_action / inline_expression -inline_dict (Dict): +inline_dict (Dict) <- !('{..}') "{" ws* (inline_dict_entry (ws* ',' ws* inline_dict_entry)*)? ws* ("}" / (","? (missing_brace_error / unexpected_code))) -indented_dict (Dict): +indented_dict (Dict) <- "{..}" eol nl_indent dict_line (nl_nodent dict_line)* (%nl (ws* %nl)* nodent (comment / eol / unexpected_code))* (","? unexpected_code)? -dict_line: +dict_line <- (inline_dict_entry ws* "," ws*)+ eol / (inline_dict_entry ws* "," ws*)* dict_entry eol -dict_entry(DictEntry): +dict_entry(DictEntry) <- dict_key (ws* ":" ws* (action / expression))? -inline_dict_entry(DictEntry): +inline_dict_entry(DictEntry) <- dict_key (ws* ":" ws* (inline_block / inline_action / inline_expression)?)? -dict_key: +dict_key <- text_word / inline_expression -operator_char: ['`~!@$^&*+=|<>?/-] -ident_char: [a-zA-Z0-9_] / %utf8_char -ws: [ %tab] +operator_char <- ['`~!@$^&*+=|<>?/-] +ident_char <- [a-zA-Z0-9_] / %utf8_char +ws <- [ %tab] -escaped_char: +escaped_char <- ("\"->'') ( (([xX]->'') ((({[0-9a-fA-F]^2} %number_16) -> tonumber) -> tochar)) / ((([0-9] [0-9]^-2) -> tonumber) -> tochar) diff --git a/nomsu.2.peg b/nomsu.2.peg index fd64fec..b6d1687 100644 --- a/nomsu.2.peg +++ b/nomsu.2.peg @@ -1,14 +1,14 @@ -- Nomsu version 2 -file: +file <- {:curr_indent: ' '* :} (((action / expression / inline_block / indented_block) eol !.) / file_chunks / empty_block) {:curr_indent: %nil :} !. -shebang: "#!" (!"nomsu" [^%nl])* "nomsu" ws+ "-V" ws* {:version: [0-9.]+ :} [^%nl]* +shebang <- "#!" (!"nomsu" [^%nl])* "nomsu" ws+ "-V" ws* {:version: [0-9.]+ :} [^%nl]* -file_chunks (FileChunks): +file_chunks (FileChunks) <- {:curr_indent: ' '* :} shebang? comment? blank_lines? (top_block (nl_nodent section_division top_block)*) @@ -16,115 +16,115 @@ file_chunks (FileChunks): ws* unexpected_chunk? {:curr_indent: %nil :} -top_block (Block): +top_block (Block) <- {:curr_indent: ' '* :} comment? blank_lines? statement (nl_nodent statement)* {:curr_indent: %nil :} -empty_block (Block): +empty_block (Block) <- {:curr_indent: ' '* :} comment? blank_lines? {:curr_indent: %nil :} -nodent: (unexpected_indent [^%nl]* / =curr_indent) -indent: =curr_indent " " -blank_lines: %nl ((nodent comment / ws*) %nl)* -eol: ws* eol_comment? (!. / &%nl) +nodent <- (unexpected_indent [^%nl]* / =curr_indent) +indent <- =curr_indent " " +blank_lines <- %nl ((nodent comment / ws*) %nl)* +eol <- ws* eol_comment? (!. / &%nl) -nl_nodent: blank_lines nodent -nl_indent: blank_lines {:curr_indent: indent :} (comment nl_nodent)* +nl_nodent <- blank_lines nodent +nl_indent <- blank_lines {:curr_indent: indent :} (comment nl_nodent)* -comment (Comment): +comment (Comment) <- "#" {~ [^%nl]* (%nl+ (indent -> '') [^%nl]*)* ~} -eol_comment (Comment): +eol_comment (Comment) <- "#" {[^%nl]*} -unexpected_code: ws* _unexpected_code -_unexpected_code (Error): +unexpected_code <- ws* _unexpected_code +_unexpected_code (Error) <- {:error: {~ [^%nl]+ -> "Couldn't parse this code" ~} :} -unexpected_chunk (Error): +unexpected_chunk (Error) <- {:error: {~ .+ -> "Couldn't parse this code" ~} :} -unexpected_indent (Error): +unexpected_indent (Error) <- {:error: {~ (=curr_indent ws+) -> "Messed up indentation" ~} :} {:hint: {~ '' -> 'Either make sure this line is aligned with the one above it, or make sure the previous line ends with something that uses indentation, like ":" or "(..)"' ~} :} -missing_paren_err (Error): +missing_paren_err (Error) <- {:error: {~ eol -> 'Line ended without finding a closing )-parenthesis' ~} :} {:hint: {~ '' -> 'Put a ")" here' ~} :} -missing_quote_err (Error): +missing_quote_err (Error) <- {:error: {~ eol -> 'Line ended before finding a closing double quotation mark' ~} :} {:hint: {~ "" -> "Put a quotation mark here" ~} :} -missing_bracket_error (Error): +missing_bracket_error (Error) <- {:error: {~ eol -> "Line ended before finding a closing ]-bracket" ~} :} {:hint: {~ '' -> 'Put a "]" here' ~} :} -missing_brace_error (Error): +missing_brace_error (Error) <- {:error: {~ eol -> "Line ended before finding a closing }-brace" ~} :} {:hint: {~ '' -> 'Put a "}" here' ~} :} -section_division: ("~")^+3 eol +section_division <- ("~")^+3 eol -inline_block: +inline_block <- "(" ws* inline_block ws* ")" / raw_inline_block -raw_inline_block (Block): +raw_inline_block (Block) <- ":" ws* ((inline_statement (ws* ";" ws* inline_statement)*) / !(eol nl_indent)) -indented_block (Block): +indented_block (Block) <- ":" eol nl_indent statement (nl_nodent statement)* (%nl (ws* %nl)* nodent (comment / eol / unexpected_code))* {:curr_indent: %nil :} -statement: +statement <- (action / expression) (eol / unexpected_code) -inline_statement: (inline_action / inline_expression) +inline_statement <- (inline_action / inline_expression) -noindex_inline_expression: +noindex_inline_expression <- number / variable / inline_text / inline_list / inline_dict / inline_nomsu / ( "(" ws* (inline_action / inline_expression) ws* (ws* ',' ws* (inline_action / inline_expression) ws*)* (")" / missing_paren_err / unexpected_code) ) -inline_expression: index_chain / noindex_inline_expression -indented_expression: +inline_expression <- index_chain / noindex_inline_expression +indented_expression <- indented_text / indented_nomsu / indented_list / indented_dict / ({| "(..)" nl_indent (action / expression) (eol / unexpected_code) (%nl (ws* %nl)* nodent (comment / eol / unexpected_code))* {:curr_indent: %nil :} |} -> unpack) -expression: +expression <- inline_expression / indented_expression -inline_nomsu (EscapedNomsu): "\" (inline_expression / inline_block) -indented_nomsu (EscapedNomsu): +inline_nomsu (EscapedNomsu) <- "\" (inline_expression / inline_block) +indented_nomsu (EscapedNomsu) <- "\" (noindex_inline_expression / inline_block / indented_expression / indented_block) -index_chain (IndexChain): +index_chain (IndexChain) <- noindex_inline_expression ("." (text_word / noindex_inline_expression))+ -- Actions need either at least 1 word, or at least 2 tokens -inline_action (Action): +inline_action (Action) <- !section_division ( (inline_arg (ws* (inline_arg / word))+) / (word (ws* (inline_arg / word))*)) (ws* inline_block)? -inline_arg: inline_expression / inline_block -action (Action): +inline_arg <- inline_expression / inline_block +action (Action) <- !section_division ( (arg ((nl_nodent "..")? ws* (arg / word))+) / (word ((nl_nodent "..")? ws* (arg / word))*)) -arg: expression / inline_block / indented_block +arg <- expression / inline_block / indented_block -word: !number { operator_char+ / ident_char+ } +word <- !number { operator_char+ / ident_char+ } -text_word (Text): word +text_word (Text) <- word -inline_text (Text): +inline_text (Text) <- !(indented_text) ('"' _inline_text* ('"' / missing_quote_err / unexpected_code)) -_inline_text: +_inline_text <- {~ (('\"' -> '"') / ('\\' -> '\') / escaped_char / [^%nl\"]+)+ ~} / inline_text_interpolation -inline_text_interpolation: +inline_text_interpolation <- "\" ( variable / inline_list / inline_dict / inline_text / ("(" @@ -133,64 +133,64 @@ inline_text_interpolation: (")" / missing_paren_err / unexpected_code)) ) -indented_text (Text): +indented_text (Text) <- '".."' eol %nl {%nl+}? {:curr_indent: indent :} (indented_plain_text / text_interpolation / {~ %nl+ (=curr_indent -> "") ~})* unexpected_code? {:curr_indent: %nil :} -- Tracking text-lines-within-indented-text as separate objects allows for better debugging line info -indented_plain_text (Text): +indented_plain_text (Text) <- {~ (("\\" -> "\") / (("\" blank_lines =curr_indent "..") -> "") / (!text_interpolation "\") / [^%nl\]+)+ (%nl+ (=curr_indent -> ""))* ~} -text_interpolation: +text_interpolation <- inline_text_interpolation / ("\" indented_expression (blank_lines =curr_indent "..")?) -number (Number): (("-"? (([0-9]+ "." [0-9]+) / ("." [0-9]+) / "0x" [0-9a-fA-F]+ / ([0-9]+)))-> tonumber) +number (Number) <- (("-"? (([0-9]+ "." [0-9]+) / ("." [0-9]+) / "0x" [0-9a-fA-F]+ / ([0-9]+)))-> tonumber) -- Variables can be nameless (i.e. just %) and can only contain identifier chars. -- This ensures you don't get weird parsings of `%x+%y` or `%'s thing`. -variable (Var): "%" {ident_char*} +variable (Var) <- "%" {ident_char*} -inline_list (List): +inline_list (List) <- !('[..]') "[" ws* (inline_list_item (ws* ',' ws* inline_list_item)* (ws* ',')?)? ws* ("]" / (","? (missing_bracket_error / unexpected_code))) -indented_list (List): +indented_list (List) <- "[..]" eol nl_indent list_line (nl_nodent list_line)* (%nl (ws* %nl)* nodent (comment / eol / unexpected_code))* (","? unexpected_code)? -list_line: +list_line <- (inline_list_item ws* "," ws*)+ eol / (inline_list_item ws* "," ws*)* (action / expression) eol -inline_list_item: inline_action / inline_expression +inline_list_item <- inline_action / inline_expression -inline_dict (Dict): +inline_dict (Dict) <- !('{..}') "{" ws* (inline_dict_entry (ws* ',' ws* inline_dict_entry)*)? ws* ("}" / (","? (missing_brace_error / unexpected_code))) -indented_dict (Dict): +indented_dict (Dict) <- "{..}" eol nl_indent dict_line (nl_nodent dict_line)* (%nl (ws* %nl)* nodent (comment / eol / unexpected_code))* (","? unexpected_code)? -dict_line: +dict_line <- (inline_dict_entry ws* "," ws*)+ eol / (inline_dict_entry ws* "," ws*)* dict_entry eol -dict_entry(DictEntry): +dict_entry(DictEntry) <- dict_key (ws* ":" ws* (action / expression))? -inline_dict_entry(DictEntry): +inline_dict_entry(DictEntry) <- dict_key (ws* ":" ws* (inline_action / inline_expression)?)? -dict_key: +dict_key <- text_word / inline_expression -operator_char: ['`~!@$^&*+=|<>?/-] -ident_char: [a-zA-Z0-9_] / %utf8_char -ws: [ %tab] +operator_char <- ['`~!@$^&*+=|<>?/-] +ident_char <- [a-zA-Z0-9_] / %utf8_char +ws <- [ %tab] -escaped_char: +escaped_char <- ("\"->'') ( (([xX]->'') ((({[0-9a-fA-F]^2} %number_16) -> tonumber) -> tochar)) / ((([0-9] [0-9]^-2) -> tonumber) -> tochar) diff --git a/nomsu.3.peg b/nomsu.3.peg index d43b404..d0b2dc9 100644 --- a/nomsu.3.peg +++ b/nomsu.3.peg @@ -1,14 +1,14 @@ -- Nomsu version 3 -file: +file <- {:curr_indent: ' '* :} (((action / expression / inline_block / indented_block) eol !.) / file_chunks / empty_block) {:curr_indent: %nil :} !. -shebang: "#!" (!"nomsu" [^%nl])* "nomsu" ws+ "-V" ws* {:version: [0-9.]+ :} [^%nl]* +shebang <- "#!" (!"nomsu" [^%nl])* "nomsu" ws+ "-V" ws* {:version: [0-9.]+ :} [^%nl]* -file_chunks (FileChunks): +file_chunks (FileChunks) <- {:curr_indent: ' '* :} shebang? comment? blank_lines? (top_block (nl_nodent section_division top_block)*) @@ -16,122 +16,122 @@ file_chunks (FileChunks): ws* unexpected_chunk? {:curr_indent: %nil :} -top_block (Block): +top_block (Block) <- {:curr_indent: ' '* :} comment? blank_lines? statement (nl_nodent statement)* {:curr_indent: %nil :} -empty_block (Block): +empty_block (Block) <- {:curr_indent: ' '* :} comment? blank_lines? {:curr_indent: %nil :} -nodent: (unexpected_indent [^%nl]* / =curr_indent) -indent: =curr_indent " " -blank_lines: %nl ((nodent comment / ws*) %nl)* -eol: ws* eol_comment? (!. / &%nl) +nodent <- (unexpected_indent [^%nl]* / =curr_indent) +indent <- =curr_indent " " +blank_lines <- %nl ((nodent comment / ws*) %nl)* +eol <- ws* eol_comment? (!. / &%nl) -nl_nodent: blank_lines nodent -nl_indent: blank_lines {:curr_indent: indent :} (comment nl_nodent)* +nl_nodent <- blank_lines nodent +nl_indent <- blank_lines {:curr_indent: indent :} (comment nl_nodent)* -comment (Comment): +comment (Comment) <- "#" {~ [^%nl]* (%nl+ (indent -> '') [^%nl]*)* ~} -eol_comment (Comment): +eol_comment (Comment) <- "#" {[^%nl]*} -unexpected_code: ws* _unexpected_code -_unexpected_code (Error): +unexpected_code <- ws* _unexpected_code +_unexpected_code (Error) <- {:error: {~ [^%nl]+ -> "Couldn't parse this code" ~} :} -unexpected_chunk (Error): +unexpected_chunk (Error) <- {:error: {~ .+ -> "Couldn't parse this code" ~} :} -unexpected_indent (Error): +unexpected_indent (Error) <- {:error: {~ (=curr_indent ws+) -> "Messed up indentation" ~} :} {:hint: {~ '' -> 'Either make sure this line is aligned with the one above it, or make sure the previous line ends with something that uses indentation, like ":" or "(..)"' ~} :} -missing_paren_err (Error): +missing_paren_err (Error) <- {:error: {~ eol -> 'Line ended without finding a closing )-parenthesis' ~} :} {:hint: {~ '' -> 'Put a ")" here' ~} :} -missing_quote_err (Error): +missing_quote_err (Error) <- {:error: {~ eol -> 'Line ended before finding a closing double quotation mark' ~} :} {:hint: {~ "" -> "Put a quotation mark here" ~} :} -missing_bracket_error (Error): +missing_bracket_error (Error) <- {:error: {~ eol -> "Line ended before finding a closing ]-bracket" ~} :} {:hint: {~ '' -> 'Put a "]" here' ~} :} -missing_brace_error (Error): +missing_brace_error (Error) <- {:error: {~ eol -> "Line ended before finding a closing }-brace" ~} :} {:hint: {~ '' -> 'Put a "}" here' ~} :} -section_division: ("~")^+3 eol +section_division <- ("~")^+3 eol -inline_block: +inline_block <- "(" ws* inline_block ws* ")" / raw_inline_block -raw_inline_block (Block): +raw_inline_block (Block) <- (!"::") ":" ws* ((inline_statement (ws* ";" ws* inline_statement)*) / !(eol nl_indent)) -indented_block (Block): +indented_block (Block) <- ":" eol nl_indent statement (nl_nodent statement)* (%nl (ws* %nl)* nodent (comment / eol / unexpected_code))* {:curr_indent: %nil :} -statement: +statement <- (action / expression) (eol / unexpected_code) -inline_statement: (inline_action / inline_expression) +inline_statement <- (inline_action / inline_expression) -noindex_inline_expression: +noindex_inline_expression <- number / variable / inline_text / inline_list / inline_dict / inline_nomsu / ( "(" ws* (inline_action / inline_expression) ws* (ws* ',' ws* (inline_action / inline_expression) ws*)* (")" / missing_paren_err / unexpected_code) ) -inline_expression: index_chain / noindex_inline_expression -indented_expression: +inline_expression <- index_chain / noindex_inline_expression +indented_expression <- indented_text / indented_nomsu / indented_list / indented_dict / ({| "(..)" nl_indent (action / expression) (eol / unexpected_code) (%nl (ws* %nl)* nodent (comment / eol / unexpected_code))* {:curr_indent: %nil :} |} -> unpack) -expression: +expression <- inline_expression / indented_expression -inline_nomsu (EscapedNomsu): "\" (inline_expression / inline_block) -indented_nomsu (EscapedNomsu): +inline_nomsu (EscapedNomsu) <- "\" (inline_expression / inline_block) +indented_nomsu (EscapedNomsu) <- "\" (noindex_inline_expression / inline_block / indented_expression / indented_block) -index_chain (IndexChain): +index_chain (IndexChain) <- noindex_inline_expression ("." (text_word / noindex_inline_expression))+ -inline_action: inline_methodcall / _inline_action -inline_methodcall: +inline_action <- inline_methodcall / _inline_action +inline_methodcall <- inline_arg ws* "::" ws* _inline_action -- Actions need either at least 1 word, or at least 2 tokens -_inline_action (Action): +_inline_action (Action) <- !section_division ( (inline_arg (ws* (inline_arg / word))+) / (word (ws* (inline_arg / word))*)) (ws* inline_block)? -inline_arg: inline_expression / inline_block +inline_arg <- inline_expression / inline_block -action: methodcall / _action -methodcall: +action <- methodcall / _action +methodcall <- arg (nl_nodent "..")? ws* "::" (nl_nodent "..")? ws* _action -_action (Action): +_action (Action) <- !section_division ( (arg ((nl_nodent "..")? ws* (arg / word))+) / (word ((nl_nodent "..")? ws* (arg / word))*)) -arg: expression / inline_block / indented_block +arg <- expression / inline_block / indented_block -word: !number { operator_char+ / ident_char+ } +word <- !number { operator_char+ / ident_char+ } -text_word (Text): word +text_word (Text) <- word -inline_text (Text): +inline_text (Text) <- !(indented_text) ('"' _inline_text* ('"' / missing_quote_err / unexpected_code)) -_inline_text: +_inline_text <- {~ (('\"' -> '"') / ('\\' -> '\') / escaped_char / [^%nl\"]+)+ ~} / inline_text_interpolation -inline_text_interpolation: +inline_text_interpolation <- "\" ( variable / inline_list / inline_dict / inline_text / ("(" @@ -140,64 +140,64 @@ inline_text_interpolation: (")" / missing_paren_err / unexpected_code)) ) -indented_text (Text): +indented_text (Text) <- '".."' eol %nl {%nl+}? {:curr_indent: indent :} (indented_plain_text / text_interpolation / {~ %nl+ (=curr_indent -> "") ~})* unexpected_code? {:curr_indent: %nil :} -- Tracking text-lines-within-indented-text as separate objects allows for better debugging line info -indented_plain_text (Text): +indented_plain_text (Text) <- {~ (("\\" -> "\") / (("\" blank_lines =curr_indent "..") -> "") / (!text_interpolation "\") / [^%nl\]+)+ (%nl+ (=curr_indent -> ""))* ~} -text_interpolation: +text_interpolation <- inline_text_interpolation / ("\" indented_expression (blank_lines =curr_indent "..")?) -number (Number): (("-"? (([0-9]+ "." [0-9]+) / ("." [0-9]+) / "0x" [0-9a-fA-F]+ / ([0-9]+)))-> tonumber) +number (Number) <- (("-"? (([0-9]+ "." [0-9]+) / ("." [0-9]+) / "0x" [0-9a-fA-F]+ / ([0-9]+)))-> tonumber) -- Variables can be nameless (i.e. just %) and can only contain identifier chars. -- This ensures you don't get weird parsings of `%x+%y` or `%'s thing`. -variable (Var): "%" {ident_char*} +variable (Var) <- "%" {ident_char*} -inline_list (List): +inline_list (List) <- !('[..]') "[" ws* (inline_list_item (ws* ',' ws* inline_list_item)* (ws* ',')?)? ws* ("]" / (","? (missing_bracket_error / unexpected_code))) -indented_list (List): +indented_list (List) <- "[..]" eol nl_indent list_line (nl_nodent list_line)* (%nl (ws* %nl)* nodent (comment / eol / unexpected_code))* (","? unexpected_code)? -list_line: +list_line <- (inline_list_item ws* "," ws*)+ eol / (inline_list_item ws* "," ws*)* (action / expression) eol -inline_list_item: inline_action / inline_expression +inline_list_item <- inline_action / inline_expression -inline_dict (Dict): +inline_dict (Dict) <- !('{..}') "{" ws* (inline_dict_entry (ws* ',' ws* inline_dict_entry)*)? ws* ("}" / (","? (missing_brace_error / unexpected_code))) -indented_dict (Dict): +indented_dict (Dict) <- "{..}" eol nl_indent dict_line (nl_nodent dict_line)* (%nl (ws* %nl)* nodent (comment / eol / unexpected_code))* (","? unexpected_code)? -dict_line: +dict_line <- (inline_dict_entry ws* "," ws*)+ eol / (inline_dict_entry ws* "," ws*)* dict_entry eol -dict_entry(DictEntry): +dict_entry(DictEntry) <- dict_key (ws* ":" ws* (action / expression))? -inline_dict_entry(DictEntry): +inline_dict_entry(DictEntry) <- dict_key (ws* ":" ws* (inline_action / inline_expression)?)? -dict_key: +dict_key <- text_word / inline_expression -operator_char: ['`~!@$^&*+=|<>?/-] -ident_char: [a-zA-Z0-9_] / %utf8_char -ws: [ %tab] +operator_char <- ['`~!@$^&*+=|<>?/-] +ident_char <- [a-zA-Z0-9_] / %utf8_char +ws <- [ %tab] -escaped_char: +escaped_char <- ("\"->'') ( (([xX]->'') ((({[0-9a-fA-F]^2} %number_16) -> tonumber) -> tochar)) / ((([0-9] [0-9]^-2) -> tonumber) -> tochar) diff --git a/nomsu.4.peg b/nomsu.4.peg index 5f84688..bcfc972 100644 --- a/nomsu.4.peg +++ b/nomsu.4.peg @@ -1,16 +1,16 @@ -- Nomsu version 4 -file: +file <- {:curr_indent: ' '* :} (((action / expression / inline_block / indented_block) eol !.) / file_chunks / empty_block) {:curr_indent: %nil :} !. -shebang: "#!" (!"nomsu" [^%nl])* "nomsu" ws+ "-V" ws* [0-9.]+ [^%nl]* (%nl / !.) +shebang <- "#!" (!"nomsu" [^%nl])* "nomsu" ws+ "-V" ws* [0-9.]+ [^%nl]* (%nl / !.) -eof: !. +eof <- !. -file_chunks (FileChunks): +file_chunks (FileChunks) <- {:curr_indent: ' '* :} {:shebang: shebang :}? (top_block (nl_nodent section_division top_block)*) @@ -18,132 +18,132 @@ file_chunks (FileChunks): ws* unexpected_chunk? {:curr_indent: %nil :} -top_block (Block): +top_block (Block) <- {:curr_indent: ' '* :} comment? blank_lines? statement (nl_nodent statement)* {:curr_indent: %nil :} -empty_block (Block): +empty_block (Block) <- {:curr_indent: ' '* :} comment? blank_lines? {:curr_indent: %nil :} -nodent: (unexpected_indent [^%nl]* / =curr_indent) -indent: {~ =curr_indent (ws / (%tab -> ' '))+ ~} -blank_lines: %nl ((nodent comment / ws*) %nl)* -eol: ws* eol_comment? (!. / &%nl) +nodent <- (unexpected_indent [^%nl]* / =curr_indent) +indent <- {~ =curr_indent (ws / (%tab -> ' '))+ ~} +blank_lines <- %nl ((nodent comment / ws*) %nl)* +eol <- ws* eol_comment? (!. / &%nl) -nl_nodent: blank_lines nodent -nl_indent: blank_lines tab_error? {:curr_indent: indent :} (comment nl_nodent)* +nl_nodent <- blank_lines nodent +nl_indent <- blank_lines tab_error? {:curr_indent: indent :} (comment nl_nodent)* -comment (Comment): +comment (Comment) <- "#" {~ [^%nl]* (%nl+ (indent -> '') [^%nl]*)* (%nl &%nl)* ~} -eol_comment (Comment): +eol_comment (Comment) <- "#" {[^%nl]*} -unexpected_code: ws* _unexpected_code -_unexpected_code (Error): +unexpected_code <- ws* _unexpected_code +_unexpected_code (Error) <- {:error: {~ [^%nl]+ -> "Couldn't parse this code" ~} :} -unexpected_chunk (Error): +unexpected_chunk (Error) <- {:error: {~ .+ -> "Couldn't parse this code" ~} :} -unexpected_indent (Error): +unexpected_indent (Error) <- {:error: {~ (=curr_indent ws+) -> "Messed up indentation" ~} :} {:hint: {~ '' -> 'Either make sure this line is aligned with the one above it, or make sure the previous line ends with something that uses indentation, like ":" or "(..)"' ~} :} -missing_paren_err (Error): +missing_paren_err (Error) <- {:error: {~ eol -> 'Line ended without finding a closing )-parenthesis' ~} :} {:hint: {~ '' -> 'Put a ")" here' ~} :} -missing_quote_err (Error): +missing_quote_err (Error) <- {:error: {~ eol -> 'Line ended before finding a closing double quotation mark' ~} :} {:hint: {~ "" -> "Put a quotation mark here" ~} :} -missing_bracket_error (Error): +missing_bracket_error (Error) <- {:error: {~ eol -> "Line ended before finding a closing ]-bracket" ~} :} {:hint: {~ '' -> 'Put a "]" here' ~} :} -missing_brace_error (Error): +missing_brace_error (Error) <- {:error: {~ eol -> "Line ended before finding a closing }-brace" ~} :} {:hint: {~ '' -> 'Put a "}" here' ~} :} -disallowed_interpolation (Error): +disallowed_interpolation (Error) <- {:error: {~ ("\" ('\:' / '(..)' / '[..]' / '{..}') (%nl (&(%nl) / =curr_indent ' ' [^%nl]*))*) -> "Sorry, indented text interpolations are not currently supported on the first line of multi-line text." ~} :} {:hint: {~ '' -> 'Move the code for the first line of text to the next line by ending this line with "\" and starting the next line indented with "..", followed by the code for the first line.' ~} :} -tab_error (Error): +tab_error (Error) <- &(=curr_indent %tab) {:error: {~ '' -> 'Tabs are not allowed for indentation.' ~} :} {:hint: {~ '' -> 'Use spaces instead of tabs.' ~} :} -section_division: ("~")^+3 eol +section_division <- ("~")^+3 eol -inline_block: +inline_block <- "(" ws* inline_block ws* (eof / ")") / raw_inline_block -raw_inline_block (Block): +raw_inline_block (Block) <- (!"::") ":" ws* ((inline_statement (ws* ";" ws* inline_statement)*) / !(eol nl_indent)) -indented_block (Block): +indented_block (Block) <- ":" eol nl_indent statement (nl_nodent statement)* (%nl (ws* %nl)* nodent (comment / eol / unexpected_code))* {:curr_indent: %nil :} -statement: +statement <- (action / expression) (eol / unexpected_code) -inline_statement: (inline_action / inline_expression) +inline_statement <- (inline_action / inline_expression) -noindex_inline_expression: +noindex_inline_expression <- number / variable / inline_text / inline_list / inline_dict / inline_nomsu / ( "(" ws* (inline_action / inline_expression) ws* (ws* ',' ws* (inline_action / inline_expression) ws*)* (")" / eof / missing_paren_err / unexpected_code) ) -inline_expression: index_chain / noindex_inline_expression -indented_expression: +inline_expression <- index_chain / noindex_inline_expression +indented_expression <- indented_text / indented_nomsu / indented_list / indented_dict / ({| "(..)" eol nl_indent (action / expression) (eol / unexpected_code) (%nl (ws* %nl)* nodent (comment / eol / unexpected_code))* {:curr_indent: %nil :} |} -> unpack) -expression: +expression <- inline_expression / indented_expression -inline_nomsu (EscapedNomsu): "\" (inline_expression / inline_block) -indented_nomsu (EscapedNomsu): +inline_nomsu (EscapedNomsu) <- "\" (inline_expression / inline_block) +indented_nomsu (EscapedNomsu) <- "\" (noindex_inline_expression / inline_block / indented_expression / indented_block) -index_chain (IndexChain): +index_chain (IndexChain) <- noindex_inline_expression ("." (text_word / noindex_inline_expression))+ -inline_action: inline_methodcall / _inline_action -inline_methodcall (MethodCall): +inline_action <- inline_methodcall / _inline_action +inline_methodcall (MethodCall) <- (inline_expression / "(" inline_block ")") ws* "::" ws* _inline_action -- Actions need either at least 1 word, or at least 2 tokens -_inline_action (Action): +_inline_action (Action) <- !section_division ( (inline_arg (ws* (inline_arg / word))+) / (word (ws* (inline_arg / word))*)) (ws* inline_block)? -inline_arg: inline_expression / inline_block / "(" ws* ")" +inline_arg <- inline_expression / inline_block / "(" ws* ")" -action: methodcall / _action -methodcall (MethodCall): +action <- methodcall / _action +methodcall (MethodCall) <- (expression / "(" inline_block ")" / indented_block) ((ws* "\")? eol nl_nodent "..")? ws* "::" ((ws* "\")? eol nl_nodent "..")? ws* _action -_action (Action): +_action (Action) <- !section_division ( (arg (((ws* "\")? eol nl_nodent "..")? ws* (arg / word))+) / (word (((ws* "\")? eol nl_nodent "..")? ws* (arg / word))*)) -arg: expression / inline_block / indented_block / "(" ws* ")" +arg <- expression / inline_block / indented_block / "(" ws* ")" -word: !number { operator_char+ / ident_char+ } +word <- !number { operator_char+ / ident_char+ } -text_word (Text): word +text_word (Text) <- word -inline_text (Text): +inline_text (Text) <- !(indented_text) '"' _inline_text* ('"' / eof / missing_quote_err / unexpected_code) -_inline_text: +_inline_text <- {~ (('\"' -> '"') / ('\\' -> '\') / escaped_char / text_char+)+ ~} / inline_text_interpolation / illegal_char -inline_text_interpolation: +inline_text_interpolation <- "\" ( variable / inline_list / inline_dict / ("(" @@ -152,14 +152,14 @@ inline_text_interpolation: (")" / eof / missing_paren_err / unexpected_code)) ) -text_char: %utf8_char / !["\] %print / %tab -illegal_char (Error): +text_char <- %utf8_char / !["\] %print / %tab +illegal_char (Error) <- {:error: {~ (!(%nl / %tab / %print) .) -> "Illegal unprintable character here (it may not be visible, but it's there)" ~} :} {:hint: {~ '' -> "This sort of thing can happen when copying and pasting code. Try deleting and retyping the code." ~} :} -nonterminal_quote: +nonterminal_quote <- '"' &([^%nl] / %nl+ =curr_indent) -indented_text (Text): +indented_text (Text) <- '"' _inline_text* (('\' %nl+ {:curr_indent: indent :} ('..')?) @@ -168,68 +168,68 @@ indented_text (Text): ('"' eol / eof / missing_quote_err) {:curr_indent: %nil :} -- Tracking text-lines-within-indented-text as separate objects allows for better debugging line info -indented_plain_text (Text): +indented_plain_text (Text) <- {~ ((("\" blank_lines =curr_indent "..") -> "") / ('\\' -> '\') / (!text_interpolation (escaped_char / '\')) / (nonterminal_quote / text_char)+)+ ~} -text_interpolation: +text_interpolation <- inline_text_interpolation / ("\" (indented_expression / indented_block) (blank_lines =curr_indent "..")?) -number (Number): +number (Number) <- (&("-"? "0x" [0-9a-fA-F]+) {:hex: '' -> 'yes' :})? (("-"? (([0-9]+ "." [0-9]+) / ("." [0-9]+) / "0x" [0-9a-fA-F]+ / ([0-9]+)))-> tonumber) -- Variables can be nameless (i.e. just %) and can only contain identifier chars. -- This ensures you don't get weird parsings of `%x+%y` or `%'s thing`. -variable (Var): "%" {ident_char*} +variable (Var) <- "%" {ident_char*} -inline_list (List): +inline_list (List) <- !('[..]') "[" ws* (inline_list_item (ws* ',' ws* inline_list_item)* (ws* ',')?)? ws* ("]" / eof / (","? (missing_bracket_error / unexpected_code))) -indented_list (List): +indented_list (List) <- "[..]" eol nl_indent list_line (nl_nodent list_line)* (%nl (ws* %nl)* nodent (comment / eol / unexpected_code))* (","? unexpected_code)? -list_line: +list_line <- (inline_list_item ws* "," ws*)+ eol / (inline_list_item ws* "," ws*)* (action / expression / inline_block / indented_block) eol -inline_list_item: inline_action / inline_expression / inline_block +inline_list_item <- inline_action / inline_expression / inline_block -inline_dict (Dict): +inline_dict (Dict) <- !('{..}') "{" ws* (inline_dict_entry (ws* ',' ws* inline_dict_entry)*)? ws* ("}" / eof / (","? (missing_brace_error / unexpected_code))) -indented_dict (Dict): +indented_dict (Dict) <- "{..}" eol nl_indent dict_line (nl_nodent dict_line)* (%nl (ws* %nl)* nodent (comment / eol / unexpected_code))* (","? unexpected_code)? -dict_line: +dict_line <- (inline_dict_entry ws* "," ws*)+ eol / (inline_dict_entry ws* "," ws*)* dict_entry eol -_dict_entry(DictEntry): +_dict_entry(DictEntry) <- dict_key (ws* ":" ws* (action / expression))? -dict_entry: +dict_entry <- _dict_entry / inline_block / indented_block -_inline_dict_entry(DictEntry): +_inline_dict_entry(DictEntry) <- dict_key (ws* ":" ws* (inline_action / inline_expression)?)? -inline_dict_entry: +inline_dict_entry <- _inline_dict_entry / inline_block -dict_key: +dict_key <- text_word / inline_expression -operator_char: ['`~!@$^&*+=|<>?/-] -ident_char: [a-zA-Z0-9_] / %utf8_char -ws: " " +operator_char <- ['`~!@$^&*+=|<>?/-] +ident_char <- [a-zA-Z0-9_] / %utf8_char +ws <- " " -escaped_char: +escaped_char <- ("\"->'') ( (([xX]->'') ((({[0-9a-fA-F]^2} %number_16) -> tonumber) -> tochar)) / ((([0-9] [0-9]^-2) -> tonumber) -> tochar) diff --git a/nomsu.5.peg b/nomsu.5.peg index ae87f71..772ca75 100644 --- a/nomsu.5.peg +++ b/nomsu.5.peg @@ -1,16 +1,16 @@ -- Nomsu version 5 -file: +file <- {:curr_indent: ' '* :} (((methodcall / action / expression / inline_block / indented_block) eol !.) / file_chunks / empty_block) {:curr_indent: %nil :} !. -shebang: "#!" (!"nomsu" [^%nl])* "nomsu" ws+ "-V" ws* [0-9.]+ [^%nl]* (%nl / !.) +shebang <- "#!" (!"nomsu" [^%nl])* "nomsu" ws+ "-V" ws* [0-9.]+ [^%nl]* (%nl / !.) -eof: !. +eof <- !. -file_chunks (FileChunks): +file_chunks (FileChunks) <- {:curr_indent: ' '* :} {:shebang: shebang :}? (top_block (nl_nodent section_division top_block)*) @@ -18,117 +18,117 @@ file_chunks (FileChunks): ws* unexpected_chunk? {:curr_indent: %nil :} -top_block (Block): +top_block (Block) <- {:curr_indent: ' '* :} comment? blank_lines? statement (nl_nodent statement)* {:curr_indent: %nil :} -empty_block (Block): +empty_block (Block) <- {:curr_indent: ' '* :} comment? blank_lines? {:curr_indent: %nil :} -nodent: (unexpected_indent [^%nl]* / =curr_indent) -indent: {~ =curr_indent (ws / (%tab -> ' '))+ ~} -blank_lines: %nl ((nodent comment / ws*) %nl)* -eol: ws* (!. / &%nl) +nodent <- (unexpected_indent [^%nl]* / =curr_indent) +indent <- {~ =curr_indent (ws / (%tab -> ' '))+ ~} +blank_lines <- %nl ((nodent comment / ws*) %nl)* +eol <- ws* (!. / &%nl) -nl_nodent: blank_lines nodent -nl_indent: blank_lines tab_error? {:curr_indent: indent :} (comment nl_nodent)* +nl_nodent <- blank_lines nodent +nl_indent <- blank_lines tab_error? {:curr_indent: indent :} (comment nl_nodent)* -comment (Comment): +comment (Comment) <- "#" {~ [^%nl]* (%nl+ (indent -> '') [^%nl]*)* (%nl &%nl)* ~} -unexpected_code: ws* _unexpected_code -_unexpected_code (Error): +unexpected_code <- ws* _unexpected_code +_unexpected_code (Error) <- {:error: {~ [^%nl]+ -> "Couldn't parse this code" ~} :} -unexpected_chunk (Error): +unexpected_chunk (Error) <- {:error: {~ .+ -> "Couldn't parse this code" ~} :} -unexpected_indent (Error): +unexpected_indent (Error) <- {:error: {~ (=curr_indent ws+) -> "Messed up indentation" ~} :} {:hint: {~ '' -> 'Either make sure this line is aligned with the one above it, or make sure the previous line ends with something that uses indentation, like ":" or "(..)"' ~} :} -missing_paren_err (Error): +missing_paren_err (Error) <- {:error: {~ eol -> 'Line ended without finding a closing )-parenthesis' ~} :} {:hint: {~ '' -> 'Put a ")" here' ~} :} -missing_quote_err (Error): +missing_quote_err (Error) <- {:error: {~ eol -> "Line ended without finding a closing quotation mark." ~} :} {:hint: {~ "" -> "Put a quotation mark here." ~} :} -missing_indented_quote_err (Error): +missing_indented_quote_err (Error) <- {:error: {~ eol -> "This text doesn't have a closing quotation mark." ~} :} {:hint: {~ "" -> "Put a quotation mark here on its own line." ~} :} -missing_bracket_error (Error): +missing_bracket_error (Error) <- {:error: {~ eol -> "Line ended before finding a closing ]-bracket" ~} :} {:hint: {~ '' -> 'Put a "]" here' ~} :} -missing_brace_error (Error): +missing_brace_error (Error) <- {:error: {~ eol -> "Line ended before finding a closing }-brace" ~} :} {:hint: {~ '' -> 'Put a "}" here' ~} :} -tab_error (Error): +tab_error (Error) <- &(=curr_indent %tab) {:error: {~ '' -> 'Tabs are not allowed for indentation.' ~} :} {:hint: {~ '' -> 'Use spaces instead of tabs.' ~} :} -section_division: ("~")^+3 eol +section_division <- ("~")^+3 eol -inline_block: +inline_block <- "(" ws* inline_block ws* (eof / ")") / raw_inline_block -raw_inline_block (Block): +raw_inline_block (Block) <- (!"::") ":" ws* ((inline_statement (ws* ";" ws* inline_statement)*) / !(eol nl_indent)) -indented_block (Block): +indented_block (Block) <- ":" eol nl_indent statement (nl_nodent statement)* (%nl (ws* %nl)* nodent (comment / eol / unexpected_code))* {:curr_indent: %nil :} -statement: +statement <- (methodcall / action / expression) (eol / unexpected_code) -inline_statement: (inline_methodcall / inline_action / inline_expression) +inline_statement <- (inline_methodcall / inline_action / inline_expression) -noindex_inline_expression: +noindex_inline_expression <- number / variable / inline_text / inline_list / inline_dict / inline_nomsu / ( "(" ws* (inline_methodcall / inline_action / inline_expression) ws* (")" / eof / missing_paren_err / unexpected_code) ) -inline_expression: index_chain / noindex_inline_expression -indented_expression: +inline_expression <- index_chain / noindex_inline_expression +indented_expression <- indented_text / indented_nomsu / indented_list / indented_dict / ({| "(..)" eol nl_indent (methodcall / action / expression) (eol / unexpected_code) (%nl (ws* %nl)* nodent (comment / eol / unexpected_code))* {:curr_indent: %nil :} |} -> unpack) -expression: +expression <- inline_expression / indented_expression -inline_nomsu (EscapedNomsu): "\" (inline_expression / inline_block) -indented_nomsu (EscapedNomsu): +inline_nomsu (EscapedNomsu) <- "\" (inline_expression / inline_block) +indented_nomsu (EscapedNomsu) <- "\" (noindex_inline_expression / inline_block / indented_expression / indented_block) -index_chain (IndexChain): +index_chain (IndexChain) <- noindex_inline_expression ("." (hex_integer / integer / text_word / noindex_inline_expression))+ -index_chain_before_method (IndexChain): +index_chain_before_method (IndexChain) <- noindex_inline_expression ("." (hex_integer / integer / text_word / noindex_inline_expression) &".")+ -- Actions need 1 argument and either another argument or a word. -inline_action (Action): +inline_action (Action) <- !section_division ( (word (ws* (inline_arg / word))*) /(inline_arg (ws* (inline_arg / word))+)) -inline_arg: inline_expression / inline_block -inline_methodcall (MethodCall): +inline_arg <- inline_expression / inline_block +inline_methodcall (MethodCall) <- (index_chain / noindex_inline_expression / "(" inline_block ")") "|" inline_action (ws* ";" ws* inline_action)* -action (Action): +action (Action) <- !section_division ( (word ((linesplit / ws*) (arg / word))*) /(arg ((linesplit / ws*) (arg / word))+)) -linesplit: (ws* "\")? eol nl_nodent ".." ws* -arg: expression / inline_block / indented_block -methodcall (MethodCall): +linesplit <- (ws* "\")? eol nl_nodent ".." ws* +arg <- expression / inline_block / indented_block +methodcall (MethodCall) <- (index_chain / noindex_inline_expression / indented_expression / "(" inline_block ")" / indented_block) linesplit? "|" ((ws* inline_action ws* ";")* ws* action @@ -136,17 +136,17 @@ methodcall (MethodCall): (action eol) (nl_nodent action eol)* (%nl (ws* %nl)* nodent (comment / eol / unexpected_code))*) -word: !number { operator_char+ / ident_char+ } +word <- !number { operator_char+ / ident_char+ } -text_word (Text): word +text_word (Text) <- word -inline_text (Text): +inline_text (Text) <- !(indented_text) '"' _inline_text* ('"' / eof / missing_quote_err / unexpected_code) -_inline_text: +_inline_text <- {~ (('\"' -> '"') / ('\\' -> '\') / escaped_char / text_char+)+ ~} / inline_text_interpolation / illegal_char -inline_text_interpolation: +inline_text_interpolation <- "\" ( variable / inline_list / inline_dict / ("(" @@ -154,91 +154,91 @@ inline_text_interpolation: (")" / eof / missing_paren_err / unexpected_code)) ) -text_char: %utf8_char / !["\] %print / %tab -illegal_char (Error): +text_char <- %utf8_char / !["\] %print / %tab +illegal_char (Error) <- {:error: {~ (!(%nl / %tab / %print) .) -> "Illegal unprintable character here (it may not be visible, but it's there)" ~} :} {:hint: {~ '' -> "This sort of thing can happen when copying and pasting code. Try deleting and retyping the code." ~} :} -terminal_quote: '"' !([^%nl] / (%nl (ws* eol)?)+ =curr_indent [^%nl]) -nonterminal_quote: !terminal_quote '"' -indented_text (Text): +terminal_quote <- '"' !([^%nl] / (%nl (ws* eol)?)+ =curr_indent [^%nl]) +nonterminal_quote <- !terminal_quote '"' +indented_text (Text) <- '"' %nl {%nl*} {:curr_indent: indent :} (indented_plain_text / text_interpolation / illegal_char / blank_text_lines)* (terminal_quote eol / eof / missing_indented_quote_err) {:curr_indent: %nil :} -- Tracking text-lines-within-indented-text as separate objects allows for better debugging line info -indented_plain_text (Text): +indented_plain_text (Text) <- {~ ((("\" blank_lines =curr_indent "..") -> "") / ('\\' -> '\') / (!text_interpolation ((!("\n") escaped_char) / '\')) / (nonterminal_quote / text_char)+)+ blank_text_lines? ~} -blank_text_lines: +blank_text_lines <- {~ (%nl ((ws* -> '') eol / (=curr_indent -> '') &[^%nl]))+ ~} -text_interpolation: +text_interpolation <- ("\" indented_expression (blank_lines =curr_indent "..")?) / inline_text_interpolation -number: +number <- hex_integer / real_number / integer -integer (Number): +integer (Number) <- (("-"? [0-9]+)-> tonumber) -hex_integer (Number): +hex_integer (Number) <- (("-"? "0x" [0-9a-fA-F]+)-> tonumber) {:hex: '' -> 'yes' :} -real_number (Number): +real_number (Number) <- (("-"? ([0-9]+ "." [0-9]+) / ("." [0-9]+))-> tonumber) -variable (Var): "$" ({ident_char+} / "(" {(ws+ / operator_char+ / ident_char+)*} ")" / {''}) +variable (Var) <- "$" ({ident_char+} / "(" {(ws+ / operator_char+ / ident_char+)*} ")" / {''}) -inline_list (List): +inline_list (List) <- !('[..]') "[" ws* (inline_list_item (ws* ',' ws* inline_list_item)* (ws* ',')?)? ws* ("]" / eof / (","? (missing_bracket_error / unexpected_code))) -indented_list (List): +indented_list (List) <- "[..]" eol nl_indent list_line (nl_nodent list_line)* (%nl (ws* %nl)* nodent (comment / eol / unexpected_code))* (","? unexpected_code)? -list_line: +list_line <- (inline_list_item ws* "," ws*)+ eol / (inline_list_item ws* "," ws*)* (methodcall / action / expression / inline_block / indented_block) eol -inline_list_item: inline_methodcall / inline_action / inline_expression / inline_block +inline_list_item <- inline_methodcall / inline_action / inline_expression / inline_block -inline_dict (Dict): +inline_dict (Dict) <- !('{..}') "{" ws* (inline_dict_entry (ws* ',' ws* inline_dict_entry)*)? ws* ("}" / eof / (","? (missing_brace_error / unexpected_code))) -indented_dict (Dict): +indented_dict (Dict) <- "{..}" eol nl_indent dict_line (nl_nodent dict_line)* (%nl (ws* %nl)* nodent (comment / eol / unexpected_code))* (","? unexpected_code)? -dict_line: +dict_line <- (inline_dict_entry ws* "," ws*)+ eol / (inline_dict_entry ws* "," ws*)* dict_entry eol -_dict_entry(DictEntry): +_dict_entry(DictEntry) <- dict_key (ws* ":" ws* (methodcall / action / expression))? -dict_entry: +dict_entry <- _dict_entry / inline_block / indented_block -_inline_dict_entry(DictEntry): +_inline_dict_entry(DictEntry) <- dict_key (ws* ":" ws* (inline_methodcall / inline_action / inline_expression)?)? -inline_dict_entry: +inline_dict_entry <- _inline_dict_entry / inline_block -dict_key: +dict_key <- text_word / inline_expression -operator_char: [#'`~@^&*+=|<>?/%!-] -ident_char: [a-zA-Z0-9_] / %utf8_char -ws: " " +operator_char <- [#'`~@^&*+=|<>?/%!-] +ident_char <- [a-zA-Z0-9_] / %utf8_char +ws <- " " -escaped_char: +escaped_char <- ("\"->'') ( (([xX]->'') ((({[0-9a-fA-F]^2} %number_16) -> tonumber) -> tochar)) / ((([0-9] [0-9]^-2) -> tonumber) -> tochar) diff --git a/nomsu_compiler.lua b/nomsu_compiler.lua index 7fd9551..9b8fc87 100644 --- a/nomsu_compiler.lua +++ b/nomsu_compiler.lua @@ -150,9 +150,6 @@ local compile = setmetatable({ ["use 1 with prefix"] = function(compile, path, prefix) return LuaCode("run_file_1_in(" .. tostring(compile(path)) .. ", _ENV, OPTIMIZATION, ", compile(prefix), ")") end, - ["tests"] = function(compile) - return LuaCode("TESTS") - end, ["test"] = function(compile, body) if not (body.type == 'Block') then compile_error(body, "This should be a Block") @@ -355,10 +352,12 @@ local compile = setmetatable({ string_buffer = "" end local bit_lua = compile(bit) - if bit.type == "Block" then - bit_lua = LuaCode:from(bit.source, "(function()", "\n local _buffer = List{}", "\n local function add(bit) _buffer:add(bit) end", "\n local function join_with(glue) _buffer = _buffer:joined_with(glue) end", "\n ", bit_lua, "\n if lua_type_of(_buffer) == 'table' then _buffer = _buffer:joined() end", "\n return _buffer", "\nend)()") + if bit.type == "Block" and #bit == 1 then + bit = bit[1] end - if bit.type ~= "Text" then + if bit.type == "Block" then + bit_lua = LuaCode:from(bit.source, "List(function(add)", "\n ", bit_lua, "\nend):joined()") + elseif bit.type ~= "Text" and bit.type ~= "Number" then bit_lua = LuaCode:from(bit.source, "tostring(", bit_lua, ")") end add_bit(bit_lua) @@ -382,82 +381,91 @@ local compile = setmetatable({ end return lua elseif "List" == _exp_0 or "Dict" == _exp_0 then + if #tree == 0 then + return LuaCode:from(tree.source, tree.type, "{}") + end local lua = LuaCode:from(tree.source) + local chunks = 0 local i = 1 - local sep = '' - while i <= #tree do - local item = tree[i] - if item.type == "Block" then - break - end - lua:add(sep) - if item.type == "Comment" then - lua:add(compile(item), "\n") - sep = '' - else - local item_lua = compile(item) - lua:add(item_lua) - sep = ', ' - end - i = i + 1 - end - if lua:is_multiline() then - lua = LuaCode:from(tree.source, tostring(tree.type) .. "{\n ", lua, "\n}") - else - lua = LuaCode:from(tree.source, tostring(tree.type) .. "{", lua, "}") - end - if i <= #tree then - lua = LuaCode:from(tree.source, "(function()\n local comprehension = ", lua) - if tree.type == "List" then - lua:add("\n local function add(x) comprehension[#comprehension+1] = x end") - else - lua:add("\n local function " .. tostring(("add 1 ="):as_lua_id()) .. "(k, v) comprehension[k] = v end") - end - while i <= #tree do - lua:add("\n ") - if tree[i].type == 'Block' or tree[i].type == 'Comment' then - lua:add(compile(tree[i])) - elseif tree[i].type == "DictEntry" then - local entry_lua = compile(tree[i]) - lua:add((entry_lua:text():sub(1, 1) == '[' and "comprehension" or "comprehension."), entry_lua) - else - lua:add("comprehension[#comprehension+1] = ", compile(tree[i])) + while tree[i] do + if tree[i].type == 'Block' then + if chunks > 0 then + lua:add(" + ") end + lua:add(tree.type, "(function(", (tree.type == 'List' and "add" or ("add, " .. ("add 1 ="):as_lua_id())), ")") + lua:add("\n ", compile(tree[i]), "\nend)") + chunks = chunks + 1 i = i + 1 + else + if chunks > 0 then + lua:add(" + ") + end + local sep = '' + local items_lua = LuaCode:from(tree[i].source) + while tree[i] do + if tree[i].type == "Block" then + break + end + local item_lua = compile(tree[i]) + if item_lua:text():match("^%.[a-zA-Z_]") then + item_lua = item_lua:text():sub(2) + end + if tree.type == 'Dict' and tree[i].type == 'Index' then + item_lua = LuaCode:from(tree[i].source, item_lua, "=true") + end + items_lua:add(sep, item_lua) + if tree[i].type == "Comment" then + items_lua:add("\n") + sep = '' + else + sep = ', ' + end + i = i + 1 + end + if items_lua:is_multiline() then + lua:add(LuaCode:from(items_lua.source, tree.type, "{\n ", items_lua, "\n}")) + else + lua:add(LuaCode:from(items_lua.source, tree.type, "{", items_lua, "}")) + end + chunks = chunks + 1 end - lua:add("\n return comprehension\nend)()") end return lua - elseif "DictEntry" == _exp_0 then - local key, value = tree[1], tree[2] - local key_lua = compile(key) - local value_lua = value and compile(value) or LuaCode:from(key.source, "true") - local key_str = match(key_lua:text(), [=[^["']([a-zA-Z_][a-zA-Z0-9_]*)['"]$]=]) + elseif "Index" == _exp_0 then + local key_lua = compile(tree[1]) + local key_str = match(key_lua:text(), '^"([a-zA-Z_][a-zA-Z0-9_]*)"$') if key_str and key_str:is_lua_id() then - return LuaCode:from(tree.source, key_str, "=", value_lua) + return LuaCode:from(tree.source, ".", key_str) elseif sub(key_lua:text(), 1, 1) == "[" then - return LuaCode:from(tree.source, "[ ", key_lua, "]=", value_lua) + return LuaCode:from(tree.source, "[ ", key_lua, "]") else - return LuaCode:from(tree.source, "[", key_lua, "]=", value_lua) + return LuaCode:from(tree.source, "[", key_lua, "]") end + elseif "DictEntry" == _exp_0 then + local key = tree[1] + if key.type ~= "Index" then + key = SyntaxTree({ + type = "Index", + source = key.source, + key + }) + end + return LuaCode:from(tree.source, compile(key), "=", (tree[2] and compile(tree[2]) or "true")) elseif "IndexChain" == _exp_0 then local lua = compile(tree[1]) - local first_char = sub(lua:text(), 1, 1) - if first_char == "{" or first_char == '"' or first_char == "[" then + if lua:text():match("['\"}]$") or lua:text():match("]=*]$") then lua:parenthesize() end for i = 2, #tree do local key = tree[i] - local key_lua = compile(key) - local key_lua_str = key_lua:text() - local lua_id = match(key_lua_str, "^['\"]([a-zA-Z_][a-zA-Z0-9_]*)['\"]$") - if lua_id and lua_id:is_lua_id() then - lua:add("." .. tostring(lua_id)) - elseif sub(key_lua_str, 1, 1) == '[' then - lua:add("[ ", key_lua, " ]") - else - lua:add("[", key_lua, "]") + if key.type ~= "Index" then + key = SyntaxTree({ + type = "Index", + source = key.source, + key + }) end + lua:add(compile(key)) end return lua elseif "Number" == _exp_0 then diff --git a/nomsu_compiler.moon b/nomsu_compiler.moon index bffec92..12a7529 100644 --- a/nomsu_compiler.moon +++ b/nomsu_compiler.moon @@ -122,7 +122,6 @@ compile = setmetatable({ ["use 1 with prefix"]: (compile, path, prefix)-> LuaCode("run_file_1_in(#{compile(path)}, _ENV, OPTIMIZATION, ", compile(prefix), ")") - ["tests"]: (compile)-> LuaCode("TESTS") ["test"]: (compile, body)-> unless body.type == 'Block' compile_error(body, "This should be a Block") @@ -150,6 +149,7 @@ compile = setmetatable({ lua\add tok else tok_lua = compile(tok) + -- TODO: this is overly eager, should be less aggressive tok_lua\parenthesize! if tok.type == "Action" lua\add tok_lua lua\add " " if i < #tree @@ -271,16 +271,13 @@ compile = setmetatable({ string_buffer = "" bit_lua = compile(bit) + if bit.type == "Block" and #bit == 1 + bit = bit[1] if bit.type == "Block" - bit_lua = LuaCode\from bit.source, "(function()", - "\n local _buffer = List{}", - "\n local function add(bit) _buffer:add(bit) end", - "\n local function join_with(glue) _buffer = _buffer:joined_with(glue) end", + bit_lua = LuaCode\from bit.source, "List(function(add)", "\n ", bit_lua, - "\n if lua_type_of(_buffer) == 'table' then _buffer = _buffer:joined() end", - "\n return _buffer", - "\nend)()" - if bit.type != "Text" + "\nend):joined()" + elseif bit.type != "Text" and bit.type != "Number" bit_lua = LuaCode\from(bit.source, "tostring(",bit_lua,")") add_bit bit_lua @@ -296,84 +293,75 @@ compile = setmetatable({ return lua when "List", "Dict" + if #tree == 0 + return LuaCode\from tree.source, tree.type, "{}" + lua = LuaCode\from tree.source + chunks = 0 i = 1 - sep = '' - while i <= #tree - item = tree[i] - if item.type == "Block" - break - lua\add sep - if item.type == "Comment" - lua\add compile(item), "\n" - sep = '' - else - item_lua = compile(item) - lua\add item_lua - sep = ', ' - i += 1 - - if lua\is_multiline! - lua = LuaCode\from tree.source, "#{tree.type}{\n ", lua, "\n}" - else - lua = LuaCode\from tree.source, "#{tree.type}{", lua, "}" - - -- List/dict comprehenstion - if i <= #tree - lua = LuaCode\from tree.source, "(function()\n local comprehension = ", lua - if tree.type == "List" - lua\add "\n local function add(x) comprehension[#comprehension+1] = x end" - else - lua\add "\n local function #{("add 1 =")\as_lua_id!}(k, v) comprehension[k] = v end" - while i <= #tree - lua\add "\n " - if tree[i].type == 'Block' or tree[i].type == 'Comment' - lua\add compile(tree[i]) - elseif tree[i].type == "DictEntry" - entry_lua = compile(tree[i]) - lua\add (entry_lua\text!\sub(1,1) == '[' and "comprehension" or "comprehension."), entry_lua - else - lua\add "comprehension[#comprehension+1] = ", compile(tree[i]) + while tree[i] + if tree[i].type == 'Block' + lua\add " + " if chunks > 0 + lua\add tree.type, "(function(", (tree.type == 'List' and "add" or ("add, "..("add 1 =")\as_lua_id!)), ")" + lua\add "\n ", compile(tree[i]), "\nend)" + chunks += 1 i += 1 - lua\add "\n return comprehension\nend)()" + else + lua\add " + " if chunks > 0 + sep = '' + items_lua = LuaCode\from tree[i].source + while tree[i] + if tree[i].type == "Block" + break + item_lua = compile tree[i] + if item_lua\text!\match("^%.[a-zA-Z_]") + item_lua = item_lua\text!\sub(2) + if tree.type == 'Dict' and tree[i].type == 'Index' + item_lua = LuaCode\from tree[i].source, item_lua, "=true" + items_lua\add sep, item_lua + if tree[i].type == "Comment" + items_lua\add "\n" + sep = '' + else + sep = ', ' + i += 1 + if items_lua\is_multiline! + lua\add LuaCode\from items_lua.source, tree.type, "{\n ", items_lua, "\n}" + else + lua\add LuaCode\from items_lua.source, tree.type, "{", items_lua, "}" + chunks += 1 return lua - when "DictEntry" - key, value = tree[1], tree[2] - key_lua = compile(key) - value_lua = value and compile(value) or LuaCode\from(key.source, "true") - key_str = match(key_lua\text!, [=[^["']([a-zA-Z_][a-zA-Z0-9_]*)['"]$]=]) + when "Index" + key_lua = compile(tree[1]) + key_str = match(key_lua\text!, '^"([a-zA-Z_][a-zA-Z0-9_]*)"$') return if key_str and key_str\is_lua_id! - LuaCode\from tree.source, key_str,"=",value_lua + LuaCode\from tree.source, ".", key_str elseif sub(key_lua\text!,1,1) == "[" -- NOTE: this *must* use a space after the [ to avoid freaking out -- Lua's parser if the inner expression is a long string. Lua -- parses x[[[y]]] as x("[y]"), not as x["y"] - LuaCode\from tree.source, "[ ",key_lua,"]=",value_lua + LuaCode\from tree.source, "[ ",key_lua,"]" else - LuaCode\from tree.source, "[",key_lua,"]=",value_lua + LuaCode\from tree.source, "[",key_lua,"]" + + when "DictEntry" + key = tree[1] + if key.type != "Index" + key = SyntaxTree{type:"Index", source:key.source, key} + return LuaCode\from tree.source, compile(key),"=",(tree[2] and compile(tree[2]) or "true") when "IndexChain" lua = compile(tree[1]) - first_char = sub(lua\text!,1,1) - if first_char == "{" or first_char == '"' or first_char == "[" + if lua\text!\match("['\"}]$") or lua\text!\match("]=*]$") lua\parenthesize! - for i=2,#tree key = tree[i] - key_lua = compile(key) - key_lua_str = key_lua\text! - lua_id = match(key_lua_str, "^['\"]([a-zA-Z_][a-zA-Z0-9_]*)['\"]$") - if lua_id and lua_id\is_lua_id! - lua\add ".#{lua_id}" - elseif sub(key_lua_str,1,1) == '[' - -- NOTE: this *must* use a space after the [ to avoid freaking out - -- Lua's parser if the inner expression is a long string. Lua - -- parses x[[[y]]] as x("[y]"), not as x["y"] - lua\add "[ ",key_lua," ]" - else - lua\add "[",key_lua,"]" + -- TODO: remove this shim + if key.type != "Index" + key = SyntaxTree{type:"Index", source:key.source, key} + lua\add compile(key) return lua when "Number" diff --git a/nomsu_decompiler.lua b/nomsu_decompiler.lua index c3b7cb2..ef5b8c7 100644 --- a/nomsu_decompiler.lua +++ b/nomsu_decompiler.lua @@ -14,7 +14,7 @@ local re = require('re') local MAX_LINE = 80 local GOLDEN_RATIO = ((math.sqrt(5) - 1) / 2) local utf8_char_patt = (R("\194\223") * R("\128\191") + R("\224\239") * R("\128\191") * R("\128\191") + R("\240\244") * R("\128\191") * R("\128\191") * R("\128\191")) -local operator_patt = S("'`~!@$^&*+=|<>?/-") ^ 1 * -1 +local operator_patt = S("'`~!@%#^&*+=|<>?/-") ^ 1 * -1 local identifier_patt = (R("az", "AZ", "09") + P("_") + utf8_char_patt) ^ 1 * -1 local is_operator is_operator = function(s) @@ -73,11 +73,15 @@ tree_to_inline_nomsu = function(tree) arg_nomsu:parenthesize() end else - if i > 1 then - nomsu:add(" ") - end if bit.type == "Action" or bit.type == "MethodCall" then + if i > 1 then + nomsu:add(" ") + end arg_nomsu:parenthesize() + else + if i > 1 then + nomsu:add(" ") + end end end nomsu:add(arg_nomsu) @@ -86,10 +90,10 @@ tree_to_inline_nomsu = function(tree) return nomsu elseif "MethodCall" == _exp_0 then local target_nomsu = tree_to_inline_nomsu(tree[1]) - if tree[1].type == "Action" or tree[1].type == "MethodCall" or tree[1].type == "Block" then + if tree[1].type == "Block" then target_nomsu:parenthesize() end - local nomsu = NomsuCode:from(tree.source, target_nomsu, "|") + local nomsu = NomsuCode:from(tree.source, target_nomsu, ", ") for i = 2, #tree do if i > 2 then nomsu:add("; ") @@ -142,30 +146,47 @@ tree_to_inline_nomsu = function(tree) if i > 1 then nomsu:add(", ") end - nomsu:add(tree_to_inline_nomsu(item)) + local item_nomsu = tree_to_inline_nomsu(item, true) + if item.type == "MethodCall" then + item_nomsu:parenthesize() + end + nomsu:add(item_nomsu) end nomsu:add(tree.type == "List" and "]" or "}") return nomsu elseif "DictEntry" == _exp_0 then local key, value = tree[1], tree[2] - local nomsu - if key.type == "Text" and #key == 1 and is_identifier(key[1]) then - nomsu = NomsuCode:from(key.source, key[1]) - else - nomsu = tree_to_inline_nomsu(key) - end - if key.type == "Action" or key.type == "MethodCall" or key.type == "Block" then - nomsu:parenthesize() + local nomsu = NomsuCode:from(tree.source) + if key.type ~= "Index" then + key = { + type = "Index", + source = key.source, + key + } end + nomsu:add(tree_to_inline_nomsu(key)) if value then - nomsu:add(": ") + nomsu:add(" = ") local value_nomsu = tree_to_inline_nomsu(value) - if value.type == "Block" then + if value.type == "Block" or value.type == "Action" or value.type == "MethodCall" then value_nomsu:parenthesize() end nomsu:add(value_nomsu) end return nomsu + elseif "Index" == _exp_0 then + local key = tree[1] + local nomsu = NomsuCode:from(key.source, ".") + local key_nomsu + if key.type == "Text" and #key == 1 and is_identifier(key[1]) then + key_nomsu = key[1] + else + key_nomsu = tree_to_inline_nomsu(key) + end + if key.type == "Block" or key.type == "Action" or key.type == "MethodCall" then + key_nomsu:parenthesize() + end + return NomsuCode:from(key.source, ".", key_nomsu) elseif "IndexChain" == _exp_0 then local nomsu = NomsuCode:from(tree.source) for i, bit in ipairs(tree) do @@ -240,7 +261,7 @@ tree_to_nomsu = function(tree) if try_inline then inline_nomsu = tree_to_inline_nomsu(t) if #inline_nomsu:text() <= space or #inline_nomsu:text() <= 8 then - if t.type == "Action" or t.type == "MethodCall" then + if (t.type == "Action" or t.type == "MethodCall") then inline_nomsu:parenthesize() end return inline_nomsu @@ -252,7 +273,7 @@ tree_to_nomsu = function(tree) local indented = tree_to_nomsu(t) if t.type == "Action" or t.type == "MethodCall" then if indented:is_multiline() then - return NomsuCode:from(t.source, "(..)\n ", indented) + return NomsuCode:from(t.source, "(\n ", indented, "\n)") else indented:parenthesize() end @@ -300,7 +321,7 @@ tree_to_nomsu = function(tree) local words = table.concat(word_buffer) if next_space == " " then if nomsu:trailing_line_len() + #words > MAX_LINE and nomsu:trailing_line_len() > 8 then - next_space = " \\\n.." + next_space = "\n.." elseif word_buffer[1] == "'" then next_space = "" end @@ -311,23 +332,24 @@ tree_to_nomsu = function(tree) end num_args = num_args + 1 local bit_nomsu = recurse(bit) - if bit.type == "Block" and not bit_nomsu:is_multiline() then - if #bit_nomsu:text() > nomsu:trailing_line_len() * GOLDEN_RATIO and #bit_nomsu:text() > 8 then + if bit.type == "Block" then + if not bit_nomsu:is_multiline() and #bit_nomsu:text() > nomsu:trailing_line_len() * GOLDEN_RATIO and #bit_nomsu:text() > 8 then + bit_nomsu = tree_to_nomsu(bit) + end + elseif (not bit_nomsu:is_multiline() and nomsu:trailing_line_len() + #bit_nomsu:text() > MAX_LINE and nomsu:trailing_line_len() > 8) then + if next_space == " " and #bit_nomsu:text() < MAX_LINE then + next_space = "\n.." + elseif bit.type == 'Action' or bit.type == "MethodCall" then + bit_nomsu = NomsuCode:from(bit.source, "(\n ", tree_to_nomsu(bit), ")") + else bit_nomsu = tree_to_nomsu(bit) end end - if (next_space == " " and not bit_nomsu:is_multiline() and nomsu:trailing_line_len() + #bit_nomsu:text() > MAX_LINE and nomsu:trailing_line_len() > 8) then - if bit.type == 'Action' or bit.type == "MethodCall" then - bit_nomsu = NomsuCode:from(bit.source, "(..)\n ", tree_to_nomsu(bit)) - else - next_space = " \\\n.." - end - end - if not (next_space == " " and bit.type == "Block") then + if not (next_space == " " and bit_nomsu:text():match("^:")) then nomsu:add(next_space) end nomsu:add(bit_nomsu) - next_space = (bit_nomsu:is_multiline() or bit.type == 'Block') and "\n.." or " " + next_space = (bit.type == 'Block' and bit_nomsu:is_multiline()) and "\n.." or " " _continue_0 = true until true if not _continue_0 then @@ -338,7 +360,7 @@ tree_to_nomsu = function(tree) local words = table.concat(word_buffer) if next_space == " " then if nomsu:trailing_line_len() + #words > MAX_LINE and nomsu:trailing_line_len() > 8 then - next_space = " \\\n.." + next_space = "\n.." elseif word_buffer[1] == "'" then next_space = "" end @@ -352,8 +374,7 @@ tree_to_nomsu = function(tree) if tree[1].type == "Block" and not target_nomsu:is_multiline() then target_nomsu:parenthesize() end - nomsu:add(target_nomsu) - nomsu:add(target_nomsu:is_multiline() and "\n..|" or "|") + nomsu:add(target_nomsu, ", ") local inner_nomsu = NomsuCode() for i = 2, #tree do if i > 2 then @@ -391,7 +412,7 @@ tree_to_nomsu = function(tree) end return NomsuCode:from(tree.source, ":\n ", nomsu) elseif "Text" == _exp_0 then - local max_line = math.floor(1.25 * MAX_LINE) + local max_line = MAX_LINE + 8 local add_text add_text = function(tree) for i, bit in ipairs(tree) do @@ -399,6 +420,9 @@ tree_to_nomsu = function(tree) bit = escape(bit) for j, line in ipairs(bit:lines()) do if j > 1 then + if nomsu:text():match(" $") then + nomsu:add("\\;") + end nomsu:add("\n") elseif #line > 10 and nomsu:trailing_line_len() > max_line then nomsu:add("\\\n..") @@ -439,14 +463,17 @@ tree_to_nomsu = function(tree) end end nomsu:add(interp_nomsu) - if interp_nomsu:is_multiline() then + if interp_nomsu:is_multiline() and bit.type == "Block" then nomsu:add("\n..") end end end end add_text(tree) - return NomsuCode:from(tree.source, '"\n ', nomsu, '"') + if nomsu:text():match(" $") then + nomsu:add("\\;") + end + return NomsuCode:from(tree.source, '"\n ', nomsu, '\n"') elseif "List" == _exp_0 or "Dict" == _exp_0 then if #tree == 0 then nomsu:add(tree.type == "List" and "[]" or "{}") @@ -454,13 +481,19 @@ tree_to_nomsu = function(tree) end local sep = '' for i, item in ipairs(tree) do - local item_nomsu = tree_to_inline_nomsu(item) - if #item_nomsu:text() > MAX_LINE then + local item_nomsu + if item.type == 'MethodCall' then item_nomsu = recurse(item) - end - if item.type == 'Comment' then - sep = '\n' + elseif item.type == 'Comment' then item_nomsu = tree_to_nomsu(item) + if i > 1 then + sep = '\n' + end + else + item_nomsu = tree_to_inline_nomsu(item) + if #item_nomsu:text() > MAX_LINE then + item_nomsu = recurse(item) + end end nomsu:add(sep) nomsu:add(item_nomsu) @@ -471,32 +504,33 @@ tree_to_nomsu = function(tree) end end if tree.type == "List" then - return NomsuCode:from(tree.source, "[..]\n ", nomsu) + return NomsuCode:from(tree.source, "[\n ", nomsu, "\n]") else - return NomsuCode:from(tree.source, "{..}\n ", nomsu) + return NomsuCode:from(tree.source, "{\n ", nomsu, "\n}") end elseif "DictEntry" == _exp_0 then local key, value = tree[1], tree[2] - if key.type == "Text" and #key == 1 and is_identifier(key[1]) then - nomsu = NomsuCode:from(key.source, key[1]) - else - nomsu = tree_to_inline_nomsu(key) - end - if key.type == "Block" then - nomsu:parenthesize() + nomsu = NomsuCode:from(tree.source) + if key.type ~= "Index" then + key = { + type = "Index", + source = key.source, + key + } end + nomsu:add(tree_to_nomsu(key)) if value then local value_nomsu = tree_to_nomsu(value) - if (value.type == "Block" or value.type == "EscapedNomsu") and not value_nomsu:is_multiline() then + if (value.type == "Block" or value.type == "Action" or value.type == "MethodCall") and not value_nomsu:is_multiline() then value_nomsu:parenthesize() end - nomsu:add(": ", value_nomsu) + nomsu:add(" = ", value_nomsu) end return nomsu elseif "Comment" == _exp_0 then nomsu:add("#", (tree[1]:gsub("\n", "\n "))) return nomsu - elseif "IndexChain" == _exp_0 or "Number" == _exp_0 or "Var" == _exp_0 or "Comment" == _exp_0 or "Error" == _exp_0 then + elseif "IndexChain" == _exp_0 or "Index" == _exp_0 or "Number" == _exp_0 or "Var" == _exp_0 or "Comment" == _exp_0 or "Error" == _exp_0 then return tree_to_inline_nomsu(tree) else return error("Unknown type: " .. tostring(tree.type)) diff --git a/nomsu_decompiler.moon b/nomsu_decompiler.moon index f8c2aa3..e16c20e 100644 --- a/nomsu_decompiler.moon +++ b/nomsu_decompiler.moon @@ -11,7 +11,7 @@ utf8_char_patt = ( R("\194\223")*R("\128\191") + R("\224\239")*R("\128\191")*R("\128\191") + R("\240\244")*R("\128\191")*R("\128\191")*R("\128\191")) -operator_patt = S("'`~!@$^&*+=|<>?/-")^1 * -1 +operator_patt = S("'`~!@%#^&*+=|<>?/-")^1 * -1 identifier_patt = (R("az","AZ","09") + P("_") + utf8_char_patt)^1 * -1 is_operator = (s)-> @@ -50,17 +50,19 @@ tree_to_inline_nomsu = (tree)-> unless i == #tree arg_nomsu\parenthesize! else - nomsu\add " " if i > 1 if bit.type == "Action" or bit.type == "MethodCall" + nomsu\add " " if i > 1 arg_nomsu\parenthesize! + else + nomsu\add " " if i > 1 nomsu\add arg_nomsu return nomsu when "MethodCall" target_nomsu = tree_to_inline_nomsu(tree[1]) - if tree[1].type == "Action" or tree[1].type == "MethodCall" or tree[1].type == "Block" + if tree[1].type == "Block" target_nomsu\parenthesize! - nomsu = NomsuCode\from(tree.source, target_nomsu, "|") + nomsu = NomsuCode\from(tree.source, target_nomsu, ", ") for i=2,#tree nomsu\add "; " if i > 2 nomsu\add tree_to_inline_nomsu(tree[i]) @@ -103,23 +105,39 @@ tree_to_inline_nomsu = (tree)-> nomsu = NomsuCode\from(tree.source, (tree.type == "List" and "[" or "{")) for i, item in ipairs tree nomsu\add ", " if i > 1 - nomsu\add tree_to_inline_nomsu(item) + item_nomsu = tree_to_inline_nomsu(item, true) + --if item.type == "Block" or item.type == "Action" or item.type == "MethodCall" + -- item_nomsu\parenthesize! + if item.type == "MethodCall" + item_nomsu\parenthesize! + nomsu\add item_nomsu nomsu\add(tree.type == "List" and "]" or "}") return nomsu when "DictEntry" key, value = tree[1], tree[2] - nomsu = if key.type == "Text" and #key == 1 and is_identifier(key[1]) - NomsuCode\from(key.source, key[1]) - else tree_to_inline_nomsu(key) - nomsu\parenthesize! if key.type == "Action" or key.type == "MethodCall" or key.type == "Block" + nomsu = NomsuCode\from(tree.source) + -- TODO: remove shim + if key.type != "Index" + key = {type:"Index", source:key.source, key} + nomsu\add tree_to_inline_nomsu(key) if value - nomsu\add ": " + nomsu\add " = " value_nomsu = tree_to_inline_nomsu(value) - value_nomsu\parenthesize! if value.type == "Block" + value_nomsu\parenthesize! if value.type == "Block" or value.type == "Action" or value.type == "MethodCall" nomsu\add value_nomsu return nomsu + when "Index" + key = tree[1] + nomsu = NomsuCode\from(key.source, ".") + key_nomsu = if key.type == "Text" and #key == 1 and is_identifier(key[1]) + key[1] + else + tree_to_inline_nomsu(key) + key_nomsu\parenthesize! if key.type == "Block" or key.type == "Action" or key.type == "MethodCall" + return NomsuCode\from(key.source, ".", key_nomsu) + when "IndexChain" nomsu = NomsuCode\from(tree.source) for i, bit in ipairs tree @@ -183,7 +201,7 @@ tree_to_nomsu = (tree)-> if try_inline inline_nomsu = tree_to_inline_nomsu(t) if #inline_nomsu\text! <= space or #inline_nomsu\text! <= 8 - if t.type == "Action" or t.type == "MethodCall" + if (t.type == "Action" or t.type == "MethodCall") inline_nomsu\parenthesize! return inline_nomsu if t.type == "Text" and #inline_nomsu\text! + 2 < MAX_LINE @@ -191,8 +209,9 @@ tree_to_nomsu = (tree)-> indented = tree_to_nomsu(t) if t.type == "Action" or t.type == "MethodCall" if indented\is_multiline! - return NomsuCode\from(t.source, "(..)\n ", indented) - else indented\parenthesize! + return NomsuCode\from(t.source, "(\n ", indented, "\n)") + else + indented\parenthesize! if inline_nomsu and indented\text!\match("^[^\n]*\n[^\n]*$") and nomsu\trailing_line_len! <= 8 return inline_nomsu return indented @@ -227,7 +246,7 @@ tree_to_nomsu = (tree)-> words = table.concat(word_buffer) if next_space == " " if nomsu\trailing_line_len! + #words > MAX_LINE and nomsu\trailing_line_len! > 8 - next_space = " \\\n.." + next_space = "\n.." elseif word_buffer[1] == "'" next_space = "" nomsu\add next_space, words @@ -236,30 +255,32 @@ tree_to_nomsu = (tree)-> num_args += 1 bit_nomsu = recurse(bit) - if bit.type == "Block" and not bit_nomsu\is_multiline! + if bit.type == "Block" -- Rule of thumb: nontrivial one-liner block arguments should be no more -- than golden ratio * the length of the proceeding part of the line - if #bit_nomsu\text! > nomsu\trailing_line_len! * GOLDEN_RATIO and #bit_nomsu\text! > 8 + if not bit_nomsu\is_multiline! and #bit_nomsu\text! > nomsu\trailing_line_len! * GOLDEN_RATIO and #bit_nomsu\text! > 8 bit_nomsu = tree_to_nomsu(bit) - - if (next_space == " " and not bit_nomsu\is_multiline! and + elseif (not bit_nomsu\is_multiline! and nomsu\trailing_line_len! + #bit_nomsu\text! > MAX_LINE and nomsu\trailing_line_len! > 8) - if bit.type == 'Action' or bit.type == "MethodCall" - bit_nomsu = NomsuCode\from bit.source, "(..)\n ", tree_to_nomsu(bit) + if next_space == " " and #bit_nomsu\text! < MAX_LINE + next_space = "\n.." + elseif bit.type == 'Action' or bit.type == "MethodCall" + bit_nomsu = NomsuCode\from bit.source, "(\n ", tree_to_nomsu(bit), ")" else - next_space = " \\\n.." - unless next_space == " " and bit.type == "Block" + bit_nomsu = tree_to_nomsu(bit) + + unless next_space == " " and bit_nomsu\text!\match("^:") nomsu\add next_space nomsu\add bit_nomsu - next_space = (bit_nomsu\is_multiline! or bit.type == 'Block') and "\n.." or " " + next_space = (bit.type == 'Block' and bit_nomsu\is_multiline!) and "\n.." or " " if #word_buffer > 0 words = table.concat(word_buffer) if next_space == " " if nomsu\trailing_line_len! + #words > MAX_LINE and nomsu\trailing_line_len! > 8 - next_space = " \\\n.." + next_space = "\n.." elseif word_buffer[1] == "'" next_space = "" nomsu\add next_space, words @@ -271,8 +292,7 @@ tree_to_nomsu = (tree)-> target_nomsu = recurse(tree[1]) if tree[1].type == "Block" and not target_nomsu\is_multiline! target_nomsu\parenthesize! - nomsu\add target_nomsu - nomsu\add(target_nomsu\is_multiline! and "\n..|" or "|") + nomsu\add target_nomsu, ", " inner_nomsu = NomsuCode! for i=2,#tree inner_nomsu\add "\n" if i > 2 @@ -307,13 +327,15 @@ tree_to_nomsu = (tree)-> when "Text" -- Multi-line text has more generous wrap margins - max_line = math.floor(1.25*MAX_LINE) + max_line = MAX_LINE + 8 add_text = (tree)-> for i, bit in ipairs tree if type(bit) == 'string' bit = escape(bit) for j, line in ipairs bit\lines! if j > 1 + if nomsu\text!\match(" $") + nomsu\add "\\;" nomsu\add "\n" elseif #line > 10 and nomsu\trailing_line_len! > max_line nomsu\add "\\\n.." @@ -343,10 +365,12 @@ tree_to_nomsu = (tree)-> elseif bit.type == "EscapedNomsu" or bit.type == "Block" or bit.type == "IndexChain" interp_nomsu\parenthesize! nomsu\add interp_nomsu - if interp_nomsu\is_multiline! + if interp_nomsu\is_multiline! and bit.type == "Block" nomsu\add "\n.." add_text(tree) - return NomsuCode\from(tree.source, '"\n ', nomsu, '"') + if nomsu\text!\match(" $") + nomsu\add "\\;" + return NomsuCode\from(tree.source, '"\n ', nomsu, '\n"') when "List", "Dict" if #tree == 0 @@ -354,12 +378,16 @@ tree_to_nomsu = (tree)-> return nomsu sep = '' for i, item in ipairs tree - item_nomsu = tree_to_inline_nomsu(item) - if #item_nomsu\text! > MAX_LINE + local item_nomsu + if item.type == 'MethodCall' item_nomsu = recurse(item) - if item.type == 'Comment' - sep = '\n' + elseif item.type == 'Comment' item_nomsu = tree_to_nomsu(item) + sep = '\n' if i > 1 + else + item_nomsu = tree_to_inline_nomsu(item) + if #item_nomsu\text! > MAX_LINE + item_nomsu = recurse(item) nomsu\add sep nomsu\add item_nomsu if item_nomsu\is_multiline! or item.type == 'Comment' or nomsu\trailing_line_len! + #tostring(item_nomsu) >= MAX_LINE @@ -367,28 +395,29 @@ tree_to_nomsu = (tree)-> else sep = ', ' return if tree.type == "List" then - NomsuCode\from(tree.source, "[..]\n ", nomsu) + NomsuCode\from(tree.source, "[\n ", nomsu, "\n]") else - NomsuCode\from(tree.source, "{..}\n ", nomsu) - + NomsuCode\from(tree.source, "{\n ", nomsu, "\n}") + when "DictEntry" key, value = tree[1], tree[2] - nomsu = if key.type == "Text" and #key == 1 and is_identifier(key[1]) - NomsuCode\from(key.source, key[1]) - else tree_to_inline_nomsu(key) - nomsu\parenthesize! if key.type == "Block" + nomsu = NomsuCode\from(tree.source) + -- TODO: remove shim + if key.type != "Index" + key = {type:"Index", source:key.source, key} + nomsu\add tree_to_nomsu(key) if value value_nomsu = tree_to_nomsu(value) - if (value.type == "Block" or value.type == "EscapedNomsu") and not value_nomsu\is_multiline! + if (value.type == "Block" or value.type == "Action" or value.type == "MethodCall") and not value_nomsu\is_multiline! value_nomsu\parenthesize! - nomsu\add ": ", value_nomsu + nomsu\add " = ", value_nomsu return nomsu when "Comment" nomsu\add "#", (tree[1]\gsub("\n", "\n ")) return nomsu - when "IndexChain", "Number", "Var", "Comment", "Error" + when "IndexChain", "Index", "Number", "Var", "Comment", "Error" return tree_to_inline_nomsu tree else diff --git a/parser.lua b/parser.lua index 5eee8ca..8cb96ea 100644 --- a/parser.lua +++ b/parser.lua @@ -3,6 +3,19 @@ local re = require('re') lpeg.setmaxstack(20000) local P, R, S, C, Cmt, Carg, Cc P, R, S, C, Cmt, Carg, Cc = lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.Cmt, lpeg.Carg, lpeg.Cc +local foldr +foldr = function(...) + local inner = select(1, ...) + for i = 2, select('#', ...) do + assert(inner.type) + local outer = select(i, ...) + table.insert(outer, 1, inner) + inner.start = outer.start + inner = outer + end + assert(inner.type) + return inner +end local DEFS do local _with_0 = { } @@ -17,6 +30,7 @@ do _with_0.Tree = function(t, userdata) return userdata.make_tree(t, userdata) end + _with_0.foldr = foldr DEFS = _with_0 end setmetatable(DEFS, { @@ -40,16 +54,14 @@ setmetatable(DEFS, { end end }) -local peg_tidier = re.compile([[file <- %nl* {~ (def/comment) (%nl+ (def/comment))* %nl* ~} -def <- anon_def / captured_def -anon_def <- - ({ident} (" "*) ":" {[^%nl]* (%nl+ " "+ [^%nl]*)*}) - -> "%1 <- %2" -captured_def <- - ({ident} (" "*) "(" {ident} ")" (" "*) ":" {[^%nl]* (%nl+ " "+ [^%nl]*)*}) - -> "%1 <- ({| {:start:{}:} %3 {:stop:{}:} {:type: (''->'%2') :} |} %%userdata) -> Tree" -ident <- [a-zA-Z_][a-zA-Z0-9_]* -comment <- "--" [^%nl]* +local peg_tidier = re.compile([[ file <- %nl* {~ (captured_def/line) (%nl+ (captured_def/line))* %nl* ~} + ident <- [a-zA-Z_][a-zA-Z0-9_]* + line <- [^%nl]* + captured_def <- + ({ident} (" "*) "(" {ident} ")" (" "*) "<-" {[^%nl]* (%nl+ " "+ [^%nl]*)*}) -> +"%1 <- ({| {:type:''->'%2':} {:start:{}:} + %3 + {:stop:{}:} |} %%userdata) -> Tree" ]]) local make_parser make_parser = function(peg, make_tree) diff --git a/parser.moon b/parser.moon index 78e6291..63b2fb2 100644 --- a/parser.moon +++ b/parser.moon @@ -4,6 +4,18 @@ re = require 're' lpeg.setmaxstack 20000 {:P,:R,:S,:C,:Cmt,:Carg,:Cc} = lpeg +-- foldr {A{a1,a2,...},B{b1,b2,...},C{c1,c2,...}} -> C{B{A{a1,a2,...},b1,b2...},c1,c2...} +foldr = (...)-> + inner = select(1,...) + for i=2,select('#',...) do + assert inner.type + outer = select(i,...) + table.insert(outer, 1, inner) + inner.start = outer.start + inner = outer + assert inner.type + return inner + DEFS = with {} -- Newline supports either windows-style CR+LF or unix-style LF .nl = P("\r")^-1 * P("\n") @@ -18,6 +30,7 @@ DEFS = with {} R("\224\239")*R("\128\191")*R("\128\191") + R("\240\244")*R("\128\191")*R("\128\191")*R("\128\191")) .Tree = (t, userdata)-> userdata.make_tree(t, userdata) + .foldr = foldr setmetatable(DEFS, {__index:(key)=> if i = key\match("^ascii_(%d+)$") @@ -30,19 +43,17 @@ setmetatable(DEFS, {__index:(key)=> return p }) --- Just for cleanliness, I put the language spec in its own file using a slightly modified --- version of the lpeg.re syntax. +-- Just for cleanliness, I put the language spec in its own file using a slightly +-- extended version of the lpeg.re syntax. peg_tidier = re.compile [[ -file <- %nl* {~ (def/comment) (%nl+ (def/comment))* %nl* ~} -def <- anon_def / captured_def -anon_def <- - ({ident} (" "*) ":" {[^%nl]* (%nl+ " "+ [^%nl]*)*}) - -> "%1 <- %2" -captured_def <- - ({ident} (" "*) "(" {ident} ")" (" "*) ":" {[^%nl]* (%nl+ " "+ [^%nl]*)*}) - -> "%1 <- ({| {:start:{}:} %3 {:stop:{}:} {:type: (''->'%2') :} |} %%userdata) -> Tree" -ident <- [a-zA-Z_][a-zA-Z0-9_]* -comment <- "--" [^%nl]* + file <- %nl* {~ (captured_def/line) (%nl+ (captured_def/line))* %nl* ~} + ident <- [a-zA-Z_][a-zA-Z0-9_]* + line <- [^%nl]* + captured_def <- + ({ident} (" "*) "(" {ident} ")" (" "*) "<-" {[^%nl]* (%nl+ " "+ [^%nl]*)*}) -> +"%1 <- ({| {:type:''->'%2':} {:start:{}:} + %3 + {:stop:{}:} |} %%userdata) -> Tree" ]] make_parser = (peg, make_tree=nil)-> diff --git a/syntax_tree.lua b/syntax_tree.lua index ba74708..702cba6 100644 --- a/syntax_tree.lua +++ b/syntax_tree.lua @@ -38,17 +38,17 @@ do local _len_0 = 1 for _index_0 = 1, #self do local b = self[_index_0] - _accum_0[_len_0] = tostring(b) + _accum_0[_len_0] = type(b) == 'string' and b:as_lua() or tostring(b) _len_0 = _len_0 + 1 end bits = _accum_0 end for k, v in pairs(self) do - if not (bits[k]) then - table.insert(bits, "[ " .. tostring(tostring(k)) .. "]=" .. tostring(tostring(v))) + if not (bits[k] or k == 'type' or k == 'source') then + table.insert(bits, tostring(k) .. "=" .. tostring(type(v) == 'string' and v:as_lua() or v)) end end - return "SyntaxTree{" .. tostring(table.concat(bits, ", ")) .. "}" + return tostring(self.type) .. "{" .. tostring(table.concat(bits, ", ")) .. "}" end, __eq = function(self, other) if type(self) ~= type(other) or #self ~= #other or getmetatable(self) ~= getmetatable(other) then @@ -146,6 +146,7 @@ do assert(self.type == "Action" or self.type == "MethodCall", "Only actions and method calls have arguments") local args = { } if self.type == "MethodCall" then + assert(#self == 2, "Can't get arguments for multiple method calls at once.") args[1] = self[1] local _list_0 = self[2] for _index_0 = 1, #_list_0 do @@ -166,6 +167,7 @@ do end, get_stub = function(self) if self.type == "MethodCall" then + assert(#self == 2, "Can't get the stubs of multiple method calls at once.") return self[2]:get_stub() end local stub_bits = { } @@ -222,8 +224,6 @@ getmetatable(SyntaxTree).__call = function(self, t) setmetatable(t, self.__base) if t.type == 'Action' then t.stub = t:get_stub() - elseif t.type == 'MethodCall' then - t.stub = t[2]:get_stub() end return t end diff --git a/syntax_tree.moon b/syntax_tree.moon index e2bbb4e..d9f9da1 100644 --- a/syntax_tree.moon +++ b/syntax_tree.moon @@ -19,11 +19,11 @@ class SyntaxTree @__type: "Syntax Tree" __tostring: => - bits = [tostring(b) for b in *@] + bits = [type(b) == 'string' and b\as_lua! or tostring(b) for b in *@] for k,v in pairs(@) - unless bits[k] - table.insert(bits, "[ #{tostring(k)}]=#{tostring(v)}") - return "SyntaxTree{#{table.concat(bits, ", ")}}" + unless bits[k] or k == 'type' or k == 'source' + table.insert(bits, "#{k}=#{type(v) == 'string' and v\as_lua! or v}") + return "#{@type}{#{table.concat(bits, ", ")}}" __eq: (other)=> return false if type(@) != type(other) or #@ != #other or getmetatable(@) != getmetatable(other) @@ -76,6 +76,7 @@ class SyntaxTree assert(@type == "Action" or @type == "MethodCall", "Only actions and method calls have arguments") args = {} if @type == "MethodCall" + assert(#@ == 2, "Can't get arguments for multiple method calls at once.") args[1] = @[1] for tok in *@[2] if type(tok) != 'string' then args[#args+1] = tok @@ -86,6 +87,7 @@ class SyntaxTree get_stub: => if @type == "MethodCall" + assert(#@ == 2, "Can't get the stubs of multiple method calls at once.") return @[2]\get_stub! stub_bits = {} arg_i = 1 @@ -109,8 +111,6 @@ getmetatable(SyntaxTree).__call = (t)=> setmetatable(t, @__base) if t.type == 'Action' t.stub = t\get_stub! - elseif t.type == 'MethodCall' - t.stub = t[2]\get_stub! return t return SyntaxTree