From f908bb49b30dcead0c5123a513a1ee9e70dac412 Mon Sep 17 00:00:00 2001 From: Bruce Hill Date: Thu, 12 Jul 2018 16:14:29 -0700 Subject: [PATCH] Initial version of new pegfile. works (ish) --- nomsu.peg | 159 +++++++++++++++++++++++++--------------------------- parser.lua | 59 +++++++++---------- parser.moon | 50 ++++++----------- 3 files changed, 121 insertions(+), 147 deletions(-) diff --git a/nomsu.peg b/nomsu.peg index 4598598..62f1900 100644 --- a/nomsu.peg +++ b/nomsu.peg @@ -1,24 +1,37 @@ --- Nomsu version 1 -file: - shebang? - (ignored_line %nl)* - (file_chunks / block / action / expression)? - (%nl ignored_line)* - (!. / (({} (.* -> "Parse error") %userdata) => error)) +-- Nomsu version 2 +file (File): + {:curr_indent: '' :} + blank_line* + (chunk (nl_nodent chunk_delimeter nl_nodent chunk)*)? + blank_line* + (!. / (!! .* -> "Parse error" !!)) -shebang: - ("#!" (!"nomsu" !%nl .)* "nomsu" ((%ws* "-V" %ws* {[0-9]+ ("." [0-9]+)*}) / {''}) %ws* (%nl / !.) %userdata) => Version +nodent: =curr_indent !(" ") +indent: =curr_indent " " !(" ") +dedent: !(=curr_indent) (" ")* +eol: %ws* (!. / &%nl) -file_chunks (FileChunks): - {| (block/action/expression) (nodent chunk_delimeter nodent (block/action/expression))+ |} -chunk_delimeter: "~~~" (("~")*) +comment (Comment): + "#" {~ [^%nl]* (%nl+ ({:curr_indent: indent :} -> '') [^%nl]* (%nl+ (=curr_indent -> '') [^%nl]*)* (!. / nl_dedent))? ~} +inline_comment (Comment): + "(#" {~ (inline_comment / [^%nl])* ~} "#)" + +blank_line: %nl eol +nl_nodent: blank_line* %nl nodent +nl_indent: blank_line* %nl {:curr_indent: indent :} +nl_dedent: blank_line* %nl &dedent + +chunk: !chunk_delimeter (block/action/expression) +chunk_delimeter: ("~")^+3 inline_block (Block): - {| inline_statement (%ws* ";" %ws* inline_statement)+ |} + (inline_comment / inline_statement) (%ws* ";" %ws* (inline_comment / inline_statement))+ block (Block): - {| statement (nodent !("~") (statement / (({} ([^%nl]* -> "Unexpected character while parsing block line") %userdata) => error)))+ |} + block_line (nl_nodent block_line)+ +block_line: + comment / inline_comment / statement / (!! [^%nl]* -> "Unexpected character while parsing block line" !!) -statement: (action / expression) (eol / (({} ([^%nl]* -> "Unexpected character while parsing line") %userdata) => error)) +statement: (action / expression) (eol / (!! [^%nl]* -> "Unexpected character while parsing line" !!)) inline_statement: inline_action / inline_expression noindex_inline_expression: @@ -27,64 +40,56 @@ noindex_inline_expression: %ws* (inline_block / inline_action / inline_expression) %ws* (%ws* ',' %ws* (inline_block / inline_action / inline_expression) %ws*)* (")" - / (({} ((!. / &%nl) -> 'Line ended without finding a closing )-parenthesis') %userdata) => error) - / (({} ([^%nl]* -> 'Unexpected character while parsing subexpression') %userdata) => error) + / (!! eol -> 'Line ended without finding a closing )-parenthesis' !!) + / (!! [^%nl]+ -> 'Unexpected character while parsing subexpression' !!) ) ) -inline_expression: - index_chain / noindex_inline_expression +inline_expression: index_chain / noindex_inline_expression indented_expression: indented_text / indented_nomsu / indented_list / indented_dict - / ("(..)"? indent + / (("(..)" / ":")? nl_indent (block / action / expression) - (dedent / (({} (non_dedent_error -> "Unexpected character while parsing indented expression") %userdata) => error)) + (!. / &nl_dedent / (!! (!nl_dedent .)* -> "Unexpected character while parsing indented expression" !!)) ) expression: inline_expression / (":" %ws* ((inline_block / inline_action / inline_expression) eol - / (({} (eol -> "Missing expression after the ':'") %userdata) => error))) + / (!! eol -> "Missing expression after the ':'" !!))) / indented_expression -inline_nomsu (EscapedNomsu): "\" {| inline_expression |} +inline_nomsu (EscapedNomsu): "\" inline_expression indented_nomsu (EscapedNomsu): - "\" {| + "\" ( noindex_inline_expression / (":" %ws* ((inline_block / inline_action / inline_expression) eol - / (({} (eol -> "Missing expression after the ':'") %userdata) => error))) - / indented_expression |} + / (!! eol -> "Missing expression after the ':'" !!))) + / indented_expression) index_chain (IndexChain): - {| noindex_inline_expression ("." (text_word / noindex_inline_expression))+ |} + noindex_inline_expression ("." (text_word / noindex_inline_expression))+ -- Actions need either at least 1 word, or at least 2 tokens inline_action (Action): - !chunk_delimeter - {| ( (inline_expression (%ws* (inline_expression / word))+) / (word (%ws* (inline_expression / word))*)) (%ws* ":" %ws* (inline_block / inline_action / inline_expression - / (({} ('' -> "Missing expression after the ':'") %userdata) => error)))? - |} + / (!! '' -> "Missing expression after the ':'" !!)))? action (Action): - !chunk_delimeter - {| - (expression ((nodent "..")? %ws* (expression / word))+) - / (word ((nodent "..")? %ws* (expression / word))*) - |} + (expression ((nl_nodent "..")? %ws* (expression / word))+) + / (word ((nl_nodent "..")? %ws* (expression / word))*) -word: !number { %operator_char+ / %ident_char+ } +word: !number ( %operator_char+ / %ident_char+ ) -text_word (Text): {| word |} +text_word (Text): word inline_text (Text): !('".."' eol) - '"' {| + '"' ({~ (('\"' -> '"') / ('\\' -> '\') / %escaped_char / [^%nl\"])+ ~} / inline_text_interpolation)* - |} ('"' / ( - (({} (eol->'Line ended before finding a closing double quotation mark') %userdata) => error) - /(({} ([^%nl]*->'Unexpected character while parsing Text') %userdata) => error) - )) + ('"' + / (!! eol -> 'Line ended before finding a closing double quotation mark' !!) + / (!! [^%nl]* -> 'Unexpected character while parsing Text' !!)) inline_text_interpolation: "\" ( variable / inline_list / inline_dict / inline_text @@ -92,73 +97,63 @@ inline_text_interpolation: %ws* (inline_block / inline_action / inline_expression) %ws* (%ws* ',' %ws* (inline_block / inline_action / inline_expression) %ws*)* (")" - / (({} (&%nl -> 'Line ended without finding a closing )-parenthesis') %userdata) => error) - / (({} ([^%nl]* -> 'Unexpected character while parsing Text interpolation') %userdata) => error)) - ) + / (!! &%nl -> 'Line ended without finding a closing )-parenthesis' !!) + / (!! [^%nl]* -> 'Unexpected character while parsing Text interpolation' !!))) ) -- Have to use "%indent" instead of "indent" etc. to avoid messing up text lines that start with "#" indented_text (Text): - '".."' eol %nl {| - {~ (%nl*) (%indent -> "") ~} - (indented_plain_text / text_interpolation / {~ %nl+ (%nodent -> "") ~})* - |} (((!.) %dedent) / (&(%nl %dedent)) / (({} (non_dedent_error -> "Unexpected character while parsing Text") %userdata) => error)) + '".."' eol %nl + {~ (%nl*) ({:curr_indent: indent :} -> "") ~} + (indented_plain_text / text_interpolation / {~ %nl+ (nodent -> "") ~})* + (!. / &nl_dedent / (!! (!nl_dedent .)* -> "Unexpected character while parsing Text" !!)) indented_plain_text (Text): - {| {~ (("\\" -> "\") / (("\" nodent "..") -> "") / (!text_interpolation "\") / [^%nl\]+)+ - (%nl+ (%nodent -> ""))* ~} |} + {~ (("\\" -> "\") / (("\" nl_nodent "..") -> "") / (!text_interpolation "\") / [^%nl\]+)+ + (%nl+ (nodent -> ""))* ~} text_interpolation: - inline_text_interpolation / ("\" indented_expression nodent "..") + inline_text_interpolation / ("\" indented_expression nl_nodent "..") -number (Number): {| (("-"? (([0-9]+ "." [0-9]+) / ("." [0-9]+) / ([0-9]+)))-> tonumber) |} +number (Number): (("-"? (([0-9]+ "." [0-9]+) / ("." [0-9]+) / ([0-9]+)))-> tonumber) -- Variables can be nameless (i.e. just %) and can't contain operators like apostrophe -- which is a hack to allow %'s to parse as "%" and "' s" separately -variable (Var): "%" {| {(%ident_char+ ((!"'" %operator_char+) / %ident_char+)*)?} |} +variable (Var): "%" {(%ident_char+ ((!"'" %operator_char+) / %ident_char+)*)?} inline_list (List): !('[..]') "[" %ws* - {| (inline_list_item (%ws* ',' %ws* inline_list_item)* (%ws* ',')?)? |} %ws* + (inline_list_item (%ws* ',' %ws* inline_list_item)* (%ws* ',')?)? %ws* ("]" / (","? ( - (({} (eol->"Line ended before finding a closing ]-bracket") %userdata) => error) - /(({} ([^%nl]*->"Unexpected character while parsing List") %userdata) => error) + (!! eol -> "Line ended before finding a closing ]-bracket" !!) + /(!! [^%nl]* -> "Unexpected character while parsing List" !!) ))) indented_list (List): - "[..]" indent - {| list_line (nodent list_line)* |} - (dedent / ((","? {} (non_dedent_error -> "Unexpected character while parsing List") %userdata) => error)) + "[..]" nl_indent + list_line (nl_nodent list_line)* + (&nl_dedent / (","? (!! (!nl_dedent .)* -> "Unexpected character while parsing List" !!))) list_line: - ((action / expression) !(%ws* ',')) - / (inline_list_item ((%ws* ',' %ws*) list_line?)?) + (inline_list_item %ws* "," %ws*)+ eol + / (inline_list_item %ws* "," %ws*)* (action / expression) inline_list_item: inline_block / inline_action / inline_expression inline_dict (Dict): !('{..}') "{" %ws* - {| (inline_dict_entry (%ws* ',' %ws* inline_dict_entry)*)? |} %ws* + (inline_dict_entry (%ws* ',' %ws* inline_dict_entry)*)? %ws* ("}" / (","? ( - (({} (%ws* eol->"Line ended before finding a closing }-brace") %userdata) => error) - / (({} ([^%nl]*->"Unexpected character while parsing Dictionary") %userdata) => error) + (!! %ws* eol -> "Line ended before finding a closing }-brace" !!) + / (!! [^%nl]* -> "Unexpected character while parsing Dictionary" !!) ))) indented_dict (Dict): - "{..}" indent - {| dict_line (nodent dict_line)* |} - (dedent / ((","? {} (non_dedent_error -> "Unexpected character while parsing Dictionary") %userdata) => error)) + "{..}" nl_indent + dict_line (nl_nodent dict_line)* + (&nl_dedent / (","? (!! (!nl_dedent .)* -> "Unexpected character while parsing Dictionary" !!))) dict_line: - (dict_entry !(%ws* ',')) / (inline_dict_entry (%ws* ',' %ws dict_line?)?) + (inline_dict_entry %ws* "," %ws*)+ eol + / (inline_dict_entry %ws* "," %ws*)* dict_entry dict_entry(DictEntry): - {| dict_key (%ws* ":" %ws* (action / expression))? |} + dict_key (%ws* ":" %ws* (action / expression))? inline_dict_entry(DictEntry): - {| dict_key (%ws* ":" %ws* (inline_block / inline_action / inline_expression)?)? |} + dict_key (%ws* ":" %ws* (inline_block / inline_action / inline_expression)?)? dict_key: text_word / inline_expression - -comment: ("#" {} {~[^%nl]* (%nl+ (%indent -> '') [^%nl]* (%nl+ (%nodent -> '') [^%nl]*)* %dedent)?~} %userdata) => Comment -eol_comment: ("#" {} {[^%nl]*} %userdata) => Comment - -eol: %ws* eol_comment? (!. / &%nl) -ignored_line: (%nodent comment) / (%ws* (!. / &%nl)) -indent: eol (%nl ignored_line)* %nl %indent (comment (%nl ignored_line)* nodent)? -nodent: eol (%nl ignored_line)* %nl %nodent -dedent: eol (%nl ignored_line)* (((!.) %dedent) / (&(%nl %dedent))) -non_dedent_error: (!dedent .)* eol (%nl ignored_line)* (!. / &%nl) diff --git a/parser.lua b/parser.lua index 2eed78c..c21e95d 100644 --- a/parser.lua +++ b/parser.lua @@ -8,6 +8,11 @@ do local _obj_0 = string match, sub = _obj_0.match, _obj_0.sub end +local insert, remove +do + local _obj_0 = table + insert, remove = _obj_0.insert, _obj_0.remove +end local files = require('files') local NomsuCode, LuaCode, Source do @@ -41,25 +46,6 @@ do _with_0.operator_char = S("'`~!@$^&*-+=|<>?/") _with_0.utf8_char = (R("\194\223") * R("\128\191") + R("\224\239") * R("\128\191") * R("\128\191") + R("\240\244") * R("\128\191") * R("\128\191") * R("\128\191")) _with_0.ident_char = R("az", "AZ", "09") + P("_") + _with_0.utf8_char - _with_0.indent = Cmt(Carg(1), function(self, start, userdata) - local indented = userdata.indent .. ' ' - if sub(self, start, start + #indented - 1) == indented then - userdata.indent = indented - return start + #indented - end - end) - _with_0.dedent = Cmt(Carg(1), function(self, start, userdata) - local dedented = sub(userdata.indent, 1, -5) - if #match(self, "^[ ]*", start) <= #dedented then - userdata.indent = dedented - return start - end - end) - _with_0.nodent = Cmt(Carg(1), function(self, start, userdata) - if sub(self, start, start + #userdata.indent - 1) == userdata.indent then - return start + #userdata.indent - end - end) _with_0.userdata = Carg(1) _with_0.error = function(src, end_pos, start_pos, err_msg, userdata) local seen_errors = userdata.errors @@ -93,14 +79,6 @@ do seen_errors[start_pos] = err_msg return true end - _with_0.Comment = function(src, end_pos, start_pos, value, userdata) - userdata.comments[start_pos] = value - return true - end - _with_0.Version = function(src, end_pos, version, userdata) - userdata.version = version - return true - end NOMSU_DEFS = _with_0 end setmetatable(NOMSU_DEFS, { @@ -113,6 +91,19 @@ setmetatable(NOMSU_DEFS, { value.source = Source(_with_0.filename, _with_0.start + start - 1, _with_0.start + stop - 1) end end + if key == "Comment" then + value = value[1] + else + local comments = { } + for i = #value, 1, -1 do + if type(value[i]) == 'table' and value[i].type == "Comment" then + insert(comments, remove(value, i)) + end + end + if #comments > 0 then + value.comments = comments + end + end setmetatable(value, AST[key]) if value.__init then value:__init() @@ -126,18 +117,22 @@ setmetatable(NOMSU_DEFS, { local Parser = { } local NOMSU_PATTERN do - local peg_tidier = re.compile([[ file <- %nl* version? %nl* {~ (def/comment) (%nl+ (def/comment))* %nl* ~} - version <- "--" (!"version" [^%nl])* "version" ([ ])* (([0-9])+ -> set_version) ([^%nl])* + local peg_tidier = re.compile([[ file <- %nl* version %nl* {~ (def/comment) (%nl+ (def/comment))* %nl* ~} + version <- "--" (!"version" [^%nl])* "version" (" ")* (([0-9])+ -> set_version) ([^%nl])* def <- anon_def / captured_def anon_def <- ({ident} (" "*) ":" - {((%nl " "+ [^%nl]*)+) / ([^%nl]*)}) -> "%1 <- %2" + {~ ((%nl " "+ def_line?)+) / def_line ~}) -> "%1 <- %2" captured_def <- ({ident} (" "*) "(" {ident} ")" (" "*) ":" - {((%nl " "+ [^%nl]*)+) / ([^%nl]*)}) -> "%1 <- (({} %3 {} %%userdata) -> %2)" + {~ ((%nl " "+ def_line?)+) / def_line ~}) -> "%1 <- (({} {| %3 |} {} %%userdata) -> %2)" + def_line <- (err / [^%nl])+ + err <- ("(!!" { (!("!!)") .)* } "!!)") -> "(({} (%1) %%userdata) => error)" ident <- [a-zA-Z_][a-zA-Z0-9_]* comment <- "--" [^%nl]* ]], { set_version = function(v) - Parser.version = tonumber(v) + Parser.version = tonumber(v), { + nl = NOMSU_DEFS.nl + } end }) local peg_file = io.open("nomsu.peg") diff --git a/parser.moon b/parser.moon index 7635cbc..7786650 100644 --- a/parser.moon +++ b/parser.moon @@ -4,6 +4,7 @@ re = require 're' lpeg.setmaxstack 10000 {:P,:R,:S,:C,:Cmt,:Carg} = lpeg {:match, :sub} = string +{:insert, :remove} = table files = require 'files' {:NomsuCode, :LuaCode, :Source} = require "code_obj" AST = require "nomsu_tree" @@ -25,26 +26,6 @@ NOMSU_DEFS = with {} R("\240\244")*R("\128\191")*R("\128\191")*R("\128\191")) .ident_char = R("az","AZ","09") + P("_") + .utf8_char - -- If the line begins with #indent+4 spaces, the pattern matches *those* spaces - -- and adds them to the current indent (not any more). - .indent = Cmt Carg(1), (start, userdata)=> - indented = userdata.indent..' ' - if sub(@, start, start+#indented-1) == indented - userdata.indent = indented - return start + #indented - -- If the number of leading space characters is <= the number of spaces in the current - -- indent minus 4, this pattern matches and decrements the current indent exactly once. - .dedent = Cmt Carg(1), (start, userdata)=> - dedented = sub(userdata.indent, 1, -5) - if #match(@, "^[ ]*", start) <= #dedented - userdata.indent = dedented - return start - -- If the number of leading space characters is >= the number of spaces in the current - -- indent, this pattern matches and does not modify the indent. - .nodent = Cmt Carg(1), (start, userdata)=> - if sub(@, start, start+#userdata.indent-1) == userdata.indent - return start + #userdata.indent - .userdata = Carg(1) .error = (src,end_pos,start_pos,err_msg,userdata)-> @@ -72,19 +53,20 @@ NOMSU_DEFS = with {} seen_errors[start_pos] = err_msg return true - .Comment = (src,end_pos,start_pos,value,userdata)-> - userdata.comments[start_pos] = value - return true - - .Version = (src,end_pos,version,userdata)-> - userdata.version = version - return true - setmetatable(NOMSU_DEFS, {__index:(key)=> make_node = (start, value, stop, userdata)-> if userdata.source with userdata.source value.source = Source(.filename, .start + start-1, .start + stop-1) + if key == "Comment" + value = value[1] + else + comments = {} + for i=#value,1,-1 + if type(value[i]) == 'table' and value[i].type == "Comment" + insert comments, remove(value, i) + if #comments > 0 + value.comments = comments setmetatable(value, AST[key]) if value.__init then value\__init! return value @@ -98,16 +80,18 @@ NOMSU_PATTERN = do -- Just for cleanliness, I put the language spec in its own file using a slightly modified -- version of the lpeg.re syntax. peg_tidier = re.compile [[ - file <- %nl* version? %nl* {~ (def/comment) (%nl+ (def/comment))* %nl* ~} - version <- "--" (!"version" [^%nl])* "version" ([ ])* (([0-9])+ -> set_version) ([^%nl])* + file <- %nl* version %nl* {~ (def/comment) (%nl+ (def/comment))* %nl* ~} + version <- "--" (!"version" [^%nl])* "version" (" ")* (([0-9])+ -> set_version) ([^%nl])* def <- anon_def / captured_def anon_def <- ({ident} (" "*) ":" - {((%nl " "+ [^%nl]*)+) / ([^%nl]*)}) -> "%1 <- %2" + {~ ((%nl " "+ def_line?)+) / def_line ~}) -> "%1 <- %2" captured_def <- ({ident} (" "*) "(" {ident} ")" (" "*) ":" - {((%nl " "+ [^%nl]*)+) / ([^%nl]*)}) -> "%1 <- (({} %3 {} %%userdata) -> %2)" + {~ ((%nl " "+ def_line?)+) / def_line ~}) -> "%1 <- (({} {| %3 |} {} %%userdata) -> %2)" + def_line <- (err / [^%nl])+ + err <- ("(!!" { (!("!!)") .)* } "!!)") -> "(({} (%1) %%userdata) => error)" ident <- [a-zA-Z_][a-zA-Z0-9_]* comment <- "--" [^%nl]* - ]], {set_version: (v) -> Parser.version = tonumber(v)} + ]], {set_version: (v) -> Parser.version = tonumber(v), nl:NOMSU_DEFS.nl} peg_file = io.open("nomsu.peg") if not peg_file and package.nomsupath for path in package.nomsupath\gmatch("[^;]+")