Initial version of new pegfile. works (ish)

This commit is contained in:
Bruce Hill 2018-07-12 16:14:29 -07:00
parent ba2b83d566
commit f908bb49b3
3 changed files with 121 additions and 147 deletions

159
nomsu.peg
View File

@ -1,24 +1,37 @@
-- Nomsu version 1
file:
shebang?
(ignored_line %nl)*
(file_chunks / block / action / expression)?
(%nl ignored_line)*
(!. / (({} (.* -> "Parse error") %userdata) => error))
-- Nomsu version 2
file (File):
{:curr_indent: '' :}
blank_line*
(chunk (nl_nodent chunk_delimeter nl_nodent chunk)*)?
blank_line*
(!. / (!! .* -> "Parse error" !!))
shebang:
("#!" (!"nomsu" !%nl .)* "nomsu" ((%ws* "-V" %ws* {[0-9]+ ("." [0-9]+)*}) / {''}) %ws* (%nl / !.) %userdata) => Version
nodent: =curr_indent !(" ")
indent: =curr_indent " " !(" ")
dedent: !(=curr_indent) (" ")*
eol: %ws* (!. / &%nl)
file_chunks (FileChunks):
{| (block/action/expression) (nodent chunk_delimeter nodent (block/action/expression))+ |}
chunk_delimeter: "~~~" (("~")*)
comment (Comment):
"#" {~ [^%nl]* (%nl+ ({:curr_indent: indent :} -> '') [^%nl]* (%nl+ (=curr_indent -> '') [^%nl]*)* (!. / nl_dedent))? ~}
inline_comment (Comment):
"(#" {~ (inline_comment / [^%nl])* ~} "#)"
blank_line: %nl eol
nl_nodent: blank_line* %nl nodent
nl_indent: blank_line* %nl {:curr_indent: indent :}
nl_dedent: blank_line* %nl &dedent
chunk: !chunk_delimeter (block/action/expression)
chunk_delimeter: ("~")^+3
inline_block (Block):
{| inline_statement (%ws* ";" %ws* inline_statement)+ |}
(inline_comment / inline_statement) (%ws* ";" %ws* (inline_comment / inline_statement))+
block (Block):
{| statement (nodent !("~") (statement / (({} ([^%nl]* -> "Unexpected character while parsing block line") %userdata) => error)))+ |}
block_line (nl_nodent block_line)+
block_line:
comment / inline_comment / statement / (!! [^%nl]* -> "Unexpected character while parsing block line" !!)
statement: (action / expression) (eol / (({} ([^%nl]* -> "Unexpected character while parsing line") %userdata) => error))
statement: (action / expression) (eol / (!! [^%nl]* -> "Unexpected character while parsing line" !!))
inline_statement: inline_action / inline_expression
noindex_inline_expression:
@ -27,64 +40,56 @@ noindex_inline_expression:
%ws* (inline_block / inline_action / inline_expression) %ws*
(%ws* ',' %ws* (inline_block / inline_action / inline_expression) %ws*)*
(")"
/ (({} ((!. / &%nl) -> 'Line ended without finding a closing )-parenthesis') %userdata) => error)
/ (({} ([^%nl]* -> 'Unexpected character while parsing subexpression') %userdata) => error)
/ (!! eol -> 'Line ended without finding a closing )-parenthesis' !!)
/ (!! [^%nl]+ -> 'Unexpected character while parsing subexpression' !!)
)
)
inline_expression:
index_chain / noindex_inline_expression
inline_expression: index_chain / noindex_inline_expression
indented_expression:
indented_text / indented_nomsu / indented_list / indented_dict
/ ("(..)"? indent
/ (("(..)" / ":")? nl_indent
(block / action / expression)
(dedent / (({} (non_dedent_error -> "Unexpected character while parsing indented expression") %userdata) => error))
(!. / &nl_dedent / (!! (!nl_dedent .)* -> "Unexpected character while parsing indented expression" !!))
)
expression:
inline_expression
/ (":" %ws* ((inline_block / inline_action / inline_expression) eol
/ (({} (eol -> "Missing expression after the ':'") %userdata) => error)))
/ (!! eol -> "Missing expression after the ':'" !!)))
/ indented_expression
inline_nomsu (EscapedNomsu): "\" {| inline_expression |}
inline_nomsu (EscapedNomsu): "\" inline_expression
indented_nomsu (EscapedNomsu):
"\" {|
"\" (
noindex_inline_expression
/ (":" %ws* ((inline_block / inline_action / inline_expression) eol
/ (({} (eol -> "Missing expression after the ':'") %userdata) => error)))
/ indented_expression |}
/ (!! eol -> "Missing expression after the ':'" !!)))
/ indented_expression)
index_chain (IndexChain):
{| noindex_inline_expression ("." (text_word / noindex_inline_expression))+ |}
noindex_inline_expression ("." (text_word / noindex_inline_expression))+
-- Actions need either at least 1 word, or at least 2 tokens
inline_action (Action):
!chunk_delimeter
{|
( (inline_expression (%ws* (inline_expression / word))+)
/ (word (%ws* (inline_expression / word))*))
(%ws* ":" %ws* (inline_block / inline_action / inline_expression
/ (({} ('' -> "Missing expression after the ':'") %userdata) => error)))?
|}
/ (!! '' -> "Missing expression after the ':'" !!)))?
action (Action):
!chunk_delimeter
{|
(expression ((nodent "..")? %ws* (expression / word))+)
/ (word ((nodent "..")? %ws* (expression / word))*)
|}
(expression ((nl_nodent "..")? %ws* (expression / word))+)
/ (word ((nl_nodent "..")? %ws* (expression / word))*)
word: !number { %operator_char+ / %ident_char+ }
word: !number ( %operator_char+ / %ident_char+ )
text_word (Text): {| word |}
text_word (Text): word
inline_text (Text):
!('".."' eol)
'"' {|
'"'
({~ (('\"' -> '"') / ('\\' -> '\') / %escaped_char / [^%nl\"])+ ~}
/ inline_text_interpolation)*
|} ('"' / (
(({} (eol->'Line ended before finding a closing double quotation mark') %userdata) => error)
/(({} ([^%nl]*->'Unexpected character while parsing Text') %userdata) => error)
))
('"'
/ (!! eol -> 'Line ended before finding a closing double quotation mark' !!)
/ (!! [^%nl]* -> 'Unexpected character while parsing Text' !!))
inline_text_interpolation:
"\" (
variable / inline_list / inline_dict / inline_text
@ -92,73 +97,63 @@ inline_text_interpolation:
%ws* (inline_block / inline_action / inline_expression) %ws*
(%ws* ',' %ws* (inline_block / inline_action / inline_expression) %ws*)*
(")"
/ (({} (&%nl -> 'Line ended without finding a closing )-parenthesis') %userdata) => error)
/ (({} ([^%nl]* -> 'Unexpected character while parsing Text interpolation') %userdata) => error))
)
/ (!! &%nl -> 'Line ended without finding a closing )-parenthesis' !!)
/ (!! [^%nl]* -> 'Unexpected character while parsing Text interpolation' !!)))
)
-- Have to use "%indent" instead of "indent" etc. to avoid messing up text lines that start with "#"
indented_text (Text):
'".."' eol %nl {|
{~ (%nl*) (%indent -> "") ~}
(indented_plain_text / text_interpolation / {~ %nl+ (%nodent -> "") ~})*
|} (((!.) %dedent) / (&(%nl %dedent)) / (({} (non_dedent_error -> "Unexpected character while parsing Text") %userdata) => error))
'".."' eol %nl
{~ (%nl*) ({:curr_indent: indent :} -> "") ~}
(indented_plain_text / text_interpolation / {~ %nl+ (nodent -> "") ~})*
(!. / &nl_dedent / (!! (!nl_dedent .)* -> "Unexpected character while parsing Text" !!))
indented_plain_text (Text):
{| {~ (("\\" -> "\") / (("\" nodent "..") -> "") / (!text_interpolation "\") / [^%nl\]+)+
(%nl+ (%nodent -> ""))* ~} |}
{~ (("\\" -> "\") / (("\" nl_nodent "..") -> "") / (!text_interpolation "\") / [^%nl\]+)+
(%nl+ (nodent -> ""))* ~}
text_interpolation:
inline_text_interpolation / ("\" indented_expression nodent "..")
inline_text_interpolation / ("\" indented_expression nl_nodent "..")
number (Number): {| (("-"? (([0-9]+ "." [0-9]+) / ("." [0-9]+) / ([0-9]+)))-> tonumber) |}
number (Number): (("-"? (([0-9]+ "." [0-9]+) / ("." [0-9]+) / ([0-9]+)))-> tonumber)
-- Variables can be nameless (i.e. just %) and can't contain operators like apostrophe
-- which is a hack to allow %'s to parse as "%" and "' s" separately
variable (Var): "%" {| {(%ident_char+ ((!"'" %operator_char+) / %ident_char+)*)?} |}
variable (Var): "%" {(%ident_char+ ((!"'" %operator_char+) / %ident_char+)*)?}
inline_list (List):
!('[..]')
"[" %ws*
{| (inline_list_item (%ws* ',' %ws* inline_list_item)* (%ws* ',')?)? |} %ws*
(inline_list_item (%ws* ',' %ws* inline_list_item)* (%ws* ',')?)? %ws*
("]" / (","? (
(({} (eol->"Line ended before finding a closing ]-bracket") %userdata) => error)
/(({} ([^%nl]*->"Unexpected character while parsing List") %userdata) => error)
(!! eol -> "Line ended before finding a closing ]-bracket" !!)
/(!! [^%nl]* -> "Unexpected character while parsing List" !!)
)))
indented_list (List):
"[..]" indent
{| list_line (nodent list_line)* |}
(dedent / ((","? {} (non_dedent_error -> "Unexpected character while parsing List") %userdata) => error))
"[..]" nl_indent
list_line (nl_nodent list_line)*
(&nl_dedent / (","? (!! (!nl_dedent .)* -> "Unexpected character while parsing List" !!)))
list_line:
((action / expression) !(%ws* ','))
/ (inline_list_item ((%ws* ',' %ws*) list_line?)?)
(inline_list_item %ws* "," %ws*)+ eol
/ (inline_list_item %ws* "," %ws*)* (action / expression)
inline_list_item: inline_block / inline_action / inline_expression
inline_dict (Dict):
!('{..}')
"{" %ws*
{| (inline_dict_entry (%ws* ',' %ws* inline_dict_entry)*)? |} %ws*
(inline_dict_entry (%ws* ',' %ws* inline_dict_entry)*)? %ws*
("}" / (","? (
(({} (%ws* eol->"Line ended before finding a closing }-brace") %userdata) => error)
/ (({} ([^%nl]*->"Unexpected character while parsing Dictionary") %userdata) => error)
(!! %ws* eol -> "Line ended before finding a closing }-brace" !!)
/ (!! [^%nl]* -> "Unexpected character while parsing Dictionary" !!)
)))
indented_dict (Dict):
"{..}" indent
{| dict_line (nodent dict_line)* |}
(dedent / ((","? {} (non_dedent_error -> "Unexpected character while parsing Dictionary") %userdata) => error))
"{..}" nl_indent
dict_line (nl_nodent dict_line)*
(&nl_dedent / (","? (!! (!nl_dedent .)* -> "Unexpected character while parsing Dictionary" !!)))
dict_line:
(dict_entry !(%ws* ',')) / (inline_dict_entry (%ws* ',' %ws dict_line?)?)
(inline_dict_entry %ws* "," %ws*)+ eol
/ (inline_dict_entry %ws* "," %ws*)* dict_entry
dict_entry(DictEntry):
{| dict_key (%ws* ":" %ws* (action / expression))? |}
dict_key (%ws* ":" %ws* (action / expression))?
inline_dict_entry(DictEntry):
{| dict_key (%ws* ":" %ws* (inline_block / inline_action / inline_expression)?)? |}
dict_key (%ws* ":" %ws* (inline_block / inline_action / inline_expression)?)?
dict_key:
text_word / inline_expression
comment: ("#" {} {~[^%nl]* (%nl+ (%indent -> '') [^%nl]* (%nl+ (%nodent -> '') [^%nl]*)* %dedent)?~} %userdata) => Comment
eol_comment: ("#" {} {[^%nl]*} %userdata) => Comment
eol: %ws* eol_comment? (!. / &%nl)
ignored_line: (%nodent comment) / (%ws* (!. / &%nl))
indent: eol (%nl ignored_line)* %nl %indent (comment (%nl ignored_line)* nodent)?
nodent: eol (%nl ignored_line)* %nl %nodent
dedent: eol (%nl ignored_line)* (((!.) %dedent) / (&(%nl %dedent)))
non_dedent_error: (!dedent .)* eol (%nl ignored_line)* (!. / &%nl)

View File

@ -8,6 +8,11 @@ do
local _obj_0 = string
match, sub = _obj_0.match, _obj_0.sub
end
local insert, remove
do
local _obj_0 = table
insert, remove = _obj_0.insert, _obj_0.remove
end
local files = require('files')
local NomsuCode, LuaCode, Source
do
@ -41,25 +46,6 @@ do
_with_0.operator_char = S("'`~!@$^&*-+=|<>?/")
_with_0.utf8_char = (R("\194\223") * R("\128\191") + R("\224\239") * R("\128\191") * R("\128\191") + R("\240\244") * R("\128\191") * R("\128\191") * R("\128\191"))
_with_0.ident_char = R("az", "AZ", "09") + P("_") + _with_0.utf8_char
_with_0.indent = Cmt(Carg(1), function(self, start, userdata)
local indented = userdata.indent .. ' '
if sub(self, start, start + #indented - 1) == indented then
userdata.indent = indented
return start + #indented
end
end)
_with_0.dedent = Cmt(Carg(1), function(self, start, userdata)
local dedented = sub(userdata.indent, 1, -5)
if #match(self, "^[ ]*", start) <= #dedented then
userdata.indent = dedented
return start
end
end)
_with_0.nodent = Cmt(Carg(1), function(self, start, userdata)
if sub(self, start, start + #userdata.indent - 1) == userdata.indent then
return start + #userdata.indent
end
end)
_with_0.userdata = Carg(1)
_with_0.error = function(src, end_pos, start_pos, err_msg, userdata)
local seen_errors = userdata.errors
@ -93,14 +79,6 @@ do
seen_errors[start_pos] = err_msg
return true
end
_with_0.Comment = function(src, end_pos, start_pos, value, userdata)
userdata.comments[start_pos] = value
return true
end
_with_0.Version = function(src, end_pos, version, userdata)
userdata.version = version
return true
end
NOMSU_DEFS = _with_0
end
setmetatable(NOMSU_DEFS, {
@ -113,6 +91,19 @@ setmetatable(NOMSU_DEFS, {
value.source = Source(_with_0.filename, _with_0.start + start - 1, _with_0.start + stop - 1)
end
end
if key == "Comment" then
value = value[1]
else
local comments = { }
for i = #value, 1, -1 do
if type(value[i]) == 'table' and value[i].type == "Comment" then
insert(comments, remove(value, i))
end
end
if #comments > 0 then
value.comments = comments
end
end
setmetatable(value, AST[key])
if value.__init then
value:__init()
@ -126,18 +117,22 @@ setmetatable(NOMSU_DEFS, {
local Parser = { }
local NOMSU_PATTERN
do
local peg_tidier = re.compile([[ file <- %nl* version? %nl* {~ (def/comment) (%nl+ (def/comment))* %nl* ~}
version <- "--" (!"version" [^%nl])* "version" ([ ])* (([0-9])+ -> set_version) ([^%nl])*
local peg_tidier = re.compile([[ file <- %nl* version %nl* {~ (def/comment) (%nl+ (def/comment))* %nl* ~}
version <- "--" (!"version" [^%nl])* "version" (" ")* (([0-9])+ -> set_version) ([^%nl])*
def <- anon_def / captured_def
anon_def <- ({ident} (" "*) ":"
{((%nl " "+ [^%nl]*)+) / ([^%nl]*)}) -> "%1 <- %2"
{~ ((%nl " "+ def_line?)+) / def_line ~}) -> "%1 <- %2"
captured_def <- ({ident} (" "*) "(" {ident} ")" (" "*) ":"
{((%nl " "+ [^%nl]*)+) / ([^%nl]*)}) -> "%1 <- (({} %3 {} %%userdata) -> %2)"
{~ ((%nl " "+ def_line?)+) / def_line ~}) -> "%1 <- (({} {| %3 |} {} %%userdata) -> %2)"
def_line <- (err / [^%nl])+
err <- ("(!!" { (!("!!)") .)* } "!!)") -> "(({} (%1) %%userdata) => error)"
ident <- [a-zA-Z_][a-zA-Z0-9_]*
comment <- "--" [^%nl]*
]], {
-- Callback for the grammar's `set_version` capture: records the peg file's version number on Parser.
set_version = function(v)
Parser.version = tonumber(v)
end,
-- `nl` must be its own entry in the defs table; as a second right-hand value of the
-- assignment above it was silently discarded and never reached re.compile.
nl = NOMSU_DEFS.nl
})
local peg_file = io.open("nomsu.peg")

View File

@ -4,6 +4,7 @@ re = require 're'
lpeg.setmaxstack 10000
{:P,:R,:S,:C,:Cmt,:Carg} = lpeg
{:match, :sub} = string
{:insert, :remove} = table
files = require 'files'
{:NomsuCode, :LuaCode, :Source} = require "code_obj"
AST = require "nomsu_tree"
@ -25,26 +26,6 @@ NOMSU_DEFS = with {}
R("\240\244")*R("\128\191")*R("\128\191")*R("\128\191"))
.ident_char = R("az","AZ","09") + P("_") + .utf8_char
-- If the line begins with #indent+4 spaces, the pattern matches exactly those spaces
-- (and no more) and adds them to the current indent.
.indent = Cmt Carg(1), (start, userdata)=>
indented = userdata.indent..' '
if sub(@, start, start+#indented-1) == indented
userdata.indent = indented
return start + #indented
-- If the number of leading space characters is <= the number of spaces in the current
-- indent minus 4, this pattern matches and decrements the current indent exactly once.
.dedent = Cmt Carg(1), (start, userdata)=>
dedented = sub(userdata.indent, 1, -5)
if #match(@, "^[ ]*", start) <= #dedented
userdata.indent = dedented
return start
-- If the number of leading space characters is >= the number of spaces in the current
-- indent, this pattern matches and does not modify the indent.
.nodent = Cmt Carg(1), (start, userdata)=>
if sub(@, start, start+#userdata.indent-1) == userdata.indent
return start + #userdata.indent
.userdata = Carg(1)
.error = (src,end_pos,start_pos,err_msg,userdata)->
@ -72,19 +53,20 @@ NOMSU_DEFS = with {}
seen_errors[start_pos] = err_msg
return true
.Comment = (src,end_pos,start_pos,value,userdata)->
userdata.comments[start_pos] = value
return true
.Version = (src,end_pos,version,userdata)->
userdata.version = version
return true
setmetatable(NOMSU_DEFS, {__index:(key)=>
make_node = (start, value, stop, userdata)->
if userdata.source
with userdata.source
value.source = Source(.filename, .start + start-1, .start + stop-1)
if key == "Comment"
value = value[1]
else
comments = {}
for i=#value,1,-1
if type(value[i]) == 'table' and value[i].type == "Comment"
insert comments, remove(value, i)
if #comments > 0
value.comments = comments
setmetatable(value, AST[key])
if value.__init then value\__init!
return value
@ -98,16 +80,18 @@ NOMSU_PATTERN = do
-- Just for cleanliness, I put the language spec in its own file using a slightly modified
-- version of the lpeg.re syntax.
peg_tidier = re.compile [[
file <- %nl* version? %nl* {~ (def/comment) (%nl+ (def/comment))* %nl* ~}
version <- "--" (!"version" [^%nl])* "version" ([ ])* (([0-9])+ -> set_version) ([^%nl])*
file <- %nl* version %nl* {~ (def/comment) (%nl+ (def/comment))* %nl* ~}
version <- "--" (!"version" [^%nl])* "version" (" ")* (([0-9])+ -> set_version) ([^%nl])*
def <- anon_def / captured_def
anon_def <- ({ident} (" "*) ":"
{((%nl " "+ [^%nl]*)+) / ([^%nl]*)}) -> "%1 <- %2"
{~ ((%nl " "+ def_line?)+) / def_line ~}) -> "%1 <- %2"
captured_def <- ({ident} (" "*) "(" {ident} ")" (" "*) ":"
{((%nl " "+ [^%nl]*)+) / ([^%nl]*)}) -> "%1 <- (({} %3 {} %%userdata) -> %2)"
{~ ((%nl " "+ def_line?)+) / def_line ~}) -> "%1 <- (({} {| %3 |} {} %%userdata) -> %2)"
def_line <- (err / [^%nl])+
err <- ("(!!" { (!("!!)") .)* } "!!)") -> "(({} (%1) %%userdata) => error)"
ident <- [a-zA-Z_][a-zA-Z0-9_]*
comment <- "--" [^%nl]*
]], {set_version: (v) -> Parser.version = tonumber(v)}
]], {set_version: ((v) -> Parser.version = tonumber(v)), nl:NOMSU_DEFS.nl} -- parens keep `nl` a table entry instead of part of the function body
peg_file = io.open("nomsu.peg")
if not peg_file and package.nomsupath
for path in package.nomsupath\gmatch("[^;]+")