nomsu/nomsu.4.peg

233 lines
9.0 KiB
Plaintext

-- Nomsu version 4
-- Entry rule. Binds the named group `curr_indent` to the leading run of spaces,
-- then tries, in order: one action / expression / block followed by `eol` and
-- end of input; `file_chunks`; or `empty_block`. Finally sets `curr_indent` to
-- %nil and requires that no input remains.
file <-
{:curr_indent: ' '* :}
(((action / expression / inline_block / indented_block) eol !.)
/ file_chunks / empty_block)
{:curr_indent: %nil :}
!.
-- A `#!` line that mentions `nomsu`, followed by one or more spaces, `-V`,
-- optional spaces and a run of digits/dots, then the rest of the line and a
-- newline or end of input.
shebang <- "#!" (!"nomsu" [^%nl])* "nomsu" ws+ "-V" ws* [0-9.]+ [^%nl]* (%nl / !.)
-- End of input: succeeds (consuming nothing) only when no character remains.
eof <- !.
-- A whole file split into chunks: an optional shebang (kept in the `shebang`
-- named group), one or more `top_block`s separated by `section_division` lines,
-- optional trailing blank lines, and optional error nodes for leftover input.
-- NOTE(review): the `(FileChunks)` tag is not standard `re` syntax; presumably
-- the grammar loader turns it into a typed node capture -- confirm there.
file_chunks (FileChunks) <-
{:shebang: shebang :}?
(top_block (nl_nodent section_division top_block)*)
blank_lines?
unexpected_indent? unexpected_chunk?
-- A top-level block: optionally preceded by blank lines or a comment, then one
-- or more statements, each separated from the previous one by `nl_nodent`.
top_block (Block) <-
((blank_lines nodent) / (comment nl_nodent))? statement (nl_nodent statement)*
-- A block with no statements: an optional comment and optional blank lines.
-- Both parts are optional, so this rule can match the empty string.
empty_block (Block) <-
comment? blank_lines?
-- Indentation equal to `curr_indent`. `unexpected_indent` is tried first, so a
-- line indented deeper than `curr_indent` becomes an error node that also
-- swallows the rest of that line.
nodent <- (unexpected_indent [^%nl]* / =curr_indent)
-- Indentation deeper than `curr_indent`: the current indentation followed by
-- one or more spaces or tabs. The substitution capture yields the matched text
-- with every tab replaced by the string literal given after `->`.
indent <- {~ =curr_indent (ws / (%tab -> ' '))+ ~}
-- A newline followed by any number of lines that hold either a comment at the
-- current indentation or nothing but spaces.
blank_lines <- %nl ((nodent comment / ws*) %nl)*
-- End of line: optional trailing spaces and an optional `#` comment, followed
-- by end of input or a newline. The newline is only looked at, not consumed.
eol <- ws* eol_comment? (!. / &%nl)
-- Line break onto a following line at the current indentation level.
nl_nodent <- blank_lines nodent
-- Line break onto a deeper-indented line. After the blank lines (and an
-- optional `tab_error` node) it rebinds `curr_indent` to the new indentation,
-- then skips any comment lines that open the indented region.
nl_indent <- blank_lines tab_error? {:curr_indent: indent :} (comment nl_nodent)*
-- A `#` comment that may span several lines. It takes the rest of the `#` line,
-- then any following lines indented deeper than `curr_indent` (that indentation
-- is replaced by '' in the captured text), then any newlines that are
-- themselves followed by another newline.
comment (Comment) <-
"#" {~ [^%nl]* (%nl+ (indent -> '') [^%nl]*)* (%nl &%nl)* ~}
-- A single-line `#` comment at the end of a line; captures the text after `#`
-- up to (not including) the newline.
eol_comment (Comment) <-
"#" {[^%nl]*}
-- Error recovery for the rest of a line: skips leading spaces, then
-- `_unexpected_code` consumes one or more non-newline characters and stores the
-- fixed message below in the `error` named group.
unexpected_code <- ws* _unexpected_code
_unexpected_code (Error) <-
{:error: {~ [^%nl]+ -> "Couldn't parse this code." ~} :}
-- Error recovery for the rest of the input: consumes every remaining character
-- (at least one) and stores the fixed message below in the `error` named group.
unexpected_chunk (Error) <-
{:error: {~ .+ -> "Couldn't parse this chunk of code." ~} :}
-- Error node for over-indentation: matches `curr_indent` followed by at least
-- one extra space, and sets the `error` and `hint` named groups to fixed text.
unexpected_indent (Error) <-
{:error: {~ (=curr_indent ws+) -> "This indentation is messed up." ~} :}
{:hint: {~ '' -> 'This line should either have the same indentation as the line above it, or exactly 4 spaces more.' ~} :}
-- Error node for an unclosed "(": succeeds only where `eol` matches, and sets
-- the `error` and `hint` named groups to fixed text.
missing_paren_err (Error) <-
{:error: {~ eol -> 'Line ended without finding a closing )-parenthesis' ~} :}
{:hint: {~ '' -> 'Put a ")" here' ~} :}
-- Error node for an unclosed text literal: succeeds only where `eol` matches,
-- and sets the `error` and `hint` named groups to fixed text.
missing_quote_err (Error) <-
{:error: {~ eol -> 'Line ended before finding a closing double quotation mark' ~} :}
{:hint: {~ "" -> "Put a quotation mark here" ~} :}
-- Error node for an unclosed "[": succeeds only where `eol` matches, and sets
-- the `error` and `hint` named groups to fixed text.
missing_bracket_error (Error) <-
{:error: {~ eol -> "Line ended before finding a closing ]-bracket" ~} :}
{:hint: {~ '' -> 'Put a "]" here' ~} :}
-- Error node for an unclosed "{": succeeds only where `eol` matches, and sets
-- the `error` and `hint` named groups to fixed text.
missing_brace_error (Error) <-
{:error: {~ eol -> "Line ended before finding a closing }-brace" ~} :}
{:hint: {~ '' -> 'Put a "}" here' ~} :}
-- Error node for an indented interpolation that starts on the first line of a
-- multi-line text. Matches a backslash followed by one of the listed markers,
-- then consumes following lines that are either empty or begin with
-- `curr_indent` plus the literal shown; sets `error` and `hint` to fixed text.
-- NOTE(review): string literals in this grammar have no escape sequences (see
-- the bare "\" used elsewhere), so '\:' is the two characters backslash-colon
-- and the first alternative matches `\\:` rather than `\:` -- confirm intent.
disallowed_interpolation (Error) <-
{:error: {~ ("\" ('\:' / '(..)' / '[..]' / '{..}') (%nl (&(%nl) / =curr_indent ' ' [^%nl]*))*) ->
"Sorry, indented text interpolations are not currently supported on the first line of multi-line text." ~} :}
{:hint: {~ '' -> 'Move the code for the first line of text to the next line by ending this line with "\" and starting the next line indented with "..", followed by the code for the first line.' ~} :}
-- Error node for tab indentation. The lookahead checks for `curr_indent`
-- followed by a tab without consuming anything; `error` and `hint` are then set
-- to fixed text.
tab_error (Error) <-
&(=curr_indent %tab)
{:error: {~ '' -> 'Tabs are not allowed for indentation.' ~} :}
{:hint: {~ '' -> 'Use spaces instead of tabs.' ~} :}
-- A section divider line: three or more `~` characters followed by `eol`.
section_division <- ("~")^+3 eol
-- A single-line block: either a parenthesized `inline_block` (recursively;
-- end of input is accepted in place of the closing ")"), or a bare
-- `raw_inline_block`.
inline_block <-
"(" ws* inline_block ws* (eof / ")") / raw_inline_block
-- A ":" (but not "::") followed either by `;`-separated inline statements, or
-- by nothing at all, provided the ":" is not followed by end-of-line and a
-- deeper-indented line (that shape is what `indented_block` starts with).
raw_inline_block (Block) <-
(!"::") ":" ws* ((inline_statement (ws* ";" ws* inline_statement)*) / !(eol nl_indent))
-- A ":" at end of line followed by a deeper-indented run of statements.
-- `nl_indent` rebinds `curr_indent` for the body. After the statements, any
-- trailing lines at that indentation are consumed as comments, blank
-- line-ends, or `unexpected_code` errors. `curr_indent` is then set to %nil.
indented_block (Block) <-
":" eol nl_indent statement (nl_nodent statement)*
(%nl (ws* %nl)* nodent (comment / eol / unexpected_code))*
{:curr_indent: %nil :}
-- One statement: an action or expression that must be followed by `eol`;
-- otherwise the remainder of the line becomes an `unexpected_code` error node.
statement <-
(action / expression) (eol / unexpected_code)
-- A statement restricted to a single line; `inline_action` is tried first.
inline_statement <- (inline_action / inline_expression)
-- A single-line expression that is not an index chain: a literal (number,
-- variable, text, list, dict, escaped nomsu) or a parenthesized, optionally
-- comma-separated, group of inline actions/expressions. The group closes with
-- ")" or end of input; failing that, an error node is produced.
noindex_inline_expression <-
number / variable / inline_text / inline_list / inline_dict / inline_nomsu
/ ( "("
ws* (inline_action / inline_expression) ws*
(ws* ',' ws* (inline_action / inline_expression) ws*)*
(")" / eof / missing_paren_err / unexpected_code)
)
-- Any single-line expression. `index_chain` is tried first so that `a.b` is
-- not parsed as just `a`.
inline_expression <- index_chain / noindex_inline_expression
-- An expression spanning several lines: indented text, nomsu, list or dict, or
-- the `(..)` form. The `(..)` form is `(..)` at end of line, then one action or
-- expression on a deeper-indented line, then trailing comment / blank / error
-- lines, after which `curr_indent` is set to %nil. Its captures are collected
-- in a table and handed to `unpack`.
indented_expression <-
indented_text / indented_nomsu / indented_list / indented_dict / ({|
"(..)" eol nl_indent
(action / expression) (eol / unexpected_code)
(%nl (ws* %nl)* nodent (comment / eol / unexpected_code))*
{:curr_indent: %nil :}
|} -> unpack)
-- Any expression; the single-line form is tried before the indented form.
expression <-
inline_expression / indented_expression
-- Escaped (quoted) code on one line: a backslash followed by an inline
-- expression or inline block.
inline_nomsu (EscapedNomsu) <- "\" (inline_expression / inline_block)
-- Escaped (quoted) code that may span lines: a backslash followed by a
-- non-index inline expression, an inline block, an indented expression, or an
-- indented block (tried in that order).
indented_nomsu (EscapedNomsu) <-
"\" (noindex_inline_expression / inline_block / indented_expression / indented_block)
-- A base expression followed by one or more `.key` steps, where each key is a
-- bare word (as Text) or a non-index inline expression.
index_chain (IndexChain) <-
noindex_inline_expression ("." (text_word / noindex_inline_expression))+
-- A single-line action; the method-call form is tried before the plain form.
inline_action <- inline_methodcall / _inline_action
-- A single-line method call: a target (inline expression or parenthesized
-- inline block), then "::" with optional surrounding spaces, then the action.
inline_methodcall (MethodCall) <-
(inline_expression / "(" inline_block ")") ws* "::" ws* _inline_action
-- Actions need either at least 1 word, or at least 2 tokens
-- Concretely: an argument followed by one or more further arguments/words, or
-- a word followed by zero or more arguments/words. A trailing inline block is
-- optional. The rule refuses to start where `section_division` would match.
_inline_action (Action) <-
!section_division
( (inline_arg (ws* (inline_arg / word))+)
/ (word (ws* (inline_arg / word))*))
(ws* inline_block)?
-- One argument of a single-line action: an inline expression, an inline block,
-- or an empty pair of parentheses.
inline_arg <- inline_expression / inline_block / "(" ws* ")"
-- An action that may span lines; the method-call form is tried first.
action <- methodcall / _action
-- A method call that may span lines: target, "::", then the action. On either
-- side of "::" an optional line continuation is accepted: an optional trailing
-- "\", end of line, then a line at the current indentation starting with "..".
methodcall (MethodCall) <-
(expression / "(" inline_block ")" / indented_block)
((ws* "\")? eol nl_nodent "..")? ws* "::" ((ws* "\")? eol nl_nodent "..")? ws*
_action
-- Multi-line counterpart of `_inline_action`: an argument followed by one or
-- more arguments/words, or a word followed by zero or more. Before each
-- following token an optional line continuation is accepted (optional trailing
-- "\", end of line, then a line at the current indentation starting with "..").
-- The rule refuses to start where `section_division` would match.
_action (Action) <-
!section_division
( (arg (((ws* "\")? eol nl_nodent "..")? ws* (arg / word))+)
/ (word (((ws* "\")? eol nl_nodent "..")? ws* (arg / word))*))
-- One argument of an action: any expression, an inline or indented block, or
-- an empty pair of parentheses.
arg <- expression / inline_block / indented_block / "(" ws* ")"
-- A bare word: a run of operator characters or a run of identifier characters
-- (never a mix), captured as text. Does not match where `number` would match.
word <- !number { operator_char+ / ident_char+ }
-- A `word` tagged as Text; used where a bare word stands for a text key.
text_word (Text) <- word
-- A text literal on one line. It refuses to match where `indented_text` would
-- match, so multi-line text is left to that rule. The literal closes with a
-- double quote or end of input; failing that, an error node is produced.
inline_text (Text) <-
!(indented_text)
'"' _inline_text* ('"' / eof / missing_quote_err / unexpected_code)
-- One piece of a single-line text literal. Either a run of literal text with
-- escapes resolved by substitution (backslash-quote becomes a quote, double
-- backslash becomes one backslash, plus `escaped_char`), or an interpolation,
-- or an `illegal_char` error node.
_inline_text <-
{~ (('\"' -> '"') / ('\\' -> '\') / escaped_char / text_char+)+ ~}
/ inline_text_interpolation / illegal_char
-- An interpolation inside text that fits on one line: a backslash followed by
-- a variable, inline list, inline dict, or a parenthesized (optionally
-- comma-separated) group of inline actions/expressions. The group closes with
-- ")" or end of input; failing that, an error node is produced.
inline_text_interpolation <-
"\" (
variable / inline_list / inline_dict
/ ("("
ws* (inline_action / inline_expression) ws*
(ws* ',' ws* (inline_action / inline_expression) ws*)*
(")" / eof / missing_paren_err / unexpected_code))
)
-- One ordinary character of text: whatever `%utf8_char` matches, or a `%print`
-- character other than a double quote or backslash, or a tab.
-- (`%utf8_char` and `%print` are supplied from outside this grammar.)
text_char <- %utf8_char / !["\] %print / %tab
-- Error node for one character that is not a newline, a tab, or a `%print`
-- character; sets the `error` and `hint` named groups to fixed text.
illegal_char (Error) <-
{:error: {~ (!(%nl / %tab / %print) .) -> "Illegal unprintable character here (it may not be visible, but it's there)" ~} :}
{:hint: {~ '' -> "This sort of thing can happen when copying and pasting code. Try deleting and retyping the code." ~} :}
-- A double quote inside indented text that does not end it: the quote must be
-- followed by another character on the same line, or by newline(s) and then
-- the current indentation. Only the quote itself is consumed.
nonterminal_quote <-
'"' &([^%nl] / %nl+ =curr_indent)
-- A text literal spanning several lines. After the opening quote and any
-- first-line content, the first line ends in one of two ways. Either a
-- backslash plus newline(s), where the newlines are not captured and the next
-- line may start with ".."; or plain newline(s), which are captured and may be
-- preceded by a `disallowed_interpolation` error node. Either way,
-- `curr_indent` is rebound to the deeper indentation of the body. The body is
-- any mix of plain text runs, interpolations, illegal-character errors, and
-- line breaks whose following `curr_indent` is removed from the captured text.
-- It closes with a quote at end of line, end of input, or a missing-quote
-- error. `curr_indent` is then set to %nil.
indented_text (Text) <-
'"'
_inline_text*
(('\' %nl+ {:curr_indent: indent :} ('..')?)
/ disallowed_interpolation? {%nl+} {:curr_indent: indent :})
(indented_plain_text / text_interpolation / illegal_char / {~ %nl+ (=curr_indent -> "") [ ]* ~})*
('"' eol / eof / missing_quote_err)
{:curr_indent: %nil :}
-- Tracking text-lines-within-indented-text as separate objects allows for better debugging line info
-- Each match is one run of literal text, built by substitution from: a line
-- continuation (backslash, blank lines, `curr_indent`, "..") which is replaced
-- by ""; a double backslash, which becomes one backslash; an `escaped_char` or
-- a lone backslash, as long as a `text_interpolation` does not start there;
-- and runs of non-terminal quotes and ordinary text characters.
indented_plain_text (Text) <-
{~
((("\" blank_lines =curr_indent "..") -> "") / ('\\' -> '\')
/ (!text_interpolation (escaped_char / '\'))
/ (nonterminal_quote / text_char)+)+
~}
-- An interpolation inside indented text: the single-line form, or a backslash
-- followed by an indented expression or block, optionally followed by blank
-- lines and a ".." continuation at the current indentation.
text_interpolation <-
inline_text_interpolation / ("\" (indented_expression / indented_block) (blank_lines =curr_indent "..")?)
-- A numeric literal. A lookahead first sets the `hex` named group to 'yes' when
-- the literal is hexadecimal. The literal is an optional "-" followed by one of
-- digits.digits, .digits, 0x<hex digits>, or plain digits (tried in that
-- order); the matched text is passed to `tonumber`.
number (Number) <-
(&("-"? "0x" [0-9a-fA-F]+) {:hex: '' -> 'yes' :})?
(("-"? (([0-9]+ "." [0-9]+) / ("." [0-9]+) / "0x" [0-9a-fA-F]+ / ([0-9]+)))-> tonumber)
-- Variables can be nameless (i.e. just %) and can only contain identifier chars.
-- This ensures you don't get weird parsings of `%x+%y` or `%'s thing`.
-- The captured value is the (possibly empty) name after the "%".
variable (Var) <- "%" {ident_char*}
-- A list literal on one line. It refuses to start at "[..]", which introduces
-- `indented_list`. Items are comma-separated and a trailing comma is allowed.
-- The list closes with "]" or end of input; otherwise an optional stray comma
-- is skipped and an error node is produced.
inline_list (List) <-
!('[..]')
"[" ws*
(inline_list_item (ws* ',' ws* inline_list_item)* (ws* ',')?)? ws*
("]" / eof / (","? (missing_bracket_error / unexpected_code)))
-- A list literal spanning several lines: "[..]" at end of line, then one or
-- more `list_line`s on deeper-indented lines (`nl_indent` rebinds
-- `curr_indent`), then trailing comment / blank / error lines, and finally an
-- optional stray comma plus `unexpected_code`.
indented_list (List) <-
"[..]" eol nl_indent
list_line (nl_nodent list_line)*
(%nl (ws* %nl)* nodent (comment / eol / unexpected_code))*
(","? unexpected_code)?
-- One line of an indented list. Either one or more inline items that each end
-- with a comma, or zero or more such items followed by a final item that may
-- be a full action, expression or block. Both forms must end with `eol`.
list_line <-
(inline_list_item ws* "," ws*)+ eol
/ (inline_list_item ws* "," ws*)* (action / expression / inline_block / indented_block) eol
-- A list item that fits on one line: inline action, expression, or block.
inline_list_item <- inline_action / inline_expression / inline_block
-- A dict literal on one line. It refuses to start at "{..}", which introduces
-- `indented_dict`. Entries are comma-separated; unlike `inline_list`, there is
-- no alternative that accepts a trailing comma before the closer. The dict
-- closes with "}" or end of input; otherwise an optional stray comma is
-- skipped and an error node is produced.
inline_dict (Dict) <-
!('{..}')
"{" ws*
(inline_dict_entry (ws* ',' ws* inline_dict_entry)*)? ws*
("}" / eof / (","? (missing_brace_error / unexpected_code)))
-- A dict literal spanning several lines: "{..}" at end of line, then one or
-- more `dict_line`s on deeper-indented lines (`nl_indent` rebinds
-- `curr_indent`), then trailing comment / blank / error lines, and finally an
-- optional stray comma plus `unexpected_code`.
indented_dict (Dict) <-
"{..}" eol nl_indent
dict_line (nl_nodent dict_line)*
(%nl (ws* %nl)* nodent (comment / eol / unexpected_code))*
(","? unexpected_code)?
-- One line of an indented dict. Either one or more inline entries that each
-- end with a comma, or zero or more such entries followed by a final
-- `dict_entry` whose value may span lines. Both forms must end with `eol`.
dict_line <-
(inline_dict_entry ws* "," ws*)+ eol
/ (inline_dict_entry ws* "," ws*)* dict_entry eol
-- A key with an optional `: value` part, where the value may be any action or
-- expression (including multi-line forms). If ":" is present a value is
-- required.
_dict_entry(DictEntry) <-
dict_key (ws* ":" ws* (action / expression))?
-- The last entry on a dict line: a key/value entry, or an inline or indented
-- block (tried in that order).
dict_entry <-
_dict_entry / inline_block / indented_block
-- A key with an optional `: value` part restricted to one line. Unlike
-- `_dict_entry`, the value after ":" is itself optional here.
_inline_dict_entry(DictEntry) <-
dict_key (ws* ":" ws* (inline_action / inline_expression)?)?
-- A dict entry that fits on one line: a key/value entry or an inline block.
inline_dict_entry <-
_inline_dict_entry / inline_block
-- A dict key: a bare word (taken as Text) is tried first, then any inline
-- expression.
dict_key <-
text_word / inline_expression
-- Characters that may form an operator word. The trailing "-" in the class is
-- a literal minus sign, not a range.
operator_char <- ['`~!@$^&*+=|<>?/-]
-- Characters that may form an identifier word: ASCII letters, digits,
-- underscore, or whatever the externally supplied `%utf8_char` matches.
ident_char <- [a-zA-Z0-9_] / %utf8_char
-- Inline whitespace: exactly one space character (tabs are not included).
ws <- " "
-- A backslash escape inside text, for use within a substitution capture. The
-- backslash itself is replaced by ''. What follows is one of:
--   x or X plus exactly two hex digits, passed with `%number_16` to `tonumber`
--     and then to `tochar`;
--   one to three decimal digits, passed to `tonumber` and then to `tochar`;
--   one of the letters a b t n v f r, replaced via the matching `ascii_N`.
-- (`tonumber`, `tochar`, `%number_16` and the `ascii_N` values are supplied
-- from outside this grammar; `%number_16` is presumably the base 16 -- confirm.)
escaped_char <-
("\"->'') (
(([xX]->'') ((({[0-9a-fA-F]^2} %number_16) -> tonumber) -> tochar))
/ ((([0-9] [0-9]^-2) -> tonumber) -> tochar)
/ ("a"->ascii_7) / ("b"->ascii_8) / ("t"->ascii_9) / ("n"->ascii_10)
/ ("v"->ascii_11) / ("f"->ascii_12) / ("r"->ascii_13)
)