diff options
Diffstat (limited to 'nomsu.7.peg')
| -rw-r--r-- | nomsu.7.peg | 273 |
1 files changed, 273 insertions, 0 deletions
diff --git a/nomsu.7.peg b/nomsu.7.peg new file mode 100644 index 0000000..22bcd46 --- /dev/null +++ b/nomsu.7.peg @@ -0,0 +1,273 @@ +-- Nomsu version 7 +file <- + {:curr_indent: ' '* :} + (((comment / methodchain / action / expression / inline_block) eol !.) + / file_chunks / comment? blank_lines?) + {:curr_indent: %nil :} + !. + +shebang <- "#!" [^%nl]* + +file_chunks (FileChunks) <- + {:shebang: shebang :}? + (top_block (nl_nodent section_division top_block)*) + blank_lines? + unexpected_indent? unexpected_chunk? + +section_division <- ("~")^+3 eol + +eof <- !. +eol <- ws* (&%nl / !.) +nodent <- (unexpected_indent [^%nl]* / =curr_indent) +indent <- { =curr_indent " " } +blank_lines <- %nl ((tab_error / nodent comment / ws*) %nl)* +nl_nodent <- blank_lines nodent +nl_indent <- blank_lines {:curr_indent: indent :} (comment nl_nodent)* + + +comment (Comment) <- + "###" {~ [^%nl]* (%nl+ (indent -> '') [^%nl]*)* (%nl &%nl)* ~} + + +top_block (Block) <- + ((blank_lines nodent) / (comment nl_nodent))? statement (nl_nodent statement)* + +inline_block (Block) <- + ":" ws* (inline_statement (ws* ";" ws* inline_statement)*)? + (&eol !nl_indent / &(ws* ([)},;] / "]"))) + +indented_block (Block) <- + ":" eol nl_indent statement (nl_nodent statement)* + (%nl (ws* %nl)* nodent (comment / eol / unexpected_code))* + {:curr_indent: %nil :} + +statement_block (Block) <- + ":" ws* (methodchain / action / expression) + + +statement <- + (methodchain / action / expression / statement_block) (eol / unexpected_code) + +inline_statement <- + inline_methodchain / inline_action / inline_expression + +noindex_inline_expression <- + number / variable / inline_text / inline_list / inline_dict / inline_unary_action / + "(" + ws* (inline_block / inline_methodchain / inline_action / inline_expression) ws* + (")" / eof / missing_paren_err / unexpected_code) + +inline_expression <- inline_index_chain / noindex_inline_expression / inline_index / inline_block + +indented_expression <- + indented_text / indented_list / indented_dict / indented_block / + indented_parens / unary_action + +indented_parens <- + "(" indented_naked_expression (nl_nodent ")" / missing_paren_err / unexpected_code) + +indented_naked_expression <- + ({| nl_indent + (methodchain / action / expression) (eol / unexpected_code) + (%nl (ws* %nl)* nodent (comment / eol / unexpected_code))* + {:curr_indent: %nil :} + |} -> unpack) + +expression <- + inline_expression / indented_index_chain / indented_expression / inline_index / indented_index + + +inline_index (Index) <- + "." (hex_integer / integer / text_word / noindex_inline_expression) +_inline_index (IndexChain) <- inline_index +inline_index_chain <- + (noindex_inline_expression _inline_index+) -> foldr + +indented_index (Index) <- "." indented_expression +_indented_index (IndexChain) <- indented_index +indented_index_chain <- + (noindex_inline_expression inline_index* (inline_index / _indented_index)) -> foldr + + +-- Actions need 1 argument and either another argument or a word. +inline_action (Action) <- + !section_division + ( inline_expression ((ws* inline_arg)+ / "(" ws* ")") + / word (ws* inline_arg)*) +inline_arg <- inline_expression / word + +inline_unary_action (Action) <- + !section_division %at_break operator !":" inline_expression + +action (Action) <- + !section_division + ( !statement_block (expression / indented_naked_expression) (((linesplit / ws*) arg)+ / "(" ws* ")") + / word ((linesplit / ws*) arg)*) +arg <- expression / indented_naked_expression / word +linesplit <- eol nl_nodent ".." ws* + +unary_action (Action) <- + !section_division %at_break operator !":" !eol (expression / indented_naked_expression) + + +inline_methodsuffix (MethodCall) <- + inline_action + / "(" ws* inline_action (ws* ";" ws* inline_action)* ")" +inline_methodchain <- + ((inline_action / inline_expression) (ws* "," ws* inline_methodsuffix)+) -> foldr + +methodsuffix (MethodCall) <- + action + / "(" ws* inline_action (ws* ";" ws* inline_action)* ws* ")" + / eol ({| nl_indent + (action eol) (nl_nodent action eol)* + (%nl (ws* %nl)* nodent (comment / eol / unexpected_code))* + {:curr_indent: %nil :} + |} -> unpack) +methodchain <- + ((unary_action / inline_action / expression) ((linesplit / ws*) "," ws* methodsuffix)+) -> foldr + +word <- !number { %operator_char+ / ident_char+ } +operator <- !"###" {%operator_char+} + + +text_word (Text) <- word + +inline_text (Text) <- + '"' !eol _inline_text* ('"' / eof / missing_quote_err / unexpected_code) +_inline_text <- + {~ (('\"' -> '"') / ('\\' -> '\') / escaped_char / text_char+)+ ~} + / inline_text_interpolation / illegal_char +inline_text_interpolation <- + "\" ( + variable / inline_list / inline_dict + / ("(" + ws* ((inline_methodchain / inline_action / inline_expression) ws*)? + (")" / eof / missing_paren_err / unexpected_code)) + ) + +text_char <- %utf8_char / !["\] %print / %tab + +indented_text (Text) <- + '("' %nl {%nl*} ({| + {:curr_indent: indent :} + (indented_plain_text / text_interpolation / illegal_char / blank_text_lines)* + {:curr_indent: %nil :} + |} -> unpack) + (nl_nodent '")' / eof / missing_indented_quote_err) + +-- Tracking text-lines-within-indented-text as separate objects allows for better debugging line info +indented_plain_text (Text) <- + {~ + ((("\" blank_lines =curr_indent "..") -> "") / ('\\' -> '\') / ('\"' -> '"') / ('\;' -> '') + / (!text_interpolation ((!("\n") escaped_char) / '\')) + / ('"' / text_char)+)+ + blank_text_lines? + ~} +blank_text_lines <- + {~ (%nl ((ws* -> '') (&%nl / !.) / (=curr_indent -> '') &[^%nl]))+ ~} + +text_interpolation <- + ({| + -- %indentation will backtrack and match the actual indentation of the current line + "\" {:curr_indent: %indentation :} + (indented_block (blank_lines =curr_indent "..")? / indented_expression) + |} -> unpack) + / inline_text_interpolation + + +number <- + hex_integer / real_number / integer + +integer (Number) <- + (((%at_break "-")? [0-9]+)-> tonumber) + +hex_integer (Number) <- + (((%at_break "-")? "0x" [0-9a-fA-F]+)-> tonumber) + {:hex: '' -> 'yes' :} + +real_number (Number) <- + (((%at_break "-")? [0-9]+ "." [0-9]+)-> tonumber) + + +variable (Var) <- "$" ({ident_char+} / "(" ws* (inline_action / variable) ws* ")" / {''}) + + +inline_list (List) <- + "[" ws* !eol + (inline_list_item (ws* ',' ws* inline_list_item)* (ws* ',')?)? ws* + ("]" / eof / (","? (missing_bracket_error / unexpected_code))) +inline_list_item <- inline_action / inline_expression + +indented_list (List) <- + ({| + "[" eol nl_indent + list_line (nl_nodent list_line)* + {:curr_indent: %nil :} + |} -> unpack) + (nl_nodent "]" / eof / missing_bracket_error / unexpected_code) +list_line <- + (inline_list_item ws* "," ws*)+ eol + / (inline_list_item ws* "," ws*)* (action / statement_block / expression) eol + + +inline_dict (Dict) <- + "{" ws* !eol + ((inline_action / inline_expression) (ws* ',' ws* (inline_action / inline_expression))*)? ws* + ("}" / eof / (","? (missing_brace_error / unexpected_code))) + +indented_dict (Dict) <- + ({| + "{" eol nl_indent + dict_line (nl_nodent dict_line)* + (%nl (ws* %nl)* nodent (comment / eol / unexpected_code))* + {:curr_indent: %nil :} + |} -> unpack) + (nl_nodent "}" / eof / missing_brace_error / unexpected_code) +dict_line <- + ((inline_action / inline_expression) ws* "," ws*)+ eol + / ((inline_action / inline_expression) ws* "," ws*)* (action / statement_block / expression) eol + +ident_char <- [a-zA-Z0-9_] / (!%operator_char %utf8_char) +ws <- " " + +escaped_char <- + ("\"->'') ( + (([xX]->'') ((({[0-9a-fA-F]^2} %number_16) -> tonumber) -> tochar)) + / ((([0-9] [0-9]^-2) -> tonumber) -> tochar) + / ("a"->ascii_7) / ("b"->ascii_8) / ("t"->ascii_9) / ("n"->ascii_10) + / ("v"->ascii_11) / ("f"->ascii_12) / ("r"->ascii_13) + ) + + +-- Errors +unexpected_code <- ws* _unexpected_code +_unexpected_code (Error) <- + {:error: {~ [^%nl]+ -> "Couldn't parse this code." ~} :} +unexpected_chunk (Error) <- + {:error: {~ .+ -> "Couldn't parse this chunk of code." ~} :} +unexpected_indent (Error) <- + {:error: {~ (=curr_indent ws+) -> "This indentation is messed up." ~} :} + {:hint: {~ '' -> 'This line should either have the same indentation as the line above it, or exactly 4 spaces more.' ~} :} +missing_paren_err (Error) <- + {:error: {~ eol -> 'This expression is missing a closing )-parenthesis.' ~} :} + {:hint: {~ '' -> 'Put a ")" here' ~} :} +missing_quote_err (Error) <- + {:error: {~ eol -> "This text is missing a closing quotation mark." ~} :} + {:hint: {~ "" -> "Put a quotation mark here." ~} :} +missing_indented_quote_err (Error) <- + {:error: {~ '' -> 'This text is missing a closing ")-quotation mark.' ~} :} + {:hint: {~ "" -> 'Put a ") after this line, at the same level of indentation as the opening (".' ~} :} +missing_bracket_error (Error) <- + {:error: {~ eol -> "This list is missing a closing ]-bracket" ~} :} + {:hint: {~ '' -> 'Put a "]" here' ~} :} +missing_brace_error (Error) <- + {:error: {~ eol -> "This dict is missing a closing }-brace" ~} :} + {:hint: {~ '' -> 'Put a "}" here' ~} :} +tab_error <- ws* _tab_error [^%nl]* +_tab_error (Error) <- + {:error: {~ %tab+ -> 'Tabs are not allowed for indentation.' ~} :} + {:hint: {~ '' -> 'Use 4-space indentation instead of tabs.' ~} :} +illegal_char (Error) <- + {:error: {~ (!(%nl / %tab / %print) .) -> "Illegal unprintable character here (it may not be visible, but it's there)" ~} :} + {:hint: {~ '' -> "This sort of thing can happen when copying and pasting code. Try deleting and retyping the code." ~} :} |
