-- path: root/parser.moon (blob 7635cbc2ced3a39b94374eecad80c0d2a87e86e7)
-- This file contains the parser, which converts Nomsu text into abstract syntax trees
lpeg = require 'lpeg'
re = require 're'
lpeg.setmaxstack 10000
{:P,:R,:S,:C,:Cmt,:Carg} = lpeg
{:match, :sub} = string
files = require 'files'
{:NomsuCode, :LuaCode, :Source} = require "code_obj"
AST = require "nomsu_tree"

NOMSU_DEFS = with {}
    -- Whitespace building blocks. A newline is an LF with an optional CR in
    -- front of it, so both unix and windows line endings are accepted.
    .nl = P("\r")^-1 * P("\n")
    .ws = S(" \t")
    -- Make the builtin number conversion available under the name "tonumber"
    .tonumber = tonumber

    -- Backslash escapes. The alternatives are tried in this order:
    --   \xHH          two hex digits giving a byte value
    --   \D \DD \DDD   one to three decimal digits giving a byte value
    --   \n \t ...     the single-letter escapes listed in the table below
    letter_escapes = {
        n: "\n", t: "\t", b: "\b", a: "\a"
        v: "\v", f: "\f", r: "\r"
    }
    backslash = P("\\")
    dec_digit = R('09')
    hex_digit = R('09','af','AF')
    hex_escape = (backslash * S("xX") * C(hex_digit * hex_digit)) / (code)-> string.char(tonumber(code, 16))
    dec_escape = (backslash * C(dec_digit * (dec_digit^-2))) / (code)-> string.char(tonumber(code))
    letter_escape = (backslash * C(S("ntbavfr"))) / letter_escapes
    .escaped_char = hex_escape + dec_escape + letter_escape

    .operator_char = S("'`~!@$^&*-+=|<>?/")

    -- A multi-byte UTF-8 sequence: a lead byte announcing 2, 3 or 4 bytes,
    -- followed by the matching number of continuation bytes (128-191).
    continuation = R("\128\191")
    .utf8_char = (
        R("\194\223") * continuation +
        R("\224\239") * continuation * continuation +
        R("\240\244") * continuation * continuation * continuation)
    -- Identifier characters: ASCII letters and digits, underscore, or any
    -- multi-byte UTF-8 character
    .ident_char = R("az", "AZ", "09") + P("_") + .utf8_char

    -- Matches exactly one additional level of indentation. The text at the
    -- current position has to start with the current indent followed by four
    -- more spaces; those characters are consumed and become the new current
    -- indent. Anything else makes the match fail and leaves the indent alone.
    .indent = Cmt Carg(1), (pos, state)=>
        deeper = state.indent.."    "
        return nil unless sub(@, pos, pos + #deeper - 1) == deeper
        state.indent = deeper
        return pos + #deeper
    -- Matches when the run of spaces at the current position is no longer than
    -- the current indent with its last four characters removed. On success the
    -- current indent is shortened by that one level and no input is consumed.
    .dedent = Cmt Carg(1), (pos, state)=>
        shallower = sub(state.indent, 1, -5)
        leading_spaces = match(@, "^[ ]*", pos)
        return nil if #leading_spaces > #shallower
        state.indent = shallower
        return pos
    -- Matches when the text at the current position starts with the current
    -- indent. The indent is consumed, but the stored indent is left unchanged.
    .nodent = Cmt Carg(1), (pos, state)=>
        current = state.indent
        return nil unless sub(@, pos, pos + #current - 1) == current
        return pos + #current

    -- Gives grammar rules access to the first extra argument passed to match(),
    -- which Parser.parse uses for its per-parse state table
    .userdata = Carg(1)

    -- Record a parse error in userdata.errors, keyed by the position where it starts.
    -- NOTE(review): the argument order suggests this is invoked by the grammar as a
    -- match-time capture (subject, current position, then captures) -- confirm
    -- against nomsu.peg.
    --   src:       the full text being parsed
    --   end_pos:   position handed in by the matcher (unused here)
    --   start_pos: position the error is reported at
    --   err_msg:   description of the problem, defaults to "Parse error"
    --   userdata:  the per-parse state table (reads .errors and .source)
    -- Returns true after recording the error (or when one was already recorded at
    -- this position). Once 10 errors have been collected, a "too many errors"
    -- entry is stored instead and #src+1 is returned.
    .error = (src,end_pos,start_pos,err_msg,userdata)->
        seen_errors = userdata.errors
        -- Only the first error reported at any given position is kept
        if seen_errors[start_pos]
            return true
        num_errors = 0
        for _ in pairs(seen_errors) do num_errors += 1
        if num_errors >= 10
            seen_errors[start_pos+1] = colored.bright colored.yellow colored.onred "Too many errors, canceling parsing..."
            return #src+1
        -- The source is optional everywhere else in this file, so fall back to a
        -- placeholder here instead of failing with an index-nil error while
        -- trying to report a parse error
        filename = userdata.source and userdata.source.filename or "<unknown>"
        line_no = files.get_line_number(src, start_pos)
        -- Context lines default to "" in case files.get_line has nothing to
        -- return (e.g. for the line after the last one)
        prev_line = line_no == 1 and "" or (files.get_line(src, line_no-1) or "")
        err_line = files.get_line(src, line_no) or ""
        next_line = files.get_line(src, line_no+1) or ""
        -- Offset of the error within its line; also the number of dashes drawn
        -- in front of the "^" marker
        i = start_pos-files.get_line_starts(src)[line_no]
        pointer = ("-")\rep(i) .. "^"
        err_msg = colored.bright colored.yellow colored.onred (err_msg or "Parse error").." at #{filename}:#{line_no}:"
        if #prev_line > 0 then err_msg ..= "\n"..colored.dim(prev_line)
        -- Text before the error in white, the offending character in bright red,
        -- and the rest of the line dimmed
        err_line = colored.white(err_line\sub(1, i))..colored.bright(colored.red(err_line\sub(i+1,i+1)))..colored.dim(err_line\sub(i+2,-1))
        err_msg ..= "\n#{err_line}\n#{colored.red pointer}"
        if #next_line > 0 then err_msg ..= "\n"..colored.dim(next_line)
        seen_errors[start_pos] = err_msg
        return true

    -- Store the text of a comment in the per-parse state, keyed by the position
    -- where the comment starts. Always returns true.
    .Comment = (_subject, _pos, comment_start, comment_text, state)->
        state.comments[comment_start] = comment_text
        true

    -- Store the version found in the parsed text in the per-parse state.
    -- Always returns true.
    .Version = (_subject, _pos, version_text, state)->
        state.version = version_text
        true

-- Any name looked up in NOMSU_DEFS that has no explicit definition is treated as
-- a syntax tree node type. The lookup produces (and caches in NOMSU_DEFS) a
-- constructor that takes a start position, the captured value table, a stop
-- position and the per-parse state, and turns the value table into a node.
setmetatable(NOMSU_DEFS, {
    __index: (node_type)=>
        build_node = (first, node, last, state)->
            origin = state.source
            if origin
                -- Shift the match positions by where the parsed text starts
                -- within its source
                node.source = Source(origin.filename, origin.start + first-1, origin.start + last-1)
            setmetatable(node, AST[node_type])
            node\__init! if node.__init
            return node

        -- Cache the constructor so later lookups skip this function
        self[node_type] = build_node
        return build_node
})

Parser = {}
-- The compiled grammar for Nomsu code, built once when this module is loaded.
-- The grammar is read from "nomsu.peg", looked up first in the current directory
-- and then in each ";"-separated entry of package.nomsupath (if that is set).
-- Raises an error if the file cannot be found or cannot be processed.
NOMSU_PATTERN = do
    -- Just for cleanliness, I put the language spec in its own file using a slightly modified
    -- version of the lpeg.re syntax.
    -- The tidier rewrites that modified syntax into plain lpeg.re syntax:
    --   "name: body"        becomes  "name <- body"
    --   "name (Type): body" becomes  "name <- (({} body {} %userdata) -> Type)"
    -- and a leading "-- ... version N" comment sets Parser.version to N.
    peg_tidier = re.compile [[
    file <- %nl* version? %nl* {~ (def/comment) (%nl+ (def/comment))* %nl* ~}
    version <- "--" (!"version" [^%nl])* "version" ([ ])* (([0-9])+ -> set_version) ([^%nl])*
    def <- anon_def / captured_def
    anon_def <- ({ident} (" "*) ":"
        {((%nl " "+ [^%nl]*)+) / ([^%nl]*)}) -> "%1 <- %2"
    captured_def <- ({ident} (" "*) "(" {ident} ")" (" "*) ":"
        {((%nl " "+ [^%nl]*)+) / ([^%nl]*)}) -> "%1 <- (({} %3 {} %%userdata) -> %2)"
    ident <- [a-zA-Z_][a-zA-Z0-9_]*
    comment <- "--" [^%nl]*
    ]], {set_version: (v) -> Parser.version = tonumber(v)}
    peg_file = io.open("nomsu.peg")
    if not peg_file and package.nomsupath
        for path in package.nomsupath\gmatch("[^;]+")
            peg_file = io.open(path.."/nomsu.peg")
            break if peg_file
    assert(peg_file, "could not find nomsu.peg file")
    -- Read everything and release the handle before doing any processing, so the
    -- file is not left open if tidying or compiling the grammar raises an error
    peg_source = peg_file\read('*a')
    peg_file\close!
    nomsu_peg = peg_tidier\match(peg_source)
    -- Fail with a clear message here rather than passing nil on to re.compile
    assert(nomsu_peg, "could not parse nomsu.peg file")
    re.compile(nomsu_peg, NOMSU_DEFS)

-- Parse Nomsu code into a syntax tree.
--   nomsu_code: the code to parse; it is converted with tostring(), and its
--               .source field is used when no explicit source is given
--   source:     optional description of where the code came from
-- Returns the tree with .comments and .version attached, or nil when the match
-- succeeded but only produced a position (i.e. no tree was captured).
-- Raises an error if the text does not match the grammar at all, or if any
-- parse errors were recorded; those are reported together, ordered by position.
Parser.parse = (nomsu_code, source=nil)->
    source = nomsu_code.source unless source
    text = tostring(nomsu_code)
    state = {comments: {}, errors: {}, indent: "", :source}
    tree = NOMSU_PATTERN\match(text, nil, state)
    if not tree
        error "In file #{colored.blue tostring(source or "<unknown>")} failed to parse:\n#{colored.onyellow colored.black text}"
    return nil if type(tree) == 'number'

    if next(state.errors)
        -- Report the collected errors in the order they appear in the text
        positions = {}
        for pos in pairs(state.errors)
            table.insert(positions, pos)
        table.sort(positions)
        messages = {}
        for pos in *positions
            table.insert(messages, state.errors[pos])
        error("Errors occurred while parsing:\n\n"..table.concat(messages, "\n\n"), 0)

    tree.comments = state.comments
    tree.version = state.version
    return tree

return Parser