aboutsummaryrefslogtreecommitdiff
path: root/parser.moon
blob: 3e14063c06e16631bebf65123b3bcbe3be0e376c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
-- This file contains the parser, which converts Nomsu text into abstract syntax trees
lpeg = require 'lpeg'
re = require 're'
lpeg.setmaxstack 10000
{:P,:R,:S,:C,:Cmt,:Carg} = lpeg
{:match, :sub} = string
{:insert, :remove} = table
files = require 'files'
{:NomsuCode, :LuaCode, :Source} = require "code_obj"
AST = require "syntax_tree"

NOMSU_DEFS = with {}
    -- Newline supports either windows-style CR+LF or unix-style LF
    .nl = P("\r")^-1 * P("\n")
    .ws = S(" \t")
    .tonumber = tonumber
    .table = -> {}
    .unpack = unpack or table.unpack
    string_escapes = n:"\n", t:"\t", b:"\b", a:"\a", v:"\v", f:"\f", r:"\r"
    digit, hex = R('09'), R('09','af','AF')
    .escaped_char = (P("\\")*S("xX")*C(hex*hex)) / => string.char(tonumber(@, 16))
    .escaped_char += (P("\\")*C(digit*(digit^-2))) / => string.char(tonumber @)
    .escaped_char += (P("\\")*C(S("ntbavfr"))) / string_escapes
    .operator_char = S("'`~!@$^&*-+=|<>?/")
    .utf8_char = (
        R("\194\223")*R("\128\191") +
        R("\224\239")*R("\128\191")*R("\128\191") +
        R("\240\244")*R("\128\191")*R("\128\191")*R("\128\191"))
    .ident_char = R("az","AZ","09") + P("_") + .utf8_char

    .userdata = Carg(1)

    .add_comment = (src,end_pos,start_pos,comment,userdata)->
        userdata.comments[start_pos] = comment
        return true

    .error = (src,end_pos,start_pos,err_msg,userdata)->
        seen_errors = userdata.errors
        if seen_errors[start_pos]
            return true
        num_errors = 0
        for _ in pairs(seen_errors) do num_errors += 1
        if num_errors >= 10
            seen_errors[start_pos+1] = colored.bright colored.yellow colored.onred "Too many errors, canceling parsing..."
            return #src+1
        err_pos = start_pos
        line_no = files.get_line_number(src, err_pos)
        --src = files.read(userdata.source.filename)
        prev_line = line_no == 1 and nil or files.get_line(src, line_no-1)
        err_line = files.get_line(src, line_no)
        next_line = files.get_line(src, line_no+1)
        i = err_pos-files.get_line_starts(src)[line_no]
        j = i + (end_pos-start_pos)
        pointer = ("-")\rep(i) .. "^"
        err_msg = colored.bright colored.yellow colored.onred (err_msg or "Parse error").." at #{userdata.source.filename}:#{line_no}:"
        if prev_line then err_msg ..= "\n"..colored.dim(prev_line)
        if err_line
            err_line = colored.white(err_line\sub(1, i))..colored.bright(colored.red(err_line\sub(i+1,j+1)))..colored.dim(err_line\sub(j+2,-1))
            err_msg ..= "\n#{err_line}\n#{colored.red pointer}"
        if next_line then err_msg ..= "\n"..colored.dim(next_line)
        seen_errors[start_pos] = err_msg
        return true

setmetatable(NOMSU_DEFS, {__index:(key)=>
    make_node = (start, value, stop, userdata)->
        if userdata.source
            with userdata.source
                value.source = Source(.filename, .start + start-1, .start + stop-1)
        setmetatable(value, AST[key])
        if value.__init then value\__init!
        return value

    self[key] = make_node
    return make_node
})

Parser = {version:3, patterns:{}}
do
    -- Just for cleanliness, I put the language spec in its own file using a slightly modified
    -- version of the lpeg.re syntax.
    peg_tidier = re.compile [[
    file <- %nl* {~ (def/comment) (%nl+ (def/comment))* %nl* ~}
    def <- anon_def / captured_def
    anon_def <- ({ident} (" "*) ":"
        {~ ((%nl " "+ def_line?)+) / def_line ~}) -> "%1 <- %2"
    captured_def <- ({ident} (" "*) "(" {ident} ")" (" "*) ":"
        {~ ((%nl " "+ def_line?)+) / def_line ~}) -> "%1 <- (({} {| %3 |} {} %%userdata) -> %2)"
    def_line <- (err / [^%nl])+
    err <- ("(!!" { (!("!!)") .)* } "!!)") -> "(({} (%1) %%userdata) => error)"
    ident <- [a-zA-Z_][a-zA-Z0-9_]*
    comment <- "--" [^%nl]*
    ]]
    for version=1,Parser.version
        peg_file = io.open("nomsu.#{version}.peg")
        if not peg_file and package.nomsupath
            for path in package.nomsupath\gmatch("[^;]+")
                peg_file = io.open(path.."/nomsu.#{version}.peg")
                break if peg_file
        assert(peg_file, "could not find nomsu .peg file")
        nomsu_peg = peg_tidier\match(peg_file\read('*a'))
        peg_file\close!
        Parser.patterns[version] = re.compile(nomsu_peg, NOMSU_DEFS)

_anon_chunk = 0
Parser.parse = (nomsu_code, source=nil, version=nil)->
    source or= nomsu_code.source
    nomsu_code = tostring(nomsu_code)
    unless source
        source = Source("anonymous chunk ##{_anon_chunk}", 1, #nomsu_code)
        _anon_chunk += 1
    version or= nomsu_code\match("^#![^\n]*nomsu[ ]+-V[ ]*([0-9.]+)")
    syntax_version = version and tonumber(version\match("^[0-9]+")) or Parser.version
    userdata = {
        errors: {}, :source, comments: {}
    }
    tree = Parser.patterns[syntax_version]\match(nomsu_code, nil, userdata)
    if not tree or type(tree) == 'number'
        error "In file #{colored.blue tostring(source or "<unknown>")} failed to parse:\n#{colored.onyellow colored.black nomsu_code}"

    if next(userdata.errors)
        keys = [k for k,v in pairs(userdata.errors)]
        table.sort(keys)
        errors = [userdata.errors[k] for k in *keys]
        error("Errors occurred while parsing (v#{syntax_version}):\n\n"..table.concat(errors, "\n\n"), 0)

    comments = [{comment:c, pos:p} for p,c in pairs(userdata.comments)]
    -- Sort in descending order so we can pop the first comments off the end one at a time
    table.sort comments, (a,b)-> a.pos > b.pos
    comment_i = 1
    walk_tree = (t)->
        export comment_i
        comment_buff = {}
        while comments[#comments] and comments[#comments].pos <= t.source.start
            table.insert(comment_buff, table.remove(comments))
        for x in *t
            if AST.is_syntax_tree x
                walk_tree x
        while comments[#comments] and comments[#comments].pos <= t.source.stop
            table.insert(comment_buff, table.remove(comments))
        t.comments = comment_buff if #comment_buff > 0
    walk_tree tree

    return tree

Parser.is_operator = (s)->
    return not not (NOMSU_DEFS.operator_char^1 * -1)\match(s)

Parser.is_identifier = (s)->
    return not not (NOMSU_DEFS.ident_char^1 * -1)\match(s)

inline_escaper = re.compile "{~ (%utf8_char / ('\"' -> '\\\"') / ('\n' -> '\\n') / ('\t' -> '\\t') / ('\b' -> '\\b') / ('\a' -> '\\a') / ('\v' -> '\\v') / ('\f' -> '\\f') / ('\r' -> '\\r') / ('\\' -> '\\\\') / ([^ -~] -> escape) / .)* ~}", {utf8_char: NOMSU_DEFS.utf8_char, escape:(=> ("\\%03d")\format(@byte!))}
Parser.inline_escape = (s)->
    return inline_escaper\match(s)
escaper = re.compile "{~ (%utf8_char / ('\\' -> '\\\\') / [\n\r\t -~] / (. -> escape))* ~}", {utf8_char: NOMSU_DEFS.utf8_char, escape:(=> ("\\%03d")\format(@byte!))}
Parser.escape = (s)->
    return escaper\match(s)

return Parser