1 ! Copyright (C) 2008 Chris Double.
2 ! See https://factorcode.org/license.txt for BSD license.
3 USING: accessors kernel math.parser multiline peg peg.ebnf
9 TUPLE: ast-keyword value ;
10 TUPLE: ast-name value ;
11 TUPLE: ast-number value ;
12 TUPLE: ast-string value ;
13 TUPLE: ast-regexp body flags ;
14 TUPLE: ast-cond-expr condition then else ;
15 TUPLE: ast-set lhs rhs ;
16 TUPLE: ast-get value ;
17 TUPLE: ast-mset lhs rhs operator ;
18 TUPLE: ast-binop lhs rhs operator ;
19 TUPLE: ast-unop expr operator ;
20 TUPLE: ast-postop expr operator ;
21 TUPLE: ast-preop expr operator ;
22 TUPLE: ast-getp index expr ;
23 TUPLE: ast-send method expr args ;
24 TUPLE: ast-call expr args ;
26 TUPLE: ast-new name args ;
27 TUPLE: ast-array values ;
28 TUPLE: ast-json bindings ;
29 TUPLE: ast-binding name value ;
30 TUPLE: ast-func fs body ;
31 TUPLE: ast-var name value ;
32 TUPLE: ast-begin statements ;
33 TUPLE: ast-if condition true false ;
34 TUPLE: ast-while condition statements ;
35 TUPLE: ast-do-while statements condition ;
36 TUPLE: ast-for i c u statements ;
37 TUPLE: ast-for-in v e statements ;
38 TUPLE: ast-switch expr statements ;
42 TUPLE: ast-try t e c f ;
44 TUPLE: ast-with expr body ;
45 TUPLE: ast-case c cs ;
46 TUPLE: ast-default cs ;
50 ! Grammar for JavaScript. Based on OMeta-JS example from:
51 ! https://jarrett.cs.ucla.edu/ometa-js/#JavaScript_Compiler
53 EBNF: tokenize-javascript [=[
57 HexDigit = [0-9a-fA-F]
59 LineTerminator = [\r\n\u002028\u002029]
60 WhiteSpace = [ \t\v\f\xa0\u00feff\u001680\u002000\u002001\u002002\u002003\u002004\u002005\u002006\u002007\u002008\u002009\u00200a\u00202f\u00205f\u003000]
61 SingleLineComment = "//" (!(LineTerminator) .)* "\n" => [[ ignore ]]
62 MultiLineComment = "/*" (!("*/") .)* "*/" => [[ ignore ]]
63 Comment = SingleLineComment | MultiLineComment
64 Space = WhiteSpace | LineTerminator | Comment
65 Spaces = Space* => [[ ignore ]]
66 NameFirst = Letter | "$" => [[ CHAR: $ ]] | "_" => [[ CHAR: _ ]]
67 NameRest = NameFirst | Digit
68 iName = NameFirst NameRest* => [[ first2 swap prefix >string ]]
95 FutureReserved = ("class"
102 Name = !(Keyword) iName => [[ ast-name boa ]]
103 HexInteger = "0"~ [xX]~ HexDigit+ => [[ hex> ]]
104 OctInteger = "0"~ OctDigit+ => [[ oct> ]]
105 ExponentPart = ("e"|"E") ("+"|"-")? Digits => [[ concat ]]
106 Decimal = (Digits "." Digits? ExponentPart?
107 | "." Digits ExponentPart?
108 | Digits ExponentPart?) => [[ concat string>number ]]
109 Number = HexInteger | OctInteger | Decimal => [[ ast-number boa ]]
111 SingleEscape = "b" => [[ CHAR: \b ]]
112 | "f" => [[ CHAR: \f ]]
113 | "n" => [[ CHAR: \n ]]
114 | "r" => [[ CHAR: \r ]]
115 | "t" => [[ CHAR: \t ]]
116 | "v" => [[ CHAR: \v ]]
117 | "'" => [[ CHAR: ' ]]
118 | "\"" => [[ CHAR: \" ]]
119 | "\\" => [[ CHAR: \\ ]]
120 OctEscape = ([0-3] OctDigit OctDigit?
121 | [4-7] OctDigit) [[ sift oct> ]]
122 HexEscape = "x"~ (HexDigit HexDigit) => [[ hex> ]]
123 UnicodeEscape = ("u"~ (HexDigit HexDigit HexDigit HexDigit)
124 | "u{"~ HexDigit+ "}"~) => [[ hex> ]]
125 EscapeChar = "\\" (SingleEscape | OctEscape | HexEscape | UnicodeEscape):c => [[ c ]]
126 LineContinuation = "\\" LineTerminator => [[ drop f ]]
127 StringChars1 = (EscapeChar | LineContinuation | !('"""') .)
128 StringChars2 = (EscapeChar | LineContinuation | !('"') .)
129 StringChars3 = (EscapeChar | LineContinuation | !("'") .)
130 Str = ( '"""'~ StringChars1* '"""'~
131 | '"'~ StringChars2* '"'~
132 | "'"~ StringChars3* "'"~ ) => [[ sift >string ast-string boa ]]
133 RegExpFlags = NameRest* => [[ >string ]]
134 NonTerminator = !(LineTerminator) .
135 BackslashSequence = "\\" NonTerminator => [[ second ]]
136 RegExpFirstChar = !([*\\/]) NonTerminator
138 RegExpChar = !([\\/]) NonTerminator
140 RegExpChars = RegExpChar*
141 RegExpBody = RegExpFirstChar RegExpChars => [[ first2 swap prefix >string ]]
142 RegExp = "/" RegExpBody:b "/" RegExpFlags:fl => [[ b fl ast-regexp boa ]]
143 Special = "(" | ")" | "{" | "}" | "[" | "]" | "," | ";"
144 | "?" | ":" | "!==" | "!=" | "===" | "==" | "=" | ">="
145 | ">>>=" | ">>>" | ">>=" | ">>" | ">" | "<=" | "<<=" | "<<"
146 | "<" | "++" | "+=" | "+" | "--" | "-=" | "-" | "*="
147 | "*" | "/=" | "/" | "%=" | "%" | "&&=" | "&&" | "||="
148 | "||" | "." | "!" | "&=" | "&" | "|=" | "|" | "^="
150 Tok = Spaces (Name | Keyword | Number | Str | RegExp | Special )
154 ! Grammar for JavaScript. Based on OMeta-JS example from:
155 ! https://jarrett.cs.ucla.edu/ometa-js/#JavaScript_Compiler
157 ! The interesting thing about this parser is the mixing of
158 ! a default and non-default tokenizer. The JavaScript tokenizer
159 ! removes all newlines. So when operating on tokens there is no
160 ! need for newline and space skipping in the grammar. But JavaScript
161 ! uses the newline in the 'automatic semicolon insertion' rule.
163 ! If a statement ends in a newline, sometimes the semicolon can be
164 ! skipped. So we define an 'nl' rule using the default tokenizer.
165 ! This operates a character at a time. Using this 'nl' in the parser
166 ! allows us to detect newlines when we need to for the semicolon
167 ! insertion rule, but ignore it in all other places.
169 EBNF: parse-javascript [=[
173 tokenizer = <foreign tokenize-javascript Tok>
176 Spaces = Space* => [[ ignore ]]
177 Name = . ?[ ast-name? ]? => [[ value>> ]]
178 Number = . ?[ ast-number? ]?
179 String = . ?[ ast-string? ]?
180 RegExp = . ?[ ast-regexp? ]?
181 SpacesNoNl = (!(nl) Space)* => [[ ignore ]]
183 Expr = OrExpr:e "?" Expr:t ":" Expr:f => [[ e t f ast-cond-expr boa ]]
184 | OrExpr:e "=" Expr:rhs => [[ e rhs ast-set boa ]]
185 | OrExpr:e "+=" Expr:rhs => [[ e rhs "+" ast-mset boa ]]
186 | OrExpr:e "-=" Expr:rhs => [[ e rhs "-" ast-mset boa ]]
187 | OrExpr:e "*=" Expr:rhs => [[ e rhs "*" ast-mset boa ]]
188 | OrExpr:e "/=" Expr:rhs => [[ e rhs "/" ast-mset boa ]]
189 | OrExpr:e "%=" Expr:rhs => [[ e rhs "%" ast-mset boa ]]
190 | OrExpr:e "&&=" Expr:rhs => [[ e rhs "&&" ast-mset boa ]]
191 | OrExpr:e "||=" Expr:rhs => [[ e rhs "||" ast-mset boa ]]
192 | OrExpr:e "^=" Expr:rhs => [[ e rhs "^" ast-mset boa ]]
193 | OrExpr:e "&=" Expr:rhs => [[ e rhs "&" ast-mset boa ]]
194 | OrExpr:e "|=" Expr:rhs => [[ e rhs "|" ast-mset boa ]]
195 | OrExpr:e "<<=" Expr:rhs => [[ e rhs "<<" ast-mset boa ]]
196 | OrExpr:e ">>=" Expr:rhs => [[ e rhs ">>" ast-mset boa ]]
197 | OrExpr:e ">>>=" Expr:rhs => [[ e rhs ">>>" ast-mset boa ]]
198 | OrExpr:e => [[ e ]]
200 ExprNoIn = OrExprNoIn:e "?" ExprNoIn:t ":" ExprNoIn:f => [[ e t f ast-cond-expr boa ]]
201 | OrExprNoIn:e "=" ExprNoIn:rhs => [[ e rhs ast-set boa ]]
202 | OrExprNoIn:e "+=" ExprNoIn:rhs => [[ e rhs "+" ast-mset boa ]]
203 | OrExprNoIn:e "-=" ExprNoIn:rhs => [[ e rhs "-" ast-mset boa ]]
204 | OrExprNoIn:e "*=" ExprNoIn:rhs => [[ e rhs "*" ast-mset boa ]]
205 | OrExprNoIn:e "/=" ExprNoIn:rhs => [[ e rhs "/" ast-mset boa ]]
206 | OrExprNoIn:e "%=" ExprNoIn:rhs => [[ e rhs "%" ast-mset boa ]]
207 | OrExprNoIn:e "&&=" ExprNoIn:rhs => [[ e rhs "&&" ast-mset boa ]]
208 | OrExprNoIn:e "||=" ExprNoIn:rhs => [[ e rhs "||" ast-mset boa ]]
209 | OrExprNoIn:e "^=" ExprNoIn:rhs => [[ e rhs "^" ast-mset boa ]]
210 | OrExprNoIn:e "&=" ExprNoIn:rhs => [[ e rhs "&" ast-mset boa ]]
211 | OrExprNoIn:e "|=" ExprNoIn:rhs => [[ e rhs "|" ast-mset boa ]]
212 | OrExprNoIn:e "<<=" ExprNoIn:rhs => [[ e rhs "<<" ast-mset boa ]]
213 | OrExprNoIn:e ">>=" ExprNoIn:rhs => [[ e rhs ">>" ast-mset boa ]]
214 | OrExprNoIn:e ">>>=" ExprNoIn:rhs => [[ e rhs ">>>" ast-mset boa ]]
215 | OrExprNoIn:e => [[ e ]]
217 OrExpr = OrExpr:x "||" AndExpr:y => [[ x y "||" ast-binop boa ]]
219 OrExprNoIn = OrExprNoIn:x "||" AndExprNoIn:y => [[ x y "||" ast-binop boa ]]
221 AndExpr = AndExpr:x "&&" BitOrExpr:y => [[ x y "&&" ast-binop boa ]]
223 AndExprNoIn = AndExprNoIn:x "&&" BitOrExprNoIn:y => [[ x y "&&" ast-binop boa ]]
225 BitOrExpr = BitOrExpr:x "|" BitXORExpr:y => [[ x y "|" ast-binop boa ]]
227 BitOrExprNoIn = BitOrExprNoIn:x "|" BitXORExprNoIn:y => [[ x y "|" ast-binop boa ]]
229 BitXORExpr = BitXORExpr:x "^" BitANDExpr:y => [[ x y "^" ast-binop boa ]]
231 BitXORExprNoIn = BitXORExprNoIn:x "^" BitANDExprNoIn:y => [[ x y "^" ast-binop boa ]]
233 BitANDExpr = BitANDExpr:x "&" EqExpr:y => [[ x y "&" ast-binop boa ]]
235 BitANDExprNoIn = BitANDExprNoIn:x "&" EqExprNoIn:y => [[ x y "&" ast-binop boa ]]
237 EqExpr = EqExpr:x "==" RelExpr:y => [[ x y "==" ast-binop boa ]]
238 | EqExpr:x "!=" RelExpr:y => [[ x y "!=" ast-binop boa ]]
239 | EqExpr:x "===" RelExpr:y => [[ x y "===" ast-binop boa ]]
240 | EqExpr:x "!==" RelExpr:y => [[ x y "!==" ast-binop boa ]]
242 EqExprNoIn = EqExprNoIn:x "==" RelExprNoIn:y => [[ x y "==" ast-binop boa ]]
243 | EqExprNoIn:x "!=" RelExprNoIn:y => [[ x y "!=" ast-binop boa ]]
244 | EqExprNoIn:x "===" RelExprNoIn:y => [[ x y "===" ast-binop boa ]]
245 | EqExprNoIn:x "!==" RelExprNoIn:y => [[ x y "!==" ast-binop boa ]]
247 RelExpr = RelExpr:x ">" ShiftExpr:y => [[ x y ">" ast-binop boa ]]
248 | RelExpr:x ">=" ShiftExpr:y => [[ x y ">=" ast-binop boa ]]
249 | RelExpr:x "<" ShiftExpr:y => [[ x y "<" ast-binop boa ]]
250 | RelExpr:x "<=" ShiftExpr:y => [[ x y "<=" ast-binop boa ]]
251 | RelExpr:x "instanceof" ShiftExpr:y => [[ x y "instanceof" ast-binop boa ]]
252 | RelExpr:x "in" ShiftExpr:y => [[ x y "in" ast-binop boa ]]
254 RelExprNoIn = RelExprNoIn:x ">" ShiftExpr:y => [[ x y ">" ast-binop boa ]]
255 | RelExprNoIn:x ">=" ShiftExpr:y => [[ x y ">=" ast-binop boa ]]
256 | RelExprNoIn:x "<" ShiftExpr:y => [[ x y "<" ast-binop boa ]]
257 | RelExprNoIn:x "<=" ShiftExpr:y => [[ x y "<=" ast-binop boa ]]
258 | RelExprNoIn:x "instanceof" ShiftExpr:y => [[ x y "instanceof" ast-binop boa ]]
260 ShiftExpr = ShiftExpr:x "<<" AddExpr:y => [[ x y "<<" ast-binop boa ]]
261 | ShiftExpr:x ">>>" AddExpr:y => [[ x y ">>>" ast-binop boa ]]
262 | ShiftExpr:x ">>" AddExpr:y => [[ x y ">>" ast-binop boa ]]
264 AddExpr = AddExpr:x "+" MulExpr:y => [[ x y "+" ast-binop boa ]]
265 | AddExpr:x "-" MulExpr:y => [[ x y "-" ast-binop boa ]]
267 MulExpr = MulExpr:x "*" Unary:y => [[ x y "*" ast-binop boa ]]
268 | MulExpr:x "/" Unary:y => [[ x y "/" ast-binop boa ]]
269 | MulExpr:x "%" Unary:y => [[ x y "%" ast-binop boa ]]
271 Unary = "-" Unary:p => [[ p "-" ast-unop boa ]]
272 | "+" Unary:p => [[ p ]]
273 | "++" Unary:p => [[ p "++" ast-preop boa ]]
274 | "--" Unary:p => [[ p "--" ast-preop boa ]]
275 | "!" Unary:p => [[ p "!" ast-unop boa ]]
276 | "typeof" Unary:p => [[ p "typeof" ast-unop boa ]]
277 | "void" Unary:p => [[ p "void" ast-unop boa ]]
278 | "delete" Unary:p => [[ p "delete" ast-unop boa ]]
280 Postfix = PrimExpr:p SpacesNoNl "++" => [[ p "++" ast-postop boa ]]
281 | PrimExpr:p SpacesNoNl "--" => [[ p "--" ast-postop boa ]]
283 Args = (Expr ("," Expr => [[ second ]])* => [[ first2 swap prefix ]])?
284 PrimExpr = PrimExpr:p "[" Expr:i "]" => [[ i p ast-getp boa ]]
285 | PrimExpr:p "." Name:m "(" Args:as ")" => [[ m p as ast-send boa ]]
286 | PrimExpr:p "." Name:f => [[ f p ast-getp boa ]]
287 | PrimExpr:p "(" Args:as ")" => [[ p as ast-call boa ]]
289 PrimExprHd = "(" Expr:e ")" => [[ e ]]
290 | "this" => [[ ast-this boa ]]
291 | Name => [[ ast-get boa ]]
295 | "function" FuncRest:fr => [[ fr ]]
296 | "new" PrimExpr:n "(" Args:as ")" => [[ n as ast-new boa ]]
297 | "new" PrimExpr:n => [[ n f ast-new boa ]]
298 | "[" Args:es "]" => [[ es ast-array boa ]]
300 JsonBindings = (JsonBinding ("," JsonBinding => [[ second ]])* => [[ first2 swap prefix ]])?
301 Json = "{" JsonBindings:bs "}" => [[ bs ast-json boa ]]
302 JsonBinding = JsonPropName:n ":" Expr:v => [[ n v ast-binding boa ]]
303 JsonPropName = Name | Number | String | RegExp
305 Formals = (Formal ("," Formal => [[ second ]])* => [[ first2 swap prefix ]])?
306 FuncRest = "(" Formals:fs ")" "{" SrcElems:body "}" => [[ fs body ast-func boa ]]
307 Sc = SpacesNoNl (nl | &("}") | End)| ";"
308 Binding = Name:n "=" Expr:v => [[ n v ast-var boa ]]
309 | Name:n => [[ n "undefined" ast-get boa ast-var boa ]]
310 Block = "{" SrcElems:ss "}" => [[ ss ]]
311 Bindings = (Binding (","~ Binding)* => [[ first2 swap prefix ]])?
312 For1 = "var" Bindings => [[ second ]]
314 | Spaces => [[ "undefined" ast-get boa ]]
316 | Spaces => [[ "true" ast-get boa ]]
318 | Spaces => [[ "undefined" ast-get boa ]]
319 ForIn1 = "var" Name:n => [[ n "undefined" ast-get boa ast-var boa ]]
321 Switch1 = "case" Expr:c ":" SrcElems:cs => [[ c cs ast-case boa ]]
322 | "default" ":" SrcElems:cs => [[ cs ast-default boa ]]
323 SwitchBody = Switch1*
324 Finally = "finally" Block:b => [[ b ]]
325 | Spaces => [[ "undefined" ast-get boa ]]
327 | "var" Bindings:bs Sc => [[ bs ast-begin boa ]]
328 | "if" "(" Expr:c ")" Stmt:t "else" Stmt:f => [[ c t f ast-if boa ]]
329 | "if" "(" Expr:c ")" Stmt:t => [[ c t "undefined" ast-get boa ast-if boa ]]
330 | "while" "(" Expr:c ")" Stmt:s => [[ c s ast-while boa ]]
331 | "do" Stmt:s "while" "(" Expr:c ")" Sc => [[ s c ast-do-while boa ]]
332 | "for" "(" For1:i ";" For2:c ";" For3:u ")" Stmt:s => [[ i c u s ast-for boa ]]
333 | "for" "(" ForIn1:v "in" Expr:e ")" Stmt:s => [[ v e s ast-for-in boa ]]
334 | "switch" "(" Expr:e ")" "{" SwitchBody:cs "}" => [[ e cs ast-switch boa ]]
335 | "break" Sc => [[ ast-break boa ]]
336 | "continue" Sc => [[ ast-continue boa ]]
337 | "throw" SpacesNoNl Expr:e Sc => [[ e ast-throw boa ]]
338 | "try" Block:t "catch" "(" Name:e ")" Block:c Finally:f => [[ t e c f ast-try boa ]]
339 | "return" Expr:e Sc => [[ e ast-return boa ]]
340 | "return" Sc => [[ "undefined" ast-get boa ast-return boa ]]
341 | "with" "(" Expr:e ")" Stmt:b => [[ e b ast-with boa ]]
342 | Expr:e Sc => [[ e ]]
343 | ";" => [[ "undefined" ast-get boa ]]
344 SrcElem = "function" Name:n FuncRest:f => [[ n f ast-var boa ]]
346 SrcElems = SrcElem* => [[ ast-begin boa ]]
347 TopLevel = SrcElems Spaces