1 ! Copyright (C) 2007 Chris Double.
2 ! See http://factorcode.org/license.txt for BSD license.
4 USING: kernel tools.test peg peg.ebnf peg.ebnf.private words
5 math math.parser sequences accessors peg.parsers parser
6 namespaces arrays strings eval unicode.data multiline ;
9 { T{ ebnf-non-terminal f "abc" } } [
10 "abc" 'non-terminal' parse
13 { T{ ebnf-terminal f "55" } } [
14 "'55'" 'terminal' parse
21 V{ T{ ebnf-terminal f "1" } T{ ebnf-terminal f "2" } }
25 "digit = '1' | '2'" 'rule' parse
32 V{ T{ ebnf-terminal f "1" } T{ ebnf-terminal f "2" } }
36 "digit = '1' '2'" 'rule' parse
43 V{ T{ ebnf-non-terminal f "one" } T{ ebnf-non-terminal f "two" } }
45 T{ ebnf-non-terminal f "three" }
49 "one two | three" 'choice' parse
55 T{ ebnf-non-terminal f "one" }
58 V{ T{ ebnf-non-terminal f "two" } T{ ebnf-non-terminal f "three" } }
64 "one {two | three}" 'choice' parse
70 T{ ebnf-non-terminal f "one" }
75 V{ T{ ebnf-non-terminal f "two" } T{ ebnf-non-terminal f "three" } }
77 T{ ebnf-non-terminal f "four" }
84 "one ((two | three) four)*" 'choice' parse
90 T{ ebnf-non-terminal f "one" }
95 V{ T{ ebnf-non-terminal f "two" } T{ ebnf-non-terminal f "three" } }
97 T{ ebnf-non-terminal f "four" }
104 "one ((two | three) four)~" 'choice' parse
110 T{ ebnf-non-terminal f "one" }
111 T{ ebnf-optional f T{ ebnf-non-terminal f "two" } }
112 T{ ebnf-non-terminal f "three" }
116 "one ( two )? three" 'choice' parse
120 "\"foo\"" 'identifier' parse
124 "'foo'" 'identifier' parse
128 "foo" 'non-terminal' parse symbol>>
132 "foo]" 'non-terminal' parse symbol>>
136 "ab" [EBNF foo='a' 'b' EBNF]
140 "ab" [EBNF foo=('a')[[ drop 1 ]] 'b' EBNF]
144 "ab" [EBNF foo=('a') [[ drop 1 ]] ('b') [[ drop 2 ]] EBNF]
148 "A" [EBNF foo=[A-Z] EBNF]
152 "Z" [EBNF foo=[A-Z] EBNF]
156 "0" [EBNF foo=[A-Z] EBNF]
160 "0" [EBNF foo=[^A-Z] EBNF]
164 "A" [EBNF foo=[^A-Z] EBNF]
168 "Z" [EBNF foo=[^A-Z] EBNF]
171 { V{ "1" "+" "foo" } } [
172 "1+1" [EBNF foo='1' '+' '1' [[ drop "foo" ]] EBNF]
176 "1+1" [EBNF foo='1' '+' '1' => [[ drop "foo" ]] EBNF]
180 "1+1" [EBNF foo='1' '+' '1' => [[ drop "foo" ]] | '1' '-' '1' => [[ drop "bar" ]] EBNF]
184 "1-1" [EBNF foo='1' '+' '1' => [[ drop "foo" ]] | '1' '-' '1' => [[ drop "bar" ]] EBNF]
188 "4+2" [EBNF num=[0-9] => [[ digit> ]] foo=num:x '+' num:y => [[ x y + ]] EBNF]
192 "4+2" [EBNF foo=[0-9]:x '+' [0-9]:y => [[ x digit> y digit> + ]] EBNF]
196 { 1 2 3 4 } [EBNF num=. ?[ number? ]? list=list:x num:y => [[ x y + ]] | num EBNF]
200 { "a" 2 3 4 } [EBNF num=. ?[ number? ]? list=list:x num:y => [[ x y + ]] | num EBNF]
204 { 1 2 "a" 4 } [EBNF num=. ?[ number? ]? list=list:x num:y => [[ x y + ]] | num EBNF]
208 "ab" [EBNF -=" " | "\t" | "\n" foo="a" - "b" EBNF]
211 { V{ "a" " " "b" } } [
212 "a b" [EBNF -=" " | "\t" | "\n" foo="a" - "b" EBNF]
215 { V{ "a" "\t" "b" } } [
216 "a\tb" [EBNF -=" " | "\t" | "\n" foo="a" - "b" EBNF]
219 { V{ "a" "\n" "b" } } [
220 "a\nb" [EBNF -=" " | "\t" | "\n" foo="a" - "b" EBNF]
224 "ab" [EBNF -=" " | "\t" | "\n" foo="a" (-)? "b" EBNF]
227 { V{ "a" " " "b" } } [
228 "a b" [EBNF -=" " | "\t" | "\n" foo="a" (-)? "b" EBNF]
232 { V{ "a" "\t" "b" } } [
233 "a\tb" [EBNF -=" " | "\t" | "\n" foo="a" (-)? "b" EBNF]
236 { V{ "a" "\n" "b" } } [
237 "a\nb" [EBNF -=" " | "\t" | "\n" foo="a" (-)? "b" EBNF]
241 "ab" [EBNF -=(" " | "\t" | "\n")? => [[ drop ignore ]] foo="a" - "b" EBNF]
245 "a\tb" [EBNF -=(" " | "\t" | "\n")? => [[ drop ignore ]] foo="a" - "b" EBNF]
249 "a\nb" [EBNF -=(" " | "\t" | "\n")? => [[ drop ignore ]] foo="a" - "b" EBNF]
253 "axb" [EBNF -=(" " | "\t" | "\n")? => [[ drop ignore ]] foo="a" - "b" EBNF]
256 { V{ V{ 49 } "+" V{ 49 } } } [
257 #! Test direct left recursion.
258 #! Using packrat, the leading expr in the first alternative fails, so the second alternative (num) is used.
259 "1+1" [EBNF num=([0-9])+ expr=expr "+" num | num EBNF]
262 { V{ V{ V{ 49 } "+" V{ 49 } } "+" V{ 49 } } } [
263 #! Test direct left recursion with two operators; the expected result nests to the left.
264 #! Using packrat, the leading expr in the first alternative fails, so the second alternative (num) is used.
265 "1+1+1" [EBNF num=([0-9])+ expr=expr "+" num | num EBNF]
268 { V{ V{ V{ 49 } "+" V{ 49 } } "+" V{ 49 } } } [
269 #! Test indirect left recursion (expr refers back to itself through x).
270 #! Using packrat, the leading x in the first alternative of expr fails, so the second alternative (num) is used.
271 "1+1+1" [EBNF num=([0-9])+ x=expr expr=x "+" num | num EBNF]
275 "abcd='9' | ('8'):x => [[ x ]]" 'ebnf' (parse) remaining>> empty?
279 Primary = PrimaryNoNewArray
280 PrimaryNoNewArray = ClassInstanceCreationExpression
285 ClassInstanceCreationExpression = "new" ClassOrInterfaceType "(" ")"
286 | Primary "." "new" Identifier "(" ")"
287 MethodInvocation = Primary "." MethodName "(" ")"
289 FieldAccess = Primary "." Identifier
290 | "super" "." Identifier
291 ArrayAccess = Primary "[" Expression "]"
292 | ExpressionName "[" Expression "]"
293 ClassOrInterfaceType = ClassName | InterfaceTypeName
294 ClassName = "C" | "D"
295 InterfaceTypeName = "I" | "J"
296 Identifier = "x" | "y" | ClassOrInterfaceType
297 MethodName = "m" | "n"
298 ExpressionName = Identifier
299 Expression = "i" | "j"
307 { V{ "this" "." "x" } } [
311 { V{ V{ "this" "." "x" } "." "y" } } [
315 { V{ V{ "this" "." "x" } "." "m" "(" ")" } } [
319 { V{ V{ V{ "x" "[" "i" "]" } "[" "j" "]" } "." "y" } } [
323 { V{ V{ "a" "b" } "c" } } [
324 "abc" [EBNF a="a" "b" foo=(a "c") EBNF]
328 "abc" [EBNF a="a" "b"~ foo=(a "c") EBNF]
331 { V{ V{ "a" V{ "b" "b" } } "c" } } [
332 "abbc" [EBNF a=("a" "b"*) foo=(a "c") EBNF]
336 "abc" [EBNF a=("a" ("b")~) foo=(a "c") EBNF]
340 "abc" [EBNF a=("a" "b"~) foo=(a "c") EBNF]
344 "abc" [EBNF a=("a" "b")~ foo=(a "c") EBNF]
347 { V{ V{ "a" "b" } "c" } } [
348 "abc" [EBNF a="a" "b" foo={a "c"} EBNF]
351 { V{ V{ "a" "b" } "c" } } [
352 "abc" [EBNF a="a" "b" foo=a "c" EBNF]
356 "a bc" [EBNF a="a" "b" foo=(a "c") EBNF]
360 "a bc" [EBNF a="a" "b" foo=a "c" EBNF]
364 "a bc" [EBNF a="a" "b" foo={a "c"} EBNF]
368 "ab c" [EBNF a="a" "b" foo=a "c" EBNF]
371 { V{ V{ "a" "b" } "c" } } [
372 "ab c" [EBNF a="a" "b" foo={a "c"} EBNF]
376 "ab c" [EBNF a="a" "b" foo=(a "c") EBNF]
380 "a b c" [EBNF a="a" "b" foo=a "c" EBNF]
384 "a b c" [EBNF a="a" "b" foo=(a "c") EBNF]
388 "a b c" [EBNF a="a" "b" foo={a "c"} EBNF]
391 { V{ V{ V{ "a" "b" } "c" } V{ V{ "a" "b" } "c" } } } [
392 "ab cab c" [EBNF a="a" "b" foo={a "c"}* EBNF]
396 "ab cab c" [EBNF a="a" "b" foo=(a "c")* EBNF]
399 { V{ V{ V{ "a" "b" } "c" } V{ V{ "a" "b" } "c" } } } [
400 "ab c ab c" [EBNF a="a" "b" foo={a "c"}* EBNF]
403 { V{ V{ "a" "c" } V{ "a" "c" } } } [
404 "ab c ab c" [EBNF a="a" "b"~ foo={a "c"}* EBNF]
408 "ab c ab c" [EBNF a="a" "b" foo=(a "c")* EBNF]
412 "ab c ab c" [EBNF a="a" "b" foo=(a "c")* EBNF]
415 { V{ "a" "a" "a" } } [
416 "aaa" [EBNF a=('a')* b=!('b') a:x => [[ x ]] EBNF]
420 "aaa" [EBNF a=('a')* b=!('b') a:x => [[ x ]] EBNF]
421 "aaa" [EBNF a=('a')* b=!('b') (a):x => [[ x ]] EBNF] =
424 { V{ "a" "a" "a" } } [
425 "aaa" [EBNF a=('a')* b=a:x => [[ x ]] EBNF]
429 "aaa" [EBNF a=('a')* b=a:x => [[ x ]] EBNF]
430 "aaa" [EBNF a=('a')* b=(a):x => [[ x ]] EBNF] =
434 "number=(digit)+:n 'a'" 'ebnf' (parse) remaining>> length zero?
438 "number=(digit)+ 'a'" 'ebnf' (parse) remaining>> length zero?
442 "number=digit+ 'a'" 'ebnf' (parse) remaining>> length zero?
446 "number=digit+:n 'a'" 'ebnf' (parse) remaining>> length zero?
450 "foo=(name):n !(keyword) => [[ n ]]" 'rule' parse
451 "foo=name:n !(keyword) => [[ n ]]" 'rule' parse =
455 "foo=!(keyword) (name):n => [[ n ]]" 'rule' parse
456 "foo=!(keyword) name:n => [[ n ]]" 'rule' parse =
466 foo=<foreign parser1 foo> 'b'
470 foo=<foreign parser1> 'c'
474 foo=<foreign any-char> 'd'
489 { V{ CHAR: a "d" } } [
494 "USING: kernel peg.ebnf ; \"a\\n\" [EBNF foo='a' '\n' => [[ drop \"\n\" ]] EBNF] drop" eval( -- )
498 "USING: peg.ebnf ; <EBNF foo='a' foo='b' EBNF>" eval( -- ) drop
502 #! Rule lookup occurs in a namespace, so a variable already set under the rule's
503 #! name is wrongly treated as a duplicate rule. This unit test is a reminder to fix that.
504 [ "fail" "foo" set "foo='a'" 'ebnf' parse transform drop t ] with-scope
508 { V{ "a" CHAR: b } } [
509 "ab" [EBNF tokenizer=default foo="a" . EBNF]
! Tuple class with a single slot, value. The Number rule of the tokenizer
! grammar in this file builds instances with `ast-number boa`.
512 TUPLE: ast-number value ;
518 SingleLineComment = "//" (!("\n") .)* "\n" => [[ ignore ]]
519 MultiLineComment = "/*" (!("*/") .)* "*/" => [[ ignore ]]
520 Space = " " | "\t" | "\r" | "\n" | SingleLineComment | MultiLineComment
521 Spaces = Space* => [[ ignore ]]
522 Number = Digits:ws '.' Digits:fs => [[ ws "." fs 3array "" concat-as string>number ast-number boa ]]
523 | Digits => [[ >string string>number ast-number boa ]]
524 Special = "(" | ")" | "{" | "}" | "[" | "]" | "," | ";"
525 | "?" | ":" | "!==" | "~=" | "===" | "==" | "=" | ">="
526 | ">" | "<=" | "<" | "++" | "+=" | "+" | "--" | "-="
527 | "-" | "*=" | "*" | "/=" | "/" | "%=" | "%" | "&&="
528 | "&&" | "||=" | "||" | "." | "!"
529 Tok = Spaces (Number | Special )
532 { V{ CHAR: 1 T{ ast-number f 23 } ";" CHAR: x } } [
533 "123;x" [EBNF bar = .
534 tokenizer = <foreign a-tokenizer Tok> foo=.
535 tokenizer=default baz=.
536 main = bar foo foo baz
540 { V{ CHAR: 5 "+" CHAR: 2 } } [
545 spaces=space* => [[ ignore ]]
546 tokenizer=spaces (number | operator)
551 { V{ CHAR: 5 "+" CHAR: 2 } } [
556 spaces=space* => [[ ignore ]]
557 tokenizer=spaces (number | operator)
563 "++--" [EBNF tokenizer=("++" | "--") main="++" EBNF]
567 "\\" [EBNF foo="\\" EBNF]
! Evaluating an [EBNF ... EBNF] form that contains no rules must throw.
570 [ "USE: peg.ebnf [EBNF EBNF]" eval( -- ) ] must-fail
572 [ """USE: peg.ebnf [EBNF
577 error>> [ redefined-rule? ] [ name>> "lol" = ] bi and
! Defines foo: rule Bar matches "a" twice, binds the matches to a1 and a2
! (variable names ending in a digit) and returns them as a two-element array.
583 EBNF: foo Bar = "a":a1 "a":a2 => [[ a1 a2 2array ]] ;EBNF
! Defines foo2: rule Bar matches "a" twice, binds the matches to a-1 and a-2
! (variable names containing a hyphen and a digit) and returns them as a
! two-element array.
590 EBNF: foo2 Bar = "a":a-1 "a":a-2 => [[ a-1 a-2 2array ]] ;EBNF