basis/peg/ebnf/ebnf-tests.factor

   1 ! Copyright (C) 2007 Chris Double.
   2 ! See http://factorcode.org/license.txt for BSD license.
   3 !
   4 USING: kernel tools.test peg peg.ebnf words math math.parser
   5        sequences accessors peg.parsers parser namespaces arrays
   6        strings eval unicode.data multiline ;
   7 IN: peg.ebnf.tests
   8
   9 { T{ ebnf-non-terminal f "abc" } } [
  10   "abc" 'non-terminal' parse
  11 ] unit-test
  12
  13 { T{ ebnf-terminal f "55" } } [
  14   "'55'" 'terminal' parse
  15 ] unit-test
  16
  17 {
  18   T{ ebnf-rule f
  19      "digit"
  20      T{ ebnf-choice f
  21         V{ T{ ebnf-terminal f "1" } T{ ebnf-terminal f "2" } }
  22      }
  23   }
  24 } [
  25   "digit = '1' | '2'" 'rule' parse
  26 ] unit-test
  27
  28 {
  29   T{ ebnf-rule f
  30      "digit"
  31      T{ ebnf-sequence f
  32         V{ T{ ebnf-terminal f "1" } T{ ebnf-terminal f "2" } }
  33      }
  34   }
  35 } [
  36   "digit = '1' '2'" 'rule' parse
  37 ] unit-test
  38
  39 {
  40   T{ ebnf-choice f
  41      V{
  42        T{ ebnf-sequence f
  43           V{ T{ ebnf-non-terminal f "one" } T{ ebnf-non-terminal f "two" } }
  44        }
  45        T{ ebnf-non-terminal f "three" }
  46      }
  47   }
  48 } [
  49   "one two | three" 'choice' parse
  50 ] unit-test
  51
  52 {
  53   T{ ebnf-sequence f
  54      V{
  55        T{ ebnf-non-terminal f "one" }
  56        T{ ebnf-whitespace f
  57          T{ ebnf-choice f
  58             V{ T{ ebnf-non-terminal f "two" } T{ ebnf-non-terminal f "three" } }
  59          }
  60        }
  61      }
  62   }
  63 } [
  64   "one {two | three}" 'choice' parse
  65 ] unit-test
  66
  67 {
  68   T{ ebnf-sequence f
  69      V{
  70        T{ ebnf-non-terminal f "one" }
  71        T{ ebnf-repeat0 f
  72           T{ ebnf-sequence f
  73              V{
  74                 T{ ebnf-choice f
  75                    V{ T{ ebnf-non-terminal f "two" } T{ ebnf-non-terminal f "three" } }
  76                 }
  77                 T{ ebnf-non-terminal f "four" }
  78              }
  79           }
  80         }
  81      }
  82   }
  83 } [
  84   "one ((two | three) four)*" 'choice' parse
  85 ] unit-test
  86
  87 {
  88   T{ ebnf-sequence f
  89      V{
  90          T{ ebnf-non-terminal f "one" }
  91          T{ ebnf-optional f T{ ebnf-non-terminal f "two" } }
  92          T{ ebnf-non-terminal f "three" }
  93      }
  94   }
  95 } [
  96   "one ( two )? three" 'choice' parse
  97 ] unit-test
  98
  99 { "foo" } [
 100   "\"foo\"" 'identifier' parse
 101 ] unit-test
 102
 103 { "foo" } [
 104   "'foo'" 'identifier' parse
 105 ] unit-test
 106
 107 { "foo" } [
 108   "foo" 'non-terminal' parse symbol>>
 109 ] unit-test
 110
 111 { "foo" } [
 112   "foo]" 'non-terminal' parse symbol>>
 113 ] unit-test
 114
 115 { V{ "a" "b" } } [
 116   "ab" [EBNF foo='a' 'b' EBNF]
 117 ] unit-test
 118
 119 { V{ 1 "b" } } [
 120   "ab" [EBNF foo=('a')[[ drop 1 ]] 'b' EBNF]
 121 ] unit-test
 122
 123 { V{ 1 2 } } [
 124   "ab" [EBNF foo=('a') [[ drop 1 ]] ('b') [[ drop 2 ]] EBNF]
 125 ] unit-test
 126
 127 { CHAR: A } [
 128   "A" [EBNF foo=[A-Z] EBNF]
 129 ] unit-test
 130
 131 { CHAR: Z } [
 132   "Z" [EBNF foo=[A-Z] EBNF]
 133 ] unit-test
 134
 135 [
 136   "0" [EBNF foo=[A-Z] EBNF]
 137 ] must-fail
 138
 139 { CHAR: 0 } [
 140   "0" [EBNF foo=[^A-Z] EBNF]
 141 ] unit-test
 142
 143 [
 144   "A" [EBNF foo=[^A-Z] EBNF]
 145 ] must-fail
 146
 147 [
 148   "Z" [EBNF foo=[^A-Z] EBNF]
 149 ] must-fail
 150
 151 { V{ "1" "+" "foo" } } [
 152   "1+1" [EBNF foo='1' '+' '1' [[ drop "foo" ]] EBNF]
 153 ] unit-test
 154
 155 { "foo" } [
 156   "1+1" [EBNF foo='1' '+' '1' => [[ drop "foo" ]] EBNF]
 157 ] unit-test
 158
 159 { "foo" } [
 160   "1+1" [EBNF foo='1' '+' '1' => [[ drop "foo" ]] | '1' '-' '1' => [[ drop "bar" ]] EBNF]
 161 ] unit-test
 162
 163 { "bar" } [
 164   "1-1" [EBNF foo='1' '+' '1' => [[ drop "foo" ]] | '1' '-' '1' => [[ drop "bar" ]] EBNF]
 165 ] unit-test
 166
 167 { 6 } [
 168   "4+2" [EBNF num=[0-9] => [[ digit> ]] foo=num:x '+' num:y => [[ x y + ]] EBNF]
 169 ] unit-test
 170
 171 { 6 } [
 172   "4+2" [EBNF foo=[0-9]:x '+' [0-9]:y => [[ x digit> y digit> + ]] EBNF]
 173 ] unit-test
 174
 175 { 10 } [
 176   { 1 2 3 4 } [EBNF num=. ?[ number? ]? list=list:x num:y => [[ x y + ]] | num EBNF]
 177 ] unit-test
 178
 179 [
 180   { "a" 2 3 4 } [EBNF num=. ?[ number? ]? list=list:x num:y => [[ x y + ]] | num EBNF]
 181 ] must-fail
 182
 183 { 3 } [
 184   { 1 2 "a" 4 } [EBNF num=. ?[ number? ]? list=list:x num:y => [[ x y + ]] | num EBNF]
 185 ] unit-test
 186
 187 [
 188   "ab" [EBNF -=" " | "\t" | "\n" foo="a" - "b" EBNF]
 189 ] must-fail
 190
 191 { V{ "a" " " "b" } } [
 192   "a b" [EBNF -=" " | "\t" | "\n" foo="a" - "b" EBNF]
 193 ] unit-test
 194
 195 { V{ "a" "\t" "b" } } [
 196   "a\tb" [EBNF -=" " | "\t" | "\n" foo="a" - "b" EBNF]
 197 ] unit-test
 198
 199 { V{ "a" "\n" "b" } } [
 200   "a\nb" [EBNF -=" " | "\t" | "\n" foo="a" - "b" EBNF]
 201 ] unit-test
 202
 203 { V{ "a" f "b" } } [
 204   "ab" [EBNF -=" " | "\t" | "\n" foo="a" (-)? "b" EBNF]
 205 ] unit-test
 206
 207 { V{ "a" " " "b" } } [
 208   "a b" [EBNF -=" " | "\t" | "\n" foo="a" (-)? "b" EBNF]
 209 ] unit-test
 210
 211
 212 { V{ "a" "\t" "b" } } [
 213   "a\tb" [EBNF -=" " | "\t" | "\n" foo="a" (-)? "b" EBNF]
 214 ] unit-test
 215
 216 { V{ "a" "\n" "b" } } [
 217   "a\nb" [EBNF -=" " | "\t" | "\n" foo="a" (-)? "b" EBNF]
 218 ] unit-test
 219
 220 { V{ "a" "b" } } [
 221   "ab" [EBNF -=(" " | "\t" | "\n")? => [[ drop ignore ]] foo="a" - "b" EBNF]
 222 ] unit-test
 223
 224 { V{ "a" "b" } } [
 225   "a\tb" [EBNF -=(" " | "\t" | "\n")? => [[ drop ignore ]] foo="a" - "b" EBNF]
 226 ] unit-test
 227
 228 { V{ "a" "b" } } [
 229   "a\nb" [EBNF -=(" " | "\t" | "\n")? => [[ drop ignore ]] foo="a" - "b" EBNF]
 230 ] unit-test
 231
 232 [
 233   "axb" [EBNF -=(" " | "\t" | "\n")? => [[ drop ignore ]] foo="a" - "b" EBNF]
 234 ] must-fail
 235
 236 { V{ V{ 49 } "+" V{ 49 } } } [
 237   #! Test direct left recursion.
 238   #! Using packrat, so first part of expr fails, causing 2nd choice to be used
 239   "1+1" [EBNF num=([0-9])+ expr=expr "+" num | num EBNF]
 240 ] unit-test
 241
 242 { V{ V{ V{ 49 } "+" V{ 49 } } "+" V{ 49 } } } [
 243   #! Test direct left recursion.
 244   #! Using packrat, so first part of expr fails, causing 2nd choice to be used
 245   "1+1+1" [EBNF num=([0-9])+ expr=expr "+" num | num EBNF]
 246 ] unit-test
 247
 248 { V{ V{ V{ 49 } "+" V{ 49 } } "+" V{ 49 } } } [
 249   #! Test indirect left recursion.
 250   #! Using packrat, so first part of expr fails, causing 2nd choice to be used
 251   "1+1+1" [EBNF num=([0-9])+ x=expr expr=x "+" num | num EBNF]
 252 ] unit-test
 253
 254 { t } [
 255   "abcd='9' | ('8'):x => [[ x ]]" 'ebnf' (parse) remaining>> empty?
 256 ] unit-test
 257
 258 EBNF: primary
 259 Primary = PrimaryNoNewArray
 260 PrimaryNoNewArray =  ClassInstanceCreationExpression
 261                    | MethodInvocation
 262                    | FieldAccess
 263                    | ArrayAccess
 264                    | "this"
 265 ClassInstanceCreationExpression =  "new" ClassOrInterfaceType "(" ")"
 266                                  | Primary "." "new" Identifier "(" ")"
 267 MethodInvocation =  Primary "." MethodName "(" ")"
 268                   | MethodName "(" ")"
 269 FieldAccess =  Primary "." Identifier
 270              | "super" "." Identifier
 271 ArrayAccess =  Primary "[" Expression "]"
 272              | ExpressionName "[" Expression "]"
 273 ClassOrInterfaceType = ClassName | InterfaceTypeName
 274 ClassName = "C" | "D"
 275 InterfaceTypeName = "I" | "J"
 276 Identifier = "x" | "y" | ClassOrInterfaceType
 277 MethodName = "m" | "n"
 278 ExpressionName = Identifier
 279 Expression = "i" | "j"
 280 main = Primary
 281 ;EBNF
 282
 283 { "this" } [
 284   "this" primary
 285 ] unit-test
 286
 287 { V{ "this" "." "x" } } [
 288   "this.x" primary
 289 ] unit-test
 290
 291 { V{ V{ "this" "." "x" } "." "y" } } [
 292   "this.x.y" primary
 293 ] unit-test
 294
 295 { V{ V{ "this" "." "x" } "." "m" "(" ")" } } [
 296   "this.x.m()" primary
 297 ] unit-test
 298
 299 { V{ V{ V{ "x" "[" "i" "]" } "[" "j" "]" } "." "y" } } [
 300   "x[i][j].y" primary
 301 ] unit-test
 302
 303 { V{ V{ "a" "b" } "c" } } [
 304   "abc" [EBNF a="a" "b" foo=(a "c") EBNF]
 305 ] unit-test
 306
 307 { V{ V{ "a" "b" } "c" } } [
 308   "abc" [EBNF a="a" "b" foo={a "c"} EBNF]
 309 ] unit-test
 310
 311 { V{ V{ "a" "b" } "c" } } [
 312   "abc" [EBNF a="a" "b" foo=a "c" EBNF]
 313 ] unit-test
 314
 315 [
 316   "a bc" [EBNF a="a" "b" foo=(a "c") EBNF]
 317 ] must-fail
 318
 319 [
 320   "a bc" [EBNF a="a" "b" foo=a "c" EBNF]
 321 ] must-fail
 322
 323 [
 324   "a bc" [EBNF a="a" "b" foo={a "c"} EBNF]
 325 ] must-fail
 326
 327 [
 328   "ab c" [EBNF a="a" "b" foo=a "c" EBNF]
 329 ] must-fail
 330
 331 { V{ V{ "a" "b" } "c" } } [
 332   "ab c" [EBNF a="a" "b" foo={a "c"} EBNF]
 333 ] unit-test
 334
 335 [
 336   "ab c" [EBNF a="a" "b" foo=(a "c") EBNF]
 337 ] must-fail
 338
 339 [
 340   "a b c" [EBNF a="a" "b" foo=a "c" EBNF]
 341 ] must-fail
 342
 343 [
 344   "a b c" [EBNF a="a" "b" foo=(a "c") EBNF]
 345 ] must-fail
 346
 347 [
 348   "a b c" [EBNF a="a" "b" foo={a "c"} EBNF]
 349 ] must-fail
 350
 351 { V{ V{ V{ "a" "b" } "c" } V{ V{ "a" "b" } "c" } } } [
 352   "ab cab c" [EBNF a="a" "b" foo={a "c"}* EBNF]
 353 ] unit-test
 354
 355 { V{ } } [
 356   "ab cab c" [EBNF a="a" "b" foo=(a "c")* EBNF]
 357 ] unit-test
 358
 359 { V{ V{ V{ "a" "b" } "c" } V{ V{ "a" "b" } "c" } } } [
 360   "ab c ab c" [EBNF a="a" "b" foo={a "c"}* EBNF]
 361 ] unit-test
 362
 363 { V{ } } [
 364   "ab c ab c" [EBNF a="a" "b" foo=(a "c")* EBNF]
 365 ] unit-test
 366
 367 { V{ "a" "a" "a" } } [
 368   "aaa" [EBNF a=('a')* b=!('b') a:x => [[ x ]] EBNF]
 369 ] unit-test
 370
 371 { t } [
 372   "aaa" [EBNF a=('a')* b=!('b') a:x => [[ x ]] EBNF]
 373   "aaa" [EBNF a=('a')* b=!('b') (a):x => [[ x ]] EBNF] =
 374 ] unit-test
 375
 376 { V{ "a" "a" "a" } } [
 377   "aaa" [EBNF a=('a')* b=a:x => [[ x ]] EBNF]
 378 ] unit-test
 379
 380 { t } [
 381   "aaa" [EBNF a=('a')* b=a:x => [[ x ]] EBNF]
 382   "aaa" [EBNF a=('a')* b=(a):x => [[ x ]] EBNF] =
 383 ] unit-test
 384
 385 { t } [
 386   "number=(digit)+:n 'a'" 'ebnf' (parse) remaining>> length zero?
 387 ] unit-test
 388
 389 { t } [
 390   "number=(digit)+ 'a'" 'ebnf' (parse) remaining>> length zero?
 391 ] unit-test
 392
 393 { t } [
 394   "number=digit+ 'a'" 'ebnf' (parse) remaining>> length zero?
 395 ] unit-test
 396
 397 { t } [
 398   "number=digit+:n 'a'" 'ebnf' (parse) remaining>> length zero?
 399 ] unit-test
 400
 401 { t } [
 402   "foo=(name):n !(keyword) => [[ n ]]" 'rule' parse
 403   "foo=name:n !(keyword) => [[ n ]]" 'rule' parse =
 404 ] unit-test
 405
 406 { t } [
 407   "foo=!(keyword) (name):n => [[ n ]]" 'rule' parse
 408   "foo=!(keyword) name:n => [[ n ]]" 'rule' parse =
 409 ] unit-test
 410
 411 <<
 412 EBNF: parser1
 413 foo='a'
 414 ;EBNF
 415 >>
 416
 417 EBNF: parser2
 418 foo=<foreign parser1 foo> 'b'
 419 ;EBNF
 420
 421 EBNF: parser3
 422 foo=<foreign parser1> 'c'
 423 ;EBNF
 424
 425 EBNF: parser4
 426 foo=<foreign any-char> 'd'
 427 ;EBNF
 428
 429 { "a" } [
 430   "a" parser1
 431 ] unit-test
 432
 433 { V{ "a" "b" } } [
 434   "ab" parser2
 435 ] unit-test
 436
 437 { V{ "a" "c" } } [
 438   "ac" parser3
 439 ] unit-test
 440
 441 { V{ CHAR: a "d" } } [
 442   "ad" parser4
 443 ] unit-test
 444
 445 { } [
 446  "USING: kernel peg.ebnf ; \"a\\n\" [EBNF foo='a' '\n'  => [[ drop \"\n\" ]] EBNF] drop" eval( -- )
 447 ] unit-test
 448
 449 [
 450   "USING: peg.ebnf ; <EBNF foo='a' foo='b' EBNF>" eval( -- ) drop
 451 ] must-fail
 452
 453 { t } [
 454   #! Rule lookup occurs in a namespace. This causes an incorrect duplicate rule
 455   #! if a var in a namespace is set. This unit test is to remind me to fix this.
 456   [ "fail" "foo" set "foo='a'" 'ebnf' parse transform drop t ] with-scope
 457 ] unit-test
 458
 459 #! Tokenizer tests
 460 { V{ "a" CHAR: b } } [
 461   "ab" [EBNF tokenizer=default foo="a" . EBNF]
 462 ] unit-test
 463
 464 TUPLE: ast-number value ;
 465
 466 EBNF: a-tokenizer
 467 Letter            = [a-zA-Z]
 468 Digit             = [0-9]
 469 Digits            = Digit+
 470 SingleLineComment = "//" (!("\n") .)* "\n" => [[ ignore ]]
 471 MultiLineComment  = "/*" (!("*/") .)* "*/" => [[ ignore ]]
 472 Space             = " " | "\t" | "\r" | "\n" | SingleLineComment | MultiLineComment
 473 Spaces            = Space* => [[ ignore ]]
 474 Number            = Digits:ws '.' Digits:fs => [[ ws "." fs 3array concat >string string>number ast-number boa ]]
 475                     | Digits => [[ >string string>number ast-number boa ]]
 476 Special            =   "("   | ")"   | "{"   | "}"   | "["   | "]"   | ","   | ";"
 477                      | "?"   | ":"   | "!==" | "~="  | "===" | "=="  | "="   | ">="
 478                      | ">"   | "<="  | "<"   | "++"  | "+="  | "+"   | "--"  | "-="
 479                      | "-"   | "*="  | "*"   | "/="  | "/"   | "%="  | "%"   | "&&="
 480                      | "&&"  | "||=" | "||"  | "."   | "!"
 481 Tok                = Spaces (Number | Special )
 482 ;EBNF
 483
 484 { V{ CHAR: 1 T{ ast-number f 23 } ";" CHAR: x } } [
 485   "123;x" [EBNF bar = .
 486                 tokenizer = <foreign a-tokenizer Tok>  foo=.
 487                 tokenizer=default baz=.
 488                 main = bar foo foo baz
 489           EBNF]
 490 ] unit-test
 491
 492 { V{ CHAR: 5 "+" CHAR: 2 } } [
 493   "5+2" [EBNF
 494           space=(" " | "\n")
 495           number=[0-9]
 496           operator=("*" | "+")
 497           spaces=space* => [[ ignore ]]
 498           tokenizer=spaces (number | operator)
 499           main= . . .
 500         EBNF]
 501 ] unit-test
 502
 503 { V{ CHAR: 5 "+" CHAR: 2 } } [
 504   "5 + 2" [EBNF
 505           space=(" " | "\n")
 506           number=[0-9]
 507           operator=("*" | "+")
 508           spaces=space* => [[ ignore ]]
 509           tokenizer=spaces (number | operator)
 510           main= . . .
 511         EBNF]
 512 ] unit-test
 513
 514 { "++" } [
 515   "++--" [EBNF tokenizer=("++" | "--") main="++" EBNF]
 516 ] unit-test
 517
 518 { "\\" } [
 519   "\\" [EBNF foo="\\" EBNF]
 520 ] unit-test
 521
 522 [ "USE: peg.ebnf [EBNF EBNF]" eval( -- ) ] must-fail
 523
 524 [ <" USE: peg.ebnf [EBNF
 525     lol = a
 526     lol = b
 527   EBNF] "> eval( -- )
 528 ] [
 529     error>> [ redefined-rule? ] [ name>> "lol" = ] bi and
 530 ] must-fail-with