basis/peg/ebnf/ebnf-tests.factor

   1 ! Copyright (C) 2007 Chris Double.
   2 ! See http://factorcode.org/license.txt for BSD license.
   3 !
   4 USING: kernel tools.test peg peg.ebnf words math math.parser
   5        sequences accessors peg.parsers parser namespaces arrays
   6        strings eval unicode.data multiline ;
   7 IN: peg.ebnf.tests
   8
   9 { T{ ebnf-non-terminal f "abc" } } [ ! A bare identifier parses to an ebnf-non-terminal holding its name.
  10   "abc" 'non-terminal' parse
  11 ] unit-test
  12
  13 { T{ ebnf-terminal f "55" } } [ ! Single-quoted text parses to an ebnf-terminal without the quotes.
  14   "'55'" 'terminal' parse
  15 ] unit-test
  16
  17 { ! '|' between two terminals yields an ebnf-choice as the rule body.
  18   T{ ebnf-rule f
  19      "digit"
  20      T{ ebnf-choice f
  21         V{ T{ ebnf-terminal f "1" } T{ ebnf-terminal f "2" } }
  22      }
  23   }
  24 } [
  25   "digit = '1' | '2'" 'rule' parse
  26 ] unit-test
  27
  28 { ! Juxtaposed terminals yield an ebnf-sequence as the rule body.
  29   T{ ebnf-rule f
  30      "digit"
  31      T{ ebnf-sequence f
  32         V{ T{ ebnf-terminal f "1" } T{ ebnf-terminal f "2" } }
  33      }
  34   }
  35 } [
  36   "digit = '1' '2'" 'rule' parse
  37 ] unit-test
  38
  39 { ! Sequencing binds tighter than '|': the result is (one two) | three.
  40   T{ ebnf-choice f
  41      V{
  42        T{ ebnf-sequence f
  43           V{ T{ ebnf-non-terminal f "one" } T{ ebnf-non-terminal f "two" } }
  44        }
  45        T{ ebnf-non-terminal f "three" }
  46      }
  47   }
  48 } [
  49   "one two | three" 'choice' parse
  50 ] unit-test
  51
  52 { ! A braced group wraps its choice in an ebnf-whitespace node.
  53   T{ ebnf-sequence f
  54      V{
  55        T{ ebnf-non-terminal f "one" }
  56        T{ ebnf-whitespace f
  57          T{ ebnf-choice f
  58             V{ T{ ebnf-non-terminal f "two" } T{ ebnf-non-terminal f "three" } }
  59          }
  60        }
  61      }
  62   }
  63 } [
  64   "one {two | three}" 'choice' parse
  65 ] unit-test
  66
  67 { ! Nested parentheses followed by '*' yield an ebnf-repeat0 over the inner sequence.
  68   T{ ebnf-sequence f
  69      V{
  70        T{ ebnf-non-terminal f "one" }
  71        T{ ebnf-repeat0 f
  72           T{ ebnf-sequence f
  73              V{
  74                 T{ ebnf-choice f
  75                    V{ T{ ebnf-non-terminal f "two" } T{ ebnf-non-terminal f "three" } }
  76                 }
  77                 T{ ebnf-non-terminal f "four" }
  78              }
  79           }
  80         }
  81      }
  82   }
  83 } [
  84   "one ((two | three) four)*" 'choice' parse
  85 ] unit-test
  86
  87 { ! A parenthesised element followed by '?' yields an ebnf-optional.
  88   T{ ebnf-sequence f
  89      V{
  90          T{ ebnf-non-terminal f "one" }
  91          T{ ebnf-optional f T{ ebnf-non-terminal f "two" } }
  92          T{ ebnf-non-terminal f "three" }
  93      }
  94   }
  95 } [
  96   "one ( two )? three" 'choice' parse
  97 ] unit-test
  98
  99 { "foo" } [ ! 'identifier' accepts double-quoted text and returns it unquoted.
 100   "\"foo\"" 'identifier' parse
 101 ] unit-test
 102
 103 { "foo" } [ ! 'identifier' accepts single-quoted text as well.
 104   "'foo'" 'identifier' parse
 105 ] unit-test
 106
 107 { "foo" } [ ! The symbol slot of the parsed non-terminal holds its name.
 108   "foo" 'non-terminal' parse symbol>>
 109 ] unit-test
 110
 111 { "foo" } [ ! A trailing ']' is not taken as part of the non-terminal's name.
 112   "foo]" 'non-terminal' parse symbol>>
 113 ] unit-test
 114
 115 { V{ "a" "b" } } [ ! Two terminals in sequence yield a vector of both matches.
 116   "ab" [EBNF foo='a' 'b' EBNF]
 117 ] unit-test
 118
 119 { V{ 1 "b" } } [ ! A [[ ... ]] action directly after a group replaces only that group's result.
 120   "ab" [EBNF foo=('a')[[ drop 1 ]] 'b' EBNF]
 121 ] unit-test
 122
 123 { V{ 1 2 } } [ ! Each group carries its own action; a space before [[ is accepted.
 124   "ab" [EBNF foo=('a') [[ drop 1 ]] ('b') [[ drop 2 ]] EBNF]
 125 ] unit-test
 126
 127 { CHAR: A } [ ! Range [A-Z] matches its lower bound and returns the character.
 128   "A" [EBNF foo=[A-Z] EBNF]
 129 ] unit-test
 130
 131 { CHAR: Z } [ ! Range [A-Z] matches its upper bound.
 132   "Z" [EBNF foo=[A-Z] EBNF]
 133 ] unit-test
 134
 135 [ ! A character outside [A-Z] is rejected.
 136   "0" [EBNF foo=[A-Z] EBNF]
 137 ] must-fail
 138
 139 { CHAR: 0 } [ ! Negated range [^A-Z] accepts a character outside A-Z.
 140   "0" [EBNF foo=[^A-Z] EBNF]
 141 ] unit-test
 142
 143 [ ! Negated range rejects the lower bound.
 144   "A" [EBNF foo=[^A-Z] EBNF]
 145 ] must-fail
 146
 147 [ ! Negated range rejects the upper bound.
 148   "Z" [EBNF foo=[^A-Z] EBNF]
 149 ] must-fail
 150
 151 { V{ "1" "+" "foo" } } [ ! Without '=>', the action replaces only the last element's result.
 152   "1+1" [EBNF foo='1' '+' '1' [[ drop "foo" ]] EBNF]
 153 ] unit-test
 154
 155 { "foo" } [ ! With '=>', the action replaces the result of the whole sequence.
 156   "1+1" [EBNF foo='1' '+' '1' => [[ drop "foo" ]] EBNF]
 157 ] unit-test
 158
 159 { "foo" } [ ! Each alternative has its own '=>' action; here the first alternative matches.
 160   "1+1" [EBNF foo='1' '+' '1' => [[ drop "foo" ]] | '1' '-' '1' => [[ drop "bar" ]] EBNF]
 161 ] unit-test
 162
 163 { "bar" } [ ! Same grammar; here the second alternative matches.
 164   "1-1" [EBNF foo='1' '+' '1' => [[ drop "foo" ]] | '1' '-' '1' => [[ drop "bar" ]] EBNF]
 165 ] unit-test
 166
 167 { 6 } [ ! Results bound with :x and :y are available by name inside the action.
 168   "4+2" [EBNF num=[0-9] => [[ digit> ]] foo=num:x '+' num:y => [[ x y + ]] EBNF]
 169 ] unit-test
 170
 171 { 6 } [ ! Variables can also be bound directly on range elements.
 172   "4+2" [EBNF foo=[0-9]:x '+' [0-9]:y => [[ x digit> y digit> + ]] EBNF]
 173 ] unit-test
 174
 175 { 10 } [ ! Array input with a ?[ ... ]? semantic predicate; left-recursive 'list' sums all four numbers.
 176   { 1 2 3 4 } [EBNF num=. ?[ number? ]? list=list:x num:y => [[ x y + ]] | num EBNF]
 177 ] unit-test
 178
 179 [ ! Fails when the first element does not satisfy number?.
 180   { "a" 2 3 4 } [EBNF num=. ?[ number? ]? list=list:x num:y => [[ x y + ]] | num EBNF]
 181 ] must-fail
 182
 183 { 3 } [ ! Stops at the first non-number, so only 1 and 2 are summed.
 184   { 1 2 "a" 4 } [EBNF num=. ?[ number? ]? list=list:x num:y => [[ x y + ]] | num EBNF]
 185 ] unit-test
 186
 187 [ ! The mandatory whitespace rule '-' rejects input with no separator.
 188   "ab" [EBNF -=" " | "\t" | "\n" foo="a" - "b" EBNF]
 189 ] must-fail
 190
 191 { V{ "a" " " "b" } } [ ! '-' matches a space and its match is kept in the result.
 192   "a b" [EBNF -=" " | "\t" | "\n" foo="a" - "b" EBNF]
 193 ] unit-test
 194
 195 { V{ "a" "\t" "b" } } [ ! '-' matches a tab.
 196   "a\tb" [EBNF -=" " | "\t" | "\n" foo="a" - "b" EBNF]
 197 ] unit-test
 198
 199 { V{ "a" "\n" "b" } } [ ! '-' matches a newline.
 200   "a\nb" [EBNF -=" " | "\t" | "\n" foo="a" - "b" EBNF]
 201 ] unit-test
 202
 203 { V{ "a" f "b" } } [ ! Optional (-)? yields f when no whitespace is present.
 204   "ab" [EBNF -=" " | "\t" | "\n" foo="a" (-)? "b" EBNF]
 205 ] unit-test
 206
 207 { V{ "a" " " "b" } } [ ! Optional (-)? still captures a space when one is present.
 208   "a b" [EBNF -=" " | "\t" | "\n" foo="a" (-)? "b" EBNF]
 209 ] unit-test
 210
 211
 212 { V{ "a" "\t" "b" } } [ ! Optional (-)? captures a tab.
 213   "a\tb" [EBNF -=" " | "\t" | "\n" foo="a" (-)? "b" EBNF]
 214 ] unit-test
 215
 216 { V{ "a" "\n" "b" } } [ ! Optional (-)? captures a newline.
 217   "a\nb" [EBNF -=" " | "\t" | "\n" foo="a" (-)? "b" EBNF]
 218 ] unit-test
 219
 220 { V{ "a" "b" } } [ ! An action returning ignore removes the optional whitespace from the result.
 221   "ab" [EBNF -=(" " | "\t" | "\n")? => [[ drop ignore ]] foo="a" - "b" EBNF]
 222 ] unit-test
 223
 224 { V{ "a" "b" } } [ ! Same grammar, with a tab between the tokens.
 225   "a\tb" [EBNF -=(" " | "\t" | "\n")? => [[ drop ignore ]] foo="a" - "b" EBNF]
 226 ] unit-test
 227
 228 { V{ "a" "b" } } [ ! Same grammar, with a newline between the tokens.
 229   "a\nb" [EBNF -=(" " | "\t" | "\n")? => [[ drop ignore ]] foo="a" - "b" EBNF]
 230 ] unit-test
 231
 232 [ ! A non-whitespace character between the tokens is rejected.
 233   "axb" [EBNF -=(" " | "\t" | "\n")? => [[ drop ignore ]] foo="a" - "b" EBNF]
 234 ] must-fail
 235
 236 { V{ V{ 49 } "+" V{ 49 } } } [ ! 49 is the character code of the digit 1 matched by [0-9].
 237   #! Test direct left recursion.
 238   #! Using packrat, so first part of expr fails, causing 2nd choice to be used
 239   "1+1" [EBNF num=([0-9])+ expr=expr "+" num | num EBNF]
 240 ] unit-test
 241
 242 { V{ V{ V{ 49 } "+" V{ 49 } } "+" V{ 49 } } } [ ! The expected result nests to the left: (1+1)+1.
 243   #! Test direct left recursion.
 244   #! Using packrat, so first part of expr fails, causing 2nd choice to be used
 245   "1+1+1" [EBNF num=([0-9])+ expr=expr "+" num | num EBNF]
 246 ] unit-test
 247
 248 { V{ V{ V{ 49 } "+" V{ 49 } } "+" V{ 49 } } } [ ! Same expected nesting when expr recurses through the intermediate rule x.
 249   #! Test indirect left recursion.
 250   #! Using packrat, so first part of expr fails, causing 2nd choice to be used
 251   "1+1+1" [EBNF num=([0-9])+ x=expr expr=x "+" num | num EBNF]
 252 ] unit-test
 253
 254 { t } [ ! The 'ebnf' parser consumes this whole rule, leaving no remaining input.
 255   "abcd='9' | ('8'):x => [[ x ]]" 'ebnf' (parse) remaining>> empty?
 256 ] unit-test
 257
 258 EBNF: primary
 259 Primary = PrimaryNoNewArray
 260 PrimaryNoNewArray =  ClassInstanceCreationExpression
 261                    | MethodInvocation
 262                    | FieldAccess
 263                    | ArrayAccess
 264                    | "this"
 265 ClassInstanceCreationExpression =  "new" ClassOrInterfaceType "(" ")"
 266                                  | Primary "." "new" Identifier "(" ")"
 267 MethodInvocation =  Primary "." MethodName "(" ")"
 268                   | MethodName "(" ")"
 269 FieldAccess =  Primary "." Identifier
 270              | "super" "." Identifier
 271 ArrayAccess =  Primary "[" Expression "]"
 272              | ExpressionName "[" Expression "]"
 273 ClassOrInterfaceType = ClassName | InterfaceTypeName
 274 ClassName = "C" | "D"
 275 InterfaceTypeName = "I" | "J"
 276 Identifier = "x" | "y" | ClassOrInterfaceType
 277 MethodName = "m" | "n"
 278 ExpressionName = Identifier
 279 Expression = "i" | "j"
 280 main = Primary
 281 ;EBNF ! Primary is left-recursive through four alternatives that each begin with Primary again; "this" is the only non-recursive alternative.
 282
 283 { "this" } [ ! Base case: the non-recursive alternative of the primary grammar.
 284   "this" primary
 285 ] unit-test
 286
 287 { V{ "this" "." "x" } } [ ! One level of left recursion: a field access on "this".
 288   "this.x" primary
 289 ] unit-test
 290
 291 { V{ V{ "this" "." "x" } "." "y" } } [ ! Two field accesses, grouped to the left.
 292   "this.x.y" primary
 293 ] unit-test
 294
 295 { V{ V{ "this" "." "x" } "." "m" "(" ")" } } [ ! A method invocation on the result of a field access.
 296   "this.x.m()" primary
 297 ] unit-test
 298
 299 { V{ V{ V{ "x" "[" "i" "]" } "[" "j" "]" } "." "y" } } [ ! Two nested array accesses followed by a field access.
 300   "x[i][j].y" primary
 301 ] unit-test
 302
 303 'ebnf' compile must-infer ! The word produced by compiling the 'ebnf' parser must have an inferable stack effect.
 304
 305 { V{ V{ "a" "b" } "c" } } [ ! Parenthesised group, input without spaces.
 306   "abc" [EBNF a="a" "b" foo=(a "c") EBNF]
 307 ] unit-test
 308
 309 { V{ V{ "a" "b" } "c" } } [ ! Braced group, input without spaces: same result.
 310   "abc" [EBNF a="a" "b" foo={a "c"} EBNF]
 311 ] unit-test
 312
 313 { V{ V{ "a" "b" } "c" } } [ ! Ungrouped sequence, input without spaces: same result.
 314   "abc" [EBNF a="a" "b" foo=a "c" EBNF]
 315 ] unit-test
 316
 317 [ ! A space inside rule 'a' is rejected with a parenthesised group.
 318   "a bc" [EBNF a="a" "b" foo=(a "c") EBNF]
 319 ] must-fail
 320
 321 [ ! A space inside rule 'a' is rejected with no grouping.
 322   "a bc" [EBNF a="a" "b" foo=a "c" EBNF]
 323 ] must-fail
 324
 325 [ ! A space inside rule 'a' is rejected even with a braced group.
 326   "a bc" [EBNF a="a" "b" foo={a "c"} EBNF]
 327 ] must-fail
 328
 329 [ ! A space before the final token is rejected with no grouping.
 330   "ab c" [EBNF a="a" "b" foo=a "c" EBNF]
 331 ] must-fail
 332
 333 { V{ V{ "a" "b" } "c" } } [ ! A braced group accepts a space before the final token and drops it from the result.
 334   "ab c" [EBNF a="a" "b" foo={a "c"} EBNF]
 335 ] unit-test
 336
 337 [ ! A parenthesised group does not accept that space.
 338   "ab c" [EBNF a="a" "b" foo=(a "c") EBNF]
 339 ] must-fail
 340
 341 [ ! Spaces in both positions are rejected with no grouping.
 342   "a b c" [EBNF a="a" "b" foo=a "c" EBNF]
 343 ] must-fail
 344
 345 [ ! Spaces in both positions are rejected with parentheses.
 346   "a b c" [EBNF a="a" "b" foo=(a "c") EBNF]
 347 ] must-fail
 348
 349 [ ! Spaces in both positions are rejected with braces too, because of the space inside 'a'.
 350   "a b c" [EBNF a="a" "b" foo={a "c"} EBNF]
 351 ] must-fail
 352
 353 { V{ V{ V{ "a" "b" } "c" } V{ V{ "a" "b" } "c" } } } [ ! A braced group under '*' matches both repetitions.
 354   "ab cab c" [EBNF a="a" "b" foo={a "c"}* EBNF]
 355 ] unit-test
 356
 357 { V{ } } [ ! A parenthesised group under '*' matches zero repetitions on the same input.
 358   "ab cab c" [EBNF a="a" "b" foo=(a "c")* EBNF]
 359 ] unit-test
 360
 361 { V{ V{ V{ "a" "b" } "c" } V{ V{ "a" "b" } "c" } } } [ ! Braced repetitions may also be separated by a space.
 362   "ab c ab c" [EBNF a="a" "b" foo={a "c"}* EBNF]
 363 ] unit-test
 364
 365 { V{ } } [ ! The parenthesised group again matches zero repetitions.
 366   "ab c ab c" [EBNF a="a" "b" foo=(a "c")* EBNF]
 367 ] unit-test
 368
 369 { V{ "a" "a" "a" } } [ ! A binding placed after a !(...) element still reaches the action.
 370   "aaa" [EBNF a=('a')* b=!('b') a:x => [[ x ]] EBNF]
 371 ] unit-test
 372
 373 { t } [ ! Binding a bare non-terminal and a parenthesised one give equal results.
 374   "aaa" [EBNF a=('a')* b=!('b') a:x => [[ x ]] EBNF]
 375   "aaa" [EBNF a=('a')* b=!('b') (a):x => [[ x ]] EBNF] =
 376 ] unit-test
 377
 378 { V{ "a" "a" "a" } } [ ! Binding the result of a repeating rule, without the negation.
 379   "aaa" [EBNF a=('a')* b=a:x => [[ x ]] EBNF]
 380 ] unit-test
 381
 382 { t } [ ! Bare and parenthesised bindings are equal without the negation too.
 383   "aaa" [EBNF a=('a')* b=a:x => [[ x ]] EBNF]
 384   "aaa" [EBNF a=('a')* b=(a):x => [[ x ]] EBNF] =
 385 ] unit-test
 386
 387 { t } [ ! A bound, parenthesised '+' repetition followed by a terminal is consumed completely.
 388   "number=(digit)+:n 'a'" 'ebnf' (parse) remaining>> empty?
 389 ] unit-test
 390
 391 { t } [ ! The same rule without the binding.
 392   "number=(digit)+ 'a'" 'ebnf' (parse) remaining>> empty?
 393 ] unit-test
 394
 395 { t } [ ! '+' applied to a bare non-terminal.
 396   "number=digit+ 'a'" 'ebnf' (parse) remaining>> empty?
 397 ] unit-test
 398
 399 { t } [ ! '+' on a bare non-terminal, with a binding.
 400   "number=digit+:n 'a'" 'ebnf' (parse) remaining>> empty?
 401 ] unit-test
 402
 403 { t } [ ! Parenthesised and bare bound names give equal rule ASTs when the negation follows.
 404   "foo=(name):n !(keyword) => [[ n ]]" 'rule' parse
 405   "foo=name:n !(keyword) => [[ n ]]" 'rule' parse =
 406 ] unit-test
 407
 408 { t } [ ! The same equality holds when the negation comes first.
 409   "foo=!(keyword) (name):n => [[ n ]]" 'rule' parse
 410   "foo=!(keyword) name:n => [[ n ]]" 'rule' parse =
 411 ] unit-test
 412
 413 << ! Wrapped in << >> so the definition runs at parse time; presumably required because parser2 and parser3 reference parser1 via <foreign ...>.
 414 EBNF: parser1
 415 foo='a'
 416 ;EBNF
 417 >>
 418
 419 EBNF: parser2
 420 foo=<foreign parser1 foo> 'b'
 421 ;EBNF ! References rule foo of parser1 by name, then requires 'b'.
 422
 423 EBNF: parser3
 424 foo=<foreign parser1> 'c'
 425 ;EBNF ! References parser1 without naming a rule, then requires 'c'.
 426
 427 EBNF: parser4
 428 foo=<foreign any-char> 'd'
 429 ;EBNF ! Foreign reference to any-char, which is not an EBNF: word defined in this file (presumably from peg.parsers).
 430
 431 { "a" } [ ! parser1 used directly.
 432   "a" parser1
 433 ] unit-test
 434
 435 { V{ "a" "b" } } [ ! Foreign rule referenced by parser name and rule name.
 436   "ab" parser2
 437 ] unit-test
 438
 439 { V{ "a" "c" } } [ ! Foreign parser referenced without a rule name still matches 'a'.
 440   "ac" parser3
 441 ] unit-test
 442
 443 { V{ CHAR: a "d" } } [ ! Foreign any-char yields the character itself rather than a string.
 444   "ad" parser4
 445 ] unit-test
 446
 447 { } [ ! The evaluated source has real newline characters inside the grammar; it must run and leave the stack empty.
 448  "USING: kernel peg.ebnf ; \"a\\n\" [EBNF foo='a' '\n'  => [[ drop \"\n\" ]] EBNF] drop" (( -- )) eval
 449 ] unit-test
 450
 451 [ ! Declared ( -- parser ) so the drop is balanced; only the duplicate rule foo can make this quotation fail.
 452   "USING: peg.ebnf ; <EBNF foo='a' foo='b' EBNF>" (( -- parser )) eval drop
 453 ] [ error>> [ redefined-rule? ] [ name>> "foo" = ] bi and ] must-fail-with ! The wrapped error must be a redefined-rule naming foo.
 454
 455 { t } [ ! Sets variable "foo" in a fresh scope, then transforms a grammar whose only rule is also named foo.
 456   #! Rule lookup occurs in a namespace. This causes an incorrect duplicate rule
 457   #! if a var in a namespace is set. This unit test is to remind me to fix this.
 458   [ "fail" "foo" set "foo='a'" 'ebnf' parse transform drop t ] with-scope
 459 ] unit-test
 460
 461 #! Tokenizer tests
 462 { V{ "a" CHAR: b } } [ ! With tokenizer=default a string literal yields a string and '.' yields a single character.
 463   "ab" [EBNF tokenizer=default foo="a" . EBNF]
 464 ] unit-test
 465
 466 TUPLE: ast-number value ; ! AST node wrapping a parsed number; constructed with boa by the Number rule of a-tokenizer.
 467
 468 EBNF: a-tokenizer
 469 Letter            = [a-zA-Z]
 470 Digit             = [0-9]
 471 Digits            = Digit+
 472 SingleLineComment = "//" (!("\n") .)* "\n" => [[ ignore ]]
 473 MultiLineComment  = "/*" (!("*/") .)* "*/" => [[ ignore ]]
 474 Space             = " " | "\t" | "\r" | "\n" | SingleLineComment | MultiLineComment
 475 Spaces            = Space* => [[ ignore ]]
 476 Number            = Digits:ws '.' Digits:fs => [[ ws "." fs 3array concat >string string>number ast-number boa ]]
 477                     | Digits => [[ >string string>number ast-number boa ]]
 478 Special            =   "("   | ")"   | "{"   | "}"   | "["   | "]"   | ","   | ";"
 479                      | "?"   | ":"   | "!==" | "~="  | "===" | "=="  | "="   | ">="
 480                      | ">"   | "<="  | "<"   | "++"  | "+="  | "+"   | "--"  | "-="
 481                      | "-"   | "*="  | "*"   | "/="  | "/"   | "%="  | "%"   | "&&="
 482                      | "&&"  | "||=" | "||"  | "."   | "!"
 483 Tok                = Spaces (Number | Special )
 484 ;EBNF ! Tok discards leading spaces and comments, then yields an ast-number or one of the Special strings; Letter is defined but unused here.
 485
 486 { V{ CHAR: 1 T{ ast-number f 23 } ";" CHAR: x } } [ ! Tokenizer switched mid-grammar: '.' is one character for bar and baz, one Tok token for foo.
 487   "123;x" [EBNF bar = .
 488                 tokenizer = <foreign a-tokenizer Tok>  foo=.
 489                 tokenizer=default baz=.
 490                 main = bar foo foo baz
 491           EBNF]
 492 ] unit-test
 493
 494 { V{ CHAR: 5 "+" CHAR: 2 } } [ ! The tokenizer is built from rules of the same grammar; input has no spaces.
 495   "5+2" [EBNF
 496           space=(" " | "\n")
 497           number=[0-9]
 498           operator=("*" | "+")
 499           spaces=space* => [[ ignore ]]
 500           tokenizer=spaces (number | operator)
 501           main= . . .
 502         EBNF]
 503 ] unit-test
 504
 505 { V{ CHAR: 5 "+" CHAR: 2 } } [ ! Same grammar; the spaces between tokens are skipped and the result is unchanged.
 506   "5 + 2" [EBNF
 507           space=(" " | "\n")
 508           number=[0-9]
 509           operator=("*" | "+")
 510           spaces=space* => [[ ignore ]]
 511           tokenizer=spaces (number | operator)
 512           main= . . .
 513         EBNF]
 514 ] unit-test
 515
 516 { "++" } [ ! A string literal in main is matched under a tokenizer whose tokens are "++" and "--".
 517   "++--" [EBNF tokenizer=("++" | "--") main="++" EBNF]
 518 ] unit-test
 519
 520 { "\\" } [ ! An escaped backslash in a grammar string matches a single backslash character.
 521   "\\" [EBNF foo="\\" EBNF]
 522 ] unit-test
 523
 524 [ "USE: peg.ebnf [EBNF EBNF]" (( -- )) eval ] must-fail ! A grammar with no rules must be rejected.
 525
 526 [ <" USE: peg.ebnf [EBNF
 527     lol = a
 528     lol = b
 529   EBNF] "> (( -- )) eval
 530 ] [ ! The error thrown by eval must wrap a redefined-rule error whose name is "lol".
 531     error>> [ redefined-rule? ] [ name>> "lol" = ] bi and
 532 ] must-fail-with