! See http://factorcode.org/license.txt for BSD license.
!
USING: kernel tools.test peg peg.ebnf words math math.parser
- sequences accessors peg.parsers parser namespaces ;
+ sequences accessors peg.parsers parser namespaces arrays
+ strings ;
IN: peg.ebnf.tests
{ T{ ebnf-non-terminal f "abc" } } [
"USING: peg.ebnf ; [EBNF foo='a' foo='b' EBNF]" eval drop
] must-fail
-
{ t } [
#! Rule lookup occurs in a namespace. This causes an incorrect duplicate rule
#! if a var in a namespace is set. This unit test is to remind me to fix this.
[ "fail" "foo" set "foo='a'" 'ebnf' parse ast>> transform drop t ] with-scope
+] unit-test
+
+#! Tokenizer tests
+{ V{ "a" CHAR: b } } [
+ "ab" [EBNF tokenizer=default foo="a" . EBNF] call ast>>
+] unit-test
+
+TUPLE: ast-number value ;
+
+EBNF: a-tokenizer
+Letter = [a-zA-Z]
+Digit = [0-9]
+Digits = Digit+
+SingleLineComment = "//" (!("\n") .)* "\n" => [[ ignore ]]
+MultiLineComment = "/*" (!("*/") .)* "*/" => [[ ignore ]]
+Space = " " | "\t" | "\r" | "\n" | SingleLineComment | MultiLineComment
+Spaces = Space* => [[ ignore ]]
+Number = Digits:ws '.' Digits:fs => [[ ws "." fs 3array concat >string string>number ast-number boa ]]
+ | Digits => [[ >string string>number ast-number boa ]]
+Special = "(" | ")" | "{" | "}" | "[" | "]" | "," | ";"
+ | "?" | ":" | "!==" | "~=" | "===" | "==" | "=" | ">="
+ | ">" | "<=" | "<" | "++" | "+=" | "+" | "--" | "-="
+ | "-" | "*=" | "*" | "/=" | "/" | "%=" | "%" | "&&="
+ | "&&" | "||=" | "||" | "." | "!"
+Tok = Spaces (Number | Special )
+;EBNF
+
+{ V{ CHAR: 1 T{ ast-number f 23 } ";" CHAR: x } } [
+ "123;x" [EBNF bar = .
+ tokenizer = <foreign a-tokenizer Tok> foo=.
+ tokenizer=default baz=.
+ main = bar foo foo baz
+ EBNF] call ast>>
+] unit-test
+
+{ V{ CHAR: 5 "+" CHAR: 2 } } [
+ "5+2" [EBNF
+ space=(" " | "\n")
+ number=[0-9]
+ operator=("*" | "+")
+ spaces=space* => [[ ignore ]]
+ tokenizer=spaces (number | operator)
+ main= . . .
+ EBNF] call ast>>
+] unit-test
+
+{ V{ CHAR: 5 "+" CHAR: 2 } } [
+ "5 + 2" [EBNF
+ space=(" " | "\n")
+ number=[0-9]
+ operator=("*" | "+")
+ spaces=space* => [[ ignore ]]
+ tokenizer=spaces (number | operator)
+ main= . . .
+ EBNF] call ast>>
] unit-test
\ No newline at end of file
rule parser-tokenizer ;\r
\r
: tokenizer ( -- word )\r
- \ tokenizer get [ default-tokenizer ] unless* ;\r
+ \ tokenizer get-global [ default-tokenizer ] unless* ;\r
\r
: reset-tokenizer ( -- )\r
default-tokenizer \ tokenizer set-global ;\r
TUPLE: ebnf-repeat1 group ;\r
TUPLE: ebnf-optional group ;\r
TUPLE: ebnf-whitespace group ;\r
+TUPLE: ebnf-tokenizer elements ;\r
TUPLE: ebnf-rule symbol elements ;\r
TUPLE: ebnf-action parser code ;\r
TUPLE: ebnf-var parser name ;\r
C: <ebnf-repeat1> ebnf-repeat1\r
C: <ebnf-optional> ebnf-optional\r
C: <ebnf-whitespace> ebnf-whitespace\r
+C: <ebnf-tokenizer> ebnf-tokenizer\r
C: <ebnf-rule> ebnf-rule\r
C: <ebnf-action> ebnf-action\r
C: <ebnf-var> ebnf-var\r
dup length 1 = [ first ] [ <ebnf-choice> ] if\r
] action ;\r
\r
+: 'tokenizer' ( -- parser )\r
+ [\r
+ "tokenizer" syntax ,\r
+ "=" syntax ,\r
+ ">" token ensure-not ,\r
+ [ "default" token sp , 'choice' , ] choice* ,\r
+ ] seq* [ first <ebnf-tokenizer> ] action ;\r
+\r
: 'rule' ( -- parser )\r
[\r
+ "tokenizer" token ensure-not , \r
'non-terminal' [ symbol>> ] action ,\r
"=" syntax ,\r
">" token ensure-not ,\r
] seq* [ first2 <ebnf-rule> ] action ;\r
\r
: 'ebnf' ( -- parser )\r
- 'rule' sp repeat1 [ <ebnf> ] action ;\r
+ [ 'tokenizer' sp , 'rule' sp , ] choice* repeat1 [ <ebnf> ] action ;\r
\r
GENERIC: (transform) ( ast -- parser )\r
\r
\r
M: ebnf (transform) ( ast -- parser )\r
rules>> [ (transform) ] map peek ;\r
+\r
+M: ebnf-tokenizer (transform) ( ast -- parser )\r
+ elements>> dup "default" = [\r
+ drop default-tokenizer \ tokenizer set-global any-char\r
+ ] [\r
+ (transform) \r
+ dup parser-tokenizer \ tokenizer set-global\r
+ ] if ;\r
\r
M: ebnf-rule (transform) ( ast -- parser )\r
dup elements>> \r
options>> [ (transform) ] map choice ;\r
\r
M: ebnf-any-character (transform) ( ast -- parser )\r
- drop [ tokenizer any>> call ] box ;\r
+ drop tokenizer any>> call ;\r
\r
M: ebnf-range (transform) ( ast -- parser )\r
pattern>> range-pattern ;\r
parser>> (transform) ;\r
\r
M: ebnf-terminal (transform) ( ast -- parser )\r
- symbol>> [ tokenizer one>> call ] curry box ;\r
+ symbol>> tokenizer one>> call ;\r
\r
M: ebnf-foreign (transform) ( ast -- parser )\r
dup word>> search\r
scan {\r
{ "+" [ scan-word execute "" swap ] }\r
[ " " append default-tokenizer ]\r
- } case \ tokenizer [\r
- [ "EBNF]" parse-multiline-string ] [ drop "" ] recover append ebnf>quot nip parsed \r
- ] with-variable ; parsing\r
+ } case \ tokenizer set-global\r
+ [ "EBNF]" parse-multiline-string ] [ drop "" ] recover append ebnf>quot nip parsed \r
+ reset-tokenizer ; parsing\r
\r
: EBNF: \r
CREATE-WORD scan {\r
{ "+" [ scan-word execute "" swap ] }\r
[ " " append default-tokenizer ]\r
- } case \ tokenizer [\r
- dupd [ ";EBNF" parse-multiline-string ] [ drop "" ] recover append \r
- ebnf>quot swapd 1 1 <effect> define-declared "ebnf-parser" set-word-prop \r
- ] with-variable ; parsing\r
+ } case \ tokenizer set-global\r
+ dupd [ ";EBNF" parse-multiline-string ] [ drop "" ] recover append \r
+ ebnf>quot swapd 1 1 <effect> define-declared "ebnf-parser" set-word-prop \r
+ reset-tokenizer ; parsing\r
\r
\r
\r