! Copyright (C) 2007 Chris Double.
! See http://factorcode.org/license.txt for BSD license.
!
-USING: kernel tools.test strings namespaces make arrays sequences
- peg peg.private peg.parsers words math accessors ;
+USING: continuations kernel tools.test strings namespaces make arrays
+sequences peg peg.private peg.parsers words math accessors ;
IN: peg.tests
[ ] [ reset-pegs ] unit-test
] unit-test
[
- "cbcd" "a" token "b" token 2array choice parse
+ "cbcd" "a" token "b" token 2array choice parse
] must-fail
[
- "" "a" token "b" token 2array choice parse
+ "" "a" token "b" token 2array choice parse
] must-fail
{ 0 } [
] unit-test
[
- "bb" "a" token ensure CHAR: a CHAR: z range 2array seq parse
+ "bb" "a" token ensure CHAR: a CHAR: z range 2array seq parse
] must-fail
{ t } [
] unit-test
[
- "a]" "[" token hide "a" token "]" token hide 3array seq parse
+ "a]" "[" token hide "a" token "]" token hide 3array seq parse
] must-fail
"1+1" swap parse
] unit-test
-: expr ( -- parser )
+: expr ( -- parser )
#! Test direct left recursion. Currently left recursion should cause a
#! failure of that parser.
[ expr ] delay "+" token "1" token 3seq "1" token 2choice ;
] unit-test
[
- "A" [ drop t ] satisfy [ 66 >= ] semantic parse
+ "A" [ drop t ] satisfy [ 66 >= ] semantic parse
] must-fail
{ CHAR: B } [
[ ] [ enable-optimizer ] unit-test
[ [ ] ] [ "" epsilon [ drop [ [ ] ] call ] action parse ] unit-test
+
+{
+ T{ parse-error
+ { position 0 }
+ { got "fbcd" }
+ { messages V{ "'a'" "'b'" } }
+ }
+} [
+ [ "fbcd" "a" token "b" token 2array choice parse ] [ ] recover
+] unit-test
IN: peg
TUPLE: parse-result remaining ast ;
-TUPLE: parse-error position messages ;
+TUPLE: parse-error position got messages ;
TUPLE: parser peg compiled id ;
M: parser equal? { [ [ class-of ] same? ] [ [ id>> ] same? ] } 2&& ;
SYMBOL: error-stack
+: merge-overlapping-errors ( a b -- c )
+ dupd [ messages>> ] bi@ union [ [ position>> ] [ got>> ] bi ] dip
+ <parse-error> ;
+
: (merge-errors) ( a b -- c )
{
{ [ over position>> not ] [ nip ] }
2dup [ position>> ] compare {
{ +lt+ [ nip ] }
{ +gt+ [ drop ] }
- { +eq+ [ messages>> over messages>> union [ position>> ] dip <parse-error> ] }
+ { +eq+ [ merge-overlapping-errors ] }
} case
]
} cond ;
drop
] if ;
-: add-error ( remaining message -- )
+: add-error ( position got message -- )
<parse-error> error-stack get push ;
SYMBOL: ignore
: reset-pegs ( -- )
H{ } clone \ peg-cache set-global ;
-reset-pegs
+reset-pegs
#! An entry in the table of memoized parse results
#! ast = an AST produced from the parse
#! pos = the position in the input string of this entry
TUPLE: memo-entry ans pos ;
-TUPLE: left-recursion seed rule-id head next ;
+TUPLE: left-recursion seed rule-id head next ;
TUPLE: peg-head rule-id involved-set eval-set ;
-: rule-id ( word -- id )
+: rule-id ( word -- id )
#! A rule is the parser compiled down to a word. It has
#! a "peg-id" property containing the id of the original parser.
"peg-id" word-prop ;
nip [ ast>> ] [ remaining>> ] bi input-from pos set
] [
pos set fail
- ] if* ;
+ ] if* ;
: eval-rule ( rule -- ast )
#! Evaluate a rule, return an ast resulting from it.
swap >>ans pos get >>pos drop ;
: stop-growth? ( ast m -- ? )
- [ failed? pos get ] dip
+ [ failed? pos get ] dip
pos>> <= or ;
: setup-growth ( h p -- )
#! If not, compile it to a temporary word, cache it,
#! and return it. Otherwise return the existing one.
#! Circular parsers are supported by getting the word
- #! name and storing it in the cache, before compiling,
+ #! name and storing it in the cache, before compiling,
#! so it is picked up when re-entered.
dup compiled>> [
nip
: parse-token ( input string -- result )
#! Parse the string, returning a parse result
[ ?head-slice ] keep swap [
- <parse-result> f f add-error
+ <parse-result> f f f add-error
] [
- [ drop pos get "token '" ] dip append "'" append 1vector add-error f
+ [ seq>> pos get swap ] dip "'" "'" surround 1vector add-error f
] if ;
M: token-parser (compile) ( peg -- quot )
[
parsers>> unclip compile-parser-quot [ parse-seq-element ] curry ,
[ compile-parser-quot [ merge-errors ] compose [ parse-seq-element ] curry , ] each
- ] { } make , \ 1&& ,
+ ] { } make , \ 1&& ,
] [ ] make ;
TUPLE: choice-parser parsers ;
: (repeat) ( quot: ( -- result ) result -- result )
over call [
- [ remaining>> swap remaining<< ] 2keep
+ [ remaining>> swap remaining<< ] 2keep
ast>> swap [ ast>> push ] keep
(repeat)
] [
M: repeat0-parser (compile) ( peg -- quot )
p1>> compile-parser-quot '[
- input-slice V{ } clone <parse-result> _ swap (repeat)
+ input-slice V{ } clone <parse-result> _ swap (repeat)
] ;
TUPLE: repeat1-parser p1 ;
] if* ;
M: repeat1-parser (compile) ( peg -- quot )
- p1>> compile-parser-quot '[
- input-slice V{ } clone <parse-result> _ swap (repeat) repeat1-empty-check
+ p1>> compile-parser-quot '[
+ input-slice V{ } clone <parse-result> _ swap (repeat) repeat1-empty-check
] ;
TUPLE: optional-parser p1 ;
M: sp-parser (compile) ( peg -- quot )
p1>> compile-parser-quot '[
- input-slice [ blank? ] trim-head-slice input-from pos set @
+ input-slice [ blank? ] trim-head-slice input-from pos set @
] ;
TUPLE: delay-parser quot ;
M: delay-parser (compile) ( peg -- quot )
#! For efficiency we memoize the quotation.
- #! This way it is run only once and the
+ #! This way it is run only once and the
#! parser constructed once at run time.
- quot>> gensym [ delayed get set-at ] keep 1quotation ;
+ quot>> gensym [ delayed get set-at ] keep 1quotation ;
TUPLE: box-parser quot ;