! Copyright (C) 2009 Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license.
-USING: regexp.classes kernel sequences regexp.negation
-quotations assocs fry math locals combinators
-accessors words compiler.units kernel.private strings
-sequences.private arrays namespaces unicode.breaks
-regexp.transition-tables combinators.short-circuit ;
+USING: accessors assocs combinators combinators.short-circuit
+kernel kernel.private math namespaces quotations regexp.classes
+regexp.transition-tables sequences sequences.private sets
+strings unicode words ;
IN: regexp.compiler
! Generic word that turns a question object into a quotation.
! The start-of-line and end-of-line methods below discard the question
! and return a two-input predicate that is applied to an index and a
! string (index underneath, string on top).
GENERIC: question>quot ( question -- quot )
} 2&&
] ;
! End-of-line question. The class is spelled `$` on the removed line and
! `$crlf` on the added line; the body is shared by both.
! The question object is dropped. The resulting predicate takes
! an index and a string and is true when either
!   * the index equals the length of the string, or
!   * the element at that index is a carriage return or a line feed.
-M: $ question>quot
+M: $crlf question>quot
! 2|| tries the alternatives in order with the same two inputs and stops
! at the first one that succeeds.
drop [ { [ length = ] [ ?nth "\r\n" member? ] } 2|| ] ;
! Start-of-line question. The class is spelled `^` on the removed line
! and `^crlf` on the added line; the body is shared by both.
! The question object is dropped. The resulting predicate takes
! an index and a string and is true when either
!   * the index is zero, or
!   * the element just before the index is a carriage return or a line feed.
-M: ^ question>quot
+M: ^crlf question>quot
! The zero test comes first, so the index-minus-one lookup only runs for
! a non-zero index.
drop [ { [ drop zero? ] [ [ 1 - ] dip ?nth "\r\n" member? ] } 2|| ] ;
M: $unix question>quot
[ question>> question>quot ] [ yes>> ] [ no>> ] tri
[ (execution-quot) ] bi@
'[ 2dup @ _ _ if ]
- ] [ '[ _ execute ] ] if ;
+ ] [ 1quotation ] if ;
: execution-quot ( next-state -- quot )
dup sequence? [ first ] when
! Builds the quotation for a single state word.
! 2bi hands the same ( word dfa ) pair to both quotations:
!   1. look the word up in the DFA's transitions;
!   2. test whether the word is one of the DFA's final states.
! Both results are then passed to transitions>quot, which is defined
! outside this excerpt.
: word>quot ( word dfa -- quot )
[ transitions>> at ]
! The removed line tests membership with key?, the added line with in?.
- [ final-states>> key? ] 2bi
+ [ final-states>> in? ] 2bi
transitions>quot ;
! Gives every word in `words` its definition.
! For each word, word>quot builds the body from the DFA and
! define-declared installs it with the declared stack effect
! ( last-match index string -- ? ). Nothing is left on the stack.
: states>code ( words dfa -- )
! The fried quotation captures the dfa in its `_` slot; `each` then runs
! it once per word.
'[
! dup keeps the word itself on the stack as define-declared's first input.
dup _ word>quot
! Same effect literal on both lines; only the literal syntax differs.
- (( last-match index string -- ? ))
+ ( last-match index string -- ? )
define-declared
] each ;
dup transitions>> keys [ gensym ] H{ } map>assoc
[ transitions-at ]
[ values ]
- bi swap ;
+ bi swap ;
! Defines the code for every state word of the DFA, then outputs the
! start-state>> slot of the DFA left by states>words.
: dfa>main-word ( dfa -- word )
    states>words
    ! Keep a copy of the DFA underneath while states>code consumes
    ! the word list and the DFA.
    dup [ states>code ] dip
    start-state>> ;
! Compiles a DFA into an anonymous word.
! dfa>main-word defines the state words and leaves the start state;
! execution-quot and word-template (both defined outside this excerpt)
! turn that into the quotation given to define-temp, together with the
! effect ( start-index string regexp -- i/f ).
! The output is named `word` because define-temp leaves a word, and
! dfa>shortest-word passes this value through as its own `word` output.
: dfa>word ( dfa -- word )
dfa>main-word execution-quot word-template
! Same effect literal on both lines; only the literal syntax differs.
- (( start-index string regexp -- i/f )) define-temp ;
+ ( start-index string regexp -- i/f ) define-temp ;
! Like dfa>word, but runs it with the shortest? variable bound to t.
! with-variable establishes the binding only while the quotation runs.
! How shortest? changes the generated code is decided outside this
! excerpt.
: dfa>shortest-word ( dfa -- word )
t shortest? [ dfa>word ] with-variable ;