! Copyright (C) 2008, 2009 Doug Coleman, Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license.
-USING: accessors kernel math math.order words
+USING: accessors kernel math math.order words combinators
ascii unicode.categories combinators.short-circuit sequences ;
IN: regexp.classes
2drop f ;
TUPLE: or-class seq ;
-C: <or-class> or-class
TUPLE: not-class class ;
-C: <not-class> not-class
-: <and-class> ( classes -- class )
- [ <not-class> ] map <or-class> <not-class> ;
+TUPLE: and-class seq ;
TUPLE: primitive-class class ;
C: <primitive-class> primitive-class
+: <and-class> ( seq -- class )
+ t swap remove
+ f over member? [ drop f ] [
+ dup length {
+ { 0 [ drop t ] }
+ { 1 [ first ] }
+ [ drop and-class boa ]
+ } case
+ ] if ;
+
+M: and-class class-member?
+ seq>> [ class-member? ] with all? ;
+
+: <or-class> ( seq -- class )
+ f swap remove
+ t over member? [ drop t ] [
+ dup length {
+ { 0 [ drop f ] }
+ { 1 [ first ] }
+ [ drop or-class boa ]
+ } case
+ ] if ;
+
M: or-class class-member?
seq>> [ class-member? ] with any? ;
+: <not-class> ( class -- inverse )
+ {
+ { t [ f ] }
+ { f [ t ] }
+ [ not-class boa ]
+ } case ;
+
M: not-class class-member?
class>> class-member? not ;
! See http://factorcode.org/license.txt for BSD license.
USING: accessors arrays assocs combinators fry kernel locals
math math.order regexp.nfa regexp.transition-tables sequences
-sets sorting vectors sequences.deep math.functions regexp.classes ;
-USING: io prettyprint threads ;
+sets sorting vectors ;
IN: regexp.dfa
:: (while-changes) ( obj quot: ( obj -- obj' ) comp: ( obj -- key ) old-key -- obj )
: while-changes ( obj quot pred -- obj' )
3dup nip call (while-changes) ; inline
-TUPLE: parts in out ;
-
-: make-partition ( choices classes -- partition )
- zip [ first ] partition parts boa ;
-
-: powerset-partition ( classes -- partitions )
- ! Here is where class algebra will happen, when I implement it
- [ length [ 2^ ] keep ] keep '[
- _ [ ] map-bits _ make-partition
- ] map ;
-
-: partition>class ( parts -- class )
- [ in>> ] [ out>> ] bi
- [ <or-class> ] bi@ <not-class> 2array <and-class> ;
-
-: get-transitions ( partition state-transitions -- next-states )
- [ in>> ] dip '[ at ] gather ;
-
-: disambiguate-overlap ( nfa -- nfa' )
- [
- [
- [ keys powerset-partition ] keep '[
- [ partition>class ]
- [ _ get-transitions ] bi
- ] H{ } map>assoc
- ] assoc-map
- ] change-transitions ;
-
: find-delta ( states transition nfa -- new-states )
transitions>> '[ _ swap _ at at ] gather sift ;
: states ( hashtable -- array )
[ keys ]
- [ values [ values concat ] map concat append ] bi ;
+ [ values [ values concat ] map concat ] bi
+ append ;
: set-final-states ( nfa dfa -- )
[
swap find-start-state >>start-state ;
: construct-dfa ( nfa -- dfa )
- disambiguate-overlap
dup initialize-dfa
dup start-state>> 1vector
H{ } clone
--- /dev/null
+! Copyright (C) 2008, 2009 Doug Coleman, Daniel Ehrenberg.
+! See http://factorcode.org/license.txt for BSD license.
+USING: kernel accessors regexp.classes math.bits assocs sequences
+arrays sets regexp.dfa math fry regexp.minimize ;
+IN: regexp.disambiguate
+
+TUPLE: parts in out ;
+
+: make-partition ( choices classes -- partition )
+ zip [ first ] partition [ values ] bi@ parts boa ;
+
+: powerset-partition ( classes -- partitions )
+ [ length [ 2^ ] keep ] keep '[
+ _ <bits> _ make-partition
+ ] map ;
+
+: partition>class ( parts -- class )
+ [ in>> ] [ out>> ] bi
+ [ <or-class> ] bi@ <not-class> 2array <and-class> ;
+
+: get-transitions ( partition state-transitions -- next-states )
+ [ in>> ] dip '[ _ at ] map prune ;
+
+: disambiguate ( dfa -- nfa )
+ [
+ [
+ [ keys powerset-partition ] keep '[
+ [ partition>class ]
+ [ _ get-transitions ] bi
+ ] H{ } map>assoc
+ [ drop ] assoc-filter
+ ] assoc-map
+ ] change-transitions ;
+
+: nfa>dfa ( nfa -- dfa )
+ construct-dfa
+ minimize disambiguate
+ construct-dfa minimize ;
! Copyright (C) 2009 Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license.
-USING: regexp.nfa regexp.dfa regexp.minimize kernel sequences
+USING: regexp.nfa regexp.disambiguate kernel sequences
assocs regexp.classes hashtables accessors fry vectors
-regexp.ast regexp.transition-tables ;
+regexp.ast regexp.transition-tables regexp.minimize ;
IN: regexp.negation
: ast>dfa ( parse-tree -- minimal-dfa )
- construct-nfa construct-dfa minimize ;
+ construct-nfa nfa>dfa ;
CONSTANT: fail-state -1
[ tree>> remove-lookahead ] [ options>> ] bi <with-options> ;
M: alternation remove-lookahead
- [ first>> ] [ second>> ] bi [ remove-lookahead ] bi@ ;
+ [ first>> ] [ second>> ] bi [ remove-lookahead ] bi@ alternation boa ;
M: concatenation remove-lookahead ;