1 ! Copyright (C) 2009 Daniel Ehrenberg.
2 ! See http://factorcode.org/license.txt for BSD license.
3 USING: kernel accessors regexp.classes math.bits assocs sequences
4 arrays sets regexp.dfa math fry regexp.minimize regexp.ast
5 locals regexp.transition-tables ;
6 IN: regexp.disambiguate
10 : make-partition ( choices classes -- partition )
11 zip [ first ] partition [ values ] bi@ parts boa ;
13 : powerset-partition ( sequence -- partitions )
14 [ length [ 2^ ] keep ] keep '[
15 _ <bits> _ make-partition
18 : partition>class ( parts -- class )
19 [ out>> [ <not-class> ] map ]
20 [ in>> <and-class> ] bi
23 : singleton-partition ( integer non-integers -- {class,partition} )
25 '[ _ [ class-member? ] with filter ] keep
29 : add-out ( seq partition -- partition' )
30 [ out>> append ] [ in>> ] bi swap parts boa ;
32 : intersection ( seq -- elts )
33 [ f ] [ unclip [ intersect ] reduce ] if-empty ;
35 : meaningful-integers ( partition table -- integers )
36 [ [ in>> ] [ out>> ] bi ] dip
37 '[ [ _ at ] map intersection ] bi@ diff ;
39 : class-integers ( classes integers -- table )
40 '[ _ over '[ _ class-member? ] filter ] H{ } map>assoc ;
42 : add-integers ( partitions classes integers -- partitions )
44 [ _ meaningful-integers ] keep add-out
47 : class-partitions ( classes -- assoc )
48 [ integer? ] partition [
49 dup powerset-partition spin add-integers
50 [ [ partition>class ] keep 2array ] map
52 ] [ '[ _ singleton-partition ] map ] 2bi append ;
54 : new-transitions ( transitions -- assoc ) ! assoc is class, partition
55 values [ keys ] gather
56 [ tagged-epsilon? not ] filter
59 : get-transitions ( partition state-transitions -- next-states )
60 [ in>> ] dip '[ _ at ] gather sift ;
62 : preserving-epsilon ( state-transitions quot -- new-state-transitions )
63 [ [ drop tagged-epsilon? ] assoc-filter ] bi
64 assoc-union H{ } assoc-like ; inline
66 : disambiguate ( nfa -- nfa )
68 dup new-transitions '[
70 _ swap '[ _ get-transitions ] assoc-map
71 [ nip empty? not ] assoc-filter
74 ] change-transitions ;