1 ! Copyright (C) 2009 Daniel Ehrenberg.
2 ! See http://factorcode.org/license.txt for BSD license.
3 USING: regexp.classes kernel sequences regexp.negation
4 quotations regexp.minimize assocs fry math locals combinators
5 accessors words compiler.units kernel.private strings
6 sequences.private arrays regexp.matchers call ;
9 : literals>cases ( literal-transitions -- case-body )
10 [ 1quotation ] assoc-map ;
12 : condition>quot ( condition -- quot )
14 [ question>> ] [ yes>> ] [ no>> ] tri
15 [ condition>quot ] bi@
16 '[ dup _ class-member? _ _ if ]
18 [ [ 3drop ] ] [ '[ drop _ execute ] ] if-empty
21 : new-non-literals>dispatch ( non-literal-transitions -- quot )
22 table>condition condition>quot ;
24 : non-literals>dispatch ( non-literal-transitions -- quot )
25 [ [ '[ dup _ class-member? ] ] [ '[ drop _ execute ] ] bi* ] assoc-map
26 [ 3drop ] suffix '[ _ cond ] ;
28 : expand-one-or ( or-class transition -- alist )
29 [ seq>> ] dip '[ _ 2array ] map ;
31 : expand-or ( alist -- new-alist )
34 [ expand-one-or ] [ 2array 1array ] if
37 : split-literals ( transitions -- case default )
38 >alist expand-or [ first integer? ] partition
39 [ literals>cases ] [ non-literals>dispatch ] bi* ;
41 :: step ( last-match index str case-body final? -- last-index/f )
42 final? index last-match ?
43 index str bounds-check? [
49 : transitions>quot ( transitions final-state? -- quot )
50 [ split-literals suffix ] dip
51 '[ { array-capacity sequence } declare _ _ step ] ;
53 : word>quot ( word dfa -- quot )
55 [ final-states>> key? ] 2bi
58 : states>code ( words dfa -- )
62 (( last-match index string -- ? ))
65 ] with-compilation-unit ;
67 : transitions-at ( transitions assoc -- new-transitions )
70 [ [ _ at ] assoc-map ] bi*
73 : states>words ( dfa -- words dfa )
74 dup transitions>> keys [ gensym ] H{ } map>assoc
75 [ [ transitions-at ] rewrite-transitions ]
79 : dfa>word ( dfa -- word )
80 states>words [ states>code ] keep start-state>> ;
82 : check-sequence ( string -- string )
83 ! Make this configurable
84 dup sequence? [ "String required" throw ] unless ;
86 : run-regexp ( start-index string word -- ? )
87 { [ f ] [ >fixnum ] [ check-sequence ] [ execute ] } spread ; inline
89 : dfa>quotation ( dfa -- quot )
90 dfa>word '[ _ run-regexp ] ;
92 TUPLE: quot-matcher quot ;
93 C: <quot-matcher> quot-matcher
95 M: quot-matcher match-index-from
96 quot>> call( index string -- i/f ) ;