1 ! Copyright (C) 2009 Daniel Ehrenberg.
2 ! See http://factorcode.org/license.txt for BSD license.
3 USING: regexp regexp.private regexp.classes kernel sequences regexp.negation
4 quotations regexp.minimize assocs fry math locals combinators
5 accessors words compiler.units ;
8 : literals>cases ( literal-transitions -- case-body )
9 [ 1quotation ] assoc-map ;
11 : non-literals>dispatch ( non-literal-transitions -- quot )
12 [ [ '[ dup _ class-member? ] ] [ 1quotation ] bi* ] assoc-map
13 [ 3drop f ] suffix '[ _ cond ] ;
15 : split-literals ( transitions -- case default )
16 ! Convert disjunction of literals to literals. Also maybe small ranges.
17 >alist [ first integer? ] partition
18 [ literals>cases ] [ non-literals>dispatch ] bi* ;
20 USING: kernel.private strings sequences.private ;
22 :: step ( index str case-body final? -- match? )
23 index str bounds-check? [
27 ] [ final? ] if ; inline
29 : transitions>quot ( transitions final-state? -- quot )
30 [ split-literals suffix ] dip
31 '[ { array-capacity string } declare _ _ step ] ;
33 : word>quot ( word dfa -- quot )
35 [ final-states>> key? ] 2bi
38 : states>code ( words dfa -- )
42 (( index string -- ? )) define-declared
44 ] with-compilation-unit ;
46 : transitions-at ( transitions assoc -- new-transitions )
49 [ [ _ at ] assoc-map ] bi*
52 : states>words ( dfa -- words dfa )
53 dup transitions>> keys [ gensym ] H{ } map>assoc
54 [ [ transitions-at ] rewrite-transitions ]
58 : dfa>word ( dfa -- word )
59 states>words [ states>code ] keep start-state>> ;
61 : run-regexp ( string word -- ? )
62 [ 0 ] 2dip execute ; inline
64 : regexp>quotation ( regexp -- quot )
65 compile-regexp dfa>> dfa>word '[ _ run-regexp ] ;