1 ! Copyright (C) 2009 Daniel Ehrenberg.
2 ! See http://factorcode.org/license.txt for BSD license.
3 USING: regexp.classes kernel sequences regexp.negation
4 quotations regexp.minimize assocs fry math locals combinators
5 accessors words compiler.units kernel.private strings
6 sequences.private arrays regexp.matchers call namespaces
7 regexp.transition-tables combinators.short-circuit ;
10 GENERIC: question>quot ( question -- quot )
17 M: t question>quot drop [ 2drop t ] ;
19 M: beginning-of-input question>quot
22 M: end-of-input question>quot
25 M: end-of-file question>quot
28 [ length swap - 2 <= ]
29 [ swap tail { "\n" "\r\n" "\r" "" } member? ]
31 [ [ nip [ length ] keep ] when ] keep
35 drop [ { [ length = ] [ ?nth "\r\n" member? ] } 2|| ] ;
38 drop [ { [ drop zero? ] [ [ 1- ] dip ?nth "\r\n" member? ] } 2|| ] ;
40 ! Maybe the condition>quot things can be combined, given a suitable method
41 ! for question>quot on classes, but maybe that'd make stack shuffling annoying
43 : execution-quot ( next-state -- quot )
44 ! The conditions here are for lookaround and anchors, etc
46 [ question>> question>quot ] [ yes>> ] [ no>> ] tri
47 [ execution-quot ] bi@
50 ! There shouldn't be a condition like this!
52 [ [ [ 2drop ] ] [ first '[ _ execute ] ] if-empty ]
59 : condition>quot ( condition -- quot )
60 ! Conditions here are for different classes
62 [ question>> ] [ yes>> ] [ no>> ] tri
63 [ condition>quot ] bi@
64 '[ dup _ class-member? _ _ if ]
67 [ [ 3drop ] ] [ execution-quot '[ drop @ ] ] if-empty
70 : non-literals>dispatch ( non-literal-transitions -- quot )
71 [ swap ] assoc-map ! we want state => predicate, and get the opposite as input
72 table>condition [ <box> ] condition-map condition>quot ;
74 : literals>cases ( literal-transitions -- case-body )
75 [ execution-quot ] assoc-map ;
77 : expand-one-or ( or-class transition -- alist )
78 [ seq>> ] dip '[ _ 2array ] map ;
80 : expand-or ( alist -- new-alist )
83 [ expand-one-or ] [ 2array 1array ] if
86 : split-literals ( transitions -- case default )
87 >alist expand-or [ first integer? ] partition
88 [ literals>cases ] [ non-literals>dispatch ] bi* ;
90 :: step ( last-match index str quot final? direction -- last-index/f )
91 final? index last-match ?
92 index str bounds-check? [
99 backwards? get -1 1 ? ;
101 : transitions>quot ( transitions final-state? -- quot )
102 dup shortest? get and [ 2drop [ drop nip ] ] [
103 [ split-literals swap case>quot ] dip direction
104 '[ { array-capacity string } declare _ _ _ step ]
107 : word>quot ( word dfa -- quot )
109 [ final-states>> key? ] 2bi
112 : states>code ( words dfa -- )
116 (( last-match index string -- ? ))
119 ] with-compilation-unit ;
121 : states>words ( dfa -- words dfa )
122 dup transitions>> keys [ gensym ] H{ } map>assoc
127 : dfa>word ( dfa -- word )
128 states>words [ states>code ] keep start-state>> ;
130 : check-string ( string -- string )
131 ! Make this configurable
132 dup string? [ "String required" throw ] unless ;
134 : setup-regexp ( start-index string -- f start-index string )
135 [ f ] [ >fixnum ] [ check-string ] tri* ; inline
139 ! The quotation returned is ( start-index string -- i/f )
141 : dfa>quotation ( dfa -- quot )
142 dfa>word execution-quot '[ setup-regexp @ ] ;
144 : dfa>shortest-quotation ( dfa -- quot )
145 t shortest? [ dfa>quotation ] with-variable ;
147 : dfa>reverse-quotation ( dfa -- quot )
148 t backwards? [ dfa>quotation ] with-variable ;
150 : dfa>reverse-shortest-quotation ( dfa -- quot )
151 t backwards? [ dfa>shortest-quotation ] with-variable ;
153 TUPLE: quot-matcher quot ;
154 C: <quot-matcher> quot-matcher
156 M: quot-matcher match-index-from
157 quot>> call( index string -- i/f ) ;