1 ! Copyright (C) 2009 Daniel Ehrenberg.
2 ! See http://factorcode.org/license.txt for BSD license.
3 USING: regexp.classes kernel sequences regexp.negation
4 quotations assocs fry math locals combinators
5 accessors words compiler.units kernel.private strings
6 sequences.private arrays call namespaces unicode.breaks
7 regexp.transition-tables combinators.short-circuit ;
10 GENERIC: question>quot ( question -- quot )
17 M: t question>quot drop [ 2drop t ] ;
18 M: f question>quot drop [ 2drop f ] ;
20 M: not-class question>quot
21 class>> question>quot [ not ] compose ;
23 M: beginning-of-input question>quot
26 M: end-of-input question>quot
29 M: end-of-file question>quot
32 [ length swap - 2 <= ]
33 [ swap tail { "\n" "\r\n" "\r" "" } member? ]
38 drop [ { [ length = ] [ ?nth "\r\n" member? ] } 2|| ] ;
41 drop [ { [ drop zero? ] [ [ 1- ] dip ?nth "\r\n" member? ] } 2|| ] ;
43 M: word-break question>quot
44 drop [ word-break-at? ] ;
46 : (execution-quot) ( next-state -- quot )
47 ! The conditions here are for lookaround and anchors, etc
49 [ question>> question>quot ] [ yes>> ] [ no>> ] tri
50 [ (execution-quot) ] bi@
52 ] [ '[ _ execute ] ] if ;
54 : execution-quot ( next-state -- quot )
55 dup sequence? [ first ] when
61 : condition>quot ( condition -- quot )
62 ! Conditions here are for different classes
64 [ question>> ] [ yes>> ] [ no>> ] tri
65 [ condition>quot ] bi@
66 '[ dup _ class-member? _ _ if ]
69 [ [ 3drop ] ] [ execution-quot '[ drop @ ] ] if-empty
72 : non-literals>dispatch ( literals non-literals -- quot )
73 [ swap ] assoc-map ! we want state => predicate, and get the opposite as input
74 swap keys f assoc-answers
75 table>condition [ <box> ] condition-map condition>quot ;
77 : literals>cases ( literal-transitions -- case-body )
78 [ execution-quot ] assoc-map ;
80 : expand-one-or ( or-class transition -- alist )
81 [ seq>> ] dip '[ _ 2array ] map ;
83 : expand-or ( alist -- new-alist )
86 [ expand-one-or ] [ 2array 1array ] if
89 : split-literals ( transitions -- case default )
90 >alist expand-or [ first integer? ] partition
91 [ [ literals>cases ] keep ] dip non-literals>dispatch ;
93 :: step ( last-match index str quot final? direction -- last-index/f )
94 final? index last-match ?
95 index str bounds-check? [
102 backwards? get -1 1 ? ;
104 : transitions>quot ( transitions final-state? -- quot )
105 dup shortest? get and [ 2drop [ drop nip ] ] [
106 [ split-literals swap case>quot ] dip direction
107 '[ { array-capacity string } declare _ _ _ step ]
110 : word>quot ( word dfa -- quot )
112 [ final-states>> key? ] 2bi
115 : states>code ( words dfa -- )
116 [ ! with-compilation-unit doesn't compile, so we need call( -- )
120 (( last-match index string -- ? ))
123 ] with-compilation-unit
124 ] call( words dfa -- ) ;
126 : states>words ( dfa -- words dfa )
127 dup transitions>> keys [ gensym ] H{ } map>assoc
132 : dfa>main-word ( dfa -- word )
133 states>words [ states>code ] keep start-state>> ;
137 : simple-define-temp ( quot effect -- word )
138 [ [ define-temp ] with-compilation-unit ] call( quot effect -- word ) ;
140 : dfa>word ( dfa -- quot )
141 dfa>main-word execution-quot '[ drop [ f ] 2dip @ ]
142 (( start-index string regexp -- i/f )) simple-define-temp ;
144 : dfa>shortest-word ( dfa -- word )
145 t shortest? [ dfa>word ] with-variable ;
147 : dfa>reverse-word ( dfa -- word )
148 t backwards? [ dfa>word ] with-variable ;
150 : dfa>reverse-shortest-word ( dfa -- word )
151 t backwards? [ dfa>shortest-word ] with-variable ;