1 ! Copyright (C) 2008 Doug Coleman.
2 ! See http://factorcode.org/license.txt for BSD license.
3 USING: accessors arrays assocs grouping kernel
4 locals math namespaces sequences fry quotations
5 math.order math.ranges vectors unicode.categories
6 regexp.transition-tables words sets
7 unicode.case.private regexp.ast regexp.classes ;
8 ! This uses unicode.case.private for ch>upper and ch>lower
9 ! but case-insensitive matching should be done by case-folding everything
10 ! before processing starts
13 ERROR: feature-is-broken feature ;
18 negated? [ not ] change ;
28 : next-state ( -- state )
29 state [ get ] [ inc ] bi ;
33 : set-each ( keys value hashtable -- )
34 '[ _ swap _ set-at ] each ;
36 : options>hash ( options -- hashtable )
38 [ [ on>> t ] dip set-each ]
39 [ [ off>> f ] dip set-each ] 2bi
42 : using-options ( options quot -- )
43 [ options>hash option-stack [ ?push ] change ] dip
44 call option-stack get pop* ; inline
46 : option? ( obj -- ? )
47 option-stack get assoc-stack ;
49 : set-start-state ( -- nfa-table )
51 combine-stack get pop first >>start-state ;
53 GENERIC: nfa-node ( node -- )
55 :: add-simple-entry ( obj class -- )
56 [let* | s0 [ next-state ]
58 stack [ combine-stack get ]
59 table [ nfa-table get ] |
61 s0 f obj class make-transition table add-transition
62 s0 s1 <default-transition> table add-transition
64 s0 s1 obj class make-transition table add-transition
66 s0 s1 2array stack push
67 t s1 table final-states>> set-at ] ;
69 :: concatenate-nodes ( -- )
70 [let* | stack [ combine-stack get ]
71 table [ nfa-table get ]
72 s2 [ stack peek first ]
73 s3 [ stack pop second ]
74 s0 [ stack peek first ]
75 s1 [ stack pop second ] |
76 s1 s2 eps <literal-transition> table add-transition
77 s1 table final-states>> delete-at
78 s0 s3 2array stack push ] ;
80 :: alternate-nodes ( -- )
81 [let* | stack [ combine-stack get ]
82 table [ nfa-table get ]
83 s2 [ stack peek first ]
84 s3 [ stack pop second ]
85 s0 [ stack peek first ]
86 s1 [ stack pop second ]
89 s4 s0 eps <literal-transition> table add-transition
90 s4 s2 eps <literal-transition> table add-transition
91 s1 s5 eps <literal-transition> table add-transition
92 s3 s5 eps <literal-transition> table add-transition
93 s1 table final-states>> delete-at
94 s3 table final-states>> delete-at
95 t s5 table final-states>> set-at
96 s4 s5 2array stack push ] ;
98 M: star nfa-node ( node -- )
100 [let* | stack [ combine-stack get ]
101 s0 [ stack peek first ]
102 s1 [ stack pop second ]
105 table [ nfa-table get ] |
106 s1 table final-states>> delete-at
107 t s3 table final-states>> set-at
108 s1 s0 eps <literal-transition> table add-transition
109 s2 s0 eps <literal-transition> table add-transition
110 s2 s3 eps <literal-transition> table add-transition
111 s1 s3 eps <literal-transition> table add-transition
112 s2 s3 2array stack push ] ;
114 M: concatenation nfa-node ( node -- )
115 seq>> [ eps literal-transition add-simple-entry ] [
116 reversed-regexp option? [ <reversed> ] when
117 [ [ nfa-node ] each ]
118 [ length 1- [ concatenate-nodes ] times ] bi
121 M: alternation nfa-node ( node -- )
123 [ [ nfa-node ] each ]
124 [ length 1- [ alternate-nodes ] times ] bi ;
126 M: integer nfa-node ( node -- )
127 case-insensitive option? [
128 dup [ ch>lower ] [ ch>upper ] bi
131 literal-transition add-simple-entry
133 [ literal-transition add-simple-entry ] bi@
137 literal-transition add-simple-entry
140 M: primitive-class nfa-node ( node -- )
142 { letter-class LETTER-class } member? case-insensitive option? and
143 [ drop Letter-class ] when
144 class-transition add-simple-entry ;
146 M: any-char nfa-node ( node -- )
147 [ dotall option? ] dip any-char-no-nl ?
148 class-transition add-simple-entry ;
150 M: negation nfa-node ( node -- )
151 negate term>> nfa-node negate ;
153 M: range nfa-node ( node -- )
154 case-insensitive option? [
155 ! This should be implemented for Unicode by case-folding
156 ! the input and all strings in the regexp.
157 dup [ from>> ] [ to>> ] bi
158 2dup [ Letter? ] bi@ and [
160 [ [ ch>lower ] bi@ <range> ]
161 [ [ ch>upper ] bi@ <range> ] 2bi
162 [ class-transition add-simple-entry ] bi@
166 class-transition add-simple-entry
169 class-transition add-simple-entry
172 M: with-options nfa-node ( node -- )
173 dup options>> [ tree>> nfa-node ] using-options ;
175 : construct-nfa ( ast -- nfa-table )
178 V{ } clone combine-stack set
180 <transition-table> clone nfa-table set