1 ! Copyright (C) 2008 Doug Coleman.
2 ! See http://factorcode.org/license.txt for BSD license.
3 USING: accessors arrays assocs grouping kernel
4 locals math namespaces sequences fry quotations
5 math.order math.ranges vectors unicode.categories
6 regexp.transition-tables words sets hashtables combinators.short-circuit
7 unicode.case.private regexp.ast regexp.classes ;
10 ! This uses unicode.case.private for ch>upper and ch>lower
11 ! but case-insensitive matching should be done by case-folding everything
12 ! before processing starts
14 GENERIC: remove-lookahead ( syntax-tree -- syntax-tree' )
15 ! This is unfinished and does nothing right now!
17 M: object remove-lookahead ;
19 M: with-options remove-lookahead
20 [ tree>> remove-lookahead ] [ options>> ] bi <with-options> ;
22 M: alternation remove-lookahead
23 [ first>> ] [ second>> ] bi [ remove-lookahead ] bi@ alternation boa ;
25 M: concatenation remove-lookahead ;
33 : next-state ( -- state )
34 state [ get ] [ inc ] bi ;
37 : table ( -- table ) nfa-table get ;
39 : set-each ( keys value hashtable -- )
40 '[ _ swap _ set-at ] each ;
42 : options>hash ( options -- hashtable )
44 [ [ on>> t ] dip set-each ]
45 [ [ off>> f ] dip set-each ] 2bi
48 : using-options ( options quot -- )
49 [ options>hash option-stack [ ?push ] change ] dip
50 call option-stack get pop* ; inline
52 : option? ( obj -- ? )
53 option-stack get assoc-stack ;
55 GENERIC: nfa-node ( node -- start-state end-state )
57 : add-simple-entry ( obj class -- start-state end-state )
58 [ next-state next-state 2dup ] 2dip
59 make-transition table add-transition ;
61 : epsilon-transition ( source target -- )
62 eps <literal-transition> table add-transition ;
64 M:: star nfa-node ( node -- start end )
65 node term>> nfa-node :> s1 :> s0
68 s1 s0 epsilon-transition
69 s2 s0 epsilon-transition
70 s2 s3 epsilon-transition
71 s1 s3 epsilon-transition
75 drop eps literal-transition add-simple-entry ;
77 M: concatenation nfa-node ( node -- start end )
78 [ first>> ] [ second>> ] bi
79 reversed-regexp option? [ swap ] when
81 [ epsilon-transition ] dip ;
83 :: alternate-nodes ( s0 s1 s2 s3 -- start end )
86 s4 s0 epsilon-transition
87 s4 s2 epsilon-transition
88 s1 s5 epsilon-transition
89 s3 s5 epsilon-transition
92 M: alternation nfa-node ( node -- start end )
93 [ first>> ] [ second>> ] bi
97 GENERIC: modify-class ( char-class -- char-class' )
99 M: object modify-class ;
101 M: integer modify-class
102 case-insensitive option? [
104 [ ch>lower ] [ ch>upper ] bi 2array <or-class>
108 M: integer nfa-node ( node -- start end )
109 modify-class dup class?
110 class-transition literal-transition ?
113 M: primitive-class modify-class
114 class>> modify-class <primitive-class> ;
116 M: or-class modify-class
117 seq>> [ modify-class ] map <or-class> ;
119 M: not-class modify-class
120 class>> modify-class <not-class> ;
122 M: any-char modify-class
123 [ dotall option? ] dip any-char-no-nl ? ;
125 : modify-letter-class ( class -- newclass )
126 case-insensitive option? [ drop Letter-class ] when ;
127 M: letter-class modify-class modify-letter-class ;
128 M: LETTER-class modify-class modify-letter-class ;
130 : cased-range? ( range -- ? )
131 [ from>> ] [ to>> ] bi {
132 [ [ letter? ] bi@ and ]
133 [ [ LETTER? ] bi@ and ]
136 M: range modify-class
137 case-insensitive option? [
139 [ from>> ] [ to>> ] bi
140 [ [ ch>lower ] bi@ <range> ]
141 [ [ ch>upper ] bi@ <range> ] 2bi
147 modify-class class-transition add-simple-entry ;
149 M: with-options nfa-node ( node -- start end )
150 dup options>> [ tree>> nfa-node ] using-options ;
152 : construct-nfa ( ast -- nfa-table )
155 <transition-table> nfa-table set
156 remove-lookahead nfa-node
158 swap dup associate >>final-states