1 ! Copyright (C) 2008 Doug Coleman.
2 ! See http://factorcode.org/license.txt for BSD license.
3 USING: accessors arrays assocs grouping kernel
4 locals math namespaces sequences fry quotations
5 math.order math.ranges vectors unicode.categories
6 regexp.transition-tables words sets hashtables
7 unicode.case.private regexp.ast regexp.classes ;
8 ! This uses unicode.case.private for ch>upper and ch>lower
9 ! but case-insensitive matching should be done by case-folding everything
10 ! before processing starts
16 negated? [ not ] change ;
24 : next-state ( -- state )
25 state [ get ] [ inc ] bi ;
28 : table ( -- table ) nfa-table get ;
30 : set-each ( keys value hashtable -- )
31 '[ _ swap _ set-at ] each ;
33 : options>hash ( options -- hashtable )
35 [ [ on>> t ] dip set-each ]
36 [ [ off>> f ] dip set-each ] 2bi
39 : using-options ( options quot -- )
40 [ options>hash option-stack [ ?push ] change ] dip
41 call option-stack get pop* ; inline
43 : option? ( obj -- ? )
44 option-stack get assoc-stack ;
46 GENERIC: nfa-node ( node -- start-state end-state )
48 :: add-simple-entry ( obj class -- start-state end-state )
52 s0 f obj class make-transition table add-transition
53 s0 s1 <default-transition> table add-transition
55 s0 s1 obj class make-transition table add-transition
59 : epsilon-transition ( source target -- )
60 eps <literal-transition> table add-transition ;
62 M:: star nfa-node ( node -- start end )
63 node term>> nfa-node :> s1 :> s0
66 s1 s0 epsilon-transition
67 s2 s0 epsilon-transition
68 s2 s3 epsilon-transition
69 s1 s3 epsilon-transition
73 drop eps literal-transition add-simple-entry ;
75 M: concatenation nfa-node ( node -- start end )
76 [ first>> ] [ second>> ] bi
77 reversed-regexp option? [ swap ] when
79 [ epsilon-transition ] dip ;
81 :: alternate-nodes ( s0 s1 s2 s3 -- start end )
84 s4 s0 epsilon-transition
85 s4 s2 epsilon-transition
86 s1 s5 epsilon-transition
87 s3 s5 epsilon-transition
90 M: alternation nfa-node ( node -- start end )
91 [ first>> ] [ second>> ] bi
95 M: integer nfa-node ( node -- start end )
96 case-insensitive option? [
97 dup [ ch>lower ] [ ch>upper ] bi
100 literal-transition add-simple-entry
102 [ literal-transition add-simple-entry ] bi@
103 alternate-nodes [ nip ] dip
105 ] [ literal-transition add-simple-entry ] if ;
107 M: primitive-class nfa-node ( node -- start end )
109 { letter-class LETTER-class } member? case-insensitive option? and
110 [ drop Letter-class ] when
111 class-transition add-simple-entry ;
113 M: or-class nfa-node class-transition add-simple-entry ;
114 M: not-class nfa-node class-transition add-simple-entry ;
116 M: any-char nfa-node ( node -- start end )
117 [ dotall option? ] dip any-char-no-nl ?
118 class-transition add-simple-entry ;
120 ! M: negation nfa-node ( node -- start end )
121 ! negate term>> nfa-node negate ;
123 M: range nfa-node ( node -- start end )
124 case-insensitive option? [
125 ! This should be implemented for Unicode by case-folding
126 ! the input and all strings in the regexp.
127 dup [ from>> ] [ to>> ] bi
128 2dup [ Letter? ] bi@ and [
130 [ [ ch>lower ] bi@ <range> ]
131 [ [ ch>upper ] bi@ <range> ] 2bi
132 [ class-transition add-simple-entry ] bi@
136 class-transition add-simple-entry
139 class-transition add-simple-entry
142 M: with-options nfa-node ( node -- start end )
143 dup options>> [ tree>> nfa-node ] using-options ;
145 : construct-nfa ( ast -- nfa-table )
149 <transition-table> clone nfa-table set
152 swap dup associate >>final-states