1 ! Copyright (C) 2008, 2009 Doug Coleman, Daniel Ehrenberg.
2 ! See http://factorcode.org/license.txt for BSD license.
3 USING: accessors arrays assocs grouping kernel locals math namespaces
4 sequences fry quotations math.order math.ranges vectors
5 unicode.categories regexp.transition-tables words sets hashtables
6 combinators.short-circuit unicode.case unicode.case.private regexp.ast
10 ! This uses unicode.case.private for ch>upper and ch>lower
11 ! but case-insensitive matching should be done by case-folding everything
12 ! before processing starts
18 : next-state ( -- state )
19 state [ get ] [ inc ] bi ;
23 : set-each ( keys value hashtable -- )
24 '[ _ swap _ set-at ] each ;
26 : options>hash ( options -- hashtable )
28 [ [ on>> t ] dip set-each ]
29 [ [ off>> f ] dip set-each ] 2bi
32 : using-options ( options quot -- )
33 [ options>hash option-stack [ ?push ] change ] dip
34 call option-stack get pop* ; inline
36 : option? ( obj -- ? )
37 option-stack get assoc-stack ;
39 GENERIC: nfa-node ( node -- start-state end-state )
41 : add-simple-entry ( obj -- start-state end-state )
42 [ next-state next-state 2dup ] dip
43 nfa-table get add-transition ;
45 : epsilon-transition ( source target -- )
46 epsilon nfa-table get add-transition ;
48 M:: star nfa-node ( node -- start end )
49 node term>> nfa-node :> s1 :> s0
52 s1 s0 epsilon-transition
53 s2 s0 epsilon-transition
54 s2 s3 epsilon-transition
55 s1 s3 epsilon-transition
58 GENERIC: modify-epsilon ( tag -- newtag )
59 ! Potential off-by-one errors when lookaround nested in lookbehind
61 M: object modify-epsilon ;
63 : line-option ( multiline unix-lines default -- option )
65 drop [ unix-lines option? ] 2dip swap ?
69 $unix end-of-input line-option ;
72 ^unix beginning-of-input line-option ;
74 M: tagged-epsilon nfa-node
75 clone [ modify-epsilon ] change-tag add-simple-entry ;
77 M: concatenation nfa-node ( node -- start end )
78 [ first>> ] [ second>> ] bi
79 reversed-regexp option? [ swap ] when
81 [ epsilon-transition ] dip ;
83 :: alternate-nodes ( s0 s1 s2 s3 -- start end )
86 s4 s0 epsilon-transition
87 s4 s2 epsilon-transition
88 s1 s5 epsilon-transition
89 s3 s5 epsilon-transition
92 M: alternation nfa-node ( node -- start end )
93 [ first>> ] [ second>> ] bi
97 GENERIC: modify-class ( char-class -- char-class' )
99 M: object modify-class ;
101 M: integer modify-class
102 case-insensitive option? [
104 [ ch>lower ] [ ch>upper ] bi 2array <or-class>
108 M: integer nfa-node ( node -- start end )
109 modify-class add-simple-entry ;
111 M: primitive-class modify-class
112 class>> modify-class <primitive-class> ;
114 M: or-class modify-class
115 seq>> [ modify-class ] map <or-class> ;
117 M: not-class modify-class
118 class>> modify-class <not-class> ;
120 M: any-char modify-class
121 drop dotall option? t any-char-no-nl ? ;
123 : modify-letter-class ( class -- newclass )
124 case-insensitive option? [ drop Letter-class ] when ;
125 M: letter-class modify-class modify-letter-class ;
126 M: LETTER-class modify-class modify-letter-class ;
128 : cased-range? ( range -- ? )
129 [ from>> ] [ to>> ] bi {
130 [ [ letter? ] bi@ and ]
131 [ [ LETTER? ] bi@ and ]
134 M: range modify-class
135 case-insensitive option? [
137 [ from>> ] [ to>> ] bi
138 [ [ ch>lower ] bi@ <range> ]
139 [ [ ch>upper ] bi@ <range> ] 2bi
145 modify-class add-simple-entry ;
147 M: with-options nfa-node ( node -- start end )
148 dup options>> [ tree>> nfa-node ] using-options ;
150 : construct-nfa ( ast -- nfa-table )
153 <transition-table> nfa-table set
156 swap dup associate >>final-states