! Copyright (C) 2008, 2009 Doug Coleman, Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license.
-USING: accessors arrays assocs grouping kernel locals math namespaces
-sequences fry quotations math.order math.ranges vectors
-unicode.categories regexp.transition-tables words sets hashtables
-combinators.short-circuit unicode.data regexp.ast
-regexp.classes memoize ;
+USING: accessors arrays assocs combinators.short-circuit kernel
+math namespaces regexp.ast regexp.classes
+regexp.transition-tables sequences sets unicode vectors ;
IN: regexp.nfa
-! This uses unicode.data for ch>upper and ch>lower
+! This uses unicode for ch>upper and ch>lower
! but case-insensitive matching should be done by case-folding everything
! before processing starts
s1 s3 epsilon-transition
s2 s3 ;
-GENERIC: modify-epsilon ( tag -- newtag )
-! Potential off-by-one errors when lookaround nested in lookbehind
-
-M: object modify-epsilon ;
-
-: line-option ( multiline unix-lines default -- option )
- multiline option? [
- drop [ unix-lines option? ] 2dip swap ?
- ] [ 2nip ] if ;
+DEFER: modify-class
-M: $ modify-epsilon
- $unix end-of-input line-option ;
-
-M: ^ modify-epsilon
- ^unix beginning-of-input line-option ;
+! Potential off-by-one errors when lookaround nested in lookbehind
M: tagged-epsilon nfa-node
- clone [ modify-epsilon ] change-tag add-simple-entry ;
+ clone [ modify-class ] change-tag add-simple-entry ;
-M: concatenation nfa-node ( node -- start end )
+M: concatenation nfa-node
[ first>> ] [ second>> ] bi
reversed-regexp option? [ swap ] when
[ nfa-node ] bi@
s3 s5 epsilon-transition
s4 s5 ;
-M: alternation nfa-node ( node -- start end )
+M: alternation nfa-node
[ first>> ] [ second>> ] bi
[ nfa-node ] bi@
alternate-nodes ;
M: object modify-class ;
+M: concatenation modify-class
+ [ first>> ] [ second>> ] bi [ modify-class ] bi@
+ concatenation boa ;
+
+M: alternation modify-class
+ [ first>> ] [ second>> ] bi [ modify-class ] bi@
+ alternation boa ;
+
+M: lookahead modify-class
+ term>> modify-class lookahead boa ;
+
+M: lookbehind modify-class
+ term>> modify-class lookbehind boa ;
+
+: line-option ( multiline unix-lines default -- option )
+ multiline option? [
+ drop [ unix-lines option? ] 2dip swap ?
+ ] [ 2nip ] if ;
+
+M: $crlf modify-class
+ $unix end-of-input line-option ;
+
+M: ^crlf modify-class
+ ^unix beginning-of-input line-option ;
+
M: integer modify-class
case-insensitive option? [
dup Letter? [
] when
] when ;
-M: integer nfa-node ( node -- start end )
- modify-class add-simple-entry ;
-
M: primitive-class modify-class
class>> modify-class <primitive-class> ;
: cased-range? ( range -- ? )
[ from>> ] [ to>> ] bi {
- [ [ letter? ] bi@ and ]
- [ [ LETTER? ] bi@ and ]
+ [ [ letter? ] both? ]
+ [ [ LETTER? ] both? ]
} 2|| ;
M: range-class modify-class
dup cased-range? [
[ from>> ] [ to>> ] bi
[ [ ch>lower ] bi@ <range-class> ]
- [ [ ch>upper ] bi@ <range-class> ] 2bi
+ [ [ ch>upper ] bi@ <range-class> ] 2bi
2array <or-class>
] when
] when ;
M: object nfa-node
modify-class add-simple-entry ;
-M: with-options nfa-node ( node -- start end )
+M: with-options nfa-node
dup options>> [ tree>> nfa-node ] using-options ;
: construct-nfa ( ast -- nfa-table )
[
- 0 state set
- <transition-table> nfa-table set
+ 0 state namespaces:set
+ <transition-table> nfa-table namespaces:set
nfa-node
nfa-table get
- swap dup associate >>final-states
+ swap 1array fast-set >>final-states
swap >>start-state
] with-scope ;