! Copyright (C) 2008, 2009 Doug Coleman, Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license.
-USING: kernel arrays accessors fry sequences ;
+USING: kernel arrays accessors fry sequences regexp.classes ;
FROM: math.ranges => [a,b] ;
IN: regexp.ast
-TUPLE: primitive-class class ;
-C: <primitive-class> primitive-class
-
TUPLE: negation term ;
C: <negation> negation
[ n>> ] [ m>> ] bi [a,b] swap '[ _ repetition ] map <alternation> ;
: char-class ( ranges ? -- term )
- [ <alternation> ] dip [ <negation> ] when ;
+ [ <or-class> ] dip [ <not-class> ] when ;
TUPLE: not-class class ;
C: <not-class> not-class
+TUPLE: primitive-class class ;
+C: <primitive-class> primitive-class
+
M: or-class class-member?
seq>> [ class-member? ] with any? ;
M: not-class class-member?
class>> class-member? not ;
+
+M: primitive-class class-member?
+ class>> class-member? ;
! Copyright (C) 2009 Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license.
USING: regexp.nfa regexp.dfa regexp.minimize kernel sequences
-assocs regexp.classes hashtables accessors ;
+assocs regexp.classes hashtables accessors fry vectors
+regexp.ast regexp.transition-tables ;
IN: regexp.negation
: ast>dfa ( parse-tree -- minimal-dfa )
[ add-fail-state ] change-transitions
dup inverse-final-states >>final-states ;
-! M: negation nfa-node ( node -- )
-! ast>dfa negate-table adjoin-dfa ;
+: renumber-transitions ( transitions numbering -- new-transitions )
+ dup '[
+ [ _ at ]
+ [ [ [ _ at ] map ] assoc-map ] bi*
+ ] assoc-map ;
+
+: renumber-states ( transition-table -- transition-table )
+ dup transitions>> keys [ next-state ] H{ } map>assoc
+ [ renumber-transitions ] rewrite-transitions ;
+
+: box-transitions ( transition-table -- transition-table )
+ [ [ [ 1vector ] assoc-map ] assoc-map ] change-transitions ;
+
+: unify-final-state ( transition-table -- transition-table )
+ dup [ final-states>> keys ] keep
+ '[ -1 eps <literal-transition> _ add-transition ] each
+ H{ { -1 -1 } } >>final-states ;
+
+: adjoin-dfa ( transition-table -- start end )
+ box-transitions unify-final-state renumber-states
+ [ start-state>> ]
+ [ final-states>> keys first ]
+ [ table [ transitions>> ] bi@ swap update ] tri ;
+
+M: negation nfa-node ( node -- start end )
+ term>> ast>dfa negate-table adjoin-dfa ;
[ literal-transition add-simple-entry ] bi@
alternate-nodes [ nip ] dip
] if
- ] [
- literal-transition add-simple-entry
- ] if ;
+ ] [ literal-transition add-simple-entry ] if ;
M: primitive-class nfa-node ( node -- start end )
class>> dup
[ drop Letter-class ] when
class-transition add-simple-entry ;
+M: or-class nfa-node class-transition add-simple-entry ;
+M: not-class nfa-node class-transition add-simple-entry ;
+
M: any-char nfa-node ( node -- start end )
[ dotall option? ] dip any-char-no-nl ?
class-transition add-simple-entry ;
-M: negation nfa-node ( node -- start end )
- negate term>> nfa-node negate ;
+! M: negation nfa-node ( node -- start end )
+! negate term>> nfa-node negate ;
M: range nfa-node ( node -- start end )
case-insensitive option? [
{ CHAR: \\ [ CHAR: \\ ] }
{ CHAR: w [ c-identifier-class <primitive-class> ] }
- { CHAR: W [ c-identifier-class <primitive-class> <negation> ] }
+ { CHAR: W [ c-identifier-class <primitive-class> <not-class> ] }
{ CHAR: s [ java-blank-class <primitive-class> ] }
- { CHAR: S [ java-blank-class <primitive-class> <negation> ] }
+ { CHAR: S [ java-blank-class <primitive-class> <not-class> ] }
{ CHAR: d [ digit-class <primitive-class> ] }
- { CHAR: D [ digit-class <primitive-class> <negation> ] }
+ { CHAR: D [ digit-class <primitive-class> <not-class> ] }
[ ]
} case ;