: char-class ( ranges ? -- term )
[ <or-class> ] dip [ <not-class> ] when ;
-TUPLE: lookahead term ;
+TUPLE: lookahead term positive? ;
C: <lookahead> lookahead
-TUPLE: lookbehind term ;
+TUPLE: lookbehind term positive? ;
C: <lookbehind> lookbehind
-
-TUPLE: possessive-star term ;
-C: <possessive-star> possessive-star
-
-: <possessive-plus> ( term -- term' )
- dup <possessive-star> 2array <concatenation> ;
-
'[ _ _ replace-question ] assoc-map
[ nip ] assoc-filter ;
+: answers ( table questions answer -- new-table )
+ '[ _ answer ] each ;
+
DEFER: make-condition
: (make-condition) ( table questions question -- condition )
M: ^ question>quot
drop [ { [ drop zero? ] [ [ 1- ] dip ?nth "\r\n" member? ] } 2|| ] ;
-! Maybe the condition>quot things can be combined, given a suitable method
-! for question>quot on classes, but maybe that'd make stack shuffling annoying
-
-: execution-quot ( next-state -- quot )
+: (execution-quot) ( next-state -- quot )
! The conditions here are for lookaround and anchors, etc
dup condition? [
[ question>> question>quot ] [ yes>> ] [ no>> ] tri
- [ execution-quot ] bi@
+ [ (execution-quot) ] bi@
'[ 2dup @ _ _ if ]
- ] [
- ! There shouldn't be a condition like this!
- dup sequence?
- [ [ [ 2drop ] ] [ first '[ _ execute ] ] if-empty ]
- [ '[ _ execute ] ] if
- ] if ;
+ ] [ '[ _ execute ] ] if ;
+
+: execution-quot ( next-state -- quot )
+ dup sequence? [ first ] when
+ (execution-quot) ;
TUPLE: box contents ;
C: <box> box
[ [ 3drop ] ] [ execution-quot '[ drop @ ] ] if-empty
] if ;
-: non-literals>dispatch ( non-literal-transitions -- quot )
+: non-literals>dispatch ( literals non-literals -- quot )
[ swap ] assoc-map ! we want state => predicate, and get the opposite as input
+ swap keys f answers
table>condition [ <box> ] condition-map condition>quot ;
: literals>cases ( literal-transitions -- case-body )
: split-literals ( transitions -- case default )
>alist expand-or [ first integer? ] partition
- [ literals>cases ] [ non-literals>dispatch ] bi* ;
+ [ [ literals>cases ] keep ] dip non-literals>dispatch ;
:: step ( last-match index str quot final? direction -- last-index/f )
final? index last-match ?
! Copyright (C) 2009 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
USING: tools.test regexp.minimize assocs regexp
-accessors regexp.transition-tables regexp.parser regexp.negation ;
+accessors regexp.transition-tables regexp.parser
+regexp.classes regexp.negation ;
IN: regexp.minimize.tests
[ t ] [ 1 2 H{ { { 1 2 } t } } same-partition? ] unit-test
] unit-test
[ [ ] [ ] while-changes ] must-infer
+
+[ H{ { T{ or-class f { 1 2 } } 3 } { 4 5 } } ]
+[ H{ { 1 3 } { 2 3 } { 4 5 } } combine-state-transitions ] unit-test
: number-states ( table -- newtable )
dup table>state-numbers transitions-at ;
-: no-conditions? ( state transition-table -- ? )
- transitions>> at values [ condition? ] any? not ;
+: has-conditions? ( state transitions -- ? )
+ at values [ condition? ] any? ;
: initially-same? ( s1 s2 transition-table -- ? )
{
! Partition table is sorted-array => ?
H{ } clone :> out
transition-table transitions>> keys
- [ transition-table no-conditions? ] filter :> states
+ [ transition-table transitions>> has-conditions? ] partition :> states
+ [ dup 2array out conjoin ] each
states [| s1 |
states [| s2 |
s1 s2 transition-table initially-same?
'[ _ partition-more ] [ assoc-size ] while-changes
partition>classes ;
-: canonical-state? ( state state-classes -- ? )
- dupd at = ;
+: canonical-state? ( state transitions state-classes -- ? )
+ '[ dup _ at = ] swap '[ _ has-conditions? ] bi or ;
: delete-duplicates ( transitions state-classes -- new-transitions )
- '[ drop _ canonical-state? ] assoc-filter ;
+ dupd '[ drop _ _ canonical-state? ] assoc-filter ;
: combine-states ( table -- smaller-table )
dup state-classes
[ transitions-at ] keep
'[ _ delete-duplicates ] change-transitions ;
+: combine-state-transitions ( hash -- hash )
+ H{ } clone tuck '[
+ _ [ 2array <or-class> ] change-at
+ ] assoc-each [ swap ] assoc-map ;
+
+: combine-transitions ( table -- table )
+ [ [ combine-state-transitions ] assoc-map ] change-transitions ;
+
: minimize ( table -- minimal-table )
- clone number-states combine-states ;
+ clone
+ number-states
+ combine-states
+ combine-transitions ;
: unify-final-state ( transition-table -- transition-table )
dup [ final-states>> keys ] keep
- '[ -2 epsilon _ add-transition ] each
+ '[ -2 epsilon _ set-transition ] each
H{ { -2 -2 } } >>final-states ;
: adjoin-dfa ( transition-table -- start end )
- box-transitions unify-final-state renumber-states
+ unify-final-state renumber-states box-transitions
[ start-state>> ]
[ final-states>> keys first ]
[ nfa-table get [ transitions>> ] bi@ swap update ] tri ;
=> [[ a on off parse-options <with-options> ]]
| "?#" [^)]* => [[ f ]]
| "?~" Alternation:a => [[ a <negation> ]]
- | "?=" Alternation:a => [[ a <lookahead> <tagged-epsilon> ]]
- | "?!" Alternation:a => [[ a <negation> <lookahead> <tagged-epsilon> ]]
- | "?<=" Alternation:a => [[ a <lookbehind> <tagged-epsilon> ]]
- | "?<!" Alternation:a => [[ a <negation> <lookbehind> <tagged-epsilon> ]]
+ | "?=" Alternation:a => [[ a t <lookahead> <tagged-epsilon> ]]
+ | "?!" Alternation:a => [[ a f <lookahead> <tagged-epsilon> ]]
+ | "?<=" Alternation:a => [[ a t <lookbehind> <tagged-epsilon> ]]
+ | "?<!" Alternation:a => [[ a f <lookbehind> <tagged-epsilon> ]]
| Alternation
Element = "(" Parenthized:p ")" => [[ p ]]
| Number:n "," Number:m "}" => [[ n m <from-to> ]]
Repeated = Element:e "{" Times:t => [[ e t <times> ]]
- | Element:e "*+" => [[ e <possessive-star> ]]
- | Element:e "++" => [[ e <possessive-plus> ]]
| Element:e "?" => [[ e <maybe> ]]
| Element:e "*" => [[ e <star> ]]
| Element:e "+" => [[ e <plus> ]]
[ t ] [ "b" "b|" <regexp> matches? ] unit-test
[ t ] [ "" "b|" <regexp> matches? ] unit-test
[ t ] [ "" "b|" <regexp> matches? ] unit-test
-[ f ] [ "" "|" <regexp> matches? ] unit-test
-[ f ] [ "" "|||||||" <regexp> matches? ] unit-test
+[ t ] [ "" "|" <regexp> matches? ] unit-test
+[ t ] [ "" "|||||||" <regexp> matches? ] unit-test
[ f ] [ "aa" "a|b|c" <regexp> matches? ] unit-test
[ f ] [ "bb" "a|b|c" <regexp> matches? ] unit-test
[ f ] [ "abc" "[\\p{Upper}]{3}" <regexp> matches? ] unit-test
[ t ] [ "ABC" "[\\p{Upper}]{3}" <regexp> matches? ] unit-test
-[ f ] [ "" "\\Q\\E" <regexp> matches? ] unit-test
+[ t ] [ "" "\\Q\\E" <regexp> matches? ] unit-test
[ f ] [ "a" "\\Q\\E" <regexp> matches? ] unit-test
[ t ] [ "|*+" "\\Q|*+\\E" <regexp> matches? ] unit-test
[ f ] [ "abc" "\\Q|*+\\E" <regexp> matches? ] unit-test
[ "-- title --" ] [ "== title ==" R/ =/ "-" re-replace ] unit-test
-[ f ] [ "ab" "a(?!b)" <regexp> match-head ] unit-test
+[ "" ] [ "ab" "a(?!b)" <regexp> match-head >string ] unit-test
[ "a" ] [ "ac" "a(?!b)" <regexp> match-head >string ] unit-test
+[ t ] [ "fxxbar" ".{3}(?!foo)bar" <regexp> matches? ] unit-test
+[ t ] [ "foobar" ".{3}(?!foo)bar" <regexp> matches? ] unit-test
[ t ] [ "fxxbar" "(?!foo).{3}bar" <regexp> matches? ] unit-test
[ f ] [ "foobar" "(?!foo).{3}bar" <regexp> matches? ] unit-test
[ "a" ] [ "ab" "a(?=b)(?=b)" <regexp> match-head >string ] unit-test
[ f ] [ "\ra" R/ \Aa\Z/m matches? ] unit-test
[ 1 ] [ "a" R/ \Aa\Z/m count-matches ] unit-test
-[ 1 ] [ "\na" R/ \Aaa\Z/m count-matches ] unit-test
-[ 1 ] [ "\r\na" R/ \Aa\Z/m count-matches ] unit-test
-[ 1 ] [ "\ra" R/ \Aa\Z/m count-matches ] unit-test
+[ 0 ] [ "\na" R/ \Aaa\Z/m count-matches ] unit-test
+[ 0 ] [ "\r\na" R/ \Aa\Z/m count-matches ] unit-test
+[ 0 ] [ "\ra" R/ \Aa\Z/m count-matches ] unit-test
[ t ] [ "a" R/ ^a/m matches? ] unit-test
[ f ] [ "\na" R/ ^a/m matches? ] unit-test
: <reversed-option> ( ast -- reversed )
"r" string>options <with-options> ;
+: maybe-negated ( lookaround quot -- regexp-quot )
+ '[ term>> @ ] [ positive?>> [ ] [ not ] ? ] bi compose ;
+
M: lookahead question>quot ! Returns ( index string -- ? )
- term>> ast>dfa dfa>shortest-quotation ;
+ [ ast>dfa dfa>shortest-quotation ] maybe-negated ;
M: lookbehind question>quot ! Returns ( index string -- ? )
- term>> <reversed-option>
- ast>dfa dfa>reverse-shortest-quotation
- [ [ 1- ] dip ] prepose ;
+ [
+ <reversed-option>
+ ast>dfa dfa>reverse-shortest-quotation
+ [ [ 1- ] dip ] prepose
+ ] maybe-negated ;
: compile-reverse ( regexp -- regexp )
dup '[