USING: regexp.classes tools.test arrays kernel ;
IN: regexp.classes.tests
+! Class algebra
+
[ f ] [ { 1 2 } <and-class> ] unit-test
[ T{ or-class f { 2 1 } } ] [ { 1 2 } <or-class> ] unit-test
[ 3 ] [ { 1 2 } <and-class> 3 2array <or-class> ] unit-test
[ T{ primitive-class { class letter-class } } ] [ letter-class <primitive-class> dup 2array <and-class> ] unit-test
[ T{ primitive-class { class letter-class } } ] [ letter-class <primitive-class> dup 2array <or-class> ] unit-test
[ T{ or-class { seq { 2 3 1 } } } ] [ { 1 2 } <or-class> { 2 3 } <or-class> 2array <or-class> ] unit-test
+
+! Making classes into nested conditionals
+
+[ V{ 1 2 3 4 } ] [ T{ and-class f { 1 T{ not-class f 2 } T{ or-class f { 3 4 } } 2 } } class>questions ] unit-test
+[ { 3 } ] [ { { t 3 } } table>condition ] unit-test
+[ { T{ primitive-class } } ] [ { { t 1 } { T{ primitive-class } 2 } } table>questions ] unit-test
+[ { { t 1 } { t 2 } } ] [ { { t 1 } { T{ primitive-class } 2 } } T{ primitive-class } t answer ] unit-test
+[ { { t 1 } } ] [ { { t 1 } { T{ primitive-class } 2 } } T{ primitive-class } f answer ] unit-test
+[ T{ condition f T{ primitive-class } { 1 2 } { 1 } } ] [ { { t 1 } { T{ primitive-class } 2 } } table>condition ] unit-test
+
+SYMBOL: foo
+SYMBOL: bar
+
+[ T{ condition f T{ primitive-class f bar } T{ condition f T{ primitive-class f foo } { 1 2 3 } { 1 3 } } T{ condition f T{ primitive-class f foo } { 1 2 } { 1 } } } ] [ { { t 1 } { T{ primitive-class f foo } 2 } { T{ primitive-class f bar } 3 } } table>condition ] unit-test
+
+[ t ] [ foo <primitive-class> dup t replace-question ] unit-test
+[ f ] [ foo <primitive-class> dup f replace-question ] unit-test
+[ T{ primitive-class f foo } ] [ foo <primitive-class> bar <primitive-class> t replace-question ] unit-test
+[ T{ primitive-class f foo } ] [ foo <primitive-class> bar <primitive-class> f replace-question ] unit-test
+[ T{ primitive-class f foo } ] [ foo <primitive-class> bar <primitive-class> 2array <and-class> bar <primitive-class> t replace-question ] unit-test
+[ T{ primitive-class f bar } ] [ foo <primitive-class> bar <primitive-class> 2array <and-class> foo <primitive-class> t replace-question ] unit-test
+[ f ] [ foo <primitive-class> bar <primitive-class> 2array <and-class> foo <primitive-class> f replace-question ] unit-test
+[ f ] [ foo <primitive-class> bar <primitive-class> 2array <and-class> bar <primitive-class> f replace-question ] unit-test
+[ t ] [ foo <primitive-class> bar <primitive-class> 2array <or-class> bar <primitive-class> t replace-question ] unit-test
+[ T{ primitive-class f foo } ] [ foo <primitive-class> bar <primitive-class> 2array <or-class> bar <primitive-class> f replace-question ] unit-test
! See http://factorcode.org/license.txt for BSD license.
USING: accessors kernel math math.order words combinators locals
ascii unicode.categories combinators.short-circuit sequences
-fry macros arrays ;
+fry macros arrays assocs sets ;
IN: regexp.classes
SINGLETONS: any-char any-char-no-nl
class>> class-member? ;
UNION: class primitive-class not-class or-class and-class range ;
+
+TUPLE: condition question yes no ;
+C: <condition> condition
+
+GENERIC# replace-question 2 ( class from to -- new-class )
+
+M:: object replace-question ( class from to -- new-class )
+ class from = to class ? ;
+
+: replace-compound ( class from to -- seq )
+ [ seq>> ] 2dip '[ _ _ replace-question ] map ;
+
+M: and-class replace-question
+ replace-compound <and-class> ;
+
+M: or-class replace-question
+ replace-compound <or-class> ;
+
+M: not-class replace-question
+ class>> replace-question <not-class> ;
+
+: answer ( table question answer -- new-table )
+ '[ [ _ _ replace-question ] dip ] assoc-map
+ [ drop ] assoc-filter ;
+
+DEFER: make-condition
+
+: (make-condition) ( table questions question -- condition )
+ [ 2nip ]
+ [ swap [ t answer ] dip make-condition ]
+ [ swap [ f answer ] dip make-condition ] 3tri
+ 2dup = [ 2nip ] [ <condition> ] if ;
+
+: make-condition ( table questions -- condition )
+ [ values ] [ unclip (make-condition) ] if-empty ;
+
+GENERIC: class>questions ( class -- questions )
+: compound-questions ( class -- questions ) seq>> [ class>questions ] gather ;
+M: or-class class>questions compound-questions ;
+M: and-class class>questions compound-questions ;
+M: not-class class>questions class>> class>questions ;
+M: object class>questions 1array ;
+
+: table>questions ( table -- questions )
+ keys <and-class> class>questions t swap remove ;
+
+: table>condition ( table -- condition )
+ >alist dup table>questions make-condition ;
+
+: condition-map ( condition quot: ( obj -- obj' ) -- new-condition )
+ over condition? [
+ [ [ question>> ] [ yes>> ] [ no>> ] tri ] dip
+ '[ _ condition-map ] bi@ <condition>
+ ] [ call ] if ; inline recursive
: literals>cases ( literal-transitions -- case-body )
[ 1quotation ] assoc-map ;
+: condition>quot ( condition -- quot )
+ dup condition? [
+ [ question>> ] [ yes>> ] [ no>> ] tri
+ [ condition>quot ] bi@
+ '[ dup _ class-member? _ _ if ]
+ ] [
+ [ [ 3drop ] ] [ '[ drop _ execute ] ] if-empty
+ ] if ;
+
: non-literals>dispatch ( non-literal-transitions -- quot )
- [ [ '[ dup _ class-member? ] ] [ '[ drop _ execute ] ] bi* ] assoc-map
- [ 3drop ] suffix '[ _ cond ] ;
+ table>condition condition>quot ;
: expand-one-or ( or-class transition -- alist )
[ seq>> ] dip '[ _ 2array ] map ;
: transitions>quot ( transitions final-state? -- quot )
[ split-literals suffix ] dip
- '[ { array-capacity string } declare _ _ step ] ;
+ '[ { array-capacity sequence } declare _ _ step ] ;
: word>quot ( word dfa -- quot )
[ transitions>> at ]
: dfa>word ( dfa -- word )
states>words [ states>code ] keep start-state>> ;
-: check-string ( string -- string )
- dup string? [ "String required" throw ] unless ;
+: check-sequence ( string -- string )
+ ! Make this configurable
+ dup sequence? [ "String required" throw ] unless ;
: run-regexp ( start-index string word -- ? )
- { [ f ] [ >fixnum ] [ check-string ] [ execute ] } spread ; inline
+ { [ f ] [ >fixnum ] [ check-sequence ] [ execute ] } spread ; inline
: dfa>quotation ( dfa -- quot )
dfa>word '[ _ run-regexp ] ;
: find-delta ( states transition nfa -- new-states )
transitions>> '[ _ swap _ at at ] gather sift ;
-TUPLE: condition question yes no ;
-C: <condition> condition
-
:: epsilon-loop ( state table nfa question -- )
state table at :> old-value
old-value question 2array <or-class> :> new-question
] assoc-each
] unless ;
-GENERIC# replace-question 2 ( class from to -- new-class )
-
-M: object replace-question
- [ [ = ] keep ] dip swap ? ;
-
-: replace-compound ( class from to -- seq )
- [ seq>> ] 2dip '[ _ _ replace-question ] map ;
-
-M: and-class replace-question
- replace-compound <and-class> ;
-
-M: or-class replace-question
- replace-compound <or-class> ;
-
-: answer ( table question answer -- new-table )
- '[ _ _ replace-question ] assoc-map
- [ nip ] assoc-filter ;
-
-DEFER: make-condition
-
-: (make-condition) ( table questions question -- condition )
- [ 2nip ]
- [ swap [ t answer ] dip make-condition ]
- [ swap [ f answer ] dip make-condition ] 3tri
- <condition> ;
-
-: make-condition ( table questions -- condition )
- [ keys ] [ unclip (make-condition) ] if-empty ;
-
-GENERIC: class>questions ( class -- questions )
-: compound-questions ( class -- questions ) seq>> [ class>questions ] gather ;
-M: or-class class>questions compound-questions ;
-M: and-class class>questions compound-questions ;
-M: object class>questions 1array ;
-
-: table>condition ( table -- condition )
- ! This is wrong, since actually an arbitrary and-class or or-class can be used
- dup
- values <or-class> class>questions t swap remove
- make-condition ;
-
: epsilon-table ( states nfa -- table )
[ H{ } clone tuck ] dip
'[ _ _ t epsilon-loop ] each ;
: find-epsilon-closure ( states nfa -- dfa-state )
- epsilon-table table>condition ;
+ epsilon-table [ swap ] assoc-map table>condition ;
: find-closure ( states transition nfa -- new-states )
[ find-delta ] keep find-epsilon-closure ;
! See http://factorcode.org/license.txt for BSD license.
USING: kernel sequences regexp.transition-tables fry assocs
accessors locals math sorting arrays sets hashtables regexp.dfa
-combinators.short-circuit ;
+combinators.short-circuit regexp.classes ;
IN: regexp.minimize
: number-transitions ( transitions numbering -- new-transitions )
dup '[
[ _ at ]
- [ [ _ at ] assoc-map ] bi*
+ [ [ [ _ at ] condition-map ] assoc-map ] bi*
] assoc-map ;
: table>state-numbers ( table -- assoc )
dup table>state-numbers
[ number-transitions ] rewrite-transitions ;
+: no-conditions? ( state transition-table -- ? )
+ transitions>> at values [ condition? ] any? not ;
+
: initially-same? ( s1 s2 transition-table -- ? )
{
[ drop <= ]
:: initialize-partitions ( transition-table -- partitions )
! Partition table is sorted-array => ?
H{ } clone :> out
- transition-table transitions>> keys :> states
+ transition-table transitions>> keys
+ [ transition-table no-conditions? ] filter :> states
states [| s1 |
states [| s2 |
s1 s2 transition-table initially-same?
USING: regexp tools.test kernel sequences regexp.parser regexp.private
-regexp.traversal eval strings multiline accessors regexp.matchers ;
+eval strings multiline accessors regexp.matchers ;
IN: regexp-tests
\ <regexp> must-infer
USING: accessors combinators kernel math sequences strings sets
assocs prettyprint.backend prettyprint.custom make lexer
namespaces parser arrays fry locals regexp.minimize
-regexp.parser regexp.nfa regexp.dfa regexp.traversal
+regexp.parser regexp.nfa regexp.dfa
regexp.transition-tables splitting sorting regexp.ast
regexp.negation regexp.matchers regexp.compiler ;
IN: regexp
{ raw read-only }
{ parse-tree read-only }
{ options read-only }
- dfa reverse-dfa dfa-quot ;
+ dfa reverse-dfa ;
: make-regexp ( string ast -- regexp )
- f f <options> f f f regexp boa ; foldable
+ f f <options> f f regexp boa ; foldable
! Foldable because, when the dfa slot is set,
! it'll be set to the same thing regardless of who sets it
: <optioned-regexp> ( string options -- regexp )
[ dup parse-regexp ] [ string>options ] bi*
- f f f regexp boa ;
+ f f regexp boa ;
: <regexp> ( string -- regexp ) "" <optioned-regexp> ;
[ parse-tree>> ] [ options>> ] bi <with-options> ;
: compile-regexp ( regexp -- regexp )
- dup '[ [ _ get-ast ast>dfa ] unless* ] change-dfa ;
-
-: compile-dfa-quot ( regexp -- regexp )
- dup '[ [ _ compile-regexp dfa>> dfa>quotation ] unless* ] change-dfa-quot ;
+ dup '[ [ _ get-ast ast>dfa dfa>quotation ] unless* ] change-dfa ;
: <reversed-option> ( ast -- reversed )
"r" string>options <with-options> ;
: compile-reverse ( regexp -- regexp )
- dup '[ [ _ get-ast <reversed-option> ast>dfa ] unless* ] change-reverse-dfa ;
+ dup '[
+ [
+ _ get-ast <reversed-option>
+ ast>dfa dfa>quotation
+ ] unless*
+ ] change-reverse-dfa ;
M: regexp match-index-from ( string regexp -- index/f )
- dup dfa-quot>>
- [ <quot-matcher> ]
- [ compile-regexp dfa>> <dfa-matcher> ] ?if
- match-index-from ;
+ compile-regexp dfa-quot>> <quot-matcher> match-index-from ;
M: reverse-matcher match-index-from ( string regexp -- index/f )
[ <reversed> ] [ regexp>> compile-reverse reverse-dfa>> ] bi*
- <dfa-traverser> do-match match-index>> ;
+ <quot-matcher> match-index-from ;
: find-regexp-syntax ( string -- prefix suffix )
{