gitweb.factorcode.org Git - factor.git/commitdiff
machine-learning.rebalancing: Don't use a distribution for equal rebalancing.
author Doug Coleman <doug.coleman@gmail.com>
Sat, 17 Nov 2012 20:52:55 +0000 (12:52 -0800)
committer Doug Coleman <doug.coleman@gmail.com>
Sat, 17 Nov 2012 20:52:55 +0000 (12:52 -0800)
extra/machine-learning/rebalancing/rebalancing.factor

index c8c77f6f41643d3d5c48adeff43b0f5044dbd33e..d95029f19706ef80e3e935a7eaaa63f8b17921ee 100644 (file)
@@ -22,12 +22,14 @@ MEMO: probabilities-seq ( seq -- seq' )
 : stratified-sample ( stratified-sequences probability-sequence -- elt )
     probabilities-quot call swap nth random ; inline
 
+: equal-stratified-sample ( stratified-sequences -- elt )
+    random random ; inline
+
 : balance-labels ( X y n -- X' y' )
     [
         dup [ ] collect-index-by
-        values dup length equal-probabilities
-        '[
-            _ _ _ _ stratified-sample
+        values '[
+            _ _ _ equal-stratified-sample
             '[ _ swap nth ] bi@ 2array
         ]
     ] dip swap replicate [ keys ] [ values ] bi ;