math.statistics: Make you think about which std you want--population or sample. Hopef...

author Doug Coleman <doug.coleman@gmail.com>

Mon, 5 Nov 2012 22:40:14 +0000 (14:40 -0800)

committer Doug Coleman <doug.coleman@gmail.com>

Thu, 8 Nov 2012 06:53:53 +0000 (22:53 -0800)
author Doug Coleman <doug.coleman@gmail.com>
Mon, 5 Nov 2012 22:40:14 +0000 (14:40 -0800)
committer Doug Coleman <doug.coleman@gmail.com>
Thu, 8 Nov 2012 06:53:53 +0000 (22:53 -0800)
diff --git a/basis/math/statistics/statistics-docs.factor b/basis/math/statistics/statistics-docs.factor

index 8f953c2bf3081f4b5f2c69069cad2d2b13411b52..a335241fbc4ce46ef4bf3575fde7726a0555d9a9 100644 (file)
--- a/basis/math/statistics/statistics-docs.factor
+++ b/basis/math/statistics/statistics-docs.factor
@@ -46,33 +46,33 @@ HELP: minmax
      }
  } ;
  
-HELP: std
+HELP: sample-std
  { $values { "seq" sequence } { "x" "a non-negative real number"} }
-{ $description "Computes the standard deviation of " { $snippet "seq" } ", which is the square root of the variance. It measures how widely spread the values in a sequence are about the mean." }
+{ $description "Computes the sample standard deviation of " { $snippet "seq" } ", which is the square root of the sample variance. It measures how widely spread the values in a sequence are about the mean for a random subset of a dataset." }
  { $examples
-  { $example "USING: math.statistics prettyprint ;" "{ 7 8 9 } std ." "1.0" } } ;
+  { $example "USING: math.statistics prettyprint ;" "{ 7 8 9 } sample-std ." "1.0" } } ;
  
-HELP: ste
+HELP: sample-ste
    { $values { "seq" sequence } { "x" "a non-negative real number"} }
    { $description "Computes the standard error of the mean for " { $snippet "seq" } ". It's defined as the standard deviation divided by the square root of the length of the sequence, and measures uncertainty associated with the estimate of the mean." }
    { $examples
-    { $example "USING: math.statistics prettyprint ;" "{ -2 2 } ste ." "2.0" }
+    { $example "USING: math.statistics prettyprint ;" "{ -2 2 } sample-ste ." "2.0" }
    } ;
  
-HELP: var
+HELP: sample-var
  { $values { "seq" sequence } { "x" "a non-negative real number"} }
-{ $description "Computes the variance of " { $snippet "seq" } ". It's a measurement of the spread of values in a sequence. The larger the variance, the larger the distance of values from the mean." }
+{ $description "Computes the sample variance of " { $snippet "seq" } ". It's a measurement of the spread of values in a sequence." }
  { $notes "If the number of elements in " { $snippet "seq" } " is 1 or less, it outputs 0." }
  { $examples
-  { $example "USING: math.statistics prettyprint ;" "{ 1 } var ." "0" }
-  { $example "USING: math.statistics prettyprint ;" "{ 1 2 3 } var ." "1" }
-  { $example "USING: math.statistics prettyprint ;" "{ 1 2 3 4 } var ." "1+2/3" } } ;
+  { $example "USING: math.statistics prettyprint ;" "{ 1 } sample-var ." "0" }
+  { $example "USING: math.statistics prettyprint ;" "{ 1 2 3 } sample-var ." "1" }
+  { $example "USING: math.statistics prettyprint ;" "{ 1 2 3 4 } sample-var ." "1+2/3" } } ;
  
-HELP: cov
+HELP: population-cov
  { $values { "{x}" sequence } { "{y}" sequence } { "cov" "a real number" } }
  { $description "Computes the covariance of two sequences, " { $snippet "{x}" } " and " { $snippet "{y}" } "." } ;
  
-HELP: corr
+HELP: population-corr
  { $values { "{x}" sequence } { "{y}" sequence } { "corr" "a real number" } }
  { $description "Computes the correlation of two sequences, " { $snippet "{x}" } " and " { $snippet "{y}" } "." } ;
  
@@ -281,8 +281,12 @@ ARTICLE: "math.statistics" "Statistics"
  { $subsections median lower-median upper-median medians }
  "Computing the mode:"
  { $subsections mode }
-"Computing the standard deviation, standard error, and variance:"
-{ $subsections std ste var }
+"Computing the population standard deviation, standard error, and variance:"
+{ $subsections population-std population-ste population-var }
+"Computing the sample standard deviation, standard error, and variance:"
+{ $subsections sample-std sample-ste sample-var }
+"Computing the standard deviation, standard error, and variance with a given delta degrees of freedom (ddof):"
+{ $subsections std-ddof ste-ddof var-ddof }
  "Computing the range and minimum and maximum elements:"
  { $subsections range minmax }
  "Computing the kth smallest element:"
@@ -294,7 +298,8 @@ ARTICLE: "math.statistics" "Statistics"
  
  ABOUT: "math.statistics"
  
-{ var var-ddof population-var sample-var } related-words
-{ std std-ddof population-std sample-std } related-words
-{ ste ste-ddof population-ste sample-ste } related-words
-{ corr corr-ddof population-corr sample-corr } related-words
+{ var-ddof population-var sample-var } related-words
+{ std-ddof population-std sample-std } related-words
+{ ste-ddof population-ste sample-ste } related-words
+{ corr-ddof population-corr sample-corr } related-words
+{ cov-ddof population-cov sample-cov } related-words
diff --git a/basis/math/statistics/statistics-tests.factor b/basis/math/statistics/statistics-tests.factor

index 396f9842b44b4442912aef721df67924efc3ea83..925bd0616828ea5b625cb5378a18c05cae80e236 100644 (file)
--- a/basis/math/statistics/statistics-tests.factor
+++ b/basis/math/statistics/statistics-tests.factor
@@ -52,15 +52,14 @@ IN: math.statistics.tests
  [ 2 ] [ { 1 2 } upper-median ] unit-test
  [ 3/2 ] [ { 1 2 } median ] unit-test
  
-[ 1 ] [ { 1 2 3 } var ] unit-test
-[ 16 ] [ { 4 6 8 10 10 12 14 16 } var ] unit-test
+[ 1 ] [ { 1 2 3 } sample-var ] unit-test
+[ 16 ] [ { 4 6 8 10 10 12 14 16 } sample-var ] unit-test
  
  { 16 } [ { 4 6 8 10 12 14 16 } population-var ] unit-test
-{ 1.0 } [ { 7 8 9 } std ] unit-test
+{ 1.0 } [ { 7 8 9 } sample-std ] unit-test
  { 2/3 } [ { 7 8 9 } 0 var-ddof ] unit-test
  { 2/3 } [ { 7 8 9 } population-var ] unit-test
  { 1 } [ { 7 8 9 } 1 var-ddof ] unit-test
-{ 1 } [ { 7 8 9 } var ] unit-test
  { 1 } [ { 7 8 9 } sample-var ] unit-test
  { 2 } [ { 7 8 9 } 2 var-ddof ] unit-test
  { 0 } [ { 7 8 9 } 3 var-ddof ] unit-test
@@ -68,18 +67,18 @@ IN: math.statistics.tests
  { t } [ { 7 8 9 } 0 std-ddof 0.816496580927726 .0001 ~ ] unit-test
  { t } [ { 7 8 9 } population-std 0.816496580927726 .0001 ~ ] unit-test
  { 1.0 } [ { 7 8 9 } 1 std-ddof ] unit-test
-{ 1.0 } [ { 7 8 9 } std ] unit-test
+{ 1.0 } [ { 7 8 9 } sample-std ] unit-test
  { 1.0 } [ { 7 8 9 } sample-std ] unit-test
  { t } [ { 7 8 9 } 2 std-ddof 1.414213562373095 .0001 ~ ] unit-test
  { 0.0 } [ { 7 8 9 } 3 std-ddof ] unit-test
  
-[ t ] [ { 1 2 3 4 } ste 0.6454972243679028 - .0001 < ] unit-test
+[ t ] [ { 1 2 3 4 } sample-ste 0.6454972243679028 - .0001 < ] unit-test
  
-[ t ] [ { 23.2 33.4 22.5 66.3 44.5 } std 18.1906 - .0001 < ] unit-test
+[ t ] [ { 23.2 33.4 22.5 66.3 44.5 } sample-std 18.1906 - .0001 < ] unit-test
  
-[ 0 ] [ { 1 } var ] unit-test
-[ 0.0 ] [ { 1 } std ] unit-test
-[ 0.0 ] [ { 1 } ste ] unit-test
+[ 0 ] [ { 1 } sample-var ] unit-test
+[ 0.0 ] [ { 1 } sample-std ] unit-test
+[ 0.0 ] [ { 1 } sample-ste ] unit-test
  
  { 2 } [ { 1 3 5 7 } mean-dev ] unit-test
  { 4/5 } [ { 1 3 3 3 5 } median-dev ] unit-test
@@ -106,11 +105,12 @@ IN: math.statistics.tests
      [ 0 swap at ] [ 1 swap at ] [ 2 swap at ] tri
  ] unit-test
  
-[ 0 ] [ { 1 } { 1 } cov ] unit-test
-[ 2/3 ] [ { 1 2 3 } { 4 5 6 } cov ] unit-test
+[ 0 ] [ { 1 } { 1 } sample-cov ] unit-test
+[ 2/3 ] [ { 1 2 3 } { 4 5 6 } population-cov ] unit-test
  
-[ 0.75 ] [ { 1 2 3 4 } dup corr ] unit-test
-[ -0.75 ] [ { 1 2 3 4 } { -4 -5 -6 -7 } corr ] unit-test
+[ 0.75 ] [ { 1 2 3 4 } dup sample-corr ] unit-test
+[ 1.0 ] [ { 1 2 3 4 } dup population-corr ] unit-test
+[ -0.75 ] [ { 1 2 3 4 } { -4 -5 -6 -7 } sample-corr ] unit-test
  
  [ { 1 2 4 7 } ] [ { 1 1 2 3 } cum-sum ] unit-test
  [ { 1 1 2 6 } ] [ { 1 1 2 3 } cum-product ] unit-test
@@ -173,7 +173,14 @@ IN: math.statistics.tests
  
  { t t } [
      { 6.5 3.8 6.6 5.7 6.0 6.4 5.3 } standardize
-    [ mean 0 1e-10 ~ ] [ var 1 1e-10 ~ ] bi
+    [ mean 0 1e-10 ~ ] [ sample-var 1 1e-10 ~ ] bi
+] unit-test
+
+{ t t } [
+    { { 1 -1 2 } { 2 0 0 } { 0 1 -1 } } standardize-2d
+    flip
+    [ [ mean ] map { 0 0 0 } 1e-10 v~ ]
+    [ [ sample-var ] map { 1 1 1 } 1e-10 v~ ] bi
  ] unit-test
  
  { { 0 0 0 } } [ { 1 1 1 } standardize ] unit-test
diff --git a/basis/math/statistics/statistics.factor b/basis/math/statistics/statistics.factor

index 838241061ed87cc5822506ad134a59546be3e681..bcaacbb82ef315b77adae2a55e64392741abb7c3 100644 (file)
--- a/basis/math/statistics/statistics.factor
+++ b/basis/math/statistics/statistics.factor
@@ -265,8 +265,6 @@ PRIVATE>
  
  : sample-var ( seq -- x ) 1 var-ddof ; inline
  
-ALIAS: var sample-var
-
  : std-ddof ( seq n -- x )
      var-ddof sqrt ; inline
  
@@ -274,9 +272,7 @@ ALIAS: var sample-var
  
  : sample-std ( seq -- x ) 1 std-ddof ; inline
  
-ALIAS: std sample-std
-
-: signal-to-noise ( seq -- x ) [ mean ] [ std ] bi / ;
+: signal-to-noise ( seq -- x ) [ mean ] [ population-std ] bi / ;
  
  : mean-dev ( seq -- x ) dup mean v-n vabs mean ;
  
@@ -288,8 +284,6 @@ ALIAS: std sample-std
  
  : sample-ste ( seq -- x ) 1 ste-ddof ;
  
-ALIAS: ste sample-ste
-
  : ((r)) ( mean(x) mean(y) {x} {y} -- (r) )
      ! finds sigma((xi-mean(x))(yi-mean(y))
      0 [ [ [ pick ] dip swap - ] bi@ * + ] 2reduce 2nip ;
@@ -298,7 +292,7 @@ ALIAS: ste sample-ste
      * recip [ [ ((r)) ] keep length 1 - / ] dip * ;
  
  : [r] ( {{x,y}...} -- mean(x) mean(y) {x} {y} sx sy )
-    first2 [ [ [ mean ] bi@ ] 2keep ] 2keep [ std ] bi@ ;
+    first2 [ [ [ mean ] bi@ ] 2keep ] 2keep [ population-std ] bi@ ;
  
  : r ( {{x,y}...} -- r )
      [r] (r) ;
@@ -316,20 +310,18 @@ ALIAS: ste sample-ste
  : cov-ddof ( {x} {y} ddof -- cov )
      [ [ dup mean v-n ] bi@ v* ] dip mean-ddof ;
  
-: cov ( {x} {y} -- cov ) 0 cov-ddof ; inline
+: population-cov ( {x} {y} -- cov ) 0 cov-ddof ; inline
  
-: unbiased-cov ( {x} {y} -- cov ) 1 cov-ddof ; inline
+: sample-cov ( {x} {y} -- cov ) 1 cov-ddof ; inline
  
  : corr-ddof ( {x} {y} n -- corr )
-    [ [ cov ] ] dip
+    [ [ population-cov ] ] dip
      '[ [ _ var-ddof ] bi@ * sqrt ] 2bi / ;
  
  : population-corr ( {x} {y} -- corr ) 0 corr-ddof ; inline
  
  : sample-corr ( {x} {y} -- corr ) 1 corr-ddof ; inline
  
-ALIAS: corr sample-corr
-
  : cum-map ( seq identity quot -- seq' )
      swapd [ dup ] compose map nip ; inline
  
@@ -368,11 +360,11 @@ ALIAS: corr sample-corr
      [ dup log * ] [ 1 swap - dup log * ] bi + neg 2 log / ;
  
  : standardize ( u -- v )
-    [ dup mean v-n ] [ std ] bi
+    [ dup mean v-n ] [ sample-std ] bi
      dup zero? [ drop ] [ v/n ] if ;
  
  : standardize-2d ( u -- v )
-    flip dup [ [ mean ] [ std ] bi 2array ] map
+    flip dup [ [ mean ] [ sample-std ] bi 2array ] map
      [ [ first v-n ] 2map ] keep [ second v/n ] 2map flip ;
  
  : differences ( u -- v )
author	Doug Coleman <doug.coleman@gmail.com>
	Mon, 5 Nov 2012 22:40:14 +0000 (14:40 -0800)
committer	Doug Coleman <doug.coleman@gmail.com>
	Thu, 8 Nov 2012 06:53:53 +0000 (22:53 -0800)
basis/math/statistics/statistics-docs.factor		patch \| blob \| history
basis/math/statistics/statistics-tests.factor		patch \| blob \| history
basis/math/statistics/statistics.factor		patch \| blob \| history