math.statistics: Add discounted cumulative gain and normalized dcg.

author Doug Coleman <doug.coleman@gmail.com>

Wed, 7 Oct 2020 03:36:39 +0000 (22:36 -0500)

committer Doug Coleman <doug.coleman@gmail.com>

Wed, 7 Oct 2020 03:46:12 +0000 (22:46 -0500)
author Doug Coleman <doug.coleman@gmail.com>
Wed, 7 Oct 2020 03:36:39 +0000 (22:36 -0500)
committer Doug Coleman <doug.coleman@gmail.com>
Wed, 7 Oct 2020 03:46:12 +0000 (22:46 -0500)
diff --git a/basis/math/statistics/statistics-docs.factor b/basis/math/statistics/statistics-docs.factor

index 17b29a9aedddce543e44c08d462816071a38f60e..021e470543f5b7d870c59a1b4976b5bd06c6a0c6 100644 (file)
--- a/basis/math/statistics/statistics-docs.factor
+++ b/basis/math/statistics/statistics-docs.factor
@@ -1,5 +1,5 @@
-USING: assocs debugger hashtables help.markup help.syntax
-kernel quotations sequences math ;
+USING: debugger hashtables help.markup help.syntax kernel math
+sequences ;
  IN: math.statistics
  
  HELP: geometric-mean
@@ -235,6 +235,28 @@ HELP: z-score
  { $values { "seq" sequence } { "n" number } }
  { $description "Calculates the Z-Score for " { $snippet "seq" } "." } ;
  
+HELP: dcg
+{ $values
+    { "scores" sequence }
+    { "dcg" number }
+}
+{ $description "Calculates the discounted cumulative gain from a list of scores. The discounted cumulative gain can be used to compare two lists of results against each other given scores for each of the results."
+$nl
+"See " { $url "https://en.wikipedia.org/wiki/Discounted_cumulative_gain" }
+} ;
+
+HELP: ndcg
+{ $values
+    { "scores" sequence }
+    { "ndcg" number }
+}
+{ $description "Calculates the normalized discounted cumulative gain from a list of scores. The ndcg is the discounted cumulative gain divided by the theoretical maximum dcg for the given list."
+$nl
+"See " { $url "https://en.wikipedia.org/wiki/Discounted_cumulative_gain" }
+} ;
+
+{ dcg ndcg } related-words
+
  ARTICLE: "histogram" "Computing histograms"
  "Counting elements in a sequence:"
  { $subsections
@@ -284,7 +306,9 @@ ARTICLE: "math.statistics" "Statistics"
  "Counting the frequency of occurrence of elements:"
  { $subsections "histogram" }
  "Computing cumulative sequences:"
-{ $subsections "cumulative" } ;
+{ $subsections "cumulative" }
+"Calculating discounted cumulative gain:"
+{ $subsections dcg ndcg } ;
  
  ABOUT: "math.statistics"
  
diff --git a/basis/math/statistics/statistics-tests.factor b/basis/math/statistics/statistics-tests.factor

index dd023412321623040e538c57a3d349e67986c2c7..1ce4e881f12f21beef00720534b44b5889b33037 100644 (file)
--- a/basis/math/statistics/statistics-tests.factor
+++ b/basis/math/statistics/statistics-tests.factor
@@ -1,6 +1,7 @@
-USING: arrays assocs kernel math math.functions math.statistics sequences
-math.order tools.test math.vectors ;
+USING: arrays kernel math math.functions math.order math.vectors
+sequences tools.test ;
  FROM: math.ranges => [a,b] ;
+IN: math.statistics
  
  { 3 } [ { 1 2 3 4 5 } 1 power-mean ] unit-test
  { t } [ { 1 2 3 4 5 } [ 2 power-mean ] [ quadratic-mean ] bi 1e-10 ~ ] unit-test
@@ -217,3 +218,9 @@ FROM: math.ranges => [a,b] ;
  ] unit-test
  
  { 15+1/2 } [ { 4 8 15 16 23 42 } trimean ] unit-test
+
+{ 0 } [ { } dcg ] unit-test ! empty score list: nothing to sum
+{ 0 } [ { } ndcg ] unit-test ! empty score list: ndcg yields 0
+
+{ t } [ { 3 2 3 0 1 2 } dcg 6.861126688593501 1e-6 ~ ] unit-test ! sum of score / log2(position + 1), compared with epsilon 1e-6
+{ t } [ { 3 2 3 0 1 2 } ndcg 0.9608081943360615 1e-6 ~ ] unit-test ! dcg divided by dcg of the scores sorted descending
+\ No newline at end of file
diff --git a/basis/math/statistics/statistics.factor b/basis/math/statistics/statistics.factor

index 4950dd983cb5750762c7634cc8155274f719f1ac..f1f03d37d52d2004732854216f71f48454c5abc7 100644 (file)
--- a/basis/math/statistics/statistics.factor
+++ b/basis/math/statistics/statistics.factor
@@ -3,6 +3,7 @@
  USING: arrays assocs combinators fry generalizations grouping
  kernel locals math math.functions math.order math.vectors
  sequences sequences.private sorting ;
+FROM: math.ranges => [a,b] ;
  IN: math.statistics
  
  : power-mean ( seq p -- x )
@@ -370,3 +371,11 @@ PRIVATE>
  
  : z-score ( seq -- n )
      [ demean ] [ sample-std ] bi v/n ;
+
+: dcg ( scores -- dcg ) ! sum of score / log2(position + 1), positions counted from 1
+    [ 2 + log 2 log /f / ] map-index sum ;
+
+: ndcg ( scores -- ndcg )
+    ! dcg of scores over dcg of the scores sorted descending; 0 (not 0/0 NaN) when that ideal dcg is 0, i.e. empty or all-zero input.
+    [ dcg ] [ natural-sort <reversed> dcg ] bi
+    [ drop 0 ] [ /f ] if-zero ;
author	Doug Coleman <doug.coleman@gmail.com>
	Wed, 7 Oct 2020 03:36:39 +0000 (22:36 -0500)
committer	Doug Coleman <doug.coleman@gmail.com>
	Wed, 7 Oct 2020 03:46:12 +0000 (22:46 -0500)
basis/math/statistics/statistics-docs.factor		patch \| blob \| history
basis/math/statistics/statistics-tests.factor		patch \| blob \| history
basis/math/statistics/statistics.factor		patch \| blob \| history