! Copyright (C) 2008 Doug Coleman, Michael Judge, Loryn Jenkins.
! See http://factorcode.org/license.txt for BSD license.
-USING: assocs combinators generalizations kernel locals math
-math.functions math.order math.vectors math.ranges sequences
-sequences.private sorting fry arrays grouping sets
-splitting.monotonic ;
+USING: accessors arrays assocs combinators
+combinators.short-circuit fry generalizations grouping kernel
+locals math math.functions math.order ranges math.vectors
+sequences sequences.private sorting ;
IN: math.statistics
: power-mean ( seq p -- x )
! Delta in degrees-of-freedom
: mean-ddof ( seq ddof -- x )
[ [ sum ] [ length ] bi ] dip -
- dup zero? [ 2drop 0 ] [ / ] if ; inline
+ [ drop 0 ] [ / ] if-zero ; inline
: mean ( seq -- x )
0 mean-ddof ; inline
-: unbiased-mean ( seq -- x )
- 1 mean-ddof ; inline
-
-: sum-of-squares ( seq -- x )
- [ sq ] map-sum ; inline
+: meanest ( seq -- x )
+ [ mean ] keep [ - abs ] with infimum-by ;
+
+GENERIC: sum-of-squares ( seq -- x )
+M: object sum-of-squares [ sq ] map-sum ;
+M: iota sum-of-squares
+ n>> 1 - [ ] [ 1 + ] [ 1/2 + ] tri * * 3 / ;
+M: ranges:range sum-of-squares
+ dup { [ step>> 1 = ] [ from>> integer? ] } 1&& [
+ [ from>> ] [ length>> ] bi dupd +
+ [ <iota> sum-of-squares ] bi@ swap -
+ ] [ call-next-method ] if ;
+
+GENERIC: sum-of-cubes ( seq -- x )
+M: object sum-of-cubes [ 3 ^ ] map-sum ;
+M: iota sum-of-cubes sum sq ;
+M: ranges:range sum-of-cubes
+ dup { [ step>> 1 = ] [ from>> integer? ] } 1&& [
+ [ from>> ] [ length>> ] bi dupd +
+ [ <iota> sum-of-cubes ] bi@ swap -
+ ] [ call-next-method ] if ;
+
+GENERIC: sum-of-quads ( seq -- x )
+M: object sum-of-quads [ 4 ^ ] map-sum ;
+M: iota sum-of-quads
+ [let n>> 1 - :> n
+ n 0 > [
+ n
+ n 1 +
+ n 2 * 1 +
+ n sq 3 * n 3 * + 1 -
+ * * * 30 /
+ ] [ 0 ] if
+ ] ;
+M: ranges:range sum-of-quads
+ dup { [ step>> 1 = ] [ from>> integer? ] } 1&& [
+ [ from>> ] [ length>> ] bi dupd +
+ [ <iota> sum-of-quads ] bi@ swap -
+ ] [ call-next-method ] if ;
: sum-of-squared-errors ( seq -- x )
[ mean ] keep [ - sq ] with map-sum ; inline
[ sum-of-squares ] [ length ] bi / sqrt ; inline
: geometric-mean ( seq -- x )
- [ length ] [ product ] bi nth-root ; inline
+ [ [ log ] map-sum ] [ length ] bi /f e^ ; inline
: harmonic-mean ( seq -- x )
- [ recip ] map-sum recip ; inline
+ [ [ recip ] map-sum ] [ length swap / ] bi ; inline
: contraharmonic-mean ( seq -- x )
[ sum-of-squares ] [ sum ] bi / ; inline
<PRIVATE
-:: ((kth-object)) ( seq k nth-quot exchange-quot quot: ( x y -- ? ) -- elt )
- #! Wirth's method, Algorithm's + Data structues = Programs p. 84
+:: kth-object-impl ( seq k nth-quot exchange-quot quot: ( x y -- ? ) -- elt )
+ ! Wirth's method, Algorithm's + Data structues = Programs p. 84
k seq bounds-check 2drop
0 :> i!
0 :> j!
k seq nth-unsafe ; inline
: (kth-object) ( seq k nth-quot exchange-quot quot: ( x y -- ? ) -- elt )
- #! The algorithm modifiers seq, so we clone it
- [ { } clone-like ] 4dip ((kth-object)) ; inline
+ ! The algorithm modifies seq, so we clone it
+ [ >array ] 4dip kth-object-impl ; inline
: kth-object-unsafe ( seq k quot: ( x y -- ? ) -- elt )
[ [ nth-unsafe ] [ exchange-unsafe ] ] dip (kth-object) ; inline
} case
] each ;
-: lower-median-index ( seq -- n )
+: lower-median-index ( seq -- n )
[ midpoint@ ]
[ length odd? [ 1 - ] unless ] bi ;
! could subtract 1 from a
: quantile-x ( a b N q -- x )
- [ + ] dip * + 1 [-] ; inline
+ [ + ] dip * + 1 - ; inline
! 2+1/4 frac is 1/4
: frac ( x -- x' )
>fraction [ /mod nip ] keep / ; inline
-:: quantile-indices ( seq qs a b c d -- seq )
+:: quantile-indices ( seq qs a b -- seq )
qs [ [ a b seq length ] dip quantile-x ] map ;
:: qabcd ( y-floor y-ceiling x c d -- qabcd )
y-floor y-ceiling y-floor - c d x frac * + * + ;
:: quantile-abcd ( seq qs a b c d -- quantile )
- seq qs a b c d quantile-indices :> indices
- indices [ [ floor ] [ ceiling ] bi 2array ] map
+ seq qs a b quantile-indices :> indices
+ indices [ [ floor 0 max ] [ ceiling seq length 1 - min ] bi 2array ] map
concat :> index-pairs
seq index-pairs kth-smallests
: quartile ( seq -- seq' )
{ 1/4 1/2 3/4 } quantile5 ;
-<PRIVATE
-
-: (sequence>assoc) ( seq map-quot insert-quot assoc -- assoc )
- [ swap curry compose each ] keep ; inline
-
-: (sequence-index>assoc) ( seq map-quot insert-quot assoc -- assoc )
- [ swap curry compose each-index ] keep ; inline
+: interquartile ( seq -- q1 q3 )
+ quartile [ first ] [ last ] bi ;
-PRIVATE>
-
-: sequence>assoc! ( assoc seq map-quot: ( x -- ..y ) insert-quot: ( ..y assoc -- ) -- assoc )
- 4 nrot (sequence>assoc) ; inline
+: interquartile-range ( seq -- n )
+ interquartile - ;
-: sequence>assoc ( seq map-quot insert-quot exemplar -- assoc )
- clone (sequence>assoc) ; inline
+: midhinge ( seq -- n )
+ interquartile + 2 / ;
-: sequence-index>assoc ( seq map-quot insert-quot exemplar -- assoc )
- clone (sequence-index>assoc) ; inline
+: trimean ( seq -- x )
+ quartile first3 [ 2 * ] dip + + 4 / ;
-: sequence-index>hashtable ( seq map-quot insert-quot -- hashtable )
- H{ } sequence-index>assoc ; inline
-
-: sequence>hashtable ( seq map-quot insert-quot -- hashtable )
- H{ } sequence>assoc ; inline
+: histogram-by! ( assoc seq quot: ( x -- bin ) -- hashtable )
+ rot [ '[ @ _ inc-at ] each ] keep ; inline
: histogram! ( hashtable seq -- hashtable )
- [ ] [ inc-at ] sequence>assoc! ;
+ [ ] histogram-by! ; inline
: histogram-by ( seq quot: ( x -- bin ) -- hashtable )
- [ inc-at ] sequence>hashtable ; inline
+ [ H{ } clone ] 2dip histogram-by! ; inline
: histogram ( seq -- hashtable )
[ ] histogram-by ;
: normalized-histogram ( seq -- alist )
[ histogram ] [ length ] bi '[ _ / ] assoc-map ;
-: collect-index-by ( ... seq quot: ( ... obj -- ... key ) -- ... hashtable )
- [ dip swap ] curry [ push-at ] sequence-index>hashtable ; inline
-
-: collect-by ( ... seq quot: ( ... obj -- ... key ) -- ... hashtable )
- [ keep swap ] curry [ push-at ] sequence>hashtable ; inline
-
: equal-probabilities ( n -- array )
dup recip <array> ; inline
histogram >alist [ second ] supremum-by first ;
: minmax ( seq -- min max )
- [ first dup ] keep [ [ min ] [ max ] bi-curry bi* ] each ;
+ [ first dup ] keep [ [ min ] [ max ] bi-curry bi* ] 1 each-from ;
: range ( seq -- x )
minmax swap - ;
+: fivenum ( seq -- seq' )
+ [ quartile ] [ minmax ] bi [ prefix ] [ suffix ] bi* ;
+
: var-ddof ( seq n -- x )
2dup [ length ] dip - 0 <= [
2drop 0
: sample-var ( seq -- x ) 1 var-ddof ; inline
-: std-ddof ( seq n -- x )
- var-ddof sqrt ; inline
+: std-ddof ( seq n -- x ) var-ddof sqrt ; inline
: population-std ( seq -- x ) 0 std-ddof ; inline
: sample-ste ( seq -- x ) 1 ste-ddof ;
-: ((r)) ( mean(x) mean(y) {x} {y} -- (r) )
+<PRIVATE
+: r-sum-diffs ( x-mean y-mean x-seq y-seq -- (r) )
! finds sigma((xi-mean(x))(yi-mean(y))
- 0 [ [ [ pick ] dip swap - ] bi@ * + ] 2reduce 2nip ;
+ 0 [ [ reach - ] bi@ * + ] 2reduce 2nip ;
-: (r) ( mean(x) mean(y) {x} {y} sx sy -- r )
- * recip [ [ ((r)) ] keep length 1 - / ] dip * ;
+: (r) ( x-mean y-mean x-seq y-seq x-std y-std -- r )
+ * recip [ [ r-sum-diffs ] keep length 1 - / ] dip * ;
-: [r] ( {{x,y}...} -- mean(x) mean(y) {x} {y} sx sy )
+: r-stats ( xy-pairs -- x-mean y-mean x-seq y-seq x-std y-std )
first2 [ [ [ mean ] bi@ ] 2keep ] 2keep [ population-std ] bi@ ;
+PRIVATE>
-: r ( {{x,y}...} -- r )
- [r] (r) ;
-
-: r^2 ( {{x,y}...} -- r )
- r sq ;
+: pearson-r ( xy-pairs -- r ) r-stats (r) ;
-: least-squares ( {{x,y}...} -- alpha beta )
- [r] { [ 2dup ] [ ] [ ] [ ] [ ] } spread
- ! stack is mean(x) mean(y) mean(x) mean(y) {x} {y} sx sy
+: least-squares ( xy-pairs -- alpha beta )
+ r-stats [ 2dup ] 4dip
+ ! stack is x-mean y-mean x-mean y-mean x-seq y-seq x-std y-std
[ (r) ] 2keep ! stack is mean(x) mean(y) r sx sy
swap / * ! stack is mean(x) mean(y) beta
[ swapd * - ] keep ;
-: cov-ddof ( {x} {y} ddof -- cov )
+: cov-ddof ( x-seq y-seq ddof -- cov )
[ [ demean ] bi@ v* ] dip mean-ddof ;
-: population-cov ( {x} {y} -- cov ) 0 cov-ddof ; inline
+: population-cov ( x-seq y-seq -- cov ) 0 cov-ddof ; inline
-: sample-cov ( {x} {y} -- cov ) 1 cov-ddof ; inline
+: sample-cov ( x-seq y-seq -- cov ) 1 cov-ddof ; inline
-: corr-ddof ( {x} {y} n -- corr )
+: corr-ddof ( x-seq y-seq n -- corr )
[ [ population-cov ] ] dip
'[ [ _ var-ddof ] bi@ * sqrt ] 2bi / ;
-: population-corr ( {x} {y} -- corr ) 0 corr-ddof ; inline
-
-: sample-corr ( {x} {y} -- corr ) 1 corr-ddof ; inline
+: population-corr ( x-seq y-seq -- corr ) 0 corr-ddof ; inline
-: cum-map ( seq identity quot: ( prev elt -- next ) -- seq' )
- swapd [ dup ] compose map nip ; inline
+: sample-corr ( x-seq y-seq -- corr ) 1 corr-ddof ; inline
: cum-sum ( seq -- seq' )
- 0 [ + ] cum-map ;
+ 0 [ + ] accumulate* ;
: cum-sum0 ( seq -- seq' )
0 [ + ] accumulate nip ;
: cum-product ( seq -- seq' )
- 1 [ * ] cum-map ;
+ 1 [ * ] accumulate* ;
+
+: cum-product1 ( seq -- seq' )
+ 1 [ * ] accumulate nip ;
: cum-mean ( seq -- seq' )
0 swap [ [ + dup ] dip 1 + / ] map-index nip ;
-: cum-count ( seq quot -- seq' )
- [ 0 ] dip '[ _ call [ 1 + ] when ] cum-map ; inline
+: cum-count ( seq quot: ( elt -- ? ) -- seq' )
+ [ 0 ] dip '[ @ [ 1 + ] when ] accumulate* ; inline
: cum-min ( seq -- seq' )
- dup ?first [ min ] cum-map ;
+ dup ?first [ min ] accumulate* ;
: cum-max ( seq -- seq' )
- dup ?first [ max ] cum-map ;
+ dup ?first [ max ] accumulate* ;
: entropy ( probabilities -- n )
dup sum '[ _ / dup log * ] map-sum neg ;
: z-score ( seq -- n )
[ demean ] [ sample-std ] bi v/n ;
+
+: dcg ( scores -- dcg )
+ dup length 1 + 2 swap [a..b] [ log 2 log /f ] map v/ sum ;
+
+: ndcg ( scores -- ndcg )
+ [ 0.0 ] [
+ dup dcg [
+ drop 0.0
+ ] [
+ swap natural-sort <reversed> dcg /f
+ ] if-zero
+ ] if-empty ;