]> gitweb.factorcode.org Git - factor.git/commitdiff
machine-learning.data-sets: adding the linnerud dataset.
authorJohn Benediktsson <mrjbq7@gmail.com>
Tue, 4 Dec 2012 18:21:04 +0000 (10:21 -0800)
committerJohn Benediktsson <mrjbq7@gmail.com>
Tue, 4 Dec 2012 18:21:04 +0000 (10:21 -0800)
extra/machine-learning/data-sets/data-sets.factor
extra/machine-learning/data-sets/linnerud.rst [new file with mode: 0644]
extra/machine-learning/data-sets/linnerud_exercise.csv [new file with mode: 0644]
extra/machine-learning/data-sets/linnerud_physiological.csv [new file with mode: 0644]

index d7172e97873abb09a8eff0e4b7ab9746cf9ce294..fa6ebf962220c1ba186984e7f8c66827c1b2125d 100644 (file)
@@ -1,8 +1,8 @@
 ! Copyright (C) 2012 John Benediktsson
 ! See http://factorcode.org/license.txt for BSD license
 
-USING: assocs csv io.encodings.utf8 io.files kernel math.parser
-sequences ;
+USING: accessors ascii assocs csv io.encodings.utf8 io.files
+kernel math.parser sequences splitting ;
 
 IN: machine-learning.data-sets
 
@@ -17,6 +17,11 @@ C: <data-set> data-set
     "resource:extra/machine-learning/data-sets/" prepend
     utf8 file-contents ;
 
+: load-table ( name -- data names ) ! name: file under the data-sets dir; first line = header, rest = rows
+    load-file [ blank? ] trim string-lines ! strip leading/trailing blanks so no empty edge lines remain
+    [ [ blank? ] split-when harvest ] map unclip ! harvest drops empty fields left by runs of blanks
+    [ [ [ string>number ] map ] map ] dip ; ! only the data rows are converted; header names stay strings
+
 PRIVATE>
 
 : load-iris ( -- data-set )
@@ -32,3 +37,12 @@ PRIVATE>
         "sepal length (cm)" "sepal width (cm)"
         "petal length (cm)" "petal width (cm)"
     } <data-set> ;
+
+: load-linnerud ( -- data-set ) ! builds the data-set from the two tables plus the .rst text
+    data-set new
+        "linnerud_exercise.csv" load-table ! leaves ( data-set rows names ) on the stack
+        [ >>data ] dip >>feature-names
+        "linnerud_physiological.csv" load-table ! same shape, stored as the targets
+        [ >>target ] dip >>target-names
+        "linnerud.rst" load-file >>description ; ! raw file contents become the description
+
diff --git a/extra/machine-learning/data-sets/linnerud.rst b/extra/machine-learning/data-sets/linnerud.rst
new file mode 100644 (file)
index 0000000..10e2e1b
--- /dev/null
@@ -0,0 +1,21 @@
+Linnerud dataset
+
+Notes
+-----
+Data Set Characteristics:
+    :Number of Instances: 20
+    :Number of Attributes: 3
+    :Missing Attribute Values: None
+
+The Linnerud dataset contains two small datasets:
+
+- *exercise*: A list containing the following components: exercise data with
+  20 observations on 3 exercise variables: Chins, Situps and Jumps.
+
+- *physiological*: Data frame with 20 observations on 3 physiological variables:
+  Weight, Waist and Pulse.
+
+References
+----------
+  * http://rgm2.lab.nig.ac.jp/RGM2/func.php?rd_id=mixOmics:linnerud
+  * Tenenhaus, M. (1998). La regression PLS: theorie et pratique. Paris: Editions Technic.
diff --git a/extra/machine-learning/data-sets/linnerud_exercise.csv b/extra/machine-learning/data-sets/linnerud_exercise.csv
new file mode 100644 (file)
index 0000000..ac0db1b
--- /dev/null
@@ -0,0 +1,21 @@
+Chins Situps Jumps
+5 162 60
+2 110 60
+12 101 101
+12 105 37
+13 155 58
+4 101 42
+8 101 38
+6 125 40
+15 200 40
+17 251 250
+17 120 38
+13 210 115
+14 215 105
+1 50 50
+6 70 31
+12 210 120
+4 60 25
+11 230 80
+15 225 73
+2 110 43
diff --git a/extra/machine-learning/data-sets/linnerud_physiological.csv b/extra/machine-learning/data-sets/linnerud_physiological.csv
new file mode 100644 (file)
index 0000000..68bd0cd
--- /dev/null
@@ -0,0 +1,21 @@
+Weight Waist Pulse
+191 36 50
+189 37 52
+193 38 58
+162 35 62
+189 35 46
+182 36 56
+211 38 56
+167 34 60
+176 31 74
+154 33 56
+169 34 50
+166 33 52
+154 34 64
+247 46 50
+193 36 46
+202 37 62
+176 37 54
+157 32 52
+156 33 54
+138 33 68