gitweb.factorcode.org Git - factor.git/blobdiff - extra/machine-learning/data-sets/data-sets.factor
http.download: fix for new changes
[factor.git] / extra / machine-learning / data-sets / data-sets.factor
index d01cf51a2c79f16193ea9fac6eb75a39abb28083..8de52112510cdea90acec6a7431e11947d675716 100644 (file)
@@ -2,11 +2,11 @@
 ! See https://factorcode.org/license.txt for BSD license
 
 USING: accessors arrays ascii assocs byte-arrays combinators
-combinators.short-circuit concurrency.combinators csv grouping
-http.client images images.viewer io io.directories
-io.encodings.binary io.encodings.utf8 io.files io.launcher
-io.pathnames kernel math math.parser namespaces sequences
-splitting ui.gadgets.panes ;
+concurrency.combinators csv grouping http.download images
+images.viewer io io.directories io.encodings.binary
+io.encodings.utf8 io.files io.launcher io.pathnames kernel math
+math.parser namespaces sequences splitting ui.gadgets.panes ;
+
 IN: machine-learning.data-sets
 
 TUPLE: data-set
@@ -65,16 +65,6 @@ PRIVATE>
         [ >>targets ] [ >>target-names ] bi*
         "linnerud.rst" load-file >>description ;
 
-: download-to-directory ( url directory -- )
-    dup make-directories
-    [
-        dup { [ download-name file-exists? ] [ file-stem file-exists? ] } 1|| [
-            drop
-        ] [
-            download
-        ] if
-    ] with-directory ;
-
 ! Decompress the file at `path` by handing the command { "gzip" "-d" path }
 ! to try-process. The path is appended as the last element of the command
 ! array, so it reaches gzip as a single argument.
 ! NOTE(review): presumably gzip replaces path with the file minus its ".gz"
 ! suffix, which is what callers look up via file-stem -- confirm.
 : gzip-decompress-file ( path -- )
     { "gzip" "-d" } swap suffix try-process ;
 
@@ -98,15 +88,17 @@ PRIVATE>
         output-stream get stream-nl
     ] each ;
 
+! Directory that load-mnist creates (make-directories) before fetching and
+! unpacking the MNIST archives.
+CONSTANT: datasets-path "resource:datasets/"
+
 : load-mnist ( -- data-set )
-    "resource:datasets" dup make-directories [
+    datasets-path dup make-directories [
         {
-            "https://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz"
-            "https://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz"
-            "https://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz"
-            "https://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz"
+            "https://github.com/golbin/TensorFlow-MNIST/raw/master/mnist/data/train-images-idx3-ubyte.gz"
+            "https://github.com/golbin/TensorFlow-MNIST/raw/master/mnist/data/train-labels-idx1-ubyte.gz"
+            "https://github.com/golbin/TensorFlow-MNIST/raw/master/mnist/data/t10k-images-idx3-ubyte.gz"
+            "https://github.com/golbin/TensorFlow-MNIST/raw/master/mnist/data/t10k-labels-idx1-ubyte.gz"
         }
-        [ [ "resource:datasets/" download-to-directory ] parallel-each ]
+        [ [ download-once ] parallel-each ]
         [ [ dup file-stem file-exists? [ drop ] [ file-name gzip-decompress-file ] if ] each ]
         [ [ file-stem binary file-contents ] map ] tri
         first4 {