X-Git-Url: https://gitweb.factorcode.org/gitweb.cgi?p=factor.git;a=blobdiff_plain;f=extra%2Fmachine-learning%2Fdata-sets%2Fdata-sets.factor;fp=extra%2Fmachine-learning%2Fdata-sets%2Fdata-sets.factor;h=8de52112510cdea90acec6a7431e11947d675716;hp=d01cf51a2c79f16193ea9fac6eb75a39abb28083;hb=1a083937fb43db3c978df1960d41ae257ff449aa;hpb=722d92fbfdeff55645f4148d3016798afe4fd433 diff --git a/extra/machine-learning/data-sets/data-sets.factor b/extra/machine-learning/data-sets/data-sets.factor index d01cf51a2c..8de5211251 100644 --- a/extra/machine-learning/data-sets/data-sets.factor +++ b/extra/machine-learning/data-sets/data-sets.factor @@ -2,11 +2,11 @@ ! See https://factorcode.org/license.txt for BSD license USING: accessors arrays ascii assocs byte-arrays combinators -combinators.short-circuit concurrency.combinators csv grouping -http.client images images.viewer io io.directories -io.encodings.binary io.encodings.utf8 io.files io.launcher -io.pathnames kernel math math.parser namespaces sequences -splitting ui.gadgets.panes ; +concurrency.combinators csv grouping http.download images +images.viewer io io.directories io.encodings.binary +io.encodings.utf8 io.files io.launcher io.pathnames kernel math +math.parser namespaces sequences splitting ui.gadgets.panes ; + IN: machine-learning.data-sets TUPLE: data-set @@ -65,16 +65,6 @@ PRIVATE> [ >>targets ] [ >>target-names ] bi* "linnerud.rst" load-file >>description ; -: download-to-directory ( url directory -- ) - dup make-directories - [ - dup { [ download-name file-exists? ] [ file-stem file-exists? ] } 1|| [ - drop - ] [ - download - ] if - ] with-directory ; - : gzip-decompress-file ( path -- ) { "gzip" "-d" } swap suffix try-process ; @@ -98,15 +88,17 @@ PRIVATE> output-stream get stream-nl ] each ; +CONSTANT: datasets-path "resource:datasets/" + : load-mnist ( -- data-set ) - "resource:datasets" dup make-directories [ + datasets-path dup make-directories [ { - "https://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz" - "https://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz" - "https://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz" - "https://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz" + "https://github.com/golbin/TensorFlow-MNIST/raw/master/mnist/data/train-images-idx3-ubyte.gz" + "https://github.com/golbin/TensorFlow-MNIST/raw/master/mnist/data/train-labels-idx1-ubyte.gz" + "https://github.com/golbin/TensorFlow-MNIST/raw/master/mnist/data/t10k-images-idx3-ubyte.gz" + "https://github.com/golbin/TensorFlow-MNIST/raw/master/mnist/data/t10k-labels-idx1-ubyte.gz" } - [ [ "resource:datasets/" download-to-directory ] parallel-each ] + [ [ download-once ] parallel-each ] [ [ dup file-stem file-exists? [ drop ] [ file-name gzip-decompress-file ] if ] each ] [ [ file-stem binary file-contents ] map ] tri first4 {