{ $description "Constructs an HTTP TRACE request for the requested URL." }
{ $notes "The request can be passed on to " { $link http-request } ", possibly after cookies and headers are set." } ;
-HELP: download
-{ $values { "url" { $or url string } } }
-{ $description "Downloads the contents of the URL to a file in the " { $link current-directory } " having the same file name." }
-{ $errors "Throws an error if the HTTP request fails." } ;
-
-HELP: download-to
-{ $values { "url" { $or url string } } { "file" "a pathname string" } }
-{ $description "Downloads the contents of the URL to a file with the given pathname." }
-{ $errors "Throws an error if the HTTP request fails." } ;
-
-HELP: ?download-to
-{ $values { "url" { $or url string } } { "file" "a pathname string" } }
-{ $description "Version of " { $link download-to } " that only downloads if " { $snippet "file" } " does not exist." }
-{ $errors "Throws an error if the HTTP request fails." } ;
-
HELP: http-get
{ $values { "url" { $or url string } } { "response" response } { "data" sequence } }
{ $description "Downloads the contents of a URL." }
http-get
http-get*
}
-"Utilities to retrieve a " { $link url } " and save the contents to a file:"
-{ $subsections
- download
- download-to
- ?download-to
-}
+"To download to a file, see the " { $link "http.download" } " vocabulary."
+
"Advanced usage involves constructing a " { $link request } ", which allows " { $link "http.cookies" } " and " { $link "http.headers" } " to be set:"
{ $subsections
<get-request>
: http-get* ( url -- response data )
<get-request> http-request* ;
-: file-too-old? ( file duration -- ? )
- over file-exists? [
- [ file-info created>> ago ] dip after?
- ] [ 2drop t ] if ;
-
-: download-name ( url -- name )
- present file-name "?" split1 drop "/" ?tail drop ;
-
-: download-to ( url file -- )
- binary [
- <get-request> [ write ] with-http-request drop
- ] with-file-writer ;
-
-: ?download-to ( url file -- )
- dup file-exists? [ 2drop ] [ download-to ] if ;
-
-: ?download-update-to ( url file duration -- )
- 2dup file-too-old? [ drop download-to ] [ 3drop ] if ;
-
-: download ( url -- )
- dup download-name download-to ;
-
: <post-request> ( post-data url -- request )
"POST" <client-request>
swap >>post-data ;
--- /dev/null
+Doug Coleman
--- /dev/null
+! Copyright (C) 2024 Doug Coleman.
+! See https://factorcode.org/license.txt for BSD license.
+USING: calendar help.markup help.syntax io.pathnames kernel math
+strings urls ;
+IN: http.download
+
+HELP: download
+{ $values { "url" { $or url string } } { "path" "a pathname string" } }
+{ $description "Downloads the contents of the URL to a file in the " { $link current-directory } " having the same file name and returns the pathname." }
+{ $errors "Throws an error if the HTTP request fails." } ;
+
+HELP: download-to
+{ $values { "url" { $or url string } } { "file" "a pathname string" } { "path" "a pathname string" } }
+{ $description "Downloads the contents of the URL to a file with the given pathname and returns the pathname." }
+{ $errors "Throws an error if the HTTP request fails." } ;
+
+
+ARTICLE: "http.download" "HTTP Download Utilities"
+"The " { $vocab-link "http.download" } " vocabulary provides utilities for downloading files from the web."
+
+"Utilities to retrieve a " { $link url } " and save the contents to a file:"
+{ $subsections
+ download
+ download-to
+}
+;
+
+ABOUT: "http.download"
--- /dev/null
+! Copyright (C) 2024 Doug Coleman.
+! See https://factorcode.org/license.txt for BSD license.
+USING: accessors calendar checksums combinators.short-circuit
+http.client io io.backend io.directories io.encodings.binary
+io.files io.files.info io.files.unique io.pathnames kernel math
+math.order math.parser present sequences shuffle splitting ;
+IN: http.download
+
+: file-too-old-or-not-exists? ( file duration -- ? )
+ [ ?file-info [ created>> ] ?call ]
+ [ ago ] bi*
+ over [ before? ] [ 2drop t ] if ;
+
+: delete-when-old ( file duration -- deleted? )
+ dupd file-too-old-or-not-exists? [ ?delete-file t ] [ drop f ] if ;
+
+: file-matches-checksum? ( file checksum-type bytes -- ? )
+ [ checksum-file ] dip = ;
+
+: delete-when-checksum-mismatches ( file checksum-type bytes -- deleted? )
+ dupdd file-matches-checksum? [ drop f ] [ ?delete-file t ] if ;
+
+: file-size= ( path n -- ? ) [ ?file-info [ size>> ] ?call ] dip = ;
+
+: file-zero-size? ( path -- ? ) 0 file-size= ;
+
+: delete-when-zero-size ( path -- deleted? )
+ dup file-zero-size? [ ?delete-file t ] [ drop f ] if ;
+
+: delete-when-file-size-mismatches? ( file size -- deleted? )
+ dupd file-size= [ drop f ] [ ?delete-file t ] if ;
+
+: download-name ( url -- name )
+ present file-name "?" split1 drop "/" ?tail drop ;
+
+<PRIVATE
+
+: increment-file-extension ( path -- path' )
+ dup file-extension
+ [ ?tail drop ]
+ [
+ ?string>number
+ [ 1 + number>string append ]
+ [ ".1" 3append ] if
+ ] bi ;
+
+: ?parenthesis-number ( str -- n/str ? )
+ dup { [ "(" head? ] [ ")" tail? ] } 1&&
+ [ rest but-last ?string>number ] [ f ] if ;
+
+: increment-file-name ( path -- path' )
+ [
+ file-stem " " split1-last
+ ?parenthesis-number
+ [ 1 + number>string "(" ")" surround " " glue ]
+ [ "(1)" append " " glue ] if
+ ] [
+ file-extension
+ ] bi "." glue ;
+
+: find-next-incremented-name ( path -- path' )
+ dup file-exists? [
+ increment-file-name find-next-incremented-name
+ ] when ;
+
+: next-download-name ( url -- name )
+ download-name find-next-incremented-name ;
+
+: http-write-request ( url -- )
+ <get-request> [ write ] with-http-request drop ;
+
+: download-temporary-name ( url -- prefix suffix )
+ [ "temp." ".temp" ] dip download-name prepend ;
+
+: download-file-to ( url file -- path )
+ [
+ [ download-temporary-name binary ] keep
+ '[ _ http-write-request ] with-unique-file-writer
+ ] dip [ move-file ] keep ;
+
+PRIVATE>
+
+: download-to ( url file -- path )
+ dup file-exists? [ nip ] [ download-file-to ] if ;
+
+: download-outdated-to ( url file duration -- path )
+ 2dup delete-when-old [ drop download-to ] [ drop nip ] if ;
+
+: download ( url -- path )
+ dup download-name download-to ;
: parse-content-length-safe ( request -- content-length )
"content-length" header [
- dup string>number [
- nip dup 0 upload-limit get between? [
+ ?string>number [
+ dup 0 upload-limit get between? [
invalid-content-length
] unless
- ] [ invalid-content-length ] if*
+ ] [ invalid-content-length ] if
] [ content-length-missing ] if* ;
: parse-content ( request content-type -- post-data )