CONSTANT: url URL" http://downloads.factorcode.org/images/latest/"
: download-checksums ( -- alist )
- url "checksums.txt" >url derive-url http-get nip
+ url "checksums.txt" >url derive-url http-get*
string-lines [ " " split1 ] { } map>assoc ;
: file-checksum ( image -- checksum )
URL" http://www.oracle.com"\r
}\r
2 <semaphore> '[\r
- _ [\r
- http-get nip\r
- ] with-semaphore\r
+ _ [ http-get* ] with-semaphore\r
] parallel-map"""\r
} ;\r
\r
: download-feed ( url -- feed )
#! Retrieve a news syndication file and return it as a feed tuple.
- http-get nip parse-feed ;
+ http-get* parse-feed ;
! Atom generation
] unless ;
: json-data ( url -- json )
- http-get nip json> check-status "data" of ;
+ http-get* json> check-status "data" of ;
: get-short-url ( short-url path -- data )
<bitly-url> swap "shortUrl" set-query-param json-data ;
PRIVATE>
: google-search ( query -- results )
- search-url http-get nip json>
+ search-url http-get* json>
{ "responseData" "results" } deep-at
[ \ search-result from-slots ] map ;
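
A usage sketch; note that the unofficial Google JSON search API this word relies on was later deprecated, so treat it as historical:

    "factor language" google-search
    ! a sequence of search-result tuples built by from-slots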
: translate-tts ( text -- file )
"http://translate.google.com/translate_tts?tl=en" >url
- swap "q" set-query-param http-get nip
+ swap "q" set-query-param http-get*
temporary-file ".mp3" append
[ binary set-file-contents ] keep ;
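
A usage sketch for translate-tts, assuming the translate_tts endpoint still answers plain GET requests as it did here:

    "hello world" translate-tts print
    ! prints the path of a temporary .mp3 file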
] map ;
: hacker-news-items ( -- seq )
- "http://api.ihackernews.com/page" http-get nip
+ "http://api.ihackernews.com/page" http-get*
json> "items" of items> ;
: write-title ( title url -- )
urls.encoding shuffle ;
IN: html.parser.analyzer
-: scrape-html ( url -- headers vector )
+: scrape-html ( url -- response vector )
http-get parse-html ;
: attribute ( tag string -- obj/f )
[ maybe-download-image drop ] [ file-checksum ] bi ;
: latest-counter ( -- counter )
- counter-url get-global http-get nip string>number ;
+ counter-url get-global http-get* string>number ;
: update-sources ( -- )
#! Must be run from builds-dir
TUPLE: page url data before after ;
: json-page ( url -- page )
- >url dup http-get nip json> "data" of {
+ >url dup http-get* json> "data" of {
[ "children" of [ parse-data ] map ]
[ "before" of [ f ] when-json-null ]
[ "after" of [ f ] when-json-null ]
: get-user-info ( username -- user )
"http://api.reddit.com/user/%s/about" sprintf
- http-get nip json> parse-data ;
+ http-get* json> parse-data ;
: get-url-info ( url -- page )
"http://api.reddit.com/api/info?url=%s" sprintf json-page ;
: >robots.txt-url ( url -- url' )
>url URL" robots.txt" derive-url ;
-: get-robots.txt ( url -- headers robots.txt )
- >robots.txt-url http-get ;
+: get-robots.txt ( url -- robots.txt )
+ >robots.txt-url http-get* ;
: normalize-robots.txt ( string -- sitemaps seq )
string-lines
] map ;
: url>robots ( url -- robots )
- >url
- dup get-robots.txt nip parse-robots.txt <robots> ;
+ >url dup get-robots.txt parse-robots.txt <robots> ;
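
With get-robots.txt now returning just the body, url>robots no longer needs the nip. A usage sketch (the host is only an example):

    "http://factorcode.org" url>robots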
! and popular, such as CPAN for Perl or Boost for C++.
: web-scraping-main ( -- )
- "http://tycho.usno.navy.mil/cgi-bin/timer.pl" http-get nip
+ "http://tycho.usno.navy.mil/cgi-bin/timer.pl" http-get*
[ "UTC" swap start [ 9 - ] [ 1 - ] bi ] keep subseq print ;
MAIN: web-scraping-main
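
The index arithmetic in web-scraping-main relies on the timer page printing the clock as "HH:MM:SS UTC": start yields the index where "UTC" begins, and the [ 9 - ] [ 1 - ] bi brackets the eight clock characters just before it. For example, with "17:08:42 UTC" and "UTC" at index i, subseq copies indices i-9 up to i-1, i.e. "17:08:42".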
MEMO: load-http-image ( url -- image/f )
'[ _
- [ http-get [ check-response drop ] dip ]
- [ image-class ] bi load-image*
+ [ http-get* ] [ image-class ] bi load-image*
] [ drop f ] recover ;
: user-image ( user -- image/f )
: do-compile-url ( url -- response )
[
- absolute-url http-get nip 'expression' parse fjsc-compile write "();" write
+ absolute-url http-get* 'expression' parse fjsc-compile write "();" write
] with-string-writer
"application/javascript" <content> ;
"http://en.wikipedia.org/wiki/%s_%s" sprintf ;
: (historical-events) ( timestamp -- seq )
- historical-url http-get nip string>xml "ul" deep-tags-named ;
+ historical-url http-get* string>xml "ul" deep-tags-named ;
: items>sequence ( tag -- seq )
children-tags [ deep-children>string ] map ;
: article. ( name -- )
"http://en.wikipedia.org/wiki/%s" sprintf
- http-get nip parse-html "content" find-by-id-between
+ http-get* parse-html "content" find-by-id-between
[ html-text. ] with-string-writer string-lines
[ [ blank? ] trim ] map harvest [
R/ </ "<" re-replace
: query ( query -- xml )
url-encode wolfram-api-id get-global
"http://api.wolframalpha.com/v2/query?input=%s&appid=%s"
- sprintf http-get nip string>xml
+ sprintf http-get* string>xml
dup "error" tag-named [
"msg" tag-named children>string throw
] when* ;
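
A usage sketch; WOLFRAM-APP-ID below is a placeholder for a real WolframAlpha app id:

    "WOLFRAM-APP-ID" wolfram-api-id set-global
    "population of France" query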
<PRIVATE
: comic-image ( url -- image )
- http-get nip
+ http-get*
R" http://imgs\.xkcd\.com/comics/[^\.]+\.(png|jpg)"
first-match >string load-http-image ;
: comic-image. ( url -- ) comic-image image. ;
: comic-string ( url -- string )
- http-get nip string>xml
+ http-get* string>xml
"transcript" "id" deep-tag-with-attr children>string ;
: comic-text. ( url -- )
swap >>query ;
: yahoo-search ( search -- seq )
- query http-get nip string>xml parse-yahoo ;
+ query http-get* string>xml parse-yahoo ;