-USING: http http.server http.client http.client.private tools.test multiline
-io.streams.string io.encodings.utf8 io.encodings.8-bit
-io.encodings.binary io.encodings.string kernel arrays splitting
-sequences assocs io.sockets db db.sqlite continuations urls
-hashtables accessors namespaces xml.data ;
+USING: http http.server http.client http.client.private tools.test
+multiline io.streams.string io.encodings.utf8 io.encodings.8-bit
+io.encodings.binary io.encodings.string io.encodings.ascii kernel
+arrays splitting sequences assocs io.sockets db db.sqlite
+continuations urls hashtables accessors namespaces xml.data ;
IN: http.tests
+! With no charset parameter, parse-content-type yields the MIME type string
+! and the latin1 encoding.
[ "text/plain" latin1 ] [ "text/plain" parse-content-type ] unit-test
! Test basic auth
+! The expected Authorization header is "Basic " followed by the base64
+! encoding of "Aladdin:open sesame".
[ "Basic QWxhZGRpbjpvcGVuIHNlc2FtZQ==" ] [ <request> "Aladdin" "open sesame" set-basic-auth "Authorization" header ] unit-test
+! Test a corner case with the static responder: a single file is served as
+! the dispatcher's default responder
+! Build a dispatcher whose default responder is a static responder rooted at
+! foo.html, then hand it to test-httpd. add-quit-action and test-httpd are
+! defined outside this view; presumably they install the /quit responder and
+! start the test server -- confirm against their definitions.
+[ ] [
+ <dispatcher>
+ add-quit-action
+ "vocab:http/test/foo.html" <static> >>default
+ test-httpd
+] unit-test
+
+! A GET of the server root must return exactly the contents of foo.html
+! (read here as ascii); nip discards the response object and keeps the body.
+[ t ] [
+ "http://localhost/" add-port http-get nip
+ "vocab:http/test/foo.html" ascii file-contents =
+] unit-test
+
+! Request /quit on the test server and discard both results of http-get;
+! presumably this triggers the action installed by add-quit-action.
+[ ] [ "http://localhost/quit" add-port http-get 2drop ] unit-test
+
+! Check behavior of 307 redirect (reported by Chris Double)
+! Server for the 307 test: a submit to "a" answers with a temporary redirect
+! to "b"; "b" replies "OK" as text/plain only when the posted data equals
+! "data" and throws "OOPS" otherwise. add-quit-action and test-httpd are
+! defined outside this view.
+[ ] [
+ <dispatcher>
+ add-quit-action
+ <action>
+ [ "b" <temporary-redirect> ] >>submit
+ "a" add-responder
+ <action>
+ [
+ request get post-data>> data>> "data" =
+ [ "OK" "text/plain" <content> ] [ "OOPS" throw ] if
+ ] >>submit
+ "b" add-responder
+ test-httpd
+] unit-test
+
+! POST "data" to /a; the body "OK" can only come back if the client follows
+! the redirect to /b and re-sends the same post data there.
+[ "OK" ] [ "data" "http://localhost/a" add-port http-post nip ] unit-test
+! Request /quit on the redirect test server and discard both results.
+[ ] [ "http://localhost/quit" add-port http-get 2drop ] unit-test
\ No newline at end of file
! Copyright (C) 2009 Doug Coleman.
! See http://factorcode.org/license.txt for BSD license.
-USING: calendar io.encodings.utf8 io.files robots tools.test ;
+USING: calendar io.encodings.utf8 io.files robots tools.test
+urls ;
IN: robots.tests
+! Expected result of parsing the bundled robots.txt: two values, an array of
+! sitemap URL strings and an array of rules tuples (one per user-agent
+! group). Disallowed paths are expected as URL objects (URL" ...") rather
+! than plain strings.
[
-{ "http://www.chiplist.com/sitemap.txt" }
-{
- T{ rules
- { user-agents V{ "*" } }
- { allows V{ } }
- { disallows
- V{
- "/cgi-bin/"
- "/scripts/"
- "/ChipList2/scripts/"
- "/ChipList2/styles/"
- "/ads/"
- "/ChipList2/ads/"
- "/advertisements/"
- "/ChipList2/advertisements/"
- "/graphics/"
- "/ChipList2/graphics/"
+ { "http://www.chiplist.com/sitemap.txt" }
+ {
+ T{ rules
+ { user-agents V{ "*" } }
+ { allows V{ } }
+ { disallows
+ V{
+ URL" /cgi-bin/"
+ URL" /scripts/"
+ URL" /ChipList2/scripts/"
+ URL" /ChipList2/styles/"
+ URL" /ads/"
+ URL" /ChipList2/ads/"
+ URL" /advertisements/"
+ URL" /ChipList2/advertisements/"
+ URL" /graphics/"
+ URL" /ChipList2/graphics/"
+ }
}
- }
- { visit-time
- {
- T{ timestamp { hour 2 } }
- T{ timestamp { hour 5 } }
+ { visit-time
+ {
+ T{ timestamp { hour 2 } }
+ T{ timestamp { hour 5 } }
+ }
}
+ { request-rate 1 }
+ { crawl-delay 1 }
+ { unknowns H{ } }
}
- { request-rate 1 }
- { crawl-delay 1 }
- { unknowns H{ } }
- }
- T{ rules
- { user-agents V{ "UbiCrawler" } }
- { allows V{ } }
- { disallows V{ "/" } }
- { unknowns H{ } }
- }
- T{ rules
- { user-agents V{ "DOC" } }
- { allows V{ } }
- { disallows V{ "/" } }
- { unknowns H{ } }
- }
- T{ rules
- { user-agents V{ "Zao" } }
- { allows V{ } }
- { disallows V{ "/" } }
- { unknowns H{ } }
- }
- T{ rules
- { user-agents V{ "sitecheck.internetseer.com" } }
- { allows V{ } }
- { disallows V{ "/" } }
- { unknowns H{ } }
- }
- T{ rules
- { user-agents V{ "Zealbot" } }
- { allows V{ } }
- { disallows V{ "/" } }
- { unknowns H{ } }
- }
- T{ rules
- { user-agents V{ "MSIECrawler" } }
- { allows V{ } }
- { disallows V{ "/" } }
- { unknowns H{ } }
- }
- T{ rules
- { user-agents V{ "SiteSnagger" } }
- { allows V{ } }
- { disallows V{ "/" } }
- { unknowns H{ } }
- }
- T{ rules
- { user-agents V{ "WebStripper" } }
- { allows V{ } }
- { disallows V{ "/" } }
- { unknowns H{ } }
- }
- T{ rules
- { user-agents V{ "WebCopier" } }
- { allows V{ } }
- { disallows V{ "/" } }
- { unknowns H{ } }
- }
- T{ rules
- { user-agents V{ "Fetch" } }
- { allows V{ } }
- { disallows V{ "/" } }
- { unknowns H{ } }
- }
- T{ rules
- { user-agents V{ "Offline Explorer" } }
- { allows V{ } }
- { disallows V{ "/" } }
- { unknowns H{ } }
- }
- T{ rules
- { user-agents V{ "Teleport" } }
- { allows V{ } }
- { disallows V{ "/" } }
- { unknowns H{ } }
- }
- T{ rules
- { user-agents V{ "TeleportPro" } }
- { allows V{ } }
- { disallows V{ "/" } }
- { unknowns H{ } }
- }
- T{ rules
- { user-agents V{ "WebZIP" } }
- { allows V{ } }
- { disallows V{ "/" } }
- { unknowns H{ } }
- }
- T{ rules
- { user-agents V{ "linko" } }
- { allows V{ } }
- { disallows V{ "/" } }
- { unknowns H{ } }
- }
- T{ rules
- { user-agents V{ "HTTrack" } }
- { allows V{ } }
- { disallows V{ "/" } }
- { unknowns H{ } }
- }
- T{ rules
- { user-agents V{ "Microsoft.URL.Control" } }
- { allows V{ } }
- { disallows V{ "/" } }
- { unknowns H{ } }
- }
- T{ rules
- { user-agents V{ "Xenu" } }
- { allows V{ } }
- { disallows V{ "/" } }
- { unknowns H{ } }
- }
- T{ rules
- { user-agents V{ "larbin" } }
- { allows V{ } }
- { disallows V{ "/" } }
- { unknowns H{ } }
- }
- T{ rules
- { user-agents V{ "libwww" } }
- { allows V{ } }
- { disallows V{ "/" } }
- { unknowns H{ } }
- }
- T{ rules
- { user-agents V{ "ZyBORG" } }
- { allows V{ } }
- { disallows V{ "/" } }
- { unknowns H{ } }
- }
- T{ rules
- { user-agents V{ "Download Ninja" } }
- { allows V{ } }
- { disallows V{ "/" } }
- { unknowns H{ } }
- }
- T{ rules
- { user-agents V{ "wget" } }
- { allows V{ } }
- { disallows V{ "/" } }
- { unknowns H{ } }
- }
- T{ rules
- { user-agents V{ "grub-client" } }
- { allows V{ } }
- { disallows V{ "/" } }
- { unknowns H{ } }
- }
- T{ rules
- { user-agents V{ "k2spider" } }
- { allows V{ } }
- { disallows V{ "/" } }
- { unknowns H{ } }
- }
- T{ rules
- { user-agents V{ "NPBot" } }
- { allows V{ } }
- { disallows V{ "/" } }
- { unknowns H{ } }
- }
- T{ rules
- { user-agents V{ "WebReaper" } }
- { allows V{ } }
- { disallows V{ "/" } }
- { unknowns H{ } }
- }
- T{ rules
- { user-agents
- V{
- "abot"
- "ALeadSoftbot"
- "BeijingCrawler"
- "BilgiBot"
- "bot"
- "botlist"
- "BOTW Spider"
- "bumblebee"
- "Bumblebee"
- "BuzzRankingBot"
- "Charlotte"
- "Clushbot"
- "Crawler"
- "CydralSpider"
- "DataFountains"
- "DiamondBot"
- "Dulance bot"
- "DYNAMIC"
- "EARTHCOM.info"
- "EDI"
- "envolk"
- "Exabot"
- "Exabot-Images"
- "Exabot-Test"
- "exactseek-pagereaper"
- "Exalead NG"
- "FANGCrawl"
- "Feed::Find"
- "flatlandbot"
- "Gigabot"
- "GigabotSiteSearch"
- "GurujiBot"
- "Hatena Antenna"
- "Hatena Bookmark"
- "Hatena RSS"
- "HatenaScreenshot"
- "Helix"
- "HiddenMarket"
- "HyperEstraier"
- "iaskspider"
- "IIITBOT"
- "InfociousBot"
- "iVia"
- "iVia Page Fetcher"
- "Jetbot"
- "Kolinka Forum Search"
- "KRetrieve"
- "LetsCrawl.com"
- "Lincoln State Web Browser"
- "Links4US-Crawler"
- "LOOQ"
- "Lsearch/sondeur"
- "MapoftheInternet.com"
- "NationalDirectory"
- "NetCarta_WebMapper"
- "NewsGator"
- "NextGenSearchBot"
- "ng"
- "nicebot"
- "NP"
- "NPBot"
- "Nudelsalat"
- "Nutch"
- "OmniExplorer_Bot"
- "OpenIntelligenceData"
- "Oracle Enterprise Search"
- "Pajaczek"
- "panscient.com"
- "PeerFactor 404 crawler"
- "PeerFactor Crawler"
- "PlantyNet"
- "PlantyNet_WebRobot"
- "plinki"
- "PMAFind"
- "Pogodak!"
- "QuickFinder Crawler"
- "Radiation Retriever"
- "Reaper"
- "RedCarpet"
- "ScorpionBot"
- "Scrubby"
- "Scumbot"
- "searchbot"
- "Seeker.lookseek.com"
- "SeznamBot"
- "ShowXML"
- "snap.com"
- "snap.com beta crawler"
- "Snapbot"
- "SnapPreviewBot"
- "sohu"
- "SpankBot"
- "Speedy Spider"
- "Speedy_Spider"
- "SpeedySpider"
- "spider"
- "SquigglebotBot"
- "SurveyBot"
- "SynapticSearch"
- "T-H-U-N-D-E-R-S-T-O-N-E"
- "Talkro Web-Shot"
- "Tarantula"
- "TerrawizBot"
- "TheInformant"
- "TMCrawler"
- "TridentSpider"
- "Tutorial Crawler"
- "Twiceler"
- "unwrapbot"
- "URI::Fetch"
- "VengaBot"
- "Vonna.com b o t"
- "Vortex"
- "Votay bot"
- "WebAlta Crawler"
- "Webbot"
- "Webclipping.com"
- "WebCorp"
- "Webinator"
- "WIRE"
- "WISEbot"
- "Xerka WebBot"
- "XSpider"
- "YodaoBot"
- "Yoono"
- "yoono"
+ T{ rules
+ { user-agents V{ "UbiCrawler" } }
+ { allows V{ } }
+ { disallows V{ URL" /" } }
+ { unknowns H{ } }
+ }
+ T{ rules
+ { user-agents V{ "DOC" } }
+ { allows V{ } }
+ { disallows V{ URL" /" } }
+ { unknowns H{ } }
+ }
+ T{ rules
+ { user-agents V{ "Zao" } }
+ { allows V{ } }
+ { disallows V{ URL" /" } }
+ { unknowns H{ } }
+ }
+ T{ rules
+ { user-agents V{ "sitecheck.internetseer.com" } }
+ { allows V{ } }
+ { disallows V{ URL" /" } }
+ { unknowns H{ } }
+ }
+ T{ rules
+ { user-agents V{ "Zealbot" } }
+ { allows V{ } }
+ { disallows V{ URL" /" } }
+ { unknowns H{ } }
+ }
+ T{ rules
+ { user-agents V{ "MSIECrawler" } }
+ { allows V{ } }
+ { disallows V{ URL" /" } }
+ { unknowns H{ } }
+ }
+ T{ rules
+ { user-agents V{ "SiteSnagger" } }
+ { allows V{ } }
+ { disallows V{ URL" /" } }
+ { unknowns H{ } }
+ }
+ T{ rules
+ { user-agents V{ "WebStripper" } }
+ { allows V{ } }
+ { disallows V{ URL" /" } }
+ { unknowns H{ } }
+ }
+ T{ rules
+ { user-agents V{ "WebCopier" } }
+ { allows V{ } }
+ { disallows V{ URL" /" } }
+ { unknowns H{ } }
+ }
+ T{ rules
+ { user-agents V{ "Fetch" } }
+ { allows V{ } }
+ { disallows V{ URL" /" } }
+ { unknowns H{ } }
+ }
+ T{ rules
+ { user-agents V{ "Offline Explorer" } }
+ { allows V{ } }
+ { disallows V{ URL" /" } }
+ { unknowns H{ } }
+ }
+ T{ rules
+ { user-agents V{ "Teleport" } }
+ { allows V{ } }
+ { disallows V{ URL" /" } }
+ { unknowns H{ } }
+ }
+ T{ rules
+ { user-agents V{ "TeleportPro" } }
+ { allows V{ } }
+ { disallows V{ URL" /" } }
+ { unknowns H{ } }
+ }
+ T{ rules
+ { user-agents V{ "WebZIP" } }
+ { allows V{ } }
+ { disallows V{ URL" /" } }
+ { unknowns H{ } }
+ }
+ T{ rules
+ { user-agents V{ "linko" } }
+ { allows V{ } }
+ { disallows V{ URL" /" } }
+ { unknowns H{ } }
+ }
+ T{ rules
+ { user-agents V{ "HTTrack" } }
+ { allows V{ } }
+ { disallows V{ URL" /" } }
+ { unknowns H{ } }
+ }
+ T{ rules
+ { user-agents V{ "Microsoft.URL.Control" } }
+ { allows V{ } }
+ { disallows V{ URL" /" } }
+ { unknowns H{ } }
+ }
+ T{ rules
+ { user-agents V{ "Xenu" } }
+ { allows V{ } }
+ { disallows V{ URL" /" } }
+ { unknowns H{ } }
+ }
+ T{ rules
+ { user-agents V{ "larbin" } }
+ { allows V{ } }
+ { disallows V{ URL" /" } }
+ { unknowns H{ } }
+ }
+ T{ rules
+ { user-agents V{ "libwww" } }
+ { allows V{ } }
+ { disallows V{ URL" /" } }
+ { unknowns H{ } }
+ }
+ T{ rules
+ { user-agents V{ "ZyBORG" } }
+ { allows V{ } }
+ { disallows V{ URL" /" } }
+ { unknowns H{ } }
+ }
+ T{ rules
+ { user-agents V{ "Download Ninja" } }
+ { allows V{ } }
+ { disallows V{ URL" /" } }
+ { unknowns H{ } }
+ }
+ T{ rules
+ { user-agents V{ "wget" } }
+ { allows V{ } }
+ { disallows V{ URL" /" } }
+ { unknowns H{ } }
+ }
+ T{ rules
+ { user-agents V{ "grub-client" } }
+ { allows V{ } }
+ { disallows V{ URL" /" } }
+ { unknowns H{ } }
+ }
+ T{ rules
+ { user-agents V{ "k2spider" } }
+ { allows V{ } }
+ { disallows V{ URL" /" } }
+ { unknowns H{ } }
+ }
+ T{ rules
+ { user-agents V{ "NPBot" } }
+ { allows V{ } }
+ { disallows V{ URL" /" } }
+ { unknowns H{ } }
+ }
+ T{ rules
+ { user-agents V{ "WebReaper" } }
+ { allows V{ } }
+ { disallows V{ URL" /" } }
+ { unknowns H{ } }
+ }
+ T{ rules
+ { user-agents
+ V{
+ "abot"
+ "ALeadSoftbot"
+ "BeijingCrawler"
+ "BilgiBot"
+ "bot"
+ "botlist"
+ "BOTW Spider"
+ "bumblebee"
+ "Bumblebee"
+ "BuzzRankingBot"
+ "Charlotte"
+ "Clushbot"
+ "Crawler"
+ "CydralSpider"
+ "DataFountains"
+ "DiamondBot"
+ "Dulance bot"
+ "DYNAMIC"
+ "EARTHCOM.info"
+ "EDI"
+ "envolk"
+ "Exabot"
+ "Exabot-Images"
+ "Exabot-Test"
+ "exactseek-pagereaper"
+ "Exalead NG"
+ "FANGCrawl"
+ "Feed::Find"
+ "flatlandbot"
+ "Gigabot"
+ "GigabotSiteSearch"
+ "GurujiBot"
+ "Hatena Antenna"
+ "Hatena Bookmark"
+ "Hatena RSS"
+ "HatenaScreenshot"
+ "Helix"
+ "HiddenMarket"
+ "HyperEstraier"
+ "iaskspider"
+ "IIITBOT"
+ "InfociousBot"
+ "iVia"
+ "iVia Page Fetcher"
+ "Jetbot"
+ "Kolinka Forum Search"
+ "KRetrieve"
+ "LetsCrawl.com"
+ "Lincoln State Web Browser"
+ "Links4US-Crawler"
+ "LOOQ"
+ "Lsearch/sondeur"
+ "MapoftheInternet.com"
+ "NationalDirectory"
+ "NetCarta_WebMapper"
+ "NewsGator"
+ "NextGenSearchBot"
+ "ng"
+ "nicebot"
+ "NP"
+ "NPBot"
+ "Nudelsalat"
+ "Nutch"
+ "OmniExplorer_Bot"
+ "OpenIntelligenceData"
+ "Oracle Enterprise Search"
+ "Pajaczek"
+ "panscient.com"
+ "PeerFactor 404 crawler"
+ "PeerFactor Crawler"
+ "PlantyNet"
+ "PlantyNet_WebRobot"
+ "plinki"
+ "PMAFind"
+ "Pogodak!"
+ "QuickFinder Crawler"
+ "Radiation Retriever"
+ "Reaper"
+ "RedCarpet"
+ "ScorpionBot"
+ "Scrubby"
+ "Scumbot"
+ "searchbot"
+ "Seeker.lookseek.com"
+ "SeznamBot"
+ "ShowXML"
+ "snap.com"
+ "snap.com beta crawler"
+ "Snapbot"
+ "SnapPreviewBot"
+ "sohu"
+ "SpankBot"
+ "Speedy Spider"
+ "Speedy_Spider"
+ "SpeedySpider"
+ "spider"
+ "SquigglebotBot"
+ "SurveyBot"
+ "SynapticSearch"
+ "T-H-U-N-D-E-R-S-T-O-N-E"
+ "Talkro Web-Shot"
+ "Tarantula"
+ "TerrawizBot"
+ "TheInformant"
+ "TMCrawler"
+ "TridentSpider"
+ "Tutorial Crawler"
+ "Twiceler"
+ "unwrapbot"
+ "URI::Fetch"
+ "VengaBot"
+ "Vonna.com b o t"
+ "Vortex"
+ "Votay bot"
+ "WebAlta Crawler"
+ "Webbot"
+ "Webclipping.com"
+ "WebCorp"
+ "Webinator"
+ "WIRE"
+ "WISEbot"
+ "Xerka WebBot"
+ "XSpider"
+ "YodaoBot"
+ "Yoono"
+ "yoono"
+ }
}
+ { allows V{ } }
+ { disallows V{ URL" /" } }
+ { unknowns H{ } }
}
- { allows V{ } }
- { disallows V{ "/" } }
- { unknowns H{ } }
}
-}
] [ "vocab:robots/robots.txt" utf8 file-contents parse-robots.txt ] unit-test