gitweb.factorcode.org Git - factor.git/commitdiff
Merge branch 'master' of git://factorcode.org/git/factor
author John Benediktsson <mrjbq7@gmail.com>
Tue, 7 Apr 2009 20:59:59 +0000 (13:59 -0700)
committer John Benediktsson <mrjbq7@gmail.com>
Tue, 7 Apr 2009 20:59:59 +0000 (13:59 -0700)
basis/http/client/client.factor
basis/http/http-tests.factor
basis/http/server/static/static.factor
basis/io/streams/limited/limited-tests.factor
basis/io/streams/limited/limited.factor
extra/robots/robots-tests.factor
extra/robots/robots.factor
extra/site-watcher/site-watcher-tests.factor

index 4099e3d84cc35fea26d929db7ed75a060d13c1b2..805929d27b0a109f9726611da4261ac11a79b64a 100644 (file)
@@ -6,7 +6,7 @@ math.order hashtables byte-arrays destructors
 io io.sockets io.streams.string io.files io.timeouts
 io.pathnames io.encodings io.encodings.string io.encodings.ascii
 io.encodings.utf8 io.encodings.8-bit io.encodings.binary io.crlf
-io.streams.duplex fry ascii urls urls.encoding present
+io.streams.duplex fry ascii urls urls.encoding present locals
 http http.parsers http.client.post-data ;
 IN: http.client
 
@@ -77,12 +77,13 @@ SYMBOL: redirects
 : redirect? ( response -- ? )
     code>> 300 399 between? ;
 
-: do-redirect ( quot: ( chunk -- ) response -- response )
+:: do-redirect ( quot: ( chunk -- ) response -- response )
     redirects inc
     redirects get max-redirects < [
         request get clone
-        swap "location" header redirect-url
-        "GET" >>method swap (with-http-request)
+        response "location" header redirect-url
+        response code>> 307 = [ "GET" >>method ] unless
+        quot (with-http-request)
     ] [ too-many-redirects ] if ; inline recursive
 
 : read-chunk-size ( -- n )
index bc906fad44b3e4862e84d8ca15b33c0ea1f5eb97..da50a6f85f3a63a6dd2c656b04c6138c269d5e8b 100644 (file)
@@ -1,8 +1,8 @@
-USING: http http.server http.client http.client.private tools.test multiline
-io.streams.string io.encodings.utf8 io.encodings.8-bit
-io.encodings.binary io.encodings.string kernel arrays splitting
-sequences assocs io.sockets db db.sqlite continuations urls
-hashtables accessors namespaces xml.data ;
+USING: http http.server http.client http.client.private tools.test
+multiline io.streams.string io.encodings.utf8 io.encodings.8-bit
+io.encodings.binary io.encodings.string io.encodings.ascii kernel
+arrays splitting sequences assocs io.sockets db db.sqlite
+continuations urls hashtables accessors namespaces xml.data ;
 IN: http.tests
 
 [ "text/plain" latin1 ] [ "text/plain" parse-content-type ] unit-test
@@ -359,4 +359,37 @@ SYMBOL: a
 ! Test basic auth
 [ "Basic QWxhZGRpbjpvcGVuIHNlc2FtZQ==" ] [ <request> "Aladdin" "open sesame" set-basic-auth "Authorization" header ] unit-test
 
+! Test a corner case with static responder
+[ ] [
+    <dispatcher>
+        add-quit-action
+        "vocab:http/test/foo.html" <static> >>default
+    test-httpd
+] unit-test
+
+[ t ] [
+    "http://localhost/" add-port http-get nip
+    "vocab:http/test/foo.html" ascii file-contents =
+] unit-test
+
+[ ] [ "http://localhost/quit" add-port http-get 2drop ] unit-test
+
+! Check behavior of 307 redirect (reported by Chris Double)
+[ ] [
+    <dispatcher>
+        add-quit-action
+        <action>
+            [ "b" <temporary-redirect> ] >>submit
+        "a" add-responder
+        <action>
+            [
+                request get post-data>> data>> "data" =
+                [ "OK" "text/plain" <content> ] [ "OOPS" throw ] if
+            ] >>submit
+        "b" add-responder
+    test-httpd
+] unit-test
+
+[ "OK" ] [ "data" "http://localhost/a" add-port http-post nip ] unit-test
 
+[ ] [ "http://localhost/quit" add-port http-get 2drop ] unit-test
\ No newline at end of file
index bbca70d84591dab79913dc8a0ebf68f107c69d9b..f80a3cc7cde7338549bbedbae949cf1d354ac6f1 100644 (file)
@@ -47,8 +47,8 @@ TUPLE: file-responder root hook special allow-listings ;
     if ;\r
 \r
 : serving-path ( filename -- filename )\r
-    [ file-responder get root>> trim-tail-separators "/" ] dip\r
-    "" or trim-head-separators 3append ;\r
+    [ file-responder get root>> trim-tail-separators ] dip\r
+    [ "/" swap trim-head-separators 3append ] unless-empty ;\r
 \r
 : serve-file ( filename -- response )\r
     dup mime-type\r
index 36c257fb5eaa78aeb8f6dd1b734ca316724b082c..86d652d17c52e5d438e8ce7bfb929455ace4498d 100644 (file)
@@ -76,3 +76,9 @@ IN: io.streams.limited.tests
         [ decoder? ] both?
     ] with-destructors
 ] unit-test
+
+[ "HELL" ] [
+    "HELLO"
+    [ f stream-throws limit-input 4 read ]
+    with-string-reader
+] unit-test
\ No newline at end of file
index fe3dd9ad9319589dd3ff6f43b04a6a59020324d3..b1b07a08c07c2c288d658f906c7547b07664d532 100755 (executable)
@@ -22,7 +22,7 @@ M: decoder limit ( stream limit mode -- stream' )
     [ clone ] 2dip '[ _ _ limit ] change-stream ;
 
 M: object limit ( stream limit mode -- stream' )
-    <limited-stream> ;
+    over [ <limited-stream> ] [ 2drop ] if ;
 
 GENERIC: unlimited ( stream -- stream' )
 
@@ -32,9 +32,11 @@ M: decoder unlimited ( stream -- stream' )
 M: object unlimited ( stream -- stream' )
     stream>> stream>> ;
 
-: limit-input ( limit mode -- ) input-stream [ -rot limit ] change ;
+: limit-input ( limit mode -- )
+    [ input-stream ] 2dip '[ _ _ limit ] change ;
 
-: unlimited-input ( -- ) input-stream [ unlimited ] change ;
+: unlimited-input ( -- )
+    input-stream [ unlimited ] change ;
 
 : with-unlimited-stream ( stream quot -- )
     [ clone unlimited ] dip call ; inline
index a590d9eee0d6b377c12b11353da68eedc544779d..54b489268018b6d339121ad82e21d17e3d3dd397 100644 (file)
 ! Copyright (C) 2009 Doug Coleman.
 ! See http://factorcode.org/license.txt for BSD license.
-USING: calendar io.encodings.utf8 io.files robots tools.test ;
+USING: calendar io.encodings.utf8 io.files robots tools.test
+urls ;
 IN: robots.tests
 
 [
-{ "http://www.chiplist.com/sitemap.txt" }
-{
-    T{ rules
-        { user-agents V{ "*" } }
-        { allows V{ } }
-        { disallows
-            V{
-                "/cgi-bin/"
-                "/scripts/"
-                "/ChipList2/scripts/"
-                "/ChipList2/styles/"
-                "/ads/"
-                "/ChipList2/ads/"
-                "/advertisements/"
-                "/ChipList2/advertisements/"
-                "/graphics/"
-                "/ChipList2/graphics/"
+    { "http://www.chiplist.com/sitemap.txt" }
+    {
+        T{ rules
+            { user-agents V{ "*" } }
+            { allows V{ } }
+            { disallows
+                V{
+                    URL" /cgi-bin/"
+                    URL" /scripts/"
+                    URL" /ChipList2/scripts/"
+                    URL" /ChipList2/styles/"
+                    URL" /ads/"
+                    URL" /ChipList2/ads/"
+                    URL" /advertisements/"
+                    URL" /ChipList2/advertisements/"
+                    URL" /graphics/"
+                    URL" /ChipList2/graphics/"
+                }
             }
-        }
-        { visit-time
-            {
-                T{ timestamp { hour 2 } }
-                T{ timestamp { hour 5 } }
+            { visit-time
+                {
+                    T{ timestamp { hour 2 } }
+                    T{ timestamp { hour 5 } }
+                }
             }
+            { request-rate 1 }
+            { crawl-delay 1 }
+            { unknowns H{ } }
         }
-        { request-rate 1 }
-        { crawl-delay 1 }
-        { unknowns H{ } }
-    }
-    T{ rules
-        { user-agents V{ "UbiCrawler" } }
-        { allows V{ } }
-        { disallows V{ "/" } }
-        { unknowns H{ } }
-    }
-    T{ rules
-        { user-agents V{ "DOC" } }
-        { allows V{ } }
-        { disallows V{ "/" } }
-        { unknowns H{ } }
-    }
-    T{ rules
-        { user-agents V{ "Zao" } }
-        { allows V{ } }
-        { disallows V{ "/" } }
-        { unknowns H{ } }
-    }
-    T{ rules
-        { user-agents V{ "sitecheck.internetseer.com" } }
-        { allows V{ } }
-        { disallows V{ "/" } }
-        { unknowns H{ } }
-    }
-    T{ rules
-        { user-agents V{ "Zealbot" } }
-        { allows V{ } }
-        { disallows V{ "/" } }
-        { unknowns H{ } }
-    }
-    T{ rules
-        { user-agents V{ "MSIECrawler" } }
-        { allows V{ } }
-        { disallows V{ "/" } }
-        { unknowns H{ } }
-    }
-    T{ rules
-        { user-agents V{ "SiteSnagger" } }
-        { allows V{ } }
-        { disallows V{ "/" } }
-        { unknowns H{ } }
-    }
-    T{ rules
-        { user-agents V{ "WebStripper" } }
-        { allows V{ } }
-        { disallows V{ "/" } }
-        { unknowns H{ } }
-    }
-    T{ rules
-        { user-agents V{ "WebCopier" } }
-        { allows V{ } }
-        { disallows V{ "/" } }
-        { unknowns H{ } }
-    }
-    T{ rules
-        { user-agents V{ "Fetch" } }
-        { allows V{ } }
-        { disallows V{ "/" } }
-        { unknowns H{ } }
-    }
-    T{ rules
-        { user-agents V{ "Offline Explorer" } }
-        { allows V{ } }
-        { disallows V{ "/" } }
-        { unknowns H{ } }
-    }
-    T{ rules
-        { user-agents V{ "Teleport" } }
-        { allows V{ } }
-        { disallows V{ "/" } }
-        { unknowns H{ } }
-    }
-    T{ rules
-        { user-agents V{ "TeleportPro" } }
-        { allows V{ } }
-        { disallows V{ "/" } }
-        { unknowns H{ } }
-    }
-    T{ rules
-        { user-agents V{ "WebZIP" } }
-        { allows V{ } }
-        { disallows V{ "/" } }
-        { unknowns H{ } }
-    }
-    T{ rules
-        { user-agents V{ "linko" } }
-        { allows V{ } }
-        { disallows V{ "/" } }
-        { unknowns H{ } }
-    }
-    T{ rules
-        { user-agents V{ "HTTrack" } }
-        { allows V{ } }
-        { disallows V{ "/" } }
-        { unknowns H{ } }
-    }
-    T{ rules
-        { user-agents V{ "Microsoft.URL.Control" } }
-        { allows V{ } }
-        { disallows V{ "/" } }
-        { unknowns H{ } }
-    }
-    T{ rules
-        { user-agents V{ "Xenu" } }
-        { allows V{ } }
-        { disallows V{ "/" } }
-        { unknowns H{ } }
-    }
-    T{ rules
-        { user-agents V{ "larbin" } }
-        { allows V{ } }
-        { disallows V{ "/" } }
-        { unknowns H{ } }
-    }
-    T{ rules
-        { user-agents V{ "libwww" } }
-        { allows V{ } }
-        { disallows V{ "/" } }
-        { unknowns H{ } }
-    }
-    T{ rules
-        { user-agents V{ "ZyBORG" } }
-        { allows V{ } }
-        { disallows V{ "/" } }
-        { unknowns H{ } }
-    }
-    T{ rules
-        { user-agents V{ "Download Ninja" } }
-        { allows V{ } }
-        { disallows V{ "/" } }
-        { unknowns H{ } }
-    }
-    T{ rules
-        { user-agents V{ "wget" } }
-        { allows V{ } }
-        { disallows V{ "/" } }
-        { unknowns H{ } }
-    }
-    T{ rules
-        { user-agents V{ "grub-client" } }
-        { allows V{ } }
-        { disallows V{ "/" } }
-        { unknowns H{ } }
-    }
-    T{ rules
-        { user-agents V{ "k2spider" } }
-        { allows V{ } }
-        { disallows V{ "/" } }
-        { unknowns H{ } }
-    }
-    T{ rules
-        { user-agents V{ "NPBot" } }
-        { allows V{ } }
-        { disallows V{ "/" } }
-        { unknowns H{ } }
-    }
-    T{ rules
-        { user-agents V{ "WebReaper" } }
-        { allows V{ } }
-        { disallows V{ "/" } }
-        { unknowns H{ } }
-    }
-    T{ rules
-        { user-agents
-            V{
-                "abot"
-                "ALeadSoftbot"
-                "BeijingCrawler"
-                "BilgiBot"
-                "bot"
-                "botlist"
-                "BOTW Spider"
-                "bumblebee"
-                "Bumblebee"
-                "BuzzRankingBot"
-                "Charlotte"
-                "Clushbot"
-                "Crawler"
-                "CydralSpider"
-                "DataFountains"
-                "DiamondBot"
-                "Dulance bot"
-                "DYNAMIC"
-                "EARTHCOM.info"
-                "EDI"
-                "envolk"
-                "Exabot"
-                "Exabot-Images"
-                "Exabot-Test"
-                "exactseek-pagereaper"
-                "Exalead NG"
-                "FANGCrawl"
-                "Feed::Find"
-                "flatlandbot"
-                "Gigabot"
-                "GigabotSiteSearch"
-                "GurujiBot"
-                "Hatena Antenna"
-                "Hatena Bookmark"
-                "Hatena RSS"
-                "HatenaScreenshot"
-                "Helix"
-                "HiddenMarket"
-                "HyperEstraier"
-                "iaskspider"
-                "IIITBOT"
-                "InfociousBot"
-                "iVia"
-                "iVia Page Fetcher"
-                "Jetbot"
-                "Kolinka Forum Search"
-                "KRetrieve"
-                "LetsCrawl.com"
-                "Lincoln State Web Browser"
-                "Links4US-Crawler"
-                "LOOQ"
-                "Lsearch/sondeur"
-                "MapoftheInternet.com"
-                "NationalDirectory"
-                "NetCarta_WebMapper"
-                "NewsGator"
-                "NextGenSearchBot"
-                "ng"
-                "nicebot"
-                "NP"
-                "NPBot"
-                "Nudelsalat"
-                "Nutch"
-                "OmniExplorer_Bot"
-                "OpenIntelligenceData"
-                "Oracle Enterprise Search"
-                "Pajaczek"
-                "panscient.com"
-                "PeerFactor 404 crawler"
-                "PeerFactor Crawler"
-                "PlantyNet"
-                "PlantyNet_WebRobot"
-                "plinki"
-                "PMAFind"
-                "Pogodak!"
-                "QuickFinder Crawler"
-                "Radiation Retriever"
-                "Reaper"
-                "RedCarpet"
-                "ScorpionBot"
-                "Scrubby"
-                "Scumbot"
-                "searchbot"
-                "Seeker.lookseek.com"
-                "SeznamBot"
-                "ShowXML"
-                "snap.com"
-                "snap.com beta crawler"
-                "Snapbot"
-                "SnapPreviewBot"
-                "sohu"
-                "SpankBot"
-                "Speedy Spider"
-                "Speedy_Spider"
-                "SpeedySpider"
-                "spider"
-                "SquigglebotBot"
-                "SurveyBot"
-                "SynapticSearch"
-                "T-H-U-N-D-E-R-S-T-O-N-E"
-                "Talkro Web-Shot"
-                "Tarantula"
-                "TerrawizBot"
-                "TheInformant"
-                "TMCrawler"
-                "TridentSpider"
-                "Tutorial Crawler"
-                "Twiceler"
-                "unwrapbot"
-                "URI::Fetch"
-                "VengaBot"
-                "Vonna.com b o t"
-                "Vortex"
-                "Votay bot"
-                "WebAlta Crawler"
-                "Webbot"
-                "Webclipping.com"
-                "WebCorp"
-                "Webinator"
-                "WIRE"
-                "WISEbot"
-                "Xerka WebBot"
-                "XSpider"
-                "YodaoBot"
-                "Yoono"
-                "yoono"
+        T{ rules
+            { user-agents V{ "UbiCrawler" } }
+            { allows V{ } }
+            { disallows V{ URL" /" } }
+            { unknowns H{ } }
+        }
+        T{ rules
+            { user-agents V{ "DOC" } }
+            { allows V{ } }
+            { disallows V{ URL" /" } }
+            { unknowns H{ } }
+        }
+        T{ rules
+            { user-agents V{ "Zao" } }
+            { allows V{ } }
+            { disallows V{ URL" /" } }
+            { unknowns H{ } }
+        }
+        T{ rules
+            { user-agents V{ "sitecheck.internetseer.com" } }
+            { allows V{ } }
+            { disallows V{ URL" /" } }
+            { unknowns H{ } }
+        }
+        T{ rules
+            { user-agents V{ "Zealbot" } }
+            { allows V{ } }
+            { disallows V{ URL" /" } }
+            { unknowns H{ } }
+        }
+        T{ rules
+            { user-agents V{ "MSIECrawler" } }
+            { allows V{ } }
+            { disallows V{ URL" /" } }
+            { unknowns H{ } }
+        }
+        T{ rules
+            { user-agents V{ "SiteSnagger" } }
+            { allows V{ } }
+            { disallows V{ URL" /" } }
+            { unknowns H{ } }
+        }
+        T{ rules
+            { user-agents V{ "WebStripper" } }
+            { allows V{ } }
+            { disallows V{ URL" /" } }
+            { unknowns H{ } }
+        }
+        T{ rules
+            { user-agents V{ "WebCopier" } }
+            { allows V{ } }
+            { disallows V{ URL" /" } }
+            { unknowns H{ } }
+        }
+        T{ rules
+            { user-agents V{ "Fetch" } }
+            { allows V{ } }
+            { disallows V{ URL" /" } }
+            { unknowns H{ } }
+        }
+        T{ rules
+            { user-agents V{ "Offline Explorer" } }
+            { allows V{ } }
+            { disallows V{ URL" /" } }
+            { unknowns H{ } }
+        }
+        T{ rules
+            { user-agents V{ "Teleport" } }
+            { allows V{ } }
+            { disallows V{ URL" /" } }
+            { unknowns H{ } }
+        }
+        T{ rules
+            { user-agents V{ "TeleportPro" } }
+            { allows V{ } }
+            { disallows V{ URL" /" } }
+            { unknowns H{ } }
+        }
+        T{ rules
+            { user-agents V{ "WebZIP" } }
+            { allows V{ } }
+            { disallows V{ URL" /" } }
+            { unknowns H{ } }
+        }
+        T{ rules
+            { user-agents V{ "linko" } }
+            { allows V{ } }
+            { disallows V{ URL" /" } }
+            { unknowns H{ } }
+        }
+        T{ rules
+            { user-agents V{ "HTTrack" } }
+            { allows V{ } }
+            { disallows V{ URL" /" } }
+            { unknowns H{ } }
+        }
+        T{ rules
+            { user-agents V{ "Microsoft.URL.Control" } }
+            { allows V{ } }
+            { disallows V{ URL" /" } }
+            { unknowns H{ } }
+        }
+        T{ rules
+            { user-agents V{ "Xenu" } }
+            { allows V{ } }
+            { disallows V{ URL" /" } }
+            { unknowns H{ } }
+        }
+        T{ rules
+            { user-agents V{ "larbin" } }
+            { allows V{ } }
+            { disallows V{ URL" /" } }
+            { unknowns H{ } }
+        }
+        T{ rules
+            { user-agents V{ "libwww" } }
+            { allows V{ } }
+            { disallows V{ URL" /" } }
+            { unknowns H{ } }
+        }
+        T{ rules
+            { user-agents V{ "ZyBORG" } }
+            { allows V{ } }
+            { disallows V{ URL" /" } }
+            { unknowns H{ } }
+        }
+        T{ rules
+            { user-agents V{ "Download Ninja" } }
+            { allows V{ } }
+            { disallows V{ URL" /" } }
+            { unknowns H{ } }
+        }
+        T{ rules
+            { user-agents V{ "wget" } }
+            { allows V{ } }
+            { disallows V{ URL" /" } }
+            { unknowns H{ } }
+        }
+        T{ rules
+            { user-agents V{ "grub-client" } }
+            { allows V{ } }
+            { disallows V{ URL" /" } }
+            { unknowns H{ } }
+        }
+        T{ rules
+            { user-agents V{ "k2spider" } }
+            { allows V{ } }
+            { disallows V{ URL" /" } }
+            { unknowns H{ } }
+        }
+        T{ rules
+            { user-agents V{ "NPBot" } }
+            { allows V{ } }
+            { disallows V{ URL" /" } }
+            { unknowns H{ } }
+        }
+        T{ rules
+            { user-agents V{ "WebReaper" } }
+            { allows V{ } }
+            { disallows V{ URL" /" } }
+            { unknowns H{ } }
+        }
+        T{ rules
+            { user-agents
+                V{
+                    "abot"
+                    "ALeadSoftbot"
+                    "BeijingCrawler"
+                    "BilgiBot"
+                    "bot"
+                    "botlist"
+                    "BOTW Spider"
+                    "bumblebee"
+                    "Bumblebee"
+                    "BuzzRankingBot"
+                    "Charlotte"
+                    "Clushbot"
+                    "Crawler"
+                    "CydralSpider"
+                    "DataFountains"
+                    "DiamondBot"
+                    "Dulance bot"
+                    "DYNAMIC"
+                    "EARTHCOM.info"
+                    "EDI"
+                    "envolk"
+                    "Exabot"
+                    "Exabot-Images"
+                    "Exabot-Test"
+                    "exactseek-pagereaper"
+                    "Exalead NG"
+                    "FANGCrawl"
+                    "Feed::Find"
+                    "flatlandbot"
+                    "Gigabot"
+                    "GigabotSiteSearch"
+                    "GurujiBot"
+                    "Hatena Antenna"
+                    "Hatena Bookmark"
+                    "Hatena RSS"
+                    "HatenaScreenshot"
+                    "Helix"
+                    "HiddenMarket"
+                    "HyperEstraier"
+                    "iaskspider"
+                    "IIITBOT"
+                    "InfociousBot"
+                    "iVia"
+                    "iVia Page Fetcher"
+                    "Jetbot"
+                    "Kolinka Forum Search"
+                    "KRetrieve"
+                    "LetsCrawl.com"
+                    "Lincoln State Web Browser"
+                    "Links4US-Crawler"
+                    "LOOQ"
+                    "Lsearch/sondeur"
+                    "MapoftheInternet.com"
+                    "NationalDirectory"
+                    "NetCarta_WebMapper"
+                    "NewsGator"
+                    "NextGenSearchBot"
+                    "ng"
+                    "nicebot"
+                    "NP"
+                    "NPBot"
+                    "Nudelsalat"
+                    "Nutch"
+                    "OmniExplorer_Bot"
+                    "OpenIntelligenceData"
+                    "Oracle Enterprise Search"
+                    "Pajaczek"
+                    "panscient.com"
+                    "PeerFactor 404 crawler"
+                    "PeerFactor Crawler"
+                    "PlantyNet"
+                    "PlantyNet_WebRobot"
+                    "plinki"
+                    "PMAFind"
+                    "Pogodak!"
+                    "QuickFinder Crawler"
+                    "Radiation Retriever"
+                    "Reaper"
+                    "RedCarpet"
+                    "ScorpionBot"
+                    "Scrubby"
+                    "Scumbot"
+                    "searchbot"
+                    "Seeker.lookseek.com"
+                    "SeznamBot"
+                    "ShowXML"
+                    "snap.com"
+                    "snap.com beta crawler"
+                    "Snapbot"
+                    "SnapPreviewBot"
+                    "sohu"
+                    "SpankBot"
+                    "Speedy Spider"
+                    "Speedy_Spider"
+                    "SpeedySpider"
+                    "spider"
+                    "SquigglebotBot"
+                    "SurveyBot"
+                    "SynapticSearch"
+                    "T-H-U-N-D-E-R-S-T-O-N-E"
+                    "Talkro Web-Shot"
+                    "Tarantula"
+                    "TerrawizBot"
+                    "TheInformant"
+                    "TMCrawler"
+                    "TridentSpider"
+                    "Tutorial Crawler"
+                    "Twiceler"
+                    "unwrapbot"
+                    "URI::Fetch"
+                    "VengaBot"
+                    "Vonna.com b o t"
+                    "Vortex"
+                    "Votay bot"
+                    "WebAlta Crawler"
+                    "Webbot"
+                    "Webclipping.com"
+                    "WebCorp"
+                    "Webinator"
+                    "WIRE"
+                    "WISEbot"
+                    "Xerka WebBot"
+                    "XSpider"
+                    "YodaoBot"
+                    "Yoono"
+                    "yoono"
+                }
             }
+            { allows V{ } }
+            { disallows V{ URL" /" } }
+            { unknowns H{ } }
         }
-        { allows V{ } }
-        { disallows V{ "/" } }
-        { unknowns H{ } }
     }
-}
 ] [ "vocab:robots/robots.txt" utf8 file-contents parse-robots.txt ] unit-test
index 242aa1dea2e7dfb9f5f3516bdb9655012df54dba..3c0eb045f7598046f8dee75b212a9cf0d06232c7 100644 (file)
@@ -85,7 +85,7 @@ PRIVATE>
 : parse-robots.txt ( string -- sitemaps rules-seq )
     normalize-robots.txt [
         [ <rules> dup ] dip [ parse-robots.txt-line drop ] with each
-    ] map first ;
+    ] map ;
 
 : robots ( url -- robots )
     >url
index b067504e2efb1f6139d91f4f42f9e26fc8947b64..e58d5a79d5fa7d8c5f556994b4a1a53b30314ec0 100644 (file)
@@ -2,7 +2,7 @@
 ! See http://factorcode.org/license.txt for BSD license.
 USING: db.tuples locals site-watcher site-watcher.db
 site-watcher.private kernel db io.directories io.files.temp
-continuations site-watcher.db.private db.sqlite
+continuations db.sqlite
 sequences tools.test ;
 IN: site-watcher.tests