! Copyright (C) 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
-USING: accessors arrays assocs combinators kernel math
-math.statistics namespaces sequences sorting xml.syntax
-spider ;
+USING: accessors arrays assocs combinators html kernel math
+math.statistics namespaces sequences sorting urls xml.syntax ;
IN: spider.report
SYMBOL: network-failures
broken-pages push ;
! Records how long one page took to fetch.
! Reads the timing slot of spider-result (fetch-time>> on the removed
! line, fetched-in>> on the added line), pairs it with url as a
! two-element array, and pushes that pair onto the sequence held in
! the timings variable.
: record-page-timings ( url spider-result -- )
- fetch-time>> 2array timings get push ;
+ fetched-in>> 2array timings get push ;
! Appends url to the sequence held in the network-failures variable.
: record-network-failure ( url -- )
network-failures get push ;
timings get sort-values
[ slowest short tail* reverse slowest-pages set ]
[
- values
- [ mean 1000000 /f mean-time set ]
- [ median 1000000 /f median-time set ]
- [ std 1000000 /f time-std set ] tri
+ values [
+ [ mean 1000000 /f mean-time set ]
+ [ median 1000000 /f median-time set ]
+ [ std 1000000 /f time-std set ] tri
+ ] unless-empty
] bi ;
: process-results ( results -- )
slowest-pages-table
timing-summary-table
[XML
- <h2>Slowest pages</h2>
+ <h3>Slowest pages</h3>
<->
- <h2>Summary</h2>
+ <h3>Summary</h3>
<->
XML] ;
! Builds the report markup from the state left in the current scope.
! The added version first reads the url variable and duplicates it so
! the same value fills both the href attribute and the link text.
! report-broken-pages, report-network-failures and report-timings are
! defined outside this view; presumably each leaves one piece of
! markup on the stack for the following <-> slots -- confirm.
! The <-> placeholders in the [XML literal are filled from the stack
! (xml.syntax interpolation).
: generate-report ( -- html )
+ url get dup
report-broken-pages
report-network-failures
report-timings
[XML
- <h1>Broken pages</h1>
+ <h1>Spider report</h1>
+ URL: <a href=<->><-></a>
+
+ <h2>Broken pages</h2>
<->
- <h1>Network failures</h1>
+ <h2>Network failures</h2>
<->
- <h1>Load times</h1>
+ <h2>Load times</h2>
<->
XML] ;
! Entry point: turns a spider into a report.
! Removed version: inside with-scope, runs process-results on the
! spider's spidered>> slot and then generate-report.
! Added version: first tucks the title string "Spider report" and f
! underneath the spider, then inside with-scope stores the spider's
! base>> slot in the url variable, processes spidered>>, and calls
! generate-report; the three resulting values go to simple-page.
! NOTE(review): simple-page is not defined here; presumably it takes
! title, head and body and wraps them into a full page -- confirm
! against the html vocabulary.
: spider-report ( spider -- html )
- [ spidered>> process-results generate-report ] with-scope ;
+ [ "Spider report" f ] dip
+ [
+ [ base>> url set ]
+ [ spidered>> process-results ] bi
+ generate-report
+ ] with-scope
+ simple-page ;