! Copyright (C) 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors arrays assocs combinators kernel math
math.statistics namespaces sequences sorting xml.syntax
spider ;
IN: spider.report

! Accumulators filled while walking the spider results. Each one is
! bound to a fresh vector by process-results.
!   network-failures: urls whose result was f
!   broken-pages:     { url { code message } } pairs
!   timings:          { url fetched-in } pairs
SYMBOL: network-failures
SYMBOL: broken-pages
SYMBOL: timings

! Remember a page whose response code was not 200, together with the
! code and message taken from its headers.
: record-broken-page ( url spider-result -- )
    headers>> [ code>> ] [ message>> ] bi 2array 2array
    ! The vector lives in the variable, so it must be fetched with
    ! get before pushing (pushing onto the symbol itself is an error).
    broken-pages get push ;

! Remember how long a successfully fetched page took.
: record-page-timings ( url spider-result -- )
    fetched-in>> 2array timings get push ;

! Remember a url for which there is no spider-result at all.
: record-network-failure ( url -- )
    network-failures get push ;

! Classify one ( url, result ) entry: f means network failure, a 200
! response is timed, anything else counts as a broken page.
: process-result ( url spider-result -- )
    {
        { f [ record-network-failure ] }
        [
            dup headers>> code>> 200 =
            [ record-page-timings ] [ record-broken-page ] if
        ]
    } case ;

! Maximum number of entries shown in the "slowest pages" table.
CONSTANT: slowest 5

SYMBOL: slowest-pages
SYMBOL: mean-time
SYMBOL: median-time
SYMBOL: time-std

! Mean, median and standard deviation of the raw timings, each divided
! by 1000000 (the report labels the results as seconds). With no
! timings at all the statistics are undefined, so 0.0 is reported
! instead of letting mean/median fail on an empty sequence.
: timing-stats ( times -- mean median std )
    [ 0.0 0.0 0.0 ] [
        [ mean ] [ median ] [ std ] tri
        [ 1000000 /f ] tri@
    ] if-empty ;

! Derive the slowest-pages list (slowest first, at most `slowest`
! entries) and the summary statistics from the collected timings.
: process-timings ( -- )
    timings get sort-values
    [ slowest short tail* reverse slowest-pages set ]
    [
        values timing-stats
        time-std set median-time set mean-time set
    ] bi ;

! Reset the accumulators, classify every entry of the results assoc,
! then compute the timing statistics.
: process-results ( results -- )
    V{ } clone network-failures set
    V{ } clone broken-pages set
    V{ } clone timings set
    [ process-result ] assoc-each
    process-timings ;

! Render { url time } pairs as a table of links with the time divided
! by 1000000 and labelled as seconds.
: info-table ( alist -- html )
    [
        first2 dupd 1000000 /f
        [XML
        <tr><td><a href=<->><-></a></td><td><-> seconds</td></tr>
        XML]
    ] map [XML <table border="1"><-></table> XML] ;

! Render { url { code message } } pairs as a table of links with the
! response code and message. Kept separate from info-table, which
! expects a numeric time as the second element.
: broken-pages-table ( alist -- html )
    [
        first2 [ dup ] dip first2
        [XML
        <tr><td><a href=<->><-></a></td><td><-></td><td><-></td></tr>
        XML]
    ] map [XML <table border="1"><-></table> XML] ;

: report-broken-pages ( -- html )
    broken-pages get broken-pages-table ;

! Bulleted list of links to the urls that could not be fetched.
: report-network-failures ( -- html )
    network-failures get [
        dup [XML <li><a href=<->><-></a></li> XML]
    ] map [XML <ul><-></ul> XML] ;

: slowest-pages-table ( -- html )
    slowest-pages get info-table ;

! Table with the three summary statistics computed by process-timings.
: timing-summary-table ( -- html )
    mean-time get
    median-time get
    time-std get
    [XML
    <table border="1">
    <tr><th>Mean</th><td><-> seconds</td></tr>
    <tr><th>Median</th><td><-> seconds</td></tr>
    <tr><th>Standard deviation</th><td><-> seconds</td></tr>
    </table>
    XML] ;

: report-timings ( -- html )
    slowest-pages-table
    timing-summary-table
    [XML
    <h2>Slowest pages</h2>
    <->

    <h2>Summary</h2>
    <->
    XML] ;

! Assemble the three report sections in the order they are displayed.
: generate-report ( -- html )
    report-broken-pages
    report-network-failures
    report-timings
    [XML
    <h1>Broken pages</h1>
    <->

    <h1>Network failures</h1>
    <->

    <h1>Load times</h1>
    <->
    XML] ;

! Build the report for a spider's results. All intermediate variables
! are set inside a fresh scope so nothing leaks to the caller.
: spider-report ( spider -- html )
    [ spidered>> process-results generate-report ] with-scope ;