1 ! Copyright (C) 2006 Chris Double, Daniel Ehrenberg.
2 ! Portions copyright (C) 2008 Slava Pestov.
3 ! See http://factorcode.org/license.txt for BSD license.
4 USING: xml.utilities kernel assocs xml.generator math.order
5 strings sequences xml.data xml.writer
6 io.streams.string combinators xml xml.entities io.files io
7 http.client namespaces make xml.generator hashtables
8 calendar.format accessors continuations urls present ;
11 : any-tag-named ( tag names -- tag-inside )
12 f -rot [ tag-named nip dup ] with find 2drop ;
14 TUPLE: feed title url entries ;
16 : <feed> ( -- feed ) feed new ;
18 TUPLE: entry title url description date ;
20 : set-entries ( feed entries -- feed )
22 [ [ derive-url ] change-url ] with map
25 : <entry> ( -- entry ) entry new ;
27 : try-parsing-timestamp ( string -- timestamp )
28 [ rfc822>timestamp ] [ drop rfc3339>timestamp ] recover ;
30 : rss1.0-entry ( tag -- entry )
33 [ "title" tag-named children>string >>title ]
34 [ "link" tag-named children>string >url >>url ]
35 [ "description" tag-named children>string >>description ]
37 f "date" "http://purl.org/dc/elements/1.1/" <name>
38 tag-named dup [ children>string try-parsing-timestamp ] when
43 : rss1.0 ( xml -- feed )
47 [ "title" tag-named children>string >>title ]
48 [ "link" tag-named children>string >url >>url ] bi
49 ] [ "item" tags-named [ rss1.0-entry ] map set-entries ] bi ;
51 : rss2.0-entry ( tag -- entry )
54 [ "title" tag-named children>string >>title ]
55 [ { "link" "guid" } any-tag-named children>string >url >>url ]
56 [ { "description" "encoded" } any-tag-named children>string >>description ]
58 { "date" "pubDate" } any-tag-named
59 children>string try-parsing-timestamp >>date
63 : rss2.0 ( xml -- feed )
67 [ "title" tag-named children>string >>title ]
68 [ "link" tag-named children>string >url >>url ]
69 [ "item" tags-named [ rss2.0-entry ] map set-entries ]
72 : atom1.0-entry ( tag -- entry )
75 [ "title" tag-named children>string >>title ]
76 [ "link" tag-named "href" swap at >url >>url ]
78 { "content" "summary" } any-tag-named
79 dup children>> [ string? not ] contains?
80 [ children>> [ write-chunk ] with-string-writer ]
81 [ children>string ] if >>description
84 { "published" "updated" "issued" "modified" }
85 any-tag-named children>string try-parsing-timestamp
90 : atom1.0 ( xml -- feed )
93 [ "title" tag-named children>string >>title ]
94 [ "link" tag-named "href" swap at >url >>url ]
95 [ "entry" tags-named [ atom1.0-entry ] map set-entries ]
98 : xml>feed ( xml -- feed )
102 { "feed" [ atom1.0 ] }
105 : read-feed ( string -- feed )
106 [ string>xml xml>feed ] with-html-entities ;
108 : download-feed ( url -- feed )
109 #! Retrieve an news syndication file, return as a feed tuple.
110 http-get nip read-feed ;
113 : simple-tag, ( content name -- )
116 : simple-tag*, ( content name attrs -- )
119 : entry, ( entry -- )
122 [ title>> "title" { { "type" "html" } } simple-tag*, ]
123 [ url>> present "href" associate "link" swap contained*, ]
124 [ date>> timestamp>rfc3339 "published" simple-tag, ]
125 [ description>> [ "content" { { "type" "html" } } simple-tag*, ] when* ]
129 : feed>xml ( feed -- xml )
130 "feed" { { "xmlns" "http://www.w3.org/2005/Atom" } } [
131 [ title>> "title" simple-tag, ]
132 [ url>> present "href" associate "link" swap contained*, ]
133 [ entries>> [ entry, ] each ]