1 ! Copyright (C) 2006 Chris Double, Daniel Ehrenberg.
2 ! Portions copyright (C) 2008, 2009 Slava Pestov.
3 ! See http://factorcode.org/license.txt for BSD license.
4 USING: accessors byte-arrays calendar.format calendar.parser
5 combinators combinators.short-circuit continuations http.client
6 kernel present sequences strings urls xml xml.data
7 xml.entities.html xml.syntax xml.traversal xml.writer ;
! Look up each name in names, in order, with tag-named against tag and
! return the first non-f match. Returns f when no name matches or when
! names is empty.
: any-tag-named ( tag names -- tag-inside )
    ! map-find stops at the first name whose lookup is non-f and leaves
    ! ( result name ); only the result is wanted.
    [ tag-named ] with map-find drop ;
! A syndication feed. The slots carry no type declarations or initial
! values, so each one is f on a freshly created instance.
TUPLE: feed
    title
    url
    entries ;

! Create an empty feed; callers fill the slots in with setters.
: <feed> ( -- feed )
    feed new ;
! A single item of a feed. The slots carry no type declarations or
! initial values, so each one is f on a freshly created instance.
TUPLE: entry
    title
    url
    description
    date ;
19 : set-entries ( feed entries -- feed )
21 [ [ derive-url ] change-url ] with map
! Create an empty entry; callers fill the slots in with setters.
: <entry> ( -- entry )
    entry new ;
! Parse a feed date string into a timestamp.
!
! The RFC 822 parser is tried first; if it throws, the error is dropped
! and the same string is handed to the RFC 3339 parser. If that one
! throws as well, its error propagates to the caller.
!
! An f input (the date element was absent, e.g. the value produced by
! ?children>string on a missing tag) is passed through as f instead of
! being run through both parsers, so one entry without a date no longer
! aborts parsing of the whole feed.
: try-parsing-timestamp ( string/f -- timestamp/f )
    dup [
        [ rfc822>timestamp ] [ drop rfc3339>timestamp ] recover
    ] when ;
! f-tolerant children>string: a non-f tag is converted with
! children>string, while f is returned unchanged.
: ?children>string ( tag -- str/f )
    [ children>string ] [ f ] if* ; inline
32 : rss1.0-entry ( tag -- entry )
34 [ "title" tag-named children>string >>title ]
35 [ "link" tag-named children>string >url >>url ]
36 [ "description" tag-named children>string >>description ]
38 f "date" "http://purl.org/dc/elements/1.1/" <name>
39 tag-named dup [ children>string try-parsing-timestamp ] when
44 : rss1.0 ( xml -- feed )
47 [ "title" tag-named children>string >>title ]
48 [ "link" tag-named children>string >url >>url ] bi
49 ] [ "item" tags-named [ rss1.0-entry ] map set-entries ] bi ;
51 : rss2.0-entry ( tag -- entry )
53 [ "title" tag-named ?children>string >>title ]
54 [ { "link" "guid" } any-tag-named ?children>string >url >>url ]
55 [ { "description" "encoded" } any-tag-named ?children>string >>description ]
57 { "date" "pubDate" } any-tag-named
58 ?children>string try-parsing-timestamp >>date
62 : rss2.0 ( xml -- feed )
65 [ "title" tag-named children>string >>title ]
66 [ "link" tag-named children>string >url >>url ]
67 [ "item" tags-named [ rss2.0-entry ] map set-entries ]
70 : atom-link ( tag -- url/f )
72 [ "link" "alternate" "rel" tag-named-with-attr ]
73 [ "link" "self" "rel" tag-named-with-attr ]
76 [ "href" attr >url ] [ f ] if* ;
78 : atom1.0-entry ( tag -- entry )
80 [ "title" tag-named children>string >>title ]
83 { "content" "summary" } any-tag-named
84 dup children>> [ string? not ] any?
85 [ children>> xml>string ]
86 [ children>string ] if >>description
89 { "published" "updated" "issued" "modified" }
90 any-tag-named children>string try-parsing-timestamp
95 : atom1.0 ( xml -- feed )
97 [ "title" tag-named children>string >>title ]
99 [ "entry" tags-named [ atom1.0-entry ] map set-entries ]
102 : xml>feed ( xml -- feed )
106 { "feed" [ atom1.0 ] }
! Turn raw feed data into a feed tuple, dispatching on the class of the
! input. Both methods run inside with-html-entities (presumably so that
! HTML named entities are accepted while the XML is read -- confirm in
! xml.entities.html) and hand the parsed document to xml>feed.
GENERIC: parse-feed ( seq -- feed )

! Feed text already decoded to a string.
M: string parse-feed
    [ string>xml xml>feed ] with-html-entities ;

! Feed data as raw bytes, parsed with bytes>xml.
M: byte-array parse-feed
    [ bytes>xml xml>feed ] with-html-entities ;
! Retrieve a news syndication file over HTTP and return it as a feed
! tuple.
: download-feed ( url -- feed )
    ! http-get leaves two values; nip keeps only the top one, which is
    ! what gets parsed (presumably the response body -- the value under
    ! it, presumably the response object, is discarded).
    http-get nip
    parse-feed ;
121 : entry>xml ( entry -- xml )
125 [ date>> timestamp>rfc3339 ]
130 <title type="html"><-></title>
131 <link rel="alternate" href=<-> />
132 <published><-></published>
133 <content type="html"><-></content>
137 : feed>xml ( feed -- xml )
140 [ entries>> [ entry>xml ] map ] tri
142 <feed xmlns="http://www.w3.org/2005/Atom">
144 <link rel="alternate" href=<-> />