]> gitweb.factorcode.org Git - factor.git/blob - basis/syndication/syndication.factor
syndication: Fix atom parsing.
[factor.git] / basis / syndication / syndication.factor
1 ! Copyright (C) 2006 Chris Double, Daniel Ehrenberg.
2 ! Portions copyright (C) 2008, 2009 Slava Pestov.
3 ! See http://factorcode.org/license.txt for BSD license.
4 USING: accessors byte-arrays calendar.format calendar.parser
5 combinators combinators.short-circuit continuations http.client
6 kernel present sequences strings urls xml xml.data
7 xml.entities.html xml.syntax xml.traversal xml.writer ;
8 IN: syndication
9
! Look up each of names, in order, among the children of tag and
! return the first tag that is found; f when none of them is present.
: any-tag-named ( tag names -- tag-inside )
    [ tag-named ] with map-find drop ;
12
! A syndication feed: its title, its URL and the entries it contains.
TUPLE: feed
    title url entries ;

! Create a feed whose slots are all still f.
: <feed> ( -- feed )
    feed new ;
16
! One item of a feed: title, URL, description text and date.
TUPLE: entry
    title url description date ;
18
! Store entries in the feed after resolving each entry's url against
! the feed's own url with derive-url.
: set-entries ( feed entries -- feed )
    over url>>
    [ swap [ derive-url ] change-url ] curry map
    >>entries ;
23
! Create an entry whose slots are all still f.
: <entry> ( -- entry )
    entry new ;
25
! Parse a date string as RFC 822 first; if that throws, discard the
! error and parse the same string as RFC 3339 instead. An error from
! the second attempt is passed on to the caller.
: try-parsing-timestamp ( string -- timestamp )
    [ rfc822>timestamp ]
    [ drop rfc3339>timestamp ]
    recover ;
28
! Build an entry from an RSS 1.0 <item> tag.
! <title> and <link> must be present. <description> and the
! http://purl.org/dc/elements/1.1/ "date" element are optional: when
! one is missing its slot is left as f.
: rss1.0-entry ( tag -- entry )
    <entry> swap {
        [ "title" tag-named children>string >>title ]
        [ "link" tag-named children>string >url >>url ]
        [
            ! tag-named gives f for an absent element, so only read
            ! the text when the tag was actually found.
            "description" tag-named
            dup [ children>string ] when >>description
        ]
        [
            f "date" "http://purl.org/dc/elements/1.1/" <name>
            tag-named dup [ children>string try-parsing-timestamp ] when
            >>date
        ]
    } cleave ;
40
! Convert an RSS 1.0 (RDF) document into a feed. Title and link are
! read from the <channel> tag; the <item> tags are looked up on the
! document itself, not inside <channel>.
: rss1.0 ( xml -- feed )
    [ <feed> ] dip
    [
        "channel" tag-named
        [ "title" tag-named children>string >>title ]
        [ "link" tag-named children>string >url >>url ] bi
    ] keep
    "item" tags-named [ rss1.0-entry ] map set-entries ;
47
! Build an entry from an RSS 2.0 <item> tag.
! The url comes from <link>, falling back to <guid>; one of the two
! must be present. The title, the description (<description> or
! <encoded>) and the date (<date> or <pubDate>) are optional: when one
! is missing its slot is left as f instead of failing on f.
: rss2.0-entry ( tag -- entry )
    <entry> swap {
        [ "title" tag-named dup [ children>string ] when >>title ]
        [ { "link" "guid" } any-tag-named children>string >url >>url ]
        [
            { "description" "encoded" } any-tag-named
            dup [ children>string ] when >>description
        ]
        [
            { "date" "pubDate" } any-tag-named
            dup [ children>string try-parsing-timestamp ] when >>date
        ]
    } cleave ;
58
! Convert an RSS 2.0 document into a feed. Title, link and the <item>
! tags are all read from the <channel> tag.
: rss2.0 ( xml -- feed )
    "channel" tag-named [ <feed> ] dip {
        [ "title" tag-named children>string >>title ]
        [ "link" tag-named children>string >url >>url ]
        [ "item" tags-named [ rss2.0-entry ] map set-entries ]
    } cleave ;
66
! NOTE(review): nothing in the visible part of this file throws this
! error; kept because code elsewhere may still refer to it.
ERROR: invalid-atom-link ;

! Choose the <link> of an Atom feed or entry tag and return its href
! as a url. Preference order: rel="alternate", then rel="self", then
! the first <link> of any kind. Returns f when there is no <link>.
: atom-link ( tag -- url/f )
    {
        [ "link" "alternate" "rel" tag-named-with-attr ]
        [ "link" "self" "rel" tag-named-with-attr ]
        [ "link" tag-named ]
    } 1||
    dup [ "href" attr >url ] when ;
76
! Build an entry from an Atom <entry> tag.
! <title> must be present. The url is whatever atom-link finds (may
! be f). The description comes from <content>, falling back to
! <summary>, and the date from the first of <published>, <updated>,
! <issued>, <modified>; when none of the candidates is present the
! slot is left as f instead of failing on f.
: atom1.0-entry ( tag -- entry )
    <entry> swap {
        [ "title" tag-named children>string >>title ]
        [ atom-link >>url ]
        [
            { "content" "summary" } any-tag-named
            dup [
                ! Children that are not all strings are written out
                ! as XML text; otherwise the strings are used as is.
                dup children>> [ string? not ] any?
                [ children>> xml>string ]
                [ children>string ] if
            ] when >>description
        ]
        [
            { "published" "updated" "issued" "modified" }
            any-tag-named
            dup [ children>string try-parsing-timestamp ] when
            >>date
        ]
    } cleave ;
93
! Convert an Atom document into a feed: title from <title>, url from
! atom-link, and one entry per <entry> tag.
: atom1.0 ( xml -- feed )
    [ <feed> ] dip {
        [ "title" tag-named children>string >>title ]
        [ atom-link >>url ]
        [ "entry" tags-named [ atom1.0-entry ] map set-entries ]
    } cleave ;
100
! Dispatch on the document's main>> name to the matching parser:
! "feed" is Atom, "rss" is RSS 2.0 and "RDF" is RSS 1.0. There is no
! default branch, so any other name makes case throw.
: xml>feed ( xml -- feed )
    dup main>> {
        { "feed" [ atom1.0 ] }
        { "rss" [ rss2.0 ] }
        { "RDF" [ rss1.0 ] }
    } case ;
107
! Parse the text of a syndication document into a feed. Both methods
! run the XML parser and xml>feed inside with-html-entities.
GENERIC: parse-feed ( seq -- feed )

! Document given as a string.
M: string parse-feed
    [ string>xml xml>feed ] with-html-entities ;

! Document given as a byte-array.
M: byte-array parse-feed
    [ bytes>xml xml>feed ] with-html-entities ;
113
! Retrieve a news syndication file over HTTP and return it as a feed
! tuple. Only the data returned by http-get is used; the response
! object is dropped.
: download-feed ( url -- feed )
    http-get nip
    parse-feed ;
117
118 ! Atom generation
119
! Render an entry as an Atom <entry> chunk. The four values left on
! the stack (title, url as text, date as an RFC 3339 string, then the
! description) fill the <-> slots of the literal in that order.
: entry>xml ( entry -- xml )
    [ [ title>> ] [ url>> present ] bi ]
    [ [ date>> timestamp>rfc3339 ] [ description>> ] bi ]
    bi
    [XML
        <entry>
            <title type="html"><-></title>
            <link rel="alternate" href=<-> />
            <published><-></published>
            <content type="html"><-></content>
        </entry>
    XML] ;
135
! Render a feed as an Atom document. The title, the url as text and
! the entries converted with entry>xml fill the <-> slots of the
! literal in that order.
: feed>xml ( feed -- xml )
    {
        [ title>> ]
        [ url>> present ]
        [ entries>> [ entry>xml ] map ]
    } cleave
    <XML
        <feed xmlns="http://www.w3.org/2005/Atom">
            <title><-></title>
            <link rel="alternate" href=<-> />
            <->
        </feed>
    XML> ;