]> gitweb.factorcode.org Git - factor.git/blob - basis/syndication/syndication.factor
Fix comments to be ! not #!.
[factor.git] / basis / syndication / syndication.factor
1 ! Copyright (C) 2006 Chris Double, Daniel Ehrenberg.
2 ! Portions copyright (C) 2008, 2009 Slava Pestov.
3 ! See http://factorcode.org/license.txt for BSD license.
4 USING: xml.traversal kernel assocs math.order strings sequences
5 xml.data xml.writer io.streams.string combinators xml
6 xml.entities.html io.files io http.client namespaces make
7 xml.syntax hashtables calendar.format accessors continuations
8 urls present byte-arrays ;
9 IN: syndication
10
11 : any-tag-named ( tag names -- tag-inside )
12     [ f ] 2dip [ tag-named nip dup ] with find 2drop ;
13
14 TUPLE: feed title url entries ;
15
16 : <feed> ( -- feed ) feed new ;
17
18 TUPLE: entry title url description date ;
19
20 : set-entries ( feed entries -- feed )
21     [ dup url>> ] dip
22     [ [ derive-url ] change-url ] with map
23     >>entries ;
24
25 : <entry> ( -- entry ) entry new ;
26
27 : try-parsing-timestamp ( string -- timestamp )
28     [ rfc822>timestamp ] [ drop rfc3339>timestamp ] recover ;
29
30 : rss1.0-entry ( tag -- entry )
31     <entry> swap {
32         [ "title" tag-named children>string >>title ]
33         [ "link" tag-named children>string >url >>url ]
34         [ "description" tag-named children>string >>description ]
35         [
36             f "date" "http://purl.org/dc/elements/1.1/" <name>
37             tag-named dup [ children>string try-parsing-timestamp ] when
38             >>date
39         ]
40     } cleave ;
41
42 : rss1.0 ( xml -- feed )
43     <feed> swap [
44         "channel" tag-named
45         [ "title" tag-named children>string >>title ]
46         [ "link" tag-named children>string >url >>url ] bi
47     ] [ "item" tags-named [ rss1.0-entry ] map set-entries ] bi ;
48
49 : rss2.0-entry ( tag -- entry )
50     <entry> swap {
51         [ "title" tag-named children>string >>title ]
52         [ { "link" "guid" } any-tag-named children>string >url >>url ]
53         [ { "description" "encoded" } any-tag-named children>string >>description ]
54         [
55             { "date" "pubDate" } any-tag-named
56             children>string try-parsing-timestamp >>date
57         ]
58     } cleave ;
59
60 : rss2.0 ( xml -- feed )
61     <feed> swap
62     "channel" tag-named
63     [ "title" tag-named children>string >>title ]
64     [ "link" tag-named children>string >url >>url ]
65     [ "item" tags-named [ rss2.0-entry ] map set-entries ]
66     tri ;
67
68 : atom-link ( tag -- url/f )
69     "link" "alternate" "rel" tag-named-with-attr
70     [ "href" attr >url ] [ f ] if* ;
71
72 : atom1.0-entry ( tag -- entry )
73     <entry> swap {
74         [ "title" tag-named children>string >>title ]
75         [ atom-link >>url ]
76         [
77             { "content" "summary" } any-tag-named
78             dup children>> [ string? not ] any?
79             [ children>> xml>string ]
80             [ children>string ] if >>description
81         ]
82         [
83             { "published" "updated" "issued" "modified" }
84             any-tag-named children>string try-parsing-timestamp
85             >>date
86         ]
87     } cleave ;
88
89 : atom1.0 ( xml -- feed )
90     <feed> swap
91     [ "title" tag-named children>string >>title ]
92     [ atom-link >>url ]
93     [ "entry" tags-named [ atom1.0-entry ] map set-entries ]
94     tri ;
95
96 : xml>feed ( xml -- feed )
97     dup main>> {
98         { "RDF" [ rss1.0 ] }
99         { "rss" [ rss2.0 ] }
100         { "feed" [ atom1.0 ] }
101     } case ;
102
103 GENERIC: parse-feed ( seq -- feed )
104
105 M: string parse-feed [ string>xml xml>feed ] with-html-entities ;
106
107 M: byte-array parse-feed [ bytes>xml xml>feed ] with-html-entities ;
108
109 : download-feed ( url -- feed )
110     ! Retrieve an news syndication file, return as a feed tuple.
111     http-get nip parse-feed ;
112
113 ! Atom generation
114
115 : entry>xml ( entry -- xml )
116     {
117         [ title>> ]
118         [ url>> present ]
119         [ date>> timestamp>rfc3339 ]
120         [ description>> ]
121     } cleave
122     [XML
123         <entry>
124             <title type="html"><-></title>
125             <link rel="alternate" href=<-> />
126             <published><-></published>
127             <content type="html"><-></content>
128         </entry>
129     XML] ;
130
131 : feed>xml ( feed -- xml )
132     [ title>> ]
133     [ url>> present ]
134     [ entries>> [ entry>xml ] map ] tri
135     <XML
136         <feed xmlns="http://www.w3.org/2005/Atom">
137             <title><-></title>
138             <link rel="alternate" href=<-> />
139             <->
140         </feed>
141     XML> ;