gitweb.factorcode.org Git - factor.git/blob - basis/syndication/syndication.factor
scryfall: parse mtga deck format
[factor.git] / basis / syndication / syndication.factor
1 ! Copyright (C) 2006 Chris Double, Daniel Ehrenberg.
2 ! Portions copyright (C) 2008, 2009 Slava Pestov.
3 ! See http://factorcode.org/license.txt for BSD license.
4 USING: accessors byte-arrays calendar.format calendar.parser
5 combinators combinators.short-circuit continuations http.client
6 kernel present sequences strings urls xml xml.data
7 xml.entities.html xml.syntax xml.traversal xml.writer ;
8 IN: syndication
9
10 : any-tag-named ( tag names -- tag-inside )
11     [ f ] 2dip [ tag-named nip dup ] with find 2drop ;
12
13 TUPLE: feed title url entries ;
14
15 : <feed> ( -- feed ) feed new ;
16
17 TUPLE: entry title url description date ;
18
19 : set-entries ( feed entries -- feed )
20     [ dup url>> ] dip
21     [ [ derive-url ] change-url ] with map
22     >>entries ;
23
24 : <entry> ( -- entry ) entry new ;
25
26 : try-parsing-timestamp ( string -- timestamp )
27     [ rfc822>timestamp ] [ drop rfc3339>timestamp ] recover ;
28
29 : ?children>string ( tag -- str/f )
30     dup [ children>string ] when ; inline
31
32 : rss1.0-entry ( tag -- entry )
33     <entry> swap {
34         [ "title" tag-named children>string >>title ]
35         [ "link" tag-named children>string >url >>url ]
36         [ "description" tag-named children>string >>description ]
37         [
38             f "date" "http://purl.org/dc/elements/1.1/" <name>
39             tag-named dup [ children>string try-parsing-timestamp ] when
40             >>date
41         ]
42     } cleave ;
43
44 : rss1.0 ( xml -- feed )
45     <feed> swap [
46         "channel" tag-named
47         [ "title" tag-named children>string >>title ]
48         [ "link" tag-named children>string >url >>url ] bi
49     ] [ "item" tags-named [ rss1.0-entry ] map set-entries ] bi ;
50
51 : rss2.0-entry ( tag -- entry )
52     <entry> swap {
53         [ "title" tag-named ?children>string >>title ]
54         [ { "link" "guid" } any-tag-named ?children>string >url >>url ]
55         [ { "description" "encoded" } any-tag-named ?children>string >>description ]
56         [
57             { "date" "pubDate" } any-tag-named
58             ?children>string try-parsing-timestamp >>date
59         ]
60     } cleave ;
61
62 : rss2.0 ( xml -- feed )
63     <feed> swap
64     "channel" tag-named
65     [ "title" tag-named children>string >>title ]
66     [ "link" tag-named children>string >url >>url ]
67     [ "item" tags-named [ rss2.0-entry ] map set-entries ]
68     tri ;
69
70 : atom-link ( tag -- url/f )
71     {
72         [ "link" "alternate" "rel" tag-named-with-attr ]
73         [ "link" "self" "rel" tag-named-with-attr ]
74         [ "link" tag-named ]
75     } 1||
76     [ "href" attr >url ] [ f ] if* ;
77
78 : atom1.0-entry ( tag -- entry )
79     <entry> swap {
80         [ "title" tag-named children>string >>title ]
81         [ atom-link >>url ]
82         [
83             { "content" "summary" } any-tag-named
84             dup children>> [ string? not ] any?
85             [ children>> xml>string ]
86             [ children>string ] if >>description
87         ]
88         [
89             { "published" "updated" "issued" "modified" }
90             any-tag-named children>string try-parsing-timestamp
91             >>date
92         ]
93     } cleave ;
94
95 : atom1.0 ( xml -- feed )
96     <feed> swap
97     [ "title" tag-named children>string >>title ]
98     [ atom-link >>url ]
99     [ "entry" tags-named [ atom1.0-entry ] map set-entries ]
100     tri ;
101
102 : xml>feed ( xml -- feed )
103     dup main>> {
104         { "RDF" [ rss1.0 ] }
105         { "rss" [ rss2.0 ] }
106         { "feed" [ atom1.0 ] }
107     } case ;
108
109 GENERIC: parse-feed ( seq -- feed )
110
111 M: string parse-feed [ string>xml xml>feed ] with-html-entities ;
112
113 M: byte-array parse-feed [ bytes>xml xml>feed ] with-html-entities ;
114
115 : download-feed ( url -- feed )
116     ! Retrieve an news syndication file, return as a feed tuple.
117     http-get nip parse-feed ;
118
119 ! Atom generation
120
121 : entry>xml ( entry -- xml )
122     {
123         [ title>> ]
124         [ url>> present ]
125         [ date>> timestamp>rfc3339 ]
126         [ description>> ]
127     } cleave
128     [XML
129         <entry>
130             <title type="html"><-></title>
131             <link rel="alternate" href=<-> />
132             <published><-></published>
133             <content type="html"><-></content>
134         </entry>
135     XML] ;
136
137 : feed>xml ( feed -- xml )
138     [ title>> ]
139     [ url>> present ]
140     [ entries>> [ entry>xml ] map ] tri
141     <XML
142         <feed xmlns="http://www.w3.org/2005/Atom">
143             <title><-></title>
144             <link rel="alternate" href=<-> />
145             <->
146         </feed>
147     XML> ;