1 ! Copyright (C) 2005, 2009 Daniel Ehrenberg
2 ! See http://factorcode.org/license.txt for BSD license.
3 USING: accessors arrays ascii assocs combinators
4 combinators.short-circuit io io.encodings.binary
5 io.encodings.utf8 io.files io.streams.byte-array
6 io.streams.string kernel namespaces sequences splitting strings
7 xml.autoencoding xml.data xml.elements xml.errors xml.name
8 xml.state xml.tokenize ;
13 : add-child ( object -- )
14 xml-stack get last second push ;
16 : push-xml ( object -- )
17 V{ } clone 2array xml-stack get push ;
19 : pop-xml ( -- object )
22 GENERIC: process ( object -- )
24 M: object process add-child ;
28 { V{ { f V{ "" } } } V{ { f V{ } } } } member?
29 [ bad-prolog ] unless add-child ;
31 : before-main? ( -- ? )
34 [ first second [ tag? ] none? ]
38 before-main? [ misplaced-directive ] unless add-child ;
41 [ name>> ] [ attrs>> ] bi
42 <contained-tag> add-child ;
44 M: opener process push-xml ;
46 : check-closer ( name opener -- name opener )
47 dup [ unopened ] unless
49 [ name>> swap mismatched ] unless ;
53 [ check-closer attrs>> ] dip
56 : init-xml-stack ( -- )
57 V{ } clone xml-stack namespaces:set
60 : default-prolog ( -- prolog )
61 "1.0" "UTF-8" f <prolog> ;
65 extra-entities [ H{ } assoc-like ] change ;
67 : assert-blanks ( seq pre? -- )
68 swap [ string? ] filter
71 [ drop ] [ swap pre/post-content ] if
74 : no-pre/post ( pre post -- pre post/* )
75 ! this does *not* affect the contents of the stack
76 [ dup t assert-blanks ] [ dup f assert-blanks ] bi* ;
78 : no-post-tags ( post -- post/* )
79 ! this does *not* affect the contents of the stack
80 dup [ tag? ] any? [ multitags ] when ;
82 : assure-tags ( seq -- seq )
83 ! this does *not* affect the contents of the stack
86 : get-prolog ( seq -- prolog )
89 [ drop default-prolog ] unless ;
91 : cut-prolog ( seq -- newseq )
92 [ { [ prolog? not ] [ "" = not ] } 1&& ] filter ;
94 : make-xml-doc ( seq -- xml-doc )
98 [ cut-prolog ] [ rest ] bi*
99 no-pre/post no-post-tags
106 : collect-variables ( -- hash )
113 } [ dup get ] H{ } map>assoc ;
117 TUPLE: pull-xml scope ;
118 : <pull-xml> ( -- pull-xml )
120 init-parser init-xml text-now? on collect-variables
121 ] with-scope pull-xml boa ;
122 ! pull-xml needs to call start-document somewhere
124 : pull-event ( pull -- xml-event/f )
126 text-now? get [ parse-text f ] [
127 get-char [ make-tag t ] [ f f ] if
128 ] if text-now? namespaces:set
134 xml-stack get length 1 = ;
136 : (pull-elem) ( pull -- xml-elem/f )
137 dup pull-event dup closer? done? and [ nip ] [
139 [ drop xml-stack get first second ]
145 : pull-elem ( pull -- xml-elem/f )
146 [ init-xml-stack (pull-elem) ] with-scope ;
150 : call-under ( quot object -- quot )
151 swap [ call ] keep ; inline
153 : xml-loop ( quot: ( xml-elem -- ) -- )
154 parse-text call-under get-char
155 [ make-tag call-under xml-loop ]
156 [ drop ] if ; inline recursive
158 : read-seq ( stream quot n -- seq )
161 init-xml init-xml-stack
164 done? [ throw-unclosed ] unless
165 xml-stack get first second
166 ] with-state ; inline
168 : make-xml ( stream quot -- xml )
169 0 read-seq make-xml-doc ; inline
173 : each-element ( stream quot: ( xml-elem -- ) -- )
176 start-document [ call-under ] when*
178 ] with-state ; inline
180 : read-xml ( stream -- xml )
181 dup stream-element-type {
182 { +character+ [ [ check ] make-xml ] }
183 { +byte+ [ [ start-document [ process ] when* ] make-xml ] }
186 : read-xml-chunk ( stream -- seq )
187 [ check ] 1 read-seq <xml-chunk> ;
189 : string>xml ( string -- xml )
190 <string-reader> read-xml ;
192 : string>xml-chunk ( string -- xml )
193 <string-reader> read-xml-chunk ;
195 : file>xml ( filename -- xml )
196 binary <file-reader> read-xml ;
198 : bytes>xml ( byte-array -- xml )
199 binary <byte-reader> read-xml ;
201 : read-dtd ( stream -- dtd )
203 H{ } clone extra-entities namespaces:set
207 : file>dtd ( filename -- dtd )
208 utf8 <file-reader> read-dtd ;
210 : string>dtd ( string -- dtd )
211 <string-reader> read-dtd ;