1 ! Copyright (C) 2005, 2006 Daniel Ehrenberg
2 ! See http://factorcode.org/license.txt for BSD license.
3 USING: io io.streams.string io.files kernel math namespaces
4 prettyprint sequences arrays generic strings vectors
5 xml.char-classes xml.data xml.errors xml.tokenize xml.writer
6 xml.utilities state-parser assocs ascii io.encodings.utf8
7 accessors xml.backend ;
10 ! -- Overall parser with data tree
12 : add-child ( object -- )
13 xml-stack get peek second push ;
15 : push-xml ( object -- )
16 V{ } clone 2array xml-stack get push ;
18 : pop-xml ( -- object )
21 GENERIC: process ( object -- )
23 M: object process add-child ;
26 xml-stack get V{ { f V{ "" } } } =
27 [ bad-prolog ] unless drop ;
29 M: instruction process
30 xml-stack get length 1 =
31 [ bad-instruction ] unless
35 xml-stack get dup length 1 =
36 swap first second [ tag? ] contains? not and
37 [ misplaced-directive ] unless
41 [ name>> ] [ attrs>> ] bi
42 <contained-tag> add-child ;
44 M: opener process push-xml ;
46 : check-closer ( name opener -- name opener )
47 dup [ unopened ] unless
49 [ name>> swap mismatched ] unless ;
53 [ check-closer attrs>> ] dip
56 : init-xml-stack ( -- )
57 V{ } clone xml-stack set f push-xml ;
59 : default-prolog ( -- prolog )
60 "1.0" "UTF-8" f <prolog> ;
63 default-prolog prolog-data set ;
66 reset-prolog init-xml-stack init-ns-stack ;
68 : assert-blanks ( seq pre? -- )
69 swap [ string? ] filter
72 [ drop ] [ swap pre/post-content ] if
75 : no-pre/post ( pre post -- pre post/* )
76 ! this does *not* affect the contents of the stack
77 [ dup t assert-blanks ] [ dup f assert-blanks ] bi* ;
79 : no-post-tags ( post -- post/* )
80 ! this does *not* affect the contents of the stack
81 dup [ tag? ] contains? [ multitags ] when ;
83 : assure-tags ( seq -- seq )
84 ! this does *not* affect the contents of the stack
87 : make-xml-doc ( prolog seq -- xml-doc )
89 [ assure-tags cut rest no-pre/post no-post-tags ] dip
96 TUPLE: pull-xml scope ;
97 : <pull-xml> ( -- pull-xml )
99 input-stream [ ] change ! bring var in this scope
100 init-parser reset-prolog init-ns-stack
105 : pull-event ( pull -- xml-event/f )
107 text-now? get [ parse-text f ] [
108 get-char [ make-tag t ] [ f f ] if
113 xml-stack get length 1 = ;
115 : (pull-elem) ( pull -- xml-elem/f )
116 dup pull-event dup closer? done? and [ nip ] [
118 [ drop xml-stack get first second ]
122 : pull-elem ( pull -- xml-elem/f )
123 [ init-xml-stack (pull-elem) ] with-scope ;
125 : call-under ( quot object -- quot )
126 swap dup slip ; inline
128 : sax-loop ( quot: ( xml-elem -- ) -- )
129 parse-text call-under
130 get-char [ make-tag call-under sax-loop ]
131 [ drop ] if ; inline recursive
133 : sax ( stream quot: ( xml-elem -- ) -- )
135 reset-prolog init-ns-stack
136 prolog-data get call-under
138 ] state-parse ; inline recursive
141 [ process ] sax-loop ; inline
143 : (read-xml-chunk) ( stream -- prolog seq )
146 done? [ unclosed ] unless
147 xml-stack get first second
151 : read-xml ( stream -- xml )
152 #! Produces a tree of XML nodes
153 (read-xml-chunk) make-xml-doc ;
155 : read-xml-chunk ( stream -- seq )
156 (read-xml-chunk) nip ;
158 : string>xml ( string -- xml )
159 <string-reader> read-xml ;
161 : string>xml-chunk ( string -- xml )
162 <string-reader> read-xml-chunk ;
164 : file>xml ( filename -- xml )
165 ! Autodetect encoding!
166 utf8 <file-reader> read-xml ;
168 : xml-reprint ( string -- )
169 string>xml print-xml ;