1 ! Copyright (C) 2005, 2006 Daniel Ehrenberg
2 ! See http://factorcode.org/license.txt for BSD license.
3 USING: io io.streams.string io.files kernel math namespaces
4 prettyprint sequences arrays generic strings vectors
5 xml.char-classes xml.data xml.errors xml.tokenize xml.writer
6 xml.utilities state-parser assocs ;
9 ! -- Overall parser with data tree
11 ! A stack of { tag children } pairs
14 : <unclosed> ( -- unclosed )
15 xml-stack get 1 tail-slice [ first opener-name ] map
16 { set-unclosed-tags } unclosed construct ;
18 : add-child ( object -- )
19 xml-stack get peek second push ;
21 : push-xml ( object -- )
22 V{ } clone 2array xml-stack get push ;
24 : pop-xml ( -- object )
27 GENERIC: process ( object -- )
29 M: object process add-child ;
32 xml-stack get V{ { f V{ "" } } } =
33 [ <bad-prolog> throw ] unless drop ;
35 M: instruction process
36 xml-stack get length 1 =
37 [ <bad-instruction> throw ] unless
41 xml-stack get dup length 1 =
42 swap first second [ tag? ] contains? not and
43 [ <bad-directive> throw ] unless
47 [ contained-name ] keep contained-attrs
48 <contained-tag> add-child ;
50 M: opener process push-xml ;
52 : check-closer ( name opener -- name opener )
53 dup [ <unopened> throw ] unless
55 [ opener-name swap <mismatched> throw ] unless ;
58 closer-name pop-xml first2
59 >r check-closer opener-attrs r>
62 : init-xml-stack ( -- )
63 V{ } clone xml-stack set f push-xml ;
65 : default-prolog ( -- prolog )
66 "1.0" "iso-8859-1" f <prolog> ;
69 default-prolog prolog-data set ;
72 reset-prolog init-xml-stack init-ns-stack ;
74 : assert-blanks ( seq pre? -- )
75 swap [ string? ] subset
78 [ drop ] [ swap <pre/post-content> throw ] if
81 : no-pre/post ( pre post -- pre post/* )
82 ! this does *not* affect the contents of the stack
83 >r dup t assert-blanks r>
86 : no-post-tags ( post -- post/* )
87 ! this does *not* affect the contents of the stack
88 dup [ tag? ] contains? [ <multitags> throw ] when ;
90 : assure-tags ( seq -- seq )
91 ! this does *not* affect the contents of the stack
92 [ <notags> throw ] unless* ;
94 : make-xml-doc ( prolog seq -- xml-doc )
96 >r assure-tags swap cut 1 tail
97 no-pre/post no-post-tags
104 TUPLE: pull-xml scope ;
105 : <pull-xml> ( -- pull-xml )
107 stdio [ ] change ! bring stdio var in this scope
108 init-parser reset-prolog init-ns-stack
111 { set-pull-xml-scope } pull-xml construct ;
113 : pull-event ( pull -- xml-event/f )
115 text-now? get [ parse-text f ] [
116 get-char [ make-tag t ] [ f f ] if
121 xml-stack get length 1 = ;
123 : (pull-elem) ( pull -- xml-elem/f )
124 dup pull-event dup closer? done? and [ nip ] [
126 [ drop xml-stack get first second ]
130 : pull-elem ( pull -- xml-elem/f )
131 [ init-xml-stack (pull-elem) ] with-scope ;
133 : call-under ( quot object -- quot )
134 swap dup slip ; inline
136 : sax-loop ( quot -- ) ! quot: xml-elem --
137 parse-text call-under
138 get-char [ make-tag call-under sax-loop ]
141 : sax ( stream quot -- ) ! quot: xml-elem --
143 reset-prolog init-ns-stack
144 prolog-data get call-under
146 ] state-parse ; inline
149 [ process ] sax-loop ; inline
151 : (xml-chunk) ( stream -- prolog seq )
154 done? [ <unclosed> throw ] unless
155 xml-stack get first second
159 : read-xml ( stream -- xml )
160 #! Produces a tree of XML nodes
161 (xml-chunk) make-xml-doc ;
163 : xml-chunk ( stream -- seq )
166 : string>xml ( string -- xml )
167 <string-reader> read-xml ;
169 : file>xml ( filename -- xml )
170 <file-reader> read-xml ;
172 : xml-reprint ( string -- )
173 string>xml print-xml ;