1 ! Copyright (C) 2005, 2009 Daniel Ehrenberg
2 ! See http://factorcode.org/license.txt for BSD license.
3 USING: kernel namespaces xml.tokenize xml.state xml.name
4 xml.data accessors arrays make xml.char-classes fry assocs sequences
5 math xml.errors sets combinators io.encodings io.encodings.iana
6 unicode.case xml.dtd strings ;
10 parse-name pass-blank CHAR: = expect pass-blank
11 t parse-quote* 2array , ;
13 : start-tag ( -- name ? )
14 #! Outputs the name and whether this is a closing tag
15 get-char CHAR: / = dup [ next ] when
19 pass-blank version=1.0? get-char name-start?
20 [ parse-attr (middle-tag) ] when ;
22 : assure-no-duplicates ( attrs-alist -- attrs-alist )
23 H{ } clone 2dup '[ swap _ push-at ] assoc-each
24 [ nip length 2 >= ] assoc-filter >alist
25 [ first first2 duplicate-attr ] unless-empty ;
27 : middle-tag ( -- attrs-alist )
28 ! f make will make a vector if it has any elements
29 [ (middle-tag) ] f make pass-blank
30 assure-no-duplicates ;
32 : end-tag ( name attrs-alist -- tag )
33 tag-ns pass-blank get-char CHAR: / =
34 [ pop-ns <contained> next CHAR: > expect ]
35 [ depth inc <opener> close ] if ;
37 : take-comment ( -- comment )
43 : assure-no-extra ( seq -- )
45 T{ name f "" "version" f }
46 T{ name f "" "encoding" f }
47 T{ name f "" "standalone" f }
49 [ extra-attrs ] unless-empty ;
51 : good-version ( version -- version )
52 dup { "1.0" "1.1" } member? [ bad-version ] unless ;
54 : prolog-version ( alist -- version )
55 T{ name f "" "version" f } swap at
56 [ good-version ] [ versionless-prolog ] if* ;
58 : prolog-encoding ( alist -- encoding )
59 T{ name f "" "encoding" f } swap at "UTF-8" or ;
61 : yes/no>bool ( string -- t/f )
68 : prolog-standalone ( alist -- version )
69 T{ name f "" "standalone" f } swap at
70 [ yes/no>bool ] [ f ] if* ;
72 : prolog-attrs ( alist -- prolog )
79 : decode-input-if ( encoding -- )
80 string-input? get [ drop ] [ decode-input ] if ;
82 : parse-prolog ( -- prolog )
83 pass-blank middle-tag "?>" expect-string
84 dup assure-no-extra prolog-attrs
85 dup encoding>> dup "UTF-16" =
86 [ drop ] [ name>encoding [ decode-input-if ] when* ] if
89 : instruct ( -- instruction )
91 { [ dup "xml" = ] [ drop parse-prolog ] }
92 { [ dup >lower "xml" = ] [ capitalized-prolog ] }
93 { [ dup valid-name? not ] [ bad-name ] }
94 [ "?>" take-string append <instruction> ]
97 : take-cdata ( -- string )
98 depth get zero? [ bad-cdata ] when
99 "[CDATA[" expect-string "]]>" take-string ;
101 DEFER: make-tag ! Is this unavoidable?
103 : expand-pe ( -- ) ; ! Make this run the contents of the pe within a DOCTYPE
105 : (take-internal-subset) ( -- )
106 pass-blank get-char {
108 { CHAR: % [ expand-pe ] }
110 next make-tag dup dtd-acceptable?
111 [ bad-doctype ] unless , (take-internal-subset)
113 [ 1string bad-doctype ]
116 : take-internal-subset ( -- seq )
120 (take-internal-subset)
123 : nontrivial-doctype ( -- external-id internal-subset )
124 pass-blank get-char CHAR: [ = [
125 next take-internal-subset f swap close
127 " >" take-until-one-of {
128 { CHAR: \s [ (take-external-id) ] }
129 { CHAR: > [ only-blanks f ] }
133 : take-doctype-decl ( -- doctype-decl )
134 pass-blank " >" take-until-one-of {
135 { CHAR: \s [ nontrivial-doctype ] }
137 } case <doctype-decl> ;
140 : take-directive ( -- directive )
142 { "ELEMENT" [ take-element-decl ] }
143 { "ATTLIST" [ take-attlist-decl ] }
144 { "DOCTYPE" [ take-doctype-decl ] }
145 { "ENTITY" [ take-entity-decl ] }
146 { "NOTATION" [ take-notation-decl ] }
150 : direct ( -- object )
152 { CHAR: - [ take-comment ] }
153 { CHAR: [ [ take-cdata ] }
154 [ drop take-directive ]
157 : make-tag ( -- tag )
159 { [ get-char dup CHAR: ! = ] [ drop next direct ] }
160 { [ CHAR: ? = ] [ next instruct ] }
162 start-tag [ dup add-ns pop-ns <closer> depth dec close ]
163 [ middle-tag end-tag ] if