1 ! Copyright (C) 2005, 2009 Daniel Ehrenberg
2 ! See http://factorcode.org/license.txt for BSD license.
3 USING: namespaces xml.state kernel sequences accessors
4 xml.char-classes xml.errors math io sbufs fry strings ascii
5 circular xml.entities assocs make splitting math.parser
6 locals combinators arrays ;
: version=1.0? ( -- ? )
    #! True when the prolog stored in prolog-data has version
    #! "1.0". When prolog-data holds no value the answer is
    #! also true.
    prolog-data get dup
    [ version>> "1.0" = ] [ drop t ] if ;
16 : assure-good-char ( ch -- ch )
18 version=1.0? over text? not get-check and
19 [ disallowed-char ] when
22 ! * Basic utility words
26 [ 0 get-line 1+ set-line ] [ get-column 1+ ] if
29 ! (next) normalizes \r\n and \r
32 2dup swap CHAR: \r = [
34 [ nip read1 ] [ nip CHAR: \n swap ] if
36 set-next dup set-char assure-good-char ;
40 get-char [ unexpected-end ] unless (next) record ;
43 0 1 0 f f <spot> spot set
: with-state ( stream quot -- )
    #! Run init-parser followed by quot while stream is the
    #! input stream.
    ! with-input-stream opens a fresh dynamic scope; init-parser
    ! runs inside it, before quot.
    '[ init-parser @ ] with-input-stream ; inline
50 : skip-until ( quot: ( -- ? ) -- )
52 [ call ] keep swap [ drop ] [
55 ] [ drop ] if ; inline recursive
57 : take-until ( quot -- string )
58 #! Take the substring of a string starting at spot
59 #! from code until the quotation given is true and
60 #! advance spot to after the substring.
62 '[ @ [ t ] [ get-char _ push f ] if ] skip-until
63 ] keep >string ; inline
: take-char ( ch -- string )
    #! Collect characters with take-until, stopping as soon as
    #! the current character equals ch.
    '[ _ get-char = ] take-until ;
69 #! Advance code past any whitespace, including newlines
70 [ get-char blank? not ] skip-until ;
72 : string-matches? ( string circular -- ? )
73 get-char over push-circular
: take-string ( match -- string )
    #! Collect characters with take-until until the most recent
    #! characters read equal match, return the text that came
    #! before match, and step past the current character.
    #! Signals missing-close if there is no current character
    #! once collection stops.
    dup length <circular-string>
    [ 2dup string-matches? ] take-until nip
    ! Check for end of input before trimming. A read cut short
    ! by end of input can be shorter than the trimmed tail; head
    ! would then fail with a slice error rather than reporting
    ! missing-close.
    get-char [ missing-close ] unless
    ! Remove the trailing (match length - 1) characters, which
    ! are the start of match rather than part of the text.
    dup length rot length 1- - head
    next ;
83 get-char 2dup = [ 2drop ] [
84 [ 1string ] bi@ expected
: expect-string ( string -- )
    #! Read one character from the input for each element of
    #! string, advancing with next each time. If the characters
    #! read differ from string, call expected with string and
    #! the characters actually read.
    [ ] [ [ get-char next ] replicate ] bi
    2dup = not
    [ expected ] [ 2drop ] if ;
91 : parse-named-entity ( string -- )
92 dup entities at [ , ] [
93 dup extra-entities get at
94 [ % ] [ no-entity ] ?if
98 next CHAR: ; take-char next
100 "x" ?head 16 10 ? base> ,
101 ] [ parse-named-entity ] if ;
107 next CHAR: ; take-char dup next
108 pe-table get at [ % ] [ no-entity ] ?if ;
110 :: (parse-char) ( quot: ( ch -- ? ) -- )
114 { [ char quot call ] [ next ] }
115 { [ char CHAR: & = ] [ parse-entity quot (parse-char) ] }
116 { [ in-dtd? get char CHAR: % = and ] [ parse-pe quot (parse-char) ] }
117 [ char , next quot (parse-char) ]
118 } cond ; inline recursive
: parse-char ( quot: ( ch -- ? ) -- seq )
    #! Run (parse-char) with quot inside make and return
    #! everything it accumulated as a string.
    '[ _ (parse-char) ] "" make ; inline
: assure-no-]]> ( circular -- )
    #! Call text-w/]]> when the elements of circular are
    #! exactly the characters "]]>"; otherwise do nothing.
    "]]>" sequence= not [ text-w/]]> ] unless ;
126 :: parse-text ( -- string )
127 3 f <array> <circular> :> circ
128 depth get zero? :> no-text [| char |
129 char circ push-circular
131 no-text [ char blank? char CHAR: < = or [
132 char 1string t pre/post-content
138 pass-blank CHAR: > expect ;
: normalize-quote ( str -- str )
    #! Replace every tab, carriage return and line feed in str
    #! with a space; all other characters are kept as they are.
    [ [ "\t\r\n" member? ] keep CHAR: \s swap ? ] map ;
143 : (parse-quote) ( <-disallowed? ch -- string )
146 [ CHAR: < = _ and [ attr-w/< ] [ f ] if ] if
147 ] parse-char normalize-quote get-char
148 [ unclosed-quote ] unless ; inline
: parse-quote* ( <-disallowed? -- seq )
    #! Skip blanks, then look at the current character. If it
    #! is a single or double quote, step past it and hand the
    #! flag and that quote character to (parse-quote).
    #! Otherwise call quoteless-attr.
    pass-blank get-char
    [ "'\"" member? ] keep swap
    [ next (parse-quote) ] [ quoteless-attr ] if ; inline
154 : parse-quote ( -- seq )