1 ! Copyright (C) 2005, 2009 Daniel Ehrenberg
2 ! See http://factorcode.org/license.txt for BSD license.
3 USING: namespaces xml.state kernel sequences accessors
4 xml.char-classes xml.errors math io sbufs fry strings ascii
5 circular xml.entities assocs splitting math.parser
6 locals combinators arrays hints ;
9 ! * Basic utility words
11 : assure-good-char ( spot ch -- )
14 [ version-1.0?>> over text? not ]
16 spot get [ 1 + ] change-column drop
21 HINTS: assure-good-char { spot fixnum } ;
23 : record ( spot char -- spot )
26 [ [ 1 + ] change-line -1 ] [ dup column>> 1 + ] if
30 HINTS: record { spot fixnum } ;
32 :: (next) ( spot -- spot char )
33 spot next>> :> old-next
34 spot stream>> stream-read1 :> new-next
38 [ spot stream>> stream-read1 >>next ]
39 [ new-next >>next ] if
40 ] [ spot old-next >>char new-next >>next ] if
44 dup char>> [ unexpected-end ] unless
45 (next) [ record ] keep assure-good-char ;
47 HINTS: next* { spot } ;
54 input-stream get >>stream
58 : with-state ( stream quot -- )
59 ! with-input-stream implicitly creates a new scope which we use
60 swap [ init-parser call ] with-input-stream ; inline
62 :: (skip-until) ( quot: ( -- ? ) spot -- )
65 spot next* quot spot (skip-until)
67 ] when ; inline recursive
69 : skip-until ( quot: ( -- ? ) -- )
70 spot get (skip-until) ; inline
72 : take-until ( quot -- string )
73 #! Take the substring of a string starting at spot
74 #! from code until the quotation given is true and
75 #! advance spot to after the substring.
78 '[ @ [ t ] [ _ char>> _ push f ] if ] skip-until
79 ] keep >string ; inline
81 : take-to ( seq -- string )
82 spot get swap '[ _ char>> _ member? ] take-until ;
85 #! Advance code past any whitespace, including newlines
86 spot get '[ _ char>> blank? not ] skip-until ;
88 : string-matches? ( string circular spot -- ? )
89 char>> over push-circular sequence= ;
91 : take-string ( match -- string )
92 dup length <circular-string>
93 spot get '[ 2dup _ string-matches? ] take-until nip
94 dup length rot length 1 - - head
95 get-char [ missing-close ] unless next ;
97 : expect ( string -- )
98 dup spot get '[ _ [ char>> ] keep next* ] replicate
99 2dup = [ 2drop ] [ expected ] if ;
101 ! Suddenly XML-specific
103 : parse-named-entity ( accum string -- )
104 dup entities at [ swap push ] [
105 dup extra-entities get at
106 [ swap push-all ] [ no-entity ] ?if
109 : take-; ( -- string )
110 next ";" take-to next ;
112 : parse-entity ( accum -- )
114 "x" ?head 16 10 ? base> swap push
115 ] [ parse-named-entity ] if ;
117 : parse-pe ( accum -- )
118 take-; dup pe-table get at
119 [ swap push-all ] [ no-entity ] ?if ;
121 :: (parse-char) ( quot: ( ch -- ? ) accum spot -- )
125 { [ char quot call ] [ spot next* ] }
126 { [ char CHAR: & = ] [
128 quot accum spot (parse-char)
130 { [ in-dtd? get char CHAR: % = and ] [
132 quot accum spot (parse-char)
137 quot accum spot (parse-char)
139 } cond ; inline recursive
141 : parse-char ( quot: ( ch -- ? ) -- seq )
142 1024 <sbuf> [ spot get (parse-char) ] keep >string ; inline
144 : assure-no-]]> ( circular -- )
145 "]]>" sequence= [ text-w/]]> ] when ;
147 :: parse-text ( -- string )
148 3 f <array> <circular> :> circ
149 depth get zero? :> no-text [| char |
150 char circ push-circular
152 no-text [ char blank? char CHAR: < = or [
153 char 1string t pre/post-content
159 pass-blank ">" expect ;
161 : normalize-quote ( str -- str )
162 [ dup "\t\r\n" member? [ drop CHAR: \s ] when ] map ;
164 : (parse-quote) ( <-disallowed? ch -- string )
167 [ CHAR: < = _ and [ attr-w/< ] [ f ] if ] if
168 ] parse-char normalize-quote get-char
169 [ unclosed-quote ] unless ; inline
171 : parse-quote* ( <-disallowed? -- seq )
172 pass-blank get-char dup "'\"" member?
173 [ next (parse-quote) ] [ quoteless-attr ] if ; inline
175 : parse-quote ( -- seq )