1 ! Copyright (C) 2005, 2006 Daniel Ehrenberg
\r
2 ! See http://factorcode.org/license.txt for BSD license.
\r
4 USING: errors hashtables io kernel math namespaces prettyprint
\r
5 sequences tools generic strings char-classes ;
\r
7 ! -- Low-level parsing
\r
8 ! Code stored in stdio
\r
9 ! Spot is composite so it won't be lost in sub-scopes
\r
10 SYMBOL: spot #! { char line column line-str }
\r
11 : get-char ( -- char ) spot get first ;
\r
12 : set-char ( char -- ) 0 spot get set-nth ;
\r
13 : get-line ( -- line ) spot get second ;
\r
14 : set-line ( line -- ) 1 spot get set-nth ;
\r
15 : get-column ( -- column ) spot get third ;
\r
16 : set-column ( column -- ) 2 spot get set-nth ;
\r
17 : get-line-str ( -- line-str ) 3 spot get nth ;
\r
18 : set-line-str ( line-str -- ) 3 spot get set-nth ;
\r
21 ! Record is composite so it changes in nested scopes
\r
22 SYMBOL: record ! string
\r
23 SYMBOL: now-recording? ! t/f
\r
24 : recording? ( -- t/f ) now-recording? get ;
\r
25 : get-record ( -- sbuf ) record get ;
\r
27 : push-record ( ch -- )
\r
30 SBUF" " clone record set
\r
31 t now-recording? set
\r
32 get-char [ push-record ] when* ;
\r
36 : (end-record) ( -- sbuf )
\r
37 f now-recording? set
\r
39 : end-record* ( n -- string )
\r
40 (end-record) tuck length swap -
\r
41 head-slice >string ;
\r
42 : end-record ( -- string )
\r
43 get-record length 0 =
\r
44 [ "" f recording? set ]
\r
45 [ 1 end-record* ] if ;
\r
47 ! -- Error reporting
\r
49 TUPLE: xml-error line column ;
\r
50 C: xml-error ( -- xml-error )
\r
51 [ get-line swap set-xml-error-line ] keep
\r
52 [ get-column swap set-xml-error-column ] keep ;
\r
54 : xml-error. ( xml-error -- )
\r
56 "Line: " write dup xml-error-line .
\r
57 "Column: " write xml-error-column . ;
\r
59 TUPLE: expected should-be was ;
\r
60 C: expected ( should-be was -- error )
\r
61 [ <xml-error> swap set-delegate ] keep
\r
62 [ set-expected-was ] keep
\r
63 [ set-expected-should-be ] keep ;
\r
67 "Token expected: " write dup expected-should-be print
\r
68 "Token present: " write expected-was print ;
\r
70 TUPLE: no-entity thing ;
\r
71 C: no-entity ( string -- entitiy )
\r
72 [ <xml-error> swap set-delegate ] keep
\r
73 [ set-no-entity-thing ] keep ;
\r
77 "Entity does not exist: &" write no-entity-thing write ";" print ;
\r
79 TUPLE: xml-string-error string ;
\r
80 C: xml-string-error ( string -- xml-string-error )
\r
81 [ set-xml-string-error-string ] keep
\r
82 [ <xml-error> swap set-delegate ] keep ;
\r
84 M: xml-string-error error.
\r
86 xml-string-error-string print ;
\r
88 ! -- Basic utility words
\r
90 : next-line ( -- string )
\r
91 ! read a non-blank line
\r
92 readln dup "" = [ drop next-line ] when ;
\r
94 : (next) ( -- char )
\r
95 get-column get-line-str 2dup length 1- < [
\r
96 >r 1+ dup set-column r> nth
\r
99 next-line dup set-line-str
\r
100 [ first ] [ f ] if*
\r
101 get-line 1+ set-line
\r
107 "XML document unexpectedly ended"
\r
108 <xml-string-error> throw
\r
110 (next) dup set-char
\r
111 recording? over and [ push-record ] [ drop ] if ;
\r
113 : skip-until ( quot -- )
\r
116 [ call ] keep swap [ drop ] [
\r
119 ] [ 2drop ] if ; inline
\r
121 : take-until ( quot -- string | quot: -- ? )
\r
122 #! Take the substring of a string starting at spot
\r
123 #! from code until the quotation given is true and
\r
124 #! advance spot to after the substring.
\r
125 new-record skip-until end-record ; inline
\r
127 : take-char ( ch -- string )
\r
128 [ dup get-char = ] take-until nip ;
\r
130 : pass-blank ( -- )
\r
131 #! Advance code past any whitespace, including newlines
\r
132 [ get-char blank? not ] skip-until ;
\r
134 : string-matches? ( string -- ? )
\r
135 dup length get-column tuck +
\r
136 dup get-line-str length <=
\r
137 [ get-line-str <slice> sequence= ]
\r
140 : take-string ( match -- string )
\r
141 ! match must not contain a newline
\r
142 [ dup string-matches? ] take-until
\r
144 [ "Missing closing token" <xml-string-error> throw ] unless
\r
145 swap length [ next ] times ;
\r
147 ! -- Parsing strings
\r
150 get-char 2dup = [ 2drop ] [
\r
151 >r ch>string r> ch>string <expected> throw
\r
154 : expect-string* ( num -- )
\r
155 #! only skips string, and only for when you're sure the string is there
\r
158 : expect-string ( string -- )
\r
159 ! TODO: add error if this isn't long enough
\r
160 new-record dup length [ next ] times
\r
161 end-record 2dup = [ 2drop ]
\r
162 [ <expected> throw ] if ;
\r
164 TUPLE: prolog version encoding standalone ; ! part of xml-doc, see parser
\r
167 #! We have both directions here as a shortcut.
\r
176 { CHAR: & "&" }
\r
177 { CHAR: ' "'" }
\r
178 { CHAR: " """ }
\r
181 TUPLE: entity name ;
\r
183 : (parse-entity) ( string -- )
\r
184 dup entities hash [ push-record ] [
\r
185 prolog-data get prolog-standalone
\r
186 [ <no-entity> throw ] [
\r
187 end-record , <entity> , next new-record
\r
191 : parse-entity ( -- )
\r
192 next unrecord unrecord
\r
193 ! the following line is in a scope to shield this
\r
194 ! word from the record-altering side effects of
\r
196 [ CHAR: ; take-char ] with-scope
\r
198 "x" ?head 16 10 ? base>
\r
200 ] [ (parse-entity) ] if ;
\r
202 TUPLE: reference name ;
\r
204 : parse-reference ( -- )
\r
205 next unrecord end-record , CHAR: ; take-char
\r
206 <reference> , next new-record ;
\r
208 : (parse-char) ( ch -- )
\r
211 [ 2drop 0 end-record* , ] }
\r
213 [ 2drop end-record , next ] }
\r
214 { [ dup CHAR: & = ]
\r
215 [ drop parse-entity (parse-char) ] }
\r
216 { [ CHAR: % = ] [ parse-reference (parse-char) ] }
\r
217 { [ t ] [ next (parse-char) ] }
\r
220 : parse-char ( ch -- array )
\r
221 [ new-record (parse-char) ] { } make ;
\r
223 : parse-quot ( ch -- array )
\r
224 parse-char get-char
\r
225 [ "XML file ends in a quote" <xml-string-error> throw ] unless ;
\r
227 : parse-text ( -- array )
\r
228 CHAR: < parse-char ;
\r
232 TUPLE: name space tag url ;
\r
234 : version=1.0? ( -- ? )
\r
235 prolog-data get prolog-version "1.0" = ;
\r
237 ! version=1.0? is calculated once and passed around for efficiency
\r
238 : name-start-char? ( 1.0? char -- ? )
\r
239 swap [ 1.0name-start-char? ] [ 1.1name-start-char? ] if ;
\r
241 : name-char? ( 1.0? char -- ? )
\r
242 swap [ 1.0name-char? ] [ 1.1name-char? ] if ;
\r
244 : (parse-name) ( -- str )
\r
246 new-record get-char name-start-char? [
\r
247 [ dup get-char name-char? not ] skip-until
\r
250 "Malformed name" <xml-string-error> throw
\r
253 : parse-name ( -- name )
\r
254 (parse-name) get-char CHAR: : =
\r
255 [ next (parse-name) ] [ "" swap ] if f <name> ;
\r
257 : ?= ( object/f object/f -- ? )
\r
258 2dup and [ = ] [ 2drop t ] if ;
\r
260 : names-match? ( name1 name2 -- ? )
\r
261 [ name-space swap name-space ?= ] 2keep
\r
262 [ name-url swap name-url ?= ] 2keep
\r
263 name-tag swap name-tag ?= and and ;
\r