1 ! Copyright (C) 2005, 2009 Daniel Ehrenberg
2 ! See https://factorcode.org/license.txt for BSD license.
3 USING: accessors ascii assocs combinators
4 combinators.short-circuit hints io kernel math math.parser
5 namespaces sbufs sequences splitting strings xml.char-classes
6 xml.entities xml.errors xml.state ;
9 ! * Basic utility words
11 : assure-good-char ( spot ch -- )
14 [ version-1.0?>> over text? not ]
18 [ [ 1 + ] change-column drop ] dip
23 HINTS: assure-good-char { spot fixnum } ;
25 : record ( spot char -- spot )
28 [ [ 1 + ] change-line -1 ] [ dup column>> 1 + ] if
32 HINTS: record { spot fixnum } ;
34 :: (next) ( spot -- spot char )
35 spot next>> :> old-next
36 spot stream>> stream-read1 :> new-next
37 old-next CHAR: \r eq? [
40 [ spot stream>> stream-read1 >>next ]
41 [ new-next >>next ] if
42 ] [ spot old-next >>char new-next >>next ] if
46 dup char>> [ unexpected-end ] unless
47 (next) [ record ] keep assure-good-char ;
49 HINTS: next* { spot } ;
56 input-stream get >>stream
! Runs quot with stream installed through with-input-stream.
! The wrapper quotation calls init-parser first and only then calls
! quot, so quot executes after the parser state has been initialised.
!   stream -- the input stream handed to with-input-stream
!   quot   -- the quotation to call once init-parser has run
60 : with-state ( stream quot -- )
61 ! with-input-stream implicitly creates a new scope which we use
62 swap [ init-parser call ] with-input-stream ; inline
64 :: (skip-until) ( ... quot: ( ... char -- ... ? ) spot -- ... )
67 spot next* quot spot (skip-until)
69 ] when* ; inline recursive
! Convenience wrapper around (skip-until): fetches the current value
! of the spot variable and passes it, together with quot, to
! (skip-until).
!   quot -- predicate with effect ( ... char -- ... ? )
71 : skip-until ( ... quot: ( ... char -- ... ? ) -- ... )
72 spot get (skip-until) ; inline
74 : take-until ( ... quot: ( ... char -- ... ? ) -- ... string )
75 ! Take the substring of a string starting at spot
76 ! from code until the quotation given is true and
77 ! advance spot to after the substring.
79 '[ _ keep over [ drop ] [ _ push ] if ] skip-until
80 ] keep "" like ; inline
! Calls take-until with a predicate that is true when the character
! is a member of seq, and returns whatever string take-until yields.
!   seq -- the set of terminating characters tested with member?
82 : take-to ( seq -- string )
83 '[ _ member? ] take-until ; inline
86 ! Advance code past any whitespace, including newlines
87 [ blank? not ] skip-until ;
! One step of an incremental match of str.
!   pos -- index into str of the character expected next
!   ch  -- the character just read
!   str -- the string being searched for
! If ch is eq? to the element of str at pos, the result is pos + 1;
! otherwise the result is 0.
! NOTE(review): on a mismatch the position goes back to 0 without
! re-testing ch against the start of str, so input with an overlapping
! prefix (e.g. "]]]>" when matching "]]>") looks like it would not be
! recognised -- confirm whether callers rely on this.
89 : next-matching ( pos ch str -- pos' )
90 overd nth eq? [ 1 + ] [ drop 0 ] if ; inline
! Builds a quotation with effect ( pos char -- pos ? ) for str.
! The quotation advances pos with next-matching against str and then
! reports whether the new pos is greater than (length of str) - 1,
! i.e. whether next-matching has advanced through every index of str.
92 : string-matcher ( str -- quot: ( pos char -- pos ? ) )
93 dup length 1 - '[ _ next-matching dup _ > ] ; inline
95 :: (take-string) ( match spot -- sbuf matched? )
100 dup match tail? dup not
! Runs two quotations over match with bi.
! First: calls (take-string) with match and the current spot; when the
! matched? flag it returns is false, missing-close is called
! (presumably an error word -- confirm in xml.errors).
! Second: shortens the sbuf to (its length - length of match), which
! drops its last (length of match) elements, then converts it to a
! string with "" like.
104 : take-string ( match -- string )
105 [ spot get (take-string) [ missing-close ] unless ]
106 [ dupd 2length - over shorten "" like ] bi ;
! Reads as many characters from the current spot as string is long and
! compares the result with string.
! Each step takes the spot's char>> and then advances the spot with
! next*; the characters are collected into a string by replicate-as.
! If the collected string equals string both are dropped; otherwise
! expected is called with ( string collected ) on the stack.
108 : expect ( string -- )
109 dup length spot get '[ _ [ char>> ] keep next* ] "" replicate-as
110 2dup = [ 2drop ] [ expected ] if ;
112 ! Suddenly XML-specific
114 : parse-named-entity ( accum string -- )
118 [ extra-entities get at ]
119 [ swap push-all ] [ no-entity ] ?if
! Advances with next, takes text up to a ";" with take-to, then
! advances with next once more and leaves the taken string.
! NOTE(review): presumably the first next steps over the character
! that introduced the reference and the last one steps over the ";"
! itself -- confirm against the callers.
122 : take-; ( -- string )
123 next ";" take-to next ;
125 : parse-entity ( accum -- )
127 "x" ?head 16 10 ? base> swap push
128 ] [ parse-named-entity ] if ;
130 : parse-pe ( accum -- )
133 [ swap push-all ] [ no-entity ] ?if ;
135 :: (parse-char) ( quot: ( ch -- ? ) accum spot -- )
139 { [ char quot call ] [ spot next* ] }
140 { [ char CHAR: & eq? ] [
142 quot accum spot (parse-char)
144 { [ char CHAR: % eq? [ in-dtd? get ] [ f ] if ] [
146 quot accum spot (parse-char)
151 quot accum spot (parse-char)
153 } cond ; inline recursive
! Creates an accumulator with 512 <sbuf>, passes quot, the accumulator
! and the current spot to (parse-char), and afterwards converts the
! accumulator to a string with "" like.
!   quot -- predicate with effect ( ch -- ? ) forwarded to (parse-char)
155 : parse-char ( quot: ( ch -- ? ) -- seq )
156 512 <sbuf> [ spot get (parse-char) ] keep "" like ; inline
! Advances pos through the string "]]>" with next-matching for char.
! When the resulting position is greater than 2 (all three characters
! matched in a row) text-w/]]> is called; otherwise the new position
! is simply returned.
158 : assure-no-]]> ( pos char -- pos' )
159 "]]>" next-matching dup 2 > [ text-w/]]> ] when ; inline
161 :: parse-text ( -- string )
162 depth get zero? :> no-text
165 pos char assure-no-]]> pos!
167 char blank? char CHAR: < eq? or [
168 char 1string t pre/post-content
175 pass-blank ">" expect ;
! Replaces every tab, carriage return and line feed in str with a
! space character. Uses map!, so str itself is modified and returned.
177 : normalize-quote ( str -- str )
178 [ dup "\t\r\n" member? [ drop CHAR: \s ] when ] map! ;
180 : (parse-quote) ( <-disallowed? ch -- string )
183 [ CHAR: < eq? _ and [ attr-w/< ] [ f ] if ] if
184 ] parse-char normalize-quote get-char
185 [ unclosed-quote ] unless ; inline
! Calls pass-blank, then looks at the character returned by get-char.
! If it is ' or ", advances with next and calls (parse-quote) with the
! <-disallowed? flag and that quote character.
! Otherwise calls quoteless-attr (presumably an error word -- confirm
! in xml.errors).
187 : parse-quote* ( <-disallowed? -- seq )
188 pass-blank get-char dup "'\"" member?
189 [ next (parse-quote) ] [ quoteless-attr ] if ; inline
191 : parse-quote ( -- seq )