1 ! Copyright (C) 2021 Doug Coleman.
2 ! See http://factorcode.org/license.txt for BSD license.
3 USING: accessors arrays assocs combinators
4 combinators.short-circuit kernel make math modern modern.slices
5 sequences sequences.extras shuffle combinators.extras splitting
9 TUPLE: tag open name props close children ;
11 TUPLE: processing-instruction open target props close ;
12 : <processing-instruction> ( open target props close -- processing-instruction )
13 processing-instruction new
19 TUPLE: embedded-language open payload close ;
20 : <embedded-language> ( open payload close -- embedded-language )
26 TUPLE: doctype open close values ;
27 : <doctype> ( open values close -- doctype )
33 TUPLE: comment open payload close ;
34 : <comment> ( open payload close -- comment )
40 TUPLE: close-tag name ;
41 : <close-tag> ( name -- tag )
43 swap >string rest rest but-last >>name ;
45 TUPLE: open-tag < tag close-tag ;
46 : <open-tag> ( open name props close -- tag )
52 V{ } clone >>children ;
54 TUPLE: self-close-tag < tag ;
55 : <self-close-tag> ( open name props close -- tag )
61 V{ } clone >>children ;
63 TUPLE: squote payload ;
65 TUPLE: dquote payload ;
68 : advance-squote-payload ( n string -- n' string )
70 { CHAR: \\ CHAR: ' } slice-til-separator-inclusive {
71 { f [ to>> over string-expected-got-eof ] }
73 { CHAR: \\ [ drop take-char drop advance-squote-payload ] }
76 string-expected-got-eof
79 :: read-string ( $n $string $char -- n' string payload )
80 $n $string $char CHAR: ' =
81 [ advance-squote-payload ]
82 [ advance-dquote-payload ] if drop :> $n'
84 $n $n' 1 - $string <slice> ;
86 : take-tag-name ( n string -- n' string tag )
87 [ "\s\r\n/>" member? ] slice-until ;
89 : read-value ( n string -- n' string value )
90 skip-whitespace take-char {
91 { CHAR: ' [ CHAR: ' read-string >string <squote> ] }
92 { CHAR: " [ CHAR: " read-string >string <dquote> ] }
93 { CHAR: [ [ "[" throw ] }
94 { CHAR: { [ "{" throw ] }
95 [ [ take-tag-name ] dip prefix ]
98 : read-prop ( n string -- n' string prop/f closing/f )
99 skip-whitespace "\s\n\r\t\"'<=/>?" slice-til-either {
100 { CHAR: < [ "< error" throw ] }
101 { CHAR: = [ 1 split-slice-back drop >string [ read-value ] dip swap 2array f ] }
102 { CHAR: / [ ">" expect-and-span 2 split-slice-back [ >string f like ] bi@ ] }
103 { CHAR: > [ 1 split-slice-back [ >string f like ] bi@ ] }
104 { CHAR: ' [ first read-string >string <squote> f ] }
105 { CHAR: " [ first read-string >string <dquote> f ] }
106 { CHAR: ? [ ">" expect-and-span >string f swap ] }
107 { CHAR: \s [ >string f ] }
108 { CHAR: \r [ >string f ] }
109 { CHAR: \n [ >string f ] }
110 { CHAR: \t [ >string f ] }
111 { f [ "efff" throw ] }
114 : read-props* ( n string props -- n' string props closing )
115 [ read-prop ] dip-2up [
116 [ [ over push ] when* ] dip
118 [ over push ] when* read-props*
119 ] if* ; inline recursive
121 : read-props ( n string -- n' string props closing )
122 V{ } clone read-props* ;
124 : read-processing-instruction ( n string opening -- n string processing-instruction )
125 "?" expect-and-span >string
126 [ take-tag-name >string ] dip-1up
127 [ read-props ] 2dip-2up
128 <processing-instruction> ;
130 : read-doctype ( n string opening -- n string doctype/comment )
132 2over 2 peek-from "--" sequence= [
133 "--" expect-and-span >string
134 [ "-->" slice-til-string [ >string ] bi@ ] dip-2up <comment>
136 "DOCTYPE" expect-and-span-insensitive
137 [ read-props ] dip-2up
141 : read-embedded-language ( n string opening -- n string embedded-language )
142 "%" expect-and-span >string
143 [ take-tag-name >string ] dip-1up append
144 [ "%>" slice-til-string [ >string ] bi@ ] dip-2up
145 <embedded-language> ;
147 : read-open-tag ( n string opening -- n' string tag )
148 [ take-tag-name ] dip-1up
149 [ read-props ] 2dip-2up
156 : read-close-tag ( n string opening -- n' string tag )
158 [ take-tag-name ] dip span-slices
162 : unclosed-open-tag? ( obj -- ? )
163 { [ open-tag? ] [ close-tag>> not ] } 1&& ; inline
165 ERROR: unmatched-open-tags-error stack seq ;
166 : check-tag-stack ( stack -- stack )
167 dup [ unclosed-open-tag? ] filter
168 [ unmatched-open-tags-error ] unless-empty ;
170 ERROR: unmatched-closing-tag-error stack tag ;
171 :: find-last-open-tag ( stack name -- seq )
172 stack [ { [ unclosed-open-tag? ] [ name>> name = ] } 1&& ] find-last drop [
175 stack name unmatched-closing-tag-error
178 : lex-html ( stack n string -- stack n' string )
179 skip-whitespace "<" slice-til-either {
181 1 split-slice-back [ >string f like [ reach push ] when* ] dip
182 [ 2dup peek1-from ] dip
185 read-close-tag reach over name>> find-last-open-tag unclip
186 swap check-tag-stack >>children
189 { CHAR: ! [ read-doctype ] }
190 { CHAR: ? [ read-processing-instruction ] }
191 { CHAR: % [ read-embedded-language ] }
192 [ drop read-open-tag ]
197 } case [ reach push lex-html ] when* ;
199 : string>html ( string -- sequence )
200 [ V{ } clone 0 ] dip lex-html 2drop check-tag-stack ;
202 GENERIC: write-html ( tag -- )
204 : >value ( obj -- string )
206 { [ dup squote? ] [ payload>> "'" 1surround ] }
207 { [ dup dquote? ] [ payload>> "\"" 1surround ] }
211 M: doctype write-html
213 [ values>> [ >value ] map join-words [ " " % % ] unless-empty ]
216 : write-props ( seq -- )
217 [ dup array? [ first2 >value "=" glue ] [ >value ] if ] map join-words [ " " % % ] unless-empty ;
219 M: processing-instruction write-html
223 [ props>> write-props ]
227 M: open-tag write-html
231 [ props>> write-props ]
233 [ children>> [ write-html ] each ]
234 [ close-tag>> name>> "</" ">" surround % ]
237 M: self-close-tag write-html
241 [ props>> write-props ]
245 M: comment write-html
250 M: string write-html % ;
252 : html>string ( sequence -- string )
253 [ [ write-html ] each ] "" make ;
255 GENERIC#: walk-html 1 ( seq/tag quot -- )
257 M: sequence walk-html [ walk-html ] curry each ;
258 M: string walk-html call( obj -- ) ;
259 M: doctype walk-html call( obj -- ) ;
260 M: processing-instruction walk-html call( obj -- ) ;
261 M: embedded-language walk-html call( obj -- ) ;
262 M: open-tag walk-html [ call( obj -- ) ] 2keep [ children>> ] dip [ walk-html ] curry each ;
263 M: self-close-tag walk-html [ call( obj -- ) ] 2keep [ children>> ] dip [ walk-html ] curry each ;
264 M: comment walk-html call( obj -- ) ;
266 : find-links ( seq -- links )
270 props>> [ drop >lower "href" = ] assoc-find
271 [ nip , ] [ 2drop ] if
274 ] { } make [ payload>> ] map ;