1 ! Copyright (C) 2021 Doug Coleman.
2 ! See http://factorcode.org/license.txt for BSD license.
3 USING: accessors arrays combinators combinators.short-circuit
4 generalizations kernel make math modern modern.slices multiline
5 sequences sequences.extras splitting strings unicode ;
8 TUPLE: tag name open-close-delimiter props children ;
10 TUPLE: doctype open close values ;
11 : <doctype> ( open close values -- doctype )
17 TUPLE: comment open payload close ;
18 : <comment> ( open payload close -- comment )
24 TUPLE: close-tag name ;
25 : <close-tag> ( name -- tag )
27 swap >string rest rest but-last >>name ;
29 TUPLE: open-tag < tag close-tag ;
30 : <open-tag> ( name delimiter props -- tag )
33 swap >string drop ! >>open-close-delimiter
35 V{ } clone >>children ;
37 TUPLE: self-close-tag < tag ;
38 : <self-close-tag> ( name delimiter props -- tag )
41 swap >string drop ! >>open-close-delimiter
43 V{ } clone >>children ;
45 TUPLE: squote payload ;
47 TUPLE: dquote payload ;
50 : read-squote-string-payload ( n string -- n' string )
52 { CHAR: \\ CHAR: ' } slice-til-separator-inclusive {
55 { CHAR: \\ [ drop next-char-from drop read-string-payload ] }
58 string-expected-got-eof
61 : read-dquote-string-payload ( n string -- n' string )
63 { CHAR: \\ CHAR: \" } slice-til-separator-inclusive {
66 { CHAR: \\ [ drop next-char-from drop read-string-payload ] }
69 string-expected-got-eof
72 :: read-string ( n string char -- n' string payload )
73 n string char CHAR: ' = [ read-squote-string-payload ] [ read-dquote-string-payload ] if drop :> n'
75 n' [ n string string-expected-got-eof ] unless
76 n n' 1 - string <slice> ;
78 : take-tag-name ( n string -- n' string tag )
79 [ "\s\r\n/>" member? ] slice-until ;
81 : read-value ( n string -- n' string value )
82 skip-whitespace next-char-from {
83 { CHAR: ' [ CHAR: ' read-string >string <squote> ] }
84 { CHAR: " [ CHAR: " read-string >string <dquote> ] }
85 { CHAR: [ [ "[" throw ] }
86 { CHAR: { [ "{" throw ] }
87 [ [ take-tag-name ] dip prefix ]
90 : read-prop ( n string -- n' string closing/f prop/f )
91 skip-whitespace "\s\n\r\"'<=/>" slice-til-either {
92 { CHAR: < [ "< error" throw ] }
93 { CHAR: = [ 1 split-slice-back drop >string [ read-value ] dip swap 2array f swap ] }
94 { CHAR: / [ ">" expect-and-span 2 split-slice-back swap >string f like ] }
95 { CHAR: > [ 1 split-slice-back swap >string f like ] }
96 { CHAR: ' [ first read-string >string <squote> f swap ] }
97 { CHAR: " [ first read-string >string <dquote> f swap ] }
98 { CHAR: \s [ f swap >string ] }
99 { CHAR: \r [ f swap >string ] }
100 { CHAR: \n [ f swap >string ] }
101 { f [ "efff" throw ] }
104 : read-props ( props n string -- props n' string closing )
106 [ 5 npick push ] when*
107 [ ] [ read-props ] if* ;
109 : read-doctype ( n string opening -- n string doctype/comment )
111 2over 2 peek-from "--" sequence= [
112 "--" expect-and-span >string
113 [ "-->" slice-til-string [ >string ] bi@ ] dip -rot <comment>
115 "DOCTYPE" expect-and-span
116 [ V{ } clone -rot read-props ] dip
117 swap 5 nrot <doctype>
120 : read-open-tag ( n string opening -- n' string tag )
121 [ take-tag-name ] dip drop ! B span-slices
122 [ V{ } clone -rot read-props ] dip
123 swap 5 nrot over ">" sequence= [
129 : read-close-tag ( n string opening -- n' string tag )
131 [ take-tag-name ] dip span-slices
135 :: shorten* ( vector n -- seq )
139 : pop-til-end ( stack quot -- seq/f )
140 [ find-last drop ] keepd swap
141 [ shorten* ] [ drop f ] if* ; inline
143 ERROR: unmatched-open-tags-error stack seq ;
144 : check-tag-stack ( stack -- stack )
146 { [ open-tag? ] [ close-tag>> not ] } 1&&
147 ] filter [ unmatched-open-tags-error ] unless-empty ;
149 ERROR: unmatched-closing-tag-error stack tag ;
150 :: find-last-open-tag ( stack name -- seq )
151 stack [ { [ tag? ] [ name>> name = ] } 1&& ] find-last drop [
154 stack name unmatched-closing-tag-error
157 : lex-html ( stack n string -- stack n' string )
158 skip-whitespace "<" slice-til-either {
160 1 split-slice-back [ >string f like [ reach push ] when* ] dip
161 [ 2dup peek1-from ] dip
164 read-close-tag reach over name>> find-last-open-tag unclip
165 swap check-tag-stack >>children
168 { CHAR: ! [ read-doctype ] }
169 [ drop read-open-tag ]
174 } case [ reach push lex-html ] when* ;
176 : string>html ( string -- sequence )
177 [ V{ } clone 0 ] dip lex-html 2drop check-tag-stack ;
179 GENERIC: write-html ( tag -- )
181 : >value ( obj -- string )
183 { [ dup squote? ] [ payload>> "'" dup surround ] }
184 { [ dup dquote? ] [ payload>> "\"" dup surround ] }
188 M: doctype write-html
190 [ values>> [ >value ] map join-words [ " " % % ] unless-empty ]
194 : write-props ( seq -- )
195 [ dup array? [ first2 >value "=" glue ] [ >value ] if ] map join-words [ " " % % ] unless-empty ;
197 M: open-tag write-html
200 [ props>> write-props ">" % ]
201 [ children>> [ write-html ] each ]
202 [ close-tag>> name>> "</" ">" surround % ]
205 M: self-close-tag write-html
208 [ props>> write-props "/>" % ]
211 M: comment write-html
216 M: string write-html % ;
218 : html>string ( sequence -- string )
219 [ [ write-html ] each ] "" make ;