1 ! Copyright (C) 2020 Doug Coleman.
2 ! See http://factorcode.org/license.txt for BSD license.
3 USING: accessors arrays assocs combinators
4 combinators.short-circuit io io.encodings.utf8 io.files
5 json.reader kernel math math.order memoize modern.slices
6 prettyprint sequences sequences.extras strings suffix-arrays
10 : 1sbuf ( ch -- sbuf ) [ SBUF" " clone ] dip over push ; inline
11 : ?1sbuf ( ch -- sbuf ) [ SBUF" " clone ] dip [ over push ] when* ; inline
13 ! https://html.spec.whatwg.org/multipage/parsing.html#tokenization
15 ! https://infra.spec.whatwg.org/#namespaces
16 CONSTANT: html-namespace "http://www.w3.org/1999/xhtml"
17 CONSTANT: mathml-namespace "http://www.w3.org/1998/Math/MathML"
18 CONSTANT: svg-namespace "http://www.w3.org/2000/svg"
19 CONSTANT: xlink-namespace "http://www.w3.org/1999/xlink"
20 CONSTANT: xml-namespace "http://www.w3.org/XML/1998/namespace"
21 CONSTANT: xmlns-namespace "http://www.w3.org/2000/xmlns/"
28 DEFER: (rawtext-state)
29 DEFER: script-data-state
30 DEFER: (script-data-state)
31 DEFER: plaintext-state
32 DEFER: (plaintext-state)
34 DEFER: (tag-open-state)
35 DEFER: end-tag-open-state
36 DEFER: (end-tag-open-state)
38 DEFER: (tag-name-state)
39 DEFER: rcdata-less-than-sign-state
40 DEFER: (rcdata-less-than-sign-state)
41 DEFER: rcdata-end-tag-open-state
42 DEFER: (rcdata-end-tag-open-state)
43 DEFER: rcdata-end-tag-name-state
44 DEFER: (rcdata-end-tag-name-state)
45 DEFER: rawtext-less-than-sign-state
46 DEFER: (rawtext-less-than-sign-state)
47 DEFER: rawtext-end-tag-open-state
48 DEFER: (rawtext-end-tag-open-state)
49 DEFER: rawtext-end-tag-name-state
50 DEFER: (rawtext-end-tag-name-state)
51 DEFER: script-data-less-than-sign-state
52 DEFER: (script-data-less-than-sign-state)
53 DEFER: script-data-end-tag-open-state
54 DEFER: (script-data-end-tag-open-state)
55 DEFER: script-data-end-tag-name-state
56 DEFER: (script-data-end-tag-name-state)
57 DEFER: script-data-escape-start-state
58 DEFER: (script-data-escape-start-state)
59 DEFER: script-data-escape-start-dash-state
60 DEFER: (script-data-escape-start-dash-state)
61 DEFER: script-data-escaped-state
62 DEFER: (script-data-escaped-state)
63 DEFER: script-data-escaped-dash-state
64 DEFER: (script-data-escaped-dash-state)
65 DEFER: script-data-escaped-dash-dash-state
66 DEFER: (script-data-escaped-dash-dash-state)
67 DEFER: script-data-escaped-less-than-sign-state
68 DEFER: (script-data-escaped-less-than-sign-state)
69 DEFER: script-data-escaped-end-tag-open-state
70 DEFER: (script-data-escaped-end-tag-open-state)
71 DEFER: script-data-escaped-end-tag-name-state
72 DEFER: (script-data-escaped-end-tag-name-state)
73 DEFER: script-data-double-escape-start-state
74 DEFER: (script-data-double-escape-start-state)
75 DEFER: script-data-double-escaped-state
76 DEFER: (script-data-double-escaped-state)
77 DEFER: script-data-double-escaped-dash-state
78 DEFER: (script-data-double-escaped-dash-state)
79 DEFER: script-data-double-escaped-dash-dash-state
80 DEFER: (script-data-double-escaped-dash-dash-state)
81 DEFER: script-data-double-escaped-less-than-sign-state
82 DEFER: (script-data-double-escaped-less-than-sign-state)
83 DEFER: script-data-double-escape-end-state
84 DEFER: (script-data-double-escape-end-state)
85 DEFER: before-attribute-name-state
86 DEFER: (before-attribute-name-state)
87 DEFER: attribute-name-state
88 DEFER: (attribute-name-state)
89 DEFER: after-attribute-name-state
90 DEFER: (after-attribute-name-state)
91 DEFER: before-attribute-value-state
92 DEFER: (before-attribute-value-state)
93 DEFER: attribute-value-double-quoted-state
94 DEFER: (attribute-value-double-quoted-state)
95 DEFER: attribute-value-single-quoted-state
96 DEFER: (attribute-value-single-quoted-state)
97 DEFER: attribute-value-unquoted-state
98 DEFER: (attribute-value-unquoted-state)
99 DEFER: after-attribute-value-quoted-state
100 DEFER: (after-attribute-value-quoted-state)
101 DEFER: self-closing-start-tag-state
102 DEFER: (self-closing-start-tag-state)
103 DEFER: bogus-comment-state
104 DEFER: (bogus-comment-state)
105 DEFER: markup-declaration-open-state
106 DEFER: (markup-declaration-open-state)
107 DEFER: comment-start-state
108 DEFER: (comment-start-state)
109 DEFER: comment-start-dash-state
110 DEFER: (comment-start-dash-state)
112 DEFER: (comment-state)
113 DEFER: comment-less-than-sign-state
114 DEFER: (comment-less-than-sign-state)
115 DEFER: comment-less-than-sign-bang-state
116 DEFER: (comment-less-than-sign-bang-state)
117 DEFER: comment-less-than-sign-bang-dash-state
118 DEFER: (comment-less-than-sign-bang-dash-state)
119 DEFER: comment-less-than-sign-bang-dash-dash-state
120 DEFER: (comment-less-than-sign-bang-dash-dash-state)
121 DEFER: comment-end-dash-state
122 DEFER: (comment-end-dash-state)
123 DEFER: comment-end-state
124 DEFER: (comment-end-state)
125 DEFER: comment-end-bang-state
126 DEFER: (comment-end-bang-state)
128 DEFER: (doctype-state)
129 DEFER: before-doctype-name-state
130 DEFER: (before-doctype-name-state)
131 DEFER: doctype-name-state
132 DEFER: (doctype-name-state)
133 DEFER: after-doctype-name-state
134 DEFER: (after-doctype-name-state)
135 DEFER: after-doctype-public-keyword-state
136 DEFER: (after-doctype-public-keyword-state)
137 DEFER: before-doctype-public-identifier-state
138 DEFER: (before-doctype-public-identifier-state)
139 DEFER: doctype-public-identifier-double-quoted-state
140 DEFER: (doctype-public-identifier-double-quoted-state)
141 DEFER: doctype-public-identifier-single-quoted-state
142 DEFER: (doctype-public-identifier-single-quoted-state)
143 DEFER: after-doctype-public-identifier-state
144 DEFER: (after-doctype-public-identifier-state)
145 DEFER: between-doctype-public-and-system-identifiers-state
146 DEFER: (between-doctype-public-and-system-identifiers-state)
147 DEFER: after-doctype-system-keyword-state
148 DEFER: (after-doctype-system-keyword-state)
149 DEFER: before-doctype-system-identifier-state
150 DEFER: (before-doctype-system-identifier-state)
151 DEFER: doctype-system-identifier-double-quoted-state
152 DEFER: (doctype-system-identifier-double-quoted-state)
153 DEFER: doctype-system-identifier-single-quoted-state
154 DEFER: (doctype-system-identifier-single-quoted-state)
155 DEFER: after-doctype-system-identifier-state
156 DEFER: (after-doctype-system-identifier-state)
157 DEFER: bogus-doctype-state
158 DEFER: (bogus-doctype-state)
159 DEFER: cdata-section-state
160 DEFER: (cdata-section-state)
161 DEFER: cdata-section-bracket-state
162 DEFER: (cdata-section-bracket-state)
163 DEFER: cdata-section-end-state
164 DEFER: (cdata-section-end-state)
165 DEFER: character-reference-state
166 DEFER: (character-reference-state)
167 DEFER: named-character-reference-state
168 DEFER: (named-character-reference-state)
169 DEFER: ambiguous-ampersand-state
170 DEFER: (ambiguous-ampersand-state)
171 DEFER: numeric-character-reference-state
172 DEFER: (numeric-character-reference-state)
173 DEFER: hexadecimal-character-reference-start-state
174 DEFER: (hexadecimal-character-reference-start-state)
175 DEFER: decimal-character-reference-start-state
176 DEFER: (decimal-character-reference-start-state)
177 DEFER: hexadecimal-character-reference-state
178 DEFER: (hexadecimal-character-reference-state)
179 DEFER: decimal-character-reference-state
180 DEFER: (decimal-character-reference-state)
181 DEFER: numeric-character-reference-end-state
182 DEFER: (numeric-character-reference-end-state)
185 ERROR: unimplemented string ;
186 ERROR: unimplemented* ;
188 ! Errors: https://html.spec.whatwg.org/multipage/parsing.html#parse-errors
189 ERROR: abrupt-closing-of-empty-comment ;
190 ERROR: abrupt-doctype-public-identifier ;
191 ERROR: abrupt-doctype-system-identifier ;
192 ERROR: absence-of-digits-in-numeric-character-reference ;
193 ERROR: cdata-in-html-content ;
194 ERROR: character-reference-outside-unicode-range ;
195 ERROR: control-character-in-input-stream ;
196 ERROR: control-character-reference ;
197 ERROR: end-tag-with-attributes ;
198 ERROR: duplicate-attribute ;
199 ERROR: end-tag-with-trailing-solidus ;
200 ERROR: eof-before-tag-name ;
201 ERROR: eof-in-cdata ;
202 ERROR: eof-in-comment ;
203 ERROR: eof-in-doctype ;
204 ERROR: eof-in-script-html-comment-like-text ;
206 ERROR: incorrectly-closed-comment ;
207 ERROR: incorrectly-opened-comment ;
208 ERROR: invalid-character-sequence-after-doctype-name ;
209 ERROR: invalid-first-character-of-tag-name ;
210 ERROR: missing-attribute-value ;
211 ERROR: missing-doctype-name ;
212 ERROR: missing-doctype-public-identifier ;
213 ERROR: missing-doctype-system-identifier ;
214 ERROR: missing-end-tag-name ;
215 ERROR: missing-quote-before-doctype-public-identifier ;
217 ERROR: missing-quote-before-doctype-system-identifier ;
218 ERROR: missing-semicolon-after-character-reference ;
219 ERROR: missing-whitespace-after-doctype-public-keyword ;
220 ERROR: missing-whitespace-after-doctype-system-keyword ;
221 ERROR: missing-whitespace-before-doctype-name ;
222 ERROR: missing-whitespace-between-attributes ;
223 ERROR: missing-whitespace-between-doctype-public-and-system-identifiers ;
224 ERROR: nested-comment ;
225 ERROR: noncharacter-character-reference ;
226 ERROR: noncharacter-in-input-stream ;
227 ERROR: non-void-html-element-start-tag-with-trailing-solidus ;
228 ERROR: null-character-reference ;
229 ERROR: surrogate-character-reference ;
230 ERROR: surrogate-in-input-stream ;
231 ERROR: unexpected-character-after-doctype-system-identifier ;
232 ERROR: unexpected-character-in-attribute-name ;
233 ERROR: unexpected-character-in-unquoted-attribute-value ;
234 ERROR: unexpected-equals-sign-before-attribute-name ;
235 ERROR: unexpected-null-character ;
236 ERROR: unexpected-question-mark-instead-of-tag-name ;
237 ERROR: unexpected-solidus-in-tag ;
238 ERROR: unknown-named-character-reference ;
240 ! Tree insertion modes
241 SINGLETONS: initial-mode before-html-mode before-head-mode
242 in-head-mode in-head-noscript-mode after-head-mode
243 in-body-mode text-mode in-table-mode in-table-text-mode
244 in-caption-mode in-column-group-mode in-table-body-mode
245 in-row-mode in-cell-mode in-select-mode in-select-in-table-mode in-template-mode
246 after-body-mode in-frameset-mode after-frameset-mode after-after-body-mode
247 after-after-frameset-mode ;
253 scripting? ! set in constructor
254 frameset-ok? ! frameset-ok? but we want default to f
258 head-element-pointer ! set during insertion time
259 parser-cannot-change-mode-flag
261 original-insertion-mode
278 ! "reset the insertion mode appropriately"
279 ! : reset-insertion-mode ( document -- document )
281 ! dup open-elements>> ?last >>node
282 ! dup [ open-elements>> ?first ] [ node>> ] bi = [
283 ! t >>last dup node>> >>context
286 ! { [ dup name>> >lower "select" = ] [ drop in-select >>insertion-mode ] }
288 ! dup name>> >lower { "td" "th" } member?
289 ! pick last>> f = and
290 ! ] [ drop in-select >>insertion-mode ] }
291 ! { [ dup name>> >lower "select" = ] [ drop in-select >>insertion-mode ] }
292 ! { [ dup name>> >lower "select" = ] [ drop in-select >>insertion-mode ] }
293 ! { [ dup name>> >lower "select" = ] [ drop in-select >>insertion-mode ] }
294 ! { [ dup name>> >lower "select" = ] [ drop in-select >>insertion-mode ] }
295 ! { [ dup name>> >lower "select" = ] [ drop in-select >>insertion-mode ] }
296 ! { [ dup name>> >lower "select" = ] [ drop in-select >>insertion-mode ] }
297 ! { [ dup name>> >lower "select" = ] [ drop in-select >>insertion-mode ] }
298 ! { [ dup name>> >lower "select" = ] [ drop in-select >>insertion-mode ] }
299 ! { [ dup name>> >lower "select" = ] [ drop in-select >>insertion-mode ] }
303 : temporary-buffer-attribute? ( document -- ? )
306 attribute-value-unquoted-state
307 attribute-value-single-quoted-state
308 attribute-value-double-quoted-state
311 ! name, public/system identifier should not be empty strings
312 ! until the state machine demands it
319 : <doctype> ( -- doctype )
322 : new-doctype-from-ch ( ch document -- )
326 ] dip doctype<< ; inline
328 : new-doctype-with-quirks ( document -- )
329 <doctype> t >>quirks? >>doctype drop ;
331 TUPLE: tag self-closing? name attributes children end-tag ;
336 V{ } clone >>attributes
337 V{ } clone >>children ;
339 TUPLE: end-tag self-closing? name attributes ;
341 : <end-tag> ( -- tag )
344 V{ } clone >>attributes ;
346 : new-tag ( document -- )
349 : new-end-tag ( document -- )
350 <end-tag> >>tag drop ;
352 : set-self-closing ( document -- )
353 tag>> t >>self-closing? drop ;
355 : <document> ( -- document )
358 initial-mode >>insertion-mode
361 ! SBUF" " clone >>tag-name
362 SBUF" " clone >>attribute-name
363 SBUF" " clone >>attribute-value
364 SBUF" " clone >>temporary-buffer
365 SBUF" " clone >>comment-token
366 V{ } clone >>open-elements
369 TUPLE: comment open payload close ;
371 : <comment> ( payload -- comment )
373 swap >>payload ; inline
375 : force-quirks ( document -- )
376 doctype>> t >>quirks? drop ;
378 : initialize-doctype-name ( document -- )
379 [ SBUF" " clone ] dip doctype>> name<< ;
381 : initialize-doctype-public-identifier ( document -- )
382 [ SBUF" " clone ] dip doctype>> public-identifier<< ;
384 : initialize-doctype-system-identifier ( document -- )
385 [ SBUF" " clone ] dip doctype>> system-identifier<< ;
387 : push-doctype-name ( ch document -- )
388 doctype>> name>> push ;
390 : push-doctype-public-identifier ( ch document -- )
391 doctype>> public-identifier>> push ;
393 : push-doctype-system-identifier ( ch document -- )
394 doctype>> system-identifier>> push ;
396 ! XXX: not html5 spec, fix
397 ERROR: unmatched-closing-tag-error stack tag ;
399 : unclosed-tag? ( obj -- ? )
400 { [ tag? ] [ end-tag>> not ] } 1&& ; inline
402 :: find-matching-tag ( name stack -- seq )
403 stack [ { [ unclosed-tag? ] [ name>> name = ] } 1&& ] find-last drop [
406 stack name unmatched-closing-tag-error
410 GENERIC: tree-insert* ( document obj insertion-mode -- document )
412 : limited-quirks-mode? ( doctype -- ? )
414 [ public-identifier>> "-//W3C//DTD XHTML 1.0 Frameset//" head? ]
415 [ public-identifier>> "-//W3C//DTD XHTML 1.0 Transitional//" head? ]
416 [ { [ system-identifier>> ] [ public-identifier>> "-//W3C//DTD HTML 4.01 Frameset//" head? ] } 1&& ]
417 [ { [ system-identifier>> ] [ public-identifier>> "-//W3C//DTD HTML 4.01 Transitional//" head? ] } 1&& ]
420 ! https://html.spec.whatwg.org/multipage/parsing.html#the-initial-insertion-mode
421 M: initial-mode tree-insert*
423 { [ dup "\t\n\f\r\s" member? ] [ drop ] }
425 >>tree-doctype before-html-mode >>insertion-mode
427 { [ dup comment? ] [ over tree>> push ] }
428 { [ dup tag? ] [ over tree>> push ] }
430 dup name>> pick tree>> find-matching-tag
437 over iframe-srcdoc?>> [
438 over parser-cannot-change-mode-flag>> [
439 [ t >>quirks-mode? ] dip
442 "must be iframe-srcdoc here" throw
444 ! reprocess the token
445 before-html-mode >>insertion-mode tree-insert
449 ! https://html.spec.whatwg.org/multipage/parsing.html#the-before-html-insertion-mode
450 M: before-html-mode tree-insert*
452 { [ dup doctype? ] [ drop ] }
453 { [ dup comment? ] [ over tree>> push ] }
454 { [ dup "\t\n\f\r\s" member? ] [ drop ] }
455 { [ dup { [ tag? ] [ name>> "html" = ] } 1&& ] [
457 before-head-mode >>insertion-mode
459 ! these tags are handled in the default case
460 { [ dup { [ end-tag? ] [ name>> { "head" "body" "html" "br" } member? not ] } 1&& ] [
461 ! error end-tag, ignore
465 ! Create missing html tag and reprocess the token
466 <tag> "html" >>name pick tree>> push
467 before-head-mode >>insertion-mode tree-insert
471 M: before-head-mode tree-insert*
473 { [ dup "\t\n\f\r\s" member? ] [ drop ] }
474 { [ dup comment? ] [ over tree>> push ] }
475 { [ dup doctype? ] [ drop ] }
476 { [ dup { [ tag? ] [ name>> "html" = ] } 1&& ] [
477 ! XXX: in-body-mode rules here for html tag
480 ! before-head-mode >>insertion-mode
481 "handle html in-body-mode here" throw
483 { [ dup { [ tag? ] [ name>> "head" = ] } 1&& ] [
485 [ >>head-element-pointer drop ]
486 [ drop in-head-mode >>insertion-mode ] 2tri
488 ! these tags are handled in the default case
489 { [ dup { [ end-tag? ] [ name>> { "head" "body" "html" "br" } member? not ] } 1&& ] [
490 ! error end-tag, ignore
494 { [ dup tag? ] [ drop ] }
496 ! Create missing html tag and reprocess the token
498 [ "head" >>name pick tree>> push ]
499 [ >>head-element-pointer ] bi
500 in-head-mode >>insertion-mode tree-insert
504 M: in-head-mode tree-insert*
506 { [ dup "\t\n\f\r\s" member? ] [ over tree>> push ] }
507 { [ dup comment? ] [ over tree>> push ] }
508 { [ dup doctype? ] [ drop ] }
509 { [ dup { [ tag? ] [ name>> "html" = ] } 1&& ] [
510 ! XXX: in-body-mode rules here for html tag
513 ! before-head-mode >>insertion-mode
514 "handle html in-body-mode here" throw
517 { [ dup { [ tag? ] [ name>> { "base" "basefont" "bgsound" "link" } member? ] } 1&& ] [
518 ! non-void-html-element-start-tag-with-trailing-solidus soft error if not self-closing
521 { [ dup { [ tag? ] [ name>> "meta" = ] } 1&& ] [
524 { [ dup { [ tag? ] [ name>> "title" = ] } 1&& ] [
525 ! https://html.spec.whatwg.org/multipage/parsing.html#generic-rcdata-element-parsing-algorithm
526 "insert title node" throw
531 [ { [ tag? ] [ name>> "noscript" = ] [ scripting?>> ] } 1&& ]
532 [ { [ tag? ] [ name>> { "noframes" "style" } member? ] } 1&& ]
535 ! https://html.spec.whatwg.org/multipage/parsing.html#generic-raw-text-element-parsing-algorithm
538 { [ dup { [ tag? ] [ name>> "noscript" = ] [ scripting?>> not ] } 1&& ] [
541 in-head-noscript-mode >>insertion-mode
543 { [ dup { [ tag? ] [ name>> "script" = ] } 1&& ] [
545 text-mode >>insertion-mode
547 { [ dup { [ end-tag? ] [ name>> "head" = ] } 1&& ] [
548 over tree>> last end-tag<<
549 after-head-mode >>insertion-mode
551 { [ dup { [ end-tag? ] [ name>> { "body" "html" "br" } member? ] } 1&& ] [
552 ! non-void-html-element-start-tag-with-trailing-solidus soft error if not self-closing
555 { [ dup { [ tag? ] [ name>> "template" = ] } 1&& ] [
557 in-template-mode >>insertion-mode
559 { [ dup { [ end-tag? ] [ name>> "template" = ] } 1&& ] [
564 [ { [ tag? ] [ name>> "head" = ] } 1&& ]
566 } 1|| ] [ drop "ignore here" throw ] }
568 ! end head tag should be here, pop off, reprocess
569 over tree>> pop swap >>end-tag
570 after-head-mode >>insertion-mode "omg" throw
574 M: in-head-noscript-mode tree-insert* drop unimplemented* ;
576 M: after-head-mode tree-insert*
578 { [ dup "\t\n\f\r\s" member? ] [ over tree>> push ] }
579 { [ dup comment? ] [ over tree>> push ] }
580 { [ dup doctype? ] [ drop ] }
581 { [ dup { [ tag? ] [ name>> "html" = ] } 1&& ] [
582 ! XXX: in-body-mode rules here for html tag
585 ! before-head-mode >>insertion-mode
586 "handle html in-body-mode here" throw
589 { [ dup { [ tag? ] [ name>> "html" = ] } 1&& ] [
592 { [ dup { [ tag? ] [ name>> "body" = ] } 1&& ] [
595 in-body-mode >>insertion-mode
597 { [ dup { [ tag? ] [ name>> "frameset" = ] } 1&& ] [
602 "base" "basefont" "bgsound" "link" "meta"
603 "noframes" "script" "style" "template" "title"
608 { [ dup { [ end-tag? ] [ name>> "template" = ] } 1&& ] [
611 ! same as default case
612 ! { [ dup { [ end-tag? ] [ name>> { "body" "html" "br" } member? not ] } 1&& ] [
617 [ { [ tag? ] [ name>> "head" = ] } 1&& ]
618 [ { [ end-tag? ] [ name>> { "body" "html" "br" } member? not ] } 1&& ]
621 "omg revisit this" throw
626 <tag> "body" >>name pick tree>> push
627 in-body-mode >>insertion-mode tree-insert
631 M: in-body-mode tree-insert*
633 { [ dup CHAR: \0 = ] [ drop ] }
634 { [ dup "\t\n\f\r\s" member? ] [ over tree>> push ] }
635 { [ dup comment? ] [ over tree>> push ] }
636 { [ dup doctype? ] [ drop ] }
637 { [ dup { [ tag? ] [ name>> "html" = ] } 1&& ] [ drop ] }
644 "base" "basefont" "bgsound" "link" "meta"
645 "noframes" "script" "style" "template" "title"
650 { [ end-tag? ] [ name>> "template" = ] } 1&&
656 { [ dup { [ tag? ] [ name>> "body" = ] } 1&& ] [ drop unimplemented* ] }
657 { [ dup { [ tag? ] [ name>> "frameset" = ] } 1&& ] [ drop unimplemented* ] }
660 { [ dup { [ end-tag? ] [ name>> "body" = ] } 1&& ] [
661 "body" pick tree>> find-matching-tag
667 after-body-mode >>insertion-mode
669 { [ dup { [ end-tag? ] [ name>> "html" = ] } 1&& ] [ drop unimplemented* ] }
676 M: text-mode tree-insert* drop unimplemented* ;
677 M: in-table-mode tree-insert* drop unimplemented* ;
678 M: in-table-text-mode tree-insert* drop unimplemented* ;
679 M: in-caption-mode tree-insert* drop unimplemented* ;
680 M: in-column-group-mode tree-insert* drop unimplemented* ;
681 M: in-table-body-mode tree-insert* drop unimplemented* ;
682 M: in-row-mode tree-insert* drop unimplemented* ;
683 M: in-cell-mode tree-insert* drop unimplemented* ;
684 M: in-select-mode tree-insert* drop unimplemented* ;
685 M: in-select-in-table-mode tree-insert* drop unimplemented* ;
686 M: in-template-mode tree-insert* drop unimplemented* ;
687 M: after-body-mode tree-insert*
689 { [ dup "\t\n\f\r\s" member? ] [ over tree>> push ] }
690 { [ dup comment? ] [ over tree>> push ] }
691 { [ dup doctype? ] [ drop ] }
692 { [ dup { [ tag? ] [ name>> "html" = ] } 1&& ] [
695 { [ dup { [ end-tag? ] [ name>> "html" = ] } 1&& ] [
696 ! XXX: make this a function
697 "html" pick tree>> find-matching-tag
703 after-after-body-mode >>insertion-mode
709 M: in-frameset-mode tree-insert* drop unimplemented* ;
710 M: after-frameset-mode tree-insert* drop unimplemented* ;
712 M: after-after-body-mode tree-insert*
714 { [ dup comment? ] [ over tree>> push ] }
715 { [ dup doctype? ] [ unimplemented* ] }
716 { [ dup "\t\n\f\r\s" member? ] [ unimplemented* ] }
717 { [ dup { [ tag? ] [ name>> "html" = ] } 1&& ] [ unimplemented* ] }
719 { [ dup f = ] [ drop ] }
722 [ in-body-mode >>insertion-mode ] dip tree-insert
726 M: after-after-frameset-mode tree-insert* drop unimplemented* ;
728 : tree-insert ( document obj -- document )
729 over insertion-mode>> tree-insert* ;
731 MEMO: load-entities ( -- assoc )
732 "vocab:html5/entities.json" utf8 file-contents json> ;
734 MEMO: entities-suffix-array ( -- assoc )
735 load-entities keys >suffix-array ;
737 : lookup-entity ( string -- entity/string ? )
740 : named-character-match? ( document -- prefix? exact? )
742 [ entities-suffix-array query f like ]
743 [ last CHAR: ; = ] bi ;
745 ERROR: unknown-named-entity entity ;
746 : take-named-character ( document -- )
748 temporary-buffer>> >string lookup-entity [
750 SBUF" " clone-like >>temporary-buffer drop
755 ! XXX: remove the tag>> name>> push part
756 : push-tag-name ( ch document -- )
757 [ tag>> name>> push ]
759 2drop ! tag-name>> push
761 : push-attribute-name ( ch document -- ) attribute-name>> push ;
762 : push-attribute-value ( ch document -- ) attribute-value>> push ;
763 : push-comment-token ( ch document -- ) comment-token>> push ;
764 : push-all-comment-token ( string document -- ) comment-token>> push-all ;
766 ERROR: invalid-return-state obj ;
767 : check-return-state ( obj -- return-state )
768 dup word? [ invalid-return-state ] unless ;
770 : current-attribute ( document -- attribute/f )
771 [ attribute-name>> >string f like ]
772 [ attribute-value>> >string f like ] bi
773 2dup or [ 2array ] [ 2drop f ] if ;
775 : push-when ( obj/f seq -- )
776 over [ push ] [ 2drop ] if ; inline
778 : reset-attribute ( document -- )
779 SBUF" " clone >>attribute-name
780 SBUF" " clone >>attribute-value drop ;
782 : push-attribute ( document -- )
783 [ current-attribute ]
784 [ tag>> attributes>> push-when ]
785 [ reset-attribute ] tri ;
787 : emit-eof ( document -- )
790 : emit-char ( char document -- ) drop "emit-char: " write 1string . ;
791 : emit-string ( char document -- ) drop "emit-string: " write . ;
792 : emit-tag ( document -- )
795 [ tag>> [ name>> >string ] [ name<< ] bi ]
798 [ dup tag>> tree-insert drop ]
801 : emit-end-tag ( document -- )
802 "emit-end-tag: " write
804 [ f >>tag drop ] bi ;
805 : emit-comment-token ( document -- )
806 "emit-comment-token: " write
808 [ comment-token>> >string . ]
809 [ dup comment-token>> >string <comment> tree-insert drop ]
810 [ SBUF" " clone >>comment-token drop ]
812 : emit-doctype ( document -- )
813 "emit-doctype: " write dup doctype>> .
815 [ doctype>> [ >string ] change-name drop ]
817 ! XXX: handle iframe srcdoc document
818 dup { [ doctype>> name>> "html" = not ] [ parser-cannot-change-mode-flag>> not ] } 1&& [
821 dup { [ iframe-srcdoc?>> not ] [ parser-cannot-change-mode-flag>> not ] } 1&& [
822 dup doctype>> limited-quirks-mode? [ t >>limited-quirks-mode? ] when
827 [ dup doctype>> tree-insert drop ]
831 : reset-temporary-buffer ( document -- ) SBUF" " clone temporary-buffer<< ;
832 : ch>new-temporary-buffer ( ch document -- ) [ 1sbuf ] dip temporary-buffer<< ;
833 : string>new-temporary-buffer ( string document -- ) [ SBUF" " clone-like ] dip temporary-buffer<< ;
834 : temporary-buffer-last ( document -- ch/f ) temporary-buffer>> ?last ;
835 : push-temporary-buffer ( ch document -- ) temporary-buffer>> push ;
836 : push-all-temporary-buffer ( string document -- ) temporary-buffer>> push-all ;
838 : flush-temporary-buffer ( document -- )
839 "flush-temporary-buffer: " write
840 [ [ temporary-buffer>> ] keep [ emit-char ] curry each ]
841 [ SBUF" " clone >>temporary-buffer drop ] bi ;
843 : emit-temporary-buffer-with ( string document -- )
844 [ temporary-buffer>> push-all ]
845 [ flush-temporary-buffer ] bi ;
847 ! check if matches open tag
848 : appropriate-end-tag-token? ( document -- ? )
851 : ascii-upper-alpha? ( ch -- ? ) [ CHAR: A CHAR: Z between? ] [ f ] if* ; inline
852 : ascii-lower-alpha? ( ch -- ? ) [ CHAR: a CHAR: z between? ] [ f ] if* ; inline
853 : ascii-upper-hex-digit? ( ch -- ? ) [ CHAR: A CHAR: F between? ] [ f ] if* ; inline
854 : ascii-lower-hex-digit? ( ch -- ? ) [ CHAR: a CHAR: f between? ] [ f ] if* ; inline
855 : ascii-hex-alpha? ( ch -- ? ) { [ ascii-upper-hex-digit? ] [ ascii-lower-hex-digit? ] } 1|| ; inline
857 : ascii-digit? ( ch/f -- ? ) [ CHAR: 0 CHAR: 9 between? ] [ f ] if* ;
858 : ascii-alpha? ( ch/f -- ? ) { [ ascii-lower-alpha? ] [ ascii-upper-alpha? ] } 1|| ;
859 : ascii-alphanumeric? ( ch/f -- ? ) { [ ascii-alpha? ] [ ascii-digit? ] } 1|| ;
860 : ascii-hex-digit? ( ch/f -- ? ) { [ ascii-digit? ] [ ascii-hex-alpha? ] } 1|| ;
862 : (return-state) ( document n/f string ch/f -- document n'/f string )
863 reach [ f ] change-return-state drop check-return-state
864 execute( document n/f string ch/f -- document n'/f string ) ;
866 : return-state ( document n/f string -- document n'/f string )
867 pick [ f ] change-return-state drop check-return-state
868 execute( document n/f string -- document n'/f string ) ;
870 : (data-state) ( document n/f string ch/f -- document n'/f string )
872 { [ dup CHAR: & = ] [ drop [ \ data-state >>return-state ] 2dip character-reference-state ] }
873 { [ dup CHAR: < = ] [ drop tag-open-state ] }
874 { [ dup CHAR: \0 = ] [ unexpected-null-character ] }
875 { [ dup f = ] [ drop pick emit-eof ] }
876 [ reach emit-char data-state ]
879 : data-state ( document n/f string -- document n'/f string )
880 take-char (data-state) ;
883 : (rcdata-state) ( document n/f string ch/f -- document n'/f string )
885 { [ dup CHAR: & = ] [ drop [ \ rcdata-state >>return-state ] 2dip character-reference-state ] }
886 { [ dup CHAR: < = ] [ drop rcdata-less-than-sign-state ] }
887 { [ dup CHAR: \0 = ] [ unexpected-null-character ] }
888 { [ dup f = ] [ drop pick emit-eof ] }
889 [ reach emit-char rcdata-state ]
892 : rcdata-state ( document n/f string -- document n'/f string )
893 take-char (rcdata-state) ;
896 : (rawtext-state) ( document n/f string ch/f -- document n'/f string )
898 { [ dup CHAR: < = ] [ drop rawtext-less-than-sign-state ] }
899 { [ dup CHAR: \0 = ] [ drop unexpected-null-character ] }
900 { [ dup f = ] [ drop pick emit-eof ] }
901 [ reach emit-char rawtext-state ]
904 : rawtext-state ( document n/f string -- document n'/f string )
905 take-char (rawtext-state) ;
908 : (script-data-state) ( document n/f string ch/f -- document n'/f string )
910 { [ dup CHAR: < = ] [ drop script-data-less-than-sign-state ] }
911 { [ dup CHAR: \0 = ] [ drop unexpected-null-character ] }
912 { [ dup f = ] [ drop pick emit-eof ] }
913 [ reach emit-char script-data-state ]
916 : script-data-state ( document n/f string -- document n'/f string )
917 take-char (script-data-state) ;
920 : (plaintext-state) ( document n/f string ch/f -- document n'/f string )
922 { [ dup CHAR: \0 = ] [ drop unexpected-null-character ] }
923 { [ dup f = ] [ drop pick emit-eof ] }
924 [ reach emit-char plaintext-state ]
927 : plaintext-state ( document n/f string -- document n'/f string )
928 take-char (plaintext-state) ;
931 : (tag-open-state) ( document n/f string ch/f -- document n'/f string )
933 { [ dup ascii-alpha? ] [ reach new-tag (tag-name-state) ] }
934 { [ dup CHAR: ! = ] [ drop markup-declaration-open-state ] }
935 { [ dup CHAR: / = ] [ drop end-tag-open-state ] }
936 { [ dup CHAR: ? = ] [ unexpected-question-mark-instead-of-tag-name ] }
937 { [ dup f = ] [ eof-before-tag-name ] }
938 [ invalid-first-character-of-tag-name ]
941 : tag-open-state ( document n/f string -- document n'/f string )
942 take-char (tag-open-state) ;
945 : (end-tag-open-state) ( document n/f string ch/f -- document n'/f string )
947 { [ dup ascii-alpha? ] [ reach new-end-tag (tag-name-state) ] }
948 { [ dup CHAR: > = ] [ missing-end-tag-name ] }
949 { [ dup f = ] [ eof-before-tag-name ] }
950 [ invalid-first-character-of-tag-name ]
953 : end-tag-open-state ( document n/f string -- document n'/f string )
954 take-char (end-tag-open-state) ;
957 : (tag-name-state) ( document n/f string ch/f -- document n'/f string )
959 { [ dup ascii-upper-alpha? ] [ 0x20 + reach push-tag-name tag-name-state ] }
960 { [ dup "\t\n\f\s" member? ] [ drop before-attribute-name-state ] }
961 { [ dup CHAR: / = ] [ drop self-closing-start-tag-state ] }
962 { [ dup CHAR: > = ] [ drop pick emit-tag data-state ] }
963 { [ dup CHAR: \0 = ] [ unexpected-null-character ] }
964 { [ dup f = ] [ eof-before-tag-name ] }
965 [ reach push-tag-name tag-name-state ]
968 : tag-name-state ( document n/f string -- document n'/f string )
969 take-char (tag-name-state) ;
972 : (rcdata-less-than-sign-state) ( document n/f string ch/f -- document n'/f string )
974 { [ dup CHAR: / = ] [ drop pick reset-temporary-buffer rcdata-end-tag-open-state ] }
975 [ [ CHAR: < reach emit-char ] dip (rcdata-state) ]
978 : rcdata-less-than-sign-state ( document n/f string -- document n'/f string )
979 take-char (rcdata-less-than-sign-state) ;
982 : (rcdata-end-tag-open-state) ( document n/f string ch/f -- document n'/f string )
984 { [ dup ascii-alpha? ] [ reach new-end-tag (rcdata-end-tag-name-state) ] }
985 [ [ CHAR: < reach emit-char ] dip (rcdata-state) ]
988 : rcdata-end-tag-open-state ( document n/f string -- document n'/f string )
989 take-char (rcdata-end-tag-open-state) ;
992 : (rcdata-end-tag-name-state) ( document n/f string ch/f -- document n'/f string )
994 { [ dup "\t\n\f\s" member? ] [
995 drop pick appropriate-end-tag-token?
996 [ before-attribute-name-state ] [ "</" reach emit-temporary-buffer-with rcdata-state ] if
998 { [ dup CHAR: / = ] [
999 drop pick appropriate-end-tag-token?
1000 [ self-closing-start-tag-state ] [ "</" reach emit-temporary-buffer-with rcdata-state ] if
1002 { [ dup CHAR: > = ] [
1003 drop pick appropriate-end-tag-token?
1004 [ pick emit-end-tag data-state ] [ "</" reach emit-temporary-buffer-with rcdata-state ] if
1006 { [ dup ascii-upper-alpha? ] [ [ 0x20 + reach push-tag-name ] [ reach push-temporary-buffer ] bi rcdata-end-tag-name-state ] }
1007 { [ dup ascii-lower-alpha? ] [ [ reach push-tag-name ] [ reach push-temporary-buffer ] bi rcdata-end-tag-name-state ] }
1008 [ [ "</" reach emit-temporary-buffer-with ] dip (rcdata-state) ]
1011 : rcdata-end-tag-name-state ( document n/f string -- document n'/f string )
1012 take-char (rcdata-end-tag-name-state) ;
1015 : (rawtext-less-than-sign-state) ( document n/f string ch/f -- document n'/f string )
1017 { [ dup CHAR: / = ] [ drop pick reset-temporary-buffer rawtext-end-tag-open-state ] }
1018 [ [ CHAR: < reach emit-char ] dip (rawtext-state) ]
1021 : rawtext-less-than-sign-state ( document n/f string -- document n'/f string )
1022 take-char (rawtext-less-than-sign-state) ;
1025 : (rawtext-end-tag-open-state) ( document n/f string ch/f -- document n'/f string )
1027 { [ dup ascii-alpha? ] [ reach new-end-tag (rawtext-end-tag-name-state) ] }
1028 [ [ CHAR: < reach emit-char ] dip (rawtext-state) ]
1031 : rawtext-end-tag-open-state ( document n/f string -- document n'/f string )
1032 take-char (rawtext-end-tag-open-state) ;
1035 : (rawtext-end-tag-name-state) ( document n/f string ch/f -- document n'/f string )
1037 { [ dup "\t\n\f\s" member? ] [
1038 drop pick appropriate-end-tag-token?
1039 [ before-attribute-name-state ] [ "</" reach emit-temporary-buffer-with rawtext-state ] if
1041 { [ dup CHAR: / = ] [
1042 drop pick appropriate-end-tag-token?
1043 [ self-closing-start-tag-state ] [ "</" reach emit-temporary-buffer-with rawtext-state ] if
1045 { [ dup CHAR: > = ] [
1046 drop pick appropriate-end-tag-token?
1047 [ pick emit-end-tag data-state ] [ "</" reach emit-temporary-buffer-with rawtext-state ] if
1049 { [ dup ascii-upper-alpha? ] [ [ 0x20 + reach push-tag-name ] [ reach push-temporary-buffer ] bi rawtext-end-tag-name-state ] }
1050 { [ dup ascii-lower-alpha? ] [ [ reach push-tag-name ] [ reach push-temporary-buffer ] bi rawtext-end-tag-name-state ] }
1051 [ [ "</" reach emit-temporary-buffer-with ] dip (rawtext-state) ]
1054 : rawtext-end-tag-name-state ( document n/f string -- document n'/f string )
1055 take-char (rawtext-end-tag-name-state) ;
1058 : (script-data-less-than-sign-state) ( document n/f string ch/f -- document n'/f string )
1060 { [ dup CHAR: / = ] [ drop pick reset-temporary-buffer script-data-end-tag-open-state ] }
1061 { [ dup CHAR: ! = ] [ drop "<!" reach emit-string script-data-escape-start-state ] }
1062 [ [ CHAR: < reach emit-char ] dip (script-data-state) ]
1065 : script-data-less-than-sign-state ( document n/f string -- document n'/f string )
1066 take-char (script-data-less-than-sign-state) ;
1069 : (script-data-end-tag-open-state) ( document n/f string ch/f -- document n'/f string )
1071 { [ dup ascii-alpha? ] [ reach new-end-tag (script-data-end-tag-name-state) ] }
1072 [ [ "</" reach emit-string ] dip (script-data-state) ]
1075 : script-data-end-tag-open-state ( document n/f string -- document n'/f string )
1076 take-char (script-data-end-tag-open-state) ;
1079 : (script-data-end-tag-name-state) ( document n/f string ch/f -- document n'/f string )
1081 { [ dup "\t\n\f\s" member? ] [
1082 drop pick appropriate-end-tag-token?
1083 [ before-attribute-name-state ] [ "</" reach emit-temporary-buffer-with script-data-state ] if
1085 { [ dup CHAR: / = ] [
1086 drop pick appropriate-end-tag-token?
1087 [ self-closing-start-tag-state ] [ "</" reach emit-temporary-buffer-with script-data-state ] if
1089 { [ dup CHAR: > = ] [
1090 drop pick appropriate-end-tag-token?
1091 [ pick emit-end-tag data-state ] [ "</" reach emit-temporary-buffer-with script-data-state ] if
1093 { [ dup ascii-upper-alpha? ] [ [ 0x20 + reach push-tag-name ] [ reach push-temporary-buffer ] bi rawtext-end-tag-name-state ] }
1094 { [ dup ascii-lower-alpha? ] [ [ reach push-tag-name ] [ reach push-temporary-buffer ] bi rawtext-end-tag-name-state ] }
1095 [ [ "</" reach emit-temporary-buffer-with ] dip (script-data-state) ]
1098 : script-data-end-tag-name-state ( document n/f string -- document n'/f string )
1099 take-char (script-data-end-tag-name-state) ;
1102 : (script-data-escape-start-state) ( document n/f string ch/f -- document n'/f string )
1104 { [ dup CHAR: - = ] [ drop script-data-escape-start-dash-state ] }
1105 [ (script-data-state) ]
1108 : script-data-escape-start-state ( document n/f string -- document n'/f string )
1109 take-char (script-data-escape-start-state) ;
1112 : (script-data-escape-start-dash-state) ( document n/f string ch/f -- document n'/f string )
1114 { [ dup CHAR: - = ] [ drop script-data-escaped-dash-dash-state ] }
1115 [ (script-data-state) ]
1118 : script-data-escape-start-dash-state ( document n/f string -- document n'/f string )
1119 take-char (script-data-escape-start-dash-state) ;
1122 : (script-data-escaped-state) ( document n/f string ch/f -- document n'/f string )
1124 { [ dup CHAR: - = ] [ drop script-data-escaped-dash-state ] }
1125 { [ dup CHAR: < = ] [ drop script-data-escaped-less-than-sign-state ] }
1126 { [ dup CHAR: \0 = ] [ unexpected-null-character CHAR: replacement-character unimplemented* ] }
1127 { [ dup f = ] [ eof-in-script-html-comment-like-text ] }
1128 [ reach emit-char script-data-escaped-state ]
1131 : script-data-escaped-state ( document n/f string -- document n'/f string )
1132 take-char (script-data-escaped-state) ;
1135 : (script-data-escaped-dash-state) ( document n/f string ch/f -- document n'/f string )
1137 { [ dup CHAR: - = ] [ drop script-data-escaped-dash-dash-state ] }
1138 { [ dup CHAR: < = ] [ drop script-data-escaped-less-than-sign-state ] }
1139 { [ dup CHAR: \0 = ] [ unexpected-null-character script-data-escaped-state ] }
1140 { [ dup f = ] [ eof-in-script-html-comment-like-text ] }
1141 [ reach emit-char script-data-escaped-state ]
1144 : script-data-escaped-dash-state ( document n/f string -- document n'/f string )
1145 take-char (script-data-escaped-dash-state) ;
1148 : (script-data-escaped-dash-dash-state) ( document n/f string ch/f -- document n'/f string )
1150 { [ dup CHAR: - = ] [ reach emit-char script-data-escaped-dash-dash-state ] }
1151 { [ dup CHAR: < = ] [ drop script-data-escaped-less-than-sign-state ] }
1152 { [ dup CHAR: > = ] [ reach emit-char script-data-state ] }
1153 { [ dup CHAR: \0 = ] [ unexpected-null-character script-data-escaped-state ] }
1154 { [ dup f = ] [ eof-in-script-html-comment-like-text ] }
1155 [ reach emit-char script-data-escaped-state ]
1158 : script-data-escaped-dash-dash-state ( document n/f string -- document n'/f string )
1159 take-char (script-data-escaped-dash-dash-state) ;
1162 : (script-data-escaped-less-than-sign-state) ( document n/f string ch/f -- document n'/f string )
1164 { [ dup CHAR: / = ] [ drop pick reset-temporary-buffer script-data-escaped-end-tag-open-state ] }
1165 { [ dup ascii-alpha? ] [ [ pick reset-temporary-buffer CHAR: < reach emit-char ] dip (script-data-double-escape-start-state) ] }
1166 [ [ CHAR: < reach emit-char ] dip (script-data-escaped-state) ]
1169 : script-data-escaped-less-than-sign-state ( document n/f string -- document n'/f string )
1170 take-char (script-data-escaped-less-than-sign-state) ;
1173 : (script-data-escaped-end-tag-open-state) ( document n/f string ch/f -- document n'/f string )
1175 { [ dup ascii-alpha? ] [ [ pick new-end-tag ] dip (script-data-escaped-end-tag-name-state) ] }
1176 [ [ "</" reach emit-string ] dip (script-data-escaped-state) ]
1179 : script-data-escaped-end-tag-open-state ( document n/f string -- document n'/f string )
1180 take-char (script-data-escaped-end-tag-open-state) ;
1183 : (script-data-escaped-end-tag-name-state) ( document n/f string ch/f -- document n'/f string )
1185 { [ dup "\t\n\f\s" member? ] [
1186 drop pick appropriate-end-tag-token?
1187 [ before-attribute-name-state ] [ "</" reach emit-temporary-buffer-with script-data-escaped-state ] if
1189 { [ dup CHAR: / = ] [
1190 drop pick appropriate-end-tag-token?
1191 [ self-closing-start-tag-state ] [ "</" reach emit-temporary-buffer-with script-data-escaped-state ] if
1193 { [ dup CHAR: > = ] [
1194 drop pick appropriate-end-tag-token?
1195 [ pick emit-end-tag data-state ] [ "</" reach emit-temporary-buffer-with script-data-escaped-state ] if
1197 { [ dup ascii-upper-alpha? ] [ [ 0x20 + reach push-tag-name ] [ reach push-temporary-buffer ] bi script-data-escaped-end-tag-name-state ] }
1198 { [ dup ascii-lower-alpha? ] [ [ reach push-tag-name ] [ reach push-temporary-buffer ] bi script-data-escaped-end-tag-name-state ] }
1199 [ [ "</" reach emit-temporary-buffer-with ] dip (script-data-escaped-state) ]
1202 : script-data-escaped-end-tag-name-state ( document n/f string -- document n'/f string )
1203 take-char (script-data-escaped-end-tag-name-state) ;
1206 : (script-data-double-escape-start-state) ( document n/f string ch/f -- document n'/f string )
1208 { [ dup "\t\n\f\s/>" member? ] [
1210 pick temporary-buffer>> "script" sequence=
1211 [ script-data-double-escaped-state ] [ script-data-escaped-state ] if
1213 { [ dup ascii-upper-alpha? ] [ [ 0x20 + reach push-tag-name ] [ reach push-temporary-buffer ] bi script-data-double-escape-start-state ] }
1214 { [ dup ascii-lower-alpha? ] [ [ reach push-tag-name ] [ reach push-temporary-buffer ] bi script-data-double-escape-start-state ] } ! todo
1215 [ (script-data-escaped-state) ]
1218 : script-data-double-escape-start-state ( document n/f string -- document n'/f string )
1219 take-char (script-data-double-escape-start-state) ;
1222 : (script-data-double-escaped-state) ( document n/f string ch/f -- document n'/f string )
1224 { [ dup CHAR: - = ] [ reach emit-char script-data-double-escaped-dash-state ] }
1225 { [ dup CHAR: < = ] [ reach emit-char script-data-double-escaped-less-than-sign-state ] }
1226 { [ dup CHAR: \0 = ] [
1227 unexpected-null-character
1228 CHAR: replacement-character reach emit-char
1229 script-data-double-escaped-state
1231 { [ dup f = ] [ eof-in-script-html-comment-like-text ] }
1232 [ reach emit-char script-data-double-escaped-state ]
1235 : script-data-double-escaped-state ( document n/f string -- document n'/f string )
1236 take-char (script-data-double-escaped-state) ;
1239 : (script-data-double-escaped-dash-state) ( document n/f string ch/f -- document n'/f string )
1241 { [ dup CHAR: - = ] [ reach emit-char script-data-double-escaped-dash-dash-state ] }
1242 { [ dup CHAR: < = ] [ reach emit-char script-data-double-escaped-less-than-sign-state ] }
1243 { [ dup CHAR: \0 = ] [
1244 unexpected-null-character
1245 CHAR: replacement-character reach emit-char
1246 script-data-double-escaped-state
1248 { [ dup f = ] [ eof-in-script-html-comment-like-text ] }
1249 [ reach emit-char script-data-double-escaped-state ]
1252 : script-data-double-escaped-dash-state ( document n/f string -- document n'/f string )
1253 take-char (script-data-double-escaped-dash-state) ;
1256 : (script-data-double-escaped-dash-dash-state) ( document n/f string ch/f -- document n'/f string )
1258 { [ dup CHAR: - = ] [ reach emit-char script-data-double-escaped-dash-dash-state ] }
1259 { [ dup CHAR: < = ] [ reach emit-char script-data-double-escaped-less-than-sign-state ] }
1260 { [ dup CHAR: > = ] [ reach emit-char script-data-state ] }
1261 { [ dup CHAR: \0 = ] [
1262 unexpected-null-character
1263 CHAR: replacement-character reach emit-char
1264 script-data-double-escaped-state
1266 { [ dup f = ] [ eof-in-script-html-comment-like-text ] }
1267 [ reach emit-char script-data-escaped-state ]
1270 : script-data-double-escaped-dash-dash-state ( document n/f string -- document n'/f string )
1271 take-char (script-data-double-escaped-dash-dash-state) ;
1274 : (script-data-double-escaped-less-than-sign-state) ( document n/f string ch/f -- document n'/f string )
1276 { [ dup CHAR: / = ] [ reach emit-char pick reset-temporary-buffer script-data-double-escape-end-state ] }
1277 [ (script-data-double-escaped-state) ]
1280 : script-data-double-escaped-less-than-sign-state ( document n/f string -- document n'/f string )
1281 take-char (script-data-double-escaped-less-than-sign-state) ;
1284 : (script-data-double-escape-end-state) ( document n/f string ch/f -- document n'/f string )
1286 { [ dup "\t\n\f\s/>" member? ] [
1288 pick temporary-buffer>> "script" sequence=
1289 [ script-data-escaped-state ] [ script-data-double-escaped-state ] if
1291 { [ dup ascii-upper-alpha? ] [ [ 0x20 + reach push-tag-name ] [ reach push-temporary-buffer ] bi script-data-double-escape-end-state ] }
1292 { [ dup ascii-lower-alpha? ] [ [ reach push-tag-name ] [ reach push-temporary-buffer ] bi script-data-double-escape-end-state ] } ! todo
1293 [ (script-data-double-escaped-state) ]
1296 : script-data-double-escape-end-state ( document n/f string -- document n'/f string )
1297 take-char (script-data-double-escape-end-state) ;
1300 : (before-attribute-name-state) ( document n/f string ch/f -- document n'/f string )
1302 { [ dup "\t\n\f\s" member? ] [ drop before-attribute-name-state ] }
1303 { [ dup "/>" member? ] [ (after-attribute-name-state) ] }
1304 { [ dup f = ] [ (after-attribute-name-state) ] }
1305 { [ dup CHAR: = = ] [ unexpected-equals-sign-before-attribute-name ] }
1306 [ reach push-attribute (attribute-name-state) ]
1309 : before-attribute-name-state ( document n/f string -- document n'/f string )
1310 take-char (before-attribute-name-state) ;
1313 : (attribute-name-state) ( document n/f string ch/f -- document n'/f string )
1315 { [ dup "\t\n\f\s/>" member? ] [ (after-attribute-name-state) ] }
1316 { [ dup f = ] [ (after-attribute-name-state) ] }
1317 { [ dup CHAR: = = ] [ drop before-attribute-value-state ] }
1318 { [ dup ascii-upper-alpha? ] [
1319 0x20 + reach push-attribute-name
1320 attribute-name-state
1322 { [ dup CHAR: \0 = ] [ unexpected-null-character ] }
1323 { [ dup "\"'<" member? ] [
1324 unexpected-character-in-attribute-name
1325 reach push-attribute-name attribute-name-state
1327 [ reach push-attribute-name attribute-name-state ]
1330 : attribute-name-state ( document n/f string -- document n'/f string )
1331 take-char (attribute-name-state) ;
1334 : (after-attribute-name-state) ( document n/f string ch/f -- document n'/f string )
1336 { [ dup "\t\n\f\s" member? ] [ drop after-attribute-name-state ] }
1337 { [ dup CHAR: / = ] [ drop self-closing-start-tag-state ] }
1338 { [ dup CHAR: = = ] [ drop before-attribute-value-state ] }
1339 { [ dup CHAR: > = ] [ drop pick emit-tag data-state ] }
1340 { [ dup f = ] [ eof-in-tag ] }
1341 [ [ pick push-attribute ] dip (attribute-name-state) ]
1344 : after-attribute-name-state ( document n/f string -- document n'/f string )
1345 take-char (after-attribute-name-state) ;
1348 : (before-attribute-value-state) ( document n/f string ch/f -- document n'/f string )
1350 { [ dup "\t\n\f\s" member? ] [ drop before-attribute-name-state ] }
1351 { [ dup CHAR: " = ] [ drop attribute-value-double-quoted-state ] }
1352 { [ dup CHAR: ' = ] [ drop attribute-value-single-quoted-state ] }
1353 { [ dup CHAR: > = ] [ drop missing-attribute-value ] }
1354 [ (attribute-value-unquoted-state) ]
1357 : before-attribute-value-state ( document n/f string -- document n'/f string )
1358 take-char (before-attribute-value-state) ;
1361 : (attribute-value-double-quoted-state) ( document n/f string ch/f -- document n'/f string )
1363 { [ dup CHAR: " = ] [ drop after-attribute-value-quoted-state ] }
1364 { [ dup CHAR: & = ] [
1366 [ \ attribute-value-double-quoted-state >>return-state ] 2dip character-reference-state
1368 { [ dup CHAR: \0 = ] [ unexpected-null-character ] }
1369 { [ dup f = ] [ eof-in-tag ] }
1370 [ reach push-attribute-value attribute-value-double-quoted-state ]
1373 : attribute-value-double-quoted-state ( document n/f string -- document n'/f string )
1374 take-char (attribute-value-double-quoted-state) ;
1377 : (attribute-value-single-quoted-state) ( document n/f string ch/f -- document n'/f string )
1379 { [ dup CHAR: ' = ] [ drop after-attribute-value-quoted-state ] }
1380 { [ dup CHAR: & = ] [
1381 drop [ \ attribute-value-single-quoted-state >>return-state ] 2dip
1382 character-reference-state
1384 { [ dup CHAR: \0 = ] [
1385 drop unexpected-null-character
1386 CHAR: replacement-character reach push-attribute-value
1388 { [ dup f = ] [ eof-in-tag ] }
1389 [ reach push-attribute-value attribute-value-single-quoted-state ]
1392 : attribute-value-single-quoted-state ( document n/f string -- document n'/f string )
1393 take-char (attribute-value-single-quoted-state) ;
1396 : (attribute-value-unquoted-state) ( document n/f string ch/f -- document n'/f string )
1398 { [ dup "\t\n\f\s" member? ] [ drop before-attribute-name-state ] }
1399 { [ dup CHAR: & = ] [
1401 [ \ attribute-value-unquoted-state >>return-state ] 2dip character-reference-state
1403 { [ dup CHAR: > = ] [ drop pick emit-tag data-state ] }
1404 { [ dup CHAR: \0 = ] [ drop unexpected-null-character CHAR: replacement-character reach push-attribute-value ] }
1405 { [ dup "\"'<=`" member? ] [
1406 unexpected-character-in-unquoted-attribute-value
1407 reach push-attribute-value
1408 attribute-value-unquoted-state
1410 { [ dup f = ] [ eof-in-tag ] }
1411 [ reach push-attribute-value attribute-value-unquoted-state ]
1414 : attribute-value-unquoted-state ( document n/f string -- document n'/f string )
1415 take-char (attribute-value-unquoted-state) ;
1418 : (after-attribute-value-quoted-state) ( document n/f string ch/f -- document n'/f string )
1420 { [ dup "\t\n\f\s" member? ] [ drop before-attribute-name-state ] }
1421 { [ dup CHAR: / = ] [ drop self-closing-start-tag-state ] }
1422 { [ dup CHAR: > = ] [ drop pick emit-tag data-state ] }
1423 { [ dup f = ] [ eof-in-tag ] }
1424 [ missing-whitespace-between-attributes (before-attribute-name-state) ]
1427 : after-attribute-value-quoted-state ( document n/f string -- document n'/f string )
1428 take-char (after-attribute-value-quoted-state) ;
1431 : (self-closing-start-tag-state) ( document n/f string ch/f -- document n'/f string )
1433 { [ dup CHAR: > = ] [ drop pick [ set-self-closing ] [ emit-tag ] bi data-state ] }
1434 { [ dup f = ] [ eof-in-tag ] }
1435 [ unexpected-solidus-in-tag ]
1438 : self-closing-start-tag-state ( document n/f string -- document n'/f string )
1439 take-char (self-closing-start-tag-state) ;
1442 : (bogus-comment-state) ( document n/f string ch/f -- document n'/f string )
1444 { [ dup CHAR: > = ] [ drop pick emit-comment-token data-state ] }
1445 { [ dup f = ] [ drop pick [ emit-comment-token ] [ emit-eof ] bi ] }
1446 { [ dup CHAR: \0 = ] [ drop unexpected-null-character CHAR: replacement-character reach push-comment-token ] }
1447 [ reach push-comment-token bogus-comment-state ]
1450 : bogus-comment-state ( document n/f string -- document n'/f string )
1451 take-char (bogus-comment-state) ;
1454 : markup-declaration-open-state ( document n/f string -- document n'/f string )
1456 { [ "--" take-from? ] [ comment-start-state ] }
1457 { [ "DOCTYPE" take-from-insensitive? ] [ doctype-state ] }
1458 { [ "[CDATA[" take-from-insensitive? ] [ unimplemented* ] }
1460 incorrectly-opened-comment ! bogus-comment-state
1464 : (comment-start-state) ( document n/f string ch/f -- document n'/f string )
1466 { [ dup CHAR: - = ] [ drop comment-start-dash-state ] }
1467 { [ dup CHAR: > = ] [ drop abrupt-closing-of-empty-comment pick emit-comment-token data-state ] }
1471 : comment-start-state ( document n/f string -- document n'/f string )
1472 take-char (comment-start-state) ;
1475 : (comment-start-dash-state) ( document n/f string ch/f -- document n'/f string )
1477 { [ dup CHAR: - = ] [ drop comment-end-state ] }
1478 { [ dup CHAR: > = ] [ drop abrupt-closing-of-empty-comment ] }
1479 { [ dup f = ] [ eof-in-comment ] }
1480 [ [ CHAR: - reach push-comment-token ] dip (comment-state) ]
1483 : comment-start-dash-state ( document n/f string -- document n'/f string )
1484 take-char (comment-start-dash-state) ;
1487 : (comment-state) ( document n/f string ch/f -- document n'/f string )
1489 { [ dup CHAR: < = ] [ reach push-comment-token comment-less-than-sign-state ] }
1490 { [ dup CHAR: - = ] [ drop comment-end-dash-state ] }
1491 { [ dup CHAR: \0 = ] [ drop unexpected-null-character ] }
1492 { [ dup f = ] [ eof-in-comment ] }
1493 [ reach push-comment-token comment-state ]
1496 : comment-state ( document n/f string -- document n'/f string )
1497 take-char (comment-state) ;
1500 : (comment-less-than-sign-state) ( document n/f string ch/f -- document n'/f string )
1502 { [ dup CHAR: ! = ] [ reach push-comment-token comment-less-than-sign-bang-state ] }
1503 { [ dup CHAR: < = ] [ reach push-comment-token comment-less-than-sign-state ] }
1507 : comment-less-than-sign-state ( document n/f string -- document n'/f string )
1508 take-char (comment-less-than-sign-state) ;
1511 : (comment-less-than-sign-bang-state) ( document n/f string ch/f -- document n'/f string )
1513 { [ dup CHAR: - = ] [ reach push-comment-token comment-less-than-sign-bang-dash-state ] }
1517 : comment-less-than-sign-bang-state ( document n/f string -- document n'/f string )
1518 take-char (comment-less-than-sign-bang-state) ;
1521 : (comment-less-than-sign-bang-dash-state) ( document n/f string ch/f -- document n'/f string )
1523 { [ dup CHAR: - = ] [ drop comment-less-than-sign-bang-dash-dash-state ] }
1524 [ (comment-end-dash-state) ]
1527 : comment-less-than-sign-bang-dash-state ( document n/f string -- document n'/f string )
1528 take-char (comment-less-than-sign-bang-dash-state) ;
1531 : (comment-less-than-sign-bang-dash-dash-state) ( document n/f string ch/f -- document n'/f string )
1533 { [ dup CHAR: > = ] [ (comment-end-state) ] }
1534 { [ dup f = ] [ (comment-end-state) ] }
1535 [ nested-comment (comment-end-state) ]
1538 : comment-less-than-sign-bang-dash-dash-state ( document n/f string -- document n'/f string )
1539 take-char (comment-less-than-sign-bang-dash-dash-state) ;
1542 : (comment-end-dash-state) ( document n/f string ch/f -- document n'/f string )
1544 { [ dup CHAR: - = ] [ drop comment-end-state ] }
1545 { [ dup f = ] [ eof-in-comment ] }
1546 [ [ CHAR: - reach push-comment-token ] dip (comment-state) ]
1549 : comment-end-dash-state ( document n/f string -- document n'/f string )
1550 take-char (comment-end-dash-state) ;
1553 : (comment-end-state) ( document n/f string ch/f -- document n'/f string )
1555 { [ dup CHAR: > = ] [ drop pick emit-comment-token data-state ] }
1556 { [ dup CHAR: ! = ] [ drop comment-end-bang-state ] }
1557 { [ dup CHAR: - = ] [ reach push-comment-token comment-end-state ] }
1558 { [ dup f = ] [ drop eof-in-comment pick [ emit-comment-token ] [ emit-eof ] bi ] }
1559 [ [ "--" reach push-all-comment-token ] dip (comment-state) ]
1562 : comment-end-state ( document n/f string -- document n'/f string )
1563 take-char (comment-end-state) ;
1566 : (comment-end-bang-state) ( document n/f string ch/f -- document n'/f string )
1568 { [ dup CHAR: - = ] [ drop comment-end-dash-state ] }
1569 { [ dup CHAR: > = ] [ drop incorrectly-closed-comment pick emit-comment-token data-state ] }
1570 { [ dup f = ] [ eof-in-comment ] }
1571 [ [ "--!" reach push-all-comment-token ] dip (comment-state) ]
1574 : comment-end-bang-state ( document n/f string -- document n'/f string )
1575 take-char (comment-end-bang-state) ;
1578 : (doctype-state) ( document n/f string ch/f -- document n'/f string )
1580 { [ dup "\t\n\f\s" member? ] [ drop before-doctype-name-state ] }
1581 { [ dup CHAR: > = ] [ (before-doctype-name-state) ] }
1582 { [ dup f = ] [ drop eof-in-doctype pick [ new-doctype-with-quirks ] [ emit-doctype ] [ emit-eof ] tri ] }
1583 [ missing-whitespace-before-doctype-name ]
1586 : doctype-state ( document n/f string -- document n'/f string )
1587 take-char (doctype-state) ;
1590 : (before-doctype-name-state) ( document n/f string ch/f -- document n'/f string )
1592 { [ dup "\t\n\f\s" member? ] [ drop before-doctype-name-state ] }
1593 { [ dup ascii-upper-alpha? ] [ 0x20 + reach new-doctype-from-ch doctype-name-state ] }
1594 { [ dup CHAR: \0 = ] [
1596 unexpected-null-character
1597 CHAR: replacement-character reach new-doctype-from-ch
1600 { [ dup CHAR: > = ] [
1601 drop missing-doctype-name
1602 pick [ new-doctype-with-quirks ] [ emit-doctype ] bi
1606 pick [ new-doctype-with-quirks ] [ emit-doctype ] [ emit-eof ] tri
1608 [ reach new-doctype-from-ch doctype-name-state ]
1611 : before-doctype-name-state ( document n/f string -- document n'/f string )
1612 take-char (before-doctype-name-state) ;
1615 : (doctype-name-state) ( document n/f string ch/f -- document n'/f string )
1617 { [ dup "\t\n\f\s" member? ] [ drop after-doctype-name-state ] }
1618 { [ dup CHAR: > = ] [ drop pick emit-doctype data-state ] }
1619 { [ dup ascii-upper-alpha? ] [ 0x20 + reach push-doctype-name doctype-name-state ] }
1620 { [ dup CHAR: \0 = ] [
1621 drop unexpected-null-character
1622 CHAR: replacement-character pick push-doctype-name
1625 { [ dup f = ] [ drop eof-in-doctype pick [ emit-doctype ] [ emit-eof ] bi ] } ! force-quirks on for doctype
1626 [ reach push-doctype-name doctype-name-state ]
1629 : doctype-name-state ( document n/f string -- document n'/f string )
1630 take-char (doctype-name-state) ;
1633 : (after-doctype-name-state) ( document n/f string ch/f -- document n'/f string )
1635 { [ dup "\t\n\f\s" member? ] [ drop after-doctype-name-state ] }
1636 { [ dup CHAR: > = ] [ drop pick emit-doctype data-state ] }
1637 { [ dup f = ] [ eof-in-doctype ] }
1638 { [ [ "PUBLIC" take-from-insensitive? ] dip swap ] [ drop after-doctype-public-keyword-state ] }
1639 { [ [ "SYSTEM" take-from-insensitive? ] dip swap ] [ drop after-doctype-system-keyword-state ] }
1640 [ invalid-character-sequence-after-doctype-name ]
1643 : after-doctype-name-state ( document n/f string -- document n'/f string )
1644 take-char (after-doctype-name-state) ;
1647 : (after-doctype-public-keyword-state) ( document n/f string ch/f -- document n'/f string )
1649 { [ dup "\t\n\f\s" member? ] [ drop before-doctype-public-identifier-state ] }
1650 { [ dup CHAR: " = ] [ missing-whitespace-after-doctype-public-keyword ] }
1651 { [ dup CHAR: ' = ] [ missing-whitespace-after-doctype-public-keyword ] }
1652 { [ dup CHAR: > = ] [ drop missing-doctype-public-identifier force-quirks data-state ] }
1653 { [ dup f = ] [ drop eof-in-doctype pick [ emit-doctype ] [ emit-eof ] bi ] }
1655 missing-quote-before-doctype-public-identifier
1656 [ reach force-quirks ] dip
1657 (bogus-doctype-state)
1661 : after-doctype-public-keyword-state ( document n/f string -- document n'/f string )
1662 take-char (after-doctype-public-keyword-state) ;
1665 : (before-doctype-public-identifier-state) ( document n/f string ch/f -- document n'/f string )
1667 { [ dup "\t\n\f\s" member? ] [ drop before-doctype-public-identifier-state ] }
1668 { [ dup CHAR: " = ] [
1669 drop pick initialize-doctype-public-identifier
1670 doctype-public-identifier-double-quoted-state
1672 { [ dup CHAR: ' = ] [
1673 drop pick initialize-doctype-public-identifier
1674 doctype-public-identifier-single-quoted-state
1676 { [ dup CHAR: > = ] [
1677 drop missing-doctype-public-identifier
1678 pick [ force-quirks ] [ emit-doctype ] bi
1681 { [ dup f = ] [ drop eof-in-doctype pick [ emit-doctype ] [ emit-eof ] bi ] }
1683 missing-quote-before-doctype-public-identifier
1684 [ reach force-quirks ] dip
1685 (bogus-doctype-state)
1689 : before-doctype-public-identifier-state ( document n/f string -- document n'/f string )
1690 take-char (before-doctype-public-identifier-state) ;
1693 : (doctype-public-identifier-double-quoted-state) ( document n/f string ch/f -- document n'/f string )
1695 { [ dup CHAR: " = ] [ drop after-doctype-public-identifier-state ] }
1696 { [ dup CHAR: \0 = ] [
1698 unexpected-null-character
1699 CHAR: replacement-character pick push-doctype-public-identifier
1700 doctype-public-identifier-double-quoted-state
1702 { [ dup CHAR: > = ] [
1703 drop abrupt-doctype-public-identifier
1704 pick [ force-quirks ] [ emit-doctype ] bi
1709 pick [ force-quirks ] [ emit-doctype ] [ emit-eof ] tri
1711 [ reach push-doctype-public-identifier doctype-public-identifier-double-quoted-state ]
1714 : doctype-public-identifier-double-quoted-state ( document n/f string -- document n'/f string )
1715 take-char (doctype-public-identifier-double-quoted-state) ;
1718 : (doctype-public-identifier-single-quoted-state) ( document n/f string ch/f -- document n'/f string )
1720 { [ dup CHAR: ' = ] [ drop after-doctype-public-identifier-state ] }
1721 { [ dup CHAR: \0 = ] [
1723 unexpected-null-character
1724 CHAR: replacement-character pick push-doctype-public-identifier
1725 doctype-public-identifier-double-quoted-state
1727 { [ dup CHAR: > = ] [
1728 drop abrupt-doctype-public-identifier
1729 pick [ force-quirks ] [ emit-doctype ] bi
1734 pick [ force-quirks ] [ emit-doctype ] [ emit-eof ] tri
1736 [ reach push-doctype-public-identifier doctype-public-identifier-single-quoted-state ]
1739 : doctype-public-identifier-single-quoted-state ( document n/f string -- document n'/f string )
1740 take-char (doctype-public-identifier-single-quoted-state) ;
1743 : (after-doctype-public-identifier-state) ( document n/f string ch/f -- document n'/f string )
1745 { [ dup "\t\n\f\s" member? ] [ drop between-doctype-public-and-system-identifiers-state ] }
1746 { [ dup CHAR: > = ] [
1747 drop pick emit-doctype
1750 { [ dup CHAR: " = ] [
1751 drop missing-whitespace-between-doctype-public-and-system-identifiers
1752 pick initialize-doctype-system-identifier
1753 doctype-system-identifier-double-quoted-state
1755 { [ dup CHAR: ' = ] [
1756 drop missing-whitespace-between-doctype-public-and-system-identifiers
1757 pick initialize-doctype-system-identifier
1758 doctype-system-identifier-single-quoted-state
1760 { [ dup f = ] [ drop eof-in-doctype pick [ force-quirks ] [ emit-doctype ] [ emit-eof ] tri ] }
1762 missing-quote-before-doctype-system-identifier
1763 [ reach force-quirks ] dip
1764 (bogus-doctype-state)
1768 : after-doctype-public-identifier-state ( document n/f string -- document n'/f string )
1769 take-char (after-doctype-public-identifier-state) ;
1772 : (between-doctype-public-and-system-identifiers-state) ( document n/f string ch/f -- document n'/f string )
1774 { [ dup "\t\n\f\s" member? ] [ drop between-doctype-public-and-system-identifiers-state ] }
1775 { [ dup CHAR: > = ] [
1776 drop pick emit-doctype
1779 { [ dup CHAR: " = ] [
1780 drop pick initialize-doctype-system-identifier
1781 doctype-system-identifier-double-quoted-state
1783 { [ dup CHAR: ' = ] [
1784 drop pick initialize-doctype-system-identifier
1785 doctype-system-identifier-single-quoted-state
1787 { [ dup f = ] [ drop eof-in-doctype pick [ force-quirks ] [ emit-doctype ] [ emit-eof ] tri ] }
1789 missing-quote-before-doctype-system-identifier
1790 [ reach force-quirks ] dip
1791 (bogus-doctype-state)
1795 : between-doctype-public-and-system-identifiers-state ( document n/f string -- document n'/f string )
1796 take-char (between-doctype-public-and-system-identifiers-state) ;
1799 : (after-doctype-system-keyword-state) ( document n/f string ch/f -- document n'/f string )
1801 { [ dup "\t\n\f\s" member? ] [ drop between-doctype-public-and-system-identifiers-state ] }
1802 { [ dup CHAR: " = ] [
1803 drop missing-whitespace-after-doctype-system-keyword
1804 pick initialize-doctype-system-identifier
1805 doctype-system-identifier-double-quoted-state
1807 { [ dup CHAR: ' = ] [
1808 drop missing-whitespace-after-doctype-system-keyword
1809 pick initialize-doctype-system-identifier
1810 doctype-system-identifier-single-quoted-state
1812 { [ dup CHAR: > = ] [
1813 drop missing-doctype-system-identifier
1814 pick [ force-quirks ] [ emit-doctype ] bi
1817 { [ dup f = ] [ drop eof-in-doctype pick [ force-quirks ] [ emit-doctype ] [ emit-eof ] tri ] }
1819 missing-quote-before-doctype-system-identifier
1820 [ reach force-quirks ] dip
1821 (bogus-doctype-state)
1825 : after-doctype-system-keyword-state ( document n/f string -- document n'/f string )
1826 take-char (after-doctype-system-keyword-state) ;
1829 : (before-doctype-system-identifier-state) ( document n/f string ch/f -- document n'/f string )
1831 { [ dup "\t\n\f\s" member? ] [ drop before-doctype-system-identifier-state ] }
1832 { [ dup CHAR: " = ] [
1833 drop pick initialize-doctype-system-identifier
1834 doctype-system-identifier-double-quoted-state
1836 { [ dup CHAR: ' = ] [
1837 drop pick initialize-doctype-system-identifier
1838 doctype-system-identifier-single-quoted-state
1840 { [ dup CHAR: > = ] [
1841 drop missing-doctype-system-identifier
1842 pick [ force-quirks ] [ emit-doctype ] bi
1845 { [ dup f = ] [ drop eof-in-doctype pick [ emit-doctype ] [ emit-eof ] bi ] }
1847 missing-quote-before-doctype-system-identifier
1848 [ reach force-quirks ] dip
1849 (bogus-doctype-state)
1853 : before-doctype-system-identifier-state ( document n/f string -- document n'/f string )
1854 take-char (before-doctype-system-identifier-state) ;
1857 : (doctype-system-identifier-double-quoted-state) ( document n/f string ch/f -- document n'/f string )
1859 { [ dup CHAR: " = ] [ drop after-doctype-system-identifier-state ] }
1860 { [ dup CHAR: \0 = ] [
1862 unexpected-null-character
1863 CHAR: replacement-character pick push-doctype-system-identifier
1864 doctype-system-identifier-double-quoted-state
1866 { [ dup CHAR: > = ] [
1867 drop abrupt-doctype-system-identifier
1868 pick [ force-quirks ] [ emit-doctype ] bi
1873 pick [ force-quirks ] [ emit-doctype ] [ emit-eof ] tri
1875 [ reach push-doctype-system-identifier doctype-system-identifier-double-quoted-state ]
1878 : doctype-system-identifier-double-quoted-state ( document n/f string -- document n'/f string )
1879 take-char (doctype-system-identifier-double-quoted-state) ;
1882 : (doctype-system-identifier-single-quoted-state) ( document n/f string ch/f -- document n'/f string )
1884 { [ dup CHAR: ' = ] [ drop after-doctype-system-identifier-state ] }
1885 { [ dup CHAR: \0 = ] [
1887 unexpected-null-character
1888 CHAR: replacement-character pick push-doctype-system-identifier
1889 doctype-system-identifier-double-quoted-state
1891 { [ dup CHAR: > = ] [
1892 drop abrupt-doctype-system-identifier
1893 pick [ force-quirks ] [ emit-doctype ] bi
1898 pick [ force-quirks ] [ emit-doctype ] [ emit-eof ] tri
1900 [ reach push-doctype-system-identifier doctype-system-identifier-single-quoted-state ]
1903 : doctype-system-identifier-single-quoted-state ( document n/f string -- document n'/f string )
1904 take-char (doctype-system-identifier-single-quoted-state) ;
1907 : (after-doctype-system-identifier-state) ( document n/f string ch/f -- document n'/f string )
1909 { [ dup "\t\n\f\s" member? ] [ drop after-doctype-system-identifier-state ] }
1910 { [ dup CHAR: > = ] [
1911 drop pick emit-doctype
1914 { [ dup f = ] [ drop eof-in-doctype pick [ force-quirks ] [ emit-doctype ] [ emit-eof ] tri ] }
1916 unexpected-character-after-doctype-system-identifier
1917 [ reach force-quirks ] dip
1918 (bogus-doctype-state)
1922 : after-doctype-system-identifier-state ( document n/f string -- document n'/f string )
1923 take-char (after-doctype-system-identifier-state) ;
1926 : (bogus-doctype-state) ( document n/f string ch/f -- document n'/f string )
1928 { [ dup CHAR: > = ] [ drop pick emit-doctype data-state ] }
1929 { [ dup CHAR: \0 = ] [ drop unexpected-null-character bogus-doctype-state ] }
1930 { [ dup f = ] [ drop eof-in-doctype pick emit-eof ] }
1931 [ drop bogus-doctype-state ]
1934 : bogus-doctype-state ( document n/f string -- document n'/f string )
1935 take-char (bogus-doctype-state) ;
1938 : (cdata-section-state) ( document n/f string ch/f -- document n'/f string )
1940 { [ dup CHAR: ] = ] [ drop cdata-section-bracket-state ] }
1941 { [ dup f = ] [ drop eof-in-cdata pick emit-eof ] }
1942 [ reach emit-char cdata-section-state ]
1945 : cdata-section-state ( document n/f string -- document n'/f string )
1946 take-char (cdata-section-state) ;
1949 : (cdata-section-bracket-state) ( document n/f string ch/f -- document n'/f string )
1951 { [ dup CHAR: ] = ] [ drop cdata-section-end-state ] }
1952 [ [ CHAR: ] reach emit-char ] dip (cdata-section-state) ]
1955 : cdata-section-bracket-state ( document n/f string -- document n'/f string )
1956 take-char (cdata-section-bracket-state) ;
1959 : (cdata-section-end-state) ( document n/f string ch/f -- document n'/f string )
1961 { [ dup CHAR: ] = ] [ reach emit-char cdata-section-end-state ] }
1962 { [ dup CHAR: > = ] [ drop data-state ] }
1963 [ [ "]]" reach emit-string ] dip (cdata-section-state) ]
1966 : cdata-section-end-state ( document n/f string -- document n'/f string )
1967 take-char (cdata-section-end-state) ;
1970 : (character-reference-state) ( document n/f string ch/f -- document n'/f string )
1971 [ CHAR: & reach ch>new-temporary-buffer ] dip
1973 { [ dup ascii-alphanumeric? ] [ (named-character-reference-state) ] }
1974 { [ dup CHAR: # = ] [ reach push-temporary-buffer numeric-character-reference-state ] }
1975 [ reach flush-temporary-buffer (return-state) ]
1978 : character-reference-state ( document n/f string -- document n'/f string )
1979 take-char (character-reference-state) ;
1982 : (named-character-reference-state) ( document n/f string ch/f -- document n'/f string )
1983 reach push-temporary-buffer
1984 pick named-character-match?
1986 drop ! exact match, drop prefix match
1989 [ pick temporary-buffer-attribute? ]
1990 [ pick temporary-buffer>> ?last CHAR: ; = not ]
1991 [ 3dup peek-from { [ CHAR: = = ] [ ascii-alphanumeric? ] } 1|| ]
1994 flush-temporary-buffer
1997 pick [ take-named-character ] [ flush-temporary-buffer ] bi return-state
2001 [ named-character-reference-state ]
2002 [ pick flush-temporary-buffer ambiguous-ampersand-state ] if
2005 : named-character-reference-state ( document n/f string -- document n'/f string )
2006 take-char (named-character-reference-state) ;
2009 : (ambiguous-ampersand-state) ( document n/f string ch/f -- document n'/f string )
2011 { [ dup ascii-alphanumeric? ] [
2014 { [ dup CHAR: ; = ] [ unknown-named-character-reference (return-state) ] }
2018 : ambiguous-ampersand-state ( document n/f string -- document n'/f string )
2019 take-char (ambiguous-ampersand-state) ;
2022 : (numeric-character-reference-state) ( document n/f string ch/f -- document n'/f string )
2024 { [ dup "xX" member? ] [ reach push-temporary-buffer hexadecimal-character-reference-start-state ] }
2025 [ (decimal-character-reference-start-state) ]
2028 : numeric-character-reference-state ( document n/f string -- document n'/f string )
2029 take-char (numeric-character-reference-state) ;
2032 : (hexadecimal-character-reference-start-state) ( document n/f string ch/f -- document n'/f string )
2034 { [ dup ascii-hex-digit? ] [ (hexadecimal-character-reference-state) ] }
2035 [ absence-of-digits-in-numeric-character-reference reach flush-temporary-buffer (return-state) ]
2038 : hexadecimal-character-reference-start-state ( document n/f string -- document n'/f string )
2039 take-char (hexadecimal-character-reference-start-state) ;
2042 : (decimal-character-reference-start-state) ( document n/f string ch/f -- document n'/f string )
2044 { [ dup ascii-digit? ] [ (decimal-character-reference-state) ] }
2045 [ absence-of-digits-in-numeric-character-reference reach flush-temporary-buffer (return-state) ]
2048 : decimal-character-reference-start-state ( document n/f string -- document n'/f string )
2049 take-char (decimal-character-reference-start-state) ;
2052 : (hexadecimal-character-reference-state) ( document n/f string ch/f -- document n'/f string )
2054 { [ dup ascii-digit? ] [ unimplemented* ] }
2055 { [ dup ascii-upper-hex-digit? ] [ unimplemented* ] }
2056 { [ dup ascii-lower-hex-digit? ] [ unimplemented* ] }
2057 { [ dup CHAR: ; = ] [ drop numeric-character-reference-end-state ] }
2058 [ missing-semicolon-after-character-reference ]
2061 : hexadecimal-character-reference-state ( document n/f string -- document n'/f string )
2062 take-char (hexadecimal-character-reference-state) ;
2065 : (decimal-character-reference-state) ( document n/f string ch/f -- document n'/f string )
2067 { [ dup ascii-digit? ] [ unimplemented* ] }
2068 { [ dup CHAR: ; = ] [ drop numeric-character-reference-end-state ] }
2069 [ missing-semicolon-after-character-reference ]
2072 : decimal-character-reference-state ( document n/f string -- document n'/f string )
2073 take-char (decimal-character-reference-state) ;
2076 : (numeric-character-reference-end-state) ( document n/f string ch/f -- document n'/f string )
2078 [ missing-semicolon-after-character-reference ]
2081 : numeric-character-reference-end-state ( document n/f string -- document n'/f string )
2082 take-char (numeric-character-reference-end-state) ;
2086 : parse-html5 ( string -- document )
2087 [ <document> 0 ] dip data-state 2drop ;