! https://html.spec.whatwg.org/multipage/parsing.html#tokenization
+DEFER: data-state
+DEFER: (data-state)
+DEFER: rcdata-state
+DEFER: (rcdata-state)
+DEFER: rawtext-state
+DEFER: (rawtext-state)
+DEFER: script-data-state
+DEFER: (script-data-state)
+DEFER: plaintext-state
+DEFER: (plaintext-state)
+DEFER: tag-open-state
+DEFER: (tag-open-state)
+DEFER: end-tag-open-state
+DEFER: (end-tag-open-state)
+DEFER: tag-name-state
+DEFER: (tag-name-state)
+DEFER: rcdata-less-than-sign-state
+DEFER: (rcdata-less-than-sign-state)
+DEFER: rcdata-end-tag-open-state
+DEFER: (rcdata-end-tag-open-state)
+DEFER: rcdata-end-tag-name-state
+DEFER: (rcdata-end-tag-name-state)
+DEFER: rawtext-less-than-sign-state
+DEFER: (rawtext-less-than-sign-state)
+DEFER: rawtext-end-tag-open-state
+DEFER: (rawtext-end-tag-open-state)
+DEFER: rawtext-end-tag-name-state
+DEFER: (rawtext-end-tag-name-state)
+DEFER: script-data-less-than-sign-state
+DEFER: (script-data-less-than-sign-state)
+DEFER: script-data-end-tag-open-state
+DEFER: (script-data-end-tag-open-state)
+DEFER: script-data-end-tag-name-state
+DEFER: (script-data-end-tag-name-state)
+DEFER: script-data-escape-start-state
+DEFER: (script-data-escape-start-state)
+DEFER: script-data-escape-start-dash-state
+DEFER: (script-data-escape-start-dash-state)
+DEFER: script-data-escaped-state
+DEFER: (script-data-escaped-state)
+DEFER: script-data-escaped-dash-state
+DEFER: (script-data-escaped-dash-state)
+DEFER: script-data-escaped-dash-dash-state
+DEFER: (script-data-escaped-dash-dash-state)
+DEFER: script-data-escaped-less-than-sign-state
+DEFER: (script-data-escaped-less-than-sign-state)
+DEFER: script-data-escaped-end-tag-open-state
+DEFER: (script-data-escaped-end-tag-open-state)
+DEFER: script-data-escaped-end-tag-name-state
+DEFER: (script-data-escaped-end-tag-name-state)
+DEFER: script-data-double-escape-start-state
+DEFER: (script-data-double-escape-start-state)
+DEFER: script-data-double-escaped-state
+DEFER: (script-data-double-escaped-state)
+DEFER: script-data-double-escaped-dash-state
+DEFER: (script-data-double-escaped-dash-state)
+DEFER: script-data-double-escaped-dash-dash-state
+DEFER: (script-data-double-escaped-dash-dash-state)
+DEFER: script-data-double-escaped-less-than-sign-state
+DEFER: (script-data-double-escaped-less-than-sign-state)
+DEFER: script-data-double-escape-end-state
+DEFER: (script-data-double-escape-end-state)
+DEFER: before-attribute-name-state
+DEFER: (before-attribute-name-state)
+DEFER: attribute-name-state
+DEFER: (attribute-name-state)
+DEFER: after-attribute-name-state
+DEFER: (after-attribute-name-state)
+DEFER: before-attribute-value-state
+DEFER: (before-attribute-value-state)
+DEFER: attribute-value-double-quoted-state
+DEFER: (attribute-value-double-quoted-state)
+DEFER: attribute-value-single-quoted-state
+DEFER: (attribute-value-single-quoted-state)
+DEFER: attribute-value-unquoted-state
+DEFER: (attribute-value-unquoted-state)
+DEFER: after-attribute-value-quoted-state
+DEFER: (after-attribute-value-quoted-state)
+DEFER: self-closing-start-tag-state
+DEFER: (self-closing-start-tag-state)
+DEFER: bogus-comment-state
+DEFER: (bogus-comment-state)
+DEFER: markup-declaration-open-state
+DEFER: (markup-declaration-open-state)
+DEFER: comment-start-state
+DEFER: (comment-start-state)
+DEFER: comment-start-dash-state
+DEFER: (comment-start-dash-state)
+DEFER: comment-state
+DEFER: (comment-state)
+DEFER: comment-less-than-sign-state
+DEFER: (comment-less-than-sign-state)
+DEFER: comment-less-than-sign-bang-state
+DEFER: (comment-less-than-sign-bang-state)
+DEFER: comment-less-than-sign-bang-dash-state
+DEFER: (comment-less-than-sign-bang-dash-state)
+DEFER: comment-less-than-sign-bang-dash-dash-state
+DEFER: (comment-less-than-sign-bang-dash-dash-state)
+DEFER: comment-end-dash-state
+DEFER: (comment-end-dash-state)
+DEFER: comment-end-state
+DEFER: (comment-end-state)
+DEFER: comment-end-bang-state
+DEFER: (comment-end-bang-state)
+DEFER: doctype-state
+DEFER: (doctype-state)
+DEFER: before-doctype-name-state
+DEFER: (before-doctype-name-state)
+DEFER: doctype-name-state
+DEFER: (doctype-name-state)
+DEFER: after-doctype-name-state
+DEFER: (after-doctype-name-state)
+DEFER: after-doctype-public-keyword-state
+DEFER: (after-doctype-public-keyword-state)
+DEFER: before-doctype-public-identifier-state
+DEFER: (before-doctype-public-identifier-state)
+DEFER: doctype-public-identifier-double-quoted-state
+DEFER: (doctype-public-identifier-double-quoted-state)
+DEFER: doctype-public-identifier-single-quoted-state
+DEFER: (doctype-public-identifier-single-quoted-state)
+DEFER: after-doctype-public-identifier-state
+DEFER: (after-doctype-public-identifier-state)
+DEFER: between-doctype-public-and-system-identifiers-state
+DEFER: (between-doctype-public-and-system-identifiers-state)
+DEFER: after-doctype-system-keyword-state
+DEFER: (after-doctype-system-keyword-state)
+DEFER: before-doctype-system-identifier-state
+DEFER: (before-doctype-system-identifier-state)
+DEFER: doctype-system-identifier-double-quoted-state
+DEFER: (doctype-system-identifier-double-quoted-state)
+DEFER: doctype-system-identifier-single-quoted-state
+DEFER: (doctype-system-identifier-single-quoted-state)
+DEFER: after-doctype-system-identifier-state
+DEFER: (after-doctype-system-identifier-state)
+DEFER: bogus-doctype-state
+DEFER: (bogus-doctype-state)
+DEFER: cdata-section-state
+DEFER: (cdata-section-state)
+DEFER: cdata-section-bracket-state
+DEFER: (cdata-section-bracket-state)
+DEFER: cdata-section-end-state
+DEFER: (cdata-section-end-state)
+DEFER: character-reference-state
+DEFER: (character-reference-state)
+DEFER: named-character-reference-state
+DEFER: (named-character-reference-state)
+DEFER: ambiguous-ampersand-state
+DEFER: (ambiguous-ampersand-state)
+DEFER: numeric-character-reference-state
+DEFER: (numeric-character-reference-state)
+DEFER: hexadecimal-character-reference-start-state
+DEFER: (hexadecimal-character-reference-start-state)
+DEFER: decimal-character-reference-start-state
+DEFER: (decimal-character-reference-start-state)
+DEFER: hexadecimal-character-reference-state
+DEFER: (hexadecimal-character-reference-state)
+DEFER: decimal-character-reference-state
+DEFER: (decimal-character-reference-state)
+DEFER: numeric-character-reference-end-state
+DEFER: (numeric-character-reference-end-state)
+
+
+! Error classes used as placeholders by tokenizer branches that have no
+! implementation yet; `unimplemented` carries a descriptive string.
ERROR: unimplemented string ;
ERROR: unimplemented* ;
tree-insert-mode
doctype-name
tag-name
+end-tag-name
attribute-name
attribute-value
temporary-buffer
initial-mode >>tree-insert-mode
SBUF" " clone >>doctype-name
SBUF" " clone >>tag-name
+ SBUF" " clone >>end-tag-name
SBUF" " clone >>attribute-name
SBUF" " clone >>attribute-value
SBUF" " clone >>temporary-buffer
+! Buffer helpers. Each push-* word appends one character to the named slot of
+! the document; each reset-* word stores a fresh empty string buffer in its slot.
: push-attribute-name ( ch document -- ) attribute-name>> push ;
: push-attribute-value ( ch document -- ) attribute-value>> push ;
: push-temporary-buffer ( ch document -- ) temporary-buffer>> push ;
+: reset-temporary-buffer ( document -- ) SBUF" " clone temporary-buffer<< ;
+: reset-end-tag ( document -- ) SBUF" " clone end-tag-name<< ;
: push-comment-token ( ch document -- ) comment-token>> push ;
-: emit-tag ( document -- ) "emit tag" print . ;
+! Debug emitters: each one writes a label and prints its data to the output
+! stream. emit-temporary-buffer-with prints the given prefix string followed
+! by the contents of the document's temporary buffer.
+: emit-eof ( document -- ) drop "emit-eof" print ;
+: emit-char ( char document -- ) drop "emit-char:" write . ;
+: emit-temporary-buffer-with ( string document -- ) "emit-temp-buffer: " write temporary-buffer>> append . ;
+! Callers pass multi-character strings such as "</" and "<!", so the input is
+! declared as a string rather than a single character.
+: emit-string ( string document -- ) drop "emit-string:" write . ;
+: emit-tag ( document -- ) "emit tag: " write . ;
+: emit-end-tag ( document -- ) "emit end tag: " write . ;
+! Prints the accumulated doctype name as a string, then replaces the
+! doctype-name slot with a fresh empty buffer.
: emit-doctype ( document -- )
"emit doctype: " write
[ doctype-name>> >string . ]
[ SBUF" " clone doctype-name<< ] bi ;
+! Intended to check whether the current end tag matches the open tag.
+! Stub for now: it discards the document and always answers f, so every
+! caller takes its "not appropriate" branch.
+: appropriate-end-tag-token? ( document -- ? )
+ drop f ;
+
+! ASCII character-class predicates. The three range tests use if*, so an f
+! input (end of input) answers f instead of being compared; this also holds
+! for the two words whose stack effect names the input `ch` rather than `ch/f`.
: ascii-upper-alpha? ( ch -- ? ) [ CHAR: A CHAR: Z between? ] [ f ] if* ; inline
: ascii-lower-alpha? ( ch -- ? ) [ CHAR: a CHAR: z between? ] [ f ] if* ; inline
: ascii-digit? ( ch/f -- ? ) [ CHAR: 0 CHAR: 9 between? ] [ f ] if* ;
: ascii-alpha? ( ch/f -- ? ) { [ ascii-lower-alpha? ] [ ascii-upper-alpha? ] } 1|| ;
: ascii-alphanumeric? ( ch/f -- ? ) { [ ascii-alpha? ] [ ascii-digit? ] } 1|| ;
-DEFER: data-state
-DEFER: (data-state)
-DEFER: rcdata-state
-DEFER: (rcdata-state)
-DEFER: rawtext-state
-DEFER: (rawtext-state)
-DEFER: script-data-state
-DEFER: (script-data-state)
-DEFER: plaintext-state
-DEFER: (plaintext-state)
-DEFER: tag-open-state
-DEFER: (tag-open-state)
-DEFER: end-tag-open-state
-DEFER: (end-tag-open-state)
-DEFER: tag-name-state
-DEFER: (tag-name-state)
-DEFER: rcdata-less-than-sign-state
-DEFER: (rcdata-less-than-sign-state)
-DEFER: rcdata-end-tag-open-state
-DEFER: (rcdata-end-tag-open-state)
-DEFER: rcdata-end-tag-name-state
-DEFER: (rcdata-end-tag-name-state)
-DEFER: rawtext-less-than-sign-state
-DEFER: (rawtext-less-than-sign-state)
-DEFER: rawtext-end-tag-open-state
-DEFER: (rawtext-end-tag-open-state)
-DEFER: rawtext-end-tag-name-state
-DEFER: (rawtext-end-tag-name-state)
-DEFER: script-data-less-than-sign-state
-DEFER: (script-data-less-than-sign-state)
-DEFER: script-data-end-tag-open-state
-DEFER: (script-data-end-tag-open-state)
-DEFER: script-data-end-tag-name-state
-DEFER: (script-data-end-tag-name-state)
-DEFER: script-data-escape-start-state
-DEFER: (script-data-escape-start-state)
-DEFER: script-data-escape-start-dash-state
-DEFER: (script-data-escape-start-dash-state)
-DEFER: script-data-escaped-state
-DEFER: (script-data-escaped-state)
-DEFER: script-data-escaped-dash-state
-DEFER: (script-data-escaped-dash-state)
-DEFER: script-data-escaped-dash-dash-state
-DEFER: (script-data-escaped-dash-dash-state)
-DEFER: script-data-escaped-less-than-sign-state
-DEFER: (script-data-escaped-less-than-sign-state)
-DEFER: script-data-escaped-end-tag-open-state
-DEFER: (script-data-escaped-end-tag-open-state)
-DEFER: script-data-escaped-end-tag-name-state
-DEFER: (script-data-escaped-end-tag-name-state)
-DEFER: script-data-double-escape-start-state
-DEFER: (script-data-double-escape-start-state)
-DEFER: script-data-double-escaped-state
-DEFER: (script-data-double-escaped-state)
-DEFER: script-data-double-escaped-dash-state
-DEFER: (script-data-double-escaped-dash-state)
-DEFER: script-data-double-escaped-dash-dash-state
-DEFER: (script-data-double-escaped-dash-dash-state)
-DEFER: script-data-double-escaped-less-than-sign-state
-DEFER: (script-data-double-escaped-less-than-sign-state)
-DEFER: script-data-double-escape-end-state
-DEFER: (script-data-double-escape-end-state)
-DEFER: before-attribute-name-state
-DEFER: (before-attribute-name-state)
-DEFER: attribute-name-state
-DEFER: (attribute-name-state)
-DEFER: after-attribute-name-state
-DEFER: (after-attribute-name-state)
-DEFER: before-attribute-value-state
-DEFER: (before-attribute-value-state)
-DEFER: attribute-value-double-quoted-state
-DEFER: (attribute-value-double-quoted-state)
-DEFER: attribute-value-single-quoted-state
-DEFER: (attribute-value-single-quoted-state)
-DEFER: attribute-value-unquoted-state
-DEFER: (attribute-value-unquoted-state)
-DEFER: after-attribute-value-quoted-state
-DEFER: (after-attribute-value-quoted-state)
-DEFER: self-closing-start-tag-state
-DEFER: (self-closing-start-tag-state)
-DEFER: bogus-comment-state
-DEFER: (bogus-comment-state)
-DEFER: markup-declaration-open-state
-DEFER: (markup-declaration-open-state)
-DEFER: comment-start-state
-DEFER: (comment-start-state)
-DEFER: comment-start-dash-state
-DEFER: (comment-start-dash-state)
-DEFER: comment-state
-DEFER: (comment-state)
-DEFER: comment-less-than-sign-state
-DEFER: (comment-less-than-sign-state)
-DEFER: comment-less-than-sign-bang-state
-DEFER: (comment-less-than-sign-bang-state)
-DEFER: comment-less-than-sign-bang-dash-state
-DEFER: (comment-less-than-sign-bang-dash-state)
-DEFER: comment-less-than-sign-bang-dash-dash-state
-DEFER: (comment-less-than-sign-bang-dash-dash-state)
-DEFER: comment-end-dash-state
-DEFER: (comment-end-dash-state)
-DEFER: comment-end-state
-DEFER: (comment-end-state)
-DEFER: comment-end-bang-state
-DEFER: (comment-end-bang-state)
-DEFER: doctype-state
-DEFER: (doctype-state)
-DEFER: before-doctype-name-state
-DEFER: (before-doctype-name-state)
-DEFER: doctype-name-state
-DEFER: (doctype-name-state)
-DEFER: after-doctype-name-state
-DEFER: (after-doctype-name-state)
-DEFER: after-doctype-public-keyword-state
-DEFER: (after-doctype-public-keyword-state)
-DEFER: before-doctype-public-identifier-state
-DEFER: (before-doctype-public-identifier-state)
-DEFER: doctype-public-identifier-double-quoted-state
-DEFER: (doctype-public-identifier-double-quoted-state)
-DEFER: doctype-public-identifier-single-quoted-state
-DEFER: (doctype-public-identifier-single-quoted-state)
-DEFER: after-doctype-public-identifier-state
-DEFER: (after-doctype-public-identifier-state)
-DEFER: between-doctype-public-and-system-identifiers-state
-DEFER: (between-doctype-public-and-system-identifiers-state)
-DEFER: after-doctype-system-keyword-state
-DEFER: (after-doctype-system-keyword-state)
-DEFER: before-doctype-system-identifier-state
-DEFER: (before-doctype-system-identifier-state)
-DEFER: doctype-system-identifier-double-quoted-state
-DEFER: (doctype-system-identifier-double-quoted-state)
-DEFER: doctype-system-identifier-single-quoted-state
-DEFER: (doctype-system-identifier-single-quoted-state)
-DEFER: after-doctype-system-identifier-state
-DEFER: (after-doctype-system-identifier-state)
-DEFER: bogus-doctype-state
-DEFER: (bogus-doctype-state)
-DEFER: cdata-section-state
-DEFER: (cdata-section-state)
-DEFER: cdata-section-bracket-state
-DEFER: (cdata-section-bracket-state)
-DEFER: cdata-section-end-state
-DEFER: (cdata-section-end-state)
-DEFER: character-reference-state
-DEFER: (character-reference-state)
-DEFER: named-character-reference-state
-DEFER: (named-character-reference-state)
-DEFER: ambiguous-ampersand-state
-DEFER: (ambiguous-ampersand-state)
-DEFER: numeric-character-reference-state
-DEFER: (numeric-character-reference-state)
-DEFER: hexadecimal-character-reference-start-state
-DEFER: (hexadecimal-character-reference-start-state)
-DEFER: decimal-character-reference-start-state
-DEFER: (decimal-character-reference-start-state)
-DEFER: hexadecimal-character-reference-state
-DEFER: (hexadecimal-character-reference-state)
-DEFER: decimal-character-reference-state
-DEFER: (decimal-character-reference-state)
-DEFER: numeric-character-reference-end-state
-DEFER: (numeric-character-reference-end-state)
-
+! Data state: dispatches on the current character ch (f at end of input).
+! "&" stores data-state as the document's return-state and enters the
+! character-reference state; "<" enters the tag-open state; f emits EOF and
+! stops; any other character is emitted and tokenizing continues in data-state.
: (data-state) ( document n/f string ch/f -- document n'/f string )
{
- ! { CHAR: & [ "character-reference-state-mode" unimplemented ] }
- { CHAR: < [ tag-open-state ] }
- { CHAR: \0 [ unexpected-null-character ] }
- { f [ ] }
- [ reach push-tag-name data-state ]
- } case ;
+ { [ dup CHAR: & = ] [ drop \ data-state reach return-state<< character-reference-state ] }
+ { [ dup CHAR: < = ] [ drop tag-open-state ] }
+ { [ dup CHAR: \0 = ] [ unexpected-null-character ] }
+ { [ dup f = ] [ drop pick emit-eof ] }
+ [ reach emit-char data-state ]
+ } cond ;
+! Entry point of the data state: obtains the next character with
+! next-char-from and hands it to (data-state).
: data-state ( document n/f string -- document n'/f string )
next-char-from (data-state) ;
+! RCDATA state: "&" stores rcdata-state as the document's return-state and
+! enters the character-reference state; "<" enters the RCDATA less-than-sign
+! state; f emits EOF; any other character is emitted and the state repeats.
: (rcdata-state) ( document n/f string ch/f -- document n'/f string )
{
- [ unimplemented* ]
+ { [ dup CHAR: & = ] [ drop \ rcdata-state reach return-state<< character-reference-state ] }
+ { [ dup CHAR: < = ] [ drop rcdata-less-than-sign-state ] }
+ { [ dup CHAR: \0 = ] [ unexpected-null-character ] }
+ { [ dup f = ] [ drop pick emit-eof ] }
+ [ reach emit-char rcdata-state ]
} cond ;
: rcdata-state ( document n/f string -- document n'/f string )
+! RAWTEXT state: "<" enters the RAWTEXT less-than-sign state; a null
+! character is dropped before unexpected-null-character is called; f emits
+! EOF; any other character is emitted and the state repeats.
: (rawtext-state) ( document n/f string ch/f -- document n'/f string )
{
- [ unimplemented* ]
+ { [ dup CHAR: < = ] [ drop rawtext-less-than-sign-state ] }
+ { [ dup CHAR: \0 = ] [ drop unexpected-null-character ] }
+ { [ dup f = ] [ drop pick emit-eof ] }
+ [ reach emit-char rawtext-state ]
} cond ;
: rawtext-state ( document n/f string -- document n'/f string )
+! Script data state: "<" enters the script data less-than-sign state; a null
+! character is dropped before unexpected-null-character is called; f emits
+! EOF; any other character is emitted and the state repeats.
: (script-data-state) ( document n/f string ch/f -- document n'/f string )
{
- [ unimplemented* ]
+ { [ dup CHAR: < = ] [ drop script-data-less-than-sign-state ] }
+ { [ dup CHAR: \0 = ] [ drop unexpected-null-character ] }
+ { [ dup f = ] [ drop pick emit-eof ] }
+ [ reach emit-char script-data-state ]
} cond ;
: script-data-state ( document n/f string -- document n'/f string )
+! PLAINTEXT state: a null character is dropped before
+! unexpected-null-character is called; f emits EOF; every other character is
+! emitted and the state repeats.
: (plaintext-state) ( document n/f string ch/f -- document n'/f string )
{
- [ unimplemented* ]
+ { [ dup CHAR: \0 = ] [ drop unexpected-null-character ] }
+ { [ dup f = ] [ drop pick emit-eof ] }
+ [ reach emit-char plaintext-state ]
} cond ;
: plaintext-state ( document n/f string -- document n'/f string )
: (tag-open-state) ( document n/f string ch/f -- document n'/f string )
-{
+ {
{ [ dup ascii-alpha? ] [ (tag-name-state) ] }
{ [ dup CHAR: ! = ] [ drop markup-declaration-open-state ] }
{ [ dup CHAR: / = ] [ drop end-tag-open-state ] }
+! RCDATA less-than-sign state (a "<" was just consumed): "/" clears the
+! temporary buffer and enters the RCDATA end-tag-open state; anything else
+! emits the pending "<" and reconsumes ch in the RCDATA state.
: (rcdata-less-than-sign-state) ( document n/f string ch/f -- document n'/f string )
{
- [ unimplemented* ]
+ { [ dup CHAR: / = ] [ drop pick reset-temporary-buffer rcdata-end-tag-open-state ] }
+ [ [ CHAR: < reach emit-char ] dip (rcdata-state) ]
} cond ;
: rcdata-less-than-sign-state ( document n/f string -- document n'/f string )
+! RCDATA end-tag-open state (a "</" was just consumed).
+! A letter resets the end-tag name and is reconsumed by the end-tag-name
+! state. Anything else means "</" was literal text, so both characters are
+! emitted (not just "<") before ch is reconsumed in the RCDATA state.
: (rcdata-end-tag-open-state) ( document n/f string ch/f -- document n'/f string )
{
- [ unimplemented* ]
+ { [ dup ascii-alpha? ] [ reach reset-end-tag (rcdata-end-tag-name-state) ] }
+ [ [ "</" reach emit-string ] dip (rcdata-state) ]
} cond ;
: rcdata-end-tag-open-state ( document n/f string -- document n'/f string )
+! RCDATA end-tag-name state.
+! Whitespace, "/" and ">" terminate the tag only when
+! appropriate-end-tag-token? answers true. Otherwise they are handled like
+! any other character: "</" plus the temporary buffer is emitted and ch is
+! reconsumed in the RCDATA state, so the terminating character is not lost.
+! Letters are appended to the tag name (lower-cased) and to the temporary
+! buffer (as read).
: (rcdata-end-tag-name-state) ( document n/f string ch/f -- document n'/f string )
{
- [ unimplemented* ]
+ { [ dup "\t\n\f\s" member? ] [
+ reach appropriate-end-tag-token?
+ [ drop before-attribute-name-state ] [ [ "</" reach emit-temporary-buffer-with ] dip (rcdata-state) ] if
+ ] }
+ { [ dup CHAR: / = ] [
+ reach appropriate-end-tag-token?
+ [ drop self-closing-start-tag-state ] [ [ "</" reach emit-temporary-buffer-with ] dip (rcdata-state) ] if
+ ] }
+ { [ dup CHAR: > = ] [
+ reach appropriate-end-tag-token?
+ [ drop pick emit-end-tag data-state ] [ [ "</" reach emit-temporary-buffer-with ] dip (rcdata-state) ] if
+ ] }
+ { [ dup ascii-upper-alpha? ] [ [ 0x20 + reach push-tag-name ] [ reach push-temporary-buffer ] bi rcdata-end-tag-name-state ] }
+ { [ dup ascii-lower-alpha? ] [ [ reach push-tag-name ] [ reach push-temporary-buffer ] bi rcdata-end-tag-name-state ] }
+ [ [ "</" reach emit-temporary-buffer-with ] dip (rcdata-state) ]
} cond ;
: rcdata-end-tag-name-state ( document n/f string -- document n'/f string )
+! RAWTEXT less-than-sign state (a "<" was just consumed): "/" clears the
+! temporary buffer and enters the RAWTEXT end-tag-open state; anything else
+! emits the pending "<" and reconsumes ch in the RAWTEXT state.
: (rawtext-less-than-sign-state) ( document n/f string ch/f -- document n'/f string )
{
- [ unimplemented* ]
+ { [ dup CHAR: / = ] [ drop pick reset-temporary-buffer rawtext-end-tag-open-state ] }
+ [ [ CHAR: < reach emit-char ] dip (rawtext-state) ]
} cond ;
: rawtext-less-than-sign-state ( document n/f string -- document n'/f string )
+! RAWTEXT end-tag-open state (a "</" was just consumed).
+! A letter resets the end-tag name and is reconsumed by the end-tag-name
+! state. Anything else means "</" was literal text, so both characters are
+! emitted (not just "<") before ch is reconsumed in the RAWTEXT state.
: (rawtext-end-tag-open-state) ( document n/f string ch/f -- document n'/f string )
{
- [ unimplemented* ]
+ { [ dup ascii-alpha? ] [ reach reset-end-tag (rawtext-end-tag-name-state) ] }
+ [ [ "</" reach emit-string ] dip (rawtext-state) ]
} cond ;
: rawtext-end-tag-open-state ( document n/f string -- document n'/f string )
+! RAWTEXT end-tag-name state.
+! Whitespace, "/" and ">" terminate the tag only when
+! appropriate-end-tag-token? answers true. Otherwise they are handled like
+! any other character: "</" plus the temporary buffer is emitted and ch is
+! reconsumed in the RAWTEXT state, so the terminating character is not lost.
+! Letters are appended to the tag name (lower-cased) and to the temporary
+! buffer (as read).
: (rawtext-end-tag-name-state) ( document n/f string ch/f -- document n'/f string )
{
- [ unimplemented* ]
+ { [ dup "\t\n\f\s" member? ] [
+ reach appropriate-end-tag-token?
+ [ drop before-attribute-name-state ] [ [ "</" reach emit-temporary-buffer-with ] dip (rawtext-state) ] if
+ ] }
+ { [ dup CHAR: / = ] [
+ reach appropriate-end-tag-token?
+ [ drop self-closing-start-tag-state ] [ [ "</" reach emit-temporary-buffer-with ] dip (rawtext-state) ] if
+ ] }
+ { [ dup CHAR: > = ] [
+ reach appropriate-end-tag-token?
+ [ drop pick emit-end-tag data-state ] [ [ "</" reach emit-temporary-buffer-with ] dip (rawtext-state) ] if
+ ] }
+ { [ dup ascii-upper-alpha? ] [ [ 0x20 + reach push-tag-name ] [ reach push-temporary-buffer ] bi rawtext-end-tag-name-state ] }
+ { [ dup ascii-lower-alpha? ] [ [ reach push-tag-name ] [ reach push-temporary-buffer ] bi rawtext-end-tag-name-state ] }
+ [ [ "</" reach emit-temporary-buffer-with ] dip (rawtext-state) ]
} cond ;
: rawtext-end-tag-name-state ( document n/f string -- document n'/f string )
+! Script data less-than-sign state (a "<" was just consumed): "/" clears the
+! temporary buffer and enters the end-tag-open state; "!" emits "<!" and
+! enters the escape-start state; anything else emits the pending "<" and
+! reconsumes ch in the script data state.
: (script-data-less-than-sign-state) ( document n/f string ch/f -- document n'/f string )
{
- [ unimplemented* ]
+ { [ dup CHAR: / = ] [ drop pick reset-temporary-buffer script-data-end-tag-open-state ] }
+ { [ dup CHAR: ! = ] [ drop "<!" reach emit-string script-data-escape-start-state ] }
+ [ [ CHAR: < reach emit-char ] dip (script-data-state) ]
} cond ;
: script-data-less-than-sign-state ( document n/f string -- document n'/f string )
+! Script data end-tag-open state (a "</" was just consumed): a letter resets
+! the end-tag name and is reconsumed by the end-tag-name state; anything
+! else emits "</" and reconsumes ch in the script data state.
: (script-data-end-tag-open-state) ( document n/f string ch/f -- document n'/f string )
{
- [ unimplemented* ]
+ { [ dup ascii-alpha? ] [ reach reset-end-tag (script-data-end-tag-name-state) ] }
+ [ [ "</" reach emit-string ] dip (script-data-state) ]
} cond ;
: script-data-end-tag-open-state ( document n/f string -- document n'/f string )
+! Script data end-tag-name state.
+! Whitespace, "/" and ">" terminate the tag only when
+! appropriate-end-tag-token? answers true. Otherwise they are handled like
+! any other character: "</" plus the temporary buffer is emitted and ch is
+! reconsumed in the script data state, so the terminating character is not lost.
+! Letters are appended to the tag name (lower-cased) and to the temporary
+! buffer (as read), and tokenizing stays in this state (not the RAWTEXT one).
: (script-data-end-tag-name-state) ( document n/f string ch/f -- document n'/f string )
{
- [ unimplemented* ]
+ { [ dup "\t\n\f\s" member? ] [
+ reach appropriate-end-tag-token?
+ [ drop before-attribute-name-state ] [ [ "</" reach emit-temporary-buffer-with ] dip (script-data-state) ] if
+ ] }
+ { [ dup CHAR: / = ] [
+ reach appropriate-end-tag-token?
+ [ drop self-closing-start-tag-state ] [ [ "</" reach emit-temporary-buffer-with ] dip (script-data-state) ] if
+ ] }
+ { [ dup CHAR: > = ] [
+ reach appropriate-end-tag-token?
+ [ drop pick emit-end-tag data-state ] [ [ "</" reach emit-temporary-buffer-with ] dip (script-data-state) ] if
+ ] }
+ { [ dup ascii-upper-alpha? ] [ [ 0x20 + reach push-tag-name ] [ reach push-temporary-buffer ] bi script-data-end-tag-name-state ] }
+ { [ dup ascii-lower-alpha? ] [ [ reach push-tag-name ] [ reach push-temporary-buffer ] bi script-data-end-tag-name-state ] }
+ [ [ "</" reach emit-temporary-buffer-with ] dip (script-data-state) ]
} cond ;
: script-data-end-tag-name-state ( document n/f string -- document n'/f string )
+! Script data escape-start state (after "<!"): "-" is emitted as a character
+! token and the escape-start-dash state follows; anything else is
+! reconsumed in the script data state.
: (script-data-escape-start-state) ( document n/f string ch/f -- document n'/f string )
{
- [ unimplemented* ]
+ { [ dup CHAR: - = ] [ reach emit-char script-data-escape-start-dash-state ] }
+ [ (script-data-state) ]
} cond ;
: script-data-escape-start-state ( document n/f string -- document n'/f string )
+! Script data escape-start-dash state (after "<!-"): a second "-" is emitted
+! as a character token and the escaped-dash-dash state follows; anything
+! else is reconsumed in the script data state.
: (script-data-escape-start-dash-state) ( document n/f string ch/f -- document n'/f string )
{
- [ unimplemented* ]
+ { [ dup CHAR: - = ] [ reach emit-char script-data-escaped-dash-dash-state ] }
+ [ (script-data-state) ]
} cond ;
: script-data-escape-start-dash-state ( document n/f string -- document n'/f string )
+! Script data escaped state: "-" is emitted and the escaped-dash state
+! follows; "<" enters the escaped less-than-sign state without emitting;
+! f calls eof-in-script-html-comment-like-text; any other character is
+! emitted and the state repeats.
+! NOTE(review): the null branch still ends in unimplemented*; the
+! replacement character is not emitted yet.
: (script-data-escaped-state) ( document n/f string ch/f -- document n'/f string )
{
- [ unimplemented* ]
+ { [ dup CHAR: - = ] [ reach emit-char script-data-escaped-dash-state ] }
+ { [ dup CHAR: < = ] [ drop script-data-escaped-less-than-sign-state ] }
+ { [ dup CHAR: \0 = ] [ unexpected-null-character CHAR: replacement-character unimplemented* ] }
+ { [ dup f = ] [ eof-in-script-html-comment-like-text ] }
+ [ reach emit-char script-data-escaped-state ]
} cond ;
: script-data-escaped-state ( document n/f string -- document n'/f string )
+! Script data escaped-dash state: a further "-" is emitted and the
+! escaped-dash-dash state follows; "<" enters the escaped less-than-sign
+! state without emitting; f calls eof-in-script-html-comment-like-text; any
+! other character is emitted and tokenizing returns to the escaped state.
: (script-data-escaped-dash-state) ( document n/f string ch/f -- document n'/f string )
{
- [ unimplemented* ]
+ { [ dup CHAR: - = ] [ reach emit-char script-data-escaped-dash-dash-state ] }
+ { [ dup CHAR: < = ] [ drop script-data-escaped-less-than-sign-state ] }
+ { [ dup CHAR: \0 = ] [ unexpected-null-character script-data-escaped-state ] }
+ { [ dup f = ] [ eof-in-script-html-comment-like-text ] }
+ [ reach emit-char script-data-escaped-state ]
} cond ;
: script-data-escaped-dash-state ( document n/f string -- document n'/f string )
+! Script data escaped-dash-dash state: "-" is emitted and the state repeats;
+! "<" enters the escaped less-than-sign state; ">" is emitted and
+! tokenizing returns to the script data state; f calls
+! eof-in-script-html-comment-like-text; any other character is emitted and
+! tokenizing returns to the escaped state.
: (script-data-escaped-dash-dash-state) ( document n/f string ch/f -- document n'/f string )
{
- [ unimplemented* ]
+ { [ dup CHAR: - = ] [ reach emit-char script-data-escaped-dash-dash-state ] }
+ { [ dup CHAR: < = ] [ drop script-data-escaped-less-than-sign-state ] }
+ { [ dup CHAR: > = ] [ reach emit-char script-data-state ] }
+ { [ dup CHAR: \0 = ] [ unexpected-null-character script-data-escaped-state ] }
+ { [ dup f = ] [ eof-in-script-html-comment-like-text ] }
+ [ reach emit-char script-data-escaped-state ]
} cond ;
: script-data-escaped-dash-dash-state ( document n/f string -- document n'/f string )
+! Script data escaped less-than-sign state (a "<" was just consumed):
+! "/" clears the temporary buffer and enters the escaped end-tag-open state;
+! a letter clears the temporary buffer, emits "<" and is reconsumed by the
+! double-escape-start state; anything else emits "<" and is reconsumed in
+! the escaped state.
: (script-data-escaped-less-than-sign-state) ( document n/f string ch/f -- document n'/f string )
{
- [ unimplemented* ]
+ { [ dup CHAR: / = ] [ drop pick reset-temporary-buffer script-data-escaped-end-tag-open-state ] }
+ { [ dup ascii-alpha? ] [ [ pick reset-temporary-buffer CHAR: < reach emit-char ] dip (script-data-double-escape-start-state) ] }
+ [ [ CHAR: < reach emit-char ] dip (script-data-escaped-state) ]
} cond ;
: script-data-escaped-less-than-sign-state ( document n/f string -- document n'/f string )
+! Script data escaped end-tag-open state (a "</" was just consumed): a
+! letter resets the end-tag name and is reconsumed by the escaped
+! end-tag-name state; anything else emits "</" and is reconsumed in the
+! escaped state.
: (script-data-escaped-end-tag-open-state) ( document n/f string ch/f -- document n'/f string )
{
- [ unimplemented* ]
+ { [ dup ascii-alpha? ] [ [ pick reset-end-tag ] dip (script-data-escaped-end-tag-name-state) ] }
+ [ [ "</" reach emit-string ] dip (script-data-escaped-state) ]
} cond ;
: script-data-escaped-end-tag-open-state ( document n/f string -- document n'/f string )
+! Script data escaped end-tag-name state.
+! Whitespace, "/" and ">" terminate the tag only when
+! appropriate-end-tag-token? answers true. Otherwise they are handled like
+! any other character: "</" plus the temporary buffer is emitted and ch is
+! reconsumed in the escaped state, so the terminating character is not lost.
+! Letters are appended to the tag name (lower-cased) and to the temporary
+! buffer (as read).
: (script-data-escaped-end-tag-name-state) ( document n/f string ch/f -- document n'/f string )
{
- [ unimplemented* ]
+ { [ dup "\t\n\f\s" member? ] [
+ reach appropriate-end-tag-token?
+ [ drop before-attribute-name-state ] [ [ "</" reach emit-temporary-buffer-with ] dip (script-data-escaped-state) ] if
+ ] }
+ { [ dup CHAR: / = ] [
+ reach appropriate-end-tag-token?
+ [ drop self-closing-start-tag-state ] [ [ "</" reach emit-temporary-buffer-with ] dip (script-data-escaped-state) ] if
+ ] }
+ { [ dup CHAR: > = ] [
+ reach appropriate-end-tag-token?
+ [ drop pick emit-end-tag data-state ] [ [ "</" reach emit-temporary-buffer-with ] dip (script-data-escaped-state) ] if
+ ] }
+ { [ dup ascii-upper-alpha? ] [ [ 0x20 + reach push-tag-name ] [ reach push-temporary-buffer ] bi script-data-escaped-end-tag-name-state ] }
+ { [ dup ascii-lower-alpha? ] [ [ reach push-tag-name ] [ reach push-temporary-buffer ] bi script-data-escaped-end-tag-name-state ] }
+ [ [ "</" reach emit-temporary-buffer-with ] dip (script-data-escaped-state) ]
} cond ;
: script-data-escaped-end-tag-name-state ( document n/f string -- document n'/f string )
+! Script data double-escape-start state.
+! Whitespace, "/" or ">" is emitted; the double-escaped state follows when
+! the temporary buffer spells "script", otherwise the escaped state.
+! A letter is appended to the temporary buffer in lower case, so the
+! comparison with "script" is case-insensitive, and is emitted as read; it
+! is not part of any tag name. Anything else is reconsumed in the escaped state.
: (script-data-double-escape-start-state) ( document n/f string ch/f -- document n'/f string )
{
- [ unimplemented* ]
+ { [ dup "\t\n\f\s/>" member? ] [
+ reach emit-char
+ pick temporary-buffer>> "script" sequence=
+ [ script-data-double-escaped-state ] [ script-data-escaped-state ] if
+ ] }
+ { [ dup ascii-upper-alpha? ] [ [ 0x20 + reach push-temporary-buffer ] [ reach emit-char ] bi script-data-double-escape-start-state ] }
+ { [ dup ascii-lower-alpha? ] [ [ reach push-temporary-buffer ] [ reach emit-char ] bi script-data-double-escape-start-state ] }
+ [ (script-data-escaped-state) ]
} cond ;
: script-data-double-escape-start-state ( document n/f string -- document n'/f string )
+! Script data double-escaped state: "-" is emitted and the double-escaped
+! dash state follows; "<" is emitted and the double-escaped less-than-sign
+! state follows; f calls eof-in-script-html-comment-like-text; any other
+! character is emitted and the state repeats.
: (script-data-double-escaped-state) ( document n/f string ch/f -- document n'/f string )
{
- [ unimplemented* ]
+ { [ dup CHAR: - = ] [ reach emit-char script-data-double-escaped-dash-state ] }
+ { [ dup CHAR: < = ] [ reach emit-char script-data-double-escaped-less-than-sign-state ] }
+ { [ dup CHAR: \0 = ] [
+ unexpected-null-character
+ CHAR: replacement-character reach emit-char
+ script-data-double-escaped-state
+ ] }
+ { [ dup f = ] [ eof-in-script-html-comment-like-text ] }
+ [ reach emit-char script-data-double-escaped-state ]
} cond ;
: script-data-double-escaped-state ( document n/f string -- document n'/f string )
+! Script data double-escaped dash state: "-" is emitted and the
+! double-escaped dash-dash state follows; "<" is emitted and the
+! double-escaped less-than-sign state follows; f calls
+! eof-in-script-html-comment-like-text; any other character is emitted and
+! tokenizing returns to the double-escaped state.
: (script-data-double-escaped-dash-state) ( document n/f string ch/f -- document n'/f string )
{
- [ unimplemented* ]
+ { [ dup CHAR: - = ] [ reach emit-char script-data-double-escaped-dash-dash-state ] }
+ { [ dup CHAR: < = ] [ reach emit-char script-data-double-escaped-less-than-sign-state ] }
+ { [ dup CHAR: \0 = ] [
+ unexpected-null-character
+ CHAR: replacement-character reach emit-char
+ script-data-double-escaped-state
+ ] }
+ { [ dup f = ] [ eof-in-script-html-comment-like-text ] }
+ [ reach emit-char script-data-double-escaped-state ]
} cond ;
: script-data-double-escaped-dash-state ( document n/f string -- document n'/f string )
+! Script data double-escaped dash-dash state: "-" is emitted and the state
+! repeats; "<" is emitted and the double-escaped less-than-sign state
+! follows; ">" is emitted and tokenizing returns to the script data state;
+! f calls eof-in-script-html-comment-like-text. Any other character is
+! emitted and tokenizing stays double-escaped, returning to the
+! double-escaped state rather than the single-escaped one.
: (script-data-double-escaped-dash-dash-state) ( document n/f string ch/f -- document n'/f string )
{
- [ unimplemented* ]
+ { [ dup CHAR: - = ] [ reach emit-char script-data-double-escaped-dash-dash-state ] }
+ { [ dup CHAR: < = ] [ reach emit-char script-data-double-escaped-less-than-sign-state ] }
+ { [ dup CHAR: > = ] [ reach emit-char script-data-state ] }
+ { [ dup CHAR: \0 = ] [
+ unexpected-null-character
+ CHAR: replacement-character reach emit-char
+ script-data-double-escaped-state
+ ] }
+ { [ dup f = ] [ eof-in-script-html-comment-like-text ] }
+ [ reach emit-char script-data-double-escaped-state ]
} cond ;
: script-data-double-escaped-dash-dash-state ( document n/f string -- document n'/f string )
+! Script data double-escaped less-than-sign state: "/" is emitted, the
+! temporary buffer is cleared and the double-escape-end state follows;
+! anything else is reconsumed in the double-escaped state.
: (script-data-double-escaped-less-than-sign-state) ( document n/f string ch/f -- document n'/f string )
{
- [ unimplemented* ]
+ { [ dup CHAR: / = ] [ reach emit-char pick reset-temporary-buffer script-data-double-escape-end-state ] }
+ [ (script-data-double-escaped-state) ]
} cond ;
: script-data-double-escaped-less-than-sign-state ( document n/f string -- document n'/f string )
+! Script data double-escape-end state.
+! Whitespace, "/" or ">" is emitted; the escaped state follows when the
+! temporary buffer spells "script", otherwise the double-escaped state.
+! A letter is appended to the temporary buffer in lower case, so the
+! comparison with "script" is case-insensitive, and is emitted as read; it
+! is not part of any tag name. Anything else is reconsumed in the
+! double-escaped state.
: (script-data-double-escape-end-state) ( document n/f string ch/f -- document n'/f string )
{
- [ unimplemented* ]
+ { [ dup "\t\n\f\s/>" member? ] [
+ reach emit-char
+ pick temporary-buffer>> "script" sequence=
+ [ script-data-escaped-state ] [ script-data-double-escaped-state ] if
+ ] }
+ { [ dup ascii-upper-alpha? ] [ [ 0x20 + reach push-temporary-buffer ] [ reach emit-char ] bi script-data-double-escape-end-state ] }
+ { [ dup ascii-lower-alpha? ] [ [ reach push-temporary-buffer ] [ reach emit-char ] bi script-data-double-escape-end-state ] }
+ [ (script-data-double-escaped-state) ]
} cond ;
: script-data-double-escape-end-state ( document n/f string -- document n'/f string )