gitweb.factorcode.org Git - factor.git/commitdiff
html5: implementing more and restructuring
author Doug Coleman <doug.coleman@gmail.com>
Thu, 7 May 2020 00:46:43 +0000 (19:46 -0500)
committer Doug Coleman <doug.coleman@gmail.com>
Fri, 10 Jun 2022 17:44:40 +0000 (12:44 -0500)
basis/html5/html5.factor

index b1990768231f580f4d77d68e6b211d1d773c1cb9..1432a3584353828403f83d74cafa30a7fbe2b68c 100644 (file)
@@ -7,6 +7,164 @@ IN: html5
 
 ! https://html.spec.whatwg.org/multipage/parsing.html#tokenization
 
+DEFER: data-state
+DEFER: (data-state)
+DEFER: rcdata-state
+DEFER: (rcdata-state)
+DEFER: rawtext-state
+DEFER: (rawtext-state)
+DEFER: script-data-state
+DEFER: (script-data-state)
+DEFER: plaintext-state
+DEFER: (plaintext-state)
+DEFER: tag-open-state
+DEFER: (tag-open-state)
+DEFER: end-tag-open-state
+DEFER: (end-tag-open-state)
+DEFER: tag-name-state
+DEFER: (tag-name-state)
+DEFER: rcdata-less-than-sign-state
+DEFER: (rcdata-less-than-sign-state)
+DEFER: rcdata-end-tag-open-state
+DEFER: (rcdata-end-tag-open-state)
+DEFER: rcdata-end-tag-name-state
+DEFER: (rcdata-end-tag-name-state)
+DEFER: rawtext-less-than-sign-state
+DEFER: (rawtext-less-than-sign-state)
+DEFER: rawtext-end-tag-open-state
+DEFER: (rawtext-end-tag-open-state)
+DEFER: rawtext-end-tag-name-state
+DEFER: (rawtext-end-tag-name-state)
+DEFER: script-data-less-than-sign-state
+DEFER: (script-data-less-than-sign-state)
+DEFER: script-data-end-tag-open-state
+DEFER: (script-data-end-tag-open-state)
+DEFER: script-data-end-tag-name-state
+DEFER: (script-data-end-tag-name-state)
+DEFER: script-data-escape-start-state
+DEFER: (script-data-escape-start-state)
+DEFER: script-data-escape-start-dash-state
+DEFER: (script-data-escape-start-dash-state)
+DEFER: script-data-escaped-state
+DEFER: (script-data-escaped-state)
+DEFER: script-data-escaped-dash-state
+DEFER: (script-data-escaped-dash-state)
+DEFER: script-data-escaped-dash-dash-state
+DEFER: (script-data-escaped-dash-dash-state)
+DEFER: script-data-escaped-less-than-sign-state
+DEFER: (script-data-escaped-less-than-sign-state)
+DEFER: script-data-escaped-end-tag-open-state
+DEFER: (script-data-escaped-end-tag-open-state)
+DEFER: script-data-escaped-end-tag-name-state
+DEFER: (script-data-escaped-end-tag-name-state)
+DEFER: script-data-double-escape-start-state
+DEFER: (script-data-double-escape-start-state)
+DEFER: script-data-double-escaped-state
+DEFER: (script-data-double-escaped-state)
+DEFER: script-data-double-escaped-dash-state
+DEFER: (script-data-double-escaped-dash-state)
+DEFER: script-data-double-escaped-dash-dash-state
+DEFER: (script-data-double-escaped-dash-dash-state)
+DEFER: script-data-double-escaped-less-than-sign-state
+DEFER: (script-data-double-escaped-less-than-sign-state)
+DEFER: script-data-double-escape-end-state
+DEFER: (script-data-double-escape-end-state)
+DEFER: before-attribute-name-state
+DEFER: (before-attribute-name-state)
+DEFER: attribute-name-state
+DEFER: (attribute-name-state)
+DEFER: after-attribute-name-state
+DEFER: (after-attribute-name-state)
+DEFER: before-attribute-value-state
+DEFER: (before-attribute-value-state)
+DEFER: attribute-value-double-quoted-state
+DEFER: (attribute-value-double-quoted-state)
+DEFER: attribute-value-single-quoted-state
+DEFER: (attribute-value-single-quoted-state)
+DEFER: attribute-value-unquoted-state
+DEFER: (attribute-value-unquoted-state)
+DEFER: after-attribute-value-quoted-state
+DEFER: (after-attribute-value-quoted-state)
+DEFER: self-closing-start-tag-state
+DEFER: (self-closing-start-tag-state)
+DEFER: bogus-comment-state
+DEFER: (bogus-comment-state)
+DEFER: markup-declaration-open-state
+DEFER: (markup-declaration-open-state)
+DEFER: comment-start-state
+DEFER: (comment-start-state)
+DEFER: comment-start-dash-state
+DEFER: (comment-start-dash-state)
+DEFER: comment-state
+DEFER: (comment-state)
+DEFER: comment-less-than-sign-state
+DEFER: (comment-less-than-sign-state)
+DEFER: comment-less-than-sign-bang-state
+DEFER: (comment-less-than-sign-bang-state)
+DEFER: comment-less-than-sign-bang-dash-state
+DEFER: (comment-less-than-sign-bang-dash-state)
+DEFER: comment-less-than-sign-bang-dash-dash-state
+DEFER: (comment-less-than-sign-bang-dash-dash-state)
+DEFER: comment-end-dash-state
+DEFER: (comment-end-dash-state)
+DEFER: comment-end-state
+DEFER: (comment-end-state)
+DEFER: comment-end-bang-state
+DEFER: (comment-end-bang-state)
+DEFER: doctype-state
+DEFER: (doctype-state)
+DEFER: before-doctype-name-state
+DEFER: (before-doctype-name-state)
+DEFER: doctype-name-state
+DEFER: (doctype-name-state)
+DEFER: after-doctype-name-state
+DEFER: (after-doctype-name-state)
+DEFER: after-doctype-public-keyword-state
+DEFER: (after-doctype-public-keyword-state)
+DEFER: before-doctype-public-identifier-state
+DEFER: (before-doctype-public-identifier-state)
+DEFER: doctype-public-identifier-double-quoted-state
+DEFER: (doctype-public-identifier-double-quoted-state)
+DEFER: doctype-public-identifier-single-quoted-state
+DEFER: (doctype-public-identifier-single-quoted-state)
+DEFER: after-doctype-public-identifier-state
+DEFER: (after-doctype-public-identifier-state)
+DEFER: between-doctype-public-and-system-identifiers-state
+DEFER: (between-doctype-public-and-system-identifiers-state)
+DEFER: after-doctype-system-keyword-state
+DEFER: (after-doctype-system-keyword-state)
+DEFER: before-doctype-system-identifier-state
+DEFER: (before-doctype-system-identifier-state)
+DEFER: doctype-system-identifier-double-quoted-state
+DEFER: (doctype-system-identifier-double-quoted-state)
+DEFER: doctype-system-identifier-single-quoted-state
+DEFER: (doctype-system-identifier-single-quoted-state)
+DEFER: after-doctype-system-identifier-state
+DEFER: (after-doctype-system-identifier-state)
+DEFER: bogus-doctype-state
+DEFER: (bogus-doctype-state)
+DEFER: cdata-section-state
+DEFER: (cdata-section-state)
+DEFER: cdata-section-bracket-state
+DEFER: (cdata-section-bracket-state)
+DEFER: cdata-section-end-state
+DEFER: (cdata-section-end-state)
+DEFER: character-reference-state
+DEFER: (character-reference-state)
+DEFER: named-character-reference-state
+DEFER: (named-character-reference-state)
+DEFER: ambiguous-ampersand-state
+DEFER: (ambiguous-ampersand-state)
+DEFER: numeric-character-reference-state
+DEFER: (numeric-character-reference-state)
+DEFER: hexadecimal-character-reference-start-state
+DEFER: (hexadecimal-character-reference-start-state)
+DEFER: decimal-character-reference-start-state
+DEFER: (decimal-character-reference-start-state)
+DEFER: numeric-character-reference-end-state
+DEFER: (numeric-character-reference-end-state)
+
+
 ERROR: unimplemented string ;
 ERROR: unimplemented* ;
 
@@ -95,6 +253,7 @@ tree
 tree-insert-mode
 doctype-name
 tag-name
+end-tag-name
 attribute-name
 attribute-value
 temporary-buffer
@@ -108,6 +267,7 @@ return-state ;
         initial-mode >>tree-insert-mode
         SBUF" " clone >>doctype-name
         SBUF" " clone >>tag-name
+        SBUF" " clone >>end-tag-name
         SBUF" " clone >>attribute-name
         SBUF" " clone >>attribute-value
         SBUF" " clone >>temporary-buffer
@@ -163,189 +323,39 @@ MEMO: load-entities ( -- assoc )
 : push-attribute-name ( ch document -- ) attribute-name>> push ;
 : push-attribute-value ( ch document -- ) attribute-value>> push ;
 : push-temporary-buffer ( ch document -- ) temporary-buffer>> push ;
+: reset-temporary-buffer ( document -- ) SBUF" " clone temporary-buffer<< ; ! store a fresh empty sbuf in the document's temporary-buffer slot
+: reset-end-tag ( document -- ) SBUF" " clone end-tag-name<< ; ! store a fresh empty sbuf in the document's end-tag-name slot
 : push-comment-token ( ch document -- ) comment-token>> push ;
-: emit-tag ( document -- ) "emit tag" print .  ;
+: emit-eof ( document -- ) drop "emit-eof" print ; ! ignores the document; prints the line "emit-eof"
+: emit-char ( char document -- ) drop "emit-char:" write . ; ! ignores the document; prints the label, then the character with `.`
+: emit-temporary-buffer-with ( string document -- ) "emit-temp-buffer: " write temporary-buffer>> append . ; ! prints the label, then string followed by the temporary buffer contents
+: emit-string ( string document -- ) drop "emit-string:" write . ; ! ignores the document; prints the label, then the string (callers pass "<!" and "</")
+: emit-tag ( document -- ) "emit tag: " write . ; ! prints the label, then the whole document object with `.`
+: emit-end-tag ( document -- ) "emit end tag: " write . ; ! prints the label, then the whole document object with `.`
 : emit-doctype ( document -- )
     "emit doctype: " write
     [ doctype-name>> >string . ]
     [ SBUF" " clone doctype-name<< ] bi ;
 
+! check if matches open tag (not implemented yet: the document is dropped and the answer is always f)
+: appropriate-end-tag-token? ( document -- ? )
+    drop f ;
+
 : ascii-upper-alpha? ( ch -- ? ) [ CHAR: A CHAR: Z between? ] [ f ] if* ; inline
 : ascii-lower-alpha? ( ch -- ? ) [ CHAR: a CHAR: z between? ] [ f ] if* ; inline
 : ascii-digit? ( ch/f -- ? ) [ CHAR: 0 CHAR: 9 between? ] [ f ] if* ;
 : ascii-alpha? ( ch/f -- ? ) { [ ascii-lower-alpha? ] [ ascii-upper-alpha? ] } 1|| ;
 : ascii-alphanumeric? ( ch/f -- ? ) { [ ascii-alpha? ] [ ascii-digit? ] } 1|| ;
 
-DEFER: data-state
-DEFER: (data-state)
-DEFER: rcdata-state
-DEFER: (rcdata-state)
-DEFER: rawtext-state
-DEFER: (rawtext-state)
-DEFER: script-data-state
-DEFER: (script-data-state)
-DEFER: plaintext-state
-DEFER: (plaintext-state)
-DEFER: tag-open-state
-DEFER: (tag-open-state)
-DEFER: end-tag-open-state
-DEFER: (end-tag-open-state)
-DEFER: tag-name-state
-DEFER: (tag-name-state)
-DEFER: rcdata-less-than-sign-state
-DEFER: (rcdata-less-than-sign-state)
-DEFER: rcdata-end-tag-open-state
-DEFER: (rcdata-end-tag-open-state)
-DEFER: rcdata-end-tag-name-state
-DEFER: (rcdata-end-tag-name-state)
-DEFER: rawtext-less-than-sign-state
-DEFER: (rawtext-less-than-sign-state)
-DEFER: rawtext-end-tag-open-state
-DEFER: (rawtext-end-tag-open-state)
-DEFER: rawtext-end-tag-name-state
-DEFER: (rawtext-end-tag-name-state)
-DEFER: script-data-less-than-sign-state
-DEFER: (script-data-less-than-sign-state)
-DEFER: script-data-end-tag-open-state
-DEFER: (script-data-end-tag-open-state)
-DEFER: script-data-end-tag-name-state
-DEFER: (script-data-end-tag-name-state)
-DEFER: script-data-escape-start-state
-DEFER: (script-data-escape-start-state)
-DEFER: script-data-escape-start-dash-state
-DEFER: (script-data-escape-start-dash-state)
-DEFER: script-data-escaped-state
-DEFER: (script-data-escaped-state)
-DEFER: script-data-escaped-dash-state
-DEFER: (script-data-escaped-dash-state)
-DEFER: script-data-escaped-dash-dash-state
-DEFER: (script-data-escaped-dash-dash-state)
-DEFER: script-data-escaped-less-than-sign-state
-DEFER: (script-data-escaped-less-than-sign-state)
-DEFER: script-data-escaped-end-tag-open-state
-DEFER: (script-data-escaped-end-tag-open-state)
-DEFER: script-data-escaped-end-tag-name-state
-DEFER: (script-data-escaped-end-tag-name-state)
-DEFER: script-data-double-escape-start-state
-DEFER: (script-data-double-escape-start-state)
-DEFER: script-data-double-escaped-state
-DEFER: (script-data-double-escaped-state)
-DEFER: script-data-double-escaped-dash-state
-DEFER: (script-data-double-escaped-dash-state)
-DEFER: script-data-double-escaped-dash-dash-state
-DEFER: (script-data-double-escaped-dash-dash-state)
-DEFER: script-data-double-escaped-less-than-sign-state
-DEFER: (script-data-double-escaped-less-than-sign-state)
-DEFER: script-data-double-escape-end-state
-DEFER: (script-data-double-escape-end-state)
-DEFER: before-attribute-name-state
-DEFER: (before-attribute-name-state)
-DEFER: attribute-name-state
-DEFER: (attribute-name-state)
-DEFER: after-attribute-name-state
-DEFER: (after-attribute-name-state)
-DEFER: before-attribute-value-state
-DEFER: (before-attribute-value-state)
-DEFER: attribute-value-double-quoted-state
-DEFER: (attribute-value-double-quoted-state)
-DEFER: attribute-value-single-quoted-state
-DEFER: (attribute-value-single-quoted-state)
-DEFER: attribute-value-unquoted-state
-DEFER: (attribute-value-unquoted-state)
-DEFER: after-attribute-value-quoted-state
-DEFER: (after-attribute-value-quoted-state)
-DEFER: self-closing-start-tag-state
-DEFER: (self-closing-start-tag-state)
-DEFER: bogus-comment-state
-DEFER: (bogus-comment-state)
-DEFER: markup-declaration-open-state
-DEFER: (markup-declaration-open-state)
-DEFER: comment-start-state
-DEFER: (comment-start-state)
-DEFER: comment-start-dash-state
-DEFER: (comment-start-dash-state)
-DEFER: comment-state
-DEFER: (comment-state)
-DEFER: comment-less-than-sign-state
-DEFER: (comment-less-than-sign-state)
-DEFER: comment-less-than-sign-bang-state
-DEFER: (comment-less-than-sign-bang-state)
-DEFER: comment-less-than-sign-bang-dash-state
-DEFER: (comment-less-than-sign-bang-dash-state)
-DEFER: comment-less-than-sign-bang-dash-dash-state
-DEFER: (comment-less-than-sign-bang-dash-dash-state)
-DEFER: comment-end-dash-state
-DEFER: (comment-end-dash-state)
-DEFER: comment-end-state
-DEFER: (comment-end-state)
-DEFER: comment-end-bang-state
-DEFER: (comment-end-bang-state)
-DEFER: doctype-state
-DEFER: (doctype-state)
-DEFER: before-doctype-name-state
-DEFER: (before-doctype-name-state)
-DEFER: doctype-name-state
-DEFER: (doctype-name-state)
-DEFER: after-doctype-name-state
-DEFER: (after-doctype-name-state)
-DEFER: after-doctype-public-keyword-state
-DEFER: (after-doctype-public-keyword-state)
-DEFER: before-doctype-public-identifier-state
-DEFER: (before-doctype-public-identifier-state)
-DEFER: doctype-public-identifier-double-quoted-state
-DEFER: (doctype-public-identifier-double-quoted-state)
-DEFER: doctype-public-identifier-single-quoted-state
-DEFER: (doctype-public-identifier-single-quoted-state)
-DEFER: after-doctype-public-identifier-state
-DEFER: (after-doctype-public-identifier-state)
-DEFER: between-doctype-public-and-system-identifiers-state
-DEFER: (between-doctype-public-and-system-identifiers-state)
-DEFER: after-doctype-system-keyword-state
-DEFER: (after-doctype-system-keyword-state)
-DEFER: before-doctype-system-identifier-state
-DEFER: (before-doctype-system-identifier-state)
-DEFER: doctype-system-identifier-double-quoted-state
-DEFER: (doctype-system-identifier-double-quoted-state)
-DEFER: doctype-system-identifier-single-quoted-state
-DEFER: (doctype-system-identifier-single-quoted-state)
-DEFER: after-doctype-system-identifier-state
-DEFER: (after-doctype-system-identifier-state)
-DEFER: bogus-doctype-state
-DEFER: (bogus-doctype-state)
-DEFER: cdata-section-state
-DEFER: (cdata-section-state)
-DEFER: cdata-section-bracket-state
-DEFER: (cdata-section-bracket-state)
-DEFER: cdata-section-end-state
-DEFER: (cdata-section-end-state)
-DEFER: character-reference-state
-DEFER: (character-reference-state)
-DEFER: named-character-reference-state
-DEFER: (named-character-reference-state)
-DEFER: ambiguous-ampersand-state
-DEFER: (ambiguous-ampersand-state)
-DEFER: numeric-character-reference-state
-DEFER: (numeric-character-reference-state)
-DEFER: hexadecimal-character-reference-start-state
-DEFER: (hexadecimal-character-reference-start-state)
-DEFER: decimal-character-reference-start-state
-DEFER: (decimal-character-reference-start-state)
-DEFER: hexadecimal-character-reference-state
-DEFER: (hexadecimal-character-reference-state)
-DEFER: decimal-character-reference-state
-DEFER: (decimal-character-reference-state)
-DEFER: numeric-character-reference-end-state
-DEFER: (numeric-character-reference-end-state)
-
 
 : (data-state) ( document n/f string ch/f -- document n'/f string )
     {
-        ! { CHAR: & [ "character-reference-state-mode" unimplemented ] }
-        { CHAR: < [ tag-open-state ] }
-        { CHAR: \0 [ unexpected-null-character ] }
-        { f [ ] }
-        [ reach push-tag-name data-state ]
-    } case ;
+        { [ dup CHAR: & = ] [ drop \ data-state reach return-state<< character-reference-state ] }
+        { [ dup CHAR: < = ] [ drop tag-open-state ] }
+        { [ dup CHAR: \0 = ] [ unexpected-null-character ] }
+        { [ dup f = ] [ drop pick emit-eof ] }
+        [ reach emit-char data-state ]
+    } cond ;
 
 : data-state ( document n/f string -- document n'/f string )
     next-char-from (data-state) ;
@@ -353,7 +363,11 @@ DEFER: (numeric-character-reference-end-state)
 
 : (rcdata-state) ( document n/f string ch/f -- document n'/f string )
     {
-        [ unimplemented* ]
+        { [ dup CHAR: & = ] [ drop \ rcdata-state reach return-state<< character-reference-state ] }
+        { [ dup CHAR: < = ] [ drop rcdata-less-than-sign-state ] }
+        { [ dup CHAR: \0 = ] [ unexpected-null-character ] }
+        { [ dup f = ] [ drop pick emit-eof ] }
+        [ reach emit-char rcdata-state ]
     } cond ;
 
 : rcdata-state ( document n/f string -- document n'/f string )
@@ -362,7 +376,10 @@ DEFER: (numeric-character-reference-end-state)
 
 : (rawtext-state) ( document n/f string ch/f -- document n'/f string )
     {
-        [ unimplemented* ]
+        { [ dup CHAR: < = ] [ drop rawtext-less-than-sign-state ] } ! "<": continue in the RAWTEXT less-than-sign state
+        { [ dup CHAR: \0 = ] [ drop unexpected-null-character ] } ! NUL: calls unexpected-null-character (defined elsewhere)
+        { [ dup f = ] [ drop pick emit-eof ] } ! f (no more input): emit EOF and stop
+        [ reach emit-char rawtext-state ] ! anything else: emit it and keep scanning
     } cond ;
 
 : rawtext-state ( document n/f string -- document n'/f string )
@@ -371,7 +388,10 @@ DEFER: (numeric-character-reference-end-state)
 
 : (script-data-state) ( document n/f string ch/f -- document n'/f string )
     {
-        [ unimplemented* ]
+        { [ dup CHAR: < = ] [ drop script-data-less-than-sign-state ] } ! "<": continue in the script data less-than-sign state
+        { [ dup CHAR: \0 = ] [ drop unexpected-null-character ] } ! NUL: calls unexpected-null-character (defined elsewhere)
+        { [ dup f = ] [ drop pick emit-eof ] } ! f (no more input): emit EOF and stop
+        [ reach emit-char script-data-state ] ! anything else: emit it and keep scanning
     } cond ;
 
 : script-data-state ( document n/f string -- document n'/f string )
@@ -380,7 +400,9 @@ DEFER: (numeric-character-reference-end-state)
 
 : (plaintext-state) ( document n/f string ch/f -- document n'/f string )
     {
-        [ unimplemented* ]
+        { [ dup CHAR: \0 = ] [ drop unexpected-null-character ] } ! NUL: calls unexpected-null-character (defined elsewhere)
+        { [ dup f = ] [ drop pick emit-eof ] } ! f (no more input): emit EOF and stop
+        [ reach emit-char plaintext-state ] ! anything else: emit it and keep scanning
     } cond ;
 
 : plaintext-state ( document n/f string -- document n'/f string )
@@ -388,7 +410,7 @@ DEFER: (numeric-character-reference-end-state)
 
 
 : (tag-open-state) ( document n/f string ch/f -- document n'/f string )
-{
+    {
         { [ dup ascii-alpha? ] [ (tag-name-state) ] }
         { [ dup CHAR: ! = ] [ drop markup-declaration-open-state ] }
         { [ dup CHAR: / = ] [ drop end-tag-open-state ] }
@@ -430,7 +452,8 @@ DEFER: (numeric-character-reference-end-state)
 
 : (rcdata-less-than-sign-state) ( document n/f string ch/f -- document n'/f string )
     {
-        [ unimplemented* ]
+        { [ dup CHAR: / = ] [ drop pick reset-temporary-buffer rcdata-end-tag-open-state ] } ! "/": clear the temporary buffer, then look for an end tag name
+        [ [ CHAR: < reach emit-char ] dip (rcdata-state) ] ! otherwise emit the pending "<" and reprocess ch in the RCDATA state
     } cond ;
 
 : rcdata-less-than-sign-state ( document n/f string -- document n'/f string )
@@ -439,7 +462,8 @@ DEFER: (numeric-character-reference-end-state)
 
 : (rcdata-end-tag-open-state) ( document n/f string ch/f -- document n'/f string )
     {
-        [ unimplemented* ]
+        { [ dup ascii-alpha? ] [ reach reset-end-tag (rcdata-end-tag-name-state) ] }
+        [ [ CHAR: < reach emit-char ] dip (rcdata-state) ]
     } cond ;
 
 : rcdata-end-tag-open-state ( document n/f string -- document n'/f string )
@@ -448,7 +472,21 @@ DEFER: (numeric-character-reference-end-state)
 
 : (rcdata-end-tag-name-state) ( document n/f string ch/f -- document n'/f string )
     {
-        [ unimplemented* ]
+        { [ dup "\t\n\f\s" member? ] [
+            drop pick appropriate-end-tag-token?
+            [ before-attribute-name-state ] [ "</" reach emit-temporary-buffer-with rcdata-state ] if
+        ] }
+        { [ dup CHAR: / = ] [
+            drop pick appropriate-end-tag-token?
+            [ self-closing-start-tag-state ] [ "</" reach emit-temporary-buffer-with rcdata-state ] if
+        ] }
+        { [ dup CHAR: > = ] [
+            drop pick appropriate-end-tag-token?
+            [ pick emit-end-tag data-state ] [ "</" reach emit-temporary-buffer-with rcdata-state ] if
+        ] }
+        { [ dup ascii-upper-alpha? ] [ [ 0x20 + reach push-tag-name ] [ reach push-temporary-buffer ] bi rcdata-end-tag-name-state ] }
+        { [ dup ascii-lower-alpha? ] [ [ reach push-tag-name ] [ reach push-temporary-buffer ] bi rcdata-end-tag-name-state ] }
+        [ [ "</" reach emit-temporary-buffer-with ] dip (rcdata-state) ]
     } cond ;
 
 : rcdata-end-tag-name-state ( document n/f string -- document n'/f string )
@@ -457,7 +495,8 @@ DEFER: (numeric-character-reference-end-state)
 
 : (rawtext-less-than-sign-state) ( document n/f string ch/f -- document n'/f string )
     {
-        [ unimplemented* ]
+        { [ dup CHAR: / = ] [ drop pick reset-temporary-buffer rawtext-end-tag-open-state ] } ! "/": clear the temporary buffer, then look for an end tag name
+        [ [ CHAR: < reach emit-char ] dip (rawtext-state) ] ! otherwise emit the pending "<" and reprocess ch in the RAWTEXT state
     } cond ;
 
 : rawtext-less-than-sign-state ( document n/f string -- document n'/f string )
@@ -466,7 +505,8 @@ DEFER: (numeric-character-reference-end-state)
 
 : (rawtext-end-tag-open-state) ( document n/f string ch/f -- document n'/f string )
     {
-        [ unimplemented* ]
+        { [ dup ascii-alpha? ] [ reach reset-end-tag (rawtext-end-tag-name-state) ] }
+        [ [ CHAR: < reach emit-char ] dip (rawtext-state) ]
     } cond ;
 
 : rawtext-end-tag-open-state ( document n/f string -- document n'/f string )
@@ -475,7 +515,21 @@ DEFER: (numeric-character-reference-end-state)
 
 : (rawtext-end-tag-name-state) ( document n/f string ch/f -- document n'/f string )
     {
-        [ unimplemented* ]
+        { [ dup "\t\n\f\s" member? ] [
+            drop pick appropriate-end-tag-token?
+            [ before-attribute-name-state ] [ "</" reach emit-temporary-buffer-with rawtext-state ] if
+        ] }
+        { [ dup CHAR: / = ] [
+            drop pick appropriate-end-tag-token?
+            [ self-closing-start-tag-state ] [ "</" reach emit-temporary-buffer-with rawtext-state ] if
+        ] }
+        { [ dup CHAR: > = ] [
+            drop pick appropriate-end-tag-token?
+            [ pick emit-end-tag data-state ] [ "</" reach emit-temporary-buffer-with rawtext-state ] if
+        ] }
+        { [ dup ascii-upper-alpha? ] [ [ 0x20 + reach push-tag-name ] [ reach push-temporary-buffer ] bi rawtext-end-tag-name-state ] }
+        { [ dup ascii-lower-alpha? ] [ [ reach push-tag-name ] [ reach push-temporary-buffer ] bi rawtext-end-tag-name-state ] }
+        [ [ "</" reach emit-temporary-buffer-with ] dip (rawtext-state) ]
     } cond ;
 
 : rawtext-end-tag-name-state ( document n/f string -- document n'/f string )
@@ -484,7 +538,9 @@ DEFER: (numeric-character-reference-end-state)
 
 : (script-data-less-than-sign-state) ( document n/f string ch/f -- document n'/f string )
     {
-        [ unimplemented* ]
+        { [ dup CHAR: / = ] [ drop pick reset-temporary-buffer script-data-end-tag-open-state ] } ! "/": clear the temporary buffer, then look for an end tag name
+        { [ dup CHAR: ! = ] [ drop "<!" reach emit-string script-data-escape-start-state ] } ! "!": emit "<!" and check for the dashes that start an escaped section
+        [ [ CHAR: < reach emit-char ] dip (script-data-state) ] ! otherwise emit the pending "<" and reprocess ch in the script data state
     } cond ;
 
 : script-data-less-than-sign-state ( document n/f string -- document n'/f string )
@@ -493,7 +549,8 @@ DEFER: (numeric-character-reference-end-state)
 
 : (script-data-end-tag-open-state) ( document n/f string ch/f -- document n'/f string )
     {
-        [ unimplemented* ]
+        { [ dup ascii-alpha? ] [ reach reset-end-tag (script-data-end-tag-name-state) ] } ! letter: clear end-tag-name and reprocess ch as the first name character
+        [ [ "</" reach emit-string ] dip (script-data-state) ] ! otherwise emit the pending "</" and reprocess ch in the script data state
     } cond ;
 
 : script-data-end-tag-open-state ( document n/f string -- document n'/f string )
@@ -502,7 +559,21 @@ DEFER: (numeric-character-reference-end-state)
 
 : (script-data-end-tag-name-state) ( document n/f string ch/f -- document n'/f string )
     {
-        [ unimplemented* ]
+        { [ dup "\t\n\f\s" member? ] [
+            drop pick appropriate-end-tag-token?
+            [ before-attribute-name-state ] [ "</" reach emit-temporary-buffer-with script-data-state ] if
+        ] }
+        { [ dup CHAR: / = ] [
+            drop pick appropriate-end-tag-token?
+            [ self-closing-start-tag-state ] [ "</" reach emit-temporary-buffer-with script-data-state ] if
+        ] }
+        { [ dup CHAR: > = ] [
+            drop pick appropriate-end-tag-token?
+            [ pick emit-end-tag data-state ] [ "</" reach emit-temporary-buffer-with script-data-state ] if
+        ] }
+        { [ dup ascii-upper-alpha? ] [ [ 0x20 + reach push-tag-name ] [ reach push-temporary-buffer ] bi rawtext-end-tag-name-state ] }
+        { [ dup ascii-lower-alpha? ] [ [ reach push-tag-name ] [ reach push-temporary-buffer ] bi rawtext-end-tag-name-state ] }
+        [ [ "</" reach emit-temporary-buffer-with ] dip (script-data-state) ]
     } cond ;
 
 : script-data-end-tag-name-state ( document n/f string -- document n'/f string )
@@ -511,7 +582,8 @@ DEFER: (numeric-character-reference-end-state)
 
 : (script-data-escape-start-state) ( document n/f string ch/f -- document n'/f string )
     {
-        [ unimplemented* ]
+        { [ dup CHAR: - = ] [ drop script-data-escape-start-dash-state ] }
+        [ (script-data-state) ]
     } cond ;
 
 : script-data-escape-start-state ( document n/f string -- document n'/f string )
@@ -520,7 +592,8 @@ DEFER: (numeric-character-reference-end-state)
 
 : (script-data-escape-start-dash-state) ( document n/f string ch/f -- document n'/f string )
     {
-        [ unimplemented* ]
+        { [ dup CHAR: - = ] [ drop script-data-escaped-dash-dash-state ] }
+        [ (script-data-state) ]
     } cond ;
 
 : script-data-escape-start-dash-state ( document n/f string -- document n'/f string )
@@ -529,7 +602,11 @@ DEFER: (numeric-character-reference-end-state)
 
 : (script-data-escaped-state) ( document n/f string ch/f -- document n'/f string )
     {
-        [ unimplemented* ]
+        { [ dup CHAR: - = ] [ drop script-data-escaped-dash-state ] }
+        { [ dup CHAR: < = ] [ drop script-data-escaped-less-than-sign-state ] }
+        { [ dup CHAR: \0 = ] [ unexpected-null-character CHAR: replacement-character unimplemented* ] }
+        { [ dup f = ] [ eof-in-script-html-comment-like-text ] }
+        [ reach emit-char script-data-escaped-state ]
     } cond ;
 
 : script-data-escaped-state ( document n/f string -- document n'/f string )
@@ -538,7 +615,11 @@ DEFER: (numeric-character-reference-end-state)
 
 : (script-data-escaped-dash-state) ( document n/f string ch/f -- document n'/f string )
     {
-        [ unimplemented* ]
+        { [ dup CHAR: - = ] [ drop script-data-escaped-dash-dash-state ] }
+        { [ dup CHAR: < = ] [ drop script-data-escaped-less-than-sign-state ] }
+        { [ dup CHAR: \0 = ] [ unexpected-null-character script-data-escaped-state ] }
+        { [ dup f = ] [ eof-in-script-html-comment-like-text ] }
+        [ reach emit-char script-data-escaped-state ]
     } cond ;
 
 : script-data-escaped-dash-state ( document n/f string -- document n'/f string )
@@ -547,7 +628,12 @@ DEFER: (numeric-character-reference-end-state)
 
 : (script-data-escaped-dash-dash-state) ( document n/f string ch/f -- document n'/f string )
     {
-        [ unimplemented* ]
+        { [ dup CHAR: - = ] [ reach emit-char script-data-escaped-dash-dash-state ] } ! another "-": emit it and stay in this state
+        { [ dup CHAR: < = ] [ drop script-data-escaped-less-than-sign-state ] } ! "<": continue in the escaped less-than-sign state
+        { [ dup CHAR: > = ] [ reach emit-char script-data-state ] } ! ">": emit it and go back to the script data state
+        { [ dup CHAR: \0 = ] [ unexpected-null-character script-data-escaped-state ] } ! NUL: NOTE(review) ch is not dropped here - confirm unexpected-null-character never returns
+        { [ dup f = ] [ eof-in-script-html-comment-like-text ] } ! f (no more input): calls eof-in-script-html-comment-like-text (defined elsewhere)
+        [ reach emit-char script-data-escaped-state ] ! anything else: emit it and return to the escaped state
     } cond ;
 
 : script-data-escaped-dash-dash-state ( document n/f string -- document n'/f string )
@@ -556,7 +642,9 @@ DEFER: (numeric-character-reference-end-state)
 
 : (script-data-escaped-less-than-sign-state) ( document n/f string ch/f -- document n'/f string )
     {
-        [ unimplemented* ]
+        { [ dup CHAR: / = ] [ drop pick reset-temporary-buffer script-data-escaped-end-tag-open-state ] } ! "/": clear the temporary buffer, then look for an end tag name
+        { [ dup ascii-alpha? ] [ [ pick reset-temporary-buffer CHAR: < reach emit-char ] dip (script-data-double-escape-start-state) ] } ! letter: clear the buffer, emit the pending "<", reprocess ch in the double escape start state
+        [ [ CHAR: < reach emit-char ] dip (script-data-escaped-state) ] ! otherwise emit the pending "<" and reprocess ch in the escaped state
     } cond ;
 
 : script-data-escaped-less-than-sign-state ( document n/f string -- document n'/f string )
@@ -565,7 +653,8 @@ DEFER: (numeric-character-reference-end-state)
 
 : (script-data-escaped-end-tag-open-state) ( document n/f string ch/f -- document n'/f string )
     {
-        [ unimplemented* ]
+        { [ dup ascii-alpha? ] [ [ pick reset-end-tag ] dip (script-data-escaped-end-tag-name-state) ] } ! letter: clear end-tag-name and reprocess ch as the first name character
+        [ [ "</" reach emit-string ] dip (script-data-escaped-state) ] ! otherwise emit the pending "</" and reprocess ch in the escaped state
     } cond ;
 
 : script-data-escaped-end-tag-open-state ( document n/f string -- document n'/f string )
@@ -574,7 +663,21 @@ DEFER: (numeric-character-reference-end-state)
 
 : (script-data-escaped-end-tag-name-state) ( document n/f string ch/f -- document n'/f string )
     {
-        [ unimplemented* ]
+        { [ dup "\t\n\f\s" member? ] [
+            drop pick appropriate-end-tag-token?
+            [ before-attribute-name-state ] [ "</" reach emit-temporary-buffer-with script-data-escaped-state ] if
+        ] }
+        { [ dup CHAR: / = ] [
+            drop pick appropriate-end-tag-token?
+            [ self-closing-start-tag-state ] [ "</" reach emit-temporary-buffer-with script-data-escaped-state ] if
+        ] }
+        { [ dup CHAR: > = ] [
+            drop pick appropriate-end-tag-token?
+            [ pick emit-end-tag data-state ] [ "</" reach emit-temporary-buffer-with script-data-escaped-state ] if
+        ] }
+        { [ dup ascii-upper-alpha? ] [ [ 0x20 + reach push-tag-name ] [ reach push-temporary-buffer ] bi script-data-escaped-end-tag-name-state ] }
+        { [ dup ascii-lower-alpha? ] [ [ reach push-tag-name ] [ reach push-temporary-buffer ] bi script-data-escaped-end-tag-name-state ] }
+        [ [ "</" reach emit-temporary-buffer-with ] dip (script-data-escaped-state) ]
     } cond ;
 
 : script-data-escaped-end-tag-name-state ( document n/f string -- document n'/f string )
@@ -583,7 +686,14 @@ DEFER: (numeric-character-reference-end-state)
 
 : (script-data-double-escape-start-state) ( document n/f string ch/f -- document n'/f string )
     {
-        [ unimplemented* ]
+        { [ dup "\t\n\f\s/>" member? ] [
+            reach emit-char
+            pick temporary-buffer>> "script" sequence=
+            [ script-data-double-escaped-state ] [ script-data-escaped-state ] if
+        ] }
+        { [ dup ascii-upper-alpha? ] [ [ 0x20 + reach push-tag-name ] [ reach push-temporary-buffer ] bi script-data-double-escape-start-state ] }
+        { [ dup ascii-lower-alpha? ] [ [ reach push-tag-name ] [ reach push-temporary-buffer ] bi script-data-double-escape-start-state ] } ! todo
+        [ (script-data-escaped-state) ]
     } cond ;
 
 : script-data-double-escape-start-state ( document n/f string -- document n'/f string )
@@ -592,7 +702,15 @@ DEFER: (numeric-character-reference-end-state)
 
 : (script-data-double-escaped-state) ( document n/f string ch/f -- document n'/f string )
     {
-        [ unimplemented* ]
+        { [ dup CHAR: - = ] [ reach emit-char script-data-double-escaped-dash-state ] } ! "-": emit it and move to the double escaped dash state
+        { [ dup CHAR: < = ] [ reach emit-char script-data-double-escaped-less-than-sign-state ] } ! "<": emit it and check for a closing "/"
+        { [ dup CHAR: \0 = ] [
+            unexpected-null-character
+            CHAR: replacement-character reach emit-char ! NOTE(review): ch is still on the stack, so reach would pick n/f, not the document - confirm unexpected-null-character never returns
+            script-data-double-escaped-state
+        ] }
+        { [ dup f = ] [ eof-in-script-html-comment-like-text ] } ! f (no more input): calls eof-in-script-html-comment-like-text (defined elsewhere)
+        [ reach emit-char script-data-double-escaped-state ] ! anything else: emit it and keep scanning
     } cond ;
 
 : script-data-double-escaped-state ( document n/f string -- document n'/f string )
@@ -601,7 +719,15 @@ DEFER: (numeric-character-reference-end-state)
 
 : (script-data-double-escaped-dash-state) ( document n/f string ch/f -- document n'/f string )
     {
-        [ unimplemented* ]
+        { [ dup CHAR: - = ] [ reach emit-char script-data-double-escaped-dash-dash-state ] } ! second "-": emit it and move to the dash-dash state
+        { [ dup CHAR: < = ] [ reach emit-char script-data-double-escaped-less-than-sign-state ] } ! "<": emit it and check for a closing "/"
+        { [ dup CHAR: \0 = ] [
+            unexpected-null-character
+            CHAR: replacement-character reach emit-char ! NOTE(review): ch is still on the stack, so reach would pick n/f, not the document - confirm unexpected-null-character never returns
+            script-data-double-escaped-state
+        ] }
+        { [ dup f = ] [ eof-in-script-html-comment-like-text ] } ! f (no more input): calls eof-in-script-html-comment-like-text (defined elsewhere)
+        [ reach emit-char script-data-double-escaped-state ] ! anything else: emit it and return to the double escaped state
     } cond ;
 
 : script-data-double-escaped-dash-state ( document n/f string -- document n'/f string )
@@ -610,7 +736,16 @@ DEFER: (numeric-character-reference-end-state)
 
 : (script-data-double-escaped-dash-dash-state) ( document n/f string ch/f -- document n'/f string )
     {
-        [ unimplemented* ]
+        { [ dup CHAR: - = ] [ reach emit-char script-data-double-escaped-dash-dash-state ] }
+        { [ dup CHAR: < = ] [ reach emit-char script-data-double-escaped-less-than-sign-state ] }
+        { [ dup CHAR: > = ] [ reach emit-char script-data-state ] }
+        { [ dup CHAR: \0 = ] [
+            unexpected-null-character
+            CHAR: replacement-character reach emit-char
+            script-data-double-escaped-state
+        ] }
+        { [ dup f = ] [ eof-in-script-html-comment-like-text ] }
+        [ reach emit-char script-data-escaped-state ]
     } cond ;
 
 : script-data-double-escaped-dash-dash-state ( document n/f string -- document n'/f string )
@@ -619,7 +754,8 @@ DEFER: (numeric-character-reference-end-state)
 
 : (script-data-double-escaped-less-than-sign-state) ( document n/f string ch/f -- document n'/f string )
     {
-        [ unimplemented* ]
+        { [ dup CHAR: / = ] [ reach emit-char pick reset-temporary-buffer script-data-double-escape-end-state ] } ! "/": emit it, clear the temporary buffer, then read the closing name
+        [ (script-data-double-escaped-state) ] ! otherwise reprocess ch in the double escaped state
     } cond ;
 
 : script-data-double-escaped-less-than-sign-state ( document n/f string -- document n'/f string )
@@ -628,7 +764,14 @@ DEFER: (numeric-character-reference-end-state)
 
 : (script-data-double-escape-end-state) ( document n/f string ch/f -- document n'/f string )
     {
-        [ unimplemented* ]
+        { [ dup "\t\n\f\s/>" member? ] [
+            reach emit-char
+            pick temporary-buffer>> "script" sequence=
+            [ script-data-escaped-state ] [ script-data-double-escaped-state ] if
+        ] }
+        { [ dup ascii-upper-alpha? ] [ [ 0x20 + reach push-tag-name ] [ reach push-temporary-buffer ] bi script-data-double-escape-end-state ] }
+        { [ dup ascii-lower-alpha? ] [ [ reach push-tag-name ] [ reach push-temporary-buffer ] bi script-data-double-escape-end-state ] } ! todo
+        [ (script-data-double-escaped-state) ]
     } cond ;
 
 : script-data-double-escape-end-state ( document n/f string -- document n'/f string )