1 USING: accessors arrays html.parser.utils hashtables io kernel
2 namespaces prettyprint quotations
3 sequences splitting state-parser strings unicode.categories unicode.case
7 TUPLE: tag name attributes text closing? ;
17 : closing-tag? ( string -- ? )
19 [ [ first ] [ peek ] bi [ CHAR: / = ] bi@ or ] if-empty ;
21 : <tag> ( name attributes closing? -- tag )
27 : make-tag ( string attribs -- tag )
28 >r [ closing-tag? ] keep "/" trim1 r> rot <tag> ;
30 : make-text-tag ( string -- tag )
35 : make-comment-tag ( string -- tag )
40 : make-dtd-tag ( string -- tag )
45 : read-whitespace ( -- string )
46 [ get-char blank? not ] take-until ;
48 : read-whitespace* ( -- ) read-whitespace drop ;
50 : read-token ( -- string )
52 [ get-char blank? ] take-until ;
54 : read-single-quote ( -- string )
55 [ get-char CHAR: ' = ] take-until ;
57 : read-double-quote ( -- string )
58 [ get-char CHAR: " = ] take-until ;
60 : read-quote ( -- string )
61 get-char next* CHAR: ' =
62 [ read-single-quote ] [ read-double-quote ] if next* ;
64 : read-key ( -- string )
66 [ get-char [ CHAR: = = ] [ blank? ] bi or ] take-until ;
70 [ get-char CHAR: = = ] take-until drop next* ;
72 : read-value ( -- string )
74 get-char quote? [ read-quote ] [ read-token ] if
78 "-->" take-string* make-comment-tag push-tag ;
81 ">" take-string* make-dtd-tag push-tag ;
84 next* get-char CHAR: - = get-next CHAR: - = and [
91 : read-tag ( -- string )
92 [ get-char CHAR: > = get-char CHAR: < = or ] take-until
93 get-char CHAR: < = [ next* ] unless ;
95 : read-< ( -- string )
96 next* get-char CHAR: ! = [
102 : read-until-< ( -- string )
103 [ get-char CHAR: < = ] take-until ;
107 make-text-tag push-tag
110 : (parse-attributes) ( -- )
113 read-key >lower read-= read-value
114 2array , (parse-attributes)
117 : parse-attributes ( -- hashtable )
118 [ (parse-attributes) ] { } make >hashtable ;
120 : (parse-tag) ( string -- string' hashtable )
128 (parse-tag) make-tag push-tag
131 : (parse-html) ( -- )
138 : tag-parse ( quot -- vector )
139 V{ } clone tagstack [ string-parse ] with-variable ;
141 : parse-html ( string -- vector )
142 [ (parse-html) tagstack get ] tag-parse ;