1 ! Copyright (C) 2008 Doug Coleman.
2 ! See http://factorcode.org/license.txt for BSD license.
3 USING: accessors arrays hashtables html.parser.state
4 html.parser.utils kernel make namespaces sequences
5 unicode.case unicode.categories combinators.short-circuit
10 TUPLE: tag name attributes text closing? ;
20 : closing-tag? ( string -- ? )
22 [ [ first ] [ peek ] bi [ CHAR: / = ] bi@ or ] if-empty ;
24 : <tag> ( name attributes closing? -- tag )
30 : make-tag ( string attribs -- tag )
31 [ [ closing-tag? ] keep "/" trim1 ] dip rot <tag> ;
33 : new-tag ( string type -- tag )
38 : make-text-tag ( string -- tag ) text new-tag ; inline
40 : make-comment-tag ( string -- tag ) comment new-tag ; inline
42 : make-dtd-tag ( string -- tag ) dtd new-tag ; inline
44 : read-single-quote ( state-parser -- string )
45 [ [ CHAR: ' = ] take-until ] [ next drop ] bi ;
47 : read-double-quote ( state-parser -- string )
48 [ [ CHAR: " = ] take-until ] [ next drop ] bi ;
50 : read-quote ( state-parser -- string )
51 dup get+increment CHAR: ' =
52 [ read-single-quote ] [ read-double-quote ] if ;
54 : read-key ( state-parser -- string )
56 [ { [ CHAR: = = ] [ blank? ] } 1|| ] take-until ;
58 : read-= ( state-parser -- )
60 [ [ CHAR: = = ] take-until drop ] [ next drop ] bi ;
62 : read-token ( state-parser -- string )
63 [ blank? ] take-until ;
65 : read-value ( state-parser -- string )
67 dup get-char quote? [ read-quote ] [ read-token ] if
70 : read-comment ( state-parser -- )
71 "-->" take-until-string make-comment-tag push-tag ;
73 : read-dtd ( state-parser -- )
74 ">" take-until-string make-dtd-tag push-tag ;
76 : read-bang ( state-parser -- )
77 next dup { [ get-char CHAR: - = ] [ get-next CHAR: - = ] } 1&& [
84 : read-tag ( state-parser -- string )
85 [ [ "><" member? ] take-until ]
86 [ dup get-char CHAR: < = [ next ] unless drop ] bi ;
88 : read-until-< ( state-parser -- string )
89 [ CHAR: < = ] take-until ;
91 : parse-text ( state-parser -- )
92 read-until-< [ make-text-tag push-tag ] unless-empty ;
94 : (parse-attributes) ( state-parser -- )
96 dup string-parse-end? [
100 [ read-key >lower ] [ read-= ] [ read-value ] tri
102 ] keep (parse-attributes)
105 : parse-attributes ( state-parser -- hashtable )
106 [ (parse-attributes) ] { } make >hashtable ;
108 : (parse-tag) ( string -- string' hashtable )
110 [ read-token >lower ] [ parse-attributes ] bi
113 : read-< ( state-parser -- string/f )
115 CHAR: ! = [ read-bang f ] [ read-tag ] if
120 : parse-tag ( state-parser -- )
121 read-< [ (parse-tag) make-tag push-tag ] unless-empty ;
123 : (parse-html) ( state-parser -- )
125 [ parse-text ] [ parse-tag ] [ (parse-html) ] tri
128 : tag-parse ( quot -- vector )
129 V{ } clone tagstack [ string-parse ] with-variable ; inline
131 : parse-html ( string -- vector )
132 [ (parse-html) tagstack get ] tag-parse ;