USING: assocs html.parser kernel math sequences strings ascii
arrays generalizations shuffle unicode.case namespaces make
splitting http accessors io combinators http.client urls
-urls.encoding fry prettyprint ;
+urls.encoding fry prettyprint sets ;
IN: html.parser.analyzer
TUPLE: link attributes clickable ;
[ [
[ name>> "a" = ]
[ attributes>> "href" swap key? ] bi and ] filter
- ] map sift [ [ attributes>> "href" swap at ] map ] map concat ;
+ ] map sift
+ [ [ attributes>> "href" swap at ] map ] map concat ;
+
+: find-frame-links ( vector -- vector' ) ! collect the src targets of frame tags
+    [ name>> "frame" = ] find-between-all ! NOTE(review): presumably yields the tag runs for each frame element -- confirm find-between-all
+    [ [ attributes>> "src" swap at ] map sift ] map concat sift ; ! sift drops tags with no src attribute
+
+: find-all-links ( vector -- vector' ) ! union of anchor hrefs and frame srcs
+    [ find-hrefs ] [ find-frame-links ] bi append prune ; ! prune ( sets vocab ) removes duplicate links
: find-forms ( vector -- vector' )
"form" over find-opening-tags-by-name
: read-value ( state-parser -- string )
skip-whitespace
- dup get-char quote? [ read-quote ] [ read-token ] if
+ dup current quote? [ read-quote ] [ read-token ] if ! branch on whether the value is quoted or a bare token
[ blank? ] trim ;
: read-comment ( state-parser -- )
">" take-until-sequence make-dtd-tag push-tag ;
: read-bang ( state-parser -- )
- next dup { [ get-char CHAR: - = ] [ get-next CHAR: - = ] } 1&& [
+ next dup { [ current CHAR: - = ] [ peek-next CHAR: - = ] } 1&& [
next next
read-comment
] [
: read-tag ( state-parser -- string )
[ [ "><" member? ] take-until ]
- [ dup get-char CHAR: < = [ next ] unless drop ] bi ;
+ [ dup current CHAR: < = [ next ] unless drop ] bi ; ! consume the terminator unless it opens the next tag
: read-until-< ( state-parser -- string )
[ CHAR: < = ] take-until ;
] state-parse ;
: read-< ( state-parser -- string/f )
- next dup get-char [
+ next dup current [
CHAR: ! = [ read-bang f ] [ read-tag ] if
] [
drop f
read-< [ (parse-tag) make-tag push-tag ] unless-empty ;
: (parse-html) ( state-parser -- )
- dup get-next [
+ dup peek-next [ ! keep parsing text/tag pairs while lookahead is not exhausted
[ parse-text ] [ parse-tag ] [ (parse-html) ] tri
] [ drop ] if ;