From 3c53214b43aaf1a1c03f172545ad9f50cb7fef4a Mon Sep 17 00:00:00 2001 From: =?utf8?q?Bj=C3=B6rn=20Lindqvist?= Date: Sat, 15 Feb 2014 23:14:29 +0100 Subject: [PATCH] html.parser.analyzer: when matching on classname, check that the tag has the given class --- .../html/parser/analyzer/analyzer-docs.factor | 8 ++++-- .../parser/analyzer/analyzer-tests.factor | 26 ++++++++++++++++++- extra/html/parser/analyzer/analyzer.factor | 24 ++++++++--------- 3 files changed, 43 insertions(+), 15 deletions(-) diff --git a/extra/html/parser/analyzer/analyzer-docs.factor b/extra/html/parser/analyzer/analyzer-docs.factor index 7c846ad891..d28f82d12c 100644 --- a/extra/html/parser/analyzer/analyzer-docs.factor +++ b/extra/html/parser/analyzer/analyzer-docs.factor @@ -1,6 +1,11 @@ -USING: help.syntax help.markup html.parser.analyzer sequences strings ; +USING: help.syntax help.markup html.parser html.parser.analyzer sequences +strings ; IN: html.parser.analyzer +HELP: html-class? +{ $values { "tag" tag } { "string" "a classname" } } +{ $description "t if the tag has the given class." } ; + HELP: stack-find { $values { "seq" sequence } { "quot" { $quotation "( elt -- 1/0/-1 )" } } { "i/f" "an index or " { $link f } } } { $description "Takes a sequence and a quotation expected to return -1 if the element decrements the stack, 0 if it doesnt affect it and 1 if it increments it. Then finds the first element where the stack is empty." } ; @@ -8,4 +13,3 @@ HELP: stack-find HELP: tag-classifier { $values { "string" string } { "quot" { $quotation "( elt -- 1/0/-1 )" } } } { $description "Builds a function that classifies tag tuples. Returns 1 if the tag is an opening tag with the given name, -1 if it is a closing tag and 0 otherwise." 
} ; - diff --git a/extra/html/parser/analyzer/analyzer-tests.factor b/extra/html/parser/analyzer/analyzer-tests.factor index 426fd75b26..fc40ee9825 100644 --- a/extra/html/parser/analyzer/analyzer-tests.factor +++ b/extra/html/parser/analyzer/analyzer-tests.factor @@ -1,6 +1,6 @@ ! Copyright (C) 2010 Doug Coleman. ! See http://factorcode.org/license.txt for BSD license. -USING: html.parser html.parser.analyzer math tools.test ; +USING: html.parser html.parser.analyzer kernel math sequences tools.test ; IN: html.parser.analyzer.tests [ 0 3 ] @@ -70,3 +70,27 @@ IN: html.parser.analyzer.tests "

para

" parse-html "foo" find-by-class-between ] unit-test + +[ t ] [ + T{ tag { name "f" } { attributes H{ { "class" "a b c" } } } } + { "a" "b" "c" } [ html-class? ] with all? +] unit-test + +[ + V{ + T{ tag + { name "div" } + { attributes H{ { "class" "foo and more" } } } + } + T{ tag { name "div" } { attributes H{ } } { closing? t } } + } +] [ "<div class=\"foo and more\"></div>" parse-html + "foo" find-by-class-between +] unit-test + +[ + 0 + T{ tag { name "div" } { attributes H{ { "class" "foo bar" } } } } +] [ + "<div class=\"foo bar\"></div>" parse-html "bar" find-by-class +] unit-test diff --git a/extra/html/parser/analyzer/analyzer.factor b/extra/html/parser/analyzer/analyzer.factor index 1805f3dcc9..d38fdbc1f8 100644 --- a/extra/html/parser/analyzer/analyzer.factor +++ b/extra/html/parser/analyzer/analyzer.factor @@ -27,6 +27,12 @@ IN: html.parser.analyzer : loopn ( n quot -- ) [ drop ] prepose loopn-index ; inline +: html-class? ( tag string -- ? ) + swap "class" attribute [ blank? ] split-when member? ; + +: html-id? ( tag string -- ? ) + swap "id" attribute = ; + ERROR: undefined-find-nth m n seq quot ; : check-trivial-find ( m n seq quot -- m n seq quot ) @@ -94,9 +100,9 @@ ERROR: undefined-find-nth m n seq quot ; : find-by-id ( vector id -- vector' elt/f ) '[ "id" attribute _ = ] find ; - + : find-by-class ( vector id -- vector' elt/f ) - '[ "class" attribute _ = ] find ; + '[ _ html-class? ] find ; : find-by-name ( vector string -- vector elt/f ) >lower '[ name>> _ = ] find ; @@ -104,15 +110,15 @@ ERROR: undefined-find-nth m n seq quot ; : find-by-id-between ( vector string -- vector' ) dupd '[ "id" attribute _ = ] find find-between* ; - + : find-by-class-between ( vector string -- vector' ) dupd - '[ "class" attribute _ = ] find find-between* ; - + '[ _ html-class? ] find find-between* ; + : find-by-class-id-between ( vector class id -- vector' ) [ '[ - [ "class" attribute _ = ] + [ _ html-class? ] [ "id" attribute _ = ] bi and ] find ] [ @@ -203,12 +209,6 @@ ERROR: undefined-find-nth m n seq quot ; : query>assoc* ( str -- hash ) "?" split1 nip query>assoc ; - -: html-class? ( tag string -- ? ) - swap "class" attribute = ; - -: html-id? ( tag string -- ? ) - swap "id" attribute = ; : opening-tag? ( tag -- ? ) closing?>> not ; -- 2.34.1