! Copyright (C) 2021 Doug Coleman.
! See http://factorcode.org/license.txt for BSD license.
-USING: accessors arrays combinators combinators.short-circuit
-generalizations kernel lexer make math modern modern.slices
-sequences sequences.extras shuffle splitting strings ;
+USING: accessors arrays assocs combinators
+combinators.short-circuit kernel make math modern modern.slices
+sequences sequences.extras shuffle splitting strings unicode ;
IN: modern.html
TUPLE: tag open name props close children ;
"--" expect-and-span >string
[ "-->" slice-til-string [ >string ] bi@ ] dip -rot <comment>
] [
- "DOCTYPE" expect-and-span
+ "DOCTYPE" expect-and-span-insensitive
[ read-props ] dip
-rot <doctype>
] if ;
M: open-tag walk-html [ call( obj -- ) ] 2keep [ children>> ] dip [ walk-html ] curry each ;
M: self-close-tag walk-html [ call( obj -- ) ] 2keep [ children>> ] dip [ walk-html ] curry each ;
M: comment walk-html call( obj -- ) ;
+
+: find-links ( seq -- links )
+ [
+ [
+ dup tag? [
+ props>> [ drop >lower "href" = ] assoc-find
+ [ nip , ] [ 2drop ] if
+ ] [ drop ] if
+ ] walk-html
+ ] { } make [ payload>> ] map ;
rest ">" append
] if ;
+: accept1 ( n string quot: ( ch -- ? ) -- n/n' string ch/f )
+ [ 2dup nth ] dip keep swap [ [ 1 + ] 2dip ] [ drop f ] if ; inline
+
ERROR: unexpected-end n string ;
: nth-check-eof ( n string -- nth )
2dup ?nth [ 2nip ] [ unexpected-end ] if* ;
: check-sequence ( expected actual -- actual/* )
2dup sequence= [ nip ] [ expected-sequence-error ] if ;
+: check-sequence-insensitive ( expected actual -- actual/* )
+ 2dup [ >lower ] bi@ sequence= [ nip ] [ expected-sequence-error ] if ;
+
: expect-and-span ( n string slice expected-string -- n' string slice' )
dup length '[ _ take-slice ] 2dip
rot check-sequence span-slices ;
+: expect-and-span-insensitive ( n string slice expected-string -- n' string slice' )
+ dup length '[ _ take-slice ] 2dip
+ rot check-sequence-insensitive span-slices ;
+
:: split-slice-back ( slice n -- slice1 slice2 )
slice [ from>> ] [ to>> ] [ seq>> ] tri :> ( from to seq )
from to n - seq <slice>