! Copyright (C) 2008, 2011 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
-USING: accessors arrays ascii assocs combinators fry
-io.pathnames io.sockets io.sockets.secure kernel lexer
-linked-assocs make math.parser multiline namespaces peg.ebnf
-present sequences splitting strings strings.parser urls.encoding
-vocabs.loader ;
+USING: accessors ascii assocs combinators
+combinators.short-circuit io.pathnames io.sockets
+io.sockets.secure kernel lexer linked-assocs make math.parser
+multiline namespaces peg.ebnf present sequences
+sequences.generalizations splitting strings strings.parser
+urls.encoding vocabs.loader ;
IN: urls
: set-query-params ( url params -- url )
[ swap set-query-param ] assoc-each ;
-ERROR: malformed-port ;
+ERROR: malformed-port string ;
+
+: parse-port ( string -- port/f )
+ [ f ] [ dup string>number [ ] [ malformed-port ] ?if ] if-empty ;
: parse-host ( string -- host/f port/f )
[
- ":" split1-last [ url-decode ]
- [ dup [ string>number [ malformed-port ] unless* ] when ] bi*
+ ":" split1-last [ url-decode ] [ parse-port ] bi*
] [ f f ] if* ;
GENERIC: >url ( obj -- url )
<PRIVATE
-EBNF: parse-url [=[
-
-protocol = [a-zA-Z0-9.+-]+ => [[ url-decode ]]
-username = [^/:@#?]+ => [[ url-decode ]]
-password = [^/:@#?]+ => [[ url-decode ]]
-pathname = [^#?]+ => [[ url-decode ]]
-query = [^#]+ => [[ query>assoc ]]
-anchor = .+ => [[ url-decode ]]
+: remove-dot-segments ( path -- path' )
+ [ "//" split1 ] [ "/" glue ] while*
+ [ "/./" split1 ] [ "/" glue ] while*
+ [ "/../" split1 ] [ [ "/" split1-last drop ] dip "/" glue ] while*
+ "/.." ?tail [ "/" split1-last drop "/" append ] when
+ "../" ?head [ "/" prepend ] when
+ "./" ?head [ "/" prepend ] when
+ "/." ?tail [ "/" append ] when
+ [ "/" ] when-empty ;
-hostname = [^/#?]+ => [[ url-decode ]]
+: parse-path ( string -- path )
+ "/" split [ url-decode "/" "%2F" replace ] map "/" join
+ remove-dot-segments ;
-hostname-spec = hostname ("/"|!(.)) => [[ first ]]
-
-auth = (username (":" password => [[ second ]])? "@"
- => [[ first2 2array ]])?
+EBNF: parse-url [=[
-url = (((protocol "://") => [[ first ]] auth hostname)
- | (("//") => [[ f ]] auth hostname))?
- (pathname)?
- ("?" query => [[ second ]])?
- ("#" anchor => [[ second ]])?
+protocol = [a-zA-Z0-9.+-]+ => [[ url-decode ]]
+username = [^/:@#?]* => [[ url-decode ]]
+password = [^/:@#?]* => [[ url-decode ]]
+path = [^#?]+ => [[ parse-path ]]
+query = [^#]+ => [[ query>assoc ]]
+anchor = .+ => [[ url-decode ]]
+hostname = [^/#?:]+ => [[ url-decode ]]
+ipv6 = "[" [^\]]+ "]" => [[ concat url-decode ]]
+port = [^/#?]+ => [[ url-decode parse-port ]]
+
+auth = username (":"~ password?)? "@"~
+host = (ipv6 | hostname) (":"~ port?)?
+
+url = (protocol ":"~)?
+ ("//"~ auth? host?)?
+ path?
+ ("?"~ query?)?
+ ("#"~ anchor?)?
]=]
PRIVATE>
M: string >url
- [ <url> ] dip
- parse-url {
+ [ <url> ] dip parse-url 5 firstn {
+ [ >lower >>protocol ]
[
- first [
- [ first >lower >>protocol ]
- [
- second
- [ first [ first2 [ >>username ] [ >>password ] bi* ] when* ]
- [ second parse-host [ >>host ] [ >>port ] bi* ] bi
- ] bi
+ [
+ [ first [ first2 [ >>username ] [ >>password ] bi* ] when* ]
+ [ second [ first2 [ >>host ] [ >>port ] bi* ] when* ] bi
] when*
]
- [ second >>path ]
- [ third >>query ]
- [ fourth >>anchor ]
- } cleave
- dup host>> [ [ "/" or ] change-path ] when ;
+ [ >>path ]
+ [ >>query ]
+ [ >>anchor ]
+ } spread dup host>> [ [ "/" or ] change-path ] when ;
M: pathname >url string>> >url ;
: unparse-username-password ( url -- )
dup username>> dup [
- % password>> [ ":" % % ] when* "@" %
+ url-encode % password>> [ ":" % url-encode % ] when* "@" %
] [ 2drop ] if ;
: url-port ( url -- port/f )
[ port>> ] [ protocol>> protocol-port ] bi over =
[ drop f ] when ;
+: ipv6-host ( host -- host/ipv6 ipv6? )
+ dup { [ "[" head? ] [ "]" tail? ] } 1&& [
+ 1 swap index-of-last subseq t
+ ] [ f ] if ;
+
+: unparse-host ( url -- host )
+ host>> ipv6-host [ url-encode ] [ [ "[" "]" surround ] when ] bi* ;
+
: unparse-host-part ( url -- )
{
[ unparse-username-password ]
- [ host>> url-encode % ]
+ [ unparse-host % ]
[ url-port [ ":" % # ] when* ]
[ path>> "/" head? [ "/" % ] unless ]
} cleave ;
! URL" //foo.com" takes on the protocol of the url it's derived from
: unparse-protocol ( url -- )
- dup protocol>> [
- % "://" % unparse-host-part
- ] [
- dup host>> [
- "//" % unparse-host-part
- ] [
- drop
- ] if
- ] if* ;
+ protocol>> [ % ":" % ] when* ;
+
+: unparse-authority ( url -- )
+ dup host>> [ "//" % unparse-host-part ] [ drop ] if ;
+
+: unparse-path ( url -- )
+ path>> "/" split [
+ "%2F" "/" replace url-encode "/" "%2F" replace
+ ] map "/" join % ;
M: url present
[
{
[ unparse-protocol ]
- [ path>> url-encode % ]
+ [ unparse-authority ]
+ [ unparse-path ]
[ query>> dup assoc-empty? [ drop ] [ "?" % assoc>query % ] if ]
[ anchor>> [ "#" % present url-encode % ] when* ]
} cleave
{ [ dup "/" head? ] [ nip ] }
{ [ dup empty? ] [ drop ] }
{ [ over "/" tail? ] [ append ] }
- { [ "/" pick subseq-start not ] [ nip ] }
+ { [ over "/" subseq-index not ] [ nip ] }
[ [ "/" split1-last drop "/" ] dip 3append ]
- } cond ;
+ } cond remove-dot-segments ;
<PRIVATE
[ [ anchor>> ] either? >>anchor ]
} 2cleave ;
+: redacted-url ( url -- url' )
+ clone [ "xxxxx" and ] change-password ;
+
! Half-baked stuff follows
: secure-protocol? ( protocol -- ? )
"https" = ;
: url-addr ( url -- addr )
[
- [ host>> ]
+ [ host>> ipv6-host drop ]
[ port>> ]
[ protocol>> protocol-port ]
tri or <inet>
] [
dup protocol>> secure-protocol?
- [ host>> <secure> ] [ drop ] if
+ [ host>> ipv6-host drop <secure> ] [ drop ] if
] bi ;
: set-url-addr ( url addr -- url )
- [ host>> >>host ] [ port>> >>port ] bi ;
+ [ [ host>> ] [ inet6? ] bi [ "[" "]" surround ] when >>host ]
+ [ port>> >>port ] bi ;
: ensure-port ( url -- url' )
clone dup protocol>> '[ _ protocol-port or ] change-port ;