! See http://factorcode.org/license.txt for BSD license.
USING: accessors ascii assocs combinators
-combinators.short-circuit fry io.pathnames io.sockets
-io.sockets.secure kernel lexer linked-assocs make math
-math.parser multiline namespaces peg.ebnf present sequences
+combinators.short-circuit io.pathnames io.sockets
+io.sockets.secure kernel lexer linked-assocs make math.parser
+multiline namespaces peg.ebnf present sequences
sequences.generalizations splitting strings strings.parser
urls.encoding vocabs.loader ;
<PRIVATE
! remove-dot-segments ( path -- path' )
! Normalizes a path string by rewriting "//", "." and ".." pieces in a
! fixed sequence of passes. Each while* pass keeps calling split1 on the
! current string and re-gluing the two halves with "/" until the pattern
! no longer occurs. The order of the passes below is significant.
+: remove-dot-segments ( path -- path' )
! Pass 1: collapse every "//" into a single "/".
+ [ "//" split1 ] [ "/" glue ] while*
! Pass 2: collapse every "/./" into "/".
+ [ "/./" split1 ] [ "/" glue ] while*
! Pass 3: for each "/../", trim the text before it back to its last "/"
! (via split1-last drop) and rejoin with the text after it.
+ [ "/../" split1 ] [ [ "/" split1-last drop ] dip "/" glue ] while*
! A trailing "/.." is removed, the remainder is trimmed back to its last
! "/" in the same way, and a "/" is appended.
+ "/.." ?tail [ "/" split1-last drop "/" append ] when
! A leading "../" or "./" is replaced by a leading "/".
+ "../" ?head [ "/" prepend ] when
+ "./" ?head [ "/" prepend ] when
! A trailing "/." is replaced by a trailing "/".
+ "/." ?tail [ "/" append ] when
! An empty result becomes "/".
+ [ "/" ] when-empty ;
+
! parse-path ( string -- path )
! Splits the raw path on "/", url-decodes each segment on its own, and
! rewrites any "/" present in a decoded segment as the literal text "%2F"
! so it is not confused with a segment separator once the segments are
! joined again. The rejoined path is then passed to remove-dot-segments.
+: parse-path ( string -- path )
+ "/" split [ url-decode "/" "%2F" replace ] map "/" join
+ remove-dot-segments ;
+
EBNF: parse-url [=[
protocol = [a-zA-Z0-9.+-]+ => [[ url-decode ]]
username = [^/:@#?]* => [[ url-decode ]]
password = [^/:@#?]* => [[ url-decode ]]
-path = [^#?]+ => [[ url-decode ]]
+path = [^#?]+ => [[ parse-path ]]
query = [^#]+ => [[ query>assoc ]]
anchor = .+ => [[ url-decode ]]
hostname = [^/#?:]+ => [[ url-decode ]]
! unparse-username-password ( url -- )
! Appends the "username[:password]@" part of a URL to the current output
! (via %). When username>> is f nothing is appended and both stack values
! are dropped. The "-" line is the previous form, which appended the
! username and password as-is; the "+" line passes each through url-encode
! before appending. The password (with its ":" prefix) is only appended
! when password>> is not f.
: unparse-username-password ( url -- )
dup username>> dup [
- % password>> [ ":" % % ] when* "@" %
+ url-encode % password>> [ ":" % url-encode % ] when* "@" %
] [ 2drop ] if ;
: url-port ( url -- port/f )
! ipv6-host ( host -- host/ipv6 ipv6? )
! If host both starts with "[" and ends with "]", outputs a subseq of it
! together with t; otherwise outputs host unchanged together with f.
! The "-" line takes the subseq from index 1 up to (length - 1), i.e. the
! text between the brackets. The "+" line expresses the same bounds with
! index-of-last.
! NOTE(review): this relies on index-of-last leaving both the last index
! and the sequence on the stack for subseq -- confirm against the
! sequences vocabulary in use.
: ipv6-host ( host -- host/ipv6 ipv6? )
dup { [ "[" head? ] [ "]" tail? ] } 1&& [
- 1 swap [ length 1 - ] [ subseq ] bi t
+ 1 swap index-of-last subseq t
] [ f ] if ;
: unparse-host ( url -- host )
! unparse-authority ( url -- )
! When the url has a non-f host>>, appends "//" to the current output and
! hands the url to unparse-host-part (defined elsewhere in this file) to
! append the rest of the authority. When host>> is f the url is dropped
! and nothing is appended.
: unparse-authority ( url -- )
dup host>> [ "//" % unparse-host-part ] [ drop ] if ;
! unparse-path ( url -- )
! Appends the url's path to the current output (via %). The path is split
! on "/" and each segment is processed independently: literal "%2F" text
! is first turned into "/", the segment is url-encoded, and any "/" in the
! result is turned back into "%2F"; the segments are then rejoined with
! "/".
! NOTE(review): presumably the "%2F" -> "/" step keeps url-encode from
! re-encoding the "%" of an already-escaped slash, and url-encode is
! expected to leave "/" untouched -- confirm against urls.encoding.
+: unparse-path ( url -- )
+ path>> "/" split [
+ "%2F" "/" replace url-encode "/" "%2F" replace
+ ] map "/" join % ;
+
M: url present
[
{
[ unparse-protocol ]
[ unparse-authority ]
- [ path>> url-encode % ]
+ [ unparse-path ]
[ query>> dup assoc-empty? [ drop ] [ "?" % assoc>query % ] if ]
[ anchor>> [ "#" % present url-encode % ] when* ]
} cleave
{ [ dup "/" head? ] [ nip ] }
{ [ dup empty? ] [ drop ] }
{ [ over "/" tail? ] [ append ] }
- { [ "/" pick subseq-start not ] [ nip ] }
+ { [ over "/" subseq-index not ] [ nip ] }
[ [ "/" split1-last drop "/" ] dip 3append ]
- } cond ;
+ } cond remove-dot-segments ;
<PRIVATE