1 ! Copyright (C) 2008, 2011 Slava Pestov.
2 ! See http://factorcode.org/license.txt for BSD license.
3 USING: accessors arrays ascii assocs combinators fry io.pathnames
4 io.sockets io.sockets.secure kernel lexer linked-assocs make
5 math.parser namespaces peg.ebnf present sequences splitting
6 strings strings.parser urls.encoding vocabs.loader ;
9 TUPLE: url protocol username password host port path query anchor ;
11 : <url> ( -- url ) url new ;
13 : query-param ( url key -- value )
16 : set-or-delete ( value key query -- )
17 pick [ set-at ] [ delete-at drop ] if ;
19 : set-query-param ( url value key -- url )
20 pick query>> [ <linked-hash> ] unless* [ set-or-delete ] keep >>query ;
22 : set-query-params ( url params -- url )
23 [ swap set-query-param ] assoc-each ;
25 ERROR: malformed-port ;
27 : parse-host ( string -- host/f port/f )
29 ":" split1-last [ url-decode ]
30 [ dup [ string>number [ malformed-port ] unless* ] when ] bi*
33 GENERIC: >url ( obj -- url )
35 M: f >url drop <url> ;
43 protocol = [a-zA-Z0-9.+-]+ => [[ url-decode ]]
44 username = [^/:@#?]+ => [[ url-decode ]]
45 password = [^/:@#?]+ => [[ url-decode ]]
46 pathname = [^#?]+ => [[ url-decode ]]
47 query = [^#]+ => [[ query>assoc ]]
48 anchor = .+ => [[ url-decode ]]
50 hostname = [^/#?]+ => [[ url-decode ]]
52 hostname-spec = hostname ("/"|!(.)) => [[ first ]]
54 auth = (username (":" password => [[ second ]])? "@"
55 => [[ first2 2array ]])?
57 url = (((protocol "://") => [[ first ]] auth hostname)
58 | (("//") => [[ f ]] auth hostname))?
60 ("?" query => [[ second ]])?
61 ("#" anchor => [[ second ]])?
72 [ first >lower >>protocol ]
75 [ first [ first2 [ >>username ] [ >>password ] bi* ] when* ]
76 [ second parse-host [ >>host ] [ >>port ] bi* ] bi
84 dup host>> [ [ "/" or ] change-path ] when ;
86 M: pathname >url string>> >url ;
88 : relative-url ( url -- url' )
94 : relative-url? ( url -- ? ) protocol>> not ;
98 : unparse-username-password ( url -- )
100 % password>> [ ":" % % ] when* "@" %
103 : url-port ( url -- port/f )
104 [ port>> ] [ port>> ] [ protocol>> protocol-port ] tri =
107 : unparse-host-part ( url -- )
109 [ unparse-username-password ]
110 [ host>> url-encode % ]
111 [ url-port [ ":" % # ] when* ]
112 [ path>> "/" head? [ "/" % ] unless ]
115 ! URL" //foo.com" takes on the protocol of the url it's derived from
116 : unparse-protocol ( url -- )
118 % "://" % unparse-host-part
121 "//" % unparse-host-part
131 [ path>> url-encode % ]
132 [ query>> dup assoc-empty? [ drop ] [ "?" % assoc>query % ] if ]
133 [ anchor>> [ "#" % present url-encode % ] when* ]
139 : url-append-path ( path1 path2 -- path )
141 { [ dup "/" head? ] [ nip ] }
142 { [ dup empty? ] [ drop ] }
143 { [ over "/" tail? ] [ append ] }
144 { [ "/" pick subseq-start not ] [ nip ] }
145 [ [ "/" split1-last drop "/" ] dip 3append ]
150 : derive-port ( url base -- url' )
151 over relative-url? [ [ port>> ] either? ] [ drop port>> ] if ;
153 : derive-path ( url base -- url' )
154 [ path>> ] bi@ swap url-append-path ;
158 : derive-url ( base url -- url' )
160 [ [ protocol>> ] either? >>protocol ]
161 [ [ username>> ] either? >>username ]
162 [ [ password>> ] either? >>password ]
163 [ [ host>> ] either? >>host ]
164 [ derive-port >>port ]
165 [ derive-path >>path ]
166 [ [ query>> ] either? >>query ]
167 [ [ anchor>> ] either? >>anchor ]
170 ! Half-baked stuff follows
171 : secure-protocol? ( protocol -- ? )
174 : url-addr ( url -- addr )
178 [ protocol>> protocol-port ]
181 dup protocol>> secure-protocol?
182 [ host>> <secure> ] [ drop ] if
185 : set-url-addr ( url addr -- url )
186 [ host>> >>host ] [ port>> >>port ] bi ;
188 : ensure-port ( url -- url' )
189 clone dup protocol>> '[ _ protocol-port or ] change-port ;
192 SYNTAX: URL" lexer get skip-blank parse-string >url suffix! ;
194 { "urls" "prettyprint" } "urls.prettyprint" require-when