1 ! Copyright (C) 2008, 2011 Slava Pestov.
2 ! See http://factorcode.org/license.txt for BSD license.
4 USING: accessors ascii assocs combinators
5 combinators.short-circuit fry io.encodings.string
6 io.encodings.utf8 io.pathnames io.sockets io.sockets.secure
7 kernel lexer linked-assocs make math math.parser multiline
8 namespaces peg.ebnf present sequences sequences.generalizations
9 splitting strings strings.parser urls.encoding vocabs.loader ;
13 TUPLE: url protocol username password host port path query anchor ;
15 : <url> ( -- url ) url new ;
17 : query-param ( url key -- value )
20 : set-or-delete ( value key query -- )
21 pick [ set-at ] [ delete-at drop ] if ;
23 : set-query-param ( url value key -- url )
24 pick query>> [ <linked-hash> ] unless* [ set-or-delete ] keep >>query ;
26 : set-query-params ( url params -- url )
27 [ swap set-query-param ] assoc-each ;
29 ERROR: malformed-port string ;
31 : parse-port ( string -- port/f )
32 [ f ] [ dup string>number [ ] [ malformed-port ] ?if ] if-empty ;
34 : parse-host ( string -- host/f port/f )
36 ":" split1-last [ url-decode ] [ parse-port ] bi*
39 GENERIC: >url ( obj -- url )
41 M: f >url drop <url> ;
47 : remove-dot-segments ( path -- path' )
48 [ "//" split1 ] [ "/" glue ] while*
49 [ "/./" split1 ] [ "/" glue ] while*
50 [ "/../" split1 ] [ [ "/" split1-last drop ] dip "/" glue ] while*
51 "/.." ?tail [ "/" split1-last drop "/" append ] when
52 "../" ?head [ "/" prepend ] when
53 "./" ?head [ "/" prepend ] when
54 "/." ?tail [ "/" append ] when
57 : parse-path ( string -- path )
58 "/" split [ url-decode "/" "%2F" replace ] map "/" join
63 protocol = [a-zA-Z0-9.+-]+ => [[ url-decode ]]
64 username = [^/:@#?]* => [[ url-decode ]]
65 password = [^/:@#?]* => [[ url-decode ]]
66 path = [^#?]+ => [[ parse-path ]]
67 query = [^#]+ => [[ query>assoc ]]
68 anchor = .+ => [[ url-decode ]]
69 hostname = [^/#?:]+ => [[ url-decode ]]
70 ipv6 = "[" [^\]]+ "]" => [[ concat url-decode ]]
71 port = [^/#?]+ => [[ url-decode parse-port ]]
73 auth = username (":"~ password?)? "@"~
74 host = (ipv6 | hostname) (":"~ port?)?
76 url = (protocol ":"~)?
87 [ <url> ] dip parse-url 5 firstn {
91 [ first [ first2 [ >>username ] [ >>password ] bi* ] when* ]
92 [ second [ first2 [ >>host ] [ >>port ] bi* ] when* ] bi
98 } spread dup host>> [ [ "/" or ] change-path ] when ;
100 M: pathname >url string>> >url ;
102 : relative-url ( url -- url' )
108 : relative-url? ( url -- ? ) protocol>> not ;
112 : unparse-username-password ( url -- )
114 url-encode % password>> [ ":" % url-encode % ] when* "@" %
117 : url-port ( url -- port/f )
118 [ port>> ] [ protocol>> protocol-port ] bi over =
121 : ipv6-host ( host -- host/ipv6 ipv6? )
122 dup { [ "[" head? ] [ "]" tail? ] } 1&& [
123 1 swap index-of-last subseq t
126 : unparse-host ( url -- host )
127 host>> ipv6-host [ url-encode ] [ [ "[" "]" surround ] when ] bi* ;
129 : unparse-host-part ( url -- )
131 [ unparse-username-password ]
133 [ url-port [ ":" % # ] when* ]
134 [ path>> "/" head? [ "/" % ] unless ]
137 ! URL" //foo.com" takes on the protocol of the url it's derived from
138 : unparse-protocol ( url -- )
139 protocol>> [ % ":" % ] when* ;
141 : unparse-authority ( url -- )
142 dup host>> [ "//" % unparse-host-part ] [ drop ] if ;
144 : unparse-path ( url -- )
146 "%2F" "/" replace url-encode "/" "%2F" replace
153 [ unparse-authority ]
155 [ query>> dup assoc-empty? [ drop ] [ "?" % assoc>query % ] if ]
156 [ anchor>> [ "#" % present url-encode % ] when* ]
162 : url-append-path ( path1 path2 -- path )
164 { [ dup "/" head? ] [ nip ] }
165 { [ dup empty? ] [ drop ] }
166 { [ over "/" tail? ] [ append ] }
167 { [ over "/" subseq-index not ] [ nip ] }
168 [ [ "/" split1-last drop "/" ] dip 3append ]
169 } cond remove-dot-segments ;
173 : derive-port ( url base -- url' )
174 over relative-url? [ [ port>> ] either? ] [ drop port>> ] if ;
176 : derive-path ( url base -- url' )
177 [ path>> ] bi@ swap url-append-path ;
181 : derive-url ( base url -- url' )
183 [ [ protocol>> ] either? >>protocol ]
184 [ [ username>> ] either? >>username ]
185 [ [ password>> ] either? >>password ]
186 [ [ host>> ] either? >>host ]
187 [ derive-port >>port ]
188 [ derive-path >>path ]
189 [ [ query>> ] either? >>query ]
190 [ [ anchor>> ] either? >>anchor ]
193 : redacted-url ( url -- url' )
194 clone [ "xxxxx" and ] change-password ;
196 ! Half-baked stuff follows
197 : secure-protocol? ( protocol -- ? )
200 : url-addr ( url -- addr )
202 [ host>> ipv6-host drop ]
204 [ protocol>> protocol-port ]
207 dup protocol>> secure-protocol?
208 [ host>> ipv6-host drop <secure> ] [ drop ] if
211 : set-url-addr ( url addr -- url )
212 [ [ host>> ] [ inet6? ] bi [ "[" "]" surround ] when >>host ]
213 [ port>> >>port ] bi ;
215 : ensure-port ( url -- url' )
216 clone dup protocol>> '[ _ protocol-port or ] change-port ;
219 SYNTAX: URL" lexer get skip-blank parse-string >url suffix! ;
221 { "urls" "prettyprint" } "urls.prettyprint" require-when