1 ! Copyright (C) 2008 Slava Pestov.
2 ! See http://factorcode.org/license.txt for BSD license.
3 USING: accessors arrays ascii assocs classes combinators
4 combinators.short-circuit fry hashtables io.encodings.string
5 io.encodings.utf8 io.sockets kernel lexer make math math.parser
6 namespaces parser peg.ebnf present sequences splitting strings
7 strings.parser urls.encoding ;
10 TUPLE: url protocol username password addr path query anchor ;
12 : <url> ( -- url ) url new ;
14 : query-param ( url key -- value )
17 : delete-query-param ( url key -- url )
18 over query>> delete-at ;
20 : set-query-param ( url value key -- url )
22 '[ [ _ _ ] dip ?set-at ] change-query
24 nip delete-query-param
27 ERROR: malformed-port ;
29 : parse-host ( string -- host/f port/f )
31 ":" split1-last [ url-decode ]
32 [ dup [ string>number [ malformed-port ] unless* ] when ] bi*
35 GENERIC: >url ( obj -- url )
37 M: f >url drop <url> ;
45 protocol = [a-z]+ => [[ url-decode ]]
46 username = [^/:@#?]+ => [[ url-decode ]]
47 password = [^/:@#?]+ => [[ url-decode ]]
48 pathname = [^#?]+ => [[ url-decode ]]
49 query = [^#]+ => [[ query>assoc ]]
50 anchor = .+ => [[ url-decode ]]
52 hostname = [^/#?]+ => [[ url-decode ]]
54 hostname-spec = hostname ("/"|!(.)) => [[ first ]]
56 auth = (username (":" password => [[ second ]])? "@"
57 => [[ first2 2array ]])?
59 url = ((protocol "://") => [[ first ]] auth hostname)?
61 ("?" query => [[ second ]])?
62 ("#" anchor => [[ second ]])?
76 [ first [ first2 [ >>username ] [ >>password ] bi* ] when* ]
77 [ second parse-host <inet> >>addr ] bi
85 dup addr>> [ [ "/" or ] change-path ] when ;
89 : inet>url ( inet -- url ) [ <url> ] dip >>addr ;
93 M: inet >url inet>url ;
94 M: inet4 >url inet>url ;
95 M: inet6 >url inet>url ;
97 : protocol-port ( protocol -- port )
107 : unparse-username-password ( url -- )
109 % password>> [ ":" % % ] when* "@" %
112 : url-port ( url -- port/f )
115 [ protocol>> protocol-port ] tri =
118 : unparse-host-part ( url protocol -- )
122 [ unparse-username-password ]
123 [ addr>> host>> url-encode % ]
124 [ url-port [ ":" % # ] when* ]
125 [ path>> "/" head? [ "/" % ] unless ]
133 [ dup protocol>> dup [ unparse-host-part ] [ 2drop ] if ]
134 [ path>> url-encode % ]
135 [ query>> dup assoc-empty? [ drop ] [ "?" % assoc>query % ] if ]
136 [ anchor>> [ "#" % present url-encode % ] when* ]
140 : url-append-path ( path1 path2 -- path )
142 { [ dup "/" head? ] [ nip ] }
143 { [ dup empty? ] [ drop ] }
144 { [ over "/" tail? ] [ append ] }
145 { [ "/" pick start not ] [ nip ] }
146 [ [ "/" split1-last drop "/" ] dip 3append ]
151 : derive-url ( base url -- url' )
153 [ [ protocol>> ] either? >>protocol ]
154 [ [ username>> ] either? >>username ]
155 [ [ password>> ] either? >>password ]
156 [ [ addr>> ] either? >>addr ]
157 [ [ path>> ] bi@ swap url-append-path >>path ]
158 [ [ query>> ] either? >>query ]
159 [ [ anchor>> ] either? >>anchor ]
162 : relative-url ( url -- url' )
167 : relative-url? ( url -- ? ) protocol>> not ;
169 ! Half-baked stuff follows
170 : secure-protocol? ( protocol -- ? )
175 GENERIC: >secure-addr ( addrspec -- addrspec' )
179 : url-addr ( url -- addr )
182 [ [ addr>> port>> ] [ protocol>> protocol-port ] bi or ] bi with-port
184 secure-protocol? [ >secure-addr ] when ;
186 : ensure-port ( url -- url' )
187 clone dup protocol>> '[
188 dup port>> _ protocol-port or with-port
192 SYNTAX: URL" lexer get skip-blank parse-string >url suffix! ;
196 { "urls" "prettyprint" } "urls.prettyprint" require-when