--- /dev/null
+Slava Pestov
--- /dev/null
+IN: urls.encoding
+USING: strings help.markup help.syntax assocs multiline ;
+
+HELP: url-decode
+{ $values { "str" string } { "decoded" string } }
+{ $description "Decodes a URL-encoded string." } ;
+
+HELP: url-encode
+{ $values { "str" string } { "encoded" string } }
+{ $description "URL-encodes a string." } ;
+
+HELP: url-quotable?
+{ $values { "ch" "a character" } { "?" "a boolean" } }
+{ $description "Tests if a character be used without URL-encoding in a URL." } ;
+
+HELP: assoc>query
+{ $values { "assoc" assoc } { "str" string } }
+{ $description "Converts an assoc of query parameters into a query string, performing URL encoding." }
+{ $notes "This word is used by the implementation of " { $link "urls" } ". It is also used by the HTTP client to encode POST requests." }
+{ $examples
+ { $example
+ "USING: io urls ;"
+ "{ { \"from\" \"Lead\" } { \"to\" \"Gold, please\" } }"
+ "assoc>query print"
+ "from=Lead&to=Gold%2c+please"
+ }
+} ;
+
+HELP: query>assoc
+{ $values { "query" string } { "assoc" assoc } }
+{ $description "Parses a URL query string and URL-decodes each component." }
+{ $notes "This word is used by the implementation of " { $link "urls" } ". It is also used by the HTTP server to parse POST requests." }
+{ $examples
+ { $unchecked-example
+ "USING: prettyprint urls ;"
+ "\"gender=female&agefrom=22&ageto=28&location=Omaha+NE\""
+ "query>assoc ."
+ <" H{
+ { "gender" "female" }
+ { "agefrom" "22" }
+ { "ageto" "28" }
+ { "location" "Omaha NE" }
+}">
+ }
+} ;
+
+ARTICLE: "url-encoding" "URL encoding and decoding"
+"URL encoding and decoding strings:"
+{ $subsection url-encode }
+{ $subsection url-decode }
+{ $subsection url-quotable? }
+"Encoding and decoding queries:"
+{ $subsection assoc>query }
+{ $subsection query>assoc }
+"See " { $url "http://en.wikipedia.org/wiki/Percent-encoding" } " for a description of URL encoding." ;
+
+ABOUT: "url-encoding"
--- /dev/null
+IN: urls.encoding.tests
+USING: urls.encoding tools.test arrays kernel assocs present accessors ;
+
+[ "~hello world" ] [ "%7ehello world" url-decode ] unit-test
+[ f ] [ "%XX%XX%XX" url-decode ] unit-test
+[ f ] [ "%XX%XX%X" url-decode ] unit-test
+
+[ "hello world" ] [ "hello%20world" url-decode ] unit-test
+[ " ! " ] [ "%20%21%20" url-decode ] unit-test
+[ "hello world" ] [ "hello world%" url-decode ] unit-test
+[ "hello world" ] [ "hello world%x" url-decode ] unit-test
+[ "hello%20world" ] [ "hello world" url-encode ] unit-test
+
+[ "hello world" ] [ "hello+world" query-decode ] unit-test
+
+[ "\u001234hi\u002045" ] [ "\u001234hi\u002045" url-encode url-decode ] unit-test
+
+[ "a=b&a=c" ] [ { { "a" { "b" "c" } } } assoc>query ] unit-test
+
+[ H{ { "a" "b" } } ] [ "a=b" query>assoc ] unit-test
+
+[ H{ { "a" { "b" "c" } } } ] [ "a=b&a=c" query>assoc ] unit-test
+
+[ H{ { "text" "hello world" } } ] [ "text=hello+world" query>assoc ] unit-test
+
+[ "a=3" ] [ { { "a" 3 } } assoc>query ] unit-test
--- /dev/null
+! Copyright (C) 2008 Slava Pestov.
+! See http://factorcode.org/license.txt for BSD license.
+USING: kernel ascii combinators combinators.short-circuit
+sequences splitting fry namespaces make assocs arrays strings
+io.sockets io.sockets.secure io.encodings.string
+io.encodings.utf8 math math.parser accessors hashtables present ;
+IN: urls.encoding
+
+: url-quotable? ( ch -- ? )
+ {
+ [ letter? ]
+ [ LETTER? ]
+ [ digit? ]
+ [ "/_-.:" member? ]
+ } 1|| ; foldable
+
+<PRIVATE
+
+: push-utf8 ( ch -- )
+ 1string utf8 encode
+ [ CHAR: % , >hex 2 CHAR: 0 pad-left % ] each ;
+
+PRIVATE>
+
+: url-encode ( str -- encoded )
+ [
+ [ dup url-quotable? [ , ] [ push-utf8 ] if ] each
+ ] "" make ;
+
+<PRIVATE
+
+: url-decode-hex ( index str -- )
+ 2dup length 2 - >= [
+ 2drop
+ ] [
+ [ 1+ dup 2 + ] dip subseq hex> [ , ] when*
+ ] if ;
+
+: url-decode-% ( index str -- index str )
+ 2dup url-decode-hex ;
+
+: url-decode-iter ( index str -- )
+ 2dup length >= [
+ 2drop
+ ] [
+ 2dup nth dup CHAR: % = [
+ drop url-decode-% [ 3 + ] dip
+ ] [
+ , [ 1+ ] dip
+ ] if url-decode-iter
+ ] if ;
+
+PRIVATE>
+
+: url-decode ( str -- decoded )
+ [ 0 swap url-decode-iter ] "" make utf8 decode ;
+
+: query-decode ( str -- decoded )
+ [ dup CHAR: + = [ drop "%20" ] [ 1string ] if ] { } map-as
+ concat url-decode ;
+
+<PRIVATE
+
+: add-query-param ( value key assoc -- )
+ [
+ at [
+ {
+ { [ dup string? ] [ swap 2array ] }
+ { [ dup array? ] [ swap suffix ] }
+ { [ dup not ] [ drop ] }
+ } cond
+ ] when*
+ ] 2keep set-at ;
+
+PRIVATE>
+
+: query>assoc ( query -- assoc )
+ dup [
+ "&" split H{ } clone [
+ [
+ [ "=" split1 [ dup [ query-decode ] when ] bi@ swap ] dip
+ add-query-param
+ ] curry each
+ ] keep
+ ] when ;
+
+: assoc>query ( assoc -- str )
+ [
+ dup array? [ [ present ] map ] [ present 1array ] if
+ ] assoc-map
+ [
+ [
+ [ url-encode ] dip
+ [ url-encode "=" swap 3append , ] with each
+ ] assoc-each
+ ] { } make "&" join ;
--- /dev/null
+URL and form encoding/decoding
}
} ;
-HELP: assoc>query
-{ $values { "assoc" assoc } { "str" string } }
-{ $description "Converts an assoc of query parameters into a query string, performing URL encoding." }
-{ $notes "This word is used to implement the " { $link present } " method on URLs; it is also used by the HTTP client to encode POST requests." }
-{ $examples
- { $example
- "USING: io urls ;"
- "{ { \"from\" \"Lead\" } { \"to\" \"Gold, please\" } }"
- "assoc>query print"
- "from=Lead&to=Gold%2c+please"
- }
-} ;
-
-HELP: query>assoc
-{ $values { "query" string } { "assoc" assoc } }
-{ $description "Parses a URL query string and URL-decodes each component." }
-{ $notes "This word is used to implement " { $link >url } ". It is also used by the HTTP server to parse POST requests." }
-{ $examples
- { $unchecked-example
- "USING: prettyprint urls ;"
- "\"gender=female&agefrom=22&ageto=28&location=Omaha+NE\""
- "query>assoc ."
- <" H{
- { "gender" "female" }
- { "agefrom" "22" }
- { "ageto" "28" }
- { "location" "Omaha NE" }
-}">
- }
-} ;
-
HELP: derive-url
{ $values { "base" url } { "url" url } { "url'" url } }
{ $description "Builds a URL by filling in missing components of " { $snippet "url" } " from " { $snippet "base" } "." }
{ $values { "path1" string } { "path2" string } { "path" string } }
{ $description "Like " { $link append-path } ", but intended for use with URL paths and not filesystem paths." } ;
-HELP: url-decode
-{ $values { "str" string } { "decoded" string } }
-{ $description "Decodes a URL-encoded string." } ;
-
-HELP: url-encode
-{ $values { "str" string } { "encoded" string } }
-{ $description "URL-encodes a string." } ;
-
-HELP: url-quotable?
-{ $values { "ch" "a character" } { "?" "a boolean" } }
-{ $description "Tests if a character be used without URL-encoding in a URL." } ;
-
-ARTICLE: "url-encoding" "URL encoding and decoding"
-"URL encoding and decoding strings:"
-{ $subsection url-encode }
-{ $subsection url-decode }
-{ $subsection url-quotable? }
-"The URL implemention encodes and decodes components of " { $link url } " instances automatically, but sometimes it is required for non-URL strings. See " { $url "http://en.wikipedia.org/wiki/Percent-encoding" } " for a description of URL encoding." ;
-
ARTICLE: "url-utilities" "URL implementation utilities"
-{ $subsection assoc>query }
-{ $subsection query>assoc }
{ $subsection parse-host }
{ $subsection secure-protocol? }
{ $subsection url-append-path } ;
{ $subsection set-query-param }
"Creating " { $link "network-addressing" } " from URLs:"
{ $subsection url-addr }
-"Additional topics:"
-{ $subsection "url-utilities" }
-{ $subsection "url-encoding" } ;
+"The URL implemention encodes and decodes components of " { $link url } " instances automatically, but sometimes this functionality is needed for non-URL strings."
+{ $subsection "url-encoding" }
+"Utility words used by the URL implementation:"
+{ $subsection "url-utilities" } ;
ABOUT: "urls"
USING: urls urls.private tools.test
arrays kernel assocs present accessors ;
-[ "hello+world" ] [ "hello world" url-encode ] unit-test
-[ "hello world" ] [ "hello%20world" url-decode ] unit-test
-[ "~hello world" ] [ "%7ehello+world" url-decode ] unit-test
-[ f ] [ "%XX%XX%XX" url-decode ] unit-test
-[ f ] [ "%XX%XX%X" url-decode ] unit-test
-
-[ "hello world" ] [ "hello+world" url-decode ] unit-test
-[ "hello world" ] [ "hello%20world" url-decode ] unit-test
-[ " ! " ] [ "%20%21%20" url-decode ] unit-test
-[ "hello world" ] [ "hello world%" url-decode ] unit-test
-[ "hello world" ] [ "hello world%x" url-decode ] unit-test
-[ "hello+world" ] [ "hello world" url-encode ] unit-test
-[ "+%21+" ] [ " ! " url-encode ] unit-test
-
-[ "\u001234hi\u002045" ] [ "\u001234hi\u002045" url-encode url-decode ] unit-test
-
-[ "a=b&a=c" ] [ { { "a" { "b" "c" } } } assoc>query ] unit-test
-
-[ H{ { "a" "b" } } ] [ "a=b" query>assoc ] unit-test
-
-[ H{ { "a" { "b" "c" } } } ] [ "a=b&a=c" query>assoc ] unit-test
-
-[ "a=3" ] [ { { "a" 3 } } assoc>query ] unit-test
-
: urls
{
{
io.sockets io.sockets.secure io.encodings.string
io.encodings.utf8 math math.parser accessors parser
strings.parser lexer prettyprint.backend hashtables present
-peg.ebnf ;
+peg.ebnf urls.encoding ;
IN: urls
-: url-quotable? ( ch -- ? )
- {
- [ letter? ]
- [ LETTER? ]
- [ digit? ]
- [ "/_-.:" member? ]
- } 1|| ; foldable
-
-<PRIVATE
-
-: push-utf8 ( ch -- )
- dup CHAR: \s = [ drop "+" % ] [
- 1string utf8 encode
- [ CHAR: % , >hex 2 CHAR: 0 pad-left % ] each
- ] if ;
-
-PRIVATE>
-
-: url-encode ( str -- encoded )
- [
- [ dup url-quotable? [ , ] [ push-utf8 ] if ] each
- ] "" make ;
-
-<PRIVATE
-
-: url-decode-hex ( index str -- )
- 2dup length 2 - >= [
- 2drop
- ] [
- [ 1+ dup 2 + ] dip subseq hex> [ , ] when*
- ] if ;
-
-: url-decode-% ( index str -- index str )
- 2dup url-decode-hex [ 3 + ] dip ;
-
-: url-decode-+-or-other ( index str ch -- index str )
- dup CHAR: + = [ drop CHAR: \s ] when , [ 1+ ] dip ;
-
-: url-decode-iter ( index str -- )
- 2dup length >= [
- 2drop
- ] [
- 2dup nth dup CHAR: % = [
- drop url-decode-%
- ] [
- url-decode-+-or-other
- ] if url-decode-iter
- ] if ;
-
-PRIVATE>
-
-: url-decode ( str -- decoded )
- [ 0 swap url-decode-iter ] "" make utf8 decode ;
-
-<PRIVATE
-
-: add-query-param ( value key assoc -- )
- [
- at [
- {
- { [ dup string? ] [ swap 2array ] }
- { [ dup array? ] [ swap suffix ] }
- { [ dup not ] [ drop ] }
- } cond
- ] when*
- ] 2keep set-at ;
-
-PRIVATE>
-
-: query>assoc ( query -- assoc )
- dup [
- "&" split H{ } clone [
- [
- [ "=" split1 [ dup [ url-decode ] when ] bi@ swap ] dip
- add-query-param
- ] curry each
- ] keep
- ] when ;
-
-: assoc>query ( assoc -- str )
- [
- dup array? [ [ present ] map ] [ present 1array ] if
- ] assoc-map
- [
- [
- [ url-encode ] dip
- [ url-encode "=" swap 3append , ] with each
- ] assoc-each
- ] { } make "&" join ;
-
TUPLE: url protocol username password host port path query anchor ;
: <url> ( -- url ) url new ;