]> gitweb.factorcode.org Git - factor.git/commitdiff
urls: better parsing of the scheme component of urls
authorBjörn Lindqvist <bjourne@gmail.com>
Sun, 31 Jul 2016 00:46:39 +0000 (02:46 +0200)
committerBjörn Lindqvist <bjourne@gmail.com>
Sun, 31 Jul 2016 00:46:39 +0000 (02:46 +0200)
normalize to lowercase and a few more characters are allowed in the
protocol part.

basis/urls/urls-tests.factor
basis/urls/urls.factor

index 167cce13a68d094341de2ec682f67c348cef569c..b754de7086f09b353fca4c9e2e892316dae9394a 100644 (file)
-USING: accessors arrays assocs io.sockets io.sockets.secure
-kernel linked-assocs present prettyprint tools.test urls ;
+USING: accessors arrays assocs io.sockets io.sockets.secure kernel
+linked-assocs present prettyprint sequences tools.test urls ;
 IN: urls.tests
 
-CONSTANT: urls
+CONSTANT: urls {
     {
-        {
-            T{ url
-                { protocol "http" }
-                { host "www.apple.com" }
-                { port 1234 }
-                { path "/a/path" }
-                { query LH{ { "a" "b" } } }
-                { anchor "foo" }
-            }
-            "http://www.apple.com:1234/a/path?a=b#foo"
-        }
-        {
-            T{ url
-                { protocol "http" }
-                { host "www.apple.com" }
-                { path "/a/path" }
-                { query LH{ { "a" "b" } } }
-                { anchor "foo" }
-            }
-            "http://www.apple.com/a/path?a=b#foo"
-        }
-        {
-            T{ url
-                { protocol "http" }
-                { host "www.apple.com" }
-                { port 1234 }
-                { path "/another/fine/path" }
-                { anchor "foo" }
-            }
-            "http://www.apple.com:1234/another/fine/path#foo"
-        }
-        {
-            T{ url
-                { path "/a/relative/path" }
-                { anchor "foo" }
-            }
-            "/a/relative/path#foo"
-        }
-        {
-            T{ url
-                { path "/a/relative/path" }
-            }
-            "/a/relative/path"
-        }
-        {
-            T{ url
-                { path "a/relative/path" }
-            }
-            "a/relative/path"
-        }
-        {
-            T{ url
-                { path "bar" }
-                { query LH{ { "a" "b" } } }
-            }
-            "bar?a=b"
-        }
-        {
-            T{ url
-                { protocol "ftp" }
-                { host "ftp.kernel.org" }
-                { username "slava" }
-                { path "/" }
-            }
-            "ftp://slava@ftp.kernel.org/"
-        }
-        {
-            T{ url
-                { protocol "ftp" }
-                { host "ftp.kernel.org" }
-                { username "slava" }
-                { password "secret" }
-                { path "/" }
-            }
-            "ftp://slava:secret@ftp.kernel.org/"
-        }
-        {
-            T{ url
-               { protocol "http" }
-               { host "foo.com" }
-               { path "/" }
-               { query LH{ { "a" f } } }
-            }
-            "http://foo.com/?a"
-        }
+        T{ url
+           { protocol "http" }
+           { host "www.apple.com" }
+           { port 1234 }
+           { path "/a/path" }
+           { query LH{ { "a" "b" } } }
+           { anchor "foo" }
+         }
+        "http://www.apple.com:1234/a/path?a=b#foo"
     }
+    {
+        T{ url
+           { protocol "http" }
+           { host "www.apple.com" }
+           { path "/a/path" }
+           { query LH{ { "a" "b" } } }
+           { anchor "foo" }
+         }
+        "http://www.apple.com/a/path?a=b#foo"
+    }
+    {
+        T{ url
+           { protocol "http" }
+           { host "www.apple.com" }
+           { port 1234 }
+           { path "/another/fine/path" }
+           { anchor "foo" }
+         }
+        "http://www.apple.com:1234/another/fine/path#foo"
+    }
+    {
+        T{ url
+           { path "/a/relative/path" }
+           { anchor "foo" }
+         }
+        "/a/relative/path#foo"
+    }
+    {
+        T{ url
+           { path "/a/relative/path" }
+         }
+        "/a/relative/path"
+    }
+    {
+        T{ url
+           { path "a/relative/path" }
+         }
+        "a/relative/path"
+    }
+    {
+        T{ url
+           { path "bar" }
+           { query LH{ { "a" "b" } } }
+         }
+        "bar?a=b"
+    }
+    {
+        T{ url
+           { protocol "ftp" }
+           { host "ftp.kernel.org" }
+           { username "slava" }
+           { path "/" }
+         }
+        "ftp://slava@ftp.kernel.org/"
+    }
+    {
+        T{ url
+           { protocol "ftp" }
+           { host "ftp.kernel.org" }
+           { username "slava" }
+           { password "secret" }
+           { path "/" }
+         }
+        "ftp://slava:secret@ftp.kernel.org/"
+    }
+    {
+        T{ url
+           { protocol "http" }
+           { host "foo.com" }
+           { path "/" }
+           { query LH{ { "a" f } } }
+         }
+        "http://foo.com/?a"
+    }
+    ! Capital letters, digits, hyphen, plus and period are allowed
+    ! characters in the scheme
+    ! part. https://tools.ietf.org/html/rfc1738#section-5
+    {
+        T{ url
+           { protocol "foo.bar" }
+           { host "www.google.com" }
+           { path "/" }
+         }
+        "foo.bar://www.google.com/"
+    }
+    {
+        T{ url
+           { protocol "foo.-bar" }
+           { host "www.google.com" }
+           { path "/" }
+         }
+        "foo.-bar://www.google.com/"
+    }
+    {
+        T{ url
+           { protocol "t1000" }
+           { host "www.google.com" }
+           { path "/" }
+         }
+        "t1000://www.google.com/"
+    }
+}
 
 urls [
     [ 1array ] [ [ >url ] curry ] bi* unit-test
@@ -293,3 +319,9 @@ urls [
     "baz" "baz" set-query-param
     present
 ] unit-test
+
+! Scheme characters are
+! case-insensitive. https://tools.ietf.org/html/rfc3986#section-3.1
+{ URL" http://www.google.com/" } [
+    URL" http://www.google.com/"
+] unit-test
index 5f31a0c27d613a33408af34df2203c937ef5fe46..858f4a57c94d36ab405c41024ecd9a3f6aaa49a8 100644 (file)
@@ -1,6 +1,6 @@
 ! Copyright (C) 2008, 2011 Slava Pestov.
 ! See http://factorcode.org/license.txt for BSD license.
-USING: accessors arrays assocs combinators fry io.pathnames
+USING: accessors arrays ascii assocs combinators fry io.pathnames
 io.sockets io.sockets.secure kernel lexer linked-assocs make
 math.parser namespaces peg.ebnf present sequences splitting
 strings strings.parser urls.encoding vocabs.loader ;
@@ -37,7 +37,7 @@ M: url >url ;
 
 EBNF: parse-url
 
-protocol = [a-z+]+                  => [[ url-decode ]]
+protocol = [a-zA-Z0-9.+-]+          => [[ url-decode ]]
 username = [^/:@#?]+                => [[ url-decode ]]
 password = [^/:@#?]+                => [[ url-decode ]]
 pathname = [^#?]+                   => [[ url-decode ]]
@@ -66,7 +66,7 @@ M: string >url
     parse-url {
         [
             first [
-                [ first >>protocol ]
+                [ first >lower >>protocol ]
                 [
                     second
                     [ first [ first2 [ >>username ] [ >>password ] bi* ] when* ]