diff -r b07400659dba Lib/test/test_urlparse.py
--- a/Lib/test/test_urlparse.py Thu Mar 06 17:06:41 2014 +0100
+++ b/Lib/test/test_urlparse.py Fri Mar 07 17:15:21 2014 +0100
@@ -533,28 +533,23 @@ class UrlParseTestCase(unittest.TestCase
self.assertEqual(p.geturl(), url)
- # Verify an illegal port is returned as None
- url = b"HTTP://WWW.PYTHON.ORG:65536/doc/#frag"
- p = urllib.parse.urlsplit(url)
- self.assertEqual(p.port, None)
+ # Verify an illegal port is rejected with ValueError
+
+ with self.assertRaises(ValueError) as cm:
+ urllib.parse.urlsplit(b"HTTP://WWW.PYTHON.ORG:65536/doc/#frag")
+ self.assertRegex(str(cm.exception), '^Invalid port number: ')
def test_attributes_bad_port(self):
"""Check handling of non-integer ports."""
- p = urllib.parse.urlsplit("http://www.example.net:foo")
- self.assertEqual(p.netloc, "www.example.net:foo")
- self.assertRaises(ValueError, lambda: p.port)
-
- p = urllib.parse.urlparse("http://www.example.net:foo")
- self.assertEqual(p.netloc, "www.example.net:foo")
- self.assertRaises(ValueError, lambda: p.port)
+ self.assertRaises(ValueError,
+ urllib.parse.urlsplit, "http://www.example.net:foo")
+ self.assertRaises(ValueError,
+ urllib.parse.urlparse, "http://www.example.net:foo")
# Once again, repeat ourselves to test bytes
- p = urllib.parse.urlsplit(b"http://www.example.net:foo")
- self.assertEqual(p.netloc, b"www.example.net:foo")
- self.assertRaises(ValueError, lambda: p.port)
-
- p = urllib.parse.urlparse(b"http://www.example.net:foo")
- self.assertEqual(p.netloc, b"www.example.net:foo")
- self.assertRaises(ValueError, lambda: p.port)
+ self.assertRaises(ValueError,
+ urllib.parse.urlsplit, b"http://www.example.net:foo")
+ self.assertRaises(ValueError,
+ urllib.parse.urlparse, b"http://www.example.net:foo")
def test_attributes_without_netloc(self):
# This example is straight from RFC 3261. It looks like it
@@ -755,7 +750,13 @@ class UrlParseTestCase(unittest.TestCase
self.assertEqual(splitport('parrot'), ('parrot', None))
self.assertEqual(splitport('parrot:'), ('parrot', None))
self.assertEqual(splitport('127.0.0.1'), ('127.0.0.1', None))
- self.assertEqual(splitport('parrot:cheese'), ('parrot:cheese', None))
+
+ # '::1' splits into host ':' and port '1'; ':' is not a valid host
+ self.assertRaises(ValueError, splitport, '::1')
+ # invalid port number ("cheese" is not an integer)
+ self.assertRaises(ValueError, splitport, 'parrot:cheese')
+ # 123456789 is outside the accepted port range (1-65535)
+ self.assertRaises(ValueError, splitport, 'host:123456789')
def test_splitnport(self):
splitnport = urllib.parse.splitnport
@@ -871,6 +872,15 @@ class UrlParseTestCase(unittest.TestCase
quoter = urllib.parse.Quoter(urllib.parse._ALWAYS_SAFE)
self.assertIn('Quoter', repr(quoter))
+ def test_invalid_ipv6(self):
+ urls = (
+ 'http://::1/', # colons in the host without enclosing brackets
+ 'http://[127.0.0.1]/', # bracketed, but not an IPv6 address
+ 'http://[host]/', # bracketed, but not an IPv6 address
+ )
+ for url in urls:
+ self.assertRaises(ValueError, urllib.parse.urlparse, url)
+
def test_main():
support.run_unittest(UrlParseTestCase)
diff -r b07400659dba Lib/urllib/parse.py
--- a/Lib/urllib/parse.py Thu Mar 06 17:06:41 2014 +0100
+++ b/Lib/urllib/parse.py Fri Mar 07 17:15:21 2014 +0100
@@ -27,9 +27,10 @@ parsing quirks from older RFCs are retai
test_urlparse.py provides a good indicator of parsing behavior.
"""
+import collections
+import ipaddress
import re
import sys
-import collections
__all__ = ["urlparse", "urlunparse", "urljoin", "urldefrag",
"urlsplit", "urlunsplit", "urlencode", "parse_qs",
@@ -314,7 +315,12 @@ def _splitnetloc(url, start=0):
wdelim = url.find(c, start) # find first of this delim
if wdelim >= 0: # if found
delim = min(delim, wdelim) # use earliest delim position
- return url[start:delim], url[delim:] # return (domain, rest)
+ netloc, url = url[start:delim], url[delim:] # split into (domain, rest)
+
+ # validate netloc: splitport() raises ValueError if it is malformed
+ splitport(netloc)
+
+ return netloc, url
def urlsplit(url, scheme='', allow_fragments=True):
"""Parse a URL into 5 components:
@@ -338,9 +344,6 @@ def urlsplit(url, scheme='', allow_fragm
url = url[i+1:]
if url[:2] == '//':
netloc, url = _splitnetloc(url, 2)
- if (('[' in netloc and ']' not in netloc) or
- (']' in netloc and '[' not in netloc)):
- raise ValueError("Invalid IPv6 URL")
if allow_fragments and '#' in url:
url, fragment = url.split('#', 1)
if '?' in url:
@@ -361,9 +364,6 @@ def urlsplit(url, scheme='', allow_fragm
if url[:2] == '//':
netloc, url = _splitnetloc(url, 2)
- if (('[' in netloc and ']' not in netloc) or
- (']' in netloc and '[' not in netloc)):
- raise ValueError("Invalid IPv6 URL")
if allow_fragments and '#' in url:
url, fragment = url.split('#', 1)
if '?' in url:
@@ -894,18 +894,46 @@ def splitpasswd(user):
# splittag('/path#tag') --> '/path', 'tag'
_portprog = None
-def splitport(host):
+def splitport(netloc):
"""splitport('host:port') --> 'host', 'port'."""
- global _portprog
- if _portprog is None:
- _portprog = re.compile('^(.*):([0-9]*)$')
+ # netloc may start with "user:password@" userinfo, which stays part of
+ # the returned host. Raises ValueError for a port that is not made of
+ # ASCII digits in the range 1-65535, for an invalid bracketed IPv6
+ # literal, and for a host with '[', ']' or ':' outside such a literal.
+ # Index of the first host character: 0 if there is no userinfo.
+ auth = netloc.rfind('@') + 1
+ pos = max(netloc.rfind(']'), auth)
+ pos = netloc.rfind(':', pos)
+ if pos != -1:
+ full_host = netloc[:pos]
+ port = netloc[pos+1:]
+ if port:
+ try:
+ # int() alone would also accept ' 80', '+80' or non-ASCII
+ # digits; anything but ASCII digits becomes 0 and is rejected
+ number = int(port) if re.match(r'[0-9]+\Z', port) else 0
+ if not(1 <= number <= 65535):
+ raise ValueError
+ except ValueError:
+ raise ValueError("Invalid port number: %r" % port) from None
+ else:
+ port = None
+ else:
+ full_host = netloc
+ port = None
- match = _portprog.match(host)
- if match:
- host, port = match.groups()
- if port:
- return host, port
- return host, None
+ host = full_host[auth:]
+ if host.startswith('[') and host.endswith(']'):
+ ipv6 = host[1:-1]
+ try:
+ ipaddress.IPv6Address(ipv6)
+ except ValueError:
+ raise ValueError("Invalid IPv6 URL: %r" % ipv6) from None
+ elif re.search('[][:]', host):
+ # host must not contain '[', ']' or ':'
+ raise ValueError("Invalid host: %r" % host)
+
+ return full_host, port
_nportprog = None
def splitnport(host, defport=-1):