diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py index e5f6130e4a075e..84bdd8c99b532c 100644 --- a/Lib/test/test_urlparse.py +++ b/Lib/test/test_urlparse.py @@ -981,6 +981,15 @@ def test_splittype(self): self.assertEqual(splittype('type:'), ('type', '')) self.assertEqual(splittype('type:opaque:string'), ('type', 'opaque:string')) + # bpo-30713: The newline character U+000A is invalid in URLs + for url in ( + '\ntype:string', + 'ty\npe:string', + ): + self.assertEqual(splittype(url), (None, url)) + self.assertEqual(splittype('data:xxx\nyyy'), ('data', 'xxx\nyyy')) + self.assertEqual(splittype('data:xxxyyy\n'), ('data', 'xxxyyy\n')) + def test_splithost(self): splithost = urllib.parse.splithost self.assertEqual(splithost('//www.example.org:80/foo/bar/baz.html'), @@ -1010,6 +1019,15 @@ def test_splithost(self): self.assertEqual(splithost("//example.net/file#"), ('example.net', '/file#')) + # bpo-30713: The newline character U+000A is invalid in URLs + for url in ( + '\n//hostname/url', + '//host\nname/url', + '//hostname/u\nrl', + '//hostname/url\n', + ): + self.assertEqual(splithost(url), (None, url)) + def test_splituser(self): splituser = urllib.parse.splituser self.assertEqual(splituser('User:Pass@www.python.org:080'), @@ -1052,6 +1070,15 @@ def test_splitport(self): self.assertEqual(splitport('[::1]'), ('[::1]', None)) self.assertEqual(splitport(':88'), ('', '88')) + # bpo-30713: The newline character U+000A is invalid in URLs + for url in ( + '\nparrot:88', + 'par\nrot:88', + 'parrot:8\n8', + 'parrot:88\n', + ): + self.assertEqual(splitport(url), (url, None)) + def test_splitnport(self): splitnport = urllib.parse.splitnport self.assertEqual(splitnport('parrot:88'), ('parrot', 88)) diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py index 01eb54906c8a53..3ed8b75bfbc3f4 100644 --- a/Lib/urllib/parse.py +++ b/Lib/urllib/parse.py @@ -934,9 +934,9 @@ def splittype(url): """splittype('type:opaquestring') --> 'type', 'opaquestring'.""" global _typeprog if _typeprog is None: - _typeprog = re.compile('([^/:]+):(.*)', re.DOTALL) + _typeprog = re.compile('([^/:\n]+):(.*)', re.DOTALL) - match = _typeprog.match(url) + match = _typeprog.fullmatch(url) if match: scheme, data = match.groups() return scheme.lower(), data @@ -947,9 +947,9 @@ def splithost(url): """splithost('//host[:port]/path') --> 'host[:port]', '/path'.""" global _hostprog if _hostprog is None: - _hostprog = re.compile('//([^/#?]*)(.*)', re.DOTALL) + _hostprog = re.compile('//([^/#?\n]*)(.*)') - match = _hostprog.match(url) + match = _hostprog.fullmatch(url) if match: host_port, path = match.groups() if path and path[0] != '/': @@ -973,9 +973,9 @@ def splitport(host): """splitport('host:port') --> 'host', 'port'.""" global _portprog if _portprog is None: - _portprog = re.compile('(.*):([0-9]*)$', re.DOTALL) + _portprog = re.compile('(.*):([0-9]*)') - match = _portprog.match(host) + match = _portprog.fullmatch(host) if match: host, port = match.groups() if port: diff --git a/Misc/NEWS.d/next/Security/2017-06-28-03-50-42.bpo-30713.9tfV5r.rst b/Misc/NEWS.d/next/Security/2017-06-28-03-50-42.bpo-30713.9tfV5r.rst new file mode 100644 index 00000000000000..245d42bb10246f --- /dev/null +++ b/Misc/NEWS.d/next/Security/2017-06-28-03-50-42.bpo-30713.9tfV5r.rst @@ -0,0 +1,3 @@ +The splittype(), splitport() and splithost() functions of the urllib.parse +module now reject URLs which contain a newline character, but splittype() +accepts newlines after the type.