From 0354ae8514baeccbdfda6c47163792365669d596 Mon Sep 17 00:00:00 2001
From: Victor Stinner <victor.stinner@gmail.com>
Date: Tue, 20 Jun 2017 17:20:37 +0200
Subject: [PATCH] bpo-30713: Reject newline in urllib.parse

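The splittype(), splitport() and splithost() functions of the
urllib.parse module now reject URLs that contain a newline
character (U+000A). No exception is raised: such input is returned
unsplit, e.g. splittype(url) returns (None, url) and splitport(host)
returns (host, None).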
---
 Lib/test/test_urlparse.py | 27 +++++++++++++++++++++++++++
 Lib/urllib/parse.py       | 12 ++++++------
 Misc/NEWS                 |  3 +++
 3 files changed, 36 insertions(+), 6 deletions(-)

diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py
index e5f6130e4a075e..6beaa3d4f3b86b 100644
--- a/Lib/test/test_urlparse.py
+++ b/Lib/test/test_urlparse.py
@@ -981,6 +981,15 @@ def test_splittype(self):
         self.assertEqual(splittype('type:'), ('type', ''))
         self.assertEqual(splittype('type:opaque:string'), ('type', 'opaque:string'))
 
+        # bpo-30713: The newline character U+000A is invalid in URLs
+        for url in (
+            '\ntype:string',
+            'ty\npe:string',
+            'type:str\ning',
+            'type:string\n',
+        ):
+            self.assertEqual(splittype(url), (None, url))
+
     def test_splithost(self):
         splithost = urllib.parse.splithost
         self.assertEqual(splithost('//www.example.org:80/foo/bar/baz.html'),
@@ -1010,6 +1019,15 @@ def test_splithost(self):
         self.assertEqual(splithost("//example.net/file#"),
                          ('example.net', '/file#'))
 
+        # bpo-30713: The newline character U+000A is invalid in URLs
+        for url in (
+            '\n//hostname/url',
+            '//host\nname/url',
+            '//hostname/u\nrl',
+            '//hostname/url\n',
+        ):
+            self.assertEqual(splithost(url), (None, url))
+
     def test_splituser(self):
         splituser = urllib.parse.splituser
         self.assertEqual(splituser('User:Pass@www.python.org:080'),
@@ -1052,6 +1070,15 @@ def test_splitport(self):
         self.assertEqual(splitport('[::1]'), ('[::1]', None))
         self.assertEqual(splitport(':88'), ('', '88'))
 
+        # bpo-30713: The newline character U+000A is invalid in URLs
+        for url in (
+            '\nparrot:88',
+            'par\nrot:88',
+            'parrot:8\n8',
+            'parrot:88\n',
+        ):
+            self.assertEqual(splitport(url), (url, None))
+
     def test_splitnport(self):
         splitnport = urllib.parse.splitnport
         self.assertEqual(splitnport('parrot:88'), ('parrot', 88))
diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py
index 01eb54906c8a53..65bf7215e00d89 100644
--- a/Lib/urllib/parse.py
+++ b/Lib/urllib/parse.py
@@ -934,9 +934,9 @@ def splittype(url):
     """splittype('type:opaquestring') --> 'type', 'opaquestring'."""
     global _typeprog
     if _typeprog is None:
-        _typeprog = re.compile('([^/:]+):(.*)', re.DOTALL)
+        _typeprog = re.compile('([^/:\n]+):(.*)')
 
-    match = _typeprog.match(url)
+    match = _typeprog.fullmatch(url)
     if match:
         scheme, data = match.groups()
         return scheme.lower(), data
@@ -947,9 +947,9 @@ def splithost(url):
     """splithost('//host[:port]/path') --> 'host[:port]', '/path'."""
     global _hostprog
     if _hostprog is None:
-        _hostprog = re.compile('//([^/#?]*)(.*)', re.DOTALL)
+        _hostprog = re.compile('//([^/#?\n]*)(.*)')
 
-    match = _hostprog.match(url)
+    match = _hostprog.fullmatch(url)
     if match:
         host_port, path = match.groups()
         if path and path[0] != '/':
@@ -973,9 +973,9 @@ def splitport(host):
     """splitport('host:port') --> 'host', 'port'."""
     global _portprog
     if _portprog is None:
-        _portprog = re.compile('(.*):([0-9]*)$', re.DOTALL)
+        _portprog = re.compile('(.*):([0-9]*)')
 
-    match = _portprog.match(host)
+    match = _portprog.fullmatch(host)
     if match:
         host, port = match.groups()
         if port:
diff --git a/Misc/NEWS b/Misc/NEWS
index 88b1e3e22f7118..807e9bace6262e 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -368,6 +368,9 @@ Extension Modules
 Library
 -------
 
+- [Security] bpo-30713: The splittype(), splitport() and splithost() functions
+  of the urllib.parse module now reject URLs which contain a newline character.
+
 - bpo-29755: Fixed the lgettext() family of functions in the gettext module.
   They now always return bytes.