# HG changeset patch
# Parent e72aab0801650ae6ce20cee3e29b3f352a178efc
Issue #1621: Avoid signed integer overflow in str.join()
diff -r e72aab080165 Lib/test/test_unicode.py
--- a/Lib/test/test_unicode.py Sat Jul 23 11:16:56 2016 -0400
+++ b/Lib/test/test_unicode.py Sun Jul 24 09:47:45 2016 +0000
@@ -464,6 +464,13 @@
self.checkraises(TypeError, ' ', 'join', [1, 2, 3])
self.checkraises(TypeError, ' ', 'join', ['1', '2', 3])
+ @unittest.skipIf(sys.maxsize > 2**32,
+ 'needs too much memory on a 64-bit platform')
+ def test_join_overflow(self):
+ size = int(sys.maxsize**0.5) + 1
+ seq = ('A' * size,) * size
+ self.assertRaises(OverflowError, ''.join, seq)
+
def test_replace(self):
string_tests.CommonTest.test_replace(self)
diff -r e72aab080165 Misc/NEWS
--- a/Misc/NEWS Sat Jul 23 11:16:56 2016 -0400
+++ b/Misc/NEWS Sun Jul 24 09:47:45 2016 +0000
@@ -10,6 +10,8 @@
Core and Builtins
-----------------
+- Issue #1621: Avoid signed integer overflow in the str.join() method.
+
- Issue #27507: Add integer overflow check in bytearray.extend(). Patch by
Xiang Zhang.
diff -r e72aab080165 Objects/unicodeobject.c
--- a/Objects/unicodeobject.c Sat Jul 23 11:16:56 2016 -0400
+++ b/Objects/unicodeobject.c Sun Jul 24 09:47:45 2016 +0000
@@ -9913,7 +9913,7 @@
use_memcpy = 1;
#endif
for (i = 0; i < seqlen; i++) {
- const Py_ssize_t old_sz = sz;
+ size_t add_sz; /* Wide enough that length + seplen cannot wrap */
item = items[i];
if (!PyUnicode_Check(item)) {
PyErr_Format(PyExc_TypeError,
@@ -9924,16 +9924,17 @@
}
if (PyUnicode_READY(item) == -1)
goto onError;
- sz += PyUnicode_GET_LENGTH(item);
+ add_sz = PyUnicode_GET_LENGTH(item);
item_maxchar = PyUnicode_MAX_CHAR_VALUE(item);
maxchar = Py_MAX(maxchar, item_maxchar);
if (i != 0)
- sz += seplen;
- if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
+ add_sz += seplen;
+ if (add_sz > (size_t)(PY_SSIZE_T_MAX - sz)) {
PyErr_SetString(PyExc_OverflowError,
"join() result is too long for a Python string");
goto onError;
}
+ sz += add_sz;
if (use_memcpy && last_obj != NULL) {
if (PyUnicode_KIND(last_obj) != PyUnicode_KIND(item))
use_memcpy = 0;