From 217af1d38db3e1e875180c6fa160f0fc80e46003 Mon Sep 17 00:00:00 2001
From: Victor Stinner
Date: Tue, 28 Aug 2018 09:35:25 +0200
Subject: [PATCH] bpo-34403, bpo-34207: Fix test_utf8_mode.test_cmd_line()
Make the test more generic: instead of hardcoding the encoding, get
the locale encoding at runtime, and then make sure that the command
line is properly decoded from the locale encoding.
Also test that the UTF-8 Mode decodes command line arguments from
UTF-8 with the C locale.
---
Lib/test/test_utf8_mode.py | 31 ++++++++++++++++++++-----------
1 file changed, 20 insertions(+), 11 deletions(-)
diff --git a/Lib/test/test_utf8_mode.py b/Lib/test/test_utf8_mode.py
index 3e918fd54ce3ca..5af35aed614355 100644
--- a/Lib/test/test_utf8_mode.py
+++ b/Lib/test/test_utf8_mode.py
@@ -206,9 +206,6 @@ def test_locale_getpreferredencoding(self):
@unittest.skipIf(MS_WINDOWS, 'test specific to Unix')
def test_cmd_line(self):
- arg = 'h\xe9\u20ac'.encode('utf-8')
- arg_utf8 = arg.decode('utf-8')
- arg_ascii = arg.decode('ascii', 'surrogateescape')
code = 'import locale, sys; print("%s:%s" % (locale.getpreferredencoding(), ascii(sys.argv[1:])))'
def check(utf8_opt, expected, **kw):
@@ -216,14 +213,26 @@ def check(utf8_opt, expected, **kw):
args = out.partition(':')[2].rstrip()
self.assertEqual(args, ascii(expected), out)
- check('utf8', [arg_utf8])
- if sys.platform == 'darwin' or support.is_android:
- c_arg = arg_utf8
- elif sys.platform.startswith("aix"):
- c_arg = arg.decode('iso-8859-1')
- else:
- c_arg = arg_ascii
- check('utf8=0', [c_arg], LC_ALL='C')
+ # UTF-8 Mode must use the UTF-8 encoding for any locale
+ arg = 'h\xe9\u20ac\U0010ffff'.encode('utf-8')
+ check('utf8', [arg.decode('utf-8')])
+ check('utf8', [arg.decode('utf-8')], LC_ALL='C')
+
+ # Non-ASCII byte string. Don't test Euro sign (U+20AC): Roman8 doesn't
+ # support it, and HP-UX uses Roman8 encoding for its C locale. The
+ # test just requires a single non-ASCII character to validate the code.
+ arg = b'h\xa7\xe9'
+
+ # Get the locale encoding when the UTF-8 mode is disabled
+ out = self.get_output('-X', 'utf8=0', '-c',
+ 'import locale; print(locale.getpreferredencoding())',
+ LC_ALL='C')
+ encoding = out.rstrip()
+
+ # Check that the command line is decoded from the locale encoding
+ with self.subTest(encoding=encoding):
+ check('utf8=0', [arg.decode(encoding, 'surrogateescape')],
+ LC_ALL='C')
def test_optim_level(self):
# CPython: check that Py_Main() doesn't increment Py_OptimizeFlag