From 6fb9dee470d57882e378247fd2706d5f9867b5f9 Mon Sep 17 00:00:00 2001 From: Ramiro Morales Date: Sun, 28 Dec 2014 23:02:14 -0300 Subject: [PATCH] Fixed #23271 -- Don't corrupt PO files on Windows when updating them. Make sure PO catalog text fetched from gettext programs via standard output isn't corrupted by mismatch between assumed (UTF-8) and real (CP1252) encodings. This can cause mojibake to be written when creating or updating PO files. Also fixes #23311. Thanks to contributor with Trac nick 'danielmenzel' for the report, excellent research and fix. --- .../core/management/commands/makemessages.py | 25 ++++++++++++---- tests/i18n/commands/__init__.py | 2 ++ .../locale/pt_BR/LC_MESSAGES/django.pristine | 16 ++++++++++ tests/i18n/test_extraction.py | 29 +++++++++++++++---- 4 files changed, 62 insertions(+), 10 deletions(-) create mode 100644 tests/i18n/commands/locale/pt_BR/LC_MESSAGES/django.pristine diff --git a/django/core/management/commands/makemessages.py b/django/core/management/commands/makemessages.py index 932bc0897e..6da1ae0ae6 100644 --- a/django/core/management/commands/makemessages.py +++ b/django/core/management/commands/makemessages.py @@ -3,6 +3,7 @@ from __future__ import unicode_literals import fnmatch import glob import io +import locale import os import re import sys @@ -30,6 +31,20 @@ def check_programs(*programs): "gettext tools 0.15 or newer installed." % program) +def gettext_popen_wrapper(args, os_err_exc_type=CommandError): + """ + Makes sure text obtained from stdout of gettext utilities contains valid + Unicode on Windows. + """ + stdout, stderr, status_code = popen_wrapper(args, os_err_exc_type=os_err_exc_type) + if os.name == 'nt': + # This looks weird because it's undoing what subprocess.Popen(universal_newlines=True).communicate() + # does when capturing PO files contents from stdout of gettext command line programs. See ticket #23271 + # for details. + stdout = stdout.encode(locale.getpreferredencoding(False)).decode('utf-8') + return stdout, stderr, status_code + + @total_ordering class TranslatableFile(object): def __init__(self, dirpath, file_name, locale_dir): @@ -115,7 +130,7 @@ class TranslatableFile(object): args.append(work_file) else: return - msgs, errors, status = popen_wrapper(args) + msgs, errors, status = gettext_popen_wrapper(args) if errors: if status != STATUS_OK: if is_templatized: @@ -309,7 +324,7 @@ class Command(BaseCommand): @cached_property def gettext_version(self): - out, err, status = popen_wrapper(['xgettext', '--version']) + out, err, status = gettext_popen_wrapper(['xgettext', '--version']) m = re.search(r'(\d)\.(\d+)\.?(\d+)?', out) if m: return tuple(int(d) for d in m.groups() if d is not None) @@ -334,7 +349,7 @@ class Command(BaseCommand): if not os.path.exists(potfile): continue args = ['msguniq'] + self.msguniq_options + [potfile] - msgs, errors, status = popen_wrapper(args) + msgs, errors, status = gettext_popen_wrapper(args) if six.PY2: msgs = msgs.decode('utf-8') if errors: @@ -426,7 +441,7 @@ class Command(BaseCommand): if os.path.exists(pofile): args = ['msgmerge'] + self.msgmerge_options + [pofile, potfile] - msgs, errors, status = popen_wrapper(args) + msgs, errors, status = gettext_popen_wrapper(args) if six.PY2: msgs = msgs.decode('utf-8') if errors: @@ -447,7 +462,7 @@ class Command(BaseCommand): if self.no_obsolete: args = ['msgattrib'] + self.msgattrib_options + ['-o', pofile, pofile] - msgs, errors, status = popen_wrapper(args) + msgs, errors, status = gettext_popen_wrapper(args) if errors: if status != STATUS_OK: raise CommandError( diff --git a/tests/i18n/commands/__init__.py b/tests/i18n/commands/__init__.py index 76249ca34d..d26751895f 100644 --- a/tests/i18n/commands/__init__.py +++ b/tests/i18n/commands/__init__.py @@ -10,3 +10,5 @@ dummy2 = _("This is another translatable string.") # shouldn't create a .po file with duplicate `Plural-Forms` headers number = 3 dummuy3 = ungettext("%(number)s Foo", "%(number)s Foos", number) % {'number': number} + +dummy4 = _('Size') diff --git a/tests/i18n/commands/locale/pt_BR/LC_MESSAGES/django.pristine b/tests/i18n/commands/locale/pt_BR/LC_MESSAGES/django.pristine new file mode 100644 index 0000000000..95607d679b --- /dev/null +++ b/tests/i18n/commands/locale/pt_BR/LC_MESSAGES/django.pristine @@ -0,0 +1,16 @@ +msgid "" +msgstr "" +"Project-Id-Version: \n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2014-03-03 10:44+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: \n" +"Language-Team: \n" +"Language: pt_BR\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Plural-Forms: nplurals=2; plural=(n > 1);\n" + +msgid "Size" +msgstr "Größe" diff --git a/tests/i18n/test_extraction.py b/tests/i18n/test_extraction.py index 6f8fea1bfe..ad96f978bd 100644 --- a/tests/i18n/test_extraction.py +++ b/tests/i18n/test_extraction.py @@ -67,14 +67,21 @@ class ExtractorTests(SimpleTestCase): po_contents = fp.read() return output, po_contents - def assertMsgId(self, msgid, s, use_quotes=True): + def _assertPoKeyword(self, keyword, expected_value, haystack, use_quotes=True): q = '"' if use_quotes: - msgid = '"%s"' % msgid + expected_value = '"%s"' % expected_value q = "'" - needle = 'msgid %s' % msgid - msgid = re.escape(msgid) - return self.assertTrue(re.search('^msgid %s' % msgid, s, re.MULTILINE), 'Could not find %(q)s%(n)s%(q)s in generated PO file' % {'n': needle, 'q': q}) + needle = '%s %s' % (keyword, expected_value) + expected_value = re.escape(expected_value) + return self.assertTrue(re.search('^%s %s' % (keyword, expected_value), haystack, re.MULTILINE), + 'Could not find %(q)s%(n)s%(q)s in generated PO file' % {'n': needle, 'q': q}) + + def assertMsgId(self, msgid, haystack, use_quotes=True): + return self._assertPoKeyword('msgid', msgid, haystack, use_quotes=use_quotes) + + def assertMsgStr(self, msgstr, haystack, use_quotes=True): + return self._assertPoKeyword('msgstr', msgstr, haystack, use_quotes=use_quotes) def assertNotMsgId(self, msgid, s, use_quotes=True): if use_quotes: @@ -391,6 +398,18 @@ class BasicExtractorTests(ExtractorTests): with six.assertRaisesRegex(self, CommandError, "Unable to get gettext version. Is it installed?"): cmd.gettext_version + def test_po_file_encoding_when_updating(self): + """Update of PO file doesn't corrupt it with non-UTF-8 encoding on Python3+Windows (#23271)""" + BR_PO_BASE = 'locale/pt_BR/LC_MESSAGES/django' + os.chdir(self.test_dir) + shutil.copyfile(BR_PO_BASE + '.pristine', BR_PO_BASE + '.po') + self.addCleanup(self.rmfile, os.path.join(self.test_dir, 'locale', 'pt_BR', 'LC_MESSAGES', 'django.po')) + management.call_command('makemessages', locale=['pt_BR'], verbosity=0) + self.assertTrue(os.path.exists(BR_PO_BASE + '.po')) + with io.open(BR_PO_BASE + '.po', 'r', encoding='utf-8') as fp: + po_contents = force_text(fp.read()) + self.assertMsgStr("Größe", po_contents) + class JavascriptExtractorTests(ExtractorTests):