Fixed #23271 -- Don't corrupt PO files on Windows when updating them.

Make sure PO catalog text fetched from gettext programs via standard
output isn't corrupted by a mismatch between the assumed (UTF-8) and real
(CP1252) encodings. Such a mismatch can cause mojibake to be written when
creating or updating PO files.

Also fixes #23311.

Thanks to contributor with Trac nick 'danielmenzel' for the report,
excellent research and fix.
This commit is contained in:
Ramiro Morales 2014-12-28 23:02:14 -03:00
parent 1ee9507eb3
commit 6fb9dee470
4 changed files with 62 additions and 10 deletions

View File

@ -3,6 +3,7 @@ from __future__ import unicode_literals
import fnmatch
import glob
import io
import locale
import os
import re
import sys
@ -30,6 +31,20 @@ def check_programs(*programs):
"gettext tools 0.15 or newer installed." % program)
def gettext_popen_wrapper(args, os_err_exc_type=CommandError):
    """
    Run a gettext utility and make sure the text obtained from its stdout
    contains valid Unicode on Windows.

    ``args`` and ``os_err_exc_type`` are handed to ``popen_wrapper()``
    unchanged. Returns the same ``(stdout, stderr, status_code)`` tuple that
    ``popen_wrapper()`` produced; only ``stdout`` may be altered, and only on
    Windows when it is a text (Unicode) string.
    """
    stdout, stderr, status_code = popen_wrapper(args, os_err_exc_type=os_err_exc_type)
    # Only already-decoded text needs repairing. When stdout is still a byte
    # string (callers in this file decode it themselves on Python 2), calling
    # .encode() on it would trigger an implicit ASCII decode and blow up with
    # UnicodeDecodeError on any non-ASCII catalog content.
    if os.name == 'nt' and not isinstance(stdout, bytes):
        # This looks weird because it's undoing what
        # subprocess.Popen(universal_newlines=True).communicate() does when
        # capturing PO file contents from stdout of gettext command line
        # programs: re-encode with the locale's preferred encoding to get the
        # raw bytes back, then interpret them as UTF-8. See ticket #23271 for
        # details.
        stdout = stdout.encode(locale.getpreferredencoding(False)).decode('utf-8')
    return stdout, stderr, status_code
@total_ordering
class TranslatableFile(object):
def __init__(self, dirpath, file_name, locale_dir):
@ -115,7 +130,7 @@ class TranslatableFile(object):
args.append(work_file)
else:
return
msgs, errors, status = popen_wrapper(args)
msgs, errors, status = gettext_popen_wrapper(args)
if errors:
if status != STATUS_OK:
if is_templatized:
@ -309,7 +324,7 @@ class Command(BaseCommand):
@cached_property
def gettext_version(self):
out, err, status = popen_wrapper(['xgettext', '--version'])
out, err, status = gettext_popen_wrapper(['xgettext', '--version'])
m = re.search(r'(\d)\.(\d+)\.?(\d+)?', out)
if m:
return tuple(int(d) for d in m.groups() if d is not None)
@ -334,7 +349,7 @@ class Command(BaseCommand):
if not os.path.exists(potfile):
continue
args = ['msguniq'] + self.msguniq_options + [potfile]
msgs, errors, status = popen_wrapper(args)
msgs, errors, status = gettext_popen_wrapper(args)
if six.PY2:
msgs = msgs.decode('utf-8')
if errors:
@ -426,7 +441,7 @@ class Command(BaseCommand):
if os.path.exists(pofile):
args = ['msgmerge'] + self.msgmerge_options + [pofile, potfile]
msgs, errors, status = popen_wrapper(args)
msgs, errors, status = gettext_popen_wrapper(args)
if six.PY2:
msgs = msgs.decode('utf-8')
if errors:
@ -447,7 +462,7 @@ class Command(BaseCommand):
if self.no_obsolete:
args = ['msgattrib'] + self.msgattrib_options + ['-o', pofile, pofile]
msgs, errors, status = popen_wrapper(args)
msgs, errors, status = gettext_popen_wrapper(args)
if errors:
if status != STATUS_OK:
raise CommandError(

View File

@ -10,3 +10,5 @@ dummy2 = _("This is another translatable string.")
# shouldn't create a .po file with duplicate `Plural-Forms` headers
number = 3
dummuy3 = ungettext("%(number)s Foo", "%(number)s Foos", number) % {'number': number}
dummy4 = _('Size')

View File

@ -0,0 +1,16 @@
msgid ""
msgstr ""
"Project-Id-Version: \n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2014-03-03 10:44+0100\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: \n"
"Language-Team: \n"
"Language: pt_BR\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Plural-Forms: nplurals=2; plural=(n > 1);\n"
msgid "Size"
msgstr "Größe"

View File

@ -67,14 +67,21 @@ class ExtractorTests(SimpleTestCase):
po_contents = fp.read()
return output, po_contents
def assertMsgId(self, msgid, s, use_quotes=True):
def _assertPoKeyword(self, keyword, expected_value, haystack, use_quotes=True):
q = '"'
if use_quotes:
msgid = '"%s"' % msgid
expected_value = '"%s"' % expected_value
q = "'"
needle = 'msgid %s' % msgid
msgid = re.escape(msgid)
return self.assertTrue(re.search('^msgid %s' % msgid, s, re.MULTILINE), 'Could not find %(q)s%(n)s%(q)s in generated PO file' % {'n': needle, 'q': q})
needle = '%s %s' % (keyword, expected_value)
expected_value = re.escape(expected_value)
return self.assertTrue(re.search('^%s %s' % (keyword, expected_value), haystack, re.MULTILINE),
'Could not find %(q)s%(n)s%(q)s in generated PO file' % {'n': needle, 'q': q})
def assertMsgId(self, msgid, haystack, use_quotes=True):
    """
    Assert that ``haystack`` (PO file contents) has a ``msgid`` entry equal
    to ``msgid``.

    The check is delegated to ``self._assertPoKeyword()`` with the ``msgid``
    keyword; ``use_quotes`` is forwarded to it untouched.
    """
    check_keyword = self._assertPoKeyword
    return check_keyword('msgid', msgid, haystack, use_quotes=use_quotes)
def assertMsgStr(self, msgstr, haystack, use_quotes=True):
    """
    Assert that ``haystack`` (PO file contents) has a ``msgstr`` entry equal
    to ``msgstr``.

    The check is delegated to ``self._assertPoKeyword()`` with the ``msgstr``
    keyword; ``use_quotes`` is forwarded to it untouched.
    """
    check_keyword = self._assertPoKeyword
    return check_keyword('msgstr', msgstr, haystack, use_quotes=use_quotes)
def assertNotMsgId(self, msgid, s, use_quotes=True):
if use_quotes:
@ -391,6 +398,18 @@ class BasicExtractorTests(ExtractorTests):
with six.assertRaisesRegex(self, CommandError, "Unable to get gettext version. Is it installed?"):
cmd.gettext_version
def test_po_file_encoding_when_updating(self):
    """
    Updating a PO file doesn't corrupt it with a non-UTF-8 encoding on
    Python 3 + Windows (#23271).

    A pristine pt_BR catalog containing a non-ASCII translation is copied
    into place, makemessages is run for that locale, and the resulting file
    is read back as UTF-8 to check that the translation survived intact.
    """
    os.chdir(self.test_dir)
    po_base = 'locale/pt_BR/LC_MESSAGES/django'
    po_path = po_base + '.po'
    shutil.copyfile(po_base + '.pristine', po_path)
    # Register removal of the copied catalog before makemessages touches it,
    # so it is cleaned up even if the command or an assertion fails.
    copied_catalog = os.path.join(self.test_dir, 'locale', 'pt_BR', 'LC_MESSAGES', 'django.po')
    self.addCleanup(self.rmfile, copied_catalog)
    management.call_command('makemessages', locale=['pt_BR'], verbosity=0)
    self.assertTrue(os.path.exists(po_path))
    with io.open(po_path, 'r', encoding='utf-8') as po_file:
        po_contents = force_text(po_file.read())
    self.assertMsgStr("Größe", po_contents)
class JavascriptExtractorTests(ExtractorTests):