Fixed #27007 -- Handled non-UTF-8 bytes objects for text/* attachments.
The fallback logic that allows non-UTF-8 encoded files to be passed to attach_file() even when a `text/*` MIME type has been specified has been moved to attach(). Both functions now fall back to a content type of `application/octet-stream`. A side effect is that a file's content is now read as bytes and decoded in memory instead of being opened in text mode and read into a string. Some mimetype-related logic in _create_attachment() has become obsolete because the code moved from attach_file() to attach() already handles it.
This commit is contained in:
parent
311a8e8d50
commit
72d541b61c
1
AUTHORS
1
AUTHORS
|
@ -519,6 +519,7 @@ answer newbie questions, and generally made Django that much better:
|
|||
michael.mcewan@gmail.com
|
||||
Michael Placentra II <someone@michaelplacentra2.net>
|
||||
Michael Radziej <mir@noris.de>
|
||||
Michael Schwarz <michi.schwarz@gmail.com>
|
||||
Michael Thornhill <michael.thornhill@gmail.com>
|
||||
Michal Chruszcz <troll@pld-linux.org>
|
||||
michal@plovarna.cz
|
||||
|
|
|
@ -356,6 +356,11 @@ class EmailMessage(object):
|
|||
|
||||
If the first parameter is a MIMEBase subclass it is inserted directly
|
||||
into the resulting message attachments.
|
||||
|
||||
For a text/* mimetype (guessed or specified), when a bytes object is
|
||||
specified as content, it will be decoded as UTF-8. If that fails,
|
||||
the mimetype will be set to DEFAULT_ATTACHMENT_MIME_TYPE and the
|
||||
content is not decoded.
|
||||
"""
|
||||
if isinstance(filename, MIMEBase):
|
||||
assert content is None
|
||||
|
@ -363,6 +368,22 @@ class EmailMessage(object):
|
|||
self.attachments.append(filename)
|
||||
else:
|
||||
assert content is not None
|
||||
|
||||
if not mimetype:
|
||||
mimetype, _ = mimetypes.guess_type(filename)
|
||||
if not mimetype:
|
||||
mimetype = DEFAULT_ATTACHMENT_MIME_TYPE
|
||||
basetype, subtype = mimetype.split('/', 1)
|
||||
|
||||
if basetype == 'text':
|
||||
if isinstance(content, six.binary_type):
|
||||
try:
|
||||
content = content.decode('utf-8')
|
||||
except UnicodeDecodeError:
|
||||
# If mimetype suggests the file is text but it's actually
|
||||
# binary, decode() will raise a UnicodeDecodeError.
|
||||
mimetype = DEFAULT_ATTACHMENT_MIME_TYPE
|
||||
|
||||
self.attachments.append((filename, content, mimetype))
|
||||
|
||||
def attach_file(self, path, mimetype=None):
|
||||
|
@ -370,33 +391,17 @@ class EmailMessage(object):
|
|||
Attaches a file from the filesystem.
|
||||
|
||||
The mimetype will be set to the DEFAULT_ATTACHMENT_MIME_TYPE if it is
|
||||
not specified and cannot be guessed or (PY3 only) if it suggests
|
||||
text/* for a binary file.
|
||||
not specified and cannot be guessed.
|
||||
|
||||
For a text/* mimetype (guessed or specified), the file's content
|
||||
will be decoded as UTF-8. If that fails, the mimetype will be set to
|
||||
DEFAULT_ATTACHMENT_MIME_TYPE and the content is not decoded.
|
||||
"""
|
||||
filename = os.path.basename(path)
|
||||
if not mimetype:
|
||||
mimetype, _ = mimetypes.guess_type(filename)
|
||||
if not mimetype:
|
||||
mimetype = DEFAULT_ATTACHMENT_MIME_TYPE
|
||||
basetype, subtype = mimetype.split('/', 1)
|
||||
read_mode = 'r' if basetype == 'text' else 'rb'
|
||||
content = None
|
||||
|
||||
with open(path, read_mode) as f:
|
||||
try:
|
||||
content = f.read()
|
||||
except UnicodeDecodeError:
|
||||
# If mimetype suggests the file is text but it's actually
|
||||
# binary, read() will raise a UnicodeDecodeError on Python 3.
|
||||
pass
|
||||
|
||||
# If the previous read in text mode failed, try binary mode.
|
||||
if content is None:
|
||||
with open(path, 'rb') as f:
|
||||
content = f.read()
|
||||
mimetype = DEFAULT_ATTACHMENT_MIME_TYPE
|
||||
|
||||
self.attach(filename, content, mimetype)
|
||||
with open(path, 'rb') as file:
|
||||
content = file.read()
|
||||
self.attach(filename, content, mimetype)
|
||||
|
||||
def _create_message(self, msg):
|
||||
return self._create_attachments(msg)
|
||||
|
@ -450,10 +455,6 @@ class EmailMessage(object):
|
|||
Converts the filename, content, mimetype triple into a MIME attachment
|
||||
object.
|
||||
"""
|
||||
if mimetype is None:
|
||||
mimetype, _ = mimetypes.guess_type(filename)
|
||||
if mimetype is None:
|
||||
mimetype = DEFAULT_ATTACHMENT_MIME_TYPE
|
||||
attachment = self._create_mime_attachment(content, mimetype)
|
||||
if filename:
|
||||
try:
|
||||
|
|
|
@ -162,6 +162,10 @@ Email
|
|||
* Added the :setting:`EMAIL_USE_LOCALTIME` setting to allow sending SMTP date
|
||||
headers in the local time zone rather than in UTC.
|
||||
|
||||
* ``EmailMessage.attach()`` and ``attach_file()`` now fall back to MIME type
|
||||
``application/octet-stream`` when binary content that can't be decoded as
|
||||
UTF-8 is specified for a ``text/*`` attachment.
|
||||
|
||||
File Storage
|
||||
~~~~~~~~~~~~
|
||||
|
||||
|
|
|
@ -345,6 +345,11 @@ The class has the following methods:
|
|||
If you specify a ``mimetype`` of ``message/rfc822``, it will also accept
|
||||
:class:`django.core.mail.EmailMessage` and :py:class:`email.message.Message`.
|
||||
|
||||
For a ``mimetype`` starting with ``text/``, content is expected to be a
|
||||
string. Binary data will be decoded using UTF-8, and if that fails, the
|
||||
MIME type will be changed to ``application/octet-stream`` and the data will
|
||||
be attached unchanged.
|
||||
|
||||
In addition, ``message/rfc822`` attachments will no longer be
|
||||
base64-encoded in violation of :rfc:`2046#section-5.2.1`, which can cause
|
||||
issues with displaying the attachments in `Evolution`__ and `Thunderbird`__.
|
||||
|
@ -359,6 +364,14 @@ The class has the following methods:
|
|||
|
||||
message.attach_file('/images/weather_map.png')
|
||||
|
||||
For MIME types starting with ``text/``, binary data is handled as in
|
||||
``attach()``.
|
||||
|
||||
.. versionchanged:: 1.11
|
||||
|
||||
Added the fallback to MIME type ``application/octet-stream`` when binary
|
||||
data for a ``text/*`` attachment cannot be decoded.
|
||||
|
||||
Sending alternative content types
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
|
|
|
@ -422,6 +422,31 @@ class MailTests(HeadersCheckMixin, SimpleTestCase):
|
|||
self.assertEqual(content, b'file content')
|
||||
self.assertEqual(mimetype, 'text/plain')
|
||||
|
||||
def test_attach_utf8_text_as_bytes(self):
|
||||
"""
|
||||
Non-ASCII characters encoded as valid UTF-8 are correctly transported
|
||||
and decoded.
|
||||
"""
|
||||
msg = EmailMessage('subject', 'body', 'from@example.com', ['to@example.com'])
|
||||
msg.attach('file.txt', b'\xc3\xa4') # UTF-8 encoded a umlaut.
|
||||
filename, content, mimetype = self.get_decoded_attachments(msg)[0]
|
||||
self.assertEqual(filename, 'file.txt')
|
||||
self.assertEqual(content, b'\xc3\xa4')
|
||||
self.assertEqual(mimetype, 'text/plain')
|
||||
|
||||
def test_attach_non_utf8_text_as_bytes(self):
|
||||
"""
|
||||
Binary data that can't be decoded as UTF-8 overrides the MIME type
|
||||
instead of decoding the data.
|
||||
"""
|
||||
msg = EmailMessage('subject', 'body', 'from@example.com', ['to@example.com'])
|
||||
msg.attach('file.txt', b'\xff') # Invalid UTF-8.
|
||||
filename, content, mimetype = self.get_decoded_attachments(msg)[0]
|
||||
self.assertEqual(filename, 'file.txt')
|
||||
# Content should be passed through unmodified.
|
||||
self.assertEqual(content, b'\xff')
|
||||
self.assertEqual(mimetype, 'application/octet-stream')
|
||||
|
||||
def test_dummy_backend(self):
|
||||
"""
|
||||
Make sure that the dummy backend returns the correct number of sent messages
|
||||
|
|
Loading…
Reference in New Issue