Merge pull request #2126 from apollo13/email_bytes

Many thanks to @bitdancer and @aaugustin for answering my stupid questions about (mail)encodings and pointing me in the right direction.
This commit is contained in:
Florian Apolloner 2013-12-30 14:58:49 -08:00
commit df075c7489
5 changed files with 76 additions and 61 deletions

View File

@ -5,6 +5,7 @@ import sys
import threading
from django.core.mail.backends.base import BaseEmailBackend
from django.utils import six
class EmailBackend(BaseEmailBackend):
@ -13,6 +14,16 @@ class EmailBackend(BaseEmailBackend):
self._lock = threading.RLock()
super(EmailBackend, self).__init__(*args, **kwargs)
def write_message(self, message):
# Write a single email to self.stream, followed by a separator line made of
# 79 dashes and a trailing newline.
# message.message() presumably builds the MIME message object -- confirm
# against the message class, which is not visible here.
msg = message.message()
msg_data = msg.as_bytes()
# On Python 3 the serialized message is decoded before the text-style '%s'
# formatting below. The message's output charset is used when a charset is
# set; otherwise 'utf-8' is the fallback.
if six.PY3:
charset = msg.get_charset().get_output_charset() if msg.get_charset() else 'utf-8'
msg_data = msg_data.decode(charset)
self.stream.write('%s\n' % msg_data)
self.stream.write('-' * 79)
self.stream.write('\n')
def send_messages(self, email_messages):
"""Write all messages to the stream in a thread-safe way."""
if not email_messages:
@ -22,9 +33,7 @@ class EmailBackend(BaseEmailBackend):
try:
stream_created = self.open()
for message in email_messages:
self.stream.write('%s\n' % message.message().as_string())
self.stream.write('-' * 79)
self.stream.write('\n')
self.write_message(message)
self.stream.flush() # flush after each message
msg_count += 1
if stream_created:

View File

@ -38,6 +38,11 @@ class EmailBackend(ConsoleEmailBackend):
kwargs['stream'] = None
super(EmailBackend, self).__init__(*args, **kwargs)
def write_message(self, message):
# Write a single email to self.stream as bytes, followed by a separator line
# of 79 dashes and a trailing newline. Every write passes a bytes object, so
# self.stream is expected to accept bytes.
self.stream.write(message.message().as_bytes() + b'\n')
self.stream.write(b'-' * 79)
self.stream.write(b'\n')
def _get_filename(self):
"""Return a unique file name."""
if self._fname is None:
@ -48,7 +53,7 @@ class EmailBackend(ConsoleEmailBackend):
def open(self):
if self.stream is None:
self.stream = open(self._get_filename(), 'a')
self.stream = open(self._get_filename(), 'ab')
return True
return False

View File

@ -7,7 +7,6 @@ from django.conf import settings
from django.core.mail.backends.base import BaseEmailBackend
from django.core.mail.utils import DNS_NAME
from django.core.mail.message import sanitize_address
from django.utils.encoding import force_bytes
class EmailBackend(BaseEmailBackend):
@ -111,10 +110,8 @@ class EmailBackend(BaseEmailBackend):
recipients = [sanitize_address(addr, email_message.encoding)
for addr in email_message.recipients()]
message = email_message.message()
charset = message.get_charset().get_output_charset() if message.get_charset() else 'utf-8'
try:
self.connection.sendmail(from_email, recipients,
force_bytes(message.as_string(), charset))
self.connection.sendmail(from_email, recipients, message.as_bytes())
except smtplib.SMTPException:
if not self.fail_silently:
raise

View File

@ -131,21 +131,25 @@ class MIMEMixin():
This overrides the default as_string() implementation to not mangle
lines that begin with 'From '. See bug #13433 for details.
"""
# Using a normal Generator on Python 3 will yield a string, which will
# get base64 encoded in some cases to ensure that it's always convertible
# to ASCII. We don't want base64 encoded emails, so we use a BytesGenerator
# which will do the right thing and then decode according to our known
# encoding. See #21093 and #3472 for details.
if six.PY3 and sys.version_info >= (3, 3, 3):
fp = six.StringIO()
g = generator.Generator(fp, mangle_from_=False)
g.flatten(self, unixfrom=unixfrom)
return fp.getvalue()
if six.PY2:
as_bytes = as_string
else:
def as_bytes(self, unixfrom=False):
"""Return the entire formatted message as bytes.
Optional `unixfrom' when True, means include the Unix From_ envelope
header.
This overrides the default as_bytes() implementation to not mangle
lines that begin with 'From '. See bug #13433 for details.
"""
fp = six.BytesIO()
g = generator.BytesGenerator(fp, mangle_from_=False)
g.flatten(self, unixfrom=unixfrom)
encoding = self.get_charset().get_output_charset() if self.get_charset() else 'utf-8'
return fp.getvalue().decode(encoding)
else:
fp = six.StringIO()
g = generator.Generator(fp, mangle_from_=False)
g.flatten(self, unixfrom=unixfrom)
return fp.getvalue()
@ -167,9 +171,8 @@ class SafeMIMEText(MIMEMixin, MIMEText):
# We do it manually and trigger re-encoding of the payload.
MIMEText.__init__(self, text, subtype, None)
del self['Content-Transfer-Encoding']
# Work around a bug in python 3.3.3 [sic], see
# http://bugs.python.org/issue19063 for details.
if sys.version_info[:3] == (3, 3, 3):
# Workaround for versions without http://bugs.python.org/issue19063
if (3, 2) < sys.version_info < (3, 3, 4):
payload = text.encode(utf8_charset.output_charset)
self._payload = payload.decode('ascii', 'surrogateescape')
self.set_charset(utf8_charset)

View File

@ -2,7 +2,6 @@
from __future__ import unicode_literals
import asyncore
from email import message_from_file, message_from_string
from email.mime.text import MIMEText
import os
import shutil
@ -20,14 +19,17 @@ from django.core.mail.backends import console, dummy, locmem, filebased, smtp
from django.core.mail.message import BadHeaderError
from django.test import SimpleTestCase
from django.test import override_settings
from django.utils.encoding import force_str, force_text
from django.utils.six import PY3, StringIO, string_types
from django.utils.encoding import force_text, force_bytes
from django.utils.six import PY3, StringIO, binary_type
from django.utils.translation import ugettext_lazy
if PY3:
from email.utils import parseaddr
from email import message_from_bytes, message_from_binary_file
else:
from email.Utils import parseaddr
from email import (message_from_string as message_from_bytes,
message_from_file as message_from_binary_file)
class HeadersCheckMixin(object):
@ -40,13 +42,9 @@ class HeadersCheckMixin(object):
string with the contents of an email message.
:param headers: should be a set of (header-name, header-value) tuples.
"""
if isinstance(message, string_types):
just_headers = message.split('\n\n', 1)[0]
hlist = just_headers.split('\n')
pairs = [hl.split(':', 1) for hl in hlist]
msg_headers = {(n, v.lstrip()) for (n, v) in pairs}
else:
msg_headers = set(message.items())
if isinstance(message, binary_type):
message = message_from_bytes(message)
msg_headers = set(message.items())
self.assertTrue(headers.issubset(msg_headers), msg='Message is missing '
'the following headers: %s' % (headers - msg_headers),)
@ -231,13 +229,13 @@ class MailTests(HeadersCheckMixin, SimpleTestCase):
('MIME-Version', '1.0'),
('Content-Type', 'text/plain; charset="iso-8859-1"'),
('Content-Transfer-Encoding', 'quoted-printable')})
self.assertTrue(payload0.as_string().endswith('\n\nFirstname S=FCrname is a great guy.'))
self.assertTrue(payload0.as_bytes().endswith(b'\n\nFirstname S=FCrname is a great guy.'))
payload1 = msg.message().get_payload(1)
self.assertMessageHasHeaders(payload1, {
('MIME-Version', '1.0'),
('Content-Type', 'text/html; charset="iso-8859-1"'),
('Content-Transfer-Encoding', 'quoted-printable')})
self.assertTrue(payload1.as_string().endswith('\n\n<p>Firstname S=FCrname is a <strong>great</strong> guy.</p>'))
self.assertTrue(payload1.as_bytes().endswith(b'\n\n<p>Firstname S=FCrname is a <strong>great</strong> guy.</p>'))
def test_attachments(self):
"""Regression test for #9367"""
@ -248,8 +246,8 @@ class MailTests(HeadersCheckMixin, SimpleTestCase):
msg = EmailMultiAlternatives(subject, text_content, from_email, [to], headers=headers)
msg.attach_alternative(html_content, "text/html")
msg.attach("an attachment.pdf", b"%PDF-1.4.%...", mimetype="application/pdf")
msg_str = msg.message().as_string()
message = message_from_string(msg_str)
msg_bytes = msg.message().as_bytes()
message = message_from_bytes(msg_bytes)
self.assertTrue(message.is_multipart())
self.assertEqual(message.get_content_type(), 'multipart/mixed')
self.assertEqual(message.get_default_type(), 'text/plain')
@ -265,8 +263,8 @@ class MailTests(HeadersCheckMixin, SimpleTestCase):
msg = EmailMessage(subject, content, from_email, [to], headers=headers)
# Unicode in file name
msg.attach("une pièce jointe.pdf", b"%PDF-1.4.%...", mimetype="application/pdf")
msg_str = msg.message().as_string()
message = message_from_string(msg_str)
msg_bytes = msg.message().as_bytes()
message = message_from_bytes(msg_bytes)
payload = message.get_payload()
self.assertEqual(payload[1].get_filename(), 'une pièce jointe.pdf')
@ -348,31 +346,31 @@ class MailTests(HeadersCheckMixin, SimpleTestCase):
# Regression for #13433 - Make sure that EmailMessage doesn't mangle
# 'From ' in message body.
email = EmailMessage('Subject', 'From the future', 'bounce@example.com', ['to@example.com'], headers={'From': 'from@example.com'})
self.assertFalse('>From the future' in email.message().as_string())
self.assertFalse(b'>From the future' in email.message().as_bytes())
def test_dont_base64_encode(self):
# Ticket #3472
# Shouldn't use Base64 encoding at all
msg = EmailMessage('Subject', 'UTF-8 encoded body', 'bounce@example.com', ['to@example.com'], headers={'From': 'from@example.com'})
self.assertFalse('Content-Transfer-Encoding: base64' in msg.message().as_string())
self.assertFalse(b'Content-Transfer-Encoding: base64' in msg.message().as_bytes())
# Ticket #11212
# Shouldn't use quoted printable, should detect it can represent content with 7 bit data
msg = EmailMessage('Subject', 'Body with only ASCII characters.', 'bounce@example.com', ['to@example.com'], headers={'From': 'from@example.com'})
s = msg.message().as_string()
self.assertFalse('Content-Transfer-Encoding: quoted-printable' in s)
self.assertTrue('Content-Transfer-Encoding: 7bit' in s)
s = msg.message().as_bytes()
self.assertFalse(b'Content-Transfer-Encoding: quoted-printable' in s)
self.assertTrue(b'Content-Transfer-Encoding: 7bit' in s)
# Shouldn't use quoted printable, should detect it can represent content with 8 bit data
msg = EmailMessage('Subject', 'Body with latin characters: àáä.', 'bounce@example.com', ['to@example.com'], headers={'From': 'from@example.com'})
s = msg.message().as_string()
self.assertFalse(str('Content-Transfer-Encoding: quoted-printable') in s)
self.assertTrue(str('Content-Transfer-Encoding: 8bit') in s)
s = msg.message().as_bytes()
self.assertFalse(b'Content-Transfer-Encoding: quoted-printable' in s)
self.assertTrue(b'Content-Transfer-Encoding: 8bit' in s)
msg = EmailMessage('Subject', 'Body with non latin characters: А Б В Г Д Е Ж Ѕ З И І К Л М Н О П.', 'bounce@example.com', ['to@example.com'], headers={'From': 'from@example.com'})
s = msg.message().as_string()
self.assertFalse(str('Content-Transfer-Encoding: quoted-printable') in s)
self.assertTrue(str('Content-Transfer-Encoding: 8bit') in s)
s = msg.message().as_bytes()
self.assertFalse(b'Content-Transfer-Encoding: quoted-printable' in s)
self.assertTrue(b'Content-Transfer-Encoding: 8bit' in s)
def test_dont_base64_encode_message_rfc822(self):
# Ticket #18967
@ -476,7 +474,7 @@ class BaseEmailBackendTests(HeadersCheckMixin, object):
self.assertEqual(num_sent, 1)
message = self.get_the_message()
self.assertEqual(message["subject"], '=?utf-8?q?Ch=C3=A8re_maman?=')
self.assertEqual(force_text(message.get_payload()), 'Je t\'aime très fort')
self.assertEqual(force_text(message.get_payload(decode=True)), 'Je t\'aime très fort')
def test_send_many(self):
email1 = EmailMessage('Subject', 'Content1', 'from@example.com', ['to@example.com'])
@ -692,9 +690,9 @@ class FileBackendTests(BaseEmailBackendTests, SimpleTestCase):
def get_mailbox_content(self):
messages = []
for filename in os.listdir(self.tmp_dir):
with open(os.path.join(self.tmp_dir, filename), 'r') as fp:
session = force_text(fp.read()).split('\n' + ('-' * 79) + '\n')
messages.extend(message_from_string(force_str(m)) for m in session if m)
with open(os.path.join(self.tmp_dir, filename), 'rb') as fp:
session = fp.read().split(force_bytes('\n' + ('-' * 79) + '\n', encoding='ascii'))
messages.extend(message_from_bytes(m) for m in session if m)
return messages
def test_file_sessions(self):
@ -704,8 +702,8 @@ class FileBackendTests(BaseEmailBackendTests, SimpleTestCase):
connection.send_messages([msg])
self.assertEqual(len(os.listdir(self.tmp_dir)), 1)
with open(os.path.join(self.tmp_dir, os.listdir(self.tmp_dir)[0])) as fp:
message = message_from_file(fp)
with open(os.path.join(self.tmp_dir, os.listdir(self.tmp_dir)[0]), 'rb') as fp:
message = message_from_binary_file(fp)
self.assertEqual(message.get_content_type(), 'text/plain')
self.assertEqual(message.get('subject'), 'Subject')
self.assertEqual(message.get('from'), 'from@example.com')
@ -746,8 +744,8 @@ class ConsoleBackendTests(BaseEmailBackendTests, SimpleTestCase):
self.stream = sys.stdout = StringIO()
def get_mailbox_content(self):
messages = force_text(self.stream.getvalue()).split('\n' + ('-' * 79) + '\n')
return [message_from_string(force_str(m)) for m in messages if m]
messages = self.stream.getvalue().split(str('\n' + ('-' * 79) + '\n'))
return [message_from_bytes(force_bytes(m)) for m in messages if m]
def test_console_stream_kwarg(self):
"""
@ -756,14 +754,15 @@ class ConsoleBackendTests(BaseEmailBackendTests, SimpleTestCase):
s = StringIO()
connection = mail.get_connection('django.core.mail.backends.console.EmailBackend', stream=s)
send_mail('Subject', 'Content', 'from@example.com', ['to@example.com'], connection=connection)
self.assertMessageHasHeaders(s.getvalue(), {
message = force_bytes(s.getvalue().split('\n' + ('-' * 79) + '\n')[0])
self.assertMessageHasHeaders(message, {
('MIME-Version', '1.0'),
('Content-Type', 'text/plain; charset="utf-8"'),
('Content-Transfer-Encoding', '7bit'),
('Subject', 'Subject'),
('From', 'from@example.com'),
('To', 'to@example.com')})
self.assertIn('\nDate: ', s.getvalue())
self.assertIn(b'\nDate: ', message)
class FakeSMTPChannel(smtpd.SMTPChannel):
@ -793,7 +792,9 @@ class FakeSMTPServer(smtpd.SMTPServer, threading.Thread):
self.sink_lock = threading.Lock()
def process_message(self, peer, mailfrom, rcpttos, data):
m = message_from_string(data)
if PY3:
data = data.encode('utf-8')
m = message_from_bytes(data)
maddr = parseaddr(m.get('from'))[1]
if mailfrom != maddr:
return "553 '%s' != '%s'" % (mailfrom, maddr)