Fixed #25986 -- Fixed crash sending email with non-ASCII in local part of the address.

On Python 3, sending emails failed for addresses containing non-ASCII
characters due to the usage of the legacy Python email.utils.formataddr()
function. This is fixed by using the proper Address object on Python 3.
This commit is contained in:
Sergei Maertens 2016-04-02 11:41:47 +02:00 committed by Tim Graham
parent 086510fde0
commit ec009ef1d8
4 changed files with 104 additions and 13 deletions

View File

@ -660,6 +660,7 @@ answer newbie questions, and generally made Django that much better:
Sengtha Chay <sengtha@e-khmer.com> Sengtha Chay <sengtha@e-khmer.com>
Senko Rašić <senko.rasic@dobarkod.hr> Senko Rašić <senko.rasic@dobarkod.hr>
serbaut@gmail.com serbaut@gmail.com
Sergei Maertens <sergeimaertens@gmail.com>
Sergey Fedoseev <fedoseev.sergey@gmail.com> Sergey Fedoseev <fedoseev.sergey@gmail.com>
Sergey Kolosov <m17.admin@gmail.com> Sergey Kolosov <m17.admin@gmail.com>
Seth Hill <sethrh@gmail.com> Seth Hill <sethrh@gmail.com>

View File

@ -115,9 +115,9 @@ class EmailBackend(BaseEmailBackend):
"""A helper method that does the actual sending.""" """A helper method that does the actual sending."""
if not email_message.recipients(): if not email_message.recipients():
return False return False
from_email = sanitize_address(email_message.from_email, email_message.encoding) encoding = email_message.encoding or settings.DEFAULT_CHARSET
recipients = [sanitize_address(addr, email_message.encoding) from_email = sanitize_address(email_message.from_email, encoding)
for addr in email_message.recipients()] recipients = [sanitize_address(addr, encoding) for addr in email_message.recipients()]
message = email_message.message() message = email_message.message()
try: try:
self.connection.sendmail(from_email, recipients, message.as_bytes(linesep='\r\n')) self.connection.sendmail(from_email, recipients, message.as_bytes(linesep='\r\n'))

View File

@ -13,7 +13,7 @@ from email.mime.base import MIMEBase
from email.mime.message import MIMEMessage from email.mime.message import MIMEMessage
from email.mime.multipart import MIMEMultipart from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText from email.mime.text import MIMEText
from email.utils import formataddr, formatdate, getaddresses, parseaddr from email.utils import formatdate, getaddresses, parseaddr
from io import BytesIO from io import BytesIO
from django.conf import settings from django.conf import settings
@ -103,22 +103,66 @@ def forbid_multi_line_headers(name, val, encoding):
return str(name), val return str(name), val
def split_addr(addr, encoding):
    """
    Split the address into local part and domain, properly encoded.

    When non-ascii characters are present in the local part, it must be
    MIME-word encoded. The domain name must be idna-encoded if it contains
    non-ascii characters.

    ``addr`` is the bare address string and ``encoding`` is the charset
    handed to ``Header`` when the local part needs MIME-word encoding.

    Return a ``(localpart, domain)`` tuple of strings. If ``addr`` contains
    no '@', the whole value is encoded as the local part and ``domain`` is
    the empty string.
    """
    if '@' in addr:
        # Split on the *last* '@': a quoted local part may itself contain an
        # '@' (e.g. '"a@b"@example.com'), whereas a domain never does.
        localpart, domain = addr.rsplit('@', 1)
        # Try to get the simplest encoding - ascii if possible so that
        # to@example.com doesn't become =?utf-8?q?to?=@example.com. This
        # makes unit testing a bit easier and more readable.
        try:
            localpart.encode('ascii')
        except UnicodeEncodeError:
            localpart = Header(localpart, encoding).encode()
        # The idna codec leaves an ASCII-only domain unchanged.
        domain = domain.encode('idna').decode('ascii')
    else:
        localpart = Header(addr, encoding).encode()
        domain = ''
    return (localpart, domain)
def sanitize_address(addr, encoding): def sanitize_address(addr, encoding):
"""
Format a pair of (name, address) or an email address string.
"""
if not isinstance(addr, tuple): if not isinstance(addr, tuple):
addr = parseaddr(force_text(addr)) addr = parseaddr(force_text(addr))
nm, addr = addr nm, addr = addr
localpart, domain = None, None
nm = Header(nm, encoding).encode() nm = Header(nm, encoding).encode()
try: try:
addr.encode('ascii') addr.encode('ascii')
except UnicodeEncodeError: # IDN except UnicodeEncodeError: # IDN or non-ascii in the local part
if '@' in addr: localpart, domain = split_addr(addr, encoding)
localpart, domain = addr.split('@', 1)
localpart = str(Header(localpart, encoding)) if six.PY2:
domain = domain.encode('idna').decode('ascii') # On Python 2, use the stdlib since `email.headerregistry` doesn't exist.
from email.utils import formataddr
if localpart and domain:
addr = '@'.join([localpart, domain]) addr = '@'.join([localpart, domain])
else: return formataddr((nm, addr))
addr = Header(addr, encoding).encode()
return formataddr((nm, addr)) # On Python 3, an `email.headerregistry.Address` object is used since
# email.utils.formataddr() naively encodes the name as ascii (see #25986).
from email.headerregistry import Address
from email.errors import InvalidHeaderDefect, NonASCIILocalPartDefect
if localpart and domain:
address = Address(nm, username=localpart, domain=domain)
return str(address)
try:
address = Address(nm, addr_spec=addr)
except (InvalidHeaderDefect, NonASCIILocalPartDefect):
localpart, domain = split_addr(addr, encoding)
address = Address(nm, username=localpart, domain=domain)
return str(address)
class MIMEMixin(): class MIMEMixin():

View File

@ -9,6 +9,7 @@ import smtpd
import sys import sys
import tempfile import tempfile
import threading import threading
from email.header import Header
from email.mime.text import MIMEText from email.mime.text import MIMEText
from smtplib import SMTP, SMTPException from smtplib import SMTP, SMTPException
from ssl import SSLError from ssl import SSLError
@ -19,7 +20,7 @@ from django.core.mail import (
send_mail, send_mass_mail, send_mail, send_mass_mail,
) )
from django.core.mail.backends import console, dummy, filebased, locmem, smtp from django.core.mail.backends import console, dummy, filebased, locmem, smtp
from django.core.mail.message import BadHeaderError from django.core.mail.message import BadHeaderError, sanitize_address
from django.test import SimpleTestCase, override_settings from django.test import SimpleTestCase, override_settings
from django.utils._os import upath from django.utils._os import upath
from django.utils.encoding import force_bytes, force_text from django.utils.encoding import force_bytes, force_text
@ -567,6 +568,42 @@ class MailTests(HeadersCheckMixin, SimpleTestCase):
# Verify that the child message header is not base64 encoded # Verify that the child message header is not base64 encoded
self.assertIn(str('Child Subject'), parent_s) self.assertIn(str('Child Subject'), parent_s)
def test_sanitize_address(self):
"""
Email addresses are properly sanitized.
"""
# Simple ASCII address - string form
self.assertEqual(sanitize_address('to@example.com', 'ascii'), 'to@example.com')
self.assertEqual(sanitize_address('to@example.com', 'utf-8'), 'to@example.com')
# Bytestrings are transformed to normal strings.
self.assertEqual(sanitize_address(b'to@example.com', 'utf-8'), 'to@example.com')
# Simple ASCII address - tuple form
self.assertEqual(
sanitize_address(('A name', 'to@example.com'), 'ascii'),
'A name <to@example.com>'
)
if PY3:
self.assertEqual(
sanitize_address(('A name', 'to@example.com'), 'utf-8'),
'=?utf-8?q?A_name?= <to@example.com>'
)
else:
self.assertEqual(
sanitize_address(('A name', 'to@example.com'), 'utf-8'),
'A name <to@example.com>'
)
# Unicode characters are supported in RFC-6532.
self.assertEqual(
sanitize_address('tó@example.com', 'utf-8'),
'=?utf-8?b?dMOz?=@example.com'
)
self.assertEqual(
sanitize_address(('Tó Example', 'tó@example.com'), 'utf-8'),
'=?utf-8?q?T=C3=B3_Example?= <=?utf-8?b?dMOz?=@example.com>'
)
class PythonGlobalState(SimpleTestCase): class PythonGlobalState(SimpleTestCase):
""" """
@ -1026,6 +1063,15 @@ class FakeSMTPServer(smtpd.SMTPServer, threading.Thread):
data = data.encode('utf-8') data = data.encode('utf-8')
m = message_from_bytes(data) m = message_from_bytes(data)
maddr = parseaddr(m.get('from'))[1] maddr = parseaddr(m.get('from'))[1]
if mailfrom != maddr:
# According to the spec, mailfrom does not necessarily match the
# From header - on Python 3 this is the case where the local part
# isn't encoded, so try to correct that.
lp, domain = mailfrom.split('@', 1)
lp = Header(lp, 'utf-8').encode()
mailfrom = '@'.join([lp, domain])
if mailfrom != maddr: if mailfrom != maddr:
return "553 '%s' != '%s'" % (mailfrom, maddr) return "553 '%s' != '%s'" % (mailfrom, maddr)
with self.sink_lock: with self.sink_lock: