Fixed #25986 -- Fixed crash sending email with non-ASCII in local part of the address.

On Python 3, sending emails failed for addresses containing non-ASCII
characters due to the usage of the legacy Python email.utils.formataddr()
function. This is fixed by using the proper Address object on Python 3.
This commit is contained in:
Sergei Maertens 2016-04-02 11:41:47 +02:00 committed by Tim Graham
parent 086510fde0
commit ec009ef1d8
4 changed files with 104 additions and 13 deletions

View File

@ -660,6 +660,7 @@ answer newbie questions, and generally made Django that much better:
Sengtha Chay <sengtha@e-khmer.com>
Senko Rašić <senko.rasic@dobarkod.hr>
serbaut@gmail.com
Sergei Maertens <sergeimaertens@gmail.com>
Sergey Fedoseev <fedoseev.sergey@gmail.com>
Sergey Kolosov <m17.admin@gmail.com>
Seth Hill <sethrh@gmail.com>

View File

@ -115,9 +115,9 @@ class EmailBackend(BaseEmailBackend):
"""A helper method that does the actual sending."""
if not email_message.recipients():
return False
from_email = sanitize_address(email_message.from_email, email_message.encoding)
recipients = [sanitize_address(addr, email_message.encoding)
for addr in email_message.recipients()]
encoding = email_message.encoding or settings.DEFAULT_CHARSET
from_email = sanitize_address(email_message.from_email, encoding)
recipients = [sanitize_address(addr, encoding) for addr in email_message.recipients()]
message = email_message.message()
try:
self.connection.sendmail(from_email, recipients, message.as_bytes(linesep='\r\n'))

View File

@ -13,7 +13,7 @@ from email.mime.base import MIMEBase
from email.mime.message import MIMEMessage
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email.utils import formataddr, formatdate, getaddresses, parseaddr
from email.utils import formatdate, getaddresses, parseaddr
from io import BytesIO
from django.conf import settings
@ -103,22 +103,66 @@ def forbid_multi_line_headers(name, val, encoding):
return str(name), val
def split_addr(addr, encoding):
    """
    Split the address into local part and domain, properly encoded.

    When non-ascii characters are present in the local part, it must be
    MIME-word encoded. The domain name must be idna-encoded if it contains
    non-ascii characters.

    ``addr`` is the address as a text string and ``encoding`` is the charset
    used when the local part has to be MIME-word encoded.

    Return a ``(localpart, domain)`` tuple. ``domain`` is the empty string
    when ``addr`` contains no '@'; in that case the whole of ``addr`` is
    MIME-word encoded and returned as the local part.
    """
    if '@' in addr:
        # The domain is whatever follows the *last* '@': a quoted local part
        # may itself contain '@' (e.g. "a@b"@example.com), so splitting on
        # the first one would leak part of the local part into the domain.
        localpart, domain = addr.rsplit('@', 1)
        # Try to get the simplest encoding - ascii if possible so that
        # to@example.com doesn't become =?utf-8?q?to?=@example.com. This
        # makes unit testing a bit easier and more readable.
        try:
            localpart.encode('ascii')
        except UnicodeEncodeError:
            localpart = Header(localpart, encoding).encode()
        domain = domain.encode('idna').decode('ascii')
    else:
        localpart = Header(addr, encoding).encode()
        domain = ''
    return (localpart, domain)
def sanitize_address(addr, encoding):
"""
Format a pair of (name, address) or an email address string.
"""
# NOTE(review): this listing appears to interleave the removed and the added
# lines of a diff (two `except UnicodeEncodeError` clauses, two consecutive
# `return formataddr(...)` statements) and has lost its indentation - confirm
# against the real file before relying on the exact control flow shown here.
# Anything that is not already a (name, address) tuple is coerced to text and
# split into that pair with parseaddr().
if not isinstance(addr, tuple):
addr = parseaddr(force_text(addr))
nm, addr = addr
localpart, domain = None, None
# The display name is always passed through Header(...).encode().
nm = Header(nm, encoding).encode()
# Probe whether the address is pure ASCII; the encoded bytes are discarded.
try:
addr.encode('ascii')
except UnicodeEncodeError: # IDN
if '@' in addr:
localpart, domain = addr.split('@', 1)
localpart = str(Header(localpart, encoding))
domain = domain.encode('idna').decode('ascii')
except UnicodeEncodeError: # IDN or non-ascii in the local part
localpart, domain = split_addr(addr, encoding)
if six.PY2:
# On Python 2, use the stdlib since `email.headerregistry` doesn't exist.
from email.utils import formataddr
# localpart/domain are only set when the ASCII probe above failed.
if localpart and domain:
addr = '@'.join([localpart, domain])
else:
addr = Header(addr, encoding).encode()
return formataddr((nm, addr))
return formataddr((nm, addr))
# On Python 3, an `email.headerregistry.Address` object is used since
# email.utils.formataddr() naively encodes the name as ascii (see #25986).
from email.headerregistry import Address
from email.errors import InvalidHeaderDefect, NonASCIILocalPartDefect
# Already split (and encoded) above: build the Address from its parts.
if localpart and domain:
address = Address(nm, username=localpart, domain=domain)
return str(address)
# Otherwise let Address parse the addr-spec itself, and fall back to
# split_addr() if it rejects the address with one of the defects below.
try:
address = Address(nm, addr_spec=addr)
except (InvalidHeaderDefect, NonASCIILocalPartDefect):
localpart, domain = split_addr(addr, encoding)
address = Address(nm, username=localpart, domain=domain)
return str(address)
class MIMEMixin():

View File

@ -9,6 +9,7 @@ import smtpd
import sys
import tempfile
import threading
from email.header import Header
from email.mime.text import MIMEText
from smtplib import SMTP, SMTPException
from ssl import SSLError
@ -19,7 +20,7 @@ from django.core.mail import (
send_mail, send_mass_mail,
)
from django.core.mail.backends import console, dummy, filebased, locmem, smtp
from django.core.mail.message import BadHeaderError
from django.core.mail.message import BadHeaderError, sanitize_address
from django.test import SimpleTestCase, override_settings
from django.utils._os import upath
from django.utils.encoding import force_bytes, force_text
@ -567,6 +568,42 @@ class MailTests(HeadersCheckMixin, SimpleTestCase):
# Verify that the child message header is not base64 encoded
self.assertIn(str('Child Subject'), parent_s)
def test_sanitize_address(self):
    """
    sanitize_address() yields the expected header value for every input.
    """
    # The expected rendering of an ASCII display name with the utf-8 charset
    # differs between Python 3 and Python 2.
    if PY3:
        named_utf8_expected = '=?utf-8?q?A_name?= <to@example.com>'
    else:
        named_utf8_expected = 'A name <to@example.com>'

    # (input address, encoding, expected output), checked in this order.
    cases = (
        # Simple ASCII address - string form
        ('to@example.com', 'ascii', 'to@example.com'),
        ('to@example.com', 'utf-8', 'to@example.com'),
        # Bytestrings are transformed to normal strings.
        (b'to@example.com', 'utf-8', 'to@example.com'),
        # Simple ASCII address - tuple form
        (('A name', 'to@example.com'), 'ascii', 'A name <to@example.com>'),
        (('A name', 'to@example.com'), 'utf-8', named_utf8_expected),
        # Unicode characters are supported in RFC-6532.
        ('tó@example.com', 'utf-8', '=?utf-8?b?dMOz?=@example.com'),
        (
            ('Tó Example', 'tó@example.com'),
            'utf-8',
            '=?utf-8?q?T=C3=B3_Example?= <=?utf-8?b?dMOz?=@example.com>',
        ),
    )
    for email_address, charset, expected in cases:
        self.assertEqual(sanitize_address(email_address, charset), expected)
class PythonGlobalState(SimpleTestCase):
"""
@ -1026,6 +1063,15 @@ class FakeSMTPServer(smtpd.SMTPServer, threading.Thread):
data = data.encode('utf-8')
m = message_from_bytes(data)
maddr = parseaddr(m.get('from'))[1]
if mailfrom != maddr:
# According to the spec, mailfrom does not necessarily match the
# From header - on Python 3 this is the case where the local part
# isn't encoded, so try to correct that.
lp, domain = mailfrom.split('@', 1)
lp = Header(lp, 'utf-8').encode()
mailfrom = '@'.join([lp, domain])
if mailfrom != maddr:
return "553 '%s' != '%s'" % (mailfrom, maddr)
with self.sink_lock: