diff --git a/AUTHORS b/AUTHORS index e217143ac8..b31b531b73 100644 --- a/AUTHORS +++ b/AUTHORS @@ -660,6 +660,7 @@ answer newbie questions, and generally made Django that much better: Sengtha Chay Senko Rašić serbaut@gmail.com + Sergei Maertens Sergey Fedoseev Sergey Kolosov Seth Hill diff --git a/django/core/mail/backends/smtp.py b/django/core/mail/backends/smtp.py index 432f3a64c4..4e085c15f8 100644 --- a/django/core/mail/backends/smtp.py +++ b/django/core/mail/backends/smtp.py @@ -115,9 +115,9 @@ class EmailBackend(BaseEmailBackend): """A helper method that does the actual sending.""" if not email_message.recipients(): return False - from_email = sanitize_address(email_message.from_email, email_message.encoding) - recipients = [sanitize_address(addr, email_message.encoding) - for addr in email_message.recipients()] + encoding = email_message.encoding or settings.DEFAULT_CHARSET + from_email = sanitize_address(email_message.from_email, encoding) + recipients = [sanitize_address(addr, encoding) for addr in email_message.recipients()] message = email_message.message() try: self.connection.sendmail(from_email, recipients, message.as_bytes(linesep='\r\n')) diff --git a/django/core/mail/message.py b/django/core/mail/message.py index 8dbc6b935c..e2ee13c7cb 100644 --- a/django/core/mail/message.py +++ b/django/core/mail/message.py @@ -13,7 +13,7 @@ from email.mime.base import MIMEBase from email.mime.message import MIMEMessage from email.mime.multipart import MIMEMultipart from email.mime.text import MIMEText -from email.utils import formataddr, formatdate, getaddresses, parseaddr +from email.utils import formatdate, getaddresses, parseaddr from io import BytesIO from django.conf import settings @@ -103,22 +103,66 @@ def forbid_multi_line_headers(name, val, encoding): return str(name), val +def split_addr(addr, encoding): + """ + Split the address into local part and domain, properly encoded. 
+ + When non-ascii characters are present in the local part, it must be + MIME-word encoded. The domain name must be idna-encoded if it contains + non-ascii characters. + """ + if '@' in addr: + localpart, domain = addr.split('@', 1) + # Try to get the simplest encoding - ascii if possible so that + # to@example.com doesn't become =?utf-8?q?to?=@example.com. This + # makes unit testing a bit easier and more readable. + try: + localpart.encode('ascii') + except UnicodeEncodeError: + localpart = Header(localpart, encoding).encode() + domain = domain.encode('idna').decode('ascii') + else: + localpart = Header(addr, encoding).encode() + domain = '' + return (localpart, domain) + + def sanitize_address(addr, encoding): + """ + Format a pair of (name, address) or an email address string. + """ if not isinstance(addr, tuple): addr = parseaddr(force_text(addr)) nm, addr = addr + localpart, domain = None, None nm = Header(nm, encoding).encode() try: addr.encode('ascii') - except UnicodeEncodeError: # IDN - if '@' in addr: - localpart, domain = addr.split('@', 1) - localpart = str(Header(localpart, encoding)) - domain = domain.encode('idna').decode('ascii') + except UnicodeEncodeError: # IDN or non-ascii in the local part + localpart, domain = split_addr(addr, encoding) + + if six.PY2: + # On Python 2, use the stdlib since `email.headerregistry` doesn't exist. + from email.utils import formataddr + if localpart and domain: addr = '@'.join([localpart, domain]) - else: - addr = Header(addr, encoding).encode() - return formataddr((nm, addr)) + return formataddr((nm, addr)) + + # On Python 3, an `email.headerregistry.Address` object is used since + # email.utils.formataddr() naively encodes the name as ascii (see #25986). 
+ from email.headerregistry import Address + from email.errors import InvalidHeaderDefect, NonASCIILocalPartDefect + + if localpart and domain: + address = Address(nm, username=localpart, domain=domain) + return str(address) + + try: + address = Address(nm, addr_spec=addr) + except (InvalidHeaderDefect, NonASCIILocalPartDefect): + localpart, domain = split_addr(addr, encoding) + address = Address(nm, username=localpart, domain=domain) + return str(address) class MIMEMixin(): diff --git a/tests/mail/tests.py b/tests/mail/tests.py index 4e7075e695..7988e08612 100644 --- a/tests/mail/tests.py +++ b/tests/mail/tests.py @@ -9,6 +9,7 @@ import smtpd import sys import tempfile import threading +from email.header import Header from email.mime.text import MIMEText from smtplib import SMTP, SMTPException from ssl import SSLError @@ -19,7 +20,7 @@ from django.core.mail import ( send_mail, send_mass_mail, ) from django.core.mail.backends import console, dummy, filebased, locmem, smtp -from django.core.mail.message import BadHeaderError +from django.core.mail.message import BadHeaderError, sanitize_address from django.test import SimpleTestCase, override_settings from django.utils._os import upath from django.utils.encoding import force_bytes, force_text @@ -567,6 +568,42 @@ class MailTests(HeadersCheckMixin, SimpleTestCase): # Verify that the child message header is not base64 encoded self.assertIn(str('Child Subject'), parent_s) + def test_sanitize_address(self): + """ + Email addresses are properly sanitized. + """ + # Simple ASCII address - string form + self.assertEqual(sanitize_address('to@example.com', 'ascii'), 'to@example.com') + self.assertEqual(sanitize_address('to@example.com', 'utf-8'), 'to@example.com') + # Bytestrings are transformed to normal strings. 
+ self.assertEqual(sanitize_address(b'to@example.com', 'utf-8'), 'to@example.com') + + # Simple ASCII address - tuple form + self.assertEqual( + sanitize_address(('A name', 'to@example.com'), 'ascii'), + 'A name <to@example.com>' + ) + if PY3: + self.assertEqual( + sanitize_address(('A name', 'to@example.com'), 'utf-8'), + '=?utf-8?q?A_name?= <to@example.com>' + ) + else: + self.assertEqual( + sanitize_address(('A name', 'to@example.com'), 'utf-8'), + 'A name <to@example.com>' + ) + + # Unicode characters are supported in RFC-6532. + self.assertEqual( + sanitize_address('tó@example.com', 'utf-8'), + '=?utf-8?b?dMOz?=@example.com' + ) + self.assertEqual( + sanitize_address(('Tó Example', 'tó@example.com'), 'utf-8'), + '=?utf-8?q?T=C3=B3_Example?= <=?utf-8?b?dMOz?=@example.com>' + ) + class PythonGlobalState(SimpleTestCase): """ @@ -1026,6 +1063,15 @@ class FakeSMTPServer(smtpd.SMTPServer, threading.Thread): data = data.encode('utf-8') m = message_from_bytes(data) maddr = parseaddr(m.get('from'))[1] + + if mailfrom != maddr: + # According to the spec, mailfrom does not necessarily match the + # From header - on Python 3 this is the case where the local part + # isn't encoded, so try to correct that. + lp, domain = mailfrom.split('@', 1) + lp = Header(lp, 'utf-8').encode() + mailfrom = '@'.join([lp, domain]) + if mailfrom != maddr: return "553 '%s' != '%s'" % (mailfrom, maddr) with self.sink_lock: