Fixed #30512 -- Used email.headerregistry.parser for parsing emails in sanitize_address().
This commit is contained in:
parent
0c2ffdd526
commit
2628ea9515
|
@ -2,15 +2,15 @@ import mimetypes
|
||||||
from email import (
|
from email import (
|
||||||
charset as Charset, encoders as Encoders, generator, message_from_string,
|
charset as Charset, encoders as Encoders, generator, message_from_string,
|
||||||
)
|
)
|
||||||
from email.errors import InvalidHeaderDefect, NonASCIILocalPartDefect
|
from email.errors import HeaderParseError
|
||||||
from email.header import Header
|
from email.header import Header
|
||||||
from email.headerregistry import Address
|
from email.headerregistry import Address, parser
|
||||||
from email.message import Message
|
from email.message import Message
|
||||||
from email.mime.base import MIMEBase
|
from email.mime.base import MIMEBase
|
||||||
from email.mime.message import MIMEMessage
|
from email.mime.message import MIMEMessage
|
||||||
from email.mime.multipart import MIMEMultipart
|
from email.mime.multipart import MIMEMultipart
|
||||||
from email.mime.text import MIMEText
|
from email.mime.text import MIMEText
|
||||||
from email.utils import formatdate, getaddresses, make_msgid, parseaddr
|
from email.utils import formatdate, getaddresses, make_msgid
|
||||||
from io import BytesIO, StringIO
|
from io import BytesIO, StringIO
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
@ -71,56 +71,44 @@ def forbid_multi_line_headers(name, val, encoding):
|
||||||
return name, val
|
return name, val
|
||||||
|
|
||||||
|
|
||||||
def split_addr(addr, encoding):
|
|
||||||
"""
|
|
||||||
Split the address into local part and domain and encode them.
|
|
||||||
|
|
||||||
When non-ascii characters are present in the local part, it must be
|
|
||||||
MIME-word encoded. The domain name must be idna-encoded if it contains
|
|
||||||
non-ascii characters.
|
|
||||||
"""
|
|
||||||
if '@' in addr:
|
|
||||||
localpart, domain = addr.split('@', 1)
|
|
||||||
# Try to get the simplest encoding - ascii if possible so that
|
|
||||||
# to@example.com doesn't become =?utf-8?q?to?=@example.com. This
|
|
||||||
# makes unit testing a bit easier and more readable.
|
|
||||||
try:
|
|
||||||
localpart.encode('ascii')
|
|
||||||
except UnicodeEncodeError:
|
|
||||||
localpart = Header(localpart, encoding).encode()
|
|
||||||
domain = domain.encode('idna').decode('ascii')
|
|
||||||
else:
|
|
||||||
localpart = Header(addr, encoding).encode()
|
|
||||||
domain = ''
|
|
||||||
return (localpart, domain)
|
|
||||||
|
|
||||||
|
|
||||||
def sanitize_address(addr, encoding):
|
def sanitize_address(addr, encoding):
|
||||||
"""
|
"""
|
||||||
Format a pair of (name, address) or an email address string.
|
Format a pair of (name, address) or an email address string.
|
||||||
"""
|
"""
|
||||||
|
address = None
|
||||||
if not isinstance(addr, tuple):
|
if not isinstance(addr, tuple):
|
||||||
addr = parseaddr(addr)
|
addr = force_str(addr)
|
||||||
nm, addr = addr
|
try:
|
||||||
localpart, domain = None, None
|
token, rest = parser.get_mailbox(addr)
|
||||||
|
except (HeaderParseError, ValueError, IndexError):
|
||||||
|
raise ValueError('Invalid address "%s"' % addr)
|
||||||
|
else:
|
||||||
|
if rest:
|
||||||
|
# The entire email address must be parsed.
|
||||||
|
raise ValueError(
|
||||||
|
'Invalid address; only %s could be parsed from "%s"'
|
||||||
|
% (token, addr)
|
||||||
|
)
|
||||||
|
nm = token.display_name or ''
|
||||||
|
localpart = token.local_part
|
||||||
|
domain = token.domain or ''
|
||||||
|
else:
|
||||||
|
nm, address = addr
|
||||||
|
localpart, domain = address.rsplit('@', 1)
|
||||||
|
|
||||||
nm = Header(nm, encoding).encode()
|
nm = Header(nm, encoding).encode()
|
||||||
|
# Avoid UTF-8 encoding if possible.
|
||||||
try:
|
try:
|
||||||
addr.encode('ascii')
|
localpart.encode('ascii')
|
||||||
except UnicodeEncodeError: # IDN or non-ascii in the local part
|
except UnicodeEncodeError:
|
||||||
localpart, domain = split_addr(addr, encoding)
|
localpart = Header(localpart, encoding).encode()
|
||||||
|
|
||||||
# An `email.headerregistry.Address` object is used since
|
|
||||||
# email.utils.formataddr() naively encodes the name as ascii (see #25986).
|
|
||||||
if localpart and domain:
|
|
||||||
address = Address(nm, username=localpart, domain=domain)
|
|
||||||
return str(address)
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
address = Address(nm, addr_spec=addr)
|
domain.encode('ascii')
|
||||||
except (InvalidHeaderDefect, NonASCIILocalPartDefect):
|
except UnicodeEncodeError:
|
||||||
localpart, domain = split_addr(addr, encoding)
|
domain = domain.encode('idna').decode('ascii')
|
||||||
address = Address(nm, username=localpart, domain=domain)
|
|
||||||
return str(address)
|
parsed_address = Address(nm, username=localpart, domain=domain)
|
||||||
|
return str(parsed_address)
|
||||||
|
|
||||||
|
|
||||||
class MIMEMixin:
|
class MIMEMixin:
|
||||||
|
|
|
@ -748,10 +748,30 @@ class MailTests(HeadersCheckMixin, SimpleTestCase):
|
||||||
'utf-8',
|
'utf-8',
|
||||||
'=?utf-8?q?to=40other=2Ecom?= <to@example.com>',
|
'=?utf-8?q?to=40other=2Ecom?= <to@example.com>',
|
||||||
),
|
),
|
||||||
|
(
|
||||||
|
('To Example', 'to@other.com@example.com'),
|
||||||
|
'utf-8',
|
||||||
|
'=?utf-8?q?To_Example?= <"to@other.com"@example.com>',
|
||||||
|
),
|
||||||
):
|
):
|
||||||
with self.subTest(email_address=email_address, encoding=encoding):
|
with self.subTest(email_address=email_address, encoding=encoding):
|
||||||
self.assertEqual(sanitize_address(email_address, encoding), expected_result)
|
self.assertEqual(sanitize_address(email_address, encoding), expected_result)
|
||||||
|
|
||||||
|
def test_sanitize_address_invalid(self):
|
||||||
|
for email_address in (
|
||||||
|
# Invalid address with two @ signs.
|
||||||
|
'to@other.com@example.com',
|
||||||
|
# Invalid address without the quotes.
|
||||||
|
'to@other.com <to@example.com>',
|
||||||
|
# Other invalid addresses.
|
||||||
|
'@',
|
||||||
|
'to@',
|
||||||
|
'@example.com',
|
||||||
|
):
|
||||||
|
with self.subTest(email_address=email_address):
|
||||||
|
with self.assertRaises(ValueError):
|
||||||
|
sanitize_address(email_address, encoding='utf-8')
|
||||||
|
|
||||||
|
|
||||||
@requires_tz_support
|
@requires_tz_support
|
||||||
class MailTimeZoneTests(SimpleTestCase):
|
class MailTimeZoneTests(SimpleTestCase):
|
||||||
|
|
Loading…
Reference in New Issue