Fixed #30512 -- Used email.headerregistry.parser for parsing emails in sanitize_address().
This commit is contained in:
parent
0c2ffdd526
commit
2628ea9515
|
@ -2,15 +2,15 @@ import mimetypes
|
|||
from email import (
|
||||
charset as Charset, encoders as Encoders, generator, message_from_string,
|
||||
)
|
||||
from email.errors import InvalidHeaderDefect, NonASCIILocalPartDefect
|
||||
from email.errors import HeaderParseError
|
||||
from email.header import Header
|
||||
from email.headerregistry import Address
|
||||
from email.headerregistry import Address, parser
|
||||
from email.message import Message
|
||||
from email.mime.base import MIMEBase
|
||||
from email.mime.message import MIMEMessage
|
||||
from email.mime.multipart import MIMEMultipart
|
||||
from email.mime.text import MIMEText
|
||||
from email.utils import formatdate, getaddresses, make_msgid, parseaddr
|
||||
from email.utils import formatdate, getaddresses, make_msgid
|
||||
from io import BytesIO, StringIO
|
||||
from pathlib import Path
|
||||
|
||||
|
@ -71,56 +71,44 @@ def forbid_multi_line_headers(name, val, encoding):
|
|||
return name, val
|
||||
|
||||
|
||||
def split_addr(addr, encoding):
    """
    Split the address into local part and domain and encode them.

    When non-ascii characters are present in the local part, it must be
    MIME-word encoded. The domain name must be idna-encoded if it contains
    non-ascii characters.

    The address is split on its *last* '@': a domain can never contain '@',
    whereas a (quoted) local part can, so 'to@other.com@example.com' yields
    the local part 'to@other.com' and the domain 'example.com'.

    Return a ``(localpart, domain)`` tuple. When ``addr`` contains no '@',
    the whole value is MIME-word encoded as the local part and the domain is
    the empty string.
    """
    if '@' not in addr:
        return Header(addr, encoding).encode(), ''

    localpart, domain = addr.rsplit('@', 1)
    # Try to get the simplest encoding - ascii if possible so that
    # to@example.com doesn't become =?utf-8?q?to?=@example.com. This
    # makes unit testing a bit easier and more readable.
    try:
        localpart.encode('ascii')
    except UnicodeEncodeError:
        localpart = Header(localpart, encoding).encode()
    domain = domain.encode('idna').decode('ascii')
    return localpart, domain
|
||||
|
||||
|
||||
def sanitize_address(addr, encoding):
    """
    Format a pair of (name, address) or an email address string.

    ``addr`` is either a ``(display_name, addr_spec)`` tuple or a single
    string such as ``'Name <user@example.com>'``. A string is parsed with
    ``email.headerregistry.parser.get_mailbox()`` and must be consumed
    entirely; a tuple's address is split on its last '@'.

    The display name is MIME-word encoded with ``encoding``; a non-ascii
    local part is MIME-word encoded and a non-ascii domain is idna-encoded.

    Return the formatted address as a string. Raise ValueError if the
    address cannot be parsed, has no '@' (tuple form), or if any part
    contains a CR or LF character.
    """
    if not isinstance(addr, tuple):
        addr = force_str(addr)
        try:
            token, rest = parser.get_mailbox(addr)
        except (HeaderParseError, ValueError, IndexError):
            raise ValueError('Invalid address "%s"' % addr)
        else:
            if rest:
                # The entire email address must be parsed.
                raise ValueError(
                    'Invalid address; only %s could be parsed from "%s"'
                    % (token, addr)
                )
            nm = token.display_name or ''
            localpart = token.local_part
            domain = token.domain or ''
    else:
        nm, address = addr
        if '@' not in address:
            # Fail with a clear message rather than an unpacking error.
            raise ValueError('Invalid address "%s"' % address)
        localpart, domain = address.rsplit('@', 1)

    # Newlines in any part would allow injecting extra headers.
    address_parts = nm + localpart + domain
    if '\n' in address_parts or '\r' in address_parts:
        raise ValueError('Invalid address; address parts cannot contain newlines.')

    nm = Header(nm, encoding).encode()
    # Avoid UTF-8 encode, if it's possible.
    try:
        localpart.encode('ascii')
    except UnicodeEncodeError:
        localpart = Header(localpart, encoding).encode()
    try:
        domain.encode('ascii')
    except UnicodeEncodeError:
        domain = domain.encode('idna').decode('ascii')

    # An `email.headerregistry.Address` object is used since
    # email.utils.formataddr() naively encodes the name as ascii (see #25986).
    parsed_address = Address(nm, username=localpart, domain=domain)
    return str(parsed_address)
|
||||
|
||||
|
||||
class MIMEMixin:
|
||||
|
|
|
@ -748,10 +748,30 @@ class MailTests(HeadersCheckMixin, SimpleTestCase):
|
|||
'utf-8',
|
||||
'=?utf-8?q?to=40other=2Ecom?= <to@example.com>',
|
||||
),
|
||||
(
|
||||
('To Example', 'to@other.com@example.com'),
|
||||
'utf-8',
|
||||
'=?utf-8?q?To_Example?= <"to@other.com"@example.com>',
|
||||
),
|
||||
):
|
||||
with self.subTest(email_address=email_address, encoding=encoding):
|
||||
self.assertEqual(sanitize_address(email_address, encoding), expected_result)
|
||||
|
||||
def test_sanitize_address_invalid(self):
    """Each malformed address string makes sanitize_address() raise ValueError."""
    invalid_addresses = (
        # Invalid address with two @ signs.
        'to@other.com@example.com',
        # Invalid address without the quotes.
        'to@other.com <to@example.com>',
        # Other invalid addresses.
        '@',
        'to@',
        '@example.com',
    )
    for email_address in invalid_addresses:
        # One sub-test per address so a failure reports the offending value.
        with self.subTest(email_address=email_address), self.assertRaises(ValueError):
            sanitize_address(email_address, encoding='utf-8')
|
||||
|
||||
|
||||
@requires_tz_support
|
||||
class MailTimeZoneTests(SimpleTestCase):
|
||||
|
|
Loading…
Reference in New Issue