Fixed #25302 (again) -- Ignored scheme when checking for bad referers.

The check introduced in 4ce433e was too strict in practice. The poorly
implemented bots that commit attempted to ignore are sloppy when it comes
to http vs. https, so a referer's scheme may differ from the request's.
This commit is contained in:
Aymeric Augustin 2015-11-26 21:27:12 +01:00
parent cc2ca9c550
commit 11f10b70f3
2 changed files with 20 additions and 6 deletions

View File

@ -8,6 +8,7 @@ from django.core.exceptions import PermissionDenied
from django.core.mail import mail_managers from django.core.mail import mail_managers
from django.utils.cache import get_conditional_response, set_response_etag from django.utils.cache import get_conditional_response, set_response_etag
from django.utils.encoding import force_text from django.utils.encoding import force_text
from django.utils.six.moves.urllib.parse import urlparse
logger = logging.getLogger('django.request') logger = logging.getLogger('django.request')
@ -163,13 +164,17 @@ class BrokenLinkEmailsMiddleware(object):
according to project settings or in three specific situations: according to project settings or in three specific situations:
- If the referer is empty. - If the referer is empty.
- If a '?' in referer is identified as a search engine source. - If a '?' in referer is identified as a search engine source.
- If the referer is equal to the current URL (assumed to be a - If the referer is equal to the current URL, ignoring the scheme
malicious bot). (assumed to be a poorly implemented bot).
""" """
full_url = "%s://%s/%s" % (request.scheme, domain, uri.lstrip('/')) if not referer:
if (not referer or return True
(not self.is_internal_request(domain, referer) and '?' in referer) or
(referer == uri or referer == full_url)): if not self.is_internal_request(domain, referer) and '?' in referer:
return True
parsed_referer = urlparse(referer)
if parsed_referer.netloc in ['', domain] and parsed_referer.path == uri:
return True return True
return any(pattern.search(uri) for pattern in settings.IGNORABLE_404_URLS) return any(pattern.search(uri) for pattern in settings.IGNORABLE_404_URLS)

View File

@ -383,11 +383,20 @@ class BrokenLinkEmailsMiddlewareTest(SimpleTestCase):
self.req.META['HTTP_REFERER'] = self.req.path self.req.META['HTTP_REFERER'] = self.req.path
BrokenLinkEmailsMiddleware().process_response(self.req, self.resp) BrokenLinkEmailsMiddleware().process_response(self.req, self.resp)
self.assertEqual(len(mail.outbox), 0) self.assertEqual(len(mail.outbox), 0)
# URL with scheme and domain should also be ignored # URL with scheme and domain should also be ignored
self.req.META['HTTP_REFERER'] = 'http://testserver%s' % self.req.path self.req.META['HTTP_REFERER'] = 'http://testserver%s' % self.req.path
BrokenLinkEmailsMiddleware().process_response(self.req, self.resp) BrokenLinkEmailsMiddleware().process_response(self.req, self.resp)
self.assertEqual(len(mail.outbox), 0) self.assertEqual(len(mail.outbox), 0)
# URL with a different scheme should be ignored as well because bots
# tend to use http:// in referers even when browsing HTTPS websites.
self.req.META['HTTP_X_PROTO'] = 'https'
self.req.META['SERVER_PORT'] = 443
with self.settings(SECURE_PROXY_SSL_HEADER=('HTTP_X_PROTO', 'https')):
BrokenLinkEmailsMiddleware().process_response(self.req, self.resp)
self.assertEqual(len(mail.outbox), 0)
def test_referer_equal_to_requested_url_on_another_domain(self): def test_referer_equal_to_requested_url_on_another_domain(self):
self.req.META['HTTP_REFERER'] = 'http://anotherserver%s' % self.req.path self.req.META['HTTP_REFERER'] = 'http://anotherserver%s' % self.req.path
BrokenLinkEmailsMiddleware().process_response(self.req, self.resp) BrokenLinkEmailsMiddleware().process_response(self.req, self.resp)