diff --git a/django/middleware/common.py b/django/middleware/common.py index fa38c63a24..5aeb746f81 100644 --- a/django/middleware/common.py +++ b/django/middleware/common.py @@ -8,6 +8,7 @@ from django.core.exceptions import PermissionDenied from django.core.mail import mail_managers from django.utils.cache import get_conditional_response, set_response_etag from django.utils.encoding import force_text +from django.utils.six.moves.urllib.parse import urlparse logger = logging.getLogger('django.request') @@ -163,13 +164,17 @@ class BrokenLinkEmailsMiddleware(object): according to project settings or in three specific situations: - If the referer is empty. - If a '?' in referer is identified as a search engine source. - - If the referer is equal to the current URL (assumed to be a - malicious bot). + - If the referer is equal to the current URL, ignoring the scheme + (assumed to be a poorly implemented bot). """ - full_url = "%s://%s/%s" % (request.scheme, domain, uri.lstrip('/')) - if (not referer or - (not self.is_internal_request(domain, referer) and '?' in referer) or - (referer == uri or referer == full_url)): + if not referer: + return True + + if not self.is_internal_request(domain, referer) and '?' in referer: + return True + + parsed_referer = urlparse(referer) + if parsed_referer.netloc in ['', domain] and parsed_referer.path == uri: return True return any(pattern.search(uri) for pattern in settings.IGNORABLE_404_URLS) diff --git a/tests/middleware/tests.py b/tests/middleware/tests.py index 328073b8ac..e9ec2b46c5 100644 --- a/tests/middleware/tests.py +++ b/tests/middleware/tests.py @@ -383,11 +383,20 @@ class BrokenLinkEmailsMiddlewareTest(SimpleTestCase): self.req.META['HTTP_REFERER'] = self.req.path BrokenLinkEmailsMiddleware().process_response(self.req, self.resp) self.assertEqual(len(mail.outbox), 0) + # URL with scheme and domain should also be ignored self.req.META['HTTP_REFERER'] = 'http://testserver%s' % self.req.path BrokenLinkEmailsMiddleware().process_response(self.req, self.resp) self.assertEqual(len(mail.outbox), 0) + # URL with a different scheme should be ignored as well because bots + # tend to use http:// in referers even when browsing HTTPS websites. + self.req.META['HTTP_X_PROTO'] = 'https' + self.req.META['SERVER_PORT'] = 443 + with self.settings(SECURE_PROXY_SSL_HEADER=('HTTP_X_PROTO', 'https')): + BrokenLinkEmailsMiddleware().process_response(self.req, self.resp) + self.assertEqual(len(mail.outbox), 0) + def test_referer_equal_to_requested_url_on_another_domain(self): self.req.META['HTTP_REFERER'] = 'http://anotherserver%s' % self.req.path BrokenLinkEmailsMiddleware().process_response(self.req, self.resp)