Fixed #25302 -- Prevented BrokenLinkEmailsMiddleware from reporting 404s when Referer = URL.

This commit is contained in:
Maxime Lorant 2015-08-23 20:54:15 +02:00 committed by Tim Graham
parent d3fdaf907d
commit 4ce433e811
5 changed files with 43 additions and 5 deletions

View File

@ -483,6 +483,7 @@ answer newbie questions, and generally made Django that much better:
mattycakes@gmail.com mattycakes@gmail.com
Max Burstein <http://maxburstein.com> Max Burstein <http://maxburstein.com>
Max Derkachev <mderk@yandex.ru> Max Derkachev <mderk@yandex.ru>
Maxime Lorant <maxime.lorant@gmail.com>
Maxime Turcotte <maxocub@riseup.net> Maxime Turcotte <maxocub@riseup.net>
Maximillian Dornseif <md@hudora.de> Maximillian Dornseif <md@hudora.de>
mccutchen@gmail.com mccutchen@gmail.com

View File

@ -159,10 +159,17 @@ class BrokenLinkEmailsMiddleware(object):
def is_ignorable_request(self, request, uri, domain, referer): def is_ignorable_request(self, request, uri, domain, referer):
""" """
Returns True if the given request *shouldn't* notify the site managers. Return True if the given request *shouldn't* notify the site managers
according to project settings or in three specific situations:
- If the referer is empty.
- If the referer is external and contains a '?' (assumed to be a search
engine source).
- If the referer is equal to the current URL (assumed to be a
malicious bot).
""" """
# '?' in referer is identified as search engine source full_url = "%s://%s/%s" % (request.scheme, domain, uri.lstrip('/'))
if (not referer or if (not referer or
(not self.is_internal_request(domain, referer) and '?' in referer)): (not self.is_internal_request(domain, referer) and '?' in referer) or
(referer == uri or referer == full_url)):
return True return True
return any(pattern.search(uri) for pattern in settings.IGNORABLE_404_URLS) return any(pattern.search(uri) for pattern in settings.IGNORABLE_404_URLS)

View File

@ -61,8 +61,15 @@ not found" errors). Django sends emails about 404 errors when:
If those conditions are met, Django will email the users listed in the If those conditions are met, Django will email the users listed in the
:setting:`MANAGERS` setting whenever your code raises a 404 and the request has :setting:`MANAGERS` setting whenever your code raises a 404 and the request has
a referer. (It doesn't bother to email for 404s that don't have a referer -- a referer. It doesn't bother to email for 404s that don't have a referer --
those are usually just people typing in broken URLs or broken Web 'bots). those are usually just people typing in broken URLs or broken Web bots. It also
ignores 404s when the referer is equal to the requested URL, since this
behavior is also typical of broken Web bots.
.. versionchanged:: 1.9
In older versions, 404s were not ignored when the referer was equal to the
requested URL.
.. note:: .. note::

View File

@ -551,6 +551,11 @@ Requests and Responses
:class:`~django.http.JsonResponse` to allow passing keyword arguments to the :class:`~django.http.JsonResponse` to allow passing keyword arguments to the
``json.dumps()`` call used to generate the response. ``json.dumps()`` call used to generate the response.
* The :class:`~django.middleware.common.BrokenLinkEmailsMiddleware` now
ignores 404s when the referer is equal to the requested URL. To circumvent
the empty referer check already implemented, some Web bots set the referer to
the requested URL.
Tests Tests
^^^^^ ^^^^^

View File

@ -375,6 +375,24 @@ class BrokenLinkEmailsMiddlewareTest(SimpleTestCase):
SubclassedMiddleware().process_response(self.req, self.resp) SubclassedMiddleware().process_response(self.req, self.resp)
self.assertEqual(len(mail.outbox), 1) self.assertEqual(len(mail.outbox), 1)
def test_referer_equal_to_requested_url(self):
"""
Some bots set the referer to the current URL to avoid being blocked by
a referer check (#25302).
"""
self.req.META['HTTP_REFERER'] = self.req.path
BrokenLinkEmailsMiddleware().process_response(self.req, self.resp)
self.assertEqual(len(mail.outbox), 0)
# URL with scheme and domain should also be ignored
self.req.META['HTTP_REFERER'] = 'http://testserver%s' % self.req.path
BrokenLinkEmailsMiddleware().process_response(self.req, self.resp)
self.assertEqual(len(mail.outbox), 0)
def test_referer_equal_to_requested_url_on_another_domain(self):
self.req.META['HTTP_REFERER'] = 'http://anotherserver%s' % self.req.path
BrokenLinkEmailsMiddleware().process_response(self.req, self.resp)
self.assertEqual(len(mail.outbox), 1)
@override_settings(ROOT_URLCONF='middleware.cond_get_urls') @override_settings(ROOT_URLCONF='middleware.cond_get_urls')
class ConditionalGetMiddlewareTest(SimpleTestCase): class ConditionalGetMiddlewareTest(SimpleTestCase):