From 4ce433e811763f29c32e0553fe1e0070fd14c6a2 Mon Sep 17 00:00:00 2001 From: Maxime Lorant Date: Sun, 23 Aug 2015 20:54:15 +0200 Subject: [PATCH] Fixed #25302 -- Prevented BrokenLinkEmailsMiddleware from reporting 404s when Referer = URL. --- AUTHORS | 1 + django/middleware/common.py | 13 ++++++++++--- docs/howto/error-reporting.txt | 11 +++++++++-- docs/releases/1.9.txt | 5 +++++ tests/middleware/tests.py | 18 ++++++++++++++++++ 5 files changed, 43 insertions(+), 5 deletions(-) diff --git a/AUTHORS b/AUTHORS index 3fa10dc0a3..8a13ca9965 100644 --- a/AUTHORS +++ b/AUTHORS @@ -483,6 +483,7 @@ answer newbie questions, and generally made Django that much better: mattycakes@gmail.com Max Burstein Max Derkachev + Maxime Lorant Maxime Turcotte Maximillian Dornseif mccutchen@gmail.com diff --git a/django/middleware/common.py b/django/middleware/common.py index 376e82b939..fa38c63a24 100644 --- a/django/middleware/common.py +++ b/django/middleware/common.py @@ -159,10 +159,17 @@ class BrokenLinkEmailsMiddleware(object): def is_ignorable_request(self, request, uri, domain, referer): """ - Returns True if the given request *shouldn't* notify the site managers. + Return True if the given request *shouldn't* notify the site managers + according to project settings or in three specific situations: + - If the referer is empty. + - If a '?' in referer is identified as a search engine source. + - If the referer is equal to the current URL (assumed to be a + malicious bot). """ - # '?' in referer is identified as search engine source + full_url = "%s://%s/%s" % (request.scheme, domain, uri.lstrip('/')) if (not referer or - (not self.is_internal_request(domain, referer) and '?' in referer)): + (not self.is_internal_request(domain, referer) and '?' 
in referer) or + (referer == uri or referer == full_url)): return True + return any(pattern.search(uri) for pattern in settings.IGNORABLE_404_URLS) diff --git a/docs/howto/error-reporting.txt b/docs/howto/error-reporting.txt index be5634e48b..d27f734398 100644 --- a/docs/howto/error-reporting.txt +++ b/docs/howto/error-reporting.txt @@ -61,8 +61,15 @@ not found" errors). Django sends emails about 404 errors when: If those conditions are met, Django will email the users listed in the :setting:`MANAGERS` setting whenever your code raises a 404 and the request has -a referer. (It doesn't bother to email for 404s that don't have a referer -- -those are usually just people typing in broken URLs or broken Web 'bots). +a referer. It doesn't bother to email for 404s that don't have a referer -- +those are usually just people typing in broken URLs or broken Web bots. It also +ignores 404s when the referer is equal to the requested URL, since this +behavior is from broken Web bots too. + +.. versionchanged:: 1.9 + + In older versions, 404s were not ignored when the referer was equal to the + requested URL. .. note:: diff --git a/docs/releases/1.9.txt b/docs/releases/1.9.txt index 54323c09f7..6460b8a7bc 100644 --- a/docs/releases/1.9.txt +++ b/docs/releases/1.9.txt @@ -551,6 +551,11 @@ Requests and Responses :class:`~django.http.JsonResponse` to allow passing keyword arguments to the ``json.dumps()`` call used to generate the response. +* The :class:`~django.middleware.common.BrokenLinkEmailsMiddleware` now + ignores 404s when the referer is equal to the requested URL. To circumvent + the empty referer check already implemented, some Web bots set the referer to + the requested URL. 
+ Tests ^^^^^ diff --git a/tests/middleware/tests.py b/tests/middleware/tests.py index 21f45419c0..6e1bbaf772 100644 --- a/tests/middleware/tests.py +++ b/tests/middleware/tests.py @@ -375,6 +375,24 @@ class BrokenLinkEmailsMiddlewareTest(SimpleTestCase): SubclassedMiddleware().process_response(self.req, self.resp) self.assertEqual(len(mail.outbox), 1) + def test_referer_equal_to_requested_url(self): + """ + Some bots set the referer to the current URL to avoid being blocked by + a referer check (#25302). + """ + self.req.META['HTTP_REFERER'] = self.req.path + BrokenLinkEmailsMiddleware().process_response(self.req, self.resp) + self.assertEqual(len(mail.outbox), 0) + # URL with scheme and domain should also be ignored + self.req.META['HTTP_REFERER'] = 'http://testserver%s' % self.req.path + BrokenLinkEmailsMiddleware().process_response(self.req, self.resp) + self.assertEqual(len(mail.outbox), 0) + + def test_referer_equal_to_requested_url_on_another_domain(self): + self.req.META['HTTP_REFERER'] = 'http://anotherserver%s' % self.req.path + BrokenLinkEmailsMiddleware().process_response(self.req, self.resp) + self.assertEqual(len(mail.outbox), 1) + @override_settings(ROOT_URLCONF='middleware.cond_get_urls') class ConditionalGetMiddlewareTest(SimpleTestCase):