From 50a985b09b439a0d52aad8694d377a3483cb02e1 Mon Sep 17 00:00:00 2001 From: Aymeric Augustin Date: Tue, 1 Jan 2013 22:28:48 +0100 Subject: [PATCH] Fixed #19099 -- Split broken link emails out of common middleware. --- django/conf/global_settings.py | 4 +- django/middleware/common.py | 76 ++++++++++++++--------- docs/howto/error-reporting.txt | 19 +++--- docs/internals/deprecation.txt | 7 +++ docs/ref/middleware.txt | 8 ++- docs/ref/settings.txt | 13 +++- docs/releases/1.6.txt | 18 ++++++ tests/regressiontests/middleware/tests.py | 61 ++++++++++++++---- 8 files changed, 146 insertions(+), 60 deletions(-) diff --git a/django/conf/global_settings.py b/django/conf/global_settings.py index 4d69c6365f..740c792dcf 100644 --- a/django/conf/global_settings.py +++ b/django/conf/global_settings.py @@ -146,7 +146,7 @@ FILE_CHARSET = 'utf-8' # Email address that error messages come from. SERVER_EMAIL = 'root@localhost' -# Whether to send broken-link emails. +# Whether to send broken-link emails. Deprecated, must be removed in 1.8. SEND_BROKEN_LINK_EMAILS = False # Database connection info. If left empty, will default to the dummy backend. @@ -245,7 +245,7 @@ ALLOWED_INCLUDE_ROOTS = () ADMIN_FOR = () # List of compiled regular expression objects representing URLs that need not -# be reported when SEND_BROKEN_LINK_EMAILS is True. Here are a few examples: +# be reported by BrokenLinkEmailsMiddleware. Here are a few examples: # import re # IGNORABLE_404_URLS = ( # re.compile(r'^/apple-touch-icon.*\.png$'), diff --git a/django/middleware/common.py b/django/middleware/common.py index c6e71e0d48..92f8cb3992 100644 --- a/django/middleware/common.py +++ b/django/middleware/common.py @@ -1,13 +1,14 @@ import hashlib import logging import re +import warnings from django.conf import settings -from django import http from django.core.mail import mail_managers +from django.core import urlresolvers +from django import http from django.utils.http import urlquote from django.utils import six -from django.core import urlresolvers logger = logging.getLogger('django.request') @@ -102,25 +103,15 @@ class CommonMiddleware(object): return http.HttpResponsePermanentRedirect(newurl) def process_response(self, request, response): - "Send broken link emails and calculate the Etag, if needed." - if response.status_code == 404: - if settings.SEND_BROKEN_LINK_EMAILS and not settings.DEBUG: - # If the referrer was from an internal link or a non-search-engine site, - # send a note to the managers. - domain = request.get_host() - referer = request.META.get('HTTP_REFERER', None) - is_internal = _is_internal_request(domain, referer) - path = request.get_full_path() - if referer and not _is_ignorable_404(path) and (is_internal or '?' not in referer): - ua = request.META.get('HTTP_USER_AGENT', '') - ip = request.META.get('REMOTE_ADDR', '') - mail_managers("Broken %slink on %s" % ((is_internal and 'INTERNAL ' or ''), domain), - "Referrer: %s\nRequested URL: %s\nUser agent: %s\nIP address: %s\n" \ - % (referer, request.get_full_path(), ua, ip), - fail_silently=True) - return response + """ + Calculate the ETag, if needed. + """ + if settings.SEND_BROKEN_LINK_EMAILS: + warnings.warn("SEND_BROKEN_LINK_EMAILS is deprecated. " + "Use BrokenLinkEmailsMiddleware instead.", + PendingDeprecationWarning, stacklevel=2) + BrokenLinkEmailsMiddleware().process_response(request, response) - # Use ETags, if requested. if settings.USE_ETAGS: if response.has_header('ETag'): etag = response['ETag'] @@ -139,15 +130,38 @@ class CommonMiddleware(object): return response -def _is_ignorable_404(uri): - """ - Returns True if a 404 at the given URL *shouldn't* notify the site managers. - """ - return any(pattern.search(uri) for pattern in settings.IGNORABLE_404_URLS) -def _is_internal_request(domain, referer): - """ - Returns true if the referring URL is the same domain as the current request. - """ - # Different subdomains are treated as different domains. - return referer is not None and re.match("^https?://%s/" % re.escape(domain), referer) +class BrokenLinkEmailsMiddleware(object): + + def process_response(self, request, response): + """ + Send broken link emails for relevant 404 NOT FOUND responses. + """ + if response.status_code == 404 and not settings.DEBUG: + domain = request.get_host() + path = request.get_full_path() + referer = request.META.get('HTTP_REFERER', '') + is_internal = self.is_internal_request(domain, referer) + is_not_search_engine = '?' not in referer + is_ignorable = self.is_ignorable_404(path) + if referer and (is_internal or is_not_search_engine) and not is_ignorable: + ua = request.META.get('HTTP_USER_AGENT', '') + ip = request.META.get('REMOTE_ADDR', '') + mail_managers( + "Broken %slink on %s" % (('INTERNAL ' if is_internal else ''), domain), + "Referrer: %s\nRequested URL: %s\nUser agent: %s\nIP address: %s\n" % (referer, path, ua, ip), + fail_silently=True) + return response + + def is_internal_request(self, domain, referer): + """ + Returns True if the referring URL is the same domain as the current request. + """ + # Different subdomains are treated as different domains. + return re.match("^https?://%s/" % re.escape(domain), referer) + + def is_ignorable_404(self, uri): + """ + Returns True if a 404 at the given URL *shouldn't* notify the site managers. + """ + return any(pattern.search(uri) for pattern in settings.IGNORABLE_404_URLS) diff --git a/docs/howto/error-reporting.txt b/docs/howto/error-reporting.txt index 742b81b7e2..7f3c68c136 100644 --- a/docs/howto/error-reporting.txt +++ b/docs/howto/error-reporting.txt @@ -54,18 +54,24 @@ setting. Django can also be configured to email errors about broken links (404 "page not found" errors). Django sends emails about 404 errors when: -* :setting:`DEBUG` is ``False`` +* :setting:`DEBUG` is ``False``; -* :setting:`SEND_BROKEN_LINK_EMAILS` is ``True`` - -* Your :setting:`MIDDLEWARE_CLASSES` setting includes ``CommonMiddleware`` - (which it does by default). +* Your :setting:`MIDDLEWARE_CLASSES` setting includes + :class:`django.middleware.common.BrokenLinkEmailsMiddleware`. If those conditions are met, Django will email the users listed in the :setting:`MANAGERS` setting whenever your code raises a 404 and the request has a referer. (It doesn't bother to email for 404s that don't have a referer -- those are usually just people typing in broken URLs or broken Web 'bots). +.. note:: + + :class:`~django.middleware.common.BrokenLinkEmailsMiddleware` must appear + before other middleware that intercepts 404 errors, such as + :class:`~django.middleware.locale.LocaleMiddleware` or + :class:`~django.contrib.flatpages.middleware.FlatpageFallbackMiddleware`. + Put it towards the top of your :setting:`MIDDLEWARE_CLASSES` setting. + You can tell Django to stop reporting particular 404s by tweaking the :setting:`IGNORABLE_404_URLS` setting. It should be a tuple of compiled regular expression objects. For example:: @@ -92,9 +98,6 @@ crawlers often request:: (Note that these are regular expressions, so we put a backslash in front of periods to escape them.) -The best way to disable this behavior is to set -:setting:`SEND_BROKEN_LINK_EMAILS` to ``False``. - .. seealso:: 404 errors are logged using the logging framework. By default, these log diff --git a/docs/internals/deprecation.txt b/docs/internals/deprecation.txt index faa6d1ff02..63d65d1e4a 100644 --- a/docs/internals/deprecation.txt +++ b/docs/internals/deprecation.txt @@ -308,6 +308,13 @@ these changes. * The ``depth`` keyword argument will be removed from :meth:`~django.db.models.query.QuerySet.select_related`. +1.8 +--- + +* The ``SEND_BROKEN_LINK_EMAILS`` setting will be removed. Add the + :class:`django.middleware.common.BrokenLinkEmailsMiddleware` middleware to + your :setting:`MIDDLEWARE_CLASSES` setting instead. + 2.0 --- diff --git a/docs/ref/middleware.txt b/docs/ref/middleware.txt index 2b053d80ab..1e6e57f720 100644 --- a/docs/ref/middleware.txt +++ b/docs/ref/middleware.txt @@ -61,14 +61,16 @@ Adds a few conveniences for perfectionists: indexer would treat them as separate URLs -- so it's best practice to normalize URLs. -* Sends broken link notification emails to :setting:`MANAGERS` if - :setting:`SEND_BROKEN_LINK_EMAILS` is set to ``True``. - * Handles ETags based on the :setting:`USE_ETAGS` setting. If :setting:`USE_ETAGS` is set to ``True``, Django will calculate an ETag for each request by MD5-hashing the page content, and it'll take care of sending ``Not Modified`` responses, if appropriate. +.. class:: BrokenLinkEmailsMiddleware + +* Sends broken link notification emails to :setting:`MANAGERS` (see + :doc:`/howto/error-reporting`). + View metadata middleware ------------------------ diff --git a/docs/ref/settings.txt b/docs/ref/settings.txt index 110d5dbdc9..d057323c06 100644 --- a/docs/ref/settings.txt +++ b/docs/ref/settings.txt @@ -1090,8 +1090,9 @@ query string, if any). Use this if your site does not provide a commonly requested file such as ``favicon.ico`` or ``robots.txt``, or if it gets hammered by script kiddies. -This is only used if :setting:`SEND_BROKEN_LINK_EMAILS` is set to ``True`` and -``CommonMiddleware`` is installed (see :doc:`/topics/http/middleware`). +This is only used if +:class:`~django.middleware.common.BrokenLinkEmailsMiddleware` is enabled (see +:doc:`/topics/http/middleware`). .. setting:: INSTALLED_APPS @@ -1250,7 +1251,8 @@ MANAGERS Default: ``()`` (Empty tuple) A tuple in the same format as :setting:`ADMINS` that specifies who should get -broken-link notifications when :setting:`SEND_BROKEN_LINK_EMAILS` is ``True``. +broken link notifications when +:class:`~django.middleware.common.BrokenLinkEmailsMiddleware` is enabled. .. setting:: MEDIA_ROOT @@ -1448,6 +1450,11 @@ available in ``request.META``.) SEND_BROKEN_LINK_EMAILS ----------------------- +.. deprecated:: 1.6 + Since :class:`~django.middleware.common.BrokenLinkEmailsMiddleware` + was split from :class:`~django.middleware.common.CommonMiddleware`, + this setting no longer serves a purpose. + Default: ``False`` Whether to send an email to the :setting:`MANAGERS` each time somebody visits diff --git a/docs/releases/1.6.txt b/docs/releases/1.6.txt index e425036839..dcf6f2604a 100644 --- a/docs/releases/1.6.txt +++ b/docs/releases/1.6.txt @@ -46,3 +46,21 @@ Backwards incompatible changes in 1.6 Features deprecated in 1.6 ========================== + +``SEND_BROKEN_LINK_EMAILS`` setting +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +:class:`~django.middleware.common.CommonMiddleware` used to provide basic +reporting of broken links by email when ``SEND_BROKEN_LINK_EMAILS`` is set to +``True``. + +Because of intractable ordering problems between +:class:`~django.middleware.common.CommonMiddleware` and +:class:`~django.middleware.locale.LocaleMiddleware`, this feature was split +out into a new middleware: +:class:`~django.middleware.common.BrokenLinkEmailsMiddleware`. + +If you're relying on this feature, you should add +``'django.middleware.common.BrokenLinkEmailsMiddleware'`` to your +:setting:`MIDDLEWARE_CLASSES` setting and remove ``SEND_BROKEN_LINK_EMAILS`` +from your settings. diff --git a/tests/regressiontests/middleware/tests.py b/tests/regressiontests/middleware/tests.py index e3d8350da6..c6d42a6964 100644 --- a/tests/regressiontests/middleware/tests.py +++ b/tests/regressiontests/middleware/tests.py @@ -1,16 +1,17 @@ # -*- coding: utf-8 -*- import gzip -import re -import random from io import BytesIO +import random +import re +import warnings from django.conf import settings from django.core import mail from django.http import HttpRequest from django.http import HttpResponse, StreamingHttpResponse from django.middleware.clickjacking import XFrameOptionsMiddleware -from django.middleware.common import CommonMiddleware +from django.middleware.common import CommonMiddleware, BrokenLinkEmailsMiddleware from django.middleware.http import ConditionalGetMiddleware from django.middleware.gzip import GZipMiddleware from django.test import TestCase, RequestFactory @@ -232,33 +233,39 @@ class CommonMiddlewareTest(TestCase): self.assertEqual(r['Location'], 'http://www.testserver/middleware/customurlconf/slash/') - # Tests for the 404 error reporting via email + # Legacy tests for the 404 error reporting via email (to be removed in 1.8) @override_settings(IGNORABLE_404_URLS=(re.compile(r'foo'),), - SEND_BROKEN_LINK_EMAILS = True) + SEND_BROKEN_LINK_EMAILS=True) def test_404_error_reporting(self): request = self._get_request('regular_url/that/does/not/exist') request.META['HTTP_REFERER'] = '/another/url/' - response = self.client.get(request.path) - CommonMiddleware().process_response(request, response) + with warnings.catch_warnings(): + warnings.simplefilter("ignore", PendingDeprecationWarning) + response = self.client.get(request.path) + CommonMiddleware().process_response(request, response) self.assertEqual(len(mail.outbox), 1) self.assertIn('Broken', mail.outbox[0].subject) @override_settings(IGNORABLE_404_URLS=(re.compile(r'foo'),), - SEND_BROKEN_LINK_EMAILS = True) + SEND_BROKEN_LINK_EMAILS=True) def test_404_error_reporting_no_referer(self): request = self._get_request('regular_url/that/does/not/exist') - response = self.client.get(request.path) - CommonMiddleware().process_response(request, response) + with warnings.catch_warnings(): + warnings.simplefilter("ignore", PendingDeprecationWarning) + response = self.client.get(request.path) + CommonMiddleware().process_response(request, response) self.assertEqual(len(mail.outbox), 0) @override_settings(IGNORABLE_404_URLS=(re.compile(r'foo'),), - SEND_BROKEN_LINK_EMAILS = True) + SEND_BROKEN_LINK_EMAILS=True) def test_404_error_reporting_ignored_url(self): request = self._get_request('foo_url/that/does/not/exist/either') request.META['HTTP_REFERER'] = '/another/url/' - response = self.client.get(request.path) - CommonMiddleware().process_response(request, response) + with warnings.catch_warnings(): + warnings.simplefilter("ignore", PendingDeprecationWarning) + response = self.client.get(request.path) + CommonMiddleware().process_response(request, response) self.assertEqual(len(mail.outbox), 0) # Other tests @@ -271,6 +278,34 @@ class CommonMiddlewareTest(TestCase): self.assertEqual(response.status_code, 301) +@override_settings(IGNORABLE_404_URLS=(re.compile(r'foo'),)) +class BrokenLinkEmailsMiddlewareTest(TestCase): + + def setUp(self): + self.req = HttpRequest() + self.req.META = { + 'SERVER_NAME': 'testserver', + 'SERVER_PORT': 80, + } + self.req.path = self.req.path_info = 'regular_url/that/does/not/exist' + self.resp = self.client.get(self.req.path) + + def test_404_error_reporting(self): + self.req.META['HTTP_REFERER'] = '/another/url/' + BrokenLinkEmailsMiddleware().process_response(self.req, self.resp) + self.assertEqual(len(mail.outbox), 1) + self.assertIn('Broken', mail.outbox[0].subject) + + def test_404_error_reporting_no_referer(self): + BrokenLinkEmailsMiddleware().process_response(self.req, self.resp) + self.assertEqual(len(mail.outbox), 0) + + def test_404_error_reporting_ignored_url(self): + self.req.path = self.req.path_info = 'foo_url/that/does/not/exist' + BrokenLinkEmailsMiddleware().process_response(self.req, self.resp) + self.assertEqual(len(mail.outbox), 0) + + class ConditionalGetMiddlewareTest(TestCase): urls = 'regressiontests.middleware.cond_get_urls' def setUp(self):