Fixed #19099 -- Split broken link emails out of common middleware.

This commit is contained in:
Aymeric Augustin 2013-01-01 22:28:48 +01:00
parent 83d0cc5214
commit 50a985b09b
8 changed files with 146 additions and 60 deletions

View File

@ -146,7 +146,7 @@ FILE_CHARSET = 'utf-8'
# Email address that error messages come from.
SERVER_EMAIL = 'root@localhost'
# Whether to send broken-link emails.
# Whether to send broken-link emails. Deprecated, must be removed in 1.8.
SEND_BROKEN_LINK_EMAILS = False
# Database connection info. If left empty, will default to the dummy backend.
@ -245,7 +245,7 @@ ALLOWED_INCLUDE_ROOTS = ()
ADMIN_FOR = ()
# List of compiled regular expression objects representing URLs that need not
# be reported when SEND_BROKEN_LINK_EMAILS is True. Here are a few examples:
# be reported by BrokenLinkEmailsMiddleware. Here are a few examples:
# import re
# IGNORABLE_404_URLS = (
# re.compile(r'^/apple-touch-icon.*\.png$'),

View File

@ -1,13 +1,14 @@
import hashlib
import logging
import re
import warnings
from django.conf import settings
from django import http
from django.core.mail import mail_managers
from django.core import urlresolvers
from django import http
from django.utils.http import urlquote
from django.utils import six
from django.core import urlresolvers
logger = logging.getLogger('django.request')
@ -102,25 +103,15 @@ class CommonMiddleware(object):
return http.HttpResponsePermanentRedirect(newurl)
def process_response(self, request, response):
"Send broken link emails and calculate the Etag, if needed."
if response.status_code == 404:
if settings.SEND_BROKEN_LINK_EMAILS and not settings.DEBUG:
# If the referrer was from an internal link or a non-search-engine site,
# send a note to the managers.
domain = request.get_host()
referer = request.META.get('HTTP_REFERER', None)
is_internal = _is_internal_request(domain, referer)
path = request.get_full_path()
if referer and not _is_ignorable_404(path) and (is_internal or '?' not in referer):
ua = request.META.get('HTTP_USER_AGENT', '<none>')
ip = request.META.get('REMOTE_ADDR', '<none>')
mail_managers("Broken %slink on %s" % ((is_internal and 'INTERNAL ' or ''), domain),
"Referrer: %s\nRequested URL: %s\nUser agent: %s\nIP address: %s\n" \
% (referer, request.get_full_path(), ua, ip),
fail_silently=True)
return response
"""
Calculate the ETag, if needed.
"""
if settings.SEND_BROKEN_LINK_EMAILS:
warnings.warn("SEND_BROKEN_LINK_EMAILS is deprecated. "
"Use BrokenLinkEmailsMiddleware instead.",
PendingDeprecationWarning, stacklevel=2)
BrokenLinkEmailsMiddleware().process_response(request, response)
# Use ETags, if requested.
if settings.USE_ETAGS:
if response.has_header('ETag'):
etag = response['ETag']
@ -139,15 +130,38 @@ class CommonMiddleware(object):
return response
def _is_ignorable_404(uri):
"""
Returns True if a 404 at the given URL *shouldn't* notify the site managers.
"""
return any(pattern.search(uri) for pattern in settings.IGNORABLE_404_URLS)
def _is_internal_request(domain, referer):
"""
Returns true if the referring URL is the same domain as the current request.
"""
# Different subdomains are treated as different domains.
return referer is not None and re.match("^https?://%s/" % re.escape(domain), referer)
class BrokenLinkEmailsMiddleware(object):
def process_response(self, request, response):
"""
Send broken link emails for relevant 404 NOT FOUND responses.
"""
if response.status_code == 404 and not settings.DEBUG:
domain = request.get_host()
path = request.get_full_path()
referer = request.META.get('HTTP_REFERER', '')
is_internal = self.is_internal_request(domain, referer)
is_not_search_engine = '?' not in referer
is_ignorable = self.is_ignorable_404(path)
if referer and (is_internal or is_not_search_engine) and not is_ignorable:
ua = request.META.get('HTTP_USER_AGENT', '<none>')
ip = request.META.get('REMOTE_ADDR', '<none>')
mail_managers(
"Broken %slink on %s" % (('INTERNAL ' if is_internal else ''), domain),
"Referrer: %s\nRequested URL: %s\nUser agent: %s\nIP address: %s\n" % (referer, path, ua, ip),
fail_silently=True)
return response
def is_internal_request(self, domain, referer):
"""
Returns True if the referring URL is the same domain as the current request.
"""
# Different subdomains are treated as different domains.
return re.match("^https?://%s/" % re.escape(domain), referer)
def is_ignorable_404(self, uri):
"""
Returns True if a 404 at the given URL *shouldn't* notify the site managers.
"""
return any(pattern.search(uri) for pattern in settings.IGNORABLE_404_URLS)

View File

@ -54,18 +54,24 @@ setting.
Django can also be configured to email errors about broken links (404 "page
not found" errors). Django sends emails about 404 errors when:
* :setting:`DEBUG` is ``False``
* :setting:`DEBUG` is ``False``;
* :setting:`SEND_BROKEN_LINK_EMAILS` is ``True``
* Your :setting:`MIDDLEWARE_CLASSES` setting includes ``CommonMiddleware``
(which it does by default).
* Your :setting:`MIDDLEWARE_CLASSES` setting includes
:class:`django.middleware.common.BrokenLinkEmailsMiddleware`.
If those conditions are met, Django will email the users listed in the
:setting:`MANAGERS` setting whenever your code raises a 404 and the request has
a referer. (It doesn't bother to email for 404s that don't have a referer --
those are usually just people typing in broken URLs or broken Web 'bots).
.. note::
:class:`~django.middleware.common.BrokenLinkEmailsMiddleware` must appear
before other middleware that intercepts 404 errors, such as
:class:`~django.middleware.locale.LocaleMiddleware` or
:class:`~django.contrib.flatpages.middleware.FlatpageFallbackMiddleware`.
Put it towards the top of your :setting:`MIDDLEWARE_CLASSES` setting.
You can tell Django to stop reporting particular 404s by tweaking the
:setting:`IGNORABLE_404_URLS` setting. It should be a tuple of compiled
regular expression objects. For example::
@ -92,9 +98,6 @@ crawlers often request::
(Note that these are regular expressions, so we put a backslash in front of
periods to escape them.)
The best way to disable this behavior is to set
:setting:`SEND_BROKEN_LINK_EMAILS` to ``False``.
.. seealso::
404 errors are logged using the logging framework. By default, these log

View File

@ -308,6 +308,13 @@ these changes.
* The ``depth`` keyword argument will be removed from
:meth:`~django.db.models.query.QuerySet.select_related`.
1.8
---
* The ``SEND_BROKEN_LINK_EMAILS`` setting will be removed. Add the
:class:`django.middleware.common.BrokenLinkEmailsMiddleware` middleware to
your :setting:`MIDDLEWARE_CLASSES` setting instead.
2.0
---

View File

@ -61,14 +61,16 @@ Adds a few conveniences for perfectionists:
indexer would treat them as separate URLs -- so it's best practice to
normalize URLs.
* Sends broken link notification emails to :setting:`MANAGERS` if
:setting:`SEND_BROKEN_LINK_EMAILS` is set to ``True``.
* Handles ETags based on the :setting:`USE_ETAGS` setting. If
:setting:`USE_ETAGS` is set to ``True``, Django will calculate an ETag
for each request by MD5-hashing the page content, and it'll take care of
sending ``Not Modified`` responses, if appropriate.
.. class:: BrokenLinkEmailsMiddleware
* Sends broken link notification emails to :setting:`MANAGERS` (see
:doc:`/howto/error-reporting`).
View metadata middleware
------------------------

View File

@ -1090,8 +1090,9 @@ query string, if any). Use this if your site does not provide a commonly
requested file such as ``favicon.ico`` or ``robots.txt``, or if it gets
hammered by script kiddies.
This is only used if :setting:`SEND_BROKEN_LINK_EMAILS` is set to ``True`` and
``CommonMiddleware`` is installed (see :doc:`/topics/http/middleware`).
This is only used if
:class:`~django.middleware.common.BrokenLinkEmailsMiddleware` is enabled (see
:doc:`/topics/http/middleware`).
.. setting:: INSTALLED_APPS
@ -1250,7 +1251,8 @@ MANAGERS
Default: ``()`` (Empty tuple)
A tuple in the same format as :setting:`ADMINS` that specifies who should get
broken-link notifications when :setting:`SEND_BROKEN_LINK_EMAILS` is ``True``.
broken link notifications when
:class:`~django.middleware.common.BrokenLinkEmailsMiddleware` is enabled.
.. setting:: MEDIA_ROOT
@ -1448,6 +1450,11 @@ available in ``request.META``.)
SEND_BROKEN_LINK_EMAILS
-----------------------
.. deprecated:: 1.6
Since :class:`~django.middleware.common.BrokenLinkEmailsMiddleware`
was split from :class:`~django.middleware.common.CommonMiddleware`,
this setting no longer serves a purpose.
Default: ``False``
Whether to send an email to the :setting:`MANAGERS` each time somebody visits

View File

@ -46,3 +46,21 @@ Backwards incompatible changes in 1.6
Features deprecated in 1.6
==========================
``SEND_BROKEN_LINK_EMAILS`` setting
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
:class:`~django.middleware.common.CommonMiddleware` used to provide basic
reporting of broken links by email when ``SEND_BROKEN_LINK_EMAILS`` is set to
``True``.
Because of intractable ordering problems between
:class:`~django.middleware.common.CommonMiddleware` and
:class:`~django.middleware.locale.LocaleMiddleware`, this feature was split
out into a new middleware:
:class:`~django.middleware.common.BrokenLinkEmailsMiddleware`.
If you're relying on this feature, you should add
``'django.middleware.common.BrokenLinkEmailsMiddleware'`` to your
:setting:`MIDDLEWARE_CLASSES` setting and remove ``SEND_BROKEN_LINK_EMAILS``
from your settings.

View File

@ -1,16 +1,17 @@
# -*- coding: utf-8 -*-
import gzip
import re
import random
from io import BytesIO
import random
import re
import warnings
from django.conf import settings
from django.core import mail
from django.http import HttpRequest
from django.http import HttpResponse, StreamingHttpResponse
from django.middleware.clickjacking import XFrameOptionsMiddleware
from django.middleware.common import CommonMiddleware
from django.middleware.common import CommonMiddleware, BrokenLinkEmailsMiddleware
from django.middleware.http import ConditionalGetMiddleware
from django.middleware.gzip import GZipMiddleware
from django.test import TestCase, RequestFactory
@ -232,33 +233,39 @@ class CommonMiddlewareTest(TestCase):
self.assertEqual(r['Location'],
'http://www.testserver/middleware/customurlconf/slash/')
# Tests for the 404 error reporting via email
# Legacy tests for the 404 error reporting via email (to be removed in 1.8)
@override_settings(IGNORABLE_404_URLS=(re.compile(r'foo'),),
SEND_BROKEN_LINK_EMAILS = True)
SEND_BROKEN_LINK_EMAILS=True)
def test_404_error_reporting(self):
request = self._get_request('regular_url/that/does/not/exist')
request.META['HTTP_REFERER'] = '/another/url/'
response = self.client.get(request.path)
CommonMiddleware().process_response(request, response)
with warnings.catch_warnings():
warnings.simplefilter("ignore", PendingDeprecationWarning)
response = self.client.get(request.path)
CommonMiddleware().process_response(request, response)
self.assertEqual(len(mail.outbox), 1)
self.assertIn('Broken', mail.outbox[0].subject)
@override_settings(IGNORABLE_404_URLS=(re.compile(r'foo'),),
SEND_BROKEN_LINK_EMAILS = True)
SEND_BROKEN_LINK_EMAILS=True)
def test_404_error_reporting_no_referer(self):
request = self._get_request('regular_url/that/does/not/exist')
response = self.client.get(request.path)
CommonMiddleware().process_response(request, response)
with warnings.catch_warnings():
warnings.simplefilter("ignore", PendingDeprecationWarning)
response = self.client.get(request.path)
CommonMiddleware().process_response(request, response)
self.assertEqual(len(mail.outbox), 0)
@override_settings(IGNORABLE_404_URLS=(re.compile(r'foo'),),
SEND_BROKEN_LINK_EMAILS = True)
SEND_BROKEN_LINK_EMAILS=True)
def test_404_error_reporting_ignored_url(self):
request = self._get_request('foo_url/that/does/not/exist/either')
request.META['HTTP_REFERER'] = '/another/url/'
response = self.client.get(request.path)
CommonMiddleware().process_response(request, response)
with warnings.catch_warnings():
warnings.simplefilter("ignore", PendingDeprecationWarning)
response = self.client.get(request.path)
CommonMiddleware().process_response(request, response)
self.assertEqual(len(mail.outbox), 0)
# Other tests
@ -271,6 +278,34 @@ class CommonMiddlewareTest(TestCase):
self.assertEqual(response.status_code, 301)
@override_settings(IGNORABLE_404_URLS=(re.compile(r'foo'),))
class BrokenLinkEmailsMiddlewareTest(TestCase):
def setUp(self):
self.req = HttpRequest()
self.req.META = {
'SERVER_NAME': 'testserver',
'SERVER_PORT': 80,
}
self.req.path = self.req.path_info = 'regular_url/that/does/not/exist'
self.resp = self.client.get(self.req.path)
def test_404_error_reporting(self):
self.req.META['HTTP_REFERER'] = '/another/url/'
BrokenLinkEmailsMiddleware().process_response(self.req, self.resp)
self.assertEqual(len(mail.outbox), 1)
self.assertIn('Broken', mail.outbox[0].subject)
def test_404_error_reporting_no_referer(self):
BrokenLinkEmailsMiddleware().process_response(self.req, self.resp)
self.assertEqual(len(mail.outbox), 0)
def test_404_error_reporting_ignored_url(self):
self.req.path = self.req.path_info = 'foo_url/that/does/not/exist'
BrokenLinkEmailsMiddleware().process_response(self.req, self.resp)
self.assertEqual(len(mail.outbox), 0)
class ConditionalGetMiddlewareTest(TestCase):
urls = 'regressiontests.middleware.cond_get_urls'
def setUp(self):