Fixed #15954 - New IGNORABLE_404_URLS setting that allows more powerful filtering of 404s to ignore

Thanks to aaugustin for implementing this.

(Technically this doesn't fix the original report, as we've decided against
having *any* default values, but the new feature makes it possible, and the
docs have an example addressing #15954).

git-svn-id: http://code.djangoproject.com/svn/django/trunk@16160 bcc190cf-cafb-0310-a4f2-bffc1f526a37
This commit is contained in:
Luke Plant 2011-05-05 20:49:26 +00:00
parent db5807bdb1
commit 171df93170
7 changed files with 165 additions and 34 deletions

View File

@ -246,9 +246,17 @@ ALLOWED_INCLUDE_ROOTS = ()
# is an admin.
ADMIN_FOR = ()
# 404s that may be ignored.
IGNORABLE_404_STARTS = ('/cgi-bin/', '/_vti_bin', '/_vti_inf')
IGNORABLE_404_ENDS = ('mail.pl', 'mailform.pl', 'mail.cgi', 'mailform.cgi', 'favicon.ico', '.php')
# List of compiled regular expression objects representing URLs that need not
# be reported when SEND_BROKEN_LINK_EMAILS is True. Here are a few examples:
# import re
# IGNORABLE_404_URLS = (
# re.compile(r'^/apple-touch-icon.*\.png$'),
# re.compile(r'^/favicon\.ico$'),
# re.compile(r'^/robots\.txt$'),
# re.compile(r'^/phpmyadmin/'),
# re.compile(r'\.(cgi|php|pl)$'),
# )
IGNORABLE_404_URLS = ()
# A secret key for this particular Django installation. Used in secret-key
# hashing algorithms. Set this in your settings, or Django will complain

View File

@ -127,13 +127,23 @@ def _is_ignorable_404(uri):
"""
Returns True if a 404 at the given URL *shouldn't* notify the site managers.
"""
for start in settings.IGNORABLE_404_STARTS:
if uri.startswith(start):
return True
for end in settings.IGNORABLE_404_ENDS:
if uri.endswith(end):
return True
return False
if getattr(settings, 'IGNORABLE_404_STARTS', ()):
import warnings
warnings.warn('The IGNORABLE_404_STARTS setting has been deprecated '
'in favour of IGNORABLE_404_URLS.',
PendingDeprecationWarning)
for start in settings.IGNORABLE_404_STARTS:
if uri.startswith(start):
return True
if getattr(settings, 'IGNORABLE_404_ENDS', ()):
import warnings
warnings.warn('The IGNORABLE_404_ENDS setting has been deprecated '
'in favour of IGNORABLE_404_URLS.',
PendingDeprecationWarning)
for end in settings.IGNORABLE_404_ENDS:
if uri.endswith(end):
return True
return any(pattern.search(uri) for pattern in settings.IGNORABLE_404_URLS)
def _is_internal_request(domain, referer):
"""

View File

@ -66,15 +66,29 @@ a referer. (It doesn't bother to email for 404s that don't have a referer --
those are usually just people typing in broken URLs or broken Web 'bots).
You can tell Django to stop reporting particular 404s by tweaking the
:setting:`IGNORABLE_404_ENDS` and :setting:`IGNORABLE_404_STARTS` settings. Both
should be a tuple of strings. For example::
:setting:`IGNORABLE_404_URLS` setting. It should be a tuple of compiled
regular expression objects. For example::
IGNORABLE_404_ENDS = ('.php', '.cgi')
IGNORABLE_404_STARTS = ('/phpmyadmin/',)
import re
IGNORABLE_404_URLS = (
re.compile(r'\.(php|cgi)$'),
re.compile(r'^/phpmyadmin/'),
)
In this example, a 404 to any URL ending with ``.php`` or ``.cgi`` will *not* be
reported. Neither will any URL starting with ``/phpmyadmin/``.
The following example shows how to exclude some conventional URLs that browsers and
crawlers often request::
import re
IGNORABLE_404_URLS = (
re.compile(r'^/apple-touch-icon.*\.png$'),
re.compile(r'^/favicon\.ico$'),
re.compile(r'^/robots\.txt$'),
)
The best way to disable this behavior is to set
:setting:`SEND_BROKEN_LINK_EMAILS` to ``False``.
@ -93,3 +107,10 @@ The best way to disable this behavior is to set
records are ignored, but you can use them for error reporting by writing a
handler and :doc:`configuring logging </topics/logging>` appropriately.
.. seealso::
.. versionchanged:: 1.4
Previously, two settings were used to control which URLs not to report:
:setting:`IGNORABLE_404_STARTS` and :setting:`IGNORABLE_404_ENDS`. They
were replaced by :setting:`IGNORABLE_404_URLS`.

View File

@ -199,6 +199,10 @@ their deprecation, as per the :ref:`Django deprecation policy
ISO 3166 code for United Kingdom). They have been deprecated since the
1.4 release.
* The :setting:`IGNORABLE_404_STARTS` and :setting:`IGNORABLE_404_ENDS`
settings have been superseded by :setting:`IGNORABLE_404_URLS` in
the 1.4 release. They will be removed.
* 2.0
* ``django.views.defaults.shortcut()``. This function has been moved
to ``django.contrib.contenttypes.views.shortcut()`` as part of the

View File

@ -1020,25 +1020,23 @@ Available formats are ``DATE_FORMAT``, ``TIME_FORMAT``, ``DATETIME_FORMAT``,
``SHORT_DATETIME_FORMAT``, ``FIRST_DAY_OF_WEEK``, ``DECIMAL_SEPARATOR``,
``THOUSAND_SEPARATOR`` and ``NUMBER_GROUPING``.
.. setting:: IGNORABLE_404_ENDS
.. setting:: IGNORABLE_404_URLS
IGNORABLE_404_ENDS
IGNORABLE_404_URLS
------------------
Default: ``('mail.pl', 'mailform.pl', 'mail.cgi', 'mailform.cgi', 'favicon.ico', '.php')``
.. versionadded:: 1.4
See also ``IGNORABLE_404_STARTS`` and ``Error reporting via email``.
Default: ``()``
.. setting:: IGNORABLE_404_STARTS
List of compiled regular expression objects describing URLs that should be
ignored when reporting HTTP 404 errors via email (see
:doc:`/howto/error-reporting`). Use this if your site does not provide a
commonly requested file such as ``favicon.ico`` or ``robots.txt``, or if it
gets hammered by script kiddies.
IGNORABLE_404_STARTS
--------------------
Default: ``('/cgi-bin/', '/_vti_bin', '/_vti_inf')``
A tuple of strings that specify beginnings of URLs that should be ignored by
the 404 emailer. See ``SEND_BROKEN_LINK_EMAILS``, ``IGNORABLE_404_ENDS`` and
the :doc:`/howto/error-reporting`.
This is only used if :setting:`SEND_BROKEN_LINK_EMAILS` is set to ``True`` and
``CommonMiddleware`` is installed (see :doc:`/topics/http/middleware`).
.. setting:: INSTALLED_APPS
@ -1435,8 +1433,8 @@ Default: ``False``
Whether to send an email to the ``MANAGERS`` each time somebody visits a
Django-powered page that is 404ed with a non-empty referer (i.e., a broken
link). This is only used if ``CommonMiddleware`` is installed (see
:doc:`/topics/http/middleware`. See also ``IGNORABLE_404_STARTS``,
``IGNORABLE_404_ENDS`` and :doc:`/howto/error-reporting`.
:doc:`/topics/http/middleware`). See also ``IGNORABLE_404_URLS`` and
:doc:`/howto/error-reporting`.
.. setting:: SERIALIZATION_MODULES
@ -2045,6 +2043,22 @@ DATABASE_USER
This setting has been replaced by :setting:`USER` in
:setting:`DATABASES`.
.. setting:: IGNORABLE_404_ENDS
IGNORABLE_404_ENDS
------------------
.. deprecated:: 1.4
This setting has been superseded by :setting:`IGNORABLE_404_URLS`.
.. setting:: IGNORABLE_404_STARTS
IGNORABLE_404_STARTS
--------------------
.. deprecated:: 1.4
This setting has been superseded by :setting:`IGNORABLE_404_URLS`.
.. setting:: TEST_DATABASE_CHARSET
TEST_DATABASE_CHARSET
@ -2071,4 +2085,3 @@ TEST_DATABASE_NAME
.. deprecated:: 1.2
This setting has been replaced by :setting:`TEST_NAME` in
:setting:`DATABASES`.

View File

@ -176,3 +176,41 @@ Save this model manager in your custom comment app (e.g. in
For more details see the docs about
:doc:`customizing the comments framework </ref/contrib/comments/custom>`.
`IGNORABLE_404_STARTS` and `IGNORABLE_404_ENDS` settings
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Django can report 404 errors: see :doc:`/howto/error-reporting`.
Until Django 1.3, it was possible to exclude some URLs from the reporting
by adding prefixes to :setting:`IGNORABLE_404_STARTS` and suffixes to
:setting:`IGNORABLE_404_ENDS`.
In Django 1.4, these two settings are superseded by
:setting:`IGNORABLE_404_URLS`, which is a list of compiled regular expressions.
Django won't send an email for 404 errors on URLs that match any of them.
Furthermore, the previous settings had some rather arbitrary default values::
IGNORABLE_404_STARTS = ('/cgi-bin/', '/_vti_bin', '/_vti_inf')
IGNORABLE_404_ENDS = ('mail.pl', 'mailform.pl', 'mail.cgi', 'mailform.cgi',
'favicon.ico', '.php')
It's not Django's role to decide if your website has a legacy ``/cgi-bin/``
section or a ``favicon.ico``. As a consequence, the default values of
:setting:`IGNORABLE_404_URLS`, :setting:`IGNORABLE_404_STARTS` and
:setting:`IGNORABLE_404_ENDS` are all now empty.
If you have customized :setting:`IGNORABLE_404_STARTS` or
:setting:`IGNORABLE_404_ENDS`, or if you want to keep the old default value,
you should add the following lines in your settings file::
import re
IGNORABLE_404_URLS = (
# for each <prefix> in IGNORABLE_404_STARTS
re.compile(r'^<prefix>'),
# for each <suffix> in IGNORABLE_404_ENDS
re.compile(r'<suffix>$'),
)
Don't forget to escape characters that have a special meaning in a regular
expression.

View File

@ -1,6 +1,9 @@
# -*- coding: utf-8 -*-
import re
from django.conf import settings
from django.core import mail
from django.http import HttpRequest
from django.middleware.common import CommonMiddleware
from django.middleware.http import ConditionalGetMiddleware
@ -9,12 +12,16 @@ from django.test import TestCase
class CommonMiddlewareTest(TestCase):
def setUp(self):
self.slash = settings.APPEND_SLASH
self.www = settings.PREPEND_WWW
self.append_slash = settings.APPEND_SLASH
self.prepend_www = settings.PREPEND_WWW
self.ignorable_404_urls = settings.IGNORABLE_404_URLS
self.send_broken_email_links = settings.SEND_BROKEN_LINK_EMAILS
def tearDown(self):
settings.APPEND_SLASH = self.slash
settings.PREPEND_WWW = self.www
settings.APPEND_SLASH = self.append_slash
settings.PREPEND_WWW = self.prepend_www
settings.IGNORABLE_404_URLS = self.ignorable_404_urls
settings.SEND_BROKEN_LINK_EMAILS = self.send_broken_email_links
def _get_request(self, path):
request = HttpRequest()
@ -249,6 +256,36 @@ class CommonMiddlewareTest(TestCase):
self.assertEqual(r['Location'],
'http://www.testserver/middleware/customurlconf/slash/')
# Tests for the 404 error reporting via email
def test_404_error_reporting(self):
# A broken-link email is expected when the requested path matches none of
# the IGNORABLE_404_URLS patterns and the request carries a referer.
# The only ignorable pattern is 'foo'; the path requested below does not
# contain it.
settings.IGNORABLE_404_URLS = (re.compile(r'foo'),)
settings.SEND_BROKEN_LINK_EMAILS = True
request = self._get_request('regular_url/that/does/not/exist')
request.META['HTTP_REFERER'] = '/another/url/'
# NOTE(review): presumably the test client answers this path with a 404 --
# confirm against the URLconf used by this test case.
response = self.client.get(request.path)
CommonMiddleware().process_response(request, response)
# Exactly one message must be in the outbox, with 'Broken' in its subject.
self.assertEqual(len(mail.outbox), 1)
self.assertIn('Broken', mail.outbox[0].subject)
def test_404_error_reporting_no_referer(self):
# Same setup as the reporting test, except that no HTTP_REFERER is set on
# the request; no email is expected in that case.
settings.IGNORABLE_404_URLS = (re.compile(r'foo'),)
settings.SEND_BROKEN_LINK_EMAILS = True
request = self._get_request('regular_url/that/does/not/exist')
response = self.client.get(request.path)
CommonMiddleware().process_response(request, response)
# The outbox must stay empty.
self.assertEqual(len(mail.outbox), 0)
def test_404_error_reporting_ignored_url(self):
# A referer is present, but the requested path contains 'foo' and therefore
# matches the single IGNORABLE_404_URLS pattern; no email is expected.
settings.IGNORABLE_404_URLS = (re.compile(r'foo'),)
settings.SEND_BROKEN_LINK_EMAILS = True
request = self._get_request('foo_url/that/does/not/exist/either')
request.META['HTTP_REFERER'] = '/another/url/'
response = self.client.get(request.path)
CommonMiddleware().process_response(request, response)
# The outbox must stay empty.
self.assertEqual(len(mail.outbox), 0)
class ConditionalGetMiddlewareTest(TestCase):
urls = 'regressiontests.middleware.cond_get_urls'
def setUp(self):