Fixed #15954 - New IGNORABLE_404_URLS setting that allows more powerful filtering of 404s to ignore

Thanks to aaugustin for implementing this.

(Technically this doesn't fix the original report, as we've decided against
having *any* default values, but the new feature makes it possible, and the
docs have an example addressing #15954).

git-svn-id: http://code.djangoproject.com/svn/django/trunk@16160 bcc190cf-cafb-0310-a4f2-bffc1f526a37
This commit is contained in:
Luke Plant 2011-05-05 20:49:26 +00:00
parent db5807bdb1
commit 171df93170
7 changed files with 165 additions and 34 deletions

View File

@ -246,9 +246,17 @@ ALLOWED_INCLUDE_ROOTS = ()
# is an admin. # is an admin.
ADMIN_FOR = () ADMIN_FOR = ()
# 404s that may be ignored. # List of compiled regular expression objects representing URLs that need not
IGNORABLE_404_STARTS = ('/cgi-bin/', '/_vti_bin', '/_vti_inf') # be reported when SEND_BROKEN_LINK_EMAILS is True. Here are a few examples:
IGNORABLE_404_ENDS = ('mail.pl', 'mailform.pl', 'mail.cgi', 'mailform.cgi', 'favicon.ico', '.php') # import re
# IGNORABLE_404_URLS = (
# re.compile(r'^/apple-touch-icon.*\.png$'),
# re.compile(r'^/favicon\.ico$'),
# re.compile(r'^/robots\.txt$'),
# re.compile(r'^/phpmyadmin/'),
# re.compile(r'\.(cgi|php|pl)$'),
# )
IGNORABLE_404_URLS = ()
# A secret key for this particular Django installation. Used in secret-key # A secret key for this particular Django installation. Used in secret-key
# hashing algorithms. Set this in your settings, or Django will complain # hashing algorithms. Set this in your settings, or Django will complain

View File

@ -127,13 +127,23 @@ def _is_ignorable_404(uri):
""" """
Returns True if a 404 at the given URL *shouldn't* notify the site managers. Returns True if a 404 at the given URL *shouldn't* notify the site managers.
""" """
if getattr(settings, 'IGNORABLE_404_STARTS', ()):
import warnings
warnings.warn('The IGNORABLE_404_STARTS setting has been deprecated '
'in favour of IGNORABLE_404_URLS.',
PendingDeprecationWarning)
for start in settings.IGNORABLE_404_STARTS: for start in settings.IGNORABLE_404_STARTS:
if uri.startswith(start): if uri.startswith(start):
return True return True
if getattr(settings, 'IGNORABLE_404_ENDS', ()):
import warnings
warnings.warn('The IGNORABLE_404_ENDS setting has been deprecated '
'in favour of IGNORABLE_404_URLS.',
PendingDeprecationWarning)
for end in settings.IGNORABLE_404_ENDS: for end in settings.IGNORABLE_404_ENDS:
if uri.endswith(end): if uri.endswith(end):
return True return True
return False return any(pattern.search(uri) for pattern in settings.IGNORABLE_404_URLS)
def _is_internal_request(domain, referer): def _is_internal_request(domain, referer):
""" """

View File

@ -66,15 +66,29 @@ a referer. (It doesn't bother to email for 404s that don't have a referer --
those are usually just people typing in broken URLs or broken Web 'bots). those are usually just people typing in broken URLs or broken Web 'bots).
You can tell Django to stop reporting particular 404s by tweaking the You can tell Django to stop reporting particular 404s by tweaking the
:setting:`IGNORABLE_404_ENDS` and :setting:`IGNORABLE_404_STARTS` settings. Both :setting:`IGNORABLE_404_URLS` setting. It should be a tuple of compiled
should be a tuple of strings. For example:: regular expression objects. For example::
IGNORABLE_404_ENDS = ('.php', '.cgi') import re
IGNORABLE_404_STARTS = ('/phpmyadmin/',) IGNORABLE_404_URLS = (
re.compile(r'\.(php|cgi)$'),
re.compile(r'^/phpmyadmin/'),
)
In this example, a 404 to any URL ending with ``.php`` or ``.cgi`` will *not* be In this example, a 404 to any URL ending with ``.php`` or ``.cgi`` will *not* be
reported. Neither will any URL starting with ``/phpmyadmin/``. reported. Neither will any URL starting with ``/phpmyadmin/``.
The following example shows how to exclude some conventional URLs that browsers and
crawlers often request::
import re
IGNORABLE_404_URLS = (
re.compile(r'^/apple-touch-icon.*\.png$'),
re.compile(r'^/favicon\.ico$'),
re.compile(r'^/robots\.txt$'),
)
The best way to disable this behavior is to set The best way to disable this behavior is to set
:setting:`SEND_BROKEN_LINK_EMAILS` to ``False``. :setting:`SEND_BROKEN_LINK_EMAILS` to ``False``.
@ -93,3 +107,10 @@ The best way to disable this behavior is to set
records are ignored, but you can use them for error reporting by writing a records are ignored, but you can use them for error reporting by writing a
handler and :doc:`configuring logging </topics/logging>` appropriately. handler and :doc:`configuring logging </topics/logging>` appropriately.
.. seealso::
.. versionchanged:: 1.4
Previously, two settings were used to control which URLs not to report:
:setting:`IGNORABLE_404_STARTS` and :setting:`IGNORABLE_404_ENDS`. They
were replaced by :setting:`IGNORABLE_404_URLS`.

View File

@ -199,6 +199,10 @@ their deprecation, as per the :ref:`Django deprecation policy
ISO 3166 code for United Kingdom). They have been deprecated since the ISO 3166 code for United Kingdom). They have been deprecated since the
1.4 release. 1.4 release.
* The :setting:`IGNORABLE_404_STARTS` and :setting:`IGNORABLE_404_ENDS`
settings have been superseded by :setting:`IGNORABLE_404_URLS` in
the 1.4 release. They will be removed.
* 2.0 * 2.0
* ``django.views.defaults.shortcut()``. This function has been moved * ``django.views.defaults.shortcut()``. This function has been moved
to ``django.contrib.contenttypes.views.shortcut()`` as part of the to ``django.contrib.contenttypes.views.shortcut()`` as part of the

View File

@ -1020,25 +1020,23 @@ Available formats are ``DATE_FORMAT``, ``TIME_FORMAT``, ``DATETIME_FORMAT``,
``SHORT_DATETIME_FORMAT``, ``FIRST_DAY_OF_WEEK``, ``DECIMAL_SEPARATOR``, ``SHORT_DATETIME_FORMAT``, ``FIRST_DAY_OF_WEEK``, ``DECIMAL_SEPARATOR``,
``THOUSAND_SEPARATOR`` and ``NUMBER_GROUPING``. ``THOUSAND_SEPARATOR`` and ``NUMBER_GROUPING``.
.. setting:: IGNORABLE_404_ENDS .. setting:: IGNORABLE_404_URLS
IGNORABLE_404_ENDS IGNORABLE_404_URLS
------------------ ------------------
Default: ``('mail.pl', 'mailform.pl', 'mail.cgi', 'mailform.cgi', 'favicon.ico', '.php')`` .. versionadded:: 1.4
See also ``IGNORABLE_404_STARTS`` and ``Error reporting via email``. Default: ``()``
.. setting:: IGNORABLE_404_STARTS List of compiled regular expression objects describing URLs that should be
ignored when reporting HTTP 404 errors via email (see
:doc:`/howto/error-reporting`). Use this if your site does not provide a
commonly requested file such as ``favicon.ico`` or ``robots.txt``, or if it
gets hammered by script kiddies.
IGNORABLE_404_STARTS This is only used if :setting:`SEND_BROKEN_LINK_EMAILS` is set to ``True`` and
-------------------- ``CommonMiddleware`` is installed (see :doc:`/topics/http/middleware`).
Default: ``('/cgi-bin/', '/_vti_bin', '/_vti_inf')``
A tuple of strings that specify beginnings of URLs that should be ignored by
the 404 emailer. See ``SEND_BROKEN_LINK_EMAILS``, ``IGNORABLE_404_ENDS`` and
the :doc:`/howto/error-reporting`.
.. setting:: INSTALLED_APPS .. setting:: INSTALLED_APPS
@ -1435,8 +1433,8 @@ Default: ``False``
Whether to send an email to the ``MANAGERS`` each time somebody visits a Whether to send an email to the ``MANAGERS`` each time somebody visits a
Django-powered page that is 404ed with a non-empty referer (i.e., a broken Django-powered page that is 404ed with a non-empty referer (i.e., a broken
link). This is only used if ``CommonMiddleware`` is installed (see link). This is only used if ``CommonMiddleware`` is installed (see
:doc:`/topics/http/middleware`. See also ``IGNORABLE_404_STARTS``, :doc:`/topics/http/middleware`). See also ``IGNORABLE_404_URLS`` and
``IGNORABLE_404_ENDS`` and :doc:`/howto/error-reporting`. :doc:`/howto/error-reporting`.
.. setting:: SERIALIZATION_MODULES .. setting:: SERIALIZATION_MODULES
@ -2045,6 +2043,22 @@ DATABASE_USER
This setting has been replaced by :setting:`USER` in This setting has been replaced by :setting:`USER` in
:setting:`DATABASES`. :setting:`DATABASES`.
.. setting:: IGNORABLE_404_ENDS
IGNORABLE_404_ENDS
------------------
.. deprecated:: 1.4
This setting has been superseded by :setting:`IGNORABLE_404_URLS`.
.. setting:: IGNORABLE_404_STARTS
IGNORABLE_404_STARTS
--------------------
.. deprecated:: 1.4
This setting has been superseded by :setting:`IGNORABLE_404_URLS`.
.. setting:: TEST_DATABASE_CHARSET .. setting:: TEST_DATABASE_CHARSET
TEST_DATABASE_CHARSET TEST_DATABASE_CHARSET
@ -2071,4 +2085,3 @@ TEST_DATABASE_NAME
.. deprecated:: 1.2 .. deprecated:: 1.2
This setting has been replaced by :setting:`TEST_NAME` in This setting has been replaced by :setting:`TEST_NAME` in
:setting:`DATABASES`. :setting:`DATABASES`.

View File

@ -176,3 +176,41 @@ Save this model manager in your custom comment app (e.g. in
For more details see the docs about For more details see the docs about
:doc:`customizing the comments framework </ref/contrib/comments/custom>`. :doc:`customizing the comments framework </ref/contrib/comments/custom>`.
`IGNORABLE_404_STARTS` and `IGNORABLE_404_ENDS` settings
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Django can report 404 errors: see :doc:`/howto/error-reporting`.
Until Django 1.3, it was possible to exclude some URLs from the reporting
by adding prefixes to :setting:`IGNORABLE_404_STARTS` and suffixes to
:setting:`IGNORABLE_404_ENDS`.
In Django 1.4, these two settings are superseded by
:setting:`IGNORABLE_404_URLS`, which is a list of compiled regular expressions.
Django won't send an email for 404 errors on URLs that match any of them.
Furthermore, the previous settings had some rather arbitrary default values::
IGNORABLE_404_STARTS = ('/cgi-bin/', '/_vti_bin', '/_vti_inf')
IGNORABLE_404_ENDS = ('mail.pl', 'mailform.pl', 'mail.cgi', 'mailform.cgi',
'favicon.ico', '.php')
It's not Django's role to decide if your website has a legacy ``/cgi-bin/``
section or a ``favicon.ico``. As a consequence, the default values of
:setting:`IGNORABLE_404_URLS`, :setting:`IGNORABLE_404_STARTS` and
:setting:`IGNORABLE_404_ENDS` are all now empty.
If you have customized :setting:`IGNORABLE_404_STARTS` or
:setting:`IGNORABLE_404_ENDS`, or if you want to keep the old default value,
you should add the following lines in your settings file::
import re
IGNORABLE_404_URLS = (
# for each <prefix> in IGNORABLE_404_STARTS
re.compile(r'^<prefix>'),
# for each <suffix> in IGNORABLE_404_ENDS
re.compile(r'<suffix>$'),
)
Don't forget to escape characters that have a special meaning in a regular
expression.

View File

@ -1,6 +1,9 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import re
from django.conf import settings from django.conf import settings
from django.core import mail
from django.http import HttpRequest from django.http import HttpRequest
from django.middleware.common import CommonMiddleware from django.middleware.common import CommonMiddleware
from django.middleware.http import ConditionalGetMiddleware from django.middleware.http import ConditionalGetMiddleware
@ -9,12 +12,16 @@ from django.test import TestCase
class CommonMiddlewareTest(TestCase): class CommonMiddlewareTest(TestCase):
def setUp(self): def setUp(self):
self.slash = settings.APPEND_SLASH self.append_slash = settings.APPEND_SLASH
self.www = settings.PREPEND_WWW self.prepend_www = settings.PREPEND_WWW
self.ignorable_404_urls = settings.IGNORABLE_404_URLS
self.send_broken_email_links = settings.SEND_BROKEN_LINK_EMAILS
def tearDown(self): def tearDown(self):
settings.APPEND_SLASH = self.slash settings.APPEND_SLASH = self.append_slash
settings.PREPEND_WWW = self.www settings.PREPEND_WWW = self.prepend_www
settings.IGNORABLE_404_URLS = self.ignorable_404_urls
settings.SEND_BROKEN_LINK_EMAILS = self.send_broken_email_links
def _get_request(self, path): def _get_request(self, path):
request = HttpRequest() request = HttpRequest()
@ -249,6 +256,36 @@ class CommonMiddlewareTest(TestCase):
self.assertEqual(r['Location'], self.assertEqual(r['Location'],
'http://www.testserver/middleware/customurlconf/slash/') 'http://www.testserver/middleware/customurlconf/slash/')
# Tests for the 404 error reporting via email
def test_404_error_reporting(self):
# Reporting case: the requested path does not match the 'foo' pattern in
# IGNORABLE_404_URLS and the request carries a referer.
settings.IGNORABLE_404_URLS = (re.compile(r'foo'),)
settings.SEND_BROKEN_LINK_EMAILS = True
request = self._get_request('regular_url/that/does/not/exist')
request.META['HTTP_REFERER'] = '/another/url/'
# NOTE(review): presumably this path yields a 404 from the test client --
# confirm against the test URLconf.
response = self.client.get(request.path)
CommonMiddleware().process_response(request, response)
# Exactly one message is expected in the outbox, with 'Broken' in its subject.
self.assertEqual(len(mail.outbox), 1)
self.assertIn('Broken', mail.outbox[0].subject)
def test_404_error_reporting_no_referer(self):
# Same setup as the reporting case, but no HTTP_REFERER is set on the
# request; the test expects that no email is sent.
settings.IGNORABLE_404_URLS = (re.compile(r'foo'),)
settings.SEND_BROKEN_LINK_EMAILS = True
request = self._get_request('regular_url/that/does/not/exist')
# NOTE(review): presumably this path yields a 404 from the test client --
# confirm against the test URLconf.
response = self.client.get(request.path)
CommonMiddleware().process_response(request, response)
self.assertEqual(len(mail.outbox), 0)
def test_404_error_reporting_ignored_url(self):
# The requested path contains 'foo', which the single pattern in
# IGNORABLE_404_URLS matches; a referer is set, yet no email is expected.
settings.IGNORABLE_404_URLS = (re.compile(r'foo'),)
settings.SEND_BROKEN_LINK_EMAILS = True
request = self._get_request('foo_url/that/does/not/exist/either')
request.META['HTTP_REFERER'] = '/another/url/'
# NOTE(review): presumably this path yields a 404 from the test client --
# confirm against the test URLconf.
response = self.client.get(request.path)
CommonMiddleware().process_response(request, response)
self.assertEqual(len(mail.outbox), 0)
class ConditionalGetMiddlewareTest(TestCase): class ConditionalGetMiddlewareTest(TestCase):
urls = 'regressiontests.middleware.cond_get_urls' urls = 'regressiontests.middleware.cond_get_urls'
def setUp(self): def setUp(self):