Fixed #20099 -- Eased subclassing of BrokenLinkEmailsMiddleware
Thanks Ram Rachum for the report and the initial patch, and Simon Charette for the review.
This commit is contained in:
parent
6de81d65f4
commit
f940e564e4
1
AUTHORS
1
AUTHORS
|
@ -472,6 +472,7 @@ answer newbie questions, and generally made Django that much better:
|
||||||
Jyrki Pulliainen <jyrki.pulliainen@gmail.com>
|
Jyrki Pulliainen <jyrki.pulliainen@gmail.com>
|
||||||
Thejaswi Puthraya <thejaswi.puthraya@gmail.com>
|
Thejaswi Puthraya <thejaswi.puthraya@gmail.com>
|
||||||
Johann Queuniet <johann.queuniet@adh.naellia.eu>
|
Johann Queuniet <johann.queuniet@adh.naellia.eu>
|
||||||
|
Ram Rachum <ram@rachum.com>
|
||||||
Jan Rademaker
|
Jan Rademaker
|
||||||
Michael Radziej <mir@noris.de>
|
Michael Radziej <mir@noris.de>
|
||||||
Laurent Rahuel <laurent.rahuel@gmail.com>
|
Laurent Rahuel <laurent.rahuel@gmail.com>
|
||||||
|
|
|
@ -142,15 +142,17 @@ class BrokenLinkEmailsMiddleware(object):
|
||||||
domain = request.get_host()
|
domain = request.get_host()
|
||||||
path = request.get_full_path()
|
path = request.get_full_path()
|
||||||
referer = force_text(request.META.get('HTTP_REFERER', ''), errors='replace')
|
referer = force_text(request.META.get('HTTP_REFERER', ''), errors='replace')
|
||||||
is_internal = self.is_internal_request(domain, referer)
|
|
||||||
is_not_search_engine = '?' not in referer
|
if not self.is_ignorable_request(request, path, domain, referer):
|
||||||
is_ignorable = self.is_ignorable_404(path)
|
|
||||||
if referer and (is_internal or is_not_search_engine) and not is_ignorable:
|
|
||||||
ua = request.META.get('HTTP_USER_AGENT', '<none>')
|
ua = request.META.get('HTTP_USER_AGENT', '<none>')
|
||||||
ip = request.META.get('REMOTE_ADDR', '<none>')
|
ip = request.META.get('REMOTE_ADDR', '<none>')
|
||||||
mail_managers(
|
mail_managers(
|
||||||
"Broken %slink on %s" % (('INTERNAL ' if is_internal else ''), domain),
|
"Broken %slink on %s" % (
|
||||||
"Referrer: %s\nRequested URL: %s\nUser agent: %s\nIP address: %s\n" % (referer, path, ua, ip),
|
('INTERNAL ' if self.is_internal_request(domain, referer) else ''),
|
||||||
|
domain
|
||||||
|
),
|
||||||
|
"Referrer: %s\nRequested URL: %s\nUser agent: %s\n"
|
||||||
|
"IP address: %s\n" % (referer, path, ua, ip),
|
||||||
fail_silently=True)
|
fail_silently=True)
|
||||||
return response
|
return response
|
||||||
|
|
||||||
|
@ -159,10 +161,14 @@ class BrokenLinkEmailsMiddleware(object):
|
||||||
Returns True if the referring URL is the same domain as the current request.
|
Returns True if the referring URL is the same domain as the current request.
|
||||||
"""
|
"""
|
||||||
# Different subdomains are treated as different domains.
|
# Different subdomains are treated as different domains.
|
||||||
return re.match("^https?://%s/" % re.escape(domain), referer)
|
return bool(re.match("^https?://%s/" % re.escape(domain), referer))
|
||||||
|
|
||||||
def is_ignorable_404(self, uri):
|
def is_ignorable_request(self, request, uri, domain, referer):
|
||||||
"""
|
"""
|
||||||
Returns True if a 404 at the given URL *shouldn't* notify the site managers.
|
Returns True if the given request *shouldn't* notify the site managers.
|
||||||
"""
|
"""
|
||||||
|
# A '?' in the referer is taken to mean the request came from a search engine.
|
||||||
|
if (not referer or
|
||||||
|
(not self.is_internal_request(domain, referer) and '?' in referer)):
|
||||||
|
return True
|
||||||
return any(pattern.search(uri) for pattern in settings.IGNORABLE_404_URLS)
|
return any(pattern.search(uri) for pattern in settings.IGNORABLE_404_URLS)
|
||||||
|
|
|
@ -98,6 +98,11 @@ crawlers often request::
|
||||||
(Note that these are regular expressions, so we put a backslash in front of
|
(Note that these are regular expressions, so we put a backslash in front of
|
||||||
periods to escape them.)
|
periods to escape them.)
|
||||||
|
|
||||||
|
If you'd like to customize the behavior of
|
||||||
|
:class:`django.middleware.common.BrokenLinkEmailsMiddleware` further (for
|
||||||
|
example to ignore requests coming from web crawlers), you should subclass it
|
||||||
|
and override its methods.
|
||||||
|
|
||||||
.. seealso::
|
.. seealso::
|
||||||
|
|
||||||
404 errors are logged using the logging framework. By default, these log
|
404 errors are logged using the logging framework. By default, these log
|
||||||
|
|
|
@ -326,6 +326,25 @@ class BrokenLinkEmailsMiddlewareTest(TestCase):
|
||||||
BrokenLinkEmailsMiddleware().process_response(self.req, self.resp)
|
BrokenLinkEmailsMiddleware().process_response(self.req, self.resp)
|
||||||
self.assertEqual(len(mail.outbox), 1)
|
self.assertEqual(len(mail.outbox), 1)
|
||||||
|
|
||||||
|
def test_custom_request_checker(self):
|
||||||
|
class SubclassedMiddleware(BrokenLinkEmailsMiddleware):
|
||||||
|
ignored_user_agent_patterns = (re.compile(r'Spider.*'),
|
||||||
|
re.compile(r'Robot.*'))
|
||||||
|
def is_ignorable_request(self, request, uri, domain, referer):
|
||||||
|
'''Check user-agent in addition to normal checks.'''
|
||||||
|
if super(SubclassedMiddleware, self).is_ignorable_request(request, uri, domain, referer):
|
||||||
|
return True
|
||||||
|
user_agent = request.META['HTTP_USER_AGENT']
|
||||||
|
return any(pattern.search(user_agent) for pattern in
|
||||||
|
self.ignored_user_agent_patterns)
|
||||||
|
|
||||||
|
self.req.META['HTTP_REFERER'] = '/another/url/'
|
||||||
|
self.req.META['HTTP_USER_AGENT'] = 'Spider machine 3.4'
|
||||||
|
SubclassedMiddleware().process_response(self.req, self.resp)
|
||||||
|
self.assertEqual(len(mail.outbox), 0)
|
||||||
|
self.req.META['HTTP_USER_AGENT'] = 'My user agent'
|
||||||
|
SubclassedMiddleware().process_response(self.req, self.resp)
|
||||||
|
self.assertEqual(len(mail.outbox), 1)
|
||||||
|
|
||||||
class ConditionalGetMiddlewareTest(TestCase):
|
class ConditionalGetMiddlewareTest(TestCase):
|
||||||
urls = 'middleware.cond_get_urls'
|
urls = 'middleware.cond_get_urls'
|
||||||
|
|
Loading…
Reference in New Issue