mirror of https://github.com/django/django.git
[1.9.x] Fixed #25302 (again) -- Ignored scheme when checking for bad referers.
The check introduced in 4ce433e
was too strict in real life. The poorly implemented bots this patch attempted to ignore are sloppy when it comes to http vs. https. Backport of 11f10b7
from master
This commit is contained in:
parent
b4a1d545db
commit
8dc11dc592
|
@ -8,6 +8,7 @@ from django.core.exceptions import PermissionDenied
|
|||
from django.core.mail import mail_managers
|
||||
from django.utils.cache import get_conditional_response, set_response_etag
|
||||
from django.utils.encoding import force_text
|
||||
from django.utils.six.moves.urllib.parse import urlparse
|
||||
|
||||
logger = logging.getLogger('django.request')
|
||||
|
||||
|
@ -163,13 +164,17 @@ class BrokenLinkEmailsMiddleware(object):
|
|||
according to project settings or in three specific situations:
|
||||
- If the referer is empty.
|
||||
- If a '?' in referer is identified as a search engine source.
|
||||
- If the referer is equal to the current URL (assumed to be a
|
||||
malicious bot).
|
||||
- If the referer is equal to the current URL, ignoring the scheme
|
||||
(assumed to be a poorly implemented bot).
|
||||
"""
|
||||
full_url = "%s://%s/%s" % (request.scheme, domain, uri.lstrip('/'))
|
||||
if (not referer or
|
||||
(not self.is_internal_request(domain, referer) and '?' in referer) or
|
||||
(referer == uri or referer == full_url)):
|
||||
if not referer:
|
||||
return True
|
||||
|
||||
if not self.is_internal_request(domain, referer) and '?' in referer:
|
||||
return True
|
||||
|
||||
parsed_referer = urlparse(referer)
|
||||
if parsed_referer.netloc in ['', domain] and parsed_referer.path == uri:
|
||||
return True
|
||||
|
||||
return any(pattern.search(uri) for pattern in settings.IGNORABLE_404_URLS)
|
||||
|
|
|
@ -383,11 +383,20 @@ class BrokenLinkEmailsMiddlewareTest(SimpleTestCase):
|
|||
self.req.META['HTTP_REFERER'] = self.req.path
|
||||
BrokenLinkEmailsMiddleware().process_response(self.req, self.resp)
|
||||
self.assertEqual(len(mail.outbox), 0)
|
||||
|
||||
# URL with scheme and domain should also be ignored
|
||||
self.req.META['HTTP_REFERER'] = 'http://testserver%s' % self.req.path
|
||||
BrokenLinkEmailsMiddleware().process_response(self.req, self.resp)
|
||||
self.assertEqual(len(mail.outbox), 0)
|
||||
|
||||
# URL with a different scheme should be ignored as well because bots
|
||||
# tend to use http:// in referers even when browsing HTTPS websites.
|
||||
self.req.META['HTTP_X_PROTO'] = 'https'
|
||||
self.req.META['SERVER_PORT'] = 443
|
||||
with self.settings(SECURE_PROXY_SSL_HEADER=('HTTP_X_PROTO', 'https')):
|
||||
BrokenLinkEmailsMiddleware().process_response(self.req, self.resp)
|
||||
self.assertEqual(len(mail.outbox), 0)
|
||||
|
||||
def test_referer_equal_to_requested_url_on_another_domain(self):
|
||||
self.req.META['HTTP_REFERER'] = 'http://anotherserver%s' % self.req.path
|
||||
BrokenLinkEmailsMiddleware().process_response(self.req, self.resp)
|
||||
|
|
Loading…
Reference in New Issue