2007-10-20 16:31:05 +08:00
|
|
|
import re
|
2016-04-03 18:15:10 +08:00
|
|
|
import warnings
|
2007-10-20 16:31:05 +08:00
|
|
|
|
2015-01-28 20:35:27 +08:00
|
|
|
from django import http
|
2005-07-13 09:25:57 +08:00
|
|
|
from django.conf import settings
|
2015-06-24 18:11:43 +08:00
|
|
|
from django.core.exceptions import PermissionDenied
|
2015-01-28 20:35:27 +08:00
|
|
|
from django.core.mail import mail_managers
|
2015-12-30 23:51:16 +08:00
|
|
|
from django.urls import is_valid_path
|
2016-06-21 03:46:33 +08:00
|
|
|
from django.utils.cache import (
|
|
|
|
cc_delim_re, get_conditional_response, set_response_etag,
|
|
|
|
)
|
2016-04-03 18:15:10 +08:00
|
|
|
from django.utils.deprecation import MiddlewareMixin, RemovedInDjango21Warning
|
2013-05-18 18:37:22 +08:00
|
|
|
from django.utils.encoding import force_text
|
2015-11-27 04:27:12 +08:00
|
|
|
from django.utils.six.moves.urllib.parse import urlparse
|
2012-09-21 03:03:24 +08:00
|
|
|
|
2010-10-04 23:12:39 +08:00
|
|
|
|
2015-11-07 23:12:37 +08:00
|
|
|
class CommonMiddleware(MiddlewareMixin):
    """
    "Common" middleware for taking care of some basic operations:

        - Forbids access to User-Agents in settings.DISALLOWED_USER_AGENTS

        - URL rewriting: Based on the APPEND_SLASH and PREPEND_WWW settings,
          this middleware appends missing slashes and/or prepends missing
          "www."s.

            - If APPEND_SLASH is set and the initial URL doesn't end with a
              slash, and it is not found in urlpatterns, a new URL is formed by
              appending a slash at the end. If this new URL is found in
              urlpatterns, then an HTTP-redirect is returned to this new URL;
              otherwise the initial URL is processed as usual.

          This behavior can be customized by subclassing CommonMiddleware and
          overriding the response_redirect_class attribute.

        - ETags: If the USE_ETAGS setting is set, ETags will be calculated from
          the entire page content and Not Modified responses will be returned
          appropriately. USE_ETAGS is deprecated in favor of
          ConditionalGetMiddleware.
    """

    # Response class used for every redirect issued by this middleware;
    # subclasses may override it (e.g. to issue temporary redirects).
    response_redirect_class = http.HttpResponsePermanentRedirect

    def process_request(self, request):
        """
        Check for denied User-Agents and rewrite the URL based on
        settings.APPEND_SLASH and settings.PREPEND_WWW

        Raise PermissionDenied if the request's User-Agent matches one of the
        patterns in settings.DISALLOWED_USER_AGENTS. Return a redirect
        response when "www." must be prepended and/or a slash appended;
        otherwise return None.
        """

        # Check for denied User-Agents
        if 'HTTP_USER_AGENT' in request.META:
            for user_agent_regex in settings.DISALLOWED_USER_AGENTS:
                if user_agent_regex.search(request.META['HTTP_USER_AGENT']):
                    raise PermissionDenied('Forbidden user agent')

        # Check for a redirect based on settings.PREPEND_WWW
        host = request.get_host()
        must_prepend = settings.PREPEND_WWW and host and not host.startswith('www.')
        redirect_url = ('%s://www.%s' % (request.scheme, host)) if must_prepend else ''

        # Check if a slash should be appended
        if self.should_redirect_with_slash(request):
            path = self.get_full_path_with_slash(request)
        else:
            path = request.get_full_path()

        # Return a redirect if necessary
        if redirect_url or path != request.get_full_path():
            redirect_url += path
            return self.response_redirect_class(redirect_url)

    def should_redirect_with_slash(self, request):
        """
        Return True if settings.APPEND_SLASH is True and appending a slash to
        the request path turns an invalid path into a valid one.
        """
        if settings.APPEND_SLASH and not request.path_info.endswith('/'):
            # Use the per-request urlconf when one has been set on the request.
            urlconf = getattr(request, 'urlconf', None)
            return (
                not is_valid_path(request.path_info, urlconf) and
                is_valid_path('%s/' % request.path_info, urlconf)
            )
        return False

    def get_full_path_with_slash(self, request):
        """
        Return the full path of the request with a trailing slash appended.

        A path starting with two slashes has its second slash escaped as
        "%2F" so that the result can't be used as a scheme-relative redirect
        target.

        Raise a RuntimeError if settings.DEBUG is True and request.method is
        POST, PUT, or PATCH.
        """
        new_path = request.get_full_path(force_append_slash=True)
        # Prevent construction of scheme-relative URLs: browsers interpret a
        # redirect target that begins with '//' as "same scheme, different
        # host", which would let a crafted request path redirect off-site.
        if new_path.startswith('//'):
            new_path = '/%2F' + new_path[2:]
        if settings.DEBUG and request.method in ('POST', 'PUT', 'PATCH'):
            raise RuntimeError(
                "You called this URL via %(method)s, but the URL doesn't end "
                "in a slash and you have APPEND_SLASH set. Django can't "
                "redirect to the slash URL while maintaining %(method)s data. "
                "Change your form to point to %(url)s (note the trailing "
                "slash), or set APPEND_SLASH=False in your Django settings." % {
                    'method': request.method,
                    'url': request.get_host() + new_path,
                }
            )
        return new_path

    def process_response(self, request, response):
        """
        Calculate the ETag, if needed.

        When the status code of the response is 404, it may redirect to a path
        with an appended slash if should_redirect_with_slash() returns True.

        Also set the Content-Length header on non-streaming responses that
        don't already have one.
        """
        # If the given URL is "Not Found", then check if we should redirect to
        # a path with a slash appended.
        if response.status_code == 404:
            if self.should_redirect_with_slash(request):
                return self.response_redirect_class(self.get_full_path_with_slash(request))

        if settings.USE_ETAGS and self.needs_etag(response):
            warnings.warn(
                "The USE_ETAGS setting is deprecated in favor of "
                "ConditionalGetMiddleware which sets the ETag regardless of "
                "the setting. CommonMiddleware won't do ETag processing in "
                "Django 2.1.",
                RemovedInDjango21Warning
            )
            # Don't overwrite an ETag that is already set on the response.
            if not response.has_header('ETag'):
                set_response_etag(response)

            if response.has_header('ETag'):
                # Hand the response to get_conditional_response() together
                # with its ETag and return whatever that helper produces.
                return get_conditional_response(
                    request,
                    etag=response['ETag'],
                    response=response,
                )
        # Add the Content-Length header to non-streaming responses if not
        # already set.
        if not response.streaming and not response.has_header('Content-Length'):
            response['Content-Length'] = str(len(response.content))

        return response

    def needs_etag(self, response):
        """
        Return True if an ETag header should be added to response.

        That is the case unless one of the response's Cache-Control directives
        is "no-store" (compared case-insensitively).
        """
        cache_control_headers = cc_delim_re.split(response.get('Cache-Control', ''))
        return all(header.lower() != 'no-store' for header in cache_control_headers)
|
|
|
|
|
2006-09-27 02:49:28 +08:00
|
|
|
|
2015-11-07 23:12:37 +08:00
|
|
|
class BrokenLinkEmailsMiddleware(MiddlewareMixin):
    """
    Middleware that emails the site managers about 404 responses whose
    request is not considered ignorable.
    """

    def process_response(self, request, response):
        """
        Send broken link emails for relevant 404 NOT FOUND responses.

        The response itself is always returned unchanged.
        """
        # Only 404s served outside of DEBUG mode are reported.
        if response.status_code != 404 or settings.DEBUG:
            return response

        domain = request.get_host()
        path = request.get_full_path()
        referer = force_text(request.META.get('HTTP_REFERER', ''), errors='replace')

        if self.is_ignorable_request(request, path, domain, referer):
            return response

        ua = force_text(request.META.get('HTTP_USER_AGENT', '<none>'), errors='replace')
        ip = request.META.get('REMOTE_ADDR', '<none>')

        # Flag links whose referer is on the same domain as the request.
        internal_tag = 'INTERNAL ' if self.is_internal_request(domain, referer) else ''
        subject = "Broken %slink on %s" % (internal_tag, domain)
        message = (
            "Referrer: %s\nRequested URL: %s\nUser agent: %s\n"
            "IP address: %s\n" % (referer, path, ua, ip)
        )
        mail_managers(subject, message, fail_silently=True)
        return response

    def is_internal_request(self, domain, referer):
        """
        Return True if the referring URL is the same domain as the current
        request.
        """
        # Different subdomains are treated as different domains.
        same_domain_pattern = "^https?://%s/" % re.escape(domain)
        return re.match(same_domain_pattern, referer) is not None

    def is_ignorable_request(self, request, uri, domain, referer):
        """
        Return True if the given request *shouldn't* notify the site managers
        according to project settings or in situations outlined by the inline
        comments.
        """
        # The referer is empty.
        if not referer:
            return True

        # APPEND_SLASH is enabled and the referer is equal to the current URL
        # without a trailing slash indicating an internal redirect.
        if settings.APPEND_SLASH and uri.endswith('/') and referer == uri[:-1]:
            return True

        # A '?' in referer is identified as a search engine source.
        if not self.is_internal_request(domain, referer) and '?' in referer:
            return True

        # The referer is equal to the current URL, ignoring the scheme (assumed
        # to be a poorly implemented bot).
        referer_parts = urlparse(referer)
        same_host = referer_parts.netloc in ('', domain)
        if same_host and referer_parts.path == uri:
            return True

        # Finally, ignore any URL matched by one of the configured patterns.
        for pattern in settings.IGNORABLE_404_URLS:
            if pattern.search(uri):
                return True
        return False
|