2011-03-28 10:11:19 +08:00
|
|
|
import hashlib
|
2012-09-21 03:03:24 +08:00
|
|
|
import logging
|
2007-10-20 16:31:05 +08:00
|
|
|
import re
|
|
|
|
|
2005-07-13 09:25:57 +08:00
|
|
|
from django.conf import settings
|
2006-05-02 09:31:56 +08:00
|
|
|
from django import http
|
2005-07-13 09:25:57 +08:00
|
|
|
from django.core.mail import mail_managers
|
2007-10-20 16:31:05 +08:00
|
|
|
from django.utils.http import urlquote
|
2012-11-04 04:26:59 +08:00
|
|
|
from django.utils import six
|
2007-12-03 07:25:55 +08:00
|
|
|
from django.core import urlresolvers
|
2005-07-13 09:25:57 +08:00
|
|
|
|
2012-09-21 03:03:24 +08:00
|
|
|
|
|
|
|
# Module-level logger registered under the 'django.request' name; the
# middleware below uses it to record requests it refuses with a 403.
logger = logging.getLogger('django.request')
|
2010-10-04 23:12:39 +08:00
|
|
|
|
|
|
|
|
2006-06-08 13:00:13 +08:00
|
|
|
class CommonMiddleware(object):
    """
    "Common" middleware for taking care of some basic operations:

        - Forbids access to User-Agents in settings.DISALLOWED_USER_AGENTS

        - URL rewriting: Based on the APPEND_SLASH and PREPEND_WWW settings,
          this middleware appends missing slashes and/or prepends missing
          "www."s.

            - If APPEND_SLASH is set and the initial URL doesn't end with a
              slash, and it is not found in urlpatterns, a new URL is formed by
              appending a slash at the end. If this new URL is found in
              urlpatterns, then an HTTP-redirect is returned to this new URL;
              otherwise the initial URL is processed as usual.

        - ETags: If the USE_ETAGS setting is set, ETags will be calculated from
          the entire page content and Not Modified responses will be returned
          appropriately.
    """

    def process_request(self, request):
        """
        Check for denied User-Agents and rewrite the URL based on
        settings.APPEND_SLASH and settings.PREPEND_WWW.

        Returns an ``HttpResponseForbidden`` when the User-Agent matches one
        of settings.DISALLOWED_USER_AGENTS, an
        ``HttpResponsePermanentRedirect`` when the host or path has to be
        rewritten, and ``None`` when the request can proceed unchanged.

        Raises ``RuntimeError`` when settings.DEBUG is on and a POST request
        would need a trailing-slash redirect.
        """
        # Check for denied User-Agents
        if 'HTTP_USER_AGENT' in request.META:
            user_agent = request.META['HTTP_USER_AGENT']
            for user_agent_regex in settings.DISALLOWED_USER_AGENTS:
                if user_agent_regex.search(user_agent):
                    logger.warning(
                        'Forbidden (User agent): %s', request.path,
                        extra={
                            'status_code': 403,
                            'request': request
                        }
                    )
                    return http.HttpResponseForbidden('<h1>Forbidden</h1>')

        # Check for a redirect based on settings.APPEND_SLASH
        # and settings.PREPEND_WWW
        host = request.get_host()
        old_url = [host, request.path]
        # Work on a copy so the two lists can be compared afterwards to find
        # out whether any rewrite happened.
        new_url = old_url[:]

        if (settings.PREPEND_WWW and old_url[0] and
                not old_url[0].startswith('www.')):
            new_url[0] = 'www.' + old_url[0]

        # Append a slash if APPEND_SLASH is set and the URL doesn't have a
        # trailing slash and there is no pattern for the current path
        if settings.APPEND_SLASH and (not old_url[1].endswith('/')):
            urlconf = getattr(request, 'urlconf', None)
            if (not urlresolvers.is_valid_path(request.path_info, urlconf) and
                    urlresolvers.is_valid_path("%s/" % request.path_info, urlconf)):
                new_url[1] = new_url[1] + '/'
                if settings.DEBUG and request.method == 'POST':
                    raise RuntimeError((
                        "You called this URL via POST, but the URL doesn't end "
                        "in a slash and you have APPEND_SLASH set. Django can't "
                        "redirect to the slash URL while maintaining POST data. "
                        "Change your form to point to %s%s (note the trailing "
                        "slash), or set APPEND_SLASH=False in your Django "
                        "settings.") % (new_url[0], new_url[1]))

        if new_url == old_url:
            # No redirects required.
            return

        if new_url[0]:
            newurl = "%s://%s%s" % (
                'https' if request.is_secure() else 'http',
                new_url[0], urlquote(new_url[1]))
        else:
            # No host available: redirect to the quoted path alone.
            newurl = urlquote(new_url[1])

        if request.META.get('QUERY_STRING', ''):
            if six.PY3:
                newurl += '?' + request.META['QUERY_STRING']
            else:
                # `query_string` is a bytestring. Appending it to the unicode
                # string `newurl` will fail if it isn't ASCII-only. This isn't
                # allowed; only broken software generates such query strings.
                # Better drop the invalid query string than crash (#15152).
                try:
                    newurl += '?' + request.META['QUERY_STRING'].decode()
                except UnicodeDecodeError:
                    pass
        return http.HttpResponsePermanentRedirect(newurl)

    def process_response(self, request, response):
        """
        Send broken link emails and calculate the ETag, if needed.

        Returns the response to send: either the one passed in (possibly with
        an ``ETag`` header added) or an ``HttpResponseNotModified`` carrying
        the original response's cookies.
        """
        if response.status_code == 404:
            if settings.SEND_BROKEN_LINK_EMAILS and not settings.DEBUG:
                # If the referrer was from an internal link or a
                # non-search-engine site, send a note to the managers.
                domain = request.get_host()
                referer = request.META.get('HTTP_REFERER', None)
                is_internal = _is_internal_request(domain, referer)
                path = request.get_full_path()
                if (referer and not _is_ignorable_404(path)
                        and (is_internal or '?' not in referer)):
                    ua = request.META.get('HTTP_USER_AGENT', '<none>')
                    ip = request.META.get('REMOTE_ADDR', '<none>')
                    mail_managers(
                        "Broken %slink on %s" % (
                            'INTERNAL ' if is_internal else '', domain),
                        "Referrer: %s\nRequested URL: %s\nUser agent: %s\nIP address: %s\n"
                        % (referer, path, ua, ip),
                        fail_silently=True)
                # Broken-link handling ends here; ETag processing is skipped.
                return response

        # Use ETags, if requested.
        if settings.USE_ETAGS:
            if response.has_header('ETag'):
                etag = response['ETag']
            elif response.streaming:
                # No ETag is computed for streaming responses.
                etag = None
            else:
                etag = '"%s"' % hashlib.md5(response.content).hexdigest()
            if etag is not None:
                if (200 <= response.status_code < 300
                        and request.META.get('HTTP_IF_NONE_MATCH') == etag):
                    # Swap in a Not Modified response but keep the cookies
                    # that were set on the original one.
                    cookies = response.cookies
                    response = http.HttpResponseNotModified()
                    response.cookies = cookies
                else:
                    response['ETag'] = etag

        return response
|
|
|
|
|
|
|
|
def _is_ignorable_404(uri):
    """
    Tell whether a 404 for ``uri`` should be kept from the site managers.

    Returns True when ``uri`` starts with an entry of the deprecated
    IGNORABLE_404_STARTS setting, ends with an entry of the deprecated
    IGNORABLE_404_ENDS setting, or is matched by one of the patterns in
    IGNORABLE_404_URLS; returns False otherwise. A DeprecationWarning is
    issued for each deprecated setting that is present and non-empty.
    """
    legacy_prefixes = getattr(settings, 'IGNORABLE_404_STARTS', ())
    if legacy_prefixes:
        import warnings
        warnings.warn('The IGNORABLE_404_STARTS setting has been deprecated '
                      'in favor of IGNORABLE_404_URLS.', DeprecationWarning)
        if any(uri.startswith(prefix) for prefix in legacy_prefixes):
            return True

    legacy_suffixes = getattr(settings, 'IGNORABLE_404_ENDS', ())
    if legacy_suffixes:
        import warnings
        warnings.warn('The IGNORABLE_404_ENDS setting has been deprecated '
                      'in favor of IGNORABLE_404_URLS.', DeprecationWarning)
        if any(uri.endswith(suffix) for suffix in legacy_suffixes):
            return True

    # Fall back to the pattern-based setting that replaces the two above.
    for pattern in settings.IGNORABLE_404_URLS:
        if pattern.search(uri):
            return True
    return False
|
2006-09-27 02:49:28 +08:00
|
|
|
|
|
|
|
def _is_internal_request(domain, referer):
|
2008-10-07 16:22:50 +08:00
|
|
|
"""
|
|
|
|
Returns true if the referring URL is the same domain as the current request.
|
|
|
|
"""
|
2006-09-27 02:49:28 +08:00
|
|
|
# Different subdomains are treated as different domains.
|
|
|
|
return referer is not None and re.match("^https?://%s/" % re.escape(domain), referer)
|