import hashlib import logging import re from django import http from django.conf import settings from django.core import urlresolvers from django.core.mail import mail_managers from django.utils import six from django.utils.encoding import force_text from django.utils.http import urlquote logger = logging.getLogger('django.request') class CommonMiddleware(object): """ "Common" middleware for taking care of some basic operations: - Forbids access to User-Agents in settings.DISALLOWED_USER_AGENTS - URL rewriting: Based on the APPEND_SLASH and PREPEND_WWW settings, this middleware appends missing slashes and/or prepends missing "www."s. - If APPEND_SLASH is set and the initial URL doesn't end with a slash, and it is not found in urlpatterns, a new URL is formed by appending a slash at the end. If this new URL is found in urlpatterns, then an HTTP-redirect is returned to this new URL; otherwise the initial URL is processed as usual. This behavior can be customized by subclassing CommonMiddleware and overriding the response_redirect_class attribute. - ETags: If the USE_ETAGS setting is set, ETags will be calculated from the entire page content and Not Modified responses will be returned appropriately. """ response_redirect_class = http.HttpResponsePermanentRedirect def process_request(self, request): """ Check for denied User-Agents and rewrite the URL based on settings.APPEND_SLASH and settings.PREPEND_WWW """ # Check for denied User-Agents if 'HTTP_USER_AGENT' in request.META: for user_agent_regex in settings.DISALLOWED_USER_AGENTS: if user_agent_regex.search(request.META['HTTP_USER_AGENT']): logger.warning('Forbidden (User agent): %s', request.path, extra={ 'status_code': 403, 'request': request } ) return http.HttpResponseForbidden('

Forbidden

') # Check for a redirect based on settings.APPEND_SLASH # and settings.PREPEND_WWW host = request.get_host() old_url = [host, request.path] new_url = old_url[:] if (settings.PREPEND_WWW and old_url[0] and not old_url[0].startswith('www.')): new_url[0] = 'www.' + old_url[0] # Append a slash if APPEND_SLASH is set and the URL doesn't have a # trailing slash and there is no pattern for the current path if settings.APPEND_SLASH and (not old_url[1].endswith('/')): urlconf = getattr(request, 'urlconf', None) if (not urlresolvers.is_valid_path(request.path_info, urlconf) and urlresolvers.is_valid_path("%s/" % request.path_info, urlconf)): new_url[1] = new_url[1] + '/' if settings.DEBUG and request.method in ('POST', 'PUT', 'PATCH'): raise RuntimeError(("" "You called this URL via %(method)s, but the URL doesn't end " "in a slash and you have APPEND_SLASH set. Django can't " "redirect to the slash URL while maintaining %(method)s data. " "Change your form to point to %(url)s (note the trailing " "slash), or set APPEND_SLASH=False in your Django " "settings.") % {'method': request.method, 'url': ''.join(new_url)}) if new_url == old_url: # No redirects required. return if new_url[0]: newurl = "%s://%s%s" % ( request.scheme, new_url[0], urlquote(new_url[1])) else: newurl = urlquote(new_url[1]) if request.META.get('QUERY_STRING', ''): if six.PY3: newurl += '?' + request.META['QUERY_STRING'] else: # `query_string` is a bytestring. Appending it to the unicode # string `newurl` will fail if it isn't ASCII-only. This isn't # allowed; only broken software generates such query strings. # Better drop the invalid query string than crash (#15152). try: newurl += '?' + request.META['QUERY_STRING'].decode() except UnicodeDecodeError: pass return self.response_redirect_class(newurl) def process_response(self, request, response): """ Calculate the ETag, if needed. """ if settings.USE_ETAGS: if response.has_header('ETag'): etag = response['ETag'] elif response.streaming: etag = None else: etag = '"%s"' % hashlib.md5(response.content).hexdigest() if etag is not None: if (200 <= response.status_code < 300 and request.META.get('HTTP_IF_NONE_MATCH') == etag): cookies = response.cookies response = http.HttpResponseNotModified() response.cookies = cookies else: response['ETag'] = etag return response class BrokenLinkEmailsMiddleware(object): def process_response(self, request, response): """ Send broken link emails for relevant 404 NOT FOUND responses. """ if response.status_code == 404 and not settings.DEBUG: domain = request.get_host() path = request.get_full_path() referer = force_text(request.META.get('HTTP_REFERER', ''), errors='replace') if not self.is_ignorable_request(request, path, domain, referer): ua = request.META.get('HTTP_USER_AGENT', '') ip = request.META.get('REMOTE_ADDR', '') mail_managers( "Broken %slink on %s" % ( ('INTERNAL ' if self.is_internal_request(domain, referer) else ''), domain ), "Referrer: %s\nRequested URL: %s\nUser agent: %s\n" "IP address: %s\n" % (referer, path, ua, ip), fail_silently=True) return response def is_internal_request(self, domain, referer): """ Returns True if the referring URL is the same domain as the current request. """ # Different subdomains are treated as different domains. return bool(re.match("^https?://%s/" % re.escape(domain), referer)) def is_ignorable_request(self, request, uri, domain, referer): """ Returns True if the given request *shouldn't* notify the site managers. """ # '?' in referer is identified as search engine source if (not referer or (not self.is_internal_request(domain, referer) and '?' in referer)): return True return any(pattern.search(uri) for pattern in settings.IGNORABLE_404_URLS)