From 8b216eb8656bfd83b34243189293f6433fdc2b80 Mon Sep 17 00:00:00 2001
From: Luke Plant
Date: Tue, 26 Sep 2006 18:49:28 +0000
Subject: [PATCH] Improved detection of whether a URL is internal or not for the purpose of broken link e-mails (referred links from images.google were being misclassified).

git-svn-id: http://code.djangoproject.com/svn/django/trunk@3870 bcc190cf-cafb-0310-a4f2-bffc1f526a37
---
 django/middleware/common.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/django/middleware/common.py b/django/middleware/common.py
index 8392fb0e5f..6283214fad 100644
--- a/django/middleware/common.py
+++ b/django/middleware/common.py
@@ -2,6 +2,7 @@ from django.conf import settings
 from django import http
 from django.core.mail import mail_managers
 import md5
+import re
 
 class CommonMiddleware(object):
     """
@@ -61,7 +62,7 @@ class CommonMiddleware(object):
                 # send a note to the managers.
                 domain = http.get_host(request)
                 referer = request.META.get('HTTP_REFERER', None)
-                is_internal = referer and (domain in referer)
+                is_internal = _is_internal_request(domain, referer)
                 path = request.get_full_path()
                 if referer and not _is_ignorable_404(path) and (is_internal or '?' not in referer):
                     ua = request.META.get('HTTP_USER_AGENT', '')
@@ -88,3 +89,8 @@ def _is_ignorable_404(uri):
         if uri.endswith(end):
             return True
     return False
+
+def _is_internal_request(domain, referer):
+    "Return true if the referring URL is the same domain as the current request"
+    # Different subdomains are treated as different domains.
+    return referer is not None and re.match("^https?://%s/" % re.escape(domain), referer)