Fixed #8995 -- Added support for HTTPS in sitemaps.

Modularized tests and did a bit of cleanup while I was in the area.



git-svn-id: http://code.djangoproject.com/svn/django/trunk@17409 bcc190cf-cafb-0310-a4f2-bffc1f526a37
This commit is contained in:
Aymeric Augustin 2012-01-29 19:24:32 +00:00
parent 123f567093
commit a4b472dd80
13 changed files with 229 additions and 93 deletions

View File

@ -40,6 +40,10 @@ class Sitemap(object):
# http://sitemaps.org/protocol.php#index.
limit = 50000
# If protocol is None, the URLs in the sitemap will use the protocol
# with which the sitemap was requested.
protocol = None
def __get(self, name, obj, default=None):
try:
attr = getattr(self, name)
@ -61,7 +65,14 @@ class Sitemap(object):
return self._paginator
paginator = property(_get_paginator)
def get_urls(self, page=1, site=None):
def get_urls(self, page=1, site=None, protocol=None):
# Determine protocol
if self.protocol is not None:
protocol = self.protocol
if protocol is None:
protocol = 'http'
# Determine domain
if site is None:
if Site._meta.installed:
try:
@ -69,11 +80,15 @@ class Sitemap(object):
except Site.DoesNotExist:
pass
if site is None:
raise ImproperlyConfigured("In order to use Sitemaps you must either use the sites framework or pass in a Site or RequestSite object in your view code.")
raise ImproperlyConfigured("In order to use Sitemaps "
"you must either use the sites framework "
"or pass in a Site or RequestSite object "
"in your view code.")
domain = site.domain
urls = []
for item in self.paginator.page(page).object_list:
loc = "http://%s%s" % (site.domain, self.__get('location', item))
loc = "%s://%s%s" % (protocol, domain, self.__get('location', item))
priority = self.__get('priority', item, None)
url_info = {
'item': item,

View File

@ -1 +1,4 @@
from django.contrib.sitemaps.tests.basic import *
from .flatpages import FlatpagesSitemapTests
from .generic import GenericViewsSitemapTests
from .http import HTTPSitemapTests
from .https import HTTPSSitemapTests, HTTPSDetectionSitemapTests

View File

@ -0,0 +1,28 @@
import os
from django.conf import settings
from django.contrib.auth.models import User
from django.contrib.sites.models import Site
from django.test import TestCase
class SitemapTestsBase(TestCase):
protocol = 'http'
domain = 'example.com' if Site._meta.installed else 'testserver'
urls = 'django.contrib.sitemaps.tests.urls.http'
def setUp(self):
self.base_url = '%s://%s' % (self.protocol, self.domain)
self.old_USE_L10N = settings.USE_L10N
self.old_TEMPLATE_DIRS = settings.TEMPLATE_DIRS
settings.TEMPLATE_DIRS = (
os.path.join(os.path.dirname(__file__), 'templates'),
)
self.old_Site_meta_installed = Site._meta.installed
# Create a user that will double as sitemap content
User.objects.create_user('testuser', 'test@example.com', 's3krit')
def tearDown(self):
settings.USE_L10N = self.old_USE_L10N
settings.TEMPLATE_DIRS = self.old_TEMPLATE_DIRS
Site._meta.installed = self.old_Site_meta_installed

View File

@ -0,0 +1,37 @@
from django.conf import settings
from django.utils.unittest import skipUnless
from .base import SitemapTestsBase
class FlatpagesSitemapTests(SitemapTestsBase):
@skipUnless("django.contrib.flatpages" in settings.INSTALLED_APPS,
"django.contrib.flatpages app not installed.")
def test_flatpage_sitemap(self):
"Basic FlatPage sitemap test"
# Import FlatPage inside the test so that when django.contrib.flatpages
# is not installed we don't get problems trying to delete Site
# objects (FlatPage has an M2M to Site, Site.delete() tries to
# delete related objects, but the M2M table doesn't exist.
from django.contrib.flatpages.models import FlatPage
public = FlatPage.objects.create(
url=u'/public/',
title=u'Public Page',
enable_comments=True,
registration_required=False,
)
public.sites.add(settings.SITE_ID)
private = FlatPage.objects.create(
url=u'/private/',
title=u'Private Page',
enable_comments=True,
registration_required=True
)
private.sites.add(settings.SITE_ID)
response = self.client.get('/flatpages/sitemap.xml')
# Public flatpage should be in the sitemap
self.assertContains(response, '<loc>%s%s</loc>' % (self.base_url, public.url))
# Private flatpage should not be in the sitemap
self.assertNotContains(response, '<loc>%s%s</loc>' % (self.base_url, private.url))

View File

@ -0,0 +1,17 @@
from django.contrib.auth.models import User
from .base import SitemapTestsBase
class GenericViewsSitemapTests(SitemapTestsBase):
def test_generic_sitemap(self):
"A minimal generic sitemap can be rendered"
response = self.client.get('/generic/sitemap.xml')
expected = ''
for username in User.objects.values_list("username", flat=True):
expected += "<url><loc>%s/users/%s/</loc></url>" % (self.base_url, username)
self.assertEqual(response.content, """<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
%s
</urlset>
""" % expected)

View File

@ -1,39 +1,16 @@
import os
from datetime import date
from django.conf import settings
from django.contrib.auth.models import User
from django.contrib.sitemaps import Sitemap, GenericSitemap
from django.contrib.sites.models import Site
from django.core.exceptions import ImproperlyConfigured
from django.test import TestCase
from django.utils.unittest import skipUnless
from django.utils.formats import localize
from django.utils.translation import activate, deactivate
from .base import SitemapTestsBase
class SitemapTests(TestCase):
urls = 'django.contrib.sitemaps.tests.urls'
def setUp(self):
if Site._meta.installed:
self.base_url = 'http://example.com'
else:
self.base_url = 'http://testserver'
self.old_USE_L10N = settings.USE_L10N
self.old_Site_meta_installed = Site._meta.installed
self.old_TEMPLATE_DIRS = settings.TEMPLATE_DIRS
self.old_Site_meta_installed = Site._meta.installed
settings.TEMPLATE_DIRS = (
os.path.join(os.path.dirname(__file__), 'templates'),
)
# Create a user that will double as sitemap content
User.objects.create_user('testuser', 'test@example.com', 's3krit')
def tearDown(self):
settings.USE_L10N = self.old_USE_L10N
Site._meta.installed = self.old_Site_meta_installed
settings.TEMPLATE_DIRS = self.old_TEMPLATE_DIRS
Site._meta.installed = self.old_Site_meta_installed
class HTTPSitemapTests(SitemapTestsBase):
def test_simple_sitemap_index(self):
"A simple sitemap index can be rendered"
@ -54,6 +31,15 @@ class SitemapTests(TestCase):
</sitemapindex>
""" % self.base_url)
def test_simple_sitemap_section(self):
"A simple sitemap section can be rendered"
response = self.client.get('/simple/sitemap-simple.xml')
self.assertEqual(response.content, """<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
<url><loc>%s/location/</loc><lastmod>%s</lastmod><changefreq>never</changefreq><priority>0.5</priority></url>
</urlset>
""" % (self.base_url, date.today()))
def test_simple_sitemap(self):
"A simple sitemap can be rendered"
response = self.client.get('/simple/sitemap.xml')
@ -88,48 +74,6 @@ class SitemapTests(TestCase):
self.assertContains(response, '<lastmod>%s</lastmod>' % date.today())
deactivate()
def test_generic_sitemap(self):
"A minimal generic sitemap can be rendered"
response = self.client.get('/generic/sitemap.xml')
expected = ''
for username in User.objects.values_list("username", flat=True):
expected += "<url><loc>%s/users/%s/</loc></url>" % (self.base_url, username)
self.assertEqual(response.content, """<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
%s
</urlset>
""" % expected)
@skipUnless("django.contrib.flatpages" in settings.INSTALLED_APPS, "django.contrib.flatpages app not installed.")
def test_flatpage_sitemap(self):
"Basic FlatPage sitemap test"
# Import FlatPage inside the test so that when django.contrib.flatpages
# is not installed we don't get problems trying to delete Site
# objects (FlatPage has an M2M to Site, Site.delete() tries to
# delete related objects, but the M2M table doesn't exist.
from django.contrib.flatpages.models import FlatPage
public = FlatPage.objects.create(
url=u'/public/',
title=u'Public Page',
enable_comments=True,
registration_required=False,
)
public.sites.add(settings.SITE_ID)
private = FlatPage.objects.create(
url=u'/private/',
title=u'Private Page',
enable_comments=True,
registration_required=True
)
private.sites.add(settings.SITE_ID)
response = self.client.get('/flatpages/sitemap.xml')
# Public flatpage should be in the sitemap
self.assertContains(response, '<loc>%s%s</loc>' % (self.base_url, public.url))
# Private flatpage should not be in the sitemap
self.assertNotContains(response, '<loc>%s%s</loc>' % (self.base_url, private.url))
def test_requestsite_sitemap(self):
# Make sure hitting the flatpages sitemap without the sites framework
# installed doesn't raise an exception
@ -141,7 +85,8 @@ class SitemapTests(TestCase):
</urlset>
""" % date.today())
@skipUnless("django.contrib.sites" in settings.INSTALLED_APPS, "django.contrib.sites app not installed.")
@skipUnless("django.contrib.sites" in settings.INSTALLED_APPS,
"django.contrib.sites app not installed.")
def test_sitemap_get_urls_no_site_1(self):
"""
Check we get ImproperlyConfigured if we don't pass a site object to

View File

@ -0,0 +1,51 @@
from datetime import date
from django.test.utils import override_settings
from .base import SitemapTestsBase
class HTTPSSitemapTests(SitemapTestsBase):
protocol = 'https'
urls = 'django.contrib.sitemaps.tests.urls.https'
def test_secure_sitemap_index(self):
"A secure sitemap index can be rendered"
response = self.client.get('/secure/index.xml')
self.assertEqual(response.content, """<?xml version="1.0" encoding="UTF-8"?>
<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
<sitemap><loc>%s/secure/sitemap-simple.xml</loc></sitemap>
</sitemapindex>
""" % self.base_url)
def test_secure_sitemap_section(self):
"A secure sitemap section can be rendered"
response = self.client.get('/secure/sitemap-simple.xml')
self.assertEqual(response.content, """<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
<url><loc>%s/location/</loc><lastmod>%s</lastmod><changefreq>never</changefreq><priority>0.5</priority></url>
</urlset>
""" % (self.base_url, date.today()))
#@override_settings(SECURE_PROXY_SSL_HEADER=False)
class HTTPSDetectionSitemapTests(SitemapTestsBase):
extra = {'wsgi.url_scheme': 'https'}
def test_sitemap_index_with_https_request(self):
"A sitemap index requested in HTTPS is rendered with HTTPS links"
response = self.client.get('/simple/index.xml', **self.extra)
self.assertEqual(response.content, """<?xml version="1.0" encoding="UTF-8"?>
<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
<sitemap><loc>%s/simple/sitemap-simple.xml</loc></sitemap>
</sitemapindex>
""" % self.base_url.replace('http://', 'https://'))
def test_sitemap_section_with_https_request(self):
"A sitemap section requested in HTTPS is rendered with HTTPS links"
response = self.client.get('/simple/sitemap-simple.xml', **self.extra)
self.assertEqual(response.content, """<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
<url><loc>%s/location/</loc><lastmod>%s</lastmod><changefreq>never</changefreq><priority>0.5</priority></url>
</urlset>
""" % (self.base_url.replace('http://', 'https://'), date.today()))
HTTPSDetectionSitemapTests = override_settings(SECURE_PROXY_SSL_HEADER=False)(HTTPSDetectionSitemapTests)

View File

@ -18,9 +18,7 @@ simple_sitemaps = {
}
generic_sitemaps = {
'generic': GenericSitemap({
'queryset': User.objects.all()
}),
'generic': GenericSitemap({'queryset': User.objects.all()}),
}
flatpage_sitemaps = {

View File

@ -0,0 +1,16 @@
from django.conf.urls import patterns
from .http import SimpleSitemap
class HTTPSSitemap(SimpleSitemap):
protocol = 'https'
secure_sitemaps = {
'simple': HTTPSSitemap,
}
urlpatterns = patterns('django.contrib.sitemaps.views',
(r'^secure/index\.xml$', 'index', {'sitemaps': secure_sitemaps}),
(r'^secure/sitemap-(?P<section>.+)\.xml$', 'sitemap',
{'sitemaps': secure_sitemaps}),
)

View File

@ -7,40 +7,47 @@ from django.template.response import TemplateResponse
def index(request, sitemaps,
template_name='sitemap_index.xml', mimetype='application/xml',
sitemap_url_name='django.contrib.sitemaps.views.sitemap'):
current_site = get_current_site(request)
req_protocol = 'https' if request.is_secure() else 'http'
req_site = get_current_site(request)
sites = []
protocol = request.is_secure() and 'https' or 'http'
for section, site in sitemaps.items():
site.request = request
if callable(site):
pages = site().paginator.num_pages
else:
pages = site.paginator.num_pages
sitemap_url = urlresolvers.reverse(sitemap_url_name, kwargs={'section': section})
sites.append('%s://%s%s' % (protocol, current_site.domain, sitemap_url))
if pages > 1:
for page in range(2, pages+1):
sites.append('%s://%s%s?p=%s' % (protocol, current_site.domain, sitemap_url, page))
return TemplateResponse(request, template_name, {'sitemaps': sites}, content_type=mimetype)
site = site()
protocol = req_protocol if site.protocol is None else site.protocol
sitemap_url = urlresolvers.reverse(
sitemap_url_name, kwargs={'section': section})
absolute_url = '%s://%s%s' % (protocol, req_site.domain, sitemap_url)
sites.append(absolute_url)
for page in range(2, site.paginator.num_pages + 1):
sites.append('%s?p=%s' % (absolute_url, page))
return TemplateResponse(request, template_name, {'sitemaps': sites},
content_type=mimetype)
def sitemap(request, sitemaps, section=None,
template_name='sitemap.xml', mimetype='application/xml'):
maps, urls = [], []
req_protocol = 'https' if request.is_secure() else 'http'
req_site = get_current_site(request)
if section is not None:
if section not in sitemaps:
raise Http404("No sitemap available for section: %r" % section)
maps.append(sitemaps[section])
maps = [sitemaps[section]]
else:
maps = sitemaps.values()
page = request.GET.get("p", 1)
current_site = get_current_site(request)
urls = []
for site in maps:
try:
if callable(site):
site = site()
urls.extend(site.get_urls(page=page, site=current_site))
urls.extend(site.get_urls(page=page, site=req_site,
protocol=req_protocol))
except EmptyPage:
raise Http404("Page %s empty" % page)
except PageNotAnInteger:
raise Http404("No page '%s'" % page)
return TemplateResponse(request, template_name, {'urlset': urls}, content_type=mimetype)
return TemplateResponse(request, template_name, {'urlset': urls},
content_type=mimetype)

View File

@ -161,6 +161,9 @@ Sitemap class reference
the ``get_absolute_url()`` method on each object as returned by
:attr:`~Sitemap.items()`.
To specify a protocol other than ``'http'``, use
:attr:`~Sitemap.protocol`.
.. attribute:: Sitemap.lastmod
**Optional.** Either a method or attribute.
@ -193,7 +196,7 @@ Sitemap class reference
* ``'yearly'``
* ``'never'``
.. method:: Sitemap.priority
.. attribute:: Sitemap.priority
**Optional.** Either a method or attribute.
@ -208,6 +211,18 @@ Sitemap class reference
.. _sitemaps.org documentation: http://www.sitemaps.org/protocol.html#prioritydef
.. attribute:: Sitemap.protocol
.. versionadded:: 1.4
**Optional.**
This attribute defines the protocol (``'http'`` or ``'https'``) of the
URLs in the sitemap. If it isn't set, the protocol with which the
sitemap was requested is used. If the sitemap is built outside the
context of a request, the default is ``'http'``.
Shortcuts
=========

View File

@ -562,6 +562,10 @@ Django 1.4 also includes several smaller improvements worth noting:
just like with regular formsets. However, initial values only apply to extra
forms i.e. those which are not bound to an existing model instance.
* The sitemaps framework can now handle HTTPS links using the new
:attr:`Sitemap.protocol <django.contrib.sitemaps.Sitemap.protocol>` class
attribute.
Backwards incompatible changes in 1.4
=====================================