diff --git a/django/contrib/sitemaps/__init__.py b/django/contrib/sitemaps/__init__.py index 7ad0f31c20..27a2f4b434 100644 --- a/django/contrib/sitemaps/__init__.py +++ b/django/contrib/sitemaps/__init__.py @@ -157,6 +157,17 @@ class Sitemap: domain = self.get_domain(site) return self._urls(page, protocol, domain) + def get_latest_lastmod(self): + if not hasattr(self, 'lastmod'): + return None + if callable(self.lastmod): + try: + return max([self.lastmod(item) for item in self.items()], default=None) + except TypeError: + return None + else: + return self.lastmod + def _urls(self, page, protocol, domain): urls = [] latest_lastmod = None @@ -226,3 +237,8 @@ class GenericSitemap(Sitemap): if self.date_field is not None: return getattr(item, self.date_field) return None + + def get_latest_lastmod(self): + if self.date_field is not None: + return self.queryset.order_by('-' + self.date_field).values_list(self.date_field, flat=True).first() + return None diff --git a/django/contrib/sitemaps/templates/sitemap_index.xml b/django/contrib/sitemaps/templates/sitemap_index.xml index f19e622c55..b7abd12b33 100644 --- a/django/contrib/sitemaps/templates/sitemap_index.xml +++ b/django/contrib/sitemaps/templates/sitemap_index.xml @@ -1,4 +1,13 @@ -{% for location in sitemaps %}{{ location }}{% endfor %} +{% spaceless %} +{% for site in sitemaps %} + + {{ site.location }} + {% if site.last_mod %} + {{ site.last_mod|date:"c" }} + {% endif %} + +{% endfor %} +{% endspaceless %} diff --git a/django/contrib/sitemaps/views.py b/django/contrib/sitemaps/views.py index 137049825f..f742bd7f7a 100644 --- a/django/contrib/sitemaps/views.py +++ b/django/contrib/sitemaps/views.py @@ -1,4 +1,6 @@ import datetime +import warnings +from dataclasses import dataclass from functools import wraps from django.contrib.sites.shortcuts import get_current_site @@ -7,9 +9,22 @@ from django.http import Http404 from django.template.response import TemplateResponse from django.urls import reverse from django.utils import 
timezone +from django.utils.deprecation import RemovedInDjango50Warning from django.utils.http import http_date +@dataclass +class SitemapIndexItem: + location: str + last_mod: datetime.date = None + + # RemovedInDjango50Warning + def __str__(self): + msg = 'Calling `__str__` on SitemapIndexItem is deprecated, use the `location` attribute instead.' + warnings.warn(msg, RemovedInDjango50Warning, stacklevel=2) + return self.location + + def x_robots_tag(func): @wraps(func) def inner(request, *args, **kwargs): @@ -19,6 +34,18 @@ def x_robots_tag(func): return inner +def _get_latest_lastmod(current_lastmod, new_lastmod): + """ + Returns the latest `lastmod` where `lastmod` can be either a date or a + datetime. + """ + if not isinstance(new_lastmod, datetime.datetime): + new_lastmod = datetime.datetime.combine(new_lastmod, datetime.time.min) + if timezone.is_naive(new_lastmod): + new_lastmod = timezone.make_aware(new_lastmod, timezone.utc) + return new_lastmod if current_lastmod is None else max(current_lastmod, new_lastmod) + + @x_robots_tag def index(request, sitemaps, template_name='sitemap_index.xml', content_type='application/xml', @@ -28,6 +55,8 @@ def index(request, sitemaps, req_site = get_current_site(request) sites = [] # all sections' sitemap URLs + all_indexes_lastmod = True + latest_lastmod = None for section, site in sitemaps.items(): # For each section label, add links of all pages of its sitemap # (usually generated by the `sitemap` view). 
@@ -36,13 +65,29 @@ def index(request, sitemaps, protocol = req_protocol if site.protocol is None else site.protocol sitemap_url = reverse(sitemap_url_name, kwargs={'section': section}) absolute_url = '%s://%s%s' % (protocol, req_site.domain, sitemap_url) - sites.append(absolute_url) + site_lastmod = site.get_latest_lastmod() + if all_indexes_lastmod: + if site_lastmod is not None: + latest_lastmod = _get_latest_lastmod(latest_lastmod, site_lastmod) + else: + all_indexes_lastmod = False + sites.append(SitemapIndexItem(absolute_url, site_lastmod)) # Add links to all pages of the sitemap. for page in range(2, site.paginator.num_pages + 1): - sites.append('%s?p=%s' % (absolute_url, page)) - - return TemplateResponse(request, template_name, {'sitemaps': sites}, - content_type=content_type) + sites.append(SitemapIndexItem('%s?p=%s' % (absolute_url, page), site_lastmod)) + # If lastmod is defined for all sites, set header so as + # ConditionalGetMiddleware is able to send 304 NOT MODIFIED + if all_indexes_lastmod and latest_lastmod: + headers = {'Last-Modified': http_date(latest_lastmod.timestamp())} + else: + headers = None + return TemplateResponse( + request, + template_name, + {'sitemaps': sites}, + content_type=content_type, + headers=headers, + ) @x_robots_tag @@ -72,21 +117,23 @@ def sitemap(request, sitemaps, section=None, if all_sites_lastmod: site_lastmod = getattr(site, 'latest_lastmod', None) if site_lastmod is not None: - if not isinstance(site_lastmod, datetime.datetime): - site_lastmod = datetime.datetime.combine(site_lastmod, datetime.time.min) - if timezone.is_naive(site_lastmod): - site_lastmod = timezone.make_aware(site_lastmod, timezone.utc) - lastmod = site_lastmod if lastmod is None else max(lastmod, site_lastmod) + lastmod = _get_latest_lastmod(lastmod, site_lastmod) else: all_sites_lastmod = False except EmptyPage: raise Http404("Page %s empty" % page) except PageNotAnInteger: raise Http404("No page '%s'" % page) - response = 
TemplateResponse(request, template_name, {'urlset': urls}, - content_type=content_type) - if all_sites_lastmod and lastmod is not None: - # if lastmod is defined for all sites, set header so as - # ConditionalGetMiddleware is able to send 304 NOT MODIFIED - response.headers['Last-Modified'] = http_date(lastmod.timestamp()) - return response + # If lastmod is defined for all sites, set header so as + # ConditionalGetMiddleware is able to send 304 NOT MODIFIED + if all_sites_lastmod: + headers = {'Last-Modified': http_date(lastmod.timestamp())} if lastmod else None + else: + headers = None + return TemplateResponse( + request, + template_name, + {'urlset': urls}, + content_type=content_type, + headers=headers, + ) diff --git a/docs/internals/deprecation.txt b/docs/internals/deprecation.txt index 202acdf608..13c800e0ee 100644 --- a/docs/internals/deprecation.txt +++ b/docs/internals/deprecation.txt @@ -65,6 +65,8 @@ details on these changes. See the :ref:`Django 4.1 release notes ` for more details on these changes. +* The ``SitemapIndexItem.__str__()`` method will be removed. + .. _deprecation-removed-in-4.1: 4.1 diff --git a/docs/ref/contrib/sitemaps.txt b/docs/ref/contrib/sitemaps.txt index ec0358391c..e6c96be21d 100644 --- a/docs/ref/contrib/sitemaps.txt +++ b/docs/ref/contrib/sitemaps.txt @@ -294,6 +294,23 @@ Note: fallback entry with a value of :setting:`LANGUAGE_CODE`. The default is ``False``. + .. method:: Sitemap.get_latest_lastmod() + + .. versionadded:: 4.1 + + **Optional.** A method that returns the latest value returned by + :attr:`~Sitemap.lastmod`. This function is used to add the ``lastmod`` + attribute to :ref:`Sitemap index context + variables`. + + By default :meth:`~Sitemap.get_latest_lastmod` returns: + + * If :attr:`~Sitemap.lastmod` is an attribute: + :attr:`~Sitemap.lastmod`. + * If :attr:`~Sitemap.lastmod` is a method: + The latest ``lastmod`` returned by calling the method with all + items returned by :meth:`Sitemap.items`. 
+ Shortcuts ========= @@ -306,7 +323,8 @@ The sitemap framework provides a convenience class for a common case: a ``queryset`` entry. This queryset will be used to generate the items of the sitemap. It may also have a ``date_field`` entry that specifies a date field for objects retrieved from the ``queryset``. - This will be used for the :attr:`~Sitemap.lastmod` attribute in the + This will be used for the :attr:`~Sitemap.lastmod` attribute and + :meth:`~Sitemap.get_latest_lastmod` method in the generated sitemap. The :attr:`~Sitemap.priority`, :attr:`~Sitemap.changefreq`, @@ -413,6 +431,10 @@ both :file:`sitemap-flatpages.xml` and :file:`sitemap-blog.xml`. The :class:`~django.contrib.sitemaps.Sitemap` classes and the ``sitemaps`` dict don't change at all. +If all sitemaps have a ``lastmod`` returned by +:meth:`Sitemap.get_latest_lastmod` the sitemap index will have a +``Last-Modified`` header equal to the latest ``lastmod``. + You should create an index file if one of your sitemaps has more than 50,000 URLs. In this case, Django will automatically paginate the sitemap, and the index will reflect that. @@ -433,6 +455,9 @@ with a caching decorator -- you must name your sitemap view and pass {'sitemaps': sitemaps}, name='sitemaps'), ] +.. versionchanged:: 4.1 + + Use of the ``Last-Modified`` header was added. Template customization ====================== @@ -468,10 +493,23 @@ When customizing the templates for the :func:`~django.contrib.sitemaps.views.sitemap` views, you can rely on the following context variables. +.. _sitemap-index-context-variables: + Index ----- -The variable ``sitemaps`` is a list of absolute URLs to each of the sitemaps. +The variable ``sitemaps`` is a list of objects containing the ``location`` and +``last_mod`` attributes for each of the sitemaps. Each object exposes the following +attributes: + +- ``location``: The location (url & page) of the sitemap. 
+- ``last_mod``: Populated by the :meth:`~Sitemap.get_latest_lastmod` + method for each sitemap. + +.. versionchanged:: 4.1 + + The context was changed to a list of objects with ``location`` and optional + ``last_mod`` attributes. Sitemap ------- diff --git a/docs/releases/4.1.txt b/docs/releases/4.1.txt index af4b6d83a3..0c133f24f1 100644 --- a/docs/releases/4.1.txt +++ b/docs/releases/4.1.txt @@ -88,7 +88,11 @@ Minor features :mod:`django.contrib.sitemaps` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -* ... +* The default sitemap index template ``<sitemapindex>`` now includes the + ``<lastmod>`` timestamp where available, through the new + :meth:`~django.contrib.sitemaps.Sitemap.get_latest_lastmod` method. Custom + sitemap index templates should be updated for the adjusted :ref:`context + variables <sitemap-index-context-variables>`. :mod:`django.contrib.sites` ~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -253,7 +257,10 @@ Features deprecated in 4.1 Miscellaneous ------------- -* ... +* The context for sitemap index templates of a flat list of URLs is deprecated. + Custom sitemap index templates should be updated for the adjusted + :ref:`context variables <sitemap-index-context-variables>`, expecting a list + of objects with ``location`` and optional ``last_mod`` attributes. 
Features removed in 4.1 ======================= diff --git a/tests/sitemaps_tests/templates/custom_sitemap_lastmod_index.xml b/tests/sitemaps_tests/templates/custom_sitemap_lastmod_index.xml new file mode 100644 index 0000000000..dcf5e32584 --- /dev/null +++ b/tests/sitemaps_tests/templates/custom_sitemap_lastmod_index.xml @@ -0,0 +1,14 @@ + + + +{% spaceless %} +{% for site in sitemaps %} + + {{ site.location }} + {% if site.last_mod %} + {{ site.last_mod|date:"c" }} + {% endif %} + +{% endfor %} +{% endspaceless %} + diff --git a/tests/sitemaps_tests/test_generic.py b/tests/sitemaps_tests/test_generic.py index d7d4908565..3c25f04d3a 100644 --- a/tests/sitemaps_tests/test_generic.py +++ b/tests/sitemaps_tests/test_generic.py @@ -85,3 +85,12 @@ class GenericViewsSitemapTests(SitemapTestsBase): ) with self.assertWarnsMessage(RemovedInDjango50Warning, msg): sitemap.get_protocol() + + def test_generic_sitemap_index(self): + TestModel.objects.update(lastmod=datetime(2013, 3, 13, 10, 0, 0)) + response = self.client.get('/generic-lastmod/index.xml') + expected_content = """ + +http://example.com/simple/sitemap-generic.xml2013-03-13T10:00:00 +""" + self.assertXMLEqual(response.content.decode('utf-8'), expected_content) diff --git a/tests/sitemaps_tests/test_http.py b/tests/sitemaps_tests/test_http.py index af787ff41f..5fea5ac3d3 100644 --- a/tests/sitemaps_tests/test_http.py +++ b/tests/sitemaps_tests/test_http.py @@ -24,9 +24,9 @@ class HTTPSitemapTests(SitemapTestsBase): response = self.client.get('/simple/index.xml') expected_content = """ -%s/simple/sitemap-simple.xml +%s/simple/sitemap-simple.xml%s -""" % self.base_url +""" % (self.base_url, date.today()) self.assertXMLEqual(response.content.decode(), expected_content) def test_sitemap_not_callable(self): @@ -34,9 +34,9 @@ class HTTPSitemapTests(SitemapTestsBase): response = self.client.get('/simple-not-callable/index.xml') expected_content = """ -%s/simple/sitemap-simple.xml +%s/simple/sitemap-simple.xml%s -""" % 
self.base_url +""" % (self.base_url, date.today()) self.assertXMLEqual(response.content.decode(), expected_content) def test_paged_sitemap(self): @@ -44,24 +44,24 @@ class HTTPSitemapTests(SitemapTestsBase): response = self.client.get('/simple-paged/index.xml') expected_content = """ -{0}/simple/sitemap-simple.xml{0}/simple/sitemap-simple.xml?p=2 +{0}/simple/sitemap-simple.xml{1}{0}/simple/sitemap-simple.xml?p=2{1} -""".format(self.base_url) +""".format(self.base_url, date.today()) self.assertXMLEqual(response.content.decode(), expected_content) @override_settings(TEMPLATES=[{ 'BACKEND': 'django.template.backends.django.DjangoTemplates', 'DIRS': [os.path.join(os.path.dirname(__file__), 'templates')], }]) - def test_simple_sitemap_custom_index(self): + def test_simple_sitemap_custom_lastmod_index(self): "A simple sitemap index can be rendered with a custom template" - response = self.client.get('/simple/custom-index.xml') + response = self.client.get('/simple/custom-lastmod-index.xml') expected_content = """ -%s/simple/sitemap-simple.xml +%s/simple/sitemap-simple.xml%s -""" % self.base_url +""" % (self.base_url, date.today()) self.assertXMLEqual(response.content.decode(), expected_content) def test_simple_sitemap_section(self): @@ -176,7 +176,30 @@ class HTTPSitemapTests(SitemapTestsBase): response = self.client.get('/lastmod-sitemaps/descending.xml') self.assertEqual(response.headers['Last-Modified'], 'Sat, 20 Apr 2013 05:00:00 GMT') - @override_settings(USE_I18N=True) + def test_sitemap_get_latest_lastmod_none(self): + """ + sitemapindex.lastmod is omitted when Sitemap.lastmod is + callable and Sitemap.get_latest_lastmod is not implemented + """ + response = self.client.get('/lastmod/get-latest-lastmod-none-sitemap.xml') + self.assertNotContains(response, '<lastmod>') + + def test_sitemap_get_latest_lastmod(self): + """ + sitemapindex.lastmod is included when Sitemap.lastmod is + attribute and Sitemap.get_latest_lastmod is implemented + """ + response = 
self.client.get('/lastmod/get-latest-lastmod-sitemap.xml') + self.assertContains(response, '2013-03-13T10:00:00') + + def test_sitemap_latest_lastmod_timezone(self): + """ + lastmod datestamp shows timezones if Sitemap.get_latest_lastmod + returns an aware datetime. + """ + response = self.client.get('/lastmod/latest-lastmod-timezone-sitemap.xml') + self.assertContains(response, '2013-03-13T10:00:00-05:00') + def test_localized_priority(self): """The priority value should not be localized.""" with translation.override('fr'): @@ -240,9 +263,9 @@ class HTTPSitemapTests(SitemapTestsBase): response = self.client.get('/cached/index.xml') expected_content = """ -%s/cached/sitemap-simple.xml +%s/cached/sitemap-simple.xml%s -""" % self.base_url +""" % (self.base_url, date.today()) self.assertXMLEqual(response.content.decode(), expected_content) def test_x_robots_sitemap(self): @@ -356,7 +379,7 @@ class HTTPSitemapTests(SitemapTestsBase): def test_callable_sitemod_partial(self): """ Not all items have `lastmod`. Therefore the `Last-Modified` header - is not set by the detail sitemap view. + is not set by the detail or index sitemap view. """ index_response = self.client.get('/callable-lastmod-partial/index.xml') sitemap_response = self.client.get('/callable-lastmod-partial/sitemap.xml') @@ -378,16 +401,15 @@ class HTTPSitemapTests(SitemapTestsBase): def test_callable_sitemod_full(self): """ All items in the sitemap have `lastmod`. The `Last-Modified` header - is set for the detail sitemap view. The index view does not (currently) - set the `Last-Modified` header. + is set for the detail and index sitemap view. 
""" index_response = self.client.get('/callable-lastmod-full/index.xml') sitemap_response = self.client.get('/callable-lastmod-full/sitemap.xml') - self.assertNotIn('Last-Modified', index_response) + self.assertEqual(index_response.headers['Last-Modified'], 'Thu, 13 Mar 2014 10:00:00 GMT') self.assertEqual(sitemap_response.headers['Last-Modified'], 'Thu, 13 Mar 2014 10:00:00 GMT') expected_content_index = """ - http://example.com/simple/sitemap-callable-lastmod.xml + http://example.com/simple/sitemap-callable-lastmod.xml2014-03-13T10:00:00 """ expected_content_sitemap = """ @@ -397,3 +419,31 @@ class HTTPSitemapTests(SitemapTestsBase): """ self.assertXMLEqual(index_response.content.decode(), expected_content_index) self.assertXMLEqual(sitemap_response.content.decode(), expected_content_sitemap) + + +# RemovedInDjango50Warning +class DeprecatedTests(SitemapTestsBase): + @override_settings(TEMPLATES=[{ + 'BACKEND': 'django.template.backends.django.DjangoTemplates', + 'DIRS': [os.path.join(os.path.dirname(__file__), 'templates')], + }]) + def test_simple_sitemap_custom_index_warning(self): + msg = 'Calling `__str__` on SitemapIndexItem is deprecated, use the `location` attribute instead.' 
+ with self.assertRaisesMessage(RemovedInDjango50Warning, msg): + self.client.get('/simple/custom-index.xml') + + @ignore_warnings(category=RemovedInDjango50Warning) + @override_settings(TEMPLATES=[{ + 'BACKEND': 'django.template.backends.django.DjangoTemplates', + 'DIRS': [os.path.join(os.path.dirname(__file__), 'templates')], + }]) + def test_simple_sitemap_custom_index(self): + "A simple sitemap index can be rendered with a custom template" + response = self.client.get('/simple/custom-index.xml') + expected_content = """ + + + %s/simple/sitemap-simple.xml + + """ % (self.base_url) + self.assertXMLEqual(response.content.decode(), expected_content) diff --git a/tests/sitemaps_tests/test_https.py b/tests/sitemaps_tests/test_https.py index 5c3f8894f8..1e8c28d6c3 100644 --- a/tests/sitemaps_tests/test_https.py +++ b/tests/sitemaps_tests/test_https.py @@ -14,9 +14,9 @@ class HTTPSSitemapTests(SitemapTestsBase): response = self.client.get('/secure/index.xml') expected_content = """ -%s/secure/sitemap-simple.xml +%s/secure/sitemap-simple.xml%s -""" % self.base_url +""" % (self.base_url, date.today()) self.assertXMLEqual(response.content.decode(), expected_content) def test_secure_sitemap_section(self): @@ -39,9 +39,9 @@ class HTTPSDetectionSitemapTests(SitemapTestsBase): response = self.client.get('/simple/index.xml', **self.extra) expected_content = """ -%s/simple/sitemap-simple.xml +%s/simple/sitemap-simple.xml%s -""" % self.base_url.replace('http://', 'https://') +""" % (self.base_url.replace('http://', 'https://'), date.today()) self.assertXMLEqual(response.content.decode(), expected_content) def test_sitemap_section_with_https_request(self): diff --git a/tests/sitemaps_tests/urls/http.py b/tests/sitemaps_tests/urls/http.py index 936f346e41..f0c0cf68b9 100644 --- a/tests/sitemaps_tests/urls/http.py +++ b/tests/sitemaps_tests/urls/http.py @@ -14,13 +14,15 @@ class SimpleSitemap(Sitemap): changefreq = "never" priority = 0.5 location = '/location/' - lastmod = 
datetime.now() + lastmod = date.today() def items(self): return [object()] class SimplePagedSitemap(Sitemap): + lastmod = date.today() + def items(self): return [object() for x in range(Sitemap.limit + 1)] @@ -110,6 +112,26 @@ class CallableLastmodFullSitemap(Sitemap): return obj.lastmod +class GetLatestLastmodNoneSiteMap(Sitemap): + changefreq = "never" + priority = 0.5 + location = '/location/' + + def items(self): + return [object()] + + def lastmod(self, obj): + return datetime(2013, 3, 13, 10, 0, 0) + + def get_latest_lastmod(self): + return None + + +class GetLatestLastmodSiteMap(SimpleSitemap): + def get_latest_lastmod(self): + return datetime(2013, 3, 13, 10, 0, 0) + + def testmodelview(request, id): return HttpResponse() @@ -180,6 +202,18 @@ generic_sitemaps = { 'generic': GenericSitemap({'queryset': TestModel.objects.order_by('pk').all()}), } +get_latest_lastmod_none_sitemaps = { + 'get-latest-lastmod-none': GetLatestLastmodNoneSiteMap, +} + +get_latest_lastmod_sitemaps = { + 'get-latest-lastmod': GetLatestLastmodSiteMap, +} + +latest_lastmod_timezone_sitemaps = { + 'latest-lastmod-timezone': TimezoneSiteMap, +} + generic_sitemaps_lastmod = { 'generic': GenericSitemap({ 'queryset': TestModel.objects.order_by('pk').all(), @@ -202,6 +236,10 @@ urlpatterns = [ path( 'simple/custom-index.xml', views.index, {'sitemaps': simple_sitemaps, 'template_name': 'custom_sitemap_index.xml'}), + path( + 'simple/custom-lastmod-index.xml', views.index, + {'sitemaps': simple_sitemaps, 'template_name': 'custom_sitemap_lastmod_index.xml'}, + ), path( 'simple/sitemap-
.xml', views.sitemap, {'sitemaps': simple_sitemaps}, @@ -266,6 +304,21 @@ urlpatterns = [ 'lastmod-sitemaps/descending.xml', views.sitemap, {'sitemaps': sitemaps_lastmod_descending}, name='django.contrib.sitemaps.views.sitemap'), + path( + 'lastmod/get-latest-lastmod-none-sitemap.xml', views.index, + {'sitemaps': get_latest_lastmod_none_sitemaps}, + name='django.contrib.sitemaps.views.index', + ), + path( + 'lastmod/get-latest-lastmod-sitemap.xml', views.index, + {'sitemaps': get_latest_lastmod_sitemaps}, + name='django.contrib.sitemaps.views.index', + ), + path( + 'lastmod/latest-lastmod-timezone-sitemap.xml', views.index, + {'sitemaps': latest_lastmod_timezone_sitemaps}, + name='django.contrib.sitemaps.views.index', + ), path( 'generic/sitemap.xml', views.sitemap, {'sitemaps': generic_sitemaps}, @@ -287,6 +340,11 @@ urlpatterns = [ path('callable-lastmod-partial/sitemap.xml', views.sitemap, {'sitemaps': callable_lastmod_partial_sitemap}), path('callable-lastmod-full/index.xml', views.index, {'sitemaps': callable_lastmod_full_sitemap}), path('callable-lastmod-full/sitemap.xml', views.sitemap, {'sitemaps': callable_lastmod_full_sitemap}), + path( + 'generic-lastmod/index.xml', views.index, + {'sitemaps': generic_sitemaps_lastmod}, + name='django.contrib.sitemaps.views.index', + ), ] urlpatterns += i18n_patterns(