From 16218c20606d8cd89c5393970c83da04598a3e04 Mon Sep 17 00:00:00 2001 From: Florian Demmer Date: Wed, 29 Jul 2020 10:33:20 +0200 Subject: [PATCH] Fixed #27395 -- Added sitemap 'alternates' generation. Updated the sitemap generator and default template to optionally include link elements with hreflang attribute to alternate language URLs. --- AUTHORS | 1 + django/contrib/sitemaps/__init__.py | 123 +++++++++++++----- django/contrib/sitemaps/templates/sitemap.xml | 5 +- docs/ref/contrib/sitemaps.txt | 44 +++++++ docs/releases/3.2.txt | 6 +- tests/sitemaps_tests/test_http.py | 80 +++++++++++- tests/sitemaps_tests/urls/http.py | 46 ++++++- 7 files changed, 263 insertions(+), 42 deletions(-) diff --git a/AUTHORS b/AUTHORS index d7f47d63d7..204a5c45eb 100644 --- a/AUTHORS +++ b/AUTHORS @@ -305,6 +305,7 @@ answer newbie questions, and generally made Django that much better: Flávio Juvenal da Silva Junior flavio.curella@gmail.com Florian Apolloner + Florian Demmer Florian Moussous Fran Hrženjak Francisco Albarran Cristobal diff --git a/django/contrib/sitemaps/__init__.py b/django/contrib/sitemaps/__init__.py index 54720324a2..b13507a11e 100644 --- a/django/contrib/sitemaps/__init__.py +++ b/django/contrib/sitemaps/__init__.py @@ -60,32 +60,71 @@ class Sitemap: # with which the sitemap was requested. protocol = None - def __get(self, name, obj, default=None): + # Enables generating URLs for all languages. + i18n = False + + # Override list of languages to use. + languages = None + + # Enables generating alternate/hreflang links. + alternates = False + + # Add an alternate/hreflang link with value 'x-default'. + x_default = False + + def _get(self, name, item, default=None): try: attr = getattr(self, name) except AttributeError: return default if callable(attr): - return attr(obj) + if self.i18n: + # Split the (item, lang_code) tuples again for the location, + # priority, lastmod and changefreq method calls. + item, lang_code = item + return attr(item) return attr + def _languages(self): + if self.languages is not None: + return self.languages + return [lang_code for lang_code, _ in settings.LANGUAGES] + + def _items(self): + if self.i18n: + # Create (item, lang_code) tuples for all items and languages. + # This is necessary to paginate with all languages already considered. + items = [ + (item, lang_code) + for lang_code in self._languages() + for item in self.items() + ] + return items + return self.items() + + def _location(self, item, force_lang_code=None): + if self.i18n: + obj, lang_code = item + # Activate language from item-tuple or forced one before calling location. + with translation.override(force_lang_code or lang_code): + return self._get('location', item) + return self._get('location', item) + + @property + def paginator(self): + return paginator.Paginator(self._items(), self.limit) + def items(self): return [] - def location(self, obj): - return obj.get_absolute_url() + def location(self, item): + return item.get_absolute_url() - @property - def paginator(self): - return paginator.Paginator(self.items(), self.limit) - - def get_urls(self, page=1, site=None, protocol=None): + def get_protocol(self, protocol=None): # Determine protocol - if self.protocol is not None: - protocol = self.protocol - if protocol is None: - protocol = 'http' + return self.protocol or protocol or 'http' + def get_domain(self, site=None): # Determine domain if site is None: if django_apps.is_installed('django.contrib.sites'): @@ -99,43 +138,61 @@ class Sitemap: "To use sitemaps, either enable the sites framework or pass " "a Site/RequestSite object in your view." ) - domain = site.domain + return site.domain - if getattr(self, 'i18n', False): - urls = [] - current_lang_code = translation.get_language() - for lang_code, lang_name in settings.LANGUAGES: - translation.activate(lang_code) - urls += self._urls(page, protocol, domain) - translation.activate(current_lang_code) - else: - urls = self._urls(page, protocol, domain) - - return urls + def get_urls(self, page=1, site=None, protocol=None): + protocol = self.get_protocol(protocol) + domain = self.get_domain(site) + return self._urls(page, protocol, domain) def _urls(self, page, protocol, domain): urls = [] latest_lastmod = None all_items_lastmod = True # track if all items have a lastmod - for item in self.paginator.page(page).object_list: - loc = "%s://%s%s" % (protocol, domain, self.__get('location', item)) - priority = self.__get('priority', item) - lastmod = self.__get('lastmod', item) + + paginator_page = self.paginator.page(page) + for item in paginator_page.object_list: + loc = f'{protocol}://{domain}{self._location(item)}' + priority = self._get('priority', item) + lastmod = self._get('lastmod', item) + if all_items_lastmod: all_items_lastmod = lastmod is not None if (all_items_lastmod and (latest_lastmod is None or lastmod > latest_lastmod)): latest_lastmod = lastmod + url_info = { 'item': item, 'location': loc, 'lastmod': lastmod, - 'changefreq': self.__get('changefreq', item), + 'changefreq': self._get('changefreq', item), 'priority': str(priority if priority is not None else ''), } + + if self.i18n and self.alternates: + alternates = [] + for lang_code in self._languages(): + loc = f'{protocol}://{domain}{self._location(item, lang_code)}' + alternates.append({ + 'location': loc, + 'lang_code': lang_code, + }) + if self.x_default: + lang_code = settings.LANGUAGE_CODE + loc = f'{protocol}://{domain}{self._location(item, lang_code)}' + loc = loc.replace(f'/{lang_code}/', '/', 1) + alternates.append({ + 'location': loc, + 'lang_code': 'x-default', + }) + url_info['alternates'] = alternates + urls.append(url_info) + if all_items_lastmod and latest_lastmod: self.latest_lastmod = latest_lastmod + return urls @@ -146,9 +203,9 @@ class GenericSitemap(Sitemap): def __init__(self, info_dict, priority=None, changefreq=None, protocol=None): self.queryset = info_dict['queryset'] self.date_field = info_dict.get('date_field') - self.priority = priority - self.changefreq = changefreq - self.protocol = protocol + self.priority = self.priority or priority + self.changefreq = self.changefreq or changefreq + self.protocol = self.protocol or protocol def items(self): # Make sure to return a clone; we don't want premature evaluation. diff --git a/django/contrib/sitemaps/templates/sitemap.xml b/django/contrib/sitemaps/templates/sitemap.xml index b13b830b9c..67b166ac36 100644 --- a/django/contrib/sitemaps/templates/sitemap.xml +++ b/django/contrib/sitemaps/templates/sitemap.xml @@ -7,7 +7,10 @@ {% if url.lastmod %}{{ url.lastmod|date:"Y-m-d" }}{% endif %} {% if url.changefreq %}{{ url.changefreq }}{% endif %} {% if url.priority %}{{ url.priority }}{% endif %} - + {% for alternate in url.alternates %} + + {% endfor %} + {% endfor %} {% endspaceless %} diff --git a/docs/ref/contrib/sitemaps.txt b/docs/ref/contrib/sitemaps.txt index 8f89f6f899..936567411e 100644 --- a/docs/ref/contrib/sitemaps.txt +++ b/docs/ref/contrib/sitemaps.txt @@ -252,6 +252,40 @@ Note: be generated using all of your :setting:`LANGUAGES`. The default is ``False``. + .. attribute:: Sitemap.languages + + .. versionadded:: 3.2 + + **Optional.** + + A :term:`sequence` of :term:`language codes` to use for + generating alternate links when :attr:`~Sitemap.i18n` is enabled. + Defaults to :setting:`LANGUAGES`. + + .. attribute:: Sitemap.alternates + + .. versionadded:: 3.2 + + **Optional.** + + A boolean attribute. When used in conjunction with + :attr:`~Sitemap.i18n` generated URLs will each have a list of alternate + links pointing to other language versions using the `hreflang + attribute`_. The default is ``False``. + + .. _hreflang attribute: https://support.google.com/webmasters/answer/189077 + + .. attribute:: Sitemap.x_default + + .. versionadded:: 3.2 + + **Optional.** + + A boolean attribute. When ``True`` the alternate links generated by + :attr:`~Sitemap.alternates` will contain a ``hreflang="x-default"`` + fallback entry with a value of :setting:`LANGUAGE_CODE`. The default is + ``False``. + Shortcuts ========= @@ -438,12 +472,22 @@ The variable ``urlset`` is a list of URLs that should appear in the sitemap. Each URL exposes attributes as defined in the :class:`~django.contrib.sitemaps.Sitemap` class: +- ``alternates`` - ``changefreq`` - ``item`` - ``lastmod`` - ``location`` - ``priority`` +The ``alternates`` attribute is available when :attr:`~Sitemap.i18n` and +:attr:`~Sitemap.alternates` are enabled. It is a list of other language +versions, including the optional :attr:`~Sitemap.x_default` fallback, for each +URL. Each alternate is a dictionary with ``location`` and ``lang_code`` keys. + +.. versionchanged:: 3.2 + + The ``alternates`` attribute was added. + The ``item`` attribute has been added for each URL to allow more flexible customization of the templates, such as `Google news sitemaps`_. Assuming Sitemap's :attr:`~Sitemap.items()` would return a list of items with diff --git a/docs/releases/3.2.txt b/docs/releases/3.2.txt index fe4b309cb5..10468ac655 100644 --- a/docs/releases/3.2.txt +++ b/docs/releases/3.2.txt @@ -125,7 +125,11 @@ Minor features :mod:`django.contrib.sitemaps` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -* ... +* The new :class:`~django.contrib.sitemaps.Sitemap` attributes + :attr:`~django.contrib.sitemaps.Sitemap.alternates`, + :attr:`~django.contrib.sitemaps.Sitemap.languages` and + :attr:`~django.contrib.sitemaps.Sitemap.x_default` allow + generating sitemap *alternates* to localized versions of your pages. :mod:`django.contrib.sites` ~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/tests/sitemaps_tests/test_http.py b/tests/sitemaps_tests/test_http.py index 3d4e1d84dc..3281774cc5 100644 --- a/tests/sitemaps_tests/test_http.py +++ b/tests/sitemaps_tests/test_http.py @@ -253,8 +253,10 @@ class HTTPSitemapTests(SitemapTestsBase): self.assertEqual(response.status_code, 200) @override_settings(LANGUAGES=(('en', 'English'), ('pt', 'Portuguese'))) - def test_simple_i18nsitemap_index(self): - "A simple i18n sitemap index can be rendered" + def test_simple_i18n_sitemap_index(self): + """ + A simple i18n sitemap index can be rendered. + """ response = self.client.get('/simple/i18n.xml') expected_content = """ @@ -263,6 +265,80 @@ class HTTPSitemapTests(SitemapTestsBase): """.format(self.base_url, self.i18n_model.pk) self.assertXMLEqual(response.content.decode(), expected_content) + @override_settings(LANGUAGES=(('en', 'English'), ('pt', 'Portuguese'))) + def test_alternate_i18n_sitemap_index(self): + """ + A i18n sitemap with alternate/hreflang links can be rendered. + """ + response = self.client.get('/alternates/i18n.xml') + url, pk = self.base_url, self.i18n_model.pk + expected_urls = f""" +{url}/en/i18n/testmodel/{pk}/never0.5 + + + +{url}/pt/i18n/testmodel/{pk}/never0.5 + + + +""".replace('\n', '') + expected_content = f""" + +{expected_urls} + +""" + self.assertXMLEqual(response.content.decode(), expected_content) + + @override_settings(LANGUAGES=(('en', 'English'), ('pt', 'Portuguese'), ('es', 'Spanish'))) + def test_alternate_i18n_sitemap_limited(self): + """ + A i18n sitemap index with limited languages can be rendered. + """ + response = self.client.get('/limited/i18n.xml') + url, pk = self.base_url, self.i18n_model.pk + expected_urls = f""" +{url}/en/i18n/testmodel/{pk}/never0.5 + + + +{url}/es/i18n/testmodel/{pk}/never0.5 + + + +""".replace('\n', '') + expected_content = f""" + +{expected_urls} + +""" + self.assertXMLEqual(response.content.decode(), expected_content) + + @override_settings(LANGUAGES=(('en', 'English'), ('pt', 'Portuguese'))) + def test_alternate_i18n_sitemap_xdefault(self): + """ + A i18n sitemap index with x-default can be rendered. + """ + response = self.client.get('/x-default/i18n.xml') + url, pk = self.base_url, self.i18n_model.pk + expected_urls = f""" +{url}/en/i18n/testmodel/{pk}/never0.5 + + + + +{url}/pt/i18n/testmodel/{pk}/never0.5 + + + + +""".replace('\n', '') + expected_content = f""" + +{expected_urls} + +""" + self.assertXMLEqual(response.content.decode(), expected_content) + def test_sitemap_without_entries(self): response = self.client.get('/sitemap-without-entries/sitemap.xml') expected_content = """ diff --git a/tests/sitemaps_tests/urls/http.py b/tests/sitemaps_tests/urls/http.py index 495f60fb1a..e4cba4c42f 100644 --- a/tests/sitemaps_tests/urls/http.py +++ b/tests/sitemaps_tests/urls/http.py @@ -34,6 +34,18 @@ class SimpleI18nSitemap(Sitemap): return I18nTestModel.objects.order_by('pk').all() +class AlternatesI18nSitemap(SimpleI18nSitemap): + alternates = True + + +class LimitedI18nSitemap(AlternatesI18nSitemap): + languages = ['en', 'es'] + + +class XDefaultI18nSitemap(AlternatesI18nSitemap): + x_default = True + + class EmptySitemap(Sitemap): changefreq = "never" priority = 0.5 @@ -77,8 +89,20 @@ simple_sitemaps = { 'simple': SimpleSitemap, } -simple_i18nsitemaps = { - 'simple': SimpleI18nSitemap, +simple_i18n_sitemaps = { + 'i18n': SimpleI18nSitemap, +} + +alternates_i18n_sitemaps = { + 'i18n-alternates': AlternatesI18nSitemap, +} + +limited_i18n_sitemaps = { + 'i18n-limited': LimitedI18nSitemap, +} + +xdefault_i18n_sitemaps = { + 'i18n-xdefault': XDefaultI18nSitemap, } simple_sitemaps_not_callable = { @@ -97,7 +121,7 @@ fixed_lastmod_sitemaps = { 'fixed-lastmod': FixedLastmodSitemap, } -fixed_lastmod__mixed_sitemaps = { +fixed_lastmod_mixed_sitemaps = { 'fixed-lastmod-mixed': FixedLastmodMixedSitemap, } @@ -151,7 +175,19 @@ urlpatterns = [ name='django.contrib.sitemaps.views.sitemap'), path( 'simple/i18n.xml', views.sitemap, - {'sitemaps': simple_i18nsitemaps}, + {'sitemaps': simple_i18n_sitemaps}, + name='django.contrib.sitemaps.views.sitemap'), + path( + 'alternates/i18n.xml', views.sitemap, + {'sitemaps': alternates_i18n_sitemaps}, + name='django.contrib.sitemaps.views.sitemap'), + path( + 'limited/i18n.xml', views.sitemap, + {'sitemaps': limited_i18n_sitemaps}, + name='django.contrib.sitemaps.views.sitemap'), + path( + 'x-default/i18n.xml', views.sitemap, + {'sitemaps': xdefault_i18n_sitemaps}, name='django.contrib.sitemaps.views.sitemap'), path( 'simple/custom-sitemap.xml', views.sitemap, @@ -167,7 +203,7 @@ urlpatterns = [ name='django.contrib.sitemaps.views.sitemap'), path( 'lastmod-mixed/sitemap.xml', views.sitemap, - {'sitemaps': fixed_lastmod__mixed_sitemaps}, + {'sitemaps': fixed_lastmod_mixed_sitemaps}, name='django.contrib.sitemaps.views.sitemap'), path( 'lastmod/date-sitemap.xml', views.sitemap,