Fixed #27395 -- Added sitemap 'alternates' generation.

Updated the sitemap generator and default template to optionally
include link elements with hreflang attribute to alternate language
URLs.
This commit is contained in:
Florian Demmer 2020-07-29 10:33:20 +02:00 committed by Carlton Gibson
parent 50e1ccbbea
commit 16218c2060
7 changed files with 263 additions and 42 deletions

View File

@ -305,6 +305,7 @@ answer newbie questions, and generally made Django that much better:
Flávio Juvenal da Silva Junior <flavio@vinta.com.br> Flávio Juvenal da Silva Junior <flavio@vinta.com.br>
flavio.curella@gmail.com flavio.curella@gmail.com
Florian Apolloner <florian@apolloner.eu> Florian Apolloner <florian@apolloner.eu>
Florian Demmer <fdemmer@gmail.com>
Florian Moussous <florian.moussous@gmail.com> Florian Moussous <florian.moussous@gmail.com>
Fran Hrženjak <fran.hrzenjak@gmail.com> Fran Hrženjak <fran.hrzenjak@gmail.com>
Francisco Albarran Cristobal <pahko.xd@gmail.com> Francisco Albarran Cristobal <pahko.xd@gmail.com>

View File

@ -60,32 +60,71 @@ class Sitemap:
# with which the sitemap was requested. # with which the sitemap was requested.
protocol = None protocol = None
def __get(self, name, obj, default=None): # Enables generating URLs for all languages.
i18n = False
# Override list of languages to use.
languages = None
# Enables generating alternate/hreflang links.
alternates = False
# Add an alternate/hreflang link with value 'x-default'.
x_default = False
def _get(self, name, item, default=None):
try: try:
attr = getattr(self, name) attr = getattr(self, name)
except AttributeError: except AttributeError:
return default return default
if callable(attr): if callable(attr):
return attr(obj) if self.i18n:
# Split the (item, lang_code) tuples again for the location,
# priority, lastmod and changefreq method calls.
item, lang_code = item
return attr(item)
return attr return attr
def _languages(self):
if self.languages is not None:
return self.languages
return [lang_code for lang_code, _ in settings.LANGUAGES]
def _items(self):
if self.i18n:
# Create (item, lang_code) tuples for all items and languages.
# This is necessary to paginate with all languages already considered.
items = [
(item, lang_code)
for lang_code in self._languages()
for item in self.items()
]
return items
return self.items()
def _location(self, item, force_lang_code=None):
if self.i18n:
obj, lang_code = item
# Activate language from item-tuple or forced one before calling location.
with translation.override(force_lang_code or lang_code):
return self._get('location', item)
return self._get('location', item)
@property
def paginator(self):
return paginator.Paginator(self._items(), self.limit)
def items(self): def items(self):
return [] return []
def location(self, obj): def location(self, item):
return obj.get_absolute_url() return item.get_absolute_url()
@property def get_protocol(self, protocol=None):
def paginator(self):
return paginator.Paginator(self.items(), self.limit)
def get_urls(self, page=1, site=None, protocol=None):
# Determine protocol # Determine protocol
if self.protocol is not None: return self.protocol or protocol or 'http'
protocol = self.protocol
if protocol is None:
protocol = 'http'
def get_domain(self, site=None):
# Determine domain # Determine domain
if site is None: if site is None:
if django_apps.is_installed('django.contrib.sites'): if django_apps.is_installed('django.contrib.sites'):
@ -99,43 +138,61 @@ class Sitemap:
"To use sitemaps, either enable the sites framework or pass " "To use sitemaps, either enable the sites framework or pass "
"a Site/RequestSite object in your view." "a Site/RequestSite object in your view."
) )
domain = site.domain return site.domain
if getattr(self, 'i18n', False): def get_urls(self, page=1, site=None, protocol=None):
urls = [] protocol = self.get_protocol(protocol)
current_lang_code = translation.get_language() domain = self.get_domain(site)
for lang_code, lang_name in settings.LANGUAGES: return self._urls(page, protocol, domain)
translation.activate(lang_code)
urls += self._urls(page, protocol, domain)
translation.activate(current_lang_code)
else:
urls = self._urls(page, protocol, domain)
return urls
def _urls(self, page, protocol, domain): def _urls(self, page, protocol, domain):
urls = [] urls = []
latest_lastmod = None latest_lastmod = None
all_items_lastmod = True # track if all items have a lastmod all_items_lastmod = True # track if all items have a lastmod
for item in self.paginator.page(page).object_list:
loc = "%s://%s%s" % (protocol, domain, self.__get('location', item)) paginator_page = self.paginator.page(page)
priority = self.__get('priority', item) for item in paginator_page.object_list:
lastmod = self.__get('lastmod', item) loc = f'{protocol}://{domain}{self._location(item)}'
priority = self._get('priority', item)
lastmod = self._get('lastmod', item)
if all_items_lastmod: if all_items_lastmod:
all_items_lastmod = lastmod is not None all_items_lastmod = lastmod is not None
if (all_items_lastmod and if (all_items_lastmod and
(latest_lastmod is None or lastmod > latest_lastmod)): (latest_lastmod is None or lastmod > latest_lastmod)):
latest_lastmod = lastmod latest_lastmod = lastmod
url_info = { url_info = {
'item': item, 'item': item,
'location': loc, 'location': loc,
'lastmod': lastmod, 'lastmod': lastmod,
'changefreq': self.__get('changefreq', item), 'changefreq': self._get('changefreq', item),
'priority': str(priority if priority is not None else ''), 'priority': str(priority if priority is not None else ''),
} }
if self.i18n and self.alternates:
alternates = []
for lang_code in self._languages():
loc = f'{protocol}://{domain}{self._location(item, lang_code)}'
alternates.append({
'location': loc,
'lang_code': lang_code,
})
if self.x_default:
lang_code = settings.LANGUAGE_CODE
loc = f'{protocol}://{domain}{self._location(item, lang_code)}'
loc = loc.replace(f'/{lang_code}/', '/', 1)
alternates.append({
'location': loc,
'lang_code': 'x-default',
})
url_info['alternates'] = alternates
urls.append(url_info) urls.append(url_info)
if all_items_lastmod and latest_lastmod: if all_items_lastmod and latest_lastmod:
self.latest_lastmod = latest_lastmod self.latest_lastmod = latest_lastmod
return urls return urls
@ -146,9 +203,9 @@ class GenericSitemap(Sitemap):
def __init__(self, info_dict, priority=None, changefreq=None, protocol=None): def __init__(self, info_dict, priority=None, changefreq=None, protocol=None):
self.queryset = info_dict['queryset'] self.queryset = info_dict['queryset']
self.date_field = info_dict.get('date_field') self.date_field = info_dict.get('date_field')
self.priority = priority self.priority = self.priority or priority
self.changefreq = changefreq self.changefreq = self.changefreq or changefreq
self.protocol = protocol self.protocol = self.protocol or protocol
def items(self): def items(self):
# Make sure to return a clone; we don't want premature evaluation. # Make sure to return a clone; we don't want premature evaluation.

View File

@ -7,6 +7,9 @@
{% if url.lastmod %}<lastmod>{{ url.lastmod|date:"Y-m-d" }}</lastmod>{% endif %} {% if url.lastmod %}<lastmod>{{ url.lastmod|date:"Y-m-d" }}</lastmod>{% endif %}
{% if url.changefreq %}<changefreq>{{ url.changefreq }}</changefreq>{% endif %} {% if url.changefreq %}<changefreq>{{ url.changefreq }}</changefreq>{% endif %}
{% if url.priority %}<priority>{{ url.priority }}</priority>{% endif %} {% if url.priority %}<priority>{{ url.priority }}</priority>{% endif %}
{% for alternate in url.alternates %}
<xhtml:link rel="alternate" hreflang="{{ alternate.lang_code }}" href="{{ alternate.location }}"/>
{% endfor %}
</url> </url>
{% endfor %} {% endfor %}
{% endspaceless %} {% endspaceless %}

View File

@ -252,6 +252,40 @@ Note:
be generated using all of your :setting:`LANGUAGES`. The default is be generated using all of your :setting:`LANGUAGES`. The default is
``False``. ``False``.
.. attribute:: Sitemap.languages
.. versionadded:: 3.2
**Optional.**
A :term:`sequence` of :term:`language codes<language code>` to use for
generating alternate links when :attr:`~Sitemap.i18n` is enabled.
Defaults to :setting:`LANGUAGES`.
.. attribute:: Sitemap.alternates
.. versionadded:: 3.2
**Optional.**
A boolean attribute. When used in conjunction with
:attr:`~Sitemap.i18n` generated URLs will each have a list of alternate
links pointing to other language versions using the `hreflang
attribute`_. The default is ``False``.
.. _hreflang attribute: https://support.google.com/webmasters/answer/189077
.. attribute:: Sitemap.x_default
.. versionadded:: 3.2
**Optional.**
A boolean attribute. When ``True`` the alternate links generated by
:attr:`~Sitemap.alternates` will contain a ``hreflang="x-default"``
fallback entry with a value of :setting:`LANGUAGE_CODE`. The default is
``False``.
Shortcuts Shortcuts
========= =========
@ -438,12 +472,22 @@ The variable ``urlset`` is a list of URLs that should appear in the
sitemap. Each URL exposes attributes as defined in the sitemap. Each URL exposes attributes as defined in the
:class:`~django.contrib.sitemaps.Sitemap` class: :class:`~django.contrib.sitemaps.Sitemap` class:
- ``alternates``
- ``changefreq`` - ``changefreq``
- ``item`` - ``item``
- ``lastmod`` - ``lastmod``
- ``location`` - ``location``
- ``priority`` - ``priority``
The ``alternates`` attribute is available when :attr:`~Sitemap.i18n` and
:attr:`~Sitemap.alternates` are enabled. It is a list of other language
versions, including the optional :attr:`~Sitemap.x_default` fallback, for each
URL. Each alternate is a dictionary with ``location`` and ``lang_code`` keys.
.. versionchanged:: 3.2
The ``alternates`` attribute was added.
The ``item`` attribute has been added for each URL to allow more flexible The ``item`` attribute has been added for each URL to allow more flexible
customization of the templates, such as `Google news sitemaps`_. Assuming customization of the templates, such as `Google news sitemaps`_. Assuming
Sitemap's :attr:`~Sitemap.items()` would return a list of items with Sitemap's :attr:`~Sitemap.items()` would return a list of items with

View File

@ -125,7 +125,11 @@ Minor features
:mod:`django.contrib.sitemaps` :mod:`django.contrib.sitemaps`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* ... * The new :class:`~django.contrib.sitemaps.Sitemap` attributes
:attr:`~django.contrib.sitemaps.Sitemap.alternates`,
:attr:`~django.contrib.sitemaps.Sitemap.languages` and
:attr:`~django.contrib.sitemaps.Sitemap.x_default` allow
generating sitemap *alternates* to localized versions of your pages.
:mod:`django.contrib.sites` :mod:`django.contrib.sites`
~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~

View File

@ -253,8 +253,10 @@ class HTTPSitemapTests(SitemapTestsBase):
self.assertEqual(response.status_code, 200) self.assertEqual(response.status_code, 200)
@override_settings(LANGUAGES=(('en', 'English'), ('pt', 'Portuguese'))) @override_settings(LANGUAGES=(('en', 'English'), ('pt', 'Portuguese')))
def test_simple_i18nsitemap_index(self): def test_simple_i18n_sitemap_index(self):
"A simple i18n sitemap index can be rendered" """
A simple i18n sitemap index can be rendered.
"""
response = self.client.get('/simple/i18n.xml') response = self.client.get('/simple/i18n.xml')
expected_content = """<?xml version="1.0" encoding="UTF-8"?> expected_content = """<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:xhtml="http://www.w3.org/1999/xhtml"> <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:xhtml="http://www.w3.org/1999/xhtml">
@ -263,6 +265,80 @@ class HTTPSitemapTests(SitemapTestsBase):
""".format(self.base_url, self.i18n_model.pk) """.format(self.base_url, self.i18n_model.pk)
self.assertXMLEqual(response.content.decode(), expected_content) self.assertXMLEqual(response.content.decode(), expected_content)
@override_settings(LANGUAGES=(('en', 'English'), ('pt', 'Portuguese')))
def test_alternate_i18n_sitemap_index(self):
"""
A i18n sitemap with alternate/hreflang links can be rendered.
"""
response = self.client.get('/alternates/i18n.xml')
url, pk = self.base_url, self.i18n_model.pk
expected_urls = f"""
<url><loc>{url}/en/i18n/testmodel/{pk}/</loc><changefreq>never</changefreq><priority>0.5</priority>
<xhtml:link rel="alternate" hreflang="en" href="{url}/en/i18n/testmodel/{pk}/"/>
<xhtml:link rel="alternate" hreflang="pt" href="{url}/pt/i18n/testmodel/{pk}/"/>
</url>
<url><loc>{url}/pt/i18n/testmodel/{pk}/</loc><changefreq>never</changefreq><priority>0.5</priority>
<xhtml:link rel="alternate" hreflang="en" href="{url}/en/i18n/testmodel/{pk}/"/>
<xhtml:link rel="alternate" hreflang="pt" href="{url}/pt/i18n/testmodel/{pk}/"/>
</url>
""".replace('\n', '')
expected_content = f"""<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:xhtml="http://www.w3.org/1999/xhtml">
{expected_urls}
</urlset>
"""
self.assertXMLEqual(response.content.decode(), expected_content)
@override_settings(LANGUAGES=(('en', 'English'), ('pt', 'Portuguese'), ('es', 'Spanish')))
def test_alternate_i18n_sitemap_limited(self):
"""
A i18n sitemap index with limited languages can be rendered.
"""
response = self.client.get('/limited/i18n.xml')
url, pk = self.base_url, self.i18n_model.pk
expected_urls = f"""
<url><loc>{url}/en/i18n/testmodel/{pk}/</loc><changefreq>never</changefreq><priority>0.5</priority>
<xhtml:link rel="alternate" hreflang="en" href="{url}/en/i18n/testmodel/{pk}/"/>
<xhtml:link rel="alternate" hreflang="es" href="{url}/es/i18n/testmodel/{pk}/"/>
</url>
<url><loc>{url}/es/i18n/testmodel/{pk}/</loc><changefreq>never</changefreq><priority>0.5</priority>
<xhtml:link rel="alternate" hreflang="en" href="{url}/en/i18n/testmodel/{pk}/"/>
<xhtml:link rel="alternate" hreflang="es" href="{url}/es/i18n/testmodel/{pk}/"/>
</url>
""".replace('\n', '')
expected_content = f"""<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:xhtml="http://www.w3.org/1999/xhtml">
{expected_urls}
</urlset>
"""
self.assertXMLEqual(response.content.decode(), expected_content)
@override_settings(LANGUAGES=(('en', 'English'), ('pt', 'Portuguese')))
def test_alternate_i18n_sitemap_xdefault(self):
"""
A i18n sitemap index with x-default can be rendered.
"""
response = self.client.get('/x-default/i18n.xml')
url, pk = self.base_url, self.i18n_model.pk
expected_urls = f"""
<url><loc>{url}/en/i18n/testmodel/{pk}/</loc><changefreq>never</changefreq><priority>0.5</priority>
<xhtml:link rel="alternate" hreflang="en" href="{url}/en/i18n/testmodel/{pk}/"/>
<xhtml:link rel="alternate" hreflang="pt" href="{url}/pt/i18n/testmodel/{pk}/"/>
<xhtml:link rel="alternate" hreflang="x-default" href="{url}/i18n/testmodel/{pk}/"/>
</url>
<url><loc>{url}/pt/i18n/testmodel/{pk}/</loc><changefreq>never</changefreq><priority>0.5</priority>
<xhtml:link rel="alternate" hreflang="en" href="{url}/en/i18n/testmodel/{pk}/"/>
<xhtml:link rel="alternate" hreflang="pt" href="{url}/pt/i18n/testmodel/{pk}/"/>
<xhtml:link rel="alternate" hreflang="x-default" href="{url}/i18n/testmodel/{pk}/"/>
</url>
""".replace('\n', '')
expected_content = f"""<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:xhtml="http://www.w3.org/1999/xhtml">
{expected_urls}
</urlset>
"""
self.assertXMLEqual(response.content.decode(), expected_content)
def test_sitemap_without_entries(self): def test_sitemap_without_entries(self):
response = self.client.get('/sitemap-without-entries/sitemap.xml') response = self.client.get('/sitemap-without-entries/sitemap.xml')
expected_content = """<?xml version="1.0" encoding="UTF-8"?> expected_content = """<?xml version="1.0" encoding="UTF-8"?>

View File

@ -34,6 +34,18 @@ class SimpleI18nSitemap(Sitemap):
return I18nTestModel.objects.order_by('pk').all() return I18nTestModel.objects.order_by('pk').all()
class AlternatesI18nSitemap(SimpleI18nSitemap):
alternates = True
class LimitedI18nSitemap(AlternatesI18nSitemap):
languages = ['en', 'es']
class XDefaultI18nSitemap(AlternatesI18nSitemap):
x_default = True
class EmptySitemap(Sitemap): class EmptySitemap(Sitemap):
changefreq = "never" changefreq = "never"
priority = 0.5 priority = 0.5
@ -77,8 +89,20 @@ simple_sitemaps = {
'simple': SimpleSitemap, 'simple': SimpleSitemap,
} }
simple_i18nsitemaps = { simple_i18n_sitemaps = {
'simple': SimpleI18nSitemap, 'i18n': SimpleI18nSitemap,
}
alternates_i18n_sitemaps = {
'i18n-alternates': AlternatesI18nSitemap,
}
limited_i18n_sitemaps = {
'i18n-limited': LimitedI18nSitemap,
}
xdefault_i18n_sitemaps = {
'i18n-xdefault': XDefaultI18nSitemap,
} }
simple_sitemaps_not_callable = { simple_sitemaps_not_callable = {
@ -97,7 +121,7 @@ fixed_lastmod_sitemaps = {
'fixed-lastmod': FixedLastmodSitemap, 'fixed-lastmod': FixedLastmodSitemap,
} }
fixed_lastmod__mixed_sitemaps = { fixed_lastmod_mixed_sitemaps = {
'fixed-lastmod-mixed': FixedLastmodMixedSitemap, 'fixed-lastmod-mixed': FixedLastmodMixedSitemap,
} }
@ -151,7 +175,19 @@ urlpatterns = [
name='django.contrib.sitemaps.views.sitemap'), name='django.contrib.sitemaps.views.sitemap'),
path( path(
'simple/i18n.xml', views.sitemap, 'simple/i18n.xml', views.sitemap,
{'sitemaps': simple_i18nsitemaps}, {'sitemaps': simple_i18n_sitemaps},
name='django.contrib.sitemaps.views.sitemap'),
path(
'alternates/i18n.xml', views.sitemap,
{'sitemaps': alternates_i18n_sitemaps},
name='django.contrib.sitemaps.views.sitemap'),
path(
'limited/i18n.xml', views.sitemap,
{'sitemaps': limited_i18n_sitemaps},
name='django.contrib.sitemaps.views.sitemap'),
path(
'x-default/i18n.xml', views.sitemap,
{'sitemaps': xdefault_i18n_sitemaps},
name='django.contrib.sitemaps.views.sitemap'), name='django.contrib.sitemaps.views.sitemap'),
path( path(
'simple/custom-sitemap.xml', views.sitemap, 'simple/custom-sitemap.xml', views.sitemap,
@ -167,7 +203,7 @@ urlpatterns = [
name='django.contrib.sitemaps.views.sitemap'), name='django.contrib.sitemaps.views.sitemap'),
path( path(
'lastmod-mixed/sitemap.xml', views.sitemap, 'lastmod-mixed/sitemap.xml', views.sitemap,
{'sitemaps': fixed_lastmod__mixed_sitemaps}, {'sitemaps': fixed_lastmod_mixed_sitemaps},
name='django.contrib.sitemaps.views.sitemap'), name='django.contrib.sitemaps.views.sitemap'),
path( path(
'lastmod/date-sitemap.xml', views.sitemap, 'lastmod/date-sitemap.xml', views.sitemap,