django1/django/contrib/sitemaps/views.py

154 lines
4.9 KiB
Python

import datetime
import warnings
from dataclasses import dataclass
from functools import wraps
from django.contrib.sites.shortcuts import get_current_site
from django.core.paginator import EmptyPage, PageNotAnInteger
from django.http import Http404
from django.template.response import TemplateResponse
from django.urls import reverse
from django.utils import timezone
from django.utils.deprecation import RemovedInDjango50Warning
from django.utils.http import http_date
@dataclass
class SitemapIndexItem:
location: str
last_mod: bool = None
# RemovedInDjango50Warning
def __str__(self):
msg = (
"Calling `__str__` on SitemapIndexItem is deprecated, use the `location` "
"attribute instead."
)
warnings.warn(msg, RemovedInDjango50Warning, stacklevel=2)
return self.location
def x_robots_tag(func):
@wraps(func)
def inner(request, *args, **kwargs):
response = func(request, *args, **kwargs)
response.headers["X-Robots-Tag"] = "noindex, noodp, noarchive"
return response
return inner
def _get_latest_lastmod(current_lastmod, new_lastmod):
"""
Returns the latest `lastmod` where `lastmod` can be either a date or a
datetime.
"""
if not isinstance(new_lastmod, datetime.datetime):
new_lastmod = datetime.datetime.combine(new_lastmod, datetime.time.min)
if timezone.is_naive(new_lastmod):
new_lastmod = timezone.make_aware(new_lastmod, datetime.timezone.utc)
return new_lastmod if current_lastmod is None else max(current_lastmod, new_lastmod)
@x_robots_tag
def index(
request,
sitemaps,
template_name="sitemap_index.xml",
content_type="application/xml",
sitemap_url_name="django.contrib.sitemaps.views.sitemap",
):
req_protocol = request.scheme
req_site = get_current_site(request)
sites = [] # all sections' sitemap URLs
all_indexes_lastmod = True
latest_lastmod = None
for section, site in sitemaps.items():
# For each section label, add links of all pages of its sitemap
# (usually generated by the `sitemap` view).
if callable(site):
site = site()
protocol = req_protocol if site.protocol is None else site.protocol
sitemap_url = reverse(sitemap_url_name, kwargs={"section": section})
absolute_url = "%s://%s%s" % (protocol, req_site.domain, sitemap_url)
site_lastmod = site.get_latest_lastmod()
if all_indexes_lastmod:
if site_lastmod is not None:
latest_lastmod = _get_latest_lastmod(latest_lastmod, site_lastmod)
else:
all_indexes_lastmod = False
sites.append(SitemapIndexItem(absolute_url, site_lastmod))
# Add links to all pages of the sitemap.
for page in range(2, site.paginator.num_pages + 1):
sites.append(
SitemapIndexItem("%s?p=%s" % (absolute_url, page), site_lastmod)
)
# If lastmod is defined for all sites, set header so as
# ConditionalGetMiddleware is able to send 304 NOT MODIFIED
if all_indexes_lastmod and latest_lastmod:
headers = {"Last-Modified": http_date(latest_lastmod.timestamp())}
else:
headers = None
return TemplateResponse(
request,
template_name,
{"sitemaps": sites},
content_type=content_type,
headers=headers,
)
@x_robots_tag
def sitemap(
request,
sitemaps,
section=None,
template_name="sitemap.xml",
content_type="application/xml",
):
req_protocol = request.scheme
req_site = get_current_site(request)
if section is not None:
if section not in sitemaps:
raise Http404("No sitemap available for section: %r" % section)
maps = [sitemaps[section]]
else:
maps = sitemaps.values()
page = request.GET.get("p", 1)
lastmod = None
all_sites_lastmod = True
urls = []
for site in maps:
try:
if callable(site):
site = site()
urls.extend(site.get_urls(page=page, site=req_site, protocol=req_protocol))
if all_sites_lastmod:
site_lastmod = getattr(site, "latest_lastmod", None)
if site_lastmod is not None:
lastmod = _get_latest_lastmod(lastmod, site_lastmod)
else:
all_sites_lastmod = False
except EmptyPage:
raise Http404("Page %s empty" % page)
except PageNotAnInteger:
raise Http404("No page '%s'" % page)
# If lastmod is defined for all sites, set header so as
# ConditionalGetMiddleware is able to send 304 NOT MODIFIED
if all_sites_lastmod:
headers = {"Last-Modified": http_date(lastmod.timestamp())} if lastmod else None
else:
headers = None
return TemplateResponse(
request,
template_name,
{"urlset": urls},
content_type=content_type,
headers=headers,
)