154 lines
4.9 KiB
Python
154 lines
4.9 KiB
Python
import datetime
|
|
import warnings
|
|
from dataclasses import dataclass
|
|
from functools import wraps
|
|
|
|
from django.contrib.sites.shortcuts import get_current_site
|
|
from django.core.paginator import EmptyPage, PageNotAnInteger
|
|
from django.http import Http404
|
|
from django.template.response import TemplateResponse
|
|
from django.urls import reverse
|
|
from django.utils import timezone
|
|
from django.utils.deprecation import RemovedInDjango50Warning
|
|
from django.utils.http import http_date
|
|
|
|
|
|
@dataclass
class SitemapIndexItem:
    """
    One entry of a sitemap index: the absolute URL of a sitemap page and the
    last-modification value reported for the section it belongs to.
    """

    # Absolute URL of the sitemap page.
    location: str
    # Latest modification date/datetime of the section, or None when the
    # section does not report one. Written as a string annotation so that it
    # is never evaluated at class-creation time.
    last_mod: "datetime.date | None" = None

    # RemovedInDjango50Warning
    def __str__(self):
        """Return `location`, emitting a deprecation warning."""
        msg = (
            "Calling `__str__` on SitemapIndexItem is deprecated, use the `location` "
            "attribute instead."
        )
        # stacklevel=2 attributes the warning to the caller of str().
        warnings.warn(msg, RemovedInDjango50Warning, stacklevel=2)
        return self.location
|
|
|
|
|
|
def x_robots_tag(func):
    """
    Decorate a view so that every response it returns carries the header
    ``X-Robots-Tag: noindex, noodp, noarchive``.

    The wrapped view is called with the same positional and keyword
    arguments, and its response object is returned after the header is set.
    """

    @wraps(func)
    def wrapped_view(request, *args, **kwargs):
        result = func(request, *args, **kwargs)
        result.headers["X-Robots-Tag"] = "noindex, noodp, noarchive"
        return result

    return wrapped_view
|
|
|
|
|
|
def _get_latest_lastmod(current_lastmod, new_lastmod):
    """
    Return the more recent of `current_lastmod` and `new_lastmod`.

    `new_lastmod` may be a date or a datetime. It is normalized to an aware
    datetime before comparing: a plain date becomes midnight of that day, and
    a naive datetime is made aware in UTC. When `current_lastmod` is None the
    normalized `new_lastmod` is returned as is.
    """
    candidate = new_lastmod
    if not isinstance(candidate, datetime.datetime):
        # Plain date: promote to a datetime at 00:00 of that day.
        candidate = datetime.datetime.combine(candidate, datetime.time.min)
    if timezone.is_naive(candidate):
        candidate = timezone.make_aware(candidate, datetime.timezone.utc)
    if current_lastmod is None:
        return candidate
    return max(current_lastmod, candidate)
|
|
|
|
|
|
@x_robots_tag
def index(
    request,
    sitemaps,
    template_name="sitemap_index.xml",
    content_type="application/xml",
    sitemap_url_name="django.contrib.sitemaps.views.sitemap",
):
    """
    Render the sitemap index: one entry per page of every section.

    `sitemaps` maps a section label to a sitemap object, or to a callable
    returning one. Each section's URL is built by reversing
    `sitemap_url_name` with the section label; additional pages are linked
    with a ``?p=<n>`` query string. The template receives the entries as
    ``sitemaps``.

    A ``Last-Modified`` header is set only when every section reports a
    latest lastmod.
    """
    request_scheme = request.scheme
    current_site = get_current_site(request)

    entries = []  # SitemapIndexItem for every page of every section
    every_section_has_lastmod = True
    newest_lastmod = None
    for section, site in sitemaps.items():
        if callable(site):
            site = site()
        # A sitemap may pin its own protocol; otherwise follow the request.
        scheme = site.protocol if site.protocol is not None else request_scheme
        path = reverse(sitemap_url_name, kwargs={"section": section})
        base_url = "%s://%s%s" % (scheme, current_site.domain, path)
        section_lastmod = site.get_latest_lastmod()
        if every_section_has_lastmod:
            if section_lastmod is None:
                # One section without lastmod disables the header for good.
                every_section_has_lastmod = False
            else:
                newest_lastmod = _get_latest_lastmod(newest_lastmod, section_lastmod)
        # First page is the bare URL; pages 2..num_pages carry ?p=<n>.
        entries.append(SitemapIndexItem(base_url, section_lastmod))
        entries.extend(
            SitemapIndexItem("%s?p=%s" % (base_url, page_number), section_lastmod)
            for page_number in range(2, site.paginator.num_pages + 1)
        )

    # If lastmod is defined for all sites, set header so as
    # ConditionalGetMiddleware is able to send 304 NOT MODIFIED
    headers = None
    if every_section_has_lastmod and newest_lastmod:
        headers = {"Last-Modified": http_date(newest_lastmod.timestamp())}
    return TemplateResponse(
        request,
        template_name,
        {"sitemaps": entries},
        content_type=content_type,
        headers=headers,
    )
|
|
|
|
|
|
@x_robots_tag
def sitemap(
    request,
    sitemaps,
    section=None,
    template_name="sitemap.xml",
    content_type="application/xml",
):
    """
    Render the URLs of one sitemap section, or of all sections when
    `section` is None.

    `sitemaps` maps a section label to a sitemap object, or to a callable
    returning one. The page is read from the ``p`` query parameter and
    defaults to 1. The template receives the collected URLs as ``urlset``.

    Raise Http404 when `section` is not a key of `sitemaps`, or when a
    sitemap raises EmptyPage or PageNotAnInteger for the requested page.

    A ``Last-Modified`` header is set only when every rendered sitemap
    exposes a non-None `latest_lastmod`.
    """
    request_scheme = request.scheme
    current_site = get_current_site(request)

    if section is None:
        selected = sitemaps.values()
    elif section in sitemaps:
        selected = [sitemaps[section]]
    else:
        raise Http404("No sitemap available for section: %r" % section)
    page = request.GET.get("p", 1)

    entries = []
    newest_lastmod = None
    every_site_has_lastmod = True
    for site in selected:
        try:
            if callable(site):
                site = site()
            entries.extend(
                site.get_urls(page=page, site=current_site, protocol=request_scheme)
            )
            if every_site_has_lastmod:
                # Read after get_urls(); the attribute may be absent.
                site_lastmod = getattr(site, "latest_lastmod", None)
                if site_lastmod is None:
                    every_site_has_lastmod = False
                else:
                    newest_lastmod = _get_latest_lastmod(newest_lastmod, site_lastmod)
        except EmptyPage:
            raise Http404("Page %s empty" % page)
        except PageNotAnInteger:
            raise Http404("No page '%s'" % page)

    # If lastmod is defined for all sites, set header so as
    # ConditionalGetMiddleware is able to send 304 NOT MODIFIED
    headers = None
    if every_site_has_lastmod and newest_lastmod:
        headers = {"Last-Modified": http_date(newest_lastmod.timestamp())}
    return TemplateResponse(
        request,
        template_name,
        {"urlset": entries},
        content_type=content_type,
        headers=headers,
    )
|