2017-01-07 19:11:46 +08:00
|
|
|
from urllib.parse import urlencode
|
|
|
|
from urllib.request import urlopen
|
|
|
|
|
2014-02-02 00:37:08 +08:00
|
|
|
from django.apps import apps as django_apps
|
2014-06-07 02:47:15 +08:00
|
|
|
from django.conf import settings
|
2015-12-30 23:51:16 +08:00
|
|
|
from django.core import paginator
|
2010-10-11 22:34:42 +08:00
|
|
|
from django.core.exceptions import ImproperlyConfigured
|
2015-12-30 23:51:16 +08:00
|
|
|
from django.urls import NoReverseMatch, reverse
|
2014-06-07 02:47:15 +08:00
|
|
|
from django.utils import translation
|
2006-09-01 07:13:59 +08:00
|
|
|
|
2015-11-03 07:10:50 +08:00
|
|
|
# Default endpoint that ping_google() requests (with a ``sitemap`` query
# parameter appended) when no other ping_url is passed.
PING_URL = "https://www.google.com/webmasters/tools/ping"
|
2006-09-01 07:13:59 +08:00
|
|
|
|
2013-11-03 03:37:48 +08:00
|
|
|
|
2006-09-01 07:13:59 +08:00
|
|
|
class SitemapNotFound(Exception):
    """
    Raised when no sitemap URL was supplied and none could be auto-detected.
    """
|
|
|
|
|
2013-11-03 03:37:48 +08:00
|
|
|
|
2019-01-10 18:00:00 +08:00
|
|
|
def ping_google(sitemap_url=None, ping_url=PING_URL, sitemap_uses_https=True):
    """
    Alert Google that the sitemap for the current site has been updated.

    If sitemap_url is provided, it should be an absolute path to the sitemap
    for this site -- e.g., '/sitemap.xml'. If sitemap_url is not provided, this
    function will attempt to deduce it by using urls.reverse().

    ping_url is the endpoint to request; the full sitemap URL is appended to
    it as the ``sitemap`` query parameter. sitemap_uses_https selects the
    scheme ('https' or 'http') used when building the full sitemap URL.

    Exceptions raised while resolving the sitemap URL or while opening
    ping_url propagate to the caller. Return None.
    """
    sitemap_full_url = _get_sitemap_full_url(sitemap_url, sitemap_uses_https)
    params = urlencode({'sitemap': sitemap_full_url})
    # The response body is not needed; the context manager only makes sure
    # the underlying connection is closed instead of being left to the GC.
    with urlopen(f'{ping_url}?{params}'):
        pass
|
|
|
|
|
|
|
|
|
2019-01-10 18:00:00 +08:00
|
|
|
def _get_sitemap_full_url(sitemap_url, sitemap_uses_https=True):
    """
    Return the absolute URL (scheme, current site's domain and path) of the
    sitemap.

    If sitemap_url is None, try to reverse the sitemap "index" view first and
    the "global" sitemap view second.

    Raise ImproperlyConfigured if django.contrib.sites isn't installed and
    SitemapNotFound if no sitemap URL was given and none could be reversed.
    """
    if not django_apps.is_installed('django.contrib.sites'):
        raise ImproperlyConfigured("ping_google requires django.contrib.sites, which isn't installed.")

    if sitemap_url is None:
        # Candidates in order of preference: the "index" sitemap URL first,
        # then the "global" sitemap URL. The first one that reverses wins.
        candidate_views = (
            'django.contrib.sitemaps.views.index',
            'django.contrib.sitemaps.views.sitemap',
        )
        for view_name in candidate_views:
            try:
                sitemap_url = reverse(view_name)
            except NoReverseMatch:
                continue
            break

    if sitemap_url is None:
        raise SitemapNotFound("You didn't provide a sitemap_url, and the sitemap URL couldn't be auto-detected.")

    Site = django_apps.get_model('sites.Site')
    current_site = Site.objects.get_current()
    scheme = 'https' if sitemap_uses_https else 'http'
    return '%s://%s%s' % (scheme, current_site.domain, sitemap_url)
|
2006-09-01 07:13:59 +08:00
|
|
|
|
2013-11-03 03:37:48 +08:00
|
|
|
|
2017-01-19 15:39:46 +08:00
|
|
|
class Sitemap:
    """
    Base class describing the entries of one sitemap.

    Subclasses override items() and, optionally, location(). 'priority',
    'lastmod' and 'changefreq' are looked up by name on the instance; each may
    be a plain attribute or a callable taking a single item.
    """

    # This limit is defined by Google. See the index documentation at
    # https://www.sitemaps.org/protocol.html#index.
    limit = 50000

    # If protocol is None, the URLs in the sitemap will use the protocol
    # with which the sitemap was requested.
    protocol = None

    # Enables generating URLs for all languages.
    i18n = False

    # Override list of languages to use.
    languages = None

    # Enables generating alternate/hreflang links.
    alternates = False

    # Add an alternate/hreflang link with value 'x-default'.
    x_default = False

    def _get(self, name, item, default=None):
        """
        Return the value of the attribute `name` for `item`.

        If the attribute doesn't exist, return `default`. If it's callable,
        call it with the item (without the language code when i18n is
        enabled); otherwise return the attribute itself.
        """
        try:
            attr = getattr(self, name)
        except AttributeError:
            return default
        if callable(attr):
            if self.i18n:
                # Split the (item, lang_code) tuples again for the location,
                # priority, lastmod and changefreq method calls.
                item, _ = item
            return attr(item)
        return attr

    def _languages(self):
        """
        Return the language codes to generate URLs for: self.languages if
        set, otherwise every code in settings.LANGUAGES.
        """
        if self.languages is not None:
            return self.languages
        return [lang_code for lang_code, _ in settings.LANGUAGES]

    def _items(self):
        """
        Return the objects to paginate: items() as is, or, when i18n is
        enabled, a list of (item, lang_code) tuples.
        """
        if self.i18n:
            # Create (item, lang_code) tuples for all items and languages.
            # This is necessary to paginate with all languages already considered.
            # items() is deliberately called once per language, exactly as
            # many times as there are languages.
            return [
                (item, lang_code)
                for lang_code in self._languages()
                for item in self.items()
            ]
        return self.items()

    def _location(self, item, force_lang_code=None):
        """
        Return the location of `item`. With i18n enabled, `item` is an
        (item, lang_code) tuple and the lookup runs with force_lang_code (if
        given) or the tuple's language activated.
        """
        if self.i18n:
            _, lang_code = item
            # Activate language from item-tuple or forced one before calling location.
            with translation.override(force_lang_code or lang_code):
                return self._get('location', item)
        return self._get('location', item)

    @property
    def paginator(self):
        """Return a new Paginator over _items(), `limit` entries per page."""
        return paginator.Paginator(self._items(), self.limit)

    def items(self):
        """Return the objects to include in the sitemap (none by default)."""
        return []

    def location(self, item):
        """Return the path of `item`, by default item.get_absolute_url()."""
        return item.get_absolute_url()

    def get_protocol(self, protocol=None):
        """
        Return self.protocol if set, else the given protocol, else 'http'.
        """
        # Determine protocol
        return self.protocol or protocol or 'http'

    def get_domain(self, site=None):
        """
        Return the domain of `site`, or of the current Site when `site` is
        None and the sites framework is installed.

        Raise ImproperlyConfigured if no site can be determined.
        """
        # Determine domain
        if site is None:
            if django_apps.is_installed('django.contrib.sites'):
                Site = django_apps.get_model('sites.Site')
                try:
                    site = Site.objects.get_current()
                except Site.DoesNotExist:
                    # Reported below as ImproperlyConfigured.
                    pass
            if site is None:
                raise ImproperlyConfigured(
                    "To use sitemaps, either enable the sites framework or pass "
                    "a Site/RequestSite object in your view."
                )
        return site.domain

    def get_urls(self, page=1, site=None, protocol=None):
        """
        Return the list of URL info dicts for the given page, after resolving
        the protocol and domain to use.
        """
        protocol = self.get_protocol(protocol)
        domain = self.get_domain(site)
        return self._urls(page, protocol, domain)

    def _urls(self, page, protocol, domain):
        """
        Build one dict per item on `page` with the keys 'item', 'location',
        'lastmod', 'changefreq' and 'priority' (plus 'alternates' when both
        i18n and alternates are enabled).

        Side effect: if every item on the page has a lastmod, the most recent
        one is stored on self.latest_lastmod.
        """
        urls = []
        latest_lastmod = None
        all_items_lastmod = True  # track if all items have a lastmod

        paginator_page = self.paginator.page(page)
        for item in paginator_page.object_list:
            loc = f'{protocol}://{domain}{self._location(item)}'
            priority = self._get('priority', item)
            lastmod = self._get('lastmod', item)

            if all_items_lastmod:
                # Once an item without lastmod is seen, stop tracking.
                all_items_lastmod = lastmod is not None
                if (all_items_lastmod and
                        (latest_lastmod is None or lastmod > latest_lastmod)):
                    latest_lastmod = lastmod

            url_info = {
                'item': item,
                'location': loc,
                'lastmod': lastmod,
                'changefreq': self._get('changefreq', item),
                'priority': str(priority if priority is not None else ''),
            }

            if self.i18n and self.alternates:
                alternates = []
                for lang_code in self._languages():
                    alt_loc = f'{protocol}://{domain}{self._location(item, lang_code)}'
                    alternates.append({
                        'location': alt_loc,
                        'lang_code': lang_code,
                    })
                if self.x_default:
                    lang_code = settings.LANGUAGE_CODE
                    alt_loc = f'{protocol}://{domain}{self._location(item, lang_code)}'
                    # Drop the first '/<lang_code>/' segment to get the
                    # language-neutral URL.
                    alt_loc = alt_loc.replace(f'/{lang_code}/', '/', 1)
                    alternates.append({
                        'location': alt_loc,
                        'lang_code': 'x-default',
                    })
                url_info['alternates'] = alternates

            urls.append(url_info)

        if all_items_lastmod and latest_lastmod:
            self.latest_lastmod = latest_lastmod

        return urls
|
|
|
|
|
2013-11-03 03:37:48 +08:00
|
|
|
|
2006-09-01 07:13:59 +08:00
|
|
|
class GenericSitemap(Sitemap):
    """
    Sitemap built from an info_dict holding a 'queryset' and, optionally, the
    name of a 'date_field' used as each item's lastmod.
    """
    priority = None
    changefreq = None

    def __init__(self, info_dict, priority=None, changefreq=None, protocol=None):
        """
        Store the queryset and date field from info_dict.

        A truthy class-level priority, changefreq or protocol takes
        precedence over the corresponding argument.
        """
        self.queryset = info_dict['queryset']
        self.date_field = info_dict.get('date_field')
        self.priority = self.priority or priority
        self.changefreq = self.changefreq or changefreq
        self.protocol = self.protocol or protocol

    def items(self):
        # Make sure to return a clone; we don't want premature evaluation.
        return self.queryset.filter()

    def lastmod(self, item):
        """
        Return the item's date_field value, or None if no date_field was
        configured.
        """
        if self.date_field is not None:
            return getattr(item, self.date_field)
        return None
|