django1/django/contrib/sitemaps/__init__.py

136 lines
4.7 KiB
Python

from django.contrib.sites.models import Site
from django.core import urlresolvers, paginator
from django.core.exceptions import ImproperlyConfigured
from django.utils.six.moves.urllib.parse import urlencode
from django.utils.six.moves.urllib.request import urlopen
PING_URL = "http://www.google.com/webmasters/tools/ping"
class SitemapNotFound(Exception):
pass
def ping_google(sitemap_url=None, ping_url=PING_URL):
"""
Alerts Google that the sitemap for the current site has been updated.
If sitemap_url is provided, it should be an absolute path to the sitemap
for this site -- e.g., '/sitemap.xml'. If sitemap_url is not provided, this
function will attempt to deduce it by using urlresolvers.reverse().
"""
if sitemap_url is None:
try:
# First, try to get the "index" sitemap URL.
sitemap_url = urlresolvers.reverse('django.contrib.sitemaps.views.index')
except urlresolvers.NoReverseMatch:
try:
# Next, try for the "global" sitemap URL.
sitemap_url = urlresolvers.reverse('django.contrib.sitemaps.views.sitemap')
except urlresolvers.NoReverseMatch:
pass
if sitemap_url is None:
raise SitemapNotFound("You didn't provide a sitemap_url, and the sitemap URL couldn't be auto-detected.")
current_site = Site.objects.get_current()
url = "http://%s%s" % (current_site.domain, sitemap_url)
params = urlencode({'sitemap': url})
urlopen("%s?%s" % (ping_url, params))
class Sitemap(object):
# This limit is defined by Google. See the index documentation at
# http://sitemaps.org/protocol.php#index.
limit = 50000
# If protocol is None, the URLs in the sitemap will use the protocol
# with which the sitemap was requested.
protocol = None
def __get(self, name, obj, default=None):
try:
attr = getattr(self, name)
except AttributeError:
return default
if callable(attr):
return attr(obj)
return attr
def items(self):
return []
def location(self, obj):
return obj.get_absolute_url()
def _get_paginator(self):
return paginator.Paginator(self.items(), self.limit)
paginator = property(_get_paginator)
def get_urls(self, page=1, site=None, protocol=None):
# Determine protocol
if self.protocol is not None:
protocol = self.protocol
if protocol is None:
protocol = 'http'
# Determine domain
if site is None:
if Site._meta.installed:
try:
site = Site.objects.get_current()
except Site.DoesNotExist:
pass
if site is None:
raise ImproperlyConfigured("To use sitemaps, either enable the sites framework or pass a Site/RequestSite object in your view.")
domain = site.domain
urls = []
latest_lastmod = None
all_items_lastmod = True # track if all items have a lastmod
for item in self.paginator.page(page).object_list:
loc = "%s://%s%s" % (protocol, domain, self.__get('location', item))
priority = self.__get('priority', item, None)
lastmod = self.__get('lastmod', item, None)
if all_items_lastmod:
all_items_lastmod = lastmod is not None
if (all_items_lastmod and
(latest_lastmod is None or lastmod > latest_lastmod)):
latest_lastmod = lastmod
url_info = {
'item': item,
'location': loc,
'lastmod': lastmod,
'changefreq': self.__get('changefreq', item, None),
'priority': str(priority if priority is not None else ''),
}
urls.append(url_info)
if all_items_lastmod and latest_lastmod:
self.latest_lastmod = latest_lastmod
return urls
class FlatPageSitemap(Sitemap):
def items(self):
current_site = Site.objects.get_current()
return current_site.flatpage_set.filter(registration_required=False)
class GenericSitemap(Sitemap):
priority = None
changefreq = None
def __init__(self, info_dict, priority=None, changefreq=None):
self.queryset = info_dict['queryset']
self.date_field = info_dict.get('date_field', None)
self.priority = priority
self.changefreq = changefreq
def items(self):
# Make sure to return a clone; we don't want premature evaluation.
return self.queryset.filter()
def lastmod(self, item):
if self.date_field is not None:
return getattr(item, self.date_field)
return None