2011-07-13 17:35:51 +08:00
|
|
|
from django.contrib.sites.models import Site
|
2008-07-26 13:07:16 +08:00
|
|
|
from django.core import urlresolvers, paginator
|
2010-10-11 22:34:42 +08:00
|
|
|
from django.core.exceptions import ImproperlyConfigured
|
2012-07-20 21:36:52 +08:00
|
|
|
try:
|
|
|
|
from urllib.parse import urlencode
|
|
|
|
from urllib.request import urlopen
|
|
|
|
except ImportError: # Python 2
|
|
|
|
from urllib import urlencode, urlopen
|
2006-09-01 07:13:59 +08:00
|
|
|
|
2007-05-21 04:47:18 +08:00
|
|
|
# Default endpoint that ping_google() requests when no ping_url is passed.
PING_URL = "http://www.google.com/webmasters/tools/ping"
|
2006-09-01 07:13:59 +08:00
|
|
|
|
|
|
|
class SitemapNotFound(Exception):
    """Raised when no sitemap URL was supplied and none could be detected."""
|
|
|
|
|
|
|
|
def ping_google(sitemap_url=None, ping_url=PING_URL):
    """
    Alerts Google that the sitemap for the current site has been updated.

    If sitemap_url is provided, it should be an absolute path to the sitemap
    for this site -- e.g., '/sitemap.xml'. If sitemap_url is not provided, this
    function will attempt to deduce it by using urlresolvers.reverse().

    Raises SitemapNotFound if sitemap_url is not provided and neither the
    "index" nor the "global" sitemap view can be reversed.
    """
    # Works with both the Python 2 and Python 3 urlopen() return values,
    # which are only guaranteed to expose close().
    from contextlib import closing

    if sitemap_url is None:
        # First, try to get the "index" sitemap URL; next, try for the
        # "global" sitemap URL.
        candidate_views = (
            'django.contrib.sitemaps.views.index',
            'django.contrib.sitemaps.views.sitemap',
        )
        for view_name in candidate_views:
            try:
                sitemap_url = urlresolvers.reverse(view_name)
                break
            except urlresolvers.NoReverseMatch:
                pass

    if sitemap_url is None:
        raise SitemapNotFound("You didn't provide a sitemap_url, and the sitemap URL couldn't be auto-detected.")

    from django.contrib.sites.models import Site
    current_site = Site.objects.get_current()
    url = "http://%s%s" % (current_site.domain, sitemap_url)
    params = urlencode({'sitemap': url})
    # The response body is not needed; close the connection explicitly
    # instead of leaving it open until garbage collection.
    with closing(urlopen("%s?%s" % (ping_url, params))):
        pass
|
2006-09-01 07:13:59 +08:00
|
|
|
|
2008-09-17 12:56:04 +08:00
|
|
|
class Sitemap(object):
    """
    Base class describing the entries of a sitemap.

    Subclasses override items() and may define ``location``, ``lastmod``,
    ``changefreq`` and ``priority`` either as plain attributes or as
    callables taking a single item.
    """

    # This limit is defined by Google. See the index documentation at
    # http://sitemaps.org/protocol.php#index.
    limit = 50000

    # If protocol is None, the URLs in the sitemap will use the protocol
    # with which the sitemap was requested.
    protocol = None

    def __get(self, name, obj, default=None):
        """
        Resolve the attribute ``name`` of this sitemap for ``obj``.

        Returns ``default`` when the attribute does not exist, the result of
        calling it with ``obj`` when it is callable, and the attribute itself
        otherwise.
        """
        try:
            attr = getattr(self, name)
        except AttributeError:
            return default
        if callable(attr):
            return attr(obj)
        return attr

    def items(self):
        """Return the objects to list in the sitemap (none by default)."""
        return []

    def location(self, obj):
        """Return the path of ``obj``, taken from its get_absolute_url()."""
        return obj.get_absolute_url()

    def _get_paginator(self):
        """Build a paginator over items() with ``limit`` entries per page."""
        return paginator.Paginator(self.items(), self.limit)
    paginator = property(_get_paginator)

    def get_urls(self, page=1, site=None, protocol=None):
        """
        Return a list of dicts describing the URLs on the given page.

        Each dict has the keys 'item', 'location', 'lastmod', 'changefreq'
        and 'priority'. The class-level ``protocol`` takes precedence over
        the ``protocol`` argument; 'http' is used when both are None. When
        ``site`` is None the current Site is looked up.

        When every item on the page has a lastmod, the most recent one is
        stored on ``self.latest_lastmod``.

        Raises ImproperlyConfigured when no site was passed and none could
        be determined from the sites framework.
        """
        # Determine protocol
        if self.protocol is not None:
            protocol = self.protocol
        if protocol is None:
            protocol = 'http'

        # Determine domain
        if site is None:
            if Site._meta.installed:
                try:
                    site = Site.objects.get_current()
                except Site.DoesNotExist:
                    pass
            if site is None:
                raise ImproperlyConfigured("To use sitemaps, either enable the sites framework or pass a Site/RequestSite object in your view.")
        domain = site.domain

        urls = []
        latest_lastmod = None
        all_items_lastmod = True  # track if all items have a lastmod
        for item in self.paginator.page(page).object_list:
            loc = "%s://%s%s" % (protocol, domain, self.__get('location', item))
            priority = self.__get('priority', item, None)
            lastmod = self.__get('lastmod', item, None)
            if all_items_lastmod:
                all_items_lastmod = lastmod is not None
                if (all_items_lastmod and
                        (latest_lastmod is None or lastmod > latest_lastmod)):
                    latest_lastmod = lastmod
            url_info = {
                'item': item,
                'location': loc,
                'lastmod': lastmod,
                'changefreq': self.__get('changefreq', item, None),
                'priority': str(priority if priority is not None else ''),
            }
            urls.append(url_info)
        # Only publish a value that was actually computed: an empty page
        # leaves latest_lastmod at None, which must not be exposed as if it
        # were a modification date.
        if all_items_lastmod and latest_lastmod is not None:
            self.latest_lastmod = latest_lastmod
        return urls
|
|
|
|
|
2006-09-01 07:31:25 +08:00
|
|
|
class FlatPageSitemap(Sitemap):
    """Sitemap of the current site's flatpages that need no registration."""

    def items(self):
        """Return the current site's flatpages with registration_required=False."""
        return Site.objects.get_current().flatpage_set.filter(
            registration_required=False)
|
2006-09-01 07:13:59 +08:00
|
|
|
|
|
|
|
class GenericSitemap(Sitemap):
    """
    Sitemap built from an ``info_dict`` holding a 'queryset' and, optionally,
    a 'date_field' naming the item attribute to report as lastmod.
    """
    priority = None
    changefreq = None

    def __init__(self, info_dict, priority=None, changefreq=None):
        """Store the queryset, the optional date field, priority and changefreq."""
        self.priority = priority
        self.changefreq = changefreq
        self.queryset = info_dict['queryset']
        self.date_field = info_dict.get('date_field')

    def items(self):
        """Return the items to list, as a fresh clone of the queryset."""
        # Make sure to return a clone; we don't want premature evaluation.
        return self.queryset.filter()

    def lastmod(self, item):
        """Return the item's ``date_field`` value, or None if no field is set."""
        if self.date_field is None:
            return None
        return getattr(item, self.date_field)
|