Fixed #7793 -- Handle sitemaps with more than 50,000 URLs in them (by using
pagination). Patch from Julian Bez. The docs patch here could probably do with some rewording. git-svn-id: http://code.djangoproject.com/svn/django/trunk@8088 bcc190cf-cafb-0310-a4f2-bffc1f526a37
This commit is contained in:
parent
a26ba33111
commit
badde8a7e5
3
AUTHORS
3
AUTHORS
|
@ -71,7 +71,7 @@ answer newbie questions, and generally made Django that much better:
|
||||||
Esdras Beleza <linux@esdrasbeleza.com>
|
Esdras Beleza <linux@esdrasbeleza.com>
|
||||||
Chris Bennett <chrisrbennett@yahoo.com>
|
Chris Bennett <chrisrbennett@yahoo.com>
|
||||||
James Bennett
|
James Bennett
|
||||||
Ben Godfrey <http://aftnn.org>
|
Julian Bez
|
||||||
Arvis Bickovskis <viestards.lists@gmail.com>
|
Arvis Bickovskis <viestards.lists@gmail.com>
|
||||||
Paul Bissex <http://e-scribe.com/>
|
Paul Bissex <http://e-scribe.com/>
|
||||||
Simon Blanchard
|
Simon Blanchard
|
||||||
|
@ -166,6 +166,7 @@ answer newbie questions, and generally made Django that much better:
|
||||||
glin@seznam.cz
|
glin@seznam.cz
|
||||||
martin.glueck@gmail.com
|
martin.glueck@gmail.com
|
||||||
Artyom Gnilov <boobsd@gmail.com>
|
Artyom Gnilov <boobsd@gmail.com>
|
||||||
|
Ben Godfrey <http://aftnn.org>
|
||||||
GomoX <gomo@datafull.com>
|
GomoX <gomo@datafull.com>
|
||||||
Guilherme Mesquita Gondim <semente@taurinus.org>
|
Guilherme Mesquita Gondim <semente@taurinus.org>
|
||||||
Mario Gonzalez <gonzalemario@gmail.com>
|
Mario Gonzalez <gonzalemario@gmail.com>
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
from django.core import urlresolvers
|
from django.core import urlresolvers, paginator
|
||||||
import urllib
|
import urllib
|
||||||
|
|
||||||
PING_URL = "http://www.google.com/webmasters/tools/ping"
|
PING_URL = "http://www.google.com/webmasters/tools/ping"
|
||||||
|
@ -34,6 +34,10 @@ def ping_google(sitemap_url=None, ping_url=PING_URL):
|
||||||
urllib.urlopen("%s?%s" % (ping_url, params))
|
urllib.urlopen("%s?%s" % (ping_url, params))
|
||||||
|
|
||||||
class Sitemap:
|
class Sitemap:
|
||||||
|
# This limit is defined by Google. See the index documentation at
|
||||||
|
# http://sitemaps.org/protocol.php#index.
|
||||||
|
limit = 50000
|
||||||
|
|
||||||
def __get(self, name, obj, default=None):
|
def __get(self, name, obj, default=None):
|
||||||
try:
|
try:
|
||||||
attr = getattr(self, name)
|
attr = getattr(self, name)
|
||||||
|
@ -49,11 +53,17 @@ class Sitemap:
|
||||||
def location(self, obj):
|
def location(self, obj):
|
||||||
return obj.get_absolute_url()
|
return obj.get_absolute_url()
|
||||||
|
|
||||||
def get_urls(self):
|
def _get_paginator(self):
|
||||||
|
if not hasattr(self, "paginator"):
|
||||||
|
self.paginator = paginator.Paginator(self.items(), self.limit)
|
||||||
|
return self.paginator
|
||||||
|
paginator = property(_get_paginator)
|
||||||
|
|
||||||
|
def get_urls(self, page=1):
|
||||||
from django.contrib.sites.models import Site
|
from django.contrib.sites.models import Site
|
||||||
current_site = Site.objects.get_current()
|
current_site = Site.objects.get_current()
|
||||||
urls = []
|
urls = []
|
||||||
for item in self.items():
|
for item in self.paginator.page(page).object_list:
|
||||||
loc = "http://%s%s" % (current_site.domain, self.__get('location', item))
|
loc = "http://%s%s" % (current_site.domain, self.__get('location', item))
|
||||||
url_info = {
|
url_info = {
|
||||||
'location': loc,
|
'location': loc,
|
||||||
|
|
|
@ -3,14 +3,22 @@ from django.template import loader
|
||||||
from django.contrib.sites.models import Site
|
from django.contrib.sites.models import Site
|
||||||
from django.core import urlresolvers
|
from django.core import urlresolvers
|
||||||
from django.utils.encoding import smart_str
|
from django.utils.encoding import smart_str
|
||||||
|
from django.core.paginator import EmptyPage, PageNotAnInteger
|
||||||
|
|
||||||
def index(request, sitemaps):
|
def index(request, sitemaps):
|
||||||
current_site = Site.objects.get_current()
|
current_site = Site.objects.get_current()
|
||||||
sites = []
|
sites = []
|
||||||
protocol = request.is_secure() and 'https' or 'http'
|
protocol = request.is_secure() and 'https' or 'http'
|
||||||
for section in sitemaps.keys():
|
for section, site in sitemaps.items():
|
||||||
|
if callable(site):
|
||||||
|
pages = site().paginator.num_pages
|
||||||
|
else:
|
||||||
|
pages = site.paginator.num_pages
|
||||||
sitemap_url = urlresolvers.reverse('django.contrib.sitemaps.views.sitemap', kwargs={'section': section})
|
sitemap_url = urlresolvers.reverse('django.contrib.sitemaps.views.sitemap', kwargs={'section': section})
|
||||||
sites.append('%s://%s%s' % (protocol, current_site.domain, sitemap_url))
|
sites.append('%s://%s%s' % (protocol, current_site.domain, sitemap_url))
|
||||||
|
if pages > 1:
|
||||||
|
for page in range(2, pages+1):
|
||||||
|
sites.append('%s://%s%s?p=%s' % (protocol, current_site.domain, sitemap_url, page))
|
||||||
xml = loader.render_to_string('sitemap_index.xml', {'sitemaps': sites})
|
xml = loader.render_to_string('sitemap_index.xml', {'sitemaps': sites})
|
||||||
return HttpResponse(xml, mimetype='application/xml')
|
return HttpResponse(xml, mimetype='application/xml')
|
||||||
|
|
||||||
|
@ -22,10 +30,16 @@ def sitemap(request, sitemaps, section=None):
|
||||||
maps.append(sitemaps[section])
|
maps.append(sitemaps[section])
|
||||||
else:
|
else:
|
||||||
maps = sitemaps.values()
|
maps = sitemaps.values()
|
||||||
|
page = request.GET.get("p", 1)
|
||||||
for site in maps:
|
for site in maps:
|
||||||
|
try:
|
||||||
if callable(site):
|
if callable(site):
|
||||||
urls.extend(site().get_urls())
|
urls.extend(site().get_urls(page))
|
||||||
else:
|
else:
|
||||||
urls.extend(site.get_urls())
|
urls.extend(site.get_urls(page))
|
||||||
|
except EmptyPage:
|
||||||
|
raise Http404("Page %s empty" % page)
|
||||||
|
except PageNotAnInteger:
|
||||||
|
raise Http404("No page '%s'" % page)
|
||||||
xml = smart_str(loader.render_to_string('sitemap.xml', {'urlset': urls}))
|
xml = smart_str(loader.render_to_string('sitemap.xml', {'urlset': urls}))
|
||||||
return HttpResponse(xml, mimetype='application/xml')
|
return HttpResponse(xml, mimetype='application/xml')
|
||||||
|
|
|
@ -282,6 +282,10 @@ This will automatically generate a ``sitemap.xml`` file that references
|
||||||
both ``sitemap-flatpages.xml`` and ``sitemap-blog.xml``. The ``Sitemap``
|
both ``sitemap-flatpages.xml`` and ``sitemap-blog.xml``. The ``Sitemap``
|
||||||
classes and the ``sitemaps`` dict don't change at all.
|
classes and the ``sitemaps`` dict don't change at all.
|
||||||
|
|
||||||
|
If one of your sitemaps has more than 50,000 URLs, you should create an index
|
||||||
|
file. That sitemap will then be paginated automatically, and the index will
|
||||||
|
list each of its pages.
|
||||||
|
|
||||||
Pinging Google
|
Pinging Google
|
||||||
==============
|
==============
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue