Fixed #7793 -- Handle sitemaps with more than 50,000 URLs in them (by using
pagination). Patch from Julian Bez.

The docs patch here could probably do with some rewording.

git-svn-id: http://code.djangoproject.com/svn/django/trunk@8088 bcc190cf-cafb-0310-a4f2-bffc1f526a37
This commit is contained in:
parent
a26ba33111
commit
badde8a7e5
3
AUTHORS
3
AUTHORS
|
@ -71,7 +71,7 @@ answer newbie questions, and generally made Django that much better:
|
|||
Esdras Beleza <linux@esdrasbeleza.com>
|
||||
Chris Bennett <chrisrbennett@yahoo.com>
|
||||
James Bennett
|
||||
Ben Godfrey <http://aftnn.org>
|
||||
Julian Bez
|
||||
Arvis Bickovskis <viestards.lists@gmail.com>
|
||||
Paul Bissex <http://e-scribe.com/>
|
||||
Simon Blanchard
|
||||
|
@ -166,6 +166,7 @@ answer newbie questions, and generally made Django that much better:
|
|||
glin@seznam.cz
|
||||
martin.glueck@gmail.com
|
||||
Artyom Gnilov <boobsd@gmail.com>
|
||||
Ben Godfrey <http://aftnn.org>
|
||||
GomoX <gomo@datafull.com>
|
||||
Guilherme Mesquita Gondim <semente@taurinus.org>
|
||||
Mario Gonzalez <gonzalemario@gmail.com>
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
from django.core import urlresolvers
|
||||
from django.core import urlresolvers, paginator
|
||||
import urllib
|
||||
|
||||
PING_URL = "http://www.google.com/webmasters/tools/ping"
|
||||
|
@ -34,6 +34,10 @@ def ping_google(sitemap_url=None, ping_url=PING_URL):
|
|||
urllib.urlopen("%s?%s" % (ping_url, params))
|
||||
|
||||
class Sitemap:
|
||||
# This limit is defined by Google. See the index documentation at
|
||||
# http://sitemaps.org/protocol.php#index.
|
||||
limit = 50000
|
||||
|
||||
def __get(self, name, obj, default=None):
|
||||
try:
|
||||
attr = getattr(self, name)
|
||||
|
@ -49,11 +53,17 @@ class Sitemap:
|
|||
def location(self, obj):
|
||||
return obj.get_absolute_url()
|
||||
|
||||
def get_urls(self):
|
||||
def _get_paginator(self):
|
||||
if not hasattr(self, "paginator"):
|
||||
self.paginator = paginator.Paginator(self.items(), self.limit)
|
||||
return self.paginator
|
||||
paginator = property(_get_paginator)
|
||||
|
||||
def get_urls(self, page=1):
|
||||
from django.contrib.sites.models import Site
|
||||
current_site = Site.objects.get_current()
|
||||
urls = []
|
||||
for item in self.items():
|
||||
for item in self.paginator.page(page).object_list:
|
||||
loc = "http://%s%s" % (current_site.domain, self.__get('location', item))
|
||||
url_info = {
|
||||
'location': loc,
|
||||
|
|
|
@ -3,14 +3,22 @@ from django.template import loader
|
|||
from django.contrib.sites.models import Site
|
||||
from django.core import urlresolvers
|
||||
from django.utils.encoding import smart_str
|
||||
from django.core.paginator import EmptyPage, PageNotAnInteger
|
||||
|
||||
def index(request, sitemaps):
|
||||
current_site = Site.objects.get_current()
|
||||
sites = []
|
||||
protocol = request.is_secure() and 'https' or 'http'
|
||||
for section in sitemaps.keys():
|
||||
for section, site in sitemaps.items():
|
||||
if callable(site):
|
||||
pages = site().paginator.num_pages
|
||||
else:
|
||||
pages = site.paginator.num_pages
|
||||
sitemap_url = urlresolvers.reverse('django.contrib.sitemaps.views.sitemap', kwargs={'section': section})
|
||||
sites.append('%s://%s%s' % (protocol, current_site.domain, sitemap_url))
|
||||
if pages > 1:
|
||||
for page in range(2, pages+1):
|
||||
sites.append('%s://%s%s?p=%s' % (protocol, current_site.domain, sitemap_url, page))
|
||||
xml = loader.render_to_string('sitemap_index.xml', {'sitemaps': sites})
|
||||
return HttpResponse(xml, mimetype='application/xml')
|
||||
|
||||
|
@ -22,10 +30,16 @@ def sitemap(request, sitemaps, section=None):
|
|||
maps.append(sitemaps[section])
|
||||
else:
|
||||
maps = sitemaps.values()
|
||||
page = request.GET.get("p", 1)
|
||||
for site in maps:
|
||||
if callable(site):
|
||||
urls.extend(site().get_urls())
|
||||
else:
|
||||
urls.extend(site.get_urls())
|
||||
try:
|
||||
if callable(site):
|
||||
urls.extend(site().get_urls(page))
|
||||
else:
|
||||
urls.extend(site.get_urls(page))
|
||||
except EmptyPage:
|
||||
raise Http404("Page %s empty" % page)
|
||||
except PageNotAnInteger:
|
||||
raise Http404("No page '%s'" % page)
|
||||
xml = smart_str(loader.render_to_string('sitemap.xml', {'urlset': urls}))
|
||||
return HttpResponse(xml, mimetype='application/xml')
|
||||
|
|
|
@ -282,6 +282,10 @@ This will automatically generate a ``sitemap.xml`` file that references
|
|||
both ``sitemap-flatpages.xml`` and ``sitemap-blog.xml``. The ``Sitemap``
|
||||
classes and the ``sitemaps`` dict don't change at all.
|
||||
|
||||
If one of your sitemaps is going to have more than 50,000 URLs, you should
|
||||
create an index file. Such a sitemap is paginated automatically, and the
|
||||
index lists each of its pages as a separate entry.
|
||||
|
||||
Pinging Google
|
||||
==============
|
||||
|
||||
|
|
Loading…
Reference in New Issue