Fixed #7793 -- Handle sitemaps with more than 50,000 URLs in them (by using pagination).

Patch from Julian Bez. The docs patch here could probably do with some rewording.

git-svn-id: http://code.djangoproject.com/svn/django/trunk@8088 bcc190cf-cafb-0310-a4f2-bffc1f526a37
2008-07-26 05:07:16 +00:00 · 2008-07-26 05:07:16 +00:00 · badde8a7e5
parent a26ba33111
commit badde8a7e5
4 changed files with 38 additions and 9 deletions
--- a/3
+++ b/3
@ -71,7 +71,7 @@ answer newbie questions, and generally made Django that much better:
    Esdras Beleza <linux@esdrasbeleza.com>
    Chris Bennett <chrisrbennett@yahoo.com>
    James Bennett
-    Ben Godfrey <http://aftnn.org>
+    Julian Bez
    Arvis Bickovskis <viestards.lists@gmail.com>
    Paul Bissex <http://e-scribe.com/>
    Simon Blanchard
@ -166,6 +166,7 @@ answer newbie questions, and generally made Django that much better:
    glin@seznam.cz
    martin.glueck@gmail.com
    Artyom Gnilov <boobsd@gmail.com>
    Ben Godfrey <http://aftnn.org>
    GomoX <gomo@datafull.com>
    Guilherme Mesquita Gondim <semente@taurinus.org>
    Mario Gonzalez <gonzalemario@gmail.com>
--- a/django/contrib/sitemaps/__init__.py
+++ b/django/contrib/sitemaps/__init__.py
@ -1,4 +1,4 @@
-from django.core import urlresolvers
+from django.core import urlresolvers, paginator
 import urllib
 PING_URL = "http://www.google.com/webmasters/tools/ping"
@ -34,6 +34,10 @@ def ping_google(sitemap_url=None, ping_url=PING_URL):
    urllib.urlopen("%s?%s" % (ping_url, params))
 class Sitemap:
    # This limit is defined by Google. See the index documentation at
    # http://sitemaps.org/protocol.php#index.
    limit = 50000
    def __get(self, name, obj, default=None):
        try:
            attr = getattr(self, name)
@ -49,11 +53,17 @@ class Sitemap:
    def location(self, obj):
        return obj.get_absolute_url()
-    def get_urls(self):
+    def _get_paginator(self):
        if not hasattr(self, "paginator"):
            self.paginator = paginator.Paginator(self.items(), self.limit)
        return self.paginator
    paginator = property(_get_paginator)
    def get_urls(self, page=1):
        from django.contrib.sites.models import Site
        current_site = Site.objects.get_current()
        urls = []
-        for item in self.items():
+        for item in self.paginator.page(page).object_list:
            loc = "http://%s%s" % (current_site.domain, self.__get('location', item))
            url_info = {
                'location':   loc,
--- a/django/contrib/sitemaps/views.py
+++ b/django/contrib/sitemaps/views.py
@ -3,14 +3,22 @@ from django.template import loader
 from django.contrib.sites.models import Site
 from django.core import urlresolvers
 from django.utils.encoding import smart_str
 from django.core.paginator import EmptyPage, PageNotAnInteger
 def index(request, sitemaps):
    current_site = Site.objects.get_current()
    sites = []
    protocol = request.is_secure() and 'https' or 'http'
-    for section in sitemaps.keys():
+    for section, site in sitemaps.items():
        if callable(site):
            pages = site().paginator.num_pages
        else:
            pages = site.paginator.num_pages
        sitemap_url = urlresolvers.reverse('django.contrib.sitemaps.views.sitemap', kwargs={'section': section})
        sites.append('%s://%s%s' % (protocol, current_site.domain, sitemap_url))
        if pages > 1:
            for page in range(2, pages+1):
                sites.append('%s://%s%s?p=%s' % (protocol, current_site.domain, sitemap_url, page))
    xml = loader.render_to_string('sitemap_index.xml', {'sitemaps': sites})
    return HttpResponse(xml, mimetype='application/xml')
@ -22,10 +30,16 @@ def sitemap(request, sitemaps, section=None):
        maps.append(sitemaps[section])
    else:
        maps = sitemaps.values()
    page = request.GET.get("p", 1)
    for site in maps:
-        if callable(site):
+        try:
-            urls.extend(site().get_urls())
+            if callable(site):
-        else:
+                urls.extend(site().get_urls(page))
-            urls.extend(site.get_urls())
+            else:
                urls.extend(site.get_urls(page))
        except EmptyPage:
            raise Http404("Page %s empty" % page)
        except PageNotAnInteger:
            raise Http404("No page '%s'" % page)
    xml = smart_str(loader.render_to_string('sitemap.xml', {'urlset': urls}))
    return HttpResponse(xml, mimetype='application/xml')
--- a/docs/sitemaps.txt
+++ b/docs/sitemaps.txt
@ -282,6 +282,10 @@ This will automatically generate a ``sitemap.xml`` file that references
 both ``sitemap-flatpages.xml`` and ``sitemap-blog.xml``. The ``Sitemap``
 classes and the ``sitemaps`` dict don't change at all.
 If one of your sitemaps has more than 50,000 URLs, you should create an
 index file. That sitemap is then split into pages of at most 50,000 URLs
 each, and the index lists every page (pages after the first are referenced
 with a ``?p=N`` query string).
 Pinging Google
 ==============