From 44dabecf6e4a009ff1a34ba12032ba07476baad2 Mon Sep 17 00:00:00 2001 From: Adrian Holovaty Date: Thu, 31 Aug 2006 23:13:59 +0000 Subject: [PATCH] Fixed #2628 -- Added django.contrib.sitemap. Thanks for the patch, Dan Watson git-svn-id: http://code.djangoproject.com/svn/django/trunk@3694 bcc190cf-cafb-0310-a4f2-bffc1f526a37 --- AUTHORS | 1 + django/contrib/sitemap/__init__.py | 90 +++++ django/contrib/sitemap/templates/sitemap.xml | 11 + .../sitemap/templates/sitemap_index.xml | 8 + django/contrib/sitemap/views.py | 30 ++ docs/sitemaps.txt | 318 ++++++++++++++++++ 6 files changed, 458 insertions(+) create mode 100644 django/contrib/sitemap/__init__.py create mode 100644 django/contrib/sitemap/templates/sitemap.xml create mode 100644 django/contrib/sitemap/templates/sitemap_index.xml create mode 100644 django/contrib/sitemap/views.py create mode 100644 docs/sitemaps.txt diff --git a/AUTHORS b/AUTHORS index 9c7463551a..e3db830b74 100644 --- a/AUTHORS +++ b/AUTHORS @@ -139,6 +139,7 @@ answer newbie questions, and generally made Django that much better: Amit Upadhyay Geert Vanderkelen Milton Waddams + Dan Watson Rachel Willmer wojtek ye7cakf02@sneakemail.com diff --git a/django/contrib/sitemap/__init__.py b/django/contrib/sitemap/__init__.py new file mode 100644 index 0000000000..83c9cbe07a --- /dev/null +++ b/django/contrib/sitemap/__init__.py @@ -0,0 +1,90 @@ +from django.core import urlresolvers +import urllib + +PING_URL = "http://www.google.com/webmasters/sitemaps/ping" + +class SitemapNotFound(Exception): + pass + +def ping_google(sitemap_url=None, ping_url=PING_URL): + """ + Alerts Google that the sitemap for the current site has been updated. + If sitemap_url is provided, it should be an absolute path to the sitemap + for this site -- e.g., '/sitemap.xml'. If sitemap_url is not provided, this + function will attempt to deduce it by using urlresolvers.reverse(). + """ + if sitemap_url is None: + try: + # First, try to get the "index" sitemap URL. 
+ sitemap_url = urlresolvers.reverse('django.contrib.sitemap.views.index') + except urlresolvers.NoReverseMatch: + try: + # Next, try for the "global" sitemap URL. + sitemap_url = urlresolvers.reverse('django.contrib.sitemap.views.sitemap') + except urlresolvers.NoReverseMatch: + pass + + if sitemap_url is None: + raise SitemapNotFound("You didn't provide a sitemap_url, and the sitemap URL couldn't be auto-detected.") + + from django.contrib.sites.models import Site + current_site = Site.objects.get_current() + url = "http://%s%s" % (current_site.domain, sitemap_url) + params = urllib.urlencode({'sitemap':url}) + urllib.urlopen("%s?%s" % (ping_url, params)) + +class Sitemap: + def __get(self, name, obj, default=None): + try: + attr = getattr(self, name) + except AttributeError: + return default + if callable(attr): + return attr(obj) + return attr + + def items(self): + return [] + + def location(self, obj): + return obj.get_absolute_url() + + def get_urls(self): + from django.contrib.sites.models import Site + current_site = Site.objects.get_current() + urls = [] + for item in self.items(): + loc = "http://%s%s" % (current_site.domain, self.__get('location', item)) + url_info = { + 'location': loc, + 'lastmod': self.__get('lastmod', item, None), + 'changefreq': self.__get('changefreq', item, None), + 'priority': self.__get('priority', item, None) + } + urls.append(url_info) + return urls + +class FlatpageSitemap(Sitemap): + def items(self): + from django.contrib.sites.models import Site + current_site = Site.objects.get_current() + return current_site.flatpage_set.all() + +class GenericSitemap(Sitemap): + priority = None + changefreq = None + + def __init__(self, info_dict, priority=None, changefreq=None): + self.queryset = info_dict['queryset'] + self.date_field = info_dict.get('date_field', None) + self.priority = priority + self.changefreq = changefreq + + def items(self): + # Make sure to return a clone; we don't want premature evaluation. 
+ return self.queryset.filter() + + def lastmod(self, item): + if self.date_field is not None: + return getattr(item, self.date_field) + return None diff --git a/django/contrib/sitemap/templates/sitemap.xml b/django/contrib/sitemap/templates/sitemap.xml new file mode 100644 index 0000000000..3ee4f914f7 --- /dev/null +++ b/django/contrib/sitemap/templates/sitemap.xml @@ -0,0 +1,11 @@ +<?xml version="1.0" encoding="UTF-8"?> +<urlset xmlns="http://www.google.com/schemas/sitemap/0.84"> +{% for url in urlset %} + <url> + <loc>{{ url.location|escape }}</loc> + {% if url.lastmod %}<lastmod>{{ url.lastmod|date:"Y-m-d" }}</lastmod>{% endif %} + {% if url.changefreq %}<changefreq>{{ url.changefreq }}</changefreq>{% endif %} + {% if url.priority %}<priority>{{ url.priority }}</priority>{% endif %} + </url> +{% endfor %} +</urlset> diff --git a/django/contrib/sitemap/templates/sitemap_index.xml b/django/contrib/sitemap/templates/sitemap_index.xml new file mode 100644 index 0000000000..e9d722ac7f --- /dev/null +++ b/django/contrib/sitemap/templates/sitemap_index.xml @@ -0,0 +1,8 @@ +<?xml version="1.0" encoding="UTF-8"?> +<sitemapindex xmlns="http://www.google.com/schemas/sitemap/0.84"> +{% for location in sitemaps %} + <sitemap> + <loc>{{ location|escape }}</loc> + </sitemap> +{% endfor %} +</sitemapindex> diff --git a/django/contrib/sitemap/views.py b/django/contrib/sitemap/views.py new file mode 100644 index 0000000000..8a4592c3e4 --- /dev/null +++ b/django/contrib/sitemap/views.py @@ -0,0 +1,30 @@ +from django.http import HttpResponse, Http404 +from django.template import loader +from django.contrib.sites.models import Site +from django.core import urlresolvers + +def index(request, sitemaps): + current_site = Site.objects.get_current() + sites = [] + protocol = request.is_secure() and 'https' or 'http' + for section in sitemaps.keys(): + sitemap_url = urlresolvers.reverse('django.contrib.sitemap.views.sitemap', kwargs={'section': section}) + sites.append('%s://%s%s' % (protocol, current_site.domain, sitemap_url)) + xml = loader.render_to_string('sitemap_index.xml', {'sitemaps': sites}) + return HttpResponse(xml, mimetype='application/xml') + +def sitemap(request, sitemaps, section=None): + maps, urls = [], [] + if section is not None: + if not sitemaps.has_key(section): + raise Http404("No sitemap available for 
section: %r" % section) + maps.append(sitemaps[section]) + else: + maps = sitemaps.values() + for site in maps: + if callable(site): + urls.extend(site().get_urls()) + else: + urls.extend(site.get_urls()) + xml = loader.render_to_string('sitemap.xml', {'urlset': urls}) + return HttpResponse(xml, mimetype='application/xml') diff --git a/docs/sitemaps.txt b/docs/sitemaps.txt new file mode 100644 index 0000000000..d22418eac2 --- /dev/null +++ b/docs/sitemaps.txt @@ -0,0 +1,318 @@ +===================== +The sitemap framework +===================== + +Django comes with a high-level sitemap-generating framework that makes +creating `Google Sitemap`_ XML files easy. + +.. _Google Sitemap: http://www.google.com/webmasters/sitemaps/docs/en/protocol.html + +Overview +======== + +A sitemap is an XML file on your Web site that tells search-engine indexers how +frequently your pages change and how "important" certain pages are in relation +to other pages on your site. This information helps search engines index your +site. + +The Django sitemap framework automates the creation of this XML file by letting +you express this information in Python code. + +It works much like Django's `syndication framework`_. To create a sitemap, just +write a ``Sitemap`` class and point to it in your URLconf_. + +.. _syndication framework: http://www.djangoproject.com/documentation/syndication/ +.. _URLconf: http://www.djangoproject.com/documentation/url_dispatch/ + +Installation +============ + +To install the sitemap app, follow these steps: + + 1. Add ``'django.contrib.sitemap'`` to your INSTALLED_APPS_ setting. + 2. Make sure ``'django.template.loaders.app_directories.load_template_source'`` + is in your TEMPLATE_LOADERS_ setting. It's in there by default, so + you'll only need to change this if you've changed that setting. + 3. Make sure you've installed the `sites framework`_. + +(Note: The sitemap application doesn't install any database tables. 
The only +reason it needs to go into ``INSTALLED_APPS`` is so that the +``load_template_source`` template loader can find the default templates.) + +.. _INSTALLED_APPS: http://www.djangoproject.com/documentation/settings/#installed-apps +.. _TEMPLATE_LOADERS: http://www.djangoproject.com/documentation/settings/#template-loaders +.. _sites framework: http://www.djangoproject.com/documentation/sites/ + +Initialization +============== + +To activate sitemap generation on your Django site, add this line to your +URLconf_: + + (r'^sitemap.xml$', 'django.contrib.sitemap.views.sitemap', {'sitemaps': sitemaps}) + +This tells Django to build a sitemap when a client accesses ``/sitemap.xml``. + +The name of the sitemap file is not important, but the location is. Google will +only index links in your sitemap for the current URL level and below. For +instance, if ``sitemap.xml`` lives in your root directory, it may reference any +URL in your site. However, if your sitemap lives at ``/content/sitemap.xml``, +it may only reference URLs that begin with ``/content/``. + +The sitemap view takes an extra, required argument: ``{'sitemaps': sitemaps}``. +``sitemaps`` should be a dictionary that maps a short section label (e.g., +``blog`` or ``news``) to its ``Sitemap`` class (e.g., ``BlogSitemap`` or +``NewsSitemap``). It may also map to an *instance* of a ``Sitemap`` class +(e.g., ``BlogSitemap(some_var)``). + +.. _URLconf: http://www.djangoproject.com/documentation/url_dispatch/ + +Sitemap classes +=============== + +A ``Sitemap`` class is a simple Python class that represents a "section" of +entries in your sitemap. For example, one ``Sitemap`` class could represent all +the entries of your weblog, while another could represent all of the events in +your events calendar. 
+ +In the simplest case, all these sections get lumped together into one +``sitemap.xml``, but it's also possible to use the framework to generate a +sitemap index that references individual sitemap files, one per section. (See +`Creating a sitemap index`_ below.) + +``Sitemap`` classes must subclass ``django.contrib.sitemap.Sitemap``. They can +live anywhere in your codebase. + +A simple example +================ + +Let's assume you have a blog system, with an ``Entry`` model, and you want your +sitemap to include all the links to your individual blog entries. Here's how +your sitemap class might look:: + + from django.contrib.sitemap import Sitemap + from mysite.blog.models import Entry + + class BlogSitemap(Sitemap): + changefreq = "never" + priority = 0.5 + + def items(self): + return Entry.objects.filter(is_draft=False) + + def lastmod(self, obj): + return obj.pub_date + +Note: + + * ``changefreq`` and ``priority`` are class attributes corresponding to + ``<changefreq>`` and ``<priority>`` elements, respectively. They can be + made callable as functions, as ``lastmod`` was in the example. + * ``items()`` is simply a method that returns a list of objects. The objects + returned will get passed to any callable methods corresponding to a + sitemap property (``location``, ``lastmod``, ``changefreq``, and + ``priority``). + * ``lastmod`` should return a Python ``datetime`` object. + * There is no ``location`` method in this example, but you can provide it + in order to specify the URL for your object. By default, ``location()`` + calls ``get_absolute_url()`` on each object and returns the result. + +Sitemap class reference +======================= + +A ``Sitemap`` class can define the following methods/attributes: + +``items`` +--------- + +**Required.** A method that returns a list of objects. The framework doesn't +care what *type* of objects they are; all that matters is that these objects +get passed to the ``location()``, ``lastmod()``, ``changefreq()`` and +``priority()`` methods. 
+ +``location`` +------------ + +**Optional.** Either a method or attribute. + +If it's a method, it should return the absolute URL for a given object as +returned by ``items()``. + +If it's an attribute, its value should be a string representing an absolute URL +to use for *every* object returned by ``items()``. + +In both cases, "absolute URL" means a URL that doesn't include the protocol or +domain. Examples: + + * Good: ``'/foo/bar/'`` + * Bad: ``'example.com/foo/bar/'`` + * Bad: ``'http://example.com/foo/bar/'`` + +If ``location`` isn't provided, the framework will call the +``get_absolute_url()`` method on each object as returned by ``items()``. + +``lastmod`` +----------- + +**Optional.** Either a method or attribute. + +If it's a method, it should take one argument -- an object as returned by +``items()`` -- and return that object's last-modified date/time, as a Python +``datetime.datetime`` object. + +If it's an attribute, its value should be a Python ``datetime.datetime`` object +representing the last-modified date/time for *every* object returned by +``items()``. + +``changefreq`` +-------------- + +**Optional.** Either a method or attribute. + +If it's a method, it should take one argument -- an object as returned by +``items()`` -- and return that object's change frequency, as a Python string. + +If it's an attribute, its value should be a string representing the change +frequency of *every* object returned by ``items()``. + +Possible values for ``changefreq``, whether you use a method or attribute, are: + + * ``'always'`` + * ``'hourly'`` + * ``'daily'`` + * ``'weekly'`` + * ``'monthly'`` + * ``'yearly'`` + * ``'never'`` + +``priority`` +------------ + +**Optional.** Either a method or attribute. + +If it's a method, it should take one argument -- an object as returned by +``items()`` -- and return that object's priority, as either a string or float. 
+ +If it's an attribute, its value should be either a string or float representing +the priority of *every* object returned by ``items()``. + +Example values for ``priority``: ``0.4``, ``1.0``. The default priority of a +page is ``0.5``. See `Google's documentation`_ for more information. + +.. _Google's documentation: http://www.google.com/webmasters/sitemaps/docs/en/protocol.html + +Shortcuts +========= + +The sitemap framework provides a couple of convenience classes for common cases: + +``FlatpageSitemap`` +------------------- + +The ``FlatpageSitemap`` class looks at all flatpages_ defined for the current +``SITE_ID`` (see the `sites documentation`_) and creates an entry in the +sitemap. These entries include only the ``location`` attribute -- not +``lastmod``, ``changefreq`` or ``priority``. + +.. _flatpages: http://www.djangoproject.com/documentation/flatpages/ +.. _sites documentation: http://www.djangoproject.com/documentation/sites/ + +``GenericSitemap`` +------------------ + +The ``GenericSitemap`` class works with any `generic views`_ you already have. +To use it, create an instance, passing in the same ``info_dict`` you pass to +the generic views. The only requirement is that the dictionary have a +``queryset`` entry. It may also have a ``date_field`` entry that specifies a +date field for objects retrieved from the ``queryset``. This will be used for +the ``lastmod`` attribute in the generated sitemap. You may also pass +``priority`` and ``changefreq`` keyword arguments to the ``GenericSitemap`` +constructor to specify these attributes for all URLs. + +.. 
_generic views: http://www.djangoproject.com/documentation/generic_views/ + +Example +------- + +Here's an example of a URLconf_ using both:: + + from django.conf.urls.defaults import * + from django.contrib.sitemap import FlatpageSitemap, GenericSitemap + from mysite.blog.models import Entry + + info_dict = { + 'queryset': Entry.objects.all(), + 'date_field': 'pub_date', + } + + sitemaps = { + 'flatpages': FlatpageSitemap, + 'blog': GenericSitemap(info_dict, priority=0.6), + } + + urlpatterns = patterns('', + # some generic view using info_dict + # ... + + # the sitemap + (r'^sitemap.xml$', 'django.contrib.sitemap.views.sitemap', {'sitemaps': sitemaps}) + ) + +.. _URLconf: http://www.djangoproject.com/documentation/url_dispatch/ + +Creating a sitemap index +======================== + +The sitemap framework also has the ability to create a sitemap index that +references individual sitemap files, one for each section defined in your +``sitemaps`` dictionary. The only differences in usage are: + + * You use two views in your URLconf: ``django.contrib.sitemap.views.index`` + and ``django.contrib.sitemap.views.sitemap``. + * The ``django.contrib.sitemap.views.sitemap`` view should take a + ``section`` keyword argument. + +Here is what the relevant URLconf lines would look like for the example above:: + + (r'^sitemap.xml$', 'django.contrib.sitemap.views.index', {'sitemaps': sitemaps}) +
(r'^sitemap-(?P<section>.+).xml$', 'django.contrib.sitemap.views.sitemap', {'sitemaps': sitemaps}) + +This will automatically generate a ``sitemap.xml`` file that references +both ``sitemap-flatpages.xml`` and ``sitemap-blog.xml``. The ``Sitemap`` +classes and the ``sitemaps`` dict don't change at all. + +Pinging Google +============== + +You may want to "ping" Google when your sitemap changes, to let it know to +reindex your site. The framework provides a function to do just that: +``django.contrib.sitemap.ping_google()``. + +``ping_google()`` takes an optional argument, ``sitemap_url``, which should be +the absolute URL of your site's sitemap (e.g., ``'/sitemap.xml'``). If this +argument isn't provided, ``ping_google()`` will attempt to figure out your +sitemap by performing a reverse lookup in your URLconf. + +``ping_google()`` raises the exception +``django.contrib.sitemap.SitemapNotFound`` if it cannot determine your sitemap +URL. + +One useful way to call ``ping_google()`` is from a model's ``save()`` method:: + + from django.contrib.sitemap import ping_google + + class Entry(models.Model): + # ... + def save(self): + super(Entry, self).save() + try: + ping_google() + except Exception: + # Broad 'except Exception' because we could get a variety + # of HTTP-related exceptions. + pass + +A more efficient solution, however, would be to call ``ping_google()`` from a +cron script, or some other scheduled task. The function makes an HTTP request +to Google's servers, so you may not want to introduce that network overhead +each time you call ``save()``.