Fixed #2628 -- Added django.contrib.sitemap. Thanks for the patch, Dan Watson
git-svn-id: http://code.djangoproject.com/svn/django/trunk@3694 bcc190cf-cafb-0310-a4f2-bffc1f526a37
This commit is contained in:
parent
0a7f218b53
commit
44dabecf6e
1
AUTHORS
1
AUTHORS
|
@ -139,6 +139,7 @@ answer newbie questions, and generally made Django that much better:
|
|||
Amit Upadhyay
|
||||
Geert Vanderkelen
|
||||
Milton Waddams
|
||||
Dan Watson <http://theidioteque.net/>
|
||||
Rachel Willmer <http://www.willmer.com/kb/>
|
||||
wojtek
|
||||
ye7cakf02@sneakemail.com
|
||||
|
|
|
@ -0,0 +1,90 @@
|
|||
from django.core import urlresolvers
|
||||
import urllib
|
||||
|
||||
PING_URL = "http://www.google.com/webmasters/sitemaps/ping"
|
||||
|
||||
class SitemapNotFound(Exception):
    """
    Raised by ping_google() when no sitemap URL was supplied and none could
    be auto-detected.
    """
|
||||
|
||||
def ping_google(sitemap_url=None, ping_url=PING_URL):
    """
    Alerts Google that the sitemap for the current site has been updated.

    If sitemap_url is provided, it should be an absolute path to the sitemap
    for this site -- e.g., '/sitemap.xml'. If sitemap_url is not provided, this
    function will attempt to deduce it by using urlresolvers.reverse().

    ping_url is the address the notification is sent to; the sitemap's full
    URL is passed to it in the 'sitemap' query-string parameter.

    Raises SitemapNotFound if sitemap_url is not given and neither sitemap
    view can be reversed. Errors raised while opening the ping URL are not
    caught here.
    """
    if sitemap_url is None:
        # Prefer the "index" sitemap URL; fall back to the "global" one.
        candidate_views = (
            'django.contrib.sitemap.views.index',
            'django.contrib.sitemap.views.sitemap',
        )
        for view_name in candidate_views:
            try:
                sitemap_url = urlresolvers.reverse(view_name)
            except urlresolvers.NoReverseMatch:
                continue
            break

    if sitemap_url is None:
        raise SitemapNotFound("You didn't provide a sitemap_url, and the sitemap URL couldn't be auto-detected.")

    # Imported here rather than at module level, as the rest of this module does.
    from django.contrib.sites.models import Site
    current_site = Site.objects.get_current()
    # Build a fully-qualified URL, the same way Sitemap.get_urls() does;
    # a bare "domain/path" string is not a usable sitemap address.
    url = "http://%s%s" % (current_site.domain, sitemap_url)
    params = urllib.urlencode({'sitemap': url})
    urllib.urlopen("%s?%s" % (ping_url, params))
|
||||
|
||||
class Sitemap:
    """
    Base class for one section of a sitemap.

    Subclasses override items() and may define ``location``, ``lastmod``,
    ``changefreq`` and ``priority`` either as plain attributes or as methods
    that take a single item.
    """

    def __get(self, name, obj, default=None):
        """
        Looks up the sitemap property ``name`` on this instance for ``obj``.

        Returns ``default`` if the attribute does not exist, the result of
        calling it with ``obj`` if it is callable, and the attribute itself
        otherwise.
        """
        missing = object()
        value = getattr(self, name, missing)
        if value is missing:
            return default
        if not callable(value):
            return value
        return value(obj)

    def items(self):
        """Returns the objects to list in this section (none by default)."""
        return []

    def location(self, obj):
        """Returns the path for ``obj``, taken from its get_absolute_url()."""
        return obj.get_absolute_url()

    def get_urls(self):
        """
        Returns a list with one dict per item, holding its ``location``
        (prefixed with "http://" and the current site's domain), ``lastmod``,
        ``changefreq`` and ``priority``.
        """
        from django.contrib.sites.models import Site
        site = Site.objects.get_current()
        return [
            {
                'location': "http://%s%s" % (site.domain, self.__get('location', entry)),
                'lastmod': self.__get('lastmod', entry),
                'changefreq': self.__get('changefreq', entry),
                'priority': self.__get('priority', entry),
            }
            for entry in self.items()
        ]
|
||||
|
||||
class FlatpageSitemap(Sitemap):
    """Sitemap section listing the flatpages attached to the current site."""

    def items(self):
        """Returns every object in the current site's ``flatpage_set``."""
        from django.contrib.sites.models import Site
        return Site.objects.get_current().flatpage_set.all()
|
||||
|
||||
class GenericSitemap(Sitemap):
    """
    Sitemap section built from an ``info_dict`` that has a required
    'queryset' entry and an optional 'date_field' entry.
    """
    priority = None
    changefreq = None

    def __init__(self, info_dict, priority=None, changefreq=None):
        """
        Stores the queryset and optional date field from ``info_dict``, plus
        the ``priority`` and ``changefreq`` values applied to every item.
        """
        self.queryset = info_dict['queryset']
        self.date_field = info_dict.get('date_field')
        self.priority, self.changefreq = priority, changefreq

    def items(self):
        """Returns the items for this section as a fresh queryset."""
        # filter() with no arguments hands back a clone, so the stored
        # queryset is never evaluated prematurely.
        return self.queryset.filter()

    def lastmod(self, item):
        """
        Returns the value of ``item``'s date field, or None when no date
        field was configured.
        """
        field_name = self.date_field
        if field_name is None:
            return None
        return getattr(item, field_name)
|
|
@ -0,0 +1,11 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<urlset xmlns="http://www.google.com/schemas/sitemap/0.84">
|
||||
{% for url in urlset %}
|
||||
<url>
|
||||
<loc>{{ url.location|escape }}</loc>
|
||||
{% if url.lastmod %}<lastmod>{{ url.lastmod|date:"Y-m-d" }}</lastmod>{% endif %}
|
||||
{% if url.changefreq %}<changefreq>{{ url.changefreq }}</changefreq>{% endif %}
|
||||
{% if url.priority %}<priority>{{ url.priority }}</priority>{% endif %}
|
||||
</url>
|
||||
{% endfor %}
|
||||
</urlset>
|
|
@ -0,0 +1,8 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<sitemapindex xmlns="http://www.google.com/schemas/sitemap/0.84">
|
||||
{% for location in sitemaps %}
|
||||
<sitemap>
|
||||
<loc>{{ location|escape }}</loc>
|
||||
</sitemap>
|
||||
{% endfor %}
|
||||
</sitemapindex>
|
|
@ -0,0 +1,30 @@
|
|||
from django.http import HttpResponse, Http404
|
||||
from django.template import loader
|
||||
from django.contrib.sites.models import Site
|
||||
from django.core import urlresolvers
|
||||
|
||||
def index(request, sitemaps):
    """
    Renders the sitemap index: one absolute URL per section in ``sitemaps``,
    each pointing at the per-section ``sitemap`` view.
    """
    site = Site.objects.get_current()
    # Use the same scheme the index itself was requested with.
    if request.is_secure():
        scheme = 'https'
    else:
        scheme = 'http'
    locations = []
    for name in sitemaps.keys():
        path = urlresolvers.reverse('django.contrib.sitemap.views.sitemap', kwargs={'section': name})
        locations.append('%s://%s%s' % (scheme, site.domain, path))
    body = loader.render_to_string('sitemap_index.xml', {'sitemaps': locations})
    return HttpResponse(body, mimetype='application/xml')
|
||||
|
||||
def sitemap(request, sitemaps, section=None):
    """
    Renders a sitemap holding the URLs of one section, or of every section
    in ``sitemaps`` when ``section`` is None.

    Raises Http404 if ``section`` is given but is not a key of ``sitemaps``.
    """
    if section is None:
        selected = sitemaps.values()
    elif sitemaps.has_key(section):
        selected = [sitemaps[section]]
    else:
        raise Http404("No sitemap available for section: %r" % section)

    entries = []
    for candidate in selected:
        # Values may be Sitemap classes or ready-made instances; anything
        # callable is called first to obtain the object to query.
        if callable(candidate):
            candidate = candidate()
        entries.extend(candidate.get_urls())
    body = loader.render_to_string('sitemap.xml', {'urlset': entries})
    return HttpResponse(body, mimetype='application/xml')
|
|
@ -0,0 +1,318 @@
|
|||
=====================
|
||||
The sitemap framework
|
||||
=====================
|
||||
|
||||
Django comes with a high-level sitemap-generating framework that makes
|
||||
creating `Google Sitemap`_ XML files easy.
|
||||
|
||||
.. _Google Sitemap: http://www.google.com/webmasters/sitemaps/docs/en/protocol.html
|
||||
|
||||
Overview
|
||||
========
|
||||
|
||||
A sitemap is an XML file on your Web site that tells search-engine indexers how
|
||||
frequently your pages change and how "important" certain pages are in relation
|
||||
to other pages on your site. This information helps search engines index your
|
||||
site.
|
||||
|
||||
The Django sitemap framework automates the creation of this XML file by letting
|
||||
you express this information in Python code.
|
||||
|
||||
It works much like Django's `syndication framework`_. To create a sitemap, just
|
||||
write a ``Sitemap`` class and point to it in your URLconf_.
|
||||
|
||||
.. _syndication framework: http://www.djangoproject.com/documentation/syndication/
|
||||
.. _URLconf: http://www.djangoproject.com/documentation/url_dispatch/
|
||||
|
||||
Installation
|
||||
============
|
||||
|
||||
To install the sitemap app, follow these steps:
|
||||
|
||||
1. Add ``'django.contrib.sitemap'`` to your INSTALLED_APPS_ setting.
|
||||
2. Make sure ``'django.template.loaders.app_directories.load_template_source'``
|
||||
is in your TEMPLATE_LOADERS_ setting. It's in there by default, so
|
||||
you'll only need to change this if you've changed that setting.
|
||||
3. Make sure you've installed the `sites framework`_.
|
||||
|
||||
(Note: The sitemap application doesn't install any database tables. The only
|
||||
reason it needs to go into ``INSTALLED_APPS`` is so that the
|
||||
``load_template_source`` template loader can find the default templates.)
|
||||
|
||||
.. _INSTALLED_APPS: http://www.djangoproject.com/documentation/settings/#installed-apps
|
||||
.. _TEMPLATE_LOADERS: http://www.djangoproject.com/documentation/settings/#template-loaders
|
||||
.. _sites framework: http://www.djangoproject.com/documentation/sites/
|
||||
|
||||
Initialization
|
||||
==============
|
||||
|
||||
To activate sitemap generation on your Django site, add this line to your
|
||||
URLconf_:
|
||||
|
||||
(r'^sitemap\.xml$', 'django.contrib.sitemap.views.sitemap', {'sitemaps': sitemaps})
|
||||
|
||||
This tells Django to build a sitemap when a client accesses ``/sitemap.xml``.
|
||||
|
||||
The name of the sitemap file is not important, but the location is. Google will
|
||||
only index links in your sitemap for the current URL level and below. For
|
||||
instance, if ``sitemap.xml`` lives in your root directory, it may reference any
|
||||
URL in your site. However, if your sitemap lives at ``/content/sitemap.xml``,
|
||||
it may only reference URLs that begin with ``/content/``.
|
||||
|
||||
The sitemap view takes an extra, required argument: ``{'sitemaps': sitemaps}``.
|
||||
``sitemaps`` should be a dictionary that maps a short section label (e.g.,
|
||||
``blog`` or ``news``) to its ``Sitemap`` class (e.g., ``BlogSitemap`` or
|
||||
``NewsSitemap``). It may also map to an *instance* of a ``Sitemap`` class
|
||||
(e.g., ``BlogSitemap(some_var)``).
|
||||
|
||||
.. _URLconf: http://www.djangoproject.com/documentation/url_dispatch/
|
||||
|
||||
Sitemap classes
|
||||
===============
|
||||
|
||||
A ``Sitemap`` class is a simple Python class that represents a "section" of
|
||||
entries in your sitemap. For example, one ``Sitemap`` class could represent all
|
||||
the entries of your weblog, while another could represent all of the events in
|
||||
your events calendar.
|
||||
|
||||
In the simplest case, all these sections get lumped together into one
|
||||
``sitemap.xml``, but it's also possible to use the framework to generate a
|
||||
sitemap index that references individual sitemap files, one per section. (See
|
||||
`Creating a sitemap index`_ below.)
|
||||
|
||||
``Sitemap`` classes must subclass ``django.contrib.sitemap.Sitemap``. They can
|
||||
live anywhere in your codebase.
|
||||
|
||||
A simple example
|
||||
================
|
||||
|
||||
Let's assume you have a blog system, with an ``Entry`` model, and you want your
|
||||
sitemap to include all the links to your individual blog entries. Here's how
|
||||
your sitemap class might look::
|
||||
|
||||
from django.contrib.sitemap import Sitemap
|
||||
from mysite.blog.models import Entry
|
||||
|
||||
class BlogSitemap(Sitemap):
|
||||
changefreq = "never"
|
||||
priority = 0.5
|
||||
|
||||
def items(self):
|
||||
return Entry.objects.filter(is_draft=False)
|
||||
|
||||
def lastmod(self, obj):
|
||||
return obj.pub_date
|
||||
|
||||
Note:
|
||||
|
||||
* ``changefreq`` and ``priority`` are class attributes corresponding to
|
||||
``<changefreq>`` and ``<priority>`` elements, respectively. They can be
|
||||
made callable as functions, as ``lastmod`` was in the example.
|
||||
* ``items()`` is simply a method that returns a list of objects. The objects
|
||||
returned will get passed to any callable methods corresponding to a
|
||||
sitemap property (``location``, ``lastmod``, ``changefreq``, and
|
||||
``priority``).
|
||||
* ``lastmod`` should return a Python ``datetime`` object.
|
||||
* There is no ``location`` method in this example, but you can provide it
|
||||
in order to specify the URL for your object. By default, ``location()``
|
||||
calls ``get_absolute_url()`` on each object and returns the result.
|
||||
|
||||
Sitemap class reference
|
||||
=======================
|
||||
|
||||
A ``Sitemap`` class can define the following methods/attributes:
|
||||
|
||||
``items``
|
||||
---------
|
||||
|
||||
**Required.** A method that returns a list of objects. The framework doesn't
|
||||
care what *type* of objects they are; all that matters is that these objects
|
||||
get passed to the ``location()``, ``lastmod()``, ``changefreq()`` and
|
||||
``priority()`` methods.
|
||||
|
||||
``location``
|
||||
------------
|
||||
|
||||
**Optional.** Either a method or attribute.
|
||||
|
||||
If it's a method, it should return the absolute URL for a given object as
|
||||
returned by ``items()``.
|
||||
|
||||
If it's an attribute, its value should be a string representing an absolute URL
|
||||
to use for *every* object returned by ``items()``.
|
||||
|
||||
In both cases, "absolute URL" means a URL that doesn't include the protocol or
|
||||
domain. Examples:
|
||||
|
||||
* Good: ``'/foo/bar/'``
|
||||
* Bad: ``'example.com/foo/bar/'``
|
||||
* Bad: ``'http://example.com/foo/bar/'``
|
||||
|
||||
If ``location`` isn't provided, the framework will call the
|
||||
``get_absolute_url()`` method on each object as returned by ``items()``.
|
||||
|
||||
``lastmod``
|
||||
-----------
|
||||
|
||||
**Optional.** Either a method or attribute.
|
||||
|
||||
If it's a method, it should take one argument -- an object as returned by
|
||||
``items()`` -- and return that object's last-modified date/time, as a Python
|
||||
``datetime.datetime`` object.
|
||||
|
||||
If it's an attribute, its value should be a Python ``datetime.datetime`` object
|
||||
representing the last-modified date/time for *every* object returned by
|
||||
``items()``.
|
||||
|
||||
``changefreq``
|
||||
--------------
|
||||
|
||||
**Optional.** Either a method or attribute.
|
||||
|
||||
If it's a method, it should take one argument -- an object as returned by
|
||||
``items()`` -- and return that object's change frequency, as a Python string.
|
||||
|
||||
If it's an attribute, its value should be a string representing the change
|
||||
frequency of *every* object returned by ``items()``.
|
||||
|
||||
Possible values for ``changefreq``, whether you use a method or attribute, are:
|
||||
|
||||
* ``'always'``
|
||||
* ``'hourly'``
|
||||
* ``'daily'``
|
||||
* ``'weekly'``
|
||||
* ``'monthly'``
|
||||
* ``'yearly'``
|
||||
* ``'never'``
|
||||
|
||||
``priority``
|
||||
------------
|
||||
|
||||
**Optional.** Either a method or attribute.
|
||||
|
||||
If it's a method, it should take one argument -- an object as returned by
|
||||
``items()`` -- and return that object's priority, as either a string or float.
|
||||
|
||||
If it's an attribute, its value should be either a string or float representing
|
||||
the priority of *every* object returned by ``items()``.
|
||||
|
||||
Example values for ``priority``: ``0.4``, ``1.0``. The default priority of a
|
||||
page is ``0.5``. See `Google's documentation`_ for more information.
|
||||
|
||||
.. _Google's documentation: http://www.google.com/webmasters/sitemaps/docs/en/protocol.html
|
||||
|
||||
Shortcuts
|
||||
=========
|
||||
|
||||
The sitemap framework provides a couple of convenience classes for common cases:
|
||||
|
||||
``FlatpageSitemap``
|
||||
-------------------
|
||||
|
||||
The ``FlatpageSitemap`` class looks at all flatpages_ defined for the current
|
||||
``SITE_ID`` (see the `sites documentation`_) and creates an entry in the
|
||||
sitemap. These entries include only the ``location`` attribute -- not
|
||||
``lastmod``, ``changefreq`` or ``priority``.
|
||||
|
||||
.. _flatpages: http://www.djangoproject.com/documentation/flatpages/
|
||||
.. _sites documentation: http://www.djangoproject.com/documentation/sites/
|
||||
|
||||
``GenericSitemap``
|
||||
------------------
|
||||
|
||||
The ``GenericSitemap`` class works with any `generic views`_ you already have.
|
||||
To use it, create an instance, passing in the same ``info_dict`` you pass to
|
||||
the generic views. The only requirement is that the dictionary have a
|
||||
``queryset`` entry. It may also have a ``date_field`` entry that specifies a
|
||||
date field for objects retrieved from the ``queryset``. This will be used for
|
||||
the ``lastmod`` attribute in the generated sitemap. You may also pass
|
||||
``priority`` and ``changefreq`` keyword arguments to the ``GenericSitemap``
|
||||
constructor to specify these attributes for all URLs.
|
||||
|
||||
.. _generic views: http://www.djangoproject.com/documentation/generic_views/
|
||||
|
||||
Example
|
||||
-------
|
||||
|
||||
Here's an example of a URLconf_ using both::
|
||||
|
||||
from django.conf.urls.defaults import *
|
||||
from django.contrib.sitemap import FlatpageSitemap, GenericSitemap
|
||||
from mysite.blog.models import Entry
|
||||
|
||||
info_dict = {
|
||||
'queryset': Entry.objects.all(),
|
||||
'date_field': 'pub_date',
|
||||
}
|
||||
|
||||
sitemaps = {
|
||||
'flatpages': FlatpageSitemap,
|
||||
'blog': GenericSitemap(info_dict, priority=0.6),
|
||||
}
|
||||
|
||||
urlpatterns = patterns('',
|
||||
# some generic view using info_dict
|
||||
# ...
|
||||
|
||||
# the sitemap
|
||||
(r'^sitemap\.xml$', 'django.contrib.sitemap.views.sitemap', {'sitemaps': sitemaps})
|
||||
)
|
||||
|
||||
.. _URLconf: http://www.djangoproject.com/documentation/url_dispatch/
|
||||
|
||||
Creating a sitemap index
|
||||
========================
|
||||
|
||||
The sitemap framework also has the ability to create a sitemap index that
|
||||
references individual sitemap files, one per section defined in your
|
||||
``sitemaps`` dictionary. The only differences in usage are:
|
||||
|
||||
* You use two views in your URLconf: ``django.contrib.sitemap.views.index``
|
||||
and ``django.contrib.sitemap.views.sitemap``.
|
||||
* The ``django.contrib.sitemap.views.sitemap`` view should take a
|
||||
``section`` keyword argument.
|
||||
|
||||
Here is what the relevant URLconf lines would look like for the example above::
|
||||
|
||||
(r'^sitemap\.xml$', 'django.contrib.sitemap.views.index', {'sitemaps': sitemaps})
|
||||
(r'^sitemap-(?P<section>.+)\.xml$', 'django.contrib.sitemap.views.sitemap', {'sitemaps': sitemaps})
|
||||
|
||||
This will automatically generate a ``sitemap.xml`` file that references
|
||||
both ``sitemap-flatpages.xml`` and ``sitemap-blog.xml``. The ``Sitemap``
|
||||
classes and the ``sitemaps`` dict don't change at all.
|
||||
|
||||
Pinging Google
|
||||
==============
|
||||
|
||||
You may want to "ping" Google when your sitemap changes, to let it know to
|
||||
reindex your site. The framework provides a function to do just that:
|
||||
``django.contrib.sitemap.ping_google()``.
|
||||
|
||||
``ping_google()`` takes an optional argument, ``sitemap_url``, which should be
|
||||
the absolute URL of your site's sitemap (e.g., ``'/sitemap.xml'``). If this
|
||||
argument isn't provided, ``ping_google()`` will attempt to figure out your
|
||||
sitemap by performing a reverse lookup in your URLconf.
|
||||
|
||||
``ping_google()`` raises the exception
|
||||
``django.contrib.sitemap.SitemapNotFound`` if it cannot determine your sitemap
|
||||
URL.
|
||||
|
||||
One useful way to call ``ping_google()`` is from a model's ``save()`` method::
|
||||
|
||||
from django.contrib.sitemap import ping_google
|
||||
|
||||
class Entry(models.Model):
|
||||
# ...
|
||||
def save(self):
|
||||
super(Entry, self).save()
|
||||
try:
|
||||
ping_google()
|
||||
except Exception:
|
||||
# Catch 'Exception' broadly because we could get a variety
|
||||
# of HTTP-related exceptions.
|
||||
pass
|
||||
|
||||
A more efficient solution, however, would be to call ``ping_google()`` from a
|
||||
cron script, or some other scheduled task. The function makes an HTTP request
|
||||
to Google's servers, so you may not want to introduce that network overhead
|
||||
each time you call ``save()``.
|
Loading…
Reference in New Issue