From 942e5246accda3d953b4634a010402ef5786c2e0 Mon Sep 17 00:00:00 2001 From: Jacob Kaplan-Moss Date: Mon, 11 Aug 2008 22:22:26 +0000 Subject: [PATCH] Added a number of callbacks to SyndicationFeed for adding custom attributes and elements to feeds. Refs #6547. git-svn-id: http://code.djangoproject.com/svn/django/trunk@8311 bcc190cf-cafb-0310-a4f2-bffc1f526a37 --- django/utils/feedgenerator.py | 238 ++++++++++++--------- docs/syndication_feeds.txt | 153 ++++++++++--- tests/regressiontests/syndication/feeds.py | 25 +++ tests/regressiontests/syndication/tests.py | 48 ++++- tests/regressiontests/syndication/urls.py | 3 +- 5 files changed, 336 insertions(+), 131 deletions(-) diff --git a/django/utils/feedgenerator.py b/django/utils/feedgenerator.py index ce09c4fc27..da6e0a8dc8 100644 --- a/django/utils/feedgenerator.py +++ b/django/utils/feedgenerator.py @@ -19,9 +19,10 @@ For definitions of the different versions of RSS, see: http://diveintomark.org/archives/2004/02/04/incompatible-rss """ +import re +import datetime from django.utils.xmlutils import SimplerXMLGenerator from django.utils.encoding import force_unicode, iri_to_uri -import datetime, re, time def rfc2822_date(date): # We do this ourselves to be timezone aware, email.Utils is not tz aware. @@ -56,7 +57,7 @@ class SyndicationFeed(object): "Base class for all syndication feeds. 
Subclasses should provide write()" def __init__(self, title, link, description, language=None, author_email=None, author_name=None, author_link=None, subtitle=None, categories=None, - feed_url=None, feed_copyright=None, feed_guid=None, ttl=None): + feed_url=None, feed_copyright=None, feed_guid=None, ttl=None, **kwargs): to_unicode = lambda s: force_unicode(s, strings_only=True) if categories: categories = [force_unicode(c) for c in categories] @@ -75,11 +76,13 @@ class SyndicationFeed(object): 'id': feed_guid or link, 'ttl': ttl, } + self.feed.update(kwargs) self.items = [] def add_item(self, title, link, description, author_email=None, author_name=None, author_link=None, pubdate=None, comments=None, - unique_id=None, enclosure=None, categories=(), item_copyright=None, ttl=None): + unique_id=None, enclosure=None, categories=(), item_copyright=None, + ttl=None, **kwargs): """ Adds an item to the feed. All args are expected to be Python Unicode objects except pubdate, which is a datetime.datetime object, and @@ -88,7 +91,7 @@ class SyndicationFeed(object): to_unicode = lambda s: force_unicode(s, strings_only=True) if categories: categories = [to_unicode(c) for c in categories] - self.items.append({ + item = { 'title': to_unicode(title), 'link': iri_to_uri(link), 'description': to_unicode(description), @@ -102,11 +105,39 @@ class SyndicationFeed(object): 'categories': categories or (), 'item_copyright': to_unicode(item_copyright), 'ttl': ttl, - }) + } + item.update(kwargs) + self.items.append(item) def num_items(self): return len(self.items) + def root_attributes(self): + """ + Return extra attributes to place on the root (i.e. feed/channel) element. + Called from write(). + """ + return {} + + def add_root_elements(self, handler): + """ + Add elements in the root (i.e. feed/channel) element. Called + from write(). + """ + pass + + def item_attributes(self, item): + """ + Return extra attributes to place on each item (i.e. item/entry) element. 
+ """ + return {} + + def add_item_elements(self, handler, item): + """ + Add elements on each item (i.e. item/entry) element. + """ + pass + def write(self, outfile, encoding): """ Outputs the feed in the given encoding to outfile, which is a file-like @@ -148,7 +179,19 @@ class RssFeed(SyndicationFeed): handler = SimplerXMLGenerator(outfile, encoding) handler.startDocument() handler.startElement(u"rss", {u"version": self._version}) - handler.startElement(u"channel", {}) + handler.startElement(u"channel", self.root_attributes()) + self.add_root_elements(handler) + self.write_items(handler) + self.endChannelElement(handler) + handler.endElement(u"rss") + + def write_items(self, handler): + for item in self.items: + handler.startElement(u'item', self.item_attributes(item)) + self.add_item_elements(handler, item) + handler.endElement(u"item") + + def add_root_elements(self, handler): handler.addQuickElement(u"title", self.feed['title']) handler.addQuickElement(u"link", self.feed['link']) handler.addQuickElement(u"description", self.feed['description']) @@ -161,76 +204,75 @@ class RssFeed(SyndicationFeed): handler.addQuickElement(u"lastBuildDate", rfc2822_date(self.latest_post_date()).decode('ascii')) if self.feed['ttl'] is not None: handler.addQuickElement(u"ttl", self.feed['ttl']) - self.write_items(handler) - self.endChannelElement(handler) - handler.endElement(u"rss") def endChannelElement(self, handler): handler.endElement(u"channel") class RssUserland091Feed(RssFeed): _version = u"0.91" - def write_items(self, handler): - for item in self.items: - handler.startElement(u"item", {}) - handler.addQuickElement(u"title", item['title']) - handler.addQuickElement(u"link", item['link']) - if item['description'] is not None: - handler.addQuickElement(u"description", item['description']) - handler.endElement(u"item") + def add_item_elements(self, handler, item): + handler.addQuickElement(u"title", item['title']) + handler.addQuickElement(u"link", item['link']) + if 
item['description'] is not None: + handler.addQuickElement(u"description", item['description']) class Rss201rev2Feed(RssFeed): # Spec: http://blogs.law.harvard.edu/tech/rss _version = u"2.0" - def write_items(self, handler): - for item in self.items: - handler.startElement(u"item", {}) - handler.addQuickElement(u"title", item['title']) - handler.addQuickElement(u"link", item['link']) - if item['description'] is not None: - handler.addQuickElement(u"description", item['description']) + def add_item_elements(self, handler, item): + handler.addQuickElement(u"title", item['title']) + handler.addQuickElement(u"link", item['link']) + if item['description'] is not None: + handler.addQuickElement(u"description", item['description']) - # Author information. - if item["author_name"] and item["author_email"]: - handler.addQuickElement(u"author", "%s (%s)" % \ - (item['author_email'], item['author_name'])) - elif item["author_email"]: - handler.addQuickElement(u"author", item["author_email"]) - elif item["author_name"]: - handler.addQuickElement(u"dc:creator", item["author_name"], {"xmlns:dc": u"http://purl.org/dc/elements/1.1/"}) + # Author information. 
+ if item["author_name"] and item["author_email"]: + handler.addQuickElement(u"author", "%s (%s)" % \ + (item['author_email'], item['author_name'])) + elif item["author_email"]: + handler.addQuickElement(u"author", item["author_email"]) + elif item["author_name"]: + handler.addQuickElement(u"dc:creator", item["author_name"], {"xmlns:dc": u"http://purl.org/dc/elements/1.1/"}) - if item['pubdate'] is not None: - handler.addQuickElement(u"pubDate", rfc2822_date(item['pubdate']).decode('ascii')) - if item['comments'] is not None: - handler.addQuickElement(u"comments", item['comments']) - if item['unique_id'] is not None: - handler.addQuickElement(u"guid", item['unique_id']) - if item['ttl'] is not None: - handler.addQuickElement(u"ttl", item['ttl']) + if item['pubdate'] is not None: + handler.addQuickElement(u"pubDate", rfc2822_date(item['pubdate']).decode('ascii')) + if item['comments'] is not None: + handler.addQuickElement(u"comments", item['comments']) + if item['unique_id'] is not None: + handler.addQuickElement(u"guid", item['unique_id']) + if item['ttl'] is not None: + handler.addQuickElement(u"ttl", item['ttl']) - # Enclosure. - if item['enclosure'] is not None: - handler.addQuickElement(u"enclosure", '', - {u"url": item['enclosure'].url, u"length": item['enclosure'].length, - u"type": item['enclosure'].mime_type}) + # Enclosure. + if item['enclosure'] is not None: + handler.addQuickElement(u"enclosure", '', + {u"url": item['enclosure'].url, u"length": item['enclosure'].length, + u"type": item['enclosure'].mime_type}) - # Categories. - for cat in item['categories']: - handler.addQuickElement(u"category", cat) - - handler.endElement(u"item") + # Categories. 
+ for cat in item['categories']: + handler.addQuickElement(u"category", cat) class Atom1Feed(SyndicationFeed): # Spec: http://atompub.org/2005/07/11/draft-ietf-atompub-format-10.html mime_type = 'application/atom+xml' ns = u"http://www.w3.org/2005/Atom" + def write(self, outfile, encoding): handler = SimplerXMLGenerator(outfile, encoding) handler.startDocument() + handler.startElement(u'feed', self.root_attributes()) + self.add_root_elements(handler) + self.write_items(handler) + handler.endElement(u"feed") + + def root_element_attributes(self): if self.feed['language'] is not None: - handler.startElement(u"feed", {u"xmlns": self.ns, u"xml:lang": self.feed['language']}) + return {u"xmlns": self.ns, u"xml:lang": self.feed['language']} else: - handler.startElement(u"feed", {u"xmlns": self.ns}) + return {u"xmlns": self.ns} + + def add_root_elements(self, handler): handler.addQuickElement(u"title", self.feed['title']) handler.addQuickElement(u"link", "", {u"rel": u"alternate", u"href": self.feed['link']}) if self.feed['feed_url'] is not None: @@ -251,55 +293,55 @@ class Atom1Feed(SyndicationFeed): handler.addQuickElement(u"category", "", {u"term": cat}) if self.feed['feed_copyright'] is not None: handler.addQuickElement(u"rights", self.feed['feed_copyright']) - self.write_items(handler) - handler.endElement(u"feed") - + def write_items(self, handler): for item in self.items: - handler.startElement(u"entry", {}) - handler.addQuickElement(u"title", item['title']) - handler.addQuickElement(u"link", u"", {u"href": item['link'], u"rel": u"alternate"}) - if item['pubdate'] is not None: - handler.addQuickElement(u"updated", rfc3339_date(item['pubdate']).decode('ascii')) - - # Author information. 
- if item['author_name'] is not None: - handler.startElement(u"author", {}) - handler.addQuickElement(u"name", item['author_name']) - if item['author_email'] is not None: - handler.addQuickElement(u"email", item['author_email']) - if item['author_link'] is not None: - handler.addQuickElement(u"uri", item['author_link']) - handler.endElement(u"author") - - # Unique ID. - if item['unique_id'] is not None: - unique_id = item['unique_id'] - else: - unique_id = get_tag_uri(item['link'], item['pubdate']) - handler.addQuickElement(u"id", unique_id) - - # Summary. - if item['description'] is not None: - handler.addQuickElement(u"summary", item['description'], {u"type": u"html"}) - - # Enclosure. - if item['enclosure'] is not None: - handler.addQuickElement(u"link", '', - {u"rel": u"enclosure", - u"href": item['enclosure'].url, - u"length": item['enclosure'].length, - u"type": item['enclosure'].mime_type}) - - # Categories. - for cat in item['categories']: - handler.addQuickElement(u"category", u"", {u"term": cat}) - - # Rights. - if item['item_copyright'] is not None: - handler.addQuickElement(u"rights", item['item_copyright']) - + handler.startElement(u"entry", self.item_attributes(item)) + self.add_item_elements(handler, item) handler.endElement(u"entry") + + def add_item_elements(self, handler, item): + handler.addQuickElement(u"title", item['title']) + handler.addQuickElement(u"link", u"", {u"href": item['link'], u"rel": u"alternate"}) + if item['pubdate'] is not None: + handler.addQuickElement(u"updated", rfc3339_date(item['pubdate']).decode('ascii')) + + # Author information. + if item['author_name'] is not None: + handler.startElement(u"author", {}) + handler.addQuickElement(u"name", item['author_name']) + if item['author_email'] is not None: + handler.addQuickElement(u"email", item['author_email']) + if item['author_link'] is not None: + handler.addQuickElement(u"uri", item['author_link']) + handler.endElement(u"author") + + # Unique ID. 
+ if item['unique_id'] is not None: + unique_id = item['unique_id'] + else: + unique_id = get_tag_uri(item['link'], item['pubdate']) + handler.addQuickElement(u"id", unique_id) + + # Summary. + if item['description'] is not None: + handler.addQuickElement(u"summary", item['description'], {u"type": u"html"}) + + # Enclosure. + if item['enclosure'] is not None: + handler.addQuickElement(u"link", '', + {u"rel": u"enclosure", + u"href": item['enclosure'].url, + u"length": item['enclosure'].length, + u"type": item['enclosure'].mime_type}) + + # Categories. + for cat in item['categories']: + handler.addQuickElement(u"category", u"", {u"term": cat}) + + # Rights. + if item['item_copyright'] is not None: + handler.addQuickElement(u"rights", item['item_copyright']) # This isolates the decision of what the system default is, so calling code can # do "feedgenerator.DefaultFeed" instead of "feedgenerator.Rss201rev2Feed". diff --git a/docs/syndication_feeds.txt b/docs/syndication_feeds.txt index 6a603c4e54..ceb56a4788 100644 --- a/docs/syndication_feeds.txt +++ b/docs/syndication_feeds.txt @@ -801,7 +801,12 @@ Behind the scenes, the high-level RSS framework uses a lower-level framework for generating feeds' XML. This framework lives in a single module: `django/utils/feedgenerator.py`_. -Feel free to use this framework on your own, for lower-level tasks. +You can use this framework on your own, for lower-level feed generation. You can +also create custom feed generator subclasses for use with the ``feed_type`` +``Feed`` option. + +``SyndicationFeed`` classes +--------------------------- The ``feedgenerator`` module contains a base class ``SyndicationFeed`` and several subclasses: @@ -813,38 +818,71 @@ several subclasses: Each of these three classes knows how to render a certain type of feed as XML. 
They share this interface: -``__init__(title, link, description, language=None, author_email=None,`` -``author_name=None, author_link=None, subtitle=None, categories=None,`` -``feed_url=None)`` +``SyndicationFeed.__init__(**kwargs)`` + Initialize the feed with the given dictionary of metadata, which applies to + the entire feed. Required keyword arguments are: + + * ``title`` + * ``link`` + * ``description`` + + There's also a bunch of other optional keywords: + + * ``language`` + * ``author_email`` + * ``author_name`` + * ``author_link`` + * ``subtitle`` + * ``categories`` + * ``feed_url`` + * ``feed_copyright`` + * ``feed_guid`` + * ``ttl`` + + Any extra keyword arguments you pass to ``__init__`` will be stored in + ``self.feed`` for use with `custom feed generators`_. -Initializes the feed with the given metadata, which applies to the entire feed -(i.e., not just to a specific item in the feed). + All parameters should be Unicode objects, except ``categories``, which + should be a sequence of Unicode objects. -All parameters, if given, should be Unicode objects, except ``categories``, -which should be a sequence of Unicode objects. +``SyndicationFeed.add_item(**kwargs)`` + Add an item to the feed with the given parameters. -``add_item(title, link, description, author_email=None, author_name=None,`` -``pubdate=None, comments=None, unique_id=None, enclosure=None, categories=())`` + Required keyword arguments are: + + * ``title`` + * ``link`` + * ``description`` -Add an item to the feed with the given parameters. All parameters, if given, -should be Unicode objects, except: + Optional keyword arguments are: - * ``pubdate`` should be a `Python datetime object`_. - * ``enclosure`` should be an instance of ``feedgenerator.Enclosure``. - * ``categories`` should be a sequence of Unicode objects. 
+ * ``author_email`` + * ``author_name`` + * ``author_link`` + * ``pubdate`` + * ``comments`` + * ``unique_id`` + * ``enclosure`` + * ``categories`` + * ``item_copyright`` + * ``ttl`` -``write(outfile, encoding)`` + Extra keyword arguments will be stored for `custom feed generators`_. -Outputs the feed in the given encoding to outfile, which is a file-like object. + All parameters, if given, should be Unicode objects, except: -``writeString(encoding)`` + * ``pubdate`` should be a `Python datetime object`_. + * ``enclosure`` should be an instance of ``feedgenerator.Enclosure``. + * ``categories`` should be a sequence of Unicode objects. + +``SyndicationFeed.write(outfile, encoding)`` + Outputs the feed in the given ``encoding`` to ``outfile``, which must be a + file-like object. -Returns the feed as a string in the given encoding. +``SyndicationFeed.writeString(encoding)`` + Returns the feed as a string in the given ``encoding``. -Example usage -------------- - -This example creates an Atom 1.0 feed and prints it to standard output:: +For example, to create an Atom 1.0 feed and print it to standard output:: >>> from django.utils import feedgenerator >>> f = feedgenerator.Atom1Feed( @@ -857,12 +895,69 @@ This example creates an Atom 1.0 feed and prints it to standard output:: ... description=u"

Today I had a Vienna Beef hot dog. It was pink, plump and perfect.

") >>> print f.writeString('utf8') - My Weblog - http://www.example.com/ - Sat, 12 Nov 2005 00:28:43 -0000Hot dog today - http://www.example.com/entries/1/tag:www.example.com/entries/1/ - <p>Today I had a Vienna Beef hot dog. It was pink, plump and perfect.</p> - + + ... + .. _django/utils/feedgenerator.py: http://code.djangoproject.com/browser/django/trunk/django/utils/feedgenerator.py .. _Python datetime object: http://www.python.org/doc/current/lib/module-datetime.html + +Custom feed generators +---------------------- + +If you need to produce a custom feed format, you've got a couple of options. + +If the feed format is totally custom, you'll want to subclass +``SyndicationFeed`` and completely replace the ``write()`` and +``writeString()`` methods. + +However, if the feed format is a spin-off of RSS or Atom (e.g. GeoRSS_, Apple's +`iTunes podcast format`_, etc.), you've got a better choice. These types of +feeds typically add extra elements and/or attributes to the underlying format, +and there is a set of methods that ``SyndicationFeed`` calls to get these extra +attributes. Thus, you can subclass the appropriate feed generator class +(``Atom1Feed`` or ``Rss201rev2Feed``) and extend these callbacks. They are: + +.. _georss: http://georss.org/ +.. _itunes podcast format: http://www.apple.com/itunes/store/podcaststechspecs.html + +``SyndicationFeed.root_attributes(self)`` + Return a ``dict`` of attributes to add to the root feed element + (``feed``/``channel``). + +``SyndicationFeed.add_root_elements(self, handler)`` + Callback to add elements inside the root feed element + (``feed``/``channel``). ``handler`` is an `XMLGenerator`_ from Python's + built-in SAX library; you'll call methods on it to add to the XML + document in process. + +``SyndicationFeed.item_attributes(self, item)`` + Return a ``dict`` of attributes to add to each item (``item``/``entry``) + element. 
The argument, ``item``, is a dictionary of all the data passed to + ``SyndicationFeed.add_item()``. + +``SyndicationFeed.add_item_elements(self, handler, item)`` + Callback to add elements to each item (``item``/``entry``) element. + ``handler`` and ``item`` are as above. + +.. warning:: + + If you override any of these methods, be sure to call the superclass methods + since they add the required elements for each feed format. + +For example, you might start implementing an iTunes RSS feed generator like so:: + + class iTunesFeed(Rss201rev2Feed): + def root_attributes(self): + attrs = super(iTunesFeed, self).root_attributes() + attrs['xmlns:itunes'] = 'http://www.itunes.com/dtds/podcast-1.0.dtd' + return attrs + + def add_root_elements(self, handler): + super(iTunesFeed, self).add_root_elements(handler) + handler.addQuickElement('itunes:explicit', 'clean') + +Obviously there's a lot more work to be done for a complete custom feed class, +but the above example should demonstrate the basic idea. + +.. _XMLGenerator: http://docs.python.org/dev/library/xml.sax.utils.html#xml.sax.saxutils.XMLGenerator \ No newline at end of file diff --git a/tests/regressiontests/syndication/feeds.py b/tests/regressiontests/syndication/feeds.py index 3c5d5a51b5..eabc1f94a8 100644 --- a/tests/regressiontests/syndication/feeds.py +++ b/tests/regressiontests/syndication/feeds.py @@ -21,3 +21,28 @@ class TestRssFeed(feeds.Feed): class TestAtomFeed(TestRssFeed): feed_type = Atom1Feed + +class MyCustomAtom1Feed(Atom1Feed): + """ + Test of a custom feed generator class. 
+ """ + def root_attributes(self): + attrs = super(MyCustomAtom1Feed, self).root_attributes() + attrs[u'django'] = u'rocks' + return attrs + + def add_root_elements(self, handler): + super(MyCustomAtom1Feed, self).add_root_elements(handler) + handler.addQuickElement(u'spam', u'eggs') + + def item_attributes(self, item): + attrs = super(MyCustomAtom1Feed, self).item_attributes(item) + attrs[u'bacon'] = u'yum' + return attrs + + def add_item_elements(self, handler, item): + super(MyCustomAtom1Feed, self).add_item_elements(handler, item) + handler.addQuickElement(u'ministry', u'silly walks') + +class TestCustomFeed(TestAtomFeed): + feed_type = MyCustomAtom1Feed diff --git a/tests/regressiontests/syndication/tests.py b/tests/regressiontests/syndication/tests.py index 142cf47d03..0938f69e5b 100644 --- a/tests/regressiontests/syndication/tests.py +++ b/tests/regressiontests/syndication/tests.py @@ -4,22 +4,64 @@ from xml.dom import minidom from django.test import TestCase from django.test.client import Client from models import Entry +try: + set +except NameError: + from sets import Set as set class SyndicationFeedTest(TestCase): fixtures = ['feeddata.json'] + def assertChildNodes(self, elem, expected): + actual = set([n.nodeName for n in elem.childNodes]) + expected = set(expected) + self.assertEqual(actual, expected) + def test_rss_feed(self): response = self.client.get('/syndication/feeds/rss/') doc = minidom.parseString(response.content) self.assertEqual(len(doc.getElementsByTagName('channel')), 1) - self.assertEqual(len(doc.getElementsByTagName('item')), Entry.objects.count()) + + chan = doc.getElementsByTagName('channel')[0] + self.assertChildNodes(chan, ['title', 'link', 'description', 'language', 'lastBuildDate', 'item']) + + items = chan.getElementsByTagName('item') + self.assertEqual(len(items), Entry.objects.count()) + for item in items: + self.assertChildNodes(item, ['title', 'link', 'description', 'guid']) def test_atom_feed(self): response = 
self.client.get('/syndication/feeds/atom/') doc = minidom.parseString(response.content) - self.assertEqual(len(doc.getElementsByTagName('feed')), 1) - self.assertEqual(len(doc.getElementsByTagName('entry')), Entry.objects.count()) + + feed = doc.firstChild + self.assertEqual(feed.nodeName, 'feed') + self.assertChildNodes(feed, ['title', 'link', 'id', 'updated', 'entry']) + + entries = feed.getElementsByTagName('entry') + self.assertEqual(len(entries), Entry.objects.count()) + for entry in entries: + self.assertChildNodes(entry, ['title', 'link', 'id', 'summary']) + summary = entry.getElementsByTagName('summary')[0] + self.assertEqual(summary.getAttribute('type'), 'html') + def test_custom_feed_generator(self): + response = self.client.get('/syndication/feeds/custom/') + doc = minidom.parseString(response.content) + + feed = doc.firstChild + self.assertEqual(feed.nodeName, 'feed') + self.assertEqual(feed.getAttribute('django'), 'rocks') + self.assertChildNodes(feed, ['title', 'link', 'id', 'updated', 'entry', 'spam']) + + entries = feed.getElementsByTagName('entry') + self.assertEqual(len(entries), Entry.objects.count()) + for entry in entries: + self.assertEqual(entry.getAttribute('bacon'), 'yum') + self.assertChildNodes(entry, ['title', 'link', 'id', 'summary', 'ministry']) + summary = entry.getElementsByTagName('summary')[0] + self.assertEqual(summary.getAttribute('type'), 'html') + def test_complex_base_url(self): """ Tests that that the base url for a complex feed doesn't raise a 500 diff --git a/tests/regressiontests/syndication/urls.py b/tests/regressiontests/syndication/urls.py index ce3b5056ba..f37222d9b5 100644 --- a/tests/regressiontests/syndication/urls.py +++ b/tests/regressiontests/syndication/urls.py @@ -1,10 +1,11 @@ -from feeds import TestRssFeed, TestAtomFeed, ComplexFeed +from feeds import TestRssFeed, TestAtomFeed, TestCustomFeed, ComplexFeed from django.conf.urls.defaults import patterns feed_dict = { 'complex': ComplexFeed, 'rss': TestRssFeed, 
'atom': TestAtomFeed, + 'custom': TestCustomFeed, } urlpatterns = patterns('',