""" PyRSS2Gen - A Python library for generating RSS 2.0 feeds. (This is the BSD license, based on the template at http://www.opensource.org/licenses/bsd-license.php ) Copyright (c) 2003, Dalke Scientific Software, LLC All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the Dalke Scientific Softare, LLC, Andrew Dalke, nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. """ __name__ = "PyRSS2Gen" __version__ = (1, 1, 0) __author__ = "Andrew Dalke " _generator_name = __name__ + "-" + ".".join(map(str, __version__)) import datetime import sys if sys.version_info[0] == 3: # Python 3 basestring = str from io import StringIO else: # Python 2 try: from cStringIO import StringIO except ImportError: # Very old (or memory constrained) systems might # have left out the compiled C version. Fall back # to the pure Python one. Haven't seen this sort # of system since the early 2000s. from StringIO import StringIO # Could make this the base class; will need to add 'publish' class WriteXmlMixin: def write_xml(self, outfile, encoding="iso-8859-1"): from xml.sax import saxutils handler = saxutils.XMLGenerator(outfile, encoding) handler.startDocument() self.publish(handler) handler.endDocument() def to_xml(self, encoding="iso-8859-1"): f = StringIO() self.write_xml(f, encoding) return f.getvalue() def _element(handler, name, obj, d={}): if isinstance(obj, basestring) or obj is None: # special-case handling to make the API easier # to use for the common case. handler.startElement(name, d) if obj is not None: handler.characters(obj) handler.endElement(name) else: # It better know how to emit the correct XML. obj.publish(handler) def _opt_element(handler, name, obj): if obj is None: return _element(handler, name, obj) def _format_date(dt): """convert a datetime into an RFC 822 formatted date Input date must be in GMT. """ # Looks like: # Sat, 07 Sep 2002 00:00:01 GMT # Can't use strftime because that's locale dependent # # Isn't there a standard way to do this for Python? The # rfc822 and email.Utils modules assume a timestamp. The # following is based on the rfc822 module. return "%s, %02d %s %04d %02d:%02d:%02d GMT" % ( ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"][dt.weekday()], dt.day, ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"][dt.month - 1], dt.year, dt.hour, dt.minute, dt.second) ## # A couple simple wrapper objects for the fields which # take a simple value other than a string. class IntElement: """implements the 'publish' API for integers Takes the tag name and the integer value to publish. (Could be used for anything which uses str() to be published to text for XML.) """ element_attrs = {} def __init__(self, name, val): self.name = name self.val = val def publish(self, handler): handler.startElement(self.name, self.element_attrs) handler.characters(str(self.val)) handler.endElement(self.name) class DateElement: """implements the 'publish' API for a datetime.datetime Takes the tag name and the datetime to publish. Converts the datetime to RFC 2822 timestamp (4-digit year). """ def __init__(self, name, dt): self.name = name self.dt = dt def publish(self, handler): _element(handler, self.name, _format_date(self.dt)) #### class Category: """Publish a category element""" def __init__(self, category, domain=None): self.category = category self.domain = domain def publish(self, handler): d = {} if self.domain is not None: d["domain"] = self.domain _element(handler, "category", self.category, d) class Cloud: """Publish a cloud""" def __init__(self, domain, port, path, registerProcedure, protocol): self.domain = domain self.port = port self.path = path self.registerProcedure = registerProcedure self.protocol = protocol def publish(self, handler): _element(handler, "cloud", None, { "domain": self.domain, "port": str(self.port), "path": self.path, "registerProcedure": self.registerProcedure, "protocol": self.protocol}) class Image: """Publish a channel Image""" element_attrs = {} def __init__(self, url, title, link, width=None, height=None, description=None): self.url = url self.title = title self.link = link self.width = width self.height = height self.description = description def publish(self, handler): handler.startElement("image", self.element_attrs) _element(handler, "url", self.url) _element(handler, "title", self.title) _element(handler, "link", self.link) width = self.width if isinstance(width, int): width = IntElement("width", width) _opt_element(handler, "width", width) height = self.height if isinstance(height, int): height = IntElement("height", height) _opt_element(handler, "height", height) _opt_element(handler, "description", self.description) handler.endElement("image") class Guid: """Publish a guid Defaults to being a permalink, which is the assumption if it's omitted. Hence strings are always permalinks. """ def __init__(self, guid, isPermaLink=1): self.guid = guid self.isPermaLink = isPermaLink def publish(self, handler): d = {} if self.isPermaLink: d["isPermaLink"] = "true" else: d["isPermaLink"] = "false" _element(handler, "guid", self.guid, d) class TextInput: """Publish a textInput Apparently this is rarely used. """ element_attrs = {} def __init__(self, title, description, name, link): self.title = title self.description = description self.name = name self.link = link def publish(self, handler): handler.startElement("textInput", self.element_attrs) _element(handler, "title", self.title) _element(handler, "description", self.description) _element(handler, "name", self.name) _element(handler, "link", self.link) handler.endElement("textInput") class Enclosure: """Publish an enclosure""" def __init__(self, url, length, type): self.url = url self.length = length self.type = type def publish(self, handler): _element(handler, "enclosure", None, {"url": self.url, "length": str(self.length), "type": self.type, }) class Source: """Publish the item's original source, used by aggregators""" def __init__(self, name, url): self.name = name self.url = url def publish(self, handler): _element(handler, "source", self.name, {"url": self.url}) class SkipHours: """Publish the skipHours This takes a list of hours, as integers. """ element_attrs = {} def __init__(self, hours): self.hours = hours def publish(self, handler): if self.hours: handler.startElement("skipHours", self.element_attrs) for hour in self.hours: _element(handler, "hour", str(hour)) handler.endElement("skipHours") class SkipDays: """Publish the skipDays This takes a list of days as strings. """ element_attrs = {} def __init__(self, days): self.days = days def publish(self, handler): if self.days: handler.startElement("skipDays", self.element_attrs) for day in self.days: _element(handler, "day", day) handler.endElement("skipDays") class RSS2(WriteXmlMixin): """The main RSS class. Stores the channel attributes, with the "category" elements under ".categories" and the RSS items under ".items". """ rss_attrs = {"version": "2.0"} element_attrs = {} def __init__(self, title, link, description, language=None, copyright=None, managingEditor=None, webMaster=None, pubDate=None, # a datetime, *in* *GMT* lastBuildDate=None, # a datetime categories=None, # list of strings or Category generator=_generator_name, docs="http://blogs.law.harvard.edu/tech/rss", cloud=None, # a Cloud ttl=None, # integer number of minutes image=None, # an Image rating=None, # a string; I don't know how it's used textInput=None, # a TextInput skipHours=None, # a SkipHours with a list of integers skipDays=None, # a SkipDays with a list of strings items=None, # list of RSSItems ): self.title = title self.link = link self.description = description self.language = language self.copyright = copyright self.managingEditor = managingEditor self.webMaster = webMaster self.pubDate = pubDate self.lastBuildDate = lastBuildDate if categories is None: categories = [] self.categories = categories self.generator = generator self.docs = docs self.cloud = cloud self.ttl = ttl self.image = image self.rating = rating self.textInput = textInput self.skipHours = skipHours self.skipDays = skipDays if items is None: items = [] self.items = items def publish(self, handler): handler.startElement("rss", self.rss_attrs) handler.startElement("channel", self.element_attrs) _element(handler, "title", self.title) _element(handler, "link", self.link) _element(handler, "description", self.description) self.publish_extensions(handler) _opt_element(handler, "language", self.language) _opt_element(handler, "copyright", self.copyright) _opt_element(handler, "managingEditor", self.managingEditor) _opt_element(handler, "webMaster", self.webMaster) pubDate = self.pubDate if isinstance(pubDate, datetime.datetime): pubDate = DateElement("pubDate", pubDate) _opt_element(handler, "pubDate", pubDate) lastBuildDate = self.lastBuildDate if isinstance(lastBuildDate, datetime.datetime): lastBuildDate = DateElement("lastBuildDate", lastBuildDate) _opt_element(handler, "lastBuildDate", lastBuildDate) for category in self.categories: if isinstance(category, basestring): category = Category(category) category.publish(handler) _opt_element(handler, "generator", self.generator) _opt_element(handler, "docs", self.docs) if self.cloud is not None: self.cloud.publish(handler) ttl = self.ttl if isinstance(self.ttl, int): ttl = IntElement("ttl", ttl) _opt_element(handler, "ttl", ttl) if self.image is not None: self.image.publish(handler) _opt_element(handler, "rating", self.rating) if self.textInput is not None: self.textInput.publish(handler) if self.skipHours is not None: self.skipHours.publish(handler) if self.skipDays is not None: self.skipDays.publish(handler) for item in self.items: item.publish(handler) handler.endElement("channel") handler.endElement("rss") def publish_extensions(self, handler): # Derived classes can hook into this to insert # output after the three required fields. pass class RSSItem(WriteXmlMixin): """Publish an RSS Item""" element_attrs = {} def __init__(self, title=None, # string link=None, # url as string description=None, # string author=None, # email address as string categories=None, # list of string or Category comments=None, # url as string enclosure=None, # an Enclosure guid=None, # a unique string pubDate=None, # a datetime source=None, # a Source ): if title is None and description is None: raise TypeError( "must define at least one of 'title' or 'description'") self.title = title self.link = link self.description = description self.author = author if categories is None: categories = [] self.categories = categories self.comments = comments self.enclosure = enclosure self.guid = guid self.pubDate = pubDate self.source = source # It sure does get tedious typing these names three times... def publish(self, handler): handler.startElement("item", self.element_attrs) _opt_element(handler, "title", self.title) _opt_element(handler, "link", self.link) self.publish_extensions(handler) _opt_element(handler, "description", self.description) _opt_element(handler, "author", self.author) for category in self.categories: if isinstance(category, basestring): category = Category(category) category.publish(handler) _opt_element(handler, "comments", self.comments) if self.enclosure is not None: self.enclosure.publish(handler) _opt_element(handler, "guid", self.guid) pubDate = self.pubDate if isinstance(pubDate, datetime.datetime): pubDate = DateElement("pubDate", pubDate) _opt_element(handler, "pubDate", pubDate) if self.source is not None: self.source.publish(handler) handler.endElement("item") def publish_extensions(self, handler): # Derived classes can hook into this to insert # output after the title and link elements pass