#!/usr/bin/python
##############################################################################
## A Python module for interacting with del.icio.us, via their REST API.
## This isn't a full implementation of the del.icio.us API - only enough
## methods have been written to support my del.icio.us -> Movable Type
## script.
##
## By Andy Reitz, February 7th, 2008.
## URL: http://redefine.dyndns.org/~andyr/blog/archives/2008/02/delish2mt.html
##############################################################################

# XML parsing. Cribbed from http://docs.python.org/lib/expat-example.html
import xml.parsers.expat
from time import sleep

try:
    # Python 2 module layout.
    # Need this for the 'quote()' function.
    import urllib
    # Get an HTTPS URL (with GET-style parameters). Cribbed from
    # http://docs.python.org/lib/urllib2-examples.html
    import urllib2
    # For extracting the domain from the API endpoint & combining the API
    # endpoint with specific API methods.
    from urlparse import urlparse, urljoin
    _quote = urllib.quote
except ImportError:
    # Python 3 keeps the same functionality under the 'urllib' package. The
    # 'urllib2' alias lets the class body stay identical on both versions.
    import urllib.parse
    import urllib.request as urllib2
    from urllib.parse import urlparse, urljoin
    _quote = urllib.parse.quote


class DeliciousWS:
    """Minimal client for the del.icio.us (or a compatible) v1 REST API.

    Only two operations are supported: fetching the recent links that carry
    a given tag, and re-posting links with one tag renamed.
    """

    __debug = False
    api_endpoint = "https://www.delicious.com/v1/"
    api_realm = 'del.icio.us API'
    username = ""
    password = ""
    tag = ""
    # This is where we store the links that are parsed out of the XML.
    # Every instance gets its own list in __init__ and in each parse.
    parsedLinks = []

    def __init__(self, username, password):
        """Remember the credentials used for HTTP Basic authentication."""
        self.username = username
        self.password = password
        self.parsedLinks = []

    def setAPIEndpoint(self, url):
        """Set the base URL of the API, adding a trailing '/' if missing.

        The trailing slash matters because the method paths are combined
        with the endpoint via urljoin(), which would otherwise replace the
        last path segment.
        """
        if not url.endswith('/'):
            url = url + '/'
        self.api_endpoint = url

    def getAPIEndpoint(self):
        """Return the base URL of the API."""
        return self.api_endpoint

    def setAPIRealm(self, realm):
        """Set the HTTP Basic authentication realm sent by the server."""
        self.api_realm = realm

    def getAPIRealm(self):
        """Return the HTTP Basic authentication realm."""
        return self.api_realm

    def setDebug(self, d):
        """Turn debug output on (True) or off (False)."""
        self.__debug = d

    def __dbg(self, s):
        """Print 's' with a 'DBG: ' prefix when debugging is enabled.

        Non-ASCII characters are replaced so that printing can never fail
        on a terminal with a limited encoding.
        """
        if not self.__debug:
            return
        if isinstance(s, bytes):
            # Raw response bodies (and Python 2 byte strings) are decoded
            # leniently instead of raising on the first non-ASCII byte.
            s = s.decode('ascii', 'replace')
        print("DBG: %s" % s.encode('ascii', 'replace').decode('ascii'))

    def _installOpener(self):
        """Install a global urllib opener that authenticates against the API.

        The opener answers HTTP Basic challenges for the configured realm on
        the endpoint's host and sends this tool's User-agent header.
        """
        auth_handler = urllib2.HTTPBasicAuthHandler()
        parsed_url = urlparse(self.api_endpoint)
        auth_handler.add_password(self.api_realm, parsed_url.hostname,
                                  self.username, self.password)
        opener = urllib2.build_opener(auth_handler)
        opener.addheaders = [('User-agent', 'delicious2mtblog v0.2 http://redefine.dyndns.org/~andyr')]
        # Installed globally so it is used by every urlopen() call.
        urllib2.install_opener(opener)

    def _fetch(self, url):
        """Return the body of 'url', always closing the HTTP response."""
        response = urllib2.urlopen(url)
        try:
            return response.read()
        finally:
            response.close()

    def fetchLinksForTag(self, tag):
        """Return the recent links in the account that carry 'tag'.

        The links are returned as a list of dictionaries. Each dictionary
        should have the following keys:

          * hash - unique identifier for this link on del.icio.us
          * href - the URL for the link
          * extended - any notes that accompany the link
          * description - the title of the link
          * tag - any tags associated with the link
          * time - time added (or modified)

        The tag is percent-encoded before it is placed in the query string,
        so pass it unescaped. Errors from urlopen() and from the XML parser
        are propagated to the caller.
        """
        self.tag = tag
        self._installOpener()
        url = urljoin(self.api_endpoint,
                      "posts/recent?tag=%s" % self.quoteUriUTF16(tag))
        self.__dbg("Fetching links from URL: %s" % url)
        return self.parseDeliciousXML(self._fetch(url))

    ##
    ## Here is an example of the XML that can come back from the above call
    ## (illustrative; the attribute names match the keys listed for
    ## 'fetchLinksForTag()'):
    ##
    ##   <posts tag="myblog" user="someuser">
    ##     <post href="http://example.com/" description="Example site"
    ##           extended="Some notes" hash="0123456789abcdef"
    ##           tag="myblog python" time="2008-02-07T12:00:00Z"/>
    ##   </posts>
    ##
    ## All of the data is in attributes on the "post" tag - not enclosed by tags
    ## themselves. So, in my parsing code below, all of the logic that I need to
    ## extract the data is in the 'start_element()' method.
    ##
    def parseDeliciousXML(self, xmlresp):
        """Parse an API response and return its <post> elements.

        Returns a new list holding one attribute dictionary per <post>
        element; the same list is available as 'self.parsedLinks'. An empty
        or whitespace-only response yields an empty list. Raises
        xml.parsers.expat.ExpatError if the document is malformed or
        truncated.
        """
        # Start from a clean slate so that repeated calls do not return the
        # links of earlier responses as well.
        self.parsedLinks = []
        if not xmlresp or not xmlresp.strip():
            return self.parsedLinks

        p = xml.parsers.expat.ParserCreate()
        p.StartElementHandler = self.start_element
        p.EndElementHandler = self.end_element
        p.CharacterDataHandler = self.char_data

        self.__dbg("============== BEGIN XML PARSE ================")
        # The whole document is handed over in one call, so it must be
        # flagged as the final chunk for expat to finish (and validate) it.
        p.Parse(xmlresp, True)
        self.__dbg("============== END XML PARSE ================")
        return self.parsedLinks

    # 3 handler functions for the XML parsing - called exclusively by
    # 'parseDeliciousXML()'.
    def start_element(self, name, attrs):
        """Expat start-tag handler: collect the attributes of <post> tags."""
        self.__dbg("Start element; name: %s; attrs: %s" % (name, attrs))
        if name != "post":
            return
        self.__dbg("** In a post element **")
        # '.get()' keeps the debug output from aborting the parse when an
        # attribute happens to be absent.
        for key in ('hash', 'href', 'description'):
            self.__dbg("** %s: %s **" % (key, attrs.get(key)))
        self.parsedLinks.append(attrs)

    def end_element(self, name):
        """Expat end-tag handler: only used for debug output."""
        self.__dbg("End element: %s" % name)

    def char_data(self, data):
        """Expat character-data handler: only used for debug output."""
        self.__dbg("Character data: %s" % repr(data))

    # Inspired by http://infohost.nmt.edu/tcc/projects/tccwebstats/ims/asciifyChar.html
    #
    # The XML parser can hand back Unicode text. I examined (using Wireshark)
    # how Safari sends Unicode values back to del.icio.us (it basically
    # converts to UTF-8, and then URI-encodes each byte), and I replicate
    # that behavior here.
    def quoteUriUTF16(self, str):
        """Return 'str' percent-encoded as UTF-8 for use in a URL.

        Text is converted to UTF-8 first and every byte that is not URL-safe
        is written as '%XX' with upper-case hex digits; '/' is left as is
        (the default of the library's quote()). A byte string is assumed to
        be encoded already and is quoted byte for byte.
        """
        if isinstance(str, bytes):
            raw = str
        else:
            raw = str.encode('utf-8')
        # quote() escapes byte by byte, which is exactly the "UTF-8 first,
        # then URI-encode each byte" scheme described above.
        return _quote(raw)

    def _buildRetagURL(self, link, oldtag, newtag):
        """Return the 'posts/add' URL that re-posts 'link' with a tag renamed."""
        quote = self.quoteUriUTF16
        # Need to preserve the other tags on this link.
        retagged = [newtag if t == oldtag else t for t in link['tag'].split()]
        # Every value goes through the Unicode-aware quoting, because any of
        # them *could* contain Unicode characters.
        params = ("?url=%s&description=%s&extended=%s&replace=yes&time=%s&tags=%s"
                  % (quote(link['href']),
                     quote(link['description']),
                     # Notes are optional; an absent value is sent as empty.
                     quote(link.get('extended', '')),
                     # Preserve the time stamp.
                     quote(link['time']),
                     quote(" ".join(retagged))))
        self.__dbg("** del.icio.us params -> %s **" % params)
        return urljoin(self.api_endpoint, "posts/add%s" % params)

    def modifyTagForLinks(self, newLinks, oldtag, newtag):
        """Rename 'oldtag' to 'newtag' on every link in 'newLinks'.

        'newLinks' is a set of del.icio.us links as returned by
        'fetchLinksForTag()'. For example, if you submit three links that
        are tagged with the tag 'myblog', this method will modify each one,
        to be tagged with the string in "newtag" (such as 'myblog-posted').
        All other tags, the notes and the time stamp of each link are kept.

        Returns None. Raises KeyError if a link lacks 'href', 'description',
        'time' or 'tag'; errors from urlopen() are propagated.
        """
        # Make sure the authenticated opener is in place even if no fetch
        # was done through this object beforehand.
        self._installOpener()
        for link in newLinks:
            self.__dbg("** Changing tag on '%s'" % link['href'])
            # Basically, if you add a link that already exists, and specify
            # the 'replace=yes' parameter, then the del.icio.us 'add' method
            # will do a replace instead.
            url = self._buildRetagURL(link, oldtag, newtag)
            if "pinboard" in self.api_endpoint:
                self.__dbg("Pinboard requires 3 seconds in between API calls")
                sleep(3)
            self.__dbg("Modifying tags on links via URL: %s" % url)
            self.__dbg(self._fetch(url))
            self.__dbg("==============================")