#!/usr/bin/env python

import os
import subprocess
import sys
import time
import urllib
import xml.sax
from email import utils as emailutils
from pysqlite2 import dbapi2 as sqlite
from xml.sax.handler import ContentHandler


# Directory in the user's home where this script keeps its state; the
# sqlite file opened by DownloadedRegistry lives in here.  The directory
# is created as a side effect of importing/running this module.
DATA_DIR = os.path.expanduser("~/.getpods")
if not os.path.isdir(DATA_DIR):
	# os.mkdir only creates the last path component.
	os.mkdir(DATA_DIR)


class DownloadedRegistry(list):
	"""A sqlite table containing guids of the podcasts we've already
	pulled.

	The table is called processedguids and lives in the file "db" below
	DATA_DIR.  The connection is opened with isolation_level=None, so
	no explicit commit calls are made anywhere in this class.

	The list base class is kept for compatibility only; none of list's
	own storage is used, all state is in the database.
	"""
	# Maximum number of guids bound in one SELECT.  SQLite limits the
	# number of host parameters per statement (the default was 999 for a
	# long time), so getNew queries in chunks of this size.
	_chunkSize = 500

	def __init__(self):
		self.conn = sqlite.connect(os.path.join(DATA_DIR, "db"),
			isolation_level=None)
		self.conn.execute("CREATE TABLE IF NOT EXISTS processedguids"
			" (guid TEXT PRIMARY KEY)")

	def __contains__(self, guid):
		"""returns True if guid has been registered with add before.
		"""
		return bool(list(self.conn.execute(
			"SELECT guid FROM processedguids WHERE guid=?",
			(guid,))))

	def add(self, guid):
		"""registers guid as processed.

		Adding a guid that is already present is a no-op.
		"""
		self.conn.execute("INSERT OR IGNORE INTO processedguids"
			" (guid) VALUES (?)", (guid,))

	def getNew(self, guids):
		"""returns a list of those members of guids not yet registered.

		guids may be any iterable (it is materialised once).  Order and
		duplicates of the unregistered guids are preserved.  An empty
		input yields an empty list without touching the database.
		"""
		guids = list(guids)
		seenGUIDs = set()
		# Query in chunks so that long feeds cannot exceed SQLite's limit
		# on the number of "?" parameters in a single statement.
		for start in range(0, len(guids), self._chunkSize):
			chunk = guids[start:start+self._chunkSize]
			placeholders = ",".join("?"*len(chunk))
			seenGUIDs.update(row[0]
				for row in self.conn.execute("SELECT guid FROM processedguids"
					" WHERE guid IN (%s)"%placeholders, tuple(chunk)))
		return [guid for guid in guids if guid not in seenGUIDs]


# Maps the short podcast name given on the command line (also used as
# the prefix of the generated file names) to the feed URLs belonging
# to it.
podcastList = {
	"forschung": [
		"http://www.dradio.de/rss/podcast/sendungen/forschak/",
		"http://www.dradio.de/rss/podcast/sendungen/computer/",
		"http://www.dradio.de/rss/podcast/sendungen/wib/",
	],
	"hintergrund": [
		"http://www.dradio.de/rss/podcast/sendungen/hintergrundpolitik/",
		"http://www.dradio.de/rss/podcast/sendungen/hiwi/",
	],
	"s2wissen": [
		"http://www1.swr.de/podcast/xml/swr2/wissen.xml",
		"http://www1.swr.de/podcast/xml/swr2/aula.xml",
	],
	"einewelt": [
		"http://www.dradio.de/rss/podcast/sendungen/einewelt/",
	],
	"endederwelt": [
		"http://www.br-online.de/podcast/ende-der-welt/cast.xml",
	],
	"sindlas": [
		"http://www.br-online.de/podcast/buergermeister-sindlasreuth/cast.xml",
	],
}


class DictBasedHandler(ContentHandler):
	"""A SAX content handler dispatching element events through dicts.

	Subclasses override one or both of the class attributes

	* starthandlers -- maps element names to functions called as
	  f(handler, name, attrs) when such an element is opened,
	* endhandlers -- maps element names to functions called as
	  f(handler, name, attrs, content) when such an element is closed.

	content is the character data seen since the most recent start or
	end tag, i.e., for mixed content only the text after the last child
	element is passed.  Elements without an entry in the respective
	dict are silently ignored.
	"""
	# Empty defaults, so subclasses only need to define the table(s)
	# they actually use.
	starthandlers = {}
	endhandlers = {}

	def __init__(self):
		ContentHandler.__init__(self)
		# (name, attrs) pairs of the currently open elements, outermost first
		self.elementStack = []
		# character chunks collected since the last start or end tag
		self.charCache = []

	def startElement(self, name, attrs):
		# The element is pushed before its handler runs, so isInsideOf(name)
		# is already true within the start handler.
		self.elementStack.append((name, attrs))
		if name in self.starthandlers:
			self.starthandlers[name](self, name, attrs)
		self.charCache = []

	def endElement(self, name):
		# The element is popped before its handler runs; attrs are those
		# given with the matching start tag.
		_, attrs = self.elementStack.pop()
		if name in self.endhandlers:
			self.endhandlers[name](self, name, attrs, "".join(self.charCache))
		self.charCache = []

	def characters(self, chars):
		self.charCache.append(chars)

	def isInsideOf(self, name):
		"""returns True if an element called name is currently open.
		"""
		return name in [elName for elName, _ in self.elementStack]


class PodcastCollector(DictBasedHandler):
	"""A SAX handler collecting one dict per downloadable RSS item.

	Each collected dict has the keys

	* basename -- the basename passed to the constructor,
	* url -- the url attribute of the item's enclosure element,
	* pubDate -- a (year, month, day) tuple,
	* guid -- the item's guid, or its url if the feed gives no guid,

	plus title if the item has one.  Items without an enclosure url are
	dropped, since there is nothing to download for them.
	"""
	def __init__(self, basename):
		DictBasedHandler.__init__(self)
		self.basename = basename
		self.podcasts = []

	def _startItem(self, name, attrs):
		self.curItem = {"basename": self.basename}

	def _endItem(self, name, attrs, content):
		item = self.curItem
		del self.curItem
		if "url" not in item:
			return

		# pubDate is optional in feeds and may be malformed; parsedate_tz
		# is only called on non-empty input and returns None if it cannot
		# parse it.  Fall back to the current local date in both cases so
		# a single odd item does not abort the whole run.
		parsedDate = None
		rawDate = item.get("pubDate", "").strip()
		if rawDate:
			parsedDate = emailutils.parsedate_tz(rawDate)
		if parsedDate is None:
			item["pubDate"] = tuple(time.localtime()[:3])
		else:
			item["pubDate"] = parsedDate[:3]

		# guid is optional as well; the enclosure URL serves as a
		# stand-in identifier.
		if "guid" not in item:
			item["guid"] = item["url"]
		self.podcasts.append(item)

	def _setItemProperty(self, name, attrs, content):
		# title & co. also occur on channel level; only the ones within
		# items are of interest here.
		if self.isInsideOf("item"):
			self.curItem[name] = content

	def _startEnclosure(self, name, attrs):
		# Ignore enclosures outside of items and enclosures lacking a url
		# rather than failing on them.
		if self.isInsideOf("item"):
			url = attrs.get("url")
			if url:
				self.curItem["url"] = url

	starthandlers = {
		"item": _startItem,
		"enclosure": _startEnclosure,
	}

	endhandlers = {
		"item": _endItem,
		"title": _setItemProperty,
		"pubDate": _setItemProperty,
		"guid": _setItemProperty,
	}

	def getResult(self):
		"""returns the list of item dicts collected so far.
		"""
		return self.podcasts


def getItems(basename, urllist):
	"""returns a list of all items found in the feeds at the URLs in urllist.

	An item is a dict containing, among others, guid, pubDate, and url;
	basename is stored in each item under the key basename.  No filtering
	against already downloaded items takes place here.

	Network and XML parse errors propagate to the caller.
	"""
	result = []
	for url in urllist:
		handler = PodcastCollector(basename)
		src = urllib.urlopen(url)
		# Make sure the connection is released even if reading fails.
		try:
			feedData = src.read()
		finally:
			src.close()
		xml.sax.parseString(feedData, handler)
		result.extend(handler.getResult())
	return result


def recodeTo(url, destFName):
	f = urllib.urlopen(url)
	recoder = subprocess.Popen("nice -n 20 /usr/bin/lame --quiet --mp3input "
			"-m m -h -b 32 -".split()+[destFName], stdout=subprocess.PIPE,
			stderr=subprocess.STDOUT, stdin=subprocess.PIPE)
	while not recoder.poll():
		buf = f.read(1000000)
		if not buf:
			break
		recoder.stdin.write(buf)
	recoder.stdin.close()
	res = recoder.wait()
	if res:
		print "Ups:", url, "".join(recoder.stdout.read())


def getNameFor(item):
	"""returns a file name for item that does not exist yet.

	The name consists of item's basename, its publication date as
	YYYYMMDD, and a two-digit serial number, which is the lowest one
	(starting at 0) for which no file of that name is present.
	"""
	year, month, day = item["pubDate"]
	prefix = "%s%s%02d%02d"%(item["basename"], year, int(month), int(day))
	serial = 0
	candidate = "%s-%02d.mp3"%(prefix, serial)
	while os.path.exists(candidate):
		serial += 1
		candidate = "%s-%02d.mp3"%(prefix, serial)
	return candidate


def retrieveAndRecode(item):
	"""fetches item's enclosure and stores the recoded result under a
	fresh name obtained from getNameFor.
	"""
	sourceURL = item["url"]
	destName = getNameFor(item)
	recodeTo(sourceURL, destName)


def filterOutDownloaded(items, registry):
	"""returns those members of items whose guid registry reports as new.

	registry needs a getNew method taking a list of guids and returning
	the ones not seen before.  The order of items is kept.
	"""
	candidateGUIDs = [entry["guid"] for entry in items]
	unseen = set(registry.getNew(candidateGUIDs))
	fresh = []
	for entry in items:
		if entry["guid"] in unseen:
			fresh.append(entry)
	return fresh


def usage():
	print 'Usage: %s <podcast> ["learn"]'%sys.argv[0]
	print "Available podcasts: ", " ".join(podcastList.keys())


def main():
	"""command line entry point.

	Called with a podcast name, downloads and recodes all items of that
	podcast not yet in the registry and registers them.  With the
	additional argument "learn", the items are only registered and not
	downloaded.  Bad arguments, including a podcast name not in
	podcastList, yield a usage message and exit status 1.
	"""
	if len(sys.argv)==2:
		learn = False
	elif len(sys.argv)==3 and sys.argv[2]=="learn":
		learn = True
	else:
		usage()
		sys.exit(1)
	podcastName = sys.argv[1]
	# An unknown name should give the list of known ones rather than a
	# KeyError traceback.
	if podcastName not in podcastList:
		usage()
		sys.exit(1)
	items = getItems(podcastName, podcastList[podcastName])
	registry = DownloadedRegistry()
	newItems = filterOutDownloaded(items, registry)
	for item in newItems:
		if not learn:
			retrieveAndRecode(item)
		# NOTE(review): the guid is registered whether or not the download
		# succeeded, since retrieveAndRecode does not report failures.
		registry.add(item["guid"])


# Only run the downloader when executed as a script, not on import.
if __name__=="__main__":
	main()

