"""
Helper classes that make writing SAX content handlers easier.
"""

import re
import sys
import traceback
import weakref
import xml.sax
from xml.sax.handler import ContentHandler


class Error(Exception):
	"""is the exception raised by the handler classes in this module."""


class StartEndHandler(ContentHandler):
	"""This class provides startElement, endElement and characters
	methods that translate events into method calls.

	When an opening tag is seen, we look for a _start_<element name>
	method and, if present, call it with the name and the attributes.
	When a closing tag is seen, we try to call _end_<element name> with
	name, attributes and contents.  If the _end_xxx method returns a
	string, this value will be added to the content of the enclosing
	element.

	If no element-specific method exists, _defaultStart(name, attrs) and
	_defaultEnd(name, attrs, contents) are called instead when defined.

	Element names are passed through cleanupName before the lookup, so
	namespace prefixes are dropped and dashes become underscores.
	"""
	# basestring only exists on Python 2; on Python 3 str is the one
	# text type, so fall back to it instead of failing with a NameError.
	try:
		_stringTypes = basestring
	except NameError:
		_stringTypes = str

	def __init__(self):
		ContentHandler.__init__(self)
		# Handler methods are looked up on this weak proxy of self.
		# NOTE(review): presumably a proxy is used to avoid a reference
		# cycle and to let subclasses point the lookup elsewhere -- confirm.
		self.realHandler = weakref.proxy(self)
		# (cleaned name, attrs) pairs of the currently open elements
		self.elementStack = []
		# one list of text fragments per open element; the bottom list
		# receives whatever the outermost _end_ handler returns
		self.contentsStack = [[]]

	def processingInstruction(self, target, data):
		"""adds the data of a processing instruction to the current content.
		"""
		self.contentsStack[-1].append(data)

	def cleanupName(self, name, 
			cleanupPat=re.compile(".*:")):  #Nuke namespaces
		"""returns name without namespace prefix and with dashes replaced
		by underscores, so it can be used as part of a method name.
		"""
		return cleanupPat.sub("", name).replace("-", "_")

	def startElement(self, name, attrs):
		"""opens a new content level and dispatches to _start_<name> or,
		failing that, to _defaultStart.
		"""
		self.contentsStack.append([])
		name = self.cleanupName(name)
		self.elementStack.append((name, attrs))
		if hasattr(self.realHandler, "_start_%s"%name):
			getattr(self.realHandler, "_start_%s"%name)(name, attrs)
		elif hasattr(self, "_defaultStart"):
			self._defaultStart(name, attrs)

	def endElement(self, name, suppress=False):
		"""closes the current element and dispatches to _end_<name> or,
		failing that, to _defaultEnd.

		A string returned by the handler is appended to the content of the
		enclosing element unless suppress is true.
		"""
		contents = "".join(self.contentsStack.pop())
		name = self.cleanupName(name)
		_, attrs = self.elementStack.pop()
		res = None
		if hasattr(self.realHandler, "_end_%s"%name):
			res = getattr(self.realHandler,
				"_end_%s"%name)(name, attrs, contents)
		elif hasattr(self, "_defaultEnd"):
			res = self._defaultEnd(name, attrs, contents)
		if isinstance(res, self._stringTypes) and not suppress:
			self.contentsStack[-1].append(res)

	def characters(self, chars):
		"""adds character data to the content of the current element.
		"""
		self.contentsStack[-1].append(chars)
	
	def getResult(self):
		"""returns the first item collected at document level, i.e., what
		the handler of the outermost element returned.

		An IndexError is raised if nothing has been collected there.
		"""
		return self.contentsStack[0][0]

	def getParentTag(self):
		"""returns the name of the innermost element on the element stack,
		or None if no element is open.

		Within _end_ handlers, this is the parent of the element being
		closed, since that element has already been popped.
		"""
		if self.elementStack:
			return self.elementStack[-1][0]
		return None


class NamedNode(object):
	"""is a wrapper that handling functions of node builders can return
	to have their node entered under a name different from the element's.

	name is the name to enter the node under, node is the node itself.
	"""
	def __init__(self, name, node):
		self.name = name
		self.node = node

class SimpleNodeBuilder(ContentHandler):
	"""a node builder is a content handler working more along the
	lines of conventional parse tree builders.

	This means that for every element we want handled, there is a
	method _make_<elementname>(name, attrs, children) that receives
	the name of the element (so you can reuse implementations for
	elements behaving analogously), the attributes of the element
	and a list of children.  The children come in a list of tuples
	(name, content), where name is the element name and content
	is whatever the _make_x method returned for that element.
	Text nodes have a name of None.

	If a method _make_default is defined, it will be called if no handler
	for a node is defined.  Otherwise, elements with no handlers are errors.

	A _make_x method returning None adds nothing to the parent's children.
	To have a node entered under a name different from the element name,
	return a NamedNode.

	In general, text children are deleted when they are whitespace
	only, and they are joined to form a single one.  However, you
	can define a set keepWhitespaceNames containing the names of
	elements for which this is not wanted.  Don't add to the class-level
	set, make a new one.

	On errors during node construction, the class will call a
	handleError method with a sys.exc_info tuple.  The default
	implementation raises an Error giving the parse position (when
	known) and the original exception.

	If the parser calls the setDocumentLocator method, its result is available
	as the locator attribute.
	"""
	keepWhitespaceNames = set()

	def __init__(self):
		ContentHandler.__init__(self)
		# (name, attrs) pairs of the currently open elements
		self.elementStack = []
		# one list of (name, node) children per open element; the bottom
		# list receives the node built for the document element
		self.childStack = [[]]
		self.locator = None

	def handleError(self, exc_info):
		"""raises an Error describing the exception in exc_info.

		exc_info is a tuple as returned by sys.exc_info().  The message
		contains line and column if a document locator is available.
		"""
		if self.locator is None:
			# No parser gave us a locator; still report the actual problem
			# rather than failing with an AttributeError on None.
			position = "unknown position"
		else:
			position = "%s:%s"%(self.locator.getLineNumber(),
				self.locator.getColumnNumber())
		raise Error("Error while parsing XML at %s (%s)"%(
			position, exc_info[1]))

	def setDocumentLocator(self, locator):
		"""stores the parser's locator for use in error messages.
		"""
		self.locator = locator

	def startElement(self, name, attrs):
		"""notes the new element and opens a fresh list for its children.
		"""
		self.elementStack.append((name, attrs))
		self.childStack.append([])

	def _enterNewNode(self, name, attrs, newNode):
		"""appends newNode to the children of the enclosing element.

		The child is entered as (name, newNode), except for NamedNodes,
		which provide their own name.
		"""
		if isinstance(newNode, NamedNode):
			newChild = (newNode.name, newNode.node)
		else:
			newChild = (name, newNode)
		self.childStack[-1].append(newChild)

	def endElement(self, name):
		"""builds the node for the element just closed by calling
		_make_<name> (or _make_default) and enters a non-None result
		into the parent's children.

		Exceptions raised while doing this are passed to handleError.
		"""
		_, attrs = self.elementStack.pop()
		try:
			children = self.childStack.pop()
			if name not in self.keepWhitespaceNames:
				children = self._cleanTextNodes(children)
			if hasattr(self, "_make_"+name):
				handler = getattr(self, "_make_"+name)
			elif hasattr(self, "_make_default"):
				handler = getattr(self, "_make_default")
			else:
				raise Error("No handler for %s"%name)
			newNode = handler(name, attrs, children)
			if newNode is not None:
				self._enterNewNode(name, attrs, newNode)
		except Exception:
			# Deliberately not a bare except: SystemExit and
			# KeyboardInterrupt must not be turned into parse errors.
			self.handleError(sys.exc_info())

	def characters(self, content):
		"""enters character data as a text child (name None) of the
		current element.
		"""
		self.childStack[-1].append((None, content))

	def getResult(self):
		"""returns the node of the first child entered at document level.

		An IndexError is raised if no node has been entered there.
		"""
		return self.childStack[0][0][1]

	def _cleanTextNodes(self, children):
		"""joins adjacent text nodes and prunes whitespace-only nodes.
		"""
		cleanedChildren, text = [], []
		for childName, node in children:
			if childName is None:
				text.append(node)
			else:
				# an element ends the current run of text nodes
				if text:
					chars = "".join(text)
					if chars.strip():
						cleanedChildren.append((None, chars))
					text = []
				cleanedChildren.append((childName, node))
		# flush text following the last element child
		chars = "".join(text)
		if chars.strip():
			cleanedChildren.append((None, chars))
		return cleanedChildren

	def getContentWS(self, children):
		"""returns the entire text content of the node in a string without doing
		whitespace normalization.
		"""
		return "".join([n[1] for n in children if n[0] is None])

	def getContent(self, children):
		"""returns the entire text content of the node in a string, with
		leading and trailing whitespace stripped.

		This probably won't do what you want in mixed-content models.
		"""
		return self.getContentWS(children).strip()


class NoWaitingChild(Error):
	"""is raised by NodeBuilder.getWaitingChild when no child of the
	requested name is found.
	"""


class NodeBuilder(SimpleNodeBuilder):
	"""is a SimpleNodeBuilder with some additional nifty features.

	NodeBuilders support a limited id/idref mechanism.  Nodes with
	id will get entered in a dictionary and can be retrieved (as
	name/node pairs) via getById.  However, this only is possible
	after the element with the id has been closed.  There is no
	forward declaration.

	In some cases, you want parents to provide information to their
	children while they are constructed.  This is a bit clumsy, but
	for such cases, you can define a _start_<element> method that can
	leave something in a dictionary through the pushProperty method
	that can be retrieved by children through the getProperty method.
	When constructing the parent node, you must call popProperty
	on this.

	As an added hack, you can register nodes for
	addition to the nearest enclosing element of a type via
	registerDelayedChild.  This is provided to allow methods to
	change the tree higher up if necessary; Here's an example:
	You have <foo><bar><baz/></bar></foo>, and the handler for
	baz decides it wants to have a Bla sibling.  It can then call
	registerDelayedChild("bar", ("bla", Bla())).  Delayed children
	are put into the child list as they are, so they must be (name, node)
	pairs like all other children.
	"""

	def __init__(self):
		# Initialize the base class; calling NodeBuilder.__init__ here
		# would recurse forever.
		SimpleNodeBuilder.__init__(self)
		# element name -> (children to prepend, children to append)
		self.delayedChildren = {}
		# property name -> stack of values
		self.properties = {}
		# id attribute value -> (name, node) pair
		self.elementsById = {}

	def registerDelayedChild(self, parentName, child, atfront=False):
		"""adds child for addition to the next enclosing parentName element.

		child must be a (name, node) pair.  With atfront, it is added in
		front of the parent's other children rather than after them.
		"""
		atFront, atBack = self.delayedChildren.setdefault(
			parentName, ([], []))
		if atfront:
			atFront.append(child)
		else:
			atBack.append(child)

	def pushProperty(self, propName, value):
		"""makes value available to node constructors under the name propName.

		It is recommended to use <element>.<name> as propname.
		"""
		self.properties.setdefault(propName, []).append(value)
	
	def popProperty(self, propName):
		"""retrieves (and removes) the last value pushed as propName.
		"""
		return self.properties[propName].pop()
	
	def getProperty(self, propName):
		"""returns the current value of the property propName.

		Non-existing properties will be signalled by raising an IndexError.
		"""
		try:
			return self.properties[propName][-1]
		except (IndexError, KeyError):
			raise IndexError("Property %s is not set"%propName)

	def startElement(self, name, attrs):
		"""opens the element and calls _start_<name>(name, attrs) if such
		a method is defined.
		"""
		SimpleNodeBuilder.startElement(self, name, attrs)
		if hasattr(self, "_start_"+name):
			getattr(self, "_start_"+name)(name, attrs)

	def _enterNewNode(self, name, attrs, newNode):
		"""enters newNode into the parent's children and, if the element
		has an id attribute, registers the resulting (name, node) pair
		for getById.

		An Error is raised if the id has been used before.
		"""
		SimpleNodeBuilder._enterNewNode(self, name, attrs, newNode)
		if "id" in attrs:
			if attrs["id"] in self.elementsById:
				raise Error("Duplicate id: %s"%attrs["id"])
			# the base class has just appended the (name, node) pair
			self.elementsById[attrs["id"]] = self.childStack[-1][-1]

	def endElement(self, name):
		"""adds any delayed children registered for name to the children
		of the element being closed, then builds its node as usual.
		"""
		if name in self.delayedChildren:
			atFront, atBack = self.delayedChildren.pop(name)
			self.childStack[-1][:0] = atFront
			self.childStack[-1].extend(atBack)
		return SimpleNodeBuilder.endElement(self, name)

	def getById(self, id):
		"""returns the (name, node) pair registered for id.

		A KeyError is raised for unknown ids.
		"""
		return self.elementsById[id]
	
	def getNodesDict(self, children):
		"""returns children as a dictionary of lists.

		children is a list of the type passed to the _make_xxx methods.
		The keys are the child names, the values lists of the nodes with
		that name in document order.
		"""
		res = {}
		for name, val in children:
			res.setdefault(name, []).append(val)
		return res

	def getWaitingChild(self, nodeName, maxLevels=100, startLevel=-1):
		"""returns the first child with nodeName waiting to be adopted in the
		childStack.

		maxLevels gives a maximum number of childStack levels we descend
		before giving up.  startLevel is the index into childStack of the
		level the search starts at; the default is the innermost level.

		If no matching child can be found, a NoWaitingChild exception is raised.
		"""
		depth = len(self.childStack)
		level = startLevel
		if level < 0:
			# normalize so that walking down cannot wrap around the stack
			level += depth
		while maxLevels!=0 and 0<=level<depth:
			for name, element in self.childStack[level]:
				if name==nodeName:
					return element
			level -= 1
			maxLevels -= 1
		raise NoWaitingChild(nodeName)

	def filterChildren(self, children, targetNode):
		"""returns a list of children that are of type targetNode, and a
		list of all other children.
		"""
		return [child for child in children if child[0]==targetNode
			], [child for child in children if child[0]!=targetNode]

	def _processChildren(self, parent, name, childMap, children, 
			ignoreUnknownElements=False):
		"""adds children to parent.

		Parent is some class (usually a record.Record instance),
		childMap maps child names to methods to call the children with,
		and children is a sequence as passed to the _make_xxx methods.
		name is the name of the parent element and is only used in
		error messages.

		Children with names missing from childMap cause an Error
		unless ignoreUnknownElements is true, in which case they are skipped.

		The function returns parent for convenience.
		"""
		for childName, val in children:
			# Only the lookup is guarded, so KeyErrors raised within a
			# handler are not mistaken for unknown children.
			try:
				handler = childMap[childName]
			except KeyError:
				if not ignoreUnknownElements:
					raise Error("%s elements may not have %s children"%(
						name, childName))
			else:
				handler(val)
		return parent


def makeAttString(atts, ignoredAttrs=frozenset()):
	"""returns the attributes in atts as a string of key=value pairs.

	atts is anything dict() accepts (e.g., a mapping or SAX attributes
	object), ignoredAttrs a container of keys to leave out.  Values are
	rendered as the repr of their string form.  A non-empty result
	starts with a blank so it can directly follow an element name; without
	attributes, an empty string is returned.
	"""
	# items() rather than iteritems() so this works on Python 2 and 3
	attString = " ".join(["%s=%s"%(key, repr(str(value))) 
		for key, value in dict(atts).items() if key not in ignoredAttrs])
	if attString:
		attString = " "+attString
	return attString


def parse(input, procClass, *args, **kwargs):
	"""parses the XML in the string input and returns the handler's result.

	The handler is made by calling procClass with args and kwargs; what
	is returned is the value of its getResult method after parsing.
	"""
	handler = procClass(*args, **kwargs)
	xml.sax.parseString(input, handler)
	return handler.getResult()

