"""
A quick stab at generating halfway clean EBNF from pyparsing grammars.

Basically, you setName() as many symbols as you can and then call 
generateForRoot(sym) with your root symbol.  A quick way to add the
names of the variables you keep the symbols in is to generate the
grammar in a function and return something like

	return dict((n, v) for n, v in locals().iteritems()
		if hasattr(v, "setName"))

from it.  You can then call the function addAllNames below on the result.
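
For illustration, a complete driver might look like this (the grammar here
is made up for the example; it assumes this module's addAllNames and
generateForRoot are in scope):

	import pyparsing

	def getGrammar():
		digits = pyparsing.Word(pyparsing.nums)
		sign = pyparsing.Literal("+") | pyparsing.Literal("-")
		integer = pyparsing.Optional(sign) + digits
		return dict((n, v) for n, v in locals().iteritems()
			if hasattr(v, "setName"))

	syms = getGrammar()
	addAllNames(syms)
	generateForRoot(syms["integer"])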

I go to some length to avoid duplicate rules and to control the number of
artificial names (symNNN).

This can't yet handle all of pyparsing but only what I needed for a
particular grammar.  It may fail on recursive grammars.

To teach it more of pyparsing, define _emit_<pyparsing class name>
functions and, if necessary, more EBNFNode classes.  If it's not
obvious how to do this (it probably isn't), contact m@tfiu.de.
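
For instance, a renderer for pyparsing.White might simply emit a
whitespace regular expression (a sketch, not part of this module; it
ignores the node entirely):

	def _emit_White(node, ctx):
		return EBNFRegex(r"\s+")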

This code is under the GPLv3, or, at your option, any later version.
"""

import sys


class EBNFNode(object):
	"""A base class for entries in Rule's expansions.

	Derived classes may override _simplify to attempt simplifications
	of their operands; the default implementation does nothing.
	"""
	simplified = False

	def simplify(self):
		if self.simplified:
			return
		self.simplified = True
		self._simplify()

	def isSimple(self):
		return False
	
	def _simplify(self):
		pass


class EBNFRegex(EBNFNode):
	"""My personal fantasy of an EBNF regular expression "operator".
	"""
	def __init__(self, re):
		self.re = re
	
	def __str__(self):
		return "RE<%s>"%self.re
	

class EBNFCircumfix(EBNFNode):
	"""EBNF circumfix operators, i.e., [] and {}
	"""
	def __init__(self, openclose, opd):
		self.openclose, self.opd = openclose, opd

	def __str__(self):
		return "%s %s %s"%(self.openclose[0], str(self.opd), self.openclose[1])

	def _simplify(self):
		self.opd.simplify()
		if isinstance(self.opd, Rule) and self.opd.isSimple():
			self.opd = self.opd.decref().expansion
	
	def isSimple(self):
		return True

	def mapRules(self, ruleMap):
		"""replaces the operand with its canonical rule according to
		ruleMap (see Context._removeDupes).
		"""
		c = self.opd
		if id(c) in ruleMap:
			self.opd = ruleMap[id(c.decref())].incref()
		elif isinstance(c, EBNFNode) and hasattr(c, "mapRules"):
			c.mapRules(ruleMap)


class EBNFInfix(EBNFNode):
	"""EBNF infix operators, i.e., " " and |.
	"""
	def __init__(self, infix, children):
		self.infix, self.children = infix, children

	def __str__(self):
		return self.infix.join(unicode(c) for c in self.children if c is not None)

	def _simplify(self):
		"""folds in selected children to cut down on the number of rules.

		The folded-in children are decreffed.
		"""
		newChildren = []
		for c in self.children:
			if c is None:
				continue
			elif isinstance(c, basestring):
				newChildren.append(c)
			elif isinstance(c, EBNFNode):
				newChildren.append(c)
			else: # We have a rule
				c.simplify()
				if (isinstance(c.expansion, EBNFInfix) 
							and c.expansion.infix==self.infix):
					newChildren.extend(c.expansion.children)
					c.decref()
				elif c.isSimple():
					newChildren.append(c.expansion)
					c.decref()
				else:
					newChildren.append(c)
		self.children = newChildren

	def mapRules(self, ruleMap):
		"""replaces children with their canonical rules according to
		ruleMap (see Context._removeDupes).
		"""
		for ind, c in enumerate(self.children):
			if id(c) in ruleMap:
				c.decref()
				self.children[ind] = ruleMap[id(c)]
				ruleMap[id(c)].incref()
			elif isinstance(c, EBNFNode) and hasattr(c, "mapRules"):
				c.mapRules(ruleMap)


class Rule(object):
	"""A potential rule.

	These have refcounts, and rules with a refcount of 1 are inlined.

	The expansion is set by the Context's getRule, which also drives
	the build-up of the graph of rules.

	The expansion is None, a string, an EBNFNode, or another Rule
	instance.
	"""
	def __init__(self, nonterm):
		self.nonterm, self.expansion = nonterm, None
		self.refcount = 0

	def __repr__(self):
		return self.nonterm

	def incref(self):
		self.refcount += 1
		return self

	def decref(self):
		self.refcount -= 1
		return self

	def simplify(self):
		if hasattr(self.expansion, "simplify"):
			self.expansion.simplify()

	def isSimple(self):
		if self.expansion is None or isinstance(self.expansion, basestring):
			return True
		else:
			return self.expansion.isSimple()
	
	def getEBNF(self):
		return "%s ::= %s"%(self.nonterm, self.expansion)

	def mapRules(self, ruleMap):
		if hasattr(self.expansion, "mapRules"): 
			self.expansion.mapRules(ruleMap)


class Context(object):
	"""An aggregator for the translation state.

	We keep a map from the ids of the pyparsing objects already processed
	to the Rules generated for them, plus a counter for the artificial
	(symNNN) names.
	"""
	def __init__(self):
		self.idMap = {}    # maps node ids to rules
		self.anonCount = 0
	
	def getRule(self, node):
		"""returns a Rule for the pyparsing object node.

		This will come from a cache if there already is a rule for that
		node; in that case, that rule's refcount is increased.
		"""
		if id(node) in self.idMap:
			return self.idMap[id(node)].incref()

		if hasattr(node, "name") and not node.name.startswith("Re:"):
			curName = node.name
		else:
			# no usable name on the node; make up an artificial one
			curName = "sym%04d"%self.anonCount
			self.anonCount += 1

		newRule = Rule(curName)
		self.idMap[id(node)] = newRule.incref()
		newRule.expansion = makeEBNF(node, self)
		return newRule

	def _findDupes(self):
		expansions = {}
		for r in self.idMap.itervalues():
			if r.refcount<1:  # don't bother with unused rules
				continue
			expansions.setdefault(unicode(r.expansion), []).append(r)
		return [e for e in expansions.itervalues() if len(e)>1]

	def _getRepresentative(self, dupeList):
		"""pops and returns a rule from dupeList, preferring rules with
		explicitly set (non-sym) names.
		"""
		for ind, d in enumerate(dupeList):
			if not d.nonterm.startswith("sym"):
				return dupeList.pop(ind)
		return dupeList.pop()

	def _removeDupes(self, dupes):
		"""iterates through all rules, asking them to replace dupes by
		the "canonical" rule.

		"canonical" is any rule in the list of dupes as returned by
		_findDupes that has a proper name, and the last one absent this.  
		"""
		undupeMap = {}
		for dupeList in dupes:
			canonical = self._getRepresentative(dupeList)
			for d in dupeList:
				undupeMap[id(d)] = canonical
		for r in self.idMap.itervalues():
			r.mapRules(undupeMap)

	def undupe(self):
		"""joins symbols having the same expansion into one and returns
		if it has done anything.

		This only does one pass.  It is possible that multiple invocations
		of this method will yield further undupes, so you might want
		to call this until it returns False.
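
		A minimal driver loop, with ctx a Context instance, might look
		like this (illustrative only):

			while ctx.undupe():
				pass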
		"""
		dupes = self._findDupes()
		if not dupes:
			return False
		self._removeDupes(dupes)
		return True


def _emit_Word(node, ctx):
	return 'WORD("%s")'%node.name

def _emit_Keyword(node, ctx):
	return '"%s"'%node.match

def _emit_Literal(node, ctx):
	return '"%s"'%node.match

def _emit_Suppress(node, ctx):
	return makeEBNF(node.expr, ctx)

def _emit_Regex(node, ctx):
	return EBNFRegex(node.pattern)

def _emit_Empty(node, ctx):
	return None

def _emit_StringEnd(node, ctx):
	return None

def _emit_Optional(node, ctx):
	return EBNFCircumfix("[]", ctx.getRule(node.expr))

def _emit_MatchFirst(node, ctx):
	# MatchFirst, Or, and And may carry any number of child expressions
	return EBNFInfix(" | ", [ctx.getRule(e) for e in node.exprs])

def _emit_Or(node, ctx):
	return EBNFInfix(" | ", [ctx.getRule(e) for e in node.exprs])

def _emit_And(node, ctx):
	return EBNFInfix(" ", [ctx.getRule(e) for e in node.exprs])

def _emit_ZeroOrMore(node, ctx):
	return EBNFCircumfix("{}", ctx.getRule(node.expr))

def _emit_OneOrMore(node, ctx):
	return EBNFInfix(" ", [ctx.getRule(node.expr), EBNFCircumfix("{}", 
		ctx.getRule(node.expr))])

def _emit_Forward(node, ctx):
	return ctx.getRule(node.expr)

def _emit_NOIMPL(node, ctx):
	sys.stderr.write("Not implemented: %s\n"%node.__class__.__name__)
	return "NOIMPL"


def makeEBNF(node, ctx):
	"""dispatches node to the _emit_<class name> function for its pyparsing
	class, falling back to _emit_NOIMPL.
	"""
	return globals().get("_emit_"+node.__class__.__name__, _emit_NOIMPL)(
		node, ctx)


def getRuleStrings(rules):
	"""returns a list of string representations of the Rule instances
	in rules

	This list is sorted alphabetically by symbol names.
	"""
	res = []
	for r in rules:
		if r.refcount<1:
			continue
		res.append(r.getEBNF())
	res.sort()
	return res


def generateForRoot(root):
	"""prints EBNF rules for the grammar rooted in the pyparsing symbol
	root to stdout.
	"""
	ctx = Context()
	rootRule = ctx.getRule(root)
	rootRule.simplify()
	# undupe until nothing changes any more (see Context.undupe)
	while ctx.undupe():
		pass
	print "\n".join(getRuleStrings(ctx.idMap.values()))


def addAllNames(localsDict):
	"""calls setName on every pyparsing object in localsDict, using the
	name it is bound to there.
	"""
	for n, ob in localsDict.iteritems():
		if hasattr(ob, "setName"):
			try:
				ob.setName(n)
			except TypeError:
				# there's an uninstantiated class in the namespace.  Never mind.
				continue


if __name__=="__main__":
	# Test code, this won't work for you
	from gavo.base import unitconv
	unitconv.getUnitGrammar()
	syms = unitconv.getUnitGrammar.symbols
	addAllNames(syms)
	generateForRoot(syms["input"])
