pubsubclient: 89a6cec59a0389bc5790a8133897bcb88b129bf1

     1: ##   simplexml.py based on Mattew Allum's xmlstream.py
     2: ##
     3: ##   Copyright (C) 2003-2005 Alexey "Snake" Nezhdanov
     4: ##
     5: ##   This program is free software; you can redistribute it and/or modify
     6: ##   it under the terms of the GNU General Public License as published by
     7: ##   the Free Software Foundation; either version 2, or (at your option)
     8: ##   any later version.
     9: ##
    10: ##   This program is distributed in the hope that it will be useful,
    11: ##   but WITHOUT ANY WARRANTY; without even the implied warranty of
    12: ##   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    13: ##   GNU General Public License for more details.
    14: 
    15: # $Id: simplexml.py,v 1.27 2005/04/30 07:20:27 snakeru Exp $
    16: 
    17: """Simplexml module provides xmpppy library with all needed tools to handle XML nodes and XML streams.
    18: I'm personally using it in many other separate projects. It is designed to be as standalone as possible."""
    19: 
    20: import xml.parsers.expat
    21: 
    22: def XMLescape(txt):
    23: 	"""Returns provided string with symbols & < > " replaced by their respective XML entities."""
    24: 	# replace also FORM FEED and ESC, because they are not valid XML chars
    25: 	return txt.replace("&", "&").replace("<", "<").replace(">", ">").replace('"', """).replace(u'\x0C', "").replace(u'\x1B', "")
    26: 
    27: ENCODING='utf-8'
    28: def ustr(what):
    29: 	"""Converts object "what" to unicode string using it's own __str__ method if accessible or unicode method otherwise."""
    30: 	if type(what) == type(u''): return what
    31: 	try: r=what.__str__()
    32: 	except AttributeError: r=str(what)
    33: 	if type(r)<>type(u''): return unicode(r,ENCODING)
    34: 	return r
    35: 
    36: class Node(object):
    37: 	""" Node class describes syntax of separate XML Node. It have a constructor that permits node creation
    38: 		from set of "namespace name", attributes and payload of text strings and other nodes.
    39: 		It does not natively support building node from text string and uses NodeBuilder class for that purpose.
    40: 		After creation node can be mangled in many ways so it can be completely changed.
    41: 		Also node can be serialised into string in one of two modes: default (where the textual representation
    42: 		of node describes it exactly) and "fancy" - with whitespace added to make indentation and thus make
    43: 		result more readable by human.
    44: 
    45: 		Node class have attribute FORCE_NODE_RECREATION that is defaults to False thus enabling fast node
    46: 		replication from the some other node. The drawback of the fast way is that new node shares some
    47: 		info with the "original" node that is changing the one node may influence the other. Though it is
    48: 		rarely needed (in xmpppy it is never needed at all since I'm usually never using original node after
    49: 		replication (and using replication only to move upwards on the classes tree).
    50: 	"""
    51: 	FORCE_NODE_RECREATION=0
    52: 	def __init__(self, tag=None, attrs={}, payload=[], parent=None, node=None):
    53: 		""" Takes "tag" argument as the name of node (prepended by namespace, if needed and separated from it
    54: 			by a space), attrs dictionary as the set of arguments, payload list as the set of textual strings
    55: 			and child nodes that this node carries within itself and "parent" argument that is another node
    56: 			that this one will be the child of. Also the __init__ can be provided with "node" argument that is 
    57: 			either a text string containing exactly one node or another Node instance to begin with. If both
    58: 			"node" and other arguments is provided then the node initially created as replica of "node"
    59: 			provided and then modified to be compliant with other arguments."""
    60: 		if node:
    61: 			if self.FORCE_NODE_RECREATION and isinstance(node, Node): 
    62: 				node=str(node)
    63: 			if not isinstance(node, Node): 
    64: 				node=NodeBuilder(node,self)
    65: 			else:
    66: 				self.name,self.namespace,self.attrs,self.data,self.kids,self.parent = node.name,node.namespace,{},[],[],node.parent
    67: 				for key  in node.attrs.keys(): self.attrs[key]=node.attrs[key]
    68: 				for data in node.data: self.data.append(data)
    69: 				for kid  in node.kids: self.kids.append(kid)
    70: 		else: self.name,self.namespace,self.attrs,self.data,self.kids,self.parent = 'tag','',{},[],[],None
    71: 
    72: 		if tag: self.namespace, self.name = ([self.namespace]+tag.split())[-2:]
    73: 		if parent: self.parent = parent
    74: 		if self.parent and not self.namespace: self.namespace=self.parent.namespace
    75: 		for attr in attrs.keys():
    76: 			self.attrs[attr]=attrs[attr]
    77: 		if isinstance(payload, basestring): payload=[payload]
    78: 		for i in payload:
    79: 			if isinstance(i, Node): self.addChild(node=i)
    80: 			else: self.data.append(ustr(i))
    81: 
    82: 	def __str__(self,fancy=0):
    83: 		""" Method used to dump node into textual representation.
    84: 			if "fancy" argument is set to True produces indented output for readability."""
    85: 		s = (fancy-1) * 2 * ' ' + "<" + self.name
    86: 		if self.namespace:
    87: 			if not self.parent or self.parent.namespace!=self.namespace:
    88: 				s = s + ' xmlns="%s"'%self.namespace
    89: 		for key in self.attrs.keys():
    90: 			val = ustr(self.attrs[key])
    91: 			s = s + ' %s="%s"' % ( key, XMLescape(val) )
    92: 		s = s + ">"
    93: 		cnt = 0 
    94: 		if self.kids:
    95: 			if fancy: s = s + "\n"
    96: 			for a in self.kids:
    97: 				if not fancy and (len(self.data)-1)>=cnt: s=s+XMLescape(self.data[cnt])
    98: 				elif (len(self.data)-1)>=cnt: s=s+XMLescape(self.data[cnt].strip())
    99: 				s = s + a.__str__(fancy and fancy+1)
   100: 				cnt=cnt+1
   101: 		if not fancy and (len(self.data)-1) >= cnt: s = s + XMLescape(self.data[cnt])
   102: 		elif (len(self.data)-1) >= cnt: s = s + XMLescape(self.data[cnt].strip())
   103: 		if not self.kids and s[-1:]=='>':
   104: 			s=s[:-1]+' />'
   105: 			if fancy: s = s + "\n"
   106: 		else:
   107: 			if fancy and not self.data: s = s + (fancy-1) * 2 * ' '
   108: 			s = s + "</" + self.name + ">"
   109: 			if fancy: s = s + "\n"
   110: 		return s
   111: 	def addChild(self, name=None, attrs={}, payload=[], namespace=None, node=None):
   112: 		""" If "node" argument is provided, adds it as child node. Else creates new node from
   113: 			the other arguments' values and adds it as well."""
   114: 		if namespace: name=namespace+' '+name
   115: 		if node:
   116: 			newnode=node
   117: 			node.parent = self
   118: 		else: newnode=Node(tag=name, parent=self, attrs=attrs, payload=payload)
   119: 		self.kids.append(newnode)
   120: 		return newnode
   121: 	def addData(self, data):
   122: 		""" Adds some CDATA to node. """
   123: 		self.data.append(ustr(data))
   124: 	def clearData(self):
   125: 		""" Removes all CDATA from the node. """
   126: 		self.data=[]
   127: 	def delAttr(self, key):
   128: 		""" Deletes an attribute "key" """
   129: 		del self.attrs[key]
   130: 	def delChild(self, node, attrs={}):
   131: 		""" Deletes the "node" from the node's childs list, if "node" is an instance.
   132: 			Else deletes the first node that have specified name and (optionally) attributes. """
   133: 		if not isinstance(node, Node): node=self.getTag(node,attrs)
   134: 		self.kids.remove(node)
   135: 		return node
   136: 	def getAttrs(self):
   137: 		""" Returns all node's attributes as dictionary. """
   138: 		return self.attrs
   139: 	def getAttr(self, key):
   140: 		""" Returns value of specified attribute. """
   141: 		try: return self.attrs[key]
   142: 		except: return None
   143: 	def getChildren(self):
   144: 		""" Returns all node's child nodes as list. """
   145: 		return self.kids
   146: 	def getData(self):
   147: 		""" Returns all node CDATA as string (concatenated). """
   148: 		return ''.join(self.data)
   149: 	def getName(self):
   150: 		""" Returns the name of node """
   151: 		return self.name
   152: 	def getNamespace(self):
   153: 		""" Returns the namespace of node """
   154: 		return self.namespace
   155: 	def getParent(self):
   156: 		""" Returns the parent of node (if present). """
   157: 		return self.parent
   158: 	def getPayload(self):
   159: 		""" Return the payload of node i.e. list of child nodes and CDATA entries.
   160: 			F.e. for "<node>text1<nodea/><nodeb/> text2</node>" will be returned list:
   161: 			['text1', <nodea instance>, <nodeb instance>, ' text2']. """
   162: 		ret=[]
   163: 		for i in range(len(self.kids)+len(self.data)+1):
   164: 			try:
   165: 				if self.data[i]: ret.append(self.data[i])
   166: 			except IndexError: pass
   167: 			try: ret.append(self.kids[i])
   168: 			except IndexError: pass
   169: 		return ret
   170: 	def getTag(self, name, attrs={}, namespace=None): 
   171: 		""" Filters all child nodes using specified arguments as filter.
   172: 			Returns the first found or None if not found. """
   173: 		return self.getTags(name, attrs, namespace, one=1)
   174: 	def getTagAttr(self,tag,attr):
   175: 		""" Returns attribute value of the child with specified name (or None if no such attribute)."""
   176: 		try: return self.getTag(tag).attrs[attr]
   177: 		except: return None
   178: 	def getTagData(self,tag):
   179: 		""" Returns cocatenated CDATA of the child with specified name."""
   180: 		try: return self.getTag(tag).getData()
   181: 		except: return None
   182: 	def getTags(self, name, attrs={}, namespace=None, one=0):
   183: 		""" Filters all child nodes using specified arguments as filter.
   184: 			Returns the list of nodes found. """
   185: 		nodes=[]
   186: 		for node in self.kids:
   187: 			if namespace and namespace<>node.getNamespace(): continue
   188: 			if node.getName() == name:
   189: 				for key in attrs.keys():
   190: 				   if not node.attrs.has_key(key) or node.attrs[key]<>attrs[key]: break
   191: 				else: nodes.append(node)
   192: 			if one and nodes: return nodes[0]
   193: 		if not one: return nodes
   194: 	
   195: 	def iterTags(self, name, attrs={}, namespace=None):
   196: 		""" Iterate over all children using specified arguments as filter. """
   197: 		for node in self.kids:
   198: 			if namespace is not None and namespace!=node.getNamespace(): continue
   199: 			if node.getName() == name:
   200: 				for key in attrs.keys():
   201: 					if not node.attrs.has_key(key) or \
   202: 						node.attrs[key]!=attrs[key]: break
   203: 				else:
   204: 					yield node
   205: 
   206: 	def setAttr(self, key, val):
   207: 		""" Sets attribute "key" with the value "val". """
   208: 		self.attrs[key]=val
   209: 	def setData(self, data):
   210: 		""" Sets node's CDATA to provided string. Resets all previous CDATA!"""
   211: 		self.data=[ustr(data)]
   212: 	def setName(self,val):
   213: 		""" Changes the node name. """
   214: 		self.name = val
   215: 	def setNamespace(self, namespace):
   216: 		""" Changes the node namespace. """
   217: 		self.namespace=namespace
   218: 	def setParent(self, node): 
   219: 		""" Sets node's parent to "node". WARNING: do not checks if the parent already present 
   220: 			and not removes the node from the list of childs of previous parent. """
   221: 		self.parent = node
   222: 	def setPayload(self,payload,add=0):
   223: 		""" Sets node payload according to the list specified. WARNING: completely replaces all node's
   224: 			previous content. If you wish just to add child or CDATA - use addData or addChild methods. """
   225: 		if type(payload) in (type(''),type(u'')): payload=[payload]
   226: 		if add: self.kids+=payload
   227: 		else: self.kids=payload
   228: 	def setTag(self, name, attrs={}, namespace=None):
   229: 		""" Same as getTag but if the node with specified namespace/attributes not found, creates such
   230: 			node and returns it. """
   231: 		node=self.getTags(name, attrs, namespace=namespace, one=1)
   232: 		if node: return node
   233: 		else: return self.addChild(name, attrs, namespace=namespace)
   234: 	def setTagAttr(self,tag,attr,val):
   235: 		""" Creates new node (if not already present) with name "tag"
   236: 			and sets it's attribute "attr" to value "val". """
   237: 		try: self.getTag(tag).attrs[attr]=val
   238: 		except: self.addChild(tag,attrs={attr:val})
   239: 	def setTagData(self,tag,val,attrs={}):
   240: 		""" Creates new node (if not already present) with name "tag" and (optionally) attributes "attrs"
   241: 			and sets it's CDATA to string "val". """
   242: 		try: self.getTag(tag,attrs).setData(ustr(val))
   243: 		except: self.addChild(tag,attrs,payload=[ustr(val)])
   244: 	def has_attr(self,key):
   245: 		""" Checks if node have attribute "key"."""
   246: 		return self.attrs.has_key(key)
   247: 	def __getitem__(self,item):
   248: 		""" Returns node's attribute "item" value. """
   249: 		return self.getAttr(item)
   250: 	def __setitem__(self,item,val):
   251: 		""" Sets node's attribute "item" value. """
   252: 		return self.setAttr(item,val)
   253: 	def __delitem__(self,item):
   254: 		""" Deletes node's attribute "item". """
   255: 		return self.delAttr(item)
   256: 	def __getattr__(self,attr):
   257: 		""" Reduce memory usage caused by T/NT classes - use memory only when needed. """
   258: 		if attr=='T':
   259: 			self.T=T(self)
   260: 			return self.T
   261: 		if attr=='NT':
   262: 			self.NT=NT(self)
   263: 			return self.NT
   264: 		raise AttributeError
   265: 
   266: class T:
   267: 	""" Auxiliary class used to quick access to node's child nodes. """
   268: 	def __init__(self,node): self.__dict__['node']=node
   269: 	def __getattr__(self,attr): return self.node.setTag(attr)
   270: 	def __setattr__(self,attr,val):
   271: 		if isinstance(val,Node): Node.__init__(self.node.setTag(attr),node=val)
   272: 		else: return self.node.setTagData(attr,val)
   273: 	def __delattr__(self,attr): return self.node.delChild(attr)
   274: 
   275: class NT(T):
   276: 	""" Auxiliary class used to quick create node's child nodes. """
   277: 	def __getattr__(self,attr): return self.node.addChild(attr)
   278: 	def __setattr__(self,attr,val):
   279: 		if isinstance(val,Node): self.node.addChild(attr,node=val)
   280: 		else: return self.node.addChild(attr,payload=[val])
   281: 
   282: DBG_NODEBUILDER = 'nodebuilder'
   283: class NodeBuilder:
   284: 	""" Builds a Node class minidom from data parsed to it. This class used for two purposes:
   285: 		1. Creation an XML Node from a textual representation. F.e. reading a config file. See an XML2Node method.
   286: 		2. Handling an incoming XML stream. This is done by mangling 
   287: 		   the __dispatch_depth parameter and redefining the dispatch method.
   288: 		You do not need to use this class directly if you do not designing your own XML handler."""
   289: 	def __init__(self,data=None,initial_node=None):
   290: 		""" Takes two optional parameters: "data" and "initial_node".
   291: 			By default class initialised with empty Node class instance.
   292: 			Though, if "initial_node" is provided it used as "starting point".
   293: 			You can think about it as of "node upgrade".
   294: 			"data" (if provided) feeded to parser immidiatedly after instance init.
   295: 			"""
   296: 		self.DEBUG(DBG_NODEBUILDER, "Preparing to handle incoming XML stream.", 'start')
   297: 		self._parser = xml.parsers.expat.ParserCreate(namespace_separator=' ')
   298: 		self._parser.StartElementHandler       = self.starttag
   299: 		self._parser.EndElementHandler         = self.endtag
   300: 		self._parser.StartNamespaceDeclHandler = self.handle_namespace_start
   301: 		self._parser.CharacterDataHandler    = self.handle_cdata
   302: 		self.Parse = self._parser.Parse
   303: 
   304: 		self.__depth = 0
   305: 		self.__last_depth = 0
   306: 		self.__max_depth = 0
   307: 		self._dispatch_depth = 1
   308: 		self._document_attrs = None
   309: 		self._mini_dom=initial_node
   310: 		self.last_is_data = 1
   311: 		self._ptr=None
   312: 		self.data_buffer = None
   313: 		self.namespaces={"http://www.w3.org/XML/1998/namespace":'xml:'}
   314: 		self.xmlns="http://www.w3.org/XML/1998/namespace"
   315: 
   316: 		if data: 
   317: 			self._parser.Parse(data,1)
   318: 	
   319: 	def check_data_buffer(self):
   320: 		if self.data_buffer:
   321: 			self._ptr.data.append(''.join(self.data_buffer))
   322: 			del self.data_buffer[:]
   323: 			self.data_buffer = None
   324: 	
   325: 	def destroy(self):
   326: 		""" Method used to allow class instance to be garbage-collected. """
   327: 		self.check_data_buffer()
   328: 		self._parser.StartElementHandler       = None
   329: 		self._parser.EndElementHandler         = None
   330: 		self._parser.CharacterDataHandler      = None
   331: 		self._parser.StartNamespaceDeclHandler = None
   332: 
   333: 	def starttag(self, tag, attrs):
   334: 		"""XML Parser callback. Used internally"""
   335: 		self.check_data_buffer()
   336: 		attlist=attrs.keys()       #
   337: 		for attr in attlist:       # FIXME: Crude hack. And it also slows down the whole library considerably.
   338: 			sp=attr.rfind(" ")     #
   339: 			if sp==-1: continue    #
   340: 			ns=attr[:sp]           #
   341: 			attrs[self.namespaces[ns]+attr[sp+1:]]=attrs[attr]
   342: 			del attrs[attr]        #
   343: 		self._inc_depth()
   344: 		self.DEBUG(DBG_NODEBUILDER, "DEPTH -> %i , tag -> %s, attrs -> %s" % (self.__depth, tag, `attrs`), 'down')
   345: 		if self.__depth == self._dispatch_depth:
   346: 			if not self._mini_dom : 
   347: 				self._mini_dom = Node(tag=tag, attrs=attrs)
   348: 			else: 
   349: 				Node.__init__(self._mini_dom,tag=tag, attrs=attrs)
   350: 			self._ptr = self._mini_dom
   351: 		elif self.__depth > self._dispatch_depth:
   352: 			self._ptr.kids.append(Node(tag=tag,parent=self._ptr,attrs=attrs))
   353: 			self._ptr = self._ptr.kids[-1]
   354: 		if self.__depth == 1:
   355: 			self._document_attrs = attrs
   356: 			ns, name = (['']+tag.split())[-2:]
   357: 			self.stream_header_received(ns, name, attrs)
   358: 		if not self.last_is_data and self._ptr.parent: 
   359: 			self._ptr.parent.data.append('')
   360: 		self.last_is_data = 0
   361: 	def endtag(self, tag ):
   362: 		"""XML Parser callback. Used internally"""
   363: 		self.DEBUG(DBG_NODEBUILDER, "DEPTH -> %i , tag -> %s" % (self.__depth, tag), 'up')
   364: 		self.check_data_buffer()
   365: 		if self.__depth == self._dispatch_depth:
   366: 			self.dispatch(self._mini_dom)
   367: 		elif self.__depth > self._dispatch_depth:
   368: 			self._ptr = self._ptr.parent
   369: 		else:
   370: 			self.DEBUG(DBG_NODEBUILDER, "Got higher than dispatch level. Stream terminated?", 'stop')
   371: 		self._dec_depth()
   372: 		self.last_is_data = 0
   373: 		if self.__depth == 0: self.stream_footer_received()
   374: 	
   375: 	def handle_cdata(self, data):
   376: 		if self.last_is_data:
   377: 			if self.data_buffer:
   378: 				self.data_buffer.append(data)
   379: 		elif self._ptr:
   380: 			self.data_buffer = [data]
   381: 			self.last_is_data = 1
   382: 	
   383: 	def handle_namespace_start(self, prefix, uri):
   384: 		"""XML Parser callback. Used internally"""
   385: 		self.check_data_buffer()
   386: 		if prefix: self.namespaces[uri]=prefix+':'
   387: 		else: self.xmlns=uri
   388: 	def DEBUG(self, level, text, comment=None):
   389: 		""" Gets all NodeBuilder walking events. Can be used for debugging if redefined."""
   390: 	def getDom(self):
   391: 		""" Returns just built Node. """
   392: 		self.check_data_buffer()
   393: 		return self._mini_dom
   394: 	def dispatch(self,stanza):
   395: 		""" Gets called when the NodeBuilder reaches some level of depth on it's way up with the built
   396: 			node as argument. Can be redefined to convert incoming XML stanzas to program events. """
   397: 	def stream_header_received(self,ns,tag,attrs):
   398: 		""" Method called when stream just opened. """
   399: 		self.check_data_buffer()
   400: 	def stream_footer_received(self):
   401: 		""" Method called when stream just closed. """
   402: 		self.check_data_buffer()
   403: 
   404: 	def has_received_endtag(self, level=0):
   405: 		""" Return True if at least one end tag was seen (at level) """
   406: 		return self.__depth <= level and self.__max_depth > level
   407: 
   408: 	def _inc_depth(self):
   409: 		self.__last_depth = self.__depth
   410: 		self.__depth += 1
   411: 		self.__max_depth = max(self.__depth, self.__max_depth)
   412: 
   413: 	def _dec_depth(self):
   414: 		self.__last_depth = self.__depth
   415: 		self.__depth -= 1
   416: 
   417: def XML2Node(xml):
   418: 	""" Converts supplied textual string into XML node. Handy f.e. for reading configuration file.
   419: 		Raises xml.parser.expat.parsererror if provided string is not well-formed XML. """
   420: 	return NodeBuilder(xml).getDom()
   421: 
   422: def BadXML2Node(xml):
   423: 	""" Converts supplied textual string into XML node. Survives if xml data is cutted half way round.
   424: 		I.e. "<html>some text <br>some more text". Will raise xml.parser.expat.parsererror on misplaced
   425: 		tags though. F.e. "<b>some text <br>some more text</b>" will not work."""
   426: 	return NodeBuilder(xml).getDom()

Generated by git2html.