pubsubclient: 89a6cec59a0389bc5790a8133897bcb88b129bf1
2: ## simplexml.py based on Matthew Allum's xmlstream.py
2: ##
3: ## Copyright (C) 2003-2005 Alexey "Snake" Nezhdanov
4: ##
5: ## This program is free software; you can redistribute it and/or modify
6: ## it under the terms of the GNU General Public License as published by
7: ## the Free Software Foundation; either version 2, or (at your option)
8: ## any later version.
9: ##
10: ## This program is distributed in the hope that it will be useful,
11: ## but WITHOUT ANY WARRANTY; without even the implied warranty of
12: ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13: ## GNU General Public License for more details.
14:
15: # $Id: simplexml.py,v 1.27 2005/04/30 07:20:27 snakeru Exp $
16:
17: """Simplexml module provides xmpppy library with all needed tools to handle XML nodes and XML streams.
18: I'm personally using it in many other separate projects. It is designed to be as standalone as possible."""
19:
20: import xml.parsers.expat
21:
def XMLescape(txt):
    """Return "txt" with the symbols & < > " replaced by their XML entities.

    FORM FEED (0x0C) and ESC (0x1B) are removed entirely because they are not
    valid XML characters.
    """
    # "&" has to be handled first, otherwise the ampersands introduced by the
    # other entities would be escaped a second time.
    escaped = txt.replace("&", "&amp;")
    escaped = escaped.replace("<", "&lt;").replace(">", "&gt;")
    escaped = escaped.replace('"', "&quot;")
    return escaped.replace(u'\x0C', "").replace(u'\x1B', "")
26:
# Encoding assumed for byte strings that have to be turned into unicode text.
ENCODING = 'utf-8'


def ustr(what):
    """Convert object "what" to a unicode string.

    Unicode strings (including instances of subclasses) are returned as is.
    Anything else is rendered through its own __str__ method when it has one
    and through str() otherwise; a byte-string result is decoded using
    ENCODING.
    """
    text_type = type(u'')
    if isinstance(what, text_type):
        return what
    try:
        rendered = what.__str__()
    except AttributeError:
        # Objects without a __str__ method of their own.
        rendered = str(what)
    if not isinstance(rendered, text_type):
        return text_type(rendered, ENCODING)
    return rendered
35:
class Node(object):
    """ Describes a single XML node.

        The constructor builds a node from a "namespace name" tag, a dictionary
        of attributes and a payload of text strings and other nodes. Building a
        node from its textual form is delegated to the NodeBuilder class.
        After creation the node can be modified freely, and it can be
        serialised in two modes: the default one (the text describes the node
        exactly) and the "fancy" one (whitespace is added for indentation to
        make the result more readable by a human).

        Internal layout: "kids" holds the child nodes and "data" holds the
        CDATA chunks, where data[i] is the text that precedes kids[i].

        The class attribute FORCE_NODE_RECREATION defaults to false, which
        enables fast replication from another node. The drawback of the fast
        way is that the replica shares its child nodes and attribute values
        with the original, so changing one may influence the other. When the
        flag is true the source node is serialised and parsed again instead.
    """
    FORCE_NODE_RECREATION=0

    def __init__(self, tag=None, attrs={}, payload=[], parent=None, node=None):
        """ Takes "tag" as the name of the node (prepended by the namespace, if
            needed, and separated from it by a space), the "attrs" dictionary
            as the set of attributes, "payload" as the list of text strings and
            child nodes carried by this node, and "parent" as the node this one
            will be the child of. "node" may be either a text string containing
            exactly one node or another Node instance to start from. If both
            "node" and other arguments are given, the node is first created as
            a replica of "node" and then modified according to the other
            arguments.

            The mutable defaults of "attrs" and "payload" are only ever read,
            never modified. """
        if node:
            if self.FORCE_NODE_RECREATION and isinstance(node, Node):
                node = str(node)
            if not isinstance(node, Node):
                # The builder initialises this very instance in place while it
                # parses the text.
                node = NodeBuilder(node, self)
            else:
                self.name, self.namespace, self.parent = node.name, node.namespace, node.parent
                # Shallow copies: the containers are new, the items are shared.
                self.attrs = dict(node.attrs)
                self.data = list(node.data)
                self.kids = list(node.kids)
        else:
            self.name, self.namespace, self.parent = 'tag', '', None
            self.attrs, self.data, self.kids = {}, [], []

        if tag:
            # "namespace name" -> both parts; a bare "name" keeps the namespace.
            self.namespace, self.name = ([self.namespace] + tag.split())[-2:]
        if parent:
            self.parent = parent
        if self.parent and not self.namespace:
            self.namespace = self.parent.namespace
        for attr in attrs.keys():
            self.attrs[attr] = attrs[attr]
        # A lone string or a lone node is treated as a one-item payload.
        # Iterating a Node directly would never terminate, because
        # __getitem__ answers None for every index instead of raising.
        if isinstance(payload, (type(''), type(u''), Node)):
            payload = [payload]
        for item in payload:
            if isinstance(item, Node):
                self.addChild(node=item)
            else:
                self.data.append(ustr(item))

    def __str__(self, fancy=0):
        """ Dumps the node into its textual representation.
            If "fancy" is true the output is indented for readability;
            its integer value is the nesting level (1 for the outermost node). """
        parts = [(fancy - 1) * 2 * ' ' + "<" + self.name]
        if self.namespace:
            # The namespace is declared only where it differs from the parent's.
            if not self.parent or self.parent.namespace != self.namespace:
                parts.append(' xmlns="%s"' % self.namespace)
        for key in self.attrs.keys():
            val = ustr(self.attrs[key])
            parts.append(' %s="%s"' % (key, XMLescape(val)))
        parts.append(">")
        cnt = 0
        if self.kids:
            if fancy:
                parts.append("\n")
            for kid in self.kids:
                # data[cnt] is the text in front of kids[cnt].
                if cnt < len(self.data):
                    text = self.data[cnt]
                    if fancy:
                        text = text.strip()
                    parts.append(XMLescape(text))
                parts.append(kid.__str__(fancy and fancy + 1))
                cnt = cnt + 1
        # Everything left in "data" follows the last child. All remaining
        # chunks are written, so text added by repeated addData calls is
        # not lost.
        for index in range(cnt, len(self.data)):
            text = self.data[index]
            if fancy:
                text = text.strip()
            parts.append(XMLescape(text))
        s = ''.join(parts)
        if not self.kids and s[-1:] == '>':
            # Nothing was written after the opening tag: use the short form.
            s = s[:-1] + ' />'
            if fancy:
                s = s + "\n"
        else:
            if fancy and not self.data:
                s = s + (fancy - 1) * 2 * ' '
            s = s + "</" + self.name + ">"
            if fancy:
                s = s + "\n"
        return s

    def addChild(self, name=None, attrs={}, payload=[], namespace=None, node=None):
        """ If "node" is provided, adds it as a child node (and makes this node
            its parent). Otherwise creates a new node from the other arguments
            and adds that one. Returns the added node. """
        if namespace:
            name = namespace + ' ' + name
        if node:
            newnode = node
            node.parent = self
        else:
            newnode = Node(tag=name, parent=self, attrs=attrs, payload=payload)
        self.kids.append(newnode)
        return newnode

    def addData(self, data):
        """ Adds some CDATA to the node. """
        self.data.append(ustr(data))

    def clearData(self):
        """ Removes all CDATA from the node. """
        self.data = []

    def delAttr(self, key):
        """ Deletes the attribute "key". Raises KeyError if it is not set. """
        del self.attrs[key]

    def delChild(self, node, attrs={}):
        """ Deletes "node" from the list of children if "node" is a Node
            instance. Otherwise deletes the first child that has the given name
            and (optionally) attributes. Returns the removed node; raises
            ValueError if there is nothing to remove. """
        if not isinstance(node, Node):
            node = self.getTag(node, attrs)
        self.kids.remove(node)
        return node

    def getAttrs(self):
        """ Returns all attributes of the node as a dictionary (the live one,
            not a copy). """
        return self.attrs

    def getAttr(self, key):
        """ Returns the value of the given attribute or None if it is not set. """
        try:
            return self.attrs[key]
        except (KeyError, TypeError):
            # TypeError: an unhashable key can not be present either.
            return None

    def getChildren(self):
        """ Returns all child nodes as a list (the live one, not a copy). """
        return self.kids

    def getData(self):
        """ Returns all CDATA of the node as one concatenated string. """
        return ''.join(self.data)

    def getName(self):
        """ Returns the name of the node. """
        return self.name

    def getNamespace(self):
        """ Returns the namespace of the node. """
        return self.namespace

    def getParent(self):
        """ Returns the parent of the node (if present). """
        return self.parent

    def getPayload(self):
        """ Returns the payload of the node, i.e. the list of child nodes and
            non-empty CDATA entries in document order.
            F.e. for "<node>text1<nodea/><nodeb/> text2</node>" the result is:
            ['text1', <nodea instance>, <nodeb instance>, ' text2']. """
        ret = []
        for i in range(max(len(self.kids), len(self.data))):
            # data[i] comes before kids[i]; empty chunks are placeholders only.
            if i < len(self.data) and self.data[i]:
                ret.append(self.data[i])
            if i < len(self.kids):
                ret.append(self.kids[i])
        return ret

    def getTag(self, name, attrs={}, namespace=None):
        """ Filters the child nodes using the given arguments.
            Returns the first match or None if there is none. """
        return self.getTags(name, attrs, namespace, one=1)

    def getTagAttr(self, tag, attr):
        """ Returns the value of attribute "attr" of the first child named
            "tag", or None if there is no such child or attribute. """
        node = self.getTag(tag)
        if node is None:
            return None
        try:
            return node.attrs[attr]
        except (KeyError, TypeError):
            return None

    def getTagData(self, tag):
        """ Returns the concatenated CDATA of the first child named "tag",
            or None if there is no such child. """
        node = self.getTag(tag)
        if node is None:
            return None
        return node.getData()

    def getTags(self, name, attrs={}, namespace=None, one=0):
        """ Filters the child nodes using the given arguments.
            Returns the list of matching nodes or, if "one" is true, just the
            first match (None when nothing matches). A false "namespace"
            disables the namespace filter. """
        nodes = []
        for node in self.kids:
            if namespace and namespace != node.getNamespace():
                continue
            if node.getName() == name:
                for key in attrs.keys():
                    if key not in node.attrs or node.attrs[key] != attrs[key]:
                        break
                else:
                    # Every requested attribute is present with the right value.
                    if one:
                        return node
                    nodes.append(node)
        if not one:
            return nodes
        return None

    def iterTags(self, name, attrs={}, namespace=None):
        """ Iterates over the children that match the given arguments.
            Unlike getTags the namespace filter is disabled only by None. """
        for node in self.kids:
            if namespace is not None and namespace != node.getNamespace():
                continue
            if node.getName() == name:
                for key in attrs.keys():
                    if key not in node.attrs or node.attrs[key] != attrs[key]:
                        break
                else:
                    yield node

    def setAttr(self, key, val):
        """ Sets attribute "key" to the value "val". """
        self.attrs[key] = val

    def setData(self, data):
        """ Sets the CDATA of the node to the given string.
            Resets all previous CDATA! """
        self.data = [ustr(data)]

    def setName(self, val):
        """ Changes the node name. """
        self.name = val

    def setNamespace(self, namespace):
        """ Changes the node namespace. """
        self.namespace = namespace

    def setParent(self, node):
        """ Sets the parent of the node to "node". WARNING: does not check
            whether a parent is already present and does not remove the node
            from the list of children of the previous parent. """
        self.parent = node

    def setPayload(self, payload, add=0):
        """ Sets the node payload according to the list given. Node items
            become children, everything else becomes CDATA. WARNING: unless
            "add" is true this completely replaces the previous content of the
            node (children and CDATA). If you just wish to add a child or
            CDATA use the addChild or addData methods.

            As in the constructor, text is collected in the order given without
            tracking its position relative to the child nodes. The parent of
            the supplied nodes is left untouched. """
        # A lone string or node is a one-item payload (a Node must never be
        # iterated directly, see __init__).
        if isinstance(payload, (type(''), type(u''), Node)):
            payload = [payload]
        if not add:
            self.kids = []
            self.data = []
        for item in payload:
            if isinstance(item, Node):
                self.kids.append(item)
            else:
                # Text belongs to "data"; strings among the children would
                # break serialisation and tag lookup.
                self.data.append(ustr(item))

    def setTag(self, name, attrs={}, namespace=None):
        """ Same as getTag, but if no child with the given name/attributes/
            namespace is found, such a child is created and returned. """
        node = self.getTags(name, attrs, namespace=namespace, one=1)
        if node:
            return node
        return self.addChild(name, attrs, namespace=namespace)

    def setTagAttr(self, tag, attr, val):
        """ Sets attribute "attr" of the first child named "tag" to "val",
            creating the child if it is not present yet. """
        node = self.getTag(tag)
        if node is None:
            self.addChild(tag, attrs={attr: val})
        else:
            node.attrs[attr] = val

    def setTagData(self, tag, val, attrs={}):
        """ Sets the CDATA of the first child named "tag" (with the optional
            attributes "attrs") to the string "val", creating the child if it
            is not present yet. """
        node = self.getTag(tag, attrs)
        if node is None:
            self.addChild(tag, attrs, payload=[ustr(val)])
        else:
            node.setData(ustr(val))

    def has_attr(self, key):
        """ Checks whether the node has attribute "key". """
        return key in self.attrs

    def __getitem__(self, item):
        """ Returns the value of attribute "item" (None if it is not set). """
        return self.getAttr(item)

    def __setitem__(self, item, val):
        """ Sets the value of attribute "item". """
        return self.setAttr(item, val)

    def __delitem__(self, item):
        """ Deletes attribute "item". """
        return self.delAttr(item)

    def __getattr__(self, attr):
        """ Reduces memory usage caused by the T/NT helper classes: they are
            created on first access only and then cached on the instance. """
        if attr == 'T':
            self.T = T(self)
            return self.T
        if attr == 'NT':
            self.NT = NT(self)
            return self.NT
        raise AttributeError(attr)
265:
class T:
    """ Auxiliary class giving attribute-style access to the child nodes of
        the wrapped node: reading an attribute returns (creating it if needed)
        the child of that name, assigning sets its content and deleting
        removes it. """

    def __init__(self, node):
        # Written through __dict__ because the regular assignment is
        # redirected to the node by __setattr__ below.
        self.__dict__['node'] = node

    def __getattr__(self, attr):
        return self.node.setTag(attr)

    def __setattr__(self, attr, val):
        if not isinstance(val, Node):
            return self.node.setTagData(attr, val)
        # Re-initialise the (possibly just created) child as a replica of val.
        target = self.node.setTag(attr)
        Node.__init__(target, node=val)

    def __delattr__(self, attr):
        return self.node.delChild(attr)
274:
class NT(T):
    """ Auxiliary class used to quickly create child nodes of the wrapped
        node: every attribute access or assignment adds a new child. """

    def __getattr__(self, attr):
        return self.node.addChild(attr)

    def __setattr__(self, attr, val):
        if not isinstance(val, Node):
            return self.node.addChild(attr, payload=[val])
        # A ready-made node is attached as it is; nothing is returned.
        self.node.addChild(attr, node=val)
281:
# Debug "level" tag passed to NodeBuilder.DEBUG for all builder events.
DBG_NODEBUILDER = 'nodebuilder'


class NodeBuilder:
    """ Builds a Node class minidom from the data passed to it. This class is
        used for two purposes:
        1. Creation of an XML Node from a textual representation, f.e. when
           reading a config file. See the XML2Node function.
        2. Handling of an incoming XML stream. This is done by changing the
           _dispatch_depth attribute and redefining the dispatch method.
        You do not need to use this class directly unless you are designing
        your own XML handler. """

    def __init__(self, data=None, initial_node=None):
        """ Takes two optional parameters: "data" and "initial_node".
            By default the builder creates a fresh Node for the parsed tree.
            If "initial_node" is provided it is used as the starting point
            instead (you can think of it as a "node upgrade").
            "data", if provided, is fed to the parser as a complete document
            immediately after the instance is initialised. """
        self.DEBUG(DBG_NODEBUILDER, "Preparing to handle incoming XML stream.", 'start')
        # With a separator set, expat reports names as "namespace-uri name".
        self._parser = xml.parsers.expat.ParserCreate(namespace_separator=' ')
        self._parser.StartElementHandler = self.starttag
        self._parser.EndElementHandler = self.endtag
        self._parser.StartNamespaceDeclHandler = self.handle_namespace_start
        self._parser.CharacterDataHandler = self.handle_cdata
        self.Parse = self._parser.Parse

        self.__depth = 0            # current nesting level
        self.__last_depth = 0       # nesting level before the last change
        self.__max_depth = 0        # deepest nesting level seen so far
        self._dispatch_depth = 1    # level whose completed nodes are dispatched
        self._document_attrs = None
        self._mini_dom = initial_node
        self.last_is_data = 1       # true while the last event was character data
        self._ptr = None            # node currently being filled
        self.data_buffer = None     # pending character data chunks
        # Namespace URI -> "prefix:" used when renaming qualified attributes.
        self.namespaces = {"http://www.w3.org/XML/1998/namespace": 'xml:'}
        self.xmlns = "http://www.w3.org/XML/1998/namespace"

        if data:
            self._parser.Parse(data, 1)

    def check_data_buffer(self):
        """ Flushes the buffered character data into the current node. """
        if self.data_buffer:
            self._ptr.data.append(''.join(self.data_buffer))
            del self.data_buffer[:]
            self.data_buffer = None

    def destroy(self):
        """ Method used to allow the class instance to be garbage-collected:
            detaches the parser callbacks that refer back to this instance. """
        self.check_data_buffer()
        self._parser.StartElementHandler = None
        self._parser.EndElementHandler = None
        self._parser.CharacterDataHandler = None
        self._parser.StartNamespaceDeclHandler = None

    def starttag(self, tag, attrs):
        """XML Parser callback. Used internally"""
        self.check_data_buffer()
        # FIXME: Crude hack. And it also slows down the whole library considerably.
        # Rename "namespace-uri name" attributes to "prefix:name". The keys
        # are snapshotted first because the dictionary is modified in the loop.
        for attr in list(attrs.keys()):
            sp = attr.rfind(" ")
            if sp == -1:
                continue
            ns = attr[:sp]
            attrs[self.namespaces[ns] + attr[sp+1:]] = attrs[attr]
            del attrs[attr]
        self._inc_depth()
        self.DEBUG(DBG_NODEBUILDER, "DEPTH -> %i , tag -> %s, attrs -> %s" % (self.__depth, tag, repr(attrs)), 'down')
        if self.__depth == self._dispatch_depth:
            # Start of a new top-level node (stanza).
            if not self._mini_dom:
                self._mini_dom = Node(tag=tag, attrs=attrs)
            else:
                # Reuse the existing node object, resetting its content.
                Node.__init__(self._mini_dom, tag=tag, attrs=attrs)
            self._ptr = self._mini_dom
        elif self.__depth > self._dispatch_depth:
            self._ptr.kids.append(Node(tag=tag, parent=self._ptr, attrs=attrs))
            self._ptr = self._ptr.kids[-1]
        if self.__depth == 1:
            self._document_attrs = attrs
            ns, name = ([''] + tag.split())[-2:]
            self.stream_header_received(ns, name, attrs)
        if not self.last_is_data and self._ptr.parent:
            # No text between the previous tag and this one: keep the parent's
            # data list aligned with its list of children.
            self._ptr.parent.data.append('')
        self.last_is_data = 0

    def endtag(self, tag):
        """XML Parser callback. Used internally"""
        self.DEBUG(DBG_NODEBUILDER, "DEPTH -> %i , tag -> %s" % (self.__depth, tag), 'up')
        self.check_data_buffer()
        if self.__depth == self._dispatch_depth:
            self.dispatch(self._mini_dom)
        elif self.__depth > self._dispatch_depth:
            self._ptr = self._ptr.parent
        else:
            self.DEBUG(DBG_NODEBUILDER, "Got higher than dispatch level. Stream terminated?", 'stop')
        self._dec_depth()
        self.last_is_data = 0
        if self.__depth == 0:
            self.stream_footer_received()

    def handle_cdata(self, data):
        """XML Parser callback. Used internally"""
        if self.last_is_data:
            # Continuation of a text run that is already being buffered.
            if self.data_buffer:
                self.data_buffer.append(data)
        elif self._ptr:
            self.data_buffer = [data]
            self.last_is_data = 1

    def handle_namespace_start(self, prefix, uri):
        """XML Parser callback. Used internally"""
        self.check_data_buffer()
        if prefix:
            self.namespaces[uri] = prefix + ':'
        else:
            self.xmlns = uri

    def DEBUG(self, level, text, comment=None):
        """ Gets all NodeBuilder walking events. Can be used for debugging if redefined."""

    def getDom(self):
        """ Returns the node built so far (None if nothing was parsed and no
            initial node was given). """
        self.check_data_buffer()
        return self._mini_dom

    def dispatch(self, stanza):
        """ Gets called when the NodeBuilder reaches the dispatch depth on its
            way up, with the built node as argument. Can be redefined to
            convert incoming XML stanzas to program events. """

    def stream_header_received(self, ns, tag, attrs):
        """ Method called when the stream has just been opened. """
        self.check_data_buffer()

    def stream_footer_received(self):
        """ Method called when the stream has just been closed. """
        self.check_data_buffer()

    def has_received_endtag(self, level=0):
        """ Return True if at least one end tag was seen (at level) """
        return self.__depth <= level and self.__max_depth > level

    def _inc_depth(self):
        """ Registers a step one level down into the document. """
        self.__last_depth = self.__depth
        self.__depth += 1
        self.__max_depth = max(self.__depth, self.__max_depth)

    def _dec_depth(self):
        """ Registers a step one level up in the document. """
        self.__last_depth = self.__depth
        self.__depth -= 1
416:
def XML2Node(xml):
    """ Converts the supplied textual string into an XML node. Handy f.e. for
        reading a configuration file. The string is parsed as one complete
        document, so the parser raises xml.parsers.expat.ExpatError if it is
        not well-formed XML. """
    builder = NodeBuilder(xml)
    return builder.getDom()
421:
def BadXML2Node(xml):
    """ Converts the supplied textual string into an XML node. Survives XML data
        that is cut off half way, i.e. "<html>some text <br>some more text".
        The parser still raises xml.parsers.expat.ExpatError on misplaced tags,
        f.e. "<b>some text <br>some more text</b>" will not work. """
    builder = NodeBuilder()
    if xml:
        # The data is fed as a non-final chunk: elements that are still open
        # at the end of the string are then not reported as an error, and
        # whatever has been built so far is returned.
        builder.Parse(xml, 0)
    return builder.getDom()
Generated by git2html.