533 lines
22 KiB
Python
533 lines
22 KiB
Python
|
#!/usr/bin/python
|
||
|
# -*- coding: utf-8 -*-
|
||
|
# This program is free software; you can redistribute it and/or modify
|
||
|
# it under the terms of the GNU Lesser General Public License as published by the
|
||
|
# Free Software Foundation; either version 3, or (at your option) any later
|
||
|
# version.
|
||
|
#
|
||
|
# This program is distributed in the hope that it will be useful, but
|
||
|
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||
|
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||
|
# for more details.
|
||
|
|
||
|
"""Simple XML manipulation"""
|
||
|
|
||
|
|
||
|
from __future__ import unicode_literals
|
||
|
import sys
|
||
|
# Python 3 has no ``basestring``/``unicode`` builtins; alias both to ``str`` so
# the rest of the module can keep using the Python 2 names.
# ``sys.version_info`` is compared numerically; the textual ``sys.version``
# would be compared character by character.
if sys.version_info[0] >= 3:
    basestring = str
    unicode = str
|
||
|
|
||
|
import logging
|
||
|
import re
|
||
|
import time
|
||
|
import xml.dom.minidom
|
||
|
|
||
|
from . import __author__, __copyright__, __license__, __version__
|
||
|
|
||
|
# Utility functions used for marshalling, moved aside for readability
|
||
|
from .helpers import TYPE_MAP, TYPE_MARSHAL_FN, TYPE_UNMARSHAL_FN, \
|
||
|
REVERSE_TYPE_MAP, Struct, Date, Decimal
|
||
|
|
||
|
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
|
||
|
|
||
|
|
||
|
class SimpleXMLElement(object):
    """Simple XML manipulation (simil PHP)

    Wraps one or more ``xml.dom.minidom`` element nodes that belong to the
    same document.  Child tags are reachable as attributes (``node.tag``) or
    by calling the object (``node('tag')``); XML attributes are reachable
    with item syntax (``node['attr']``).  Most operations act on the first
    wrapped element (``self._element``).
    """

    def __init__(self, text=None, elements=None, document=None,
                 namespace=None, prefix=None, namespaces_map=None, jetty=False):
        """
        :param text: XML source to parse; when given, ``elements`` and
                     ``document`` are ignored and the parsed root is wrapped
        :param elements: list of DOM elements to wrap (used when ``text`` is None)
        :param document: DOM document owning ``elements``
        :param namespace: namespace URI used when creating/searching children;
                          ``False`` disables namespaces in ``add_child``
        :param prefix: prefix used for namespaced children created by ``add_child``
        :param namespaces_map: How to map our namespace prefix to that given by the client;
          {prefix: received_prefix}
        :param jetty: special list support flag, propagated to derived elements
        """
        # A fresh dict per instance: a ``{}`` default would be shared by all calls.
        if namespaces_map is None:
            namespaces_map = {}
        self.__namespaces_map = namespaces_map
        if namespaces_map:
            # {'external': 'ext', 'model': 'mod'} -> 'external|model'
            _rx = "|".join(re.escape(alias) for alias in namespaces_map)
            # And now we build an expression ^(external|model):.*$
            # to find prefixes in all xml nodes i.e.: <model:code>1</model:code>
            # and later change that to <mod:code>1</mod:code>
            self.__ns_rx = re.compile(r"^(%s):.*$" % _rx)
        else:
            # nothing to translate, see _update_ns()
            self.__ns_rx = None
        self.__ns = namespace
        self.__prefix = prefix
        self.__jetty = jetty                   # special list support

        if text is not None:
            try:
                self.__document = xml.dom.minidom.parseString(text)
            except Exception:
                # log the offending payload, then let the parser error propagate
                log.error(text)
                raise
            self.__elements = [self.__document.documentElement]
        else:
            self.__elements = elements
            self.__document = document

    @property
    def _element(self):
        """First wrapped DOM element (the one most operations act on)"""
        return self.__elements[0]

    def _spawn(self, elements):
        """Wrap DOM elements in a new object sharing this one's settings"""
        return SimpleXMLElement(
            elements=elements,
            document=self.__document,
            namespace=self.__ns,
            prefix=self.__prefix,
            jetty=self.__jetty,
            namespaces_map=self.__namespaces_map
        )

    def add_child(self, name, text=None, ns=True):
        """Adding a child tag to a node

        :param name: tag name of the new child
        :param text: optional text content (a CDATASection is copied as CDATA)
        :param ns: falsy -> no namespace; a string -> emitted as ``xmlns``
                   attribute; otherwise the element namespace/prefix are used
        :return: the new child wrapped as SimpleXMLElement
        """
        if not ns or self.__ns is False:
            element = self.__document.createElement(name)
        elif isinstance(ns, basestring):
            element = self.__document.createElement(name)
            element.setAttribute("xmlns", ns)
        elif self.__prefix:
            element = self.__document.createElementNS(
                self.__ns, "%s:%s" % (self.__prefix, name))
        else:
            element = self.__document.createElementNS(self.__ns, name)
        # don't append null tags!
        if text is not None:
            if isinstance(text, xml.dom.minidom.CDATASection):
                content = self.__document.createCDATASection(text.data)
            else:
                content = self.__document.createTextNode(text)
            element.appendChild(content)
        self._element.appendChild(element)
        return self._spawn([element])

    def __setattr__(self, tag, text):
        """Add text child tag node (short form)

        Names starting with an underscore are stored as regular Python
        attributes instead.
        """
        if tag.startswith("_"):
            object.__setattr__(self, tag, text)
        else:
            self.add_child(tag, text)

    def __delattr__(self, tag):
        """Remove the direct children named ``tag`` (non recursive!)

        Names starting with an underscore are treated as regular Python
        attributes, mirroring ``__setattr__``.
        """
        if tag.startswith("_"):
            object.__delattr__(self, tag)
            return
        # snapshot first: childNodes must not be mutated while iterating it
        doomed = [node for node in self._element.childNodes
                  if node.nodeType == node.ELEMENT_NODE
                  and tag in (node.tagName, node.localName)]
        for node in doomed:
            self._element.removeChild(node)

    def add_comment(self, data):
        """Add an xml comment to this child"""
        comment = self.__document.createComment(data)
        self._element.appendChild(comment)

    def as_xml(self, filename=None, pretty=False):
        """Return the XML representation of the document (UTF-8 encoded)

        :param filename: accepted for compatibility; not used by this method
        :param pretty: use ``toprettyxml`` instead of ``toxml``
        """
        if pretty:
            return self.__document.toprettyxml(encoding='UTF-8')
        return self.__document.toxml('UTF-8')

    def __repr__(self):
        """Return the XML representation of this tag"""
        # NOTE: do not use self.as_xml('UTF-8') as it returns the whole xml doc
        if sys.version_info[0] >= 3:
            return self._element.toxml()
        return self._element.toxml('UTF-8')

    def get_name(self):
        """Return the tag name of this node"""
        return self._element.tagName

    def get_local_name(self):
        """Return the tag local name (name without the prefix) of this node"""
        return self._element.localName

    def get_prefix(self):
        """Return the namespace prefix of this node"""
        return self._element.prefix

    def get_namespace_uri(self, ns):
        """Return the namespace uri for a prefix

        Looks for an ``xmlns:<ns>`` attribute on this element and then on each
        ancestor; returns None when no declaration is found.
        """
        element = self._element
        while element is not None and element.attributes is not None:
            try:
                return element.attributes['xmlns:%s' % ns].value
            except KeyError:
                element = element.parentNode
        return None

    def attributes(self):
        """Return a dict of attributes for this tag"""
        #TODO: use slice syntax [:]?
        return self._element.attributes

    def __getitem__(self, item):
        """Return xml tag attribute value or a slice of attributes (iter)

        * string: value of that attribute, or None when it is absent
        * slice: list of (name, value) attribute pairs
        * anything else: wrapped element at that position
        """
        if isinstance(item, basestring):
            if self._element.hasAttribute(item):
                return self._element.attributes[item].value
            return None
        if isinstance(item, slice):
            # return a list with name:values
            return list(self._element.attributes.items())[item]
        # return element by index (position)
        return self._spawn([self.__elements[item]])

    def add_attribute(self, name, value):
        """Set an attribute value from a string"""
        self._element.setAttribute(name, value)

    def __setitem__(self, item, value):
        """Set an attribute value (or several, from a dict, with slice syntax)"""
        if isinstance(item, basestring):
            self.add_attribute(item, value)
        elif isinstance(item, slice):
            # set multiple attributes at once
            for k, v in value.items():
                self.add_attribute(k, v)

    def __delitem__(self, item):
        "Remove an attribute"
        self._element.removeAttribute(item)

    def __call__(self, tag=None, ns=None, children=False, root=False,
                 error=True, ):
        """Search (even in child nodes) and return a child tag by name

        :param tag: tag name, or an int position; None iterates this level
        :param ns: namespace URI (or tuple/list of URIs) tried first
        :param children: return the children of this node instead
        :param root: return the document root instead
        :param error: raise AttributeError when nothing matches (else return None)
        """
        try:
            if root:
                # return entire document
                return self._spawn([self.__document.documentElement])
            if tag is None:
                # if no name given, iterate over siblings (same level)
                return self.__iter__()
            if children:
                # future: filter children? by ns?
                return self.children()
            elements = None
            if isinstance(tag, int):
                # return tag by index
                elements = [self.__elements[tag]]
            if ns and not elements:
                for ns_uri in (ns if isinstance(ns, (tuple, list)) else (ns, )):
                    elements = self._element.getElementsByTagNameNS(ns_uri, tag)
                    if elements:
                        break
            if self.__ns and not elements:
                elements = self._element.getElementsByTagNameNS(self.__ns, tag)
            if not elements:
                # last resort: ignore namespaces
                elements = self._element.getElementsByTagName(tag)
            if not elements:
                if error:
                    raise AttributeError("No elements found")
                return None
            return self._spawn(elements)
        except AttributeError as e:
            raise AttributeError("Tag not found: %s (%s)" % (tag, e))

    def __getattr__(self, tag):
        """Shortcut for __call__"""
        # Private (name-mangled) and dunder names are Python attributes, never
        # tags.  Without this guard an instance whose __init__ did not run
        # (copy, pickle, __new__) recurses forever: __call__ reads the missing
        # attribute, which re-enters __getattr__.
        if tag.startswith("_SimpleXMLElement__") or \
                (tag.startswith("__") and tag.endswith("__")):
            raise AttributeError(tag)
        return self.__call__(tag)

    def __iter__(self):
        """Iterate over xml tags at this level"""
        for __element in self.__elements:
            yield self._spawn([__element])

    def __dir__(self):
        """List xml children tags names"""
        # only element nodes have a tagName (comments and CDATA do not)
        return [node.tagName for node
                in self._element.childNodes
                if node.nodeType == node.ELEMENT_NODE]

    def children(self):
        """Return xml children tags element (None if there are none)"""
        elements = [__element for __element in self._element.childNodes
                    if __element.nodeType == __element.ELEMENT_NODE]
        if not elements:
            return None
        return self._spawn(elements)

    def __len__(self):
        """Return element count"""
        return len(self.__elements)

    def __contains__(self, item):
        """Search for a tag name in this element or child nodes"""
        return bool(self._element.getElementsByTagName(item))

    def __unicode__(self):
        """Returns the unicode text nodes of the current element"""
        return ''.join(
            node.data for node in self._element.childNodes
            if node.nodeType in (node.TEXT_NODE, node.CDATA_SECTION_NODE))

    def __str__(self):
        """Text of the element (UTF-8 encoded bytes on Python 2)"""
        text = self.__unicode__()
        if sys.version_info[0] >= 3:
            return text
        return text.encode('utf-8')

    def __int__(self):
        """Returns the integer value of the current element"""
        return int(self.__str__())

    def __float__(self):
        """Returns the float value of the current element

        :raises IndexError: (carrying the element xml) if the text is not a float
        """
        try:
            return float(self.__str__())
        except (ValueError, TypeError):
            raise IndexError(self._element.toxml())

    def unmarshall(self, types, strict=True):
        """Convert to python values the current serialized xml element

        types is a dict of {tag name: conversion function}
        strict=False to use default type conversion if not specified
        example: types={'p': {'a': int,'b': int}, 'c': [{'d':str}]}
          expected xml: <p><a>1</a><b>2</b></p><c><d>hola</d><d>chau</d>
          returned value: {'p': {'a':1,'b':2}, 'c':[{'d':'hola'},{'d':'chau'}]}

        :raises TypeError: in strict mode, for a tag with no known type
        :raises ValueError: when a text value cannot be converted
        """
        d = {}
        for node in self():
            name = str(node.get_local_name())
            ref_name_type = None
            # handle multirefs: href="#id0"
            if 'href' in node.attributes().keys():
                href = node['href'][1:]
                for ref_node in self(root=True)("multiRef"):
                    if ref_node['id'] == href:
                        node = ref_node
                        # currently informational only (not used below)
                        ref_name_type = ref_node['xsi:type'].split(":")[1]
                        break

            try:
                if isinstance(types, dict):
                    fn = types[name]
                    # custom array only in the response (not defined in the WSDL):
                    # <results soapenc:arrayType="xsd:string[199]>
                    if any([k for k, v in node[:] if 'arrayType' in k]) and not isinstance(fn, list):
                        fn = [fn]
                else:
                    fn = types
            except (KeyError, ):
                # tag not declared in ``types``: try to infer its type
                xmlns = node['xmlns'] or node.get_namespace_uri(node.get_prefix())
                if 'xsi:type' in node.attributes().keys():
                    xsd_type = node['xsi:type'].split(":")[1]
                    try:
                        # get fn type from SOAP-ENC:arrayType="xsd:string[28]"
                        if xsd_type == 'Array':
                            array_type = [k for k, v in node[:] if 'arrayType' in k][0]
                            xsd_type = node[array_type].split(":")[1]
                            if "[" in xsd_type:
                                xsd_type = xsd_type[:xsd_type.index("[")]
                            fn = [REVERSE_TYPE_MAP[xsd_type]]
                        else:
                            fn = REVERSE_TYPE_MAP[xsd_type]
                    except Exception:
                        fn = None  # ignore multirefs!
                elif xmlns == "http://www.w3.org/2001/XMLSchema":
                    # self-defined schema, return the SimpleXMLElement
                    # TODO: parse to python types if <s:element ref="s:schema"/>
                    fn = None
                elif None in types:
                    # <s:any/>, return the SimpleXMLElement
                    # TODO: check position of None if inside <s:sequence>
                    fn = None
                elif strict:
                    raise TypeError("Tag: %s invalid (type not found)" % (name,))
                else:
                    # if not strict, use default type conversion
                    fn = str

            if isinstance(fn, list):
                # append to existing list (if any) - unnested dict arrays -
                value = d.setdefault(name, [])
                # If the node has no children then the node itself might
                # have multiple occurrences:
                children = node.children() or node
                # TODO: check if this was really needed (get first child only)
                if fn and not isinstance(fn[0], dict):
                    # simple arrays []
                    for child in (children or []):
                        tmp_dict = child.unmarshall(fn[0], strict)
                        value.extend(tmp_dict.values())
                elif (len(fn[0]) > 1):
                    # Jetty and now all dialects use array style support [{k, v}]
                    # (``node`` wraps a single element, so this runs once)
                    for parent in node:
                        tmp_dict = {}    # unmarshall each value & mix
                        for child in (parent.children() or []):
                            tmp_dict.update(child.unmarshall(fn[0], strict))
                        value.append(tmp_dict)
                else:  # len(fn[0]) == 0
                    for child in (children or []):
                        value.append(child.unmarshall(fn[0], strict))

            elif isinstance(fn, tuple):
                value = []
                _d = {}
                children = node.children()
                as_dict = len(fn) == 1 and isinstance(fn[0], dict)

                for child in (children() if children else []):
                    if as_dict:
                        _d.update(child.unmarshall(fn[0], strict))  # Merging pairs
                    else:
                        value.append(child.unmarshall(fn[0], strict))
                if as_dict:
                    value.append(_d)

                if name in d:
                    # same tag seen before: extend the previous tuple
                    _tmp = list(d[name])
                    _tmp.extend(value)
                    value = tuple(_tmp)
                else:
                    value = tuple(value)

            elif isinstance(fn, dict):
                children = node.children()
                value = children and children.unmarshall(fn, strict)
            else:
                if fn is None:  # xsd:anyType not unmarshalled
                    value = node
                elif unicode(node) or (fn == str and unicode(node) != ''):
                    try:
                        # get special deserialization function (if any)
                        fn = TYPE_UNMARSHAL_FN.get(fn, fn)
                        if fn == str:
                            # always return an unicode object:
                            # (avoid encoding errors in py<3!)
                            value = unicode(node)
                        else:
                            value = fn(unicode(node))
                    except (ValueError, TypeError) as e:
                        raise ValueError("Tag: %s: %s" % (name, e))
                else:
                    value = None
            d[name] = value
        return d

    def _update_ns(self, name):
        """Replace the defined namespace alias with those used by the client."""
        if self.__ns_rx is None:
            # no aliases configured
            return name
        match = self.__ns_rx.search(name)
        if match:
            alias = match.group(1)
            try:
                # swap only the leading prefix; the alias text may also occur
                # inside the local name (model:modelcode -> mod:modelcode)
                name = self.__namespaces_map[alias] + name[len(alias):]
            except KeyError:
                log.warning('Unknown namespace alias %s', name)
        return name

    def marshall(self, name, value, add_child=True, add_comments=False,
                 ns=False, add_children_ns=True):
        """Analyze python value and add the serialized XML element using tag name

        :param name: tag name (its namespace alias is translated first)
        :param value: dict, tuple of pairs, list, string/CDATA, None, a type
                      listed in TYPE_MAP (placeholder) or any other object
        :param add_child: create a new child for dict/tuple values, or reuse self
        :param add_comments: add explanatory xml comments
        :param ns: namespace handling for the new tag, as in ``add_child``
        :param add_children_ns: when False, nested tags are created without namespace
        """
        # Change node name to that used by a client
        name = self._update_ns(name)

        if isinstance(value, dict):  # serialize dict (<key>value</key>)
            # for the first parent node, use the document target namespace
            # (ns==True) or use the namespace string uri if passed (elements)
            child = self.add_child(name, ns=ns) if add_child else self
            for k, v in value.items():
                if not add_children_ns:
                    ns = False
                elif hasattr(value, 'namespaces'):
                    # for children, use the wsdl element target namespace:
                    ns = value.namespaces.get(k)
                else:
                    # simple type
                    ns = None
                child.marshall(k, v, add_comments=add_comments, ns=ns)
        elif isinstance(value, tuple):  # serialize tuple (<key>value</key>)
            child = self.add_child(name, ns=ns) if add_child else self
            if not add_children_ns:
                ns = False
            for k, v in value:
                # the pairs are added to the tag looked up by name
                getattr(self, name).marshall(k, v, add_comments=add_comments, ns=ns)
        elif isinstance(value, list):  # serialize lists name: [value1, value2]
            # list elements should be a dict with one element:
            # 'vats': [{'vat': {'vat_amount': 50, 'vat_percent': 5}}, {...}]
            # or an array of complex types directly (a.k.a. jetty dialect)
            # 'vat': [{'vat_amount': 100, 'vat_percent': 21.0}, {...}]
            child = self.add_child(name, ns=ns)
            if not add_children_ns:
                ns = False
            if add_comments:
                child.add_comment("Repetitive array of:")
            for i, t in enumerate(value):
                child.marshall(name, t, False, add_comments=add_comments, ns=ns)
                # "jetty" arrays: add new base node (if not last) -see above-
                # TODO: this could be an issue for some arrays of single values
                if isinstance(t, dict) and len(t) > 1 and i < len(value) - 1:
                    child = self.add_child(name, ns=ns)
        elif isinstance(value, (xml.dom.minidom.CDATASection, basestring)):
            # do not convert strings or unicodes
            self.add_child(name, value, ns=ns)
        elif value is None:  # sent a empty tag?
            self.add_child(name, ns=ns)
        elif value in TYPE_MAP.keys():
            # add commented placeholders for simple types (for examples/help only)
            child = self.add_child(name, ns=ns)
            child.add_comment(TYPE_MAP[value])
        else:  # the rest of object types are converted to string
            # get special serialization function (if any)
            fn = TYPE_MARSHAL_FN.get(type(value), str)
            self.add_child(name, fn(value), ns=ns)

    def import_node(self, other):
        """Append a deep copy of another element's node to this element"""
        x = self.__document.importNode(other._element, True)  # deep copy
        self._element.appendChild(x)

    def write_c14n(self, output=None, exclusive=True):
        "Generate the canonical version of the XML node"
        # imported lazily, only when canonicalization is requested
        from . import c14n
        xml = c14n.Canonicalize(self._element, output,
                                unsuppressedPrefixes=[] if exclusive else None)
        return xml
|