SP/web2py/gluon/contrib/pysimplesoap/simplexml.py
Saturneic 064f602b1a Add.
2018-10-25 23:33:13 +08:00

533 lines
22 KiB
Python

#!/usr/bin/python
# -*- coding: utf-8 -*-
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by the
# Free Software Foundation; either version 3, or (at your option) any later
# version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# for more details.
"""Simple XML manipulation"""
from __future__ import unicode_literals
import sys
# Compare the numeric major version: a lexicographic test on sys.version
# ("3.x" > "3") misorders any release whose string sorts below "3".
if sys.version_info[0] >= 3:
    # Python 3 has no basestring/unicode builtins; alias them so the
    # isinstance() checks below work unchanged on both major versions.
    basestring = str
    unicode = str
import logging
import re
import time
import xml.dom.minidom
from . import __author__, __copyright__, __license__, __version__
# Utility functions used for marshalling, moved aside for readability
from .helpers import TYPE_MAP, TYPE_MARSHAL_FN, TYPE_UNMARSHAL_FN, \
    REVERSE_TYPE_MAP, Struct, Date, Decimal
# Module-level logger shared by the code in this file
log = logging.getLogger(__name__)
class SimpleXMLElement(object):
    """Simple XML manipulation (simil PHP)

    Wraps a list of ``xml.dom.minidom`` nodes that belong to one document.
    Most operations act on the first wrapped node (``self._element``);
    iteration, ``len()`` and integer indexing act on the whole list.
    Child tags can be read as attributes (``node.tag``) and created by
    assignment (``node.tag = "text"``); names starting with ``_`` are
    ordinary Python attributes.
    """

    def __init__(self, text=None, elements=None, document=None,
                 namespace=None, prefix=None, namespaces_map=None, jetty=False):
        """
        :param text: XML source to parse; when given, the wrapped element
                     is the parsed document's root element
        :param elements: already-built DOM nodes to wrap (used if text is None)
        :param document: DOM document owning ``elements``
        :param namespace: namespace uri used for created/searched tags
        :param prefix: namespace prefix prepended to created tag names
        :param namespaces_map: How to map our namespace prefix to that given by the client;
          {prefix: received_prefix}
        :param jetty: stored and handed down to derived elements
        """
        # A fresh dict per call: a ``{}`` default would be shared by every
        # instance created without an explicit map.
        if namespaces_map is None:
            namespaces_map = {}
        self.__namespaces_map = namespaces_map
        if namespaces_map:
            # {'external': 'ext', 'model': 'mod'} -> ^(external|model):.*$
            # used to find prefixes in node names, i.e. <model:code>1</model:code>
            # and later change that to <mod:code>1</mod:code>.
            # Aliases are escaped so that characters such as "." stay literal.
            _rx = "|".join(re.escape(alias) for alias in namespaces_map)
            self.__ns_rx = re.compile(r"^(%s):.*$" % _rx)
        else:
            self.__ns_rx = None     # nothing to translate
        self.__ns = namespace
        self.__prefix = prefix
        self.__jetty = jetty        # special list support
        if text is not None:
            try:
                self.__document = xml.dom.minidom.parseString(text)
            except Exception:
                # log the offending payload, then let the parser error through
                log.error(text)
                raise
            self.__elements = [self.__document.documentElement]
        else:
            self.__elements = elements
            self.__document = document

    def __wrap(self, elements):
        """Build a sibling wrapper over other nodes of the same document"""
        return SimpleXMLElement(
            elements=elements,
            document=self.__document,
            namespace=self.__ns,
            prefix=self.__prefix,
            jetty=self.__jetty,
            namespaces_map=self.__namespaces_map
        )

    @property
    def _element(self):
        """First wrapped DOM node (the one most methods operate on)"""
        return self.__elements[0]

    def add_child(self, name, text=None, ns=True):
        """Adding a child tag to a node

        :param name: tag name of the new child
        :param text: optional text (or CDATASection) placed inside the child
        :param ns: falsy -> no namespace; a string -> written as the child's
                   ``xmlns`` attribute; otherwise the instance namespace
                   (and prefix, if any) is used
        :return: a SimpleXMLElement wrapping the new child
        """
        if not ns or self.__ns is False:
            element = self.__document.createElement(name)
        elif isinstance(ns, basestring):
            # explicit namespace uri (non-empty here, see the first test)
            element = self.__document.createElement(name)
            element.setAttribute("xmlns", ns)
        elif self.__prefix:
            element = self.__document.createElementNS(
                self.__ns, "%s:%s" % (self.__prefix, name))
        else:
            element = self.__document.createElementNS(self.__ns, name)
        # don't append null tags!
        if text is not None:
            if isinstance(text, xml.dom.minidom.CDATASection):
                element.appendChild(self.__document.createCDATASection(text.data))
            else:
                element.appendChild(self.__document.createTextNode(text))
        self._element.appendChild(element)
        return self.__wrap([element])

    def __setattr__(self, tag, text):
        """Add text child tag node (short form)"""
        if tag.startswith("_"):
            # private/internal state is a regular Python attribute
            object.__setattr__(self, tag, text)
        else:
            self.add_child(tag, text)

    def __delattr__(self, tag):
        """Remove the direct child tags named ``tag`` (non recursive!)"""
        if tag.startswith("_"):
            # mirror __setattr__: underscore names are Python attributes
            object.__delattr__(self, tag)
            return
        # Collect first: childNodes must not be mutated while iterating it.
        # Only element nodes are inspected (other node types have no tagName).
        doomed = [node for node in self._element.childNodes
                  if node.nodeType == node.ELEMENT_NODE
                  and tag in (node.tagName, node.localName)]
        for node in doomed:
            self._element.removeChild(node)

    def add_comment(self, data):
        """Add an xml comment to this child"""
        comment = self.__document.createComment(data)
        self._element.appendChild(comment)

    def as_xml(self, filename=None, pretty=False):
        """Return the XML representation of the whole document (UTF-8)

        ``filename`` is accepted for compatibility but is not used here.
        """
        if pretty:
            return self.__document.toprettyxml(encoding='UTF-8')
        return self.__document.toxml('UTF-8')

    if sys.version_info[0] >= 3:
        def __repr__(self):
            """Return the XML representation of this tag"""
            return self._element.toxml()
    else:
        def __repr__(self):
            """Return the XML representation of this tag"""
            # NOTE: do not use self.as_xml('UTF-8') as it returns the whole xml doc
            return self._element.toxml('UTF-8')

    def get_name(self):
        """Return the tag name of this node"""
        return self._element.tagName

    def get_local_name(self):
        """Return the local name of this node (tag name without prefix)"""
        return self._element.localName

    def get_prefix(self):
        """Return the namespace prefix of this node"""
        return self._element.prefix

    def get_namespace_uri(self, ns):
        """Return the namespace uri for a prefix (None if not declared)

        Looks for an ``xmlns:<ns>`` attribute on this node, then on each
        ancestor in turn.
        """
        element = self._element
        while element is not None and element.attributes is not None:
            try:
                return element.attributes['xmlns:%s' % ns].value
            except KeyError:
                element = element.parentNode
        return None

    def attributes(self):
        """Return the attributes (DOM NamedNodeMap) of this tag"""
        #TODO: use slice syntax [:]?
        return self._element.attributes

    def __getitem__(self, item):
        """Return xml tag attribute value or a slice of attributes (iter)

        * string: the attribute value, or None when the attribute is absent
        * slice: a list of (name, value) attribute pairs
        * anything else (an index): the wrapped element at that position
        """
        if isinstance(item, basestring):
            if self._element.hasAttribute(item):
                return self._element.attributes[item].value
            return None
        if isinstance(item, slice):
            # return a list with name:values
            return list(self._element.attributes.items())[item]
        # return element by index (position)
        return self.__wrap([self.__elements[item]])

    def add_attribute(self, name, value):
        """Set an attribute value from a string"""
        self._element.setAttribute(name, value)

    def __setitem__(self, item, value):
        """Set an attribute value (or several, from a dict, with a slice)"""
        if isinstance(item, basestring):
            self.add_attribute(item, value)
        elif isinstance(item, slice):
            # set multiple attributes at once
            for k, v in value.items():
                self.add_attribute(k, v)

    def __delitem__(self, item):
        "Remove an attribute"
        self._element.removeAttribute(item)

    def __call__(self, tag=None, ns=None, children=False, root=False,
                 error=True, ):
        """Search (even in child nodes) and return a child tag by name

        :param tag: tag name to search, an int index into the wrapped
                    elements, or None to iterate over the wrapped elements
        :param ns: namespace uri (or tuple/list of uris) to try first
        :param children: if True, return ``self.children()``
        :param root: if True, return the document root element
        :param error: if False, return None instead of raising when no
                      element is found
        :raises AttributeError: when nothing matches and ``error`` is true
        """
        try:
            if root:
                # return entire document
                return self.__wrap([self.__document.documentElement])
            if tag is None:
                # if no name given, iterate over siblings (same level)
                return self.__iter__()
            if children:
                # future: filter children? by ns?
                return self.children()
            elements = None
            if isinstance(tag, int):
                # return tag by index
                elements = [self.__elements[tag]]
            if ns and not elements:
                candidates = ns if isinstance(ns, (tuple, list)) else (ns, )
                for ns_uri in candidates:
                    elements = self._element.getElementsByTagNameNS(ns_uri, tag)
                    if elements:
                        break
            if self.__ns and not elements:
                elements = self._element.getElementsByTagNameNS(self.__ns, tag)
            if not elements:
                # last resort: plain (qualified) tag name search
                elements = self._element.getElementsByTagName(tag)
            if not elements:
                if error:
                    raise AttributeError("No elements found")
                return None
            return self.__wrap(elements)
        except AttributeError as e:
            raise AttributeError("Tag not found: %s (%s)" % (tag, e))

    def __getattr__(self, tag):
        """Shortcut for __call__"""
        # Dunder and name-mangled private lookups are never XML tags. Without
        # this guard a missing private attribute (e.g. on an instance made
        # with __new__, as copy/pickle do) makes __call__ re-enter
        # __getattr__ until the recursion limit is hit.
        if tag.startswith("__") or tag.startswith("_SimpleXMLElement__"):
            raise AttributeError(tag)
        return self.__call__(tag)

    def __iter__(self):
        """Iterate over xml tags at this level"""
        for element in self.__elements:
            yield self.__wrap([element])

    def __dir__(self):
        """List xml children tags names"""
        # element nodes only: comments, CDATA, etc. have no tagName
        return [node.tagName for node
                in self._element.childNodes
                if node.nodeType == node.ELEMENT_NODE]

    def children(self):
        """Return xml children tags element (None if there are none)"""
        elements = [node for node in self._element.childNodes
                    if node.nodeType == node.ELEMENT_NODE]
        if not elements:
            return None
        return self.__wrap(elements)

    def __len__(self):
        """Return element count"""
        return len(self.__elements)

    def __contains__(self, item):
        """Search for a tag name in this element or child nodes"""
        return bool(self._element.getElementsByTagName(item))

    def __unicode__(self):
        """Returns the unicode text nodes of the current element"""
        return ''.join(
            node.data for node in self._element.childNodes
            if node.nodeType in (node.TEXT_NODE, node.CDATA_SECTION_NODE))

    if sys.version_info[0] >= 3:
        __str__ = __unicode__
    else:
        def __str__(self):
            return self.__unicode__().encode('utf-8')

    def __int__(self):
        """Returns the integer value of the current element"""
        return int(self.__str__())

    def __float__(self):
        """Returns the float value of the current element

        :raises IndexError: carrying the element's xml when conversion fails
        """
        try:
            return float(self.__str__())
        except Exception:
            raise IndexError(self._element.toxml())

    def unmarshall(self, types, strict=True):
        """Convert to python values the current serialized xml element

        :param types: dict of {tag name: conversion function}; a value may
                      also be a nested dict, a list or a tuple describing
                      the structure, or ``types`` may itself be a single
                      conversion function applied to every element
        :param strict: if False, tags missing from ``types`` are converted
                       with ``str`` instead of raising TypeError
        :return: dict of {local tag name: converted value}
        :raises TypeError: unknown tag while ``strict`` is true
        :raises ValueError: a conversion function rejected the tag's text
        """
        # example: types={'p': {'a': int,'b': int}, 'c': [{'d':str}]}
        #   expected xml: <p><a>1</a><b>2</b></p><c><d>hola</d><d>chau</d>
        #   returned value: {'p': {'a':1,'b':2}, 'c':[{'d':'hola'},{'d':'chau'}]}
        d = {}
        for node in self():
            # the key is always the name of the tag found at this level,
            # even if its content is taken from a multiRef node below
            name = str(node.get_local_name())
            # handle multirefs: href="#id0"
            if 'href' in node.attributes().keys():
                href = node['href'][1:]
                for ref_node in self(root=True)("multiRef"):
                    if ref_node['id'] == href:
                        node = ref_node
                        break

            try:
                if isinstance(types, dict):
                    fn = types[name]
                    # custom array only in the response (not defined in the WSDL):
                    # <results soapenc:arrayType="xsd:string[199]>
                    has_array_type = any('arrayType' in k for k, v in node[:])
                    if has_array_type and not isinstance(fn, list):
                        fn = [fn]
                else:
                    fn = types
            except KeyError:
                # tag not declared in types: try to infer the conversion
                xmlns = node['xmlns'] or node.get_namespace_uri(node.get_prefix())
                if 'xsi:type' in node.attributes().keys():
                    xsd_type = node['xsi:type'].split(":")[1]
                    try:
                        # get fn type from SOAP-ENC:arrayType="xsd:string[28]"
                        if xsd_type == 'Array':
                            array_type = [k for k, v in node[:] if 'arrayType' in k][0]
                            xsd_type = node[array_type].split(":")[1]
                            if "[" in xsd_type:
                                xsd_type = xsd_type[:xsd_type.index("[")]
                            fn = [REVERSE_TYPE_MAP[xsd_type]]
                        else:
                            fn = REVERSE_TYPE_MAP[xsd_type]
                    except Exception:
                        fn = None  # ignore multirefs!
                elif xmlns == "http://www.w3.org/2001/XMLSchema":
                    # self-defined schema, return the SimpleXMLElement
                    # TODO: parse to python types if <s:element ref="s:schema"/>
                    fn = None
                elif None in types:
                    # <s:any/>, return the SimpleXMLElement
                    # TODO: check position of None if inside <s:sequence>
                    fn = None
                elif strict:
                    raise TypeError("Tag: %s invalid (type not found)" % (name,))
                else:
                    # if not strict, use default type conversion
                    fn = str

            if isinstance(fn, list):
                # append to existing list (if any) - unnested dict arrays -
                value = d.setdefault(name, [])
                # If the node has no children then the node itself might
                # have multiple occurrences:
                children = node.children() or node
                if fn and not isinstance(fn[0], dict):
                    # simple arrays []
                    for child in (children or []):
                        tmp_dict = child.unmarshall(fn[0], strict)
                        value.extend(tmp_dict.values())
                elif len(fn[0]) > 1:
                    # Jetty and now all dialects use array style support [{k, v}]
                    for parent in node:
                        tmp_dict = {}    # unmarshall each value & mix
                        for child in (parent.children() or []):
                            tmp_dict.update(child.unmarshall(fn[0], strict))
                        value.append(tmp_dict)
                else:  # len(fn[0]) <= 1
                    for child in (children or []):
                        value.append(child.unmarshall(fn[0], strict))

            elif isinstance(fn, tuple):
                value = []
                _d = {}
                children = node.children()
                as_dict = len(fn) == 1 and isinstance(fn[0], dict)
                for child in (children() if children else []):
                    if as_dict:
                        _d.update(child.unmarshall(fn[0], strict))  # Merging pairs
                    else:
                        value.append(child.unmarshall(fn[0], strict))
                if as_dict:
                    value.append(_d)
                if name in d:
                    # repeated tag: extend the tuple collected so far
                    _tmp = list(d[name])
                    _tmp.extend(value)
                    value = tuple(_tmp)
                else:
                    value = tuple(value)

            elif isinstance(fn, dict):
                children = node.children()
                value = children.unmarshall(fn, strict) if children else None

            elif fn is None:
                # xsd:anyType not unmarshalled
                value = node

            else:
                text = unicode(node)
                if text:
                    try:
                        # get special deserialization function (if any)
                        fn = TYPE_UNMARSHAL_FN.get(fn, fn)
                        if fn == str:
                            # always return an unicode object:
                            # (avoid encoding errors in py<3!)
                            value = text
                        else:
                            value = fn(text)
                    except (ValueError, TypeError) as e:
                        raise ValueError("Tag: %s: %s" % (name, e))
                else:
                    # empty tag
                    value = None
            d[name] = value
        return d

    def _update_ns(self, name):
        """Replace the defined namespace alias with those used by the client."""
        if self.__ns_rx is None:
            return name
        match = self.__ns_rx.search(name)
        if match:
            alias = match.group(1)
            try:
                # Swap only the leading prefix; str.replace() would also
                # rewrite the alias wherever it appears in the local name.
                name = self.__namespaces_map[alias] + name[len(alias):]
            except KeyError:
                log.warning('Unknown namespace alias %s', name)
        return name

    def marshall(self, name, value, add_child=True, add_comments=False,
                 ns=False, add_children_ns=True):
        """Analyze python value and add the serialized XML element using tag name

        :param name: tag name (namespace aliases are translated first)
        :param value: dict, tuple of (key, value) pairs, list, string,
                      CDATASection, None, a type listed in TYPE_MAP
                      (written as a commented placeholder) or any other
                      object (converted to string)
        :param add_child: if False, dict/tuple items are added directly to
                          this node instead of to a new ``name`` child
        :param add_comments: add explanatory xml comments for lists
        :param ns: namespace argument handed to ``add_child``
        :param add_children_ns: if False, children are created without namespace
        """
        # Change node name to that used by a client
        name = self._update_ns(name)

        if isinstance(value, dict):  # serialize dict (<key>value</key>)
            # for the first parent node, use the document target namespace
            # (ns==True) or use the namespace string uri if passed (elements)
            child = self.add_child(name, ns=ns) if add_child else self
            for k, v in value.items():
                if not add_children_ns:
                    ns = False
                elif hasattr(value, 'namespaces'):
                    # for children, use the wsdl element target namespace:
                    ns = value.namespaces.get(k)
                else:
                    # simple type
                    ns = None
                child.marshall(k, v, add_comments=add_comments, ns=ns)
        elif isinstance(value, tuple):  # serialize tuple (<key>value</key>)
            child = self.add_child(name, ns=ns) if add_child else self
            if not add_children_ns:
                ns = False
            for k, v in value:
                # write into the node just selected; looking it up again by
                # name could hit an earlier element with the same tag
                child.marshall(k, v, add_comments=add_comments, ns=ns)
        elif isinstance(value, list):  # serialize lists name: [value1, value2]
            # list elements should be a dict with one element:
            # 'vats': [{'vat': {'vat_amount': 50, 'vat_percent': 5}}, {...}]
            # or an array of complex types directly (a.k.a. jetty dialect)
            # 'vat': [{'vat_amount': 100, 'vat_percent': 21.0}, {...}]
            child = self.add_child(name, ns=ns)
            if not add_children_ns:
                ns = False
            if add_comments:
                child.add_comment("Repetitive array of:")
            for i, t in enumerate(value):
                child.marshall(name, t, False, add_comments=add_comments, ns=ns)
                # "jetty" arrays: add new base node (if not last) -see above-
                # TODO: this could be an issue for some arrays of single values
                if isinstance(t, dict) and len(t) > 1 and i < len(value) - 1:
                    child = self.add_child(name, ns=ns)
        elif isinstance(value, (xml.dom.minidom.CDATASection, basestring)):
            # do not convert strings or unicodes
            self.add_child(name, value, ns=ns)
        elif value is None:  # sent a empty tag?
            self.add_child(name, ns=ns)
        elif value in TYPE_MAP.keys():
            # add commented placeholders for simple types (for examples/help only)
            child = self.add_child(name, ns=ns)
            child.add_comment(TYPE_MAP[value])
        else:  # the rest of object types are converted to string
            # get special serialization function (if any)
            fn = TYPE_MARSHAL_FN.get(type(value), str)
            self.add_child(name, fn(value), ns=ns)

    def import_node(self, other):
        """Deep-copy the first node of ``other`` and append it to this node"""
        x = self.__document.importNode(other._element, True)  # deep copy
        self._element.appendChild(x)

    def write_c14n(self, output=None, exclusive=True):
        "Generate the canonical version of the XML node"
        from . import c14n
        # local name chosen so the ``xml`` package is not shadowed
        canonical = c14n.Canonicalize(
            self._element, output,
            unsuppressedPrefixes=[] if exclusive else None)
        return canonical