Skip to content
Snippets Groups Projects
Commit 3e8b13df authored by Alexander Schlemmer
Browse files

ENH: redesigned structure elements for the xml conversion

parent 10df18a9
Branches
Tags
2 merge requests: !181 (Release 0.9.0), !174 (XML Converter)
Pipeline #54413 failed
...@@ -196,7 +196,7 @@ class XMLTextNode(StructureElement): ...@@ -196,7 +196,7 @@ class XMLTextNode(StructureElement):
- XMLTextNodes just have a text and the name is just for identifying the structure element. - XMLTextNodes just have a text and the name is just for identifying the structure element.
They can only be matched using the match entry in the XMLTextNodeConverter. They can only be matched using the match entry in the XMLTextNodeConverter.
""" """
super().__init__(element.getroottree().getelementpath(element)) super().__init__(element.getroottree().getelementpath(element) + "/text()")
self.tag = element self.tag = element
self.value = element.text self.value = element.text
...@@ -207,17 +207,14 @@ class XMLAttributeNode(StructureElement): ...@@ -207,17 +207,14 @@ class XMLAttributeNode(StructureElement):
""" """
def __init__(self, element: lxml.etree.Element, def __init__(self, element: lxml.etree.Element,
key: str, key: str):
tree: lxml.etree.ElementTree):
""" """
Initializes this XML attribute node. Initializes this XML attribute node.
element: The xml tree element containing the attribute. element: The xml tree element containing the attribute.
key: The key which identifies the attribute in the list of attributes. key: The key which identifies the attribute in the list of attributes.
tree: The tree containing the element which is used to set the node path.
""" """
super().__init__(element.getroottree().getelementpath(element)) super().__init__(element.getroottree().getelementpath(element) + "@" + key)
self.value = element.attrib[key] self.value = element.attrib[key]
self.key = key self.key = key
# TODO: use getpath instead of getlementpath?
self.tag = element self.tag = element
...@@ -72,6 +72,28 @@ class XMLFileConverter(SimpleFileConverter): ...@@ -72,6 +72,28 @@ class XMLFileConverter(SimpleFileConverter):
class XMLTagConverter(Converter): class XMLTagConverter(Converter):
def create_children(self, generalStore: GeneralStore, element: StructureElement): def create_children(self, generalStore: GeneralStore, element: StructureElement):
"""
Children that are generated by this function are the result of the xpath query given in
the yaml property "xpath". Its default (when not given) is "child::*", so the direct children
of the current xml node.
The xpath expression must be designed in a way that it returns xml tags (and no attributes or texts).
That means, that the axis "attribute::" and the function "text()" must not be used.
The following yaml properties can be used to generate other types of nodes (text nodes and attribute nodes)
as subtree structure elements:
# _*_ marks the default:
attribs_as_children: true # true / _false_
text_as_children: true # true / _false_
tags_as_children: true # _true_ / false
The default is to generate the tags matched by the xpath expression only.
- When text_as_children is set to true, text nodes will be generated that contain the text
contained in the matched tags.
- When attribs_as_children is set to true, attribute nodes will be generated from the attributes
of the matched tags.
"""
if not isinstance(element, XMLTagElement): if not isinstance(element, XMLTagElement):
raise TypeError("Element must be an instance of XMLTagElement.") raise TypeError("Element must be an instance of XMLTagElement.")
...@@ -89,7 +111,7 @@ class XMLTagConverter(Converter): ...@@ -89,7 +111,7 @@ class XMLTagConverter(Converter):
el_lst = [] el_lst = []
for el in children: for el in children:
if isinstance(el, str): if isinstance(el, str):
el_lst.append(XMLTextNode(el)) raise RuntimeError("Only standard xml nodes are supported as results of xpath queries.")
elif isinstance(el, lxml.etree._Element): elif isinstance(el, lxml.etree._Element):
el_lst.append(XMLTagElement(el)) el_lst.append(XMLTagElement(el))
else: else:
...@@ -174,4 +196,6 @@ class XMLTextNodeConverter(Converter): ...@@ -174,4 +196,6 @@ class XMLTextNodeConverter(Converter):
if not isinstance(element, XMLTextNode): if not isinstance(element, XMLTextNode):
raise TypeError("Element must be an instance of XMLTextNode.") raise TypeError("Element must be an instance of XMLTextNode.")
raise NotImplementedError()
return None return None
...@@ -230,3 +230,54 @@ def test_nested_simple_xml(basic_xmltag_converter, basic_xpath_xmltag_converter) ...@@ -230,3 +230,54 @@ def test_nested_simple_xml(basic_xmltag_converter, basic_xpath_xmltag_converter)
assert isinstance(children[0], XMLTagElement) assert isinstance(children[0], XMLTagElement)
assert children[0].name == "img/testnode" assert children[0].name == "img/testnode"
def test_namespace_xml(converter_registry):
    """
    Test for xml conversion including children.
    Nodes have namespaces.

    The document declares a default namespace on the root, a nested default
    namespace on ``node2`` and a prefixed namespace (``test:``). The xpath
    selects the text nodes directly below both ``node1`` tags of the default
    namespace: one in the first ``node1`` ("Bla") and three in the second
    ("text", "2", "3"), hence the expected four children.
    """
    xml_text = """
    <root xmlns="default-namespace" xmlns:test="alternative-namespace">
        <node1>
            Bla
        </node1>
        <node1>
            text
            <node2 xmlns="sub-namespace">
                <node3>
                    ok
                </node3>
            </node2>
            2
            <test:node2>
                sep
            </test:node2>
            3
        </node1>
    </root>
    """

    # NOTE(review): the docstring of XMLTagConverter.create_children states that
    # xpath expressions must not use "text()", and its result loop raises a
    # RuntimeError for string results. Confirm whether this test is expected to
    # pass as written or should assert the error instead.
    xpaths = (
        "default:node1/text()",
    )

    for xpath in xpaths:
        # The YAML document is kept at column 0 so that its nesting does not
        # depend on the indentation of the surrounding Python code. The doubled
        # braces survive str.format() as literal "{" / "}".
        converter = XMLTagConverter(yaml.safe_load("""
type: XMLTag
match_tag: \\{{default-namespace\\}}root
xpath: "{}"
default_namespace: default
subtree:
  Text:
    type: XMLTextNode
    match: (?P<result>.*)
""".format(xpath)), "TestXMLTagConverter", converter_registry)

        tag = XMLTagElement(fromstring(xml_text))
        m = converter.match(tag)
        assert m is not None

        general_store = GeneralStore()
        children = converter.create_children(general_store, tag)
        assert len(children) == 4
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment