Skip to content
Snippets Groups Projects
Commit 3e8b13df authored by Alexander Schlemmer
Browse files

ENH: redesigned structure elements for the xml conversion

parent 10df18a9
No related branches found
No related tags found
2 merge requests: !181 "Release 0.9.0", !174 "XML Converter"
Pipeline #54413 failed
This commit is part of merge request !174. Comments created here will be created in the context of that merge request.
......@@ -196,7 +196,7 @@ class XMLTextNode(StructureElement):
- XMLTextNodes just have a text and the name is just for identifying the structure element.
They can only be matched using the match entry in the XMLTextNodeConverter.
"""
super().__init__(element.getroottree().getelementpath(element))
super().__init__(element.getroottree().getelementpath(element) + "/text()")
self.tag = element
self.value = element.text
......@@ -207,17 +207,14 @@ class XMLAttributeNode(StructureElement):
"""
def __init__(self, element: lxml.etree.Element,
key: str,
tree: lxml.etree.ElementTree):
key: str):
"""
Initializes this XML attribute node.
element: The xml tree element containing the attribute.
key: The key which identifies the attribute in the list of attributes.
tree: The tree containing the element which is used to set the node path.
"""
super().__init__(element.getroottree().getelementpath(element))
super().__init__(element.getroottree().getelementpath(element) + "@" + key)
self.value = element.attrib[key]
self.key = key
# TODO: use getpath instead of getelementpath?
self.tag = element
......@@ -72,6 +72,28 @@ class XMLFileConverter(SimpleFileConverter):
class XMLTagConverter(Converter):
def create_children(self, generalStore: GeneralStore, element: StructureElement):
"""
Children generated by this function are the result of the xpath query given in
the yaml property "xpath". Its default (when not given) is "child::*", i.e. the direct children
of the current xml node.
The xpath expression must be designed so that it returns xml tags only (no attributes or text nodes).
This means that the axis "attribute::" and the function "text()" must not be used.
The following yaml properties can be used to generate other types of nodes (text nodes and attribute nodes)
as subtree structure elements:
# _*_ marks the default:
attribs_as_children: true # true / _false_
text_as_children: true # true / _false_
tags_as_children: true # _true_ / false
The default is to generate the tags matched by the xpath expression only.
- When text_as_children is set to true, text nodes will be generated that contain the text
contained in the matched tags.
- When attribs_as_children is set to true, attribute nodes will be generated from the attributes
of the matched tags.
"""
if not isinstance(element, XMLTagElement):
raise TypeError("Element must be an instance of XMLTagElement.")
......@@ -89,7 +111,7 @@ class XMLTagConverter(Converter):
el_lst = []
for el in children:
if isinstance(el, str):
el_lst.append(XMLTextNode(el))
raise RuntimeError("Only standard xml nodes are supported as results of xpath queries.")
elif isinstance(el, lxml.etree._Element):
el_lst.append(XMLTagElement(el))
else:
......@@ -174,4 +196,6 @@ class XMLTextNodeConverter(Converter):
if not isinstance(element, XMLTextNode):
raise TypeError("Element must be an instance of XMLTextNode.")
raise NotImplementedError()
return None
......@@ -230,3 +230,54 @@ def test_nested_simple_xml(basic_xmltag_converter, basic_xpath_xmltag_converter)
assert isinstance(children[0], XMLTagElement)
assert children[0].name == "img/testnode"
def test_namespace_xml(converter_registry):
    """
    Test for xml conversion including children.
    Nodes have namespaces.

    The XML document declares a default namespace on the root, a prefixed namespace ("test")
    and a different default namespace on a nested node. An XMLTagConverter is built from a
    YAML definition, matched against the parsed root element, and asked to create children
    for the configured xpath expression.
    """
    # NOTE(review): the string literals below are reproduced exactly as they appear in this
    # view, i.e. without any leading whitespace. The nesting of the YAML keys below "subtree"
    # cannot be recovered from here -- confirm against the repository.
    xml_text = """
<root xmlns="default-namespace" xmlns:test="alternative-namespace">
<node1>
Bla
</node1>
<node1>
text
<node2 xmlns="sub-namespace">
<node3>
ok
</node3>
</node2>
2
<test:node2>
sep
</test:node2>
3
</node1>
</root>
"""
    # Every xpath listed here is substituted into the converter definition below.
    xpaths = (
        "default:node1/text()",
    )
    for xpath in xpaths:
        # str.format fills the "{}" placeholder with the xpath; "{{" and "}}" are escaped
        # braces and "\\" is an escaped backslash, so match_tag becomes the pattern
        # \{default-namespace\}root.
        converter = XMLTagConverter(yaml.safe_load("""
type: XMLTag
match_tag: \\{{default-namespace\\}}root
xpath: "{}"
default_namespace: default
subtree:
Text:
type: XMLTextNode
match: (?P<result>.*)
""".format(xpath)), "TestXMLTagConverter", converter_registry)
        tag = XMLTagElement(fromstring(xml_text))
        # The converter has to accept the namespaced root element.
        m = converter.match(tag)
        assert m is not None
        general_store = GeneralStore()
        children = converter.create_children(general_store, tag)
        # Expected count: the first node1 holds one text node; the second holds three
        # (its leading text plus the text following each of its two child elements).
        assert len(children) == 4
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment