From 0694462dfff7c6c51e17ddbbb7be9803f1492a4b Mon Sep 17 00:00:00 2001 From: Alexander Schlemmer <a.schlemmer@indiscale.com> Date: Wed, 21 Aug 2024 11:21:51 +0200 Subject: [PATCH] ENH: added a new structure element for xml attributes --- src/caoscrawler/structure_elements.py | 35 +++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 5 deletions(-) diff --git a/src/caoscrawler/structure_elements.py b/src/caoscrawler/structure_elements.py index 73c6f79a..00e48833 100644 --- a/src/caoscrawler/structure_elements.py +++ b/src/caoscrawler/structure_elements.py @@ -175,9 +175,11 @@ class XMLTagElement(StructureElement): Stores elements of an XML tree. """ - def __init__(self, name: str, element: lxml.etree.Element): - super().__init__(name) + def __init__(self, element: lxml.etree.Element, + tree: lxml.etree.ElementTree): + super().__init__(tree.getelementpath(element)) self.tag = element + self.tree = tree class XMLTextNode(StructureElement): @@ -185,7 +187,8 @@ class XMLTextNode(StructureElement): Stores text nodes of XML trees. """ - def __init__(self, name: str, value: str): + def __init__(self, element: lxml.etree.Element, + tree: lxml.etree.ElementTree): """ Initializes this XML text node. @@ -196,5 +199,27 @@ class XMLTextNode(StructureElement): - XMLTextNodes just have a text and the name is just for identifying the structure element. They can only be matched using the match entry in the XMLTextNodeConverter. """ - super().__init__(name) - self.value = value + super().__init__(tree.getelementpath(element)) + self.tag = element + self.value = element.text + + +class XMLAttributeNode(StructureElement): + """ + Stores attribute nodes of XML trees. + """ + + def __init__(self, element: lxml.etree.Element, + key: str, + tree: lxml.etree.ElementTree): + """ + Initializes this XML attribute node. + + element: The xml tree element containing the attribute. + key: The key which identifies the attribute in the list of attributes. 
+ tree: The tree containing the element which is used to set the node path. + """ + super().__init__(tree.getelementpath(element)) + self.value = element.attrib[key] + self.key = key + self.tag = element -- GitLab