From 0694462dfff7c6c51e17ddbbb7be9803f1492a4b Mon Sep 17 00:00:00 2001
From: Alexander Schlemmer <a.schlemmer@indiscale.com>
Date: Wed, 21 Aug 2024 11:21:51 +0200
Subject: [PATCH] ENH: added a new structure element for xml attributes

---
 src/caoscrawler/structure_elements.py | 35 +++++++++++++++++++++++----
 1 file changed, 30 insertions(+), 5 deletions(-)

diff --git a/src/caoscrawler/structure_elements.py b/src/caoscrawler/structure_elements.py
index 73c6f79a..00e48833 100644
--- a/src/caoscrawler/structure_elements.py
+++ b/src/caoscrawler/structure_elements.py
@@ -175,9 +175,11 @@ class XMLTagElement(StructureElement):
     Stores elements of an XML tree.
     """
 
-    def __init__(self, name: str, element: lxml.etree.Element):
-        super().__init__(name)
+    def __init__(self, element: lxml.etree.Element,
+                 tree: lxml.etree.ElementTree):
+        super().__init__(tree.getelementpath(element))
         self.tag = element
+        self.tree = tree
 
 
 class XMLTextNode(StructureElement):
@@ -185,7 +187,8 @@ class XMLTextNode(StructureElement):
     Stores text nodes of XML trees.
     """
 
-    def __init__(self, name: str, value: str):
+    def __init__(self, element: lxml.etree.Element,
+                 tree: lxml.etree.ElementTree):
         """
         Initializes this XML text node.
 
@@ -196,5 +199,27 @@ class XMLTextNode(StructureElement):
         - XMLTextNodes just have a text and the name is just for identifying the structure element.
           They can only be matched using the match entry in the XMLTextNodeConverter.
         """
-        super().__init__(name)
-        self.value = value
+        super().__init__(tree.getelementpath(element))
+        self.tag = element
+        self.value = element.text
+
+
+class XMLAttributeNode(StructureElement):
+    """
+    Stores a single attribute (key and value) of an XML tree element.
+    """
+
+    def __init__(self, element: lxml.etree.Element,
+                 key: str,
+                 tree: lxml.etree.ElementTree):
+        """
+        Initializes this XML attribute node.
+
+        element: The xml tree element containing the attribute.
+        key: The key which identifies the attribute in the list of attributes.
+        tree: The tree containing the element which is used to set the node path.
+        """
+        super().__init__(tree.getelementpath(element))
+        self.value = element.attrib[key]
+        self.key = key
+        self.tag = element
-- 
GitLab