diff --git a/src/caoscrawler/structure_elements.py b/src/caoscrawler/structure_elements.py index 73c6f79aeba1611074d628d228252a66f2fd1a3f..00e48833a6d519ceff9a6ba3e5ba54198d6ffc56 100644 --- a/src/caoscrawler/structure_elements.py +++ b/src/caoscrawler/structure_elements.py @@ -175,9 +175,11 @@ class XMLTagElement(StructureElement): Stores elements of an XML tree. """ - def __init__(self, name: str, element: lxml.etree.Element): - super().__init__(name) + def __init__(self, element: lxml.etree.Element, + tree: lxml.etree.ElementTree): + super().__init__(tree.getelementpath(element)) self.tag = element + self.tree = tree class XMLTextNode(StructureElement): @@ -185,7 +187,8 @@ class XMLTextNode(StructureElement): Stores text nodes of XML trees. """ - def __init__(self, name: str, value: str): + def __init__(self, element: lxml.etree.Element, + tree: lxml.etree.ElementTree): """ Initializes this XML text node. @@ -196,5 +199,27 @@ class XMLTextNode(StructureElement): - XMLTextNodes just have a text and the name is just for identifying the structure element. They can only be matched using the match entry in the XMLTextNodeConverter. """ - super().__init__(name) - self.value = value + super().__init__(tree.getelementpath(element)) + self.tag = element + self.value = element.text + + +class XMLAttributeNode(StructureElement): + """ + Stores attribute nodes of XML trees. + """ + + def __init__(self, element: lxml.etree.Element, + key: str, + tree: lxml.etree.ElementTree): + """ + Initializes this XML attribute node. + + element: The xml tree element containing the attribute. + key: The key which identifies the attribute in the list of attributes. + tree: The tree containing the element which is used to set the node path. + """ + super().__init__(tree.getelementpath(element)) + self.value = element.attrib[key] + self.key = key + self.tag = element