diff --git a/src/caoscrawler/structure_elements.py b/src/caoscrawler/structure_elements.py index 00e48833a6d519ceff9a6ba3e5ba54198d6ffc56..27dfe21f29edc0f9ac9d4345281a76a5c4b9ea85 100644 --- a/src/caoscrawler/structure_elements.py +++ b/src/caoscrawler/structure_elements.py @@ -175,11 +175,9 @@ class XMLTagElement(StructureElement): Stores elements of an XML tree. """ - def __init__(self, element: lxml.etree.Element, - tree: lxml.etree.ElementTree): - super().__init__(tree.getelementpath(element)) + def __init__(self, element: lxml.etree.Element): + super().__init__(element.getroottree().getelementpath(element)) self.tag = element - self.tree = tree class XMLTextNode(StructureElement): @@ -187,8 +185,7 @@ class XMLTextNode(StructureElement): Stores text nodes of XML trees. """ - def __init__(self, element: lxml.etree.Element, - tree: lxml.etree.ElementTree): + def __init__(self, element: lxml.etree.Element): """ Initializes this XML text node. @@ -199,7 +196,7 @@ class XMLTextNode(StructureElement): - XMLTextNodes just have a text and the name is just for identifying the structure element. They can only be matched using the match entry in the XMLTextNodeConverter. """ - super().__init__(tree.getelementpath(element)) + super().__init__(element.getroottree().getelementpath(element)) self.tag = element self.value = element.text @@ -219,7 +216,8 @@ class XMLAttributeNode(StructureElement): key: The key which identifies the attribute in the list of attributes. tree: The tree containing the element which is used to set the node path. """ - super().__init__(tree.getelementpath(element)) + super().__init__(element.getroottree().getelementpath(element)) self.value = element.attrib[key] self.key = key + # TODO: use getpath instead of getelementpath? self.tag = element