diff --git a/src/caoscrawler/xml_converter.py b/src/caoscrawler/xml_converter.py index e4b8cf4c61ff7258ed0b1f161453fba8d96db4a1..75116ef0c06a275490a46166356842ae2f96fc8f 100644 --- a/src/caoscrawler/xml_converter.py +++ b/src/caoscrawler/xml_converter.py @@ -75,27 +75,18 @@ class XMLTagConverter(Converter): if not isinstance(element, XMLTagElement): raise TypeError("Element must be an instance of XMLTagElement.") - # The options text_as_children, attribs_as_children and - # tags_as_children are currently not implemented. - + nsmap = self.definition["nsmap"] + xpath = self.definition.get("xpath", "child::*") + children = element.tag.xpath(xpath, namespaces=nsmap) el_lst = [] - if "xpath" in self.definition: - # in this case it does not make sense to use the other "children"-options: - # for opt in ("text_", "attribs_", "tags_"): - # if opt + "_as_children" in self.definition: - # raise RuntimeError("The option {} cannot be used when using" - # " xpath".format(opt)) - # if xpath is present, use the xpath to generate children - children = element.xpath(self.definition["xpath"]) - else: - # use the direct children of the node instead - children = list(element) - # if "text_as_children" in self.definition and self.definition["text_as_children"] == True: - # el_lst.append(XMLTextNode(element, element.tree)) - # if "attribs_as_children" in self.definition and self.definition["attribs_as_children"] == True: - # # el_lst.append(XMLTextNode(element, element.tree)) for el in children: - el_lst.append(XMLTagElement(el, element.tree)) + if isinstance(el, str): + el_lst.append(XMLTextNode(el)) + elif isinstance(el, lxml.etree._Element): + el_lst.append(XMLTagElement(el)) + else: + raise RuntimeError("Unsupported child type.") + return el_lst def typecheck(self, element: StructureElement): """