From 8619b7b9242791032d3bcc7f353a2790b9bc5d79 Mon Sep 17 00:00:00 2001 From: Alexander Schlemmer <a.schlemmer@indiscale.com> Date: Tue, 20 Aug 2024 17:54:31 +0200 Subject: [PATCH] ENH: improved handling of non-existing text nodes --- src/caoscrawler/xml_converter.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/caoscrawler/xml_converter.py b/src/caoscrawler/xml_converter.py index fe10c738..908cd9ae 100644 --- a/src/caoscrawler/xml_converter.py +++ b/src/caoscrawler/xml_converter.py @@ -97,7 +97,10 @@ class XMLTagConverter(Converter): vardict.update(m_tag.groupdict()) if "match_text" in self.definition: - m_text = re.match(self.definition["match_text"], element.tag.text) + tagtext = element.tag.text + if element.tag.text is None: + tagtext = "" + m_text = re.match(self.definition["match_text"], tagtext, re.DOTALL) if m_text is None: return None vardict.update(m_text.groupdict()) @@ -114,7 +117,6 @@ class XMLTagConverter(Converter): matched_m_attrib = m_attrib m_attrib_value = re.match(attrib_def_value, attr_value) if m_attrib_value is None: - breakpoint() return None matched_m_attrib_value = m_attrib_value # TODO: How to deal with multiple matches? -- GitLab