diff --git a/src/caoscrawler/xml_converter.py b/src/caoscrawler/xml_converter.py index fe10c738932fcb15be5bacff728555fc65f72303..908cd9ae072ba20279312474401b58d09a0e8478 100644 --- a/src/caoscrawler/xml_converter.py +++ b/src/caoscrawler/xml_converter.py @@ -97,7 +97,10 @@ class XMLTagConverter(Converter): vardict.update(m_tag.groupdict()) if "match_text" in self.definition: - m_text = re.match(self.definition["match_text"], element.tag.text) + tagtext = element.tag.text + if element.tag.text is None: + tagtext = "" + m_text = re.match(self.definition["match_text"], tagtext, re.DOTALL) if m_text is None: return None vardict.update(m_text.groupdict()) @@ -114,7 +117,6 @@ class XMLTagConverter(Converter): matched_m_attrib = m_attrib m_attrib_value = re.match(attrib_def_value, attr_value) if m_attrib_value is None: - breakpoint() return None matched_m_attrib_value = m_attrib_value # TODO: How to deal with multiple matches?