diff --git a/src/caoscrawler/converters/converters.py b/src/caoscrawler/converters/converters.py index 40ddde92823e7ecbf50af68dc288c632e2ff9822..1bc70a1573483e5397873652cc546574ec1d73b6 100644 --- a/src/caoscrawler/converters/converters.py +++ b/src/caoscrawler/converters/converters.py @@ -485,6 +485,11 @@ class Converter(object, metaclass=ABCMeta): label: str Default "match_properties". Can be used to change the name of the property in the definition. E.g. the xml converter uses "match_attrib" which makes more sense in the context of xml trees. + + Returns: + -------- + + Returns True when properties match and False otherwise. The vardict dictionary is updated in place. """ if label in self.definition: # This matcher works analogously to the attributes matcher in the XMLConverter @@ -499,7 +504,7 @@ class Converter(object, metaclass=ABCMeta): matched_m_prop = m_prop m_prop_value = re.match(prop_def_value, prop_value) if m_prop_value is None: - return None + return False matched_m_prop_value = m_prop_value # TODO: How to deal with multiple matches? # There are multiple options: @@ -509,11 +514,12 @@ class Converter(object, metaclass=ABCMeta): # Currently the latter option is implemented. # TODO: The ROCrateEntityConverter implements a very similar behavior. if match_counter == 0: - return None + return False elif match_counter > 1: raise RuntimeError("Multiple properties match the same {} entry.".format(label)) vardict.update(matched_m_prop.groupdict()) vardict.update(matched_m_prop_value.groupdict()) + return True def apply_transformers(self, values: GeneralStore, transformer_functions: dict): """ diff --git a/src/caoscrawler/converters/rocrate.py b/src/caoscrawler/converters/rocrate.py index 7fd8378e2efbd48ed4ad77dd371a8a69d30c9fc2..8a45af753312a2bf29c1ddb9e6bcb15458c3ebde 100644 --- a/src/caoscrawler/converters/rocrate.py +++ b/src/caoscrawler/converters/rocrate.py @@ -183,7 +183,8 @@ class ROCrateEntityConverter(Converter): return None vardict.update(m_type.groupdict()) - self.match_properties(element.entity.properties(), vardict) + if not self.match_properties(element.entity.properties(), vardict): + return None return vardict diff --git a/src/caoscrawler/converters/xml_converter.py b/src/caoscrawler/converters/xml_converter.py index 472e40518d8986f8264c0775140e0da516b8d7ec..60d7b49431fb011a06b7105a16471b0b3c7b2268 100644 --- a/src/caoscrawler/converters/xml_converter.py +++ b/src/caoscrawler/converters/xml_converter.py @@ -162,7 +162,8 @@ class XMLTagConverter(Converter): return None vardict.update(m_text.groupdict()) - self.match_properties(element.tag.attrib, vardict, "match_attrib") + if not self.match_properties(element.tag.attrib, vardict, "match_attrib"): + return None return vardict diff --git a/unittests/test_rocrate_converter.py b/unittests/test_rocrate_converter.py index ef59a37c7a9ca91f85d3a62b4f5b6f5c12559575..06ce187eae1ec4936ce3aff3f2799c15be428b6c 100644 --- a/unittests/test_rocrate_converter.py +++ b/unittests/test_rocrate_converter.py @@ -32,17 +32,16 @@ import linkahead as db import pytest import rocrate import yaml -from linkahead.high_level_api import convert_to_python_object -from lxml.etree import fromstring -from rocrate.model.entity import Entity -from rocrate.rocrate import ROCrate - from caoscrawler import scanner from caoscrawler.converters import ELNFileConverter, ROCrateEntityConverter from caoscrawler.scanner import load_definition from caoscrawler.stores import GeneralStore from caoscrawler.structure_elements import (DictElement, File, ROCrateEntity, TextElement) +from linkahead.high_level_api import convert_to_python_object +from lxml.etree import fromstring +from rocrate.model.entity import Entity +from rocrate.rocrate import ROCrate UNITTESTDIR = Path(__file__).parent