From ef95bfbdb6c651e334fe9043d79b8088c061d993 Mon Sep 17 00:00:00 2001 From: Alexander Schlemmer <a.schlemmer@indiscale.com> Date: Thu, 14 Nov 2024 10:01:18 +0100 Subject: [PATCH] FIX(converters): added return values to match_properties method --- src/caoscrawler/converters/converters.py | 10 ++++++++-- src/caoscrawler/converters/rocrate.py | 3 ++- src/caoscrawler/converters/xml_converter.py | 3 ++- unittests/test_rocrate_converter.py | 9 ++++----- 4 files changed, 16 insertions(+), 9 deletions(-) diff --git a/src/caoscrawler/converters/converters.py b/src/caoscrawler/converters/converters.py index 40ddde92..1bc70a15 100644 --- a/src/caoscrawler/converters/converters.py +++ b/src/caoscrawler/converters/converters.py @@ -485,6 +485,11 @@ class Converter(object, metaclass=ABCMeta): label: str Default "match_properties". Can be used to change the name of the property in the definition. E.g. the xml converter uses "match_attrib" which makes more sense in the context of xml trees. + + Returns: + -------- + + Returns True when properties match and False otherwise. The vardict dictionary is updated in place. """ if label in self.definition: # This matcher works analogously to the attributes matcher in the XMLConverter @@ -499,7 +504,7 @@ class Converter(object, metaclass=ABCMeta): matched_m_prop = m_prop m_prop_value = re.match(prop_def_value, prop_value) if m_prop_value is None: - return None + return False matched_m_prop_value = m_prop_value # TODO: How to deal with multiple matches? # There are multiple options: @@ -509,11 +514,12 @@ class Converter(object, metaclass=ABCMeta): # Currently the latter option is implemented. # TODO: The ROCrateEntityConverter implements a very similar behavior. if match_counter == 0: - return None + return False elif match_counter > 1: raise RuntimeError("Multiple properties match the same {} entry.".format(label)) vardict.update(matched_m_prop.groupdict()) vardict.update(matched_m_prop_value.groupdict()) + return True def apply_transformers(self, values: GeneralStore, transformer_functions: dict): """ diff --git a/src/caoscrawler/converters/rocrate.py b/src/caoscrawler/converters/rocrate.py index 7fd8378e..8a45af75 100644 --- a/src/caoscrawler/converters/rocrate.py +++ b/src/caoscrawler/converters/rocrate.py @@ -183,7 +183,8 @@ class ROCrateEntityConverter(Converter): return None vardict.update(m_type.groupdict()) - self.match_properties(element.entity.properties(), vardict) + if not self.match_properties(element.entity.properties(), vardict): + return None return vardict diff --git a/src/caoscrawler/converters/xml_converter.py b/src/caoscrawler/converters/xml_converter.py index 472e4051..60d7b494 100644 --- a/src/caoscrawler/converters/xml_converter.py +++ b/src/caoscrawler/converters/xml_converter.py @@ -162,7 +162,8 @@ class XMLTagConverter(Converter): return None vardict.update(m_text.groupdict()) - self.match_properties(element.tag.attrib, vardict, "match_attrib") + if not self.match_properties(element.tag.attrib, vardict, "match_attrib"): + return None return vardict diff --git a/unittests/test_rocrate_converter.py b/unittests/test_rocrate_converter.py index ef59a37c..06ce187e 100644 --- a/unittests/test_rocrate_converter.py +++ b/unittests/test_rocrate_converter.py @@ -32,17 +32,16 @@ import linkahead as db import pytest import rocrate import yaml -from linkahead.high_level_api import convert_to_python_object -from lxml.etree import fromstring -from rocrate.model.entity import Entity -from rocrate.rocrate import ROCrate - from caoscrawler import scanner from caoscrawler.converters import ELNFileConverter, ROCrateEntityConverter from caoscrawler.scanner import load_definition from caoscrawler.stores import GeneralStore from caoscrawler.structure_elements import (DictElement, File, ROCrateEntity, TextElement) +from linkahead.high_level_api import convert_to_python_object +from lxml.etree import fromstring +from rocrate.model.entity import Entity +from rocrate.rocrate import ROCrate UNITTESTDIR = Path(__file__).parent -- GitLab