diff --git a/integrationtests/test_data/extroot/realworld_example/dataset_cfoods.yml b/integrationtests/test_data/extroot/realworld_example/dataset_cfoods.yml index eaf2690ae130cb61c8a74452e3e4e1d4fd06846a..7deebf6e9dc861f1debc2b266299d78965e3a733 100644 --- a/integrationtests/test_data/extroot/realworld_example/dataset_cfoods.yml +++ b/integrationtests/test_data/extroot/realworld_example/dataset_cfoods.yml @@ -33,28 +33,28 @@ Data: validate: schema/dataspace.schema.json subtree: dataspace_id_element: - type: DictIntegerElement + type: IntegerElement match_name: "dataspace_id" match_value: "(?P<id>[0-9]+)" records: Dataspace: dataspace_id: $id archived_element: - type: DictBooleanElement + type: BooleanElement match_name: "archived" match_value: "(?P<archived>.*)" records: Dataspace: archived: $archived url_element: - type: DictTextElement + type: TextElement match_name: "url" match_value: "(?P<url>.*)" records: Dataspace: url: $url coordinator_element: - type: DictDictElement + type: DictElement match_name: "coordinator" records: Person: @@ -64,70 +64,70 @@ Data: Person: $Person subtree: &person_subtree full_name_element: - type: DictTextElement + type: TextElement match_name: "full_name" match_value: "(?P<full_name>.*)" records: Person: full_name: $full_name full_name_nonlatin_element: - type: DictTextElement + type: TextElement match_name: "full_name_nonlatin" match_value: "(?P<full_name_nonlatin>.*)" records: Person: full_name_nonlatin: $full_name_nonlatin family_name_element: - type: DictTextElement + type: TextElement match_name: "family_name" match_value: "(?P<family_name>.*)" records: Person: family_name: $family_name given_name_element: - type: DictTextElement + type: TextElement match_name: "given_name" match_value: "(?P<given_name>.*)" records: Person: given_name: $given_name email_element: - type: DictTextElement + type: TextElement match_name: "email" match_value: "(?P<email>.*)" records: Person: email: $email affiliation_element: - type: 
DictTextElement + type: TextElement match_name: "affiliation" match_value: "(?P<affiliation>.*)" records: Person: affiliation: $affiliation ORCID_element: - type: DictTextElement + type: TextElement match_name: "ORCID" match_value: "(?P<ORCID>.*)" records: Person: ORCID: $ORCID start_date_element: - type: DictTextElement + type: TextElement match_name: "start_date" match_value: "(?P<start_date>.*)" records: Dataspace: start_date: $start_date end_date_element: - type: DictTextElement + type: TextElement match_name: "end_date" match_value: "(?P<end_date>.*)" records: Dataspace: end_date: $end_date comment: - type: DictTextElement + type: TextElement match_name: "comment" match_value: "(?P<comment>.*)" records: @@ -152,18 +152,18 @@ Data: validate: schema/dataset.schema.json subtree: title_element: - type: DictTextElement + type: TextElement match_name: "title" match_value: "(?P<title>.*)" records: Dataset: title: $title authors_element: - type: DictListElement + type: ListElement match_name: "authors" subtree: author_element: - type: Dict + type: DictElement records: Person: parents: @@ -172,21 +172,21 @@ Data: authors: +$Person subtree: *person_subtree abstract_element: - type: DictTextElement + type: TextElement match_name: "abstract" match_value: "(?P<abstract>.*)" records: Dataset: abstract: $abstract comment_element: - type: DictTextElement + type: TextElement match_name: "comment" match_value: "(?P<comment>.*)" records: Dataset: comment: $comment license_element: - type: DictTextElement + type: TextElement match_name: "license" match_value: "(?P<license_name>.*)" records: @@ -199,14 +199,14 @@ Data: Dataset: license: $license dataset_doi_element: - type: DictTextElement + type: TextElement match_name: "dataset_doi" match_value: "(?P<dataset_doi>.*)" records: Dataset: dataset_doi: $dataset_doi related_to_dois_element: - type: DictListElement + type: ListElement match_name: "related_to_dois" subtree: related_to_doi_element: @@ -216,14 +216,14 @@ Data: Dataset: 
related_to_dois: +$related_to_doi Keywords_element: - type: DictListElement + type: ListElement match_name: "Keyword" Events_element: - type: DictListElement + type: ListElement match_name: "Event" subtree: Event_element: - type: Dict + type: DictElement records: Event: parents: @@ -232,84 +232,84 @@ Data: Event: +$Event subtree: label_element: - type: DictTextElement + type: TextElement match_name: "label" match_value: "(?P<label>.*)" records: Event: label: $label comment_element: - type: DictTextElement + type: TextElement match_name: "comment" match_value: "(?P<comment>.*)" records: Event: comment: $comment start_datetime_element: - type: DictTextElement + type: TextElement match_name: start_datetime match_value: "(?P<start_datetime>.*)" records: Event: start_datetime: $start_datetime end_datetime_element: - type: DictTextElement + type: TextElement match_name: end_datetime match_value: "(?P<end_datetime>.*)" records: Event: end_datetime: $end_datetime longitude_element: - type: DictFloatElement + type: FloatElement match_name: "longitude" match_value: "(?P<longitude>.*)" records: Event: longitude: $longitude latitude_element: - type: DictFloatElement + type: FloatElement match_name: "latitude" match_value: "(?P<latitude>.*)" records: Event: latitude: $latitude elevation_element: - type: DictFloatElement + type: FloatElement match_name: "elevation" match_value: "(?P<elevation>.*)" records: Event: elevation: $elevation location_element: - type: DictTextElement + type: TextElement match_name: location match_value: "(?P<location>.*)" records: Event: location: $location igsn_element: - type: DictTextElement + type: TextElement match_name: igsn match_value: "(?P<igsn>.*)" records: Event: igsn: $igsn events_in_data_element: - type: DictBooleanElement + type: BooleanElement match_name: "events_in_data" match_value: "(?P<events_in_data>.*)" records: Dataset: events_in_data: $events_in_data geojson_element: - type: DictTextElement + type: TextElement match_name: 
"geojson" match_value: "(?P<geojson>.*)" records: Dataset: geojson: $geojson project_element: - type: DictDictElement + type: DictElement match_name: "project" records: Project: @@ -319,28 +319,28 @@ Data: Project: $Project subtree: name_element: - type: DictTextElement + type: TextElement match_name: "name" match_value: "(?P<name>.*)" records: Project: name: $name full_name_element: - type: DictTextElement + type: TextElement match_name: "full_name" match_value: "(?P<full_name>.*)" records: Project: full_name: $full_name project_id_element: - type: DictTextElement + type: TextElement match_name: "project_id" match_value: "(?P<project_id>.*)" records: Project: project_id: $project_id project_type_element: - type: DictTextElement + type: TextElement match_name: "project_type" match_value: "(?P<project_type_name>.*)" records: @@ -349,39 +349,39 @@ Data: Project: project_type: $project_type institute_element: - type: DictTextElement + type: TextElement match_name: "institute" match_value: "(?P<institute>.*)" records: Project: institute: $institute start_date_element: - type: DictTextElement + type: TextElement match_name: "start_date" match_value: "(?P<start_date>.*)" records: Project: start_date: $start_date end_date_element: - type: DictTextElement + type: TextElement match_name: "end_date" match_value: "(?P<end_date>.*)" records: Project: end_date: $end_date url_element: - type: DictTextElement + type: TextElement match_name: "url" match_value: "(?P<url>.*)" records: Project: url: $url coordinators_element: - type: DictListElement + type: ListElement match_name: "coordinators" subtree: coordinator_element: - type: Dict + type: DictElement records: Person: parents: @@ -390,7 +390,7 @@ Data: coordinators: +$Person subtree: *person_subtree campaign_element: - type: DictDictElement + type: DictElement match_name: "campaign" records: Campaign: @@ -400,39 +400,39 @@ Data: Campaign: $Campaign subtree: label_element: - type: DictTextElement + type: TextElement match_name: 
"label" match_value: "(?P<label>.*)" records: Campaign: label: $label optional_label_element: - type: DictTextElement + type: TextElement match_name: "optional_label" match_value: "(?P<optional_label>.*)" records: Campaign: optional_label: $optional_label start_date_element: - type: DictTextElement + type: TextElement match_name: "start_date" match_value: "(?P<start_date>.*)" records: Campaign: start_date: $start_date end_date_element: - type: DictTextElement + type: TextElement match_name: "end_date" match_value: "(?P<end_date>.*)" records: Campaign: end_date: $end_date responsible_scientists_element: - type: DictListElement + type: ListElement match_name: "responsible_scientists" subtree: responsible_scientist_element: - type: Dict + type: DictElement records: Person: parents: @@ -441,11 +441,11 @@ Data: responsible_scientists: +$Person subtree: *person_subtree Methods_element: - type: DictListElement + type: ListElement match_name: "Method" subtree: Method_element: - type: Dict + type: DictElement records: Method: parents: @@ -454,32 +454,32 @@ Data: Method: +$Method subtree: method_name_element: - type: DictTextElement + type: TextElement match_name: "method_name" match_value: "(?P<method_name>.*)" records: Method: name: $method_name abbreviation_element: - type: DictTextElement + type: TextElement match_name: "abbreviation" match_value: "(?P<abbreviation>.*)" records: Method: abbreviation: $abbreviation url_element: - type: DictTextElement + type: TextElement match_name: "url" match_value: "(?P<url>.*)" records: Method: url: $url Taxa_element: - type: DictListElement + type: ListElement match_name: "Taxon" subtree: Taxon_element: - type: Dict + type: DictElement records: Taxon: parents: @@ -488,28 +488,28 @@ Data: Taxon: +$Taxon subtree: taxon_name_element: - type: DictTextElement + type: TextElement match_name: "taxon_name" match_value: "(?P<taxon_name>.*)" records: Taxon: name: $taxon_name archived_element: - type: DictBooleanElement + type: BooleanElement 
match_name: "archived" match_value: "(P<archived>.*)" records: Dataset: archived: $archived publication_date_element: - type: DictTextElement + type: TextElement match_name: "publication_date" match_value: "(P<publication_date>.*)" records: Dataset: publication_date: $publication_date max_files_element: - type: DictIntegerElement + type: IntegerElement match_name: "max_files" match_value: "(P<max_files>.*)" records: diff --git a/integrationtests/test_issues.py b/integrationtests/test_issues.py index 0f8934137e646677243a851f8c525d90375fb66d..527b4c0cf67f483d5b61972a0104ff4fb673402d 100644 --- a/integrationtests/test_issues.py +++ b/integrationtests/test_issues.py @@ -22,7 +22,7 @@ import caosdb as db from caoscrawler.crawl import Crawler from caoscrawler.identifiable_adapters import CaosDBIdentifiableAdapter -from caoscrawler.structure_elements import Dict +from caoscrawler.structure_elements import DictElement from caosdb.utils.register_tests import clear_database, set_test_key set_test_key("10b128cf8a1372f30aa3697466bb55e76974e0c16a599bb44ace88f19c8f61e2") @@ -51,14 +51,14 @@ def test_issue_23(clear_database): # identifying_prop and prop_b, but not prop_a ... 
crawler_definition = { "DictTest": { - "type": "Dict", + "type": "DictElement", "match": "(.*)", "records": { "TestType": {} }, "subtree": { "identifying_element": { - "type": "DictTextElement", + "type": "TextElement", "match_name": "ident", "match_value": "(?P<ident_value>.*)", "records": { @@ -68,7 +68,7 @@ def test_issue_23(clear_database): } }, "other_element": { - "type": "DictTextElement", + "type": "TextElement", "match_name": "prop_b", "match_value": "(?P<other_value>.*)", "records": { @@ -96,7 +96,7 @@ def test_issue_23(clear_database): } records = crawler.start_crawling( - Dict("TestDict", test_dict), crawler_definition, converter_registry) + DictElement("TestDict", test_dict), crawler_definition, converter_registry) assert len(records) == 1 rec_crawled = records[0] diff --git a/integrationtests/test_realworld_example.py b/integrationtests/test_realworld_example.py index 48729f720e5b11eb5ad9722653aea06756cb0ae8..4158ed22278ef5c871a22d45885e58fbfa84ea3b 100644 --- a/integrationtests/test_realworld_example.py +++ b/integrationtests/test_realworld_example.py @@ -31,9 +31,8 @@ import os import caosdb as db from caoscrawler.crawl import Crawler, crawler_main -from caoscrawler.converters import JSONFileConverter, DictConverter from caoscrawler.identifiable_adapters import CaosDBIdentifiableAdapter -from caoscrawler.structure_elements import File, JSONFile, Directory +from caoscrawler.structure_elements import Directory import pytest from caosadvancedtools.models.parser import parse_model_from_json_schema, parse_model_from_yaml diff --git a/src/caoscrawler/cfood-schema.yml b/src/caoscrawler/cfood-schema.yml index d7b5abfd1ac6c381b50bd4ce61015f1b8602b408..5e724c83695e098ce980e1aa8e81c65ae8525e19 100644 --- a/src/caoscrawler/cfood-schema.yml +++ b/src/caoscrawler/cfood-schema.yml @@ -16,10 +16,15 @@ cfood: - YamlFileCaosDBRecord - MarkdownFile - DictListElement + - ListElement - DictDictElement + - DictElement - DictFloatElement + - FloatElement - 
DictIntegerElement + - IntegerElement - DictBooleanElement + - BooleanElement - Definitions - Dict - JSONFile diff --git a/src/caoscrawler/converters.py b/src/caoscrawler/converters.py index def7ca1243156b2d25a1f8db387fef25e1cc859c..88624c740939f01a9fdd86277b5447d693038bf4 100644 --- a/src/caoscrawler/converters.py +++ b/src/caoscrawler/converters.py @@ -31,11 +31,10 @@ import json import warnings from .utils import has_parent from .stores import GeneralStore, RecordStore -from .structure_elements import (StructureElement, Directory, File, Dict, JSONFile, - DictIntegerElement, DictBooleanElement, - DictFloatElement, DictDictElement, - TextElement, DictTextElement, DictElement, DictListElement) -from typing import Dict as Dict_t, List, Optional, Tuple, Union +from .structure_elements import (StructureElement, Directory, File, DictElement, JSONFile, + IntegerElement, BooleanElement, FloatElement, + TextElement, TextElement, ListElement) +from typing import List, Optional, Tuple, Union from abc import ABCMeta, abstractmethod from string import Template import yaml_header_tools @@ -264,14 +263,12 @@ class Converter(object, metaclass=ABCMeta): Converters treat StructureElements contained in the hierarchical sturcture. """ - def __init__(self, definition: dict, - name: str, - converter_registry: dict): + def __init__(self, definition: dict, name: str, converter_registry: dict): self.definition = definition self.name = name # Used to store usage information for debugging: - self.metadata: Dict_t[str, set[str]] = { + self.metadata: dict[str, set[str]] = { "usage": set() } @@ -284,9 +281,7 @@ class Converter(object, metaclass=ABCMeta): converter_definition, converter_name, converter_registry)) @staticmethod - def converter_factory(definition: dict, - name: str, - converter_registry: dict): + def converter_factory(definition: dict, name: str, converter_registry: dict): """creates a Converter instance of the appropriate class. 
The `type` key in the `definition` defines the Converter class which is being used. @@ -363,7 +358,7 @@ class Converter(object, metaclass=ABCMeta): filtered_children = FILTER_FUNCTIONS[rule](to_be_filtered) - return filtered_children+unmatched_children + return filtered_children + unmatched_children @abstractmethod def typecheck(self, element: StructureElement): @@ -386,14 +381,6 @@ class Converter(object, metaclass=ABCMeta): class DirectoryConverter(Converter): - - def __init__(self, definition: dict, name: str, - converter_registry: dict): - """ - Initialize a new directory converter. - """ - super().__init__(definition, name, converter_registry) - def create_children(self, generalStore: GeneralStore, element: StructureElement): if not isinstance(element, Directory): @@ -413,6 +400,8 @@ class DirectoryConverter(Converter): def typecheck(self, element: StructureElement): return isinstance(element, Directory) + # TODO basically all converters implement such a match function. Shouldn't this be the one + # of the parent class and subclasses can override if needed? def match(self, element: StructureElement): if not isinstance(element, Directory): raise RuntimeError("Element must be a directory.") @@ -463,16 +452,21 @@ class SimpleFileConverter(Converter): return m.groupdict() +class FileConverter(SimpleFileConverter): + def __init__(self, *args, **kwargs): + warnings.warn(DeprecationWarning( + "This class is deprecated. Please use SimpleFileConverter.")) + super().__init__(*args, **kwargs) + + class MarkdownFileConverter(Converter): - def __init__(self, definition: dict, name: str, - converter_registry: dict): - """ - Initialize a new directory converter. - """ - super().__init__(definition, name, converter_registry) + """ + reads the yaml header of markdown files (if such a header exists). + """ def create_children(self, generalStore: GeneralStore, element: StructureElement): + # TODO: isn't the type check sufficient? 
if not isinstance(element, File): raise RuntimeError("A markdown file is needed to create children.") @@ -482,9 +476,9 @@ class MarkdownFileConverter(Converter): for name, entry in header.items(): if type(entry) == list: - children.append(DictListElement(name, entry)) + children.append(ListElement(name, entry)) elif type(entry) == str: - children.append(DictTextElement(name, entry)) + children.append(TextElement(name, entry)) else: raise RuntimeError( "Header entry {} has incompatible type.".format(name)) @@ -494,6 +488,7 @@ class MarkdownFileConverter(Converter): return isinstance(element, File) def match(self, element: StructureElement): + # TODO: isn't the type check sufficient? if not isinstance(element, File): raise RuntimeError("Element must be a file.") m = re.match(self.definition["match"], element.name) @@ -508,7 +503,7 @@ class MarkdownFileConverter(Converter): return m.groupdict() -class DictConverter(Converter): +class DictElementConverter(Converter): # TODO use Dict as typecheck? 
def create_children(self, generalStore: GeneralStore, element: StructureElement): if not self.typecheck(element): @@ -521,17 +516,17 @@ class DictConverter(Converter): for name, value in data.items(): if type(value) == list: - children.append(DictListElement(name, value)) + children.append(ListElement(name, value)) elif type(value) == str: - children.append(DictTextElement(name, value)) + children.append(TextElement(name, value)) elif type(value) == dict: - children.append(DictDictElement(name, value)) + children.append(DictElement(name, value)) elif type(value) == int: - children.append(DictIntegerElement(name, value)) + children.append(IntegerElement(name, value)) elif type(value) == bool: - children.append(DictBooleanElement(name, value)) + children.append(BooleanElement(name, value)) elif type(value) == float: - children.append(DictFloatElement(name, value)) + children.append(FloatElement(name, value)) elif type(value) == type(None): continue else: @@ -543,35 +538,32 @@ class DictConverter(Converter): # TODO use Dict as typecheck? def typecheck(self, element: StructureElement): - return isinstance(element, Dict) + return isinstance(element, DictElement) def match(self, element: StructureElement): """ Allways matches if the element has the right type. """ - if not isinstance(element, Dict): + if not isinstance(element, DictElement): raise RuntimeError("Element must be a DictElement.") - return {} + return match_name_and_value(self.definition, element.name, element.value) -# TODO: difference to SimpleFileConverter? Do we need both? -class FileConverter(Converter): - def typecheck(self, element: StructureElement): - return isinstance(element, File) +class DictConverter(DictElementConverter): + def __init__(self, *args, **kwargs): + warnings.warn(DeprecationWarning( + "This class is deprecated. 
Please use DictElementConverter.")) + super().__init__(*args, **kwargs) - def match(self, element: StructureElement): - if not self.typecheck(element): - raise RuntimeError("Element must be a file") - m = re.match(self.definition["match"], element.name) - if m is None: - return None - return m.groupdict() - def create_children(self, generalStore: GeneralStore, element: StructureElement): - return [] +class DictDictElementConverter(DictElementConverter): + def __init__(self, *args, **kwargs): + warnings.warn(DeprecationWarning( + "This class is deprecated. Please use DictElementConverter.")) + super().__init__(*args, **kwargs) -class JSONFileConverter(DictConverter): +class JSONFileConverter(DictElementConverter): def typecheck(self, element: StructureElement): return isinstance(element, File) @@ -613,7 +605,58 @@ class JSONFileConverter(DictConverter): return self._create_children_from_dict(json_data) -class _AbstractDictElementConverter(Converter): +def match_name_and_value(definition, name, value): + """ + takes match definitions from the definition argument and applies regular expressions to name + and possibly value + + one of the keys 'match_name' and 'match' needs to be available in definition + 'match_value' is optional + + Returns None, if match_name or match lead to no match. 
Otherwise, returns a dictionary with the + matched groups, possibly including matches from using match_value + """ + if "match_name" in definition: + if "match" in definition: + raise RuntimeError(f"Do not supply both, 'match_name' and 'match'.") + + m1 = re.match(definition["match_name"], name) + if m1 is None: + return None + else: + m1 = m1.groupdict() + elif "match" in definition: + m1 = re.match(definition["match"], name) + if m1 is None: + return None + else: + m1 = m1.groupdict() + else: + m1 = {} + + if "match_value" in definition: + m2 = re.match(definition["match_value"], str(value), re.DOTALL) + if m2 is None: + return None + else: + m2 = m2.groupdict() + else: + m2 = {} + + values = dict() + values.update(m1) + values.update(m2) + return values + + +class _AbstractScalarValueElementConverter(Converter): + """ + A base class for all converters that have a scalar value that can be matched using a regular + expression. + + values must have one of the following type: str, bool, int, float + """ + default_matches = { "accept_text": False, "accept_bool": False, @@ -625,7 +668,12 @@ class _AbstractDictElementConverter(Converter): return [] def typecheck(self, element: StructureElement): - return True + """ + returns whether the type of StructureElement is accepted by this converter instance. 
+ """ + allowed_matches = self._merge_match_definition_with_default(self.default_matches, + self.definition) + return self._typecheck(element, allowed_matches) def match(self, element: StructureElement): """ @@ -639,18 +687,9 @@ class _AbstractDictElementConverter(Converter): if not self.typecheck(element): raise RuntimeError( f"Element has an invalid type: {type(element)}.") - m1 = re.match(self.definition["match_name"], element.name) - if m1 is None: - return None - m2 = re.match(self.definition["match_value"], str(element.value), re.DOTALL) - if m2 is None: - return None - values = dict() - values.update(m1.groupdict()) - values.update(m2.groupdict()) - return values + return match_name_and_value(self.definition, element.name, element.value) - def _typecheck(self, element: StructureElement, allowed_matches: Dict): + def _typecheck(self, element: StructureElement, allowed_matches: dict): """ returns whether the type of StructureElement is accepted. @@ -661,18 +700,18 @@ class _AbstractDictElementConverter(Converter): returns: whether or not the converter allows the type of element """ - if (bool(allowed_matches["accept_text"]) and isinstance(element, DictTextElement)): + if (bool(allowed_matches["accept_text"]) and isinstance(element, TextElement)): return True - elif (bool(allowed_matches["accept_bool"]) and isinstance(element, DictBooleanElement)): + elif (bool(allowed_matches["accept_bool"]) and isinstance(element, BooleanElement)): return True - elif (bool(allowed_matches["accept_int"]) and isinstance(element, DictIntegerElement)): + elif (bool(allowed_matches["accept_int"]) and isinstance(element, IntegerElement)): return True - elif (bool(allowed_matches["accept_float"]) and isinstance(element, DictFloatElement)): + elif (bool(allowed_matches["accept_float"]) and isinstance(element, FloatElement)): return True else: return False - def _merge_match_definition_with_default(self, default: Dict, definition: Dict): + def 
_merge_match_definition_with_default(self, default: dict, definition: dict): """ returns a dict with the same keys as default dict but with updated values from definition where it has the same keys @@ -686,16 +725,8 @@ class _AbstractDictElementConverter(Converter): result[key] = default[key] return result - def typecheck(self, element: StructureElement): - """ - returns whether the type of StructureElement is accepted by this converter instance. - """ - allowed_matches = self._merge_match_definition_with_default(self.default_matches, - self.definition) - return self._typecheck(element, allowed_matches) - -class DictBooleanElementConverter(_AbstractDictElementConverter): +class BooleanElementConverter(_AbstractScalarValueElementConverter): default_matches = { "accept_text": False, "accept_bool": True, @@ -704,7 +735,14 @@ class DictBooleanElementConverter(_AbstractDictElementConverter): } -class DictFloatElementConverter(_AbstractDictElementConverter): +class DictBooleanElementConverter(BooleanElementConverter): + def __init__(self, *args, **kwargs): + warnings.warn(DeprecationWarning( + "This class is deprecated. Please use BooleanElementConverter.")) + super().__init__(*args, **kwargs) + + +class FloatElementConverter(_AbstractScalarValueElementConverter): default_matches = { "accept_text": False, "accept_bool": False, @@ -713,7 +751,14 @@ class DictFloatElementConverter(_AbstractDictElementConverter): } -class DictTextElementConverter(_AbstractDictElementConverter): +class DictFloatElementConverter(FloatElementConverter): + def __init__(self, *args, **kwargs): + warnings.warn(DeprecationWarning( + "This class is deprecated. 
Please use FloatElementConverter.")) + super().__init__(*args, **kwargs) + + +class TextElementConverter(_AbstractScalarValueElementConverter): default_matches = { "accept_text": True, "accept_bool": True, @@ -722,7 +767,14 @@ class DictTextElementConverter(_AbstractDictElementConverter): } -class DictIntegerElementConverter(_AbstractDictElementConverter): +class DictTextElementConverter(TextElementConverter): + def __init__(self, *args, **kwargs): + warnings.warn(DeprecationWarning( + "This class is deprecated. Please use TextElementConverter.")) + super().__init__(*args, **kwargs) + + +class IntegerElementConverter(_AbstractScalarValueElementConverter): default_matches = { "accept_text": False, "accept_bool": False, @@ -731,10 +783,17 @@ class DictIntegerElementConverter(_AbstractDictElementConverter): } -class DictListElementConverter(Converter): +class DictIntegerElementConverter(IntegerElementConverter): + def __init__(self, *args, **kwargs): + warnings.warn(DeprecationWarning( + "This class is deprecated. 
Please use IntegerElementConverter.")) + super().__init__(*args, **kwargs) + + +class ListElementConverter(Converter): def create_children(self, generalStore: GeneralStore, element: StructureElement): - if not isinstance(element, DictListElement): + if not isinstance(element, ListElement): raise RuntimeError( "This converter can only process DictListElements.") children = [] @@ -743,64 +802,32 @@ class DictListElementConverter(Converter): if isinstance(list_element, str): children.append(TextElement(str(index), list_element)) elif isinstance(list_element, dict): - children.append(Dict(str(index), list_element)) + children.append(DictElement(str(index), list_element)) else: raise NotImplementedError( f"Unkown type {type(list_element)} in list element {list_element}.") return children def typecheck(self, element: StructureElement): - return isinstance(element, DictListElement) - - def match(self, element: StructureElement): - if not isinstance(element, DictListElement): - raise RuntimeError("Element must be a DictListElement.") - m = re.match(self.definition["match_name"], element.name) - if m is None: - return None - if "match" in self.definition: - raise NotImplementedError( - "Match is not implemented for DictListElement.") - return m.groupdict() - - -class DictDictElementConverter(DictConverter): - def create_children(self, generalStore: GeneralStore, element: StructureElement): - if not self.typecheck(element): - raise RuntimeError("A dict is needed to create children") - - return self._create_children_from_dict(element.value) - - def typecheck(self, element: StructureElement): - return isinstance(element, DictDictElement) + return isinstance(element, ListElement) def match(self, element: StructureElement): - if not self.typecheck(element): - raise RuntimeError("Element must be a DictDictElement.") + if not isinstance(element, ListElement): + raise RuntimeError("Element must be a ListElement.") m = re.match(self.definition["match_name"], element.name) if m is 
None: return None if "match" in self.definition: raise NotImplementedError( - "Match is not implemented for DictDictElement.") + "Match is not implemented for ListElement.") return m.groupdict() -class TextElementConverter(Converter): - def create_children(self, generalStore: GeneralStore, - element: StructureElement): - return [] - - def typecheck(self, element: StructureElement): - return isinstance(element, TextElement) - - def match(self, element: StructureElement): - if not isinstance(element, TextElement): - raise RuntimeError("Element must be a TextElement.") - m = re.match(self.definition["match"], element.value) - if m is None: - return None - return m.groupdict() +class DictListElementConverter(ListElementConverter): + def __init__(self, *args, **kwargs): + warnings.warn(DeprecationWarning( + "This class is deprecated. Please use ListElementConverter.")) + super().__init__(*args, **kwargs) class TableConverter(Converter): @@ -808,10 +835,10 @@ class TableConverter(Converter): This converter reads tables in different formats line by line and allows matching the corresponding rows. - The subtree generated by the table converter consists of DictDictElements, each being + The subtree generated by the table converter consists of DictElements, each being a row. The corresponding header elements will become the dictionary keys. - The rows can be matched using a DictDictElementConverter. + The rows can be matched using a DictElementConverter. """ @abstractmethod def get_options(self): @@ -827,7 +854,8 @@ class TableConverter(Converter): if opt_name in self.definition: el = self.definition[opt_name] # The option can often either be a single value or a list of values. - # In the latter case each element of the list will be converted to the defined type. + # In the latter case each element of the list will be converted to the defined + # type. 
if isinstance(el, list): option_dict[opt_name] = [ opt_conversion(el_el) for el_el in el] @@ -871,7 +899,7 @@ class XLSXTableConverter(TableConverter): child_elements = list() for index, row in table.iterrows(): child_elements.append( - DictDictElement(str(index), row.to_dict())) + DictElement(str(index), row.to_dict())) return child_elements @@ -899,5 +927,5 @@ class CSVTableConverter(TableConverter): child_elements = list() for index, row in table.iterrows(): child_elements.append( - DictDictElement(str(index), row.to_dict())) + DictElement(str(index), row.to_dict())) return child_elements diff --git a/src/caoscrawler/crawl.py b/src/caoscrawler/crawl.py index ad77e678a31a9bd950c89019f38ce58a20d9c2e3..0bfe075b6847e52374cc7730f799558648446b47 100644 --- a/src/caoscrawler/crawl.py +++ b/src/caoscrawler/crawl.py @@ -326,7 +326,7 @@ class Crawler(object): "converter": "MarkdownFileConverter", "package": "caoscrawler.converters"}, "File": { - "converter": "FileConverter", + "converter": "SimpleFileConverter", "package": "caoscrawler.converters"}, "JSONFile": { "converter": "JSONFileConverter", @@ -337,30 +337,45 @@ class Crawler(object): "XLSXTableConverter": { "converter": "XLSXTableConverter", "package": "caoscrawler.converters"}, - "Dict": { - "converter": "DictConverter", - "package": "caoscrawler.converters"}, "DictBooleanElement": { - "converter": "DictBooleanElementConverter", + "converter": "BooleanElementConverter", + "package": "caoscrawler.converters"}, + "BooleanElement": { + "converter": "BooleanElementConverter", "package": "caoscrawler.converters"}, "DictFloatElement": { - "converter": "DictFloatElementConverter", + "converter": "FloatElementConverter", + "package": "caoscrawler.converters"}, + "FloatElement": { + "converter": "FloatElementConverter", "package": "caoscrawler.converters"}, "DictTextElement": { - "converter": "DictTextElementConverter", + "converter": "TextElementConverter", + "package": "caoscrawler.converters"}, + "TextElement": { + 
"converter": "TextElementConverter", "package": "caoscrawler.converters"}, "DictIntegerElement": { - "converter": "DictIntegerElementConverter", + "converter": "IntegerElementConverter", + "package": "caoscrawler.converters"}, + "IntegerElement": { + "converter": "IntegerElementConverter", "package": "caoscrawler.converters"}, "DictListElement": { - "converter": "DictListElementConverter", + "converter": "ListElementConverter", + "package": "caoscrawler.converters"}, + "ListElement": { + "converter": "ListElementConverter", "package": "caoscrawler.converters"}, "DictDictElement": { - "converter": "DictDictElementConverter", + "converter": "DictElementConverter", + "package": "caoscrawler.converters"}, + "DictElement": { + "converter": "DictElementConverter", + "package": "caoscrawler.converters"}, + "Dict": { + "converter": "DictElementConverter", "package": "caoscrawler.converters"}, - "TextElement": { - "converter": "TextElementConverter", - "package": "caoscrawler.converters"} } # More converters from definition file: @@ -720,7 +735,7 @@ class Crawler(object): newrecord = self.get_from_any_cache(record) merge_entities(newrecord, record) Crawler.bend_references_to_new_object( - old=record, new=newrecord, entities=flat+to_be_updated+to_be_inserted) + old=record, new=newrecord, entities=flat + to_be_updated + to_be_inserted) del flat[i] resolved_references = True diff --git a/src/caoscrawler/structure_elements.py b/src/caoscrawler/structure_elements.py index 01996b4ff3e14a9739857e6e03ceca161300b37e..cb5fad211b5e3b1b766ee95fd6f0a31c965d032b 100644 --- a/src/caoscrawler/structure_elements.py +++ b/src/caoscrawler/structure_elements.py @@ -23,7 +23,8 @@ # ** end header # -from typing import Dict +from typing import Dict as tDict +import warnings class StructureElement(object): @@ -31,7 +32,7 @@ class StructureElement(object): def __init__(self, name): # Used to store usage information for debugging: - self.metadata: Dict[str, set[str]] = { + self.metadata: tDict[str, 
set[str]] = { "usage": set() } @@ -68,48 +69,78 @@ class JSONFile(File): class DictElement(StructureElement): - def __init__(self, name: str, value): + def __init__(self, name: str, value: dict): super().__init__(name) self.value = value -class Dict(StructureElement): - def __init__(self, name: str, value: dict): +class TextElement(StructureElement): + def __init__(self, name: str, value: str): super().__init__(name) self.value = value -class DictTextElement(DictElement): - def __init__(self, name: str, value: str): - super().__init__(name, value) +class DictTextElement(TextElement): + def __init__(self, *args, **kwargs): + warnings.warn(DeprecationWarning("This class is depricated. Please use TextElement.")) + super().__init__(*args, **kwargs) -class DictIntegerElement(DictElement): +class IntegerElement(StructureElement): def __init__(self, name: str, value: int): - super().__init__(name, value) + super().__init__(name) + self.value = value -class DictBooleanElement(DictElement): +class DictIntegerElement(IntegerElement): + def __init__(self, *args, **kwargs): + warnings.warn(DeprecationWarning("This class is depricated. Please use IntegerElement.")) + super().__init__(*args, **kwargs) + + +class BooleanElement(StructureElement): def __init__(self, name: str, value: bool): - super().__init__(name, value) + super().__init__(name) + self.value = value -class DictDictElement(Dict, DictElement): - def __init__(self, name: str, value: dict): - DictElement.__init__(self, name, value) +class DictBooleanElement(BooleanElement): + def __init__(self, *args, **kwargs): + warnings.warn(DeprecationWarning("This class is depricated. 
Please use BooleanElement.")) + super().__init__(*args, **kwargs) -class DictListElement(DictElement): - def __init__(self, name: str, value: dict): - super().__init__(name, value) +class ListElement(StructureElement): + def __init__(self, name: str, value: list): + super().__init__(name) + self.value = value -class DictFloatElement(DictElement): - def __init__(self, name: str, value: float): - super().__init__(name, value) +class DictListElement(ListElement): + def __init__(self, *args, **kwargs): + warnings.warn(DeprecationWarning("This class is depricated. Please use ListElement.")) + super().__init__(*args, **kwargs) -class TextElement(StructureElement): - def __init__(self, name: str, value: str): +class FloatElement(StructureElement): + def __init__(self, name: str, value: float): super().__init__(name) self.value = value + + +class DictFloatElement(FloatElement): + def __init__(self, *args, **kwargs): + warnings.warn(DeprecationWarning("This class is depricated. Please use FloatElement.")) + super().__init__(*args, **kwargs) + + +class Dict(DictElement): + def __init__(self, *args, **kwargs): + warnings.warn(DeprecationWarning("This class is depricated. Please use DictElement.")) + super().__init__(*args, **kwargs) + + +class DictDictElement(DictElement): + def __init__(self, *args, **kwargs): + warnings.warn(DeprecationWarning("This class is depricated. Please use DictElement.")) + super().__init__(*args, **kwargs) diff --git a/unittests/broken_cfoods/broken1.yml b/unittests/broken_cfoods/broken1.yml index 9fd4c52934c56512ada8ea564ccd540e07e25661..86202acd7a3be90b6a8b8e85aee5109d79799239 100644 --- a/unittests/broken_cfoods/broken1.yml +++ b/unittests/broken_cfoods/broken1.yml @@ -39,14 +39,14 @@ DataAnalysis: # name of the converter # how to make match case insensitive? 
subtree: description: - type: DictTextElement + type: TextElement match_value: (?P<description>.*) match_name: description records: Measurement: description: $description responsible_single: - type: DictTextElement + type: TextElement match_name: responsible match_value: &person_regexp ((?P<first_name>.+) )?(?P<last_name>.+) records: &responsible_records @@ -65,7 +65,7 @@ DataAnalysis: # name of the converter subtree: Person: type: TextElement - match: *person_regexp + match_name: *person_regexp records: *responsible_records ExperimentalData: # name of the converter diff --git a/unittests/test_converters.py b/unittests/test_converters.py index 15be9f7c7e9f9e219f594f6818d396eeb52f81f1..e8cac914741a0cb93b16dae8649039b03421f24f 100644 --- a/unittests/test_converters.py +++ b/unittests/test_converters.py @@ -29,17 +29,15 @@ import pytest import yaml from caoscrawler.converters import (Converter, ConverterValidationError, - DictConverter, DirectoryConverter, + DictElementConverter, DirectoryConverter, handle_value, MarkdownFileConverter, - DictFloatElementConverter, JSONFileConverter) -from caoscrawler.converters import _AbstractDictElementConverter + FloatElementConverter, JSONFileConverter) +from caoscrawler.converters import _AbstractScalarValueElementConverter from caoscrawler.crawl import Crawler from caoscrawler.stores import GeneralStore -from caoscrawler.structure_elements import (File, DictTextElement, - DictListElement, DictElement, - DictBooleanElement, DictDictElement, - DictIntegerElement, - DictFloatElement, Directory) +from caoscrawler.structure_elements import (File, TextElement, ListElement, DictElement, + BooleanElement, IntegerElement, + FloatElement, Directory) from test_tool import rfp @@ -53,14 +51,14 @@ def converter_registry(): "MarkdownFile": { "converter": "MarkdownFileConverter", "package": "caoscrawler.converters"}, - "Dict": { - "converter": "DictConverter", + "DictElement": { + "converter": "DictElementConverter", "package": 
"caoscrawler.converters"}, - "DictTextElement": { - "converter": "DictTextElementConverter", + "TextElement": { + "converter": "TextElementConverter", "package": "caoscrawler.converters"}, - "DictListElement": { - "converter": "DictListElementConverter", + "ListElement": { + "converter": "ListElementConverter", "package": "caoscrawler.converters"}, "TextElement": { "converter": "TextElementConverter", @@ -81,8 +79,8 @@ def testConverterTrivial(converter_registry): types = [ "Directory", "MarkdownFile", - "DictTextElement", - "DictListElement", + "TextElement", + "ListElement", "TextElement" ] @@ -150,11 +148,11 @@ def test_markdown_converter(converter_registry): children = converter.create_children(None, test_readme) assert len(children) == 5 - assert children[1].__class__ == DictTextElement + assert children[1].__class__ == TextElement assert children[1].name == "description" assert children[1].value.__class__ == str - assert children[0].__class__ == DictTextElement + assert children[0].__class__ == TextElement assert children[0].name == "responsible" assert children[0].value.__class__ == str @@ -170,11 +168,11 @@ def test_markdown_converter(converter_registry): children = converter.create_children(None, test_readme2) assert len(children) == 2 - assert children[1].__class__ == DictTextElement + assert children[1].__class__ == TextElement assert children[1].name == "description" assert children[1].value.__class__ == str - assert children[0].__class__ == DictListElement + assert children[0].__class__ == ListElement assert children[0].name == "responsible" assert children[0].value.__class__ == list @@ -196,39 +194,39 @@ def test_json_converter(converter_registry): children = jsonconverter.create_children(None, test_json) assert len(children) == 8 - assert children[0].__class__ == DictTextElement + assert children[0].__class__ == TextElement assert children[0].name == "name" assert children[0].value.__class__ == str assert children[0].value == "DEMO" - assert 
children[1].__class__ == DictIntegerElement + assert children[1].__class__ == IntegerElement assert children[1].name == "projectId" assert children[1].value.__class__ == int assert children[1].value == 10002 - assert children[2].__class__ == DictBooleanElement + assert children[2].__class__ == BooleanElement assert children[2].name == "archived" assert children[2].value.__class__ == bool - assert children[3].__class__ == DictListElement + assert children[3].__class__ == ListElement assert children[3].name == "Person" assert children[3].value.__class__ == list assert len(children[3].value) == 2 - assert children[4].__class__ == DictTextElement + assert children[4].__class__ == TextElement assert children[4].name == "start_date" assert children[4].value.__class__ == str - assert children[5].__class__ == DictListElement + assert children[5].__class__ == ListElement assert children[5].name == "candidates" assert children[5].value.__class__ == list assert children[5].value == ["Mouse", "Penguine"] - assert children[6].__class__ == DictFloatElement + assert children[6].__class__ == FloatElement assert children[6].name == "rvalue" assert children[6].value.__class__ == float - assert children[7].__class__ == DictTextElement + assert children[7].__class__ == TextElement assert children[7].name == "url" assert children[7].value.__class__ == str @@ -394,11 +392,11 @@ match_name: text match_value: .*begin(?P<text>.*)end accept_text: True """) - converter = _AbstractDictElementConverter( + converter = _AbstractScalarValueElementConverter( definition, "test_converter", None # This is possible when "subtree" is not used ) - element = DictTextElement("text", """ + element = TextElement("text", """ begin bla end""") @@ -409,7 +407,7 @@ end""") def test_converter_value_match(converter_registry): # test with defaults - dc = DictFloatElementConverter( + dc = FloatElementConverter( definition={ "match_name": "(.*)", "match_value": "(.*)", @@ -417,11 +415,11 @@ def 
test_converter_value_match(converter_registry): name="Test", converter_registry=converter_registry ) - m = dc.match(DictIntegerElement(name="a", value=4)) + m = dc.match(IntegerElement(name="a", value=4)) assert m is not None # overwrite default with no match for int - dc = DictFloatElementConverter( + dc = FloatElementConverter( definition={ "match_name": "(.*)", "match_value": "(.*)", @@ -431,10 +429,10 @@ def test_converter_value_match(converter_registry): converter_registry=converter_registry ) with pytest.raises(RuntimeError) as err: - m = dc.match(DictIntegerElement(name="a", value=4)) + m = dc.match(IntegerElement(name="a", value=4)) # overwrite default with match for float - dc = DictFloatElementConverter( + dc = FloatElementConverter( definition={ "match_name": "(.*)", "match_value": "(.*)", @@ -443,5 +441,5 @@ def test_converter_value_match(converter_registry): name="Test", converter_registry=converter_registry ) - m = dc.match(DictFloatElement(name="a", value=4.0)) + m = dc.match(FloatElement(name="a", value=4.0)) assert m is not None diff --git a/unittests/test_directories/examples_json/jsontest_cfood.yml b/unittests/test_directories/examples_json/jsontest_cfood.yml index f1eb6a9fa186c07f551bd12a84050f544abfdabc..875773e6bf523500dba46abffda25c0edcb3abc4 100644 --- a/unittests/test_directories/examples_json/jsontest_cfood.yml +++ b/unittests/test_directories/examples_json/jsontest_cfood.yml @@ -9,25 +9,25 @@ JSONTest: # name of the converter - Project # not needed as the name is equivalent subtree: name_element: - type: DictTextElement + type: TextElement match_name: "name" match_value: "(?P<name>.*)" records: Project: name: $name url_element: # name of the first subtree element which is a converter - type: DictTextElement + type: TextElement match_value: "(?P<url>.*)" match_name: "url" records: Project: url: $url persons_element: - type: DictListElement + type: ListElement match_name: "Person" subtree: person_element: - type: Dict + type: DictElement 
records: Person: parents: @@ -36,21 +36,21 @@ JSONTest: # name of the converter Person: +$Person subtree: firstname_element: - type: DictTextElement + type: TextElement match_name: "firstname" match_value: "(?P<firstname>.*)" records: Person: firstname: $firstname lastname_element: - type: DictTextElement + type: TextElement match_name: "lastname" match_value: "(?P<lastname>.*)" records: Person: lastname: $lastname email_element: - type: DictTextElement + type: TextElement match_name: "email" match_value: "(?P<email>.*)" records: diff --git a/unittests/test_issues.py b/unittests/test_issues.py index 6e77b0c7f26f4b2970203cfc4b8cc786fe24121b..ad66aa1413303a16b96ed877f3279a061a0a4bc5 100644 --- a/unittests/test_issues.py +++ b/unittests/test_issues.py @@ -23,7 +23,7 @@ from pytest import mark from caoscrawler.crawl import Crawler -from caoscrawler.structure_elements import Dict +from caoscrawler.structure_elements import DictElement from test_tool import rfp @@ -35,7 +35,7 @@ def test_issue_10(): """Test integer-to-float conversion in dictionaries""" crawler_definition = { "DictTest": { - "type": "Dict", + "type": "DictElement", "match": "(.*)", "records": { "TestRec": {} @@ -63,7 +63,7 @@ def test_issue_10(): } records = crawler.start_crawling( - Dict("TestDict", test_dict), crawler_definition, converter_registry) + DictElement("TestDict", test_dict), crawler_definition, converter_registry) assert len(records) == 1 assert records[0].parents[0].name == "TestRec" assert records[0].get_property("float_prop") is not None diff --git a/unittests/test_json.py b/unittests/test_json.py index 97d9831de20a2b9f712294d1a0f6322789580f30..41fd31a43389148ad6fbc4167fd3fbd4f7f2ee9f 100644 --- a/unittests/test_json.py +++ b/unittests/test_json.py @@ -24,7 +24,7 @@ # """ -module description +test the JSON converter """ import json import os @@ -33,7 +33,7 @@ from pytest import raises import caosdb as db -from caoscrawler.converters import JSONFileConverter, DictConverter +from 
caoscrawler.converters import JSONFileConverter from caoscrawler.crawl import Crawler from caoscrawler.structure_elements import File, JSONFile from test_tool import rfp, dircheckstr diff --git a/unittests/test_table_converter.py b/unittests/test_table_converter.py index 85255d3efd34dc666d5d2e97423f33177dea6732..abe4ac85ec4fc0a78e71c177222817e1b84e9e56 100644 --- a/unittests/test_table_converter.py +++ b/unittests/test_table_converter.py @@ -31,10 +31,8 @@ from caoscrawler.stores import GeneralStore from caoscrawler.converters import (ConverterValidationError, DictConverter, XLSXTableConverter, CSVTableConverter) from caoscrawler.structure_elements import Directory -from caoscrawler.structure_elements import (File, DictTextElement, - DictListElement, DictElement, - DictBooleanElement, DictDictElement, - DictIntegerElement, DictFloatElement) +from caoscrawler.structure_elements import (File, TextElement, ListElement, DictElement, + BooleanElement, IntegerElement, FloatElement) from os.path import join, dirname, basename @@ -63,18 +61,17 @@ def converter_registry(): "XLSXTableConverter": { "converter": "XLSXTableConverter", "package": "caoscrawler.converters"}, - - "DictDictElement": { - "converter": "DictDictElementConverter", + "DictElement": { + "converter": "DictElementConverter", "package": "caoscrawler.converters"}, - "DictTextElement": { - "converter": "DictTextElementConverter", + "TextElement": { + "converter": "TextElementConverter", "package": "caoscrawler.converters"}, - "DictIntegerElement": { - "converter": "DictIntegerElementConverter", + "IntegerElement": { + "converter": "IntegerElementConverter", "package": "caoscrawler.converters"}, - "DictFloatElement": { - "converter": "DictFloatElementConverter", + "FloatElement": { + "converter": "FloatElementConverter", "package": "caoscrawler.converters"}, } diff --git a/unittests/test_tool.py b/unittests/test_tool.py index 0eef86b3a9f5ef6f64d9ccb9ce0102cd87208fa4..e1a155f29c5f3537ca9a33fa775e5497f4b15cc8 
100755 --- a/unittests/test_tool.py +++ b/unittests/test_tool.py @@ -604,24 +604,30 @@ def reset_mocks(mocks): def change_identifiable_prop(ident): - # the checks in here are only to make sure we change the record as we intend to - meas = ident._records[-2] - assert meas.parents[0].name == "Measurement" - resps = meas.properties[0] - assert resps.name == "date" - # change one element; This changes the date which is part of the identifiable - resps.value = "2022-01-04" + for ent in ident._records: + if len(ent.parents) == 0 or ent.parents[0].name != "Measurement": + continue + for prop in ent.properties: + if prop.name != "date": + continue + # change one element; This changes the date which is part of the identifiable + prop.value = "2022-01-04" + return + raise RuntimeError("Did not find the property that should be changed.") def change_non_identifiable_prop(ident): - # the checks in here are only to make sure we change the record as we intend to - meas = ident._records[-1] - assert meas.parents[0].name == "Measurement" - resps = meas.properties[-1] - assert resps.name == "responsible" - assert len(resps.value) == 2 - # change one element; This removes a responsible which is not part of the identifiable - del resps.value[-1] + for ent in ident._records: + if len(ent.parents) == 0 or ent.parents[0].name != "Measurement": + continue + + for prop in ent.properties: + if prop.name != "responsible" or len(prop.value) < 2: + continue + # change one element; This removes a responsible which is not part of the identifiable + del prop.value[-1] + return + raise RuntimeError("Did not find the property that should be changed.") @patch("caoscrawler.crawl.Crawler._get_entity_by_id",