diff --git a/src/caoscrawler/crawl.py b/src/caoscrawler/crawl.py
index 7b9119caa1cd4dd4623a9141de4a70abb4da5946..68a0977cac8a9f0c3a3f57191a256fb04dbe2b7b 100644
--- a/src/caoscrawler/crawl.py
+++ b/src/caoscrawler/crawl.py
@@ -38,6 +38,7 @@ import logging
 import os
 import sys
 import uuid
+from caosdb.cached import cached_get_entity_by
 import warnings
 import yaml
@@ -573,7 +574,11 @@ class Crawler(object):
                     del flat[i]
                 # 2. Can it be identified via a path?
                 elif record.path is not None:
-                    existing = self._get_entity_by_path(record.path)
+                    # As with the removed _get_entity_by_path: a missing file yields None.
+                    try:
+                        existing = cached_get_entity_by(path=record.path)
+                    except db.exceptions.EmptyUniqueQueryError:
+                        existing = None
                     if existing is None:
                         to_be_inserted.append(record)
                         self.add_to_remote_missing_cache(record, identifiable)
@@ -728,7 +733,7 @@ class Crawler(object):
         parent_updates = db.Container()
 
         for entity in to_be_updated:
-            old_entity = Crawler._get_entity_by_id(entity.id)
+            old_entity = cached_get_entity_by(id=entity.id)
 
             # Check whether the parents have been changed and add them if missing
             # in the old entity:
@@ -757,28 +762,13 @@ class Crawler(object):
                             "mode. This might lead to a failure of inserts that follow.")
                 logger.info(parent_updates)
 
-    @staticmethod
-    def _get_entity_by_name(name):
-        return db.Entity(name=name).retrieve()
-
-    @staticmethod
-    def _get_entity_by_path(path):
-        try:
-            return db.execute_query(f"FIND FILE WHICH IS STORED AT '{path}'", unique=True)
-        except db.exceptions.EmptyUniqueQueryError:
-            return None
-
-    @staticmethod
-    def _get_entity_by_id(id):
-        return db.Entity(id=id).retrieve()
-
     @staticmethod
     def execute_inserts_in_list(to_be_inserted, securityMode,
                                 run_id: Optional[uuid.UUID] = None,
                                 unique_names=True):
         for record in to_be_inserted:
             for prop in record.properties:
-                entity = Crawler._get_entity_by_name(prop.name)
+                entity = cached_get_entity_by(name=prop.name)
                 _resolve_datatype(prop, entity)
         logger.debug("INSERT")
         logger.debug(to_be_inserted)
@@ -794,10 +784,10 @@ class Crawler(object):
         for record in rec_list:
             for parent in record.parents:
                 if parent.id is None:
-                    parent.id = Crawler._get_entity_by_name(parent.name).id
+                    parent.id = cached_get_entity_by(name=parent.name).id
             for prop in record.properties:
                 if prop.id is None:
-                    entity = Crawler._get_entity_by_name(prop.name)
+                    entity = cached_get_entity_by(name=prop.name)
                     prop.id = entity.id
                     _resolve_datatype(prop, entity)
 
diff --git a/unittests/test_tool.py b/unittests/test_tool.py
index b88720f4da89dfa735e782a4d2e41ccc3b0f4d3c..0853cb3379a312ebf2a853a1ed1294192c0b5369 100755
--- a/unittests/test_tool.py
+++ b/unittests/test_tool.py
@@ -583,21 +583,21 @@ def test_replace_entities_with_ids(crawler):
     assert a.get_property("C").value == [12345, 233324]
 
 
-def mock_get_entity_by_id(id):
-    candidates = [el for el in list(full_data.values()) if el.id == id]
-    if len(candidates) > 0:
-        return candidates[0]
-    else:
-        raise ValueError()
-
-
-def mock_get_entity_by_name(name):
-    candidates = [el for el in full_data.values()
-                  if (el.name is not None and el.name.lower() == name.lower())]
-    if len(candidates) > 0:
-        return candidates[0]
-    else:
-        raise ValueError()
+def mock_get_entity_by(id=None, name=None):
+    """Stand-in for ``cached_get_entity_by`` that looks entities up in ``full_data``.
+
+    Raises ValueError if nothing matches or if neither ``id`` nor ``name`` is given.
+    """
+    if id is not None:
+        candidates = [el for el in full_data.values() if el.id == id]
+    elif name is not None:
+        candidates = [el for el in full_data.values()
+                      if (el.name is not None and el.name.lower() == name.lower())]
+    else:
+        raise ValueError("Either id or name must be given.")
+    if not candidates:
+        raise ValueError()
+    return candidates[0]
 
 
 def prepare_crawler_with_sec_mode(mode, ident):
@@ -650,10 +650,8 @@ def change_non_identifiable_prop(ident):
     raise RuntimeError("Did not find the property that should be changed.")
 
 
-@patch("caoscrawler.crawl.Crawler._get_entity_by_id",
-       new=Mock(side_effect=mock_get_entity_by_id))
-@patch("caoscrawler.crawl.Crawler._get_entity_by_name",
-       new=Mock(side_effect=mock_get_entity_by_name))
+@patch("caoscrawler.crawl.cached_get_entity_by",
+       new=Mock(side_effect=mock_get_entity_by))
 @patch("caoscrawler.crawl.db.Container.insert")
 @patch("caoscrawler.crawl.db.Container.update")
 @patch("caoscrawler.crawl.UpdateCache.insert")