diff --git a/src/caosadvancedtools/cache.py b/src/caosadvancedtools/cache.py index 808ae35beec072c7f6a5755532ec316987338e19..993868c48e1f88373cca8475ee832aeee9999545 100644 --- a/src/caosadvancedtools/cache.py +++ b/src/caosadvancedtools/cache.py @@ -27,8 +27,10 @@ # server side? import os import sqlite3 +from copy import deepcopy from abc import ABC, abstractmethod from hashlib import sha256 +import warnings import caosdb as db from lxml import etree @@ -171,7 +173,7 @@ class AbstractCache(ABC): # TODO: A better name would be IdentifiablesCache -class Cache(AbstractCache): +class IdentifiableCache(AbstractCache): """ stores identifiables (as a hash of xml) and their respective ID. @@ -208,7 +210,7 @@ class Cache(AbstractCache): """ Format an entity as "pretty" XML and return the SHA256 hash. """ - xml = get_pretty_xml(ent) + xml = get_pretty_xml(deepcopy(ent)) digest = sha256(xml.encode("utf-8")).hexdigest() return digest @@ -408,3 +410,9 @@ class UpdateCache(AbstractCache): conn.close() return res + + +class Cache(IdentifiableCache): + def __init__(self, *args, **kwargs): + warnings.warn(DeprecationWarning("This class is depricated. Please use IdentifiableCache.")) + super().__init__(*args, **kwargs) diff --git a/src/caosadvancedtools/crawler.py b/src/caosadvancedtools/crawler.py index c4af42fd15b50a03e5a85b159c2c95a938e46eec..5affebe8b11a580d2f94771ac0de5ee8bea76ea0 100644 --- a/src/caosadvancedtools/crawler.py +++ b/src/caosadvancedtools/crawler.py @@ -50,7 +50,7 @@ from sqlite3 import IntegrityError import caosdb as db from caosdb.exceptions import BadQueryError -from .cache import Cache, UpdateCache, get_pretty_xml +from .cache import IdentifiableCache, UpdateCache, get_pretty_xml from .cfood import RowCFood, add_files, get_ids_for_entities_with_names from .datainconsistency import DataInconsistencyError from .datamodel_problems import DataModelProblems @@ -190,7 +190,7 @@ class Crawler(object): self.filterKnown.reset(cat) if self.use_cache: - self.cache = Cache(db_file=cache_file) + self.cache = IdentifiableCache(db_file=cache_file) self.cache.validate_cache() def iteritems(self): diff --git a/unittests/test_cache.py b/unittests/test_cache.py index f0f6a8f4f5920f2ac5a07fa334673158d30e9e5b..de3430bf2f28a6b05ea36b1047ac11937809ff44 100644 --- a/unittests/test_cache.py +++ b/unittests/test_cache.py @@ -27,7 +27,7 @@ from tempfile import NamedTemporaryFile import sqlite3 import caosdb as db -from caosadvancedtools.cache import Cache, cleanXML +from caosadvancedtools.cache import IdentifiableCache, cleanXML from lxml import etree import pytest @@ -35,23 +35,24 @@ import pytest class CacheTest(unittest.TestCase): def setUp(self): - self.cache = Cache(db_file=NamedTemporaryFile(delete=False).name, - force_creation=True) + self.cache = IdentifiableCache(db_file=NamedTemporaryFile(delete=False).name, + force_creation=True) def test_hash(self): ent = db.Record() - assert isinstance(Cache.hash_entity(ent), str) - assert (Cache.hash_entity(ent) != - Cache.hash_entity(db.Record().add_parent("lol"))) + assert isinstance(IdentifiableCache.hash_entity(ent), str) + assert (IdentifiableCache.hash_entity(ent) != + IdentifiableCache.hash_entity(db.Record().add_parent("lol"))) def test_insert(self): ent = db.Record() ent2 = db.Record() ent2.add_parent(name="Experiment") - ent_hash = Cache.hash_entity(ent) - ent2_hash = Cache.hash_entity(ent2) + ent_hash = IdentifiableCache.hash_entity(ent) + ent2_hash = IdentifiableCache.hash_entity(ent2) self.cache.insert(ent2_hash, 1235, "ajkfljadsklf") - assert isinstance(self.cache.check_existing(ent2_hash)[0], int) + assert self.cache.check_existing(ent2_hash)[0] == 1235 + assert self.cache.check_existing(ent2_hash)[1] == "ajkfljadsklf" assert self.cache.check_existing(ent_hash) is None def test_hirarchy(self): @@ -67,18 +68,15 @@ class CacheTest(unittest.TestCase): ent3 = db.Record() ent3.add_parent(name="Analysis") test_id = 2353243 - self.cache.insert(Cache.hash_entity(ent2), test_id, "ajdsklfjadslf") + self.cache.insert(IdentifiableCache.hash_entity(ent2), test_id, "ajdsklfjadslf") entities = [ent, ent2, ent3] hashes = self.cache.update_ids_from_cache(entities) + self.assertEqual(ent.id, None) self.assertEqual(ent2.id, test_id) + self.assertEqual(ent3.id, None) - # TODO: is that wanted? - self.assertEqual(ent.id, -1) - self.assertEqual(ent3.id, -1) - - # TODO: I expected this instead: - # with pytest.raises(RuntimeError, match=r".*no ID.*"): - # self.cache.insert_list(hashes, entities) + with pytest.raises(RuntimeError, match=r".*no ID.*"): + self.cache.insert_list(hashes, entities) # test ent.id = 1001 @@ -134,8 +132,8 @@ class CacheTest2(unittest.TestCase): def setUp(self): # Correct version: - self.cache = Cache(db_file=NamedTemporaryFile(delete=False).name, - force_creation=True) + self.cache = IdentifiableCache(db_file=NamedTemporaryFile(delete=False).name, + force_creation=True) self.db_file_defect = [] self.db_file_defect.extend([ @@ -167,23 +165,23 @@ class CacheTest2(unittest.TestCase): # Test whether new cache is created correctly: assert os.path.exists(self.cache.db_file) # Test whether it can be opened - test_cache_2 = Cache(db_file=self.cache.db_file) + test_cache_2 = IdentifiableCache(db_file=self.cache.db_file) assert test_cache_2.get_cache_version() == 2 with pytest.raises(RuntimeError, match="Cache version too old.") as e_info: - test_cache_2 = Cache(db_file=self.db_file_defect[0]) + test_cache_2 = IdentifiableCache(db_file=self.db_file_defect[0]) with pytest.raises(RuntimeError, match="Cache version too old.") as e_info: - test_cache_2 = Cache(db_file=self.db_file_defect[1]) + test_cache_2 = IdentifiableCache(db_file=self.db_file_defect[1]) with pytest.raises(RuntimeError, match=r".*future version.*") as e_info: - test_cache_2 = Cache(db_file=self.db_file_defect[2]) + test_cache_2 = IdentifiableCache(db_file=self.db_file_defect[2]) with pytest.raises(RuntimeError, match=r".*table broken.*") as e_info: - test_cache_2 = Cache(db_file=self.db_file_defect[3]) + test_cache_2 = IdentifiableCache(db_file=self.db_file_defect[3]) with pytest.raises(RuntimeError, match=r".*table broken.*") as e_info: - test_cache_2 = Cache(db_file=self.db_file_defect[4]) + test_cache_2 = IdentifiableCache(db_file=self.db_file_defect[4]) def tearDown(self): os.remove(self.cache.db_file) @@ -199,8 +197,8 @@ class InvalidationTest(unittest.TestCase): def setUp(self): # Correct version: - self.cache = Cache(db_file=NamedTemporaryFile(delete=False).name, - force_creation=True) + self.cache = IdentifiableCache(db_file=NamedTemporaryFile(delete=False).name, + force_creation=True) def tearDown(self): os.remove(self.cache.db_file) @@ -222,10 +220,10 @@ class InvalidationTest(unittest.TestCase): el = [ent, ent2, ent3] for e in el: - self.cache.insert(Cache.hash_entity(e), e.id, e.version.id) + self.cache.insert(IdentifiableCache.hash_entity(e), e.id, e.version.id) for e in el: - res = self.cache.check_existing(Cache.hash_entity(e)) + res = self.cache.check_existing(IdentifiableCache.hash_entity(e)) assert e.id == res[0] assert e.version.id == res[1] @@ -233,7 +231,7 @@ class InvalidationTest(unittest.TestCase): ent3.version.id = "b" for e in el[1:]: - res = self.cache.check_existing(Cache.hash_entity(e)) + res = self.cache.check_existing(IdentifiableCache.hash_entity(e)) assert res is None invalidated_entries = self.cache.validate_cache(el)