diff --git a/CHANGELOG.md b/CHANGELOG.md index 205fcb077cb0ca8f8facc6c1f636539f9bbb2384..d6f7c5f123942a22cc7de63424bd7fb7ea597569 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added ### - Added location argument to `src/caosdb/utils/checkFileSystemConsistency.py` +- Entity getters: `get_entity_by_<name/id/path>` +- Cached versions of entity getters and of `execute_query` (`cached_query`) ### Changed ### diff --git a/src/caosdb/__init__.py b/src/caosdb/__init__.py index 7e06885fe495c1e8c4ccc99b7d0c0f8ff8c34b5b..acf323e860a93753b57f2e104531383b412f3fa0 100644 --- a/src/caosdb/__init__.py +++ b/src/caosdb/__init__.py @@ -46,6 +46,7 @@ from caosdb.common.models import (ACL, ALL, FIX, NONE, OBLIGATORY, RECOMMENDED, Query, QueryTemplate, Record, RecordType, delete, execute_query, get_global_acl, get_known_permissions, raise_errors) +from caosdb.utils.get_entity import get_entity_by_name, get_entity_by_path, get_entity_by_id from caosdb.configuration import _read_config_files, configure, get_config from caosdb.connection.connection import configure_connection, get_connection from caosdb.exceptions import * diff --git a/src/caosdb/apiutils.py b/src/caosdb/apiutils.py index 9c68e9db6cbdef827b9319505b4b81bc8db1ebac..a5a936c556dd065b56b60ff690baf9a1ce19a583 100644 --- a/src/caosdb/apiutils.py +++ b/src/caosdb/apiutils.py @@ -22,9 +22,8 @@ # # ** end header # -"""API-Utils: +"""API-Utils: Some simplified functions for generation of records etc. -Some simplified functions for generation of records etc. """ import logging @@ -206,9 +205,10 @@ def compare_entities(old_entity: Entity, new_entity: Entity, compare_referenced_ - Additional parents (a list under key "parents") - Information about properties: - Each property lists either an additional property or a property with a changed: - - ... datatype - - ... importance or - - ... 
value (not implemented yet) + - datatype + - importance or + - value (not implemented yet) + In case of changed information the value listed under the respective key shows the value that is stored in the respective entity. diff --git a/src/caosdb/cached.py b/src/caosdb/cached.py new file mode 100644 index 0000000000000000000000000000000000000000..4f735bb8e85ba7aa364b211e198840f9f6fb97e2 --- /dev/null +++ b/src/caosdb/cached.py @@ -0,0 +1,176 @@ +# -*- coding: utf-8 -*- +# +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2023 IndiScale GmbH <info@indiscale.com> +# Copyright (C) 2023 Henrik tom Wörden <h.tomwoerden@indiscale.com> +# Copyright (C) 2023 Daniel Hornung <d.hornung@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +# + +""" +This module provides some cached versions of functions that retrieve Entities from a remote server. +""" + +from enum import Enum +from functools import lru_cache +from typing import Union + +from .utils import get_entity +from .common.models import execute_query, Entity, Container + + +# roughly 1GB for typical entity sizes +DEFAULT_SIZE = 33333 + +# This dict cache is solely for filling the real cache manually (e.g. to reuse older query results) +_DUMMY_CACHE = {} + + +class AccessType(Enum): + """Different access types for cached queries. 
Needed for filling the cache manually with +:func:`cache_fill` . + + """ + QUERY = 1 + PATH = 2 + EID = 3 + NAME = 4 + + +def cached_get_entity_by(eid: Union[str, int] = None, name: str = None, path: str = None, query: + str = None) -> Entity: + """Return a single entity that is identified uniquely by one argument. + +You must supply exactly one argument. + +If a query phrase is given, the result must be unique. If this is not what you need, use +:func:`cached_query` instead. + + """ + count = 0 + if eid is not None: + count += 1 + if name is not None: + count += 1 + if path is not None: + count += 1 + if query is not None: + count += 1 + if count != 1: + raise ValueError("You must supply exactly one argument.") + + if eid is not None: + return _cached_access(AccessType.EID, eid, unique=True) + if name is not None: + return _cached_access(AccessType.NAME, name, unique=True) + if path is not None: + return _cached_access(AccessType.PATH, path, unique=True) + if query is not None: + return _cached_access(AccessType.QUERY, query, unique=True) + + raise ValueError("Not all arguments may be None.") + + +def cached_query(query_string) -> Container: + """A cached version of :func:`caosdb.execute_query<caosdb.common.models.execute_query>`. + +All additional arguments are at their default values. + + """ + return _cached_access(AccessType.QUERY, query_string, unique=False) + + +@lru_cache(maxsize=DEFAULT_SIZE) +def _cached_access(kind: AccessType, value: Union[str, int], unique=True): + # This is the function that is actually cached. + # Due to the arguments, the cache has kind of separate sections for cached_query and + # cached_get_entity_by with the different AccessTypes. However, there is only one cache size. + + # The dummy dict cache is only for filling the cache manually, it is deleted afterwards. 
+ if value in _DUMMY_CACHE: + return _DUMMY_CACHE[value] + + if kind == AccessType.QUERY: + return execute_query(value, unique=unique) + if kind == AccessType.NAME: + return get_entity.get_entity_by_name(value) + if kind == AccessType.EID: + return get_entity.get_entity_by_id(value) + if kind == AccessType.PATH: + return get_entity.get_entity_by_path(value) + + raise ValueError(f"Unknown AccessType: {kind}") + + +def cache_clear() -> None: + """Empty the cache that is used by `cached_query` and `cached_get_entity_by`.""" + _cached_access.cache_clear() + + +def cache_info(): + """Return info about the cache that is used by `cached_query` and `cached_get_entity_by`. + +Returns +------- + +out: named tuple + See the standard library :func:`functools.lru_cache` for details.""" + return _cached_access.cache_info() + + +def cache_initialize(maxsize=DEFAULT_SIZE) -> None: + """Create a new cache with the given size for `cached_query` and `cached_get_entity_by`. + + This implies a call of :func:`cache_clear`, the old cache is emptied. + + """ + cache_clear() + global _cached_access + _cached_access = lru_cache(maxsize=maxsize)(_cached_access.__wrapped__) + + +def cache_fill(items: dict, kind: AccessType = AccessType.EID, unique: bool = True) -> None: + """Add entries to the cache manually. + + This allows to fill the cache without actually submitting queries. Note that this does not + overwrite existing entries with the same keys. + +Parameters +---------- + +items: dict + A dictionary with the entries to go into the cache. The keys must be compatible with the + AccessType given in ``kind`` + +kind: AccessType, optional + The AccessType, for example ID, name, path or query. + +unique: bool, optional + If True, fills the cache for :func:`cached_get_entity_by`, presumably with + :class:`caosdb.Entity<caosdb.common.models.Entity>` objects. 
If False, the cache should be filled + with :class:`caosdb.Container<caosdb.common.models.Container>` objects, for use with + :func:`cached_query`. + + """ + # 1. add the given items to the corresponding dummy dict cache + _DUMMY_CACHE.update(items) + + # 2. call the cache function with each key (this only results in a dict look up) + for key in items.keys(): + _cached_access(kind, key, unique=unique) + + # 3. empty the dummy dict cache again + _DUMMY_CACHE.clear() diff --git a/src/caosdb/common/administration.py b/src/caosdb/common/administration.py index bd09c626d5a9e48bc9d3610f23cb46660cb10db3..a27aaf0406c83ac33c37b676a9cdeab812bf2f7a 100644 --- a/src/caosdb/common/administration.py +++ b/src/caosdb/common/administration.py @@ -385,7 +385,7 @@ action : str Either "grant" or "deny" permission : str - For example "RETRIEVE:*". + For example ``RETRIEVE:*``. priority : bool, optional Whether the priority shall be set, defaults is False. diff --git a/src/caosdb/common/models.py b/src/caosdb/common/models.py index 20e09810ba466d59cf7a82d68cad19fefb45b10b..9ba54c49d2d4cd776dc2263b850cc095c65fea60 100644 --- a/src/caosdb/common/models.py +++ b/src/caosdb/common/models.py @@ -33,6 +33,7 @@ transactions. All additional classes are either important for the entities or the transactions. """ + from __future__ import print_function, unicode_literals from __future__ import annotations # Can be removed with 3.10. @@ -1372,15 +1373,15 @@ out: List[Entity] unique=True, flags=None, sync=True): """Update this entity. - There are two possible work-flows to perform this update: - First: - 1) retrieve an entity - 2) do changes - 3) call update method +There are two possible work-flows to perform this update: +First: + 1) retrieve an entity + 2) do changes + 3) call update method - Second: - 1) construct entity with id - 2) call update method. +Second: + 1) construct entity with id + 2) call update method. For slight changes the second one it is more comfortable. 
Furthermore, it is possible to stay off-line until calling the update method. The name, description, unit, datatype, path, @@ -4371,7 +4372,7 @@ def execute_query(q, unique=False, raise_exception_on_error=True, cache=True, fl Whether the query is expected to have only one entity as result. Defaults to False. raise_exception_on_error : bool - Whether an exception should be raises when there are errors in the + Whether an exception should be raised when there are errors in the resulting entities. Defaults to True. cache : bool Whether to use the query cache (equivalent to adding a "cache" flag). diff --git a/src/caosdb/connection/utils.py b/src/caosdb/connection/utils.py index 9056bf9dea14fa2fa441fa13a5efe8e776990284..095d47035e24dad5b6d7041f5d3b8a739652f271 100644 --- a/src/caosdb/connection/utils.py +++ b/src/caosdb/connection/utils.py @@ -45,13 +45,16 @@ def urlencode(query): 3) All other parameters which can be passed to the respective functions are not implemented here and the default parameters will be used. + +.. code:: + >>> urlencode({'key': ['val1', 'val2']}, doseq=True) Traceback (most recent call last): ... TypeError: urlencode() got an unexpected keyword argument 'doseq' - Otherwise, this functions works exactly as its counterparts in the urllib - modules when they are called with only the query parameter. +Otherwise, this function works exactly as its counterparts in the urllib +modules when they are called with only the query parameter. Parameters ---------- diff --git a/src/caosdb/utils/create_revision.py b/src/caosdb/utils/create_revision.py index 0b7ce996311a96a6a0fe89935de729f07b67a353..419e1c9f2b97171be0dccf1bc772ae5db679c0b7 100644 --- a/src/caosdb/utils/create_revision.py +++ b/src/caosdb/utils/create_revision.py @@ -34,13 +34,15 @@ def bend_references(from_id, to_id, except_for=None): and those references are changed to point to to_id. entities having an id listed in except_for are excluded. 
- params: - from_id : int - the old object to which references where pointing - to_id : int - the new object to which references will be pointing - except_for : list of int - entities with id of this list will not be changed +Parameters +---------- + +from_id : int + the old object to which references were pointing +to_id : int + the new object to which references will be pointing +except_for : list of int + entities with id of this list will not be changed """ if except_for is None: except_for = [to_id] @@ -71,14 +73,16 @@ def create_revision(old_id, prop, value): This function changes the record with id old_id. The value of the propertye prop is changed to value. - params: - old_id : int - id of the record to be changed - prop : string - name of the property to be changed - value : type of corresponding property - the new value of the corresponding property - """ +Parameters +---------- + +old_id : int + id of the record to be changed +prop : string + name of the property to be changed +value : type of corresponding property + the new value of the corresponding property +""" record = db.execute_query("FIND {}".format(old_id))[0] new_rec = record.copy() new_rec.get_property(prop).value = value diff --git a/src/caosdb/utils/get_entity.py b/src/caosdb/utils/get_entity.py new file mode 100644 index 0000000000000000000000000000000000000000..a27aafa99ffe3759a46876a5bcd5e686d631b1dc --- /dev/null +++ b/src/caosdb/utils/get_entity.py @@ -0,0 +1,49 @@ +# -*- coding: utf-8 -*- +# +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2023 Henrik tom Wörden <h.tomwoerden@indiscale.com> +# Copyright (C) 2023 IndiScale GmbH <info@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +# + +"""Convenience functions to retrieve a specific entity.""" + +from typing import Union +from ..common.models import execute_query, Entity + + +def get_entity_by_name(name: str) -> Entity: + """Return the result of a unique query that uses the name to find the correct entity. + + Submits the query "FIND ENTITY WITH name='{name}'". + """ + return execute_query(f"FIND ENTITY WITH name='{name}'", unique=True) + + +def get_entity_by_id(eid: Union[str, int]) -> Entity: + """Return the result of a unique query that uses the id to find the correct entity. + + Submits the query "FIND ENTITY WITH id='{eid}'". + """ + return execute_query(f"FIND ENTITY WITH id='{eid}'", unique=True) + + +def get_entity_by_path(path: str) -> Entity: + """Return the result of a unique query that uses the path to find the correct file. + + Submits the query "FIND FILE WHICH IS STORED AT '{path}'". + """ + return execute_query(f"FIND FILE WHICH IS STORED AT '{path}'", unique=True) diff --git a/src/doc/conf.py b/src/doc/conf.py index 54d75bd0e70138d3c90020e6058edb4403f838f0..819ef61d7fb02e752b4a73a86644d1602bbf188a 100644 --- a/src/doc/conf.py +++ b/src/doc/conf.py @@ -25,7 +25,7 @@ import sphinx_rtd_theme # noqa: E402 # -- Project information ----------------------------------------------------- project = 'pycaosdb' -copyright = '2022, IndiScale GmbH' +copyright = '2023, IndiScale GmbH' author = 'Daniel Hornung' # The short X.Y version @@ -78,6 +78,9 @@ exclude_patterns = [] # The name of the Pygments (syntax highlighting) style to use. 
pygments_style = None +suppress_warnings = [ + "autosectionlabel.*", # duplicate labels +] # -- Options for HTML output ------------------------------------------------- diff --git a/src/doc/tutorials/Data-Insertion.rst b/src/doc/tutorials/Data-Insertion.rst index 9060345c836a43c57f8039d40020082a7f342b02..82df07691f7c78a2787d67463ca222d2e68249ca 100644 --- a/src/doc/tutorials/Data-Insertion.rst +++ b/src/doc/tutorials/Data-Insertion.rst @@ -223,6 +223,28 @@ list-valued attribute in Python, as the following example illustrates. print(retrieved.get_property("TestList").value) +.. note:: + Properties of Entities that shall be updated need to have IDs. Let's look at an + example: + +.. code:: python + + experiment = db.Record(id=1111).retrieve() + experiment.add_property(name='date', value="2020-01-01") + experiment.update() # Fails! The 'date' Property needs to have an ID. + +The easiest way to get around this is to use the corresponding entity getter: + +.. code:: python + + experiment = db.Record(id=1111).retrieve() + experiment.add_property(db.get_entity_by_name('date'), value="2020-01-01") + experiment.update() # Works! + +There are also the functions ``get_entity_by_path`` and ``get_entity_by_id``. You can easily use +cached versions of those functions (see :doc:`caching options<caching>`). + + File Update ----------- diff --git a/src/doc/tutorials/Entity-Getters.rst b/src/doc/tutorials/Entity-Getters.rst new file mode 100644 index 0000000000000000000000000000000000000000..50ed13201e5720de22bf0b605bc5162834a458a8 --- /dev/null +++ b/src/doc/tutorials/Entity-Getters.rst @@ -0,0 +1,16 @@ + +Entity Getters +============== + +There is a very frequent situation when working with PyCaosDB: You need to get a specific Entity +from the remote server. For example, you need the Property Entity in order to make an update. 
Sure, +you can do a ``db.Entity().retrieve()`` or submit a query, but there is an even faster way which +also helps prevent errors: + +- ``get_entity_by_name`` +- ``get_entity_by_id`` +- ``get_entity_by_path`` + +You can call these functions with a single argument (name/id/path). Since these functions are +frequently used with the same arguments over and over again, you might want to look at the +:doc:`caching options<caching>`. diff --git a/src/doc/tutorials/caching.rst b/src/doc/tutorials/caching.rst new file mode 100644 index 0000000000000000000000000000000000000000..aad9a1ddbd9e93a3cd06887eaffcf956c3c5bea6 --- /dev/null +++ b/src/doc/tutorials/caching.rst @@ -0,0 +1,58 @@ + +Caching +======= + +.. note:: + + Caching is great, because it can speed up things considerably. But it can also create dangerous + pitfalls if the cache is not cleared when needed and you work with outdated data. Thus, please use + the cache with care and make sure to clear it when needed. + +Python provides great tools for caching. For example, you could define a ``cached_get_by_name`` +function, easily created from ``get_entity_by_name`` using Python's ``lru_cache``: + +.. code:: python + + @lru_cache(maxsize=1000) + def cached_get_by_name(name): + return db.get_entity_by_name(name) + + exp = cached_get_by_name('Experiment') + # reset the cache with + cached_get_by_name.cache_clear() + +For convenience, PyCaosDB provides the ``caosdb.cached`` module that defines the functions +``cached_query`` and ``cached_get_entity_by``, which use a shared cache. Let's have a look: + +.. 
code:: python + + from caosdb.cached import cached_query, cached_get_entity_by, cache_clear, cache_info, cache_initialize + rt1 = cached_get_entity_by(name='RT1') + qresult = cached_query('FIND Experiment WITH parameter=1') + # you can inspect the cache + print(cache_info()) + # this will not cause a server request since it is cached + rt1 = cached_get_entity_by(name='RT1') + # you can clear the cache with + cache_clear() + # If you want to have a cache with a custom size, you can initialize it (again). Old cached + # data is lost. + cache_initialize(maxsize=10) + + +If you want to manually add entities to the cache, you can do it yourself. This is useful when you +have entities on hand from previous queries that you want to add. + +.. code:: python + + from caosdb.cached import cache_fill, AccessType + # Here, items must be a dict with Entity IDs as keys and the Entities as values. + cache_fill(items, AccessType.EID, unique=True) + # If you now use IDs that were in items, they are taken from the cache. + e1 = cached_get_entity_by(eid=10001) + +When filling the cache with Entity objects for ``cached_get_entity_by``, you need to set +``unique=True``, whereas the cache for ``cached_query`` should be filled with Container objects and +``unique=False``. + + diff --git a/src/doc/tutorials/first_steps.rst b/src/doc/tutorials/first_steps.rst index 7366d391cdf50b1d9f9107fc2d47f28d9df7bf7f..c84ec52aa63f0563b22c698081e89600c7af6122 100644 --- a/src/doc/tutorials/first_steps.rst +++ b/src/doc/tutorials/first_steps.rst @@ -52,6 +52,7 @@ Let's look at the first element: <Record ... .. The above example needs doctest ELLIPSIS + You see that the object is a Record. It has a Parent and two Properties. .. 
note:: @@ -119,7 +120,7 @@ If the files are large data files, it is often a better idea to only retrieve th Summary ------- -Now you know, how you can use Python to send queries to CaosDB and you can access +Now you know how to use Python to send queries to CaosDB and you can access the result Records and their properties. The next tutorial shows how to make some meaningful use of this. diff --git a/src/doc/tutorials/index.rst b/src/doc/tutorials/index.rst index 0b08d0b4fe153d803a780bd144787819b827db78..ce37993d7ec5e0888da8a2b4c58904bcbdc43bb4 100644 --- a/src/doc/tutorials/index.rst +++ b/src/doc/tutorials/index.rst @@ -14,6 +14,8 @@ advanced usage of the Python client. basic_analysis Data-Insertion errors + Entity-Getters + caching data-model-interface complex_data_models serverside diff --git a/unittests/test_cached.py b/unittests/test_cached.py new file mode 100644 index 0000000000000000000000000000000000000000..ce302d671d6077aed7d8457e70da2076ebe65d50 --- /dev/null +++ b/unittests/test_cached.py @@ -0,0 +1,295 @@ +# -*- coding: utf-8 -*- +# +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2023 Henrik tom Wörden <h.tomwoerden@indiscale.com> +# Copyright (C) 2023 IndiScale GmbH <info@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. 
+# + +""" Test the caosdb.cached module """ + +from caosdb.cached import (cached_get_entity_by, cache_clear, cache_info, cache_fill, + AccessType, cache_initialize, cached_query) +from unittest.mock import patch +import caosdb as db +from copy import deepcopy +import pytest + + +DUMMY_SERVER_CONTENT = [ + db.Record(name='a', id=101), + db.Record(name='b', id=102), + db.Record(name='c', id=103), + db.File(path='p', id=104), + db.File(path='pp', id=105), +] + + +@pytest.fixture(autouse=True) +def cache_clean_up(): + cache_clear() + yield + cache_clear() + + +def mocked_name_query(name): + # copy the object, because Entities would normally be created from XML response + return deepcopy([el for el in DUMMY_SERVER_CONTENT if el.name == name][0]) + + +def mocked_id_query(eid): + # copy the object, because Entities would normally be created from XML response + return deepcopy([el for el in DUMMY_SERVER_CONTENT if el.id == eid][0]) + + +def mocked_path_query(path): + # copy the object, because Entities would normally be created from XML response + return deepcopy([el for el in DUMMY_SERVER_CONTENT if el.path == path][0]) + + +def mocked_gen_query(q, unique): + if unique: + if q == 'a': + return DUMMY_SERVER_CONTENT[0] + else: + return None + else: + if q == 'a': + return db.Container().extend([DUMMY_SERVER_CONTENT[0]]) + else: + return db.Container().extend(DUMMY_SERVER_CONTENT) + + +@patch("caosdb.utils.get_entity.get_entity_by_name") +def test_get_by_name(mocked_get_by_name): + mocked_get_by_name.side_effect = mocked_name_query + # first call; not in cache -> mocked_execute is touched + a = cached_get_entity_by(name='a') + assert a.id == 101 + assert mocked_get_by_name.call_count == 1 + # second call; in cache -> mocked_execute is NOT touched (count is still 1) + b = cached_get_entity_by(name='a') + assert mocked_get_by_name.call_count == 1 + # the cache returned the same object + assert a is b + # check the info + assert cache_info().hits == 1 + assert 
cache_info().currsize == 1 + # after clearing the test, the mock is used again + cache_clear() + cached_get_entity_by(name='a') + assert mocked_get_by_name.call_count == 2 + # we fill the cache manually and make sure the element is used + cache_fill({'lol': db.Entity(id=10001, name='lol')}, AccessType.NAME, unique=True) + # there are now two elements in the cache: a and lol + assert cache_info().currsize == 2 + # we can retrieve the inserted element + lol = cached_get_entity_by(name='lol') + assert lol.id == 10001 + # this did not touch the mocked function + assert mocked_get_by_name.call_count == 2 + # make sure normal retrieval still works (count +1) + c = cached_get_entity_by(name='c') + assert mocked_get_by_name.call_count == 3 + assert c.id == 103 + + +@patch("caosdb.utils.get_entity.get_entity_by_id") +def test_get_by_id(mocked_get_by_id): + mocked_get_by_id.side_effect = mocked_id_query + # first call; not in cache -> mocked_execute is touched + b = cached_get_entity_by(eid=102) + assert b.id == 102 + assert b.name == 'b' + assert mocked_get_by_id.call_count == 1 + # second call; in cache -> mocked_execute is NOT touched (count is still 1) + a = cached_get_entity_by(eid=102) + assert mocked_get_by_id.call_count == 1 + # the cache returned the same object + assert a is b + # check the info + assert cache_info().hits == 1 + assert cache_info().currsize == 1 + # after clearing the test, the mock is used again + cache_clear() + cached_get_entity_by(eid=102) + assert mocked_get_by_id.call_count == 2 + # we fill the cache manually and make sure the element is used + cache_fill({10001: db.Entity(id=10001, name='lol')}, AccessType.EID, unique=True) + # there are now two elements in the cache: a and lol + assert cache_info().currsize == 2 + # we can retrieve the inserted element + lol = cached_get_entity_by(eid=10001) + assert lol.name == 'lol' + # this did not touch the mocked function + assert mocked_get_by_id.call_count == 2 + # make sure normal retrieval still 
works (count +1) + c = cached_get_entity_by(eid=103) + assert mocked_get_by_id.call_count == 3 + assert c.name == 'c' + + +@patch("caosdb.cached.get_entity.get_entity_by_path") +def test_get_by_path(mocked_get_by_path): + mocked_get_by_path.side_effect = mocked_path_query + # first call; not in cache -> mocked_execute is touched + b = cached_get_entity_by(path='p') + assert b.id == 104 + assert mocked_get_by_path.call_count == 1 + # second call; in cache -> mocked_execute is NOT touched (count is still 1) + a = cached_get_entity_by(path='p') + assert mocked_get_by_path.call_count == 1 + # the cache returned the same object + assert a is b + # check the info + assert cache_info().hits == 1 + assert cache_info().currsize == 1 + # after clearing the test, the mock is used again + cache_clear() + cached_get_entity_by(path='p') + assert mocked_get_by_path.call_count == 2 + # we fill the cache manually and make sure the element is used + cache_fill({'lol': db.File(id=10001, path='lol')}, AccessType.PATH, unique=True) + # there are now two elements in the cache: a and lol + assert cache_info().currsize == 2 + # we can retrieve the inserted element + lol = cached_get_entity_by(path='lol') + assert lol.id == 10001 + # this did not touch the mocked function + assert mocked_get_by_path.call_count == 2 + # make sure normal retrieval still works (count +1) + c = cached_get_entity_by(path='pp') + assert mocked_get_by_path.call_count == 3 + assert c.id == 105 + + +@patch("caosdb.cached.execute_query") +def test_get_by_query(mocked_query): + mocked_query.side_effect = mocked_gen_query + # test cache initialization + cache_initialize(maxsize=10) + assert cache_info().currsize == 0 + + # Non-existent entity + res = cached_get_entity_by(query='stuff') + assert res is None + assert cache_info().currsize == 1 + assert cache_info().hits == 0 + assert cache_info().misses == 1 + + res = cached_get_entity_by(query='stuff') + assert res is None + assert cache_info().currsize == 1 + assert 
cache_info().hits == 1 + assert cache_info().misses == 1 + + # Existent entity + a = cached_get_entity_by(query='a') + assert a is not None + assert a.id == 101 + assert cache_info().currsize == 2 + assert cache_info().hits == 1 + assert cache_info().misses == 2 + + +@patch("caosdb.cached.execute_query") +def test_cached_query(mocked_query): + mocked_query.side_effect = mocked_gen_query + # test cache initialization + cache_initialize(maxsize=10) + assert cache_info().maxsize == 10 + # first call; not in cache -> mocked_execute is touched + res = cached_query('stuff') + assert len(res) == len(DUMMY_SERVER_CONTENT) + assert mocked_query.call_count == 1 + # second call; in cache -> mocked_execute is NOT touched (count is still 1) + a = cached_query('stuff') + assert mocked_query.call_count == 1 + # the cache returned the same object + assert a is res + # check the info + assert cache_info().hits == 1 + assert cache_info().currsize == 1 + # after clearing the test, the mock is used again + cache_clear() + cached_query('stuff') + assert mocked_query.call_count == 2 + # we fill the cache manually and make sure the element is used + cache_fill({'lol': db.Container().extend([db.Entity(id=10001, name='lol')])}, + AccessType.QUERY, unique=False) + # there are now two elements in the cache: a and lol + assert cache_info().currsize == 2 + # we can retrieve the inserted element + lol = cached_query('lol') + assert lol[0].id == 10001 + # this did not touch the mocked function + assert mocked_query.call_count == 2 + # make sure normal retrieval still works (count +1) + c = cached_query('a') + assert mocked_query.call_count == 3 + assert c[0].id == 101 + + +@patch("caosdb.utils.get_entity.get_entity_by_name") +def test_cache_size(mocked_get_by_name): + mocked_get_by_name.side_effect = lambda x: x + # first call; not in cache -> mocked_execute is touched + maxsize = 5 + cache_initialize(maxsize=maxsize) + assert cache_info().currsize == 0 + + names_first = ("a", "b", "c", "d", 
"e") + names_later = ("A", "B", "C", "D", "E") + names_fill = {"X": None, "Y": None, "Z": None} + + # Use the first batch of names + for ii, name in enumerate(names_first, start=1): + cached_get_entity_by(name=name) + assert cache_info().currsize == ii + assert cache_info().hits == 0 + assert cache_info().misses == ii + for ii, name in enumerate(names_first, start=1): + cached_get_entity_by(name=name) + assert cache_info().currsize == maxsize + assert cache_info().hits == ii + assert cache_info().misses == maxsize + + # use the second batch of names + for ii, name in enumerate(names_later, start=1): + cached_get_entity_by(name=name) + assert cache_info().currsize == maxsize + assert cache_info().hits == len(names_first) + assert cache_info().misses == len(names_first) + ii + for ii, name in enumerate(names_later, start=1): + cached_get_entity_by(name=name) + assert cache_info().currsize == maxsize + assert cache_info().hits == len(names_first) + ii + assert cache_info().misses == len(names_first) + len(names_later) + + # The cache is now filled with A,B,C,D,E (oldest to least recently used). + # Let's fill it with X,Y,Z. + cache_fill(names_fill, kind=AccessType.NAME) + + # Now, the cache should be: D,E,X,Y,Z + current_misses = cache_info().misses + + for name in ("Z", "Y", "X", "E", "D"): + cached_get_entity_by(name=name) + assert cache_info().misses == current_misses + + for ii, name in enumerate(("A", "B", "C"), start=1): + cached_get_entity_by(name=name) + assert cache_info().misses == current_misses + ii