diff --git a/CHANGELOG.md b/CHANGELOG.md index 510b25282d64dd1da7c30ca86b09f3dfbae38276..cd5165cee9793f434bbc6a1c703ae2692fcb5571 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,29 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.12.0] - 2023-06-02 ## + +### Added ### + +- Added location argument to `src/caosdb/utils/checkFileSystemConsistency.py` +- Entity getters: `get_entity_by_<name/id/path>` +- Cached versions of entity getters and of `execute_query` (`cached_query`) + +### Deprecated ### + +- getOriginUrlIn, getDiffIn, getBranchIn, getCommitIn (formerly apiutils) have been + moved to caosdb.utils.git_utils + +### Fixed ### + +- Fixed `src/caosdb/utils/checkFileSystemConsistency.py` + +### Documentation ### + +* [#83](https://gitlab.com/caosdb/caosdb-pylib/-/issues/83) - Improved + documentation on adding REFERENCE properties, both in the docstring of + `Entity.add_property` and in the data-insertion tutorial. 
+ ## [0.11.2] - 2023-03-14 ## ### Fixed ### diff --git a/CITATION.cff b/CITATION.cff index 910e40a2193d527fc8e4eb68c4ca6b10a28d3630..d9126aae6483459f8c8f248ed6a4fdf859f24e45 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -20,6 +20,6 @@ authors: given-names: Stefan orcid: https://orcid.org/0000-0001-7214-8125 title: CaosDB - Pylib -version: 0.11.1 +version: 0.12.0 doi: 10.3390/data4020083 -date-released: 2022-11-14 \ No newline at end of file +date-released: 2023-06-02 \ No newline at end of file diff --git a/setup.py b/setup.py index 373bfefd273c22aa597c793bcd5548b058dc72e7..4bed95b727e5d4e0b39d64e4d3809bbdd620628c 100755 --- a/setup.py +++ b/setup.py @@ -47,8 +47,8 @@ from setuptools import find_packages, setup ISRELEASED = True MAJOR = 0 -MINOR = 11 -MICRO = 2 +MINOR = 12 +MICRO = 0 # Do not tag as pre-release until this commit # https://github.com/pypa/packaging/pull/515 # has made it into a release. Probably we should wait for pypa/packaging>=21.4 diff --git a/src/caosdb/__init__.py b/src/caosdb/__init__.py index 7e06885fe495c1e8c4ccc99b7d0c0f8ff8c34b5b..acf323e860a93753b57f2e104531383b412f3fa0 100644 --- a/src/caosdb/__init__.py +++ b/src/caosdb/__init__.py @@ -46,6 +46,7 @@ from caosdb.common.models import (ACL, ALL, FIX, NONE, OBLIGATORY, RECOMMENDED, Query, QueryTemplate, Record, RecordType, delete, execute_query, get_global_acl, get_known_permissions, raise_errors) +from caosdb.utils.get_entity import get_entity_by_name, get_entity_by_path, get_entity_by_id from caosdb.configuration import _read_config_files, configure, get_config from caosdb.connection.connection import configure_connection, get_connection from caosdb.exceptions import * diff --git a/src/caosdb/apiutils.py b/src/caosdb/apiutils.py index 9c68e9db6cbdef827b9319505b4b81bc8db1ebac..a46e30375b924d358448e73aece61562c36c700b 100644 --- a/src/caosdb/apiutils.py +++ b/src/caosdb/apiutils.py @@ -22,27 +22,25 @@ # # ** end header # -"""API-Utils: +"""API-Utils: Some simplified functions for 
generation of records etc. -Some simplified functions for generation of records etc. """ import logging -import sys -import tempfile import warnings from collections.abc import Iterable -from subprocess import call -from typing import Optional, Any, Dict, List +from typing import Any, Dict, List -from caosdb.common.datatype import (BOOLEAN, DATETIME, DOUBLE, FILE, INTEGER, - REFERENCE, TEXT, is_reference) -from caosdb.common.models import (Container, Entity, File, Property, Query, +from caosdb.common.datatype import is_reference +from caosdb.common.models import (Container, Entity, File, Property, Record, RecordType, execute_query, - get_config, SPECIAL_ATTRIBUTES) + SPECIAL_ATTRIBUTES) from caosdb.exceptions import CaosDBException +from caosdb.utils.git_utils import (get_origin_url_in, get_diff_in, + get_branch_in, get_commit_in) + logger = logging.getLogger(__name__) @@ -149,51 +147,35 @@ def retrieve_entities_with_ids(entities): def getOriginUrlIn(folder): - """return the Fetch URL of the git repository in the given folder.""" - with tempfile.NamedTemporaryFile(delete=False, mode="w") as t: - call(["git", "remote", "show", "origin"], stdout=t, cwd=folder) - with open(t.name, "r") as t: - urlString = "Fetch URL:" - - for line in t.readlines(): - if urlString in line: - return line[line.find(urlString) + len(urlString):].strip() - - return None + warnings.warn(""" + This function is deprecated and will be removed with the next release. + Please use the caosdb.utils.git_utils.get_origin_url_in instead.""", + DeprecationWarning) + return get_origin_url_in(folder) def getDiffIn(folder, save_dir=None): - """returns the name of a file where the out put of "git diff" in the given - folder is stored.""" - with tempfile.NamedTemporaryFile(delete=False, mode="w", dir=save_dir) as t: - call(["git", "diff"], stdout=t, cwd=folder) - - return t.name + warnings.warn(""" + This function is deprecated and will be removed with the next release. 
+ Please use the caosdb.utils.git_utils.get_diff_in instead.""", + DeprecationWarning) + return get_diff_in(folder, save_dir) def getBranchIn(folder): - """returns the current branch of the git repository in the given folder. - - The command "git branch" is called in the given folder and the - output is returned - """ - with tempfile.NamedTemporaryFile(delete=False, mode="w") as t: - call(["git", "rev-parse", "--abbrev-ref", "HEAD"], stdout=t, cwd=folder) - with open(t.name, "r") as t: - return t.readline().strip() + warnings.warn(""" + This function is deprecated and will be removed with the next release. + Please use the caosdb.utils.git_utils.get_branch_in instead.""", + DeprecationWarning) + return get_branch_in(folder) def getCommitIn(folder): - """returns the commit hash in of the git repository in the given folder. - - The command "git log -1 --format=%h" is called in the given folder - and the output is returned - """ - - with tempfile.NamedTemporaryFile(delete=False, mode="w") as t: - call(["git", "log", "-1", "--format=%h"], stdout=t, cwd=folder) - with open(t.name, "r") as t: - return t.readline().strip() + warnings.warn(""" + This function is deprecated and will be removed with the next release. + Please use the caosdb.utils.git_utils.get_commit_in instead.""", + DeprecationWarning) + return get_commit_in(folder) def compare_entities(old_entity: Entity, new_entity: Entity, compare_referenced_records: bool = False): @@ -206,9 +188,10 @@ def compare_entities(old_entity: Entity, new_entity: Entity, compare_referenced_ - Additional parents (a list under key "parents") - Information about properties: - Each property lists either an additional property or a property with a changed: - - ... datatype - - ... importance or - - ... value (not implemented yet) + - datatype + - importance or + - value (not implemented yet) + In case of changed information the value listed under the respective key shows the value that is stored in the respective entity. 
diff --git a/src/caosdb/cached.py b/src/caosdb/cached.py new file mode 100644 index 0000000000000000000000000000000000000000..4f735bb8e85ba7aa364b211e198840f9f6fb97e2 --- /dev/null +++ b/src/caosdb/cached.py @@ -0,0 +1,176 @@ +# -*- coding: utf-8 -*- +# +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2023 IndiScale GmbH <info@indiscale.com> +# Copyright (C) 2023 Henrik tom Wörden <h.tomwoerden@indiscale.com> +# Copyright (C) 2023 Daniel Hornung <d.hornung@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +# + +""" +This module provides some cached versions of functions that retrieve Entities from a remote server. +""" + +from enum import Enum +from functools import lru_cache +from typing import Union + +from .utils import get_entity +from .common.models import execute_query, Entity, Container + + +# roughly 1GB for typical entity sizes +DEFAULT_SIZE = 33333 + +# This dict cache is solely for filling the real cache manually (e.g. to reuse older query results) +_DUMMY_CACHE = {} + + +class AccessType(Enum): + """Different access types for cached queries. Needed for filling the cache manually with +:func:`cache_fill` . 
+ + """ + QUERY = 1 + PATH = 2 + EID = 3 + NAME = 4 + + +def cached_get_entity_by(eid: Union[str, int] = None, name: str = None, path: str = None, query: + str = None) -> Entity: + """Return a single entity that is identified uniquely by one argument. + +You must supply exactly one argument. + +If a query phrase is given, the result must be unique. If this is not what you need, use +:func:`cached_query` instead. + + """ + count = 0 + if eid is not None: + count += 1 + if name is not None: + count += 1 + if path is not None: + count += 1 + if query is not None: + count += 1 + if count != 1: + raise ValueError("You must supply exactly one argument.") + + if eid is not None: + return _cached_access(AccessType.EID, eid, unique=True) + if name is not None: + return _cached_access(AccessType.NAME, name, unique=True) + if path is not None: + return _cached_access(AccessType.PATH, path, unique=True) + if query is not None: + return _cached_access(AccessType.QUERY, query, unique=True) + + raise ValueError("Not all arguments may be None.") + + +def cached_query(query_string) -> Container: + """A cached version of :func:`caosdb.execute_query<caosdb.common.models.execute_query>`. + +All additional arguments are at their default values. + + """ + return _cached_access(AccessType.QUERY, query_string, unique=False) + + +@lru_cache(maxsize=DEFAULT_SIZE) +def _cached_access(kind: AccessType, value: Union[str, int], unique=True): + # This is the function that is actually cached. + # Due to the arguments, the cache has kind of separate sections for cached_query and + # cached_get_entity_by with the different AccessTypes. However, there is only one cache size. + + # The dummy dict cache is only for filling the cache manually, it is deleted afterwards. 
+ if value in _DUMMY_CACHE: + return _DUMMY_CACHE[value] + + if kind == AccessType.QUERY: + return execute_query(value, unique=unique) + if kind == AccessType.NAME: + return get_entity.get_entity_by_name(value) + if kind == AccessType.EID: + return get_entity.get_entity_by_id(value) + if kind == AccessType.PATH: + return get_entity.get_entity_by_path(value) + + raise ValueError(f"Unknown AccessType: {kind}") + + +def cache_clear() -> None: + """Empty the cache that is used by `cached_query` and `cached_get_entity_by`.""" + _cached_access.cache_clear() + + +def cache_info(): + """Return info about the cache that is used by `cached_query` and `cached_get_entity_by`. + +Returns +------- + +out: named tuple + See the standard library :func:`functools.lru_cache` for details.""" + return _cached_access.cache_info() + + +def cache_initialize(maxsize=DEFAULT_SIZE) -> None: + """Create a new cache with the given size for `cached_query` and `cached_get_entity_by`. + + This implies a call of :func:`cache_clear`, the old cache is emptied. + + """ + cache_clear() + global _cached_access + _cached_access = lru_cache(maxsize=maxsize)(_cached_access.__wrapped__) + + +def cache_fill(items: dict, kind: AccessType = AccessType.EID, unique: bool = True) -> None: + """Add entries to the cache manually. + + This allows to fill the cache without actually submitting queries. Note that this does not + overwrite existing entries with the same keys. + +Parameters +---------- + +items: dict + A dictionary with the entries to go into the cache. The keys must be compatible with the + AccessType given in ``kind`` + +kind: AccessType, optional + The AccessType, for example ID, name, path or query. + +unique: bool, optional + If True, fills the cache for :func:`cached_get_entity_by`, presumably with + :class:`caosdb.Entity<caosdb.common.models.Entity>` objects. 
If False, the cache should be filled + with :class:`caosdb.Container<caosdb.common.models.Container>` objects, for use with + :func:`cached_query`. + + """ + # 1. add the given items to the corresponding dummy dict cache + _DUMMY_CACHE.update(items) + + # 2. call the cache function with each key (this only results in a dict look up) + for key in items.keys(): + _cached_access(kind, key, unique=unique) + + # 3. empty the dummy dict cache again + _DUMMY_CACHE.clear() diff --git a/src/caosdb/common/administration.py b/src/caosdb/common/administration.py index bd09c626d5a9e48bc9d3610f23cb46660cb10db3..a27aaf0406c83ac33c37b676a9cdeab812bf2f7a 100644 --- a/src/caosdb/common/administration.py +++ b/src/caosdb/common/administration.py @@ -385,7 +385,7 @@ action : str Either "grant" or "deny" permission : str - For example "RETRIEVE:*". + For example ``RETRIEVE:*``. priority : bool, optional Whether the priority shall be set, defaults is False. diff --git a/src/caosdb/common/models.py b/src/caosdb/common/models.py index 08fcd0206b9df22902e80277d0e57b5f67c76db5..9ba54c49d2d4cd776dc2263b850cc095c65fea60 100644 --- a/src/caosdb/common/models.py +++ b/src/caosdb/common/models.py @@ -33,6 +33,7 @@ transactions. All additional classes are either important for the entities or the transactions. """ + from __future__ import print_function, unicode_literals from __future__ import annotations # Can be removed with 3.10. @@ -461,18 +462,30 @@ class Entity: unit=None, importance=None, inheritance=None): # @ReservedAssignment """Add a property to this entity. - The first parameter is meant to identify the property entity. So the method expects an instance of - Entity, an integer or a string here. The second parameter is the value of the new property. Any - other named parameter may be passed by means of the keywwords. Accepted keywords are: - id, name, description, importance, inheritance, datatype, and unit. Any other keyword will be - ignored right now. 
But that may change in the future. + The first parameter is meant to identify the property entity either via + its id or name, or by providing the corresponding ``Entity`` Python + object. The second parameter is the value of the new property. Any other + named parameter may be passed by means of the keywords. Accepted + keywords are: id, name, description, importance, inheritance, datatype, + and unit. + + Notes + ----- + If you want to add a property to an already existing entity, the + property ``id`` of that property needs to be specified before you send + the updated entity to the server. Parameters ---------- - property : int, str, Property, optional - An identifying parameter, by default None - value : int, str, Property, optional - The value of the new property, by default None + property : int, str, Entity, optional + An identifier for the property to be added, either its name, its id, + or the corresponding Entity Python object. If ``None``, either the + `name` or the `id` argument has to be specified explicitly. Default + is ``None``. + value : int, str, bool, datetime, Entity, or list of these types, optional + The value of the new property. In case of a reference to another + entity, this value may be the referenced entity's id or the + ``Entity`` as a Python object. Default is None. id : int, optional Id of the property, by default None name : str, optional @@ -491,17 +504,64 @@ class Entity: Returns ------- Entity + This Entity object to which the new property has been added. - Raises - ------ + Warns + ----- UserWarning If the first parameter is None then id or name must be defined and not be None. UserWarning If the first parameter is an integer then it is interpreted as the id and id must be undefined or None.
UserWarning - If the first parameter is not None and neither an instance of Entity nor an integer it is + If the first parameter is not None and neither an instance of Entity nor an integer it is interpreted as the name and name must be undefined or None. + + Raises + ------ + ValueError: + If you try to add an ``Entity`` object with File or Record role (or, + equivalently, a ``File`` or ``Record`` object) as a property, a + ``ValueError`` is raised. + + Examples + -------- + Add a simple integer property with the name ``TestProp`` and the value + 27 to a Record: + + >>> import caosdb as db + >>> rec = db.Record(name="TestRec").add_parent(name="TestType") + >>> rec.add_property("TestProp", value=27) # specified by name, you could equally use the property's id if it is known + + You can also use the Python object: + + >>> prop = db.Property(name="TestProp", datatype=db.INTEGER) + >>> rec.add_property(prop, value=27) # specified via the Python object + + In case of updating an existing Record, the Property needs to be + specified by id: + + >>> rec = db.Record(name="TestRec").retrieve() + >>> prop2 = db.Property(name="OtherTestProp").retrieve() + >>> rec.add_property(id=prop2.id, value="My new value") + >>> rec.update() + + Let's look at the more advanced example of adding a list of integers as + value of the above integer ``TestProp``: + + >>> rec.add_property("TestProp", value=[27,28,29], datatype=db.LIST(db.INTEGER)) + + Note that since `TestProp` is a scalar integer Property, the datatype + `LIST<INTEGER>` has to be specified explicitly. + + Finally, we can also add reference properties, specified by the RecordType of the referenced entity. + + >>> ref_rec = db.Record(name="ReferencedRecord").add_parent(name="OtherRT") + >>> rec.add_property(name="OtherRT", value=ref_rec) # or value=ref_rec.id if ref_rec has one set by the server + + See more on adding properties and inserting data in + https://docs.indiscale.com/caosdb-pylib/tutorials/Data-Insertion.html. 
+ """ pid = id @@ -588,11 +648,13 @@ class Entity: entity. If no `inheritance` is given, no properties will be inherited by the child. This parameter is case-insensitive. - Note that the behaviour is currently not yet specified when assigning parents to - Records, it only works for inheritance of RecordTypes (and Properties). - - For more information, it is recommended to look into the - :ref:`data insertion tutorial<tutorial-inheritance-properties>`. + Notes + ----- + Note that the behaviour of the `inheritance` argument currently has not + yet been specified when assigning parents to Records, it only works for + inheritance of RecordTypes (and Properties). For more information, it is + recommended to look into the :ref:`data insertion + tutorial<tutorial-inheritance-properties>`. Raises ------ @@ -1285,12 +1347,19 @@ out: List[Entity] anyway. Set the flag 'strict' to True in order to force the server to take all warnings as errors. This prevents the server from inserting this entity if any warning occurs. - @param strict=False: Flag for strict mode. - @param raise_exception_on_error=True: Flag to raise an - exception when an error occurs. - @param unique=True: Flag to only allow - insertion of elements with unique names. - @param flags: A dictionary of flags to be send with the insertion. + Parameters + ---------- + strict : bool, optional + Flag for strict mode. Default is False. + raise_exception_on_error : bool, optional + Flag to raise an exception when an error occurs. Default is True. + unique : bool, optional + Flag to only allow insertion of elements with unique names. Default + is True. + flags : dict, optional + A dictionary of flags to be send with the insertion. Default is + None. + """ return Container().append(self).insert( @@ -1304,15 +1373,15 @@ out: List[Entity] unique=True, flags=None, sync=True): """Update this entity. 
- There are two possible work-flows to perform this update: - First: - 1) retrieve an entity - 2) do changes - 3) call update method +There are two possible work-flows to perform this update: +First: + 1) retrieve an entity + 2) do changes + 3) call update method - Second: - 1) construct entity with id - 2) call update method. +Second: + 1) construct entity with id + 2) call update method. For slight changes the second one it is more comfortable. Furthermore, it is possible to stay off-line until calling the update method. The name, description, unit, datatype, path, @@ -1650,6 +1719,7 @@ class Property(Entity): def add_property(self, property=None, value=None, id=None, name=None, description=None, datatype=None, unit=None, importance=FIX, inheritance=FIX): # @ReservedAssignment + """See ``Entity.add_property``.""" return super().add_property( property=property, id=id, name=name, description=description, datatype=datatype, @@ -1783,6 +1853,7 @@ class RecordType(Entity): def add_property(self, property=None, value=None, id=None, name=None, description=None, datatype=None, unit=None, importance=RECOMMENDED, inheritance=FIX): # @ReservedAssignment + """See ``Entity.add_property``.""" return super().add_property( property=property, id=id, name=name, description=description, datatype=datatype, @@ -1838,6 +1909,7 @@ class Record(Entity): def add_property(self, property=None, value=None, id=None, name=None, description=None, datatype=None, unit=None, importance=FIX, inheritance=FIX): # @ReservedAssignment + """See ``Entity.add_property``.""" return super().add_property( property=property, id=id, name=name, description=description, datatype=datatype, @@ -1995,6 +2067,7 @@ class File(Record): def add_property(self, property=None, id=None, name=None, description=None, datatype=None, value=None, unit=None, importance=FIX, inheritance=FIX): # @ReservedAssignment + """See ``Entity.add_property``.""" return super().add_property( property=property, id=id, name=name, 
description=description, datatype=datatype, @@ -4299,7 +4372,7 @@ def execute_query(q, unique=False, raise_exception_on_error=True, cache=True, fl Whether the query is expected to have only one entity as result. Defaults to False. raise_exception_on_error : bool - Whether an exception should be raises when there are errors in the + Whether an exception should be raised when there are errors in the resulting entities. Defaults to True. cache : bool Whether to use the query cache (equivalent to adding a "cache" flag). diff --git a/src/caosdb/connection/utils.py b/src/caosdb/connection/utils.py index 9056bf9dea14fa2fa441fa13a5efe8e776990284..095d47035e24dad5b6d7041f5d3b8a739652f271 100644 --- a/src/caosdb/connection/utils.py +++ b/src/caosdb/connection/utils.py @@ -45,13 +45,16 @@ def urlencode(query): 3) All other parameters which can be passed to the respective functions are not implemented here and the default parameters will be used. + +.. code:: + >>> urlencode({'key': ['val1', 'val2']}, doseq=True) Traceback (most recent call last): ... TypeError: urlencode() got an unexpected keyword argument 'doseq' - Otherwise, this functions works exactly as its counterparts in the urllib - modules when they are called with only the query parameter. +Otherwise, this function works exactly as its counterparts in the urllib +modules when they are called with only the query parameter.
Parameters ---------- diff --git a/src/caosdb/high_level_api.py b/src/caosdb/high_level_api.py index 427a095a4bafc0c372b0169298f2980dbd902c49..005a20bbba26fd5bee16eac612bd8ebe81f1294a 100644 --- a/src/caosdb/high_level_api.py +++ b/src/caosdb/high_level_api.py @@ -629,18 +629,20 @@ class CaosDBPythonEntity(object): else: entity = CaosDBPythonRecord() - for parent in serialization["parents"]: - if "unresolved" in parent: - id = None - name = None - if "id" in parent: - id = parent["id"] - if "name" in parent: - name = parent["name"] - entity.add_parent(CaosDBPythonUnresolvedParent( - id=id, name=name)) - else: - raise NotImplementedError() + if "parents" in serialization: + for parent in serialization["parents"]: + if "unresolved" in parent: + id = None + name = None + if "id" in parent: + id = parent["id"] + if "name" in parent: + name = parent["name"] + entity.add_parent(CaosDBPythonUnresolvedParent( + id=id, name=name)) + else: + raise NotImplementedError( + "Currently, only unresolved parents can be deserialized.") for baseprop in ("name", "id", "description", "version"): if baseprop in serialization: @@ -673,7 +675,8 @@ class CaosDBPythonEntity(object): if f.name in metadata: propmeta.__setattr__(f.name, metadata[f.name]) else: - raise NotImplementedError() + pass + # raise NotImplementedError() return entity diff --git a/src/caosdb/schema-pycaosdb-ini.yml b/src/caosdb/schema-pycaosdb-ini.yml index 64451a24da1587abbd9815b8ddefc4c85fa36e5a..cb07dfeb84bc16e212100232403b0f66543c73e9 100644 --- a/src/caosdb/schema-pycaosdb-ini.yml +++ b/src/caosdb/schema-pycaosdb-ini.yml @@ -106,6 +106,9 @@ schema-pycaosdb-ini: advancedtools: description: "Configuration settings for the caosadvancedtools." additionalProperties: true + caoscrawler: + description: "Configuration settings for the CaosDB Crawler." + additionalProperties: true sss_helper: description: "Configuration settings for server-side scripting." 
additionalProperties: true diff --git a/src/caosdb/utils/checkFileSystemConsistency.py b/src/caosdb/utils/checkFileSystemConsistency.py index 6dd35f8a6f699a2c74ff41a9924cd65c436efd42..a142c1dd7ffd1a4e6ee6cfc85891e1bf70f98d89 100755 --- a/src/caosdb/utils/checkFileSystemConsistency.py +++ b/src/caosdb/utils/checkFileSystemConsistency.py @@ -82,17 +82,15 @@ def main(argv=None): program_build_date = str(__updated__) program_version_message = '%%(prog)s %s (%s)' % ( program_version, program_build_date) - program_shortdesc = __import__('__main__').__doc__.split("\n")[1] - program_license = '''%s + program_license = ''' - Created by timm fitschen on %s. Copyright 2016 BMPG. All rights reserved. Distributed on an "AS IS" basis without warranties or conditions of any kind, either express or implied. USAGE -''' % (program_shortdesc, str(__date__)) +''' # Setup argument parser parser = ArgumentParser(description=program_license, @@ -113,6 +111,7 @@ USAGE help="timeout in seconds for the database requests. [default: %(default)s]", metavar="TIMEOUT", default="200") + parser.add_argument('location') # Process arguments args = parser.parse_args() @@ -121,7 +120,7 @@ USAGE VERBOSITY = args.verbose TIMEOUT = args.timeout - runCheck(TIMEOUT) + print(runCheck(TIMEOUT, args.location).messages) return 0 diff --git a/src/caosdb/utils/create_revision.py b/src/caosdb/utils/create_revision.py index 0b7ce996311a96a6a0fe89935de729f07b67a353..419e1c9f2b97171be0dccf1bc772ae5db679c0b7 100644 --- a/src/caosdb/utils/create_revision.py +++ b/src/caosdb/utils/create_revision.py @@ -34,13 +34,15 @@ def bend_references(from_id, to_id, except_for=None): and those references are changed to point to to_id. entities having an id listed in except_for are excluded. 
- params: - from_id : int - the old object to which references where pointing - to_id : int - the new object to which references will be pointing - except_for : list of int - entities with id of this list will not be changed +Parameters +---------- + +from_id : int + the old object to which references where pointing +to_id : int + the new object to which references will be pointing +except_for : list of int + entities with id of this list will not be changed """ if except_for is None: except_for = [to_id] @@ -71,14 +73,16 @@ def create_revision(old_id, prop, value): This function changes the record with id old_id. The value of the propertye prop is changed to value. - params: - old_id : int - id of the record to be changed - prop : string - name of the property to be changed - value : type of corresponding property - the new value of the corresponding property - """ +Parameters +---------- + +old_id : int + id of the record to be changed +prop : string + name of the property to be changed +value : type of corresponding property + the new value of the corresponding property +""" record = db.execute_query("FIND {}".format(old_id))[0] new_rec = record.copy() new_rec.get_property(prop).value = value diff --git a/src/caosdb/utils/get_entity.py b/src/caosdb/utils/get_entity.py new file mode 100644 index 0000000000000000000000000000000000000000..a27aafa99ffe3759a46876a5bcd5e686d631b1dc --- /dev/null +++ b/src/caosdb/utils/get_entity.py @@ -0,0 +1,49 @@ +# -*- coding: utf-8 -*- +# +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2023 Henrik tom Wörden <h.tomwoerden@indiscale.com> +# Copyright (C) 2023 IndiScale GmbH <info@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +# + +"""Convenience functions to retrieve a specific entity.""" + +from typing import Union +from ..common.models import execute_query, Entity + + +def get_entity_by_name(name: str) -> Entity: + """Return the result of a unique query that uses the name to find the correct entity. + + Submits the query "FIND ENTITY WITH name='{name}'". + """ + return execute_query(f"FIND ENTITY WITH name='{name}'", unique=True) + + +def get_entity_by_id(eid: Union[str, int]) -> Entity: + """Return the result of a unique query that uses the id to find the correct entity. + + Submits the query "FIND ENTITY WITH id='{eid}'". + """ + return execute_query(f"FIND ENTITY WITH id='{eid}'", unique=True) + + +def get_entity_by_path(path: str) -> Entity: + """Return the result of a unique query that uses the path to find the correct file. + + Submits the query "FIND FILE WHICH IS STORED AT '{path}'". + """ + return execute_query(f"FIND FILE WHICH IS STORED AT '{path}'", unique=True) diff --git a/src/caosdb/utils/git_utils.py b/src/caosdb/utils/git_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..7a58272a3bef1930f75a1e08364349388e2bb89f --- /dev/null +++ b/src/caosdb/utils/git_utils.py @@ -0,0 +1,82 @@ +# -*- coding: utf-8 -*- +# +# This file is a part of the CaosDB Project. 
#
# Copyright (C) 2018 Research Group Biomedical Physics,
# Max-Planck-Institute for Dynamics and Self-Organization Göttingen
# Copyright (C) 2020 Timm Fitschen <t.fitschen@indiscale.com>
# Copyright (C) 2020-2022 IndiScale GmbH <info@indiscale.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
# ** end header
#
"""git-utils: Some functions for retrieving information about git repositories.

"""

import logging
import tempfile

from subprocess import PIPE, call, run

logger = logging.getLogger(__name__)


def _git_stdout(args, folder):
    """Run ``git`` with ``args`` inside ``folder`` and return its standard output as text.

    The output is captured through a pipe, so no temporary file is created.  The exit
    status of git is not checked and stderr is not redirected.
    """
    # universal_newlines=True decodes the output and normalises line endings to "\n".
    finished = run(["git", *args], stdout=PIPE, universal_newlines=True, cwd=folder)
    return finished.stdout


def get_origin_url_in(folder: str):
    """Return the fetch URL of the remote ``origin`` of the git repository in ``folder``.

    Parameters
    ----------
    folder : str
        Directory in which ``git remote show origin`` is executed.

    Returns
    -------
    str or None
        The text that follows ``Fetch URL:`` on the first output line containing that
        marker, stripped of surrounding whitespace; ``None`` if no line contains it.
    """
    marker = "Fetch URL:"

    for line in _git_stdout(["remote", "show", "origin"], folder).split("\n"):
        _, found, url = line.partition(marker)
        if found:
            return url.strip()

    return None


def get_diff_in(folder: str, save_dir=None):
    """Store the output of ``git diff`` run in ``folder`` in a new file and return its name.

    Parameters
    ----------
    folder : str
        Directory in which ``git diff`` is executed.
    save_dir : str, optional
        Directory in which the output file is created; passed on as ``dir`` to
        ``tempfile.NamedTemporaryFile``.

    Returns
    -------
    str
        Name of the file that holds the diff.  The file is created with ``delete=False``
        and is not removed by this function, because its name is the return value.
    """
    with tempfile.NamedTemporaryFile(delete=False, mode="w", dir=save_dir) as t:
        call(["git", "diff"], stdout=t, cwd=folder)

    return t.name


def get_branch_in(folder: str):
    """Return the current branch of the git repository in ``folder``.

    The command ``git rev-parse --abbrev-ref HEAD`` is called in the given folder and the
    first line of its output is returned, stripped of surrounding whitespace.  The result
    is an empty string if the command prints nothing to stdout.
    """
    output = _git_stdout(["rev-parse", "--abbrev-ref", "HEAD"], folder)
    return output.partition("\n")[0].strip()


def get_commit_in(folder: str):
    """Return the abbreviated hash of the latest commit of the git repository in ``folder``.

    The command ``git log -1 --format=%h`` is called in the given folder and the first
    line of its output is returned, stripped of surrounding whitespace.  The result is an
    empty string if the command prints nothing to stdout.
    """
    output = _git_stdout(["log", "-1", "--format=%h"], folder)
    return output.partition("\n")[0].strip()
pygments_style = None +suppress_warnings = [ + "autosectionlabel.*", # duplicate labels +] # -- Options for HTML output ------------------------------------------------- diff --git a/src/doc/tutorials/Data-Insertion.rst b/src/doc/tutorials/Data-Insertion.rst index f2c7f830d1403fbdf45354d1f36a4ea339759058..82df07691f7c78a2787d67463ca222d2e68249ca 100644 --- a/src/doc/tutorials/Data-Insertion.rst +++ b/src/doc/tutorials/Data-Insertion.rst @@ -83,33 +83,86 @@ corresponding python class: .. code:: python - rec = db.Record() + rec = db.Record() # rec.id is None rec.add_parent(name="Experiment") rec.add_property(name="date", value="2020-01-07") rec.insert() + print(rec.id) # rec.id set by the server + +Here, the record has a parent, the RecordType “Experiment”, and a Property date +with a value ``"2020-01-07"``. After the successful insertion, our new Record is +assigned an ``id`` by the server. In the following, let's assume this id to be +``256``. + +Reference Properties +-------------------- + +Now suppose we want to insert an analysis that references the above experiment +record as its source data. Since we know that the id of the experiment record is +256, we can do the following: + +.. code:: python + + ana = db.Record().add_parent(name="Analysis") # Create record and assign parent in one line + ana.add_property(name="Experiment", value=256) + ana.add_property(name="date", value="2020-01-08") + # possibly, add more properties here ... + ana.insert() + +The experiment record's id is used as the value of the ``Experiment`` property +of the analysis Record (note how we use the RecordType ``Experiment`` as a +``REFERENCE`` property here). Sending a CaosDB query like ``FIND RECORD +Experiment WHICH IS REFERENCED BY A Analysis WITH date=2020-01-08`` would now +return our original experiment record. -Here, the record has a parent: The RecordType “Experiment”. And a -Property: date.
+Equivalently, we can also use the Python object of the experiment record, i.e., +``rec`` as the value of the ``Experiment`` property: -Note, that if you want to use a property that is not a primitive -datatype like db.INTEGER and so on, you need to use the ID of the Entity -that you are referencing. + +.. code:: python + + ana = db.Record().add_parent(name="Analysis") + ana.add_property(name="Experiment", value=rec) + ana.add_property(name="date", value="2020-01-08") + # possibly, add more properties here ... + ana.insert() + +Finally, we can also insert both records at the same time using a +``db.Container``: .. code:: python rec = db.Record() rec.add_parent(name="Experiment") - rec.add_property(name="report", value=235507) - rec.add_property(name="Analysis", value=230007) - rec.insert() + rec.add_property(name="date", value="2020-01-07") + ana = db.Record().add_parent(name="Analysis") + ana.add_property(name="Experiment", value=rec) + ana.add_property(name="date", value="2020-01-08") + + cont = db.Container().extend([rec, ana]) # Add experiment and analysis + # records to our container + cont.insert() # Insert both at the same time, the CaosDB server will + # resolve the reference upon insertion. -Of course, the IDs 235507 and 230007 need to exist in CaosDB. The first -example shows how to use a db.REFERENCE Property (report) and the second -shows that you can use any RecordType as Property to reference a Record -that has such a parent. +All three ways result in an Analysis record which references an Experiment +record. -Most Records do not have name however it can absolutely make sense. In -that case use the name argument when creating it. Another useful feature +.. note:: + + Instead of using the ``Experiment`` RecordType as a ``REFERENCE`` property, + we can also create an actual property with data type ``Experiment``: + ``db.Property(name="source", datatype="Experiment")``.
Now you can add this + property to the analysis record with the experiment record as a value as + explained above. As a rule of thumb, using a separate property for these + references is meaningful whenever you want to highlight that, e.g., this + particular experiment provided the source data for your analysis (as opposed + to another experiment that was used for validation). + +Advanced insertions +------------------- + +Most Records do not have a name; however, it can absolutely make sense to assign +one. In that case use the name argument when creating it. Another useful feature is the fact that properties can have units: .. code:: python @@ -134,7 +187,7 @@ container. E.g. if you have a python list ``analysis_results``: cont.insert() -Useful is also, that you can insert directly tabular data. +It may also be useful to know that you can insert tabular data directly. .. code:: python @@ -144,8 +197,8 @@ Useful is also, that you can insert directly tabular data. print(recs) recs.insert() -With this example file -`test.csv <uploads/4f2c8756a26a3984c0af09d206d583e5/test.csv>`__. +Try it yourself with this example file +`test.csv <uploads/4f2c8756a26a3984c0af09d206d583e5/test.csv>`__! List Properties --------------- @@ -170,6 +223,28 @@ list-valued attribute in Python, as the following example illustrates. print(retrieved.get_property("TestList").value) +.. note:: + Properties of Entities that shall be updated need to have IDs. Let's look at an + example: + +.. code:: python + + experiment = db.Record(id=1111).retrieve() + experiment.add_property(name='date', value="2020-01-01") + experiment.update() # Fails! The 'date' Property needs to have an ID. + +The easiest way to get around this is to use the corresponding entity getter: + +.. code:: python + + experiment = db.Record(id=1111).retrieve() + experiment.add_property(db.get_entity_by_name('date'), value="2020-01-01") + experiment.update() # Works!
+ +There are also the functions ``get_entity_by_path`` and ``get_entity_by_id``. You can easily use +cached versions of those functions (see :doc:`caching options<caching>`). + + File Update ----------- diff --git a/src/doc/tutorials/Entity-Getters.rst b/src/doc/tutorials/Entity-Getters.rst new file mode 100644 index 0000000000000000000000000000000000000000..50ed13201e5720de22bf0b605bc5162834a458a8 --- /dev/null +++ b/src/doc/tutorials/Entity-Getters.rst @@ -0,0 +1,16 @@ + +Entity Getters +============== + +There is a very frequent situation when working with PyCaosDB: You need to get a specific Entity +from the remote server. For example, you need the Property Entity in order to make an update. Sure, +you can do a ``db.Entity().retrieve()`` or submit a query, but there is an even faster way which +also helps prevent errors: + +- ``get_entity_by_name`` +- ``get_entity_by_id`` +- ``get_entity_by_path`` + +You can call these functions with a single argument (name/id/path). Since these functions are +frequently used with the same arguments over and over again, you might want to look at the +:doc:`caching options<caching>`. diff --git a/src/doc/tutorials/caching.rst b/src/doc/tutorials/caching.rst new file mode 100644 index 0000000000000000000000000000000000000000..aad9a1ddbd9e93a3cd06887eaffcf956c3c5bea6 --- /dev/null +++ b/src/doc/tutorials/caching.rst @@ -0,0 +1,58 @@ + +Caching +======= + +.. note:: + + Caching is great, because it can speed up things considerably. But it can also create dangerous + pitfalls if the cache is not cleared when needed and you work with outdated data. Thus, please use + the cache with care and make sure to clear it when needed. + +Python provides great tools for caching. For example, you could define a ``cached_get_by_name`` +function, easily created from ``get_entity_by_name`` using Python's ``lru_cache``: + +..
code:: python + + @lru_cache(maxsize=1000) + def cached_get_by_name(name): + return db.get_entity_by_name(name) + + exp = cached_get_by_name('Experiment') + # reset the cache with + cached_get_by_name.cache_clear() + +For convenience, PyCaosDB provides the ``caosdb.cached`` module that defines the functions +``cached_query`` and ``cached_get_entity_by``, which use a shared cache. Let's have a look: + +.. code:: python + + from caosdb.cached import cached_query, cached_get_entity_by, cache_clear, cache_info, cache_initialize + rt1 = cached_get_entity_by(name='RT1') + qresult = cached_query('FIND Experiment WITH parameter=1') + # you can inspect the cache + print(cache_info()) + # this will not cause a server request since it is cached + rt1 = cached_get_entity_by(name='RT1') + # you can clear the cache with + cache_clear() + # If you want to have a cache with a custom size, you can initialize it (again). Old cached + # data is lost. + cache_initialize(maxsize=10) + + +If you want to manually add entities to the cache, you can do it yourself. This is useful when you +have entities on hand from previous queries that you want to add. + +.. code:: python + + from caosdb.cached import cache_fill, AccessType + # Here, items must be a dict with Entity IDs as keys and the Entities as values. + cache_fill(items, AccessType.EID, unique=True) + # If you now use IDs that were in items, they are taken from the cache. + e1 = cached_get_entity_by(eid=10001) + +When filling the cache with Entity objects for ``cached_get_entity_by``, you need to set +``unique=True``, whereas the cache for ``cached_query`` should be filled with Container objects and +``unique=False``.
+ + diff --git a/src/doc/tutorials/first_steps.rst b/src/doc/tutorials/first_steps.rst index 7366d391cdf50b1d9f9107fc2d47f28d9df7bf7f..c84ec52aa63f0563b22c698081e89600c7af6122 100644 --- a/src/doc/tutorials/first_steps.rst +++ b/src/doc/tutorials/first_steps.rst @@ -52,6 +52,7 @@ Let's look at the first element: <Record ... .. The above example needs doctest ELLIPSIS + You see that the object is a Record. It has a Parent and two Properties. .. note:: @@ -119,7 +120,7 @@ If the files are large data files, it is often a better idea to only retrieve th Summary ------- -Now you know, how you can use Python to send queries to CaosDB and you can access +Now you know how to use Python to send queries to CaosDB and you can access the result Records and their properties. The next tutorial shows how to make some meaningful use of this. diff --git a/src/doc/tutorials/index.rst b/src/doc/tutorials/index.rst index 0b08d0b4fe153d803a780bd144787819b827db78..ce37993d7ec5e0888da8a2b4c58904bcbdc43bb4 100644 --- a/src/doc/tutorials/index.rst +++ b/src/doc/tutorials/index.rst @@ -14,6 +14,8 @@ advanced usage of the Python client. basic_analysis Data-Insertion errors + Entity-Getters + caching data-model-interface complex_data_models serverside diff --git a/unittests/test_cached.py b/unittests/test_cached.py new file mode 100644 index 0000000000000000000000000000000000000000..ce302d671d6077aed7d8457e70da2076ebe65d50 --- /dev/null +++ b/unittests/test_cached.py @@ -0,0 +1,295 @@ +# -*- coding: utf-8 -*- +# +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2023 Henrik tom Wörden <h.tomwoerden@indiscale.com> +# Copyright (C) 2023 IndiScale GmbH <info@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. 
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#

""" Test the caosdb.cached module """

from caosdb.cached import (cached_get_entity_by, cache_clear, cache_info, cache_fill,
                           AccessType, cache_initialize, cached_query)
from unittest.mock import patch
import caosdb as db
from copy import deepcopy
import pytest


# Entities that the mocked getters and queries below hand out in place of a server.
DUMMY_SERVER_CONTENT = [
    db.Record(name='a', id=101),
    db.Record(name='b', id=102),
    db.Record(name='c', id=103),
    db.File(path='p', id=104),
    db.File(path='pp', id=105),
]


@pytest.fixture(autouse=True)
def cache_clean_up():
    """Clear the cache before and after every test (the fixture is ``autouse``)."""
    cache_clear()
    yield
    cache_clear()


def mocked_name_query(name):
    """Return a copy of the first dummy entity whose name equals ``name``."""
    # copy the object, because Entities would normally be created from XML response
    return deepcopy([el for el in DUMMY_SERVER_CONTENT if el.name == name][0])


def mocked_id_query(eid):
    """Return a copy of the first dummy entity whose id equals ``eid``."""
    # copy the object, because Entities would normally be created from XML response
    return deepcopy([el for el in DUMMY_SERVER_CONTENT if el.id == eid][0])


def mocked_path_query(path):
    """Return a copy of the first dummy entity whose path equals ``path``."""
    # copy the object, because Entities would normally be created from XML response
    return deepcopy([el for el in DUMMY_SERVER_CONTENT if el.path == path][0])


def mocked_gen_query(q, unique):
    """Stand-in for ``execute_query``.

    With ``unique`` set, the query 'a' yields the first dummy entity and any other query
    yields ``None``.  Without ``unique``, the query 'a' yields a Container holding the
    first dummy entity and any other query yields a Container with all dummy entities.
    Unlike the other mocks, the entities are returned without being copied.
    """
    if unique:
        if q == 'a':
            return DUMMY_SERVER_CONTENT[0]
        else:
            return None
    else:
        if q == 'a':
            return db.Container().extend([DUMMY_SERVER_CONTENT[0]])
        else:
            return db.Container().extend(DUMMY_SERVER_CONTENT)


@patch("caosdb.utils.get_entity.get_entity_by_name")
def test_get_by_name(mocked_get_by_name):
    """Check caching of ``cached_get_entity_by(name=...)`` against a mocked name getter."""
    mocked_get_by_name.side_effect = mocked_name_query
    # first call; not in cache -> the mocked getter is called
    a = cached_get_entity_by(name='a')
    assert a.id == 101
    assert mocked_get_by_name.call_count == 1
    # second call; in cache -> the mocked getter is NOT called (count is still 1)
    b = cached_get_entity_by(name='a')
    assert mocked_get_by_name.call_count == 1
    # the cache returned the same object
    assert a is b
    # check the info
    assert cache_info().hits == 1
    assert cache_info().currsize == 1
    # after clearing the cache, the mock is used again
    cache_clear()
    cached_get_entity_by(name='a')
    assert mocked_get_by_name.call_count == 2
    # we fill the cache manually and make sure the element is used
    cache_fill({'lol': db.Entity(id=10001, name='lol')}, AccessType.NAME, unique=True)
    # there are now two elements in the cache: a and lol
    assert cache_info().currsize == 2
    # we can retrieve the inserted element
    lol = cached_get_entity_by(name='lol')
    assert lol.id == 10001
    # this did not touch the mocked function
    assert mocked_get_by_name.call_count == 2
    # make sure normal retrieval still works (count +1)
    c = cached_get_entity_by(name='c')
    assert mocked_get_by_name.call_count == 3
    assert c.id == 103


@patch("caosdb.utils.get_entity.get_entity_by_id")
def test_get_by_id(mocked_get_by_id):
    """Check caching of ``cached_get_entity_by(eid=...)`` against a mocked id getter."""
    mocked_get_by_id.side_effect = mocked_id_query
    # first call; not in cache -> the mocked getter is called
    b = cached_get_entity_by(eid=102)
    assert b.id == 102
    assert b.name == 'b'
    assert mocked_get_by_id.call_count == 1
    # second call; in cache -> the mocked getter is NOT called (count is still 1)
    a = cached_get_entity_by(eid=102)
    assert mocked_get_by_id.call_count == 1
    # the cache returned the same object
    assert a is b
    # check the info
    assert cache_info().hits == 1
    assert cache_info().currsize == 1
    # after clearing the cache, the mock is used again
    cache_clear()
    cached_get_entity_by(eid=102)
    assert mocked_get_by_id.call_count == 2
    # we fill the cache manually and make sure the element is used
    cache_fill({10001: db.Entity(id=10001, name='lol')}, AccessType.EID, unique=True)
    # there are now two elements in the cache: the entity with id 102 and lol
    assert cache_info().currsize == 2
    # we can retrieve the inserted element
    lol = cached_get_entity_by(eid=10001)
    assert lol.name == 'lol'
    # this did not touch the mocked function
    assert mocked_get_by_id.call_count == 2
    # make sure normal retrieval still works (count +1)
    c = cached_get_entity_by(eid=103)
    assert mocked_get_by_id.call_count == 3
    assert c.name == 'c'


# NOTE(review): this patch target goes through ``caosdb.cached.get_entity`` whereas the
# name/id tests patch ``caosdb.utils.get_entity`` - presumably the same module object; confirm.
@patch("caosdb.cached.get_entity.get_entity_by_path")
def test_get_by_path(mocked_get_by_path):
    """Check caching of ``cached_get_entity_by(path=...)`` against a mocked path getter."""
    mocked_get_by_path.side_effect = mocked_path_query
    # first call; not in cache -> the mocked getter is called
    b = cached_get_entity_by(path='p')
    assert b.id == 104
    assert mocked_get_by_path.call_count == 1
    # second call; in cache -> the mocked getter is NOT called (count is still 1)
    a = cached_get_entity_by(path='p')
    assert mocked_get_by_path.call_count == 1
    # the cache returned the same object
    assert a is b
    # check the info
    assert cache_info().hits == 1
    assert cache_info().currsize == 1
    # after clearing the cache, the mock is used again
    cache_clear()
    cached_get_entity_by(path='p')
    assert mocked_get_by_path.call_count == 2
    # we fill the cache manually and make sure the element is used
    cache_fill({'lol': db.File(id=10001, path='lol')}, AccessType.PATH, unique=True)
    # there are now two elements in the cache: the file at path 'p' and lol
    assert cache_info().currsize == 2
    # we can retrieve the inserted element
    lol = cached_get_entity_by(path='lol')
    assert lol.id == 10001
    # this did not touch the mocked function
    assert mocked_get_by_path.call_count == 2
    # make sure normal retrieval still works (count +1)
    c = cached_get_entity_by(path='pp')
    assert mocked_get_by_path.call_count == 3
    assert c.id == 105


@patch("caosdb.cached.execute_query")
def test_get_by_query(mocked_query):
    """Check ``cached_get_entity_by(query=...)``, including caching of a ``None`` result."""
    mocked_query.side_effect = mocked_gen_query
    # test cache initialization
    cache_initialize(maxsize=10)
    assert cache_info().currsize == 0

    # Non-existent entity: the mock returns None, which is stored in the cache as well
    res = cached_get_entity_by(query='stuff')
    assert res is None
    assert cache_info().currsize == 1
    assert cache_info().hits == 0
    assert cache_info().misses == 1

    # the repeated query is a cache hit and does not add an entry
    res = cached_get_entity_by(query='stuff')
    assert res is None
    assert cache_info().currsize == 1
    assert cache_info().hits == 1
    assert cache_info().misses == 1

    # Existent entity
    a = cached_get_entity_by(query='a')
    assert a is not None
    assert a.id == 101
    assert cache_info().currsize == 2
    assert cache_info().hits == 1
    assert cache_info().misses == 2


@patch("caosdb.cached.execute_query")
def test_cached_query(mocked_query):
    """Check caching of ``cached_query`` against a mocked ``execute_query``."""
    mocked_query.side_effect = mocked_gen_query
    # test cache initialization
    cache_initialize(maxsize=10)
    assert cache_info().maxsize == 10
    # first call; not in cache -> the mocked query function is called
    res = cached_query('stuff')
    assert len(res) == len(DUMMY_SERVER_CONTENT)
    assert mocked_query.call_count == 1
    # second call; in cache -> the mocked query function is NOT called (count is still 1)
    a = cached_query('stuff')
    assert mocked_query.call_count == 1
    # the cache returned the same object
    assert a is res
    # check the info
    assert cache_info().hits == 1
    assert cache_info().currsize == 1
    # after clearing the cache, the mock is used again
    cache_clear()
    cached_query('stuff')
    assert mocked_query.call_count == 2
    # we fill the cache manually and make sure the element is used
    cache_fill({'lol': db.Container().extend([db.Entity(id=10001, name='lol')])},
               AccessType.QUERY, unique=False)
    # there are now two elements in the cache: the result of 'stuff' and lol
    assert cache_info().currsize == 2
    # we can retrieve the inserted element
    lol = cached_query('lol')
    assert lol[0].id == 10001
    # this did not touch the mocked function
    assert mocked_query.call_count == 2
    # make sure normal retrieval still works (count +1)
    c = cached_query('a')
    assert mocked_query.call_count == 3
    assert c[0].id == 101


@patch("caosdb.utils.get_entity.get_entity_by_name")
def test_cache_size(mocked_get_by_name):
    """Check that a cache limited to ``maxsize`` entries drops the oldest ones."""
    # the mocked getter simply echoes the requested name
    mocked_get_by_name.side_effect = lambda x: x
    # use a small cache so that the size limit is reached within the test
    maxsize = 5
    cache_initialize(maxsize=maxsize)
    assert cache_info().currsize == 0

    names_first = ("a", "b", "c", "d", "e")
    names_later = ("A", "B", "C", "D", "E")
    names_fill = {"X": None, "Y": None, "Z": None}

    # Use the first batch of names: every lookup is a miss and adds one entry
    for ii, name in enumerate(names_first, start=1):
        cached_get_entity_by(name=name)
        assert cache_info().currsize == ii
        assert cache_info().hits == 0
        assert cache_info().misses == ii
    # the same names again: every lookup is a hit
    for ii, name in enumerate(names_first, start=1):
        cached_get_entity_by(name=name)
        assert cache_info().currsize == maxsize
        assert cache_info().hits == ii
        assert cache_info().misses == maxsize

    # use the second batch of names: misses again, the size stays at maxsize
    for ii, name in enumerate(names_later, start=1):
        cached_get_entity_by(name=name)
        assert cache_info().currsize == maxsize
        assert cache_info().hits == len(names_first)
        assert cache_info().misses == len(names_first) + ii
    for ii, name in enumerate(names_later, start=1):
        cached_get_entity_by(name=name)
        assert cache_info().currsize == maxsize
        assert cache_info().hits == len(names_first) + ii
        assert cache_info().misses == len(names_first) + len(names_later)

    # The cache is now filled with A,B,C,D,E (ordered from least to most recently used).
    # Let's fill it with X,Y,Z.
    cache_fill(names_fill, kind=AccessType.NAME)

    # Now, the cache should be: D,E,X,Y,Z
    current_misses = cache_info().misses

    # all five are still cached: the miss counter does not change
    for name in ("Z", "Y", "X", "E", "D"):
        cached_get_entity_by(name=name)
        assert cache_info().misses == current_misses

    # A, B and C were dropped: each lookup is a new miss
    for ii, name in enumerate(("A", "B", "C"), start=1):
        cached_get_entity_by(name=name)
        assert cache_info().misses == current_misses + ii