diff --git a/CHANGELOG.md b/CHANGELOG.md index a5f40f830b13f67eed6cf1667f99eafba2c80cef..65547e6b8017e53d711955f9f9fbee00dc739b86 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,9 +21,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - If a registered identifiable states, that a reference by a Record with parent RT1 is needed, then now also references from Records that have a child of RT1 as parent are accepted. +- More aggressive caching. ### Deprecated ### -- LocalStorageIdentifiableAdapter +- `IdentifiableAdapter.get_file` ### Removed ### diff --git a/src/caoscrawler/crawl.py b/src/caoscrawler/crawl.py index f31d66a95b948967de283e3343640447db93dd7f..1c6968d76257109cc24458a048d6d41ea64d2e5f 100644 --- a/src/caoscrawler/crawl.py +++ b/src/caoscrawler/crawl.py @@ -26,7 +26,7 @@ """ Crawl a file structure using a yaml cfood definition and synchronize -the acuired data with CaosDB. +the acuired data with LinkAhead. """ from __future__ import annotations @@ -44,18 +44,18 @@ from datetime import datetime from enum import Enum from typing import Any, Optional, Union -import caosdb as db +import linkahead as db import yaml from caosadvancedtools.cache import UpdateCache from caosadvancedtools.crawler import Crawler as OldCrawler from caosadvancedtools.serverside.helper import send_mail from caosadvancedtools.utils import create_entity_link -from caosdb.apiutils import (EntityMergeConflictError, compare_entities, - merge_entities) -from caosdb.cached import cache_clear, cached_get_entity_by -from caosdb.exceptions import EmptyUniqueQueryError +from linkahead.apiutils import (EntityMergeConflictError, compare_entities, + merge_entities) +from linkahead.cached import cache_clear, cached_get_entity_by from linkahead.common.datatype import get_list_datatype, is_reference -from linkahead.utils.escape import escape_quoted_text +from linkahead.exceptions import EmptyUniqueQueryError +from linkahead.utils.escape import escape_squoted_text 
from .config import get_config_setting from .converters import Converter, ConverterValidationError @@ -900,8 +900,8 @@ class Crawler(object): @staticmethod def _get_property_id_for_datatype(rtname: str, name: str): return cached_get_entity_by( - query=f"FIND Entity '{escape_quoted_text(rtname)}' " - f"with name='{escape_quoted_text(name)}'").id + query=f"FIND Entity '{escape_squoted_text(rtname)}' " + f"with name='{escape_squoted_text(name)}'").id @staticmethod def replace_name_with_referenced_entity_id(prop: db.Property): diff --git a/src/caoscrawler/debug_tree.py b/src/caoscrawler/debug_tree.py index 9983981c69e3df7c58ddfda4b6977944eac54999..0d57040f5c20aca236a3c11531e8b7c45bad89ab 100644 --- a/src/caoscrawler/debug_tree.py +++ b/src/caoscrawler/debug_tree.py @@ -45,13 +45,13 @@ from importlib_resources import files from jsonschema import validate from typing import Any, Optional, Type, Union -import caosdb as db +import linkahead as db from caosadvancedtools.cache import UpdateCache, Cache from caosadvancedtools.crawler import Crawler as OldCrawler -from caosdb.apiutils import (compare_entities, EntityMergeConflictError, - merge_entities) -from caosdb.common.datatype import is_reference +from linkahead.apiutils import (compare_entities, EntityMergeConflictError, + merge_entities) +from linkahead.common.datatype import is_reference from .converters import Converter, DirectoryConverter, ConverterValidationError diff --git a/src/caoscrawler/identifiable_adapters.py b/src/caoscrawler/identifiable_adapters.py index 15915ee8495e5ea19837545d6fcf836a75b094d7..dd8c032041a74fa05b16d93abb06186e7e6fa569 100644 --- a/src/caoscrawler/identifiable_adapters.py +++ b/src/caoscrawler/identifiable_adapters.py @@ -29,11 +29,13 @@ import logging import warnings from abc import ABCMeta, abstractmethod from datetime import datetime +from functools import lru_cache from typing import Any -import caosdb as db +import linkahead as db import yaml -from caosdb.cached import 
cached_get_entity_by +from linkahead.cached import cached_get_entity_by, cached_query +from linkahead.utils.escape import escape_squoted_text from .identifiable import Identifiable from .utils import has_parent @@ -44,21 +46,24 @@ logger = logging.getLogger(__name__) def get_children_of_rt(rtname): """Supply the name of a recordtype. This name and the name of all children RTs are returned in a list""" - return [p.name for p in db.execute_query(f"FIND RECORDTYPE {rtname}")] + escaped = escape_squoted_text(rtname) + return [p.name for p in cached_query(f"FIND RECORDTYPE '{escaped}'")] -def convert_value(value: Any): - """ Returns a string representation of the value that is suitable - to be used in the query - looking for the identified record. +def convert_value(value: Any) -> str: + """ Return a string representation of the value suitable for the search query. + + This is for search queries looking for the identified record. Parameters ---------- - value : Any type, the value that shall be returned and potentially converted. + value: Any + The value to be converted. Returns ------- - out : the string reprensentation of the value + out: str + the string representation of the value. """ @@ -69,8 +74,7 @@ def convert_value(value: Any): elif isinstance(value, bool): return str(value).upper() elif isinstance(value, str): - # replace single quotes, otherwise they may break the queries - return value.replace("\'", "\\'") + return escape_squoted_text(value) else: return str(value) @@ -96,7 +100,7 @@ General question to clarify: The list of referenced by statements is currently not implemented. -The IdentifiableAdapter can be used to retrieve the three above mentioned objects (registred +The IdentifiableAdapter can be used to retrieve the three above mentioned objects (registered identifiabel, identifiable and identified record) for a Record. """ @@ -113,7 +117,8 @@ identifiabel, identifiable and identified record) for a Record. 
query_string = "FIND RECORD " if ident.record_type is not None: - query_string += f"'{ident.record_type}'" + escaped_rt = escape_squoted_text(ident.record_type) + query_string += f"'{escaped_rt}'" for ref in ident.backrefs: eid = ref if isinstance(ref, db.Entity): @@ -123,7 +128,7 @@ identifiabel, identifiable and identified record) for a Record. query_string += " WITH " if ident.name is not None: - query_string += "name='{}'".format(convert_value(ident.name)) + query_string += "name='{}'".format(escape_squoted_text(ident.name)) if len(ident.properties) > 0: query_string += " AND " @@ -139,10 +144,10 @@ identifiabel, identifiable and identified record) for a Record. query_string = "" for pname, pvalue in entity.properties.items(): if pvalue is None: - query_string += "'" + pname + "' IS NULL AND " + query_string += "'" + escape_squoted_text(pname) + "' IS NULL AND " elif isinstance(pvalue, list): for v in pvalue: - query_string += ("'" + pname + "'='" + + query_string += ("'" + escape_squoted_text(pname) + "'='" + convert_value(v) + "' AND ") # TODO: (for review) @@ -156,7 +161,7 @@ identifiabel, identifiable and identified record) for a Record. # IdentifiableAdapter.create_property_query(p.value) + # ") AND ") else: - query_string += ("'" + pname + "'='" + + query_string += ("'" + escape_squoted_text(pname) + "'='" + convert_value(pvalue) + "' AND ") # remove the last AND return query_string[:-4] @@ -175,6 +180,7 @@ identifiabel, identifiable and identified record) for a Record. @abstractmethod def get_file(self, identifiable: db.File): + warnings.warn(DeprecationWarning("This function is deprecated. Please do not use it.")) """ Retrieve the file object for a (File) identifiable. """ @@ -182,7 +188,7 @@ identifiabel, identifiable and identified record) for a Record. 
def get_identifiable(self, record: db.Record, referencing_entities=None): """ - retrieve the registred identifiable and fill the property values to create an + retrieve the registered identifiable and fill the property values to create an identifiable Args: @@ -216,7 +222,7 @@ identifiabel, identifiable and identified record) for a Record. # case A: in the registered identifiable # case B: in the identifiable - # TODO: similar to the Identifiable class, Registred Identifiable should be a + # TODO: similar to the Identifiable class, Registered Identifiable should be a # separate class too if prop.name.lower() == "is_referenced_by": for givenrt in prop.value: @@ -264,7 +270,7 @@ identifiabel, identifiable and identified record) for a Record. "Multi properties used in identifiables could cause unpredictable results and " "are not allowed. You might want to consider a Property with a list as value.") - # use the RecordType of the registred Identifiable if it exists + # use the RecordType of the registered Identifiable if it exists # We do not use parents of Record because it might have multiple try: return Identifiable( @@ -318,7 +324,7 @@ class LocalStorageIdentifiableAdapter(IdentifiableAdapter): def __init__(self): warnings.warn(DeprecationWarning( - "This class is depricated. Please use the CaosDBIdentifiableAdapter.")) + "This class is deprecated. Please use the CaosDBIdentifiableAdapter.")) self._registered_identifiables = dict() self._records = [] @@ -333,6 +339,7 @@ class LocalStorageIdentifiableAdapter(IdentifiableAdapter): Just look in records for a file with the same path. """ candidates = [] + warnings.warn(DeprecationWarning("This function is deprecated. 
Please do not use it.")) for record in self._records: if record.role == "File" and record.path == identifiable.path: candidates.append(record) @@ -480,14 +487,14 @@ class CaosDBIdentifiableAdapter(IdentifiableAdapter): self._registered_identifiables[name] = definition def get_file(self, identifiable: Identifiable): + warnings.warn(DeprecationWarning("This function is deprecated. Please do not use it.")) # TODO is this needed for Identifiable? # or can we get rid of this function? if isinstance(identifiable, db.Entity): return cached_get_entity_by(path=identifiable) if identifiable.path is None: raise RuntimeError("Path must not be None for File retrieval.") - candidates = db.execute_query("FIND File which is stored at '{}'".format( - identifiable.path)) + candidates = cached_query("FIND File which is stored at '{}'".format(identifiable.path)) if len(candidates) > 1: raise RuntimeError("Identifiable was not defined unambigiously.") if len(candidates) == 0: @@ -496,7 +503,7 @@ class CaosDBIdentifiableAdapter(IdentifiableAdapter): def get_registered_identifiable(self, record: db.Record): """ - returns the registred identifiable for the given Record + returns the registered identifiable for the given Record It is assumed, that there is exactly one identifiable for each RecordType. Only the first parent of the given Record is considered; others are ignored @@ -520,7 +527,7 @@ class CaosDBIdentifiableAdapter(IdentifiableAdapter): def retrieve_identified_record_for_identifiable(self, identifiable: Identifiable): query_string = self.create_query_for_identifiable(identifiable) - candidates = db.execute_query(query_string) + candidates = cached_query(query_string) if len(candidates) > 1: raise RuntimeError( f"Identifiable was not defined unambigiously.\n{query_string}\nReturned the "