Skip to content
Snippets Groups Projects
Commit 993d8608 authored by Alexander Schlemmer
Browse files

STY: removed unused imports and applied black style

parent 7d9c2bad
No related branches found
No related tags found
2 merge requests: !178 FIX: #96 Better error output for crawl.py script., !167 Sync Graph
Pipeline #51533 passed with warnings
...@@ -29,7 +29,6 @@ import logging ...@@ -29,7 +29,6 @@ import logging
import warnings import warnings
from abc import ABCMeta, abstractmethod from abc import ABCMeta, abstractmethod
from datetime import datetime from datetime import datetime
from functools import lru_cache
from typing import Any from typing import Any
import linkahead as db import linkahead as db
...@@ -37,8 +36,7 @@ import yaml ...@@ -37,8 +36,7 @@ import yaml
from linkahead.cached import cached_get_entity_by, cached_query from linkahead.cached import cached_get_entity_by, cached_query
from linkahead.utils.escape import escape_squoted_text from linkahead.utils.escape import escape_squoted_text
from .exceptions import (MissingIdentifyingProperty, from .exceptions import MissingIdentifyingProperty, MissingReferencingEntityError
MissingReferencingEntityError)
from .identifiable import Identifiable from .identifiable import Identifiable
from .sync_node import SyncNode from .sync_node import SyncNode
from .utils import has_parent from .utils import has_parent
...@@ -54,7 +52,7 @@ def get_children_of_rt(rtname): ...@@ -54,7 +52,7 @@ def get_children_of_rt(rtname):
def convert_value(value: Any) -> str: def convert_value(value: Any) -> str:
""" Return a string representation of the value suitable for the search query. """Return a string representation of the value suitable for the search query.
This is for search queries looking for the identified record. This is for search queries looking for the identified record.
...@@ -85,27 +83,27 @@ def convert_value(value: Any) -> str: ...@@ -85,27 +83,27 @@ def convert_value(value: Any) -> str:
class IdentifiableAdapter(metaclass=ABCMeta): class IdentifiableAdapter(metaclass=ABCMeta):
"""Base class for identifiable adapters. """Base class for identifiable adapters.
Some terms: Some terms:
- A *registered identifiable* defines an identifiable template, for example by specifying: - A *registered identifiable* defines an identifiable template, for example by specifying:
- Parent record types - Parent record types
- Properties - Properties
- ``is_referenced_by`` statements - ``is_referenced_by`` statements
- An *identifiable* belongs to a concrete record. It consists of identifying attributes which "fill - An *identifiable* belongs to a concrete record. It consists of identifying attributes which "fill
in" the *registered identifiable*. In code, it can be represented as a Record based on the in" the *registered identifiable*. In code, it can be represented as a Record based on the
*registered identifiable* with all the values filled in. *registered identifiable* with all the values filled in.
- An *identified record* is the result of retrieving a record from the database, based on the - An *identified record* is the result of retrieving a record from the database, based on the
*identifiable* (and its values). *identifiable* (and its values).
General question to clarify: General question to clarify:
- Do we want to support multiple identifiables per RecordType? - Do we want to support multiple identifiables per RecordType?
- Current implementation supports only one identifiable per RecordType. - Current implementation supports only one identifiable per RecordType.
The list of referenced by statements is currently not implemented. The list of referenced by statements is currently not implemented.
The IdentifiableAdapter can be used to retrieve the three above mentioned objects (registered The IdentifiableAdapter can be used to retrieve the three above mentioned objects (registered
identifiable, identifiable and identified record) for a Record. identifiable, identifiable and identified record) for a Record.
""" """
...@@ -130,7 +128,7 @@ identifiabel, identifiable and identified record) for a Record. ...@@ -130,7 +128,7 @@ identifiabel, identifiable and identified record) for a Record.
eid = ref eid = ref
if isinstance(ref, db.Entity): if isinstance(ref, db.Entity):
eid = ref.id eid = ref.id
query_string += (" WHICH IS REFERENCED BY " + str(eid) + " AND") query_string += " WHICH IS REFERENCED BY " + str(eid) + " AND"
query_string += " WITH " query_string += " WITH "
...@@ -139,16 +137,20 @@ identifiabel, identifiable and identified record) for a Record. ...@@ -139,16 +137,20 @@ identifiabel, identifiable and identified record) for a Record.
if len(ident.properties) > 0: if len(ident.properties) > 0:
query_string += " AND " query_string += " AND "
query_string += IdentifiableAdapter.create_property_query(ident, startswith=startswith) query_string += IdentifiableAdapter.create_property_query(
ident, startswith=startswith
)
# TODO Can these cases happen at all with the current code? # TODO Can these cases happen at all with the current code?
if query_string.endswith(" AND WITH "): if query_string.endswith(" AND WITH "):
query_string = query_string[:-len(" AND WITH ")] query_string = query_string[: -len(" AND WITH ")]
if query_string.endswith(" AND "): if query_string.endswith(" AND "):
query_string = query_string[:-len(" AND ")] query_string = query_string[: -len(" AND ")]
return query_string return query_string
def all_identifying_properties_exist(self, node: SyncNode, raise_exception: bool = True): def all_identifying_properties_exist(
self, node: SyncNode, raise_exception: bool = True
):
"""checks whether all identifying properties exist and raises an error if """checks whether all identifying properties exist and raises an error if
that's not the case. It furthermore raises an error if "name" is part of that's not the case. It furthermore raises an error if "name" is part of
the identifiable, but the node does not have a name. the identifiable, but the node does not have a name.
...@@ -172,7 +174,7 @@ identifiabel, identifiable and identified record) for a Record. ...@@ -172,7 +174,7 @@ identifiabel, identifiable and identified record) for a Record.
if prop.name.lower() == "name": if prop.name.lower() == "name":
if node.name is None: if node.name is None:
if raise_exception: if raise_exception:
i = MissingIdentifyingProperty(f"The node has no name.") i = MissingIdentifyingProperty("The node has no name.")
i.prop = "name" i.prop = "name"
raise i raise i
else: else:
...@@ -182,9 +184,20 @@ identifiabel, identifiable and identified record) for a Record. ...@@ -182,9 +184,20 @@ identifiabel, identifiable and identified record) for a Record.
# multiple occurrences are ok here. We deal with that when actually creating an # multiple occurrences are ok here. We deal with that when actually creating an
# identifiable (IDs of referenced Entities might need to get resolved first). # identifiable (IDs of referenced Entities might need to get resolved first).
if (len([el for el in node.properties if el.name.lower() == prop.name.lower()]) == 0): if (
len(
[
el
for el in node.properties
if el.name.lower() == prop.name.lower()
]
)
== 0
):
if raise_exception: if raise_exception:
i = MissingIdentifyingProperty(f"The property {prop.name} is missing.") i = MissingIdentifyingProperty(
f"The property {prop.name} is missing."
)
i.prop = prop.name i.prop = prop.name
raise i raise i
else: else:
...@@ -196,9 +209,9 @@ identifiabel, identifiable and identified record) for a Record. ...@@ -196,9 +209,9 @@ identifiabel, identifiable and identified record) for a Record.
def __create_pov_snippet(pname: str, pvalue, startswith: bool = False): def __create_pov_snippet(pname: str, pvalue, startswith: bool = False):
"""Return something like ``'name'='some value'`` or ``'name' LIKE 'some*'``. """Return something like ``'name'='some value'`` or ``'name' LIKE 'some*'``.
If ``startswith`` is True, the value of strings will be cut off at 200 characters and a ``LIKE`` If ``startswith`` is True, the value of strings will be cut off at 200 characters and a ``LIKE``
operator will be used to find entities matching at the beginning. operator will be used to find entities matching at the beginning.
""" """
if startswith and isinstance(pvalue, str) and len(pvalue) > 200: if startswith and isinstance(pvalue, str) and len(pvalue) > 200:
operator_value_str = f" LIKE '{escape_squoted_text(pvalue[:200])}*'" operator_value_str = f" LIKE '{escape_squoted_text(pvalue[:200])}*'"
else: else:
...@@ -210,14 +223,14 @@ operator will be used to find entities matching at the beginning. ...@@ -210,14 +223,14 @@ operator will be used to find entities matching at the beginning.
def create_property_query(entity: Identifiable, startswith: bool = False): def create_property_query(entity: Identifiable, startswith: bool = False):
"""Create a POV query part with the entity's properties. """Create a POV query part with the entity's properties.
Parameters Parameters
---------- ----------
entity: Identifiable entity: Identifiable
The Identifiable whose properties shall be used. The Identifiable whose properties shall be used.
startswith: bool, optional startswith: bool, optional
If True, check string typed properties against the first 200 characters only. Default is False. If True, check string typed properties against the first 200 characters only. Default is False.
""" """
query_string = "" query_string = ""
pov = IdentifiableAdapter.__create_pov_snippet # Shortcut pov = IdentifiableAdapter.__create_pov_snippet # Shortcut
...@@ -257,7 +270,9 @@ startswith: bool, optional ...@@ -257,7 +270,9 @@ startswith: bool, optional
@abstractmethod @abstractmethod
def get_file(self, identifiable: db.File): def get_file(self, identifiable: db.File):
warnings.warn(DeprecationWarning("This function is deprecated. Please do not use it.")) warnings.warn(
DeprecationWarning("This function is deprecated. Please do not use it.")
)
""" """
Retrieve the file object for a (File) identifiable. Retrieve the file object for a (File) identifiable.
""" """
...@@ -326,9 +341,7 @@ startswith: bool, optional ...@@ -326,9 +341,7 @@ startswith: bool, optional
" entities." " entities."
) )
elif len([e.id for e in identifiable_backrefs if el.id is None]) > 0: elif len([e.id for e in identifiable_backrefs if el.id is None]) > 0:
raise RuntimeError( raise RuntimeError("Referencing entity has no id")
f"Referencing entity has no id"
)
continue continue
options = [p.value for p in se.properties if p.name == prop.name] options = [p.value for p in se.properties if p.name == prop.name]
...@@ -341,8 +354,10 @@ startswith: bool, optional ...@@ -341,8 +354,10 @@ startswith: bool, optional
if isinstance(el, SyncNode): if isinstance(el, SyncNode):
options[ii] = el.id options[ii] = el.id
if el.id is None: if el.id is None:
raise RuntimeError("Reference to unchecked in identifiable:\n" raise RuntimeError(
f"{prop.name}:\n{el}") "Reference to unchecked in identifiable:\n"
f"{prop.name}:\n{el}"
)
else: else:
options[ii] = el options[ii] = el
if not all([f == options[0] for f in options]): if not all([f == options[0] for f in options]):
...@@ -354,11 +369,13 @@ startswith: bool, optional ...@@ -354,11 +369,13 @@ startswith: bool, optional
# check for multi properties in the record: # check for multi properties in the record:
for prop in property_name_list_A: for prop in property_name_list_A:
property_name_list_B.append(prop) property_name_list_B.append(prop)
if (len(set(property_name_list_B)) != len(property_name_list_B) or len( if len(set(property_name_list_B)) != len(property_name_list_B) or len(
set(property_name_list_A)) != len(property_name_list_A)): set(property_name_list_A)
) != len(property_name_list_A):
raise RuntimeError( raise RuntimeError(
"Multi properties used in identifiables could cause unpredictable results and " "Multi properties used in identifiables could cause unpredictable results and "
"are not allowed. You might want to consider a Property with a list as value.") "are not allowed. You might want to consider a Property with a list as value."
)
# use the RecordType of the registered Identifiable if it exists # use the RecordType of the registered Identifiable if it exists
# We do not use parents of Record because it might have multiple # We do not use parents of Record because it might have multiple
...@@ -368,7 +385,7 @@ startswith: bool, optional ...@@ -368,7 +385,7 @@ startswith: bool, optional
record_type=se.registered_identifiable.parents[0].name, record_type=se.registered_identifiable.parents[0].name,
name=name, name=name,
properties=identifiable_props, properties=identifiable_props,
backrefs=[e.id for e in identifiable_backrefs] backrefs=[e.id for e in identifiable_backrefs],
) )
except Exception as exc: except Exception as exc:
logger.error(exc) logger.error(exc)
...@@ -387,7 +404,9 @@ startswith: bool, optional ...@@ -387,7 +404,9 @@ startswith: bool, optional
""" """
pass pass
def retrieve_identified_record_for_record(self, record: db.Record, referencing_entities=None): def retrieve_identified_record_for_record(
self, record: db.Record, referencing_entities=None
):
""" """
This function combines all functionality of the IdentifiableAdapter by This function combines all functionality of the IdentifiableAdapter by
returning the identifiable after having checked for an appropriate returning the identifiable after having checked for an appropriate
...@@ -401,13 +420,15 @@ startswith: bool, optional ...@@ -401,13 +420,15 @@ startswith: bool, optional
if record.id is not None: if record.id is not None:
return cached_get_entity_by(eid=record.id) return cached_get_entity_by(eid=record.id)
identifiable = self.get_identifiable(record, referencing_entities=referencing_entities) identifiable = self.get_identifiable(
record, referencing_entities=referencing_entities
)
return self.retrieve_identified_record_for_identifiable(identifiable) return self.retrieve_identified_record_for_identifiable(identifiable)
@staticmethod @staticmethod
def referencing_entity_has_appropriate_type(parents, register_identifiable): def referencing_entity_has_appropriate_type(parents, register_identifiable):
""" returns true if one of the parents is listed by the 'is_referenced_by' property """returns true if one of the parents is listed by the 'is_referenced_by' property
This function also returns True if 'is_referenced_by' contains the wildcard '*'. This function also returns True if 'is_referenced_by' contains the wildcard '*'.
""" """
...@@ -415,8 +436,10 @@ startswith: bool, optional ...@@ -415,8 +436,10 @@ startswith: bool, optional
return False return False
if register_identifiable.get_property("is_referenced_by").value is None: if register_identifiable.get_property("is_referenced_by").value is None:
return False return False
appropriate_types = [el.lower() for el in appropriate_types = [
register_identifiable.get_property("is_referenced_by").value] el.lower()
for el in register_identifiable.get_property("is_referenced_by").value
]
if "*" in appropriate_types: if "*" in appropriate_types:
return True return True
for parent in parents: for parent in parents:
...@@ -431,8 +454,11 @@ class LocalStorageIdentifiableAdapter(IdentifiableAdapter): ...@@ -431,8 +454,11 @@ class LocalStorageIdentifiableAdapter(IdentifiableAdapter):
""" """
def __init__(self): def __init__(self):
warnings.warn(DeprecationWarning( warnings.warn(
"This class is deprecated. Please use the CaosDBIdentifiableAdapter.")) DeprecationWarning(
"This class is deprecated. Please use the CaosDBIdentifiableAdapter."
)
)
self._registered_identifiables = dict() self._registered_identifiables = dict()
self._records = [] self._records = []
...@@ -447,7 +473,9 @@ class LocalStorageIdentifiableAdapter(IdentifiableAdapter): ...@@ -447,7 +473,9 @@ class LocalStorageIdentifiableAdapter(IdentifiableAdapter):
Just look in records for a file with the same path. Just look in records for a file with the same path.
""" """
candidates = [] candidates = []
warnings.warn(DeprecationWarning("This function is deprecated. Please do not use it.")) warnings.warn(
DeprecationWarning("This function is deprecated. Please do not use it.")
)
for record in self._records: for record in self._records:
if record.role == "File" and record.path == identifiable.path: if record.role == "File" and record.path == identifiable.path:
candidates.append(record) candidates.append(record)
...@@ -459,15 +487,18 @@ class LocalStorageIdentifiableAdapter(IdentifiableAdapter): ...@@ -459,15 +487,18 @@ class LocalStorageIdentifiableAdapter(IdentifiableAdapter):
def store_state(self, filename): def store_state(self, filename):
with open(filename, "w") as f: with open(filename, "w") as f:
f.write(db.common.utils.xml2str( f.write(
db.Container().extend(self._records).to_xml())) db.common.utils.xml2str(db.Container().extend(self._records).to_xml())
)
def restore_state(self, filename): def restore_state(self, filename):
with open(filename, "r") as f: with open(filename, "r") as f:
self._records = db.Container().from_xml(f.read()) self._records = db.Container().from_xml(f.read())
# TODO: move to super class? # TODO: move to super class?
def is_identifiable_for_record(self, registered_identifiable: db.RecordType, record: db.Record): def is_identifiable_for_record(
self, registered_identifiable: db.RecordType, record: db.Record
):
""" """
Check whether this registered_identifiable is an identifiable for the record. Check whether this registered_identifiable is an identifiable for the record.
...@@ -478,8 +509,7 @@ class LocalStorageIdentifiableAdapter(IdentifiableAdapter): ...@@ -478,8 +509,7 @@ class LocalStorageIdentifiableAdapter(IdentifiableAdapter):
Return True in that case and False otherwise. Return True in that case and False otherwise.
""" """
if len(registered_identifiable.parents) != 1: if len(registered_identifiable.parents) != 1:
raise RuntimeError( raise RuntimeError("Multiple parents for identifiables not supported.")
"Multiple parents for identifiables not supported.")
if not has_parent(record, registered_identifiable.parents[0].name): if not has_parent(record, registered_identifiable.parents[0].name):
return False return False
...@@ -495,8 +525,7 @@ class LocalStorageIdentifiableAdapter(IdentifiableAdapter): ...@@ -495,8 +525,7 @@ class LocalStorageIdentifiableAdapter(IdentifiableAdapter):
if self.is_identifiable_for_record(definition, record): if self.is_identifiable_for_record(definition, record):
identifiable_candidates.append(definition) identifiable_candidates.append(definition)
if len(identifiable_candidates) > 1: if len(identifiable_candidates) > 1:
raise RuntimeError( raise RuntimeError("Multiple candidates for an identifiable found.")
"Multiple candidates for an identifiable found.")
if len(identifiable_candidates) == 0: if len(identifiable_candidates) == 0:
return None return None
return identifiable_candidates[0] return identifiable_candidates[0]
...@@ -511,8 +540,9 @@ class LocalStorageIdentifiableAdapter(IdentifiableAdapter): ...@@ -511,8 +540,9 @@ class LocalStorageIdentifiableAdapter(IdentifiableAdapter):
record is the record from the local database to check against. record is the record from the local database to check against.
identifiable is the record that was created during the crawler run. identifiable is the record that was created during the crawler run.
""" """
if (identifiable.record_type is not None if identifiable.record_type is not None and not has_parent(
and not has_parent(record, identifiable.record_type)): record, identifiable.record_type
):
return False return False
for propname, propvalue in identifiable.properties.items(): for propname, propvalue in identifiable.properties.items():
prop_record = record.get_property(propname) prop_record = record.get_property(propname)
...@@ -541,21 +571,26 @@ class LocalStorageIdentifiableAdapter(IdentifiableAdapter): ...@@ -541,21 +571,26 @@ class LocalStorageIdentifiableAdapter(IdentifiableAdapter):
candidates.append(record) candidates.append(record)
if len(candidates) > 1: if len(candidates) > 1:
raise RuntimeError( raise RuntimeError(
f"Identifiable was not defined unambigiously. Possible candidates are {candidates}") f"Identifiable was not defined unambigiously. Possible candidates are {candidates}"
)
if len(candidates) == 0: if len(candidates) == 0:
return None return None
return candidates[0] return candidates[0]
def resolve_reference(self, value: db.Record): def resolve_reference(self, value: db.Record):
if self.get_registered_identifiable(value) is None: if self.get_registered_identifiable(value) is None:
raise NotImplementedError("Non-identifiable references cannot" raise NotImplementedError(
" be used as properties in identifiables.") "Non-identifiable references cannot"
" be used as properties in identifiables."
)
# TODO: just resolve the entity # TODO: just resolve the entity
value_identifiable = self.retrieve_identified_record_for_record(value) value_identifiable = self.retrieve_identified_record_for_record(value)
if value_identifiable is None: if value_identifiable is None:
raise RuntimeError("The identifiable which is used as property" raise RuntimeError(
" here has to be inserted first.") "The identifiable which is used as property"
" here has to be inserted first."
)
if value_identifiable.id is None: if value_identifiable.id is None:
raise RuntimeError("The entity has not been assigned an ID.") raise RuntimeError("The entity has not been assigned an ID.")
...@@ -575,7 +610,7 @@ class CaosDBIdentifiableAdapter(IdentifiableAdapter): ...@@ -575,7 +610,7 @@ class CaosDBIdentifiableAdapter(IdentifiableAdapter):
def load_from_yaml_definition(self, path: str): def load_from_yaml_definition(self, path: str):
"""Load identifiables defined in a yaml file""" """Load identifiables defined in a yaml file"""
with open(path, 'r', encoding="utf-8") as yaml_f: with open(path, "r", encoding="utf-8") as yaml_f:
identifiable_data = yaml.safe_load(yaml_f) identifiable_data = yaml.safe_load(yaml_f)
for key, value in identifiable_data.items(): for key, value in identifiable_data.items():
...@@ -595,7 +630,9 @@ class CaosDBIdentifiableAdapter(IdentifiableAdapter): ...@@ -595,7 +630,9 @@ class CaosDBIdentifiableAdapter(IdentifiableAdapter):
self._registered_identifiables[name] = definition self._registered_identifiables[name] = definition
def get_file(self, identifiable: Identifiable): def get_file(self, identifiable: Identifiable):
warnings.warn(DeprecationWarning("This function is deprecated. Please do not use it.")) warnings.warn(
DeprecationWarning("This function is deprecated. Please do not use it.")
)
# TODO is this needed for Identifiable? # TODO is this needed for Identifiable?
# or can we get rid of this function? # or can we get rid of this function?
if isinstance(identifiable, db.Entity): if isinstance(identifiable, db.Entity):
...@@ -637,9 +674,13 @@ class CaosDBIdentifiableAdapter(IdentifiableAdapter): ...@@ -637,9 +674,13 @@ class CaosDBIdentifiableAdapter(IdentifiableAdapter):
query_string = self.create_query_for_identifiable(identifiable) query_string = self.create_query_for_identifiable(identifiable)
try: try:
candidates = cached_query(query_string) candidates = cached_query(query_string)
except db.exceptions.HTTPServerError as err: except db.exceptions.HTTPServerError:
query_string = self.create_query_for_identifiable(identifiable, startswith=True) query_string = self.create_query_for_identifiable(
candidates = cached_query(query_string).copy() # Copy against cache poisoning identifiable, startswith=True
)
candidates = cached_query(
query_string
).copy() # Copy against cache poisoning
# Test if the candidates really match all properties # Test if the candidates really match all properties
for pname, pvalue in identifiable.properties.items(): for pname, pvalue in identifiable.properties.items():
...@@ -658,7 +699,8 @@ class CaosDBIdentifiableAdapter(IdentifiableAdapter): ...@@ -658,7 +699,8 @@ class CaosDBIdentifiableAdapter(IdentifiableAdapter):
raise RuntimeError( raise RuntimeError(
f"Identifiable was not defined unambiguously.\n{query_string}\nReturned the " f"Identifiable was not defined unambiguously.\n{query_string}\nReturned the "
f"following {candidates}." f"following {candidates}."
f"Identifiable:\n{identifiable.record_type}{identifiable.properties}") f"Identifiable:\n{identifiable.record_type}{identifiable.properties}"
)
if len(candidates) == 0: if len(candidates) == 0:
return None return None
return candidates[0] return candidates[0]
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment