Skip to content
Snippets Groups Projects
Commit c874edf5 authored by Alexander Schlemmer
Browse files

STY: applied black style and removed unused imports

parent 4ac7d9a7
No related branches found
No related tags found
2 merge requests: !178 "FIX: #96 Better error output for crawl.py script.", !167 "Sync Graph"
...@@ -27,24 +27,25 @@ crawler. ...@@ -27,24 +27,25 @@ crawler.
from __future__ import annotations from __future__ import annotations
import logging import logging
from typing import Any, Optional, Union, Callable from typing import Any, Optional, Union, Callable
import linkahead as db import linkahead as db
from linkahead.apiutils import (EntityMergeConflictError, compare_entities, from linkahead.cached import cached_get_entity_by
merge_entities)
from linkahead.cached import cache_clear, cached_get_entity_by
from linkahead.exceptions import EmptyUniqueQueryError from linkahead.exceptions import EmptyUniqueQueryError
from .exceptions import ImpossibleMergeError, MissingReferencingEntityError
from .identifiable_adapters import IdentifiableAdapter from .identifiable_adapters import IdentifiableAdapter
from .identifiable import Identifiable from .identifiable import Identifiable
from .sync_node import SyncNode, TempID from .sync_node import SyncNode, TempID
import re
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
def _set_each_scalar_value(node: SyncNode, condition: Callable[[Any], bool], value: Any): def _set_each_scalar_value(
""" helper function that conditionally replaces each value element of each property of a node node: SyncNode, condition: Callable[[Any], bool], value: Any
):
"""helper function that conditionally replaces each value element of each property of a node
If the property value is a list, the replacement is done for each list entry. If the property value is a list, the replacement is done for each list entry.
The replacement is only performed if the condition that The replacement is only performed if the condition that
...@@ -70,7 +71,7 @@ def _set_each_scalar_value(node: SyncNode, condition: Callable[[Any], bool], val ...@@ -70,7 +71,7 @@ def _set_each_scalar_value(node: SyncNode, condition: Callable[[Any], bool], val
p.value = value(p.value) p.value = value(p.value)
class SyncGraph(): class SyncGraph:
""" """
A data model class for the graph of entities that shall be created during synchronization of A data model class for the graph of entities that shall be created during synchronization of
the crawler. the crawler.
...@@ -133,7 +134,9 @@ class SyncGraph(): ...@@ -133,7 +134,9 @@ class SyncGraph():
# Note that whenever one node is changed, we check all dependent nodes (see usage of # Note that whenever one node is changed, we check all dependent nodes (see usage of
# `_get_nodes_whose_identity_relies_on`) whether something should be updated. Thus, we cannot # `_get_nodes_whose_identity_relies_on`) whether something should be updated. Thus, we cannot
# miss a necessary update. # miss a necessary update.
def __init__(self, entities: list[db.Entity], identifiableAdapter: IdentifiableAdapter): def __init__(
self, entities: list[db.Entity], identifiableAdapter: IdentifiableAdapter
):
self.identifiableAdapter = identifiableAdapter self.identifiableAdapter = identifiableAdapter
# A dictionary allowing for quick lookup of sync nodes using their (possibly negative) IDs. # A dictionary allowing for quick lookup of sync nodes using their (possibly negative) IDs.
# This dictionary is initially set using _mark_entities_with_path_or_id and later updated # This dictionary is initially set using _mark_entities_with_path_or_id and later updated
...@@ -192,8 +195,10 @@ class SyncGraph(): ...@@ -192,8 +195,10 @@ class SyncGraph():
Last review by Alexander Schlemmer on 2024-05-24. Last review by Alexander Schlemmer on 2024-05-24.
""" """
if node.id is not None: if node.id is not None:
raise RuntimeError('Cannot update ID.\n' raise RuntimeError(
f'It already is {node.id} and shall be set to {node_id}.') "Cannot update ID.\n"
f"It already is {node.id} and shall be set to {node_id}."
)
if node_id is None: if node_id is None:
node_id = TempID(self._get_new_id()) node_id = TempID(self._get_new_id())
node.id = node_id node.id = node_id
...@@ -207,7 +212,7 @@ class SyncGraph(): ...@@ -207,7 +212,7 @@ class SyncGraph():
self._mark_existing(node) self._mark_existing(node)
def export_record_lists(self): def export_record_lists(self):
""" exports the SyncGraph in form of db.Entities """exports the SyncGraph in form of db.Entities
All nodes are converted to db.Entity objects and reference values that are SyncNodes are All nodes are converted to db.Entity objects and reference values that are SyncNodes are
replaced by their corresponding (newly created) db.Entity objects. replaced by their corresponding (newly created) db.Entity objects.
...@@ -233,9 +238,11 @@ class SyncGraph(): ...@@ -233,9 +238,11 @@ class SyncGraph():
node_map[id(el)] = entities[-1] node_map[id(el)] = entities[-1]
for ent in entities: for ent in entities:
_set_each_scalar_value(ent, _set_each_scalar_value(
condition=lambda val: isinstance(val, SyncNode), ent,
value=lambda val: node_map[id(val)]) condition=lambda val: isinstance(val, SyncNode),
value=lambda val: node_map[id(val)],
)
missing = [el for el in entities if el.id < 0] missing = [el for el in entities if el.id < 0]
existing = [el for el in entities if el.id > 0] existing = [el for el in entities if el.id > 0]
...@@ -253,10 +260,16 @@ class SyncGraph(): ...@@ -253,10 +260,16 @@ class SyncGraph():
Last review by Alexander Schlemmer on 2024-05-27. Last review by Alexander Schlemmer on 2024-05-27.
""" """
return any([id(ent) not in self._missing and id(ent) not in self._existing return any(
for ent in self.forward_references_id_props[id(node)]] [
+ [id(ent) not in self._missing and id(ent) not in self._existing id(ent) not in self._missing and id(ent) not in self._existing
for ent in self.backward_references_backref[id(node)]]) for ent in self.forward_references_id_props[id(node)]
]
+ [
id(ent) not in self._missing and id(ent) not in self._existing
for ent in self.backward_references_backref[id(node)]
]
)
def unchecked_contains_circular_dependency(self): def unchecked_contains_circular_dependency(self):
""" """
...@@ -309,23 +322,28 @@ class SyncGraph(): ...@@ -309,23 +322,28 @@ class SyncGraph():
candidate = self._path_look_up[entity.path] candidate = self._path_look_up[entity.path]
if candidate is not entity: if candidate is not entity:
return candidate return candidate
if (entity.identifiable is not None and entity.identifiable.get_representation() in if (
self._identifiable_look_up): entity.identifiable is not None
candidate = self._identifiable_look_up[entity.identifiable.get_representation()] and entity.identifiable.get_representation() in self._identifiable_look_up
):
candidate = self._identifiable_look_up[
entity.identifiable.get_representation()
]
if candidate is not entity: if candidate is not entity:
return candidate return candidate
return None return None
def _get_new_id(self): def _get_new_id(self):
""" returns the next unused temporary ID """returns the next unused temporary ID
Last review by Alexander Schlemmer on 2024-05-24. Last review by Alexander Schlemmer on 2024-05-24.
""" """
self._remote_missing_counter -= 1 self._remote_missing_counter -= 1
return self._remote_missing_counter return self._remote_missing_counter
def _set_identifiable_of_node(self, node: SyncNode, def _set_identifiable_of_node(
identifiable: Optional[Identifiable] = None): self, node: SyncNode, identifiable: Optional[Identifiable] = None
):
"""sets the identifiable and checks whether an equivalent node can be found with that new """sets the identifiable and checks whether an equivalent node can be found with that new
information. If an equivalent node is found, 'node' is merged into that node. information. If an equivalent node is found, 'node' is merged into that node.
...@@ -333,12 +351,14 @@ class SyncGraph(): ...@@ -333,12 +351,14 @@ class SyncGraph():
Raises a ValueError if the equivalent node found does not have an identifiable. Raises a ValueError if the equivalent node found does not have an identifiable.
Raises a RuntimeError if there is no equivalent node found and Raises a RuntimeError if there is no equivalent node found and
the (unique) string representation of the identifiable of node is already contained in the identifiable_look_up. the (unique) string representation of the identifiable of node is already contained in
the identifiable_look_up.
""" """
if identifiable is None: if identifiable is None:
self.identifiableAdapter.all_identifying_properties_exist(node) self.identifiableAdapter.all_identifying_properties_exist(node)
identifiable = self.identifiableAdapter.get_identifiable( identifiable = self.identifiableAdapter.get_identifiable(
node, self.backward_references_backref[id(node)]) node, self.backward_references_backref[id(node)]
)
node.identifiable = identifiable node.identifiable = identifiable
equivalent_se = self.get_equivalent(node) equivalent_se = self.get_equivalent(node)
if equivalent_se is not None and equivalent_se is not node: if equivalent_se is not None and equivalent_se is not node:
...@@ -361,9 +381,13 @@ class SyncGraph(): ...@@ -361,9 +381,13 @@ class SyncGraph():
if ent.role == "Record" and len(ent.parents) == 0: if ent.role == "Record" and len(ent.parents) == 0:
raise ValueError(f"Records must have a parent.\n{ent}") raise ValueError(f"Records must have a parent.\n{ent}")
if isinstance(ent.id, int) and ent.id < 0: if isinstance(ent.id, int) and ent.id < 0:
raise ValueError(f"Records must not have negative integers as IDs.\n{ent}") raise ValueError(
f"Records must not have negative integers as IDs.\n{ent}"
)
if isinstance(ent.id, str) and re.match(r"^-\d+$", ent.id): if isinstance(ent.id, str) and re.match(r"^-\d+$", ent.id):
raise ValueError(f"Records must not have negative integers as IDs.\n{ent}") raise ValueError(
f"Records must not have negative integers as IDs.\n{ent}"
)
def _get_nodes_whose_identity_relies_on(self, node: SyncNode): def _get_nodes_whose_identity_relies_on(self, node: SyncNode):
"""returns a set of nodes that reference the given node as identifying property or are """returns a set of nodes that reference the given node as identifying property or are
...@@ -372,11 +396,14 @@ class SyncGraph(): ...@@ -372,11 +396,14 @@ class SyncGraph():
Last review by Alexander Schlemmer on 2024-05-24. Last review by Alexander Schlemmer on 2024-05-24.
""" """
return (self.backward_references_id_props[id(node)].union( return self.backward_references_id_props[id(node)].union(
self.forward_references_backref[id(node)])) self.forward_references_backref[id(node)]
)
@staticmethod @staticmethod
def _create_flat_list(ent_list: list[db.Entity], flat: Optional[list[db.Entity]] = None): def _create_flat_list(
ent_list: list[db.Entity], flat: Optional[list[db.Entity]] = None
):
""" """
Recursively adds entities and all their properties contained in ent_list to Recursively adds entities and all their properties contained in ent_list to
the output list flat. the output list flat.
...@@ -446,25 +473,40 @@ class SyncGraph(): ...@@ -446,25 +473,40 @@ class SyncGraph():
if isinstance(v, SyncNode): if isinstance(v, SyncNode):
forward_references[id(node)].add(v) forward_references[id(node)].add(v)
backward_references[id(v)].add(node) backward_references[id(v)].add(node)
if (node.registered_identifiable is not None if (
and len([el.name node.registered_identifiable is not None
for el in node.registered_identifiable.properties if and len(
el.name == p.name]) > 0): [
el.name
for el in node.registered_identifiable.properties
if el.name == p.name
]
)
> 0
):
forward_references_id_props[id(node)].add(v) forward_references_id_props[id(node)].add(v)
backward_references_id_props[id(v)].add(node) backward_references_id_props[id(v)].add(node)
if (v.registered_identifiable is not None and if (
IdentifiableAdapter.referencing_entity_has_appropriate_type( v.registered_identifiable is not None
node.parents, v.registered_identifiable)): and IdentifiableAdapter.referencing_entity_has_appropriate_type(
node.parents, v.registered_identifiable
)
):
forward_references_backref[id(node)].add(v) forward_references_backref[id(node)].add(v)
backward_references_backref[id(v)].add(node) backward_references_backref[id(v)].add(node)
return (forward_references, backward_references, forward_references_id_props, return (
backward_references_id_props, forward_references_backref, backward_references_backref, forward_references,
) backward_references,
forward_references_id_props,
backward_references_id_props,
forward_references_backref,
backward_references_backref,
)
def _mark_entities_with_path_or_id(self): def _mark_entities_with_path_or_id(self):
""" A path or an ID is sufficiently identifying. Thus, those entities can be marked as """A path or an ID is sufficiently identifying. Thus, those entities can be marked as
checked """ checked"""
for node in list(self.nodes): for node in list(self.nodes):
if node.id is not None: if node.id is not None:
if self.get_equivalent(node) is not None: if self.get_equivalent(node) is not None:
...@@ -490,7 +532,7 @@ class SyncGraph(): ...@@ -490,7 +532,7 @@ class SyncGraph():
self.set_id_of_node(node, remote_id) self.set_id_of_node(node, remote_id)
def _merge_into(self, source: SyncNode, target: SyncNode): def _merge_into(self, source: SyncNode, target: SyncNode):
""" tries to merge source into target and performs the necessary updates: """tries to merge source into target and performs the necessary updates:
- update the membervariables of target using source (``target.update(source)``). - update the membervariables of target using source (``target.update(source)``).
- replaces reference values to source by target - replaces reference values to source by target
- updates the reference map - updates the reference map
...@@ -509,20 +551,22 @@ class SyncGraph(): ...@@ -509,20 +551,22 @@ class SyncGraph():
if self._id_look_up[source.id] != source: if self._id_look_up[source.id] != source:
raise ValueError( raise ValueError(
"It is assumed that always only one node exists with a certain ID and that " "It is assumed that always only one node exists with a certain ID and that "
"node is in the look up") "node is in the look up"
)
if target.path is None and source.path is not None: if target.path is None and source.path is not None:
if self._id_look_up[source.path] != source: if self._id_look_up[source.path] != source:
raise ValueError( raise ValueError(
"It is assumed that always only one node exists with a certain path and that" "It is assumed that always only one node exists with a certain path and that"
" node is in the look up") " node is in the look up"
)
target.update(source) target.update(source)
# replace actual reference property values # replace actual reference property values
for node in self.backward_references[id(source)]: for node in self.backward_references[id(source)]:
_set_each_scalar_value(node, _set_each_scalar_value(
condition=lambda val: val is source, node, condition=lambda val: val is source, value=lambda val: target
value=lambda val: target) )
# update reference mappings # update reference mappings
for node in self.forward_references.pop(id(source)): for node in self.forward_references.pop(id(source)):
...@@ -562,10 +606,13 @@ class SyncGraph(): ...@@ -562,10 +606,13 @@ class SyncGraph():
if target.path is not None: if target.path is not None:
self._path_look_up[target.path] = target self._path_look_up[target.path] = target
if target.identifiable is not None: if target.identifiable is not None:
self._identifiable_look_up[target.identifiable.get_representation()] = target self._identifiable_look_up[target.identifiable.get_representation()] = (
target
)
if ((id(source) in self._existing and id(target) in self._missing) if (id(source) in self._existing and id(target) in self._missing) or (
or (id(target) in self._existing and id(source) in self._missing)): id(target) in self._existing and id(source) in self._missing
):
raise RuntimeError("Trying to merge missing and existing") raise RuntimeError("Trying to merge missing and existing")
if id(source) in self._missing and id(target) not in self._missing: if id(source) in self._missing and id(target) not in self._missing:
...@@ -595,12 +642,16 @@ class SyncGraph(): ...@@ -595,12 +642,16 @@ class SyncGraph():
Last review by Alexander Schlemmer on 2024-05-24. Last review by Alexander Schlemmer on 2024-05-24.
""" """
return (node.identifiable is None and not self._identity_relies_on_unchecked_entity(node) return (
and self.identifiableAdapter.all_identifying_properties_exist( node.identifiable is None
node, raise_exception=False)) and not self._identity_relies_on_unchecked_entity(node)
and self.identifiableAdapter.all_identifying_properties_exist(
node, raise_exception=False
)
)
def _initialize_nodes(self, entities: list[db.Entity]): def _initialize_nodes(self, entities: list[db.Entity]):
""" create initial set of SyncNodes from provided Entity list""" """create initial set of SyncNodes from provided Entity list"""
self._sanity_check(entities) self._sanity_check(entities)
entities = self._create_flat_list(entities) entities = self._create_flat_list(entities)
se_lookup: dict[int, SyncNode] = {} # lookup: python id -> SyncNode se_lookup: dict[int, SyncNode] = {} # lookup: python id -> SyncNode
...@@ -608,19 +659,24 @@ class SyncGraph(): ...@@ -608,19 +659,24 @@ class SyncGraph():
# Create new sync nodes from the list of entities, their registered identifiables # Create new sync nodes from the list of entities, their registered identifiables
# are set from the identifiable adapter. # are set from the identifiable adapter.
for el in entities: for el in entities:
self.nodes.append(SyncNode( self.nodes.append(
el, SyncNode(el, self.identifiableAdapter.get_registered_identifiable(el))
self.identifiableAdapter.get_registered_identifiable(el))) )
se_lookup[id(el)] = self.nodes[-1] se_lookup[id(el)] = self.nodes[-1]
# replace db.Entity objects with SyncNodes in references: # replace db.Entity objects with SyncNodes in references:
for node in self.nodes: for node in self.nodes:
_set_each_scalar_value(node, _set_each_scalar_value(
condition=lambda val: id(val) in se_lookup, node,
value=lambda val: se_lookup[id(val)]) condition=lambda val: id(val) in se_lookup,
value=lambda val: se_lookup[id(val)],
)
def _add_identifiables_to_dependend_nodes(self, node): def _add_identifiables_to_dependend_nodes(self, node):
""" For each dependent node, we check whether this allows to create an identifiable """ """For each dependent node, we check whether this allows to create an identifiable
Last review by Alexander Schlemmer on 2024-05-29.
"""
for other_node in self._get_nodes_whose_identity_relies_on(node): for other_node in self._get_nodes_whose_identity_relies_on(node):
if self._identifiable_is_needed(other_node): if self._identifiable_is_needed(other_node):
self._set_identifiable_of_node(other_node) self._set_identifiable_of_node(other_node)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment