Skip to content
Snippets Groups Projects
Commit c874edf5 authored by Alexander Schlemmer's avatar Alexander Schlemmer
Browse files

STY: applied black style and removed unused imports

parent 4ac7d9a7
Branches
Tags
2 merge requests: !178 "FIX: #96 Better error output for crawl.py script.", !167 "Sync Graph"
......@@ -30,20 +30,21 @@ import logging
from typing import Any, Optional, Union, Callable
import linkahead as db
from linkahead.apiutils import (EntityMergeConflictError, compare_entities,
merge_entities)
from linkahead.cached import cache_clear, cached_get_entity_by
from linkahead.cached import cached_get_entity_by
from linkahead.exceptions import EmptyUniqueQueryError
from .exceptions import ImpossibleMergeError, MissingReferencingEntityError
from .identifiable_adapters import IdentifiableAdapter
from .identifiable import Identifiable
from .sync_node import SyncNode, TempID
import re
logger = logging.getLogger(__name__)
def _set_each_scalar_value(node: SyncNode, condition: Callable[[Any], bool], value: Any):
def _set_each_scalar_value(
node: SyncNode, condition: Callable[[Any], bool], value: Any
):
"""helper function that conditionally replaces each value element of each property of a node
If the property value is a list, the replacement is done for each list entry.
......@@ -70,7 +71,7 @@ def _set_each_scalar_value(node: SyncNode, condition: Callable[[Any], bool], val
p.value = value(p.value)
class SyncGraph():
class SyncGraph:
"""
A data model class for the graph of entities that shall be created during synchronization of
the crawler.
......@@ -133,7 +134,9 @@ class SyncGraph():
# Note that whenever one node is changed, we check all dependent nodes (see usage of
# `_get_nodes_whose_identity_relies_on`) whether something should be updated. Thus, we cannot
# miss a necessary update.
def __init__(self, entities: list[db.Entity], identifiableAdapter: IdentifiableAdapter):
def __init__(
self, entities: list[db.Entity], identifiableAdapter: IdentifiableAdapter
):
self.identifiableAdapter = identifiableAdapter
# A dictionary allowing for quick lookup of sync nodes using their (possibly negative) IDs.
# This dictionary is initially set using _mark_entities_with_path_or_id and later updated
......@@ -192,8 +195,10 @@ class SyncGraph():
Last review by Alexander Schlemmer on 2024-05-24.
"""
if node.id is not None:
raise RuntimeError('Cannot update ID.\n'
f'It already is {node.id} and shall be set to {node_id}.')
raise RuntimeError(
"Cannot update ID.\n"
f"It already is {node.id} and shall be set to {node_id}."
)
if node_id is None:
node_id = TempID(self._get_new_id())
node.id = node_id
......@@ -233,9 +238,11 @@ class SyncGraph():
node_map[id(el)] = entities[-1]
for ent in entities:
_set_each_scalar_value(ent,
_set_each_scalar_value(
ent,
condition=lambda val: isinstance(val, SyncNode),
value=lambda val: node_map[id(val)])
value=lambda val: node_map[id(val)],
)
missing = [el for el in entities if el.id < 0]
existing = [el for el in entities if el.id > 0]
......@@ -253,10 +260,16 @@ class SyncGraph():
Last review by Alexander Schlemmer on 2024-05-27.
"""
return any([id(ent) not in self._missing and id(ent) not in self._existing
for ent in self.forward_references_id_props[id(node)]]
+ [id(ent) not in self._missing and id(ent) not in self._existing
for ent in self.backward_references_backref[id(node)]])
return any(
[
id(ent) not in self._missing and id(ent) not in self._existing
for ent in self.forward_references_id_props[id(node)]
]
+ [
id(ent) not in self._missing and id(ent) not in self._existing
for ent in self.backward_references_backref[id(node)]
]
)
def unchecked_contains_circular_dependency(self):
"""
......@@ -309,9 +322,13 @@ class SyncGraph():
candidate = self._path_look_up[entity.path]
if candidate is not entity:
return candidate
if (entity.identifiable is not None and entity.identifiable.get_representation() in
self._identifiable_look_up):
candidate = self._identifiable_look_up[entity.identifiable.get_representation()]
if (
entity.identifiable is not None
and entity.identifiable.get_representation() in self._identifiable_look_up
):
candidate = self._identifiable_look_up[
entity.identifiable.get_representation()
]
if candidate is not entity:
return candidate
return None
......@@ -324,8 +341,9 @@ class SyncGraph():
self._remote_missing_counter -= 1
return self._remote_missing_counter
def _set_identifiable_of_node(self, node: SyncNode,
identifiable: Optional[Identifiable] = None):
def _set_identifiable_of_node(
self, node: SyncNode, identifiable: Optional[Identifiable] = None
):
"""sets the identifiable and checks whether an equivalent node can be found with that new
information. If an equivalent node is found, 'node' is merged into that node.
......@@ -333,12 +351,14 @@ class SyncGraph():
Raises a ValueError if the equivalent node found does not have an identifiable.
Raises a RuntimeError if there is no equivalent node found and
the (unique) string representation of the identifiable of node is already contained in the identifiable_look_up.
the (unique) string representation of the identifiable of node is already contained in
the identifiable_look_up.
"""
if identifiable is None:
self.identifiableAdapter.all_identifying_properties_exist(node)
identifiable = self.identifiableAdapter.get_identifiable(
node, self.backward_references_backref[id(node)])
node, self.backward_references_backref[id(node)]
)
node.identifiable = identifiable
equivalent_se = self.get_equivalent(node)
if equivalent_se is not None and equivalent_se is not node:
......@@ -361,9 +381,13 @@ class SyncGraph():
if ent.role == "Record" and len(ent.parents) == 0:
raise ValueError(f"Records must have a parent.\n{ent}")
if isinstance(ent.id, int) and ent.id < 0:
raise ValueError(f"Records must not have negative integers as IDs.\n{ent}")
raise ValueError(
f"Records must not have negative integers as IDs.\n{ent}"
)
if isinstance(ent.id, str) and re.match(r"^-\d+$", ent.id):
raise ValueError(f"Records must not have negative integers as IDs.\n{ent}")
raise ValueError(
f"Records must not have negative integers as IDs.\n{ent}"
)
def _get_nodes_whose_identity_relies_on(self, node: SyncNode):
"""returns a set of nodes that reference the given node as identifying property or are
......@@ -372,11 +396,14 @@ class SyncGraph():
Last review by Alexander Schlemmer on 2024-05-24.
"""
return (self.backward_references_id_props[id(node)].union(
self.forward_references_backref[id(node)]))
return self.backward_references_id_props[id(node)].union(
self.forward_references_backref[id(node)]
)
@staticmethod
def _create_flat_list(ent_list: list[db.Entity], flat: Optional[list[db.Entity]] = None):
def _create_flat_list(
ent_list: list[db.Entity], flat: Optional[list[db.Entity]] = None
):
"""
Recursively adds entities and all their properties contained in ent_list to
the output list flat.
......@@ -446,20 +473,35 @@ class SyncGraph():
if isinstance(v, SyncNode):
forward_references[id(node)].add(v)
backward_references[id(v)].add(node)
if (node.registered_identifiable is not None
and len([el.name
for el in node.registered_identifiable.properties if
el.name == p.name]) > 0):
if (
node.registered_identifiable is not None
and len(
[
el.name
for el in node.registered_identifiable.properties
if el.name == p.name
]
)
> 0
):
forward_references_id_props[id(node)].add(v)
backward_references_id_props[id(v)].add(node)
if (v.registered_identifiable is not None and
IdentifiableAdapter.referencing_entity_has_appropriate_type(
node.parents, v.registered_identifiable)):
if (
v.registered_identifiable is not None
and IdentifiableAdapter.referencing_entity_has_appropriate_type(
node.parents, v.registered_identifiable
)
):
forward_references_backref[id(node)].add(v)
backward_references_backref[id(v)].add(node)
return (forward_references, backward_references, forward_references_id_props,
backward_references_id_props, forward_references_backref, backward_references_backref,
return (
forward_references,
backward_references,
forward_references_id_props,
backward_references_id_props,
forward_references_backref,
backward_references_backref,
)
def _mark_entities_with_path_or_id(self):
......@@ -509,20 +551,22 @@ class SyncGraph():
if self._id_look_up[source.id] != source:
raise ValueError(
"It is assumed that always only one node exists with a certain ID and that "
"node is in the look up")
"node is in the look up"
)
if target.path is None and source.path is not None:
if self._id_look_up[source.path] != source:
raise ValueError(
"It is assumed that always only one node exists with a certain path and that"
" node is in the look up")
" node is in the look up"
)
target.update(source)
# replace actual reference property values
for node in self.backward_references[id(source)]:
_set_each_scalar_value(node,
condition=lambda val: val is source,
value=lambda val: target)
_set_each_scalar_value(
node, condition=lambda val: val is source, value=lambda val: target
)
# update reference mappings
for node in self.forward_references.pop(id(source)):
......@@ -562,10 +606,13 @@ class SyncGraph():
if target.path is not None:
self._path_look_up[target.path] = target
if target.identifiable is not None:
self._identifiable_look_up[target.identifiable.get_representation()] = target
self._identifiable_look_up[target.identifiable.get_representation()] = (
target
)
if ((id(source) in self._existing and id(target) in self._missing)
or (id(target) in self._existing and id(source) in self._missing)):
if (id(source) in self._existing and id(target) in self._missing) or (
id(target) in self._existing and id(source) in self._missing
):
raise RuntimeError("Trying to merge missing and existing")
if id(source) in self._missing and id(target) not in self._missing:
......@@ -595,9 +642,13 @@ class SyncGraph():
Last review by Alexander Schlemmer on 2024-05-24.
"""
return (node.identifiable is None and not self._identity_relies_on_unchecked_entity(node)
return (
node.identifiable is None
and not self._identity_relies_on_unchecked_entity(node)
and self.identifiableAdapter.all_identifying_properties_exist(
node, raise_exception=False))
node, raise_exception=False
)
)
def _initialize_nodes(self, entities: list[db.Entity]):
"""create initial set of SyncNodes from provided Entity list"""
......@@ -608,19 +659,24 @@ class SyncGraph():
# Create new sync nodes from the list of entities, their registered identifiables
# are set from the identifiable adapter.
for el in entities:
self.nodes.append(SyncNode(
el,
self.identifiableAdapter.get_registered_identifiable(el)))
self.nodes.append(
SyncNode(el, self.identifiableAdapter.get_registered_identifiable(el))
)
se_lookup[id(el)] = self.nodes[-1]
# replace db.Entity objects with SyncNodes in references:
for node in self.nodes:
_set_each_scalar_value(node,
_set_each_scalar_value(
node,
condition=lambda val: id(val) in se_lookup,
value=lambda val: se_lookup[id(val)])
value=lambda val: se_lookup[id(val)],
)
def _add_identifiables_to_dependend_nodes(self, node):
""" For each dependent node, we check whether this allows to create an identifiable """
"""For each dependent node, check whether an identifiable can now be created.
Last review by Alexander Schlemmer on 2024-05-29.
"""
for other_node in self._get_nodes_whose_identity_relies_on(node):
if self._identifiable_is_needed(other_node):
self._set_identifiable_of_node(other_node)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment