diff --git a/src/caoscrawler/sync_graph.py b/src/caoscrawler/sync_graph.py index 380b8a6a42675c1b952f3e34b20d51a0ab10052f..45059ec8b63573b29783128d155f050c43240834 100644 --- a/src/caoscrawler/sync_graph.py +++ b/src/caoscrawler/sync_graph.py @@ -42,7 +42,7 @@ from .sync_node import SyncNode logger = logging.getLogger(__name__) -def _for_each_scalar_value(node, condition, kind, value=None): +def _for_each_scalar_value(node: SyncNode, condition: callable, kind: str, value: Any = None): """ helper function that performs an action on each value element of each property of a node The action (remove or set) is performed on each property value of each property: in case on @@ -68,12 +68,12 @@ def _for_each_scalar_value(node, condition, kind, value=None): p.value = value(p.value) -def _remove_each_scalar_value(node, condition): +def _remove_each_scalar_value(node: SyncNode, condition: callable): """ "remove" version of _for_each_scalar_value """ _for_each_scalar_value(node, condition, "remove") -def _set_each_scalar_value(node, condition, value): +def _set_each_scalar_value(node: SyncNode, condition: callable, value: Any): """ "set" version of _for_each_scalar_value """ _for_each_scalar_value(node, condition, "set", value=value) @@ -124,7 +124,7 @@ class SyncGraph(): export_record_lists. """ - def __init__(self, entities: List[db.Entity], identifiableAdapter): + def __init__(self, entities: List[db.Entity], identifiableAdapter: IdentifiableAdapter): self.identifiableAdapter = identifiableAdapter self._id_look_up: Dict[int, SyncNode] = {} self._path_look_up: Dict[str, SyncNode] = {} @@ -150,10 +150,16 @@ class SyncGraph(): self.backward_id_referenced_by, ) = self._create_reference_mapping(self.nodes) + # remove entities with path or ID from unchecked list self._mark_entities_with_path_or_id() + + # add identifiables where possible for node in list(self.nodes): if self._identifiable_is_needed(node): self._set_identifiable_of_node(node) + + # everything in unchecked has no ID nor path. Thus, it must be possible to create an + # identifiable. for node in self.unchecked: self.identifiableAdapter.check_identifying_props(node) @@ -265,28 +271,6 @@ class SyncGraph(): if candidate is not entity: return candidate - # def get_equivalent_existing(self, se): - # """ Check whether this Record exists on the remote server - - # Returns: The stored Record - # """ - # treated = self.get_any(se) - # if id(treated) in self._existing: - # return rec - # else: - # return None - - # def get_equivalent_missing(self, se): - # """ Check whether this Record is missing on the remote server - - # Returns: The stored Record - # """ - # treated = self.get_any(record, identifiable) - # if id(treated) in self._missing: - # return rec - # else: - # return None - def _get_new_id(self): self._remote_missing_counter -= 1 return self._remote_missing_counter @@ -522,12 +506,12 @@ class SyncGraph(): if self._identifiable_is_needed(other_node): self._set_identifiable_of_node(other_node) - def _identifiable_is_needed(self, node): + def _identifiable_is_needed(self, node: SyncNode): return (node.identifiable is None and not self._identity_relies_on_unchecked_entity(node) and self.identifiableAdapter.check_identifying_props(node, raise_exception=False)) - def _initialize_nodes(self, entities): - """ create initial set of SemanticEntities from provided Entity list""" + def _initialize_nodes(self, entities: list[db.Entity]): + """ create initial set of SyncNodes from provided Entity list""" entities = self._create_flat_list(entities) self._sanity_check(entities) se_lookup: Dict[str, SyncNode] = {} # lookup: python id -> SyncNode @@ -542,7 +526,7 @@ class SyncGraph(): condition=lambda val: id(val) in se_lookup, value=lambda val: se_lookup[id(val)]) - def _mark_missing(self, node): + def _mark_missing(self, node: SyncNode): self._missing[id(node)] = node self.unchecked.remove(node) @@ -560,7 +544,7 @@ class SyncGraph(): print(f"set\n{other_node}\n to missing due to missing\n{node}") self.set_id_of_node(other_node) - def _mark_existing(self, node): + def _mark_existing(self, node: SyncNode): if node.id <= 0: raise ValueError("ID must be positive for existing entities") self._existing[id(node)] = node diff --git a/src/caoscrawler/sync_node.py b/src/caoscrawler/sync_node.py index 69437b1876baaa31454fdaf813244d0bb5e19643..7b80d5d5aaad6b6c8a708c19f4a8432f57c81be9 100644 --- a/src/caoscrawler/sync_node.py +++ b/src/caoscrawler/sync_node.py @@ -155,7 +155,7 @@ class SyncNode(): raise ime return ent - def __repr__(self): + def __repr__(self) -> str: res = f"\n=====================================================\n{self.role}\n" if hasattr(self, "_metadata"): res += f"user: {self._metadata['user']}\n" @@ -181,11 +181,12 @@ class SyncNode(): return (res + yaml.dump(d, allow_unicode=True) + "=====================================================\n") - def is_unidentifiable(self): + def is_unidentifiable(self) -> bool: + """returns whether this is an unidentifiable Node""" return self.registered_identifiable.get_property("no-ident") is not None -def parent_in_list(parent, plist): +def parent_in_list(parent: db.Parent, plist: _ParentList) -> bool: """helper function that checks whether a parent with the same name or ID is in the plist""" missing = False if parent.name is not None: @@ -197,7 +198,7 @@ def parent_in_list(parent, plist): return not missing -def property_in_list(prop, plist): +def property_in_list(prop: db.Property, plist: _Properties) -> bool: """helper function that checks whether a property with the same name or ID is in the plist""" missing = False if prop.name is not None: diff --git a/unittests/test_sync_graph.py b/unittests/test_sync_graph.py index 4c134af9810da6a34b8db4651f2f1fddc83e66c1..ad41f67cb16e7d5bc523ed26a69fe2e204b5781c 100644 --- a/unittests/test_sync_graph.py +++ b/unittests/test_sync_graph.py @@ -119,18 +119,69 @@ def test_create_reference_mapping(): assert backward_id_referenced_by[id(ses[1])] == set() -def test_SyncGraph(): +@patch("caoscrawler.sync_graph.cached_get_entity_by", + new=Mock(side_effect=mock_get_entity_by)) +def test_SyncGraph_init(): + # trivial case a = db.Record(id=101).add_parent("A") ident_a = db.RecordType().add_parent("A").add_property("prop_ident") ident_adapter = CaosDBIdentifiableAdapter() ident_adapter.register_identifiable("A", ident_a) - st = SyncGraph([a], ident_adapter) + SyncGraph([a], ident_adapter) + SyncGraph([], ident_adapter) # should not fail either... + + entlist = [ + db.Record(id=101).add_parent("A"), + db.Record(id=102).add_parent("A"), + db.File(path='a').add_parent("A"), + db.File(path='b').add_parent("A"), + db.Record(id=103).add_parent("A"), + db.Record(id=104).add_parent("A").add_property(name='prop_ident', value="MERGEME"), + db.Record().add_parent("A").add_property(name='prop_ident', value="MERGEME"), + db.File(path='a', file='b').add_parent("A"), + db.Record(id=101).add_parent("A"), + db.Record().add_parent("A").add_property(name='prop_ident', value="other"), + db.Record().add_parent("A").add_property(name='prop_ident', + value=db.Record().add_parent("A") + .add_property(name='prop_ident', value="other")), + db.File(path='a', file='b').add_parent("A"), + db.Record(id=101).add_parent("A"), + ] + st = SyncGraph(entlist, ident_adapter) + # all nodes with ID=101 have been merged + assert len([el for el in st.nodes if el.id == 101]) == 1 + # all nodes with path='a' have been merged + assert len([el for el in st.nodes if el.path == 'a']) == 1 + # all nodes with ID or path were removed from unchecked + for el in st.nodes: + if el.id is not None or el.path is not None: + assert el not in st.unchecked + # all nodes with ID are in the ID lookup + for el in st.nodes: + if el.id is not None: + assert st._id_look_up[el.id] is el + # all nodes with path are in the path lookup + for el in st.nodes: + if el.path is not None: + assert st._path_look_up[el.path] is el + # all nodes with identifiable are in the identifiable lookup + for el in st.nodes: + if el.identifiable is not None: + assert st._identifiable_look_up[el.identifiable.get_representation()] is el + # node without ID but with identifiable was merged with other node with ID + assert len([el for el in st.nodes if len(el.properties) > 0 + and el.properties[0].value == "MERGEME"]) == 1 + # every node that does not rely on something unchecked has an identifiable or an ID + for el in st.nodes: + if not st._identity_relies_on_unchecked_entity(el): + assert el.identifiable is not None or el.id is not None def test_merge_into_trivial(simple_adapter): # simplest case: a -> c # b - # (a reference c; b does not reference anything; a & b have the same target record) + # (a reference c; b does not reference anything; a & b have the same target + # record) c = db.Record(name='c').add_parent("RT2") a = db.Record(name='a').add_parent("RT1").add_property('RT2', c) b = db.Record(id=101).add_parent("RT1") @@ -399,6 +450,7 @@ def test_set_id_of_node(simple_adapter): assert st.nodes[0].identifiable is not None assert len(st.nodes) == 2 + @patch("caoscrawler.sync_graph.cached_get_entity_by", new=Mock(side_effect=mock_get_entity_by)) def test_merging(simple_adapter): @@ -481,15 +533,6 @@ def test_something(simple_adapter): assert b_prop.id == 101 - -# TODO create test that tests the assumptions after initialization: -# - no two (or more) nodes with the same id -# - no two (or more) nodes with the same path -# - nodes with id or path are not in unchecked -# - every node with id or path is in the corresponding look ups -# - every node that does not rely on something unchecked has an identifiable - - # TODO create test that tests whether a cascading information application is fully applied # TODO create test that tests whether setting something to missing allows to create an identifiable # and thus a merge