From 21da31c253de06ffa8e562deaa2bdcc6e995ec44 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20tom=20W=C3=B6rden?= <h.tomwoerden@indiscale.com> Date: Tue, 28 May 2024 11:08:00 +0200 Subject: [PATCH] rename_refs --- src/caoscrawler/sync_graph.py | 87 ++++++++--------- unittests/test_crawler.py | 4 +- unittests/test_sync_graph.py | 174 +++++++++++++++++----------------- 3 files changed, 133 insertions(+), 132 deletions(-) diff --git a/src/caoscrawler/sync_graph.py b/src/caoscrawler/sync_graph.py index 652f96a5..d317b0c8 100644 --- a/src/caoscrawler/sync_graph.py +++ b/src/caoscrawler/sync_graph.py @@ -156,14 +156,14 @@ class SyncGraph(): # list all SemanticEntities that have not yet been checked self.unchecked = list(self.nodes) - # initialize reference mappings + # initialize reference mappings (see _create_reference_mapping) ( self.forward_references, self.backward_references, - self.forward_id_references, - self.backward_id_references, - self.forward_id_referenced_by, - self.backward_id_referenced_by, + self.forward_references_id_props, + self.backward_references_id_props, + self.forward_references_backref, + self.backward_references_backref, ) = self._create_reference_mapping(self.nodes) # remove entities with path or ID from unchecked list @@ -250,9 +250,9 @@ class SyncGraph(): """ return any([id(ent) not in self._missing and id(ent) not in self._existing - for ent in self.forward_id_references[id(node)]] + for ent in self.forward_references_id_props[id(node)]] + [id(ent) not in self._missing and id(ent) not in self._existing - for ent in self.backward_id_referenced_by[id(node)]]) + for ent in self.backward_references_backref[id(node)]]) def unchecked_contains_circular_dependency(self): """ @@ -328,7 +328,7 @@ class SyncGraph(): if identifiable is None: self.identifiableAdapter.all_identifying_properties_exist(node) identifiable = self.identifiableAdapter.get_identifiable( - node, self.backward_id_referenced_by[id(node)]) + node, 
self.backward_references_backref[id(node)]) node.identifiable = identifiable equivalent_se = self.get_equivalent(node) if equivalent_se is not None and equivalent_se is not node: @@ -362,8 +362,8 @@ class SyncGraph(): Last review by Alexander Schlemmer on 2024-05-24. """ - return (self.backward_id_references[id(node)].union( - self.forward_id_referenced_by[id(node)])) + return (self.backward_references_id_props[id(node)].union( + self.forward_references_backref[id(node)])) @staticmethod def _create_flat_list(ent_list: list[db.Entity], flat: Optional[list[db.Entity]] = None): @@ -407,25 +407,26 @@ class SyncGraph(): Then there are three kinds of maps being generated: One includes all references ("_references"), one includes references that are values of identifying properties - ("_id_references") and one includes references that are relevant for identifying - backreferences/"is_referenced_by" ("_id_references_by"). + ("_references_id_props") and one includes references that are relevant for identifying + backreferences/"is_referenced_by" ("_references_backref"). I.e. the two latter are subsets + of the former reference map. """ # TODO we need to treat children of RecordTypes somehow. 
forward_references: dict[int, set[SyncNode]] = {} backward_references: dict[int, set[SyncNode]] = {} - forward_id_references: dict[int, set[SyncNode]] = {} - backward_id_references: dict[int, set[SyncNode]] = {} - forward_id_referenced_by: dict[int, set[SyncNode]] = {} - backward_id_referenced_by: dict[int, set[SyncNode]] = {} + forward_references_id_props: dict[int, set[SyncNode]] = {} + backward_references_id_props: dict[int, set[SyncNode]] = {} + forward_references_backref: dict[int, set[SyncNode]] = {} + backward_references_backref: dict[int, set[SyncNode]] = {} # initialize with empty lists/dict for node in flat: forward_references[id(node)] = set() backward_references[id(node)] = set() - forward_id_references[id(node)] = set() - backward_id_references[id(node)] = set() - forward_id_referenced_by[id(node)] = set() - backward_id_referenced_by[id(node)] = set() + forward_references_id_props[id(node)] = set() + backward_references_id_props[id(node)] = set() + forward_references_backref[id(node)] = set() + backward_references_backref[id(node)] = set() for node in flat: for p in node.properties: val = p.value @@ -439,16 +440,16 @@ class SyncGraph(): and len([el.name for el in node.registered_identifiable.properties if el.name == p.name]) > 0): - forward_id_references[id(node)].add(v) - backward_id_references[id(v)].add(node) + forward_references_id_props[id(node)].add(v) + backward_references_id_props[id(v)].add(node) if (v.registered_identifiable is not None and IdentifiableAdapter.referencing_entity_has_appropriate_type( node.parents, v.registered_identifiable)): - forward_id_referenced_by[id(node)].add(v) - backward_id_referenced_by[id(v)].add(node) + forward_references_backref[id(node)].add(v) + backward_references_backref[id(v)].add(node) - return (forward_references, backward_references, forward_id_references, - backward_id_references, forward_id_referenced_by, backward_id_referenced_by, + return (forward_references, backward_references, 
forward_references_id_props, + backward_references_id_props, forward_references_backref, backward_references_backref, ) def _mark_entities_with_path_or_id(self): @@ -523,23 +524,23 @@ class SyncGraph(): self.forward_references[id(node)].remove(source) self.forward_references[id(node)].add(target) - for node in self.forward_id_references.pop(id(source)): - self.forward_id_references[id(target)].add(node) - self.backward_id_references[id(node)].remove(source) - self.backward_id_references[id(node)].add(target) - for node in self.backward_id_references.pop(id(source)): - self.backward_id_references[id(target)].add(node) - self.forward_id_references[id(node)].remove(source) - self.forward_id_references[id(node)].add(target) - - for node in self.forward_id_referenced_by.pop(id(source)): - self.forward_id_referenced_by[id(target)].add(node) - self.backward_id_referenced_by[id(node)].remove(source) - self.backward_id_referenced_by[id(node)].add(target) - for node in self.backward_id_referenced_by.pop(id(source)): - self.backward_id_referenced_by[id(target)].add(node) - self.forward_id_referenced_by[id(node)].remove(source) - self.forward_id_referenced_by[id(node)].add(target) + for node in self.forward_references_id_props.pop(id(source)): + self.forward_references_id_props[id(target)].add(node) + self.backward_references_id_props[id(node)].remove(source) + self.backward_references_id_props[id(node)].add(target) + for node in self.backward_references_id_props.pop(id(source)): + self.backward_references_id_props[id(target)].add(node) + self.forward_references_id_props[id(node)].remove(source) + self.forward_references_id_props[id(node)].add(target) + + for node in self.forward_references_backref.pop(id(source)): + self.forward_references_backref[id(target)].add(node) + self.backward_references_backref[id(node)].remove(source) + self.backward_references_backref[id(node)].add(target) + for node in self.backward_references_backref.pop(id(source)): + 
self.backward_references_backref[id(target)].add(node) + self.forward_references_backref[id(node)].remove(source) + self.forward_references_backref[id(node)].add(target) # remove unneeded SyncNode self.nodes.remove(source) diff --git a/unittests/test_crawler.py b/unittests/test_crawler.py index ccd441e7..4e8b057e 100644 --- a/unittests/test_crawler.py +++ b/unittests/test_crawler.py @@ -546,7 +546,7 @@ def test_split_into_inserts_and_updates_mult_backref(crawler_mocked_for_backref_ identifiable = crawler.identifiableAdapter.get_identifiable( st.nodes[0], - st.backward_id_referenced_by[id(st.nodes[0])]) + st.backward_references_backref[id(st.nodes[0])]) assert len(identifiable.backrefs) == 2 # check the split... @@ -570,7 +570,7 @@ def test_split_into_inserts_and_updates_diff_backref(crawler_mocked_for_backref_ st = SyncGraph(entlist, crawler.identifiableAdapter) identifiable = crawler.identifiableAdapter.get_identifiable( st.nodes[0], - st.backward_id_referenced_by[id(st.nodes[0])]) + st.backward_references_backref[id(st.nodes[0])]) assert len(identifiable.backrefs) == 2 diff --git a/unittests/test_sync_graph.py b/unittests/test_sync_graph.py index e14c8346..1ec2b3b2 100644 --- a/unittests/test_sync_graph.py +++ b/unittests/test_sync_graph.py @@ -89,9 +89,9 @@ def test_create_reference_mapping(): for index, mapping in product((0, 1), mappings): assert id(ses[index]) in mapping - (forward_references, backward_references, forward_id_references, - backward_id_references, forward_id_referenced_by, - backward_id_referenced_by) = mappings + (forward_references, backward_references, forward_references_id_props, + backward_references_id_props, forward_references_backref, + backward_references_backref) = mappings # a has no ref assert len(forward_references[id(a)]) == 0 @@ -100,17 +100,17 @@ def test_create_reference_mapping(): assert forward_references[id(b)] == set([a]) assert backward_references[id(b)] == set() # a has no identifying reference - assert 
forward_id_references[id(a)] == set() - assert backward_id_references[id(a)] == set([b]) + assert forward_references_id_props[id(a)] == set() + assert backward_references_id_props[id(a)] == set([b]) # b has an identifying reference - assert forward_id_references[id(b)] == set([a]) - assert backward_id_references[id(b)] == set() + assert forward_references_id_props[id(b)] == set([a]) + assert backward_references_id_props[id(b)] == set() # a has an identifying back reference - assert forward_id_referenced_by[id(a)] == set() - assert backward_id_referenced_by[id(a)] == set([b]) + assert forward_references_backref[id(a)] == set() + assert backward_references_backref[id(a)] == set([b]) # b does not - assert forward_id_referenced_by[id(b)] == set([a]) - assert backward_id_referenced_by[id(b)] == set() + assert forward_references_backref[id(b)] == set([a]) + assert backward_references_backref[id(b)] == set() @patch("caoscrawler.sync_graph.cached_get_entity_by", @@ -203,23 +203,23 @@ def test_merge_into_trivial(simple_adapter): assert len(st.backward_references[id(se_c)]) == 1 assert se_a in st.backward_references[id(se_c)] - assert len(st.forward_id_references[id(se_a)]) == 1 - assert se_c in st.forward_id_references[id(se_a)] - assert len(st.forward_id_references[id(se_b)]) == 0 - assert len(st.forward_id_references[id(se_c)]) == 0 - assert len(st.backward_id_references[id(se_a)]) == 0 - assert len(st.backward_id_references[id(se_b)]) == 0 - assert len(st.backward_id_references[id(se_c)]) == 1 - assert se_a in st.backward_id_references[id(se_c)] - - assert len(st.forward_id_referenced_by[id(se_a)]) == 1 - assert se_c in st.forward_id_referenced_by[id(se_a)] - assert len(st.forward_id_referenced_by[id(se_b)]) == 0 - assert len(st.forward_id_referenced_by[id(se_c)]) == 0 - assert len(st.backward_id_referenced_by[id(se_a)]) == 0 - assert len(st.backward_id_referenced_by[id(se_b)]) == 0 - assert len(st.backward_id_referenced_by[id(se_c)]) == 1 - assert se_a in 
st.backward_id_referenced_by[id(se_c)] + assert len(st.forward_references_id_props[id(se_a)]) == 1 + assert se_c in st.forward_references_id_props[id(se_a)] + assert len(st.forward_references_id_props[id(se_b)]) == 0 + assert len(st.forward_references_id_props[id(se_c)]) == 0 + assert len(st.backward_references_id_props[id(se_a)]) == 0 + assert len(st.backward_references_id_props[id(se_b)]) == 0 + assert len(st.backward_references_id_props[id(se_c)]) == 1 + assert se_a in st.backward_references_id_props[id(se_c)] + + assert len(st.forward_references_backref[id(se_a)]) == 1 + assert se_c in st.forward_references_backref[id(se_a)] + assert len(st.forward_references_backref[id(se_b)]) == 0 + assert len(st.forward_references_backref[id(se_c)]) == 0 + assert len(st.backward_references_backref[id(se_a)]) == 0 + assert len(st.backward_references_backref[id(se_b)]) == 0 + assert len(st.backward_references_backref[id(se_c)]) == 1 + assert se_a in st.backward_references_backref[id(se_c)] st.set_id_of_node(se_a, 101) @@ -234,23 +234,23 @@ def test_merge_into_trivial(simple_adapter): assert len(st.backward_references[id(se_c)]) == 1 assert se_b in st.backward_references[id(se_c)] - assert id(se_a) not in st.forward_id_references - assert len(st.forward_id_references[id(se_b)]) == 1 - assert se_c in st.forward_id_references[id(se_b)] - assert len(st.forward_id_references[id(se_c)]) == 0 - assert id(se_a) not in st.backward_id_references - assert len(st.backward_id_references[id(se_b)]) == 0 - assert len(st.backward_id_references[id(se_c)]) == 1 - assert se_b in st.backward_id_references[id(se_c)] - - assert id(se_a) not in st.forward_id_referenced_by - assert len(st.forward_id_referenced_by[id(se_b)]) == 1 - assert se_c in st.forward_id_referenced_by[id(se_b)] - assert len(st.forward_id_referenced_by[id(se_c)]) == 0 - assert id(se_a) not in st.backward_id_referenced_by - assert len(st.backward_id_referenced_by[id(se_b)]) == 0 - assert len(st.backward_id_referenced_by[id(se_c)]) 
== 1 - assert se_b in st.backward_id_referenced_by[id(se_c)] + assert id(se_a) not in st.forward_references_id_props + assert len(st.forward_references_id_props[id(se_b)]) == 1 + assert se_c in st.forward_references_id_props[id(se_b)] + assert len(st.forward_references_id_props[id(se_c)]) == 0 + assert id(se_a) not in st.backward_references_id_props + assert len(st.backward_references_id_props[id(se_b)]) == 0 + assert len(st.backward_references_id_props[id(se_c)]) == 1 + assert se_b in st.backward_references_id_props[id(se_c)] + + assert id(se_a) not in st.forward_references_backref + assert len(st.forward_references_backref[id(se_b)]) == 1 + assert se_c in st.forward_references_backref[id(se_b)] + assert len(st.forward_references_backref[id(se_c)]) == 0 + assert id(se_a) not in st.backward_references_backref + assert len(st.backward_references_backref[id(se_b)]) == 0 + assert len(st.backward_references_backref[id(se_c)]) == 1 + assert se_b in st.backward_references_backref[id(se_c)] def test_merge_into_simple(simple_adapter): @@ -277,27 +277,27 @@ def test_merge_into_simple(simple_adapter): se_a in st.backward_references[id(se_c)] se_b in st.backward_references[id(se_c)] - assert len(st.forward_id_references[id(se_a)]) == 1 - se_c in st.forward_id_references[id(se_a)] - assert len(st.forward_id_references[id(se_b)]) == 1 - se_c in st.forward_id_references[id(se_b)] - assert len(st.forward_id_references[id(se_c)]) == 0 - assert len(st.backward_id_references[id(se_a)]) == 0 - assert len(st.backward_id_references[id(se_b)]) == 0 - assert len(st.backward_id_references[id(se_c)]) == 2 - se_a in st.backward_id_references[id(se_c)] - se_b in st.backward_id_references[id(se_c)] - - assert len(st.forward_id_referenced_by[id(se_a)]) == 1 - se_c in st.forward_id_referenced_by[id(se_a)] - assert len(st.forward_id_referenced_by[id(se_b)]) == 1 - se_c in st.forward_id_referenced_by[id(se_b)] - assert len(st.forward_id_referenced_by[id(se_c)]) == 0 - assert 
len(st.backward_id_referenced_by[id(se_a)]) == 0 - assert len(st.backward_id_referenced_by[id(se_b)]) == 0 - assert len(st.backward_id_referenced_by[id(se_c)]) == 2 - se_a in st.backward_id_referenced_by[id(se_c)] - se_b in st.backward_id_referenced_by[id(se_c)] + assert len(st.forward_references_id_props[id(se_a)]) == 1 + se_c in st.forward_references_id_props[id(se_a)] + assert len(st.forward_references_id_props[id(se_b)]) == 1 + se_c in st.forward_references_id_props[id(se_b)] + assert len(st.forward_references_id_props[id(se_c)]) == 0 + assert len(st.backward_references_id_props[id(se_a)]) == 0 + assert len(st.backward_references_id_props[id(se_b)]) == 0 + assert len(st.backward_references_id_props[id(se_c)]) == 2 + se_a in st.backward_references_id_props[id(se_c)] + se_b in st.backward_references_id_props[id(se_c)] + + assert len(st.forward_references_backref[id(se_a)]) == 1 + se_c in st.forward_references_backref[id(se_a)] + assert len(st.forward_references_backref[id(se_b)]) == 1 + se_c in st.forward_references_backref[id(se_b)] + assert len(st.forward_references_backref[id(se_c)]) == 0 + assert len(st.backward_references_backref[id(se_a)]) == 0 + assert len(st.backward_references_backref[id(se_b)]) == 0 + assert len(st.backward_references_backref[id(se_c)]) == 2 + se_a in st.backward_references_backref[id(se_c)] + se_b in st.backward_references_backref[id(se_c)] st._merge_into(se_a, se_b) @@ -313,26 +313,26 @@ def test_merge_into_simple(simple_adapter): assert len(st.backward_references[id(se_c)]) == 1 se_b in st.backward_references[id(se_c)] - assert id(se_a) not in st.forward_id_references - assert len(st.forward_id_references[id(se_b)]) == 1 - se_c in st.forward_id_references[id(se_b)] - assert len(st.forward_id_references[id(se_c)]) == 0 - assert id(se_a) not in st.backward_id_references - assert len(st.backward_id_references[id(se_b)]) == 0 - assert len(st.backward_id_references[id(se_c)]) == 1 - se_b in st.backward_id_references[id(se_c)] - - assert 
id(se_a) not in st.forward_id_referenced_by - assert len(st.forward_id_referenced_by[id(se_b)]) == 1 - se_c in st.forward_id_referenced_by[id(se_b)] - assert len(st.forward_id_referenced_by[id(se_c)]) == 0 - assert id(se_a) not in st.backward_id_referenced_by - assert len(st.backward_id_referenced_by[id(se_b)]) == 0 - assert len(st.backward_id_referenced_by[id(se_c)]) == 1 - se_b in st.backward_id_referenced_by[id(se_c)] - - -def test_backward_id_referenced_by(): + assert id(se_a) not in st.forward_references_id_props + assert len(st.forward_references_id_props[id(se_b)]) == 1 + se_c in st.forward_references_id_props[id(se_b)] + assert len(st.forward_references_id_props[id(se_c)]) == 0 + assert id(se_a) not in st.backward_references_id_props + assert len(st.backward_references_id_props[id(se_b)]) == 0 + assert len(st.backward_references_id_props[id(se_c)]) == 1 + se_b in st.backward_references_id_props[id(se_c)] + + assert id(se_a) not in st.forward_references_backref + assert len(st.forward_references_backref[id(se_b)]) == 1 + se_c in st.forward_references_backref[id(se_b)] + assert len(st.forward_references_backref[id(se_c)]) == 0 + assert id(se_a) not in st.backward_references_backref + assert len(st.backward_references_backref[id(se_b)]) == 0 + assert len(st.backward_references_backref[id(se_c)]) == 1 + se_b in st.backward_references_backref[id(se_c)] + + +def test_backward_references_backref(): # We use the reference as identifying reference in both directions. 
Thus the map is the same # for all three categories: references, id_references and id_referenced_by ident_a = db.RecordType().add_parent("BR").add_property("name") @@ -345,7 +345,7 @@ def test_backward_id_referenced_by(): ent_list = [referenced, db.Record(name="A").add_parent("BR").add_property("ref", referenced), ] st = SyncGraph(ent_list, ident_adapter) - assert st.nodes[1] in st.backward_id_referenced_by[id(st.nodes[0])] + assert st.nodes[1] in st.backward_references_backref[id(st.nodes[0])] def test_set_id_of_node(simple_adapter): -- GitLab