From 7bff47f89dd0f9f27ee9ea7cf7edcd52b714d06b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Henrik=20tom=20W=C3=B6rden?= <h.tomwoerden@indiscale.com>
Date: Thu, 18 Apr 2024 16:49:08 +0200
Subject: [PATCH] rename SemanticTarget to SyncGraph

---
 src/caoscrawler/crawl.py                      |  6 ++--
 src/caoscrawler/identifiable.py               |  1 -
 .../{semantic_target.py => sync_graph.py}     | 15 +++++++--
 unittests/test_crawler.py                     | 32 +++++++++----------
 unittests/test_file_identifiables.py          |  2 +-
 unittests/test_identifiable_adapters.py       |  2 +-
 unittests/test_semantic_target.py             | 28 ++++++++--------
 7 files changed, 47 insertions(+), 39 deletions(-)
 rename src/caoscrawler/{semantic_target.py => sync_graph.py} (97%)

diff --git a/src/caoscrawler/crawl.py b/src/caoscrawler/crawl.py
index d545403f..546f5c2e 100644
--- a/src/caoscrawler/crawl.py
+++ b/src/caoscrawler/crawl.py
@@ -69,7 +69,7 @@ from .logging import configure_server_side_logging
 from .macros import defmacro_constructor, macro_constructor
 from .scanner import (create_converter_registry, initialize_converters,
                       load_definition, scan_directory, scan_structure_elements)
-from .semantic_target import SemanticTarget
+from .sync_graph import SyncGraph
 from .stores import GeneralStore
 from .structure_elements import StructureElement
 
@@ -352,7 +352,7 @@ class Crawler(object):
 #                        )
 #                p.value = cached
 
-    def split_into_inserts_and_updates(self, st: SemanticTarget):
+    def split_into_inserts_and_updates(self, st: SyncGraph):
 
         entity_was_treated = True
         # st.entities contains Entities which could not yet be checked against the remote server
@@ -730,7 +730,7 @@ class Crawler(object):
             crawled_data = self.crawled_data
 
         to_be_inserted, to_be_updated = self.split_into_inserts_and_updates(
-            SemanticTarget(crawled_data, self.identifiableAdapter))
+            SyncGraph(crawled_data, self.identifiableAdapter))
 
         for el in to_be_updated:
             # all entity objects are replaced by their IDs except for the not yet inserted ones
diff --git a/src/caoscrawler/identifiable.py b/src/caoscrawler/identifiable.py
index e8ee521d..02eba44a 100644
--- a/src/caoscrawler/identifiable.py
+++ b/src/caoscrawler/identifiable.py
@@ -96,7 +96,6 @@ class Identifiable():
             if value.id is not None:
                 return str(value.id)
             else:
-                print(value)
                 raise RuntimeError("Python Entity without id not allowed")
         elif isinstance(value, list):
             return "[" + ", ".join([Identifiable._value_representation(el) for el in value]) + "]"
diff --git a/src/caoscrawler/semantic_target.py b/src/caoscrawler/sync_graph.py
similarity index 97%
rename from src/caoscrawler/semantic_target.py
rename to src/caoscrawler/sync_graph.py
index 2396446f..5993ed09 100644
--- a/src/caoscrawler/semantic_target.py
+++ b/src/caoscrawler/sync_graph.py
@@ -84,7 +84,7 @@ class SemanticEntity():
                 assert self.path == source.path
 
 
-class SemanticTarget():
+class SyncGraph():
     """ models the target structure of Entities as it shall be created by the Crawler
 
     The target entities are composed using the information of the entity fragments (db.Entity
@@ -317,11 +317,11 @@ class SemanticTarget():
                         if isinstance(el, db.Entity):
                             if el not in flat:
                                 flat.append(el)
-                                SemanticTarget._create_flat_list([el], flat)
+                                SyncGraph._create_flat_list([el], flat)
                 elif isinstance(p.value, db.Entity):
                     if p.value not in flat:
                         flat.append(p.value)
-                        SemanticTarget._create_flat_list([p.value], flat)
+                        SyncGraph._create_flat_list([p.value], flat)
         return flat
 
     @ staticmethod
@@ -454,6 +454,15 @@ class SemanticTarget():
             else:
                 self._merge_into(semantic_entity, treated_before)
 
+    def _remove_non_identifiables(self):
+        """ Remove from ``self.unchecked`` every entity in ``self.se`` whose registered
+        identifiable has a property named "nonidentifiable". """
+        for semantic_entity in list(self.se[::-1]):
+            if "nonidentifiable" in [p.name for p in
+                                     semantic_entity.registered_identifiable.properties]:
+                # NOTE(review): remove() raises if the entity is not in unchecked -- confirm
+                self.unchecked.remove(semantic_entity)
+
     def detect_circular_dependency(self):
         """
         Detects whether there are circular references in the given entity list and returns a list
diff --git a/unittests/test_crawler.py b/unittests/test_crawler.py
index f5552918..fdae024b 100644
--- a/unittests/test_crawler.py
+++ b/unittests/test_crawler.py
@@ -48,7 +48,7 @@ from caoscrawler.identifiable_adapters import (CaosDBIdentifiableAdapter,
                                                LocalStorageIdentifiableAdapter)
 from caoscrawler.scanner import (create_converter_registry, scan_directory,
                                  scan_structure_elements)
-from caoscrawler.semantic_target import SemanticTarget
+from caoscrawler.sync_graph import SyncGraph
 from caoscrawler.stores import GeneralStore, RecordStore
 from caoscrawler.structure_elements import (DictElement, DictListElement,
                                             DictTextElement, File)
@@ -255,13 +255,13 @@ def test_remove_unnecessary_updates():
 
 def test_split_into_inserts_and_updates_trivial():
     crawler = Crawler()
-    st = SemanticTarget([], crawler.identifiableAdapter)
+    st = SyncGraph([], crawler.identifiableAdapter)
     crawler.split_into_inserts_and_updates(st)
 
 
 def test_split_into_inserts_and_updates_unidentified():
     crawler = Crawler()
-    st = SemanticTarget([db.Record(name="recname").add_parent("someparent")],
+    st = SyncGraph([db.Record(name="recname").add_parent("someparent")],
                         crawler.identifiableAdapter)
     with raises(ValueError) as err:
         crawler.split_into_inserts_and_updates(st)
@@ -300,7 +300,7 @@ def test_split_into_inserts_and_updates_single(crawler_mocked_identifiable_retri
     entlist = [db.Record(name="A").add_parent("C"),
                db.Record(name="B").add_parent("C")]
 
-    st = SemanticTarget(entlist, crawler.identifiableAdapter)
+    st = SyncGraph(entlist, crawler.identifiableAdapter)
     assert st.get_checked_equivalent(st.se[0]) is None
     assert st.get_checked_equivalent(st.se[0]) is None
     assert not st.identity_relies_on_unchecked_entity(st.se[0])
@@ -328,7 +328,7 @@ def test_split_into_inserts_and_updates_with_duplicate(crawler_mocked_identifiab
     # This is identical to a and should be removed
     c = db.Record(name="A").add_parent("C")
     entlist = [a, b, c]
-    st = SemanticTarget(entlist, crawler.identifiableAdapter)
+    st = SyncGraph(entlist, crawler.identifiableAdapter)
     insert, update = crawler.split_into_inserts_and_updates(st)
     assert len(insert) == 1
     assert insert[0].name == "B"
@@ -346,7 +346,7 @@ def test_split_into_inserts_and_updates_with_ref(crawler_mocked_identifiable_ret
     b = db.Record(name="B").add_parent("C")
     b.add_property("A", a)
     entlist = [a, b]
-    st = SemanticTarget(entlist, crawler.identifiableAdapter)
+    st = SyncGraph(entlist, crawler.identifiableAdapter)
     insert, update = crawler.split_into_inserts_and_updates(st)
     assert len(insert) == 1
     assert insert[0].name == "B"
@@ -382,7 +382,7 @@ def test_split_into_inserts_and_updates_with_complex(crawler_mocked_identifiable
     b.add_property("A", f)
     b.add_property("A", a)
     entlist = [a, b, g]
-    st = SemanticTarget(entlist, crawler.identifiableAdapter)
+    st = SyncGraph(entlist, crawler.identifiableAdapter)
     insert, update = crawler.split_into_inserts_and_updates(st)
     assert len(insert) == 3
     assert "B" in [el.name for el in insert]
@@ -403,7 +403,7 @@ def test_split_into_inserts_and_updates_with_copy_attr(crawler_mocked_identifiab
     b = db.Record(name="A").add_parent("C")
     b.add_property("bar", 2)
     entlist = [a, b]
-    st = SemanticTarget(entlist, crawler.identifiableAdapter)
+    st = SyncGraph(entlist, crawler.identifiableAdapter)
     insert, update = crawler.split_into_inserts_and_updates(st)
 
     assert update[0].get_property("bar").value == 2
@@ -482,7 +482,7 @@ a: ([b1, b2])
 
     crawler = Crawler(identifiableAdapter=ident_adapter)
 
-    st = SemanticTarget(deepcopy([rec_a, *rec_b, *rec_c]), crawler.identifiableAdapter)
+    st = SyncGraph(deepcopy([rec_a, *rec_b, *rec_c]), crawler.identifiableAdapter)
     assert st.identity_relies_on_unchecked_entity(st.se[0]) is False
     assert st.identity_relies_on_unchecked_entity(st.se[1])
     assert st.identity_relies_on_unchecked_entity(st.se[2])
@@ -701,14 +701,14 @@ def test_split_into_inserts_and_updates_backref(crawler_mocked_for_backref_test)
     referenced = db.Record(name="B").add_parent("C")
     entlist = [referenced, db.Record(name="A").add_parent("BR").add_property("ref", referenced), ]
 
-    st = SemanticTarget([db.Record(name="B").add_parent("C")], crawler.identifiableAdapter)
+    st = SyncGraph([db.Record(name="B").add_parent("C")], crawler.identifiableAdapter)
     # Test without referencing object
     # currently a RuntimeError is raised if necessary properties are missing.
     with raises(RuntimeError):
         crawler.split_into_inserts_and_updates(st)
 
     # identifiables were not yet checked
-    st = SemanticTarget(entlist, crawler.identifiableAdapter)
+    st = SyncGraph(entlist, crawler.identifiableAdapter)
     assert st.get_checked_equivalent(st.se[1]) is None
     assert st.get_checked_equivalent(st.se[0]) is None
     # one can be found remotely, one not
@@ -739,7 +739,7 @@ def test_split_into_inserts_and_updates_mult_backref(crawler_mocked_for_backref_
                ]
 
     # test whether both entities are listed in the backref attribute of the identifiable
-    st = SemanticTarget(entlist, crawler.identifiableAdapter)
+    st = SyncGraph(entlist, crawler.identifiableAdapter)
 
     identifiable = crawler.identifiableAdapter.get_identifiable(
         st.se[0],
@@ -764,7 +764,7 @@ def test_split_into_inserts_and_updates_diff_backref(crawler_mocked_for_backref_
                ]
 
     # test whether both entities are listed in the backref attribute of the identifiable
-    st = SemanticTarget(entlist, crawler.identifiableAdapter)
+    st = SyncGraph(entlist, crawler.identifiableAdapter)
     identifiable = crawler.identifiableAdapter.get_identifiable(
         st.se[0],
         st.backward_id_referenced_by[st.se[0].uuid])
@@ -928,7 +928,7 @@ def test_detect_circular_dependency(crawler_mocked_identifiable_retrieve, caplog
     assert [id(el) for el in circle] == [id(el) for el in [a, c, b, a]]
 
     assert Crawler.detect_circular_dependency([d]) is None
-    st = SemanticTarget(flat, crawler.identifiableAdapter)
+    st = SyncGraph(flat, crawler.identifiableAdapter)
     with raises(RuntimeError):
         _, _ = crawler.split_into_inserts_and_updates(st)
     caplog.set_level(logging.ERROR, logger="caoscrawler.converters")
@@ -1015,7 +1015,7 @@ def test_replace_name_with_referenced_entity():
 
 def test_treated_record_lookup():
     ident_adapter = CaosDBIdentifiableAdapter()
-    trlu = SemanticTarget([db.Record().add_parent(
+    trlu = SyncGraph([db.Record().add_parent(
         'A'), db.Record().add_parent('A'), db.File()], ident_adapter)
     exist = trlu.se[0]
     miss = trlu.se[1]
@@ -1074,5 +1074,5 @@ def test_merge_entity_with_identifying_reference(crawler_mocked_identifiable_ret
     b = db.Record(name='b').add_parent("C")
     c = db.Record(name='b').add_parent("C").add_property(name="C", value=a)
     flat = [a, c, b]
-    st = SemanticTarget(flat, crawler.identifiableAdapter)
+    st = SyncGraph(flat, crawler.identifiableAdapter)
     _, _ = crawler.split_into_inserts_and_updates(st)
diff --git a/unittests/test_file_identifiables.py b/unittests/test_file_identifiables.py
index 87afe916..29ad1c62 100644
--- a/unittests/test_file_identifiables.py
+++ b/unittests/test_file_identifiables.py
@@ -8,7 +8,7 @@ import caosdb as db
 import pytest
 from caoscrawler.identifiable import Identifiable
 from caoscrawler.identifiable_adapters import LocalStorageIdentifiableAdapter
-from caoscrawler.semantic_target import SemanticEntity
+from caoscrawler.sync_graph import SemanticEntity
 from caosdb.cached import cache_clear
 from caosdb.exceptions import EmptyUniqueQueryError
 from pytest import raises
diff --git a/unittests/test_identifiable_adapters.py b/unittests/test_identifiable_adapters.py
index dd6f40af..6f10189f 100644
--- a/unittests/test_identifiable_adapters.py
+++ b/unittests/test_identifiable_adapters.py
@@ -37,7 +37,7 @@ from caoscrawler.identifiable import Identifiable
 from caoscrawler.identifiable_adapters import (CaosDBIdentifiableAdapter,
                                                IdentifiableAdapter,
                                                convert_value)
-from caoscrawler.semantic_target import SemanticEntity
+from caoscrawler.sync_graph import SemanticEntity
 
 UNITTESTDIR = Path(__file__).parent
 
diff --git a/unittests/test_semantic_target.py b/unittests/test_semantic_target.py
index 14bd8452..e2d1509a 100644
--- a/unittests/test_semantic_target.py
+++ b/unittests/test_semantic_target.py
@@ -23,7 +23,7 @@ from unittest.mock import MagicMock, Mock, patch
 
 import linkahead as db
 from caoscrawler.identifiable_adapters import CaosDBIdentifiableAdapter
-from caoscrawler.semantic_target import SemanticEntity, SemanticTarget
+from caoscrawler.sync_graph import SemanticEntity, SyncGraph
 
 from test_crawler import basic_retrieve_by_name_mock_up, mock_get_entity_by
 
@@ -33,7 +33,7 @@ def test_create_flat_list():
     b = db.Record()
     a.add_property(name="a", value=a)
     a.add_property(name="b", value=b)
-    flat = SemanticTarget._create_flat_list([a])
+    flat = SyncGraph._create_flat_list([a])
     assert len(flat) == 2
     assert a in flat
     assert b in flat
@@ -41,7 +41,7 @@ def test_create_flat_list():
     c.add_property(name="a", value=a)
     # This would caus recursion if it is not dealt with properly.
     a.add_property(name="c", value=c)
-    flat = SemanticTarget._create_flat_list([c])
+    flat = SyncGraph._create_flat_list([c])
     assert len(flat) == 3
     assert a in flat
     assert b in flat
@@ -56,7 +56,7 @@ def test_create_reference_mapping():
 
     (forward_references, backward_references, forward_id_references,
      backward_id_references, forward_id_referenced_by,
-     backward_id_referenced_by) = SemanticTarget._create_reference_mapping(
+     backward_id_referenced_by) = SyncGraph._create_reference_mapping(
         ses,
         {id(a): ses[0], id(b): ses[1]}
     )
@@ -94,12 +94,12 @@ def test_create_reference_mapping():
     assert backward_id_referenced_by[ses[1].uuid] == set()
 
 
-def test_SemanticTarget():
+def test_SyncGraph():
     a = db.Record().add_parent("A")
     ident_a = db.RecordType().add_parent("A").add_property("prop_ident")
     ident_adapter = CaosDBIdentifiableAdapter()
     ident_adapter.register_identifiable("A", ident_a)
-    st = SemanticTarget([a], ident_adapter)
+    st = SyncGraph([a], ident_adapter)
 
 
 def test_merge_into():
@@ -118,7 +118,7 @@ def test_merge_into():
     ident_adapter.register_identifiable("RT1", ident_a)
     ident_adapter.register_identifiable("RT2", ident_b)
 
-    st = SemanticTarget([a, b], ident_adapter)
+    st = SyncGraph([a, b], ident_adapter)
     se_a = st.se_lookup[id(a)]
     se_b = st.se_lookup[id(b)]
     se_c = st.se_lookup[id(c)]
@@ -188,7 +188,7 @@ def test_merge_into():
     a = db.Record().add_parent("RT1").add_property('RT2', c)
     b = db.Record().add_parent("RT1").add_property('RT2', c)
 
-    st = SemanticTarget([a, b], ident_adapter)
+    st = SyncGraph([a, b], ident_adapter)
     se_a = st.se_lookup[id(a)]
     se_b = st.se_lookup[id(b)]
     se_c = st.se_lookup[id(c)]
@@ -273,11 +273,11 @@ def test_backward_id_referenced_by():
     referenced = db.Record(name="B").add_parent("C")
     entlist = [referenced, db.Record(name="A").add_parent("BR").add_property("ref", referenced), ]
 
-    st = SemanticTarget(entlist, ident_adapter)
+    st = SyncGraph(entlist, ident_adapter)
     assert st.se[1] in st.backward_id_referenced_by[st.se[0].uuid]
 
 
-@patch("caoscrawler.semantic_target.cached_get_entity_by",
+@patch("caoscrawler.sync_graph.cached_get_entity_by",
        new=Mock(side_effect=mock_get_entity_by))
 def test_merging():
     # identifying information can be given at various locations in the hierachical tree
@@ -293,7 +293,7 @@ def test_merging():
     entlist = [
         db.Record(id=101).add_parent("A"),
         db.Record(id=101).add_parent("A")]
-    st = SemanticTarget(entlist, ident_adapter)
+    st = SyncGraph(entlist, ident_adapter)
     assert len(st.se) == 1
     assert len(st.unchecked) == 0
     assert entlist[0] in st.se[0].fragments
@@ -303,7 +303,7 @@ def test_merging():
     entlist = [
         db.File(path='101').add_parent("A"),
         db.File(path='101').add_parent("A")]
-    st = SemanticTarget(entlist, ident_adapter)
+    st = SyncGraph(entlist, ident_adapter)
     assert len(st.se) == 1
     assert len(st.unchecked) == 0
     assert entlist[0] in st.se[0].fragments
@@ -313,7 +313,7 @@ def test_merging():
     entlist = [
         db.File(name='101').add_parent("A").add_property('a', value=1),
         db.File(name='101').add_parent("A").add_property('a', value=1)]
-    st = SemanticTarget(entlist, ident_adapter)
+    st = SyncGraph(entlist, ident_adapter)
     assert len(st.unchecked) == 2
     st.make_identifiable(st.se[0])
     st.check_remote_server(st.se[0])
@@ -335,7 +335,7 @@ def test_merging():
         db.Record(id=101).add_parent("A").add_property('a', value=1),
         db.Record(name='a').add_parent("A").add_property('a', value=1)]
 
-    st = SemanticTarget(entlist, ident_adapter)
+    st = SyncGraph(entlist, ident_adapter)
     assert len(st.se) == 2
     assert len(st.unchecked) == 1
     st.make_identifiable(st.se[1])
-- 
GitLab