From 7bff47f89dd0f9f27ee9ea7cf7edcd52b714d06b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20tom=20W=C3=B6rden?= <h.tomwoerden@indiscale.com> Date: Thu, 18 Apr 2024 16:49:08 +0200 Subject: [PATCH] rename --- src/caoscrawler/crawl.py | 6 ++-- src/caoscrawler/identifiable.py | 1 - .../{semantic_target.py => sync_graph.py} | 15 +++++++-- unittests/test_crawler.py | 32 +++++++++---------- unittests/test_file_identifiables.py | 2 +- unittests/test_identifiable_adapters.py | 2 +- unittests/test_semantic_target.py | 28 ++++++++-------- 7 files changed, 47 insertions(+), 39 deletions(-) rename src/caoscrawler/{semantic_target.py => sync_graph.py} (97%) diff --git a/src/caoscrawler/crawl.py b/src/caoscrawler/crawl.py index d545403f..546f5c2e 100644 --- a/src/caoscrawler/crawl.py +++ b/src/caoscrawler/crawl.py @@ -69,7 +69,7 @@ from .logging import configure_server_side_logging from .macros import defmacro_constructor, macro_constructor from .scanner import (create_converter_registry, initialize_converters, load_definition, scan_directory, scan_structure_elements) -from .semantic_target import SemanticTarget +from .sync_graph import SyncGraph from .stores import GeneralStore from .structure_elements import StructureElement @@ -352,7 +352,7 @@ class Crawler(object): # ) # p.value = cached - def split_into_inserts_and_updates(self, st: SemanticTarget): + def split_into_inserts_and_updates(self, st: SyncGraph): entity_was_treated = True # st.entities contains Entities which could not yet be checked against the remote server @@ -730,7 +730,7 @@ class Crawler(object): crawled_data = self.crawled_data to_be_inserted, to_be_updated = self.split_into_inserts_and_updates( - SemanticTarget(crawled_data, self.identifiableAdapter)) + SyncGraph(crawled_data, self.identifiableAdapter)) for el in to_be_updated: # all entity objects are replaced by their IDs except for the not yet inserted ones diff --git a/src/caoscrawler/identifiable.py b/src/caoscrawler/identifiable.py index e8ee521d..02eba44a 100644 --- a/src/caoscrawler/identifiable.py +++ b/src/caoscrawler/identifiable.py @@ -96,7 +96,6 @@ class Identifiable(): if value.id is not None: return str(value.id) else: - print(value) raise RuntimeError("Python Entity without id not allowed") elif isinstance(value, list): return "[" + ", ".join([Identifiable._value_representation(el) for el in value]) + "]" diff --git a/src/caoscrawler/semantic_target.py b/src/caoscrawler/sync_graph.py similarity index 97% rename from src/caoscrawler/semantic_target.py rename to src/caoscrawler/sync_graph.py index 2396446f..5993ed09 100644 --- a/src/caoscrawler/semantic_target.py +++ b/src/caoscrawler/sync_graph.py @@ -84,7 +84,7 @@ class SemanticEntity(): assert self.path == source.path -class SemanticTarget(): +class SyncGraph(): """ models the target structure of Entities as it shall be created by the Crawler The target entities are composed using the information of the entity fragments (db.Entity @@ -317,11 +317,11 @@ class SemanticTarget(): if isinstance(el, db.Entity): if el not in flat: flat.append(el) - SemanticTarget._create_flat_list([el], flat) + SyncGraph._create_flat_list([el], flat) elif isinstance(p.value, db.Entity): if p.value not in flat: flat.append(p.value) - SemanticTarget._create_flat_list([p.value], flat) + SyncGraph._create_flat_list([p.value], flat) return flat @ staticmethod @@ -454,6 +454,15 @@ class SemanticTarget(): else: self._merge_into(semantic_entity, treated_before) + def _remove_non_identifiables(self): + """ A path or an ID is sufficiently identifying. Thus, those entities can be marked as + checked """ + for semantic_entity in list(self.se[::-1]): + if "nonidentifiable" in [p.name for p in + semantic_entity.registered_identifiable.properties]: + + self.unchecked.remove(semantic_entity) + def detect_circular_dependency(self): """ Detects whether there are circular references in the given entity list and returns a list diff --git a/unittests/test_crawler.py b/unittests/test_crawler.py index f5552918..fdae024b 100644 --- a/unittests/test_crawler.py +++ b/unittests/test_crawler.py @@ -48,7 +48,7 @@ from caoscrawler.identifiable_adapters import (CaosDBIdentifiableAdapter, LocalStorageIdentifiableAdapter) from caoscrawler.scanner import (create_converter_registry, scan_directory, scan_structure_elements) -from caoscrawler.semantic_target import SemanticTarget +from caoscrawler.sync_graph import SyncGraph from caoscrawler.stores import GeneralStore, RecordStore from caoscrawler.structure_elements import (DictElement, DictListElement, DictTextElement, File) @@ -255,13 +255,13 @@ def test_remove_unnecessary_updates(): def test_split_into_inserts_and_updates_trivial(): crawler = Crawler() - st = SemanticTarget([], crawler.identifiableAdapter) + st = SyncGraph([], crawler.identifiableAdapter) crawler.split_into_inserts_and_updates(st) def test_split_into_inserts_and_updates_unidentified(): crawler = Crawler() - st = SemanticTarget([db.Record(name="recname").add_parent("someparent")], + st = SyncGraph([db.Record(name="recname").add_parent("someparent")], crawler.identifiableAdapter) with raises(ValueError) as err: crawler.split_into_inserts_and_updates(st) @@ -300,7 +300,7 @@ def test_split_into_inserts_and_updates_single(crawler_mocked_identifiable_retri entlist = [db.Record(name="A").add_parent("C"), db.Record(name="B").add_parent("C")] - st = SemanticTarget(entlist, crawler.identifiableAdapter) + st = SyncGraph(entlist, crawler.identifiableAdapter) assert st.get_checked_equivalent(st.se[0]) is None assert st.get_checked_equivalent(st.se[0]) is None assert not st.identity_relies_on_unchecked_entity(st.se[0]) @@ -328,7 +328,7 @@ def test_split_into_inserts_and_updates_with_duplicate(crawler_mocked_identifiab # This is identical to a and should be removed c = db.Record(name="A").add_parent("C") entlist = [a, b, c] - st = SemanticTarget(entlist, crawler.identifiableAdapter) + st = SyncGraph(entlist, crawler.identifiableAdapter) insert, update = crawler.split_into_inserts_and_updates(st) assert len(insert) == 1 assert insert[0].name == "B" @@ -346,7 +346,7 @@ def test_split_into_inserts_and_updates_with_ref(crawler_mocked_identifiable_ret b = db.Record(name="B").add_parent("C") b.add_property("A", a) entlist = [a, b] - st = SemanticTarget(entlist, crawler.identifiableAdapter) + st = SyncGraph(entlist, crawler.identifiableAdapter) insert, update = crawler.split_into_inserts_and_updates(st) assert len(insert) == 1 assert insert[0].name == "B" @@ -382,7 +382,7 @@ def test_split_into_inserts_and_updates_with_complex(crawler_mocked_identifiable b.add_property("A", f) b.add_property("A", a) entlist = [a, b, g] - st = SemanticTarget(entlist, crawler.identifiableAdapter) + st = SyncGraph(entlist, crawler.identifiableAdapter) insert, update = crawler.split_into_inserts_and_updates(st) assert len(insert) == 3 assert "B" in [el.name for el in insert] @@ -403,7 +403,7 @@ def test_split_into_inserts_and_updates_with_copy_attr(crawler_mocked_identifiab b = db.Record(name="A").add_parent("C") b.add_property("bar", 2) entlist = [a, b] - st = SemanticTarget(entlist, crawler.identifiableAdapter) + st = SyncGraph(entlist, crawler.identifiableAdapter) insert, update = crawler.split_into_inserts_and_updates(st) assert update[0].get_property("bar").value == 2 @@ -482,7 +482,7 @@ a: ([b1, b2]) crawler = Crawler(identifiableAdapter=ident_adapter) - st = SemanticTarget(deepcopy([rec_a, *rec_b, *rec_c]), crawler.identifiableAdapter) + st = SyncGraph(deepcopy([rec_a, *rec_b, *rec_c]), crawler.identifiableAdapter) assert st.identity_relies_on_unchecked_entity(st.se[0]) is False assert st.identity_relies_on_unchecked_entity(st.se[1]) assert st.identity_relies_on_unchecked_entity(st.se[2]) @@ -701,14 +701,14 @@ def test_split_into_inserts_and_updates_backref(crawler_mocked_for_backref_test) referenced = db.Record(name="B").add_parent("C") entlist = [referenced, db.Record(name="A").add_parent("BR").add_property("ref", referenced), ] - st = SemanticTarget([db.Record(name="B").add_parent("C")], crawler.identifiableAdapter) + st = SyncGraph([db.Record(name="B").add_parent("C")], crawler.identifiableAdapter) # Test without referencing object # currently a RuntimeError is raised if necessary properties are missing. with raises(RuntimeError): crawler.split_into_inserts_and_updates(st) # identifiables were not yet checked - st = SemanticTarget(entlist, crawler.identifiableAdapter) + st = SyncGraph(entlist, crawler.identifiableAdapter) assert st.get_checked_equivalent(st.se[1]) is None assert st.get_checked_equivalent(st.se[0]) is None # one can be found remotely, one not @@ -739,7 +739,7 @@ def test_split_into_inserts_and_updates_mult_backref(crawler_mocked_for_backref_ ] # test whether both entities are listed in the backref attribute of the identifiable - st = SemanticTarget(entlist, crawler.identifiableAdapter) + st = SyncGraph(entlist, crawler.identifiableAdapter) identifiable = crawler.identifiableAdapter.get_identifiable( st.se[0], @@ -764,7 +764,7 @@ def test_split_into_inserts_and_updates_diff_backref(crawler_mocked_for_backref_ ] # test whether both entities are listed in the backref attribute of the identifiable - st = SemanticTarget(entlist, crawler.identifiableAdapter) + st = SyncGraph(entlist, crawler.identifiableAdapter) identifiable = crawler.identifiableAdapter.get_identifiable( st.se[0], st.backward_id_referenced_by[st.se[0].uuid]) @@ -928,7 +928,7 @@ def test_detect_circular_dependency(crawler_mocked_identifiable_retrieve, caplog assert [id(el) for el in circle] == [id(el) for el in [a, c, b, a]] assert Crawler.detect_circular_dependency([d]) is None - st = SemanticTarget(flat, crawler.identifiableAdapter) + st = SyncGraph(flat, crawler.identifiableAdapter) with raises(RuntimeError): _, _ = crawler.split_into_inserts_and_updates(st) caplog.set_level(logging.ERROR, logger="caoscrawler.converters") @@ -1015,7 +1015,7 @@ def test_replace_name_with_referenced_entity(): def test_treated_record_lookup(): ident_adapter = CaosDBIdentifiableAdapter() - trlu = SemanticTarget([db.Record().add_parent( + trlu = SyncGraph([db.Record().add_parent( 'A'), db.Record().add_parent('A'), db.File()], ident_adapter) exist = trlu.se[0] miss = trlu.se[1] @@ -1074,5 +1074,5 @@ def test_merge_entity_with_identifying_reference(crawler_mocked_identifiable_ret b = db.Record(name='b').add_parent("C") c = db.Record(name='b').add_parent("C").add_property(name="C", value=a) flat = [a, c, b] - st = SemanticTarget(flat, crawler.identifiableAdapter) + st = SyncGraph(flat, crawler.identifiableAdapter) _, _ = crawler.split_into_inserts_and_updates(st) diff --git a/unittests/test_file_identifiables.py b/unittests/test_file_identifiables.py index 87afe916..29ad1c62 100644 --- a/unittests/test_file_identifiables.py +++ b/unittests/test_file_identifiables.py @@ -8,7 +8,7 @@ import caosdb as db import pytest from caoscrawler.identifiable import Identifiable from caoscrawler.identifiable_adapters import LocalStorageIdentifiableAdapter -from caoscrawler.semantic_target import SemanticEntity +from caoscrawler.sync_graph import SemanticEntity from caosdb.cached import cache_clear from caosdb.exceptions import EmptyUniqueQueryError from pytest import raises diff --git a/unittests/test_identifiable_adapters.py b/unittests/test_identifiable_adapters.py index dd6f40af..6f10189f 100644 --- a/unittests/test_identifiable_adapters.py +++ b/unittests/test_identifiable_adapters.py @@ -37,7 +37,7 @@ from caoscrawler.identifiable import Identifiable from caoscrawler.identifiable_adapters import (CaosDBIdentifiableAdapter, IdentifiableAdapter, convert_value) -from caoscrawler.semantic_target import SemanticEntity +from caoscrawler.sync_graph import SemanticEntity UNITTESTDIR = Path(__file__).parent diff --git a/unittests/test_semantic_target.py b/unittests/test_semantic_target.py index 14bd8452..e2d1509a 100644 --- a/unittests/test_semantic_target.py +++ b/unittests/test_semantic_target.py @@ -23,7 +23,7 @@ from unittest.mock import MagicMock, Mock, patch import linkahead as db from caoscrawler.identifiable_adapters import CaosDBIdentifiableAdapter -from caoscrawler.semantic_target import SemanticEntity, SemanticTarget +from caoscrawler.sync_graph import SemanticEntity, SyncGraph from test_crawler import basic_retrieve_by_name_mock_up, mock_get_entity_by @@ -33,7 +33,7 @@ def test_create_flat_list(): b = db.Record() a.add_property(name="a", value=a) a.add_property(name="b", value=b) - flat = SemanticTarget._create_flat_list([a]) + flat = SyncGraph._create_flat_list([a]) assert len(flat) == 2 assert a in flat assert b in flat @@ -41,7 +41,7 @@ def test_create_flat_list(): c.add_property(name="a", value=a) # This would caus recursion if it is not dealt with properly. a.add_property(name="c", value=c) - flat = SemanticTarget._create_flat_list([c]) + flat = SyncGraph._create_flat_list([c]) assert len(flat) == 3 assert a in flat assert b in flat @@ -56,7 +56,7 @@ def test_create_reference_mapping(): (forward_references, backward_references, forward_id_references, backward_id_references, forward_id_referenced_by, - backward_id_referenced_by) = SemanticTarget._create_reference_mapping( + backward_id_referenced_by) = SyncGraph._create_reference_mapping( ses, {id(a): ses[0], id(b): ses[1]} ) @@ -94,12 +94,12 @@ def test_create_reference_mapping(): assert backward_id_referenced_by[ses[1].uuid] == set() -def test_SemanticTarget(): +def test_SyncGraph(): a = db.Record().add_parent("A") ident_a = db.RecordType().add_parent("A").add_property("prop_ident") ident_adapter = CaosDBIdentifiableAdapter() ident_adapter.register_identifiable("A", ident_a) - st = SemanticTarget([a], ident_adapter) + st = SyncGraph([a], ident_adapter) def test_merge_into(): @@ -118,7 +118,7 @@ def test_merge_into(): ident_adapter.register_identifiable("RT1", ident_a) ident_adapter.register_identifiable("RT2", ident_b) - st = SemanticTarget([a, b], ident_adapter) + st = SyncGraph([a, b], ident_adapter) se_a = st.se_lookup[id(a)] se_b = st.se_lookup[id(b)] se_c = st.se_lookup[id(c)] @@ -188,7 +188,7 @@ def test_merge_into(): a = db.Record().add_parent("RT1").add_property('RT2', c) b = db.Record().add_parent("RT1").add_property('RT2', c) - st = SemanticTarget([a, b], ident_adapter) + st = SyncGraph([a, b], ident_adapter) se_a = st.se_lookup[id(a)] se_b = st.se_lookup[id(b)] se_c = st.se_lookup[id(c)] @@ -273,11 +273,11 @@ def test_backward_id_referenced_by(): referenced = db.Record(name="B").add_parent("C") entlist = [referenced, db.Record(name="A").add_parent("BR").add_property("ref", referenced), ] - st = SemanticTarget(entlist, ident_adapter) + st = SyncGraph(entlist, ident_adapter) assert st.se[1] in st.backward_id_referenced_by[st.se[0].uuid] -@patch("caoscrawler.semantic_target.cached_get_entity_by", +@patch("caoscrawler.sync_graph.cached_get_entity_by", new=Mock(side_effect=mock_get_entity_by)) def test_merging(): # identifying information can be given at various locations in the hierachical tree @@ -293,7 +293,7 @@ def test_merging(): entlist = [ db.Record(id=101).add_parent("A"), db.Record(id=101).add_parent("A")] - st = SemanticTarget(entlist, ident_adapter) + st = SyncGraph(entlist, ident_adapter) assert len(st.se) == 1 assert len(st.unchecked) == 0 assert entlist[0] in st.se[0].fragments @@ -303,7 +303,7 @@ def test_merging(): entlist = [ db.File(path='101').add_parent("A"), db.File(path='101').add_parent("A")] - st = SemanticTarget(entlist, ident_adapter) + st = SyncGraph(entlist, ident_adapter) assert len(st.se) == 1 assert len(st.unchecked) == 0 assert entlist[0] in st.se[0].fragments @@ -313,7 +313,7 @@ def test_merging(): entlist = [ db.File(name='101').add_parent("A").add_property('a', value=1), db.File(name='101').add_parent("A").add_property('a', value=1)] - st = SemanticTarget(entlist, ident_adapter) + st = SyncGraph(entlist, ident_adapter) assert len(st.unchecked) == 2 st.make_identifiable(st.se[0]) st.check_remote_server(st.se[0]) @@ -335,7 +335,7 @@ def test_merging(): db.Record(id=101).add_parent("A").add_property('a', value=1), db.Record(name='a').add_parent("A").add_property('a', value=1)] - st = SemanticTarget(entlist, ident_adapter) + st = SyncGraph(entlist, ident_adapter) assert len(st.se) == 2 assert len(st.unchecked) == 1 st.make_identifiable(st.se[1]) -- GitLab