diff --git a/integrationtests/basic_example/test_basic.py b/integrationtests/basic_example/test_basic.py index 55ba8c2267b96560212a1ecffbdafcd84908a519..c906a81d86af56669f7c522169bceb3b5fcb3e01 100755 --- a/integrationtests/basic_example/test_basic.py +++ b/integrationtests/basic_example/test_basic.py @@ -139,7 +139,6 @@ def test_single_insertion(clear_database, usemodel, crawler, ident): # xml.remove(xml.find(tag)) # f.write(db.common.utils.xml2str(xml)) - breakpoint() assert len(ins) == 18 assert len(ups) == 0 diff --git a/src/caoscrawler/identifiable_adapters.py b/src/caoscrawler/identifiable_adapters.py index dcf5babddcc66b920973f76d0d48ab3ee4e6a230..903327424e8cbf0c93ec9c6fc1bbba2fb6612f48 100644 --- a/src/caoscrawler/identifiable_adapters.py +++ b/src/caoscrawler/identifiable_adapters.py @@ -295,7 +295,8 @@ startswith: bool, optional if isinstance(el, db.Entity): options[ii] = el.id if el.id is None: - raise RuntimeError("reference to unchecked in identifiable") + raise RuntimeError("Reference to unchecked in identifiable:\n" + f"{prop.name}:\n{el}") else: options[ii] = el if not all([f == options[0] for f in options]): diff --git a/src/caoscrawler/semantic_target.py b/src/caoscrawler/semantic_target.py index 5a852eb19c8d31431c5464771cf775363f61fbf5..3c8112e0afe14cabc30c2f5f31bbf55015e7dd98 100644 --- a/src/caoscrawler/semantic_target.py +++ b/src/caoscrawler/semantic_target.py @@ -123,9 +123,7 @@ class SemanticTarget(): self.se_lookup: Dict[str, SemanticEntity] = {} # lookup: UUID -> SemanticEntity entities = self._create_flat_list(entities) self._sanity_check(entities) - print("ids") for el in entities: - print(el.id) self.se.append(SemanticEntity( el, self.identifiableAdapter.get_registered_identifiable(el))) @@ -206,10 +204,6 @@ class SemanticTarget(): return self._id_look_up[entity.id] if entity.path is not None and entity.path in self._path_look_up: return self._path_look_up[entity.path] - for e in self.se: - if e.identifiable is not None: - print(e.identifiable._create_hashable_string(e.identifiable)) - print(self._identifiable_look_up) if (entity.identifiable is not None and entity.identifiable.get_representation() in self._identifiable_look_up): return self._identifiable_look_up[entity.identifiable.get_representation()] @@ -440,7 +434,6 @@ class SemanticTarget(): """ A path or an ID is sufficiently identifying. Thus, those entities can be marked as checked """ for semantic_entity in list(self.se[::-1]): - print(semantic_entity.uuid) assert len(semantic_entity.fragments) == 1 entity = semantic_entity.fragments[0] if entity.id is None and entity.path is None: @@ -504,6 +497,8 @@ class SemanticTarget(): """ for f in source.fragments: self.se_lookup[id(f)] = target + f.id = target.id + f.path = target.path target.include(source) if target.identifiable is None and not self.identity_relies_on_unchecked_entity(target): try: diff --git a/unittests/test_crawler.py b/unittests/test_crawler.py index 85564705adff2646a618e1e781341cfe6309aeb0..f5552918cd5f21047fc8088d04c42f024c128d27 100644 --- a/unittests/test_crawler.py +++ b/unittests/test_crawler.py @@ -556,7 +556,6 @@ def test_synchronization_no_commit(upmock, insmock): def test_security_mode(updateCacheMock, upmock, insmock): # trivial case: nothing to do crawled_data = [r.copy() for r in EXAMPLE_SERVER_STATE if r.role == "Record"] - print(crawled_data) crawler = Crawler(securityMode=SecurityMode.RETRIEVE) crawler.synchronize(commit_changes=True, crawled_data=crawled_data) assert crawler.run_id is not None