diff --git a/src/doc/getting_started/furtherreading.rst b/src/doc/getting_started/furtherreading.rst
index eb600416c1fce3857d28fc2e856ceabebb3a8bb7..8d8d3ecc4b5575f71e90e9e5a17b060a63403a07 100644
--- a/src/doc/getting_started/furtherreading.rst
+++ b/src/doc/getting_started/furtherreading.rst
@@ -6,3 +6,4 @@ Further reading
 - Some useful examples can be found in the `integration tests
   <https://gitlab.com/caosdb/caosdb-crawler/-/tree/main/integrationtests>`_ (and to a certain
   extent in the unit tests).
+- TODO: Information on caching
diff --git a/unittests/test_crawler.py b/unittests/test_crawler.py
index cf6129c39a62778c26b6ae78710bb79461c6e30f..e0c2631e39c1a946492d26b4236101aca258ce59 100644
--- a/unittests/test_crawler.py
+++ b/unittests/test_crawler.py
@@ -113,6 +113,14 @@ def mock_get_entity_by(eid=None, name=None, path=None):
     raise EmptyUniqueQueryError("")


+def basic_retrieve_by_name_mock_up(rec, referencing_entities=None, known=None):
+    """ returns a stored Record if rec.name is an existing key, None otherwise """
+    if rec.name in known:
+        return known[rec.name]
+    else:
+        return None
+
+
 def mock_retrieve_record(identifiable: Identifiable):
     """ assumes that the identifiable is always only the date"""

@@ -159,22 +167,6 @@ def clear_cache():
     cache_clear()


-@pytest.mark.filterwarnings("ignore::DeprecationWarning")
-def test_constructor():
-    with warnings.catch_warnings(record=True) as w:
-        # Cause all warnings to always be triggered.
-        warnings.filterwarnings("ignore")
-        warnings.filterwarnings("always", category=DeprecationWarning)
-
-        Crawler(debug=True)
-        assert issubclass(w[-1].category, DeprecationWarning)
-        assert "The debug argument of the Crawler class" in str(w[-1].message)
-
-        Crawler(generalStore=GeneralStore())
-        assert issubclass(w[-1].category, DeprecationWarning)
-        assert "The generalStore argument of the Crawler" in str(w[-1].message)
-
-
 @pytest.fixture
 def crawler_mocked_identifiable_retrieve():
     crawler = Crawler()
@@ -193,8 +185,55 @@
     return crawler


+@pytest.fixture
+def crawler_mocked_for_backref_test():
+    crawler = Crawler()
+    # mock retrieval of registered identifiables: return Record with just a parent
+
+    def get_reg_ident(x):
+        if x.parents[0].name == "C":
+            return db.Record().add_parent(x.parents[0].name).add_property(
+                "is_referenced_by", value=["BR"]).add_property("name")
+        elif x.parents[0].name == "D":
+            return db.Record().add_parent(x.parents[0].name).add_property(
+                "is_referenced_by", value=["BR", "BR2"]).add_property("name")
+        else:
+            return db.Record().add_parent(x.parents[0].name).add_property("name")
+    crawler.identifiableAdapter.get_registered_identifiable = Mock(side_effect=get_reg_ident)
+
+    # Simulate remote server content by using the names to identify records
+    # There is only a single known Record with name A
+    crawler.identifiableAdapter.retrieve_identified_record_for_record = Mock(side_effect=partial(
+        basic_retrieve_by_name_mock_up, known={"A":
+                                               db.Record(id=1111, name="A").add_parent("BR")}))
+    crawler.identifiableAdapter.retrieve_identified_record_for_identifiable = Mock(
+        side_effect=partial(
+            basic_retrieve_by_name_mock_up, known={"A":
+                                                   db.Record(id=1111, name="A").add_parent("BR")}))
+    return crawler
+
+
+@pytest.mark.filterwarnings("ignore::DeprecationWarning")
+def test_constructor():
+    # tests that appropriate DeprecationWarnings are triggered by the constructor when deprecated
+    # arguments are being passed.
+    with warnings.catch_warnings(record=True) as w:
+        # Cause all warnings to always be triggered.
+        warnings.filterwarnings("ignore")
+        warnings.filterwarnings("always", category=DeprecationWarning)
+
+        Crawler(debug=True)
+        assert issubclass(w[-1].category, DeprecationWarning)
+        assert "The debug argument of the Crawler class" in str(w[-1].message)
+
+        Crawler(generalStore=GeneralStore())
+        assert issubclass(w[-1].category, DeprecationWarning)
+        assert "The generalStore argument of the Crawler" in str(w[-1].message)
+
+
 @pytest.mark.filterwarnings("ignore::DeprecationWarning")
 def test_deprecated_functions():
+    # tests that appropriate DeprecationWarnings are triggered by deprecated methods
     with warnings.catch_warnings(record=True) as w:
         # Cause all warnings to always be triggered.
         warnings.filterwarnings("ignore")
@@ -239,38 +278,37 @@ def test_check_whether_parent_exists():

 def test_remove_unnecessary_updates():
     # test trvial case
-    upl = [db.Record().add_parent("A")]
-    irs = [db.Record().add_parent("A")]
-    updates = Crawler.remove_unnecessary_updates(upl, irs)
+    crawled_data = [db.Record().add_parent("A")]
+    identified_records = [db.Record().add_parent("A")]
+    updates = Crawler.remove_unnecessary_updates(crawled_data, identified_records)
     assert len(updates) == 0

     # test property difference case
-    # TODO this should work right?
-    # upl = [db.Record().add_parent("A").add_property("a", 3)]
-    # irs = [db.Record().add_parent("A")]  # ID should be s
-    # Crawler.remove_unnecessary_updates(upl, irs)
-    # assert len(upl) == 1
+    crawled_data = [db.Record().add_parent("A").add_property("a", 3)]
+    identified_records = [db.Record().add_parent("A")]  # ID should be s
+    Crawler.remove_unnecessary_updates(crawled_data, identified_records)
+    assert len(crawled_data) == 1

     # test value difference case
-    upl = [db.Record().add_parent("A").add_property("a", 5)]
-    irs = [db.Record().add_parent("A").add_property("a")]
-    updates = Crawler.remove_unnecessary_updates(upl, irs)
+    crawled_data = [db.Record().add_parent("A").add_property("a", 5)]
+    identified_records = [db.Record().add_parent("A").add_property("a")]
+    updates = Crawler.remove_unnecessary_updates(crawled_data, identified_records)
     assert len(updates) == 1
-    upl = [db.Record().add_parent("A").add_property("a", 5)]
-    irs = [db.Record().add_parent("A").add_property("a", 5)]
-    updates = Crawler.remove_unnecessary_updates(upl, irs)
+    crawled_data = [db.Record().add_parent("A").add_property("a", 5)]
+    identified_records = [db.Record().add_parent("A").add_property("a", 5)]
+    updates = Crawler.remove_unnecessary_updates(crawled_data, identified_records)
     assert len(updates) == 0

     # test unit difference case
-    upl = [db.Record().add_parent("A").add_property("a", unit='cm')]
-    irs = [db.Record().add_parent("A").add_property("a")]
-    updates = Crawler.remove_unnecessary_updates(upl, irs)
+    crawled_data = [db.Record().add_parent("A").add_property("a", unit='cm')]
+    identified_records = [db.Record().add_parent("A").add_property("a")]
+    updates = Crawler.remove_unnecessary_updates(crawled_data, identified_records)
     assert len(updates) == 1

     # test None difference case
-    upl = [db.Record().add_parent("A").add_property("a")]
-    irs = [db.Record().add_parent("A").add_property("a", 5)]
-    updates = Crawler.remove_unnecessary_updates(upl, irs)
+    crawled_data = [db.Record().add_parent("A").add_property("a")]
+    identified_records = [db.Record().add_parent("A").add_property("a", 5)]
+    updates = Crawler.remove_unnecessary_updates(crawled_data, identified_records)
     assert len(updates) == 1

@@ -280,28 +318,16 @@ def test_split_into_inserts_and_updates_trivial():
     crawler.split_into_inserts_and_updates(st)


-def test_split_into_inserts_and_updates_unidentified(crawler_mocked_identifiable_retrieve):
-    crawler = crawler_mocked_identifiable_retrieve
-    with raises(MissingIdentifyingProperty) as err:
-        st = SyncGraph([db.Record().add_parent("someparent")], crawler.identifiableAdapter)
-    assert str(err.value).startswith("The node has no name.")
-
-
-def basic_retrieve_by_name_mock_up(rec, referencing_entities=None, known=None):
-    """ returns a stored Record if rec.name is an existing key, None otherwise """
-    if rec.name in known:
-        return known[rec.name]
-    else:
-        return None
-
-
-def test_split_into_inserts_and_updates_single(crawler_mocked_identifiable_retrieve):
+def test_split_into_inserts_and_updates_simple(crawler_mocked_identifiable_retrieve):
+    # basic test that checks whether two records are correctly sorted to update and insert based on
+    # whether an entity can be found using the identifiable
     crawler = crawler_mocked_identifiable_retrieve
     identlist = [Identifiable(name="A", record_type="C"), Identifiable(name="B", record_type="C")]
     entlist = [db.Record(name="A").add_parent("C"), db.Record(name="B").add_parent("C")]

     st = SyncGraph(entlist, crawler.identifiableAdapter)

+    # check setup
     assert crawler.identifiableAdapter.retrieve_identified_record_for_record(
         identlist[0]).id == 1111
     assert crawler.identifiableAdapter.retrieve_identified_record_for_record(
@@ -317,43 +343,6 @@ def test_split_into_inserts_and_updates_single(crawler_mocked_identifiable_retri
     crawler.identifiableAdapter.retrieve_identified_record_for_identifiable.assert_called()


-def test_split_into_inserts_and_updates_with_duplicate(crawler_mocked_identifiable_retrieve):
-    crawler = crawler_mocked_identifiable_retrieve
-    a = db.Record(name="A").add_parent("C")
-    b = db.Record(name="B").add_parent("C")
-    b.add_property("A", a)
-    # This is identical to a and should be removed
-    c = db.Record(name="A").add_parent("C")
-    entlist = [a, b, c]
-    st = SyncGraph(entlist, crawler.identifiableAdapter)
-    insert, update = crawler.split_into_inserts_and_updates(st)
-    assert len(insert) == 1
-    assert insert[0].name == "B"
-    assert len(update) == 1
-    assert update[0].name == "A"
-    # if this ever fails, the mock up may be removed
-    crawler.identifiableAdapter.get_registered_identifiable.assert_called()
-    crawler.identifiableAdapter.retrieve_identified_record_for_identifiable.assert_called()
-
-
-def test_split_into_inserts_and_updates_with_ref(crawler_mocked_identifiable_retrieve):
-    crawler = crawler_mocked_identifiable_retrieve
-    # try it with a reference
-    a = db.Record(name="A").add_parent("C")
-    b = db.Record(name="B").add_parent("C")
-    b.add_property("A", a)
-    entlist = [a, b]
-    st = SyncGraph(entlist, crawler.identifiableAdapter)
-    insert, update = crawler.split_into_inserts_and_updates(st)
-    assert len(insert) == 1
-    assert insert[0].name == "B"
-    assert len(update) == 1
-    assert update[0].name == "A"
-    # if this ever fails, the mock up may be removed
-    crawler.identifiableAdapter.get_registered_identifiable.assert_called()
-    crawler.identifiableAdapter.retrieve_identified_record_for_identifiable.assert_called()
-
-
 def test_split_into_inserts_and_updates_with_circ():
     # try circular
     a = db.Record(name="A").add_parent("C")
@@ -649,34 +638,6 @@ def test_security_mode(updateCacheMock, upmock, insmock):
     crawled_data[-1] = EXAMPLE_SERVER_STATE[-1].copy()


-@ pytest.fixture
-def crawler_mocked_for_backref_test():
-    crawler = Crawler()
-    # mock retrieval of registered identifiabls: return Record with just a parent
-
-    def get_reg_ident(x):
-        if x.parents[0].name == "C":
-            return db.Record().add_parent(x.parents[0].name).add_property(
-                "is_referenced_by", value=["BR"]).add_property("name")
-        elif x.parents[0].name == "D":
-            return db.Record().add_parent(x.parents[0].name).add_property(
-                "is_referenced_by", value=["BR", "BR2"]).add_property("name")
-        else:
-            return db.Record().add_parent(x.parents[0].name).add_property("name")
-    crawler.identifiableAdapter.get_registered_identifiable = Mock(side_effect=get_reg_ident)
-
-    # Simulate remote server content by using the names to identify records
-    # There is only a single known Record with name A
-    crawler.identifiableAdapter.retrieve_identified_record_for_record = Mock(side_effect=partial(
-        basic_retrieve_by_name_mock_up, known={"A":
-                                               db.Record(id=1111, name="A").add_parent("BR")}))
-    crawler.identifiableAdapter.retrieve_identified_record_for_identifiable = Mock(
-        side_effect=partial(
-            basic_retrieve_by_name_mock_up, known={"A":
-                                                   db.Record(id=1111, name="A").add_parent("BR")}))
-    return crawler
-
-
 def test_validation_error_print(caplog):
     caplog.set_level(logging.DEBUG, logger="caoscrawler.converters")
     # there should be no server interaction since we only test the behavior if a validation error
diff --git a/unittests/test_sync_graph.py b/unittests/test_sync_graph.py
index d477e65a603ff0b98e179d6a1b5064d23e958d66..1fdab27b15a34939376d6b015fc8d7a185efb671 100644
--- a/unittests/test_sync_graph.py
+++ b/unittests/test_sync_graph.py
@@ -486,10 +486,10 @@ def test_merging(simple_adapter):
     assert '101' == st.nodes[0].path
     assert "A" == st.nodes[0].parents[0].name

-    # merging based on identifiable
+    # merging based on identifiable (non-identifying properties are ignored)
     ent_list = [
-        db.File(name='101').add_parent("A").add_property('a', value=1),
-        db.File(name='101').add_parent("A").add_property('a', value=1)]
+        db.File(name='101').add_parent("A").add_property('a', value=1).add_property('b', value=1),
+        db.File(name='101').add_parent("A").add_property('a', value=1).add_property('b', value=2)]
     st = SyncGraph(ent_list, ident_adapter)
     assert len(st.nodes) == 1
     assert st.nodes[0].id is None
@@ -539,7 +539,10 @@ def test_merging(simple_adapter):
     assert len(st.unchecked) == 0


-def test_something(simple_adapter):
+def test_update_of_reference_values(simple_adapter):
+    # Multiple nodes are merged, including one that is referenced.
+    # Ensure that the value of the referencing node's property is still updated
+    # when the id is set (i.e., the value object is replaced appropriately).
     a = db.Record().add_parent("RT3").add_property('a', value=1)
     ent_list = [
         a,
@@ -557,3 +560,25 @@ def test_something(simple_adapter):
     st.set_id_of_node(st.nodes[0], 101)
     b_prop = st.nodes[1].properties[0].value
     assert b_prop.id == 101
+
+
+def test_ignoring_irrelevant_references(simple_adapter):
+    # make sure that a circle of references is no problem if one reference is not identifying
+    b = db.Record(name='b').add_parent("RT5")
+    a = db.Record().add_parent("RT3").add_property('a', value=b)
+    b.add_property('a', value=a)
+    ent_list = [a, b]
+    st = SyncGraph(ent_list, simple_adapter)
+    assert len(st.nodes) == 2
+    assert len(st.unchecked) == 2
+    assert st.nodes[1].name == 'b'
+
+    # a relies on b
+    assert st._identity_relies_on_unchecked_entity(st.nodes[0])
+    # b relies on nothing
+    assert not st._identity_relies_on_unchecked_entity(st.nodes[1])
+    # set ID of b
+    st.set_id_of_node(st.nodes[1], 101)
+    assert len(st.unchecked) == 1
+    # now a no longer relies on unchecked
+    assert not st._identity_relies_on_unchecked_entity(st.nodes[0])