from caosadvancedtools.models.parser import parse_model_from_string


def mock_retrieve_record(identifiable: Identifiable):
    """Return the first Record from EXAMPLE_SERVER_STATE whose "date" property
    matches the identifiable, or None.

    Assumes that the identifiable is always only the date.
    """
    for record in EXAMPLE_SERVER_STATE:
        if (record.role == "Record" and "date" in identifiable.properties
                and record.get_property("date").value == identifiable.properties['date']):
            return record
    return None


def mock_cached_only_rt(query_string: str):
    """Mock for ``cached_query``.

    Returns an empty Container for any Record/File query; returns the
    RecordType "A" (id=1) or "B" (id=2) for the corresponding
    ``FIND RECORDTYPE`` queries.  Raises NotImplementedError for anything
    else so unexpected queries surface immediately in the test.
    """
    result = db.Container()
    lo_query = query_string.lower()
    if lo_query.startswith("find record ") or lo_query.startswith("find file "):
        return result
    model = parse_model_from_string("""
B:
  obligatory_properties:
    C:
      obligatory_properties:
        prop_other:
          datatype: INTEGER
  prop_ident:
    datatype: INTEGER
A:
  obligatory_properties:
    B:
      datatype: LIST<B>
    prop_ident:
""")
    if query_string == "FIND RECORDTYPE 'A'":
        model.get_deep("A").id = 1
        return result + [model.get_deep("A")]
    if query_string == "FIND RECORDTYPE 'B'":
        # BUG FIX: this branch returns RecordType "B" but previously set the id
        # on get_deep("A"), leaving the returned "B" without an id.
        model.get_deep("B").id = 2
        return result + [model.get_deep("B")]
    print(query_string)
    raise NotImplementedError("Mock for this case is missing")


@pytest.fixture(autouse=True)
def clear_cache():
    # Reset the linkahead query cache before every test so mocked query
    # results cannot leak between tests.
    cache_clear()
@pytest.mark.xfail(reason="https://gitlab.com/linkahead/linkahead-crawler/-/issues/88")
@patch("caoscrawler.identifiable_adapters.cached_query",
       new=Mock(side_effect=mock_cached_only_rt))
def test_split_iiau_with_unmergeable_list_items():
    """Test for meaningful exception when referencing a list of unmergeable entities.

Datamodel
---------
A:
  B: LIST<B>
  prop_ident: INTEGER

B:
  prop_ident:
  C:

C:
  prop_other: INTEGER

Identifiables
-------------

id_A: [prop_ident]
id_B: [prop_ident, "is_referenced_by: A"]

Data
----

b1: ("same", 23)
b2: ("same", 42)

a: ([b1, b2])
    """
    prop_ident = db.Property("prop_ident", datatype=db.INTEGER)
    # BUG FIX: this property must be named "prop_other" (it is what the C
    # identifiable below is registered on); it was mistakenly created with
    # the name "prop_ident".
    prop_other = db.Property("prop_other", datatype=db.INTEGER)
    rt_c = db.RecordType("C").add_property(prop_other)
    rt_b = db.RecordType("B").add_property(prop_ident).add_property("C")
    rt_a = db.RecordType("A").add_property(prop_ident).add_property("LIST<B>")

    # Registered identifiables: A by prop_ident; B by prop_ident plus a
    # back-reference from A; C by prop_other plus a back-reference from B.
    ident_a = db.RecordType().add_parent("A").add_property("prop_ident")
    ident_b = db.RecordType().add_parent("B").add_property("prop_ident").add_property(
        "is_referenced_by", value="A")
    ident_c = db.RecordType().add_parent("C").add_property("prop_other").add_property(
        "is_referenced_by", value="B")

    # Two B records share the same identifying value (2020) but reference
    # different C records (23 vs. 42) -> they are NOT mergeable.
    rec_a = db.Record("a").add_parent(rt_a).add_property("prop_ident", value=1234)
    rec_b = []
    rec_c = []
    for value in [23, 42]:
        new_c = db.Record().add_parent(rt_c).add_property("prop_other", value=value)
        rec_c.append(new_c)
        rec_b.append(db.Record().add_parent(rt_b).add_property(
            "prop_ident", value=2020).add_property("C", value=new_c))
    rec_a.add_property("B", rec_b)

    ident_adapter = CaosDBIdentifiableAdapter()
    ident_adapter.register_identifiable("A", ident_a)
    ident_adapter.register_identifiable("B", ident_b)
    ident_adapter.register_identifiable("C", ident_c)

    crawler = Crawler(identifiableAdapter=ident_adapter)

    # This should give a merge conflict, and not
    # "Could not find referencing entities of type(s): A"
    with raises(RuntimeError) as rte:
        crawler.synchronize(commit_changes=False,
                            crawled_data=[rec_a, *rec_b, *rec_c])
    assert not isinstance(rte.value, NotImplementedError), \
        "Exception must not be NotImplementedError, but plain RuntimeError."
    # BUG FIX: `rte.something` is not an attribute of pytest's ExceptionInfo;
    # the exception message is found in rte.value.args[0].
    assert "Could not find referencing entities" not in rte.value.args[0]
    assert "merging impossible" in rte.value.args[0]