diff --git a/integrationtests/test_issues.py b/integrationtests/test_issues.py index d385dd644e67b791adb2c90ef6f79ec71835e0f7..f0f2a7876f9db85aeff768c3659915ef2b0cb9f5 100644 --- a/integrationtests/test_issues.py +++ b/integrationtests/test_issues.py @@ -16,23 +16,29 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see <https://www.gnu.org/licenses/>. # -from pytest import fixture +from pytest import fixture, mark import caosdb as db +from caoscrawler.crawl import Crawler +from caoscrawler.identifiable_adapters import CaosDBIdentifiableAdapter +from caoscrawler.structure_elements import Dict + @fixture def clear_database(): db.execute_query("FIND ENTITY").delete(raise_exception_on_error=False) +@mark.xfail(reason="See issue https://gitlab.com/caosdb/caosdb-crawler/-/issues/23") def test_issue_23(clear_database): """Test that an update leaves existing properties, that were not found by the crawler, unchanged. """ - # insert a simplistic model + # insert a simplistic model and a record of type TestType with identifying + # property and prop_a, but not prop_b. prop_ident = db.Property(name="identifying_prop", datatype=db.TEXT) prop_a = db.Property(name="prop_a", datatype=db.TEXT) prop_b = db.Property(name="prop_b", datatype=db.TEXT) @@ -41,3 +47,81 @@ def test_issue_23(clear_database): rec.add_property(name="identifying_prop", value="identifier") rec.add_property(name="prop_a", value="something") db.Container().extend([prop_ident, prop_a, prop_b, rt, rec]).insert() + + # set up crawler, first cfood defining a TestType record with + # identifying_prop and prop_b, but not prop_a ... 
+ crawler_definition = { + "DictTest": { + "type": "Dict", + "match": "(.*)", + "records": { + "TestType": {} + }, + "subtree": { + "identifying_element": { + "type": "DictTextElement", + "match_name": "ident", + "match_value": "(?P<ident_value>.*)", + "records": { + "TestType": { + "identifying_prop": "$ident_value" + } + } + }, + "other_element": { + "type": "DictTextElement", + "match_name": "prop_b", + "match_value": "(?P<other_value>.*)", + "records": { + "TestType": { + "prop_b": "$other_value" + } + } + } + } + } + } + + # register identifiable for TestType + ident = CaosDBIdentifiableAdapter() + ident.register_identifiable("TestType", db.RecordType().add_parent( + name="TestType").add_property(name="identifying_prop")) + + crawler = Crawler(debug=True, identifiableAdapter=ident) + converter_registry = crawler.load_converters(crawler_definition) + + # the dictionary to be crawled... + test_dict = { + "ident": "identifier", + "prop_b": "something_else" + } + + records = crawler.start_crawling( + Dict("TestDict", test_dict), crawler_definition, converter_registry) + + assert len(records) == 1 + rec_crawled = records[0] + assert rec_crawled.parents[0].name == "TestType" + assert rec_crawled.get_property("identifying_prop") is not None + assert rec_crawled.get_property("identifying_prop").value == "identifier" + assert rec_crawled.get_property("prop_b") is not None + assert rec_crawled.get_property("prop_b").value == "something_else" + # no interaction with the database yet, so the record shouldn't have a prop_a yet + assert rec_crawled.get_property("prop_a") is None + + # synchronize with database and update the record + ins, ups = crawler.synchronize() + assert len(ins) == 0 + assert len(ups) == 1 + + # retrieve and check that name and properties have been combined correctly + rec_retrieved = db.Record(id=rec.id).retrieve() + assert rec_retrieved.name == rec.name + assert rec_retrieved.get_property( + "identifying_prop").value == 
rec.get_property("identifying_prop").value + assert rec_retrieved.get_property( + "prop_a").value == rec.get_property("prop_a").value + assert rec_retrieved.get_property( + "identifying_prop").value == rec_crawled.get_property("identifying_prop").value + assert rec_retrieved.get_property( + "prop_b").value == rec_crawled.get_property("prop_b").value