diff --git a/src/newcrawler/crawl.py b/src/newcrawler/crawl.py index 946637d27ee4ed6a89dcd1af1969cb5115cae71b..55011148b0493f18b07810e403e55e2671d9ebbc 100644 --- a/src/newcrawler/crawl.py +++ b/src/newcrawler/crawl.py @@ -344,15 +344,23 @@ class Crawler(object): if isinstance(val, db.Entity): el.value[index] = val.id - def remove_unnecessary_updates(self, updateList: list[db.Record]): + @staticmethod + def remove_unnecessary_updates(updateList: list[db.Record], + identified_records: list[db.Record]): """ checks whether all relevant attributes (especially Property values) are equal + + Returns (in future) + ------- + update list without unnecessary updates + """ + if len(updateList) != len(identified_records): + raise RuntimeError("The lists of updates and of identified records need to be of the " + "same length!") + # TODO this can now easily be changed to a function without side effect for i in reversed(range(len(updateList))): - record = updateList[i] - identifiable = self.identifiableAdapter.retrieve_identifiable(record) - - comp = compare_entities(record, identifiable) + comp = compare_entities(updateList[i], identified_records[i]) identical = True for j in range(2): # TODO: should be implemented elsewhere (?) 
@@ -364,7 +372,8 @@ class Crawler(object): break for key in comp[0]["properties"]: for attribute in ("datatype", "importance", "unit"): - + # only make an update for those attributes if there is a value difference and + # the value in the updateList is not None if attribute in comp[0]["properties"][key]: attr_val = comp[0]["properties"][key][attribute] other_attr_val = (comp[1]["properties"][key][attribute] @@ -407,7 +416,9 @@ class Crawler(object): for el in to_be_updated: self.replace_entities_by_ids(el) - self.remove_unnecessary_updates(to_be_updated) + identified_records = [self.identifiableAdapter.retrieve_identifiable(record) for record + in to_be_updated] + self.remove_unnecessary_updates(to_be_updated, identified_records) # TODO # self.execute_inserts_in_list(to_be_inserted) diff --git a/src/newcrawler/identifiable_adapters.py b/src/newcrawler/identifiable_adapters.py index cc57bb672c4872d72f4011858e0f49200fb77101..d64b46e951b12c3ba39af1f7eb35c62771eef4a0 100644 --- a/src/newcrawler/identifiable_adapters.py +++ b/src/newcrawler/identifiable_adapters.py @@ -26,6 +26,8 @@ from datetime import datetime import caosdb as db from abc import abstractmethod +from .utils import get_value, has_parent +from caosdb.common.datatype import is_reference from .utils import has_parent diff --git a/unittests/test_tool.py b/unittests/test_tool.py index 1af55808edfda547f097d5f1d4acd18bf5e56ec3..4b55de8749d3651e4e21482bd903c7da62a96d58 100755 --- a/unittests/test_tool.py +++ b/unittests/test_tool.py @@ -242,29 +242,41 @@ def test_crawler_update_list(crawler, ident): assert len(updl) == 0 -def test_identifiable_update(crawler, ident): - # change one value in updateList and then run the synchronization: - meas = [r for r in crawler.updateList if r.parents[0].name == "Measurement"][0] - meas.get_property("responsible").value = [] - insl, updl = crawler.synchronize() - assert len(updl) == 1 - - -def test_identifiable_update2(crawler, ident): - # change one unit in updateList and 
then run the synchronization: - meas = [r for r in crawler.updateList if r.parents[0].name == "Measurement"][0] - meas.get_property("description").unit = "cm" - insl, updl = crawler.synchronize() - assert len(updl) == 1 - - -def test_identifiable_update3(crawler, ident): - # change values of multiple records in updateList and then run the synchronization: - meas = [r for r in crawler.updateList if r.parents[0].name == "Measurement"] - meas[0].get_property("responsible").value = [] - meas[3].get_property("responsible").value = [] - insl, updl = crawler.synchronize() - assert len(updl) == 2 +def test_remove_unnecessary_updates(): + # test trivial case + upl = [db.Record().add_parent("A")] + irs = [db.Record().add_parent("A")] + Crawler.remove_unnecessary_updates(upl, irs) + assert len(upl) == 0 + + # test property difference case + # TODO this should work right? + #upl = [db.Record().add_parent("A").add_property("a", 3)] + # irs = [db.Record().add_parent("A")] # ID should be s + #Crawler.remove_unnecessary_updates(upl, irs) + #assert len(upl) == 1 + + # test value difference case + upl = [db.Record().add_parent("A").add_property("a", 5)] + irs = [db.Record().add_parent("A").add_property("a")] + Crawler.remove_unnecessary_updates(upl, irs) + assert len(upl) == 1 + upl = [db.Record().add_parent("A").add_property("a", 5)] + irs = [db.Record().add_parent("A").add_property("a", 5)] + Crawler.remove_unnecessary_updates(upl, irs) + assert len(upl) == 0 + + # test unit difference case + upl = [db.Record().add_parent("A").add_property("a", unit='cm')] + irs = [db.Record().add_parent("A").add_property("a")] + Crawler.remove_unnecessary_updates(upl, irs) + assert len(upl) == 1 + + # test None difference case + upl = [db.Record().add_parent("A").add_property("a")] + irs = [db.Record().add_parent("A").add_property("a", 5)] + Crawler.remove_unnecessary_updates(upl, irs) + assert len(upl) == 1 @pytest.mark.xfail