diff --git a/src/newcrawler/crawl.py b/src/newcrawler/crawl.py index a0670eb149f82d348a515df4cc143f91e24e3e00..2fca174b3a6b7f7d73e4b3c81470e785f5f17daa 100644 --- a/src/newcrawler/crawl.py +++ b/src/newcrawler/crawl.py @@ -344,15 +344,23 @@ class Crawler(object): if isinstance(val, db.Entity): el.value[index] = val.id - def remove_unnecessary_updates(self, updateList: list[db.Record]): + @staticmethod + def remove_unnecessary_updates(updateList: list[db.Record], + identified_records: list[db.Record]): """ checks whether all relevant attributes (especially Property values) are equal + + Returns (in future) + ------- + update list without unnecessary updates + """ + if len(updateList) != len(identified_records): + raise RuntimeError("The lists of updates and of identified records need to be of the " + "same length!") + # TODO this can now easily be changed to a function without side effect for i in reversed(range(len(updateList))): - record = updateList[i] - identifiable = self.identifiableAdapter.retrieve_identifiable(record) - - comp = compare_entities(record, identifiable) + comp = compare_entities(updateList[i], identified_records[i]) identical = True for j in range(2): # TODO: should be implemented elsewhere (?) 
@@ -364,6 +372,8 @@ class Crawler(object): break for key in comp[0]["properties"]: for attribute in ("datatype", "importance", "unit"): + # only make an update for those attributes if there is a value difference and + # the value in the updateList is not None if (attribute in comp[0]["properties"][key] and comp[0]["properties"][key][attribute] is not None and comp[0]["properties"][key][attribute] != @@ -405,7 +415,9 @@ class Crawler(object): for el in to_be_updated: self.replace_entities_by_ids(el) - self.remove_unnecessary_updates(to_be_updated) + identified_records = [self.identifiableAdapter.retrieve_identifiable(record) for record + in to_be_updated] + self.remove_unnecessary_updates(to_be_updated, identified_records) # TODO # self.execute_inserts_in_list(to_be_inserted) diff --git a/src/newcrawler/identifiable_adapters.py b/src/newcrawler/identifiable_adapters.py index 01eb55bbb3ec4b54a49633ef839b2ed99ab5b398..b4d43044c1e8f45c36142a87e9d95722b8eefec8 100644 --- a/src/newcrawler/identifiable_adapters.py +++ b/src/newcrawler/identifiable_adapters.py @@ -26,6 +26,7 @@ import caosdb as db from abc import abstractmethod from .utils import get_value, has_parent +from caosdb.common.datatype import is_reference class IdentifiableAdapter(object): diff --git a/unittests/test_tool.py b/unittests/test_tool.py index 23912ff133fdb7dceb1805907d24a57578fc63ee..5336af167da99665d1704a262d4903f3fa0b7fc0 100755 --- a/unittests/test_tool.py +++ b/unittests/test_tool.py @@ -295,29 +295,41 @@ def test_crawler_update_list(crawler, ident): assert len(updl) == 0 -def test_identifiable_update(crawler, ident): - # change one value in updateList and then run the synchronization: - meas = [r for r in crawler.updateList if r.parents[0].name == "Measurement"][0] - meas.get_property("responsible").value = [] - insl, updl = crawler.synchronize() - assert len(updl) == 1 - - -def test_identifiable_update2(crawler, ident): - # change one unit in updateList and then run the synchronization: - meas 
= [r for r in crawler.updateList if r.parents[0].name == "Measurement"][0] - meas.get_property("description").unit = "cm" - insl, updl = crawler.synchronize() - assert len(updl) == 1 - - -def test_identifiable_update3(crawler, ident): - # change values of multiple records in updateList and then run the synchronization: - meas = [r for r in crawler.updateList if r.parents[0].name == "Measurement"] - meas[0].get_property("responsible").value = [] - meas[3].get_property("responsible").value = [] - insl, updl = crawler.synchronize() - assert len(updl) == 2 +def test_remove_unnecessary_updates(): + # test trivial case + upl = [db.Record().add_parent("A")] + irs = [db.Record().add_parent("A")] + Crawler.remove_unnecessary_updates(upl, irs) + assert len(upl) == 0 + + # test property difference case + # TODO this should work right? + #upl = [db.Record().add_parent("A").add_property("a", 3)] + # irs = [db.Record().add_parent("A")] # ID should be s + #Crawler.remove_unnecessary_updates(upl, irs) + #assert len(upl) == 1 + + # test value difference case + upl = [db.Record().add_parent("A").add_property("a", 5)] + irs = [db.Record().add_parent("A").add_property("a")] + Crawler.remove_unnecessary_updates(upl, irs) + assert len(upl) == 1 + upl = [db.Record().add_parent("A").add_property("a", 5)] + irs = [db.Record().add_parent("A").add_property("a", 5)] + Crawler.remove_unnecessary_updates(upl, irs) + assert len(upl) == 0 + + # test unit difference case + upl = [db.Record().add_parent("A").add_property("a", unit='cm')] + irs = [db.Record().add_parent("A").add_property("a")] + Crawler.remove_unnecessary_updates(upl, irs) + assert len(upl) == 1 + + # test None difference case + upl = [db.Record().add_parent("A").add_property("a")] + irs = [db.Record().add_parent("A").add_property("a", 5)] + Crawler.remove_unnecessary_updates(upl, irs) + assert len(upl) == 1 def test_identifiable_adapter():