From d0bc3645440834a02d7d2b4046f1c3f31a338ec1 Mon Sep 17 00:00:00 2001 From: Alexander Schlemmer <alexander@mail-schlemmer.de> Date: Thu, 16 Dec 2021 11:52:33 +0100 Subject: [PATCH] FIX: properties of identifiables are resolved now correctly --- src/newcrawler/crawl.py | 25 ++++++++++++-- src/newcrawler/identifiable_adapters.py | 44 ++++++++++++++++++------- unittests/test_tool.py | 6 +++- 3 files changed, 59 insertions(+), 16 deletions(-) diff --git a/src/newcrawler/crawl.py b/src/newcrawler/crawl.py index 6b31f31a..c9e85187 100644 --- a/src/newcrawler/crawl.py +++ b/src/newcrawler/crawl.py @@ -106,7 +106,6 @@ class Crawler(object): self.generalStore = generalStore if generalStore is None: self.generalStore = GeneralStore() - self.identifiableAdapter = identifiableAdapter if identifiableAdapter is None: @@ -175,7 +174,6 @@ class Crawler(object): self.debug_converters = self.global_converters + local_converters return self.updateList - def synchronize(self, updateList: list[db.Record]): """ @@ -207,6 +205,12 @@ class Crawler(object): # Walk backwards through list, so that deletion is possible: for i in reversed(range(len(updateList))): record = updateList[i] + + if record.parents[0].name == "Measurement": + breakpoint() + + # resolve references first: + self.identifiableAdapter.resolve_references(record) identifiable = self.identifiableAdapter.retrieve_identifiable(record) # if there is no identifiable, move record from update list to insert list: @@ -227,17 +231,32 @@ class Crawler(object): identical = True for j in range(2): # TODO: should be implemented elsewhere - for label in ("properties", "parents"): + for label in ("parents", ): if len(comp[j][label]) > 0: identical = False break if not identical: break + for key in comp[0]["properties"]: + for attribute in ("datatype", "importance"): + if ("datatype" in comp[0]["properties"][key] and + comp[0]["properties"][key]["datatype"] is not None and + comp[1]["properties"][key]["datatype"] != + comp[1]["properties"][key]["datatype"]): + identical = False + break + + if "value" in comp[0]["properties"][key]: + identical = False + + if not identical: + break if identical: del updateList[i] continue else: + breakpoint() pass return (insertList, updateList) diff --git a/src/newcrawler/identifiable_adapters.py b/src/newcrawler/identifiable_adapters.py index f306a27c..5f4ad70a 100644 --- a/src/newcrawler/identifiable_adapters.py +++ b/src/newcrawler/identifiable_adapters.py @@ -95,6 +95,9 @@ class IdentifiableAdapter(object): """ pass + @abstractmethod + def resolve_references(self, record: db.Record): + pass def get_identifiable(self, record: db.Record): registered_identifiable = self.get_registered_identifiable(record) @@ -241,20 +244,10 @@ class LocalStorageIdentifiableAdapter(IdentifiableAdapter): raise NotImplementedError("Non-identifiable references cannot" " be used as properties in identifiables.") - value_identifiable = self.retrieve_identified_record(prop.value) - if value_identifiable is None: - raise RuntimeError("The identifiable which is used as property" - " here has to be inserted first.") - - if value_identifiable.id is None: - raise RuntimeError("The entity has not been assigned an ID.") - - prop.value = value_identifiable.id + raise RuntimeError("The identifiable which is used as property" + " here has to be inserted first.") - if prop.value != prop_record.value: - if prop.name == "project": - breakpoint() return False return True @@ -268,3 +261,30 @@ class LocalStorageIdentifiableAdapter(IdentifiableAdapter): if len(candidates) == 0: return None return candidates[0] + + def _resolve_reference(self, value: db.Entity): + registered = self.get_registered_identifiable(value) + + if registered is None: + raise NotImplementedError("Non-identifiable references cannot" + " be used as properties in identifiables.") + value_identifiable = self.retrieve_identified_record(value) + if value_identifiable is None: + raise RuntimeError("The identifiable which is used as property" + " here has to be inserted first.") + + if value_identifiable.id is None: + raise RuntimeError("The entity has not been assigned an ID.") + + return value_identifiable.id + + def resolve_references(self, record: db.Record): + for prop in record.properties: + if isinstance(prop.value, db.Entity): + prop.value = self._resolve_reference(prop.value) + + if isinstance(prop.value, list): + for element_index in range(len(prop.value)): + element = prop.value[element_index] + if isinstance(element, db.Entity): + prop.value[element_index] = self._resolve_reference(element) diff --git a/unittests/test_tool.py b/unittests/test_tool.py index 38280773..e5664461 100755 --- a/unittests/test_tool.py +++ b/unittests/test_tool.py @@ -19,6 +19,7 @@ from pytest import raises # - DONE: provenance in structure elements and converters for properties of records # - TODO: list whether information from structure elements and converters was used + def rfp(*pathcomponents): """ Return full path. @@ -26,12 +27,14 @@ def rfp(*pathcomponents): """ return join(dirname(__file__), *pathcomponents) + def dircheckstr(*pathcomponents): """ Return the debug tree identifier for a given path. """ return "newcrawler.structure_elements.Directory: " + basename(join(*pathcomponents)) + ", " + rfp("test_directories", "examples_article", *pathcomponents) + def test_crawler(): crawler = Crawler(debug=True) crawler.crawl_directory(rfp("test_directories", "examples_article"), @@ -291,16 +294,17 @@ def test_crawler_update_list(): assert len(comp[1]["properties"]) == 0 insl, updl = crawler.synchronize(crawler.updateList) + breakpoint() assert len(insl) == 0 assert len(updl) == 0 + def test_identifiable_adapter(): query = IdentifiableAdapter.create_query_for_identifiable( db.Record().add_parent("Person") .add_property("first_name", value="A") .add_property("last_name", value="B")) assert query.lower() == "find record person with 'first_name'='a' and 'last_name'='b' " - def test_provenance_debug_data(): -- GitLab