From 3db75326e10021b182a40e0ed3d031ab6e30a431 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20tom=20W=C3=B6rden?= <h.tomwoerden@indiscale.com> Date: Mon, 13 Jan 2025 14:29:21 +0100 Subject: [PATCH] ENH: allow inheritance of identifiables --- src/caoscrawler/identifiable_adapters.py | 56 +++++++++++++++++++----- src/doc/concepts.rst | 17 ++++++- unittests/test_identifiable_adapters.py | 56 +++++++++++++++++++++++- 3 files changed, 116 insertions(+), 13 deletions(-) diff --git a/src/caoscrawler/identifiable_adapters.py b/src/caoscrawler/identifiable_adapters.py index 592f603b..2d7b0158 100644 --- a/src/caoscrawler/identifiable_adapters.py +++ b/src/caoscrawler/identifiable_adapters.py @@ -45,6 +45,12 @@ from .utils import has_parent logger = logging.getLogger(__name__) +def _retrieve_RecordType(id=None, name =None): + """ + Retrieve the RecordType from LinkAhead. For mocking purposes. + """ + return db.RecordType(name=name, id=id).retrieve() + def get_children_of_rt(rtname): """Supply the name of a recordtype. This name and the name of all children RTs are returned in a list""" @@ -586,8 +592,7 @@ class CaosDBIdentifiableAdapter(IdentifiableAdapter): self.load_from_yaml_object(identifiable_data) def load_from_yaml_object(self, identifiable_data): - """Load identifiables defined in a yaml object. - """ + """Load identifiables defined in a yaml object. """ for rt_name, id_list in identifiable_data.items(): rt = db.RecordType().add_parent(rt_name) @@ -611,7 +616,7 @@ class CaosDBIdentifiableAdapter(IdentifiableAdapter): self.register_identifiable(rt_name, rt) def register_identifiable(self, name: str, definition: db.RecordType): - self._registered_identifiables[name] = definition + self._registered_identifiables[name.lower()] = definition def get_file(self, identifiable: Identifiable): warnings.warn( @@ -630,20 +635,51 @@ class CaosDBIdentifiableAdapter(IdentifiableAdapter): return None return candidates[0] - def get_registered_identifiable(self, record: db.Entity): + def get_registered_identifiable(self, entity: db.Entity): """ returns the registered identifiable for the given Record It is assumed, that there is exactly one identifiable for each RecordType. Only the first parent of the given Record is considered; others are ignored """ - if len(record.parents) == 0: + if len(entity.parents) == 0: + return None + registerd = [] + for parent in entity.parents: + prt = _retrieve_RecordType(id=parent.id, name=parent.name) + reg = self._get_registered_for_rt(prt) + if reg is not None: + registerd.append(reg) + if len(registerd) > 1: + raise RuntimeError("Multiple registered identifiables found.") + elif len(registerd) == 1: + return registerd[0] + else: return None - # TODO We need to treat the case where multiple parents exist properly. - rt_name = record.parents[0].name - for name, definition in self._registered_identifiables.items(): - if definition.parents[0].name.lower() == rt_name.lower(): - return definition + + + def _get_registered_for_rt(self, rt): + """ + returns the registered identifiable for the given RecordType or the + registered identifiable of the first parent + """ + if rt.name.lower() in self._registered_identifiables: + return self._registered_identifiables[rt.name.lower()] + if len(rt.parents) == 0: + return None + registerd = [] + for parent in rt.parents: + prt = _retrieve_RecordType(id=parent.id, name=parent.name) + registerd.append(self._get_registered_for_rt(prt)) + if len(registerd) > 1: + raise RuntimeError("Multiple registered identifiables found.") + elif len(registerd) == 1: + return registerd[0] + else: + return None + + + def retrieve_identified_record_for_identifiable(self, identifiable: Identifiable): query_string = self.create_query_for_identifiable(identifiable) diff --git a/src/doc/concepts.rst b/src/doc/concepts.rst index b3aa02a1..f6a7c547 100644 --- a/src/doc/concepts.rst +++ b/src/doc/concepts.rst @@ -79,7 +79,7 @@ A Registered Identifiable is the blue print for Identifiables. You can think of registered identifiables as identifiables without concrete values for properties. RegisteredIdentifiables are associated with RecordTypes and define of what information an identifiable for that RecordType -exists. There can be multiple Registered Identifiables for one RecordType. +exists. There cannot be multiple Registered Identifiables for one RecordType. If identifiables shall contain references to the object to be identified, the Registered Identifiable must list the RecordTypes of the Entities that have those references. @@ -94,6 +94,21 @@ reference the object to be identified. You can also use the wildcard "*" as RecordType name in the configuration which will only require, that ANY Record references the Record at hand. +If a Record has multiple parents, only one of them must have an registered identifiable. + +Reasoning: +If there are mutliple registered identifiables that could be used to identify a given record, then only a single +one of them is used, it might be that the existence check returns a different result than if another one would +be used. This would allow for unpredictable and inconsistent behavior(Example: one registered identifiable +contains the name another one property date. Using the name might imply that the record does not exist and using +the date might imply that it does. Thus, for any Record the registered identifiable must be unique). +Anlogous Example: If you tinnk in the context, of relational databases, there can always only be a foreign key +associated with one table. + +When no registered identifiable exist for the direct parents, registered identifiables may be used +from their parents. If multiple recordtypes exist in the inheritance chain with a registered identifiable, then +the one that is closest to the direct parent is used. In case of multiple inheritance, only one branch must have registered identifiables. + Identified Records ++++++++++++++++++ diff --git a/unittests/test_identifiable_adapters.py b/unittests/test_identifiable_adapters.py index bdc0ab85..57a1c856 100644 --- a/unittests/test_identifiable_adapters.py +++ b/unittests/test_identifiable_adapters.py @@ -44,6 +44,19 @@ from caoscrawler.sync_graph import SyncNode UNITTESTDIR = Path(__file__).parent + +def mock_retrieve_RecordType(id, name): + return { + "Person": db.RecordType(name="Person"), + "Keyword": db.RecordType(name="Keyword"), + "Project": db.RecordType(name="Project"), + "A": db.RecordType(name="A"), + "Experiment": db.RecordType(name="Experiment"), + "Lab": db.RecordType(name="Lab"), + "Analysis": db.RecordType(name="Analysis"), + "Measurement": db.RecordType(name="Measurement").add_parent("Experiment") + }[name] + def test_create_query_for_identifiable(): query = IdentifiableAdapter.create_query_for_identifiable( Identifiable(record_type="Person", properties={"first_name": "A", "last_name": "B"})) @@ -99,7 +112,8 @@ def test_create_query_for_identifiable(): Identifiable(record_type="record type", name="it's weird")) assert query == ("FIND RECORD 'record type' WITH name='it\\'s weird'") - +@patch("caoscrawler.identifiable_adapters._retrieve_RecordType", + new=Mock(side_effect=mock_retrieve_RecordType)) def test_load_from_yaml_file(): ident = CaosDBIdentifiableAdapter() ident.load_from_yaml_definition( @@ -174,7 +188,8 @@ def test_convert_value(): assert convert_value(A()) == " a " - +@patch("caoscrawler.identifiable_adapters._retrieve_RecordType", + new=Mock(side_effect=mock_retrieve_RecordType)) def test_get_identifiable(): ident = CaosDBIdentifiableAdapter() ident.load_from_yaml_definition(UNITTESTDIR / "example_identifiables.yml") @@ -283,3 +298,40 @@ def test_referencing_entity_has_appropriate_type(): assert rft(dummy.parents, registered_identifiable) registered_identifiable.properties[0].value = ["B", "*"] assert rft(dummy.parents, registered_identifiable) + +@patch("caoscrawler.identifiable_adapters._retrieve_RecordType", + new=Mock(side_effect=mock_retrieve_RecordType)) +def test_get_registered_identifiable(): + # Test the case that the record has a parent for which an identifiable is registered + ident = CaosDBIdentifiableAdapter() + ident.load_from_yaml_definition(UNITTESTDIR / "example_identifiables.yml") + rec = db.Record().add_parent(name="Experiment") + registered = ident.get_registered_identifiable(rec) + assert registered is not None + assert registered.parents[0].name == "Experiment" + + # Test the same but with an additional parent + rec = db.Record().add_parent(name="Lab").add_parent(name="Experiment") + registered = ident.get_registered_identifiable(rec) + assert registered is not None + assert registered.parents[0].name == "Experiment" + + + # Test the same but with an additional parent that also has a registered identifiable + rec = db.Record().add_parent(name="Analysis").add_parent(name="Experiment") + with pytest.raises(RuntimeError): + registered = ident.get_registered_identifiable(rec) + + # Test the same but with an additional parent that also has a registered identifiable + rec = db.Record().add_parent(name="Measurement").add_parent(name="Experiment") + with pytest.raises(RuntimeError): + registered = ident.get_registered_identifiable(rec) + + # Test the case that the record has a parent for which no identifiable is registered + # and there is a registered identifiable for a grand parent + ident = CaosDBIdentifiableAdapter() + ident.load_from_yaml_definition(UNITTESTDIR / "example_identifiables.yml") + rec = db.Record().add_parent(name="Measurement") + registered = ident.get_registered_identifiable(rec) + assert registered is not None + assert registered.parents[0].name == "Experiment" \ No newline at end of file -- GitLab