From 18c02891a64b5208097b01f6eff00e171fc718bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20tom=20W=C3=B6rden?= <h.tomwoerden@indiscale.com> Date: Tue, 6 Jul 2021 14:25:57 +0000 Subject: [PATCH] Treat dependencies among identifiables --- CHANGELOG.md | 3 + .../2010_TestProject/2019-02-04/README.md | 5 ++ integrationtests/filldb.sh | 1 + integrationtests/insert_some.py | 28 ++++++++++ integrationtests/test.sh | 2 +- integrationtests/test_crawler_with_cfoods.py | 8 +++ src/caosadvancedtools/crawler.py | 55 +++++++++++-------- .../scifolder/experiment_cfood.py | 2 +- 8 files changed, 78 insertions(+), 26 deletions(-) create mode 100644 integrationtests/extroot/ExperimentalData/2010_TestProject/2019-02-04/README.md create mode 100644 integrationtests/insert_some.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 5c1ccfd7..d07084cc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,6 +30,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed ### +- identifiables of single CFoods are now treated one after the other. This + allows them to have dependencies among each other if they are ordered + correctly - identifiables must have at least one property or a name * `caosadvancedtools.serverside.helper.init_data_model` also checks the role and data type of entities. diff --git a/integrationtests/extroot/ExperimentalData/2010_TestProject/2019-02-04/README.md b/integrationtests/extroot/ExperimentalData/2010_TestProject/2019-02-04/README.md new file mode 100644 index 00000000..7de3bd15 --- /dev/null +++ b/integrationtests/extroot/ExperimentalData/2010_TestProject/2019-02-04/README.md @@ -0,0 +1,5 @@ +--- +responsible: +- Tom Wood +description: Something. +... diff --git a/integrationtests/filldb.sh b/integrationtests/filldb.sh index 98d22347..9f55365e 100755 --- a/integrationtests/filldb.sh +++ b/integrationtests/filldb.sh @@ -7,4 +7,5 @@ python3 -m caosadvancedtools.loadFiles /opt/caosdb/mnt/extroot/SimulationData python3 -m caosadvancedtools.loadFiles /opt/caosdb/mnt/extroot/Publications python3 -m caosadvancedtools.loadFiles /opt/caosdb/mnt/extroot/Software python3 insert_model.py +python3 insert_some.py python3 crawl.py / diff --git a/integrationtests/insert_some.py b/integrationtests/insert_some.py new file mode 100644 index 00000000..cf16a45d --- /dev/null +++ b/integrationtests/insert_some.py @@ -0,0 +1,28 @@ +#!/usr/bin/env python3 +import caosdb as db +from caosadvancedtools.scifolder.experiment_cfood import dm + +# This inserts two identifiables. When no dependencies are possible among +# identifiables, it should not be possible to find both: the experiment +# identifiable would for example not reference the correct project Record +project = db.Record(name='2010_TestProject') +project.add_parent(name=dm.Project) +project.insert() + +pers = db.Record() +pers.add_parent("Person") +pers.add_property("lastname", "Wood") +pers.add_property("firstname", "Tom") +pers.insert() + +experiment = db.Record() +experiment.add_parent(name=dm.Experiment) +experiment.description = "Something." +experiment.add_property( + name=dm.date, value='2019-02-04') +experiment.add_property(name=dm.Project, value=project) +experiment.add_property( + name="identifier", value="empty_identifier") +experiment.add_property( + name="responsible", value=pers) +experiment.insert(flags={"force-missing-obligatory": "ignore"}) diff --git a/integrationtests/test.sh b/integrationtests/test.sh index a56b7584..80a2afe3 100755 --- a/integrationtests/test.sh +++ b/integrationtests/test.sh @@ -3,6 +3,7 @@ OUT=/tmp/crawler.output ls cat pycaosdb.ini rm -rf cache.db +set -e echo "Clearing database" python3 clear_database.py echo "Testing crawler without cfoods" @@ -29,7 +30,6 @@ echo "run crawler" # rename the moved file mv extroot/DataAnalysis/2010_TestProject/2019-02-03_something/README.xlsx_back extroot/DataAnalysis/2010_TestProject/2019-02-03_something/README.xlsx # check whether there was something UNAUTHORIZED -set -e grep "There where unauthorized changes" $OUT # get the id of the run which is the last field of the output string RUN_ID=$(grep "run id:" $OUT | awk '{ print $NF }') diff --git a/integrationtests/test_crawler_with_cfoods.py b/integrationtests/test_crawler_with_cfoods.py index c39c3fc6..7dc19240 100755 --- a/integrationtests/test_crawler_with_cfoods.py +++ b/integrationtests/test_crawler_with_cfoods.py @@ -34,6 +34,14 @@ def get_entity_with_id(eid): class CrawlerTest(unittest.TestCase): def test_experiment(self): + + ######################## + # # dummy for dependency test experiment # # + ######################## + exp = db.execute_query( + "FIND Experiment with date=2019-02-04 and identifier=empty_identifier", + unique=True) + ######################## # # first experiment # # ######################## diff --git a/src/caosadvancedtools/crawler.py b/src/caosadvancedtools/crawler.py index 747c533d..05471e94 100644 --- a/src/caosadvancedtools/crawler.py +++ b/src/caosadvancedtools/crawler.py @@ -576,44 +576,51 @@ carefully and if the changes are ok, click on the following link: # looking for matching entities in CaosDB when there is no valid id # i.e. there was none set from a cache + existing = [] + inserted = [] + for ent in identifiables: if ent.id is None or ent.id < 0: logger.debug("Looking for: {}".format( ent.id if ent.id is not None else ent.name)) - existing = Crawler.find_existing(ent) + found = Crawler.find_existing(ent) - if existing is not None: - ent.id = existing.id + if found is not None: + ent.id = found.id else: logger.debug("Id is known of: {}".format(ent)) - # insert missing, i.e. those which are not valid - missing_identifiables = db.Container() - missing_identifiables.extend([ent for ent in identifiables - if ent.id is None or ent.id < 0]) - # TODO the following should not be necessary. Fix it - - for ent in missing_identifiables: - ent.id = None + # insert missing, i.e. those which are not valid + if ent.id is None or ent.id < 0: + missing = ent + ent.id = None + else: + missing = None + existing.append(ent) - if len(missing_identifiables) > 0: - info = "Going to insert the following entities:\n" + if missing: + try: + guard.safe_insert(missing, unique=False, + flags={"force-missing-obligatory": "ignore"}) + inserted.append(ent) + except Exception as e: + DataModelProblems.evaluate_exception(e) + if len(existing) > 0: + info = "Identified the following existing entities:\n" - for ent in missing_identifiables: + for ent in existing: info += str(ent)+"\n" logger.debug(info) + else: + logger.debug("Did not identify any existing entities") + if len(inserted) > 0: + info = "Inserted the following entities:\n" - if len(missing_identifiables) == 0: - logger.debug("No new entities to be inserted.") + for ent in inserted: + info += str(ent)+"\n" + logger.debug(info) else: - try: - logger.info( - "Inserting {} Records...".format( - len(missing_identifiables))) - guard.safe_insert(missing_identifiables, unique=False, - flags={"force-missing-obligatory": "ignore"}) - except Exception as e: - DataModelProblems.evaluate_exception(e) + logger.debug("Did not insert any new entities") logger.debug("Retrieving entities from CaosDB...") identifiables.retrieve(unique=True, raise_exception_on_error=False) diff --git a/src/caosadvancedtools/scifolder/experiment_cfood.py b/src/caosadvancedtools/scifolder/experiment_cfood.py index 0eccd18d..38606b5f 100644 --- a/src/caosadvancedtools/scifolder/experiment_cfood.py +++ b/src/caosadvancedtools/scifolder/experiment_cfood.py @@ -78,7 +78,7 @@ class ExperimentCFood(AbstractFileCFood, WithREADME): self.experiment, self.project = ( ExperimentCFood.create_identifiable_experiment(self.match)) - self.identifiables.extend([self.experiment, self.project]) + self.identifiables.extend([self.project, self.experiment]) self.people = parse_responsibles(self.header) self.identifiables.extend(self.people) -- GitLab