diff --git a/CHANGELOG.md b/CHANGELOG.md index 5c1ccfd72a026fd855d713743e7ceb7c9f5a8549..d07084cc5df0e360a869cdb946a830761831d743 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,6 +30,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed ### +- identifiables of a single CFood are now processed one after the other. This + allows them to depend on each other, provided they are listed in the + correct order - identifiables must have at least one property or a name * `caosadvancedtools.serverside.helper.init_data_model` also checks the role and data type of entities. diff --git a/integrationtests/extroot/ExperimentalData/2010_TestProject/2019-02-04/README.md b/integrationtests/extroot/ExperimentalData/2010_TestProject/2019-02-04/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7de3bd15d29b93085322250a06adb9b8f389f8e4 --- /dev/null +++ b/integrationtests/extroot/ExperimentalData/2010_TestProject/2019-02-04/README.md @@ -0,0 +1,5 @@ +--- +responsible: +- Tom Wood +description: Something. +... 
diff --git a/integrationtests/filldb.sh b/integrationtests/filldb.sh index 98d22347bd2d40e8384a2a217452fd3ba5bc445f..9f55365eb595537b43caa9b197c8bc31ea1e69cb 100755 --- a/integrationtests/filldb.sh +++ b/integrationtests/filldb.sh @@ -7,4 +7,5 @@ python3 -m caosadvancedtools.loadFiles /opt/caosdb/mnt/extroot/SimulationData python3 -m caosadvancedtools.loadFiles /opt/caosdb/mnt/extroot/Publications python3 -m caosadvancedtools.loadFiles /opt/caosdb/mnt/extroot/Software python3 insert_model.py +python3 insert_some.py python3 crawl.py / diff --git a/integrationtests/insert_some.py b/integrationtests/insert_some.py new file mode 100644 index 0000000000000000000000000000000000000000..cf16a45ddf1f95ed261af1d9f18edfa1cbf4b450 --- /dev/null +++ b/integrationtests/insert_some.py @@ -0,0 +1,28 @@ +#!/usr/bin/env python3 +import caosdb as db +from caosadvancedtools.scifolder.experiment_cfood import dm + +# This inserts a project, a person and an experiment that references both. +# Without dependencies among identifiables, these could not all be found: +# the experiment identifiable would e.g. not reference the correct project. +project = db.Record(name='2010_TestProject') +project.add_parent(name=dm.Project) +project.insert() + +pers = db.Record() +pers.add_parent("Person") +pers.add_property("lastname", "Wood") +pers.add_property("firstname", "Tom") +pers.insert() + +experiment = db.Record() +experiment.add_parent(name=dm.Experiment) +experiment.description = "Something." 
+experiment.add_property( + name=dm.date, value='2019-02-04') +experiment.add_property(name=dm.Project, value=project) +experiment.add_property( + name="identifier", value="empty_identifier") +experiment.add_property( + name="responsible", value=pers) +experiment.insert(flags={"force-missing-obligatory": "ignore"}) diff --git a/integrationtests/test.sh b/integrationtests/test.sh index a56b758421a059a0cc3461c08600c13ffd93705c..80a2afe307ab1da628faa39d4ba8ef93bc6959e4 100755 --- a/integrationtests/test.sh +++ b/integrationtests/test.sh @@ -3,6 +3,7 @@ OUT=/tmp/crawler.output ls cat pycaosdb.ini rm -rf cache.db +set -e echo "Clearing database" python3 clear_database.py echo "Testing crawler without cfoods" @@ -29,7 +30,6 @@ echo "run crawler" # rename the moved file mv extroot/DataAnalysis/2010_TestProject/2019-02-03_something/README.xlsx_back extroot/DataAnalysis/2010_TestProject/2019-02-03_something/README.xlsx # check whether there was something UNAUTHORIZED -set -e grep "There where unauthorized changes" $OUT # get the id of the run which is the last field of the output string RUN_ID=$(grep "run id:" $OUT | awk '{ print $NF }') diff --git a/integrationtests/test_crawler_with_cfoods.py b/integrationtests/test_crawler_with_cfoods.py index c39c3fc67d7ca30e3d013ac205ef398de216ad9c..7dc19240cbd27f0e6e7cd6cc145c7e12704df0d2 100755 --- a/integrationtests/test_crawler_with_cfoods.py +++ b/integrationtests/test_crawler_with_cfoods.py @@ -34,6 +34,14 @@ def get_entity_with_id(eid): class CrawlerTest(unittest.TestCase): def test_experiment(self): + + ######################## + # # dummy for dependency test experiment # # + ######################## + exp = db.execute_query( + "FIND Experiment with date=2019-02-04 and identifier=empty_identifier", + unique=True) + ######################## # # first experiment # # ######################## diff --git a/src/caosadvancedtools/crawler.py b/src/caosadvancedtools/crawler.py index 
747c533d7a4652434f967147d8a53d1847cfbb4e..05471e94a05689bf3a63c4925a9394e116bebe0b 100644 --- a/src/caosadvancedtools/crawler.py +++ b/src/caosadvancedtools/crawler.py @@ -576,44 +576,51 @@ carefully and if the changes are ok, click on the following link: # looking for matching entities in CaosDB when there is no valid id # i.e. there was none set from a cache + existing = [] + inserted = [] + for ent in identifiables: if ent.id is None or ent.id < 0: logger.debug("Looking for: {}".format( ent.id if ent.id is not None else ent.name)) - existing = Crawler.find_existing(ent) + found = Crawler.find_existing(ent) - if existing is not None: - ent.id = existing.id + if found is not None: + ent.id = found.id else: logger.debug("Id is known of: {}".format(ent)) - # insert missing, i.e. those which are not valid - missing_identifiables = db.Container() - missing_identifiables.extend([ent for ent in identifiables - if ent.id is None or ent.id < 0]) - # TODO the following should not be necessary. Fix it - - for ent in missing_identifiables: - ent.id = None + # insert missing, i.e. 
those which are not valid + if ent.id is None or ent.id < 0: + missing = ent + ent.id = None + else: + missing = None + existing.append(ent) - if len(missing_identifiables) > 0: - info = "Going to insert the following entities:\n" + if missing: + try: + guard.safe_insert(missing, unique=False, + flags={"force-missing-obligatory": "ignore"}) + inserted.append(ent) + except Exception as e: + DataModelProblems.evaluate_exception(e) + if len(existing) > 0: + info = "Identified the following existing entities:\n" - for ent in missing_identifiables: + for ent in existing: info += str(ent)+"\n" logger.debug(info) + else: + logger.debug("Did not identify any existing entities") + if len(inserted) > 0: + info = "Inserted the following entities:\n" - if len(missing_identifiables) == 0: - logger.debug("No new entities to be inserted.") + for ent in inserted: + info += str(ent)+"\n" + logger.debug(info) else: - try: - logger.info( - "Inserting {} Records...".format( - len(missing_identifiables))) - guard.safe_insert(missing_identifiables, unique=False, - flags={"force-missing-obligatory": "ignore"}) - except Exception as e: - DataModelProblems.evaluate_exception(e) + logger.debug("Did not insert any new entities") logger.debug("Retrieving entities from CaosDB...") identifiables.retrieve(unique=True, raise_exception_on_error=False) diff --git a/src/caosadvancedtools/scifolder/experiment_cfood.py b/src/caosadvancedtools/scifolder/experiment_cfood.py index 0eccd18d9481b0bbb91c75d63c849e69e0c6572b..38606b5f8ffd372d7bf6f507ed96738d9345f16c 100644 --- a/src/caosadvancedtools/scifolder/experiment_cfood.py +++ b/src/caosadvancedtools/scifolder/experiment_cfood.py @@ -78,7 +78,7 @@ class ExperimentCFood(AbstractFileCFood, WithREADME): self.experiment, self.project = ( ExperimentCFood.create_identifiable_experiment(self.match)) - self.identifiables.extend([self.experiment, self.project]) + self.identifiables.extend([self.project, self.experiment]) self.people = 
parse_responsibles(self.header) self.identifiables.extend(self.people)