From 334b60297041e866350702c78960720e489c87c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20tom=20W=C3=B6rden?= <h.tomwoerden@indiscale.com> Date: Tue, 5 Jan 2021 16:14:23 +0000 Subject: [PATCH] ENH: Include Scifolder --- .docker/Dockerfile | 9 - .gitlab-ci.yml | 1 + CHANGELOG.md | 1 + README_SETUP.md | 5 - integrationtests/crawl.py | 5 +- integrationtests/insert_model.py | 2 +- .../test_crawl_with_datamodel_problems.py | 6 +- src/caosadvancedtools/import_from_xml.py | 2 +- src/caosadvancedtools/scifolder/__init__.py | 5 + .../scifolder/analysis_cfood.py | 129 +++++++++ .../scifolder/experiment_cfood.py | 105 +++++++ .../scifolder/generic_pattern.py | 35 +++ .../scifolder/publication_cfood.py | 112 ++++++++ .../scifolder/simulation_cfood.py | 109 +++++++ .../scifolder/software_cfood.py | 115 ++++++++ src/caosadvancedtools/scifolder/utils.py | 204 +++++++++++++ src/caosadvancedtools/scifolder/withreadme.py | 270 ++++++++++++++++++ src/doc/crawler.rst | 5 +- .../2019-02-03_something/README.md | 15 + .../2019-02-03_something/README.md | 9 + .../Posters/2019-02-03_something/README.md | 11 + unittests/data/README.md | 14 + unittests/data/README.xlsx | Bin 0 -> 5338 bytes .../2019-02-03_something/README.md | 12 + unittests/test_cfoods.py | 54 ++++ unittests/test_scifolder_utils.py | 67 +++++ 26 files changed, 1278 insertions(+), 24 deletions(-) create mode 100644 src/caosadvancedtools/scifolder/__init__.py create mode 100644 src/caosadvancedtools/scifolder/analysis_cfood.py create mode 100644 src/caosadvancedtools/scifolder/experiment_cfood.py create mode 100644 src/caosadvancedtools/scifolder/generic_pattern.py create mode 100644 src/caosadvancedtools/scifolder/publication_cfood.py create mode 100644 src/caosadvancedtools/scifolder/simulation_cfood.py create mode 100644 src/caosadvancedtools/scifolder/software_cfood.py create mode 100644 src/caosadvancedtools/scifolder/utils.py create mode 100644 src/caosadvancedtools/scifolder/withreadme.py create mode 100644 unittests/data/DataAnalysis/2010_TestProject/2019-02-03_something/README.md create mode 100644 unittests/data/ExperimentalData/2010_TestProject/2019-02-03_something/README.md create mode 100644 unittests/data/Publications/Posters/2019-02-03_something/README.md create mode 100644 unittests/data/README.md create mode 100644 unittests/data/README.xlsx create mode 100644 unittests/data/SimulationData/2010_TestProject/2019-02-03_something/README.md create mode 100644 unittests/test_cfoods.py create mode 100644 unittests/test_scifolder_utils.py diff --git a/.docker/Dockerfile b/.docker/Dockerfile index ca59395a..d5d2fe66 100644 --- a/.docker/Dockerfile +++ b/.docker/Dockerfile @@ -20,15 +20,6 @@ ADD https://gitlab.com/api/v4/projects/13656973/repository/branches/dev \ pylib_version.json RUN git clone https://gitlab.com/caosdb/caosdb-pylib.git && \ cd caosdb-pylib && git checkout dev && pip3 install . -ADD https://gitlab.com/api/v4/projects/13656965/repository/branches/master \ - model_version.json -RUN git clone https://gitlab.com/caosdb/caosdb-models.git && \ - cd caosdb-models && pip3 install . -ADD https://gitlab.com/api/v4/projects/13601752/repository/branches/master \ - scifolder_version.json -RUN git clone \ - https://gitlab.com/henrik_indiscale/scifolder.git && \ - cd scifolder && pip3 install . COPY . /git RUN rm -r /git/.git \ && mv /git/.docker/pycaosdb.ini /git/integrationtests diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 9f746e47..9b573a53 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -78,6 +78,7 @@ build-testenv: stage: setup only: - schedules + - web script: - df -h - docker login -u gitlab-ci-token -p $CI_JOB_TOKEN $CI_REGISTRY diff --git a/CHANGELOG.md b/CHANGELOG.md index cfebbbcf..106d703c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added ### +- included the scifolder module - included the caosmodels module * `send_mail` function in `caosadvancedtools.serverside.helper` module - New class to collect possible problems with the data model diff --git a/README_SETUP.md b/README_SETUP.md index b9db16a9..243fba2d 100644 --- a/README_SETUP.md +++ b/README_SETUP.md @@ -15,17 +15,12 @@ Dependencies will be installed automatically if you use the below described proc For testing: - `tox` -- `scifolder`from https://gitlab.com/henrik_indiscale/scifolder ## Installation - `pip install . --user` - `pip install tox --user` -In order to run the tests you need to install the [scifolder -package](https://gitlab.com/henrik_indiscale/scifolder) by Henrik tom -Wörden. - ## Run Unit Tests `tox` diff --git a/integrationtests/crawl.py b/integrationtests/crawl.py index e4bf311e..bf72b5f7 100755 --- a/integrationtests/crawl.py +++ b/integrationtests/crawl.py @@ -32,8 +32,9 @@ import caosdb as db from caosadvancedtools.cfood import fileguide from caosadvancedtools.crawler import FileCrawler from caosadvancedtools.guard import INSERT, UPDATE -from scifolder import (AnalysisCFood, ExperimentCFood, PublicationCFood, - SimulationCFood, SoftwareCFood) +from caosadvancedtools.scifolder import (AnalysisCFood, ExperimentCFood, + PublicationCFood, SimulationCFood, + SoftwareCFood) try: from sss_helper import get_argument_parser, print_success diff --git a/integrationtests/insert_model.py b/integrationtests/insert_model.py index 2289f72e..270a08a3 100755 --- a/integrationtests/insert_model.py +++ b/integrationtests/insert_model.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 import caosdb as db -from caosmodels.parser import parse_model_from_yaml +from caosadvancedtools.models.parser import parse_model_from_yaml model = parse_model_from_yaml("model.yml") model.sync_data_model(noquestion=True) diff --git a/integrationtests/test_crawl_with_datamodel_problems.py b/integrationtests/test_crawl_with_datamodel_problems.py index 3089bf4c..6c212e36 100644 --- a/integrationtests/test_crawl_with_datamodel_problems.py +++ b/integrationtests/test_crawl_with_datamodel_problems.py @@ -30,9 +30,9 @@ from caosadvancedtools.cfood import fileguide from caosadvancedtools.crawler import FileCrawler from caosadvancedtools.datamodel_problems import DataModelProblems from caosadvancedtools.guard import INSERT -from caosmodels.parser import parse_model_from_yaml -from scifolder import (AnalysisCFood, ExperimentCFood, PublicationCFood, - SimulationCFood) +from caosadvancedtools.models.parser import parse_model_from_yaml +from caosadvancedtools.scifolder import (AnalysisCFood, ExperimentCFood, + PublicationCFood, SimulationCFood) def setup_module(): diff --git a/src/caosadvancedtools/import_from_xml.py b/src/caosadvancedtools/import_from_xml.py index 9942a9a9..0bf9b1c0 100755 --- a/src/caosadvancedtools/import_from_xml.py +++ b/src/caosadvancedtools/import_from_xml.py @@ -33,7 +33,7 @@ from tempfile import NamedTemporaryFile import caosdb as db from caosdb.apiutils import apply_to_ids -from caosmodels.data_model import DataModel +from caosadvancedtools.models.data_model import DataModel def create_dummy_file(text="Please ask the administrator for this file."): diff --git a/src/caosadvancedtools/scifolder/__init__.py b/src/caosadvancedtools/scifolder/__init__.py new file mode 100644 index 00000000..d7d67937 --- /dev/null +++ b/src/caosadvancedtools/scifolder/__init__.py @@ -0,0 +1,5 @@ +from .analysis_cfood import AnalysisCFood +from .experiment_cfood import ExperimentCFood +from .publication_cfood import PublicationCFood +from .simulation_cfood import SimulationCFood +from .software_cfood import SoftwareCFood diff --git a/src/caosadvancedtools/scifolder/analysis_cfood.py b/src/caosadvancedtools/scifolder/analysis_cfood.py new file mode 100644 index 00000000..27cb871a --- /dev/null +++ b/src/caosadvancedtools/scifolder/analysis_cfood.py @@ -0,0 +1,129 @@ +#!/usr/bin/env python +# encoding: utf-8 +# +# Copyright (C) 2019 Henrik tom Wörden +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +import os +from itertools import chain + +import caosdb as db +from caosadvancedtools.cfood import (AbstractFileCFood, assure_has_parent, + assure_has_property, + assure_object_is_in_list, get_entity) +from caosadvancedtools.read_md_header import get_header + +from .generic_pattern import full_pattern +from .utils import (get_files_referenced_by_field, parse_responsibles, + reference_records_corresponding_to_files) +from .withreadme import DATAMODEL as dm +from .withreadme import (RESULTS, REVISIONOF, SCRIPTS, SOURCES, WithREADME, + get_glob) + + +class AnalysisCFood(AbstractFileCFood, WithREADME): + _prefix = ".*/DataAnalysis/" + + # win_paths can be used to define fields that will contain windows style + # path instead of the default unix ones. Possible fields are: + # ["results", "sources", "scripts","revisionOf"] + win_paths = [] + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + WithREADME.__init__(self) + + def collect_information(self): + self.find_referenced_files([RESULTS, SOURCES, SCRIPTS]) + + @staticmethod + def name_beautifier(name): + """ a function that can be used to rename the project. I.e. if + the project in CaosDB shall be named differently than in the folder + structure. + Use discouraged. + """ + + return name + + @staticmethod + def get_re(): + return AnalysisCFood._prefix + full_pattern + + def create_identifiables(self): + # create the project identifiable + name = AnalysisCFood.name_beautifier( + self.match.group("project_identifier")) + self.project = db.Record(name=name) + self.project.add_parent(name=dm.Project) + self.identifiables.append(self.project) + + # create the Analysis identifiable + self.analysis = db.Record() + self.analysis.add_parent(name=dm.Analysis) + self.analysis.add_property(name=dm.date, value=self.match.group("date")) + + self.analysis.add_property(name=dm.Project, value=self.project) + self.identifiables.append(self.analysis) + + if self.match.group("suffix") is not None: + self.analysis.add_property(name=dm.identifier, + value=self.match.group("suffix")) + else: + # TODO empty string causes an error in search + self.analysis.add_property(name=dm.identifier, + value="empty_identifier") + + # parse people and add them to identifiables + # TODO People are currently 'identifiable' due to ther first and last + # names. There will be conflicts + self.people = parse_responsibles(self.header) + self.identifiables.extend(self.people) + + def update_identifiables(self): + assure_has_property(self.analysis, "description", + self.header["description"][0], + to_be_updated=self.to_be_updated) + assure_object_is_in_list(obj=self.people, + containing_object=self.analysis, + property_name=dm.responsible, + to_be_updated=self.to_be_updated, + datatype=db.LIST(db.REFERENCE) + ) + self.reference_included_records(self.analysis, + [RESULTS, SOURCES, SCRIPTS], + to_be_updated=self.to_be_updated + ) + + if SOURCES.key in self.header: + reference_records_corresponding_to_files( + record=self.analysis, + recordtypes=[dm.Experiment, dm.Publication, dm.Simulation, + dm.Analysis], + globs=get_glob(self.header[SOURCES.key]), + property_name=dm.sources, + path=self.crawled_path, + to_be_updated=self.to_be_updated) + + self.reference_files_from_header(record=self.analysis) + + if REVISIONOF.key in self.header: + reference_records_corresponding_to_files( + record=self.analysis, + recordtypes=[dm.Analysis], + property_name=dm.revisionOf, + globs=get_glob(self.header[REVISIONOF.key]), + path=self.crawled_path, + to_be_updated=self.to_be_updated) diff --git a/src/caosadvancedtools/scifolder/experiment_cfood.py b/src/caosadvancedtools/scifolder/experiment_cfood.py new file mode 100644 index 00000000..0eccd18d --- /dev/null +++ b/src/caosadvancedtools/scifolder/experiment_cfood.py @@ -0,0 +1,105 @@ +#!/usr/bin/env python +# encoding: utf-8 +# +# Copyright (C) 2019 Henrik tom Wörden +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +import caosdb as db +from caosadvancedtools.cfood import (AbstractFileCFood, assure_has_description, + assure_has_parent, assure_has_property, + assure_object_is_in_list, get_entity) +from caosadvancedtools.read_md_header import get_header + +from .generic_pattern import full_pattern +from .utils import parse_responsibles, reference_records_corresponding_to_files +from .withreadme import DATAMODEL as dm +from .withreadme import RESULTS, REVISIONOF, SCRIPTS, WithREADME, get_glob + + +class ExperimentCFood(AbstractFileCFood, WithREADME): + + # win_paths can be used to define fields that will contain windows style + # path instead of the default unix ones. Possible fields are: + # ["results", "revisionOf"] + win_paths = [] + + @staticmethod + def name_beautifier(x): return x + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + WithREADME.__init__(self) + + self.name_map = {}, + + @staticmethod + def get_re(): + return ".*/ExperimentalData/"+full_pattern + + def collect_information(self): + self.find_referenced_files([RESULTS]) + + @staticmethod + def create_identifiable_experiment(match): + # create the project identifiable + name = ExperimentCFood.name_beautifier( + match.group("project_identifier")) + project = db.Record(name=name) + project.add_parent(name=dm.Project) + + experiment = db.Record() + experiment.add_parent(name=dm.Experiment) + experiment.add_property( + name=dm.date, value=match.group("date")) + experiment.add_property(name=dm.Project, value=project) + + if match.group("suffix") is None: + experiment.add_property( + name="identifier", value="empty_identifier") + else: + experiment.add_property(name="identifier", + value=match.group("suffix")) + + return [experiment, project] + + def create_identifiables(self): + self.experiment, self.project = ( + ExperimentCFood.create_identifiable_experiment(self.match)) + + self.identifiables.extend([self.experiment, self.project]) + self.people = parse_responsibles(self.header) + self.identifiables.extend(self.people) + + def update_identifiables(self): + # set description + assure_has_property(self.experiment, "description", + self.header["description"][0], + to_be_updated=self.to_be_updated) + + # set responsible people + assure_object_is_in_list(self.people, self.experiment, dm.responsible, + to_be_updated=self.to_be_updated, + datatype=db.LIST(db.REFERENCE)) + + self.reference_files_from_header(record=self.experiment) + + if "revisionOf" in self.header: + reference_records_corresponding_to_files( + record=self.experiment, + recordtypes=[dm.Experiment], + globs=get_glob(self.header[REVISIONOF.key]), + path=self.crawled_path, + property_name=dm.revisionOf, + to_be_updated=self.to_be_updated) diff --git a/src/caosadvancedtools/scifolder/generic_pattern.py b/src/caosadvancedtools/scifolder/generic_pattern.py new file mode 100644 index 00000000..0b5a4df2 --- /dev/null +++ b/src/caosadvancedtools/scifolder/generic_pattern.py @@ -0,0 +1,35 @@ +#!/usr/bin/env python +# encoding: utf-8 +# +# Copyright (C) 2020 IndiScale GmbH <info@indiscale.com> +# Copyright (C) 2020 Henrik tom Wörden <h.tomwoerden@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +""" this module contains regular expressions neeeded for the standard file +structure """ + + +project_pattern = (r"(?P<project_identifier>" + r"(?P<project_year>\d{4})_?(?P<project_name>((?!/).)*))/") +date_pattern = r"(?P<date>\d{2,4}[-_]\d{1,2}[-_]\d{1,2})" +date_suffix_pattern = r"(_(?P<suffix>(((?!/).)*)))?/" +readme_pattern = r"(readme.md|README.md|readme.xlsx|README.xlsx)$" + +full_pattern = (project_pattern + date_pattern + date_suffix_pattern + # TODO: Additional level are not allowed according to the + # specification. This should be removed or enabled via a + # configuration + + "(.*)" + + readme_pattern) diff --git a/src/caosadvancedtools/scifolder/publication_cfood.py b/src/caosadvancedtools/scifolder/publication_cfood.py new file mode 100644 index 00000000..fc78e5b7 --- /dev/null +++ b/src/caosadvancedtools/scifolder/publication_cfood.py @@ -0,0 +1,112 @@ +#!/usr/bin/env python +# encoding: utf-8 +# +# Copyright (C) 2019 Henrik tom Wörden +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +import os +from itertools import chain + +import caosdb as db +from caosadvancedtools.cfood import (AbstractFileCFood, + assure_object_is_in_list, fileguide, + get_entity) +from caosadvancedtools.read_md_header import get_header +from caosadvancedtools.utils import find_records_that_reference_ids + +from .generic_pattern import date_suffix_pattern, readme_pattern +from .utils import (get_files_referenced_by_field, parse_responsibles, + reference_records_corresponding_to_files) +from .withreadme import DATAMODEL as dm +from .withreadme import (RESULTS, REVISIONOF, SCRIPTS, SOURCES, WithREADME, + get_glob) + + +def folder_to_type(name): + if name == "Theses": + return "Thesis" + elif name == "Articles": + return "Article" + elif name == "Posters": + return "Poster" + elif name == "Presentations": + return "Presentation" + elif name == "Reports": + return "Report" + else: + raise ValueError() + + +class PublicationCFood(AbstractFileCFood, WithREADME): + # win_paths can be used to define fields that will contain windows style + # path instead of the default unix ones. Possible fields are: + # ["results", "sources", "scripts", "revisionOf"] + win_paths = [] + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + WithREADME.__init__(self) + + def collect_information(self): + self.find_referenced_files([RESULTS, SOURCES, SCRIPTS]) + + @staticmethod + def get_re(): + # matches anything but "/", i.e. a folder name + _prefix = ".*/Publications/" + _type = r"(?P<type>Theses|Articles|Posters|Presentations|Reports)/" + _partial_date = r"(?P<date>\d{2,4}([-_]\d{1,2}[-_]\d{1,2})?)" + + return _prefix+_type+_partial_date+date_suffix_pattern+readme_pattern + + def create_identifiables(self): + header = get_header(fileguide.access(self.crawled_path)) + self.publication = db.Record(name=self.match.group("date") + + "_"+self.match.group("suffix")) + self.publication.add_parent(name=folder_to_type( + self.match.group("type"))) + self.identifiables.append(self.publication) + + self.people = parse_responsibles(header) + self.identifiables.extend(self.people) + + def update_identifiables(self): + header = get_header(fileguide.access(self.crawled_path)) + self.publication.description = header["description"][0] + + assure_object_is_in_list(self.people, self.publication, + "responsible", + self.to_be_updated, + datatype=db.LIST(db.REFERENCE)) + + if SOURCES.key in self.header: + reference_records_corresponding_to_files( + record=self.publication, + recordtypes=[dm.Experiment, dm.Publication, dm.Simulation, + dm.Analysis], + globs=get_glob(self.header[SOURCES.key]), + property_name=dm.sources, + path=self.crawled_path, + to_be_updated=self.to_be_updated) + self.reference_files_from_header(record=self.publication) + + if REVISIONOF.key in self.header: + reference_records_corresponding_to_files( + record=self.publication, + recordtypes=[dm.Publication], + property_name=dm.revisionOf, + globs=get_glob(self.header[REVISIONOF.key]), + path=self.crawled_path, + to_be_updated=self.to_be_updated) diff --git a/src/caosadvancedtools/scifolder/simulation_cfood.py b/src/caosadvancedtools/scifolder/simulation_cfood.py new file mode 100644 index 00000000..ae129e6a --- /dev/null +++ b/src/caosadvancedtools/scifolder/simulation_cfood.py @@ -0,0 +1,109 @@ +#!/usr/bin/env python +# encoding: utf-8 +# +# Copyright (C) 2019 Henrik tom Wörden +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +import os +from itertools import chain + +import caosdb as db +from caosadvancedtools.cfood import (AbstractFileCFood, assure_has_parent, + assure_has_property, + assure_object_is_in_list, get_entity) +from caosadvancedtools.read_md_header import get_header + +from .generic_pattern import full_pattern +from .utils import (get_files_referenced_by_field, parse_responsibles, + reference_records_corresponding_to_files) +from .withreadme import DATAMODEL as dm +from .withreadme import (RESULTS, REVISIONOF, SCRIPTS, SOURCES, WithREADME, + get_glob) + + +class SimulationCFood(AbstractFileCFood, WithREADME): + # win_paths can be used to define fields that will contain windows style + # path instead of the default unix ones. Possible fields are: + # ["results", "sources", "scripts", "revisionOf"] + win_paths = [] + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + WithREADME.__init__(self) + + def collect_information(self): + self.find_referenced_files([RESULTS, SOURCES, SCRIPTS]) + + @staticmethod + def get_re(): + return ".*/SimulationData/" + full_pattern + + def create_identifiables(self): + # create the project identifiable + self.project = db.Record(name=self.match.group("project_identifier")) + self.project.add_parent(name="Project") + self.identifiables.append(self.project) + + self.simulation = db.Record() + # import IPython + # IPython.embed() + self.simulation.add_parent(name="Simulation") + self.simulation.add_property( + name="date", value=self.match.group("date")) + + self.simulation.add_property(name="Project", value=self.project) + + if self.match.group("suffix") is not None: + self.simulation.add_property( + name="identifier", value=self.match.group("suffix")) + else: + # TODO empty string causes an error in search + self.simulation.add_property(name="identifier", + value="empty_identifier") + self.identifiables.append(self.simulation) + self.people = parse_responsibles(self.header) + self.identifiables.extend(self.people) + + def update_identifiables(self): + assure_has_property(self.simulation, "description", + self.header["description"][0], + to_be_updated=self.to_be_updated) + + # TODO why is here no db.LIST("Person") possible? + + assure_object_is_in_list(self.people, self.simulation, + "responsible", + self.to_be_updated, + datatype=db.LIST(db.REFERENCE)) + + if SOURCES.key in self.header: + reference_records_corresponding_to_files( + record=self.simulation, + recordtypes=["Experiment", "Publication", "Simulation", + "Analysis"], + globs=get_glob(self.header[SOURCES.key]), + property_name=dm.sources, + path=self.crawled_path, + to_be_updated=self.to_be_updated) + self.reference_files_from_header(record=self.simulation) + + if REVISIONOF.key in self.header: + reference_records_corresponding_to_files( + record=self.simulation, + recordtypes=[dm.Software], + property_name=dm.revisionOf, + globs=get_glob(self.header[dm.revisionOf]), + path=self.crawled_path, + to_be_updated=self.to_be_updated) diff --git a/src/caosadvancedtools/scifolder/software_cfood.py b/src/caosadvancedtools/scifolder/software_cfood.py new file mode 100644 index 00000000..77fb4652 --- /dev/null +++ b/src/caosadvancedtools/scifolder/software_cfood.py @@ -0,0 +1,115 @@ +#!/usr/bin/env python +# encoding: utf-8 +# +# Copyright (C) 2019 IndiScale GmbH <info@indiscale.com> +# Copyright (C) 2019 Henrik tom Wörden <h.tomwoerden@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +import os +from itertools import chain + +import caosdb as db +from caosadvancedtools.cfood import (AbstractFileCFood, assure_has_parent, + assure_has_property, assure_name_is, + assure_object_is_in_list, get_entity) +from caosadvancedtools.guard import global_guard as guard +from caosadvancedtools.read_md_header import get_header + +from .generic_pattern import full_pattern +from .utils import get_files_referenced_by_field, parse_responsibles +from .withreadme import BINARIES +from .withreadme import DATAMODEL as dm +from .withreadme import SOURCECODE, WithREADME + + +class SoftwareCFood(AbstractFileCFood, WithREADME): + _prefix = ".*/Software/" + # win_paths can be used to define fields that will contain windows style + # path instead of the default unix ones. Possible fields are: + # ["binaries", "sourceCode","revisionOf"] + win_paths = [] + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + WithREADME.__init__(self) + + def collect_information(self): + self.find_referenced_files([BINARIES, SOURCECODE]) + + @staticmethod + def get_re(): + + return SoftwareCFood._prefix + full_pattern + + def create_identifiables(self): + # The software is a record type. Let's try to find it. + self.software = db.execute_query( + "FIND RecordType Software with name = {}".format( + self.match.group("project_identifier"))) + + if len(self.software) == 0: + # Software not found insert if allowed + self.software = db.RecordType( + name=self.match.group("project_identifier")) + self.software.add_parent(name="Software") + self.software.add_property(name="alias", + value=self.match.group("project_name")) + guard.safe_insert(self.software) + elif len(self.software) == 1: + self.software = self.software[0] + else: + raise RuntimeError("Cannot identify software record type. Multiple" + "matches for {}".format( + self.match.group("project_identifier"))) + + # create the software version + # identifiable is made from parent and date and suffix + self.softwareversion = db.Record() + self.softwareversion.add_parent(self.software) + self.softwareversion.add_property("date", self.match.group("date")) + + if self.match.group("suffix"): + self.softwareversion.add_property( + "version", self.match.group("suffix")) + + self.identifiables.append(self.softwareversion) + + # parse people and add them to identifiables + # TODO People are currently 'identifiable' with their first and last + # names. There will be conflicts + self.people = parse_responsibles(self.header) + self.identifiables.extend(self.people) + + def update_identifiables(self): + version_name = self.match.group("project_name") + + if self.match.group("suffix"): + version_name += "_"+self.match.group("suffix") + else: + version_name += "_"+self.match.group("date") + + assure_name_is(self.softwareversion, version_name, + to_be_updated=self.to_be_updated) + assure_has_property(self.softwareversion, "description", + self.header["description"][0], + to_be_updated=self.to_be_updated) + assure_object_is_in_list(obj=self.people, + containing_object=self.softwareversion, + property_name="responsible", + to_be_updated=self.to_be_updated, + datatype=db.LIST(db.REFERENCE) + ) + + self.reference_files_from_header(record=self.softwareversion) diff --git a/src/caosadvancedtools/scifolder/utils.py b/src/caosadvancedtools/scifolder/utils.py new file mode 100644 index 00000000..3241764f --- /dev/null +++ b/src/caosadvancedtools/scifolder/utils.py @@ -0,0 +1,204 @@ +#!/usr/bin/env python +# encoding: utf-8 +# +# Copyright (C) 2020 IndiScale GmbH <info@indiscale.com> +# Copyright (C) 2020 Henrik tom Wörden <h.tomwoerden@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +import os +from itertools import chain + +import caosdb as db +import pandas as pd +from caosadvancedtools.cfood import assure_object_is_in_list, fileguide +from caosadvancedtools.utils import (find_records_that_reference_ids, + read_field_as_list, + return_field_or_property, + string_to_person) + + +def parse_responsibles(header): + """ + Extract the responsible person(s) from the yaml header. + + If field responsible is a list every entry from that list will be added as + a person. + Currently only the format <Firstname> <Lastname> <*> is supported. + If it is a simple string, it is added as the only person. + """ + people = [] + + for person in read_field_as_list(header["responsible"]): + people.append(string_to_person(person)) + + return people + + +def get_files_referenced_by_field(globs, prefix="", final_glob=None): + """ + returns all file entities at paths described by given globs + + This function assumes that the supplied globs is a list of + filenames, directories or globs. + + prefix should be the path of the crawled file to supply a context for + relative paths. + """ + referenced_files = [] + globs = [g for g in globs if g is not None] + + for glob in globs: + # TODO extract glob manipulation + + if final_glob is not None and not glob.endswith(final_glob): + glob += final_glob + + if not glob.startswith("/"): + glob = os.path.normpath(os.path.join(prefix, glob)) + else: + glob = os.path.normpath(glob) + + query_string = "FIND file which is stored at {}".format(glob) + + el = db.execute_query(query_string) + + referenced_files.append(el) + + return referenced_files + + +def is_filename_allowed(path, recordtype): + if recordtype.lower() == "experiment": + if "ExperimentalData" in path: + return True + elif recordtype.lower() == "analysis": + if "DataAnalysis" in path: + return True + elif recordtype.lower() == "publication": + if "Publication" in path: + return True + elif recordtype.lower() == "simulation": + if "Simulation" in path: + return True + + return False + + +def get_entity_ids_from_include_file(prefix, file_path): + """reads version ids from include file """ + + if not file_path.startswith("/"): + file_path = os.path.normpath(os.path.join(prefix, file_path)) + else: + file_path = os.path.normpath(file_path) + df = pd.read_csv(fileguide.access(file_path), sep="\t", comment="#") + + if "ID" not in df.columns: + raise ValueError("Include file must have an ID column") + + return list(df.ID) + + +def reference_records_corresponding_to_files(record, recordtypes, globs, path, + to_be_updated, property_name): + # TODO this function needs to be refactored: + # the treatement of keys like 'results' should be separated from searching + # entities (see setting of globs and includes below). + + for recordtype in recordtypes: + + directly_named_files = list(chain(*get_files_referenced_by_field( + globs, + prefix=os.path.dirname(path)))) + + files_in_folders = list(chain(*get_files_referenced_by_field( + globs, + prefix=os.path.dirname(path), + final_glob="**"))) + files = [f for f in directly_named_files + files_in_folders if + is_filename_allowed(f.path, recordtype=recordtype)] + entities = find_records_that_reference_ids( + list(set([ + fi.id for fi in files])), + rt=recordtype) + + if len(entities) == 0: + continue + else: + assure_object_is_in_list(entities, + record, + property_name, + to_be_updated, + datatype=db.LIST(db.REFERENCE)) + + +def create_files_list(df, ftype): + files = [] + + for indx, src in df.loc[ftype, + pd.notnull(df.loc[ftype])].iteritems(): + desc = df.loc[ftype+" description", indx] + + if pd.notnull(desc): + files.append({'file': src, 'description': desc}) + else: + files.append(src) + + return files + + +def add_value_list(header, df, name): + if name in df.index: + header[name] = list(df.loc[name, pd.notnull(df.loc[name])]) + + +def get_xls_header(filepath): + """ + This function reads an xlsx file and creates a dictionary analogue to the + one created by the yaml headers in README.md files read with the get_header + function of caosdb-advancedtools. + As xlsx files lack the hierarchical structure, the information that can be + provided is less complex. See the possibility to use the xlsx files as a + less powerfull version for people who are not comfortable with the + README.md files. + + The xlsx file has a defined set of rows. In each row a list of entries can + be given. This structure is converted to a dictionary with a fix structure. + """ + + header = {} + + df = pd.read_excel(filepath, index_col=0, header=None) + add_value_list(header, df, "responsible") + add_value_list(header, df, "description") + assert len(header["description"]) <= 1 + + for ftype in ["sources", "scripts", "results", "sourceCode", "binaries"]: + if ftype not in df.index: + continue + files = create_files_list(df, ftype) + + if len(files) > 0: + header[ftype] = files + + add_value_list(header, df, "revisionOf") + # there should be only one revision of + + if "revisionOf" in header: + if len(header["revisionOf"]) > 0: + header["revisionOf"] = header["revisionOf"][0] + add_value_list(header, df, "tags") + + return header diff --git a/src/caosadvancedtools/scifolder/withreadme.py b/src/caosadvancedtools/scifolder/withreadme.py new file mode 100644 index 00000000..b3eb1095 --- /dev/null +++ b/src/caosadvancedtools/scifolder/withreadme.py @@ -0,0 +1,270 @@ +#!/usr/bin/env python +# encoding: utf-8 +# +# Copyright (C) 2020 IndiScale GmbH <info@indiscale.com> +# Copyright (C) 2020 Henrik tom Wörden <h.tomwoerden@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + + +import logging +import os +from dataclasses import dataclass + +import caosdb as db +from caosadvancedtools.cfood import (assure_has_description, assure_has_parent, + assure_object_is_in_list, fileguide) +from caosadvancedtools.read_md_header import get_header as get_md_header +from caosadvancedtools.table_importer import (win_path_converter, + win_path_list_converter) +from caosadvancedtools.utils import return_field_or_property + +from .utils import (get_entity_ids_from_include_file, + get_files_referenced_by_field, get_xls_header) + +LOGGER = logging.getLogger("withreadme") +LOGGER.setLevel(level=logging.ERROR) + + +@dataclass +class DataModel(object): + results: str = "results" + scripts: str = "scripts" + sources: str = "sources" + date: str = "date" + Project: str = "Project" + Analysis: str = "Analysis" + identifier: str = "identifier" + responsible: str = "responsible" + revisionOf: str = "revisionOf" + Experiment: str = "Experiment" + Publication: str = "Publication" + Simulation: str = "Simulation" + Analysis: str = "Analysis" + revisionOf: str = "revisionOf" + binaries: str = "binaries" + sourcecode: str = "sourceCode" + description: str = "description" + + +DATAMODEL = DataModel() +dm = DATAMODEL + + +class HeaderField(object): + def __init__(self, key, model): + self.key = key + self.model = model + + +RESULTS = HeaderField("results", dm.results) +SCRIPTS = HeaderField("scripts", dm.scripts) +SOURCES = HeaderField("sources", dm.sources) +FILE = HeaderField("file", None) +INCLUDE = HeaderField("include", None) +REVISIONOF = HeaderField("revisionOf", dm.revisionOf) +BINARIES = HeaderField("binaries", dm.binaries) +SOURCECODE = HeaderField("sourceCode", dm.sourcecode) +DESCRIPTION = HeaderField("description", dm.description) +RECORDTYPE = HeaderField("recordtype", None) + + +def get_glob(field): + """ takes a field which must be a list of globs or dicts. + + if it is a dict, it must have either an include or a file key""" + globs = [] + + for value in field: + + if isinstance(value, dict) and INCLUDE.key in value: + continue + + globs.append(return_field_or_property(value, FILE.key)) + + return globs + + +def get_description(value): + if isinstance(value, dict) and DESCRIPTION.key in value: + return value[DESCRIPTION.key] + else: + return None + + +def get_rt(value): + if isinstance(value, dict) and RECORDTYPE.key in value: + return value[RECORDTYPE.key] + else: + return None + + +class WithREADME(object): + def __init__(self): + self._header = None + self.ref_files = {} + + @property + def header(self): + if self._header is None: + if self.crawled_path.lower().endswith(".md"): + self._header = get_md_header( + fileguide.access(self.crawled_path)) + elif self.crawled_path.lower().endswith(".xlsx"): + self._header = get_xls_header( + fileguide.access(self.crawled_path)) + else: + raise RuntimeError("Readme format not recognized.") + self.convert_win_paths() + + return self._header + + def find_referenced_files(self, fields): + """ iterates over given fields in the header and searches for files + + if the field contains a glob. The file entities are attached""" + + for field in fields: + + if field.key not in self.header: + continue + + globs = get_glob(self.header[field.key]) + files = get_files_referenced_by_field( + globs, prefix=os.path.dirname(self.crawled_path)) + + description = [get_description(val) for val in + self.header[field.key]] + recordtype = [get_rt(val) for val in self.header[field.key]] + self.ref_files[field.model] = [ + (f, d, r) for f, d, r in zip(files, description, recordtype)] + # flatten returned list of file lists + flat_list = [f.path for sublist in files + for f in sublist] + + if len(flat_list) == 0: + LOGGER.warn("ATTENTION: the field {} does not reference any " + "known files".format(field.key)) + + self.attached_filenames.extend(flat_list) + + def convert_path(self, el): + """ converts the path in el to unix type + + el can be a dict of a string. If el is dict it must have a file key + + returns: same type as el + """ + + if isinstance(el, dict): + if INCLUDE.key in el: + el[INCLUDE.key] = win_path_converter(el[INCLUDE.key]) + + return el + + if FILE.key not in el: + raise ValueError("field should have a 'file' attribute") + el[FILE.key] = win_path_converter(el[FILE.key]) + + return el + else: + return win_path_converter(el) + + def convert_win_paths(self): + for field in self.win_paths: + if field in self.header: + + if isinstance(self.header[field], list): + self.header[field] = [ + self.convert_path(el) for el in self.header[field]] + else: + self.header[field] = self.convert_path(self.header[field]) + + def reference_files_from_header(self, record): + """adds properties that reference the files collected in ref_files + + ref_files is expected to be a list of (files, description, recordtype) + tuples, where files is the list of file entities, description the description + that shall be added to each and recordtype the recordtype that the + files shall get as parent. files may be an empty list and description + and recordtype may be None. + + The files will be grouped according to the keys used in ref_files and + the record types. The record types take precedence. + """ + references = {} + + for prop_name, ref_tuple in self.ref_files.items(): + generic_references = [] + + for files, description, recordtype in ref_tuple: + if len(files) == 0: + continue + + if description is not None: + for fi in files: + assure_has_description(fi, description, force=True) + + if recordtype is None: + generic_references.extend(files) + else: + for fi in files: + # fix parent + assure_has_parent(fi, recordtype, force=True, + unique=False) + + if recordtype not in references: + references[recordtype] = [] + references[recordtype].extend(files) + + if len(generic_references) > 0: + assure_object_is_in_list( + generic_references, + record, + prop_name, + to_be_updated=self.to_be_updated, + datatype=db.LIST(db.REFERENCE), + ) + + for ref_type in references.keys(): + assure_object_is_in_list( + references[ref_type], + record, + ref_type, + to_be_updated=self.to_be_updated, + ) + + def reference_included_records(self, record, fields, to_be_updated): + """ iterates over given fields in the header and searches for files + + if the field contains a glob. The file entities are attached""" + + for field in fields: + + if field.key not in self.header: + continue + included = [] + + for item in self.header[field.key]: + if INCLUDE.key in item: + included.extend( + get_entity_ids_from_include_file( + os.path.dirname(self.crawled_path), + item[INCLUDE.key])) + + assure_object_is_in_list(included, + record, + field.model, + to_be_updated, + datatype=db.LIST(db.REFERENCE)) diff --git a/src/doc/crawler.rst b/src/doc/crawler.rst index 2380cdbd..92a624bb 100644 --- a/src/doc/crawler.rst +++ b/src/doc/crawler.rst @@ -36,9 +36,8 @@ different components of the CaosDB Crawler can be found in the `developers’ information <#extending-the-crawlers>`__ below. In case you are happy with our suggestion of a standard crawler, feel -free to use the standard crawler. The standard crawler lives in this git -repository maintained by Henrik tom Wörden: -https://gitlab.com/henrik_indiscale/scifolder +free to use the standard crawler. The standard crawler lives in the submodule +`caosadvancedtools.scifolder` Usage ===== diff --git a/unittests/data/DataAnalysis/2010_TestProject/2019-02-03_something/README.md b/unittests/data/DataAnalysis/2010_TestProject/2019-02-03_something/README.md new file mode 100644 index 00000000..71454e89 --- /dev/null +++ b/unittests/data/DataAnalysis/2010_TestProject/2019-02-03_something/README.md @@ -0,0 +1,15 @@ +--- +responsible: +- Only Responsible +description: A description of another example analysis. + +sources: +- file: "/ExperimentalData/2010_TestProject/2019-02-03/*.dat" + description: an example reference to a results file + +scripts: +- file: plot.py + description: a plotting script +results: +- file: results.pdf +... diff --git a/unittests/data/ExperimentalData/2010_TestProject/2019-02-03_something/README.md b/unittests/data/ExperimentalData/2010_TestProject/2019-02-03_something/README.md new file mode 100644 index 00000000..b7e5051c --- /dev/null +++ b/unittests/data/ExperimentalData/2010_TestProject/2019-02-03_something/README.md @@ -0,0 +1,9 @@ +--- +responsible: +- Only Responsible +description: A description of another example experiment. + +results: +- file: "/ExperimentalData/2010_TestProject/2019-02-03/*.dat" + description: an example reference to a results file +... diff --git a/unittests/data/Publications/Posters/2019-02-03_something/README.md b/unittests/data/Publications/Posters/2019-02-03_something/README.md new file mode 100644 index 00000000..c95e37ec --- /dev/null +++ b/unittests/data/Publications/Posters/2019-02-03_something/README.md @@ -0,0 +1,11 @@ +--- +responsible: +- Only Responsible +description: A description of another example experiment. + +sources: +- /DataAnalysis/2010_TestProject/2019-02-03/results.pdf + +results: +- "*.pdf" +... diff --git a/unittests/data/README.md b/unittests/data/README.md new file mode 100644 index 00000000..a2e0ce6e --- /dev/null +++ b/unittests/data/README.md @@ -0,0 +1,14 @@ +--- +responsible: Ana Lytic +description: An examplary analysis of very exciting research. The analysis was conducted following state of the art best practices of scientific methodology. +sources: + - /ExperimentalData/2010_TestProject/2019-02-03_something/ + - file: /ExperimentalData/2010_TestProject/2019-02-03_something/ + description: An example reference to an experiment. The experimental data was analysed with statistical methods using proper error calculations. +scripts: + - file: scripts + description: all the files needed to run the analysis +results: + - file: results.pdf + description: a plot of the statistical analysis +... diff --git a/unittests/data/README.xlsx b/unittests/data/README.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..a909347789edc1d5a1bbaacd998744cee83d5f6b GIT binary patch literal 5338 zcmaJ_1yq#X)*e8IhEYHoksf+LYG@FUl5Xji7#!Lml$I`$ltw{P8X2T}DCtH)r8|`R zhwr=V%Ju&D+q2fpylcICpLO<r&U2o<)s?YskOFXUZ~)Bu>KcG+hL8Sl3b%H2<>9{i zE{Pvh!h(Q8w|&E75uP<<53DMYVMA#TeV-@yUziJ}@KU<_A_8#?3-hsK6#e#5Lb9oI zw`~r?(-p$yVMT@<k<n)qh}=w~<8^AJgC2@;Kv#*0*s`~1wB7N5KHeGQ12ymDV~tjQ z6X+MSe46@Dt2rw~kdv_~)}_^tqp-rGdAs@3DBaNGpx5Pv>++$V%O;reBEw(wa$>oT z+2ncn8*ix%KG4$1qI&evk;n5V;t>wpxKYJo*wc(U?aIjHPwry%q3{ezlJ7yK7p@|8 zIPx(Ujq$3heRgBb!>o3a0Ci<-9Gjwm+C}uXMKJ;B=l_)$V)Pqc=rv$)2MZX?f!oW; zF-oh)d4?B+Jm$DD@5E*DggOaJFFF=52YTC+5-)$pA)K^(H!0_G{+4w<cv_ZNPy~OW zF~!?(KHXuJwnaZ%r@z&_Sxmj2J7~INW5yDi3>jq5yZPEsx}V!@3bi)3mxq%0{A_y| zVQr~<PY{2h*88SFt?b<MoQH*~s5q6%eJ9K3<TaShhQBmGD(9*A9nhfQpLYq2lNdsy z28adNHlzq1Mz_!F=2SRVa|Q>!Exi{bgt9du*$nDS|M^~T+BE9*u8+T+W^c4XPyWp( zRU>bCraZgjpkFlk$`)~!#3+uz{!(^%D(tcY<GW0YR{mW^pYP4vGn!P%aM#ukYjzT# zmJFl9a=I_crfLG8EvIZ|e7DWU^9`pg`LXxS4?bb5VJ3&49&Ph=-V@tG&%EDeNz8(7 zZzG#lyPpL^oFlm6N4PNQqYRoCYt?rImOrg_Zx&06uK?FfLb!CG+xSHrZULlFJPtm^ zNR*6v^DKFs?1uTfx%6*5Ac7UvUs+)?lW;zDzmc2L#kiYP{>B4i^UohVVhWW8)s(w* z#@w0n)`=f=yHr-yv;soUg~5<*vj_ctjhXVFtpuZh!%!N+)*TPN6J9Dc;Hp#V$YQ)I zpPGMsG0j?lpQ6G-PBv^Aqjxh{7qP`u<B{moyLLg#|4FZXjr<BT5nlY8#~1*BGuA(0 zhVXC9xVm{eTDx8$W>eo7mc~o!yHHks)+D2uT0@e(2DP5<(=I90wx15f0v7+mqI^qu ze)zF7NNUFIiJ((qXTs(>BAU2MkRsHUqh*+Oj9CjFu7E8euieRR&X@LO{pZFxu6+JI zzI%@qp&evgy9Ni`oT?qGI*$sao9X+&15scJFB8h|+Od05WZE)nMOE~`ROo)Mj8=T& z9i}>=qw|-nI0?ddE1La5V2$FWk>`<uL63KtUo0{;cMluCyOHNp5dWmLf;`wn78G0< zPyL2*15-m^B1^ROMSJaPJkHwm_f(OPM4V0za+--mqB^YE1^$y(cM4=)4a}Bft<|t! z7{*0sNgmc3Kp|V5E{!E{=&jQ%rl>C*T`+Je=TV?2Ky}w0;EwT?JMocOwg>r3{Oq7O z6~)%F2=F6Z9<Z})<|y(pc5dwFQ9y-_fwOWh-e5dM?hWm63ehS{XAWK;6_HOL-e0iH zi6M#e-XE;8zjDVCOc<Njrq%jPFW2%u<pa}UdtGxEG@l&$oiiaoLaA}IcW81nt%gY9 zo&jslWBVh(s0U`HO~RzOv%aJ3mN$gY&AJIJOM`FMFp-xmTP_gSlSfDKtM@U#x2PWf zxJRI>90%4@vV{>VE<RgaDbbX9cHia1PrLS2@71CPy5!y4jNgv;EpCQU5?j=SO?krz zEr5`~Wa)l-8*)g?l7uffaM+{}_ecrll{uPzVF@ugKG`DL&v5lAUx)a;lF*qS+fN%= zm`-W3HQIsBT+9X$mx_O%PWsfPmj;wX@j$<I{zQZz;hq;fA>JsCQrD-$O<OE=ByvZ1 zcHy6<(*)*s9V@J6c~=eRwVH!(2NdrsD2O*N=>mRjUD`<}(An*A+7$}CdOBtijYwW( zR)G~K&~2TF%rcKb`lazjHl=4@jdrToIdV(qW?rFvlY*<i63vc^WdDSAir)<BYG-Zj z=F0Qe59A8?9eT#DX~JmWo3`u>y)A*%>?(xMDW)HK*p3L3yq!t$O^~Kg4SK+A-u3ek z!HmJO=)o`gG3u^kT|+uztG=72HAaon{Yd{|>(uNa!gr#D^9#iMWBuuh`Jvw$RP){) z#$L)d9~C&~Vj@givB+ciI`l;V&C9c@uX=gJ)a1bfu^YQI{Q8P@-LU~$75i4)yx}N0 z%0uA!#bh7Iyi~3ZRMxVnD8t}`%4KL%F#x>)Yf70GFuf+5);eQv)X&1IGTgOSZ>-@E zcq6&CXw7u{@!o?&hBvQ98;l1Ury?XOe+abmG0{bwi37e5%_gbmM2F;l#34M%aJel* zb|=%Dz*Q0SieemI)ka<1v}|K6z9WjG-8av0&>KN+t|BEP?xC<~wPo^)ysvP87uWx` zPi&N>Lcv)%rr%Au4*~ow3pZcFaN0mRIc$N)XXVQWBsdtJx~@w+1mQXu(jH*m#+H{w z_sFK5c{}1-pYAufXS0z`(NuE16_AP~(N@m~ONnq7ozPJX8W*hO0eW1GEnb4hoEjeG zxn2U!cDvo*hztte%At>6*cKXqp;k8ECG7)gz0Iw_dWSZX@QZh{RJCQHRt-aY-yXu! z<Ac}f!?r4{T?L%p%qY|TL=h}Md6xPb=uD@VYmg!els9I_VE19JP<rut0YBu$-AHWd z5)zdalJV5Gy)5kfOmixoywOafY}gK+R<}@4D&(En>And_KZw5Q{(_-NIJ^*BKR`ZM zTZ|)R>Q+8eG(^;Be6)Xvq&	wZBYTfu8|1(@K!l5~us)6PE~=GSOQ6dBgJLHcCZt zP|)F$HC84<eJ7BSK-_3@O>+XOIL}~S>CcO$_31O6TS?z)nn7xjOGVHRzhPn|l11O< z@l>QAV_O-&n3=KG-Q;%jk1sT#!gwsmUCEu=p0c+v2O_FvAlEN}_fSi(H-4cfm&Ir0 zxo7X>d?z&|yizJQY$6ARgq{UXMM5i6s;Fq(ZTOu8PI4p334xN{&!laiRqDD*+X`0h zk9NU$)1KN2`XLr#L`O~+rcM$vi_i0Nkqh&O30s-to;BwOgrk`rW3cUWd%g3lvF8(B z&z1vts|TW9x@)?qnu)1#J9xtNH*tiK4F=rZ{I+<*%)*wORD&|*$Kem=ZVpa&3_jez zAuZ?(Jwzm|GO?kiR!KJ^yv1>V3BRz6o$E1~2TSrg&oThF2Xwy9zAt$a7bx3XF&S&? zH9a6zA407h7OR9Pone@<I1Li0srewXV!!F|SfQ;nxR7=q3-J(Q6o|=dE-eMvFXHud z<xZ5kaW<Kh^J6!@!&EEh$=i+Q2<f(0@{0pWVxPBZG^v$=6*6CR(u@(L6Cy8Wq=Yn4 zi=;2T9+gkwW(DgwSl@?R#53`-77$##)Fd<M+J&8d-fYD2rZFl*dWC*CYa8+TbgJue zxP<Yz$<4W?WnrJ7!gPC=h_kUgf@Sb-a%SI<Xp7wg<!=l3o~Y8e`q-K&7Y{ic;l4dI zTK)pNA}yILJ8jqv0N^FjKam#I-=y`o<h?HL{+7RSst#?u1R>k!Wb(%yirAKs!pKPJ zvhx#+PK_@^?Sk>+)t7$U)JAyL8$$_06Z=gM57+KTt%bGA5&(;GZUnb<c;FykoNsL| zQR<>{<`?alW6JN|e&0wk`J7yl$FOo=*nr!jHCEI(uSe-o;Y3eE;HCyo-=5;<bPN=k zR*4Z-l{1I>N7vTNa?1%qBGX(%CT}+(E(%T;$3LtYW3kA=!-sv~^m9dgMUsJblCife zFjG^|6L-wihHm>AJ`8uJMBQ)WjA?(bHbt(aY<|uM^C~lHJ3pU|p!d!EkAj!<U*g5i z9Byr;<p#HRw*5=PRBNj`%wDNpJ%XmPl|0b&qC4BU@ZnI~5e=z*qi!6t4*x*urLV$K z2oSHsi0y<v=FLK*w;KG^g-%?|l7+mjsi80pk2l%QSa|FGb|}wBPcqQDS#=D?@mQp` zxEsun0r>O&P!m=1XCzv<(Flw|%wsbozG+l8e#^~C)G(p~c^|CR*l&%#Gk(|H4lb{H z#iHdR$$I7mp4Bl{E_Ha#a-3KdH{U9goLRAE6=y+zyj-C(TU}&!8E^@@?~^f=qG}gm z^+B04$<Yd1Y#rjAVLK#!nnEMJ5m8p@NZDn<O4>MJ^KOZE5jLxsa01$}qTo6hKJ{XE zd74pC?jmg)!b&=*H7@_HIXE-2t+~__D8GDf`p4I&r9v+Ygsrouf^;$maVx3wQ;st4 zJM`=nc#DN)!9*^k+VXe&CT6!%Yhy?wC@7_!7|I-~B`zy_tPM}T+i84RhxE-!EwZgD zp37}fu{FgL>mKm*ltPrtS-aZe3mJRV=p8I>ki4qWN<t)HxJFZ2&8V1=zp8?utc#rN zP0IBU*+|m?i|s~XN>E88v&k~J*!BRPBNW$Tu}6KgY!W(kLAAF2J#d-v(`?;@#<@sJ z{H5tJ{uLhPAjc~MXfZ2(^Pf)2Ei@jiV3z7|n2RfqB@Ax;R|}-6q1MU^@;lb!^j&Dc zWDvAu3KvtfV37Z6<P@VZ*HFMkaDklb1hNye_|}~^l`YI2ruHAn(QP#A8x(;VG$Jw8 z8%Z^-wGKWQ29c2JcO{y67n+3=nGJIymFfx_@9$VC5q2t;zYx+)f{V*hf4|=`xkpIa zFK?19R?n^4lCGv2BJkCe6H6@YD}=il(n8Z#ZT=3S;8sQ2SUYNzAOsAs?9K+PaI)NL zVJ*SZnv5W)`le#=&~wN|V>%A`3}($!T*O`!5;c0PwllN0TTdyFSmeh}#HJb@rSAM( z1oT2E%c(xvn|6Ls4?mIUi!{w`e|ZkR><JPmRU_-k#QAi2lybuO26(pu&=)m2uwBmV z50Ds4>8-hF(ZX0d%Z@uxNA{*0)E+3CDuX<+x1VCHL)oJz{pG33aI$v^nQLts-GQps zSfU1(NViea&Cd-EAJ0#2Y)!}gYVUf~Kx0^ke|gq*Kz&sv7M1*NJ9L?5{_)s?u9wN& z#pS9>8L_A*ZM>kMZC^Y_cy)gAVk$jHRpJ}W%@!^srH&yh`>!@b?C+cJYhssYXNW3& zKJxq23LB-HDY2YH7jwwt?XcT@C8qUW&+JPg#m%1>0TR=S$TEmN?J|7}GMB+B?7oo+ zg+GWiQc8dZBoy}63=UARutNG)rP_q4ZmLP6h{Q%xD?(ZW76pFrnLJ;PsS{uF&xXFs zyc|LJN2joQ5px^9q!&g;`%2j(zDkW!kK26D`6(MY&i1mNv_dG8r18uo`<U$gxDIlf zT9T(tVTJ%zP-j2^59d*TNNFTK(7E>E+rgdA)oS5g!mCW3etm7VfzE?3I$Qs{qbEVX zVI&K4cC&VNGu8HXv351SN)}RER1KZ1`BM&tGB%IC=OCr&7hO27*!cN&nITbkKg^9> zAO^tNI=JAlu$~XwO|R8F1ZUo{7zFN*{76^%k<65Z&zM&V->a%q=IL>S_TDkVmuNBB zbkz(sE2G$#BMe78;j}AG>;%)fth3vAS1gm2@9&-jkjY~Y-6M<(RH>w9`&oqBvk8px zY`|gdIV|N-S_v|m=?=i=tkk#85XO$$#we{8hQIa`&JA-!_;8g-o|?3*tv!?~O6Gjy z$!NmmCOeI3LATS8K^og%kAM8w919VxNI@&C_Ijki9M;KY8fh?c;jpdFZw<x=73hP5 z)M2zPOAgPosHRJ`pBAe9I4yT-CN7zezo7TV-zhd`!AhIl&)lh?K3<}^us>+}M$~TU za?*sI*?iA4lem2_qk_&Ki2X@uH$^l@E8@Xoz{cHklUs$C=~Q2g$myG&#PE&#C4Tn2 zs!Yt$il70yDtdoW4F)DD;P)8n`W)tJ4E3LOZ78L#{GX23oAIkL&2OVbFZIXJ=1=GA zz0Fm>{@Ya1y*=9bclZ9M>ve^3)%^T6QQ*JV{jXN&&nVY#<101t+jj8&it<0S$)9nq zv-s8B{oC*e{@%!+H~F6tt`p1^+y6Eyw1)aG?*G&M`l-8Oq2JbmuFAjN|HVmvdS6Gz h)ye#A>*!$r&pB0BzKPB<0Dz1BNTMk*`1aM){{g;w5Pbju literal 0 HcmV?d00001 diff --git a/unittests/data/SimulationData/2010_TestProject/2019-02-03_something/README.md b/unittests/data/SimulationData/2010_TestProject/2019-02-03_something/README.md new file mode 100644 index 00000000..fba1bd48 --- /dev/null +++ b/unittests/data/SimulationData/2010_TestProject/2019-02-03_something/README.md @@ -0,0 +1,12 @@ +--- +responsible: +- Only Responsible +description: A description of another example experiment. + +results: +- file: "*.dat" + description: an example reference to a results file + +scripts: +- sim.py +... diff --git a/unittests/test_cfoods.py b/unittests/test_cfoods.py new file mode 100644 index 00000000..87e6d6d2 --- /dev/null +++ b/unittests/test_cfoods.py @@ -0,0 +1,54 @@ +#!/usr/bin/env python +# encoding: utf-8 +# +# Copyright (C) 2019 Henrik tom Wörden +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +import os +import unittest + +from caosadvancedtools.scifolder import (AnalysisCFood, ExperimentCFood, + PublicationCFood, SimulationCFood) + +data_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), + "data") + + +class CFoodTest(unittest.TestCase): + def test_analysis(self): + self.assertFalse(AnalysisCFood.match_item("nopath")) + path = (data_path+"/DataAnalysis/2010_TestProject/" + "2019-02-03_something/README.md") + self.assertTrue(AnalysisCFood.match_item(path)) + AnalysisCFood(path) + + def test_experiment(self): + self.assertFalse(ExperimentCFood.match_item("nopath")) + path = (data_path+"/ExperimentalData/2010_TestProject/" + "2019-02-03_something/README.md") + self.assertTrue(ExperimentCFood.match_item(path)) + ExperimentCFood(path) + + def test_publication(self): + self.assertFalse(PublicationCFood.match_item("nopath")) + path = data_path+"/Publications/Posters/2019-02-03_something/README.md" + self.assertTrue(PublicationCFood.match_item(path)) + PublicationCFood(path) + + def test_simulation(self): + self.assertFalse(SimulationCFood.match_item("nopath")) + path = (data_path + "/SimulationData/2010_TestProject/" + "2019-02-03_something/README.md") + self.assertTrue(SimulationCFood.match_item(path)) + SimulationCFood(path) diff --git a/unittests/test_scifolder_utils.py b/unittests/test_scifolder_utils.py new file mode 100644 index 00000000..30e211d9 --- /dev/null +++ b/unittests/test_scifolder_utils.py @@ -0,0 +1,67 @@ +#!/usr/bin/env python +# encoding: utf-8 +# +# Copyright (C) 2020 IndiScale GmbH <info@indiscale.com> +# Copyright (C) 2020 Henrik tom Wörden <h.tomwoerden@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +import os +import unittest + +from caosadvancedtools.scifolder.utils import get_xls_header + + +class XLSTest(unittest.TestCase): + def test_read(self): + filename = os.path.join(os.path.dirname(__file__), + "data/README.xlsx") + assert os.path.exists(filename) + + header = get_xls_header(filename) + assert header is not None + assert isinstance(header, dict) + + # responsible + assert header['responsible'] == ["Ana Lytic"] + + # description + assert len(header['description']) == 1 + assert isinstance(header['description'][0], str) + assert len(header['description'][0]) > 20 + assert "exciting" in header['description'][0] + + # sources + assert isinstance(header['sources'], list) + + for el in header['sources']: + assert isinstance(el, dict) + assert "TestProject" in el["file"] + assert "example" in el["description"] + + # scripts + assert isinstance(header['scripts'], list) + + for el in header['scripts']: + assert isinstance(el, dict) + assert "scripts" == el["file"] + assert "all the files" in el["description"] + + # results + assert isinstance(header['results'], list) + + for el in header['results']: + assert isinstance(el, dict) + assert "result.pdf" == el["file"] + assert "plot" in el["description"] -- GitLab