From 7ce500305d34f57b9088f48bdacc1beb3098c39f Mon Sep 17 00:00:00 2001 From: Florian Spreckelsen <florian.spreckelsen@gmx.net> Date: Fri, 19 Jun 2020 08:00:32 +0000 Subject: [PATCH] DEV: Add class for the collection of datamodel problems To be used while crawling and to collect missing properties and/or record types from possible errors while crawling. --- CHANGELOG.md | 6 + integrationtests/full_test/test.sh | 7 +- .../test_crawl_with_datamodel_problems.py | 92 ++++++++++++++ .../single_tests/test_datamodel_problems.py | 113 ++++++++++++++++++ src/caosadvancedtools/cfood.py | 73 ++++++++++- src/caosadvancedtools/crawler.py | 19 ++- src/caosadvancedtools/datamodel_problems.py | 89 ++++++++++++++ unittests/test_cfood.py | 2 +- 8 files changed, 393 insertions(+), 8 deletions(-) create mode 100644 integrationtests/full_test/test_crawl_with_datamodel_problems.py create mode 100644 integrationtests/single_tests/test_datamodel_problems.py create mode 100644 src/caosadvancedtools/datamodel_problems.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 891c5ecf..a9bee83f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,8 +8,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added ### +- New class to collect possible problems with the data model + ### Changed ### +- instead of `get_entity`, type-specific functions are used in + `cfood.py` when the type of the entity in question is known. +- Logger is used instead of `print` for errors in `crawler.py`. + ### Deprecated ### ### Removed ### diff --git a/integrationtests/full_test/test.sh b/integrationtests/full_test/test.sh index 2f7bc3d8..ccaaad5e 100755 --- a/integrationtests/full_test/test.sh +++ b/integrationtests/full_test/test.sh @@ -36,4 +36,9 @@ python3 test_table.py echo "Testing im and export" python3 test_im_und_export.py -echo "/n/n/n YOU NEED TO RESTART THE SERVER TO REDO TESTS!!!" 
+# Test correct display of data model errors: +echo "Testing recognition of data model problems ... " +python3 -m pytest test_crawl_with_datamodel_problems.py + +# Obsolete due to teardown in the above test. +# echo "/n/n/n YOU NEED TO RESTART THE SERVER TO REDO TESTS!!!" diff --git a/integrationtests/full_test/test_crawl_with_datamodel_problems.py b/integrationtests/full_test/test_crawl_with_datamodel_problems.py new file mode 100644 index 00000000..daf794b5 --- /dev/null +++ b/integrationtests/full_test/test_crawl_with_datamodel_problems.py @@ -0,0 +1,92 @@ +# -*- coding: utf-8 -*- +# +# ** header v3.0 +# This file is a part of the CaosDB Project. +# +# Copyright (c) 2020 IndiScale GmbH <info@indiscale.com> +# Copyright (c) 2020 Florian Spreckelsen <f.spreckelsen@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +# +# ** end header +"""Test whether the crawler correctly identifies the data model +problems caused by a faulty model. 
+ +""" +import caosdb as db + +from caosadvancedtools import loadFiles +from caosadvancedtools.cfood import fileguide +from caosadvancedtools.crawler import FileCrawler +from caosadvancedtools.datamodel_problems import DataModelProblems +from caosadvancedtools.guard import INSERT +from scifolder import (AnalysisCFood, ExperimentCFood, ProjectCFood, + PublicationCFood, SimulationCFood) + +from caosmodels.parser import parse_model_from_yaml + + +def setup_module(): + """Clear problems and remove all entities except for built-in ones.""" + DataModelProblems.missing.clear() + try: + db.execute_query("FIND entity WITH ID > 100").delete() + except Exception as delete_exc: + print(delete_exc) + + +def setup(): + """No further setup required.""" + setup_module() + + +def teardown(): + """Delete and clear again.""" + setup_module() + + +def test_crawler_with_data_model_problems(): + """Test whether data model problems are found correctly.""" + # load the files + basepath = "/opt/caosdb/mnt/extroot/" + pathlist = [basepath+dirname for dirname in + ["ExperimentalData", "DataAnalysis", + "SimulationData", "Publications"]] + for path in pathlist: + loadFiles.loadpath(path=path, include="", exclude="", + prefix="", dryrun=False, forceAllowSymlinks=False) + + # load and damage the model + model = parse_model_from_yaml("model.yml") + model.sync_data_model(noquestion=True) + deleted_entities = {"Experiment", "Poster", "results"} + for ent in deleted_entities: + db.execute_query("FIND "+ent).delete() + + # Do the crawling + def access(x): return "extroot" + x + fileguide.access = access + crawl_path = '/' + files = FileCrawler.query_files(crawl_path) + c = FileCrawler(files=files, use_cache=False, + abort_on_exception=False, interactive=False, + hideKnown=False, cfood_types=[ProjectCFood, + ExperimentCFood, AnalysisCFood, PublicationCFood, + SimulationCFood, ]) + c.crawl(security_level=INSERT, path=crawl_path) + + # There should be datamodel problems + assert 
len(DataModelProblems.missing) > 0 + # Deleted entities should have been identified: + assert deleted_entities.issubset(DataModelProblems.missing) diff --git a/integrationtests/single_tests/test_datamodel_problems.py b/integrationtests/single_tests/test_datamodel_problems.py new file mode 100644 index 00000000..93a403d8 --- /dev/null +++ b/integrationtests/single_tests/test_datamodel_problems.py @@ -0,0 +1,113 @@ +#!/usr/bin/env python +# encoding: utf-8 +# +# ** header v3.0 +# This file is a part of the CaosDB Project. +# +# Copyright (C) Indiscale, GmbH <info@indiscale.com> +# Copyright (C) 2020 Florian Spreckelsen <f.spreckelsen@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +# +# ** end header +"""Test the error handling in datamodel_problems.py with simple +insertions. A test using a full-grown crawler finding datamodel errors +during crawling that tests the integrations of the DataModelProblems +class in crawler.py and cfood.py can be found in full-tests. 
+ +""" +import caosdb as db +import pytest +from caosadvancedtools.datamodel_problems import DataModelProblems +from caosdb.exceptions import (UnqualifiedParentsError, + UnqualifiedPropertiesError) + + +def setup_module(): + """Clear problem sets and delete possible test entities""" + DataModelProblems.missing.clear() + try: + db.execute_query("FIND Test*").delete() + except Exception as delete_exc: + print(delete_exc) + + +def setup(): + """No further setup""" + setup_module() + + +def teardown(): + """Clear and delete again.""" + setup_module() + + +def _insert_and_evaluate_exception(ent): + try: + ent.insert() + except Exception as e: + DataModelProblems.evaluate_exception(e) + + +def test_missing_parent(): + """Test if missing RecordType is in datamodel problems.""" + missing_name = "TestType" + rec = db.Record(name="TestRecord") + rec.add_parent(name=missing_name) + with pytest.raises(UnqualifiedParentsError): + _insert_and_evaluate_exception(rec) + assert missing_name in DataModelProblems.missing + + +def test_missing_property(): + """Test if missing Property is in datamodel problems.""" + missing_name = "TestProp" + rec = db.Record(name="TestRecord").add_property(name=missing_name) + with pytest.raises(UnqualifiedPropertiesError): + _insert_and_evaluate_exception(rec) + assert missing_name in DataModelProblems.missing + + +def test_missing_property_existing_type(): + """Test if missing Property is in datamodel problems but existing + RecordType is not. 
+ + """ + missing_prop = "TestProp" + existing_rt = "TestType" + db.RecordType(name=existing_rt).insert() + rec = db.Record(name="TestRecord").add_parent(name=existing_rt) + rec.add_property(name=missing_prop) + with pytest.raises(UnqualifiedPropertiesError): + _insert_and_evaluate_exception(rec) + assert missing_prop in DataModelProblems.missing + assert existing_rt not in DataModelProblems.missing + + +def test_wrong_property_value(): + """An error due to a wrong value (type) is no data model problem per + se + + """ + rt_name = "TestType" + rt = db.RecordType(name=rt_name).insert() + prop_name = "TestProp" + prop_dtype = db.DOUBLE + prop = db.Property(name=prop_name, datatype=prop_dtype).insert() + rec = db.Record(name="TestRecord").add_parent( + name=rt_name).add_property(name=prop_name, value="bla") + with pytest.raises(UnqualifiedPropertiesError): + _insert_and_evaluate_exception(rec) + # Should be empty + assert not DataModelProblems.missing diff --git a/src/caosadvancedtools/cfood.py b/src/caosadvancedtools/cfood.py index 47852fdf..e3ac55fb 100644 --- a/src/caosadvancedtools/cfood.py +++ b/src/caosadvancedtools/cfood.py @@ -6,8 +6,8 @@ # # Copyright (C) 2018 Research Group Biomedical Physics, # Max-Planck-Institute for Dynamics and Self-Organization Göttingen -# Copyright (C) 2019 Henrik tom Wörden -# Copyright (C) 2020 Henrik tom Wörden +# Copyright (C) 2019,2020 Henrik tom Wörden +# Copyright (C) 2020 Florian Spreckelsen <f.spreckelsen@indiscale.com> # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as @@ -44,9 +44,13 @@ from abc import ABCMeta, abstractmethod import caosdb as db from caosdb.exceptions import EntityDoesNotExistError +from .datamodel_problems import DataModelProblems from .guard import global_guard as guard ENTITIES = {} +PROPERTIES = {} +RECORDS = {} +RECORDTYPES = {} logger = logging.getLogger(__name__) @@ -65,6 +69,61 @@ def get_entity(name): return 
ENTITIES[name] +def get_property(name): + """Returns the property with a given name, preferably from a local + cache. + + If the local cache does not contain the property, try to + retrieve it from CaosDB. If it does not exist, add it to the data + model problems + + """ + if name not in PROPERTIES: + try: + prop = db.execute_query("FIND Property "+name, + unique=True) + except EntityDoesNotExistError as e: + # Property might actually be a RecordType + prop = get_recordtype(name) + PROPERTIES[name] = prop + + return PROPERTIES[name] + + +def get_record(name): + """Returns the record with a given name, preferably from a local cache. + + If the local cache does not contain the record, try to retrieve it + from CaosDB. + + """ + if name not in RECORDS: + rec = db.execute_query("FIND Record "+name, unique=True) + RECORDS[name] = rec + + return RECORDS[name] + + +def get_recordtype(name): + """Returns the record type with a given name, preferably from a local + cache. + + If the local cache does not contain the record type, try to + retrieve it from CaosDB. 
If it does not exist, add it to the data + model problems + + """ + if name not in RECORDTYPES: + try: + rec = db.execute_query("FIND RecordType "+name, unique=True) + except EntityDoesNotExistError as e: + DataModelProblems.add(name) + raise e + RECORDTYPES[name] = rec + + return RECORDTYPES[name] + + class FileGuide(object): def access(self, path): """ should be replaced by a function that adds @@ -465,7 +524,15 @@ def assure_has_property(entity, name, value, to_be_updated=None, def insert_id_based_on_name(entity): if entity.name is not None and (entity.id is None or entity.id < 0): - entity.id = get_entity(entity.name).id + if isinstance(entity, db.Property): + entity.id = get_property(entity.name).id + elif isinstance(entity, db.Record): + entity.id = get_record(entity.name).id + elif isinstance(entity, db.RecordType): + entity.id = get_recordtype(entity.name).id + else: + # In case the type of the entity isn't specified + entity.id = get_entity(entity.name).id def get_ids_for_entities_with_names(entities): diff --git a/src/caosadvancedtools/crawler.py b/src/caosadvancedtools/crawler.py index 496688af..74153614 100644 --- a/src/caosadvancedtools/crawler.py +++ b/src/caosadvancedtools/crawler.py @@ -6,7 +6,9 @@ # # Copyright (C) 2018 Research Group Biomedical Physics, # Max-Planck-Institute for Dynamics and Self-Organization Göttingen +# Copyright (C) 2020 Indiscale GmbH <info@indiscale.com> # Copyright (C) 2020 Henrik tom Wörden +# Copyright (C) 2020 Florian Spreckelsen <f.spreckelsen@indiscale.com> # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as @@ -49,6 +51,7 @@ from caosdb.exceptions import TransactionError from .cache import Cache, UpdateCache, get_pretty_xml from .cfood import RowCFood, get_ids_for_entities_with_names +from .datamodel_problems import DataModelProblems from .guard import RETRIEVE, ProhibitedException from .guard import global_guard as guard @@ -190,11 
+193,14 @@ class Crawler(object): Cfood.__name__, item)) except Exception as e: + logger.info("Failed during execution of {}!".format( + cfood.__class__)) traceback.print_exc() - print(e) + logger.debug(e) if self.abort_on_exception: raise e + errors_occured = True tbs.append(e) @@ -281,8 +287,10 @@ class Crawler(object): cfood.update_identifiables() self.push_identifiables_to_CaosDB(cfood) except Exception as e: + logger.info("Failed during execution of {}!".format( + cfood.__class__)) traceback.print_exc() - print(e) + logger.debug(e) if self.abort_on_exception: raise e @@ -422,6 +430,8 @@ carefully and if the changes are ok, click on the following link: guard.safe_update(cfood.to_be_updated) except ProhibitedException: self.update_cache.insert(cfood.to_be_updated, self.run_id) + except Exception as e: + DataModelProblems.evaluate_exception(e) # TODO remove static? @staticmethod @@ -462,7 +472,10 @@ carefully and if the changes are ok, click on the following link: if len(missing_identifiables) == 0: logger.debug("No new entities to be inserted.") else: - guard.safe_insert(missing_identifiables) + try: + guard.safe_insert(missing_identifiables) + except Exception as e: + DataModelProblems.evaluate_exception(e) logger.debug("Retrieving entities from CaosDB...") identifiables.retrieve(unique=True, raise_exception_on_error=False) diff --git a/src/caosadvancedtools/datamodel_problems.py b/src/caosadvancedtools/datamodel_problems.py new file mode 100644 index 00000000..224744ea --- /dev/null +++ b/src/caosadvancedtools/datamodel_problems.py @@ -0,0 +1,89 @@ +#!/usr/bin/env python +# encoding: utf-8 +# +# ** header v3.0 +# This file is a part of the CaosDB Project. 
+# +# Copyright (C) 2020 Indiscale GmbH <info@indiscale.com> +# Copyright (C) 2020 Florian Spreckelsen <f.spreckelsen@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +# +# ** end header +"""Implements a class for finding and storing missing entities, either +record types or properties, that are missing in a data model. They can +be inserted by hand or guessed from possible exceptions when inserting +or updating entities with missing parents and/or properties. + +""" +from caosdb.exceptions import (EntityDoesNotExistError, + UnqualifiedParentsError, + UnqualifiedPropertiesError) + + +class DataModelProblems(object): + """ Collect and store missing RecordTypes and Properties.""" + missing = set() + + @staticmethod + def add(ent): + """Add a missing record type or property.""" + DataModelProblems.missing.add(ent) + + @staticmethod + def evaluate_exception(e): + """Take an exception, see whether it was caused by datamodel problems, + and update missing parents and/or properties if this was the + case. Otherwise, raise the exception. + + """ + # type(e) == type(Exception()) seems to be necessary because + # of EntityMultiErrors that are instances of (all? some of?) + # their child errors. So isinstance doesn't show the desired + # behavior. 
+ + if type(e) == type(UnqualifiedPropertiesError()): + for err in e.get_errors(): + # Here, it doesn't matter if there is an actual + # EntityDoesNotExistError or a MultiEntityError + # including an EntityDoesNotExistError. The latter + # case happens when a wrong entity with a value is + # given since then, an EntityHasNoDatatypeError is + # raised as well. Still, the problem is the missing + # property, so this is okay. + + if isinstance(err, EntityDoesNotExistError): + property_missing = True + DataModelProblems.add(err.get_entity().name) + raise e + elif type(e) == type(UnqualifiedParentsError()): + # This is always caused by missing/wrong parents + + for err in e.get_errors(): + DataModelProblems.add(err.get_entity().name) + raise e + # This is the ugly workaround for a MultiEntityError that + # stems from a UnqualifiedParentsError: an + # EntityDoesNotExistError is raised AND the causing entity has + # type PARENT. + elif ((type(e) == type(EntityDoesNotExistError())) and + ((str(type(e.get_entity()).__name__).upper() == "PARENT"))): + DataModelProblems.add(e.get_entity().name) + raise e + # Evaluate children of real MultiEntityErrors: + elif hasattr(e, "errors") and len(e.get_errors()) > 0: + for err in e.get_errors(): + DataModelProblems.evaluate_exception(err) + else: + raise e diff --git a/unittests/test_cfood.py b/unittests/test_cfood.py index 27365b2a..f51d515a 100644 --- a/unittests/test_cfood.py +++ b/unittests/test_cfood.py @@ -153,7 +153,7 @@ class InsertionTest(unittest.TestCase): assure_has_property(entity=entity, name=int_name, value=new_int, to_be_updated=to_be_updated) assert to_be_updated[0] is entity - + class DependendTest(unittest.TestCase): def test(self): -- GitLab