From 7ce500305d34f57b9088f48bdacc1beb3098c39f Mon Sep 17 00:00:00 2001
From: Florian Spreckelsen <florian.spreckelsen@gmx.net>
Date: Fri, 19 Jun 2020 08:00:32 +0000
Subject: [PATCH] DEV: Add class for the collection of datamodel problems

To be used while crawling and to collect missing properties and/or
record types from possible errors while crawling.
---
 CHANGELOG.md                                  |   6 +
 integrationtests/full_test/test.sh            |   7 +-
 .../test_crawl_with_datamodel_problems.py     |  92 ++++++++++++++
 .../single_tests/test_datamodel_problems.py   | 113 ++++++++++++++++++
 src/caosadvancedtools/cfood.py                |  73 ++++++++++-
 src/caosadvancedtools/crawler.py              |  19 ++-
 src/caosadvancedtools/datamodel_problems.py   |  89 ++++++++++++++
 unittests/test_cfood.py                       |   2 +-
 8 files changed, 393 insertions(+), 8 deletions(-)
 create mode 100644 integrationtests/full_test/test_crawl_with_datamodel_problems.py
 create mode 100644 integrationtests/single_tests/test_datamodel_problems.py
 create mode 100644 src/caosadvancedtools/datamodel_problems.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 891c5ecf..a9bee83f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,8 +8,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Added ###
 
+- New class to collect possible problems with the data model.
+
 ### Changed ###
 
+- Instead of `get_entity`, type-specific functions are used in
+  `cfood.py` when the type of the entity in question is known.
+- Logger is used instead of `print` for errors in `crawler.py`.
+
 ### Deprecated ###
 
 ### Removed ###
diff --git a/integrationtests/full_test/test.sh b/integrationtests/full_test/test.sh
index 2f7bc3d8..ccaaad5e 100755
--- a/integrationtests/full_test/test.sh
+++ b/integrationtests/full_test/test.sh
@@ -36,4 +36,9 @@ python3 test_table.py
 echo "Testing im and export"
 python3 test_im_und_export.py
 
-echo "/n/n/n YOU NEED TO RESTART THE SERVER TO REDO TESTS!!!"
+# Test correct display of data model errors:
+echo "Testing recognition of data model problems ... "
+python3 -m pytest test_crawl_with_datamodel_problems.py
+
+# Obsolete due to teardown in the above test.
+# echo "/n/n/n YOU NEED TO RESTART THE SERVER TO REDO TESTS!!!"
diff --git a/integrationtests/full_test/test_crawl_with_datamodel_problems.py b/integrationtests/full_test/test_crawl_with_datamodel_problems.py
new file mode 100644
index 00000000..daf794b5
--- /dev/null
+++ b/integrationtests/full_test/test_crawl_with_datamodel_problems.py
@@ -0,0 +1,92 @@
+# -*- coding: utf-8 -*-
+#
+# ** header v3.0
+# This file is a part of the CaosDB Project.
+#
+# Copyright (c) 2020 IndiScale GmbH <info@indiscale.com>
+# Copyright (c) 2020 Florian Spreckelsen <f.spreckelsen@indiscale.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+# ** end header
+"""Test whether the crawler correctly identifies the data model
+problems caused by a faulty model.
+
+"""
+import caosdb as db
+
+from caosadvancedtools import loadFiles
+from caosadvancedtools.cfood import fileguide
+from caosadvancedtools.crawler import FileCrawler
+from caosadvancedtools.datamodel_problems import DataModelProblems
+from caosadvancedtools.guard import INSERT
+from scifolder import (AnalysisCFood, ExperimentCFood, ProjectCFood,
+                       PublicationCFood, SimulationCFood)
+
+from caosmodels.parser import parse_model_from_yaml
+
+
+def setup_module():
+    """Clear problems and remove all entities except for built-in ones."""
+    DataModelProblems.missing.clear()
+    try:
+        db.execute_query("FIND entity WITH ID > 100").delete()
+    except Exception as delete_exc:
+        print(delete_exc)
+
+
+def setup():
+    """No further setup required."""
+    setup_module()
+
+
+def teardown():
+    """Delete and clear again."""
+    setup_module()
+
+
+def test_crawler_with_data_model_problems():
+    """Test whether data model problems are found correctly."""
+    # load the files
+    basepath = "/opt/caosdb/mnt/extroot/"
+    pathlist = [basepath+dirname for dirname in
+                ["ExperimentalData", "DataAnalysis",
+                 "SimulationData", "Publications"]]
+    for path in pathlist:
+        loadFiles.loadpath(path=path, include="", exclude="",
+                           prefix="", dryrun=False, forceAllowSymlinks=False)
+
+    # load and damage the model
+    model = parse_model_from_yaml("model.yml")
+    model.sync_data_model(noquestion=True)
+    deleted_entities = {"Experiment", "Poster", "results"}
+    for ent in deleted_entities:
+        db.execute_query("FIND "+ent).delete()
+
+    # Do the crawling
+    def access(x): return "extroot" + x
+    fileguide.access = access
+    crawl_path = '/'
+    files = FileCrawler.query_files(crawl_path)
+    c = FileCrawler(files=files, use_cache=False,
+                    abort_on_exception=False, interactive=False,
+                    hideKnown=False, cfood_types=[ProjectCFood,
+                                                  ExperimentCFood, AnalysisCFood, PublicationCFood,
+                                                  SimulationCFood, ])
+    c.crawl(security_level=INSERT, path=crawl_path)
+
+    # There should be datamodel problems
+    assert len(DataModelProblems.missing) > 0
+    # Deleted entities should have been identified:
+    assert deleted_entities.issubset(DataModelProblems.missing)
diff --git a/integrationtests/single_tests/test_datamodel_problems.py b/integrationtests/single_tests/test_datamodel_problems.py
new file mode 100644
index 00000000..93a403d8
--- /dev/null
+++ b/integrationtests/single_tests/test_datamodel_problems.py
@@ -0,0 +1,113 @@
+#!/usr/bin/env python
+# encoding: utf-8
+#
+# ** header v3.0
+# This file is a part of the CaosDB Project.
+#
+# Copyright (C) Indiscale, GmbH <info@indiscale.com>
+# Copyright (C) 2020 Florian Spreckelsen <f.spreckelsen@indiscale.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+# ** end header
+"""Test the error handling in datamodel_problems.py with simple
+insertions. A test that runs a full-grown crawler, and thereby covers
+the integration of the DataModelProblems class in crawler.py and
+cfood.py, can be found in integrationtests/full_test.
+
+"""
+import caosdb as db
+import pytest
+from caosadvancedtools.datamodel_problems import DataModelProblems
+from caosdb.exceptions import (UnqualifiedParentsError,
+                               UnqualifiedPropertiesError)
+
+
+def setup_module():
+    """Clear problem sets and delete possible test entities"""
+    DataModelProblems.missing.clear()
+    try:
+        db.execute_query("FIND Test*").delete()
+    except Exception as delete_exc:
+        print(delete_exc)
+
+
+def setup():
+    """No further setup"""
+    setup_module()
+
+
+def teardown():
+    """Clear and delete again."""
+    setup_module()
+
+
+def _insert_and_evaluate_exception(ent):
+    try:
+        ent.insert()
+    except Exception as e:
+        DataModelProblems.evaluate_exception(e)
+
+
+def test_missing_parent():
+    """Test if missing RecordType is in datamodel problems."""
+    missing_name = "TestType"
+    rec = db.Record(name="TestRecord")
+    rec.add_parent(name=missing_name)
+    with pytest.raises(UnqualifiedParentsError):
+        _insert_and_evaluate_exception(rec)
+    assert missing_name in DataModelProblems.missing
+
+
+def test_missing_property():
+    """Test if missing Property is in datamodel problems."""
+    missing_name = "TestProp"
+    rec = db.Record(name="TestRecord").add_property(name=missing_name)
+    with pytest.raises(UnqualifiedPropertiesError):
+        _insert_and_evaluate_exception(rec)
+    assert missing_name in DataModelProblems.missing
+
+
+def test_missing_property_existing_type():
+    """Test if missing Property is in datamodel problems but existing
+    RecordType is not.
+
+    """
+    missing_prop = "TestProp"
+    existing_rt = "TestType"
+    db.RecordType(name=existing_rt).insert()
+    rec = db.Record(name="TestRecord").add_parent(name=existing_rt)
+    rec.add_property(name=missing_prop)
+    with pytest.raises(UnqualifiedPropertiesError):
+        _insert_and_evaluate_exception(rec)
+    assert missing_prop in DataModelProblems.missing
+    assert existing_rt not in DataModelProblems.missing
+
+
+def test_wrong_property_value():
+    """An error due to a wrong value (type) is no data model problem per
+    se
+
+    """
+    rt_name = "TestType"
+    rt = db.RecordType(name=rt_name).insert()
+    prop_name = "TestProp"
+    prop_dtype = db.DOUBLE
+    prop = db.Property(name=prop_name, datatype=prop_dtype).insert()
+    rec = db.Record(name="TestRecord").add_parent(
+        name=rt_name).add_property(name=prop_name, value="bla")
+    with pytest.raises(UnqualifiedPropertiesError):
+        _insert_and_evaluate_exception(rec)
+    # Should be empty
+    assert not DataModelProblems.missing
diff --git a/src/caosadvancedtools/cfood.py b/src/caosadvancedtools/cfood.py
index 47852fdf..e3ac55fb 100644
--- a/src/caosadvancedtools/cfood.py
+++ b/src/caosadvancedtools/cfood.py
@@ -6,8 +6,8 @@
 #
 # Copyright (C) 2018 Research Group Biomedical Physics,
 # Max-Planck-Institute for Dynamics and Self-Organization Göttingen
-# Copyright (C) 2019 Henrik tom Wörden
-# Copyright (C) 2020 Henrik tom Wörden
+# Copyright (C) 2019,2020 Henrik tom Wörden
+# Copyright (C) 2020 Florian Spreckelsen <f.spreckelsen@indiscale.com>
 #
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU Affero General Public License as
@@ -44,9 +44,13 @@ from abc import ABCMeta, abstractmethod
 import caosdb as db
 from caosdb.exceptions import EntityDoesNotExistError
 
+from .datamodel_problems import DataModelProblems
 from .guard import global_guard as guard
 
 ENTITIES = {}
+PROPERTIES = {}
+RECORDS = {}
+RECORDTYPES = {}
 
 logger = logging.getLogger(__name__)
 
@@ -65,6 +69,61 @@ def get_entity(name):
     return ENTITIES[name]
 
 
+def get_property(name):
+    """Returns the property with a given name, preferably from a local
+    cache.
+
+    If the local cache does not contain the property, try to retrieve
+    it from CaosDB. If no such property exists, fall back to
+    get_recordtype, which adds missing names to the data model problems.
+
+    """
+    if name not in PROPERTIES:
+        try:
+            prop = db.execute_query("FIND Property "+name,
+                                    unique=True)
+        except EntityDoesNotExistError as e:
+            # Property might actually be a RecordType
+            prop = get_recordtype(name)
+        PROPERTIES[name] = prop
+
+    return PROPERTIES[name]
+
+
+def get_record(name):
+    """Returns the record with a given name, preferably from a local cache.
+
+    If the local cache does not contain the record, try to retrieve it
+    from CaosDB.
+
+    """
+    if name not in RECORDS:
+        rec = db.execute_query("FIND Record "+name, unique=True)
+        RECORDS[name] = rec
+
+    return RECORDS[name]
+
+
+def get_recordtype(name):
+    """Returns the record type with a given name, preferably from a local
+    cache.
+
+    If the local cache does not contain the record type, try to
+    retrieve it from CaosDB. If it does not exist, add it to the data
+    model problems
+
+    """
+    if name not in RECORDTYPES:
+        try:
+            rec = db.execute_query("FIND RecordType "+name, unique=True)
+        except EntityDoesNotExistError as e:
+            DataModelProblems.add(name)
+            raise e
+        RECORDTYPES[name] = rec
+
+    return RECORDTYPES[name]
+
+
 class FileGuide(object):
     def access(self, path):
         """ should be replaced by a function that adds
@@ -465,7 +524,15 @@ def assure_has_property(entity, name, value, to_be_updated=None,
 
 def insert_id_based_on_name(entity):
     if entity.name is not None and (entity.id is None or entity.id < 0):
-        entity.id = get_entity(entity.name).id
+        if isinstance(entity, db.Property):
+            entity.id = get_property(entity.name).id
+        elif isinstance(entity, db.Record):
+            entity.id = get_record(entity.name).id
+        elif isinstance(entity, db.RecordType):
+            entity.id = get_recordtype(entity.name).id
+        else:
+            # In case the type of the entity isn't specified
+            entity.id = get_entity(entity.name).id
 
 
 def get_ids_for_entities_with_names(entities):
diff --git a/src/caosadvancedtools/crawler.py b/src/caosadvancedtools/crawler.py
index 496688af..74153614 100644
--- a/src/caosadvancedtools/crawler.py
+++ b/src/caosadvancedtools/crawler.py
@@ -6,7 +6,9 @@
 #
 # Copyright (C) 2018 Research Group Biomedical Physics,
 # Max-Planck-Institute for Dynamics and Self-Organization Göttingen
+# Copyright (C) 2020 Indiscale GmbH <info@indiscale.com>
 # Copyright (C) 2020 Henrik tom Wörden
+# Copyright (C) 2020 Florian Spreckelsen <f.spreckelsen@indiscale.com>
 #
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU Affero General Public License as
@@ -49,6 +51,7 @@ from caosdb.exceptions import TransactionError
 
 from .cache import Cache, UpdateCache, get_pretty_xml
 from .cfood import RowCFood, get_ids_for_entities_with_names
+from .datamodel_problems import DataModelProblems
 from .guard import RETRIEVE, ProhibitedException
 from .guard import global_guard as guard
 
@@ -190,11 +193,14 @@ class Crawler(object):
                                 Cfood.__name__,
                                 item))
                     except Exception as e:
+                        logger.info("Failed during execution of {}!".format(
+                            cfood.__class__))
                         traceback.print_exc()
-                        print(e)
+                        logger.debug(e)
 
                         if self.abort_on_exception:
                             raise e
+
                         errors_occured = True
                         tbs.append(e)
 
@@ -281,8 +287,10 @@ class Crawler(object):
                 cfood.update_identifiables()
                 self.push_identifiables_to_CaosDB(cfood)
             except Exception as e:
+                logger.info("Failed during execution of {}!".format(
+                    cfood.__class__))
                 traceback.print_exc()
-                print(e)
+                logger.debug(e)
 
                 if self.abort_on_exception:
                     raise e
@@ -422,6 +430,8 @@ carefully and if the changes are ok, click on the following link:
             guard.safe_update(cfood.to_be_updated)
         except ProhibitedException:
             self.update_cache.insert(cfood.to_be_updated, self.run_id)
+        except Exception as e:
+            DataModelProblems.evaluate_exception(e)
 
     # TODO remove static?
     @staticmethod
@@ -462,7 +472,10 @@ carefully and if the changes are ok, click on the following link:
         if len(missing_identifiables) == 0:
             logger.debug("No new entities to be inserted.")
         else:
-            guard.safe_insert(missing_identifiables)
+            try:
+                guard.safe_insert(missing_identifiables)
+            except Exception as e:
+                DataModelProblems.evaluate_exception(e)
 
         logger.debug("Retrieving entities from CaosDB...")
         identifiables.retrieve(unique=True, raise_exception_on_error=False)
diff --git a/src/caosadvancedtools/datamodel_problems.py b/src/caosadvancedtools/datamodel_problems.py
new file mode 100644
index 00000000..224744ea
--- /dev/null
+++ b/src/caosadvancedtools/datamodel_problems.py
@@ -0,0 +1,89 @@
+#!/usr/bin/env python
+# encoding: utf-8
+#
+# ** header v3.0
+# This file is a part of the CaosDB Project.
+#
+# Copyright (C) 2020 Indiscale GmbH <info@indiscale.com>
+# Copyright (C) 2020 Florian Spreckelsen <f.spreckelsen@indiscale.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+# ** end header
+"""Implements a class for finding and storing entities, either record
+types or properties, that are missing in a data model. They can be
+added by hand or guessed from possible exceptions when inserting or
+updating entities with missing parents and/or properties.
+
+"""
+from caosdb.exceptions import (EntityDoesNotExistError,
+                               UnqualifiedParentsError,
+                               UnqualifiedPropertiesError)
+
+
+class DataModelProblems(object):
+    """ Collect and store missing RecordTypes and Properties."""
+    missing = set()
+
+    @staticmethod
+    def add(ent):
+        """Add a missing record type or property."""
+        DataModelProblems.missing.add(ent)
+
+    @staticmethod
+    def evaluate_exception(e):
+        """Take an exception, see whether it was caused by datamodel problems,
+        and update missing parents and/or properties if this was the case.
+        Either way, the exception or one of its child errors is raised.
+
+        """
+        # type(e) == type(Exception()) seems to be necessary because
+        # of EntityMultiErrors that are instances of (all? some of?)
+        # their child errors. So isinstance doesn't show the desired
+        # behavior.
+
+        if type(e) == type(UnqualifiedPropertiesError()):
+            for err in e.get_errors():
+                # Here, it doesn't matter if there is an actual
+                # EntityDoesNotExistError or a MultiEntityError
+                # including an EntityDoesNotExistError. The latter
+                # case happens when a wrong entity with a value is
+                # given since then, an EntityHasNoDatatypeError is
+                # raised as well. Still, the problem is the missing
+                # property, so this is okay.
+
+                if isinstance(err, EntityDoesNotExistError):
+                    property_missing = True
+                    DataModelProblems.add(err.get_entity().name)
+            raise e
+        elif type(e) == type(UnqualifiedParentsError()):
+            # This is always caused by missing/wrong parents
+
+            for err in e.get_errors():
+                DataModelProblems.add(err.get_entity().name)
+            raise e
+        # This is the ugly workaround for a MultiEntityError that
+        # stems from an UnqualifiedParentsError: an
+        # EntityDoesNotExistError is raised AND the causing entity has
+        # type PARENT.
+        elif ((type(e) == type(EntityDoesNotExistError())) and
+              ((str(type(e.get_entity()).__name__).upper() == "PARENT"))):
+            DataModelProblems.add(e.get_entity().name)
+            raise e
+        # Evaluate children of real MultiEntityErrors:
+        elif hasattr(e, "errors") and len(e.get_errors()) > 0:
+            for err in e.get_errors():
+                DataModelProblems.evaluate_exception(err)
+        else:
+            raise e
diff --git a/unittests/test_cfood.py b/unittests/test_cfood.py
index 27365b2a..f51d515a 100644
--- a/unittests/test_cfood.py
+++ b/unittests/test_cfood.py
@@ -153,7 +153,7 @@ class InsertionTest(unittest.TestCase):
         assure_has_property(entity=entity, name=int_name,
                             value=new_int, to_be_updated=to_be_updated)
         assert to_be_updated[0] is entity
-              
+
 
 class DependendTest(unittest.TestCase):
     def test(self):
-- 
GitLab