Skip to content
Snippets Groups Projects
Commit 7ce50030 authored by Florian Spreckelsen's avatar Florian Spreckelsen Committed by Henrik tom Wörden
Browse files

DEV: Add class for the collection of datamodel problems

To be used while crawling and to collect missing properties and/or
record types from possible errors while crawling.
parent 9446c6a6
No related branches found
No related tags found
1 merge request!22Release 0.3
......@@ -8,8 +8,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Added ###
- New class to collect possible problems with the data model
### Changed ###
- instead of `get_entity`, type-specific functions are used in
`cfood.py` when the type of the entity in question is known.
- Logger is used instead of `print` for errors in `crawler.py`.
### Deprecated ###
### Removed ###
......
......@@ -36,4 +36,9 @@ python3 test_table.py
echo "Testing im and export"
python3 test_im_und_export.py
echo "/n/n/n YOU NEED TO RESTART THE SERVER TO REDO TESTS!!!"
# Test correct display of data model errors:
echo "Testing recognition of data model problems ... "
python3 -m pytest test_crawl_with_datamodel_problems.py
# Obsolete due to teardown in the above test.
# echo "/n/n/n YOU NEED TO RESTART THE SERVER TO REDO TESTS!!!"
# -*- coding: utf-8 -*-
#
# ** header v3.0
# This file is a part of the CaosDB Project.
#
# Copyright (c) 2020 IndiScale GmbH <info@indiscale.com>
# Copyright (c) 2020 Florian Spreckelsen <f.spreckelsen@indiscale.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
# ** end header
"""Test whether the crawler correctly identifies the data model
problems caused by a faulty model.
"""
import caosdb as db
from caosadvancedtools import loadFiles
from caosadvancedtools.cfood import fileguide
from caosadvancedtools.crawler import FileCrawler
from caosadvancedtools.datamodel_problems import DataModelProblems
from caosadvancedtools.guard import INSERT
from scifolder import (AnalysisCFood, ExperimentCFood, ProjectCFood,
PublicationCFood, SimulationCFood)
from caosmodels.parser import parse_model_from_yaml
def setup_module():
    """Reset the collected data model problems and empty the server.

    ``DataModelProblems.missing`` is cleared and every entity with an ID
    above 100 is deleted (according to the original author, this leaves
    only the built-in entities).  A failing deletion is printed instead
    of being raised.
    """
    DataModelProblems.missing.clear()
    try:
        leftovers = db.execute_query("FIND entity WITH ID > 100")
        leftovers.delete()
    except Exception as cleanup_error:
        # Best-effort cleanup: report the problem and carry on.
        print(cleanup_error)
def setup():
    """Run the same cleanup as ``setup_module``; nothing else is needed."""
    setup_module()
def teardown():
    """Repeat the cleanup of ``setup_module`` to leave a clean state."""
    setup_module()
def test_crawler_with_data_model_problems():
    """Crawl with a deliberately damaged data model and check the result.

    The files are loaded, the model from ``model.yml`` is synchronized
    and three of its entities are deleted again.  After crawling, the
    deleted entities must be listed in ``DataModelProblems.missing``.
    """
    # Make the files known to the server.
    extroot = "/opt/caosdb/mnt/extroot/"
    subdirs = ("ExperimentalData", "DataAnalysis",
               "SimulationData", "Publications")
    for data_path in [extroot + subdir for subdir in subdirs]:
        loadFiles.loadpath(path=data_path, include="", exclude="",
                           prefix="", dryrun=False,
                           forceAllowSymlinks=False)

    # Set up the data model, then break it on purpose.
    data_model = parse_model_from_yaml("model.yml")
    data_model.sync_data_model(noquestion=True)
    deleted_entities = {"Experiment", "Poster", "results"}
    for entity_name in deleted_entities:
        db.execute_query("FIND " + entity_name).delete()

    # Run the crawler on everything below the root directory.
    def access(x):
        return "extroot" + x

    fileguide.access = access
    crawl_path = '/'
    found_files = FileCrawler.query_files(crawl_path)
    cfood_types = [ProjectCFood, ExperimentCFood, AnalysisCFood,
                   PublicationCFood, SimulationCFood]
    crawler = FileCrawler(files=found_files, use_cache=False,
                          abort_on_exception=False, interactive=False,
                          hideKnown=False, cfood_types=cfood_types)
    crawler.crawl(security_level=INSERT, path=crawl_path)

    # The crawl must have reported data model problems at all ...
    assert DataModelProblems.missing
    # ... and every deleted entity must be among them.
    assert deleted_entities.issubset(DataModelProblems.missing)
#!/usr/bin/env python
# encoding: utf-8
#
# ** header v3.0
# This file is a part of the CaosDB Project.
#
# Copyright (C) Indiscale, GmbH <info@indiscale.com>
# Copyright (C) 2020 Florian Spreckelsen <f.spreckelsen@indiscale.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
# ** end header
"""Test the error handling in datamodel_problems.py with simple
insertions. A test using a full-grown crawler finding datamodel errors
during crawling that tests the integrations of the DataModelProblems
class in crawler.py and cfood.py can be found in full-tests.
"""
import caosdb as db
import pytest
from caosadvancedtools.datamodel_problems import DataModelProblems
from caosdb.exceptions import (UnqualifiedParentsError,
UnqualifiedPropertiesError)
def setup_module():
    """Reset the collected problems and remove leftover test entities.

    ``DataModelProblems.missing`` is cleared and the result of the query
    ``FIND Test*`` is deleted.  A failing deletion is printed instead of
    being raised.
    """
    DataModelProblems.missing.clear()
    try:
        test_entities = db.execute_query("FIND Test*")
        test_entities.delete()
    except Exception as cleanup_error:
        # Best-effort cleanup: report the problem and carry on.
        print(cleanup_error)
def setup():
    """Run the same cleanup as ``setup_module``; nothing else is needed."""
    setup_module()
def teardown():
    """Repeat the cleanup of ``setup_module`` to leave a clean state."""
    setup_module()
def _insert_and_evaluate_exception(ent):
    """Insert ``ent`` and pass any raised exception to DataModelProblems.

    Every exception raised by ``ent.insert()`` is handed over to
    ``DataModelProblems.evaluate_exception``.
    """
    try:
        ent.insert()
    except Exception as insert_error:
        DataModelProblems.evaluate_exception(insert_error)
def test_missing_parent():
    """A parent that does not exist must end up in the missing set."""
    missing_name = "TestType"
    record = db.Record(name="TestRecord")
    record.add_parent(name=missing_name)

    with pytest.raises(UnqualifiedParentsError):
        _insert_and_evaluate_exception(record)

    problems = DataModelProblems.missing
    assert missing_name in problems
def test_missing_property():
    """A property that does not exist must end up in the missing set."""
    missing_name = "TestProp"
    record = db.Record(name="TestRecord")
    record = record.add_property(name=missing_name)

    with pytest.raises(UnqualifiedPropertiesError):
        _insert_and_evaluate_exception(record)

    problems = DataModelProblems.missing
    assert missing_name in problems
def test_missing_property_existing_type():
    """Only the missing Property is reported, not the existing RecordType.

    The RecordType is inserted first, so the Record's parent exists and
    only its property is unknown.
    """
    missing_prop = "TestProp"
    existing_rt = "TestType"
    db.RecordType(name=existing_rt).insert()

    record = db.Record(name="TestRecord")
    record = record.add_parent(name=existing_rt)
    record.add_property(name=missing_prop)

    with pytest.raises(UnqualifiedPropertiesError):
        _insert_and_evaluate_exception(record)

    problems = DataModelProblems.missing
    assert missing_prop in problems
    assert existing_rt not in problems
def test_wrong_property_value():
    """An error caused by a wrong value (type) is not a data model problem.

    Both the RecordType and the Property (of datatype DOUBLE) exist; the
    Record merely assigns the string "bla" to the property.  The
    insertion is expected to fail with an ``UnqualifiedPropertiesError``
    while ``DataModelProblems.missing`` stays empty.
    """
    rt_name = "TestType"
    prop_name = "TestProp"
    # The inserted entities are only needed on the server, so their
    # Python objects are not kept.
    db.RecordType(name=rt_name).insert()
    db.Property(name=prop_name, datatype=db.DOUBLE).insert()

    rec = db.Record(name="TestRecord").add_parent(
        name=rt_name).add_property(name=prop_name, value="bla")
    with pytest.raises(UnqualifiedPropertiesError):
        _insert_and_evaluate_exception(rec)

    # Nothing is missing from the data model, so the set must be empty.
    assert not DataModelProblems.missing
......@@ -6,8 +6,8 @@
#
# Copyright (C) 2018 Research Group Biomedical Physics,
# Max-Planck-Institute for Dynamics and Self-Organization Göttingen
# Copyright (C) 2019 Henrik tom Wörden
# Copyright (C) 2020 Henrik tom Wörden
# Copyright (C) 2019,2020 Henrik tom Wörden
# Copyright (C) 2020 Florian Spreckelsen <f.spreckelsen@indiscale.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
......@@ -44,9 +44,13 @@ from abc import ABCMeta, abstractmethod
import caosdb as db
from caosdb.exceptions import EntityDoesNotExistError
from .datamodel_problems import DataModelProblems
from .guard import global_guard as guard
ENTITIES = {}
PROPERTIES = {}
RECORDS = {}
RECORDTYPES = {}
logger = logging.getLogger(__name__)
......@@ -65,6 +69,61 @@ def get_entity(name):
return ENTITIES[name]
def get_property(name):
    """Return the Property with the given name, preferably from the cache.

    If ``PROPERTIES`` does not contain the name, the Property is
    retrieved from CaosDB and cached.  If no Property of that name
    exists, the name is looked up as a RecordType via ``get_recordtype``
    instead; should that lookup fail as well, ``get_recordtype`` adds the
    name to the data model problems and raises
    ``EntityDoesNotExistError``.
    """
    if name not in PROPERTIES:
        try:
            prop = db.execute_query("FIND Property " + name, unique=True)
        except EntityDoesNotExistError:
            # The property might actually be a RecordType.
            prop = get_recordtype(name)
        PROPERTIES[name] = prop

    return PROPERTIES[name]
def get_record(name):
    """Return the Record with the given name, preferably from the cache.

    A name that is not yet in ``RECORDS`` is retrieved from CaosDB with
    a unique query and stored in the cache before it is returned.
    """
    if name in RECORDS:
        return RECORDS[name]

    record = db.execute_query("FIND Record " + name, unique=True)
    RECORDS[name] = record
    return record
def get_recordtype(name):
    """Return the RecordType with the given name, preferably cached.

    A name that is not yet in ``RECORDTYPES`` is retrieved from CaosDB
    with a unique query and cached.  If it does not exist there, the
    name is added to the data model problems and the
    ``EntityDoesNotExistError`` is raised again.
    """
    if name in RECORDTYPES:
        return RECORDTYPES[name]

    try:
        record_type = db.execute_query("FIND RecordType " + name, unique=True)
    except EntityDoesNotExistError as missing:
        # Remember the gap in the data model before passing the error on.
        DataModelProblems.add(name)
        raise missing

    RECORDTYPES[name] = record_type
    return record_type
class FileGuide(object):
def access(self, path):
""" should be replaced by a function that adds
......@@ -465,6 +524,14 @@ def assure_has_property(entity, name, value, to_be_updated=None,
def insert_id_based_on_name(entity):
    """Set ``entity.id`` from a lookup by name if it has no valid id yet.

    Nothing happens when the entity has no name or already has a
    non-negative id.  Otherwise the lookup function matching the entity's
    class (Property, Record or RecordType) is used; entities of any other
    class fall back to ``get_entity``.
    """
    if entity.name is None:
        return
    if entity.id is not None and entity.id >= 0:
        return

    if isinstance(entity, db.Property):
        lookup = get_property
    elif isinstance(entity, db.Record):
        lookup = get_record
    elif isinstance(entity, db.RecordType):
        lookup = get_recordtype
    else:
        # In case the type of the entity isn't specified
        lookup = get_entity

    entity.id = lookup(entity.name).id
......
......@@ -6,7 +6,9 @@
#
# Copyright (C) 2018 Research Group Biomedical Physics,
# Max-Planck-Institute for Dynamics and Self-Organization Göttingen
# Copyright (C) 2020 Indiscale GmbH <info@indiscale.com>
# Copyright (C) 2020 Henrik tom Wörden
# Copyright (C) 2020 Florian Spreckelsen <f.spreckelsen@indiscale.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
......@@ -49,6 +51,7 @@ from caosdb.exceptions import TransactionError
from .cache import Cache, UpdateCache, get_pretty_xml
from .cfood import RowCFood, get_ids_for_entities_with_names
from .datamodel_problems import DataModelProblems
from .guard import RETRIEVE, ProhibitedException
from .guard import global_guard as guard
......@@ -190,11 +193,14 @@ class Crawler(object):
Cfood.__name__,
item))
except Exception as e:
logger.info("Failed during execution of {}!".format(
cfood.__class__))
traceback.print_exc()
print(e)
logger.debug(e)
if self.abort_on_exception:
raise e
errors_occured = True
tbs.append(e)
......@@ -281,8 +287,10 @@ class Crawler(object):
cfood.update_identifiables()
self.push_identifiables_to_CaosDB(cfood)
except Exception as e:
logger.info("Failed during execution of {}!".format(
cfood.__class__))
traceback.print_exc()
print(e)
logger.debug(e)
if self.abort_on_exception:
raise e
......@@ -422,6 +430,8 @@ carefully and if the changes are ok, click on the following link:
guard.safe_update(cfood.to_be_updated)
except ProhibitedException:
self.update_cache.insert(cfood.to_be_updated, self.run_id)
except Exception as e:
DataModelProblems.evaluate_exception(e)
# TODO remove static?
@staticmethod
......@@ -462,7 +472,10 @@ carefully and if the changes are ok, click on the following link:
if len(missing_identifiables) == 0:
logger.debug("No new entities to be inserted.")
else:
try:
guard.safe_insert(missing_identifiables)
except Exception as e:
DataModelProblems.evaluate_exception(e)
logger.debug("Retrieving entities from CaosDB...")
identifiables.retrieve(unique=True, raise_exception_on_error=False)
......
#!/usr/bin/env python
# encoding: utf-8
#
# ** header v3.0
# This file is a part of the CaosDB Project.
#
# Copyright (C) 2020 Indiscale GmbH <info@indiscale.com>
# Copyright (C) 2020 Florian Spreckelsen <f.spreckelsen@indiscale.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
# ** end header
"""Implements a class for finding and storing entities, either record
types or properties, that are missing in a data model. They can be
inserted by hand or guessed from possible exceptions when inserting
or updating entities with missing parents and/or properties.
"""
from caosdb.exceptions import (EntityDoesNotExistError,
UnqualifiedParentsError,
UnqualifiedPropertiesError)
class DataModelProblems(object):
    """Collect and store missing RecordTypes and Properties."""

    # Everything reported as missing so far.  This is a class attribute,
    # so the set is shared by all users of the class.
    missing = set()

    @staticmethod
    def add(ent):
        """Add a missing record type or property.

        ``ent`` is stored in ``DataModelProblems.missing`` as given; the
        code in this class always passes the name of the entity.
        """
        DataModelProblems.missing.add(ent)

    @staticmethod
    def evaluate_exception(e):
        """Take an exception, see whether it was caused by datamodel
        problems, and update the missing parents and/or properties if
        this was the case.

        The exception is raised again in every case, whether or not it
        was recognized as a data model problem.  For an error that
        bundles child errors, the children are evaluated recursively, so
        the error raised is the one of the first child evaluated.
        """
        # type(e) == type(Exception()) seems to be necessary because
        # of EntityMultiErrors that are instances of (all? some of?)
        # their child errors. So isinstance doesn't show the desired
        # behavior.
        if type(e) == type(UnqualifiedPropertiesError()):
            for err in e.get_errors():
                # Here, it doesn't matter if there is an actual
                # EntityDoesNotExistError or a MultiEntityError
                # including an EntityDoesNotExistError. The latter
                # case happens when a wrong entity with a value is
                # given since then, an EntityHasNoDatatypeError is
                # raised as well. Still, the problem is the missing
                # property, so this is okay.
                if isinstance(err, EntityDoesNotExistError):
                    DataModelProblems.add(err.get_entity().name)
            raise e
        elif type(e) == type(UnqualifiedParentsError()):
            # This is always caused by missing/wrong parents
            for err in e.get_errors():
                DataModelProblems.add(err.get_entity().name)
            raise e
        # This is the ugly workaround for a MultiEntityError that
        # stems from a UnqualifiedParentsError: an
        # EntityDoesNotExistError is raised AND the causing entity has
        # type PARENT.
        elif ((type(e) == type(EntityDoesNotExistError())) and
              ((str(type(e.get_entity()).__name__).upper() == "PARENT"))):
            DataModelProblems.add(e.get_entity().name)
            raise e
        # Evaluate children of real MultiEntityErrors:
        elif hasattr(e, "errors") and len(e.get_errors()) > 0:
            # NOTE(review): every other branch raises, so this loop
            # stops at the first child; later children are not
            # evaluated.  Confirm that this is intended.
            for err in e.get_errors():
                DataModelProblems.evaluate_exception(err)
        else:
            raise e
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment