diff --git a/CHANGELOG.md b/CHANGELOG.md index 77fa7a0d0ccd6d83543dc984bb6d780129e5da40..eb435792c2b8fdde27203f52ff7abc287eda3dca 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Function to get a file path to a shared resource directory - Function to setup logging appropriate for server side scripts with webui output +- New class for collecting information for exporting tables, e.g., to + metadata repositories ### Changed ### diff --git a/integrationtests/test.sh b/integrationtests/test.sh index e6baabf8feefa216648840ffb96517c2a7b1f8e3..6d8261338456e51c278cb89d22bde5425a7668d4 100755 --- a/integrationtests/test.sh +++ b/integrationtests/test.sh @@ -48,5 +48,8 @@ echo "Testing recognition of data model problems ... " python3 -m pytest test_datamodel_problems.py python3 -m pytest test_crawl_with_datamodel_problems.py +echo "Testing table export" +python3 -m pytest test_base_table_exporter_integration.py + # Obsolete due to teardown in the above test. # echo "/n/n/n YOU NEED TO RESTART THE SERVER TO REDO TESTS!!!" diff --git a/integrationtests/test_base_table_exporter_integration.py b/integrationtests/test_base_table_exporter_integration.py new file mode 100644 index 0000000000000000000000000000000000000000..b3dad7a8b29462fdd70d142de5bb0f69e0f7e45c --- /dev/null +++ b/integrationtests/test_base_table_exporter_integration.py @@ -0,0 +1,153 @@ +#!/usr/bin/env python3 +# encoding: utf-8 +# +# ** header v3.0 +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2020 Indiscale GmbH <info@indiscale.com> +# Copyright (C) 2020 Florian Sprecklelsen <f.spreckelsen@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. 
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+# ** end header
+#
+import caosdb as db
+from caosadvancedtools import table_export as te
+
+
+class IntegrationExporter(te.BaseTableExporter):  # exporter for a record that is fetched by its id
+
+    def __init__(self, export_dict, rec_id,
+                 raise_error_if_missing=False):
+        self.record = db.execute_query(
+            "FIND Record WITH ID={}".format(rec_id), unique=True)
+        super().__init__(export_dict, record=self.record,
+                         raise_error_if_missing=raise_error_if_missing)
+
+    def find_Test_Property_1(self):
+        tmp = db.execute_query(
+            "FIND Record WITH ID={}".format(self.record.id),
+            unique=True)
+
+        return tmp.get_property("Test_Property_1").value
+
+    def find_more_complicated_value(self):
+        tp1 = self.record.get_property("Test_Property_1").value
+        tp2 = db.execute_query(
+            "SELECT Test_Property_2 FROM Test_Type_2 WHICH IS"
+            " REFERENCED BY A Test_Type_1 WITH ID={}".format(
+                self.record.id),
+            unique=True).get_property_values("Test_Property_2")[0]
+        return tp1+tp2
+
+
+def insert_entities():
+    """Insert four test records and the corresponding properties and
+    record types. Returns the four records as (rec1, rec2, rec3, rec4).
+
+    """
+    rt1 = db.RecordType(name="Test_Type_1").insert()
+    rt2 = db.RecordType(name="Test_Type_2").insert()
+    prop1 = db.Property(name="Test_Property_1", datatype=db.DOUBLE).insert()
+    prop2 = db.Property(name="Test_Property_2", datatype=db.DOUBLE).insert()
+    rec1 = db.Record(name="Test_Record_1").add_parent(rt1)
+    rec1.add_property(name="Test_Property_1", value=1.0)
+    rec2 = db.Record(name="Test_Record_2").add_parent(rt2)
+    rec2.add_property(name="Test_Property_2", value=2.0).insert()
+    rec1.add_property(name="Test_Record_2", value=rec2.id,
+                      datatype=db.REFERENCE).insert()
+    rec3 = db.Record(name="Test_Record_3").add_parent(rt1)
+    rec3.add_property(name="Test_Property_1", value=11.0)
+    rec4 = db.Record(name="Test_Record_4").add_parent(rt2)
+    rec4.add_property(name="Test_Property_2", value=12.0).insert()
+    rec3.add_property(name="Test_Record_2", value=rec4.id,
+                      datatype=db.REFERENCE).insert()
+
+    return rec1, rec2, rec3, rec4
+
+
+def setup_module():
+    """Delete everything found by the query "FIND Test*"; errors are ignored."""
+    try:
+        db.execute_query("FIND Test*").delete()
+    except Exception:  # best-effort cleanup, but let KeyboardInterrupt/SystemExit pass
+        pass
+
+
+def setup_function():
+    """Run by pytest before every test function: clear all test entities."""
+    setup_module()
+
+
+def teardown_function():
+    """Run by pytest after every test function: delete everything again."""
+    setup_module()
+
+
+def test_find_functions():
+    rec1, rec2, rec3, rec4 = insert_entities()
+    export_dict = {
+        "Test_Property_1": {},
+        "Other value": {
+            "find_func": "find_more_complicated_value"
+        }
+    }
+    my_exporter1 = IntegrationExporter(
+        export_dict, rec1.id, raise_error_if_missing=True)
+    assert rec1.name == my_exporter1.record.name
+    my_exporter1.collect_information()
+    assert my_exporter1.info["Test_Property_1"] == rec1.get_property(
+        "Test_Property_1").value
+    assert my_exporter1.info["Other value"] == 3
+    assert not my_exporter1.missing
+
+    # again with other record
+    my_exporter2 = IntegrationExporter(
+        export_dict, rec3.id, raise_error_if_missing=True)
+    my_exporter2.collect_information()
+    assert my_exporter2.info["Test_Property_1"] == rec3.get_property(
+        "Test_Property_1").value
+    assert my_exporter2.info["Other value"] == 23
+    assert not my_exporter2.missing
+
+
+def test_queries():
+    rec1, rec2, _, _ = insert_entities()
+    # no explicit functions since only `query` key is used,
+    # so works with BaseTableExporter
+    export_dict = {
+        "Test_Property_1": {},
+        "Test_Property_2": {
+            "query": "SELECT Test_Property_2 FROM Test_Type_2 WHICH IS REFERENCED BY A Test_Type_1 WITH ID={}",
+            "selector": "Test_Property_2"
+        }
+    }
+    my_exporter = te.BaseTableExporter(
+        export_dict=export_dict, record=rec1, raise_error_if_missing=True)
+    my_exporter.collect_information()
+    assert my_exporter.info["Test_Property_1"] == rec1.get_property(
+        "Test_Property_1").value
+    assert my_exporter.info["Test_Property_2"] == rec2.get_property(
+        "Test_Property_2").value
+
+    # test guessing of selector
+    del(export_dict["Test_Property_2"]["selector"])
+    my_exporter = te.BaseTableExporter(
+        export_dict=export_dict, record=rec1, raise_error_if_missing=True)
+    assert my_exporter.export_dict["Test_Property_2"]["selector"] == "Test_Property_2"
+    my_exporter.collect_information()
+    assert my_exporter.info["Test_Property_1"] == rec1.get_property(
+        "Test_Property_1").value
+    assert my_exporter.info["Test_Property_2"] == rec2.get_property(
+        "Test_Property_2").value
diff --git a/src/caosadvancedtools/table_export.py b/src/caosadvancedtools/table_export.py
new file mode 100644
index 0000000000000000000000000000000000000000..64a2b7ed6ef9aa04fa4cd042db4684bb348adb94
--- /dev/null
+++ b/src/caosadvancedtools/table_export.py
@@ -0,0 +1,328 @@
+#!/usr/bin/env python3
+# encoding: utf-8
+#
+# ** header v3.0
+# This file is a part of the CaosDB Project.
+#
+# Copyright (C) 2020 Indiscale GmbH <info@indiscale.com>
+# Copyright (C) 2020 Florian Sprecklelsen <f.spreckelsen@indiscale.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+# ** end header
+#
+"""Collect optional and mandatory data from CaosDB records and prepare
+them for an export as a table, e.g., for the export to metadata
+repositories.
+
+"""
+import json
+import logging
+
+import caosdb as db
+
+FIND_FUNCTION = "find_func"
+QUERY = "query"
+
+logger = logging.getLogger(__name__)
+
+
+class TableExportError(db.CaosDBException):
+    """Error that is raised in case of failing export, e.g., because of
+    missing mandatory entries.
+
+    """
+
+
+class BaseTableExporter(object):
+    """Base exporter class from which all actual implementations
+    inherit. It contains the basic structure with a dictionary for
+    optional and mandatory keys, and the error handling. The actual
+    logic for finding the values to the entries has to be implemented
+    elsewhere. The final results are stored in the `info` dict.
+
+    """
+
+    def __init__(self, export_dict, record=None,
+                 raise_error_if_missing=False):
+        """Initialize the exporter.
+
+        Parameters
+        ----------
+        export_dict : dict or string
+            dictionary with the names of the entries to be exported as
+            keys. The values are again dictionaries specifying whether
+            the entries are optional, which function or query should
+            be used to find the value for the corresponding entry and
+            possible error explanations if values are missing. Can be
+            either a dict or a string specifying the path of a json
+            file containing that dict. See Notes for further details.
+        record : Record or None, optional
+            record which is inserted into possible queries. Must be
+            given if there are queries in export_dict. Default is None.
+        raise_error_if_missing : bool, optional
+            specify whether an error is raised if mandatory entries
+            are missing or whether an error message is forwarded to a
+            logger. Default is False.
+
+        Notes
+        -----
+        The entries of the export_dict are themselves dictionaries
+        of the form
+        ```
+        {"entry_to_be_exported": {
+            "optional": True/False
+            "find_func": name of member function
+            "query": query string
+            "selector": selector for the query
+            "error": error explanation
+            }
+        }
+        ```
+        All entries are optional; `query` and `find_func` are
+        mutually exclusive and an error will be raised if both are
+        provided. The individual entries mean:
+
+        - optional: True or False, if not present, the entry is
+          assumed to be mandatory.
+        - find_func: name of the member function that returns the
+          value for this entry. Must not exist together with
+          `query`
+        - query: Query string for finding the value for this
+          entry. If this is given, a record must be given to the
+          constructor of this class. The query is then executed as
+          `db.execute_query(query.format(record.id), unique=True)`
+          so it must return a unique result from which the value
+          can be extracted via
+          `query_result.get_property_values(selector)`.
+        - selector: only relevant if query is given. This is used
+          as a selector in a call to `get_property_values` on the
+          result of the query. If no selector is given, it is
+          guessed from the second word of the query string (as in
+          `SELECT something FROM something else`).
+        - error: only relevant for mandatory entries. If the entry
+          is missing, an explanatory string can be provided here
+          that is used for a more verbose error output.
+
+        """
+        self.missing = []
+
+        if isinstance(export_dict, dict):
+            self.export_dict = export_dict
+        else:
+            try:
+                with open(export_dict) as tmp:
+                    self.export_dict = json.load(tmp)
+            except (OSError, TypeError, ValueError) as exc:  # unreadable path, wrong type, invalid json
+                raise ValueError(
+                    "export_dict must be either a dictionary"
+                    " or the path to a json file.") from exc
+        self.record = record
+        self._check_sanity_of_export_dict()
+        self.raise_error_if_missing = raise_error_if_missing
+        self.info = {}
+
+    def collect_information(self):
+        """Fill `info` using the query, find function or record property of each
+        `export_dict` entry; raise or log an error for missing mandatory entries.
+
+        """
+        self.missing = []  # start fresh so that repeated calls do not report entries twice
+        for e, d in self.export_dict.items():
+            if QUERY in d:
+                # TODO: How do we make this more general? There might
+                # be queries that don't need the record or work with
+                # the name instead of the id.
+                q = d[QUERY].format(self.record.id)
+                try:
+                    val = db.execute_query(
+                        q, unique=True).get_property_values(d["selector"])
+
+                    if len(val) == 1:
+                        val = val[0]
+                except Exception as exc:
+                    # invalid query
+                    logger.debug(exc)
+                    errmssg = "Empty or invalid query '{}' for entry {}".format(q, e)
+                    raise TableExportError(errmssg) from exc
+
+                if val is not None:
+                    self.info[e] = val
+                else:
+                    self._append_missing(e, d)
+            elif FIND_FUNCTION in d:
+                find_fun = getattr(self, d[FIND_FUNCTION])
+                try:
+                    self.info[e] = find_fun()
+                except Exception as exc:
+                    self._append_missing(e, d)
+                    logger.debug(exc)
+            # last resort: check if record has e as property:
+            else:
+                try:
+                    self.info[e] = self.record.get_property(e).value
+                except AttributeError as exc:
+                    # either record is None, or get_property(e) returns None
+                    logger.debug(exc)
+                    errmssg = "No find function or query were specified for entry "
+                    errmssg += e
+
+                    if self.record is not None:
+                        errmssg += ", nor does record {} have a property of that name".format(
+                            self.record.id)
+                    errmssg += "."
+                    raise TableExportError(errmssg) from exc
+
+        if self.missing:
+            errmssg = "The following mandatory entries are missing:\n"
+
+            for e in self.missing:
+                if "error" in self.export_dict[e]:
+                    errmssg += e + \
+                        ":\t{}\n".format(self.export_dict[e]["error"])
+                else:
+                    errmssg += e + '\n'
+
+            if self.raise_error_if_missing:
+                raise TableExportError(errmssg)
+            else:
+                logger.error(errmssg)
+
+    def prepare_csv_export(self, delimiter=',', print_header=False,
+                           skip_empty_optionals=False):
+        """Return the values in self.info as a single-line string, separated
+        by the delimiter. If print_header is true, a header line with the
+        names of the entries, separated by the same delimiter is
+        added. Header and body are separated by a newline character.
+
+        Parameters
+        ----------
+        delimiter : string, optional
+            symbol that separates two consecutive entries, e.g. ','
+            for .csv or '\\t' for .tsv. Default is ','.
+        print_header : bool, optional
+            specify whether a header line with all entry names
+            separated by the delimiter precedes the body. Default is
+            False.
+        skip_empty_optionals : bool, optional
+            if this is true, optional entries without value will be
+            skipped in body and header. Otherwise an empty field
+            will be attached. Default is False.
+
+        Raises
+        ------
+        TableExportError:
+            if mandatory entries are missing a value
+
+        Returns
+        -------
+        string:
+            a single string, either only the body line, or header and
+            body separated by a newline character if print_header is True.
+
+        """
+        body = ""
+
+        if print_header:
+            header = ""
+
+        for e, d in self.export_dict.items():
+            if e in self.info:
+                body += str(self.info[e]) + delimiter
+
+                if print_header:
+                    header += str(e) + delimiter
+            else:
+                if not ("optional" in d and d["optional"]):
+                    raise TableExportError(
+                        "Mandatory entry " + e +
+                        " has no value that could be exported to .csv.")
+
+                if not skip_empty_optionals:
+                    body += delimiter
+
+                    if print_header:  # header column only if the body got one, too
+                        header += str(e) + delimiter
+        # remove the final delimiter; an empty delimiter must not strip anything ([:-0] is "")
+
+        if delimiter and body.endswith(delimiter):
+            body = body[:-len(delimiter)]
+
+        if print_header and delimiter and header.endswith(delimiter):
+            header = header[:-len(delimiter)]
+
+        if print_header:
+            return header + '\n' + body
+
+        return body
+
+    def _check_sanity_of_export_dict(self):
+        """Check that all entries of `export_dict` are valid; insert a guessed
+        `selector` or `find_func` where none is given (modifies `export_dict`).
+
+        """
+
+        for e, d in self.export_dict.items():
+            # values should be exported either by query or by function
+
+            if QUERY in d and FIND_FUNCTION in d:
+                raise TableExportError(
+                    "For entry " + e +
+                    ", both a query and a function are given for finding "
+                    "the value to be exported. Please specify either a"
+                    " function or a query, not both."
+                )
+            # check find function if present
+
+            if FIND_FUNCTION in d:
+                if not hasattr(self, d[FIND_FUNCTION]):
+                    raise TableExportError(
+                        "Find function " + d[FIND_FUNCTION] +
+                        " was specified for entry " + e +
+                        " but no such function could be found."
+                    )
+                elif not callable(getattr(self, d[FIND_FUNCTION])):
+                    raise TableExportError(
+                        "Find function " + d[FIND_FUNCTION] +
+                        " was given for entry " + e + " but is not callable."
+                    )
+
+            elif QUERY in d:
+                # query but no record is given
+
+                if self.record is None:
+                    raise TableExportError(
+                        "A query for entry " + e +
+                        " was specified but no record is given."
+                    )
+                else:
+                    if "selector" not in d:
+                        d["selector"] = d[QUERY].split()[1]  # second word, any whitespace
+            # guess find function and insert if existing
+            else:
+                possible_name = self._guess_find_function(e)
+
+                if hasattr(self, possible_name) and callable(getattr(self, possible_name)):
+                    d[FIND_FUNCTION] = possible_name
+
+    def _append_missing(self, e, d):
+        """Append e to `missing` unless d marks the entry as optional."""
+
+        if not d.get("optional", False):
+            self.missing.append(e)
+
+    def _guess_find_function(self, e):
+        """Return the guessed find function name for entry e: 'find_' + e."""
+
+        return "find_{}".format(e)
diff --git a/unittests/dummy_export_dict.json b/unittests/dummy_export_dict.json
new file mode 100644
index 0000000000000000000000000000000000000000..ee84fab4a1e67065e7e38ce1545418fe5aabe886
--- /dev/null
+++ b/unittests/dummy_export_dict.json
@@ -0,0 +1,6 @@
+{"simple_test_entry": {},
+ "Not so simple": {
+     "find_func": "find_entry_with_strange_name"
+ },
+ "Test_Prop_1": {}
+}
diff --git a/unittests/test_base_table_exporter.py b/unittests/test_base_table_exporter.py
new file mode 100644
index 0000000000000000000000000000000000000000..3b8276cdf947c5b22e829e050295dd47f3cfe9ea
--- /dev/null
+++ b/unittests/test_base_table_exporter.py
@@ -0,0 +1,220 @@
+#!/usr/bin/env python3
+# encoding: utf-8
+#
+# ** header v3.0
+# This file is a part of the CaosDB Project.
+#
+# Copyright (C) 2020 Indiscale GmbH <info@indiscale.com>
+# Copyright (C) 2020 Florian Sprecklelsen <f.spreckelsen@indiscale.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+# ** end header
+#
+"""Test all functionality of the base exporter class that can be
+tested without db connection.
+
+"""
+import json
+import os
+import caosdb as db
+from pytest import raises
+from caosadvancedtools import table_export as te
+
+
+class DummyExporter(te.BaseTableExporter):  # exporter with canned find functions, no server needed
+
+    def __init__(self, export_dict, record=None,
+                 raise_error_if_missing=False):
+        self.dummy = 27  # non-callable attribute; must exist before the base class validates export_dict
+        super().__init__(export_dict, record, raise_error_if_missing)
+
+    def find_simple_test_entry(self):
+        return "bla"
+
+    def find_entry_with_strange_name(self):
+        return "blabla"
+
+    def find_function_with_error(self):
+        raise AttributeError("Can't find the value.")
+
+
+def test_base():
+    """Test the constructor of the base class with an empty export_dict."""
+    empty = {}
+    my_exporter = te.BaseTableExporter(empty)
+    assert my_exporter.export_dict == {}
+    assert len(my_exporter.missing) == 0
+    assert my_exporter.info == {}
+    assert my_exporter.prepare_csv_export() == ""
+
+
+def test_simple_record():
+    """Test whether properties of simple record are found correctly."""
+    rec = db.Record(name="TestRecord")
+    rec.add_property(name="Test_Prop_1", value="bla")
+    rec.add_property(name="Test_Prop_2", value="blabla")
+
+    export_dict = {
+        "Test_Prop_1": {},
+        "Test_Prop_2": {"optional": True}
+    }
+
+    my_exporter = te.BaseTableExporter(export_dict=export_dict, record=rec)
+    assert my_exporter.record.name == rec.name
+    assert my_exporter.export_dict == export_dict
+    my_exporter.collect_information()
+    assert len(my_exporter.missing) == 0
+    assert my_exporter.info["Test_Prop_1"] == "bla"
+    assert my_exporter.info["Test_Prop_2"] == "blabla"
+    assert my_exporter.prepare_csv_export() == "bla,blabla"
+    assert my_exporter.prepare_csv_export(
+        delimiter='\t', print_header=True) == "Test_Prop_1\tTest_Prop_2\nbla\tblabla"
+    # remove optional entry from info
+    del(my_exporter.info["Test_Prop_2"])
+    assert my_exporter.prepare_csv_export(skip_empty_optionals=True) == "bla"
+    assert my_exporter.prepare_csv_export(
+        delimiter='\t', print_header=True) == "Test_Prop_1\tTest_Prop_2\nbla\t"
+    # reload info, and delete mandatory entry
+    my_exporter.collect_information()
+    del(my_exporter.info["Test_Prop_1"])
+    with raises(te.TableExportError) as exc:
+        my_exporter.prepare_csv_export()
+    assert "Test_Prop_1" in exc.value.msg
+    assert "Test_Prop_2" not in exc.value.msg
+
+
+def test_broken_export_dicts():
+    # query but no record
+    export_dict = {
+        "Test_Prop_1": {"query": "SELECT Test_Prop_1 FROM Test_Record WITH ID={}"}
+    }
+    with raises(te.TableExportError):
+        my_exporter = te.BaseTableExporter(export_dict=export_dict)
+
+    # record without matching property or find function
+    export_dict = {"Test_Prop_1": {}}
+    rec = db.Record()
+    my_exporter = te.BaseTableExporter(export_dict=export_dict, record=rec)
+    with raises(te.TableExportError):
+        my_exporter.collect_information()
+
+    # query and function given
+    export_dict = {
+        "Test_Prop_1": {"query": "SELECT Test_Prop_1 FROM Test_Record WITH ID={}",
+                        "find_func": "find_Test_Prop_1"}
+    }
+    with raises(te.TableExportError):
+        my_exporter = te.BaseTableExporter(export_dict=export_dict, record=rec)
+
+    # function not implemented
+    export_dict = {
+        "Test_Prop_1": {"find_func": "find_Test_Prop_1"}
+    }
+    with raises(te.TableExportError):
+        my_exporter = te.BaseTableExporter(export_dict)
+
+    # function not callable (`dummy` is an int attribute of DummyExporter)
+    export_dict = {
+        "Test_Prop_1": {"find_func": "dummy"}
+    }
+    with raises(te.TableExportError):
+        my_exporter = DummyExporter(export_dict)
+
+    # one good, one bad
+    export_dict = {
+        "Test_Prop_1": {"find_func": "find_simple_test_entry"},
+        "Test_Prop_2": {"find_func": "not_implemented"}
+    }
+    with raises(te.TableExportError) as exc:
+        my_exporter = DummyExporter(export_dict)
+    assert "Test_Prop_2" in exc.value.msg
+    assert "Test_Prop_1" not in exc.value.msg
+
+
+def test_info_collection():
+    # guess correct function name
+    export_dict = {"simple_test_entry": {}}
+    my_exporter = DummyExporter(export_dict=export_dict,
+                                raise_error_if_missing=True)
+    assert my_exporter.export_dict[
+        "simple_test_entry"]["find_func"] == "find_simple_test_entry"
+    my_exporter.collect_information()
+    assert my_exporter.info["simple_test_entry"] == "bla"
+
+    # use specific find function
+    export_dict["Not so simple"] = {
+        "find_func": "find_entry_with_strange_name"}
+    my_exporter = DummyExporter(export_dict=export_dict,
+                                raise_error_if_missing=True)
+    my_exporter.collect_information()
+    assert my_exporter.info["simple_test_entry"] == "bla"
+    assert my_exporter.info["Not so simple"] == "blabla"
+
+    # mix functions and record
+    rec = db.Record()
+    rec.add_property(name="Test_Prop_1", value="blablabla")
+    export_dict["Test_Prop_1"] = {}
+    my_exporter = DummyExporter(export_dict=export_dict, record=rec,
+                                raise_error_if_missing=True)
+    my_exporter.collect_information()
+    assert my_exporter.info["simple_test_entry"] == "bla"
+    assert my_exporter.info["Not so simple"] == "blabla"
+    assert my_exporter.info["Test_Prop_1"] == "blablabla"
+
+    # error in optional value
+    export_dict["optional_value"] = {
+        "find_func": "find_function_with_error",
+        "optional": True
+    }
+    my_exporter = DummyExporter(export_dict=export_dict, record=rec,
+                                raise_error_if_missing=True)
+    my_exporter.collect_information()
+    assert "optional_value" not in my_exporter.info
+
+    # now error in mandatory value
+    del(export_dict["optional_value"])
+    export_dict["mandatory_value"] = {
+        "find_func": "find_function_with_error"
+    }
+    my_exporter = DummyExporter(export_dict=export_dict, record=rec,
+                                raise_error_if_missing=True)
+    with raises(te.TableExportError) as exc:
+        my_exporter.collect_information()
+    assert len(my_exporter.missing) == 1
+    assert "mandatory_value" in my_exporter.missing
+    assert exc.value.msg.split("\n")[1] == "mandatory_value"
+
+    # add explanation to error
+    export_dict["mandatory_value"]["error"] = "Explain the error"
+    my_exporter = DummyExporter(export_dict=export_dict, record=rec,
+                                raise_error_if_missing=True)
+    with raises(te.TableExportError) as exc:
+        my_exporter.collect_information()
+    assert exc.value.msg.split(
+        "\n")[1] == "mandatory_value:\tExplain the error"
+
+
+def test_json_import():
+    with open(os.path.join(os.path.dirname(__file__),
+                           "dummy_export_dict.json")) as tmp:
+        export_dict = json.load(tmp)
+    rec = db.Record()
+    rec.add_property(name="Test_Prop_1", value="blablabla")
+    my_exporter = DummyExporter(export_dict=export_dict, record=rec,
+                                raise_error_if_missing=True)
+    my_exporter.collect_information()
+    assert my_exporter.info["simple_test_entry"] == "bla"
+    assert my_exporter.info["Not so simple"] == "blabla"
+    assert my_exporter.info["Test_Prop_1"] == "blablabla"