def clear_all():
    """Delete all entities found by the role queries below.

    The order is fixed: Records first, then RecordTypes, then
    Properties, and finally Files.  Every deletion is issued with
    ``raise_exception_on_error=False`` because there may be no
    entities of a given role.

    """
    queries = (
        "FIND Record",
        "FIND RecordType",
        "FIND Property",
        "FIND File",
    )
    for query in queries:
        found = db.execute_query(query)
        found.delete(raise_exception_on_error=False)
def main():
    """Parse ``model.yml`` and synchronize the resulting data model.

    The model file is looked up in the directory of this script instead
    of the current working directory, so the script does not depend on
    where it is started from.
    """
    # Local import: only needed to locate the model file.
    import os

    script_dir = os.path.dirname(os.path.abspath(__file__))
    model = parse_model_from_yaml(os.path.join(script_dir, "model.yml"))
    # NOTE(review): noquestion=True presumably suppresses an interactive
    # confirmation -- confirm against the caosadvancedtools documentation.
    model.sync_data_model(noquestion=True)
+revisionOf: + datatype: REFERENCE +results: + datatype: LIST<REFERENCE> +sources: + datatype: LIST<REFERENCE> +scripts: + datatype: LIST<REFERENCE> +single_attribute: + datatype: LIST<INTEGER> +Simulation: + obligatory_properties: + date: + identifier: + responsible: +Analysis: + obligatory_properties: + date: + identifier: + responsible: + suggested_properties: + mean_value: + datatype: DOUBLE +Publication: +Thesis: + inherit_from_suggested: + - Publication +Article: + inherit_from_suggested: + - Publication +Poster: + inherit_from_suggested: + - Publication +Presentation: + inherit_from_suggested: + - Publication +Report: + inherit_from_suggested: + - Publication +hdf5File: + datatype: REFERENCE +extern: + - TestRT1 + - TestP1 +Measurement: + recommended_properties: + date: diff --git a/integrationtests/test.py b/integrationtests/test.py new file mode 100644 index 0000000000000000000000000000000000000000..782687be27863e479186717d698b9965f7be8c64 --- /dev/null +++ b/integrationtests/test.py @@ -0,0 +1,91 @@ +#!/usr/bin/env python3 +# encoding: utf-8 +# +# ** header v3.0 +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2021 Indiscale GmbH <info@indiscale.com> +# 2021 Henrik tom Wörden <h.tomwoerden@indiscale.com> +# 2021 Alexander Schlemmer +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. 
def rfp(*pathcomponents):
    """
    Return the given path components joined onto the directory of this file.
    Shorthand convenience function.
    """
    here = os.path.dirname(__file__)
    return os.path.join(here, *pathcomponents)


def main(args):
    """
    Crawl the example directory, synchronize and check the result counts.

    ``args`` is accepted from the command line entry point but is not used.
    """
    # TODO place this definition of identifiables elsewhere
    # Name of the identifiable -> names of its identifying properties.
    identifiable_properties = {
        "Person": ("first_name", "last_name"),
        "Measurement": ("identifier", "date", "project"),
        "Project": ("date", "identifier"),
    }
    ident_adapt = CaosDBIdentifiableAdapter()
    for rt_name, property_names in identifiable_properties.items():
        definition = db.RecordType().add_parent(name=rt_name)
        for property_name in property_names:
            definition = definition.add_property(name=property_name)
        ident_adapt.register_identifiable(rt_name, definition)

    crawler = Crawler(debug=True, identifiableAdapter=ident_adapt)
    # copy_attributes is replaced by a Mock for this run.
    crawler.copy_attributes = Mock()
    data_dir = rfp("../unittests/test_directories", "examples_article")
    cfood_file = rfp("../unittests/scifolder_cfood.yml")
    crawler.crawl_directory(data_dir, cfood_file)

    inserted, updated = crawler.synchronize()
    assert len(inserted) == 18
    assert len(updated) == 0
@staticmethod
def execute_inserts_in_list(to_be_inserted):
    """Insert all given entities in a single container.

    Nothing is sent if ``to_be_inserted`` is empty.
    """
    if len(to_be_inserted) == 0:
        return
    batch = db.Container().extend(to_be_inserted)
    batch.insert()


@staticmethod
def execute_updates_in_list(to_be_updated):
    """Update all given entities in a single container.

    Nothing is sent if ``to_be_updated`` is empty.
    """
    if len(to_be_updated) == 0:
        return
    batch = db.Container().extend(to_be_updated)
    batch.update()
class CaosDBIdentifiableAdapter(IdentifiableAdapter):
    """
    Identifiable adapter which can be used for production.

    TODO: store registered identifiables not locally
    """

    def __init__(self):
        # Maps the name given at registration to the identifiable definition.
        self._registered_identifiables = {}

    def register_identifiable(self, name: str, definition: db.RecordType):
        """
        Store ``definition`` under ``name``; an existing entry with that name is replaced.
        """
        self._registered_identifiables[name] = definition

    def get_registered_identifiable(self, record: db.Record):
        """
        Return the registered identifiable for the given Record.

        It is assumed that there is exactly one identifiable for each RecordType. Only the
        first parent of the given Record is considered; others are ignored. Parent names are
        compared case-insensitively.

        Returns None if the Record has no parent or if no registered identifiable matches.
        """
        if not record.parents:
            # Without a parent there is nothing to match against.
            return None
        rt_name = record.parents[0].name.lower()
        for definition in self._registered_identifiables.values():
            if definition.parents[0].name.lower() == rt_name:
                return definition
        return None

    def retrieve_identified_record(self, identifiable: db.Record):
        """
        Query for the entity that matches ``identifiable``.

        Returns the single query result, or None if the query has no results.

        Raises RuntimeError if the query returns more than one result.
        """
        # NOTE(review): create_query_for_identifiable is not defined in this class;
        # presumably it is inherited from IdentifiableAdapter -- confirm.
        query_string = self.create_query_for_identifiable(identifiable)
        candidates = db.execute_query(query_string)
        if len(candidates) > 1:
            raise RuntimeError("Identifiable was not defined unambigiously.")
        if len(candidates) == 0:
            return None
        return candidates[0]