diff --git a/src/caosadvancedtools/table_converter.py b/src/caosadvancedtools/table_converter.py new file mode 100644 index 0000000000000000000000000000000000000000..e10b736f1a4cfc8b4131366a510d650bc0ac47d6 --- /dev/null +++ b/src/caosadvancedtools/table_converter.py @@ -0,0 +1,110 @@ +#!/usr/bin/env python +# encoding: utf-8 +# +# ** header v3.0 +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2019 Henrik tom Wörden +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. 
#
# ** end header
import argparse
import re
import sys

import caosdb as db
import pandas as pd


def from_tsv(filename, recordtype):
    """ parses a tsv file to a list of records

    The file is read with pandas using a tab as separator and the
    resulting DataFrame is handed to `from_table`, whose return value is
    returned unchanged.
    """
    df = pd.read_csv(filename, sep="\t")

    return from_table(df, recordtype)


def to_tsv(filename, container):
    """ writes the records in a container to a tsv file

    The container is converted with `to_table`; the table is written
    tab-separated and without the index column.
    """
    df = to_table(container)
    df.to_csv(filename, sep="\t", index=False)


def generate_property_name(prop):
    """ returns the column header for a property

    This is the property name, followed by the unit in square brackets
    if the property has a unit.
    """
    if prop.unit is None:
        return prop.name

    return "{} [{}]".format(prop.name, prop.unit)


def to_table(container):
    """ creates a table from the records in a container

    Every record becomes one row (indexed by its position in the
    container) and every property one column, named by
    `generate_property_name`. Columns appear in the order in which the
    property names are first encountered.

    Raises a ValueError if the container is empty or if the records do
    not all have the same set of parent names.
    """

    if len(container) == 0:
        raise ValueError("Container is empty")

    # A dict keeps insertion order, so the column order is the same on
    # every run; a set of strings would be ordered by randomized hashes.
    columns = {}

    for rec in container:
        # Look at each record's own properties, not only the first one's.
        for p in rec.get_properties():
            columns.setdefault(generate_property_name(p))
    df = pd.DataFrame(columns=list(columns))
    rts = {p.name for p in container[0].parents}

    for ii, rec in enumerate(container):
        if {p.name for p in rec.parents} != rts:
            raise ValueError("Parents differ")

        for p in rec.get_properties():
            df.loc[ii, generate_property_name(p)] = p.value

    return df


def from_table(spreadsheet, recordtype):
    """ parses a pandas DataFrame to a list of records

    One record with the parent `recordtype` is created per row. Every
    cell that is neither null nor a blank string becomes a property named
    after its column. A column header of the form "name [unit]" is split
    into the property name and the unit.

    Returns a db.Container holding the records.
    """
    # Compiled once instead of once per cell; the pattern is unchanged.
    unit_pattern = re.compile(r"(.*)\[(.*)\].*")
    records = db.Container()

    for _, row in spreadsheet.iterrows():
        rec = db.Record()
        rec.add_parent(name=recordtype)

        # Series.items() replaces iteritems(), which pandas 2.0 removed.
        for key, value in row.items():
            if pd.isnull(value):
                continue

            if isinstance(value, str) and value.strip() == "":
                continue
            match = unit_pattern.match(key)

            if match is not None:
                pname = match.group(1).strip()
                unit = match.group(2).strip()
                rec.add_property(name=pname, value=value, unit=unit)
            else:
                rec.add_property(name=key, value=value)
        records.append(rec)

    return records


if __name__ == "__main__":

    p = argparse.ArgumentParser()
    p.add_argument("-f", "--filename", help="The excel filename")
    p.add_argument("--auth-token")
    arg = p.parse_args(sys.argv[1:])
db.configure_connection(auth_token=arg.auth_token) + + recordtype = "Experiment" + + from_tsv(arg.filename, recordtype) diff --git a/unittests/test.tsv b/unittests/test.tsv new file mode 100644 index 0000000000000000000000000000000000000000..714bfce2f5295b6548402982d75e4feebd688011 --- /dev/null +++ b/unittests/test.tsv @@ -0,0 +1,5 @@ +temperature [*C] greeting +3.4 tach +4.5 hi + hi +4.5 diff --git a/unittests/test_table_converter.py b/unittests/test_table_converter.py new file mode 100644 index 0000000000000000000000000000000000000000..ca90f00297b59a1ae914cc202e6b6f0e5f1e7e99 --- /dev/null +++ b/unittests/test_table_converter.py @@ -0,0 +1,90 @@ +#!/usr/bin/env python +# encoding: utf-8 +# +# ** header v3.0 +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2019 Henrik tom Wörden +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. 
#
# ** end header
import os
import unittest
from tempfile import NamedTemporaryFile

import caosdb as db
import pandas as pd
from caosadvancedtools.table_converter import (from_table, from_tsv, to_table,
                                               to_tsv)

TEST_TABLE = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                          "test.tsv")


def _temporary_path(testcase):
    """ returns the name of a fresh temporary file

    The handle is closed right away so the file can be reopened by name,
    and the file is removed again when `testcase` has finished.
    """
    tmp = NamedTemporaryFile(delete=False)
    tmp.close()
    testcase.addCleanup(os.remove, tmp.name)

    return tmp.name


class TableTest(unittest.TestCase):
    """ tests for the conversion between DataFrames and containers """

    def test_basic(self):
        df = pd.read_csv(TEST_TABLE, sep="\t")
        self.assertIsInstance(from_table(df, "Measurement"), db.Container)

    def test_empty(self):
        c = db.Container()
        self.assertRaises(ValueError, to_table, c)

    def test_different_props(self):
        # Records with the same parent but different properties must not
        # make to_table fail.
        r1 = db.Record()
        r1.add_parent("no1")
        r1.add_property("p1")
        r2 = db.Record()
        r2.add_parent("no1")
        r2.add_property("p1")
        r2.add_property("p2")
        c = db.Container()
        c.extend([r1, r2])
        to_table(c)

    def test_parents(self):
        r1 = db.Record()
        r1.add_parent("no1")
        r2 = db.Record()
        r2.add_parent("no2")
        c = db.Container()
        c.extend([r1, r2])
        self.assertRaises(ValueError, to_table, c)


class FromTsvTest(unittest.TestCase):
    """ checks that the test table can be read without an error """

    def test_basic(self):
        from_tsv(TEST_TABLE, "Measurement")


class ToTsvTest(unittest.TestCase):
    """ checks that a container can be written without an error """

    def test_basic(self):
        r = db.Record()
        r.add_property("ha", 5)
        r.add_parent("hu")
        c = db.Container()
        c.append(r)
        # The output file is removed by the cleanup registered in
        # _temporary_path instead of being left behind.
        to_tsv(_temporary_path(self), c)


class IntegrationTest(unittest.TestCase):
    """ converts tsv to a container and back and compares origin with
    result """

    def test_backandforth(self):
        cont = from_tsv(TEST_TABLE, "Measurement")
        path = _temporary_path(self)
        to_tsv(path, cont)
        with open(TEST_TABLE, "r") as no1, open(path, "r") as no2:
            self.assertEqual(no1.read(), no2.read())