Skip to content
Snippets Groups Projects
Commit e38e0eeb authored by Henrik tom Wörden's avatar Henrik tom Wörden
Browse files

ENH: initial version of a table-record conversion

caosadvancedtools/table_converter.py implements conversions from tables
(tsv, pandas DataFrame) to a container with records and vice versa.
parent ac05d159
No related branches found
No related tags found
No related merge requests found
#!/usr/bin/env python
# encoding: utf-8
#
# ** header v3.0
# This file is a part of the CaosDB Project.
#
# Copyright (C) 2019 Henrik tom Wörden
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
# ** end header
import argparse
import re
import sys
import caosdb as db
import pandas as pd
def from_tsv(filename, recordtype):
    """Read a tab-separated file and convert its rows to records.

    The file is loaded with ``pandas.read_csv`` using a tab separator and
    the resulting DataFrame is handed to ``from_table``.

    Parameters
    ----------
    filename : path of the tsv file to read.
    recordtype : name of the parent passed on to ``from_table``.

    Returns
    -------
    Whatever ``from_table`` returns for the parsed table.
    """
    return from_table(pd.read_csv(filename, sep="\t"), recordtype)
def to_tsv(filename, container):
    """Write the records of a container to a tab-separated file.

    The container is first converted with ``to_table``; the resulting
    DataFrame is written to ``filename`` without the index column.

    Parameters
    ----------
    filename : path of the tsv file to write.
    container : collection of records accepted by ``to_table``.
    """
    to_table(container).to_csv(filename, sep="\t", index=False)
def generate_property_name(prop):
    """Return the column name used for a property.

    Properties with a unit are rendered as ``"<name> [<unit>]"``;
    properties whose unit is ``None`` are rendered as the bare name.

    Parameters
    ----------
    prop : object exposing ``name`` and ``unit`` attributes.
    """
    unit = prop.unit
    if unit is not None:
        return "{} [{}]".format(prop.name, unit)
    return prop.name
def to_table(container):
    """Create a pandas DataFrame from the records in a container.

    Every record becomes one row (indexed by its position in the
    container) and every distinct property name, as rendered by
    ``generate_property_name``, becomes one column.

    Parameters
    ----------
    container : sequence of records; each record must expose ``parents``
        and ``get_properties()``.

    Returns
    -------
    pandas.DataFrame holding the property values of all records.

    Raises
    ------
    ValueError
        If the container is empty, or if the set of parent names of any
        record differs from that of the first record.
    """
    if len(container) == 0:
        raise ValueError("Container is empty")

    expected_parents = {parent.name for parent in container[0].parents}

    # Collect the column names from *every* record (not only the first one)
    # and keep them in first-seen order so the column layout of the result
    # is deterministic.
    columns = []
    seen = set()
    for rec in container:
        if {parent.name for parent in rec.parents} != expected_parents:
            raise ValueError("Parents differ")
        for prop in rec.get_properties():
            column = generate_property_name(prop)
            if column not in seen:
                seen.add(column)
                columns.append(column)

    df = pd.DataFrame(columns=columns)
    for ii, rec in enumerate(container):
        for prop in rec.get_properties():
            df.loc[ii, generate_property_name(prop)] = prop.value
    return df
def from_table(spreadsheet, recordtype):
    """Convert the rows of a pandas DataFrame to records.

    One record is created per row, with ``recordtype`` added as its
    parent. Each non-empty cell becomes a property of that record. A
    column header of the form ``"name [unit]"`` is split into the property
    name and its unit; any other header is used as the property name
    unchanged.

    Parameters
    ----------
    spreadsheet : pandas.DataFrame whose column labels are strings.
    recordtype : name of the parent added to every record.

    Returns
    -------
    db.Container with one record per row of ``spreadsheet``.
    """
    # Compiled once; matches headers such as "temperature [*C]".
    unit_pattern = re.compile(r"(.*)\[(.*)\].*")
    records = db.Container()

    for _, row in spreadsheet.iterrows():
        rec = db.Record()
        rec.add_parent(name=recordtype)

        # Series.items() replaces Series.iteritems(), which was removed in
        # pandas 2.0.
        for key, value in row.items():
            # Skip missing cells and cells containing only whitespace.
            if pd.isnull(value):
                continue
            if isinstance(value, str) and value.strip() == "":
                continue

            match = unit_pattern.match(key)
            if match is not None:
                pname = match.group(1).strip()
                unit = match.group(2).strip()
                rec.add_property(name=pname, value=value, unit=unit)
            else:
                rec.add_property(name=key, value=value)
        records.append(rec)
    return records
if __name__ == "__main__":
    # Command line entry point: parse a tsv file into records of the
    # hard-coded record type below. The resulting container is not used
    # any further here.
    p = argparse.ArgumentParser()
    # The file is read as tab-separated values; without a filename there is
    # nothing to parse, so the option is mandatory.
    p.add_argument("-f", "--filename", required=True,
                   help="The tsv filename")
    p.add_argument("--auth-token")
    arg = p.parse_args(sys.argv[1:])
    db.configure_connection(auth_token=arg.auth_token)
    recordtype = "Experiment"
    from_tsv(arg.filename, recordtype)
temperature [*C] greeting
3.4 tach
4.5 hi
hi
4.5
#!/usr/bin/env python
# encoding: utf-8
#
# ** header v3.0
# This file is a part of the CaosDB Project.
#
# Copyright (C) 2019 Henrik tom Wörden
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
# ** end header
import os
import unittest
from tempfile import NamedTemporaryFile
import caosdb as db
import pandas as pd
from caosadvancedtools.table_converter import (from_table, from_tsv, to_table,
to_tsv)
# Path of the tsv fixture "test.tsv" located next to this test module
# (symlinks in the module path are resolved first).
TEST_TABLE = os.path.join(
    os.path.dirname(os.path.realpath(__file__)),
    "test.tsv",
)
class TableTest(unittest.TestCase):
    """Tests for the DataFrame <-> container conversion functions."""

    def test_basic(self):
        """from_table returns a container for the tsv fixture."""
        df = pd.read_csv(TEST_TABLE, sep="\t")
        # assertIsInstance instead of a bare assert, which is stripped
        # when Python runs with -O.
        self.assertIsInstance(from_table(df, "Measurement"), db.Container)

    def test_empty(self):
        """An empty container is rejected with a ValueError."""
        c = db.Container()
        self.assertRaises(ValueError, to_table, c)

    def test_different_props(self):
        """Records with differing property sets are all represented."""
        r1 = db.Record()
        r1.add_parent("no1")
        r1.add_property("p1")
        r2 = db.Record()
        r2.add_parent("no1")
        r2.add_property("p1")
        r2.add_property("p2")
        c = db.Container()
        c.extend([r1, r2])
        table = to_table(c)
        # The property that only the second record has must get a column.
        self.assertEqual(set(table.columns), {"p1", "p2"})

    def test_parents(self):
        """Records with different parents are rejected with a ValueError."""
        r1 = db.Record()
        r1.add_parent("no1")
        r2 = db.Record()
        r2.add_parent("no2")
        c = db.Container()
        c.extend([r1, r2])
        self.assertRaises(ValueError, to_table, c)
class FromTsvTest(unittest.TestCase):
    """Tests for reading a tsv file into a container."""

    def test_basic(self):
        """from_tsv parses the tsv fixture and returns a container."""
        records = from_tsv(TEST_TABLE, "Measurement")
        self.assertIsInstance(records, db.Container)
class ToTsvTest(unittest.TestCase):
    """Tests for writing a container to a tsv file."""

    def test_basic(self):
        """to_tsv writes a non-empty file for a single record."""
        r = db.Record()
        r.add_property("ha", 5)
        r.add_parent("hu")
        c = db.Container()
        c.append(r)

        # Create the target file explicitly and close the handle so that
        # to_tsv can open the path itself; remove it once the test is done
        # instead of leaving a stray file in the temp directory.
        tmp = NamedTemporaryFile(delete=False)
        tmp.close()
        self.addCleanup(os.remove, tmp.name)

        to_tsv(tmp.name, c)
        self.assertGreater(os.path.getsize(tmp.name), 0)
class IntegrationTest(unittest.TestCase):
    """Convert a tsv file to a container and back and compare the origin
    with the result."""

    def test_backandforth(self):
        """The tsv written by to_tsv equals the tsv read by from_tsv."""
        cont = from_tsv(TEST_TABLE, "Measurement")

        # Close the handle right away (to_tsv opens the path itself) and
        # make sure the file is removed after the test.
        tempfile = NamedTemporaryFile(delete=False)
        tempfile.close()
        self.addCleanup(os.remove, tempfile.name)

        to_tsv(tempfile.name, cont)
        with open(TEST_TABLE, "r") as no1, open(tempfile.name, "r") as no2:
            self.assertEqual(no1.read(), no2.read())
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment