diff --git a/src/caosadvancedtools/converter/import_labfolder.py b/src/caosadvancedtools/converter/labfolder.py similarity index 58% rename from src/caosadvancedtools/converter/import_labfolder.py rename to src/caosadvancedtools/converter/labfolder.py index e6c85a94a0c09ee55249135448de57a52169351f..172ed993ecf0bc96d39a57f06082d7f83716ba19 100644 --- a/src/caosadvancedtools/converter/import_labfolder.py +++ b/src/caosadvancedtools/converter/labfolder.py @@ -21,7 +21,6 @@ """ Imports labfolder exports """ -import argparse import os import re import shutil @@ -38,7 +37,14 @@ from bs4 import BeautifulSoup import caosdb as db -#crawler = Crawler() +RERUN = False +# crawler = Crawler() + +print(""" +WARNING: This is an ALPHA version. Parsing of the by labfolder exported data +might not work correctly! There might be missing elements! Check the result +carefully before inserting it. +""") def create_project(project): @@ -77,12 +83,34 @@ def add_property_from_data_element(dbrecord, element): quant = val_or_none(element.find_all(attrs={'class': 'element-quantity'})) val = val_or_none(element.find_all(attrs={'class': 'element-value'})) + print("tit", title) + print("qu", quant) if quant is not None: - title = title+"-"+quant + quant = quant.strip(": ") + title = title+" - "+quant + res = db.execute_query("FIND PROPERTY '{}'".format(title)) + if len(res) == 0: + p = db.Property(name=title, unit=unit, datatype=db.DOUBLE) + p.insert() + try: + val = float(val) + except TypeError: + print("Value is no float!!!", val) + return dbrecord.add_property(name=title, value=val, unit=unit) -def create_entry(entry, dbproject): +def create_file(name, filename, root): + local_path = os.path.join(root, filename) + local_path = os.path.normpath(local_path) + if not os.path.exists(local_path): + raise ValueError("FILE DOES NOT EXIST: ", local_path) + f = db.File(path=local_path, file=local_path, name=name) + return f + + +def create_entry(entry, dbproject, root): + cont = db.Container() dbrecord = db.Record() dbrecord.add_parent(name="LabbookEntry") dbrecord.add_property(name="Project", value=dbproject) @@ -108,10 +136,24 @@ def create_entry(entry, dbproject): attrs={'class': 'dd_entry_cell_file_download'}) if len(download) > 0: - local_path = (download[0].parent).attrs['data-filename'] - f = db.File(path=local_path, - file=local_path) - dbrecord.add_property(name="accompaningFile", value=f) + name = ((download[0].parent).attrs['data-filename']).strip('"') + if name == "blank.png": + continue + if len(download[0].find_all("img")) > 0: + filename = download[0].find_all("img")[0].attrs['src'] + elif len(download[0].find_all("a")) > 0: + filename = download[0].find_all("a")[0].attrs['href'] + else: + raise ValueError("could not get filename") + print(name) + print(filename) + f = create_file(name, filename, root) + if RERUN: + f.retrieve() + else: + f.insert() + dbrecord.add_property(name="associatedFile", value=f) + cont.append(f) continue @@ -128,11 +170,16 @@ def create_entry(entry, dbproject): attrs={'class': 'table-el-container'}) if len(tables) > 0: - local_path = (tables[0]).find_all( + name = (tables[0]).find_all( attrs={'class': 'table-el-filename'} )[0].getText().strip() - f = db.File(path=local_path, file=local_path) + f = create_file(name, name, root) + if RERUN: + f.retrieve() + else: + f.insert() dbrecord.add_property(name="table", value=f) + cont.append(f) continue @@ -144,31 +191,48 @@ def create_entry(entry, dbproject): continue - print(dbrecord) + cont.extend([dbrecord, person]) + return cont -def main(args): - """The main function.""" - if not os.path.exists(args.file): - raise ValueError("File does not exist") +def treat_project(path): + with open(os.path.join(path, "index.html")) as fp: + tree = BeautifulSoup(fp, features="lxml") - with open(args.file) as inpu: - text = inpu.read() + cont = db.Container() + project = tree.find_all(id="eln_project_content") - tree = BeautifulSoup(text, features="lxml") - project = tree.find_all(id="eln_project_content")[0] + if len(project) == 0: + return + else: + project = project[0] dbproject = create_project(project) + cont.append(dbproject) for entry in project.find_all(lambda x: x.has_attr('data-id')): - create_entry(entry, dbproject) + recs = create_entry(entry, dbproject, path) + cont.extend(recs) + print(cont) + cont.insert() # import IPython # IPython.embed() -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument("file", default="./labfolder_example.html", nargs="?") - args = parser.parse_args() - sys.exit(main(args)) +def import_data(folder): + """imports the data of a labfolder export""" + + if not os.path.exists(folder): + raise ValueError("folder does not exist") + + projects_folder = os.path.join(folder, "projects") + + if not os.path.exists(projects_folder): + raise ValueError("folder does not contain a projects folder") + + for root, dirs, files in os.walk(projects_folder): + print(root, dirs, files) + + if "index.html" in files: + treat_project(root) diff --git a/unittests/model.yml b/unittests/model.yml new file mode 100644 index 0000000000000000000000000000000000000000..987b691b8a64374b872140d214d76774ee43bdef --- /dev/null +++ b/unittests/model.yml @@ -0,0 +1,31 @@ +Project: + obligatory_properties: + projectId: + datatype: INTEGER + description: 'UID of this project' +Person: + recommended_properties: + firstName: + datatype: TEXT + description: 'first name' + lastName: + datatype: TEXT + description: 'last name' +LabbookEntry: + recommended_properties: + Project: + entryId: + datatype: INTEGER + description: 'UID of this entry' + responsible: + datatype: Person + description: 'the person responsible for these notes' + textElement: + datatype: TEXT + description: 'a text element of a labbook recording' + associatedFile: + datatype: FILE + description: 'A file associated with this recording' + table: + datatype: FILE + description: 'A table document associated with this recording' diff --git a/unittests/test_labfolder_import.py b/unittests/test_labfolder_import.py new file mode 100644 index 0000000000000000000000000000000000000000..0508f3e2e2bd716a4f33a9317bdd48ebd54d37dc --- /dev/null +++ b/unittests/test_labfolder_import.py @@ -0,0 +1,46 @@ +#!/usr/bin/env python3 +# +# This file is a part of the CaosDB Project. +# +# Copyright (c) 2020 IndiScale GmbH +# Copyright (c) 2020 Daniel Hornung <d.hornung@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +# + +""" Imports labfolder exports """ + +import argparse +import sys + +import caosmodels +from caosmodels.parser import parse_model_from_yaml + +from caosadvancedtools.converter import labfolder + + +def main(args): + """The main function.""" + model = parse_model_from_yaml("./model.yml") + + model.sync_data_model() + labfolder.import_data(args.folder) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("folder", default="./example_labfolder_data", + nargs="?", help='folder that contains the data') + args = parser.parse_args() + sys.exit(main(args))