Skip to content
Snippets Groups Projects
Commit 4124e6e1 authored by Henrik tom Wörden's avatar Henrik tom Wörden
Browse files

ENH: alpha done

parent af1d8197
Branches
Tags
No related merge requests found
...@@ -21,7 +21,6 @@ ...@@ -21,7 +21,6 @@
""" Imports labfolder exports """ """ Imports labfolder exports """
import argparse
import os import os
import re import re
import shutil import shutil
...@@ -38,7 +37,14 @@ from bs4 import BeautifulSoup ...@@ -38,7 +37,14 @@ from bs4 import BeautifulSoup
import caosdb as db import caosdb as db
#crawler = Crawler() RERUN = False
# crawler = Crawler()
print("""
WARNING: This is an ALPHA version. Parsing of the by labfolder exported data
might not work correctly! There might be missing elements! Check the result
carefully before inserting it.
""")
def create_project(project): def create_project(project):
...@@ -77,12 +83,34 @@ def add_property_from_data_element(dbrecord, element): ...@@ -77,12 +83,34 @@ def add_property_from_data_element(dbrecord, element):
quant = val_or_none(element.find_all(attrs={'class': 'element-quantity'})) quant = val_or_none(element.find_all(attrs={'class': 'element-quantity'}))
val = val_or_none(element.find_all(attrs={'class': 'element-value'})) val = val_or_none(element.find_all(attrs={'class': 'element-value'}))
print("tit", title)
print("qu", quant)
if quant is not None: if quant is not None:
title = title+"-"+quant quant = quant.strip(": ")
title = title+" - "+quant
res = db.execute_query("FIND PROPERTY '{}'".format(title))
if len(res) == 0:
p = db.Property(name=title, unit=unit, datatype=db.DOUBLE)
p.insert()
try:
val = float(val)
except TypeError:
print("Value is no float!!!", val)
return
dbrecord.add_property(name=title, value=val, unit=unit) dbrecord.add_property(name=title, value=val, unit=unit)
def create_file(name, filename, root):
    """Build a ``db.File`` for a file that is part of a labfolder export.

    Parameters
    ----------
    name : str
        Name given to the file entity.
    filename : str
        Path of the file, relative to ``root``.
    root : str
        Directory against which ``filename`` is resolved.

    Returns
    -------
    db.File
        File entity whose ``path`` and ``file`` are both the normalized
        local path. This function does not insert or retrieve it.

    Raises
    ------
    ValueError
        If the resolved local path does not exist.
    """
    local_path = os.path.normpath(os.path.join(root, filename))

    if not os.path.exists(local_path):
        # Raise with one formatted message; passing the prefix and the path
        # as two arguments makes the error render as a tuple.
        raise ValueError("FILE DOES NOT EXIST: {}".format(local_path))

    return db.File(path=local_path, file=local_path, name=name)
def create_entry(entry, dbproject, root):
cont = db.Container()
dbrecord = db.Record() dbrecord = db.Record()
dbrecord.add_parent(name="LabbookEntry") dbrecord.add_parent(name="LabbookEntry")
dbrecord.add_property(name="Project", value=dbproject) dbrecord.add_property(name="Project", value=dbproject)
...@@ -108,10 +136,24 @@ def create_entry(entry, dbproject): ...@@ -108,10 +136,24 @@ def create_entry(entry, dbproject):
attrs={'class': 'dd_entry_cell_file_download'}) attrs={'class': 'dd_entry_cell_file_download'})
if len(download) > 0: if len(download) > 0:
local_path = (download[0].parent).attrs['data-filename'] name = ((download[0].parent).attrs['data-filename']).strip('"')
f = db.File(path=local_path, if name == "blank.png":
file=local_path) continue
dbrecord.add_property(name="accompaningFile", value=f) if len(download[0].find_all("img")) > 0:
filename = download[0].find_all("img")[0].attrs['src']
elif len(download[0].find_all("a")) > 0:
filename = download[0].find_all("a")[0].attrs['href']
else:
raise ValueError("could not get filename")
print(name)
print(filename)
f = create_file(name, filename, root)
if RERUN:
f.retrieve()
else:
f.insert()
dbrecord.add_property(name="associatedFile", value=f)
cont.append(f)
continue continue
...@@ -128,11 +170,16 @@ def create_entry(entry, dbproject): ...@@ -128,11 +170,16 @@ def create_entry(entry, dbproject):
attrs={'class': 'table-el-container'}) attrs={'class': 'table-el-container'})
if len(tables) > 0: if len(tables) > 0:
local_path = (tables[0]).find_all( name = (tables[0]).find_all(
attrs={'class': 'table-el-filename'} attrs={'class': 'table-el-filename'}
)[0].getText().strip() )[0].getText().strip()
f = db.File(path=local_path, file=local_path) f = create_file(name, name, root)
if RERUN:
f.retrieve()
else:
f.insert()
dbrecord.add_property(name="table", value=f) dbrecord.add_property(name="table", value=f)
cont.append(f)
continue continue
...@@ -144,31 +191,48 @@ def create_entry(entry, dbproject): ...@@ -144,31 +191,48 @@ def create_entry(entry, dbproject):
continue continue
print(dbrecord) cont.extend([dbrecord, person])
return cont
def main(args):
"""The main function."""
def treat_project(path):
    """Parse one exported project folder and insert its content.

    Reads ``index.html`` inside ``path`` and looks up the element with id
    ``eln_project_content``. If there is none, nothing is inserted. Otherwise
    the project is created via ``create_project``, every descendant element
    carrying a ``data-id`` attribute is passed to ``create_entry``, and all
    resulting entities are collected in one container which is printed and
    then inserted.

    Parameters
    ----------
    path : str
        Folder that contains the project's ``index.html``. It is also passed
        to ``create_entry`` as the root for resolving referenced files.

    Returns
    -------
    None
    """
    # Open in binary mode so that BeautifulSoup determines the document
    # encoding itself instead of depending on the platform's default text
    # encoding.
    with open(os.path.join(path, "index.html"), "rb") as fp:
        tree = BeautifulSoup(fp, features="lxml")

    cont = db.Container()
    matches = tree.find_all(id="eln_project_content")

    if len(matches) == 0:
        # Not a project page: nothing to import from this folder.
        return

    project = matches[0]

    dbproject = create_project(project)
    cont.append(dbproject)

    for entry in project.find_all(lambda x: x.has_attr('data-id')):
        recs = create_entry(entry, dbproject, path)
        cont.extend(recs)

    print(cont)
    cont.insert()
def import_data(folder):
    """imports the data of a labfolder export

    Walks the ``projects`` sub-folder of ``folder`` and calls
    ``treat_project`` for every directory that contains an ``index.html``.

    Parameters
    ----------
    folder : str
        Top-level folder of the labfolder export.

    Raises
    ------
    ValueError
        If ``folder`` is not an existing directory, or if it has no
        ``projects`` sub-directory.
    """
    # isdir rather than exists: a regular file at either location would pass
    # an existence check and os.walk would then silently yield nothing.
    if not os.path.isdir(folder):
        raise ValueError("folder does not exist: {}".format(folder))

    projects_folder = os.path.join(folder, "projects")

    if not os.path.isdir(projects_folder):
        raise ValueError(
            "folder does not contain a projects folder: {}".format(folder))

    for root, dirs, files in os.walk(projects_folder):
        print(root, dirs, files)

        if "index.html" in files:
            treat_project(root)
Project:
  obligatory_properties:
    projectId:
      datatype: INTEGER
      description: 'UID of this project'
Person:
  recommended_properties:
    firstName:
      datatype: TEXT
      description: 'first name'
    lastName:
      datatype: TEXT
      description: 'last name'
LabbookEntry:
  recommended_properties:
    Project:
    entryId:
      datatype: INTEGER
      description: 'UID of this entry'
    responsible:
      datatype: Person
      description: 'the person responsible for these notes'
    textElement:
      datatype: TEXT
      description: 'a text element of a labbook recording'
    associatedFile:
      datatype: FILE
      description: 'A file associated with this recording'
    table:
      datatype: FILE
      description: 'A table document associated with this recording'
#!/usr/bin/env python3
#
# This file is a part of the CaosDB Project.
#
# Copyright (c) 2020 IndiScale GmbH
# Copyright (c) 2020 Daniel Hornung <d.hornung@indiscale.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
""" Imports labfolder exports """
import argparse
import sys
import caosmodels
from caosmodels.parser import parse_model_from_yaml
from caosadvancedtools.converter import labfolder
def main(args):
    """Load the data model, sync it, and import a labfolder export.

    The model is parsed from ``./model.yml`` (relative to the current working
    directory), ``sync_data_model()`` is called on it, and the folder given
    as ``args.folder`` is handed to ``labfolder.import_data``.
    """
    data_model = parse_model_from_yaml("./model.yml")
    data_model.sync_data_model()

    labfolder.import_data(args.folder)
if __name__ == "__main__":
    # Command-line entry point: one optional positional argument naming the
    # folder of the labfolder export.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "folder",
        nargs="?",
        default="./example_labfolder_data",
        help='folder that contains the data',
    )
    args = parser.parse_args()
    sys.exit(main(args))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment