Skip to content
Snippets Groups Projects
Verified Commit 6b8cc43b authored by Timm Fitschen's avatar Timm Fitschen
Browse files

Merge branch 'master' into im_und_export

parents 416a56dc 4421e086
No related branches found
No related tags found
No related merge requests found
Showing
with 16147 additions and 0 deletions
#!/usr/bin/env python3
#
# This file is a part of the CaosDB Project.
#
# Copyright (c) 2020 IndiScale GmbH
# Copyright (c) 2020 Daniel Hornung <d.hornung@indiscale.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
""" Imports data from labfolder via api """
import json
import os
import time
import html2text
import caosdb as db
import labfolder.connection
from labfolder.connection import configure_connection
class Importer(object):
    """Import labfolder projects and their entries into CaosDB via the API.

    All projects and entries are retrieved once when the importer is
    created; ``import_data`` then converts them to records and inserts them.
    """

    def __init__(self):
        self.connection = configure_connection()
        self.projects = self.connection.retrieve_projects()
        self.entries = self.connection.retrieve_entries()

    def create_project(self, project):
        """Return a new, not yet inserted ``Project`` record.

        ``project`` is a mapping that provides the keys ``title`` and ``id``.
        """
        dbproject = db.Record(name=project['title'])
        dbproject.add_parent(name="Project")
        dbproject.add_property(name="projectId", value=project['id'])
        # crawler.cached_find_identifiables([dbproject])

        return dbproject

    def get_entries(self, project_id):
        """Return the cached entries whose ``project_id`` matches."""
        return [ent for ent in self.entries if ent["project_id"] == project_id]

    def treat_project(self, project):
        """Convert one project with all its entries and insert the result."""
        cont = db.Container()
        dbproject = self.create_project(project)
        cont.append(dbproject)

        for entry in self.get_entries(project["id"]):
            recs = self.create_entry(entry, dbproject)
            cont.extend(recs)

        print(cont)
        cont.insert(unique=False)

    def import_data(self):
        """Import every project that was retrieved from labfolder."""
        for project in self.projects:
            self.treat_project(project)

    def add_property_from_data_element(self, dbrecord, element):
        """Add the content of a labfolder data element to ``dbrecord``.

        * ``DATA_ELEMENT_GROUP``: every child is processed recursively.
        * ``SINGLE_DATA_ELEMENT``: the value is stored as a float together
          with its unit; elements whose value is not a float are skipped.
        * ``DESCRIPTIVE_DATA_ELEMENT``: the description is stored as text.

        A property named like the element title is inserted first when the
        query does not find one.
        """
        if element['type'] == "DATA_ELEMENT_GROUP":
            for child in element["children"]:
                self.add_property_from_data_element(dbrecord, child)
        elif element['type'] == "SINGLE_DATA_ELEMENT":
            # NOTE(review): the title is pasted into the query unescaped; a
            # title containing a single quote presumably breaks it - confirm.
            res = db.execute_query(
                "FIND PROPERTY '{}'".format(element['title']))

            if len(res) == 0:
                p = db.Property(name=element['title'],
                                unit=element['unit'],
                                datatype=db.DOUBLE)
                try:
                    p.insert()
                except db.exceptions.EntityError as e:
                    print(e)

                    return
            val = element['value']
            try:
                val = float(val)
            except (ValueError, TypeError):
                print("Value is no float!!!", val)

                return
            dbrecord.add_property(name=element['title'], value=val,
                                  unit=element['unit'])
        elif element['type'] == "DESCRIPTIVE_DATA_ELEMENT":
            res = db.execute_query(
                "FIND PROPERTY '{}'".format(element['title']))

            if len(res) == 0:
                p = db.Property(name=element['title'], datatype=db.TEXT)
                p.insert()
            dbrecord.add_property(name=element['title'],
                                  value=element['description'])

    def create_element(self, element_id, el_type, dbrecord):
        """Retrieve one entry element and attach its content to ``dbrecord``.

        ``IMAGE`` elements are handled as ``FILE`` and ``DATA_ELEMENT`` as
        ``DATA``.  Text is converted with html2text, files are downloaded
        and inserted, data elements become properties and tables are only
        printed.  If the element cannot be retrieved a message is printed
        and the element is skipped.
        """
        print(element_id, el_type)

        if el_type == "IMAGE":
            el_type = "FILE"
        elif el_type == "DATA_ELEMENT":
            el_type = "DATA"

        try:
            element = self.connection.retrieve_element(element_id,
                                                       el_type=el_type)
        except Exception:
            # Deliberately best effort, but do not swallow KeyboardInterrupt
            # or SystemExit as a bare ``except`` would.
            print("Could not retrieve: ", element_id)

            return

        if el_type == "TEXT":
            dbrecord.add_property(
                name="textElement",
                value=html2text.html2text(element["content"]))
        elif el_type == "FILE":
            local_file = self.connection.download_file(element_id)
            # The time stamp in the path keeps files with equal names apart.
            f = db.File(name=element["file_name"],
                        path=os.path.join("labfolder", str(time.time()),
                                          element["file_name"]),
                        file=local_file)
            f.insert(unique=False)
            dbrecord.add_property(name="associatedFile", value=f)
        elif el_type == "DATA":
            for subel in element["data_elements"]:
                self.add_property_from_data_element(dbrecord=dbrecord,
                                                    element=subel)
        elif el_type == "TABLE":
            print(element)

    def create_entry(self, entry, dbproject):
        """Return a container holding the ``LabbookEntry`` record of ``entry``.

        The record references ``dbproject`` and receives the content of all
        elements listed in ``entry["elements"]``.
        """
        cont = db.Container()
        dbrecord = db.Record(name=entry["title"])
        dbrecord.add_parent(name="LabbookEntry")
        dbrecord.add_property(name="Project", value=dbproject)
        dbrecord.add_property(name="entryId", value=entry['id'])
        # crawler.cached_find_identifiables([dbrecord])

        # TODO resolve id
        # person = get_author_from_entry(entry)
        # dbrecord.add_property(name="responsible", value=person)

        for element in entry["elements"]:
            print(json.dumps(element, sort_keys=True, indent=4))
            self.create_element(element["id"], element["type"], dbrecord)

        cont.extend([dbrecord])

        return cont
#!/usr/bin/env python3
#
# This file is a part of the CaosDB Project.
#
# Copyright (c) 2020 IndiScale GmbH
# Copyright (c) 2020 Daniel Hornung <d.hornung@indiscale.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
""" Imports labfolder exports """
import os
import re
import shutil
import subprocess
import sys
import tempfile
import time
import warnings
from io import BytesIO, StringIO
import requests
import yaml
from bs4 import BeautifulSoup
import caosdb as db
# Controls how files found in entries are handled by ``create_entry``:
# False inserts them, True retrieves them instead.
RERUN = False
# crawler = Crawler()
# Printed once at import time to warn about the alpha state of the parser.
print("""
WARNING: This is an ALPHA version. Parsing of the by labfolder exported data
might not work correctly! There might be missing elements! Check the result
carefully before inserting it.
""")
def create_project(project):
    """Build the ``Project`` record for an exported project element.

    Name and id are taken from the ``data-name`` and ``data-id`` attributes
    of ``project``.  The record is returned without being inserted.
    """
    attributes = project.attrs
    record = db.Record(name=attributes['data-name'])
    record.add_parent(name="Project")
    record.add_property(name="projectId", value=attributes['data-id'])
    # crawler.cached_find_identifiables([record])

    return record
def get_author_from_entry(entry):
    """Build a ``Person`` record from the author block of ``entry``.

    The first element with class ``author_name`` is used; its
    ``author_firstname`` and ``author_lastname`` children provide the
    ``firstname`` and ``lastname`` properties.
    """
    author = db.Record()
    author.add_parent(name="Person")
    name_blocks = entry.find_all(attrs={'class': 'author_name'})
    name_block = name_blocks[0]

    for part in ("firstname", "lastname"):
        matches = name_block.find_all(attrs={'class': 'author_'+part})
        author.add_property(name=part, value=matches[0].getText())
    # crawler.cached_find_identifiables([author])

    return author
def val_or_none(stuff):
    """Return the text of the first item of ``stuff`` or None if it is empty."""
    return None if len(stuff) == 0 else stuff[0].getText()
def add_property_from_data_element(dbrecord, element):
    """Add the numeric value of an exported data element to ``dbrecord``.

    Unit, title, quantity and value are read from the descendants of
    ``element`` with the classes ``element-unit``, ``element-title``,
    ``element-quantity`` and ``element-value``.  If a quantity exists it is
    appended to the title.  A DOUBLE property with the resulting name is
    inserted when the query finds none.  Elements whose value cannot be
    converted to ``float`` are reported and skipped.
    """
    unit = val_or_none(element.find_all(attrs={'class': 'element-unit'}))
    title = val_or_none(element.find_all(attrs={'class': 'element-title'}))
    quant = val_or_none(element.find_all(attrs={'class': 'element-quantity'}))
    val = val_or_none(element.find_all(attrs={'class': 'element-value'}))

    print("tit", title)
    print("qu", quant)

    if quant is not None:
        quant = quant.strip(": ")
        title = title+" - "+quant
    res = db.execute_query("FIND PROPERTY '{}'".format(title))

    if len(res) == 0:
        p = db.Property(name=title, unit=unit, datatype=db.DOUBLE)
        p.insert()
    try:
        val = float(val)
    except (TypeError, ValueError):
        # TypeError: no value element was found (None).
        # ValueError: the value text is not a number.
        print("Value is no float!!!", val)

        return
    dbrecord.add_property(name=title, value=val, unit=unit)
def create_file(name, filename, root):
    """Return a ``db.File`` for ``filename`` located below ``root``.

    The normalized local path is used both as path and as file location.

    Raises ValueError if the file does not exist.
    """
    location = os.path.normpath(os.path.join(root, filename))

    if os.path.exists(location):
        return db.File(path=location, file=location, name=name)

    raise ValueError("FILE DOES NOT EXIST: ", location)
def _insert_or_retrieve(f):
    """Retrieve ``f`` when RERUN is set, insert it otherwise."""
    if RERUN:
        f.retrieve()
    else:
        f.insert()


def create_entry(entry, dbproject, root):
    """Convert one exported labbook entry into CaosDB entities.

    A ``LabbookEntry`` record is created that references ``dbproject`` and
    the author of the entry.  Every ``dd_entry_cell`` block of ``entry`` is
    then classified, in this order, as text, file download, data elements,
    table or empty element, and added to the record accordingly.  Files are
    looked up relative to ``root``.

    Returns a container with the created files, the entry record and the
    person record.

    Raises ValueError if a download block contains neither an image nor a
    link, or if a referenced file does not exist.
    """
    cont = db.Container()
    dbrecord = db.Record()
    dbrecord.add_parent(name="LabbookEntry")
    dbrecord.add_property(name="Project", value=dbproject)
    dbrecord.add_property(name="entryId", value=entry.attrs['data-id'])
    # crawler.cached_find_identifiables([dbrecord])
    person = get_author_from_entry(entry)
    dbrecord.add_property(name="responsible", value=person)

    for block in entry.find_all(attrs={'class': 'dd_entry_cell'}):
        # If all text field would have the class dd_text_entry the
        # following would be sufficient:
        # if 'dd_text_entry' in block['class']:
        # instead we check whether an editor field exists.
        editor = block.find_all(attrs={'class': 'redactor_editor'})

        if len(editor) > 0:
            dbrecord.add_property(name="textElement",
                                  value=editor[0].getText())

            continue

        download = block.find_all(
            attrs={'class': 'dd_entry_cell_file_download'})

        if len(download) > 0:
            name = ((download[0].parent).attrs['data-filename']).strip('"')

            if name == "blank.png":
                continue

            if len(download[0].find_all("img")) > 0:
                filename = download[0].find_all("img")[0].attrs['src']
            elif len(download[0].find_all("a")) > 0:
                filename = download[0].find_all("a")[0].attrs['href']
            else:
                raise ValueError("could not get filename")
            print(name)
            print(filename)
            f = create_file(name, filename, root)
            _insert_or_retrieve(f)
            dbrecord.add_property(name="associatedFile", value=f)
            cont.append(f)

            continue

        elements = block.find_all(
            attrs={'class': 'data-element-display'})

        if len(elements) > 0:
            for el in elements:
                add_property_from_data_element(dbrecord=dbrecord, element=el)

            continue

        tables = block.find_all(
            attrs={'class': 'table-el-container'})

        if len(tables) > 0:
            name = (tables[0]).find_all(
                attrs={'class': 'table-el-filename'}
            )[0].getText().strip()
            f = create_file(name, name, root)
            _insert_or_retrieve(f)
            dbrecord.add_property(name="table", value=f)
            cont.append(f)

            continue

        empty = block.find_all(
            attrs={'class': 'dd_entry_empty_element'})

        # Check the empty elements themselves; testing ``tables`` here could
        # never succeed because table blocks were already handled above.
        if len(empty) > 0:
            print("\n\nempty")

            continue

    cont.extend([dbrecord, person])

    return cont
def treat_project(path):
    """Import the project whose exported ``index.html`` lies in ``path``.

    The page is parsed and the first element with the id
    ``eln_project_content`` is taken as the project; pages without such an
    element are ignored.  The project record and the entities of all
    descendants carrying a ``data-id`` attribute are collected in one
    container, which is printed and inserted.
    """
    index_file = os.path.join(path, "index.html")
    with open(index_file) as fp:
        tree = BeautifulSoup(fp, features="lxml")

    cont = db.Container()
    candidates = tree.find_all(id="eln_project_content")

    if len(candidates) == 0:
        return

    project = candidates[0]
    dbproject = create_project(project)
    cont.append(dbproject)

    for entry in project.find_all(lambda tag: tag.has_attr('data-id')):
        cont.extend(create_entry(entry, dbproject, path))

    print(cont)
    cont.insert()
def import_data(folder):
    """Import the data of a labfolder export.

    ``folder`` must contain a ``projects`` folder.  That folder is walked
    and every directory that holds an ``index.html`` is treated as a
    project.

    Raises ValueError if ``folder`` or its ``projects`` folder is missing.
    """
    if not os.path.exists(folder):
        raise ValueError("folder does not exist")

    projects_folder = os.path.join(folder, "projects")

    if not os.path.exists(projects_folder):
        raise ValueError("folder does not contain a projects folder")

    for current_dir, subdirs, filenames in os.walk(projects_folder):
        print(current_dir, subdirs, filenames)

        if "index.html" not in filenames:
            continue
        treat_project(current_dir)
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
<meta name="apple-mobile-web-app-capable" content="yes">
<meta name="apple-mobile-web-app-status-bar-style"
content="black-translucent">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Projects</title>
<link rel="shortcut icon" type="image/x-icon"
href="static/img/favicon2.ico"/>
<script
src="static/js/jquery-1.8.2.min.js"
type="text/javascript"></script>
<script src="static/js/tree.js"
type="text/javascript"></script>
<!-- This must be the first labfolder JS file included -->
<script
src="static/js/labfolder-global.js?13fcc6eeb30608bb104f4b234c2fa3fd86699ffe"
type="text/javascript"></script>
<link rel="stylesheet" type="text/css"
href="static/css/eln_layout.css"/>
<link rel="stylesheet" type="text/css"
href="static/css/pixel_icon.css"/>
<link rel="stylesheet" type="text/css"
href="static/css/tree.css"/>
<link rel="stylesheet" type="text/css"
href="static/css/notebook.css"/>
</head>
<body>
<div class="body_bg"></div>
<div class="eln_header eln_row">
<div class="headerbar_top">
<a href="/eln/"><span class="logo-img"></span></a>
<header>
<span aria-hidden="true" class="manage_s-img"></span> Projects
</header>
<nav>
<div class="nav_top">
<ul>
<li><a href="templates.html">
<button class="header_btn ">
<span class="desk-img"></span>
<p>Templates</p>
</button>
</a></li>
</ul>
</div>
</nav>
</div>
</div>
<div class="action_bar clearfix"></div>
<div id="data_element" data-viewname="WORKSPACE_INDEX"></div>
<div id="eln_main_content"
class="eln_main_content eln_row eln_scroll-y">
<div class="eln_main_content_box projects-list">
<div class="headers">
<div class="owner">Owner</div>
<div class="update">Last Modified</div>
<div class="created">Created</div>
</div>
<div class="tree_my_eln_projects tree_top_level" data-treeid="eln_projects">
<a id="treeline_eln_projects_0_0"
data-groupid="0"
data-objectid="0"
data-ownerid="{{ownerId}}"
class="treeline is_folder ui-droppable is_closed_folder">
<span class="updateTS"></span>
<span class="folder_up-img"></span>
<span class="name">My private projects</span>
<span class="details">
<span class="box-owner">
<label>Owner:</label>
</span>
<span class="box-last-update">
<label>Last update:</label>
</span>
</span>
</a>
<div class="treeline_children" style="overflow: hidden; display: none;"><a id="treeline_eln_projects" data-id="118217" data-parentId="0" data-userId="30003" data-groupId="0" data-name="Example project" data-folder="false" data-template="false" data-createTS="22.10.2019 15:49" data-hidden="false" data-shareable="false" data-owner-profilePictureHash="null" data-owner-tutorial="1" data-owner-zoneId="Europe/Berlin" data-owner-id="30003" data-owner-firstname="max" data-owner-lastname="muster" data-owner-email="max.muster@posteo.de" data-numberOfBlocks="4" data-lastEditedTS="28.01.2020 10:12" data-adminUserIds="[]" data-adminOrOwner="true" class="treeline is_item ui-draggable" href="./projects/My private projects_0/118217_Example%20project/index.html">
<span class="updateTS">22.10.2019 15:49</span>
<span class="project_s-img"></span>
<span class="name">Example project</span>
<span class="details">
<span class="box-owner">
<label>Owner:</label>
max
muster
</span>
<span class="box-last-update">
<label>Last update:</label>
28.01.2020 10:12
</span>
</span>
</a>
<div class="treeline_children" style="overflow: hidden; display: none;"></div>
<a id="treeline_eln_projects" data-id="118224" data-parentId="0" data-userId="30003" data-groupId="0" data-name="subproj 1" data-folder="false" data-template="false" data-createTS="22.10.2019 16:49" data-hidden="false" data-shareable="false" data-owner-profilePictureHash="null" data-owner-tutorial="1" data-owner-zoneId="Europe/Berlin" data-owner-id="30003" data-owner-firstname="max" data-owner-lastname="muster" data-owner-email="max.muster@posteo.de" data-numberOfBlocks="0" data-lastEditedTS="22.10.2019 16:49" data-adminUserIds="[]" data-adminOrOwner="true" class="treeline is_item ui-draggable" href="./projects/My private projects_0/118224_subproj%201/index.html">
<span class="updateTS">22.10.2019 16:49</span>
<span class="project_s-img"></span>
<span class="name">subproj 1</span>
<span class="details">
<span class="box-owner">
<label>Owner:</label>
max
muster
</span>
<span class="box-last-update">
<label>Last update:</label>
22.10.2019 16:49
</span>
</span>
</a>
<div class="treeline_children" style="overflow: hidden; display: none;"></div>
</div>
</div>
</div>
</div>
</body>
</html>
This diff is collapsed.
.data-elements {
display: block;
font-family: 'Open Sans', Helvetica, Arial, sans-serif;
}
.notebook-element-content {
height: calc(100% - 10px);
background: #FFF;
padding: 1.25em 0 0.75em;
}
.notebook-element-content .data-element {
display: block;
-webkit-touch-callout: none;
/* iOS Safari */
-webkit-user-select: none;
/* Chrome/Safari/Opera */
-khtml-user-select: none;
/* Konqueror */
-moz-user-select: none;
/* Firefox */
-ms-user-select: none;
/* Internet Explorer/Edge */
user-select: none;
/* Non-prefixed version, currently not supported by any browser */
}
/* Match element or class (class is used in compose mode) */
.data-element-wrap {
margin-bottom: 0.75em;
margin-right: 1em;
display: flex;
align-items: baseline;
position: relative;
}
.data-element-wrap .data-element-icon {
width: 12px;
height: 12px;
margin: 0.65em;
margin-right: 0.75em;
margin-left: 1.15em;
fill: #9D9D9D;
flex-shrink: 0;
}
.data-element-wrap .data-element-display {
border: solid 1px #c0c0c0;
border-radius: 5px;
padding: 0.5em 0.75em;
overflow-wrap: break-word;
word-wrap: break-word;
-ms-word-break: break-all;
word-break: break-all;
word-break: break-word;
}
.data-element-wrap .data-element-display .empty-value {
color: #9D9D9D;
font-size: 1.75em;
line-height: 0.25em;
}
.data-element-wrap .data-element-display .element-title {
font-weight: bold;
}
.data-group-wrap .data-group-icon {
align-self: flex-start;
}
.data-group-wrap .data-group-content.display-mode {
/**
* Fix for nested flexbox sizing in IE11. See:
* https://github.com/philipwalton/flexbugs/issues/170
* https://github.com/philipwalton/flexbugs/issues/71
*/
min-width: 0%;
border: solid 1px #c0c0c0;
border-radius: 5px;
padding: 0.5em 0.75em 0;
}
.data-group-wrap .data-group-content.display-mode .data-element-display {
border: none;
padding: 0;
}
.data-group-wrap .data-group-content .data-group-header .element-title {
font-weight: bold;
}
.data-group-wrap .data-group-content .data-element-icon {
margin: 0;
margin-right: 0.75em;
}
.data-group-wrap .data-group-content .data-group-icon {
margin-top: 0.75em;
}
.descriptive-element-wrap {
align-items: baseline;
}
.descriptive-element-wrap .descriptive-element-display .element-title {
font-weight: bold;
}
.material-element-wrap {
align-items: flex-start;
}
.material-element-wrap .material-element-display .element-title {
color: #6cc0ec;
font-weight: bold;
word-wrap: break-word;
}
.material-element-wrap .material-element-display .element-title:hover {
color: #96dbff;
}
This diff is collapsed.
.entry_footer {
background: #cad4de;
margin-left: 30px;
margin-right: 45px;
padding-top: 2px;
padding-bottom: 2px;
min-width: 696px;
height: auto;
font-size: 0.6em;
}
.entry_footer > span {
color: #748dad;
font-weight: 900;
padding: 8px 8px 6px 8px;
}
.entry_footer_line {
margin-top: 2px;
height: auto;
display: flex;
flex-direction: column;
position: relative;
padding: 5px 5px 5px 22px;
background: #f2f5f7;
border-left: solid 1px #aabbca;
border-right: solid 1px #aabbca;
}
.entry_footer_signature > img {
position: absolute;
right: 10px;
margin-top: -2px;
height: 30px;
}
.width_80_percent {
width: 80%;
}
.min_height_35 {
min-height: 35px;
}
.table-el-container, .well-plate-el-container {
padding: 100px 10px;
min-height: calc(100% - 10px) !important;
height: 243px;
background: white;
text-align: center;
}
.table-el-info, .well-plate-el-info {
color: #bababa;
}
.table-el-download, .well-plate-el-download {
margin-top: 8px;
}
.table-el-download > a, .well-plate-el-download > a {
color: #6cc0ec;
}
.table-el-icon, .well-plate-el-icon {
width: 16px;
height: 16px;
vertical-align: middle;
margin-right: 0.2em;
fill: #6cc0ec;
flex-shrink: 0;
}
.table-el-filename, .well-plate-el-filename {
display: inline-block;
vertical-align: middle;
font-size: 16px;
word-break: break-all;
}
This diff is collapsed.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment