Skip to content
Snippets Groups Projects
Verified Commit 84b6a505 authored by Daniel Hornung's avatar Daniel Hornung
Browse files

Resurrect labfolder converter, for when someone wants to work on it.

Revert "!MAINT: Removed labfolder converter."

This reverts commit fcbffa46.
parent a4b21487
No related tags found
No related merge requests found
Pipeline #58228 passed with warnings
Showing
with 12577 additions and 1 deletion
#!/usr/bin/env python3
#
# This file is a part of the LinkAhead project.
#
# Copyright (c) 2020 IndiScale GmbH
# Copyright (c) 2020 Daniel Hornung <d.hornung@indiscale.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
""" Imports labfolder exports """
import argparse
import sys
import caosmodels
from caosmodels.parser import parse_model_from_yaml
from caosadvancedtools.converter import labfolder_export as labfolder
def main(args):
    """Synchronize the data model and import a labfolder export.

    The model is read from ``./models/model.yml`` and synchronized first;
    afterwards the export located in ``args.folder`` is imported.
    """
    parse_model_from_yaml("./models/model.yml").sync_data_model()
    labfolder.import_data(args.folder)
if __name__ == "__main__":
    # The export folder is an optional positional argument; without it the
    # bundled example data folder is used.
    cli = argparse.ArgumentParser()
    cli.add_argument("folder", nargs="?", default="./example_labfolder_data",
                     help='folder that contains the data')
    sys.exit(main(cli.parse_args()))
#!/usr/bin/env python3
#
# This file is a part of the LinkAhead project.
#
# Copyright (c) 2020 IndiScale GmbH
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
""" Retrieve Labfolder data from API """
import argparse
import sys
import caosmodels
from caosmodels.parser import parse_model_from_yaml
from caosadvancedtools.converter.labfolder_api import Importer
def main(args):
    """Parse the data model definition and import everything from the API.

    ``args`` is not read by this function.
    """
    # The model is only parsed here; synchronizing it is currently disabled:
    # data_model.sync_data_model()
    data_model = parse_model_from_yaml("./models/model.yml")
    Importer().import_data()
if __name__ == "__main__":
    # NOTE(review): the ``folder`` argument is parsed, but ``main`` in this
    # script does not read it - confirm whether it is still needed.
    cli = argparse.ArgumentParser()
    cli.add_argument("folder", nargs="?", default="./example_labfolder_data",
                     help='folder that contains the data')
    sys.exit(main(cli.parse_args()))
...@@ -6,7 +6,7 @@ good-names=ii,rt,df ...@@ -6,7 +6,7 @@ good-names=ii,rt,df
# List of module names for which member attributes should not be checked # List of module names for which member attributes should not be checked
# (useful for modules/projects where namespaces are manipulated during runtime # (useful for modules/projects where namespaces are manipulated during runtime
# and thus existing member attributes cannot be deduced by static analysis # and thus existing member attributes cannot be deduced by static analysis
ignored-modules=etree,h5py ignored-modules=etree,h5py,labfolder
[MASTER] [MASTER]
# TODO: The max_inferred size is necessary for https://github.com/PyCQA/pylint/issues/4577, # TODO: The max_inferred size is necessary for https://github.com/PyCQA/pylint/issues/4577,
......
#!/usr/bin/env python3
#
# This file is a part of the LinkAhead project.
#
# Copyright (c) 2020 IndiScale GmbH
# Copyright (c) 2020 Daniel Hornung <d.hornung@indiscale.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
""" Imports data from labfolder via api """
import json
import os
import time
import html2text
import linkahead as db
from labfolder.connection import configure_connection # pylint: disable=import-error
class Importer(object):
    """Import projects and their entries from the labfolder API.

    On construction a labfolder connection is configured and all projects and
    entries are retrieved through it.  ``import_data`` converts each project
    together with its entries into records and inserts them.
    """

    def __init__(self):
        self.connection = configure_connection()
        self.projects = self.connection.retrieve_projects()
        self.entries = self.connection.retrieve_entries()

    def create_project(self, project):
        """Return a new, not yet inserted ``Project`` record.

        The record is named after ``project['title']`` and carries
        ``project['id']`` as its ``projectId`` property.
        """
        dbproject = db.Record(name=project['title'])
        dbproject.add_parent(name="Project")
        dbproject.add_property(name="projectId", value=project['id'])

        return dbproject

    def get_entries(self, project_id):
        """Return the retrieved entries whose ``project_id`` matches."""
        return [ent for ent in self.entries if ent["project_id"] == project_id]

    def treat_project(self, project):
        """Convert one project with all its entries and insert the result."""
        cont = db.Container()
        dbproject = self.create_project(project)
        cont.append(dbproject)

        for entry in self.get_entries(project["id"]):
            recs = self.create_entry(entry, dbproject)
            cont.extend(recs)

        print(cont)
        cont.insert(unique=False)

    def import_data(self):
        """Import every project that was retrieved on construction."""
        for project in self.projects:
            self.treat_project(project)

    def add_property_from_data_element(self, dbrecord, element):
        """Add the content of a labfolder data element to ``dbrecord``.

        ``DATA_ELEMENT_GROUP`` elements are traversed recursively.
        ``SINGLE_DATA_ELEMENT`` elements become numeric properties with a
        unit; they are skipped when the property cannot be created or the
        value is not convertible to ``float``.  ``DESCRIPTIVE_DATA_ELEMENT``
        elements become text properties.  A property that does not exist yet
        is inserted before it is used.
        """
        if element['type'] == "DATA_ELEMENT_GROUP":
            for c in element["children"]:
                self.add_property_from_data_element(dbrecord, c)
        elif element['type'] == "SINGLE_DATA_ELEMENT":
            res = db.execute_query("FIND PROPERTY '{}'".format(element['title']))

            if len(res) == 0:
                p = db.Property(name=element['title'], unit=element['unit'], datatype=db.DOUBLE)
                try:
                    p.insert()
                except db.exceptions.TransactionError as e:
                    print(e)

                    return
            val = element['value']
            try:
                val = float(val)
            except (ValueError, TypeError):
                print("Value is no float!!!", val)

                return
            dbrecord.add_property(name=element['title'], value=val, unit=element['unit'])
        elif element['type'] == "DESCRIPTIVE_DATA_ELEMENT":
            res = db.execute_query("FIND PROPERTY '{}'".format(element['title']))

            if len(res) == 0:
                p = db.Property(name=element['title'], datatype=db.TEXT)
                p.insert()
            dbrecord.add_property(name=element['title'],
                                  value=element['description'])

    def create_element(self, element_id, el_type, dbrecord):
        """Retrieve one entry element and attach its content to ``dbrecord``.

        ``IMAGE`` elements are handled like ``FILE`` and ``DATA_ELEMENT`` like
        ``DATA``.  Text is converted from HTML with ``html2text``, files are
        downloaded and inserted, data elements are turned into properties and
        tables are only printed.  When the element cannot be retrieved, a
        message is printed and the element is skipped.
        """
        print(element_id, el_type)

        if el_type == "IMAGE":
            el_type = "FILE"
        elif el_type == "DATA_ELEMENT":
            el_type = "DATA"

        try:
            element = self.connection.retrieve_element(element_id, el_type=el_type)
        except Exception:
            # Only ordinary errors are treated as "skip this element";
            # KeyboardInterrupt and SystemExit must still stop the import.
            print("Could not retrieve: ", element_id)

            return

        if el_type == "TEXT":
            dbrecord.add_property(
                name="textElement",
                value=html2text.html2text(element["content"]))
        elif el_type == "FILE":
            local_file = self.connection.download_file(element_id)
            # The current time is used as a directory name so that files
            # with the same name do not end up at the same path.
            f = db.File(name=element["file_name"],
                        path=os.path.join("labfolder", str(time.time()),
                                          element["file_name"]),
                        file=local_file)
            f.insert(unique=False)
            dbrecord.add_property(name="associatedFile", value=f)
        elif el_type == "DATA":
            for subel in element["data_elements"]:
                self.add_property_from_data_element(dbrecord=dbrecord,
                                                    element=subel)
        elif el_type == "TABLE":
            print(element)

    def create_entry(self, entry, dbproject):
        """Return a container holding the ``LabbookEntry`` record of ``entry``.

        The record references ``dbproject``, stores ``entry['id']`` as
        ``entryId`` and receives the content of every element of the entry.
        """
        cont = db.Container()
        dbrecord = db.Record(name=entry["title"])
        dbrecord.add_parent(name="LabbookEntry")
        dbrecord.add_property(name="Project", value=dbproject)
        dbrecord.add_property(name="entryId", value=entry['id'])
        # TODO resolve id
        # person = get_author_from_entry(entry)
        # dbrecord.add_property(name="responsible", value=person)

        for element in entry["elements"]:
            print(json.dumps(element, sort_keys=True, indent=4))
            self.create_element(element["id"], element["type"], dbrecord)

        cont.extend([dbrecord])

        return cont
#!/usr/bin/env python3
#
# This file is a part of the LinkAhead project.
#
# Copyright (c) 2020 IndiScale GmbH
# Copyright (c) 2020 Daniel Hornung <d.hornung@indiscale.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
""" Imports labfolder exports """
import os
from bs4 import BeautifulSoup
import linkahead as db
# When True, files are retrieved instead of inserted while creating entries.
RERUN = False
# crawler = Crawler()

# Printed once when this module is imported.
_ALPHA_WARNING = """
WARNING: This is an ALPHA version. Parsing of the by labfolder exported data
might not work correctly! There might be missing elements! Check the result
carefully before inserting it.
"""
print(_ALPHA_WARNING)
def create_project(project):
    """Build a ``Project`` record from the project tag of an export page.

    The record name is taken from the tag's ``data-name`` attribute and the
    ``projectId`` property from its ``data-id`` attribute.  The record is
    returned without being inserted.
    """
    tag_attrs = project.attrs
    record = db.Record(name=tag_attrs['data-name'])
    record.add_parent(name="Project")
    record.add_property(name="projectId", value=tag_attrs['data-id'])
    # crawler.cached_find_identifiables([record])

    return record
def get_author_from_entry(entry):
    """Build a ``Person`` record for the author of ``entry``.

    First and last name are read from the ``author_firstname`` and
    ``author_lastname`` tags inside the first ``author_name`` tag of the
    entry.  The record is returned without being inserted.
    """
    author = db.Record()
    author.add_parent(name="Person")
    author_tags = entry.find_all(attrs={'class': 'author_name'})

    for part in ("firstname", "lastname"):
        name_tag = author_tags[0].find_all(attrs={'class': 'author_' + part})[0]
        author.add_property(name=part, value=name_tag.getText())
    # crawler.cached_find_identifiables([author])

    return author
def val_or_none(stuff):
    """Return the text of the first item of ``stuff``, or None if it is empty."""
    return stuff[0].getText() if len(stuff) > 0 else None
def add_property_from_data_element(dbrecord, element):
    """Add the numeric value of an exported data element to ``dbrecord``.

    Unit, title, quantity and value are read from the ``element-unit``,
    ``element-title``, ``element-quantity`` and ``element-value`` tags of
    ``element``.  If a quantity is present it is appended to the title.  A
    property with the resulting name is inserted if it does not exist yet.
    Elements whose value cannot be converted to ``float`` are reported and
    skipped.
    """
    unit = val_or_none(element.find_all(attrs={'class': 'element-unit'}))
    title = val_or_none(element.find_all(attrs={'class': 'element-title'}))
    quant = val_or_none(element.find_all(attrs={'class': 'element-quantity'}))
    val = val_or_none(element.find_all(attrs={'class': 'element-value'}))

    print("tit", title)
    print("qu", quant)

    if quant is not None:
        quant = quant.strip(": ")
        title = title+" - "+quant
    res = db.execute_query("FIND PROPERTY '{}'".format(title))

    if len(res) == 0:
        p = db.Property(name=title, unit=unit, datatype=db.DOUBLE)
        p.insert()
    try:
        val = float(val)
    except (ValueError, TypeError):
        # float() raises TypeError for a missing value (None) and ValueError
        # for text that is not a number; both mean "skip this element".
        print("Value is no float!!!", val)

        return
    dbrecord.add_property(name=title, value=val, unit=unit)
def create_file(name, filename, root):
    """Return a file entity for ``filename`` located below ``root``.

    The normalized local path is used both as the source file and as the
    target path of the entity.  The entity is returned without being
    inserted.

    Raises
    ------
    ValueError
        If the file does not exist locally.
    """
    local_path = os.path.join(root, filename)
    local_path = os.path.normpath(local_path)

    if not os.path.exists(local_path):
        # Build one message string; passing the path as a second argument
        # would make the message render as a tuple.
        raise ValueError("FILE DOES NOT EXIST: {}".format(local_path))
    f = db.File(path=local_path, file=local_path, name=name)

    return f
def create_entry(entry, dbproject, root):
    """Convert one exported labbook entry into records and file entities.

    A ``LabbookEntry`` record is created that references ``dbproject``, the
    entry id and the author.  Every ``dd_entry_cell`` block of the entry is
    then inspected and handled as the first matching kind of: text, file
    download, data elements, table or empty element.  Files are looked up
    relative to ``root`` and inserted immediately (or retrieved when
    ``RERUN`` is set).

    Returns a container with the file entities, the entry record and the
    author record.
    """
    cont = db.Container()
    dbrecord = db.Record()
    dbrecord.add_parent(name="LabbookEntry")
    dbrecord.add_property(name="Project", value=dbproject)
    dbrecord.add_property(name="entryId", value=entry.attrs['data-id'])
    # crawler.cached_find_identifiables([dbrecord])
    person = get_author_from_entry(entry)
    dbrecord.add_property(name="responsible", value=person)

    for block in entry.find_all(attrs={'class': 'dd_entry_cell'}):
        # If all text field would have the class dd_text_entry the
        # following would be sufficient:
        # if 'dd_text_entry' in block['class']:
        # instead we check whether an editor field exists.
        editor = block.find_all(attrs={'class': 'redactor_editor'})

        if len(editor) > 0:
            dbrecord.add_property(name="textElement", value=editor[0].getText())

            continue

        download = block.find_all(
            attrs={'class': 'dd_entry_cell_file_download'})

        if len(download) > 0:
            name = ((download[0].parent).attrs['data-filename']).strip('"')

            if name == "blank.png":
                continue

            # Images reference the file via ``src``, other files via ``href``.
            if len(download[0].find_all("img")) > 0:
                filename = download[0].find_all("img")[0].attrs['src']
            elif len(download[0].find_all("a")) > 0:
                filename = download[0].find_all("a")[0].attrs['href']
            else:
                raise ValueError("could not get filename")
            print(name)
            print(filename)
            f = create_file(name, filename, root)

            if RERUN:
                f.retrieve()
            else:
                f.insert()
            dbrecord.add_property(name="associatedFile", value=f)
            cont.append(f)

            continue

        elements = block.find_all(
            attrs={'class': 'data-element-display'})

        if len(elements) > 0:
            for el in elements:
                add_property_from_data_element(dbrecord=dbrecord, element=el)

            continue

        tables = block.find_all(
            attrs={'class': 'table-el-container'})

        if len(tables) > 0:
            name = (tables[0]).find_all(
                attrs={'class': 'table-el-filename'}
            )[0].getText().strip()
            f = create_file(name, name, root)

            if RERUN:
                f.retrieve()
            else:
                f.insert()
            dbrecord.add_property(name="table", value=f)
            cont.append(f)

            continue

        empty = block.find_all(
            attrs={'class': 'dd_entry_empty_element'})

        # Check the empty-element matches here; ``tables`` is always empty
        # at this point because the table branch above continues.
        if len(empty) > 0:
            print("\n\nempty")

            continue

    cont.extend([dbrecord, person])

    return cont
def treat_project(path):
    """Import the project whose exported ``index.html`` lies in ``path``.

    The page is parsed, a project record is created from the
    ``eln_project_content`` tag and every tag below it that has a ``data-id``
    attribute is converted as an entry.  All resulting entities are inserted
    in one container.  Pages without a project tag are skipped silently.
    """
    # Read explicitly as UTF-8 so that decoding does not depend on the
    # locale of the machine running the import.
    # NOTE(review): assumes project pages are UTF-8 encoded - confirm.
    with open(os.path.join(path, "index.html"), encoding="utf-8") as fp:
        tree = BeautifulSoup(fp, features="lxml")

    cont = db.Container()
    matches = tree.find_all(id="eln_project_content")

    if len(matches) == 0:
        return

    project = matches[0]
    dbproject = create_project(project)
    cont.append(dbproject)

    for entry in project.find_all(lambda x: x.has_attr('data-id')):
        recs = create_entry(entry, dbproject, path)
        cont.extend(recs)

    print(cont)
    cont.insert()
def import_data(folder):
    """Import all projects found in a labfolder export.

    ``folder`` must exist and contain a ``projects`` sub folder; otherwise a
    ``ValueError`` is raised.  Every directory below ``projects`` that holds
    an ``index.html`` is treated as one project.
    """
    if not os.path.exists(folder):
        raise ValueError("folder does not exist")

    projects_dir = os.path.join(folder, "projects")

    if not os.path.exists(projects_dir):
        raise ValueError("folder does not contain a projects folder")

    for dirpath, dirnames, filenames in os.walk(projects_dir):
        print(dirpath, dirnames, filenames)

        if "index.html" not in filenames:
            continue
        treat_project(dirpath)
...@@ -201,4 +201,5 @@ autodoc_default_options = { ...@@ -201,4 +201,5 @@ autodoc_default_options = {
'undoc-members': None, 'undoc-members': None,
} }
autodoc_mock_imports = [ autodoc_mock_imports = [
"labfolder",
] ]
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
<meta name="apple-mobile-web-app-capable" content="yes">
<meta name="apple-mobile-web-app-status-bar-style"
content="black-translucent">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Projects</title>
<link rel="shortcut icon" type="image/x-icon"
href="static/img/favicon2.ico"/>
<script
src="static/js/jquery-1.8.2.min.js"
type="text/javascript"></script>
<script src="static/js/tree.js"
type="text/javascript"></script>
<!-- This must be the first labfolder JS file included -->
<script
src="static/js/labfolder-global.js?13fcc6eeb30608bb104f4b234c2fa3fd86699ffe"
type="text/javascript"></script>
<link rel="stylesheet" type="text/css"
href="static/css/eln_layout.css"/>
<link rel="stylesheet" type="text/css"
href="static/css/pixel_icon.css"/>
<link rel="stylesheet" type="text/css"
href="static/css/tree.css"/>
<link rel="stylesheet" type="text/css"
href="static/css/notebook.css"/>
</head>
<body>
<div class="body_bg"></div>
<div class="eln_header eln_row">
<div class="headerbar_top">
<a href="/eln/"><span class="logo-img"></span></a>
<header>
<span aria-hidden="true" class="manage_s-img"></span> Projects
</header>
<nav>
<div class="nav_top">
<ul>
<li><a href="templates.html">
<button class="header_btn ">
<span class="desk-img"></span>
<p>Templates</p>
</button>
</a></li>
</ul>
</div>
</nav>
</div>
</div>
<div class="action_bar clearfix"></div>
<div id="data_element" data-viewname="WORKSPACE_INDEX"></div>
<div id="eln_main_content"
class="eln_main_content eln_row eln_scroll-y">
<div class="eln_main_content_box projects-list">
<div class="headers">
<div class="owner">Owner</div>
<div class="update">Last Modified</div>
<div class="created">Created</div>
</div>
<div class="tree_my_eln_projects tree_top_level" data-treeid="eln_projects">
<a id="treeline_eln_projects_0_0"
data-groupid="0"
data-objectid="0"
data-ownerid="{{ownerId}}"
class="treeline is_folder ui-droppable is_closed_folder">
<span class="updateTS"></span>
<span class="folder_up-img"></span>
<span class="name">My private projects</span>
<span class="details">
<span class="box-owner">
<label>Owner:</label>
</span>
<span class="box-last-update">
<label>Last update:</label>
</span>
</span>
</a>
<div class="treeline_children"style="overflow: hidden; display: none;"><a id="treeline_eln_projects" data-id="118217" data-parentId="0" data-userId="30003" data-groupId="0" data-name="Example project" data-folder="false" data-template="false" data-createTS="22.10.2019 15:49" data-hidden="false" data-shareable="false" data-owner-profilePictureHash="null" data-owner-tutorial="1" data-owner-zoneId="Europe/Berlin" data-owner-id="30003" data-owner-firstname="max" data-owner-lastname="muster" data-owner-email="max.muster@posteo.de" data-numberOfBlocks="4" data-lastEditedTS="28.01.2020 10:12" data-adminUserIds="[]" data-adminOrOwner="true" class="treeline is_item ui-draggable" href="./projects/My private projects_0/118217_Example%20project/index.html">
<span class="updateTS">22.10.2019 15:49</span>
<span class="project_s-img"></span>
<span class="name">Example project</span>
<span class="details">
<span class="box-owner">
<label>Owner:</label>
max
muster
</span>
<span class="box-last-update">
<label>Last update:</label>
28.01.2020 10:12
</span>
</span>
</a>
<div class="treeline_children"style="overflow: hidden; display: none;"></div>
<a id="treeline_eln_projects" data-id="118224" data-parentId="0" data-userId="30003" data-groupId="0" data-name="subproj 1" data-folder="false" data-template="false" data-createTS="22.10.2019 16:49" data-hidden="false" data-shareable="false" data-owner-profilePictureHash="null" data-owner-tutorial="1" data-owner-zoneId="Europe/Berlin" data-owner-id="30003" data-owner-firstname="max" data-owner-lastname="muster" data-owner-email="max.muster@posteo.de" data-numberOfBlocks="0" data-lastEditedTS="22.10.2019 16:49" data-adminUserIds="[]" data-adminOrOwner="true" class="treeline is_item ui-draggable" href="./projects/My private projects_0/118224_subproj%201/index.html">
<span class="updateTS">22.10.2019 16:49</span>
<span class="project_s-img"></span>
<span class="name">subproj 1</span>
<span class="details">
<span class="box-owner">
<label>Owner:</label>
max
muster
</span>
<span class="box-last-update">
<label>Last update:</label>
22.10.2019 16:49
</span>
</span>
</a>
<div class="treeline_children"style="overflow: hidden; display: none;"></div>
</div>
</div>
</div>
</div>
</body>
</html>
This diff is collapsed.
.data-elements {
display: block;
font-family: 'Open Sans', Helvetica, Arial, sans-serif;
}
.notebook-element-content {
height: calc(100% - 10px);
background: #FFF;
padding: 1.25em 0 0.75em;
}
.notebook-element-content .data-element {
display: block;
-webkit-touch-callout: none;
/* iOS Safari */
-webkit-user-select: none;
/* Chrome/Safari/Opera */
-khtml-user-select: none;
/* Konqueror */
-moz-user-select: none;
/* Firefox */
-ms-user-select: none;
/* Internet Explorer/Edge */
user-select: none;
/* Non-prefixed standard property; the prefixed forms above are fallbacks for older browsers */
}
/* Match element or class (class is used in compose mode) */
.data-element-wrap {
margin-bottom: 0.75em;
margin-right: 1em;
display: flex;
align-items: baseline;
position: relative;
}
.data-element-wrap .data-element-icon {
width: 12px;
height: 12px;
margin: 0.65em;
margin-right: 0.75em;
margin-left: 1.15em;
fill: #9D9D9D;
flex-shrink: 0;
}
.data-element-wrap .data-element-display {
border: solid 1px #c0c0c0;
border-radius: 5px;
padding: 0.5em 0.75em;
overflow-wrap: break-word;
word-wrap: break-word;
-ms-word-break: break-all;
word-break: break-all;
word-break: break-word;
}
.data-element-wrap .data-element-display .empty-value {
color: #9D9D9D;
font-size: 1.75em;
line-height: 0.25em;
}
.data-element-wrap .data-element-display .element-title {
font-weight: bold;
}
.data-group-wrap .data-group-icon {
align-self: flex-start;
}
.data-group-wrap .data-group-content.display-mode {
/**
* Fix for nested flexbox sizing in IE11. See:
* https://github.com/philipwalton/flexbugs/issues/170
* https://github.com/philipwalton/flexbugs/issues/71
*/
min-width: 0%;
border: solid 1px #c0c0c0;
border-radius: 5px;
padding: 0.5em 0.75em 0;
}
.data-group-wrap .data-group-content.display-mode .data-element-display {
border: none;
padding: 0;
}
.data-group-wrap .data-group-content .data-group-header .element-title {
font-weight: bold;
}
.data-group-wrap .data-group-content .data-element-icon {
margin: 0;
margin-right: 0.75em;
}
.data-group-wrap .data-group-content .data-group-icon {
margin-top: 0.75em;
}
.descriptive-element-wrap {
align-items: baseline;
}
.descriptive-element-wrap .descriptive-element-display .element-title {
font-weight: bold;
}
.material-element-wrap {
align-items: flex-start;
}
.material-element-wrap .material-element-display .element-title {
color: #6cc0ec;
font-weight: bold;
word-wrap: break-word;
}
.material-element-wrap .material-element-display .element-title:hover {
color: #96dbff;
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment