Skip to content
Snippets Groups Projects
Commit 32f2070b authored by Henrik tom Wörden's avatar Henrik tom Wörden
Browse files

ENH: basic conversion labfolder project to records

parent 9fce241f
No related branches found
No related tags found
No related merge requests found
......@@ -19,7 +19,7 @@
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
""" Imports labfolder export """
""" Imports labfolder exports """
import argparse
import os
......@@ -38,101 +38,130 @@ from bs4 import BeautifulSoup
import caosdb as db
#crawler = Crawler()
def handle_starttag(self, tag, attrs):
att_dict = {}
for attr in attrs:
if attr[0] in att_dict:
raise Exception("multiple")
att_dict[attr[0]] = attr[1]
def create_project(project):
dbproject = db.Record(name=project.attrs['data-name'])
dbproject.add_parent(name="Project")
dbproject.add_property(name="projectId", value=project.attrs['data-id'])
# crawler.cached_find_identifiables([dbproject])
if "data-id" in att_dict:
if self.has_class(att_dict, "eln_project_content"):
print("Project:", att_dict["data-id"])
else:
print("Entry:", att_dict["data-id"])
rec = db.Record()
self.records.append(rec)
return dbproject
if self.has_class(att_dict, "dd_entry_cell_content"):
self.records[-1].add_property("text",
att_dict["dd_entry_cell_content"])
def get_author_from_entry(entry):
person = db.Record()
person.add_parent(name="Person")
resp = entry.find_all(attrs={'class': 'author_name'})
def has_class(ele, name):
if "class" not in ele.attrib:
return False
for name in ["firstname", "lastname"]:
person.add_property(
name=name,
value=resp[0].find_all(attrs={'class': 'author_'+name})[0].getText())
# crawler.cached_find_identifiables([person])
return name in ele.attrib["class"].split(" ")
return person
def main(args):
"""The main function."""
def val_or_none(stuff):
    """Return the text of the first item in ``stuff``, or None if there is none.

    Parameters
    ----------
    stuff : sequence or None
        Items that provide a ``getText()`` method (in this file: the result
        of a ``find_all`` call). An empty sequence or None yields None.

    Returns
    -------
    The value of ``stuff[0].getText()``, or None when ``stuff`` is empty.
    """
    # Truthiness covers both the empty result list and a missing (None) input.
    if not stuff:
        return None

    return stuff[0].getText()
if not os.path.exists(args.file):
raise ValueError("File does not exist")
with open(args.file) as inpu:
text = inpu.read()
def add_property_from_data_element(dbrecord, element):
unit = val_or_none(element.find_all(attrs={'class': 'element-unit'}))
title = val_or_none(element.find_all(attrs={'class': 'element-title'}))
quant = val_or_none(element.find_all(attrs={'class': 'element-quantity'}))
val = val_or_none(element.find_all(attrs={'class': 'element-value'}))
tree = BeautifulSoup(text, features="lxml")
project = tree.find_all(id="eln_project_content")[0]
if quant is not None:
title = title+"-"+quant
dbrecord.add_property(name=title, value=val, unit=unit)
def create_entry(entry, dbproject):
dbrecord = db.Record()
dbrecord.add_parent(name="LabbookEntry")
dbrecord.add_property(name="Project", value=dbproject)
dbrecord.add_property(name="entryId", value=entry.attrs['data-id'])
# crawler.cached_find_identifiables([dbrecord])
person = get_author_from_entry(entry)
dbrecord.add_property(name="responsible", value=person)
for block in entry.find_all(attrs={'class': 'dd_entry_cell'}):
# If all text field would have the class dd_text_entry the
# following would be sufficient:
# if 'dd_text_entry' in block['class']:
# instead we check whether an editor field exists.
editor = block.find_all(attrs={'class': 'redactor_editor'})
for entry in project.find_all(attrs={'class': 'epb_content_container'}):
for block in entry.find_all(attrs={'class': 'dd_entry_cell'}):
# If all text field would have the class dd_text_entry the
# following would be sufficient:
# if 'dd_text_entry' in block['class']:
# instead we check whether an editor field exists.
editor = block.find_all(attrs={'class': 'redactor_editor'})
if len(editor) > 0:
dbrecord.add_property(name="textElement", value=editor[0].getText())
if len(editor) > 0:
print("\n\n## is text ##")
print(editor[0].getText())
continue
continue
download = block.find_all(
attrs={'class': 'dd_entry_cell_file_download'})
download = block.find_all(
attrs={'class': 'dd_entry_cell_file_download'})
if len(download) > 0:
local_path = (download[0].parent).attrs['data-filename']
f = db.File(path=local_path,
file=local_path)
dbrecord.add_property(name="accompaningFile", value=f)
if len(download) > 0:
print("\n\nreferences file:\n",
(download[0].parent).attrs['data-filename'])
continue
continue
elements = block.find_all(
attrs={'class': 'data-element-display'})
elements = block.find_all(
attrs={'class': 'data-element-display'})
if len(elements) > 0:
for el in elements:
add_property_from_data_element(dbrecord=dbrecord, element=el)
if len(elements) > 0:
print("\n\nhas data elements:")
continue
for el in elements:
print(el.getText())
tables = block.find_all(
attrs={'class': 'table-el-container'})
continue
if len(tables) > 0:
local_path = (tables[0]).find_all(
attrs={'class': 'table-el-filename'}
)[0].getText().strip()
f = db.File(path=local_path, file=local_path)
dbrecord.add_property(name="table", value=f)
tables = block.find_all(
attrs={'class': 'table-el-container'})
continue
if len(tables) > 0:
print("\n\ntable:\n",
(tables[0]).find_all(
attrs={'class': 'table-el-filename'}
)[0].getText().strip())
empty = block.find_all(
attrs={'class': 'dd_entry_empty_element'})
continue
if len(tables) > 0:
print("\n\nempty")
empty = block.find_all(
attrs={'class': 'dd_entry_empty_element'})
continue
if len(tables) > 0:
print("\n\nempty")
print(dbrecord)
continue
print(block.attrs)
def main(args):
    """Convert a labfolder HTML export into a project and its entries.

    The file at ``args.file`` is read and parsed with BeautifulSoup (lxml
    parser). The first element with the id ``eln_project_content`` is passed
    to ``create_project``; every element below it that carries a ``data-id``
    attribute is passed to ``create_entry`` together with the project record.

    Parameters
    ----------
    args : object with a ``file`` attribute
        Parsed command line arguments; ``file`` is the path of the export.

    Raises
    ------
    ValueError
        If the file does not exist or contains no project element.
    """
    if not os.path.exists(args.file):
        raise ValueError("File does not exist")

    with open(args.file) as inpu:
        text = inpu.read()

    tree = BeautifulSoup(text, features="lxml")
    # find() returns None instead of failing with an IndexError on a
    # document that is not a labfolder project export.
    project = tree.find(id="eln_project_content")

    if project is None:
        raise ValueError(
            "No element with id 'eln_project_content' found in the file")

    dbproject = create_project(project)

    for entry in project.find_all(lambda x: x.has_attr('data-id')):
        create_entry(entry, dbproject)
# import IPython
# IPython.embed()
......@@ -140,7 +169,6 @@ def main(args):
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    # The positional "file" must be registered only once; a second
    # registration with the same name would compete for the same value.
    parser.add_argument("file", default="./labfolder_example.html", nargs="?",
                        help="path to the labfolder HTML export")
    args = parser.parse_args()
    sys.exit(main(args))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment