Select Git revision
import_from_xml.py
-
Henrik tom Wörden authored
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
import_from_xml.py 3.80 KiB
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# ** header v3.0
# This file is a part of the CaosDB Project.
#
# Copyright (C) 2020 IndiScale GmbH, Henrik tom Wörden
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
# ** end header
#
"""
This script imports a dataset stored in an XML representation together with
its corresponding files.
The export should have been done with export_related.py.
"""
import argparse
import os
from tempfile import NamedTemporaryFile
import caosdb as db
from caosmodels.data_model import DataModel
def create_dummy_file(text="Please ask the administrator for this file."):
    """Create a placeholder file containing ``text`` and return its path.

    The file is created with ``delete=False`` and therefore outlives this
    call; the caller is responsible for removing it once it is no longer
    needed.

    Parameters
    ----------
    text : str
        Content that is written to the placeholder file (UTF-8 encoded).

    Returns
    -------
    str
        Path of the newly created temporary file.
    """
    # Write through the handle NamedTemporaryFile already provides instead of
    # closing it and opening the same path a second time.  The explicit
    # encoding keeps the content independent of the platform locale.
    with NamedTemporaryFile(mode="w", encoding="utf-8",
                            delete=False) as tmpfile:
        tmpfile.write(text)

    return tmpfile.name
def main(filename, rerun=False):
    """Import the entities stored in the XML file ``filename``.

    The XML is parsed into a container.  Properties and RecordTypes are
    synchronized via ``DataModel.sync_data_model``, File entities are
    inserted individually (or only retrieved when ``rerun`` is set), and
    all remaining entities are inserted after their id references have been
    replaced by the ids of the entities handled before.

    Parameters
    ----------
    filename : str
        Path of the XML file to be imported.
    rerun : bool
        If True, the files are assumed to be inserted already; they are
        retrieved by their path instead of being inserted again.
    """
    cont = db.Container()
    with open(filename) as fi:
        cont = cont.from_xml(fi.read())

    tmpfile = create_dummy_file()
    try:
        _import_container(cont, tmpfile, rerun)
    finally:
        # The placeholder is created with delete=False; remove it here so
        # that repeated imports do not leave temporary files behind.
        if os.path.exists(tmpfile):
            os.remove(tmpfile)


def _import_container(cont, dummy_file, rerun):
    """Insert the entities of ``cont``; ``dummy_file`` stands in for missing
    file contents."""
    model = []
    files = []

    # add files to files list and properties and record types to model
    for el in cont:
        if isinstance(el, db.File):
            # Strip leading slashes (the path presumably always starts with
            # one) so that the path is joined below "downloads" rather than
            # replacing it.
            target = os.path.join("downloads", el.path.lstrip("/"))
            el.file = target if os.path.exists(target) else dummy_file
            files.append(el)

        if isinstance(el, (db.Property, db.RecordType)):
            model.append(el)

    # create new file objects and replace the existing one
    # TODO why is this necessary?
    print(files)
    new_files = []

    for fi in files:
        if rerun:
            # path and (old) id suffice, the file is only retrieved later
            new = db.File(path=fi.path, id=fi.id)
        else:
            new = db.File(file=fi.file, path=fi.path, name=fi.name, id=fi.id,
                          description=fi.description)

            for p in fi.parents:
                new.add_parent(p)
        new_files.append(new)

    # remove entities of the model from the container
    for el in model + files:
        cont.remove(el)
    files = new_files

    # Maps the ids used in the XML file to the entity objects; the objects
    # are the ones that are synchronized/inserted/retrieved below.
    id_mapping = {el.id: el for el in model + files}

    # insert/update the model
    datamodel = DataModel()
    datamodel.extend(model)
    datamodel.sync_data_model()

    # insert files
    for el in files:
        if rerun:
            # drop the id from the XML file and retrieve by path instead
            el.id = None
            el.retrieve()
        else:
            print(el)
            r = el.insert(unique=False)
            print(r)

    def replace_by_new(old):
        """Return the current id of the entity known under the id ``old``."""
        if old in id_mapping:
            return id_mapping[old].id

        return old

    # set the ids of already inserted entities in the container
    for el in cont:
        el.apply_to_ids(replace_by_new)

    cont.insert(unique=False)
def defineParser():
    """Build the command line parser of this script.

    The module docstring is used as the description of the parser.

    Returns
    -------
    argparse.ArgumentParser
        Parser with the positional argument ``file`` and the optional flag
        ``--rerun``.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("file", help='file to be imported')
    parser.add_argument("--rerun", help='if this script is run at least a'
                        ' second time and files are already inserted',
                        action="store_true")

    return parser
if __name__ == "__main__":
    # Script entry point: parse the command line and run the import.
    parser = defineParser()
    args = parser.parse_args()
    main(args.file, args.rerun)