Skip to content
Snippets Groups Projects
Select Git revision
  • e6383e2e061fd51ca74366f1cf1762448a7bab91
  • main default protected
  • f-xlsx-list-file
  • f-yaml-parser-enums
  • dev protected
  • f-fix-paths
  • f-fix-validate-to-dict
  • f-labfolder-converter
  • f-state-machine-script
  • f-xlsx-converter-warnings-errors
  • f-rename
  • f-extra-deps
  • f-more-jsonschema-export
  • f-henrik
  • f-fix-89
  • f-trigger-advanced-user-tools
  • f-real-rename-test
  • f-linkahead-rename
  • f-register-integrationtests
  • f-fix-id
  • f-h5-files
  • v0.14.0
  • v0.13.0
  • v0.12.0
  • v0.11.0
  • v0.10.0-numpy2
  • v0.10.0
  • v0.9.0
  • v0.8.0
  • v0.7.0
  • v0.6.1
  • v0.6.0
  • v0.5.0
  • v0.4.1
  • v0.4.0
  • v0.3.1
  • v0.3.0
37 results

import_from_xml.py

Blame
  • Code owners
    Assign users and groups as approvers for specific file changes. Learn more.
    import_from_xml.py 3.80 KiB
    #!/usr/bin/env python3
    # -*- coding: utf-8 -*-
    #
    # ** header v3.0
    # This file is a part of the CaosDB Project.
    #
    # Copyright (C) 2020 IndiScale GmbH, Henrik tom Wörden
    #
    # This program is free software: you can redistribute it and/or modify
    # it under the terms of the GNU Affero General Public License as
    # published by the Free Software Foundation, either version 3 of the
    # License, or (at your option) any later version.
    #
    # This program is distributed in the hope that it will be useful,
    # but WITHOUT ANY WARRANTY; without even the implied warranty of
    # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    # GNU Affero General Public License for more details.
    #
    # You should have received a copy of the GNU Affero General Public License
    # along with this program. If not, see <https://www.gnu.org/licenses/>.
    #
    # ** end header
    #
    """
    This script imports a dataset stored in an XML representation, together
    with the corresponding files.
    
    The export should have been done with export_related.py
    """
    import argparse
    import os
    from tempfile import NamedTemporaryFile
    
    import caosdb as db
    from caosmodels.data_model import DataModel
    
    
    def create_dummy_file(text="Please ask the administrator for this file."):
        """Write ``text`` to a new temporary file and return the file's path.

        The file is created with ``delete=False``, so it is still present
        after this function returns; nothing in this function removes it.

        Parameters
        ----------
        text : str, optional
            Content written to the temporary file.

        Returns
        -------
        str
            Path of the temporary file.
        """
        # Text mode lets us write through the handle directly; delete=False
        # keeps the file on disk once the handle is closed.
        with NamedTemporaryFile(mode="w", delete=False) as handle:
            handle.write(text)

        return handle.name
    
    
    def main(filename, rerun=False):
        """Import the entities stored in the XML file ``filename``.

        The XML is parsed into a container. Its file entities are replaced
        by newly created ``db.File`` objects, its properties and record
        types are handed to ``DataModel.sync_data_model``, and the remaining
        entities are inserted after the ids they refer to have been
        translated via ``apply_to_ids``.

        Parameters
        ----------
        filename : str
            Path of the XML file to read.
        rerun : bool, optional
            If True, the file entities are not inserted; instead they are
            retrieved with only their path set.
        """
        container = db.Container()
        with open(filename) as xml_file:
            container = container.from_xml(xml_file.read())

        # Stand-in content for every file that has no local copy.
        placeholder = create_dummy_file()
        model_entities = []
        file_entities = []

        # add files to the file list and properties and record types to the
        # model list
        for entity in container:
            if isinstance(entity, db.File):
                # The first character of the stored path is dropped before
                # joining (presumably a leading "/" -- TODO confirm).
                local_copy = os.path.join("downloads", entity.path[1:])
                entity.file = (local_copy if os.path.exists(local_copy)
                               else placeholder)
                file_entities.append(entity)

            if isinstance(entity, (db.Property, db.RecordType)):
                model_entities.append(entity)

        # create new file objects and replace the existing one
        # TODO why is this necessary?
        print(file_entities)

        if rerun:
            # Only path and id are carried over; the id is reset before the
            # retrieve further below.
            fresh_files = [db.File(path=old.path, id=old.id)
                           for old in file_entities]
        else:
            fresh_files = []
            for old in file_entities:
                fresh = db.File(file=old.file, path=old.path, name=old.name,
                                id=old.id, description=old.description)
                for parent in old.parents:
                    fresh.add_parent(parent)
                fresh_files.append(fresh)

        # remove entities of the model and the original files from the
        # container; they are handled separately below
        for entity in model_entities + file_entities:
            container.remove(entity)

        # Key: the id an entity carries right now (as read from the XML).
        # Value: the entity object itself, so that its id can be looked up
        # again after the calls below.
        by_old_id = {entity.id: entity
                     for entity in model_entities + fresh_files}

        # insert/update the model
        datamodel = DataModel()
        datamodel.extend(model_entities)
        datamodel.sync_data_model()

        # insert files (or, on a rerun, retrieve them)
        if rerun:
            for fresh in fresh_files:
                fresh.id = None
                fresh.retrieve()
        else:
            for fresh in fresh_files:
                print(fresh)
                print(fresh.insert(unique=False))

        def translate_id(old):
            # The id is read from the mapped entity at call time, i.e. it is
            # whatever that entity carries after the calls above.
            return by_old_id[old].id if old in by_old_id else old

        # set the ids of already inserted entities in the container
        for entity in container:
            entity.apply_to_ids(translate_id)

        container.insert(unique=False)
    
    
    def defineParser():
        """Build the command line parser of this script.

        The parser takes one positional argument ``file`` and the optional
        flag ``--rerun``; the module docstring is used as its description.

        Returns
        -------
        argparse.ArgumentParser
            The configured parser.
        """
        cli = argparse.ArgumentParser(description=__doc__)
        cli.add_argument("file", help="file to be imported")

        rerun_help = ("if this script is run at least a second time and"
                      " files are already inserted")
        cli.add_argument("--rerun", action="store_true", help=rerun_help)

        return cli
    
    
    if __name__ == "__main__":
        # Script entry point: parse the command line and run the import.
        cli_args = defineParser().parse_args()
        main(cli_args.file, rerun=cli_args.rerun)