From f411d63ba8165092918d41b735bdee6fbaad110e Mon Sep 17 00:00:00 2001 From: Alexander Schlemmer <a.schlemmer@indiscale.com> Date: Fri, 15 Nov 2024 11:27:07 +0100 Subject: [PATCH] ENH(converters): implemented zipfile converter --- src/caoscrawler/converters/__init__.py | 1 + src/caoscrawler/converters/zipfile.py | 81 ++++++++++++++++++++++++++ 2 files changed, 82 insertions(+) create mode 100644 src/caoscrawler/converters/zipfile.py diff --git a/src/caoscrawler/converters/__init__.py b/src/caoscrawler/converters/__init__.py index 670d4e96..51d85605 100644 --- a/src/caoscrawler/converters/__init__.py +++ b/src/caoscrawler/converters/__init__.py @@ -23,6 +23,7 @@ from .. import utils from .converters import * from .xml_converter import * +from .zipfile import * try: from .spss import SPSSConverter diff --git a/src/caoscrawler/converters/zipfile.py b/src/caoscrawler/converters/zipfile.py new file mode 100644 index 00000000..49bfcc36 --- /dev/null +++ b/src/caoscrawler/converters/zipfile.py @@ -0,0 +1,81 @@ +# encoding: utf-8 +# +# This file is a part of the LinkAhead Project. +# +# Copyright (C) 2024 Indiscale GmbH <info@indiscale.com> +# Copyright (C) 2024 Alexander Schlemmer +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +"""Converters take structure elements and create Records and new structure elements from them. + +This converter opens zip files, unzips them into a temporary directory and +exposes its contents as File structure elements. + +""" + +from __future__ import annotations + +import os +import tempfile +from os.path import isdir, join +from zipfile import ZipFile + +from ..stores import GeneralStore +from ..structure_elements import Directory, File, StructureElement +from .converters import SimpleFileConverter + + +class ZipFileConverter(SimpleFileConverter): + + """Convert zipfiles. + """ + + def setup(self): + self._tempdir = None + + def cleanup(self): + self._tempdir.cleanup() + + def create_children(self, generalStore: GeneralStore, element: StructureElement): + """ + Loads an ROCrate from an rocrate file or directory. + + Arguments: + ---------- + element must be a File or Directory (structure element). + + Returns: + -------- + A list with an ROCrateElement representing the contents of the .eln-file or None + in case of errors. + """ + + if isinstance(element, File): + self._tempdir = tempfile.TemporaryDirectory() + unzd_path = self._tempdir.name + with ZipFile(element.path) as zipf: + zipf.extractall(unzd_path) + + entity_ls = [] + for el in os.listdir(unzd_path): + if isdir(join(unzd_path, el)): + entity_ls.append(Directory()) + else: + entity_ls.append(File()) + + return entity_ls + else: + raise ValueError("create_children was called with wrong type of StructureElement") + return None -- GitLab