From f411d63ba8165092918d41b735bdee6fbaad110e Mon Sep 17 00:00:00 2001
From: Alexander Schlemmer <a.schlemmer@indiscale.com>
Date: Fri, 15 Nov 2024 11:27:07 +0100
Subject: [PATCH] ENH(converters): implemented zipfile converter

---
 src/caoscrawler/converters/__init__.py |  1 +
 src/caoscrawler/converters/zipfile.py  | 81 ++++++++++++++++++++++++++
 2 files changed, 82 insertions(+)
 create mode 100644 src/caoscrawler/converters/zipfile.py

diff --git a/src/caoscrawler/converters/__init__.py b/src/caoscrawler/converters/__init__.py
index 670d4e96..51d85605 100644
--- a/src/caoscrawler/converters/__init__.py
+++ b/src/caoscrawler/converters/__init__.py
@@ -23,6 +23,7 @@
 from .. import utils
 from .converters import *
 from .xml_converter import *
+from .zipfile import *
 
 try:
     from .spss import SPSSConverter
diff --git a/src/caoscrawler/converters/zipfile.py b/src/caoscrawler/converters/zipfile.py
new file mode 100644
index 00000000..49bfcc36
--- /dev/null
+++ b/src/caoscrawler/converters/zipfile.py
@@ -0,0 +1,81 @@
+# encoding: utf-8
+#
+# This file is a part of the LinkAhead Project.
+#
+# Copyright (C) 2024 Indiscale GmbH <info@indiscale.com>
+# Copyright (C) 2024 Alexander Schlemmer
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+"""Converters take structure elements and create Records and new structure elements from them.
+
+This converter opens zip files, unzips them into a temporary directory and
+exposes their top-level contents as File and Directory structure elements.
+
+"""
+
+from __future__ import annotations
+
+import os
+import tempfile
+from os.path import isdir, join
+from zipfile import ZipFile
+
+from ..stores import GeneralStore
+from ..structure_elements import Directory, File, StructureElement
+from .converters import SimpleFileConverter
+
+
+class ZipFileConverter(SimpleFileConverter):
+    """Expose the top-level entries of a zip archive as structure elements."""
+
+    def setup(self):
+        """Start without an extraction directory; it is created on demand."""
+        self._tempdir = None
+
+    def cleanup(self):
+        """Delete the extraction directory if create_children created one."""
+        if self._tempdir is not None:
+            self._tempdir.cleanup()
+            self._tempdir = None
+
+    def create_children(self, generalStore: GeneralStore, element: StructureElement):
+        """Unzip ``element`` into a temporary directory and list its top level.
+
+        Parameters
+        ----------
+        generalStore : GeneralStore
+            Not used by this converter.
+        element : StructureElement
+            Must be a File whose ``path`` points to the zip archive.
+
+        Returns
+        -------
+        list
+            One Directory or File per top-level entry of the extracted archive.
+
+        Raises
+        ------
+        ValueError
+            If ``element`` is not a File.
+        """
+        if not isinstance(element, File):
+            raise ValueError("create_children was called with wrong type of StructureElement")
+        # Kept on the instance so that cleanup() can remove the extracted files later.
+        self._tempdir = tempfile.TemporaryDirectory()
+        unzd_path = self._tempdir.name
+        with ZipFile(element.path) as zipf:
+            zipf.extractall(unzd_path)
+        paths = ((el, join(unzd_path, el)) for el in os.listdir(unzd_path))
+        return [(Directory if isdir(p) else File)(el, p) for el, p in paths]
-- 
GitLab