Skip to content
Snippets Groups Projects
Commit bd2b12a2 authored by Florian Spreckelsen's avatar Florian Spreckelsen
Browse files

Merge branch 'f-zipfile-converter' into 'dev'

Zipfile converter

See merge request !200
parents c1f4d1fa 5f3fecfd
Branches
Tags
2 merge requests!217TST: Make NamedTemporaryFiles Windows-compatible,!200Zipfile converter
Pipeline #57875 passed
......@@ -14,6 +14,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
`match_attrib` in XMLConverter.
- `match_properties` is a method of class Converter and can for
example be used by CustomConverters.
- ZipFileConverter that opens zip files and exposes their contents as
File and Directory structure elements.
### Changed ###
......
......@@ -18,11 +18,12 @@
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
"""Submdule containing all default and optional converters."""
"""Submodule containing all default and optional converters."""
from .. import utils
from .converters import *
from .xml_converter import *
from .zipfile_converter import ZipFileConverter
try:
from .spss import SPSSConverter
......
# encoding: utf-8
#
# This file is a part of the LinkAhead Project.
#
# Copyright (C) 2024 Indiscale GmbH <info@indiscale.com>
# Copyright (C) 2024 Alexander Schlemmer
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
"""Converters take structure elements and create Records and new structure elements from them.
This converter opens zip files, unzips them into a temporary directory and
exposes their contents as File and Directory structure elements.
"""
from __future__ import annotations
import os
import tempfile
from os.path import isdir, join
from zipfile import ZipFile
from ..stores import GeneralStore
from ..structure_elements import Directory, File, StructureElement
from .converters import SimpleFileConverter
class ZipFileConverter(SimpleFileConverter):
    """Convert zip files.

    The archive is extracted into a temporary directory and its top-level
    entries are exposed as File and Directory structure elements.
    """

    def setup(self):
        """Reset the handle of the temporary extraction directory."""
        self._tempdir = None

    def cleanup(self):
        """Remove the temporary extraction directory, if one was created."""
        # ``create_children`` may never have been called (or ``setup`` may not
        # have run), in which case there is nothing to remove.
        tempdir = getattr(self, "_tempdir", None)
        if tempdir is not None:
            tempdir.cleanup()
            self._tempdir = None

    def create_children(self, generalStore: GeneralStore, element: StructureElement):
        """Extract a zip file and return its top-level entries.

        The archive at ``element.path`` is extracted into a newly created
        temporary directory, which is kept until ``cleanup`` is called.

        Parameters
        ----------
        generalStore : GeneralStore
            Not used by this converter.
        element : StructureElement
            Must be a File structure element pointing to a zip archive.

        Returns
        -------
        list
            One Directory element for every top-level directory and one File
            element for every other top-level entry of the extracted archive,
            in the order returned by ``os.listdir``.

        Raises
        ------
        ValueError
            If ``element`` is not a File.
        """
        if not isinstance(element, File):
            raise ValueError("create_children was called with wrong type of StructureElement")

        self._tempdir = tempfile.TemporaryDirectory()
        unzd_path = self._tempdir.name

        with ZipFile(element.path) as zipf:
            zipf.extractall(unzd_path)

        entity_ls = []
        for el in os.listdir(unzd_path):
            path = join(unzd_path, el)
            if isdir(path):
                entity_ls.append(Directory(el, path))
            else:
                entity_ls.append(File(el, path))

        return entity_ls
......@@ -366,3 +366,31 @@ XMLTextNodeConverter
In the future, this converter can be used to match XMLTextNodes that
are generated by the XMLTagConverter.
ZipFileConverter
================
This converter opens zip files, unzips them into a temporary directory and
exposes their contents as File and Directory structure elements.
Usage Example:
--------------
.. code-block:: yaml
ExampleZipFile:
type: ZipFile
match: example\.zip$
subtree:
DirInsideZip:
type: Directory
match: experiments$
FileInsideZip:
type: File
match: description.odt$
This converter will match and open files called ``example.zip``. If
the file contains a directory called ``experiments`` it will be
processed further by the respective converter in the subtree. The same
is true for a file called ``description.odt``.
......@@ -27,21 +27,16 @@ import importlib
import os
from pathlib import Path
import jsonschema
import linkahead as db
import pytest
import rocrate
import yaml
from caoscrawler import scanner
from caoscrawler.converters import ELNFileConverter, ROCrateEntityConverter
from caoscrawler.scanner import load_definition
from caoscrawler.stores import GeneralStore
from caoscrawler.structure_elements import (DictElement, File, ROCrateEntity,
TextElement)
from linkahead.high_level_api import convert_to_python_object
from lxml.etree import fromstring
from rocrate.model.entity import Entity
from rocrate.rocrate import ROCrate
UNITTESTDIR = Path(__file__).parent
......
#!/usr/bin/env python3
# encoding: utf-8
#
# This file is a part of the LinkAhead Project.
#
# Copyright (C) 2024 Indiscale GmbH <info@indiscale.com>
# Copyright (C) 2024 Alexander Schlemmer <a.schlemmer@indiscale.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
"""
test the zip-file converter
"""
import importlib
import os
from pathlib import Path
import pytest
import yaml
from caoscrawler.converters import DirectoryConverter, ZipFileConverter
from caoscrawler.stores import GeneralStore
from caoscrawler.structure_elements import Directory, File
UNITTESTDIR = Path(__file__).parent
@pytest.fixture
def converter_registry():
    """Provide a converter registry that only knows the ZipFile converter.

    Every entry names a converter class and the package providing it; the
    class object itself is looked up and stored under the ``"class"`` key.
    """
    registry = {
        "ZipFile": {
            "converter": "ZipFileConverter",
            "package": "caoscrawler.converters"},
    }

    # Resolve the class named in each entry from its package.
    for entry in registry.values():
        package = importlib.import_module(entry["package"])
        entry["class"] = getattr(package, entry["converter"])

    return registry
def test_zipfile_converter(converter_registry):
    """Check that a zip archive is unpacked into Directory and File elements.

    ``PASTA.eln`` is used as the test archive. It must contain a single
    top-level directory ``PASTA`` which in turn yields two directories
    followed by three files when listed by the DirectoryConverter.
    """
    zip_file = File("PASTA.eln", os.path.join(UNITTESTDIR, "eln_files", "PASTA.eln"))
    zip_conv = ZipFileConverter(yaml.safe_load("""
type: ZipFile
match: .*$
"""), "TestZipFileConverter", converter_registry)

    match = zip_conv.match(zip_file)
    assert match is not None

    children = zip_conv.create_children(GeneralStore(), zip_file)
    # From here on the archive has been extracted; make sure the temporary
    # directory is removed again even if one of the assertions fails.
    try:
        assert len(children) == 1
        assert children[0].name == "PASTA"

        dir_conv = DirectoryConverter(yaml.safe_load("""
type: Directory
match: ^PASTA$
"""), "TestDirectory", converter_registry)
        match = dir_conv.match(children[0])
        assert match is not None

        children = dir_conv.create_children(GeneralStore(), children[0])
        assert len(children) == 5
        # The first two children are expected to be directories, the
        # remaining three files.
        for child in children[:2]:
            assert isinstance(child, Directory)
        for child in children[2:5]:
            assert isinstance(child, File)
    finally:
        zip_conv.cleanup()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment