diff --git a/.docker/Dockerfile b/.docker/Dockerfile index 539ac0d4e70bfbde2f630d4254cacc7419105611..a4d9ce692d96c1ba19749ba5c24cbd0ede62e5c2 100644 --- a/.docker/Dockerfile +++ b/.docker/Dockerfile @@ -30,7 +30,7 @@ RUN rm -r /git/.git # Install pycaosdb.ini for the tests RUN mv /git/.docker/tester_pycaosdb.ini /git/integrationtests/pycaosdb.ini -RUN cd /git/ && pip3 install . +RUN cd /git/ && pip3 install .[h5_crawler,spss] WORKDIR /git/integrationtests # wait for server, diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 9ec1727db301affd8e984df78abbb78a2b16ffaa..ff3136731e5d2b565c8ff7918912aa9c9b632493 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -130,7 +130,7 @@ unittest_py3.8: # TODO: Use f-branch logic here - pip install git+https://gitlab.indiscale.com/caosdb/src/caosdb-pylib.git@dev - pip install git+https://gitlab.indiscale.com/caosdb/src/caosdb-advanced-user-tools.git@dev - - pip install .[h5-crawler] + - pip install .[h5_crawler,spss] # actual test - caosdb-crawler --help - pytest --cov=caosdb -vv ./unittests @@ -166,7 +166,7 @@ unittest_py3.13: # TODO: Use f-branch logic here - pip install git+https://gitlab.indiscale.com/caosdb/src/caosdb-pylib.git@dev - pip install git+https://gitlab.indiscale.com/caosdb/src/caosdb-advanced-user-tools.git@dev - - pip install .[h5-crawler] + - pip install .[h5_crawler,spss] # actual test - caosdb-crawler --help - pytest --cov=caosdb -vv ./unittests diff --git a/setup.cfg b/setup.cfg index 1b4a91859f39ff2695c36aace396b7db240a5f1f..4a9c82657bf453081cb4843bd955a333c73b6390 100644 --- a/setup.cfg +++ b/setup.cfg @@ -25,6 +25,7 @@ install_requires = odfpy #make optional packaging pandas + pyarrow # Will be required by Pandas >= 3.0. 
pyyaml yaml-header-tools >= 0.2.1 @@ -41,6 +42,8 @@ console_scripts = caosdb-crawler = caoscrawler.crawl:main [options.extras_require] -h5-crawler = +h5_crawler = h5py >= 3.8 numpy +spss = + pandas[spss] diff --git a/src/caoscrawler/__init__.py b/src/caoscrawler/__init__.py index 05bad0b54d9098c0b7f165d8295a0faa2966fa32..9c5e3743527e672cda2a2687a834fe54ac1b82a2 100644 --- a/src/caoscrawler/__init__.py +++ b/src/caoscrawler/__init__.py @@ -1,4 +1,9 @@ +from . import converters +from .conv_impl.sav import SAVConverter from .crawl import Crawler, SecurityMode from .version import CfoodRequiredVersionError, get_caoscrawler_version __version__ = get_caoscrawler_version() + +# Convenience members ######################################################### +converters.SAVConverter = SAVConverter diff --git a/src/caoscrawler/conv_impl/__init__.py b/src/caoscrawler/conv_impl/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/src/caoscrawler/conv_impl/sav.py b/src/caoscrawler/conv_impl/sav.py new file mode 100644 index 0000000000000000000000000000000000000000..8308719bc0e222810fee2274f94d0c14ebce9b81 --- /dev/null +++ b/src/caoscrawler/conv_impl/sav.py @@ -0,0 +1,37 @@ +# This file is a part of the LinkAhead Project. +# +# Copyright (C) 2024 IndiScale GmbH <info@indiscale.com> +# Copyright (C) 2024 Daniel Hornung <d.hornung@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. 
+# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +"""Converter for SAV files (stored by SPSS).""" + +# import pyreadstat # Maybe use this if we need more metadata +import pandas as pd + +from .. import converters +from ..stores import GeneralStore, RecordStore +from ..structure_elements import (BooleanElement, DictElement, Directory, File, + FloatElement, IntegerElement, JSONFile, + ListElement, NoneElement, StructureElement, + TextElement) + + +class SAVConverter(converters.TableConverter): + """Converter for SAV files (stored by SPSS).""" + + def create_children(self, values: GeneralStore, element: StructureElement): + df = pd.io.spss.read_spss(element.path) + return self._children_from_dataframe(df) diff --git a/src/caoscrawler/converters.py b/src/caoscrawler/converters.py index e0ca0f9bff77ba1ecc63f4d102d6d9869fb11cb0..296bfeedfe8fcf325304bb84ddbb236bb5a0bf30 100644 --- a/src/caoscrawler/converters.py +++ b/src/caoscrawler/converters.py @@ -1,11 +1,11 @@ -#!/usr/bin/env python3 # encoding: utf-8 # -# ** header v3.0 -# This file is a part of the CaosDB Project. +# This file is a part of the LinkAhead Project. # +# Copyright (C) 2024 Indiscale GmbH <info@indiscale.com> # Copyright (C) 2021 Henrik tom Wörden -# 2021 Alexander Schlemmer +# Copyright (C) 2021 Alexander Schlemmer +# Copyright (C) 2024 Daniel Hornung <d.hornung@indiscale.com> # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as @@ -19,9 +19,8 @@ # # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see <https://www.gnu.org/licenses/>. 
-# -# ** end header -# + +"""Converters take structure elements and create Records and new structure elements from them.""" from __future__ import annotations @@ -34,7 +33,7 @@ import warnings from abc import ABCMeta, abstractmethod from inspect import signature from string import Template -from typing import Any, List, Optional, Tuple, Union +from typing import Any, Callable, Optional, Union import linkahead as db import pandas as pd @@ -134,8 +133,8 @@ def replace_variables(propvalue: Any, values: GeneralStore): This function replaces variables in property values (and possibly other locations, where the crawler can replace cfood-internal variables). - If `propvalue` is a single variable name preceeded with a '$' (e.g. '$var' or '${var}'), then - the corresponding value stored in `values` is returned. + If ``propvalue`` is a single variable name preceded by a ``$`` (e.g. ``$var`` or ``${var}``), + then the corresponding value stored in ``values`` is returned. In any other case the variable substitution is carried out as defined by string templates and a new string with the replaced variables is returned. """ @@ -160,16 +159,16 @@ def handle_value(value: Union[dict, str, list], values: GeneralStore): add as an additional property (multiproperty). Variable names (starting with a "$") are replaced by the corresponding value stored in the - `values` GeneralStore. + ``values`` GeneralStore. Parameters ---------- -value: - - if str, the value to be interpreted. E.g. "4", "hallo" or "$a" etc. - - if dict, must have keys "value" and "collection_mode". The returned tuple is directly +value: Union[dict, str, list] + - If *str*, the value to be interpreted. E.g. "4", "hello" or "$a" etc. + - If *dict*, must have keys ``value`` and ``collection_mode``. The returned tuple is directly created from the corresponding values. 
- - if list, each element is checked for replacement and the resulting list will be used + - If *list*, each element is checked for replacement and the resulting list will be used as (list) value for the property Returns @@ -181,7 +180,7 @@ out: tuple """ # @review Florian Spreckelsen 2022-05-13 - if type(value) == dict: + if isinstance(value, dict): if "value" not in value: # TODO: how do we handle this case? Just ignore? # or disallow? @@ -189,7 +188,7 @@ out: tuple propvalue = value["value"] # can be "single", "list" or "multiproperty" collection_mode = value["collection_mode"] - elif type(value) == str: + elif isinstance(value, str): propvalue = value collection_mode = "single" if propvalue.startswith("+"): @@ -198,7 +197,7 @@ out: tuple elif propvalue.startswith("*"): collection_mode = "multiproperty" propvalue = propvalue[1:] - elif type(value) == list: + elif isinstance(value, list): # TODO: (for review) # This is a bit dirty right now and needed for # being able to directly set list values. Semantics is, however, a bit @@ -209,7 +208,7 @@ out: tuple propvalue = list() for element in value: # Do the element-wise replacement only, when its type is string: - if type(element) == str: + if isinstance(element, str): propvalue.append(replace_variables(element, values)) else: propvalue.append(element) @@ -286,9 +285,7 @@ def create_records(values: GeneralStore, records: RecordStore, def_records: dict propvalue = os.path.normpath(propvalue) setattr(c_record, key.lower(), propvalue) else: - if c_record.get_property(key) is None: - if collection_mode == "list": c_record.add_property(name=key, value=[propvalue]) elif (collection_mode == "multiproperty" or @@ -322,10 +319,13 @@ class Converter(object, metaclass=ABCMeta): Parameters ---------- - definition: dict, Please refer to ``src/doc/converters.rst`` to learn about the structure - that the definition dict must have. - converter_registry: dict, A dictionary that contains converter names as keys and dicts as - values. 
Those value dicts have the keys 'converter' and 'package'. + definition: dict + Please refer to ``src/doc/converters.rst`` to learn about the structure that the + definition dict must have. + converter_registry: dict + A dictionary that contains converter names as keys and dicts as values. Those value dicts + have the keys 'converter', 'package' and 'class'. 'converter' is the class name, + 'package' the module and 'class' the class instance of converters. """ self.definition = definition @@ -363,7 +363,7 @@ class Converter(object, metaclass=ABCMeta): @staticmethod def converter_factory(definition: dict, name: str, converter_registry: dict): - """creates a Converter instance of the appropriate class. + """Create a Converter instance of the appropriate class. The `type` key in the `definition` defines the Converter class which is being used. """ @@ -424,7 +424,7 @@ class Converter(object, metaclass=ABCMeta): pass """ - if not "transform" in self.definition: + if "transform" not in self.definition: return for transformer_key, transformer in self.definition["transform"].items(): in_value = replace_variables(transformer["in"], values) @@ -460,8 +460,7 @@ class Converter(object, metaclass=ABCMeta): values[match.group('varname')] = out_value @abstractmethod - def create_children(self, values: GeneralStore, - element: StructureElement): + def create_children(self, values: GeneralStore, element: StructureElement): pass def create_records(self, values: GeneralStore, records: RecordStore, @@ -477,7 +476,7 @@ class Converter(object, metaclass=ABCMeta): self.definition["records"]) def filter_children(self, children_with_strings: - List[Tuple[StructureElement, str]], expr: str, + list[tuple[StructureElement, str]], expr: str, group: str, rule: str): """Filter children according to regexp `expr` and `rule`.""" @@ -515,8 +514,8 @@ class Converter(object, metaclass=ABCMeta): result: Optional[dict]): """ Template for the debugging output for the match function """ msg = 
"\n--------" + name + "-----------\n" - for re, ma in zip(regexp, matched): - msg += "matching reg:\t" + re + "\n" + for exp, ma in zip(regexp, matched): + msg += "matching reg:\t" + exp + "\n" msg += "matching val:\t" + ma + "\n" msg += "---------\n" if result is None: @@ -620,7 +619,7 @@ class DirectoryConverter(Converter): element: A directory (of type Directory) which will be traversed. """ - children: List[StructureElement] = [] + children: list[StructureElement] = [] for name in sorted(os.listdir(element.path)): path = os.path.join(element.path, name) @@ -660,7 +659,7 @@ class SimpleFileConverter(Converter): class FileConverter(SimpleFileConverter): def __init__(self, *args, **kwargs): warnings.warn(DeprecationWarning( - "This class is depricated. Please use SimpleFileConverter.")) + "This class is deprecated. Please use SimpleFileConverter.")) super().__init__(*args, **kwargs) @@ -693,12 +692,12 @@ class MarkdownFileConverter(SimpleFileConverter): "Error during the validation (yaml header cannot be read) of the markdown file " "located at the following node in the data structure:\n" "{}\nError:\n{}".format(path, err)) - children: List[StructureElement] = [] + children: list[StructureElement] = [] for name, entry in header.items(): - if type(entry) == list: + if isinstance(entry, list): children.append(ListElement(name, entry)) - elif type(entry) == str: + elif isinstance(entry, str): children.append(TextElement(name, entry)) else: if generalStore is not None and self.name in generalStore: @@ -814,14 +813,14 @@ class DictElementConverter(Converter): class DictConverter(DictElementConverter): def __init__(self, *args, **kwargs): warnings.warn(DeprecationWarning( - "This class is depricated. Please use DictConverter.")) + "This class is deprecated. 
Please use DictElementConverter.")) super().__init__(*args, **kwargs) class DictDictElementConverter(DictElementConverter): def __init__(self, *args, **kwargs): warnings.warn(DeprecationWarning( - "This class is depricated. Please use DictElementConverter.")) + "This class is deprecated. Please use DictElementConverter.")) super().__init__(*args, **kwargs) @@ -886,7 +885,7 @@ out: """ if "match_name" in definition: if "match" in definition: - raise RuntimeError(f"Do not supply both, 'match_name' and 'match'.") + raise RuntimeError("Do not supply both, 'match_name' and 'match'.") m1 = re.match(definition["match_name"], name) if m1 is None: @@ -1009,7 +1008,7 @@ class BooleanElementConverter(_AbstractScalarValueElementConverter): class DictBooleanElementConverter(BooleanElementConverter): def __init__(self, *args, **kwargs): warnings.warn(DeprecationWarning( - "This class is depricated. Please use BooleanElementConverter.")) + "This class is deprecated. Please use BooleanElementConverter.")) super().__init__(*args, **kwargs) @@ -1025,7 +1024,7 @@ class FloatElementConverter(_AbstractScalarValueElementConverter): class DictFloatElementConverter(FloatElementConverter): def __init__(self, *args, **kwargs): warnings.warn(DeprecationWarning( - "This class is depricated. Please use FloatElementConverter.")) + "This class is deprecated. Please use FloatElementConverter.")) super().__init__(*args, **kwargs) @@ -1040,7 +1039,7 @@ class TextElementConverter(_AbstractScalarValueElementConverter): def __init__(self, definition, *args, **kwargs): if "match" in definition: raise ValueError(""" -The 'match' key will in future be used to match a potential name of a TextElement. Please use +The 'match' key is used to match a potential name of a TextElement. Please use the 'match_value' key to match the value of the TextElement and 'match_name' for matching the name. 
""") @@ -1050,7 +1049,7 @@ the 'match_value' key to match the value of the TextElement and 'match_name' for class DictTextElementConverter(TextElementConverter): def __init__(self, *args, **kwargs): warnings.warn(DeprecationWarning( - "This class is depricated. Please use TextElementConverter.")) + "This class is deprecated. Please use TextElementConverter.")) super().__init__(*args, **kwargs) @@ -1066,7 +1065,7 @@ class IntegerElementConverter(_AbstractScalarValueElementConverter): class DictIntegerElementConverter(IntegerElementConverter): def __init__(self, *args, **kwargs): warnings.warn(DeprecationWarning( - "This class is depricated. Please use IntegerElementConverter.")) + "This class is deprecated. Please use IntegerElementConverter.")) super().__init__(*args, **kwargs) @@ -1108,7 +1107,7 @@ class ListElementConverter(Converter): class DictListElementConverter(ListElementConverter): def __init__(self, *args, **kwargs): warnings.warn(DeprecationWarning( - "This class is depricated. Please use ListElementConverter.")) + "This class is deprecated. Please use ListElementConverter.")) super().__init__(*args, **kwargs) @@ -1122,15 +1121,22 @@ class TableConverter(Converter): The rows can be matched using a DictElementConverter. """ - @abstractmethod - def get_options(self): - """ - This method needs to be overwritten by the specific table converter to provide - information about the possible options. + + def get_options(self) -> dict: + """Get specific options, e.g. from ``self.definition``. + +This method may be overwritten by the specific table converter to provide information about the +possible options. Implementors may use ``TableConverter._get_options(...)`` to get (and convert) +options from ``self.definition``. + +Returns +------- +out: dict + An options dict. 
""" - pass + return {} - def _get_options(self, possible_options): + def _get_options(self, possible_options: list[tuple[str, Callable]]) -> dict: option_dict = dict() for opt_name, opt_conversion in possible_options: if opt_name in self.definition: @@ -1158,6 +1164,14 @@ class TableConverter(Converter): return None return m.groupdict() + @staticmethod + def _children_from_dataframe(dataframe: pd.DataFrame): + child_elements = list() + for index, row in dataframe.iterrows(): + child_elements.append( + DictElement(str(index), row.to_dict())) + return child_elements + class XLSXTableConverter(TableConverter): """ @@ -1187,11 +1201,7 @@ class XLSXTableConverter(TableConverter): if not isinstance(element, File): raise RuntimeError("Element must be a File.") table = pd.read_excel(element.path, **self.get_options()) - child_elements = list() - for index, row in table.iterrows(): - child_elements.append( - DictElement(str(index), row.to_dict())) - return child_elements + return self._children_from_dataframe(table) class CSVTableConverter(TableConverter): @@ -1216,11 +1226,7 @@ class CSVTableConverter(TableConverter): if not isinstance(element, File): raise RuntimeError("Element must be a File.") table = pd.read_csv(element.path, **self.get_options()) - child_elements = list() - for index, row in table.iterrows(): - child_elements.append( - DictElement(str(index), row.to_dict())) - return child_elements + return self._children_from_dataframe(table) class DateElementConverter(TextElementConverter): diff --git a/src/caoscrawler/version.py b/src/caoscrawler/version.py index fdc8323452cd190cc3628efa57c15992f30fabeb..0b72dd65116fbc102a4dc2492d726698cad5a13b 100644 --- a/src/caoscrawler/version.py +++ b/src/caoscrawler/version.py @@ -17,11 +17,7 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see <https://www.gnu.org/licenses/>. 
# -try: - from importlib import metadata as importlib_metadata -except ImportError: # Python<3.8 dowesn"t support this so use - import importlib_metadata - +from importlib import metadata as importlib_metadata from packaging.version import parse as parse_version from warnings import warn @@ -43,7 +39,7 @@ def check_cfood_version(metadata: dict): if not metadata or "crawler-version" not in metadata: msg = """ -No crawler version specified in cfood definition, so there is now guarantee that +No crawler version specified in cfood definition, so there is no guarantee that the cfood definition matches the installed crawler version. Specifying a version is highly recommended to ensure that the definition works diff --git a/src/doc/converters.rst b/src/doc/converters.rst index 9b28c9a61eec4d9707b9640720b9c6a44a8fe25e..637c66355efebb92427b0fc9c3bc37fe6068ea27 100644 --- a/src/doc/converters.rst +++ b/src/doc/converters.rst @@ -55,6 +55,8 @@ element. If the structure element matches (this is a combination of a typecheck match, see :py:class:`~caoscrawler.converters.Converter` for details) the converter is used to generate records (see :py:meth:`~caoscrawler.converters.Converter.create_records`) and to possibly process a subtree, as defined by the function :func:`caoscrawler.converters.create_children`. +**match** *TODO* + **records** is a dict of definitions that define the semantic structure (see details below). 
diff --git a/unittests/test_converters.py b/unittests/test_converters.py index 52ece13dc2269a3e3b16e6378166e91b084f4a7c..0e90fab391fd5f717b58716ddab4a2a266ce1761 100644 --- a/unittests/test_converters.py +++ b/unittests/test_converters.py @@ -140,7 +140,7 @@ def test_markdown_converter(converter_registry): converter = MarkdownFileConverter({"match": "(.*)"}, "TestMarkdownFileConverter", converter_registry) - with pytest.raises(ConverterValidationError) as err: + with pytest.raises(ConverterValidationError): converter.create_children(None, File("test_tool.py", UNITTESTDIR / "test_crawler.py")) m = converter.match(test_readme) diff --git a/unittests/test_sav_converter.py b/unittests/test_sav_converter.py new file mode 100644 index 0000000000000000000000000000000000000000..3cc72038cf4db5f5c0fd4435ddee696137491eb6 --- /dev/null +++ b/unittests/test_sav_converter.py @@ -0,0 +1,82 @@ +# This file is a part of the LinkAhead Project. +# +# Copyright (C) 2024 IndiScale GmbH <info@indiscale.com> +# Copyright (C) 2024 Daniel Hornung <d.hornung@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. 
+ +"""Testing converter for SAV files.""" + +import datetime +import importlib +import re +from pathlib import Path + +import numpy as np +import pytest + +from caoscrawler.converters import ( + ConverterValidationError, + SAVConverter, +) +from caoscrawler.structure_elements import (BooleanElement, DictElement, + Directory, File, FloatElement, + IntegerElement, ListElement, + TextElement) + +UNITTESTDIR = Path(__file__).parent + + +@pytest.fixture +def converter_registry(): + converter_registry: dict[str, dict[str, str]] = { + "Directory": { + "converter": "DirectoryConverter", + "package": "caoscrawler.converters"}, + } + + for key, value in converter_registry.items(): + module = importlib.import_module(value["package"]) + value["class"] = getattr(module, value["converter"]) + return converter_registry + + +def test_sav_converter(converter_registry): + converter = SAVConverter({ + "match": ("sample.sav") + }, + "ThisConverterNameIsIrrelevant", converter_registry + ) + + spss_dir = UNITTESTDIR / "test_tables" / "spss" + for sav_file, length, thistype in [ + (File("sample.sav", spss_dir / "sample.sav"), 5, str), + (File("sample.sav", spss_dir / "sample_large.sav"), 485, float), + ]: + m = converter.match(sav_file) + assert m is not None + assert len(m) == 0 + + children = converter.create_children(None, sav_file) + assert len(children) == length + for ii, child in enumerate(children): + assert child.__class__ == DictElement + assert child.name == str(ii) + my_dict = child.value + assert isinstance(my_dict["mychar"], str) + assert isinstance(my_dict["mydate"], datetime.date) or np.isnan(my_dict["mydate"]) + assert isinstance(my_dict["dtime"], datetime.datetime) or np.isnan(my_dict["dtime"]) + assert isinstance(my_dict["mytime"], datetime.time) or np.isnan(my_dict["mytime"]) + assert isinstance(my_dict["mylabl"], thistype) + assert isinstance(my_dict["myord"], thistype) diff --git a/unittests/test_tables/spss/CITATION.cff b/unittests/test_tables/spss/CITATION.cff new 
file mode 100644 index 0000000000000000000000000000000000000000..140fcc071bf2d5f5709cf31bf11bd9676b81ca5f --- /dev/null +++ b/unittests/test_tables/spss/CITATION.cff @@ -0,0 +1,11 @@ +cff-version: 1.2.0 +message: "If you use this software, please cite it as below." +authors: +- family-names: "Fajardo" + given-names: "Otto" + orcid: "https://orcid.org/0000-0002-3363-9287" +title: "Pyreadstat" +version: 1.2.7 +doi: 10.5281/zenodo.6612282 +date-released: 2018-09-24 +url: "https://github.com/Roche/pyreadstat" diff --git a/unittests/test_tables/spss/LICENSE b/unittests/test_tables/spss/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..a2f94b1a2a5a4255fc8ef6d0beb94cce89f545e8 --- /dev/null +++ b/unittests/test_tables/spss/LICENSE @@ -0,0 +1,210 @@ +Test data files were copied from [pyreadstat](https://github.com/Roche/pyreadstat), they are +licensed under the Apache License, cited below. + +Copyright (C) 2018-2024 Otto Fajardo +Copyright (C) 2024 Indiscale GmbH <info@indiscale.com> +Copyright (C) 2024 Daniel Hornung <d.hornung@indiscale.com> + +pyreadstat license: +--------------------------------------------------------------------------- + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. \ No newline at end of file diff --git a/unittests/test_tables/spss/sample.sav b/unittests/test_tables/spss/sample.sav new file mode 100644 index 0000000000000000000000000000000000000000..20d0c5ce6689a60adfa329a17b4347274e9a863b Binary files /dev/null and b/unittests/test_tables/spss/sample.sav differ diff --git a/unittests/test_tables/spss/sample_large.sav b/unittests/test_tables/spss/sample_large.sav new file mode 100644 index 0000000000000000000000000000000000000000..b0c16c1390a15a4f62a859ade76aa17b89c6ae40 Binary files /dev/null and b/unittests/test_tables/spss/sample_large.sav differ