From eb52eb1944c17feac4ffaf76745021172d05a8bc Mon Sep 17 00:00:00 2001 From: Daniel <d.hornung@indiscale.com> Date: Wed, 15 May 2024 14:00:54 +0200 Subject: [PATCH] MAINT: Renamed sav -> spss --- src/caoscrawler/__init__.py | 5 ++-- src/caoscrawler/cfood-schema.yml | 1 + src/caoscrawler/conv_impl/{sav.py => spss.py} | 25 +++++++++++++------ src/caoscrawler/default_converters.yml | 8 ++++-- ...av_converter.py => test_spss_converter.py} | 8 +++--- 5 files changed, 31 insertions(+), 16 deletions(-) rename src/caoscrawler/conv_impl/{sav.py => spss.py} (67%) rename unittests/{test_sav_converter.py => test_spss_converter.py} (95%) diff --git a/src/caoscrawler/__init__.py b/src/caoscrawler/__init__.py index 9c5e3743..3c71caed 100644 --- a/src/caoscrawler/__init__.py +++ b/src/caoscrawler/__init__.py @@ -1,9 +1,10 @@ from . import converters -from .conv_impl.sav import SAVConverter +from .conv_impl.spss import SPSSConverter from .crawl import Crawler, SecurityMode from .version import CfoodRequiredVersionError, get_caoscrawler_version __version__ = get_caoscrawler_version() # Convenience members ######################################################### -converters.SAVConverter = SAVConverter +# mypy: disable-error-code="attr-defined" +converters.SPSSConverter = SPSSConverter diff --git a/src/caoscrawler/cfood-schema.yml b/src/caoscrawler/cfood-schema.yml index 5a6e1e50..85032ba3 100644 --- a/src/caoscrawler/cfood-schema.yml +++ b/src/caoscrawler/cfood-schema.yml @@ -31,6 +31,7 @@ cfood: - JSONFile - CSVTableConverter - XLSXTableConverter + - SPSSFile - H5File - H5Dataset - H5Group diff --git a/src/caoscrawler/conv_impl/sav.py b/src/caoscrawler/conv_impl/spss.py similarity index 67% rename from src/caoscrawler/conv_impl/sav.py rename to src/caoscrawler/conv_impl/spss.py index 8308719b..f5ae4582 100644 --- a/src/caoscrawler/conv_impl/sav.py +++ b/src/caoscrawler/conv_impl/spss.py @@ -18,20 +18,29 @@ """Converter for SAV files (stored by SPSS).""" -# import pyreadstat # Maybe us this if we need more metadata import pandas as pd +import pyreadstat from .. import converters -from ..stores import GeneralStore, RecordStore -from ..structure_elements import (BooleanElement, DictElement, Directory, File, - FloatElement, IntegerElement, JSONFile, - ListElement, NoneElement, StructureElement, - TextElement) +from ..stores import GeneralStore +from ..structure_elements import (File, StructureElement) -class SAVConverter(converters.TableConverter): +class SPSSConverter(converters.TableConverter): """Converter for SAV files (stored by SPSS).""" - def create_children(self, values: GeneralStore, element: StructureElement): + # def __init__(self, *args, **kwargs): + # super().__init__(*args, **kwargs) + # from IPython import embed + # embed() + + def create_children(self, values: GeneralStore, element: StructureElement) -> list: + assert isinstance(element, File) df = pd.io.spss.read_spss(element.path) + + # if element.path.endswith(".sav"): + # sav_df, meta = pyreadstat.read_sav(element.path, metadataonly=True) + # from IPython import embed + # embed() + return self._children_from_dataframe(df) diff --git a/src/caoscrawler/default_converters.yml b/src/caoscrawler/default_converters.yml index e192ab1b..af2b1c76 100644 --- a/src/caoscrawler/default_converters.yml +++ b/src/caoscrawler/default_converters.yml @@ -24,7 +24,7 @@ TextElement: converter: TextElementConverter package: caoscrawler.converters - + DictDictElement: # deprecated converter: DictElementConverter package: caoscrawler.converters @@ -60,7 +60,7 @@ File: # deprecated converter: SimpleFileConverter package: caoscrawler.converters - + SimpleFile: converter: SimpleFileConverter package: caoscrawler.converters @@ -81,6 +81,10 @@ CSVTableConverter: converter: CSVTableConverter package: caoscrawler.converters +SPSSFile: + converter: SPSSConverter + package: caoscrawler.converters + XLSXTableConverter: converter: XLSXTableConverter package: caoscrawler.converters diff --git a/unittests/test_sav_converter.py b/unittests/test_spss_converter.py similarity index 95% rename from unittests/test_sav_converter.py rename to unittests/test_spss_converter.py index 3cc72038..c86fb17e 100644 --- a/unittests/test_sav_converter.py +++ b/unittests/test_spss_converter.py @@ -16,7 +16,7 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see <https://www.gnu.org/licenses/>. -"""Testing converter for SAV files.""" +"""Testing converter for SPSS files.""" import datetime import importlib @@ -28,7 +28,7 @@ import pytest from caoscrawler.converters import ( ConverterValidationError, - SAVConverter, + SPSSConverter, ) from caoscrawler.structure_elements import (BooleanElement, DictElement, Directory, File, FloatElement, @@ -52,8 +52,8 @@ def converter_registry(): return converter_registry -def test_sav_converter(converter_registry): - converter = SAVConverter({ +def test_spss_converter(converter_registry): + converter = SPSSConverter({ "match": ("sample.sav") }, "ThisConverterNameIsIrrelevant", converter_registry -- GitLab