From eb52eb1944c17feac4ffaf76745021172d05a8bc Mon Sep 17 00:00:00 2001
From: Daniel <d.hornung@indiscale.com>
Date: Wed, 15 May 2024 14:00:54 +0200
Subject: [PATCH] MAINT: Renamed sav -> spss

---
 src/caoscrawler/__init__.py                   |  5 ++--
 src/caoscrawler/cfood-schema.yml              |  1 +
 src/caoscrawler/conv_impl/{sav.py => spss.py} | 25 +++++++++++++------
 src/caoscrawler/default_converters.yml        |  8 ++++--
 ...av_converter.py => test_spss_converter.py} |  8 +++---
 5 files changed, 31 insertions(+), 16 deletions(-)
 rename src/caoscrawler/conv_impl/{sav.py => spss.py} (67%)
 rename unittests/{test_sav_converter.py => test_spss_converter.py} (95%)

diff --git a/src/caoscrawler/__init__.py b/src/caoscrawler/__init__.py
index 9c5e3743..3c71caed 100644
--- a/src/caoscrawler/__init__.py
+++ b/src/caoscrawler/__init__.py
@@ -1,9 +1,10 @@
 from . import converters
-from .conv_impl.sav import SAVConverter
+from .conv_impl.spss import SPSSConverter
 from .crawl import Crawler, SecurityMode
 from .version import CfoodRequiredVersionError, get_caoscrawler_version
 
 __version__ = get_caoscrawler_version()
 
 # Convenience members #########################################################
-converters.SAVConverter = SAVConverter
+# mypy: disable-error-code="attr-defined"
+converters.SPSSConverter = SPSSConverter
diff --git a/src/caoscrawler/cfood-schema.yml b/src/caoscrawler/cfood-schema.yml
index 5a6e1e50..85032ba3 100644
--- a/src/caoscrawler/cfood-schema.yml
+++ b/src/caoscrawler/cfood-schema.yml
@@ -31,6 +31,7 @@ cfood:
           - JSONFile
           - CSVTableConverter
           - XLSXTableConverter
+          - SPSSFile
           - H5File
           - H5Dataset
           - H5Group
diff --git a/src/caoscrawler/conv_impl/sav.py b/src/caoscrawler/conv_impl/spss.py
similarity index 67%
rename from src/caoscrawler/conv_impl/sav.py
rename to src/caoscrawler/conv_impl/spss.py
index 8308719b..f5ae4582 100644
--- a/src/caoscrawler/conv_impl/sav.py
+++ b/src/caoscrawler/conv_impl/spss.py
@@ -18,20 +18,29 @@
 
 """Converter for SAV files (stored by SPSS)."""
 
-# import pyreadstat  # Maybe us this if we need more metadata
 import pandas as pd
+import pyreadstat
 
 from .. import converters
-from ..stores import GeneralStore, RecordStore
-from ..structure_elements import (BooleanElement, DictElement, Directory, File,
-                                  FloatElement, IntegerElement, JSONFile,
-                                  ListElement, NoneElement, StructureElement,
-                                  TextElement)
+from ..stores import GeneralStore
+from ..structure_elements import (File, StructureElement)
 
 
-class SAVConverter(converters.TableConverter):
+class SPSSConverter(converters.TableConverter):
     """Converter for SAV files (stored by SPSS)."""
 
-    def create_children(self, values: GeneralStore, element: StructureElement):
+    # def __init__(self, *args, **kwargs):
+    #     super().__init__(*args, **kwargs)
+    #     from IPython import embed
+    #     embed()
+
+    def create_children(self, values: GeneralStore, element: StructureElement) -> list:
+        assert isinstance(element, File)
         df = pd.io.spss.read_spss(element.path)
+
+        # if element.path.endswith(".sav"):
+        #     sav_df, meta = pyreadstat.read_sav(element.path, metadataonly=True)
+        # from IPython import embed
+        # embed()
+
         return self._children_from_dataframe(df)
diff --git a/src/caoscrawler/default_converters.yml b/src/caoscrawler/default_converters.yml
index e192ab1b..af2b1c76 100644
--- a/src/caoscrawler/default_converters.yml
+++ b/src/caoscrawler/default_converters.yml
@@ -24,7 +24,7 @@ TextElement:
   converter: TextElementConverter
   package: caoscrawler.converters
 
-  
+
 DictDictElement:  # deprecated
   converter: DictElementConverter
   package: caoscrawler.converters
@@ -60,7 +60,7 @@ File:  # deprecated
   converter: SimpleFileConverter
   package: caoscrawler.converters
 
-  
+
 SimpleFile:
   converter: SimpleFileConverter
   package: caoscrawler.converters
@@ -81,6 +81,10 @@ CSVTableConverter:
   converter: CSVTableConverter
   package: caoscrawler.converters
 
+SPSSFile:
+  converter: SPSSConverter
+  package: caoscrawler.converters
+
 XLSXTableConverter:
   converter: XLSXTableConverter
   package: caoscrawler.converters
diff --git a/unittests/test_sav_converter.py b/unittests/test_spss_converter.py
similarity index 95%
rename from unittests/test_sav_converter.py
rename to unittests/test_spss_converter.py
index 3cc72038..c86fb17e 100644
--- a/unittests/test_sav_converter.py
+++ b/unittests/test_spss_converter.py
@@ -16,7 +16,7 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with this program. If not, see <https://www.gnu.org/licenses/>.
 
-"""Testing converter for SAV files."""
+"""Testing converter for SPSS files."""
 
 import datetime
 import importlib
@@ -28,7 +28,7 @@ import pytest
 
 from caoscrawler.converters import (
     ConverterValidationError,
-    SAVConverter,
+    SPSSConverter,
 )
 from caoscrawler.structure_elements import (BooleanElement, DictElement,
                                             Directory, File, FloatElement,
@@ -52,8 +52,8 @@ def converter_registry():
     return converter_registry
 
 
-def test_sav_converter(converter_registry):
-    converter = SAVConverter({
+def test_spss_converter(converter_registry):
+    converter = SPSSConverter({
         "match": ("sample.sav")
     },
         "ThisConverterNameIsIrrelevant", converter_registry
-- 
GitLab