diff --git a/src/caoscrawler/__init__.py b/src/caoscrawler/__init__.py index 9c5e3743527e672cda2a2687a834fe54ac1b82a2..3c71caed3362e50b5c4af947e0e3b1fa7805f8d5 100644 --- a/src/caoscrawler/__init__.py +++ b/src/caoscrawler/__init__.py @@ -1,9 +1,10 @@ from . import converters -from .conv_impl.sav import SAVConverter +from .conv_impl.spss import SPSSConverter from .crawl import Crawler, SecurityMode from .version import CfoodRequiredVersionError, get_caoscrawler_version __version__ = get_caoscrawler_version() # Convenience members ######################################################### -converters.SAVConverter = SAVConverter +# mypy: disable-error-code="attr-defined" +converters.SPSSConverter = SPSSConverter diff --git a/src/caoscrawler/cfood-schema.yml b/src/caoscrawler/cfood-schema.yml index 5a6e1e50345382ca6e5a1e6ef3a8fbeafb806b84..85032ba30877dff97bdf4ff9ba904d070c4a95b2 100644 --- a/src/caoscrawler/cfood-schema.yml +++ b/src/caoscrawler/cfood-schema.yml @@ -31,6 +31,7 @@ cfood: - JSONFile - CSVTableConverter - XLSXTableConverter + - SPSSFile - H5File - H5Dataset - H5Group diff --git a/src/caoscrawler/conv_impl/sav.py b/src/caoscrawler/conv_impl/spss.py similarity index 67% rename from src/caoscrawler/conv_impl/sav.py rename to src/caoscrawler/conv_impl/spss.py index 8308719bc0e222810fee2274f94d0c14ebce9b81..f5ae4582d31c59d624048d757c9a42ce807491f6 100644 --- a/src/caoscrawler/conv_impl/sav.py +++ b/src/caoscrawler/conv_impl/spss.py @@ -18,20 +18,29 @@ """Converter for SAV files (stored by SPSS).""" -# import pyreadstat # Maybe us this if we need more metadata import pandas as pd +import pyreadstat from .. import converters -from ..stores import GeneralStore, RecordStore -from ..structure_elements import (BooleanElement, DictElement, Directory, File, - FloatElement, IntegerElement, JSONFile, - ListElement, NoneElement, StructureElement, - TextElement) +from ..stores import GeneralStore +from ..structure_elements import (File, StructureElement) -class SAVConverter(converters.TableConverter): +class SPSSConverter(converters.TableConverter): """Converter for SAV files (stored by SPSS).""" - def create_children(self, values: GeneralStore, element: StructureElement): + # def __init__(self, *args, **kwargs): + # super().__init__(*args, **kwargs) + # from IPython import embed + # embed() + + def create_children(self, values: GeneralStore, element: StructureElement) -> list: + assert isinstance(element, File) df = pd.io.spss.read_spss(element.path) + + # if element.path.endswith(".sav"): + # sav_df, meta = pyreadstat.read_sav(element.path, metadataonly=True) + # from IPython import embed + # embed() + return self._children_from_dataframe(df) diff --git a/src/caoscrawler/default_converters.yml b/src/caoscrawler/default_converters.yml index e192ab1b3bae70a6772cf6defba4a4592a92e584..af2b1c764ac637c1391c89861ddba12386e6240e 100644 --- a/src/caoscrawler/default_converters.yml +++ b/src/caoscrawler/default_converters.yml @@ -24,7 +24,7 @@ TextElement: converter: TextElementConverter package: caoscrawler.converters - + DictDictElement: # deprecated converter: DictElementConverter package: caoscrawler.converters @@ -60,7 +60,7 @@ File: # deprecated converter: SimpleFileConverter package: caoscrawler.converters - + SimpleFile: converter: SimpleFileConverter package: caoscrawler.converters @@ -81,6 +81,10 @@ CSVTableConverter: converter: CSVTableConverter package: caoscrawler.converters +SPSSFile: + converter: SPSSConverter + package: caoscrawler.converters + XLSXTableConverter: converter: XLSXTableConverter package: caoscrawler.converters diff --git a/unittests/test_sav_converter.py b/unittests/test_spss_converter.py similarity index 95% rename from unittests/test_sav_converter.py rename to unittests/test_spss_converter.py index 3cc72038cf4db5f5c0fd4435ddee696137491eb6..c86fb17e7814e4ccebf7b2978e7d91f289e90395 100644 --- a/unittests/test_sav_converter.py +++ b/unittests/test_spss_converter.py @@ -16,7 +16,7 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see <https://www.gnu.org/licenses/>. -"""Testing converter for SAV files.""" +"""Testing converter for SPSS files.""" import datetime import importlib @@ -28,7 +28,7 @@ import pytest from caoscrawler.converters import ( ConverterValidationError, - SAVConverter, + SPSSConverter, ) from caoscrawler.structure_elements import (BooleanElement, DictElement, Directory, File, FloatElement, @@ -52,8 +52,8 @@ def converter_registry(): return converter_registry -def test_sav_converter(converter_registry): - converter = SAVConverter({ +def test_spss_converter(converter_registry): + converter = SPSSConverter({ "match": ("sample.sav") }, "ThisConverterNameIsIrrelevant", converter_registry