Skip to content
Snippets Groups Projects
Verified commit db4e6348, authored by Daniel Hornung
Browse files

ENH: Trying out the other backend.

parent 23c232cb
No related branches found
No related tags found
2 merge requests: !178 "FIX: #96 Better error output for crawl.py script." and !171 "sav/spss converter"
Pipeline #52032 passed
...@@ -47,7 +47,9 @@ class SPSSConverter(converters.TableConverter): ...@@ -47,7 +47,9 @@ class SPSSConverter(converters.TableConverter):
def create_children(self, values: GeneralStore, element: StructureElement) -> list: def create_children(self, values: GeneralStore, element: StructureElement) -> list:
assert isinstance(element, File) assert isinstance(element, File)
df = pd.io.spss.read_spss(element.path) # The default dtype backend "numpy_nullable" does not handle dates well.
# Note that pandas.ArrowDtype is considered experimental (in Pandas 2.2).
df = pd.io.spss.read_spss(element.path, dtype_backend="pyarrow")
# if element.path.endswith(".sav"): # if element.path.endswith(".sav"):
# sav_df, meta = pyreadstat.read_sav(element.path, metadataonly=True) # sav_df, meta = pyreadstat.read_sav(element.path, metadataonly=True)
......
...@@ -188,7 +188,7 @@ out: tuple ...@@ -188,7 +188,7 @@ out: tuple
if "value" not in value: if "value" not in value:
# TODO: how do we handle this case? Just ignore? # TODO: how do we handle this case? Just ignore?
# or disallow? # or disallow?
raise NotImplementedError() raise NotImplementedError(f"This definition has no \"value\": {value}")
propvalue = value["value"] propvalue = value["value"]
# can be "single", "list" or "multiproperty" # can be "single", "list" or "multiproperty"
collection_mode = value["collection_mode"] collection_mode = value["collection_mode"]
......
...@@ -62,7 +62,7 @@ def test_spss_converter(converter_registry): ...@@ -62,7 +62,7 @@ def test_spss_converter(converter_registry):
spss_dir = UNITTESTDIR / "test_tables" / "spss" spss_dir = UNITTESTDIR / "test_tables" / "spss"
for sav_file, length, thistype in [ for sav_file, length, thistype in [
(File("sample.sav", spss_dir / "sample.sav"), 5, str), (File("sample.sav", spss_dir / "sample.sav"), 5, str),
(File("sample.sav", spss_dir / "sample_large.sav"), 485, float), (File("sample.sav", spss_dir / "sample_large.sav"), 485, int),
]: ]:
m = converter.match(sav_file) m = converter.match(sav_file)
assert m is not None assert m is not None
...@@ -70,6 +70,7 @@ def test_spss_converter(converter_registry): ...@@ -70,6 +70,7 @@ def test_spss_converter(converter_registry):
children = converter.create_children(None, sav_file) children = converter.create_children(None, sav_file)
assert len(children) == length assert len(children) == length
for ii, child in enumerate(children): for ii, child in enumerate(children):
assert child.__class__ == DictElement assert child.__class__ == DictElement
assert child.name == str(ii) assert child.name == str(ii)
...@@ -78,5 +79,5 @@ def test_spss_converter(converter_registry): ...@@ -78,5 +79,5 @@ def test_spss_converter(converter_registry):
assert isinstance(my_dict["mydate"], datetime.date) or np.isnan(my_dict["mydate"]) assert isinstance(my_dict["mydate"], datetime.date) or np.isnan(my_dict["mydate"])
assert isinstance(my_dict["dtime"], datetime.datetime) or np.isnan(my_dict["dtime"]) assert isinstance(my_dict["dtime"], datetime.datetime) or np.isnan(my_dict["dtime"])
assert isinstance(my_dict["mytime"], datetime.time) or np.isnan(my_dict["mytime"]) assert isinstance(my_dict["mytime"], datetime.time) or np.isnan(my_dict["mytime"])
assert isinstance(my_dict["mylabl"], thistype) assert isinstance(my_dict["mylabl"], thistype), f"{type(my_dict['mylabl'])}"
assert isinstance(my_dict["myord"], thistype) assert isinstance(my_dict["myord"], thistype), f"{type(my_dict['myord'])}"
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment.