Skip to content
Snippets Groups Projects

ENH: allow to define datatypes instead of converters

Merged Henrik tom Wörden requested to merge f-datatypes into dev
All threads resolved!
2 files
+ 55
9
Compare changes
  • Side-by-side
  • Inline
Files
2
@@ -186,7 +186,8 @@ def string_in_list(val, options, ignore_case=True):
class TableImporter(object):
def __init__(self, converters, obligatory_columns=None, unique_keys=None):
def __init__(self, converters, obligatory_columns=None, unique_keys=None,
datatypes=None):
"""
converters: dict with column names as keys and converter functions as
values
@@ -200,14 +201,27 @@ class TableImporter(object):
unique_keys : list of column names that in
combination must be unique; i.e. each row has a
unique combination of values in those columns.
datatypes: dict with column names as keys and datatypes as values
All non-null values will be checked whether they have the
provided datatype.
This dict also defines what columns are required to exist
through the existing keys.
"""
if converters is None:
converters = {}
if datatypes is None:
datatypes = {}
self.sup = SuppressKnown()
self.required_columns = list(converters.keys())
self.required_columns = list(converters.keys())+list(datatypes.keys())
self.obligatory_columns = ([]
if obligatory_columns is None
else obligatory_columns)
self.unique_keys = [] if unique_keys is None else unique_keys
self.converters = converters
self.datatypes = datatypes
def read_file(self, filename, **kwargs):
# This implementation only raises NotImplementedError.
# NOTE(review): presumably concrete importers override this to load
# `filename` into a DataFrame -- confirm against the subclasses.
raise NotImplementedError()
@@ -265,6 +279,22 @@ class TableImporter(object):
return df
def check_datatype(self, df, filename=None):
    """
    Check for each column whether non-null fields have the correct
    datatype.

    For every (column, datatype) pair in ``self.datatypes`` each non-null
    value of that column is tested with ``isinstance``.

    df: DataFrame whose columns are checked
    filename: only used to build the error message

    Raises DataInconsistencyError for the first value that is not an
    instance of the datatype configured for its column. Returns None.
    """
    for key, datatype in self.datatypes.items():
        not_null = pd.notnull(df.loc[:, key])

        # Series.iteritems() was removed in pandas 2.0; Series.items() is
        # the equivalent and also exists in older pandas versions.
        for idx, val in df.loc[not_null, key].items():
            if not isinstance(val, datatype):
                # idx is the index label of the offending row.
                raise DataInconsistencyError(
                    "In row no. {rn} and column {c} of file '{fi}' the "
                    "datatype was {was} but it should be "
                    "{expected}".format(rn=idx, c=key, fi=filename,
                                        was=type(val), expected=datatype)
                )
def check_missing(self, df, filename=None):
"""
Check in each row whether obligatory fields are empty or null.
@@ -309,6 +339,7 @@ class TableImporter(object):
def check_dataframe(self, df, filename):
self.check_columns(df, filename=filename)
df = self.check_missing(df, filename=filename)
self.check_datatype(df, filename=filename)
if len(self.unique_keys) > 0:
df = self.check_unique(df, filename=filename)
Loading