Skip to content
Snippets Groups Projects

ENH: allow to define datatypes instead of converters

Merged Henrik tom Wörden requested to merge f-datatypes into dev
3 files
+ 53
4
Compare changes
  • Side-by-side
  • Inline
Files
3
@@ -202,7 +202,8 @@ def string_in_list(val, options, ignore_case=True):
@@ -202,7 +202,8 @@ def string_in_list(val, options, ignore_case=True):
class TableImporter(object):
class TableImporter(object):
def __init__(self, converters, obligatory_columns=None, unique_keys=None,
             datatypes=None):
    """Initialize the importer with per-column validation rules.

    Parameters
    ----------
    converters : dict
        Column names as keys and converter functions as values.  The
        keys also define columns that are required to exist.
    obligatory_columns : list, optional
        Names of columns whose fields must not be empty or null.
    unique_keys : list, optional
        Column names that in combination must be unique, i.e. each row
        has a unique combination of values in those columns.
    datatypes : dict, optional
        Column names as keys and datatypes as values.  All non-null
        values will be checked for whether they have the provided
        datatype.  This dict also defines what columns are required to
        exist through the existing keys.
    """
    if converters is None:
        converters = {}

    if datatypes is None:
        datatypes = {}

    self.sup = SuppressKnown()
    # Columns named in either dict must be present in the table.
    self.required_columns = list(converters.keys()) + list(datatypes.keys())
    self.obligatory_columns = ([]
                               if obligatory_columns is None
                               else obligatory_columns)
    self.unique_keys = [] if unique_keys is None else unique_keys
    self.converters = converters
    self.datatypes = datatypes
def read_file(self, filename, **kwargs):
    """Read ``filename`` into a DataFrame.

    Abstract hook: concrete importer subclasses must override this with
    a format-specific implementation.
    """
    raise NotImplementedError()
@@ -281,6 +295,22 @@ class TableImporter(object):
@@ -281,6 +295,22 @@ class TableImporter(object):
return df
return df
 
def check_datatype(self, df, filename=None):
    """Check for each configured column that all non-null fields have
    the correct datatype.

    Parameters
    ----------
    df : pandas.DataFrame
        The table to validate.
    filename : str, optional
        Name of the originating file; used only in error messages.

    Raises
    ------
    DataInconsistencyError
        If a non-null value's type does not match the datatype
        configured for its column in ``self.datatypes``.
    """
    for key, datatype in self.datatypes.items():
        # Only validate cells that actually hold a value; null fields
        # are the concern of check_missing, not of this check.
        # NOTE: Series.iteritems() was removed in pandas 2.0; .items()
        # is the drop-in replacement.
        for idx, val in df.loc[pd.notnull(df.loc[:, key]), key].items():
            if not isinstance(val, datatype):
                raise DataInconsistencyError(
                    "In row no. {rn} and column {c} of file '{fi}' the "
                    "datatype was {was} but it should be "
                    "{expected}".format(rn=idx, c=key, fi=filename,
                                        was=type(val), expected=datatype)
                )
 
def check_missing(self, df, filename=None):
def check_missing(self, df, filename=None):
"""
"""
Check in each row whether obligatory fields are empty or null.
Check in each row whether obligatory fields are empty or null.
@@ -325,6 +355,7 @@ class TableImporter(object):
@@ -325,6 +355,7 @@ class TableImporter(object):
def check_dataframe(self, df, filename):
def check_dataframe(self, df, filename):
self.check_columns(df, filename=filename)
self.check_columns(df, filename=filename)
df = self.check_missing(df, filename=filename)
df = self.check_missing(df, filename=filename)
 
self.check_datatype(df, filename=filename)
if len(self.unique_keys) > 0:
if len(self.unique_keys) > 0:
df = self.check_unique(df, filename=filename)
df = self.check_unique(df, filename=filename)
Loading