up

2718f769 · Henrik tom Wörden · 5d7ace23 · 2718f769 · 2718f769
Commit 2718f769 authored Oct 16, 2020 by Henrik tom Wörden
--- a/scripting/bin/pandas_table_preview.py
+++ b/scripting/bin/pandas_table_preview.py
@@ -23,6 +23,11 @@
 # ** end header
 #
+"""
+This script tries to read typical table data files (.csv etc.) with pandas and
+creates a (partial) HTML representation of the table.
+"""
 import logging
 import os
 import sys
@@ -72,17 +77,19 @@ def ending_is_valid(fipath):
    return get_ending(fipath) is not None
-def read_file(fipath):
+def read_file(fipath, ftype):
    """ tries to read the provided file """
-    ending = get_ending(fipath)
    try:
-        if ending in [".xls", ".xlsx"]:
+        if ftype in [".xls", ".xlsx"]:
            df = pd.read_excel(fipath)
-        elif ending == ".tsv":
+        elif ftype == ".tsv":
            df = pd.read_csv(fipath, sep="\t")
-        elif ending == ".csv":
+        elif ftype == ".csv":
            df = pd.read_csv(fipath)
+        else:
+            print("File type unknown: {}".format(ftype))
+            raise RuntimeError("")
    except Exception:
        raise ValueError()
@@ -90,28 +97,33 @@ def read_file(fipath):
 def create_table_preview(fi):
-    if not ending_is_valid:
+    if not ending_is_valid(fi.path):
        print("Cannot create preview for Entity with ID={}, because download"
              "failed.".format(entity_id))
-        sys.exit(1)
+        return
+    ending = get_ending(fi.path)
    if not size_is_ok(fi):
        print("Skipped creating a preview for Entity with ID={}, because the"
              "file is large!".format(entity_id))
-        sys.exit(2)
+        return
    try:
-        fipath = fi.download()
+        tmpfile = fi.download()
    except Exception:
        print("Cannot create preview for Entity with ID={}, because download"
              "failed.".format(entity_id))
-        sys.exit(3)
+        return
    try:
-        df = read_file(fipath)
+        df = read_file(tmpfile, ending)
    except ValueError:
        print("Cannot read File Entity with ID={}.".format(entity_id))
-        sys.exit(4)
+        return
    print(df.to_html(max_cols=10, max_rows=10))

--- a/scripting/bin/test_pandas_table_preview.py
+++ b/scripting/bin/test_pandas_table_preview.py
@@ -60,9 +60,11 @@ class PreviewTest(unittest.TestCase):
        files = ["test.csv", "test.tsv", "test.xls", "test.xlsx"]
        for fi in files:
-            assert fi.split(".")[1]+"file" in read_file(fi)
+            assert fi.split(".")[1]+"file" in read_file(
+                fi, ftype="."+fi.split(".")[1])
        badfiles = ["bad.csv", "bad.tsv", "bad.xls", "bad.xlsx"]
        for bfi in badfiles:
-            self.assertRaises(ValueError, read_file, bfi)
+            self.assertRaises(ValueError, read_file,
+                              bfi, "."+bfi.split(".")[1])