Select Git revision
pandas_table_preview.py
-
Henrik tom Wörden authored
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
pandas_table_preview.py 3.99 KiB
#!/usr/bin/env python3
# encoding: utf-8
#
# ** header v3.0
# This file is a part of the CaosDB Project.
#
# Copyright (C) 2020 IndiScale GmbH <info@indiscale.com>
# Copyright (C) 2020 Henrik tom Wörden <h.tomwoerden@indiscale.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
# ** end header
#
"""
This script tries to read typical table data files (.csv etc.) with pandas and
creates a html (partial) representation of the table.
"""
import logging
import os
import sys
from datetime import datetime
import caosdb as db
import pandas as pd
from caosadvancedtools.serverside.helper import get_argument_parser
from caosadvancedtools.serverside.logging import configure_server_side_logging
MAXIMUMFILESIZE = 1e8
VALID_ENDINGS = [".csv", ".tsv", ".xls", ".xlsx"]
def get_file(eid):
    """Retrieve the File entity with ID ``eid`` from caosdb.

    Returns the retrieved ``db.File`` object. If the entity does not exist,
    a message is printed to stderr and the script exits with status 1.
    """
    fi = db.File(id=eid)
    try:
        fi.retrieve()
    except db.exceptions.EntityDoesNotExistError:
        # The two literals are concatenated by the parser, so the first one
        # needs the trailing space to keep the words apart.
        print("Cannot create preview for Entity with ID={}, because it seems "
              "not to exist.".format(eid), file=sys.stderr)
        sys.exit(1)
    return fi
def size_is_ok(fi):
    """Tell whether ``fi`` is small enough to be previewed.

    A file qualifies when its ``size`` does not exceed MAXIMUMFILESIZE.
    """
    within_limit = fi.size <= MAXIMUMFILESIZE
    return within_limit
def get_ending(fipath):
    """Return the entry of VALID_ENDINGS that ``fipath`` ends with.

    The path is lower-cased before comparing. The first matching entry is
    returned; ``None`` is returned when no entry matches.
    """
    matching = (
        suffix for suffix in VALID_ENDINGS
        if fipath.lower().endswith(suffix)
    )
    return next(matching, None)
def ending_is_valid(fipath):
    """Tell whether ``fipath`` carries one of the supported file endings."""
    found = get_ending(fipath)
    return found is not None
def read_file(fipath, ftype):
    """Read the table file at ``fipath`` into a pandas DataFrame.

    ``ftype`` is the file ending and selects the reader: ".xls" and ".xlsx"
    are read with ``pd.read_excel``; ".tsv" (tab separated) and ".csv" are
    read with ``pd.read_csv``, treating "#" as the comment character.

    Raises ValueError if ``ftype`` is not one of the endings above or if the
    file cannot be read. In the latter case the original exception is
    chained as the cause.
    """
    try:
        if ftype in (".xls", ".xlsx"):
            return pd.read_excel(fipath)
        if ftype == ".tsv":
            return pd.read_csv(fipath, sep="\t", comment="#")
        if ftype == ".csv":
            return pd.read_csv(fipath, comment="#")
    except Exception as err:
        # Callers only handle ValueError, so every reader failure is
        # translated; chaining keeps the real reason available.
        raise ValueError(
            "Cannot read {!r} as {}: {}".format(fipath, ftype, err)
        ) from err

    # Diagnostics go to stderr: stdout carries the HTML preview.
    print("File type unknown: {}".format(ftype), file=sys.stderr)
    raise ValueError("File type unknown: {}".format(ftype))
def create_table_preview(fi):
    """Print an HTML preview of the table stored in the File entity ``fi``.

    The file is downloaded, read with ``read_file`` and written to stdout via
    ``DataFrame.to_html`` with ``max_cols=10`` and ``max_rows=10``.

    On failure a message is printed to stderr and the script exits with one
    of these status codes:

    - 5: the file ending is not one of the supported ones
    - 2: the file is too large
    - 3: the download failed
    - 4: the downloaded file could not be read
    """
    # Use the entity's own ID in messages instead of a global that only
    # exists when this module is executed as a script.
    entity_id = fi.id

    ending = get_ending(fi.path)
    if ending is None:
        print("Cannot create preview for Entity with ID={}, because the "
              "file type is not supported.".format(entity_id),
              file=sys.stderr)
        sys.exit(5)

    if not size_is_ok(fi):
        print("Skipped creating a preview for Entity with ID={}, because the "
              "file is large!".format(entity_id), file=sys.stderr)
        sys.exit(2)

    try:
        tmpfile = fi.download()
    except Exception:
        print("Cannot create preview for Entity with ID={}, because download "
              "failed.".format(entity_id), file=sys.stderr)
        sys.exit(3)

    try:
        df = read_file(tmpfile, ending)
    except ValueError:
        print("Cannot read File Entity with ID={}.".format(entity_id),
              file=sys.stderr)
        sys.exit(4)

    print(df.to_html(max_cols=10, max_rows=10))
if __name__ == "__main__":
    # Only let errors through on the "connection" logger.
    logging.getLogger("connection").setLevel(logging.ERROR)

    args = get_argument_parser().parse_args()
    debug_file = configure_server_side_logging()
    logger = logging.getLogger("caosadvancedtools")
    db.configure_connection(auth_token=args.auth_token)

    # The ``filename`` argument is used as the ID of the File entity.
    entity_id = args.filename
    create_table_preview(get_file(entity_id))