Skip to content
Snippets Groups Projects
Select Git revision
  • cd46e4d5e70bb2e8b2b1c14fb6ec6b45651b8c84
  • main default protected
  • f-prefill
  • dev
  • f-docs-pylib
  • f-parse-value
  • f-compare
  • f-string-ids
  • f-217-set-special-property
  • f-filesystem-import
  • f-filesystem-link
  • f-filesystem-directory
  • f-filesystem-core
  • f-filesystem-cleanup
  • f-check-merge-entities
  • f-compare-enid
  • f-select-subproperties
  • v0.18.0
  • v0.17.0
  • v0.16.0
  • v0.15.1
  • v0.15.0
  • v0.14.0
  • v0.13.2
  • v0.13.1
  • v0.13.0
  • linkahead-rename-step-2
  • linkahead-rename-step-1
  • v0.12.0
  • v0.11.2
  • v0.11.1
  • v0.11.0
  • v0.10.0
  • v0.9.0
  • v0.8.0
  • v0.7.4
  • v0.7.3
37 results

errors.rst

Blame
  • Code owners
    Assign users and groups as approvers for specific file changes. Learn more.
    pandas_table_preview.py 3.99 KiB
    #!/usr/bin/env python3
    # encoding: utf-8
    #
    # ** header v3.0
    # This file is a part of the CaosDB Project.
    #
    # Copyright (C) 2020 IndiScale GmbH <info@indiscale.com>
    # Copyright (C) 2020 Henrik tom Wörden <h.tomwoerden@indiscale.com>
    #
    # This program is free software: you can redistribute it and/or modify
    # it under the terms of the GNU Affero General Public License as
    # published by the Free Software Foundation, either version 3 of the
    # License, or (at your option) any later version.
    #
    # This program is distributed in the hope that it will be useful,
    # but WITHOUT ANY WARRANTY; without even the implied warranty of
    # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    # GNU Affero General Public License for more details.
    #
    # You should have received a copy of the GNU Affero General Public License
    # along with this program. If not, see <https://www.gnu.org/licenses/>.
    #
    # ** end header
    #
    
    """
    This script tries to read typical table data files (.csv, .tsv, .xls, .xlsx)
    with pandas and prints a partial HTML representation of the table.
    """
    
    import logging
    import os
    import sys
    from datetime import datetime
    
    import caosdb as db
    import pandas as pd
    from caosadvancedtools.serverside.helper import get_argument_parser
    from caosadvancedtools.serverside.logging import configure_server_side_logging
    
    # Upper limit for previews: size_is_ok() compares an entity's ``size``
    # against this value (presumably bytes, i.e. ~100 MB -- TODO confirm unit).
    MAXIMUMFILESIZE = 1e8
    # File name endings that can be previewed; get_ending() matches them
    # against the lower-cased path, so the entries must be lower case.
    VALID_ENDINGS = [".csv", ".tsv", ".xls", ".xlsx"]
    
    
    def get_file(eid):
        """Retrieve the File entity with the given ID from CaosDB.

        Parameters
        ----------
        eid
            ID of the File entity to retrieve.

        Returns
        -------
        The retrieved ``db.File`` entity.

        Notes
        -----
        If the server reports that the entity does not exist, a message is
        written to stderr and the process exits with status 1.
        """
        try:
            fi = db.File(id=eid)
            fi.retrieve()
        except db.exceptions.EntityDoesNotExistError:
            # The trailing space matters: adjacent string literals are joined
            # verbatim, and without it the message read "it seemsnot to exist.".
            print("Cannot create preview for Entity with ID={}, because it seems "
                  "not to exist.".format(eid), file=sys.stderr)
            sys.exit(1)

        return fi
    
    
    def size_is_ok(fi):
        """Tell whether the file is small enough to get a preview.

        Returns True when ``fi.size`` does not exceed ``MAXIMUMFILESIZE``.
        """
        within_limit = MAXIMUMFILESIZE >= fi.size

        return within_limit
    
    
    def get_ending(fipath):
        """Return the entry of ``VALID_ENDINGS`` that ``fipath`` ends with.

        The comparison is done on the lower-cased path. The first matching
        entry (in list order) is returned; ``None`` if no entry matches.
        """
        lowered = fipath.lower()
        candidates = (suffix for suffix in VALID_ENDINGS
                      if lowered.endswith(suffix))

        return next(candidates, None)
    
    
    def ending_is_valid(fipath):
        """Tell whether ``fipath`` has one of the supported endings.

        True exactly when ``get_ending`` finds a match for the path.
        """
        matched = get_ending(fipath)

        return matched is not None
    
    
    def read_file(fipath, ftype):
        """Read a table file into a pandas DataFrame.

        Parameters
        ----------
        fipath
            Path of the file to read.
        ftype : str
            File ending that selects the reader: ``".xls"``/``".xlsx"`` use
            ``pd.read_excel``; ``".tsv"`` and ``".csv"`` use ``pd.read_csv``
            (tab-separated for ``".tsv"``), treating ``#`` as comment marker.

        Returns
        -------
        pandas.DataFrame
            The parsed table.

        Raises
        ------
        ValueError
            If ``ftype`` is not one of the endings above, or if pandas fails
            to read the file. In the latter case the original exception is
            attached as ``__cause__``.
        """
        if ftype in (".xls", ".xlsx"):
            reader, options = pd.read_excel, {}
        elif ftype == ".tsv":
            reader, options = pd.read_csv, {"sep": "\t", "comment": "#"}
        elif ftype == ".csv":
            reader, options = pd.read_csv, {"comment": "#"}
        else:
            message = "File type unknown: {}".format(ftype)
            # Diagnostics go to stderr like everywhere else in this script;
            # stdout carries the HTML preview.
            print(message, file=sys.stderr)
            raise ValueError(message)

        # Only the actual parsing is guarded; any failure is reported as
        # ValueError (the type callers catch) while keeping the real cause.
        try:
            return reader(fipath, **options)
        except Exception as err:
            raise ValueError(
                "Cannot read {!r} as {} file: {}".format(fipath, ftype, err)
            ) from err
    
    
    def create_table_preview(fi):
        """Print an HTML preview of the table stored in a File entity.

        The file is downloaded, read with ``read_file`` and the resulting
        DataFrame is printed to stdout via ``DataFrame.to_html`` (limited to
        10 columns and 10 rows).

        Parameters
        ----------
        fi
            File entity; its ``id``, ``path`` and ``size`` attributes are read
            and its ``download()`` method is called.

        Notes
        -----
        On failure a message is written to stderr and the process exits:

        - 5: the file ending is not one of the supported ones
        - 2: the file is larger than ``MAXIMUMFILESIZE``
        - 3: the download failed
        - 4: the downloaded file could not be read
        """
        # Take the ID from the entity itself. The module-level ``entity_id``
        # only exists when this file runs as a script, so relying on it made
        # the function raise NameError for any other caller.
        eid = fi.id
        ending = get_ending(fi.path)

        if ending is None:
            print("Cannot create preview for Entity with ID={}, because its "
                  "file type is not supported.".format(eid), file=sys.stderr)
            sys.exit(5)

        if not size_is_ok(fi):
            print("Skipped creating a preview for Entity with ID={}, because the "
                  "file is large!".format(eid), file=sys.stderr)
            sys.exit(2)

        try:
            tmpfile = fi.download()
        except Exception:
            print("Cannot create preview for Entity with ID={}, because download "
                  "failed.".format(eid), file=sys.stderr)

            sys.exit(3)

        try:
            df = read_file(tmpfile, ending)
        except ValueError:
            print("Cannot read File Entity with ID={}.".format(eid),
                  file=sys.stderr)
            sys.exit(4)

        print(df.to_html(max_cols=10, max_rows=10))
    
    
    if __name__ == "__main__":
        # Only let errors through from the "connection" logger.
        logging.getLogger("connection").setLevel(logging.ERROR)

        args = get_argument_parser().parse_args()

        debug_file = configure_server_side_logging()
        logger = logging.getLogger("caosadvancedtools")

        db.configure_connection(auth_token=args.auth_token)
        # The entity ID arrives in the ``filename`` argument. It is kept as a
        # module-level name on purpose; other code in this file may refer to it.
        entity_id = args.filename

        # Fetch the File entity and print its table preview.
        create_table_preview(get_file(entity_id))