From cb517cc3238ed8aef8549a20e9eb8b75abc714c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20tom=20W=C3=B6rden?= <h.tomwoerden@indiscale.com> Date: Mon, 23 Nov 2020 11:12:08 +0000 Subject: [PATCH] Download multiple files in zip --- CHANGELOG.md | 1 + install-sss.sh | 4 + src/core/js/ext_bookmarks.js | 15 +- src/core/js/ext_file_download.js | 186 ++++++++++++++++++ src/core/js/ext_xls_download.js | 35 ++-- src/core/xsl/main.xsl | 5 + src/core/xsl/query.xsl | 19 +- .../ext_file_download/zip_files.py | 112 +++++++++++ test/core/index.html | 2 + test/core/js/modules/ext_file_download.js.js | 63 ++++++ test/core/js/modules/ext_xls_download.js.js | 11 +- 11 files changed, 418 insertions(+), 35 deletions(-) create mode 100644 src/core/js/ext_file_download.js create mode 100755 src/server_side_scripting/ext_file_download/zip_files.py create mode 100644 test/core/js/modules/ext_file_download.js.js diff --git a/CHANGELOG.md b/CHANGELOG.md index f054670a..8fc5dca7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 button (edit_mode). * Plotly preview has an additional parameter for a config object, e.g., for disabling the plotly logo +- After a SELECT query, all files referenced in the result table can now be downloaded as a zip file. 
### Changed (for changes in existing functionality) - enabled and enhanced autocompletion diff --git a/install-sss.sh b/install-sss.sh index 3a934614..bb2db576 100755 --- a/install-sss.sh +++ b/install-sss.sh @@ -11,3 +11,7 @@ if [ "${BUILD_MODULE_EXT_BOTTOM_LINE_TABLE_PREVIEW}" == "ENABLED" ]; then cp $SRC_DIR/ext_table_preview/*.py $INSTALL_DIR/ext_table_preview/ echo "installed all server-side scripts for ext_table_preview" fi +# ext_file_download; should always be installed - No build variable +mkdir -p $INSTALL_DIR/ext_file_download +cp $SRC_DIR/ext_file_download/*.py $INSTALL_DIR/ext_file_download/ +echo "installed all server-side scripts for ext_file_download" diff --git a/src/core/js/ext_bookmarks.js b/src/core/js/ext_bookmarks.js index 9f6a1611..07de3014 100644 --- a/src/core/js/ext_bookmarks.js +++ b/src/core/js/ext_bookmarks.js @@ -193,16 +193,12 @@ var ext_bookmarks = function ($, logger, config) { return get_collection_prefix() + '_it_' + val; } - const TAB = "%09"; - const NEWLINE = "%0A"; - const preamble = "data:text/csv;charset=utf-8,"; /** * These will be the columns in the TSV file. For each column there should * exist a data_getter. */ const tsv_columns = config["tsv_columns"] || ["URI"]; - const header = tsv_columns.join(TAB) + NEWLINE; /** * Generate a single TSV row @@ -223,12 +219,17 @@ var ext_bookmarks = function ($, logger, config) { * * @param {string[]} bookmarks - array of ids. */ - const get_export_table = async function (bookmarks) { + const get_export_table = async function (bookmarks, preamble, tab, newline) { + // TODO merge with related code in the module "caosdb_table_export". + preamble = ((typeof preamble == 'undefined') ? 
"data:text/csv;charset=utf-8,": preamble); + tab = tab || "%09"; + newline = newline || "%0A"; + const header = tsv_columns.join(tab) + newline; const rows = []; for (let i = 0; i < bookmarks.length; i++) { - rows.push((await get_export_table_row(bookmarks[i])).join(TAB)); + rows.push((await get_export_table_row(bookmarks[i])).join(tab)); } - return `${preamble}${header}${rows.join(NEWLINE)}`; + return `${preamble}${header}${rows.join(newline)}`; } /** diff --git a/src/core/js/ext_file_download.js b/src/core/js/ext_file_download.js new file mode 100644 index 00000000..477349b4 --- /dev/null +++ b/src/core/js/ext_file_download.js @@ -0,0 +1,186 @@ +/* + * ** header v3.0 + * This file is a part of the CaosDB Project. + * + * Copyright (C) 2020 IndiScale GmbH <info@indiscale.com> + * Copyright (C) 2020 Henrik tom Wörden <h.tomwoerden@indiscale.com> + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <https://www.gnu.org/licenses/>. + * + * ** end header + */ + +'use strict'; + +/** + * The ext_file_download module offers the file entities of a table as a zip download. + * + * The zip file is generated using a server side script. 
+ * + * @module ext_file_download + * @version 0.1 + * + * @requires jQuery + * @requires log + */ +var ext_file_download = function ($, logger) { + + /** + * collect the file ids that will be passed to the zip script + * + * @return {string[]} array of entity ids + */ + const collect_ids = function(){ + var properties = $(".caosdb-f-property-value").find( + ".caosdb-id.caosdb-f-property-single-raw-value"); + var id_list = properties.toArray().map(x=>x.textContent); + var entities = $("tr[data-entity-id]") + id_list = id_list.concat(entities.toArray().map( + x=>x.attributes['data-entity-id'].value)); + return id_list + }; + + /** + * chunk list to smaller pieces. + * + * @param {string[]} list - array of (usually) ids + * @param {Number} size - chunk size (use integer) + * @return {string[][]} array of array of string from the original list. + */ + const chunk_list = function(list, size){ + size = size || 20; + var pieces = []; + var index = 0; + while (index < list.length){ + pieces.push(list.slice(index,index+size)); + index+=size; + } + + return pieces + }; + + + /** + * create select statement to find files. + * + * @param {string[]} id_list - array of ids + * @return {string} a query string + */ + const query_files_str = function(id_list){ + var povs = id_list.map(x=>` id=${x} `); + const query_str="SELECT ID FROM FILE WITH " + povs.join("or"); + return query_str; + }; + + /** + * Reduce id list to files. Throw away all ids which do not belong to a + * file entity. + * + * @param {string[]} id_list - array of ids + * @return {string[]} array of file ids. + */ + const reduce_ids = async function(id_list){ + var file_ids = [] + for (var part of chunk_list(id_list)) { + // query for files + var result = await query(query_files_str(part)); + file_ids=file_ids.concat(result.map(x => getEntityID(x))); + } + return file_ids; + }; + + + /** + * Callback function for the download files link. 
+ * + * Collects all the file entities and sends them to a server-side script + * which then puts the files into a zip. + * + * @param {HTMLElement} zip_link - the link element which triggered this + * call and which will be disabled during the execution of this + * function in order to prevent the user from triggering the process + * twice. + */ + const download_files = async function (zip_link) { + const onClickValue = zip_link.getAttribute("onClick"); + try { + // remove click event handler which called this function in the first place + zip_link.removeAttribute("onClick"); + + // add loading info. TODO make an animated one + $("#downloadModal").find(".caosdb-f-modal-footer-left").append( + createWaitingNotification("Collecting files...") + ); + + var ids = collect_ids(); + ids = await reduce_ids(ids); + if (ids.length == 0){ + alert ("There are no file entities in this table."); + return; + } + + var table = await ext_bookmarks.get_export_table(ids, "", "\t", "\n"); + + const result = await connection.runScript( + "ext_file_download/zip_files.py", + { + "-p0": ids, + "-p1": table, + } + ); + const code = result.getElementsByTagName("script")[0].getAttribute("code"); + if (parseInt(code) > 0) { + throw ("An error occurred during execution of the server-side script:\n" + + result.getElementsByTagName("script")[0].outerHTML); + } + const filename = result.getElementsByTagName("stdout")[0].textContent; + if (filename.length == 0) { + throw("Server-side script produced no file or did not return the file name: \n" + + result.getElementsByTagName("script")[0].outerHTML); + } + + + // trigger download of generated file + caosdb_table_export.go_to_script_results(filename); + + //close modal + $("#downloadModal").find(".modal-footer").find(".btn")[0].click(); + } catch (e) { + globalError(e); + } finally { + removeAllWaitingNotifications($("#downloadModal")[0]); + // restore the old click handler - hence a new file is generated with each click. 
+ zip_link.setAttribute("onClick", onClickValue); + } + + }; + + const init = function () { + // only enable when init is being called + logger.info("init ext_file_download"); + if (userIsAnonymous()) { + $("#caosdb-f-query-select-files").parent().hide(); + } + }; + + return { + init: init, + download_files: download_files, + collect_ids: collect_ids, + chunk_list: chunk_list, + }; + +}($, log.getLogger("ext_file_download")); + +// This module is registered by caosdb_table_export. diff --git a/src/core/js/ext_xls_download.js b/src/core/js/ext_xls_download.js index 0f607a45..bd11fe17 100644 --- a/src/core/js/ext_xls_download.js +++ b/src/core/js/ext_xls_download.js @@ -44,11 +44,7 @@ var caosdb_table_export = new function () { */ this.init = function() { logger.info("init caosdb_table_export"); - // TODO with AMD, use userIsAnonymous() - if (Array.from( - document.getElementsByClassName("caosdb-user-role")).map( - el => el.innerText - ).filter(el => el == "anonymous").length > 0) { + if (userIsAnonymous()) { $(".caosdb-v-query-select-data-xsl").parent().hide(); } } @@ -232,11 +228,8 @@ var caosdb_table_export = new function () { * Open the resulting xls file by setting href to the location of the resulting * file in the server's `Shared` resource and imitate a click. */ - this._go_to_script_results = function (xls_link, filename) { - xls_link.setAttribute( - "href", - location.protocol + "//" +location.host + "/Shared/" + filename); - xls_link.click(); + this.go_to_script_results = function (filename) { + window.location.href = connection.getBasePath() + "Shared/" + filename; } @@ -279,13 +272,17 @@ function downloadTSV(tsv_link) { * resulting file. 
*/ async function downloadXLS(xls_link) { - const csv_string = caosdb_table_export._get_csv_string(); - - // remove click event handler which called this function in the first place const onClickValue = xls_link.getAttribute("onClick"); - xls_link.removeAttribute("onClick"); - try { + // remove click event handler which called this function in the first place + xls_link.removeAttribute("onClick"); + + // add loading info. TODO make an animated one + $("#downloadModal").find(".caosdb-f-modal-footer-left").append( + createWaitingNotification("Exporting table...") + ); + + const csv_string = caosdb_table_export._get_csv_string(); const xls_result = await connection.runScript("xls_from_csv.py", {"-p0": {"filename": "selected.tsv", "blob": new Blob([csv_string], {type: "text/tab-separated-values;charset=utf-8"})}}); const code = xls_result.getElementsByTagName("script")[0].getAttribute("code"); @@ -299,11 +296,14 @@ async function downloadXLS(xls_link) { + xls_result.getElementsByTagName("script")[0].outerHTML); } - // set the href in order to download the file and simulate a click. - caosdb_table_export._go_to_script_results(xls_link, filename); + // trigger download of generated file + caosdb_table_export.go_to_script_results(filename); + + } catch (e) { globalError(e); } finally { + removeAllWaitingNotifications($("#downloadModal")[0]); // restore the old click handler - hence a new file is generated with each click. 
xls_link.setAttribute("onClick", onClickValue); } @@ -314,4 +314,5 @@ async function downloadXLS(xls_link) { $(document).ready(function () { caosdb_modules.register(caosdb_table_export); + caosdb_modules.register(ext_file_download); }); diff --git a/src/core/xsl/main.xsl b/src/core/xsl/main.xsl index f33bbcb8..e414f9af 100644 --- a/src/core/xsl/main.xsl +++ b/src/core/xsl/main.xsl @@ -210,6 +210,11 @@ <xsl:value-of select="concat($basepath,'webinterface/${BUILD_NUMBER}/js/edit_mode.js')"/> </xsl:attribute> </xsl:element> + <xsl:element name="script"> + <xsl:attribute name="src"> + <xsl:value-of select="concat($basepath,'webinterface/${BUILD_NUMBER}/js/ext_file_download.js')"/> + </xsl:attribute> + </xsl:element> <xsl:element name="script"> <xsl:attribute name="src"> <xsl:value-of select="concat($basepath,'webinterface/${BUILD_NUMBER}/js/leaflet.js')"/> diff --git a/src/core/xsl/query.xsl b/src/core/xsl/query.xsl index be49ed7d..ca1884ae 100644 --- a/src/core/xsl/query.xsl +++ b/src/core/xsl/query.xsl @@ -89,7 +89,7 @@ </div> <div class="col-xs-6 text-right"> <!-- Trigger the modal with a button --> - <button class="btn btn-info btn-sm" data-target="#downloadModal" data-toggle="modal" type="button">Download this table</button> + <button class="btn btn-info btn-sm caosdb-v-btn-select" data-target="#downloadModal" data-toggle="modal" type="button">Export</button> <!-- Modal --> <div class="modal fade text-left" id="downloadModal" role="dialog"> <div class="modal-dialog"> @@ -102,16 +102,21 @@ <div class="modal-body"> <p> <a id="caosdb-f-query-select-data-tsv" onclick="downloadTSV(this)" href="#selected_data.tsv" download="selected_data.tsv"> - Download TSV File + Download table as TSV File </a> <span class="checkbox" style="margin-top: 0; display: inline; position: absolute; right: 10px"><label><input type="checkbox" name="raw" id="caosdb-table-export-raw-flag-tsv" title="Export raw entity ids instead of the visible page content."/>raw</label></span> </p> <p> <a 
class="caosdb-v-query-select-data-xsl" onclick="downloadXLS(this)" href="#selected_data.xsl" download=""> - Download XLS File + Download table as XLS File </a> <span class="checkbox" style="margin-top: 0; display: inline; position: absolute; right: 10px"><label><input type="checkbox" name="raw" id="caosdb-table-export-raw-flag-xls" title="Export raw entity ids instead of the visible page content."/>raw</label></span> </p> + <p> + <a id="caosdb-f-query-select-files" onclick="ext_file_download.download_files(this)" href="#selected_data.tsv" download="files.zip" title="Collects file entities listed in the table in a zip file. If the entity belonging to a row is a file entity, it will be included."> + Download files referenced in the table + </a> + </p> <hr/> <p> <small>Download this dataset in Python with:</small> @@ -123,7 +128,13 @@ </p> </div> <div class="modal-footer"> - <button class="btn btn-default" data-dismiss="modal" type="button">Close</button> + <div class="row" style="margin:0px"> + <div class="col-xs-6 caosdb-f-modal-footer-left"> + </div> + <div class="col-xs-6"> + <button class="btn btn-default" data-dismiss="modal" type="button">Close</button> + </div> + </div> </div> </div> </div> diff --git a/src/server_side_scripting/ext_file_download/zip_files.py b/src/server_side_scripting/ext_file_download/zip_files.py new file mode 100755 index 00000000..65f27c9d --- /dev/null +++ b/src/server_side_scripting/ext_file_download/zip_files.py @@ -0,0 +1,112 @@ +#!/usr/bin/env python3 +# ** header v3.0 +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2019 IndiScale GmbH +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. 
+ +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +# +# ** end header + +"""Creates a zip file from multiple file entities. """ + +import argparse +import datetime +import io +import logging +import os +import sys +from tempfile import NamedTemporaryFile +from zipfile import ZipFile + +import caosdb as db +import pandas as pd +from caosadvancedtools.serverside import helper +from caosdb import CaosDBException, ConsistencyError, EntityDoesNotExistError + + +def _parse_arguments(): + """Parses the command line arguments. + + Takes into account defaults from the environment (where known). + """ + parser = argparse.ArgumentParser(description='__doc__') + parser.add_argument('-a', '--auth-token', required=False, + help=("An authentication token. If not provided caosdb" + " pylib will search for other methods of " + "authentication if necessary.")) + parser.add_argument('ids', help="list of entity ids.") + parser.add_argument('table', help="tsv table to be saved (as string).") + + return parser.parse_args() + + +def collect_files_in_zip(ids, table): + # File output + now = datetime.datetime.now() + zip_name = "files.{time}.zip".format( + time=now.strftime("%Y-%m-%dT%H_%M_%S")) + zip_display_path, zip_internal_path = helper.get_shared_filename(zip_name) + with ZipFile(zip_internal_path, 'w') as zf: + nc = helper.NameCollector() + + # add the table which has been genereated by the webui table exporter + with NamedTemporaryFile(delete=False) as table_file: + # the file has been transmitted as string and has to be written to + # a file first. 
+ table_file.write(table.encode()) + zf.write(table_file.name, "selected_table.tsv") + + # download and add all files + for file_id in ids: + try: + tmp = db.execute_query("FIND {a:} WITH ID={a:}".format( + a=file_id), + unique=True) + except EntityDoesNotExistError as e: + # TODO + # Current behavior: script terminates with error if just one + # file cannot be retrieved. + # Desired behavior: The script should go on with the other + # ids, but the user should be informed about the missing files. + # How should we do this? + logger = logging.getLogger("caosadvancedtools") + logger.error("Did not find Entity with ID={}.".format( + file_id)) + + raise e + savename = nc.get_unique_savename(os.path.basename(tmp.path)) + val_file = helper.get_file_via_download( + tmp, logger=logging.getLogger("caosadvancedtools")) + + zf.write(val_file, savename) + + return zip_display_path + + +def main(): + args = _parse_arguments() + + if hasattr(args, "auth_token") and args.auth_token: + db.configure_connection(auth_token=args.auth_token) + + id_list = [int(el) for el in args.ids.split(",")] + + zip_file = collect_files_in_zip(id_list, args.table) + + print(zip_file) + + +if __name__ == "__main__": + main() diff --git a/test/core/index.html b/test/core/index.html index 8488b01a..ea7b63b9 100644 --- a/test/core/index.html +++ b/test/core/index.html @@ -56,6 +56,7 @@ <script src="js/edit_mode.js"></script> <script src="js/query_shortcuts.js"></script> <script src="js/ext_references.js"></script> + <script src="js/ext_file_download.js"></script> <script src="js/ext_xls_download.js"></script> <script src="js/form_elements.js"></script> <script src="js/tour.js"></script> @@ -84,6 +85,7 @@ <script src="js/modules/navbar.xsl.js"></script> <script src="js/modules/edit_mode.js.js"></script> <script src="js/modules/ext_xls_download.js.js"></script> + <script src="js/modules/ext_file_download.js.js"></script> <script src="js/modules/query_shortcuts.js.js"></script> <script 
src="js/modules/form_elements.js.js"></script> <script src="js/modules/ext_references.js.js"></script> diff --git a/test/core/js/modules/ext_file_download.js.js b/test/core/js/modules/ext_file_download.js.js new file mode 100644 index 00000000..0c32e1e8 --- /dev/null +++ b/test/core/js/modules/ext_file_download.js.js @@ -0,0 +1,63 @@ +/* + * ** header v3.0 + * This file is a part of the CaosDB Project. + * + * Copyright (C) 2020 IndiScale GmbH <info@indiscale.com> + * Copyright (C) 2020 Henrik tom Wörden <h.tomwoerden@indiscale.com> + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <https://www.gnu.org/licenses/>. 
+ * + * ** end header + */ + +'use strict'; + +QUnit.module("ext_file_download.js", { + before: function (assert) { + // setup before module + }, + beforeEach: function (assert) { + // setup before each test + }, + afterEach: function (assert) { + // teardown after each test + }, + after: function (assert) { + // teardown after module + } +}); + +QUnit.test("chunk_list ", function(assert) { + const li = [1,2,3,4,5,6,7]; + const res = ext_file_download.chunk_list(li, 3); + assert.equal(res.length, 3, "number of parts"); + assert.propEqual(res[2], [7], "number of parts"); +}); + +QUnit.test("collect_ids ", function (assert) { + const line = id => $(`<tr data-entity-id="${id}"/>`); + const prop_val = x => $(`<div class="caosdb-f-property-value"/>`); + const single_val =x => $(`<div class="caosdb-f-property-single-raw-value caosdb-id">${x}</div>`); + + + const line1 = line("34"); + line1.append([prop_val().append(single_val("5")),prop_val().append(single_val("6"))]) + $("body").append([line1]); + + const res = ext_file_download.collect_ids() + assert.ok(res.indexOf("5") > -1, "missing id"); + assert.ok(res.indexOf("6") > -1, "missing id"); + assert.ok(res.indexOf("34") > -1, "missing id"); + +}); diff --git a/test/core/js/modules/ext_xls_download.js.js b/test/core/js/modules/ext_xls_download.js.js index 4f9dc6c5..9a59e035 100644 --- a/test/core/js/modules/ext_xls_download.js.js +++ b/test/core/js/modules/ext_xls_download.js.js @@ -79,16 +79,14 @@ QUnit.module("ext_xls_download", { return str2xml('<response><script code="0" /><stdout>bla</stdout></response>'); } - caosdb_table_export._go_to_script_results = function(xls_link, filename) { - xls_link.setAttribute( - "href", - location.protocol + "//" +location.host + "/Shared/" + filename); + caosdb_table_export.go_to_script_results = function(filename) { assert.equal(filename, "bla", "filename correct"); done(); } var tsv_data = $('<a id="caosdb-f-query-select-data-tsv" />'); - $(document.body).append(tsv_data); + var 
modal = $('<div id="downloadModal"><div>'); + $(document.body).append([tsv_data, modal]); var xsl_link = $("<a/>"); @@ -96,9 +94,8 @@ QUnit.module("ext_xls_download", { await sleep(500); - assert.ok(xsl_link.attr("href").endsWith("Shared/bla"), xsl_link.attr("href") + " ends with Shared/bla"); - tsv_data.remove(); + modal.remove(); }); } -- GitLab