diff --git a/CHANGELOG.md b/CHANGELOG.md index e9185233190dbfe2a9bf3b052af12af9f4cd9116..92180a7d809b1367baf65132942287d099427572 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,7 +4,30 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [0.11.0] 2024-07-09 ## +## [0.12.0] - 2024-07-31 ## + +### Added ### + +- Possibility to configure custom CSS to be used in static HTML sites created + by the crawler. + +### Changed ### + +- The static HTML pages that are generated by the crawler now require the + webinterface to be built with `JS_DIST_BUNDLE=TRUE`, i.e. all JS source files + bundled into one `webcaosdb.dist.js`. This is the default and highly + recommended for production systems in any case, but it will break the display + of these static files in certain corner cases where you may want to use + `JS_DIST_BUNDLE=FALSE` for development or debugging reasons. + +### Fixed ### + +- Rendering of entities in static HTML sites created by the crawler. +- [#63](https://gitlab.com/linkahead/linkahead-advanced-user-tools/-/issues/63) + The `TableImporter` now reports in detail the row and column in which + a `TypeError` occurred. 
+ +## [0.11.0] - 2024-07-09 ## ### Added ### diff --git a/CITATION.cff b/CITATION.cff index cb8d326b2b21a51398e21c7945cf1aa0a626999f..c46285346bf68c52232e775a4fa7d57538748987 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -20,6 +20,6 @@ authors: given-names: Stefan orcid: https://orcid.org/0000-0001-7214-8125 title: CaosDB - Advanced User Tools -version: 0.11.0 +version: 0.12.0 doi: 10.3390/data4020083 -date-released: 2024-07-09 \ No newline at end of file +date-released: 2024-07-31 \ No newline at end of file diff --git a/setup.py b/setup.py index acc3a8de96acf27a553a696175d99c708ac85dd9..03c515e6f65f93c9f4a2e685bf2541a8ff4bb66e 100755 --- a/setup.py +++ b/setup.py @@ -46,7 +46,7 @@ from setuptools import find_packages, setup ######################################################################## MAJOR = 0 -MINOR = 11 +MINOR = 12 MICRO = 0 PRE = "" # e.g. rc0, alpha.1, 0.beta-23 ISRELEASED = True diff --git a/src/caosadvancedtools/crawler.py b/src/caosadvancedtools/crawler.py index 724004479e1e909057764f74e7d459a9aac72dc1..fc3b260b4ae26c608b40f26e4b80192fdbd0e00d 100644 --- a/src/caosadvancedtools/crawler.py +++ b/src/caosadvancedtools/crawler.py @@ -518,6 +518,12 @@ ____________________\n""".format(i+1, len(pending_changes)) + str(el[3])) """ from xml.sax.saxutils import escape + caosdb_config = db.configuration.get_config() + if ("advancedtools" in caosdb_config and "crawler.customcssfile" in + caosdb_config["advancedtools"]): + cssfile = caosdb_config["advancedtools"]["crawler.customcssfile"] + else: + cssfile = None # TODO move path related stuff to sss_helper form = """ <html> @@ -526,21 +532,17 @@ ____________________\n""".format(i+1, len(pending_changes)) + str(el[3])) <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"> <meta name="viewport" content="width=device-width, initial-scale=1"> <title>Crawler</title> - <link rel="stylesheet" href="{url}/webinterface/css/webcaosdb.css"/> <link rel="stylesheet" 
href="{url}/webinterface/css/bootstrap.css"> - <script src="{url}/webinterface/js/jquery.js"></script> - <script src="{url}/webinterface/js/utif.js"></script> - <script src="{url}/webinterface/js/loglevel.js"></script> - <script src="{url}/webinterface/js/bootstrap.js"></script> - <script src="{url}/webinterface/js/ext_table_preview.js"></script> - <script src="{url}/webinterface/js/webcaosdb.js"></script> - <script src="{url}/webinterface/js/plotly.js"></script> - <script src="{url}/webinterface/js/caosdb.js"></script> - <script src="{url}/webinterface/js/state-machine.js"></script> - <script src="{url}/webinterface/js/showdown.js"></script> - <script src="{url}/webinterface/js/preview.js"></script> - <script src="{url}/webinterface/js/ext_references.js"></script> - <script src="{url}/webinterface/js/ext_bottom_line.js"></script> + <link rel="stylesheet" href="{url}/webinterface/css/webcaosdb.css"/> + <link rel="stylesheet" href="{url}/webinterface/css/linkahead.css"/> + <link rel="stylesheet" href="{url}/webinterface/css/dropzone.css"> + <link rel="stylesheet" href="{url}/webinterface/css/tour.css"> + <link rel="stylesheet" href="{url}/webinterface/css/leaflet.css"> + <link rel="stylesheet" href="{url}/webinterface/css/leaflet-coordinates.css"> + <link rel="stylesheet" href="{url}/webinterface/css/bootstrap-select.css"> + <link rel="stylesheet" href="{url}/webinterface/css/bootstrap-icons.css"> + {customcssfile} + <script src="{url}/webinterface/webcaosdb.dist.js"></script> </head> <body> <form method="post" action="{url}/scripting"> @@ -582,9 +584,11 @@ ____________________\n""".format(i+1, len(pending_changes)) + str(el[3])) </script> </body> </html> -""".format(url=db.configuration.get_config()["Connection"]["url"], +""".format(url=caosdb_config["Connection"]["url"], rid=run_id, changes=escape("\n".join(changes)), + customcssfile='<link rel="stylesheet" href="{url}/webinterface/css/{customcssfile}"/>'.format( + url=caosdb_config["Connection"]["url"], 
customcssfile=cssfile) if cssfile else "", path=path) if "SHARED_DIR" in os.environ: diff --git a/src/caosadvancedtools/table_importer.py b/src/caosadvancedtools/table_importer.py index 5efd0500a4c5a797a27a92caf0cd2a49165fddd2..cd1b206f7ebbe7730692a3a6a7137e4aa467a5eb 100755 --- a/src/caosadvancedtools/table_importer.py +++ b/src/caosadvancedtools/table_importer.py @@ -31,7 +31,7 @@ import logging import pathlib from datetime import datetime -import caosdb as db +import linkahead as db import numpy as np import pandas as pd from xlrd import XLRDError @@ -537,6 +537,46 @@ class CSVImporter(TableImporter): extra={'identifier': str(filename), 'category': "inconsistency"}) raise DataInconsistencyError(*ve.args) + except TypeError as te: + # Iterate through the columns and rows to identify + # problematic cells with wrong types. + df = pd.read_csv(filename, sep=sep, + converters=applicable_converters, dtype=None, + **kwargs) + error_dict = {} + columns_with_errors = [] + for key, dtype in self.datatypes.items(): + if key not in df.columns: + continue + try: + df[key].astype(dtype) + except (TypeError, ValueError): + columns_with_errors.append(key) + if not columns_with_errors: + # We may have run into any other TypeError not caused + # by wrong datatypes within the table. + raise te + for ii, row in df.iterrows(): + for name in columns_with_errors: + try: + # we need to check with astype to provoke the + # same errors, but that only works on + # Dataframes, so cast value to list to + # DataFrame. 
+ pd.DataFrame([row[name]]).astype(self.datatypes[name]) + except (TypeError, ValueError): + if ii not in error_dict: + error_dict[ii] = [] + error_dict[ii].append( + (name, str(self.datatypes[name]).strip("<>"), str(type(row[name])).strip("<>")) + ) + msg = "Elements with wrong datatypes encountered:\n" + for ii, error_list in error_dict.items(): + msg += f"* row {ii}:\n" + for err in error_list: + msg += f" * column \"{err[0]}\": Expected \"{err[1]}\" but found \"{err[2]}\".\n" + msg += '\n' + raise DataInconsistencyError(msg) df = self.check_dataframe(df, filename) diff --git a/src/doc/conf.py b/src/doc/conf.py index e3a95f36ce69146adbc3b91c9f28a3169286d63d..0fc4ff0811cf4b845e5978b553e7760ac3000637 100644 --- a/src/doc/conf.py +++ b/src/doc/conf.py @@ -27,9 +27,9 @@ copyright = '2023, IndiScale GmbH' author = 'Daniel Hornung' # The short X.Y version -version = '0.11.0' +version = '0.12.0' # The full version, including alpha/beta/rc tags -release = '0.11.0' +release = '0.12.0' # -- General configuration --------------------------------------------------- diff --git a/unittests/test_table_importer.py b/unittests/test_table_importer.py index 6d445056b240e5ede6c52cb055cdde86cfb6d3d7..0abc28bba17dfbcf8f0ce59a15e51ace68db9167 100644 --- a/unittests/test_table_importer.py +++ b/unittests/test_table_importer.py @@ -379,6 +379,50 @@ class CSVImporterTest(TableImporterTest): assert df["int_with_gaps"].dtype == "Int64" assert df["float"].dtype == float + def test_wrong_datatype_type_errors(self): + """Test for + https://gitlab.com/linkahead/linkahead-advanced-user-tools/-/issues/63: + Highlight rows and columns in which type errors occur. + + """ + tmpfile = NamedTemporaryFile(delete=False, suffix=".csv") + with open(tmpfile.name, 'w') as tmp: + # Wrong types at row index 1 (both columns) and 3 ("float").
+ tmp.write( + "int,float\n" + "1,2.3\n" + "4.5,word\n" + "0,1.2\n" + "-12,12+3j\n" + ) + kwargs = { + "datatypes": { + "int": int, + "float": float, + "not-in-table": str # An unused datatype definition must not cause problems. + }, + "obligatory_columns": ["int"], + "converters": {} + } + importer = CSVImporter(**kwargs) + with pytest.raises(DataInconsistencyError) as die: + df = importer.read_file(tmpfile.name) + msg = str(die.value) + print("\n" + msg) + assert "Elements with wrong datatypes encountered:\n" in msg + # Errors in rows 1 and 3, no errors in 2 and 4 + assert "* row 1:\n" in msg + assert "* row 2:\n" not in msg + assert "* row 3:\n" in msg + assert "* row 4:\n" not in msg + row_1_msgs, row_3_msgs = msg.split("* row 1:\n")[1].split("* row 3:\n") + # exactly 2 errors in row 1, exactly 1 in row 3 + assert len(row_1_msgs.strip().split('\n')) == 2 + assert len(row_3_msgs.strip().split('\n')) == 1 + assert " * column \"int\"" in row_1_msgs + assert " * column \"float\"" in row_1_msgs + assert " * column \"float\"" in row_3_msgs + class TSVImporterTest(TableImporterTest): def test_full(self):