diff --git a/CHANGELOG.md b/CHANGELOG.md index 8d64ed24de317804b67b5cbdc8c7990ef2a9f2bf..e5090e8922819f18776c577ba31fb4e05700e443 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - `h5` instead of `h5-crawler` - `dev`, `doc`, `test` and `all` are new, they install the dependencies for developing, testing, documentation and everything. +- The `pandoc_header_tools.get_header()` parameter `add_header` has been renamed to `add_header_to_file` + to resolve a name collision. ### Deprecated ### diff --git a/Makefile b/Makefile index c53f7013c691efc4294bfc7391a0801b478bedd4..1ce56f5b7c88bcf22d731f815c2af2084bca353e 100644 --- a/Makefile +++ b/Makefile @@ -41,5 +41,5 @@ style: .PHONY: style lint: - pylint --unsafe-load-any-extension=y --fail-under=9.72 -d R,C --ignore=swagger_client src/caosadvancedtools + pylint --unsafe-load-any-extension=y -d R,C --ignore=swagger_client src/caosadvancedtools .PHONY: lint diff --git a/integrationtests/test_base_table_exporter_integration.py b/integrationtests/test_base_table_exporter_integration.py index 286c4ac3a6a6f11d5c308892615579ea60ea89db..0dbfc7e785be0949fb6c3c99a68225d0e5ea7c7a 100644 --- a/integrationtests/test_base_table_exporter_integration.py +++ b/integrationtests/test_base_table_exporter_integration.py @@ -82,7 +82,7 @@ def setup_module(): """Clear all test entities""" try: db.execute_query("FIND ENTITY Test*").delete() - except BaseException: + except Exception: pass diff --git a/integrationtests/test_cache.py b/integrationtests/test_cache.py index 13470b8b0d523d1a30d3cf6895efabae3b245784..7724cfb4febd3a605419129f1250e9d2dee5e764 100644 --- a/integrationtests/test_cache.py +++ b/integrationtests/test_cache.py @@ -67,7 +67,7 @@ class CacheTest(unittest.TestCase): print(db.execute_query("FIND entity with id="+str(rec.id), unique=True)) try: print(db.execute_query("FIND Record "+str(rec.id), unique=True)) - except BaseException: + except Exception: print("Query does not work as expected") update.insert(cont, run_id) assert len(update.get_updates(run_id)) == 1 diff --git a/integrationtests/test_crawler_basics.py b/integrationtests/test_crawler_basics.py index 04eb54599fa844e3b6b23295c809b4435bb465f4..67317f32981849f4786e3b6719297d6953dffd40 100644 --- a/integrationtests/test_crawler_basics.py +++ b/integrationtests/test_crawler_basics.py @@ -114,7 +114,7 @@ class CrawlerTest(unittest.TestCase): for el in [self.rec1, self.rec2, self.rec3]: try: el.delete() - except BaseException: + except Exception: pass diff --git a/pylintrc b/pylintrc index 1be7cd8dffb322170d0c793a44ae43f98744be5e..e95afc90a55d72129f1c89ece4b935f40296989b 100644 --- a/pylintrc +++ b/pylintrc @@ -19,4 +19,5 @@ init-hook= disable= fixme, logging-format-interpolation, + logging-fstring-interpolation, logging-not-lazy, diff --git a/src/caosadvancedtools/cache.py b/src/caosadvancedtools/cache.py index 4656439374353035618d64c23677ff7c174fbf84..5fc1ec3c22fbee0096a9167ea35e0bc59bfb0665 100644 --- a/src/caosadvancedtools/cache.py +++ b/src/caosadvancedtools/cache.py @@ -94,7 +94,6 @@ class AbstractCache(ABC): Increase this variable, when changes to the cache tables are made. """ - pass @abstractmethod def create_cache(self): @@ -102,14 +101,12 @@ class AbstractCache(ABC): Provide an overloaded function here that creates the cache in the most recent version. """ - pass @abstractmethod def get_default_file_name(self): """ Supply a default file name for the cache here. 
""" - pass def check_cache(self): """ @@ -192,9 +189,6 @@ class IdentifiableCache(AbstractCache): def get_default_file_name(self): return "caosdb_identifiable_cache.db" - def __init__(self, db_file=None, force_creation=False): - super().__init__(db_file, force_creation) - def create_cache(self): """ Create a new SQLITE cache file in self.db_file. diff --git a/src/caosadvancedtools/cfood.py b/src/caosadvancedtools/cfood.py index e79f0373bf4fe8500d0acb4f3c17dff04404d607..0eb826321d9889c6d8fcc09f6208bb56b9d3066e 100644 --- a/src/caosadvancedtools/cfood.py +++ b/src/caosadvancedtools/cfood.py @@ -46,13 +46,17 @@ from abc import ABCMeta, abstractmethod from datetime import datetime import linkahead as db -from linkahead.common.models import Entity from linkahead.exceptions import (BadQueryError, EmptyUniqueQueryError, - QueryNotUniqueError, TransactionError) + QueryNotUniqueError) from .datamodel_problems import DataModelProblems from .guard import global_guard as guard +# The pylint warnings triggered in this file are ignored, as this code is +# assumed to be deprecated in the near future. Should this change, they need +# to be reevaluated. + + ENTITIES = {} PROPERTIES = {} RECORDS = {} @@ -184,7 +188,7 @@ class AbstractCFood(object, metaclass=ABCMeta): """ @classmethod - def match_item(cls, item): + def match_item(cls, item): # pylint: disable=unused-argument """ Matches an item found by the crawler against this class. Returns True if the item shall be treated by this class, i.e. if this class matches the item. @@ -209,7 +213,6 @@ class AbstractCFood(object, metaclass=ABCMeta): To be overwritten by subclasses """ - pass def attach(self, item): self.attached_items.append(item) @@ -217,7 +220,7 @@ class AbstractCFood(object, metaclass=ABCMeta): # TODO looking for should `attach` the files itsself. This would allow to # group them right away and makes it unnecessary to check matches later # again. - def looking_for(self, item): + def looking_for(self, item): # pylint: disable=unused-argument """ returns True if item can be added to this CFood. @@ -353,7 +356,7 @@ class AbstractFileCFood(AbstractCFood): raise NotImplementedError() @classmethod - def match_item(cls, path): + def match_item(cls, path): # pylint: disable=arguments-renamed """ Matches the regular expression of this class against file names Parameters @@ -367,7 +370,7 @@ class AbstractFileCFood(AbstractCFood): # TODO looking for should `attach` the files itsself. This would allow to # group them right away and makes it unnecessary to check matches later # again. - def looking_for(self, crawled_file): + def looking_for(self, crawled_file): # pylint: disable=arguments-renamed """ returns True if crawled_file can be added to this CFood. @@ -569,6 +572,7 @@ def assure_parents_are(entity, parents, to_be_updated=None, the new parents and the old ones are discarded. Note that parent matching occurs based on names. + If a parent does not have a name, a ValueError is raised. If the list to_be_updated is supplied, the entity is added to the list in order to indicate, that the entity `entity` should be updated. 
@@ -583,7 +587,7 @@ def assure_parents_are(entity, parents, to_be_updated=None, for i, e in enumerate(parents): if isinstance(e, db.Entity): if e.name is None: - raise Exception("Entity should have name") + raise ValueError("Entity should have name") else: parents[i] = db.Entity(name=e) @@ -690,7 +694,7 @@ def assure_has_property(entity, name, value, to_be_updated=None, try: compare_time = datetime.fromisoformat(el.value) - except ValueError: + except ValueError as e: # special case of wrong iso format # time zone tmp = el.value.split("+") @@ -708,7 +712,7 @@ def assure_has_property(entity, name, value, to_be_updated=None, ms = '.' + tmp[1] + '0'*(6-len(tmp[1])) else: raise ValueError( - "invalid millisecond format in {}".format(el.value)) + "invalid millisecond format in {}".format(el.value)) from e else: ms = "" tmp = tmp[0] + ms + tz_str @@ -746,7 +750,7 @@ def assure_has_property(entity, name, value, to_be_updated=None, def assure_property_is(entity, name, value, datatype=None, to_be_updated=None, - force=False): + force=False): # pylint: disable=unused-argument """ Checks whether `entity` has a Property `name` with the given value. diff --git a/src/caosadvancedtools/collect_datamodel.py b/src/caosadvancedtools/collect_datamodel.py index 1c37bab0007f0fa715a0e106c38dd18faf7015b8..bb69a03d4475c8af21f69fd5d9ffd3ac4adb1f2f 100644 --- a/src/caosadvancedtools/collect_datamodel.py +++ b/src/caosadvancedtools/collect_datamodel.py @@ -62,9 +62,9 @@ def store(directory, xml=False): rts, ps = get_dm() os.makedirs(directory, exist_ok=True) - with open(os.path.join(directory, "recordtypes.txt"), "w") as fi: + with open(os.path.join(directory, "recordtypes.txt"), "w", encoding="utf-8") as fi: fi.write(",".join([el[1] for el in rts])) - with open(os.path.join(directory, "properties.txt"), "w") as fi: + with open(os.path.join(directory, "properties.txt"), "w", encoding="utf-8") as fi: fi.write(",".join([el[1] for el in ps])) if xml: @@ -75,10 +75,10 @@ def store(directory, xml=False): def load_dm(directory): - with open(os.path.join(directory, "recordtypes.txt"), "r") as fi: + with open(os.path.join(directory, "recordtypes.txt"), "r", encoding="utf-8") as fi: text = fi.read() rts = [el.strip() for el in text.split(",")] - with open(os.path.join(directory, "properties.txt"), "r") as fi: + with open(os.path.join(directory, "properties.txt"), "r", encoding="utf-8") as fi: text = fi.read() ps = [el.strip() for el in text.split(",")] @@ -112,7 +112,7 @@ def compare(directory): print("{} is missing in the existing properties".format(p)) -if __name__ == "__main__": +def main(): p = get_parser() args = p.parse_args() @@ -121,3 +121,7 @@ if __name__ == "__main__": if args.compare: compare(args.compare) + + +if __name__ == "__main__": + main() diff --git a/src/caosadvancedtools/crawler.py b/src/caosadvancedtools/crawler.py index 7a840624f7287b8292e27c743636df6bbead6d42..7b66440ffb0659fd76e012e812deae2b0055d55a 100644 --- a/src/caosadvancedtools/crawler.py +++ b/src/caosadvancedtools/crawler.py @@ -41,7 +41,6 @@ match. This occurs in basically three steps: import logging import os -import subprocess import traceback import uuid from datetime import datetime @@ -60,6 +59,11 @@ from .serverside.helper import send_mail as main_send_mail from .suppressKnown import SuppressKnown from .utils import create_entity_link +# The pylint warnings triggered in this file are ignored, as this code is +# assumed to be deprecated in the near future. Should this change, they need +# to be reevaluated. 
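The `raise ... from e` change in cfood.py above switches to explicit exception chaining: the original ValueError stays attached as `__cause__` instead of being silently discarded. A self-contained sketch of the pattern (`parse_timestamp` is a stand-in for the timestamp handling in `assure_has_property`, not a function in the package):

    from datetime import datetime

    def parse_timestamp(value: str) -> datetime:
        try:
            return datetime.fromisoformat(value)
        except ValueError as e:
            # "from e" keeps the original exception in the traceback, so
            # both the re-raised error and its cause remain visible.
            raise ValueError("invalid timestamp format in {}".format(value)) from e
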
+ + logger = logging.getLogger(__name__) @@ -67,7 +71,7 @@ def separated(text): return "-"*60 + "\n" + text -def apply_list_of_updates(to_be_updated, update_flags={}, +def apply_list_of_updates(to_be_updated, update_flags=None, update_cache=None, run_id=None): """Updates the `to_be_updated` Container, i.e., pushes the changes to LinkAhead after removing possible duplicates. If a chace is provided, uauthorized @@ -87,6 +91,8 @@ def apply_list_of_updates(to_be_updated, update_flags={}, Id with which the pending updates are cached. Only meaningful if `update_cache` is provided. Default is None. """ + if update_flags is None: + update_flags = {} if len(to_be_updated) == 0: return @@ -132,7 +138,7 @@ def apply_list_of_updates(to_be_updated, update_flags={}, ) logger.debug(traceback.format_exc()) logger.debug(e) - except Exception as e: + except Exception as e: # pylint: disable=broad-exception-caught DataModelProblems.evaluate_exception(e) @@ -169,6 +175,7 @@ class Crawler(object): self.abort_on_exception = abort_on_exception self.update_cache = UpdateCache() self.filterKnown = SuppressKnown() + self.run_id = None advancedtoolslogger = logging.getLogger("caosadvancedtools") # TODO this seems to be a bad idea. What if the handler was not added @@ -220,7 +227,7 @@ class Crawler(object): new_cont = db.Container.from_xml(new) ids = [] tmp = db.Container() - update_incomplete = False + update_incomplete = False # pylint: disable=unused-variable # remove duplicate entities for el in new_cont: if el.id not in ids: @@ -229,13 +236,13 @@ class Crawler(object): else: update_incomplete = True new_cont = tmp - if new_cont[0].version: + if new_cont[0].version: # pylint: disable=no-member valids = db.Container() nonvalids = db.Container() for ent in new_cont: remote_ent = db.Entity(id=ent.id).retrieve() - if ent.version == remote_ent.version: + if ent.version == remote_ent.version: # pylint: disable=no-member valids.append(ent) else: update_incomplete = True @@ -317,10 +324,10 @@ class Crawler(object): logger.debug(e) # TODO: Generally: in which cases should exceptions be raised? When is # errors_occured set to True? The expected behavior must be documented. - except Exception as e: + except Exception as e: # pylint: disable=broad-exception-caught try: DataModelProblems.evaluate_exception(e) - except BaseException: + except Exception: # pylint: disable=broad-exception-caught pass logger.debug("Failed during execution of {}!".format( Cfood.__name__)) @@ -349,13 +356,13 @@ class Crawler(object): logger.info("Cannot access {}. 
However, it might be needed for" " the correct execution".format(e.filename)) remove_cfoods.append(cfood) - except Exception as e: + except Exception as e: # pylint: disable=broad-exception-caught try: DataModelProblems.evaluate_exception(e) - except BaseException: + except Exception: # pylint: disable=broad-exception-caught pass logger.debug("Failed during execution of {}!".format( - Cfood.__name__)) + cfood.__name__)) logger.debug(traceback.format_exc()) logger.debug(e) remove_cfoods.append(cfood) @@ -442,10 +449,10 @@ class Crawler(object): except DataInconsistencyError as e: logger.debug(traceback.format_exc()) logger.debug(e) - except Exception as e: + except Exception as e: # pylint: disable=broad-exception-caught try: DataModelProblems.evaluate_exception(e) - except Exception: + except Exception: # pylint: disable=broad-exception-caught pass logger.info("Failed during execution of {}!".format( cfood.__class__.__name__)) @@ -488,8 +495,8 @@ ____________________\n""".format(i+1, len(pending_changes)) + str(el[3])) logger.error(err_msg) logger.error('Crawler finished with Datamodel Errors') elif errors_occured: - msg = "There were fatal errors during execution, please " - "contact the system administrator!" + msg = ("There were fatal errors during execution, please contact " + "the system administrator!") if self.debug_file: msg += "\nPlease provide the following path:\n{}".format( @@ -600,7 +607,7 @@ ____________________\n""".format(i+1, len(pending_changes)) + str(el[3])) randname = os.path.basename(os.path.abspath(directory)) filepath = os.path.abspath(os.path.join(directory, filename)) filename = os.path.join(randname, filename) - with open(filepath, "w") as f: + with open(filepath, "w", encoding="utf-8") as f: f.write(form) return filename @@ -680,7 +687,7 @@ carefully and if the changes are ok, click on the following link: guard.safe_insert(missing, unique=False, flags={"force-missing-obligatory": "ignore"}) inserted.append(ent) - except Exception as e: + except Exception as e: # pylint: disable=broad-exception-caught DataModelProblems.evaluate_exception(e) if len(existing) > 0: info = "Identified the following existing entities:\n" diff --git a/src/caosadvancedtools/example_cfood.py b/src/caosadvancedtools/example_cfood.py index 45984998451cd41739b696ca8fac5141f71674de..43a558fdd1f1de134d5194dac23e902ca0070bab 100644 --- a/src/caosadvancedtools/example_cfood.py +++ b/src/caosadvancedtools/example_cfood.py @@ -31,6 +31,10 @@ class ExampleCFood(AbstractFileCFood): return (r".*/(?P<species>[^/]+)/" r"(?P<date>\d{4}-\d{2}-\d{2})/README.md") + def __init__(self, crawled_path, *args, **kwargs): + super().__init__(crawled_path, *args, **kwargs) + self.experiment = None + def create_identifiables(self): self.experiment = db.Record() self.experiment.add_parent(name="Experiment") diff --git a/src/caosadvancedtools/export_related.py b/src/caosadvancedtools/export_related.py index 1ac6d2cbbd9f25f90ff840216bcea7eb3276b3e7..c7f25c90a3cf68a4b427eeaa2db61e1ec5d161ef 100755 --- a/src/caosadvancedtools/export_related.py +++ b/src/caosadvancedtools/export_related.py @@ -118,7 +118,7 @@ def export(cont, directory="."): try: el.download(target) print("Downloaded:", target) - except BaseException: + except Exception: # pylint: disable=broad-exception-caught print("Failed download of:", target) invert_ids(cont) @@ -128,7 +128,7 @@ def export(cont, directory="."): xml = etree.tounicode(cont.to_xml( local_serialization=True), pretty_print=True) - with open(os.path.join(directory, "linkahead_data.xml"), 
"w") as fi: + with open(os.path.join(directory, "linkahead_data.xml"), "w", encoding="utf-8") as fi: fi.write(xml) @@ -149,8 +149,12 @@ def defineParser(): return parser -if __name__ == "__main__": +def main(): parser = defineParser() args = parser.parse_args() export_related_to(args.id, directory=args.directory) + + +if __name__ == "__main__": + main() diff --git a/src/caosadvancedtools/import_from_xml.py b/src/caosadvancedtools/import_from_xml.py index 7bc9f018b491ec64266d2be038b89102479c121e..7eeafa671a8aaa5469aa4c05ca685ecf937bf687 100755 --- a/src/caosadvancedtools/import_from_xml.py +++ b/src/caosadvancedtools/import_from_xml.py @@ -39,7 +39,7 @@ from caosadvancedtools.models.data_model import DataModel def create_dummy_file(text="Please ask the administrator for this file."): tmpfile = NamedTemporaryFile(delete=False) tmpfile.close() - with open(tmpfile.name, "w") as tm: + with open(tmpfile.name, "w", encoding="utf-8") as tm: tm.write(text) return tmpfile.name @@ -51,7 +51,7 @@ def import_xml(filename, rerun=False, interactive=True): rerun: boolean; if true, files are not inserted as paths would conflict. """ cont = db.Container() - with open(filename) as fi: + with open(filename, encoding="utf-8") as fi: cont = cont.from_xml(fi.read()) tmpfile = create_dummy_file() @@ -63,7 +63,7 @@ def import_xml(filename, rerun=False, interactive=True): for el in cont: if isinstance(el, db.File): - el._checksum = None + el._checksum = None # pylint: disable=protected-access target = os.path.join("downloads", el.path[1:]) if os.path.exists(target): @@ -94,7 +94,7 @@ def import_xml(filename, rerun=False, interactive=True): if not rerun: for _, el in enumerate(files.values()): - r = el.insert(unique=False) + el.insert(unique=False) else: for _, el in enumerate(files.values()): el.id = None @@ -122,8 +122,12 @@ def defineParser(): return parser -if __name__ == "__main__": +def main(): parser = defineParser() args = parser.parse_args() import_xml(args.file, args.rerun) + + +if __name__ == "__main__": + main() diff --git a/src/caosadvancedtools/loadFiles.py b/src/caosadvancedtools/loadFiles.py index c9258afaff88b53f843d684a4d8b18a4baf55688..f29bdd9e27c480d01313bc6c74553a5b6f3bebbf 100755 --- a/src/caosadvancedtools/loadFiles.py +++ b/src/caosadvancedtools/loadFiles.py @@ -90,9 +90,9 @@ def combine_ignore_files(caosdbignore: str, localignore: str, dirname=None) -> s tmp = NamedTemporaryFile(delete=False, mode="w", dir=dirname, prefix=".caosdbignore") - with open(caosdbignore, "r") as base: + with open(caosdbignore, "r", encoding="utf-8") as base: tmp.write(base.read()) - with open(localignore, "r") as local: + with open(localignore, "r", encoding="utf-8") as local: tmp.write(local.read()) tmp.close() return tmp.name @@ -122,7 +122,7 @@ def compile_file_list(caosdbignore: str, localpath: str) -> list[str]: current_ignore = caosdbignore non_ignored_files = [] ignore_files: list[tuple[str, str]] = [] - for root, dirs, files in os.walk(localpath): + for root, _, files in os.walk(localpath): # remove local ignore files that do no longer apply to the current subtree (branch switch) while len(ignore_files) > 0 and not root.startswith(ignore_files[-1][0]): os.remove(ignore_files[-1][1]) @@ -375,9 +375,7 @@ exclude is given preference over include. 
logger.addHandler(logging.StreamHandler(stream=sys.stdout)) logger.setLevel(logging.INFO) - con = db.get_connection() - con.timeout = float(args.timeout) - con._login() + db.configure_connection(timeout=float(args.timeout)) loadpath( path=args.path, diff --git a/src/caosadvancedtools/models/data_model.py b/src/caosadvancedtools/models/data_model.py index 6d30b8fb8a1d0fe25e4834ceaabe12a03f342642..16f5702abf015aff62123e411e5eda5c4e14361a 100644 --- a/src/caosadvancedtools/models/data_model.py +++ b/src/caosadvancedtools/models/data_model.py @@ -23,6 +23,7 @@ # ** end header # from copy import deepcopy +from typing import Optional import linkahead as db import linkahead.common.models as models @@ -246,7 +247,7 @@ class DataModel(dict): for par in entity.get_parents(): if par.name.lower() == valid_e.name.lower(): - par._wrap(valid_e) + par.id = valid_e.id def collect_entities(self): """ Collects all entities: explicitly defined RecordTypes and @@ -262,7 +263,8 @@ class DataModel(dict): return list(all_ents.values()) - def get_deep(self, name: str, visited_props: dict = None, visited_parents: set = None): + def get_deep(self, name: str, visited_props: Optional[dict] = None, + visited_parents: Optional[set] = None): """Attempt to resolve references for the given ``name``. The returned entity has all the properties it inherits from its ancestry and all properties @@ -291,7 +293,7 @@ class DataModel(dict): if parent.name in visited_parents: continue visited_parents.add(parent.name) - parent_importance = importances.get(parent._flags.get("inheritance"), 999) + parent_importance = importances.get(parent.flags.get("inheritance"), 999) if parent.name in self: deep_parent = self.get_deep(parent.name, # visited_props=visited_props, visited_parents=visited_parents @@ -300,7 +302,7 @@ class DataModel(dict): for prop in deep_parent.properties: importance = importances[deep_parent.get_importance(prop.name)] if (importance <= parent_importance - and prop.name not in [prop.name for prop in entity.properties]): + and prop.name not in [p.name for p in entity.properties]): entity.add_property(prop) else: print(f"Referenced parent \"{parent.name}\" not found in data model.") diff --git a/src/caosadvancedtools/models/parser.py b/src/caosadvancedtools/models/parser.py index b1e3aa95009c92db5d78471b3b384d9f992a5e1a..407346179fe0190fd9e259ab63b14c61461bd1c1 100644 --- a/src/caosadvancedtools/models/parser.py +++ b/src/caosadvancedtools/models/parser.py @@ -1,1044 +1,1059 @@ -# This file is a part of the LinkAhead project. -# -# Copyright (C) 2023 IndiScale GmbH <info@indiscale.com> -# Copyright (C) 2022 Florian Spreckelsen <f.spreckelsen@indiscale.com> -# Copyright (C) 2023 Daniel Hornung <d.hornung@indiscale.com> -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see <https://www.gnu.org/licenses/>. - -""" -This module (and script) provides methods to read a DataModel from a YAML file. 
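The signature changes in data_model.py above (`get_deep`), like `types_for_missing_array_items` in parser.py below, replace mutable default values with `None` plus an `Optional` annotation. A minimal illustration of why (`visit` is a toy function, not from this code base):

    from typing import Optional

    def visit(name: str, seen: Optional[set] = None) -> set:
        # A default of `seen: set = set()` would be evaluated once, at
        # function definition time, and shared between all calls.
        # Creating the set inside the body gives each call a fresh one.
        if seen is None:
            seen = set()
        seen.add(name)
        return seen
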
- -If a file name is passed to parse_model_from_yaml it is parsed and a DataModel -is created. The yaml file needs to be structured in a certain way which will be -described in the following. - -The file should only contain a dictionary. The keys are the names of -RecordTypes or Properties. The values are again dictionaries describing the -entities. This information can be defined via the keys listed in KEYWORDS. -Notably, properties can be given in a dictionary under the xxxx_properties keys -and will be added with the respective importance. These properties can be -RecordTypes or Properties and can be defined right there. -Every Property or RecordType only needs to be defined once anywhere. When it is -not defined, simply the name can be supplied with no value. -Parents can be provided under the 'inherit_from_xxxx' keywords. The value needs -to be a list with the names. Here, NO NEW entities can be defined. -""" -import argparse -import json -import re -import sys -from typing import List, Optional -from warnings import warn - -import jsonref -import jsonschema -import linkahead as db -import yaml -from linkahead.common.datatype import get_list_datatype - -from .data_model import LINKAHEAD_INTERNAL_PROPERTIES, DataModel - -# Keywords which are allowed in data model descriptions. -KEYWORDS = ["importance", - "datatype", # for example TEXT, INTEGER or REFERENCE - "unit", - "description", - "recommended_properties", - "obligatory_properties", - "suggested_properties", - "inherit_from_recommended", - "inherit_from_suggested", - "inherit_from_obligatory", - "role", - "value", - ] - -# TODO: check whether it's really ignored -# These KEYWORDS are not forbidden as properties, but merely ignored. -KEYWORDS_IGNORED = [ - "unit", -] - -JSON_SCHEMA_ATOMIC_TYPES = [ - "string", - "boolean", - "integer", - "number", - "null" -] - - -# Taken from https://stackoverflow.com/a/53647080, CC-BY-SA, 2018 by -# https://stackoverflow.com/users/2572431/augurar - - -class SafeLineLoader(yaml.SafeLoader): - """Load a line and keep meta-information. - - Note that this will add a `__line__` element to all the dicts. - """ - - def construct_mapping(self, node, deep=False): - """Overwritung the parent method.""" - mapping = super().construct_mapping(node, deep=deep) - # Add 1 so line numbering starts at 1 - mapping['__line__'] = node.start_mark.line + 1 - - return mapping -# End of https://stackoverflow.com/a/53647080 - - -class TwiceDefinedException(Exception): - def __init__(self, name): - super().__init__("The Entity '{}' was defined multiple times!".format( - name)) - - -class YamlDefinitionError(RuntimeError): - def __init__(self, line, template=None): - if not template: - template = "Error in YAML definition in line {}." - super().__init__(template.format(line)) - - -class JsonSchemaDefinitionError(RuntimeError): - # @author Florian Spreckelsen - # @date 2022-02-17 - # @review Daniel Hornung 2022-02-18 - def __init__(self, msg): - super().__init__(msg) - - -def parse_model_from_yaml(filename, existing_model: Optional[dict] = None, debug: bool = False): - """Parse a data model from a YAML file. - -This is a convenience function if the Parser object is not needed, it calls -``Parser.parse_model_from_yaml(...)`` internally. - - -Parameters ----------- - -existing_model : dict, optional - An existing model to which the created model shall be added. - -debug : bool, optional - If True, turn on miscellaneous debugging. Default is False. 
- """ - parser = Parser(debug=debug) - - return parser.parse_model_from_yaml(filename, existing_model=existing_model) - - -def parse_model_from_string(string, existing_model: Optional[dict] = None, debug: bool = False): - """Parse a data model from a YAML string - -This is a convenience function if the Parser object is not needed, it calls -``Parser.parse_model_from_string(...)`` internally. - -Parameters ----------- - -existing_model : dict, optional - An existing model to which the created model shall be added. - -debug : bool, optional - If True, turn on miscellaneous debugging. Default is False. - """ - parser = Parser(debug=debug) - - return parser.parse_model_from_string(string, existing_model=existing_model) - - -def parse_model_from_json_schema( - filename: str, - top_level_recordtype: bool = True, - types_for_missing_array_items: dict = {}, - ignore_unspecified_array_items: bool = False, - existing_model: Optional[dict] = None -): - """Return a datamodel parsed from a json schema definition. - - Parameters - ---------- - - filename : str - The path of the json schema file that is to be parsed - - top_level_recordtype : bool, optional - Whether there is a record type defined at the top level of the - schema. Default is true. - - types_for_missing_array_items : dict, optional - dictionary containing fall-back types for json entries with `type: - array` but without `items` specification. Default is an empty dict. - - ignore_unspecified_array_items : bool, optional - Whether to ignore `type: array` entries the type of which is not - specified by their `items` property or given in - `types_for_missing_array_items`. An error is raised if they are not - ignored. Default is False. - - existing_model : dict, optional - An existing model to which the created model shall be added. Not implemented yet. - - Returns - ------- - - out : Datamodel - The datamodel generated from the input schema which then can be used for - synchronizing with LinkAhead. - - Note - ---- - This is an experimental feature, see ``JsonSchemaParser`` for information - about the limitations of the current implementation. - - """ - if existing_model is not None: - raise NotImplementedError("Adding to an existing model is not implemented yet.") - - # @author Florian Spreckelsen - # @date 2022-02-17 - # @review Timm Fitschen 2023-05-25 - parser = JsonSchemaParser(types_for_missing_array_items, ignore_unspecified_array_items) - - return parser.parse_model_from_json_schema(filename, top_level_recordtype) - - -class Parser(object): - def __init__(self, debug: bool = False): - """Initialize an empty parser object and initialize the dictionary of entities and the list of - treated elements. - -Parameters ----------- - -debug : bool, optional - If True, turn on miscellaneous debugging. Default is False. - - """ - self.model = {} - self.treated = [] - self.debug = debug - - def parse_model_from_yaml(self, filename, existing_model: Optional[dict] = None): - """Create and return a data model from the given file. - - Parameters - ---------- - filename : str - The path to the YAML file. - - existing_model : dict, optional - An existing model to which the created model shall be added. 
- - Returns - ------- - out : data_model.DataModel - The created DataModel - """ - with open(filename, 'r') as outfile: - ymlmodel = yaml.load(outfile, Loader=SafeLineLoader) - - return self._create_model_from_dict(ymlmodel, existing_model=existing_model) - - def parse_model_from_string(self, string, existing_model: Optional[dict] = None): - """Create and return a data model from the given YAML string. - - Parameters - ---------- - string : str - The YAML string. - - existing_model : dict, optional - An existing model to which the created model shall be added. - - Returns - ------- - out : data_model.DataModel - The created DataModel - """ - ymlmodel = yaml.load(string, Loader=SafeLineLoader) - - return self._create_model_from_dict(ymlmodel, existing_model=existing_model) - - def _create_model_from_dict(self, ymlmodel, existing_model: Optional[dict] = None): - """Create and return a data model out of the YAML dict `ymlmodel`. - - Parameters - ---------- - ymlmodel : dict - The dictionary parsed from a YAML file. - - existing_model : dict, optional - An existing model to which the created model shall be added. - - Returns - ------- - out : data_model.DataModel - The created DataModel - """ - - if not isinstance(ymlmodel, dict): - raise ValueError("Yaml file should only contain one dictionary!") - - if existing_model is not None: - self.model.update(existing_model) - - # Extern keyword: - # The extern keyword can be used to include Properties and RecordTypes - # from existing LinkAhead datamodels into the current model. - # Any name included in the list specified by the extern keyword - # will be used in queries to retrieve a property or (if no property exists) - # a record type with the name of the element. - # The retrieved entity will be added to the model. - # If no entity with that name is found an exception is raised. - - if "extern" not in ymlmodel: - ymlmodel["extern"] = [] - - for name in ymlmodel["extern"]: - if name in LINKAHEAD_INTERNAL_PROPERTIES: - self.model[name] = db.Property(name=name).retrieve() - continue - for role in ("Property", "RecordType", "Record", "File"): - if db.execute_query("COUNT {} \"{}\"".format(role, name)) > 0: - self.model[name] = db.execute_query( - f"FIND {role} WITH name=\"{name}\"", unique=True) - break - else: - raise Exception("Did not find {}".format(name)) - - ymlmodel.pop("extern") - - # add all names to ymlmodel; initialize properties - - for name, entity in ymlmodel.items(): - self._add_entity_to_model(name, entity) - # initialize recordtypes - self._set_recordtypes() - self._check_and_convert_datatypes() - - for name, entity in ymlmodel.items(): - try: - self._treat_entity(name, entity, line=ymlmodel["__line__"]) - except ValueError as err: - err_str = err.args[0].replace("invalid keyword:", - f"invalid keyword in line {entity['__line__']}:", 1) - raise ValueError(err_str, *err.args[1:]) from err - -# Update properties that are part of record types: -# e.g. add their datatypes, units etc.. -# Otherwise comparison of existing models and the parsed model become difficult. 
- for name, ent in self.model.items(): - if not isinstance(ent, db.RecordType): - continue - props = ent.get_properties() - for prop in props: - if prop.name in self.model: - model_prop = self.model[prop.name] - # The information must be missing, we don't want to overwrite it accidentally: - if prop.datatype is None: - if isinstance(model_prop, db.RecordType): - prop.datatype = model_prop.name - else: - prop.datatype = model_prop.datatype - # TODO: Data type overwrite is allowed here (because - # of lists), but this might change in the future. - # elif prop.datatype != model_prop.datatype: - # raise RuntimeError("datatype must not be set, here. This is probably a bug.") - if prop.unit is None: - # No unit for plain reference properties - if not isinstance(model_prop, db.RecordType): - prop.unit = model_prop.unit - if prop.description is None: - prop.description = model_prop.description - - return DataModel(self.model.values()) - - @staticmethod - def _stringify(name, context=None): - """Make a string out of `name`. - - Warnings are emitted for difficult values of `name`. - - Parameters - ---------- - name : - The value to be converted to a string. - - context : obj - Will be printed in the case of warnings. - - Returns - ------- - out : str - If `name` was a string, return it. Else return str(`name`). - """ - - if name is None: - print("WARNING: Name of this context is None: {}".format(context), - file=sys.stderr) - - if not isinstance(name, str): - name = str(name) - - return name - - def _add_entity_to_model(self, name, definition): - """ adds names of Properties and RecordTypes to the model dictionary - - Properties are also initialized. - - name is the key of the yaml element and definition the value. - """ - - if name == "__line__": - return - name = self._stringify(name) - - if name not in self.model: - self.model[name] = None - - if definition is None: - return - - if (self.model[name] is None and isinstance(definition, dict) - # is it a property - and "datatype" in definition - # but not simply an RT of the model - and not (get_list_datatype(definition["datatype"]) == name and - get_list_datatype(definition["datatype"]) in self.model)): - - # and create the new property - self.model[name] = db.Property(name=name, - datatype=definition["datatype"]) - elif (self.model[name] is None and isinstance(definition, dict) - and "role" in definition): - if definition["role"] == "RecordType": - self.model[name] = db.RecordType(name=name) - elif definition["role"] == "Record": - self.model[name] = db.Record(name=name) - elif definition["role"] == "File": - # TODO(fspreck) Implement files at some later point in time - raise NotImplementedError( - "The definition of file objects is not yet implemented.") - - # self.model[name] = db.File(name=name) - elif definition["role"] == "Property": - self.model[name] = db.Property(name=name) - else: - raise RuntimeError("Unknown role {} in definition of entity.".format( - definition["role"])) - - # for setting values of properties directly: - if not isinstance(definition, dict): - return - - # add other definitions recursively - for prop_type in ["recommended_properties", - "suggested_properties", "obligatory_properties"]: - - if prop_type in definition: - # Empty property mapping should be allowed. 
- - if definition[prop_type] is None: - definition[prop_type] = {} - try: - for n, e in definition[prop_type].items(): - if n == "__line__": - continue - self._add_entity_to_model(n, e) - except AttributeError as ate: - if ate.args[0].endswith("'items'"): - line = definition["__line__"] - - if isinstance(definition[prop_type], list): - line = definition[prop_type][0]["__line__"] - raise YamlDefinitionError(line) from None - raise - - if self.debug and self.model[name] is not None: - self.model[name].__line__ = definition["__line__"] - - def _add_to_recordtype(self, ent_name, props, importance): - """Add properties to a RecordType. - - Parameters - ---------- - ent_name : str - The name of the entity to which the properties shall be added. - - props : dict [str -> dict or :doc:`Entity`] - The properties, indexed by their names. Properties may be given as :doc:`Entity` objects - or as dictionaries. - - importance - The importance as used in :doc:`Entity.add_property`. - - Returns - ------- - None - - """ - - for n, e in props.items(): - - if n in KEYWORDS: - if n in KEYWORDS_IGNORED: - continue - raise YamlDefinitionError("Unexpected keyword in line {}: {}".format( - props["__line__"], n)) - - if n == "__line__": - continue - n = self._stringify(n) - - if isinstance(e, dict): - if "datatype" in e and get_list_datatype(e["datatype"]) is not None: - # Reuse the existing datatype for lists. - datatype = db.LIST(get_list_datatype(e["datatype"])) - else: - # Ignore a possible e["datatype"] here if it's not a list - # since it has been treated in the definition of the - # property (entity) already - datatype = None - if "value" in e: - value = e["value"] - else: - value = None - - else: - value = e - datatype = None - - self.model[ent_name].add_property(name=n, - value=value, - importance=importance, - datatype=datatype) - - def _inherit(self, name, prop, inheritance): - if not isinstance(prop, list): - if isinstance(prop, str): - raise YamlDefinitionError( - f"Parents must be a list but is given as string: {name} > {prop}") - raise YamlDefinitionError("Parents must be a list, error in line {}".format( - prop["__line__"])) - - for pname in prop: - if not isinstance(pname, str): - raise ValueError("Only provide the names of parents.") - self.model[name].add_parent(name=pname, inheritance=inheritance) - - def _treat_entity(self, name, definition, line=None): - """Parse the definition and the information to the entity.""" - - if name == "__line__": - return - name = self._stringify(name) - - try: - if definition is None: - return - - # for setting values of properties directly: - if not isinstance(definition, dict): - return - - # These definition items must be handled even for list props. - for prop_name, prop in definition.items(): - if prop_name == "description": - self.model[name].description = prop - - # For lists, everything else is not needed at this level. 
- if ("datatype" in definition and definition["datatype"].startswith("LIST")): - return - - if name in self.treated: - raise TwiceDefinedException(name) - - # for reducing a little bit of code duplication: - importance_dict = { - "recommended_properties": db.RECOMMENDED, - "obligatory_properties": db.OBLIGATORY, - "suggested_properties": db.SUGGESTED - } - - for prop_name, prop in definition.items(): - if prop_name == "__line__": - continue - line = definition["__line__"] - - if prop_name == "unit": - self.model[name].unit = prop - - elif prop_name == "value": - self.model[name].value = prop - - elif prop_name == "description": - # Handled above - continue - - elif prop_name in importance_dict: - for imp_name, imp_val in importance_dict.items(): - if prop_name == imp_name: - self._add_to_recordtype( - name, prop, importance=imp_val) - - for n, e in prop.items(): - self._treat_entity(n, e) - - # datatype is already set - elif prop_name == "datatype": - continue - - # role has already been used - elif prop_name == "role": - continue - - elif prop_name == "inherit_from_obligatory": - self._inherit(name, prop, db.OBLIGATORY) - elif prop_name == "inherit_from_recommended": - self._inherit(name, prop, db.RECOMMENDED) - elif prop_name == "inherit_from_suggested": - self._inherit(name, prop, db.SUGGESTED) - - else: - raise ValueError("invalid keyword: {}".format(prop_name)) - except AttributeError as ate: - if ate.args[0].endswith("'items'"): - raise YamlDefinitionError(line) from None - except Exception as e: - print("Error in treating: "+name) - raise e - self.treated.append(name) - - def _check_and_convert_datatypes(self): - """ checks if datatype is valid. - datatype of properties is simply initialized with string. Here, we - iterate over properties and check whether it is a base datatype of a - name that was defined in the model (or extern part) - - the string representations are replaced with linkahead objects - - """ - - for key, value in self.model.items(): - - if isinstance(value, db.Property): - dtype = value.datatype - is_list = False - - if get_list_datatype(dtype) is not None: - dtype = get_list_datatype(dtype) - is_list = True - - dtype_name = dtype - if not isinstance(dtype_name, str): - dtype_name = dtype.name - - if dtype_name in self.model: - if is_list: - value.datatype = db.LIST(self.model[dtype_name]) - else: - value.datatype = self.model[dtype_name] - - continue - - if dtype in [db.DOUBLE, - db.REFERENCE, - db.TEXT, - db.DATETIME, - db.INTEGER, - db.FILE, - db.BOOLEAN]: - - if is_list: - value.datatype = db.LIST(db.__getattribute__( # pylint: disable=no-member - dtype)) - else: - value.datatype = db.__getattribute__( # pylint: disable=no-member - dtype) - - continue - - raise ValueError("Property {} has an unknown datatype: {}".format( - value.name, dtype_name)) - - def _set_recordtypes(self): - """ properties are defined in first iteration; set remaining as RTs """ - - for key, value in self.model.items(): - if value is None: - self.model[key] = db.RecordType(name=key) - - -class JsonSchemaParser(Parser): - """Extends the yaml parser to read in datamodels defined in a json schema. 
- - **EXPERIMENTAL:** While this class can already be used to create data models - from basic json schemas, there are the following limitations and missing - features: - - * Due to limitations of json-schema itself, we currently do not support - inheritance in the imported data models - * The same goes for suggested properties of RecordTypes - * Already defined RecordTypes and (scalar) Properties can't be re-used as - list properties - * Reference properties that are different from the referenced RT. (Although - this is possible for list of references) - * Values - * Roles - * The extern keyword from the yaml parser - - """ - # @author Florian Spreckelsen - # @date 2022-02-17 - # @review Timm Fitschen 2023-05-25 - - def __init__(self, types_for_missing_array_items={}, ignore_unspecified_array_items=False): - super().__init__() - self.types_for_missing_array_items = types_for_missing_array_items - self.ignore_unspecified_array_items = ignore_unspecified_array_items - - def parse_model_from_json_schema(self, filename: str, top_level_recordtype: bool = True): - """Return a datamodel created from the definition in the json schema in - `filename`. - - Parameters - ---------- - filename : str - The path to the json-schema file containing the datamodel definition - top_level_recordtype : bool, optional - Whether there is a record type defined at the top level of the - schema. Default is true. - - Returns - ------- - out : data_model.DataModel - The created DataModel - """ - # @author Florian Spreckelsen - # @date 2022-02-17 - # @review Timm Fitschen 2023-05-25 - with open(filename, 'r') as schema_file: - model_dict = jsonref.load(schema_file) - - return self._create_model_from_dict(model_dict, top_level_recordtype=top_level_recordtype) - - def _create_model_from_dict(self, model_dict: [dict, List[dict]], top_level_recordtype: bool = True): - """Parse a dictionary and return the Datamodel created from it. - - The dictionary was typically created from the model definition in a json schema file. - - Parameters - ---------- - model_dict : dict or list[dict] - One or several dictionaries read in from a json-schema file - top_level_recordtype : bool, optional - Whether there is a record type defined at the top level of the - schema. Default is true. - - Returns - ------- - our : data_model.DataModel - The datamodel defined in `model_dict` - """ - # @review Timm Fitschen 2023-05-25 - if isinstance(model_dict, dict): - model_dict = [model_dict] - - for ii, elt in enumerate(model_dict): - try: - jsonschema.Draft202012Validator.check_schema(elt) - except jsonschema.SchemaError as err: - key = elt["title"] if "title" in elt else f"element {ii}" - raise JsonSchemaDefinitionError( - f"Json Schema error in {key}:\n{str(err)}") from err - - if top_level_recordtype: - if "title" not in elt: - raise JsonSchemaDefinitionError( - f"Object {ii+1} is lacking the `title` key word") - if "type" not in elt: - raise JsonSchemaDefinitionError( - f"Object {ii+1} is lacking the `type` key word") - # Check if this is a valid Json Schema - name = self._stringify(elt["title"], context=elt) - self._treat_element(elt, name) - elif "properties" in elt or "patternProperties" in elt: - # No top-level type but there are entities - if "properties" in elt: - for key, prop in elt["properties"].items(): - name = self._get_name_from_property(key, prop) - self._treat_element(prop, name) - if "patternProperties" in elt: - # See also treatment in ``_treat_record_type``. 
Since here, - # there is no top-level RT we use the prefix `__Pattern`, - # i.e., the resulting Record Types will be called - # `__PatternElement`. - self._treat_pattern_properties( - elt["patternProperties"], name_prefix="__Pattern") - else: - # Neither RecordType itself, nor further properties in schema, - # so nothing to do here. Maybe add something in the future. - continue - - return DataModel(self.model.values()) - - def _get_name_from_property(self, key: str, prop: dict): - # @review Timm Fitschen 2023-05-25 - if "title" in prop: - name = self._stringify(prop["title"]) - else: - name = self._stringify(key) - - return name - - def _get_atomic_datatype(self, elt): - # @review Timm Fitschen 2023-05-25 - if elt["type"] == "string": - if "format" in elt and elt["format"] in ["date", "date-time"]: - return db.DATETIME - else: - return db.TEXT - elif elt["type"] == "integer": - return db.INTEGER - elif elt["type"] == "number": - return db.DOUBLE - elif elt["type"] == "boolean": - return db.BOOLEAN - elif elt["type"] == "null": - # This could be any datatype since a valid json will never have a - # value in a null property. We use TEXT for convenience. - return db.TEXT - else: - raise JsonSchemaDefinitionError(f"Unkown atomic type in {elt}.") - - def _treat_element(self, elt: dict, name: str): - # @review Timm Fitschen 2023-05-25 - force_list = False - if name in self.model: - return self.model[name], force_list - if "type" not in elt: - # Each element must have a specific type - raise JsonSchemaDefinitionError( - f"`type` is missing in element {name}.") - if name == "name": - # This is identified with the LinkAhead name property as long as the - # type is correct. - if not elt["type"] == "string" and "string" not in elt["type"]: - raise JsonSchemaDefinitionError( - "The 'name' property must be string-typed, otherwise it cannot " - "be identified with LinkAhead's name property." - ) - return None, force_list - # LinkAhead suports null for all types, so in the very special case of - # `"type": ["null", "<other_type>"]`, only consider the other type: - if isinstance(elt["type"], list) and len(elt["type"]) == 2 and "null" in elt["type"]: - elt["type"].remove("null") - elt["type"] = elt["type"][0] - if "enum" in elt: - ent = self._treat_enum(elt, name) - elif elt["type"] in JSON_SCHEMA_ATOMIC_TYPES: - ent = db.Property( - name=name, datatype=self._get_atomic_datatype(elt)) - elif elt["type"] == "object": - ent = self._treat_record_type(elt, name) - elif elt["type"] == "array": - ent, force_list = self._treat_list(elt, name) - else: - raise NotImplementedError( - f"Cannot parse items of type '{elt['type']}' (yet).") - if "description" in elt and ent.description is None: - # There is a description and it hasn't been set by another - # treat_something function - ent.description = elt["description"] - - if ent is not None: - self.model[name] = ent - return ent, force_list - - def _treat_record_type(self, elt: dict, name: str): - # @review Timm Fitschen 2023-05-25 - rt = db.RecordType(name=name) - if "required" in elt: - required = elt["required"] - else: - required = [] - if "properties" in elt: - for key, prop in elt["properties"].items(): - name = self._get_name_from_property(key, prop) - prop_ent, force_list = self._treat_element(prop, name) - if prop_ent is None: - # Nothing to be appended since the property has to be - # treated specially. 
- continue - importance = db.OBLIGATORY if key in required else db.RECOMMENDED - if not force_list: - rt.add_property(prop_ent, importance=importance) - else: - # Special case of rt used as a list property - rt.add_property(prop_ent, importance=importance, - datatype=db.LIST(prop_ent)) - - if "patternProperties" in elt: - - pattern_property_rts = self._treat_pattern_properties( - elt["patternProperties"], name_prefix=name) - for ppr in pattern_property_rts: - # add reference to pattern property type. These can never be - # obligatory since pattern properties cannot be required in the - # original schema (since their actual names are not known a - # priori). - rt.add_property(ppr) - - if "description" in elt: - rt.description = elt["description"] - return rt - - def _treat_enum(self, elt: dict, name: str): - # @review Timm Fitschen 2022-02-30 - if "type" in elt and elt["type"] == "integer": - raise NotImplementedError( - "Integer-enums are not allowd until " - "https://gitlab.indiscale.com/caosdb/src/caosdb-server/-/issues/224 " - "has been fixed." - ) - rt = db.RecordType(name=name) - for enum_elt in elt["enum"]: - rec = db.Record(name=self._stringify(enum_elt)) - rec.add_parent(rt) - self.model[enum_elt] = rec - - return rt - - def _treat_list(self, elt: dict, name: str): - # @review Timm Fitschen 2023-05-25 - - if "items" not in elt and name not in self.types_for_missing_array_items: - if self.ignore_unspecified_array_items: - return None, False - raise JsonSchemaDefinitionError( - f"The definition of the list items is missing in {elt}.") - if "items" in elt: - items = elt["items"] - if "enum" in items: - return self._treat_enum(items, name), True - if items["type"] in JSON_SCHEMA_ATOMIC_TYPES: - datatype = db.LIST(self._get_atomic_datatype(items)) - return db.Property(name=name, datatype=datatype), False - if items["type"] == "object": - if "title" not in items or self._stringify(items["title"]) == name: - # Property is RecordType - return self._treat_record_type(items, name), True - else: - # List property will be an entity of its own with a name - # different from the referenced RT - ref_rt = self._treat_record_type( - items, self._stringify(items["title"])) - self.model[ref_rt.name] = ref_rt - return db.Property(name=name, datatype=db.LIST(ref_rt)), False - else: - # Use predefined type: - datatype = db.LIST(self.types_for_missing_array_items[name]) - return db.Property(name=name, datatype=datatype), False - - def _get_pattern_prop(self): - # @review Timm Fitschen 2023-05-25 - if "__pattern_property_pattern_property" in self.model: - return self.model["__pattern_property_pattern_property"] - pp = db.Property(name="__matched_pattern", datatype=db.TEXT) - self.model["__pattern_property_pattern_property"] = pp - return pp - - def _treat_pattern_properties(self, pattern_elements, name_prefix=""): - """Special Treatment for pattern properties: A RecordType is created for - each pattern property. In case of a `type: object` PatternProperty, the - remaining properties of the JSON entry are appended to the new - RecordType; in case of an atomic type PatternProperty, a single value - Property is added to the RecordType. - - Raises - ------ - NotImplementedError - In case of patternProperties with non-object, non-atomic type, e.g., - array. 
- - """ - # @review Timm Fitschen 2023-05-25 - num_patterns = len(pattern_elements) - pattern_prop = self._get_pattern_prop() - returns = [] - for ii, (key, element) in enumerate(pattern_elements.items()): - if "title" not in element: - name_suffix = f"_{ii+1}" if num_patterns > 1 else "" - name = name_prefix + "Entry" + name_suffix - else: - name = element["title"] - if element["type"] == "object": - # simple, is already an object, so can be treated like any other - # record type. - pattern_type = self._treat_record_type(element, name) - elif element["type"] in JSON_SCHEMA_ATOMIC_TYPES: - # create a property that stores the actual value of the pattern - # property. - propname = f"{name}_value" - prop = db.Property(name=propname, datatype=self._get_atomic_datatype(element)) - self.model[propname] = prop - pattern_type = db.RecordType(name=name) - pattern_type.add_property(prop) - else: - raise NotImplementedError( - "Pattern properties are currently only supported for types " + - ", ".join(JSON_SCHEMA_ATOMIC_TYPES) + ", and object.") - - # Add pattern property and description - pattern_type.add_property(pattern_prop, importance=db.OBLIGATORY) - if pattern_type.description: - pattern_type.description += f"\n\npattern: {key}" - else: - pattern_type.description = f"pattern: {key}" - - self.model[name] = pattern_type - returns.append(pattern_type) - - return returns - - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description=__doc__, - formatter_class=argparse.RawTextHelpFormatter) - parser.add_argument("data_model", - help="Path name of the data model file (yaml or json) to be used.") - parser.add_argument("--sync", action="store_true", - help="Whether or not to sync the data model with the server.") - parser.add_argument("--noquestion", action="store_true", - help="Whether or not to ask questions during synchronization.") - parser.add_argument("--print", action="store_true", - help="Whether or not to print the data model.") - - args = parser.parse_args() - if args.data_model.endswith(".json"): - model = parse_model_from_json_schema(args.data_model) - elif args.data_model.endswith(".yml") or args.data_model.endswith(".yaml"): - model = parse_model_from_yaml(args.data_model) - else: - raise RuntimeError(f"Unknown file ending of data model: {args.data_model}") - if args.print: - print(model) - if args.sync: - model.sync_data_model(noquestion=args.noquestion) +# This file is a part of the LinkAhead project. +# +# Copyright (C) 2023 IndiScale GmbH <info@indiscale.com> +# Copyright (C) 2022 Florian Spreckelsen <f.spreckelsen@indiscale.com> +# Copyright (C) 2023 Daniel Hornung <d.hornung@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +""" +This module (and script) provides methods to read a DataModel from a YAML file. + +If a file name is passed to parse_model_from_yaml it is parsed and a DataModel +is created. 
The yaml file needs to be structured in a certain way which will be +described in the following. + +The file should only contain a dictionary. The keys are the names of +RecordTypes or Properties. The values are again dictionaries describing the +entities. This information can be defined via the keys listed in KEYWORDS. +Notably, properties can be given in a dictionary under the xxxx_properties keys +and will be added with the respective importance. These properties can be +RecordTypes or Properties and can be defined right there. +Every Property or RecordType only needs to be defined once anywhere. When it is +not defined, simply the name can be supplied with no value. +Parents can be provided under the 'inherit_from_xxxx' keywords. The value needs +to be a list with the names. Here, NO NEW entities can be defined. +""" +import argparse +import sys +from typing import List, Optional, Union + +import jsonref +import jsonschema +import linkahead as db +import yaml +from linkahead.common.datatype import get_list_datatype + +from .data_model import LINKAHEAD_INTERNAL_PROPERTIES, DataModel + +# Keywords which are allowed in data model descriptions. +KEYWORDS = ["importance", + "datatype", # for example TEXT, INTEGER or REFERENCE + "unit", + "description", + "recommended_properties", + "obligatory_properties", + "suggested_properties", + "inherit_from_recommended", + "inherit_from_suggested", + "inherit_from_obligatory", + "role", + "value", + ] + +# TODO: check whether it's really ignored +# These KEYWORDS are not forbidden as properties, but merely ignored. +KEYWORDS_IGNORED = [ + "unit", +] + +JSON_SCHEMA_ATOMIC_TYPES = [ + "string", + "boolean", + "integer", + "number", + "null" +] + + +# Taken from https://stackoverflow.com/a/53647080, CC-BY-SA, 2018 by +# https://stackoverflow.com/users/2572431/augurar + + +class SafeLineLoader(yaml.SafeLoader): + """Load a line and keep meta-information. + + Note that this will add a `__line__` element to all the dicts. + """ + + def construct_mapping(self, node, deep=False): + """Overwritung the parent method.""" + mapping = super().construct_mapping(node, deep=deep) + # Add 1 so line numbering starts at 1 + mapping['__line__'] = node.start_mark.line + 1 + + return mapping +# End of https://stackoverflow.com/a/53647080 + + +class TwiceDefinedException(Exception): + def __init__(self, name): + super().__init__("The Entity '{}' was defined multiple times!".format( + name)) + + +class YamlDefinitionError(RuntimeError): + def __init__(self, line, template=None): + if not template: + template = "Error in YAML definition in line {}." + super().__init__(template.format(line)) + + +class JsonSchemaDefinitionError(RuntimeError): + # @author Florian Spreckelsen + # @date 2022-02-17 + # @review Daniel Hornung 2022-02-18 + def __init__(self, msg): + super().__init__(msg) + + +def parse_model_from_yaml(filename, existing_model: Optional[dict] = None, debug: bool = False): + """Parse a data model from a YAML file. + +This is a convenience function if the Parser object is not needed, it calls +``Parser.parse_model_from_yaml(...)`` internally. + + +Parameters +---------- + +existing_model : dict, optional + An existing model to which the created model shall be added. + +debug : bool, optional + If True, turn on miscellaneous debugging. Default is False. 
+    """
+    parser = Parser(debug=debug)
+
+    return parser.parse_model_from_yaml(filename, existing_model=existing_model)
+
+
+def parse_model_from_string(string, existing_model: Optional[dict] = None, debug: bool = False):
+    """Parse a data model from a YAML string.
+
+This is a convenience function for when the Parser object is not needed; it
+calls ``Parser.parse_model_from_string(...)`` internally.
+
+Parameters
+----------
+
+existing_model : dict, optional
+    An existing model to which the created model shall be added.
+
+debug : bool, optional
+    If True, turn on miscellaneous debugging. Default is False.
+    """
+    parser = Parser(debug=debug)
+
+    return parser.parse_model_from_string(string, existing_model=existing_model)
+
+
+def parse_model_from_json_schema(
+        filename: str,
+        top_level_recordtype: bool = True,
+        types_for_missing_array_items: Optional[dict] = None,
+        ignore_unspecified_array_items: bool = False,
+        existing_model: Optional[dict] = None
+):
+    """Return a datamodel parsed from a json schema definition.
+
+    Parameters
+    ----------
+
+    filename : str
+        The path of the json schema file that is to be parsed.
+
+    top_level_recordtype : bool, optional
+        Whether there is a record type defined at the top level of the
+        schema. Default is true.
+
+    types_for_missing_array_items : dict, optional
+        Dictionary containing fall-back types for json entries with `type:
+        array` but without `items` specification. Default is an empty dict.
+
+    ignore_unspecified_array_items : bool, optional
+        Whether to ignore `type: array` entries whose type is neither
+        specified by their `items` property nor given in
+        `types_for_missing_array_items`. An error is raised if they are not
+        ignored. Default is False.
+
+    existing_model : dict, optional
+        An existing model to which the created model shall be added. Not implemented yet.
+
+    Returns
+    -------
+
+    out : data_model.DataModel
+        The datamodel generated from the input schema, which can then be used
+        for synchronizing with LinkAhead.
+
+    Note
+    ----
+    This is an experimental feature, see ``JsonSchemaParser`` for information
+    about the limitations of the current implementation.
+
+    """
+    if types_for_missing_array_items is None:
+        types_for_missing_array_items = {}
+
+    if existing_model is not None:
+        raise NotImplementedError("Adding to an existing model is not implemented yet.")
+
+    # @author Florian Spreckelsen
+    # @date 2022-02-17
+    # @review Timm Fitschen 2023-05-25
+    parser = JsonSchemaParser(types_for_missing_array_items, ignore_unspecified_array_items)
+
+    return parser.parse_model_from_json_schema(filename, top_level_recordtype)
+
+
+class Parser(object):
+    def __init__(self, debug: bool = False):
+        """Initialize an empty parser object with an empty dictionary of
+        entities and an empty list of treated elements.
+
+Parameters
+----------
+
+debug : bool, optional
+    If True, turn on miscellaneous debugging. Default is False.
+
+        """
+        self.model = {}
+        self.treated = []
+        self.debug = debug
+
+    def parse_model_from_yaml(self, filename, existing_model: Optional[dict] = None):
+        """Create and return a data model from the given file.
+
+        Parameters
+        ----------
+        filename : str
+            The path to the YAML file.
+
+        existing_model : dict, optional
+            An existing model to which the created model shall be added.
+
+        Returns
+        -------
+        out : data_model.DataModel
+            The created DataModel
+        """
+        with open(filename, 'r', encoding="utf-8") as outfile:
+            ymlmodel = yaml.load(outfile, Loader=SafeLineLoader)
+
+        return self._create_model_from_dict(ymlmodel, existing_model=existing_model)
+
+    def parse_model_from_string(self, string, existing_model: Optional[dict] = None):
+        """Create and return a data model from the given YAML string.
+
+        Parameters
+        ----------
+        string : str
+            The YAML string.
+
+        existing_model : dict, optional
+            An existing model to which the created model shall be added.
+
+        Returns
+        -------
+        out : data_model.DataModel
+            The created DataModel
+        """
+        ymlmodel = yaml.load(string, Loader=SafeLineLoader)
+
+        return self._create_model_from_dict(ymlmodel, existing_model=existing_model)
+
+    def _create_model_from_dict(self, ymlmodel, existing_model: Optional[dict] = None):
+        """Create and return a data model out of the YAML dict `ymlmodel`.
+
+        Parameters
+        ----------
+        ymlmodel : dict
+            The dictionary parsed from a YAML file.
+
+        existing_model : dict, optional
+            An existing model to which the created model shall be added.
+
+        Raises
+        ------
+        ValueError
+            If `ymlmodel` is not a dict, `ymlmodel["extern"]` contains an
+            unknown entry, or there is an unknown entry in `ymlmodel`.
+
+        Returns
+        -------
+        out : data_model.DataModel
+            The created DataModel
+        """
+
+        if not isinstance(ymlmodel, dict):
+            raise ValueError("YAML file should only contain one dictionary!")
+
+        if existing_model is not None:
+            self.model.update(existing_model)
+
+        # Extern keyword:
+        # The extern keyword can be used to include Properties and RecordTypes
+        # from existing LinkAhead datamodels into the current model.
+        # Any name included in the list specified by the extern keyword
+        # will be used in queries to retrieve a property or (if no property exists)
+        # a record type with the name of the element.
+        # The retrieved entity will be added to the model.
+        # If no entity with that name is found, an exception is raised.
+
+        if "extern" not in ymlmodel:
+            ymlmodel["extern"] = []
+
+        for name in ymlmodel["extern"]:
+            if name in LINKAHEAD_INTERNAL_PROPERTIES:
+                self.model[name] = db.Property(name=name).retrieve()
+                continue
+            for role in ("Property", "RecordType", "Record", "File"):
+                if db.execute_query("COUNT {} \"{}\"".format(role, name)) > 0:
+                    self.model[name] = db.execute_query(
+                        f"FIND {role} WITH name=\"{name}\"", unique=True)
+                    break
+            else:
+                raise ValueError("Did not find {}".format(name))
+
+        ymlmodel.pop("extern")
+
+        # add all names to ymlmodel; initialize properties
+
+        for name, entity in ymlmodel.items():
+            self._add_entity_to_model(name, entity)
+        # initialize recordtypes
+        self._set_recordtypes()
+        self._check_and_convert_datatypes()
+
+        for name, entity in ymlmodel.items():
+            try:
+                self._treat_entity(name, entity, line=ymlmodel["__line__"])
+            except ValueError as err:
+                err_str = err.args[0].replace("invalid keyword:",
+                                              f"invalid keyword in line {entity['__line__']}:", 1)
+                raise ValueError(err_str, *err.args[1:]) from err
+
+        # Update properties that are part of record types:
+        # e.g. add their datatypes, units etc.
+        # Otherwise the comparison of existing models and the parsed model
+        # becomes difficult.
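+        # (Illustration with hypothetical names: if "temperature" was defined
+        # as a Property with datatype DOUBLE and unit "K", a RecordType that
+        # lists "temperature" among its properties gets that datatype and unit
+        # copied onto its property entry below.)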
+        for name, ent in self.model.items():
+            if not isinstance(ent, db.RecordType):
+                continue
+            props = ent.get_properties()
+            for prop in props:
+                if prop.name in self.model:
+                    model_prop = self.model[prop.name]
+                    # The information must be missing; we don't want to overwrite it accidentally:
+                    if prop.datatype is None:
+                        if isinstance(model_prop, db.RecordType):
+                            prop.datatype = model_prop.name
+                        else:
+                            prop.datatype = model_prop.datatype
+                    # TODO: Data type overwrite is allowed here (because
+                    # of lists), but this might change in the future.
+                    # elif prop.datatype != model_prop.datatype:
+                    #     raise RuntimeError("datatype must not be set, here. This is probably a bug.")
+                    if prop.unit is None:
+                        # No unit for plain reference properties
+                        if not isinstance(model_prop, db.RecordType):
+                            prop.unit = model_prop.unit
+                    if prop.description is None:
+                        prop.description = model_prop.description
+
+        return DataModel(self.model.values())
+
+    @staticmethod
+    def _stringify(name, context=None):
+        """Make a string out of `name`.
+
+        Warnings are emitted for difficult values of `name`.
+
+        Parameters
+        ----------
+        name :
+            The value to be converted to a string.
+
+        context : obj
+            Will be printed in the case of warnings.
+
+        Returns
+        -------
+        out : str
+            If `name` was a string, return it. Else return str(`name`).
+        """
+
+        if name is None:
+            print("WARNING: Name of this context is None: {}".format(context),
+                  file=sys.stderr)
+
+        if not isinstance(name, str):
+            name = str(name)
+
+        return name
+
+    def _add_entity_to_model(self, name, definition):
+        """Add the names of Properties and RecordTypes to the model dictionary.
+
+        Properties are also initialized.
+
+        `name` is the key of the yaml element and `definition` the value.
+        """
+
+        if name == "__line__":
+            return
+        name = self._stringify(name)
+
+        if name not in self.model:
+            self.model[name] = None
+
+        if definition is None:
+            return
+
+        if (self.model[name] is None and isinstance(definition, dict)
+                # is it a property
+                and "datatype" in definition
+                # but not simply an RT of the model
+                and not (get_list_datatype(definition["datatype"]) == name and
+                         get_list_datatype(definition["datatype"]) in self.model)):
+
+            # and create the new property
+            self.model[name] = db.Property(name=name,
+                                           datatype=definition["datatype"])
+        elif (self.model[name] is None and isinstance(definition, dict)
+              and "role" in definition):
+            if definition["role"] == "RecordType":
+                self.model[name] = db.RecordType(name=name)
+            elif definition["role"] == "Record":
+                self.model[name] = db.Record(name=name)
+            elif definition["role"] == "File":
+                # TODO(fspreck) Implement files at some later point in time
+                raise NotImplementedError(
+                    "The definition of file objects is not yet implemented.")
+
+                # self.model[name] = db.File(name=name)
+            elif definition["role"] == "Property":
+                self.model[name] = db.Property(name=name)
+            else:
+                raise RuntimeError("Unknown role {} in definition of entity.".format(
+                    definition["role"]))
+
+        # for setting values of properties directly:
+        if not isinstance(definition, dict):
+            return
+
+        # add other definitions recursively
+        for prop_type in ["recommended_properties",
+                          "suggested_properties", "obligatory_properties"]:
+
+            if prop_type in definition:
+                # Empty property mapping should be allowed.
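+                # (e.g. a bare ``recommended_properties:`` key with nothing
+                # below it is parsed by YAML as None and treated as ``{}``.)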
+
+                if definition[prop_type] is None:
+                    definition[prop_type] = {}
+                try:
+                    for n, e in definition[prop_type].items():
+                        if n == "__line__":
+                            continue
+                        self._add_entity_to_model(n, e)
+                except AttributeError as ate:
+                    if ate.args[0].endswith("'items'"):
+                        line = definition["__line__"]
+
+                        if isinstance(definition[prop_type], list):
+                            line = definition[prop_type][0]["__line__"]
+                        raise YamlDefinitionError(line) from None
+                    raise
+
+        if self.debug and self.model[name] is not None:
+            self.model[name].__line__ = definition["__line__"]
+
+    def _add_to_recordtype(self, ent_name, props, importance):
+        """Add properties to a RecordType.
+
+        Parameters
+        ----------
+        ent_name : str
+            The name of the entity to which the properties shall be added.
+
+        props : dict [str -> dict or :doc:`Entity`]
+            The properties, indexed by their names. Properties may be given as :doc:`Entity` objects
+            or as dictionaries.
+
+        importance
+            The importance as used in :doc:`Entity.add_property`.
+
+        Returns
+        -------
+        None
+
+        """
+
+        for n, e in props.items():
+
+            if n in KEYWORDS:
+                if n in KEYWORDS_IGNORED:
+                    continue
+                raise YamlDefinitionError("Unexpected keyword in line {}: {}".format(
+                    props["__line__"], n))
+
+            if n == "__line__":
+                continue
+            n = self._stringify(n)
+
+            if isinstance(e, dict):
+                if "datatype" in e and get_list_datatype(e["datatype"]) is not None:
+                    # Reuse the existing datatype for lists.
+                    datatype = db.LIST(get_list_datatype(e["datatype"]))
+                else:
+                    # Ignore a possible e["datatype"] here if it's not a list
+                    # since it has been treated in the definition of the
+                    # property (entity) already
+                    datatype = None
+                if "value" in e:
+                    value = e["value"]
+                else:
+                    value = None
+
+            else:
+                value = e
+                datatype = None
+
+            self.model[ent_name].add_property(name=n,
+                                              value=value,
+                                              importance=importance,
+                                              datatype=datatype)
+
+    def _inherit(self, name, prop, inheritance):
+        if not isinstance(prop, list):
+            if isinstance(prop, str):
+                raise YamlDefinitionError(
+                    f"Parents must be a list but was given as a string: {name} > {prop}")
+            raise YamlDefinitionError("Parents must be a list, error in line {}".format(
+                prop["__line__"]))
+
+        for pname in prop:
+            if not isinstance(pname, str):
+                raise ValueError("Only provide the names of parents.")
+            self.model[name].add_parent(name=pname, inheritance=inheritance)
+
+    def _treat_entity(self, name, definition, line=None):
+        """Parse the definition and add the information to the entity."""
+
+        if name == "__line__":
+            return
+        name = self._stringify(name)
+
+        try:
+            if definition is None:
+                return
+
+            # for setting values of properties directly:
+            if not isinstance(definition, dict):
+                return
+
+            # These definition items must be handled even for list props.
+            for prop_name, prop in definition.items():
+                if prop_name == "description":
+                    self.model[name].description = prop
+
+            # For lists, everything else is not needed at this level.
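+            # (e.g. for a definition like ``datatype: LIST<Person>`` only the
+            # description is set here; the element type is handled by its own
+            # definition. "Person" is just an illustrative name.)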
+            if ("datatype" in definition and definition["datatype"].startswith("LIST")):
+                return
+
+            if name in self.treated:
+                raise TwiceDefinedException(name)
+
+            # for reducing a little bit of code duplication:
+            importance_dict = {
+                "recommended_properties": db.RECOMMENDED,
+                "obligatory_properties": db.OBLIGATORY,
+                "suggested_properties": db.SUGGESTED
+            }
+
+            for prop_name, prop in definition.items():
+                if prop_name == "__line__":
+                    continue
+                line = definition["__line__"]
+
+                if prop_name == "unit":
+                    self.model[name].unit = prop
+
+                elif prop_name == "value":
+                    self.model[name].value = prop
+
+                elif prop_name == "description":
+                    # Handled above
+                    continue
+
+                elif prop_name in importance_dict:
+                    for imp_name, imp_val in importance_dict.items():
+                        if prop_name == imp_name:
+                            self._add_to_recordtype(
+                                name, prop, importance=imp_val)
+
+                    for n, e in prop.items():
+                        self._treat_entity(n, e)
+
+                # datatype is already set
+                elif prop_name == "datatype":
+                    continue
+
+                # role has already been used
+                elif prop_name == "role":
+                    continue
+
+                elif prop_name == "inherit_from_obligatory":
+                    self._inherit(name, prop, db.OBLIGATORY)
+                elif prop_name == "inherit_from_recommended":
+                    self._inherit(name, prop, db.RECOMMENDED)
+                elif prop_name == "inherit_from_suggested":
+                    self._inherit(name, prop, db.SUGGESTED)
+
+                else:
+                    raise ValueError("invalid keyword: {}".format(prop_name))
+        except AttributeError as ate:
+            if ate.args[0].endswith("'items'"):
+                raise YamlDefinitionError(line) from None
+        except Exception as e:
+            print("Error in treating: "+name)
+            raise e
+        self.treated.append(name)
+
+    def _check_and_convert_datatypes(self):
+        """Check whether each datatype is valid.
+
+        The datatype of properties is initially stored as a string. Here, we
+        iterate over the properties and check whether each datatype is a base
+        datatype or a name that was defined in the model (or the extern part).
+
+        The string representations are replaced with LinkAhead objects.
+
+        """
+
+        for _, value in self.model.items():
+
+            if isinstance(value, db.Property):
+                dtype = value.datatype
+                is_list = False
+
+                if get_list_datatype(dtype) is not None:
+                    dtype = get_list_datatype(dtype)
+                    is_list = True
+
+                dtype_name = dtype
+                if not isinstance(dtype_name, str):
+                    dtype_name = dtype.name
+
+                if dtype_name in self.model:
+                    if is_list:
+                        value.datatype = db.LIST(self.model[dtype_name])
+                    else:
+                        value.datatype = self.model[dtype_name]
+
+                    continue
+
+                if dtype in [db.DOUBLE,
+                             db.REFERENCE,
+                             db.TEXT,
+                             db.DATETIME,
+                             db.INTEGER,
+                             db.FILE,
+                             db.BOOLEAN]:
+
+                    if is_list:
+                        value.datatype = db.LIST(db.__getattribute__(  # pylint: disable=no-member
+                            dtype))
+                    else:
+                        value.datatype = db.__getattribute__(  # pylint: disable=no-member
+                            dtype)
+
+                    continue
+
+                raise ValueError("Property {} has an unknown datatype: {}".format(
+                    value.name, dtype_name))
+
+    def _set_recordtypes(self):
+        """Properties were defined in the first iteration; set the remaining
+        entities as RecordTypes."""
+
+        for key, value in self.model.items():
+            if value is None:
+                self.model[key] = db.RecordType(name=key)
+
+
+class JsonSchemaParser(Parser):
+    """Extends the yaml parser to read in datamodels defined in a json schema.
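+
+    For illustration (hypothetical schema): ``{"title": "Experiment", "type":
+    "object", "properties": {"date": {"type": "string", "format": "date"}}}``
+    would be mapped to a RecordType "Experiment" with a DATETIME Property
+    "date".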
+
+    **EXPERIMENTAL:** While this class can already be used to create data models
+    from basic json schemas, there are the following limitations and missing
+    features:
+
+    * Due to limitations of json-schema itself, we currently do not support
+      inheritance in the imported data models
+    * The same goes for suggested properties of RecordTypes
+    * Already defined RecordTypes and (scalar) Properties can't be re-used as
+      list properties
+    * Reference properties that are different from the referenced RT. (Although
+      this is possible for lists of references)
+    * Values
+    * Roles
+    * The extern keyword from the yaml parser
+
+    """
+    # @author Florian Spreckelsen
+    # @date 2022-02-17
+    # @review Timm Fitschen 2023-05-25
+
+    def __init__(self, types_for_missing_array_items=None,
+                 ignore_unspecified_array_items=False):
+        super().__init__()
+        if types_for_missing_array_items is None:
+            types_for_missing_array_items = {}
+        self.types_for_missing_array_items = types_for_missing_array_items
+        self.ignore_unspecified_array_items = ignore_unspecified_array_items
+
+    def parse_model_from_json_schema(self, filename: str, top_level_recordtype: bool = True):
+        """Return a datamodel created from the definition in the json schema in
+        `filename`.
+
+        Parameters
+        ----------
+        filename : str
+            The path to the json-schema file containing the datamodel definition
+        top_level_recordtype : bool, optional
+            Whether there is a record type defined at the top level of the
+            schema. Default is true.
+
+        Returns
+        -------
+        out : data_model.DataModel
+            The created DataModel
+        """
+        # @author Florian Spreckelsen
+        # @date 2022-02-17
+        # @review Timm Fitschen 2023-05-25
+        with open(filename, 'r', encoding="utf-8") as schema_file:
+            model_dict = jsonref.load(schema_file)
+
+        return self._create_model_from_dict(model_dict, top_level_recordtype=top_level_recordtype)
+
+    # ToDo: Fix https://gitlab.indiscale.com/caosdb/src/caosdb-advanced-user-tools/-/issues/139
+    # and remove pylint disable
+    def _create_model_from_dict(self, model_dict: Union[dict, List[dict]], top_level_recordtype: bool = True):  # pylint: disable=arguments-renamed
+        """Parse a dictionary and return the Datamodel created from it.
+
+        The dictionary was typically created from the model definition in a json schema file.
+
+        Parameters
+        ----------
+        model_dict : dict or list[dict]
+            One or several dictionaries read in from a json-schema file
+        top_level_recordtype : bool, optional
+            Whether there is a record type defined at the top level of the
+            schema. Default is true.
+
+        Returns
+        -------
+        out : data_model.DataModel
+            The datamodel defined in `model_dict`
+        """
+        # @review Timm Fitschen 2023-05-25
+        if isinstance(model_dict, dict):
+            model_dict = [model_dict]
+
+        for ii, elt in enumerate(model_dict):
+            try:
+                jsonschema.Draft202012Validator.check_schema(elt)
+            except jsonschema.SchemaError as err:
+                key = elt["title"] if "title" in elt else f"element {ii}"
+                raise JsonSchemaDefinitionError(
+                    f"Json Schema error in {key}:\n{str(err)}") from err
+
+            if top_level_recordtype:
+                if "title" not in elt:
+                    raise JsonSchemaDefinitionError(
+                        f"Object {ii+1} is lacking the `title` keyword")
+                if "type" not in elt:
+                    raise JsonSchemaDefinitionError(
+                        f"Object {ii+1} is lacking the `type` keyword")
+                # Check if this is a valid Json Schema
+                name = self._stringify(elt["title"], context=elt)
+                self._treat_element(elt, name)
+            elif "properties" in elt or "patternProperties" in elt:
+                # No top-level type but there are entities
+                if "properties" in elt:
+                    for key, prop in elt["properties"].items():
+                        name = self._get_name_from_property(key, prop)
+                        self._treat_element(prop, name)
+                if "patternProperties" in elt:
+                    # See also treatment in ``_treat_record_type``. Since here
+                    # there is no top-level RT, we use the prefix `__Pattern`,
+                    # i.e., the resulting Record Types will be called
+                    # `__PatternElement`.
+                    self._treat_pattern_properties(
+                        elt["patternProperties"], name_prefix="__Pattern")
+            else:
+                # Neither RecordType itself, nor further properties in schema,
+                # so nothing to do here. Maybe add something in the future.
+                continue
+
+        return DataModel(self.model.values())
+
+    def _get_name_from_property(self, key: str, prop: dict):
+        # @review Timm Fitschen 2023-05-25
+        if "title" in prop:
+            name = self._stringify(prop["title"])
+        else:
+            name = self._stringify(key)
+
+        return name
+
+    def _get_atomic_datatype(self, elt):
+        # @review Timm Fitschen 2023-05-25
+        if elt["type"] == "string":
+            if "format" in elt and elt["format"] in ["date", "date-time"]:
+                return db.DATETIME
+            else:
+                return db.TEXT
+        elif elt["type"] == "integer":
+            return db.INTEGER
+        elif elt["type"] == "number":
+            return db.DOUBLE
+        elif elt["type"] == "boolean":
+            return db.BOOLEAN
+        elif elt["type"] == "null":
+            # This could be any datatype since a valid json will never have a
+            # value in a null property. We use TEXT for convenience.
+            return db.TEXT
+        else:
+            raise JsonSchemaDefinitionError(f"Unknown atomic type in {elt}.")
+
+    def _treat_element(self, elt: dict, name: str):
+        # @review Timm Fitschen 2023-05-25
+        force_list = False
+        if name in self.model:
+            return self.model[name], force_list
+        if "type" not in elt:
+            # Each element must have a specific type
+            raise JsonSchemaDefinitionError(
+                f"`type` is missing in element {name}.")
+        if name == "name":
+            # This is identified with the LinkAhead name property as long as the
+            # type is correct.
+            if not elt["type"] == "string" and "string" not in elt["type"]:
+                raise JsonSchemaDefinitionError(
+                    "The 'name' property must be string-typed, otherwise it cannot "
+                    "be identified with LinkAhead's name property."
+                )
+            return None, force_list
+        # LinkAhead supports null for all types, so in the very special case of
+        # `"type": ["null", "<other_type>"]`, only consider the other type:
+        if isinstance(elt["type"], list) and len(elt["type"]) == 2 and "null" in elt["type"]:
+            elt["type"].remove("null")
+            elt["type"] = elt["type"][0]
+        if "enum" in elt:
+            ent = self._treat_enum(elt, name)
+        elif elt["type"] in JSON_SCHEMA_ATOMIC_TYPES:
+            ent = db.Property(
+                name=name, datatype=self._get_atomic_datatype(elt))
+        elif elt["type"] == "object":
+            ent = self._treat_record_type(elt, name)
+        elif elt["type"] == "array":
+            ent, force_list = self._treat_list(elt, name)
+        else:
+            raise NotImplementedError(
+                f"Cannot parse items of type '{elt['type']}' (yet).")
+        if "description" in elt and ent is not None and ent.description is None:
+            # There is a description and it hasn't been set by another
+            # treat_something function
+            ent.description = elt["description"]
+
+        if ent is not None:
+            self.model[name] = ent
+        return ent, force_list
+
+    def _treat_record_type(self, elt: dict, name: str):
+        # @review Timm Fitschen 2023-05-25
+        rt = db.RecordType(name=name)
+        if "required" in elt:
+            required = elt["required"]
+        else:
+            required = []
+        if "properties" in elt:
+            for key, prop in elt["properties"].items():
+                name = self._get_name_from_property(key, prop)
+                prop_ent, force_list = self._treat_element(prop, name)
+                if prop_ent is None:
+                    # Nothing to be appended since the property has to be
+                    # treated specially.
+                    continue
+                importance = db.OBLIGATORY if key in required else db.RECOMMENDED
+                if not force_list:
+                    rt.add_property(prop_ent, importance=importance)
+                else:
+                    # Special case of rt used as a list property
+                    rt.add_property(prop_ent, importance=importance,
+                                    datatype=db.LIST(prop_ent))
+
+        if "patternProperties" in elt:
+
+            pattern_property_rts = self._treat_pattern_properties(
+                elt["patternProperties"], name_prefix=name)
+            for ppr in pattern_property_rts:
+                # add reference to pattern property type. These can never be
+                # obligatory since pattern properties cannot be required in the
+                # original schema (since their actual names are not known a
+                # priori).
+                rt.add_property(ppr)
+
+        if "description" in elt:
+            rt.description = elt["description"]
+        return rt
+
+    def _treat_enum(self, elt: dict, name: str):
+        # @review Timm Fitschen 2022-02-30
+        if "type" in elt and elt["type"] == "integer":
+            raise NotImplementedError(
+                "Integer-enums are not allowed until "
+                "https://gitlab.indiscale.com/caosdb/src/caosdb-server/-/issues/224 "
+                "has been fixed."
+ ) + rt = db.RecordType(name=name) + for enum_elt in elt["enum"]: + rec = db.Record(name=self._stringify(enum_elt)) + rec.add_parent(rt) + self.model[enum_elt] = rec + + return rt + + def _treat_list(self, elt: dict, name: str): + # @review Timm Fitschen 2023-05-25 + + if "items" not in elt and name not in self.types_for_missing_array_items: + if self.ignore_unspecified_array_items: + return None, False + raise JsonSchemaDefinitionError( + f"The definition of the list items is missing in {elt}.") + if "items" in elt: + items = elt["items"] + if "enum" in items: + return self._treat_enum(items, name), True + if items["type"] in JSON_SCHEMA_ATOMIC_TYPES: + datatype = db.LIST(self._get_atomic_datatype(items)) + return db.Property(name=name, datatype=datatype), False + if items["type"] == "object": + if "title" not in items or self._stringify(items["title"]) == name: + # Property is RecordType + return self._treat_record_type(items, name), True + else: + # List property will be an entity of its own with a name + # different from the referenced RT + ref_rt = self._treat_record_type( + items, self._stringify(items["title"])) + self.model[ref_rt.name] = ref_rt + return db.Property(name=name, datatype=db.LIST(ref_rt)), False + else: + # Use predefined type: + datatype = db.LIST(self.types_for_missing_array_items[name]) + return db.Property(name=name, datatype=datatype), False + + def _get_pattern_prop(self): + # @review Timm Fitschen 2023-05-25 + if "__pattern_property_pattern_property" in self.model: + return self.model["__pattern_property_pattern_property"] + pp = db.Property(name="__matched_pattern", datatype=db.TEXT) + self.model["__pattern_property_pattern_property"] = pp + return pp + + def _treat_pattern_properties(self, pattern_elements, name_prefix=""): + """Special Treatment for pattern properties: A RecordType is created for + each pattern property. In case of a `type: object` PatternProperty, the + remaining properties of the JSON entry are appended to the new + RecordType; in case of an atomic type PatternProperty, a single value + Property is added to the RecordType. + + Raises + ------ + NotImplementedError + In case of patternProperties with non-object, non-atomic type, e.g., + array. + + """ + # @review Timm Fitschen 2023-05-25 + num_patterns = len(pattern_elements) + pattern_prop = self._get_pattern_prop() + returns = [] + for ii, (key, element) in enumerate(pattern_elements.items()): + if "title" not in element: + name_suffix = f"_{ii+1}" if num_patterns > 1 else "" + name = name_prefix + "Entry" + name_suffix + else: + name = element["title"] + if element["type"] == "object": + # simple, is already an object, so can be treated like any other + # record type. + pattern_type = self._treat_record_type(element, name) + elif element["type"] in JSON_SCHEMA_ATOMIC_TYPES: + # create a property that stores the actual value of the pattern + # property. 
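+                # (Illustration with a hypothetical name: a "string"-typed
+                # pattern property titled "Foo" yields a TEXT Property
+                # "Foo_value" attached to a new RecordType "Foo".)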
+ propname = f"{name}_value" + prop = db.Property(name=propname, datatype=self._get_atomic_datatype(element)) + self.model[propname] = prop + pattern_type = db.RecordType(name=name) + pattern_type.add_property(prop) + else: + raise NotImplementedError( + "Pattern properties are currently only supported for types " + + ", ".join(JSON_SCHEMA_ATOMIC_TYPES) + ", and object.") + + # Add pattern property and description + pattern_type.add_property(pattern_prop, importance=db.OBLIGATORY) + if pattern_type.description: + pattern_type.description += f"\n\npattern: {key}" + else: + pattern_type.description = f"pattern: {key}" + + self.model[name] = pattern_type + returns.append(pattern_type) + + return returns + + +def main(): + parser = argparse.ArgumentParser(description=__doc__, + formatter_class=argparse.RawTextHelpFormatter) + parser.add_argument("data_model", + help="Path name of the data model file (yaml or json) to be used.") + parser.add_argument("--sync", action="store_true", + help="Whether or not to sync the data model with the server.") + parser.add_argument("--noquestion", action="store_true", + help="Whether or not to ask questions during synchronization.") + parser.add_argument("--print", action="store_true", + help="Whether or not to print the data model.") + + args = parser.parse_args() + if args.data_model.endswith(".json"): + model = parse_model_from_json_schema(args.data_model) + elif args.data_model.endswith(".yml") or args.data_model.endswith(".yaml"): + model = parse_model_from_yaml(args.data_model) + else: + raise RuntimeError(f"Unknown file ending of data model: {args.data_model}") + if args.print: + print(model) + if args.sync: + model.sync_data_model(noquestion=args.noquestion) + + +if __name__ == "__main__": + main() diff --git a/src/caosadvancedtools/pandoc_header_tools.py b/src/caosadvancedtools/pandoc_header_tools.py index e0e62c8c75c7759338972b274c5c79617347aeeb..88cdbc19ffa8d5dd724be0e1042fb2a42094c7dc 100644 --- a/src/caosadvancedtools/pandoc_header_tools.py +++ b/src/caosadvancedtools/pandoc_header_tools.py @@ -30,10 +30,7 @@ # D. Hornung 2019-02 # T. Fitschen 2019-02 -import argparse -import glob import os -import re import yaml @@ -71,31 +68,30 @@ description: """ -def get_header(filename, add_header=False): - """Open an md file identified by filename and read out the yaml -header. +def get_header(filename, add_header_to_file=False): + """Open an md file identified by filename and read out the yaml header. -filename can also be a folder. In this case folder/README.md will be used for -getting the header. + filename can also be a folder. In this case folder/README.md will be used + for getting the header. -If a header is found a tuple is returned: (first yaml header line index, last+1 -yaml header line index, header) + If a header is found a tuple is returned: (first yaml header line index, + last+1 yaml header line index, header) -Otherwise, if `add_header` is True, a header is added and the function is called -again. + Otherwise, if `add_header_to_file` is True, a header is added and the + function is called again. -The header is normalized in the following way: + The header is normalized in the following way: -- If the value to a key is a string, a list with that string as only element is - returned. + - If the value to a key is a string, a list with that string as only + element is returned. 
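+      (For illustration, with hypothetical metadata: ``author: Jane`` is
+      returned as ``{"author": ["Jane"]}``.)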
-From https://pandoc.org/MANUAL.html: - -A YAML metadata block is a valid YAML object, delimited by a line of three -hyphens (---) at the top and a line of three hyphens (---) or three dots (...) -at the bottom. A YAML metadata block may occur anywhere in the document, but if -it is not at the beginning, it must be preceded by a blank line. + From https://pandoc.org/MANUAL.html: + A YAML metadata block is a valid YAML object, delimited by a line of three + hyphens (---) at the top and a line of three hyphens (---) or three + dots (...) at the bottom. A YAML metadata block may occur anywhere in the + document, but if it is not at the beginning, it must be preceded by a blank + line. """ if os.path.isdir(filename): @@ -106,16 +102,14 @@ it is not at the beginning, it must be preceded by a blank line. if not os.path.exists(filename): raise MetadataFileMissing(filename) - with open(filename) as f: + with open(filename, encoding="utf-8") as f: textlines = f.readlines() state = 0 - found_0 = -1 found_1 = -1 found_2 = -1 for i, line in enumerate(textlines): if len(line) == 1 and state in {-1, 0}: - found_0 = i state = 0 continue if line.rstrip() == "---" and state == 0: @@ -144,14 +138,14 @@ it is not at the beginning, it must be preceded by a blank line. try: yaml_part = yaml.load("\n".join(headerlines), Loader=yaml.BaseLoader) except yaml.scanner.ScannerError as e: - raise ParseErrorsInHeader(filename, e) + raise ParseErrorsInHeader(filename, e) from e # except yaml.error.MarkedYAMLError as e: # raise NoValidHeader(filename) if not isinstance(yaml_part, dict): raise NoValidHeader(filename) return (found_1, found_2, clean_header(yaml_part)) - if not add_header: + if not add_header_to_file: raise NoValidHeader(filename) else: print("Adding header in: {fn}".format(fn=filename)) @@ -171,7 +165,7 @@ def save_header(filename, header_data): if os.path.isdir(filename): filename = os.path.join(filename, "README.md") - with open(filename) as f: + with open(filename, encoding="utf-8") as f: textlines = f.readlines() while textlines[header_data[0]] != "...\n": @@ -184,7 +178,7 @@ def save_header(filename, header_data): default_flow_style=False, allow_unicode=True)) - with open(filename, "w") as f: + with open(filename, "w", encoding="utf-8") as f: f.writelines(textlines) @@ -202,7 +196,7 @@ def add_header(filename, header_dict=None): filename = os.path.join(filename, "README.md") if os.path.exists(filename): - with open(filename) as f: + with open(filename, encoding="utf-8") as f: textlines = f.readlines() else: textlines = "" @@ -214,7 +208,7 @@ def add_header(filename, header_dict=None): default_flow_style=False, allow_unicode=True) + "...\n" - with open(filename, "w") as f: + with open(filename, "w", encoding="utf-8") as f: f.write(localheader) f.writelines(textlines) diff --git a/src/caosadvancedtools/scifolder/withreadme.py b/src/caosadvancedtools/scifolder/withreadme.py index 94280b8068d36c0117681b50270f819c7ceb0c8b..faab94cbafb18e1b28e7fc71f1d30eba7325e6f1 100644 --- a/src/caosadvancedtools/scifolder/withreadme.py +++ b/src/caosadvancedtools/scifolder/withreadme.py @@ -156,8 +156,8 @@ class WithREADME(object): for f in sublist] if len(flat_list) == 0: - LOGGER.warn("ATTENTION: the field {} does not reference any " - "known files".format(field.key)) + LOGGER.warning(f"ATTENTION: the field {field.key} does not" + " reference any known files") self.attached_filenames.extend(flat_list) # pylint: disable=no-member diff --git a/src/caosadvancedtools/structure_mapping.py 
b/src/caosadvancedtools/structure_mapping.py index bf446c2a240ca16610527b38c67f3c29a75eca4b..aba47058cb992587089ac9af3559ca9981aa96f9 100644 --- a/src/caosadvancedtools/structure_mapping.py +++ b/src/caosadvancedtools/structure_mapping.py @@ -25,6 +25,10 @@ from linkahead.common.utils import uuid from .cfood import (assure_has_description, assure_has_parent, assure_property_is) +# The pylint warnings triggered in this file are ignored, as this code is +# assumed to be deprecated in the near future. Should this change, they need +# to be reevaluated. + class EntityMapping(object): """ @@ -39,9 +43,9 @@ class EntityMapping(object): self.to_target = {} def add(self, target, existing): - if target._cuid is None: - target._cuid = str(uuid()) - self.to_existing[str(target._cuid)] = existing + if target.cuid is None: + target._cuid = str(uuid()) # pylint: disable=protected-access + self.to_existing[str(target.cuid)] = existing self.to_target[existing.id] = target @@ -103,11 +107,11 @@ def update_structure(em, updating: db.Container, target_structure: db.Record): A record which may have references to other records. Must be a DAG. """ - if target_structure._cuid in em.to_existing: + if target_structure.cuid in em.to_existing: update_matched_entity(em, updating, target_structure, - em.to_existing[target_structure._cuid]) + em.to_existing[target_structure.cuid]) for prop in target_structure.get_properties(): if prop.is_reference(server_retrieval=True): @@ -134,8 +138,8 @@ def update_matched_entity(em, updating, target_record, existing_record): # check for remaining property types if isinstance(prop.value, db.Entity): - if prop.value._cuid in em.to_existing: - value = em.to_existing[prop.value._cuid].id + if prop.value.cuid in em.to_existing: + value = em.to_existing[prop.value.cuid].id else: value = prop.value.id else: diff --git a/src/caosadvancedtools/suppressKnown.py b/src/caosadvancedtools/suppressKnown.py index 1b31de7e9d8f1fdce35a135d558dd5ceea3bca2a..aada4ef64df6a496c4d1c30006cca23bd3e329c8 100644 --- a/src/caosadvancedtools/suppressKnown.py +++ b/src/caosadvancedtools/suppressKnown.py @@ -28,6 +28,7 @@ class SuppressKnown(logging.Filter): """ def __init__(self, db_file=None): + super().__init__() if db_file: self.db_file = db_file else: diff --git a/src/caosadvancedtools/table_converter.py b/src/caosadvancedtools/table_converter.py index 2f0d4cc9285e43627e75e3971ee41d224254e902..19e6d85f01a38708a8fc0ad4e7b714e05e4630bd 100644 --- a/src/caosadvancedtools/table_converter.py +++ b/src/caosadvancedtools/table_converter.py @@ -25,7 +25,6 @@ import re import sys import linkahead as db -import numpy as np import pandas as pd @@ -74,7 +73,7 @@ def from_table(spreadsheet, recordtype): """ parses a pandas DataFrame to a list of records """ records = db.Container() - for idx, row in spreadsheet.iterrows(): + for _, row in spreadsheet.iterrows(): rec = db.Record() rec.add_parent(name=recordtype) @@ -99,8 +98,7 @@ def from_table(spreadsheet, recordtype): return records -if __name__ == "__main__": - +def main(): p = argparse.ArgumentParser() p.add_argument("-f", "--filename", help="The excel filename") p.add_argument("--auth-token") @@ -111,3 +109,7 @@ if __name__ == "__main__": recordtype = "Experiment" from_tsv(arg.filename, recordtype) + + +if __name__ == "__main__": + main() diff --git a/src/caosadvancedtools/table_export.py b/src/caosadvancedtools/table_export.py index 00e644e436bde7e84043175bdeb050f07b969a3d..32191530fed0eaadbc52bee7ba41ec64fb148df3 100644 --- a/src/caosadvancedtools/table_export.py 
+++ b/src/caosadvancedtools/table_export.py @@ -123,12 +123,12 @@ class BaseTableExporter(object): self.export_dict = export_dict else: try: - with open(export_dict) as tmp: + with open(export_dict, encoding="utf-8") as tmp: self.export_dict = json.load(tmp) - except BaseException: + except Exception as e: raise ValueError( "export_dict must be either a dictionary" - " or the path to a json file.") + " or the path to a json file.") from e self.record = record self._check_sanity_of_export_dict() self.raise_error_if_missing = raise_error_if_missing @@ -159,7 +159,7 @@ class BaseTableExporter(object): logger.debug(exc) errmssg = "Empty or invalid query '{}' for entry {}".format( q, e) - raise TableExportError(errmssg) + raise TableExportError(errmssg) from exc if val is not None: self.info[e] = val @@ -172,7 +172,7 @@ class BaseTableExporter(object): self.info[e] = val else: self._append_missing(e, d) - except Exception as exc: + except Exception as exc: # pylint: disable=broad-exception-caught self._append_missing(e, d) logger.error(exc) # last resort: check if record has e as property: @@ -189,7 +189,7 @@ class BaseTableExporter(object): errmssg += ", nor does record {} have a property of that name".format( self.record.id) errmssg += "." - raise TableExportError(errmssg) + raise TableExportError(errmssg) from exc if self.missing: errmssg = "The following mandatory entries are missing:\n" diff --git a/src/caosadvancedtools/table_importer.py b/src/caosadvancedtools/table_importer.py index cd1b206f7ebbe7730692a3a6a7137e4aa467a5eb..c2cb0250579cff20a6f7088065a7e9b22bb64ec6 100755 --- a/src/caosadvancedtools/table_importer.py +++ b/src/caosadvancedtools/table_importer.py @@ -110,8 +110,7 @@ def date_converter(val, fmt="%Y-%m-%d"): return datetime_converter(val, fmt=fmt).date() -def incomplete_date_converter(val, fmts={"%Y-%m-%d": "%Y-%m-%d", - "%Y-%m": "%Y-%m", "%Y": "%Y"}): +def incomplete_date_converter(val, fmts=None): """ if the value is already a datetime, it is returned otherwise it converts it using format string @@ -124,6 +123,8 @@ def incomplete_date_converter(val, fmts={"%Y-%m-%d": "%Y-%m-%d", keys are the formats into which the input value is tried to be converted, values are the possible input formats. """ + if fmts is None: + fmts = {"%Y-%m-%d": "%Y-%m-%d", "%Y-%m": "%Y-%m", "%Y": "%Y"} for to, fro in fmts.items(): try: @@ -496,7 +497,7 @@ class XLSImporter(TableImporter): str(e)), extra={'identifier': str(filename), 'category': "inconsistency"}) - raise DataInconsistencyError(*e.args) + raise DataInconsistencyError(*e.args) from e if len(xls_file.sheet_names) > 1: # Multiple sheets is the default now. Only show in debug @@ -514,7 +515,7 @@ class XLSImporter(TableImporter): "Cannot parse {}.\n{}".format(filename, e), extra={'identifier': str(filename), 'category': "inconsistency"}) - raise DataInconsistencyError(*e.args) + raise DataInconsistencyError(*e.args) from e df = self.check_dataframe(df, filename) @@ -536,7 +537,7 @@ class CSVImporter(TableImporter): "Cannot parse {}.\n{}".format(filename, ve), extra={'identifier': str(filename), 'category': "inconsistency"}) - raise DataInconsistencyError(*ve.args) + raise DataInconsistencyError(*ve.args) from ve except TypeError as te: # Iterate through the columns and rows to identify # problematic cells with wrong types. 
@@ -576,7 +577,7 @@ class CSVImporter(TableImporter): for err in error_list: msg += f" * column \"{err[0]}\": Expected \"{err[1]}\" but found \"{err[2]}\".\n" msg += '\n' - raise DataInconsistencyError(msg) + raise DataInconsistencyError(msg) from te df = self.check_dataframe(df, filename) @@ -584,5 +585,5 @@ class CSVImporter(TableImporter): class TSVImporter(CSVImporter): - def read_file(self, filename, **kwargs): + def read_file(self, filename, **kwargs): # pylint: disable=arguments-differ return super().read_file(filename, sep="\t", **kwargs) diff --git a/src/caosadvancedtools/table_json_conversion/convert.py b/src/caosadvancedtools/table_json_conversion/convert.py index 87427cf842d9213a4e3c335ebfb3a0b862f52439..4b02fa46a8e7a2426118cd987e5d84f906e2dfdb 100644 --- a/src/caosadvancedtools/table_json_conversion/convert.py +++ b/src/caosadvancedtools/table_json_conversion/convert.py @@ -707,8 +707,10 @@ def _set_in_nested(mydict: dict, path: list, value: Any, prefix: list = [], skip return mydict +# ToDo: Fix https://gitlab.indiscale.com/caosdb/src/caosdb-advanced-user-tools/-/issues/138 +# and remove pylint disable def to_dict(xlsx: Union[str, BinaryIO], schema: Union[dict, str, TextIO], - validate: bool = None, strict: bool = False) -> dict: + validate: Optional[bool] = None, strict: bool = False) -> dict: """Convert the xlsx contents to a dict, it must follow a schema. Parameters @@ -731,5 +733,9 @@ def to_dict(xlsx: Union[str, BinaryIO], schema: Union[dict, str, TextIO], out: dict A dict representing the JSON with the extracted data. """ + if validate: + raise NotImplementedError( + "For input validation implement " + "https://gitlab.indiscale.com/caosdb/src/caosdb-advanced-user-tools/-/issues/138") converter = XLSXConverter(xlsx, schema, strict=strict) return converter.to_dict() diff --git a/src/caosadvancedtools/utils.py b/src/caosadvancedtools/utils.py index 9a0342e991ddd9d7ea5fe907057a1d3d97ecf5ef..f64900c04e8f1d126a7299662a2b529aa9f027b8 100644 --- a/src/caosadvancedtools/utils.py +++ b/src/caosadvancedtools/utils.py @@ -25,17 +25,15 @@ import logging import os -import pathlib import linkahead as db -from linkahead.exceptions import TransactionError +from linkahead.exceptions import TransactionError, BadQueryError logger = logging.getLogger(__name__) -def set_log_level(level): - logger = logging.getLogger(__name__) - logger.setLevel(level=logging.DEBUG) +def set_log_level(level=logging.DEBUG): + logger.setLevel(level=level) def replace_path_prefix(path, old_prefix, new_prefix): @@ -234,9 +232,8 @@ def find_records_that_reference_ids(referenced_ids, rt="", step_size=50): [str(el) for el in subset])) exps = db.execute_query(q_string) record_ids.update([exp.id for exp in exps]) - except Exception as e: + except (TransactionError, BadQueryError) as e: print(e) - pass index += step_size diff --git a/src/caosadvancedtools/webui_formatter.py b/src/caosadvancedtools/webui_formatter.py index c3c5381d96e86a2698cc16d1bf1a2726566dcd7b..43ebbe061cccc2aec2c12f9193731017bad5e156 100644 --- a/src/caosadvancedtools/webui_formatter.py +++ b/src/caosadvancedtools/webui_formatter.py @@ -92,4 +92,4 @@ class WebUI_Formatter(logging.Formatter): return wrap_bootstrap_alert("<b>CRITICAL ERROR:</b> " + text, kind="danger") else: - raise Exception("unknown level") + raise RuntimeError("unknown level") diff --git a/unittests/table_json_conversion/test_read_xlsx.py b/unittests/table_json_conversion/test_read_xlsx.py index 2a81cdc828300401684f14aef535500b6fe2c99a..ff32c6b1203112c6931e98e55de5e4c981167452 
100644 --- a/unittests/table_json_conversion/test_read_xlsx.py +++ b/unittests/table_json_conversion/test_read_xlsx.py @@ -43,7 +43,7 @@ def rfp(*pathcomponents): def convert_and_compare(xlsx_file: str, schema_file: str, known_good_file: str, known_good_data: Optional[dict] = None, strict: bool = False, - validate: bool = True) -> dict: + validate: bool = False) -> dict: """Convert an XLSX file and compare to a known result. Exactly one of ``known_good_file`` and ``known_good_data`` should be non-empty. @@ -53,6 +53,8 @@ Returns json: dict The result of the conversion. """ + # FIXME Set default "validate" back to True, after implementation of + # https://gitlab.indiscale.com/caosdb/src/caosdb-advanced-user-tools/-/issues/138 result = convert.to_dict(xlsx=xlsx_file, schema=schema_file, validate=validate) if known_good_file: with open(known_good_file, encoding="utf-8") as myfile: diff --git a/unittests/test_json_schema_model_parser.py b/unittests/test_json_schema_model_parser.py index cdd4c07427a5e333ac359f9f2452741e4ed72f68..619714aa6e85d50222252da2b1f7999aceaaa26a 100644 --- a/unittests/test_json_schema_model_parser.py +++ b/unittests/test_json_schema_model_parser.py @@ -357,8 +357,8 @@ def test_name_property(): broken = parse_model_from_json_schema(os.path.join( FILEPATH, "datamodel_name_wrong_type.schema.json")) assert str(err.value).startswith( - "The 'name' property must be string-typed, otherwise it cannot be identified with LinkAhead's " - "name property.") + "The 'name' property must be string-typed, otherwise it cannot be identified with " + "LinkAhead's name property.") def test_no_toplevel_entity():