diff --git a/CHANGELOG.md b/CHANGELOG.md index 490d5179d256e1b17259058c433af3e9e8b0aad0..c1d626abc424c2466bb79243f9e88abdc548dfc5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,6 +29,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed ### * `TableImporter.check_missing` in case of array-valued fields in table +* YAML model parser has better description handling. ### Security ### diff --git a/src/caosadvancedtools/json_schema_exporter.py b/src/caosadvancedtools/json_schema_exporter.py index 801a4861d2f6bd1cc16f1b66cffa1bb9c9c5b223..700c24e890c36a5b4219a1c2cc7d74ce38d6d398 100644 --- a/src/caosadvancedtools/json_schema_exporter.py +++ b/src/caosadvancedtools/json_schema_exporter.py @@ -74,7 +74,7 @@ class JsonSchemaExporter: additional `unit` key is added to the schema itself which is purely annotational and ignored, e.g., in validation. Default is True. do_not_create : list[str] - A list of RedcordType names, for which there should be no option + A list of reference Property names, for which there should be no option to create them. Instead, only the choice of existing elements should be given. do_not_retrieve : list[str] @@ -194,9 +194,12 @@ class JsonSchemaExporter: name=prop.name, datatype=get_list_datatype(prop.datatype, strict=True)) json_prop["items"], inner_ui_schema = self._make_segment_from_prop(list_element_prop) if prop.name in self._multiple_choice and prop.name in self._do_not_create: + # TODO: if not multiple_choice, but do_not_create: + # "ui:widget" = "radio" & "ui:inline" = true + # TODO: set threshold for number of items. json_prop["uniqueItems"] = True ui_schema["ui:widget"] = "checkboxes" - ui_schema["ui:options"] = {"inline": True} + ui_schema["ui:inline"] = True if inner_ui_schema: ui_schema["items"] = inner_ui_schema elif prop.is_reference(): @@ -212,6 +215,7 @@ class JsonSchemaExporter: is_list_datatype(prop.datatype) and get_list_datatype(prop.datatype, strict=True) == db.FILE ): + # Singular FILE (wrapped or unwrapped), or wrapped LIST<FILE> if self._wrap_files_in_objects: # Workaround for react-jsonschema-form bug # https://github.com/rjsf-team/react-jsonschema-form/issues/3957: @@ -244,11 +248,11 @@ class JsonSchemaExporter: prop_name = prop.datatype if isinstance(prop.datatype, db.Entity): prop_name = prop.datatype.name - if prop_name in self._do_not_retrieve: + if prop.name in self._do_not_retrieve: values = [] else: values = self._retrieve_enum_values(f"RECORD '{prop_name}'") - if prop_name in self._do_not_create: + if prop.name in self._do_not_create: # Only a simple list of values json_prop["enum"] = values else: @@ -367,6 +371,8 @@ class JsonSchemaExporter: schema["required"] = self._make_required_list(rt) schema["additionalProperties"] = self._additional_properties + if rt.description: + schema["description"] = rt.description if rt.name: schema["title"] = rt.name @@ -509,7 +515,7 @@ def recordtype_to_json_schema(rt: db.RecordType, additional_properties: bool = T additional `unit` key is added to the schema itself which is purely annotational and ignored, e.g., in validation. Default is True. do_not_create : list[str], optional - A list of RedcordType names, for which there should be no option + A list of reference Property names, for which there should be no option to create them. Instead, only the choice of existing elements should be given. do_not_retrieve : list[str], optional diff --git a/src/caosadvancedtools/models/data_model.py b/src/caosadvancedtools/models/data_model.py index 27f60b5ec877c2ad7646fffd4ef735ebb62c1694..266414893bcdf1ab45ee1345fc549e15f4a66250 100644 --- a/src/caosadvancedtools/models/data_model.py +++ b/src/caosadvancedtools/models/data_model.py @@ -31,7 +31,7 @@ from typing import List import linkahead as db import linkahead.common.models as models -from linkahead.apiutils import compare_entities, describe_diff +from linkahead.apiutils import compare_entities, describe_diff, merge_entities CAOSDB_INTERNAL_PROPERTIES = [ @@ -264,7 +264,7 @@ class DataModel(dict): return list(all_ents.values()) - def get_deep(self, name: str, visited_props: set = None, visited_parents: set = None): + def get_deep(self, name: str, visited_props: dict = None, visited_parents: set = None): """Attempt to resolve references for the given ``name``. The returned entity has all the properties it inherits from its ancestry and all properties @@ -279,7 +279,7 @@ class DataModel(dict): if not entity: return entity if not visited_props: - visited_props = set() + visited_props = {} if not visited_parents: visited_parents = set() @@ -309,8 +309,14 @@ class DataModel(dict): for prop in list(entity.get_properties()): # Make a change-resistant list copy. if prop.name in visited_props: + if visited_props[prop.name]: + deep_prop = visited_props[prop.name] + merge_entities(prop, deep_prop) + prop.datatype = deep_prop.datatype + prop.value = deep_prop.value + prop.unit = deep_prop.unit continue - visited_props.add(prop.name) + visited_props[prop.name] = None if prop.name in self: deep_prop = self.get_deep(prop.name, visited_props=visited_props, visited_parents=visited_parents) @@ -322,6 +328,7 @@ class DataModel(dict): linked_prop.datatype = deep_prop if deep_prop.description: linked_prop.description = deep_prop.description + visited_props[prop.name] = deep_prop else: print(f"Referenced property \"{prop.name}\" not found in data model.") diff --git a/src/caosadvancedtools/models/parser.py b/src/caosadvancedtools/models/parser.py index ba63c5cd77217352144e4ec26e052cc2772339ce..37f34e7bcbae48188c96b9bea6434d59571020fd 100644 --- a/src/caosadvancedtools/models/parser.py +++ b/src/caosadvancedtools/models/parser.py @@ -46,8 +46,9 @@ from typing import List, Optional from warnings import warn import jsonschema -import caosdb as db +import linkahead as db +from linkahead.common.datatype import get_list_datatype from .data_model import CAOSDB_INTERNAL_PROPERTIES, DataModel # Keywords which are allowed in data model descriptions. @@ -82,23 +83,6 @@ JSON_SCHEMA_ATOMIC_TYPES = [ ] -def _get_listdatatype(dtype): - """matches a string to check whether the type definition is a list - - returns the type within the list or None, if it cannot be matched with a - list definition - """ - # TODO: string representation should be the same as used by the server: - # e.g. LIST<TEXT> - # this should be changed in the module and the old behavour should be - # marked as depricated - match = re.match(r"^LIST[(<](?P<dt>.*)[)>]$", dtype) - - if match is None: - return None - else: - return match.group("dt") - # Taken from https://stackoverflow.com/a/53647080, CC-BY-SA, 2018 by # https://stackoverflow.com/users/2572431/augurar @@ -140,7 +124,7 @@ class JsonSchemaDefinitionError(RuntimeError): super().__init__(msg) -def parse_model_from_yaml(filename, existing_model: Optional[dict] = None): +def parse_model_from_yaml(filename, existing_model: Optional[dict] = None, debug: bool = False): """Parse a data model from a YAML file. This is a convenience function if the Parser object is not needed, it calls @@ -152,13 +136,16 @@ Parameters existing_model : dict, optional An existing model to which the created model shall be added. + +debug : bool, optional + If True, turn on miscellaneous debugging. Default is False. """ - parser = Parser() + parser = Parser(debug=debug) return parser.parse_model_from_yaml(filename, existing_model=existing_model) -def parse_model_from_string(string, existing_model: Optional[dict] = None): +def parse_model_from_string(string, existing_model: Optional[dict] = None, debug: bool = False): """Parse a data model from a YAML string This is a convenience function if the Parser object is not needed, it calls @@ -169,8 +156,11 @@ Parameters existing_model : dict, optional An existing model to which the created model shall be added. + +debug : bool, optional + If True, turn on miscellaneous debugging. Default is False. """ - parser = Parser() + parser = Parser(debug=debug) return parser.parse_model_from_string(string, existing_model=existing_model) @@ -232,13 +222,20 @@ def parse_model_from_json_schema( class Parser(object): - def __init__(self): + def __init__(self, debug: bool = False): """Initialize an empty parser object and initialize the dictionary of entities and the list of treated elements. +Parameters +---------- + +debug : bool, optional + If True, turn on miscellaneous debugging. Default is False. + """ self.model = {} self.treated = [] + self.debug = debug def parse_model_from_yaml(self, filename, existing_model: Optional[dict] = None): """Create and return a data model from the given file. @@ -399,8 +396,8 @@ class Parser(object): # is it a property and "datatype" in definition # but not simply an RT of the model - and not (_get_listdatatype(definition["datatype"]) == name and - _get_listdatatype(definition["datatype"]) in self.model)): + and not (get_list_datatype(definition["datatype"]) == name and + get_list_datatype(definition["datatype"]) in self.model)): # and create the new property self.model[name] = db.Property(name=name, @@ -450,6 +447,9 @@ class Parser(object): raise YamlDefinitionError(line) from None raise + if self.debug and self.model[name] is not None: + self.model[name].__line__ = definition["__line__"] + def _add_to_recordtype(self, ent_name, props, importance): """Add properties to a RecordType. @@ -483,9 +483,9 @@ class Parser(object): n = self._stringify(n) if isinstance(e, dict): - if "datatype" in e and _get_listdatatype(e["datatype"]) is not None: + if "datatype" in e and get_list_datatype(e["datatype"]) is not None: # Reuse the existing datatype for lists. - datatype = db.LIST(_get_listdatatype(e["datatype"])) + datatype = db.LIST(get_list_datatype(e["datatype"])) else: # Ignore a possible e["datatype"] here if it's not a list # since it has been treated in the definition of the @@ -507,6 +507,9 @@ class Parser(object): def _inherit(self, name, prop, inheritance): if not isinstance(prop, list): + if isinstance(prop, str): + raise YamlDefinitionError( + f"Parents must be a list but is given as string: {name} > {prop}") raise YamlDefinitionError("Parents must be a list, error in line {}".format( prop["__line__"])) @@ -530,9 +533,13 @@ class Parser(object): if not isinstance(definition, dict): return - if ("datatype" in definition - and definition["datatype"].startswith("LIST")): + # These definition items must be handled even for list props. + for prop_name, prop in definition.items(): + if prop_name == "description": + self.model[name].description = prop + # For lists, everything else is not needed at this level. + if ("datatype" in definition and definition["datatype"].startswith("LIST")): return if name in self.treated: @@ -550,7 +557,8 @@ class Parser(object): self.model[name].value = prop elif prop_name == "description": - self.model[name].description = prop + # Handled above + continue elif prop_name == "recommended_properties": self._add_to_recordtype( @@ -624,15 +632,19 @@ class Parser(object): dtype = value.datatype is_list = False - if _get_listdatatype(value.datatype) is not None: - dtype = _get_listdatatype(value.datatype) + if get_list_datatype(dtype) is not None: + dtype = get_list_datatype(dtype) is_list = True - if dtype in self.model: + dtype_name = dtype + if not isinstance(dtype_name, str): + dtype_name = dtype.name + + if dtype_name in self.model: if is_list: - value.datatype = db.LIST(self.model[dtype]) + value.datatype = db.LIST(self.model[dtype_name]) else: - value.datatype = self.model[dtype] + value.datatype = self.model[dtype_name] continue @@ -654,7 +666,7 @@ class Parser(object): continue raise ValueError("Property {} has an unknown datatype: {}".format( - value.name, value.datatype)) + value.name, dtype_name)) def _set_recordtypes(self): """ properties are defined in first iteration; set remaining as RTs """ diff --git a/unittests/test_data_model.py b/unittests/test_data_model.py index 5aa151b2891fd335959098202f35de1152c1b16b..cafeb6ca6a43d7e0409aee3352b43f26d5208732 100644 --- a/unittests/test_data_model.py +++ b/unittests/test_data_model.py @@ -62,3 +62,21 @@ RT1: rt1_deep = model_unresolved.get_deep("RT1") assert rt1_deep == rt1_unresolved assert rt1_deep is rt1_unresolved + + model_double_property = """ +p1: + description: Hello world + datatype: TEXT +RT1: + recommended_properties: + p1: +RT2: + recommended_properties: + RT1: + p1: +""" + model_unresolved = parse_model_from_string(model_double_property) + rt2_deep = model_unresolved.get_deep("RT2") + p1 = rt2_deep.get_property("p1") + assert p1.datatype == "TEXT" + assert p1.description == "Hello world" diff --git a/unittests/test_json_schema_exporter.py b/unittests/test_json_schema_exporter.py index 5b3cd41635a0ecef6a3966fedaa20da5109a3cbd..f0503385a25eb89e66dd3518d71a32b91d07bf88 100644 --- a/unittests/test_json_schema_exporter.py +++ b/unittests/test_json_schema_exporter.py @@ -36,14 +36,26 @@ from unittest.mock import Mock, patch from caosadvancedtools.json_schema_exporter import recordtype_to_json_schema as rtjs from caosadvancedtools.models.parser import parse_model_from_string -RT1 = parse_model_from_string(""" +GLOBAL_MODEL = parse_model_from_string(""" RT1: description: some description obligatory_properties: some_date: datatype: DATETIME description: Just some date -""").get_deep("RT1") +RT21: + obligatory_properties: + RT1: + datatype: LIST<RT1> +RT31: + obligatory_properties: + RT1: + +""") + +RT1 = GLOBAL_MODEL.get_deep("RT1") +RT21 = GLOBAL_MODEL.get_deep("RT21") +RT31 = GLOBAL_MODEL.get_deep("RT31") def _mock_execute_query(query_string, unique=False, **kwargs): @@ -82,24 +94,15 @@ def _mock_execute_query(query_string, unique=False, **kwargs): elif query_string == "FIND RECORDTYPE WITH name='RT1'" and unique is True: return RT1 elif query_string == "FIND RECORDTYPE WITH name='RT21'" and unique is True: - model_str = """ -RT1: -RT21: - obligatory_properties: - RT1: - datatype: LIST<RT1> -RT3: - obligatory_properties: - RT21: - """ - model = parse_model_from_string(model_str) - return model.get_deep("RT21") + return RT21 + elif query_string == "FIND RECORDTYPE WITH name='RT31'" and unique is True: + return RT31 elif query_string == "SELECT name, id FROM RECORD": return all_records elif query_string == "SELECT name, id FROM FILE": return all_files else: - # print(f"Query string: {query_string}") + print(f"Query string: {query_string}") if unique is True: return db.Entity() return db.Container() @@ -632,6 +635,12 @@ RT1: RT2: obligatory_properties: RT1: + +RT3: + obligatory_properties: + RT1_prop: + datatype: RT1 + description: property description """ model = parse_model_from_string(model_str) # First test: without reference @@ -642,6 +651,7 @@ RT2: "some_date" ], "additionalProperties": true, + "description": "some description", "title": "RT1", "properties": { "some_date": { @@ -658,8 +668,7 @@ RT2: ] } }, - "$schema": "https://json-schema.org/draft/2020-12/schema", - "description": "some description" + "$schema": "https://json-schema.org/draft/2020-12/schema" }""" # Second test: with reference rt2_deep = model.get_deep("RT2") @@ -688,6 +697,7 @@ RT2: "some_date" ], "additionalProperties": true, + "description": "some description", "title": "Create new", "properties": { "some_date": { @@ -731,6 +741,20 @@ RT2: }, "$schema": "https://json-schema.org/draft/2020-12/schema" }""" + # No effect of do_not_create (real property name should be used) + rt3_dict = rtjs(model.get_deep("RT3"), do_not_create=["RT1"]) + rt1_prop = rt3_dict["properties"]["RT1_prop"] + assert rt1_prop["description"] == "property description" + assert "oneOf" in rt1_prop.keys() + assert "enum" not in rt1_prop.keys() + + # Now we use the real property name + rt3_dict = rtjs(model.get_deep("RT3"), do_not_create=["RT1_prop"]) + rt1_prop = rt3_dict["properties"]["RT1_prop"] + assert rt1_prop["description"] == "property description" + assert "oneOf" not in rt1_prop.keys() + assert "enum" in rt1_prop.keys() + assert rt1_prop["enum"][0] == "103" def test_schema_modification(): @@ -890,14 +914,14 @@ RT4: schema, uischema = rtjs(model.get_deep("RT21"), additional_properties=False, do_not_create=["RT1"], multiple_choice=["RT1"], rjsf=True) assert schema["properties"]["RT1"]["uniqueItems"] is True - assert str(uischema) == "{'RT1': {'ui:widget': 'checkboxes', 'ui:options': {'inline': True}}}" + assert str(uischema) == "{'RT1': {'ui:widget': 'checkboxes', 'ui:inline': True}}" # second level schema, uischema = rtjs(model.get_deep("RT3"), additional_properties=False, do_not_create=["RT1"], multiple_choice=["RT1"], rjsf=True) assert schema["properties"]["RT21"]["properties"]["RT1"]["uniqueItems"] is True assert (str(uischema) - == "{'RT21': {'RT1': {'ui:widget': 'checkboxes', 'ui:options': {'inline': True}}}}") + == "{'RT21': {'RT1': {'ui:widget': 'checkboxes', 'ui:inline': True}}}") # second level with lists schema, uischema = rtjs(model.get_deep("RT4"), additional_properties=False, @@ -905,7 +929,7 @@ RT4: assert schema["properties"]["RT21"]["items"]["properties"]["RT1"]["uniqueItems"] is True assert (str(uischema) == "{'RT21': {'items': {'RT1': {'ui:widget': 'checkboxes', " - "'ui:options': {'inline': True}}}}}") + "'ui:inline': True}}}}") @patch("linkahead.execute_query", new=Mock(side_effect=_mock_execute_query)) @@ -934,7 +958,7 @@ RT3: merged_dict, merged_dict_ui = jsex.merge_schemas(schemas_dict, uischemas_dict) assert merged_dict_ui["schema_2"] == merged_dict_ui["schema_3"] assert (str(merged_dict_ui["schema_2"]) - == "{'RT1': {'ui:widget': 'checkboxes', 'ui:options': {'inline': True}}}") + == "{'RT1': {'ui:widget': 'checkboxes', 'ui:inline': True}}") # Using lists schemas_list = [schema_2, schema_3] @@ -955,9 +979,10 @@ RT3: assert array2["items"] == schema_2 assert array2_ui["items"] == uischema_2 assert (str(array2_ui["items"]) - == "{'RT1': {'ui:widget': 'checkboxes', 'ui:options': {'inline': True}}}") + == "{'RT1': {'ui:widget': 'checkboxes', 'ui:inline': True}}") +@patch("linkahead.execute_query", new=Mock(side_effect=_mock_execute_query)) def test_schema_customization_with_dicts(): """Testing the ``additional_json_schema`` and ``additional_ui_schema`` parameters.""" model_str = """ diff --git a/unittests/test_yaml_model_parser.py b/unittests/test_yaml_model_parser.py index a26cdf034eea80df0caf64a73ba0d715f66c31db..1019a93a0aa4292cea75fe8fba57e19e55359baa 100644 --- a/unittests/test_yaml_model_parser.py +++ b/unittests/test_yaml_model_parser.py @@ -19,9 +19,9 @@ import unittest from datetime import date from tempfile import NamedTemporaryFile -from pytest import deprecated_call, raises +from pytest import deprecated_call, raises, mark -import caosdb as db +import linkahead as db from caosadvancedtools.models.parser import (TwiceDefinedException, YamlDefinitionError, parse_model_from_string, @@ -302,10 +302,12 @@ A: def test_reference_property(self): """Test correct creation of reference property using an RT.""" - modeldef = """A: + modeldef = """ +A: recommended_properties: ref: datatype: LIST<A> + description: new description """ model = parse_model_from_string(modeldef) self.assertEqual(len(model), 2) @@ -315,6 +317,7 @@ A: elif key == "ref": self.assertTrue(isinstance(value, db.Property)) self.assertEqual(value.datatype, "LIST<A>") + assert value.description == "new description" class ExternTest(unittest.TestCase): @@ -566,3 +569,31 @@ def test_yaml_error(): with raises(ValueError, match=r"line 2: .*"): parse_model_from_yaml("unittests/models/model_invalid.yml") + + +def test_inherit_error(): + """Must fail with an understandable exception.""" + model_string = """ +prop1: + inherit_from_obligatory: prop2 + """ + with raises(YamlDefinitionError, + match=r"Parents must be a list but is given as string: prop1 > prop2"): + parse_model_from_string(model_string) + + +@mark.xfail(reason="""Issue is + https://gitlab.com/linkahead/linkahead-advanced-user-tools/-/issues/57""") +def test_inherit_properties(): + # TODO Is not even specified yet. + model_string = """ +prop1: + datatype: DOUBLE +prop2: +# role: Property + inherit_from_obligatory: + - prop1 + """ + model = parse_model_from_string(model_string) + prop2 = model["prop2"] + assert prop2.role == "Property"