diff --git a/CHANGELOG.md b/CHANGELOG.md index d97250f2dfa1f9c7d7bdd4694ee2bbdc5f129d2b..3b7e7185db718d9dfdcec32cf4922a1b404d9c75 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed ### +- Yaml data model parser adds data types of properties of record types and other attributes which fixes https://gitlab.indiscale.com/caosdb/customers/f-fit/management/-/issues/58 + ### Security ### ### Documentation ### diff --git a/src/caosadvancedtools/json_schema_exporter.py b/src/caosadvancedtools/json_schema_exporter.py index 5daa5a4ec0543a4ff00c7512e22598b6fe0d50ae..7a92eaad8aeb55ae5c34e529de4848ce4a12b0f9 100644 --- a/src/caosadvancedtools/json_schema_exporter.py +++ b/src/caosadvancedtools/json_schema_exporter.py @@ -58,8 +58,9 @@ from collections import OrderedDict from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple, Union import linkahead as db -from linkahead.cached import cached_query, cache_clear +from linkahead.cached import cache_clear, cached_query from linkahead.common.datatype import get_list_datatype, is_list_datatype + from .models.data_model import DataModel @@ -322,6 +323,10 @@ ui_schema : dict rt = results[0] else: rt = db.Entity() + + if isinstance(rt, str): + raise NotImplementedError("Behavior is not implemented when _no_remote == True and datatype is given as a string.") + subschema, ui_schema = self._make_segment_from_recordtype(rt) if prop.is_reference(): if prop.name: @@ -431,6 +436,7 @@ ui_schema : dict } } """ + schema: OrderedDict[str, Any] = OrderedDict({ "type": "object" }) diff --git a/src/caosadvancedtools/models/data_model.py b/src/caosadvancedtools/models/data_model.py index 4d4cada20c9902c834aab0c481ccefbc856fc77b..92afb7ebd0f225d7cf84e5a977cf05c975ccf160 100644 --- a/src/caosadvancedtools/models/data_model.py +++ b/src/caosadvancedtools/models/data_model.py @@ -111,6 +111,7 @@ class DataModel(dict): existing_entities = db.Container().extend( DataModel.entities_without( self.values(), [e.name.lower() for e in non_existing_entities])) + self.sync_ids_by_name(tmp_exist) if len(non_existing_entities) > 0: @@ -176,7 +177,7 @@ class DataModel(dict): Args ---- entities : iterable - The entities to be retrieved. This object will not be moidified. + The entities to be retrieved. This object will not be modified. Raises ------ diff --git a/src/caosadvancedtools/models/parser.py b/src/caosadvancedtools/models/parser.py index e74de5ac5ea32875bd2b80f69b980d1cf6f177f1..b1e3aa95009c92db5d78471b3b384d9f992a5e1a 100644 --- a/src/caosadvancedtools/models/parser.py +++ b/src/caosadvancedtools/models/parser.py @@ -37,18 +37,17 @@ to be a list with the names. Here, NO NEW entities can be defined. """ import argparse import json -import jsonref import re import sys -import yaml - from typing import List, Optional from warnings import warn +import jsonref import jsonschema import linkahead as db - +import yaml from linkahead.common.datatype import get_list_datatype + from .data_model import LINKAHEAD_INTERNAL_PROPERTIES, DataModel # Keywords which are allowed in data model descriptions. @@ -341,6 +340,33 @@ debug : bool, optional f"invalid keyword in line {entity['__line__']}:", 1) raise ValueError(err_str, *err.args[1:]) from err +# Update properties that are part of record types: +# e.g. add their datatypes, units etc.. +# Otherwise comparison of existing models and the parsed model become difficult. + for name, ent in self.model.items(): + if not isinstance(ent, db.RecordType): + continue + props = ent.get_properties() + for prop in props: + if prop.name in self.model: + model_prop = self.model[prop.name] + # The information must be missing, we don't want to overwrite it accidentally: + if prop.datatype is None: + if isinstance(model_prop, db.RecordType): + prop.datatype = model_prop.name + else: + prop.datatype = model_prop.datatype + # TODO: Data type overwrite is allowed here (because + # of lists), but this might change in the future. + # elif prop.datatype != model_prop.datatype: + # raise RuntimeError("datatype must not be set, here. This is probably a bug.") + if prop.unit is None: + # No unit for plain reference properties + if not isinstance(model_prop, db.RecordType): + prop.unit = model_prop.unit + if prop.description is None: + prop.description = model_prop.description + return DataModel(self.model.values()) @staticmethod @@ -470,6 +496,7 @@ debug : bool, optional """ for n, e in props.items(): + if n in KEYWORDS: if n in KEYWORDS_IGNORED: continue @@ -543,6 +570,13 @@ debug : bool, optional if name in self.treated: raise TwiceDefinedException(name) + # for reducing a little bit of code duplication: + importance_dict = { + "recommended_properties": db.RECOMMENDED, + "obligatory_properties": db.OBLIGATORY, + "suggested_properties": db.SUGGESTED + } + for prop_name, prop in definition.items(): if prop_name == "__line__": continue @@ -558,26 +592,14 @@ debug : bool, optional # Handled above continue - elif prop_name == "recommended_properties": - self._add_to_recordtype( - name, prop, importance=db.RECOMMENDED) - - for n, e in prop.items(): - self._treat_entity(n, e) - - elif prop_name == "obligatory_properties": - self._add_to_recordtype( - name, prop, importance=db.OBLIGATORY) - - for n, e in prop.items(): - self._treat_entity(n, e) - - elif prop_name == "suggested_properties": - self._add_to_recordtype( - name, prop, importance=db.SUGGESTED) + elif prop_name in importance_dict: + for imp_name, imp_val in importance_dict.items(): + if prop_name == imp_name: + self._add_to_recordtype( + name, prop, importance=imp_val) - for n, e in prop.items(): - self._treat_entity(n, e) + for n, e in prop.items(): + self._treat_entity(n, e) # datatype is already set elif prop_name == "datatype": diff --git a/unittests/test_data_model.py b/unittests/test_data_model.py index 354e0bf6f399337ec066f95d5fbe5e7593b56fbb..174f15fcfd4018f200ce98958a7a9f2760f97898 100644 --- a/unittests/test_data_model.py +++ b/unittests/test_data_model.py @@ -44,7 +44,10 @@ RT1: """ model_recursive = parse_model_from_string(model_recursive_str) prop1 = model_recursive["RT1"].get_property("RT1") - assert prop1.datatype is None + + assert prop1.datatype is not None + assert prop1.datatype == "RT1" + # TODO The next line actually changes model_recursive in place, is this OK? RT1 = model_recursive.get_deep("RT1") assert model_recursive["RT1"] == RT1 diff --git a/unittests/test_json_schema_exporter.py b/unittests/test_json_schema_exporter.py index fd6dbf7cd115068c2c172b33a25a24f33a6d373c..3b37f638fc0115fd47757e3bb0afb52c84b66b3e 100644 --- a/unittests/test_json_schema_exporter.py +++ b/unittests/test_json_schema_exporter.py @@ -23,18 +23,16 @@ """Tests the Json schema exporter.""" import json - -import linkahead as db -import caosadvancedtools.json_schema_exporter as jsex - from collections import OrderedDict - -from jsonschema import FormatChecker, validate, ValidationError -from pytest import raises from unittest.mock import Mock, patch -from caosadvancedtools.json_schema_exporter import recordtype_to_json_schema as rtjs +import caosadvancedtools.json_schema_exporter as jsex +import linkahead as db +from caosadvancedtools.json_schema_exporter import \ + recordtype_to_json_schema as rtjs from caosadvancedtools.models.parser import parse_model_from_string +from jsonschema import FormatChecker, ValidationError, validate +from pytest import raises GLOBAL_MODEL = parse_model_from_string(""" RT1: @@ -920,10 +918,12 @@ RT3: schema_noexist = rtjs(model.get_deep("RT3")) assert schema_noexist["properties"]["NoRecords"].get("type") == "object" - schema_noexist_noremote = rtjs(model.get_deep("RT3"), no_remote=True) - assert schema_noexist_noremote["properties"]["NoRecords"].get("type") == "object" - assert (schema_noexist_noremote["properties"]["NoRecords"].get("properties") - == OrderedDict([('some_text', {'type': 'string'})])) + with raises(NotImplementedError, + match="Behavior is not implemented when _no_remote == True and datatype is given as a string."): + schema_noexist_noremote = rtjs(model.get_deep("RT3"), no_remote=True) + assert schema_noexist_noremote["properties"]["NoRecords"].get("type") == "object" + assert (schema_noexist_noremote["properties"]["NoRecords"].get("properties") + == OrderedDict([('some_text', {'type': 'string'})])) uischema = {} schema_noexist_noretrieve = rtjs(model.get_deep("RT2"), do_not_retrieve=["RT1"], diff --git a/unittests/test_yaml_model_parser.py b/unittests/test_yaml_model_parser.py index f8e275078fbd53960673b1241f6bfe7c712c064b..8728e128f11503e607c856b0cb91ff7cb58fdaa7 100644 --- a/unittests/test_yaml_model_parser.py +++ b/unittests/test_yaml_model_parser.py @@ -19,17 +19,16 @@ import unittest from datetime import date from tempfile import NamedTemporaryFile -from pytest import raises, mark - from unittest.mock import Mock -import linkahead as db - import caosadvancedtools +import linkahead as db from caosadvancedtools.models.parser import (TwiceDefinedException, YamlDefinitionError, parse_model_from_string, parse_model_from_yaml) +from linkahead.apiutils import compare_entities +from pytest import mark, raises def to_file(string): @@ -613,14 +612,13 @@ RT2: obligatory_properties: *RT1_oblig """ model = parse_model_from_string(model_string) - assert str(model) == """{'foo': <Property name="foo" datatype="INTEGER"/> -, 'RT1': <RecordType name="RT1"> - <Property name="foo" importance="OBLIGATORY" flag="inheritance:FIX"/> -</RecordType> -, 'RT2': <RecordType name="RT2"> - <Property name="foo" importance="OBLIGATORY" flag="inheritance:FIX"/> -</RecordType> -}""" + + assert len(model) == 3 + assert isinstance(model["foo"], db.Property) + assert model["foo"].datatype == db.INTEGER + for st in ("RT1", "RT2"): + assert isinstance(model[st], db.RecordType) + assert model[st].get_property("foo").datatype == db.INTEGER # Aliasing with override model_string = """ @@ -635,16 +633,125 @@ RT2: bar: """ model = parse_model_from_string(model_string) - assert str(model) == """{'foo': <Property name="foo" datatype="INTEGER"/> -, 'RT1': <RecordType name="RT1"> - <Property name="foo" importance="OBLIGATORY" flag="inheritance:FIX"/> -</RecordType> -, 'RT2': <RecordType name="RT2"> - <Property name="foo" importance="OBLIGATORY" flag="inheritance:FIX"/> - <Property name="bar" importance="OBLIGATORY" flag="inheritance:FIX"/> -</RecordType> -, 'bar': <RecordType name="bar"/> -}""" + + assert len(model) == 4 + assert isinstance(model["bar"], db.RecordType) + for st in ("RT1", "RT2"): + assert isinstance(model[st], db.RecordType) + assert model[st].get_property("foo").datatype == db.INTEGER + assert model["RT2"].get_property("bar").datatype == "bar" + + +def test_comparison_yaml_model(capfd): + """ + Test for this issue: + https://gitlab.indiscale.com/caosdb/src/caosdb-advanced-user-tools/-/issues/130 + """ + model_string = """ +foo: + datatype: INTEGER + description: bla bla + unit: m + +RT1: + obligatory_properties: + foo: + RT2: + datatype: LIST<RT2> + test_reference: + +RT2: + description: Describe RT2 + +test_reference: + datatype: RT2 + """ + model = parse_model_from_string(model_string) + + # Without the fix, foo will have no datatype, description and no unit **as part of RT1**, so the + # comparison with a version taken from a LinkAhead instance will have these attributes. + # Furthermore, RT2 will be set as the datatype **in object version** in the yaml definition, while + # it is an ID in case of the version from the LinkAhead instance. + + server_response = """ +<Entities> + <noscript> + </noscript> + <Property id="2272" name="foo" description="bla bla" datatype="INTEGER" unit="m"> + <Version id="7819eedaeba2aa7305e10c96e8cf7b9ac84aea4a" head="true"/> + </Property> + <RecordType id="2273" name="RT1"> + <Version id="0c1b9df6677ee40d1e1429b2123e078ee6c863e0" head="true"/> + <Property id="2272" name="foo" description="bla bla" datatype="INTEGER" unit="m" importance="OBLIGATORY" flag="inheritance:FIX"/> + <Property id="2274" name="RT2" description="Describe RT2" datatype="LIST<RT2>" importance="OBLIGATORY" flag="inheritance:FIX"/> + <Property id="2275" name="test_reference" datatype="RT2" importance="OBLIGATORY" flag="inheritance:FIX"/> + </RecordType> + <RecordType id="2274" name="RT2" description="Describe RT2"> + <Version id="185940642680a7eba7f71914dd8dd7758dd13faa" head="true"/> + </RecordType> + <Property id="2275" name="test_reference" datatype="RT2"> + <Version id="03cf86061c78a079b376394dfecdf32566b72fb7" head="true"/> + </Property> +</Entities>""" + + entities = db.Container.from_xml(server_response) + + c1 = compare_entities(model["foo"], entities[0]) + c2 = compare_entities(model["RT1"], entities[1]) + c3 = compare_entities(model["RT2"], entities[2]) + c4 = compare_entities(model["test_reference"], entities[3]) + + # Make sure the mock response matches the datamodel definiton + # exactly, i.e., they only differ in ids which are None for all + # entities from the datamodel and not None for the mocked + # response. + for cs in (c1, c2, c3, c4): + assert "id" in cs[0] + assert cs[0]["id"] is None + assert cs[0]["parents"] == [] + for name, val in cs[0]["properties"].items(): + # Also properties differ in ids: The one from the + # datamodel have None + assert len(val) == 1 + assert "id" in val + assert val["id"] is None + assert "id" in cs[1] + assert cs[1]["id"] is not None + assert cs[1]["parents"] == [] + for name, val in cs[1]["properties"].items(): + # Also properties differ in ids: The one from the + # mock response have not None + assert len(val) == 1 + assert "id" in val + assert val["id"] is not None + + # The server response would be the same as the xml above: + + def get_existing_entities(ent_cont): + return entities + + class MockQuery: + def __init__(self, q): + self.q = q + + def execute(self, unique=True): + id = int(self.q.split("=")[1]) + for existing_ent in entities: + if existing_ent.id == id: + return existing_ent + return None + + model.get_existing_entities = get_existing_entities + caosadvancedtools.models.parser.db.Query = MockQuery + caosadvancedtools.models.parser.db.Container.update = Mock() + caosadvancedtools.models.parser.db.Container.insert = Mock() + + model.sync_data_model(True, True) + assert not caosadvancedtools.models.parser.db.Container.update.called + assert not caosadvancedtools.models.parser.db.Container.insert.called + output, err = capfd.readouterr() + assert "No new entities." in output + assert "No differences found. No update" in output def test_sync_output(capfd): @@ -655,10 +762,11 @@ RT: datatype: TEXT """) - existing_entities = [db.RecordType(name="RT", id=25).add_property(name="identifier", - datatype="INTEGER", - importance="OBLIGATORY", - id=24), + existing_entities = [db.RecordType( + name="RT", id=25).add_property(name="identifier", + datatype="INTEGER", + importance="OBLIGATORY", + id=24), db.Property(name="identifier", datatype="INTEGER", id=24)] def get_existing_entities(ent_cont): @@ -678,9 +786,11 @@ RT: model.get_existing_entities = get_existing_entities caosadvancedtools.models.parser.db.Query = MockQuery caosadvancedtools.models.parser.db.Container.update = Mock() + caosadvancedtools.models.parser.db.Container.insert = Mock() model.sync_data_model(True, True) assert caosadvancedtools.models.parser.db.Container.update.called + assert not caosadvancedtools.models.parser.db.Container.insert.called output, err = capfd.readouterr() print(output) assert "version from the yaml file: TEXT" in output