diff --git a/src/caosadvancedtools/models/parser.py b/src/caosadvancedtools/models/parser.py
index e56a492fa3e9199a312d374a622770e7836f42cb..da4cffa6f9122af33a352ebbd11fea738a4ef371 100644
--- a/src/caosadvancedtools/models/parser.py
+++ b/src/caosadvancedtools/models/parser.py
@@ -16,11 +16,14 @@ not defined, simply the name can be supplied with no value.
 Parents can be provided under the 'inherit_from_xxxx' keywords. The value needs
 to be a list with the names. Here, NO NEW entities can be defined.
 """
+import json
 import re
 import sys
+import yaml
+
+from typing import List, Union
 
 import caosdb as db
-import yaml
 
 from .data_model import DataModel
 
@@ -93,6 +96,14 @@ class YamlDefinitionError(RuntimeError):
         super().__init__(template.format(line))
 
 
+class JsonSchemaDefinitionError(RuntimeError):
+    """Raised when a json-schema datamodel definition cannot be parsed."""
+    # @author Florian Spreckelsen
+    # @date 2022-02-17
+    def __init__(self, msg):
+        super().__init__(msg)
+
+
 def parse_model_from_yaml(filename):
     """Shortcut if the Parser object is not needed."""
     parser = Parser()
@@ -107,6 +118,15 @@ def parse_model_from_string(string):
     return parser.parse_model_from_string(string)
 
 
+def parse_model_from_json_schema(filename: str):
+    """Return a datamodel parsed from a json schema definition."""
+    # @author Florian Spreckelsen
+    # @date 2022-02-17
+    parser = JsonSchemaParser()
+
+    return parser.parse_model_from_json_schema(filename)
+
+
 class Parser(object):
     def __init__(self):
         self.model = {}
@@ -432,7 +452,8 @@ class Parser(object):
 
                     continue
 
-                raise ValueError("Property {} has an unknown datatype: {}".format(value.name, value.datatype))
+                raise ValueError("Property {} has an unknown datatype: {}".format(
+                    value.name, value.datatype))
 
     def _set_recordtypes(self):
         """ properties are defined in first iteration; set remaining as RTs """
@@ -442,6 +463,135 @@ class Parser(object):
                 self.model[key] = db.RecordType(name=key)
 
 
+class JsonSchemaParser(Parser):
+    """Extends the yaml parser to read in datamodels defined in a json schema."""
+    # @author Florian Spreckelsen
+    # @date 2022-02-17
+
+    def parse_model_from_json_schema(self, filename: str):
+        """Return a datamodel created from the definition in the json schema in
+        `filename`.
+
+        Parameters
+        ----------
+        filename : str
+            The path to the json-schema file containing the datamodel definition
+
+        Returns
+        -------
+        out : DataModel
+            The created DataModel
+        """
+        # @author Florian Spreckelsen
+        # @date 2022-02-17
+        with open(filename, 'r') as schema_file:
+            model_dict = json.load(schema_file)
+
+        return self._create_model_from_dict(model_dict)
+
+    def _create_model_from_dict(self, model_dict: Union[dict, List[dict]]):
+        """Parse a dictionary read in from the model definition in a json schema and
+        return the Datamodel created from it.
+
+        Parameters
+        ----------
+        model_dict : dict or list[dict]
+            One or several dictionaries read in from a json-schema file
+
+        Returns
+        -------
+        out : DataModel
+            The datamodel defined in `model_dict`
+
+        Raises
+        ------
+        JsonSchemaDefinitionError
+            If an object lacks the `title` or the `type` key word
+        """
+        if isinstance(model_dict, dict):
+            model_dict = [model_dict]
+
+        for ii, elt in enumerate(model_dict):
+            if "title" not in elt:
+                raise JsonSchemaDefinitionError(
+                    "Object {} is lacking the `title` key word".format(ii+1))
+            if "type" not in elt:
+                raise JsonSchemaDefinitionError(
+                    "Object {} is lacking the `type` key word".format(ii+1))
+            name = self._stringify(elt["title"], context=elt)
+            self._treat_element(elt, name)
+
+        return DataModel(self.model.values())
+
+    def _treat_element(self, elt: dict, name: str):
+        """Return the entity defined by `elt` and register it in the model.
+
+        Parameters
+        ----------
+        elt : dict
+            A json-schema element; its `type` must be `string` or `object`
+        name : str
+            The name under which the entity is stored in the model
+
+        Returns
+        -------
+        ent : db.Property or db.RecordType
+            The new entity, or the one already stored under `name`
+
+        Raises
+        ------
+        NotImplementedError
+            If the `type` of `elt` is neither `string` nor `object`
+        """
+        if name in self.model:
+            # Already defined: reuse the stored entity.
+            return self.model[name]
+        if elt["type"] == "string":
+            ent = db.Property(name=name, datatype=db.TEXT)
+        elif elt["type"] == "object":
+            ent = self._treat_record_type(elt, name)
+        else:
+            raise NotImplementedError(
+                "Cannot parse items of type '{}' (yet).".format(elt["type"]))
+        if "description" in elt:
+            ent.description = elt["description"]
+
+        self.model[name] = ent
+        return ent
+
+    def _treat_record_type(self, elt: dict, name: str):
+        """Return a RecordType created from the json-schema object `elt`.
+
+        Each entry of `properties` is converted by `_treat_element` and added
+        as OBLIGATORY if its key is listed in `required`, else as RECOMMENDED.
+
+        Parameters
+        ----------
+        elt : dict
+            The json-schema object defining the RecordType
+        name : str
+            The name of the new RecordType
+
+        Returns
+        -------
+        rt : db.RecordType
+            The RecordType with its properties attached
+        """
+        rt = db.RecordType(name=name)
+        required = elt.get("required", [])
+        for key, prop in elt.get("properties", {}).items():
+            # An explicit title takes precedence over the property's key.
+            if "title" in prop:
+                prop_name = self._stringify(prop["title"])
+            else:
+                prop_name = self._stringify(key)
+            prop_ent = self._treat_element(prop, prop_name)
+            importance = db.OBLIGATORY if key in required else db.RECOMMENDED
+            rt.add_property(prop_ent, importance=importance)
+
+        return rt
+
+
 if __name__ == "__main__":
     model = parse_model_from_yaml('data_model.yml')
     print(model)
diff --git a/unittests/test_json_schema_model_parser.py b/unittests/test_json_schema_model_parser.py
index 46bb8832b4e410775e4b8af7e2494af76ac92d1d..e65bf998772387ffe8b3bcc7caef29851941b7f1 100644
--- a/unittests/test_json_schema_model_parser.py
+++ b/unittests/test_json_schema_model_parser.py
@@ -17,19 +17,26 @@
 # You should have received a copy of the GNU Affero General Public License along
 # with this program. If not, see <https://www.gnu.org/licenses/>.
 #
+import os
+
 import caosdb as db
 
 from caosadvancedtools.models.parser import parse_model_from_json_schema
 
+FILEPATH = os.path.dirname(os.path.abspath(__file__))
+
 
 def test_rt_with_string_properties():
     """Test datamodel parsing of datamodel_string_properties.schema.json"""
+    # @author Florian Spreckelsen
+    # @date 2022-02-17
     model = parse_model_from_json_schema(
-        "datamodel_string_properties.schema.json")
+        os.path.join(FILEPATH,
+                     "./datamodel_string_properties.schema.json"))
     assert "Dataset" in model
     dataset_rt = model["Dataset"]
-    assert dataset.description == ""
-    assert len(dataset.get_properties()) == 3
+    assert dataset_rt.description == ""
+    assert len(dataset_rt.get_properties()) == 3
 
     assert dataset_rt.get_property("title") != None
     assert dataset_rt.get_property("campaign") != None