diff --git a/src/caosadvancedtools/models/parser.py b/src/caosadvancedtools/models/parser.py
index aacae6fe880e229a3797d76a190a04be157e87e9..7cbd117d7f45e5f1317ca1b554a53737be19ebaa 100644
--- a/src/caosadvancedtools/models/parser.py
+++ b/src/caosadvancedtools/models/parser.py
@@ -154,7 +154,12 @@ def parse_model_from_string(string):
     return parser.parse_model_from_string(string)
 
 
-def parse_model_from_json_schema(filename: str, top_level_recordtype: bool = True):
+def parse_model_from_json_schema(
+        filename: str,
+        top_level_recordtype: bool = True,
+        types_for_missing_array_items: dict = None,
+        ignore_unspecified_array_items: bool = False
+):
     """Return a datamodel parsed from a json schema definition.
 
     Parameters
@@ -164,7 +169,14 @@ def parse_model_from_json_schema(filename: str, top_level_recordtype: bool = Tru
     top_level_recordtype : bool, optional
         Whether there is a record type defined at the top level of the
         schema. Default is true.
-
+    types_for_missing_array_items : dict, optional
+        Fall-back types, keyed by property name, for json entries with
+        `type: array` but without `items` specification. Default is None.
+    ignore_unspecified_array_items : bool, optional
+        Whether to ignore `type: array` entries the type of which is not
+        specified by their `items` property or given in
+        `types_for_missing_array_items`. An error is raised if they are not
+        ignored. Default is False.
 
     Returns
     -------
@@ -181,7 +193,7 @@ def parse_model_from_json_schema(filename: str, top_level_recordtype: bool = Tru
     # @author Florian Spreckelsen
     # @date 2022-02-17
     # @review Daniel Hornung 2022-02-18
-    parser = JsonSchemaParser()
+    parser = JsonSchemaParser(types_for_missing_array_items, ignore_unspecified_array_items)
     return parser.parse_model_from_json_schema(filename, top_level_recordtype)
 
 
@@ -629,6 +641,27 @@ class JsonSchemaParser(Parser):
     # @date 2022-02-17
     # @review Timm Fitschen 2022-02-30
 
+    def __init__(self, types_for_missing_array_items=None,
+                 ignore_unspecified_array_items=False):
+        """Configure the treatment of arrays without `items` specification.
+
+        Parameters
+        ----------
+        types_for_missing_array_items : dict, optional
+            Fall-back types, keyed by property name, for `type: array`
+            entries without `items`. Default is None (no fall-back types).
+        ignore_unspecified_array_items : bool, optional
+            Whether to skip such entries when no fall-back type is given
+            instead of raising an error. Default is False.
+        """
+        super().__init__()
+        # Create the empty dict here: a `{}` default argument would be one
+        # object shared by all parser instances.
+        if types_for_missing_array_items is None:
+            types_for_missing_array_items = {}
+        self.types_for_missing_array_items = types_for_missing_array_items
+        self.ignore_unspecified_array_items = ignore_unspecified_array_items
+
     def parse_model_from_json_schema(self, filename: str, top_level_recordtype: bool = True):
         """Return a datamodel created from the definition in the json schema in
         `filename`.
@@ -755,7 +788,7 @@ class JsonSchemaParser(Parser):
         if name == "name":
             # This is identified with the CaosDB name property as long as the
             # type is correct.
-            if not elt["type"] == "string":
+            if not elt["type"] == "string" and "string" not in elt["type"]:
                 raise JsonSchemaDefinitionError(
                     "The 'name' property must be string-typed, otherwise it cannot "
                     "be identified with CaosDB's name property."
@@ -786,7 +819,8 @@ class JsonSchemaParser(Parser):
             # treat_something function
             ent.description = elt["description"]
 
-        self.model[name] = ent
+        if ent is not None:
+            self.model[name] = ent
         return ent, force_list
 
     def _treat_record_type(self, elt: dict, name: str):
@@ -846,26 +880,42 @@ class JsonSchemaParser(Parser):
     def _treat_list(self, elt: dict, name: str):
+        """Return `(entity, force_list)` for the array element `name`.
+
+        The entity is None if `elt` has no `items`, no fall-back type is
+        known for `name`, and such arrays are configured to be ignored.
+        """
         # @review Timm Fitschen 2022-02-30
 
-        if "items" not in elt:
+        if "items" not in elt and name not in self.types_for_missing_array_items:
+            if self.ignore_unspecified_array_items:
+                return None, False
             raise JsonSchemaDefinitionError(
                 f"The definition of the list items is missing in {elt}.")
-        items = elt["items"]
-        if "enum" in items:
-            return self._treat_enum(items, name), True
-        if items["type"] in JSON_SCHEMA_ATOMIC_TYPES:
-            datatype = db.LIST(self._get_atomic_datatype(items))
+        if "items" in elt:
+            items = elt["items"]
+            if "enum" in items:
+                return self._treat_enum(items, name), True
+            if items["type"] in JSON_SCHEMA_ATOMIC_TYPES:
+                datatype = db.LIST(self._get_atomic_datatype(items))
+                return db.Property(name=name, datatype=datatype), False
+            if items["type"] == "object":
+                if "title" not in items or self._stringify(items["title"]) == name:
+                    # Property is RecordType
+                    return self._treat_record_type(items, name), True
+                else:
+                    # List property will be an entity of its own with a name
+                    # different from the referenced RT
+                    ref_rt = self._treat_record_type(
+                        items, self._stringify(items["title"]))
+                    self.model[ref_rt.name] = ref_rt
+                    return db.Property(name=name, datatype=db.LIST(ref_rt)), False
+            # Neither enum, atomic nor object items: fail explicitly instead
+            # of falling off the end and returning a bare None.
+            raise JsonSchemaDefinitionError(
+                f"The definition of the list items is not supported in {elt}.")
+        else:
+            # Use predefined type:
+            datatype = db.LIST(self.types_for_missing_array_items[name])
             return db.Property(name=name, datatype=datatype), False
-        if items["type"] == "object":
-            if "title" not in items or self._stringify(items["title"]) == name:
-                # Property is RecordType
-                return self._treat_record_type(items, name), True
-            else:
-                # List property will be an entity of its own with a name
-                # different from the referenced RT
-                ref_rt = self._treat_record_type(
-                    items, self._stringify(items["title"]))
-                self.model[ref_rt.name] = ref_rt
-                return db.Property(name=name, datatype=db.LIST(ref_rt)), False
 
     def _get_pattern_prop(self):
         if "__pattern_property_pattern_property" in self.model:
diff --git a/unittests/test_json_schema_model_parser.py b/unittests/test_json_schema_model_parser.py
index 707ff76e0aefa7afd299ab14e748c0cab05c7cb8..678b75080079664287a1cc6ef6353f1948789802 100644
--- a/unittests/test_json_schema_model_parser.py
+++ b/unittests/test_json_schema_model_parser.py
@@ -414,3 +414,30 @@ def test_no_toplevel_entity():
     assert pattern_object_rt.get_importance(pp.name) == db.OBLIGATORY
     date_id_prop = pattern_object_rt.get_property("date_id")
     assert date_id_prop.datatype == db.TEXT
+
+
+def test_missing_array_items():
+
+    # strict behavior
+    with pytest.raises(JsonSchemaDefinitionError) as err:
+        parse_model_from_json_schema(os.path.join(
+            FILEPATH, "datamodel_missing_array_items.schema.json"))
+
+    assert "{'type': 'array'}" in str(err.value)
+
+    # ignore all problems, so a RT is created that does not have the property
+    model = parse_model_from_json_schema(os.path.join(
+        FILEPATH, "datamodel_missing_array_items.schema.json"), ignore_unspecified_array_items=True)
+    assert "something_with_missing_array_items" in model
+    rt = model["something_with_missing_array_items"]
+    assert isinstance(rt, db.RecordType)
+    assert rt.get_property("missing") is None
+
+    # specify the type:
+    type_dict = {"missing": db.FILE}
+    model = parse_model_from_json_schema(os.path.join(
+        FILEPATH, "datamodel_missing_array_items.schema.json"), types_for_missing_array_items=type_dict)
+    assert "something_with_missing_array_items" in model
+    rt = model["something_with_missing_array_items"]
+    assert rt.get_property("missing") is not None
+    assert rt.get_property("missing").datatype == db.LIST(db.FILE)