def _get_atomic_datatype(self, elt):
    """Return the datatype constant for an atomic JSON-schema element.

    Parameters
    ----------
    elt : dict
        JSON-schema element definition.  Its "type" entry selects the
        datatype; for "string" elements a "format" of "date-time" selects
        ``db.DATETIME`` instead of ``db.TEXT``.

    Returns
    -------
    One of ``db.TEXT``, ``db.DATETIME``, ``db.INTEGER``, ``db.DOUBLE`` or
    ``db.BOOLEAN``.

    Raises
    ------
    JsonSchemaDefinitionError
        If "type" is missing from ``elt`` or is not one of "string",
        "integer", "number" or "boolean".
    """
    # Use .get() so that a missing "type" is reported as a schema
    # definition error below instead of escaping as a bare KeyError.
    json_type = elt.get("type")

    if json_type == "string":
        # Only the "date-time" format gets a dedicated datatype; every
        # other string (with or without a format) is plain text.
        if elt.get("format") == "date-time":
            return db.DATETIME
        return db.TEXT
    if json_type == "integer":
        return db.INTEGER
    if json_type == "number":
        return db.DOUBLE
    if json_type == "boolean":
        return db.BOOLEAN

    raise JsonSchemaDefinitionError(f"Unknown atomic type in {elt}.")
def _treat_list(self, elt: dict, name: str):
    """Convert a JSON-schema "array" element into a data-model entity.

    Parameters
    ----------
    elt : dict
        The array element definition; it must contain an "items" entry
        describing the list elements.
    name : str
        Name of the property that holds the list.

    Returns
    -------
    tuple
        ``(entity, force_list)``.  ``force_list`` is False when ``entity``
        is a Property that already carries a ``db.LIST`` datatype.  It is
        True when ``entity`` is what ``_treat_enum`` or
        ``_treat_record_type`` returned for the items, in which case the
        LIST datatype has to be set where the entity is added as a
        property.

    Raises
    ------
    JsonSchemaDefinitionError
        If "items" is missing, or the item definition has neither "enum"
        nor "type".
    NotImplementedError
        If "items" is not a single schema object, or the item type is
        neither atomic nor "object".
    """
    if "items" not in elt:
        raise JsonSchemaDefinitionError(
            f"The definition of the list items is missing in {elt}.")
    items = elt["items"]

    # JSON schema also allows booleans and (in older drafts) arrays here;
    # only a single schema object is handled.
    if not isinstance(items, dict):
        raise NotImplementedError(
            f"Cannot parse list items defined as {items!r} (yet).")

    if "enum" in items:
        return self._treat_enum(items, name), True

    if "type" not in items:
        raise JsonSchemaDefinitionError(
            f"The type of the list items is missing in {elt}.")
    item_type = items["type"]

    if item_type in JSON_SCHEMA_ATOMIC_TYPES:
        datatype = db.LIST(self._get_atomic_datatype(items))
        return db.Property(name=name, datatype=datatype), False

    if item_type == "object":
        if "title" not in items or self._stringify(items["title"]) == name:
            # Property is RecordType
            return self._treat_record_type(items, name), True
        # List property will be an entity of its own with a name
        # different from the referenced RT
        ref_rt = self._treat_record_type(
            items, self._stringify(items["title"]))
        self.model[ref_rt.name] = ref_rt
        return db.Property(name=name, datatype=db.LIST(ref_rt)), False

    # Previously this fell through and returned None, which made the
    # tuple-unpacking at the call site fail with an unrelated TypeError.
    raise NotImplementedError(
        f"Cannot parse list items of type '{item_type}' (yet).")
"integer" } + }, + "floats": { + "type": "array", + "items": { "type": "number" } + }, + "datetimes": { + "type": "array", + "items": { "type": "string", "format": "date-time" } + }, + "reference": { + "type": "array", + "items": { "type": "object", "properties": {} } + }, + "reference_with_name": { + "type": "array", + "items": { "type": "object", "title": "event", "properties": {} } + }, + "license": { + "type": "array", + "items": { + "type": "string", + "enum": ["CC-BY", "CC-BY-SA", "CC0", "restricted access"] + } + } + } +} diff --git a/unittests/test_json_schema_model_parser.py b/unittests/test_json_schema_model_parser.py index 637adac9d11daac9aec6bbbdabba00d5bb733057..8e67eb1889154b585ed2c9da4c8ab6a4ec49db6a 100644 --- a/unittests/test_json_schema_model_parser.py +++ b/unittests/test_json_schema_model_parser.py @@ -207,3 +207,87 @@ def test_references(): assert isinstance(model["location"], db.Property) assert model["location"].datatype == db.TEXT assert model["location"].description == "geographical location (e.g., North Sea; Espoo, Finland)" + + +def test_list(): + """Test list properties with all possible datatypes.""" + # @author Florian Spreckelsen + # @date 2022-03-17 + + model = parse_model_from_json_schema(os.path.join( + FILEPATH, "datamodel_list_properties.schema.json")) + licenses = ["CC-BY", "CC-BY-SA", "CC0", "restricted access"] + names = ["Dataset", "keywords", "booleans", "integers", "floats", + "datetimes", "reference", "reference_with_name", "event", "license"] + for name in names + licenses: + assert name in model + + dataset_rt = model["Dataset"] + assert dataset_rt.get_property("keywords") is not None + assert dataset_rt.get_property("keywords").datatype == db.LIST(db.TEXT) + assert isinstance(model["keywords"], db.Property) + assert model["keywords"].name == "keywords" + assert model["keywords"].datatype == db.LIST(db.TEXT) + + assert dataset_rt.get_property("booleans") is not None + assert dataset_rt.get_property("booleans").datatype 
== db.LIST(db.BOOLEAN) + assert isinstance(model["booleans"], db.Property) + assert model["booleans"].name == "booleans" + assert model["booleans"].datatype == db.LIST(db.BOOLEAN) + + assert dataset_rt.get_property("integers") is not None + assert dataset_rt.get_property("integers").datatype == db.LIST(db.INTEGER) + assert isinstance(model["integers"], db.Property) + assert model["integers"].name == "integers" + assert model["integers"].datatype == db.LIST(db.INTEGER) + + assert dataset_rt.get_property("floats") is not None + assert dataset_rt.get_property("floats").datatype == db.LIST(db.DOUBLE) + assert isinstance(model["floats"], db.Property) + assert model["floats"].name == "floats" + assert model["floats"].datatype == db.LIST(db.DOUBLE) + + assert dataset_rt.get_property("datetimes") is not None + assert dataset_rt.get_property( + "datetimes").datatype == db.LIST(db.DATETIME) + assert isinstance(model["datetimes"], db.Property) + assert model["datetimes"].name == "datetimes" + assert model["datetimes"].datatype == db.LIST(db.DATETIME) + + # Simple reference list property + assert dataset_rt.get_property("reference") is not None + assert dataset_rt.get_property("reference").is_reference() + assert dataset_rt.get_property( + "reference").datatype == db.LIST("reference") + assert isinstance(model["reference"], db.RecordType) + assert model["reference"].name == "reference" + assert dataset_rt.get_property( + "reference").datatype == db.LIST(model["reference"]) + + # Reference list with name + assert dataset_rt.get_property("reference_with_name") is not None + assert dataset_rt.get_property("reference_with_name").is_reference() + assert dataset_rt.get_property( + "reference_with_name").datatype == db.LIST("event") + assert isinstance(model["event"], db.RecordType) + assert model["event"].name == "event" + assert dataset_rt.get_property( + "reference_with_name").datatype == db.LIST(model["event"]) + assert isinstance(model["reference_with_name"], db.Property) + 
assert model["reference_with_name"].name == "reference_with_name" + assert model["reference_with_name"].datatype == db.LIST(model["event"]) + + # References to enum types + assert dataset_rt.get_property("license") is not None + assert dataset_rt.get_property("license").is_reference() + assert dataset_rt.get_property("license").datatype == db.LIST("license") + assert isinstance(model["license"], db.RecordType) + assert model["license"].name == "license" + assert dataset_rt.get_property( + "license").datatype == db.LIST(model["license"]) + + for name in licenses: + assert isinstance(model[name], db.Record) + assert model[name].name == name + assert len(model[name].parents) == 1 + assert model[name].has_parent(model["license"])