Skip to content
Snippets Groups Projects
Commit ddc5284e authored by Florian Spreckelsen's avatar Florian Spreckelsen
Browse files

ENH: Implement and unit test list properties

parent 41ec62d6
No related branches found
No related tags found
2 merge requests!39Release 0.4.0,!33F json schema datamodel
Pipeline #20533 passed
...@@ -68,6 +68,13 @@ KEYWORDS_IGNORED = [ ...@@ -68,6 +68,13 @@ KEYWORDS_IGNORED = [
"unit", "unit",
] ]
# JSON schema "type" values that are treated as atomic. _treat_list checks the
# item type of an array against this list and, on a match, converts it with
# _get_atomic_datatype instead of creating a referenced RecordType.
JSON_SCHEMA_ATOMIC_TYPES = [
"string",
"boolean",
"integer",
"number"
]
def _get_listdatatype(dtype): def _get_listdatatype(dtype):
"""matches a string to check whether the type definition is a list """matches a string to check whether the type definition is a list
...@@ -596,7 +603,23 @@ class JsonSchemaParser(Parser): ...@@ -596,7 +603,23 @@ class JsonSchemaParser(Parser):
return DataModel(self.model.values()) return DataModel(self.model.values())
def _get_atomic_datatype(self, elt):
    """Return the db datatype that corresponds to an atomic JSON schema type.

    Parameters
    ----------
    elt : dict
        JSON schema element. Its "type" entry is read and, for strings,
        the optional "format" entry as well.

    Returns
    -------
    The datatype constant from ``db``: ``db.DATETIME`` for strings with
    format "date-time", ``db.TEXT`` for every other string, and
    ``db.INTEGER``, ``db.DOUBLE`` or ``db.BOOLEAN`` for "integer",
    "number" and "boolean" respectively.

    Raises
    ------
    JsonSchemaDefinitionError
        If ``elt["type"]`` is none of the atomic types handled here.
    """
    json_type = elt["type"]
    if json_type == "string":
        # Only the "date-time" format has a dedicated datatype; any other
        # (or no) format is stored as plain text.
        if elt.get("format") == "date-time":
            return db.DATETIME
        return db.TEXT
    if json_type == "integer":
        return db.INTEGER
    if json_type == "number":
        return db.DOUBLE
    if json_type == "boolean":
        return db.BOOLEAN
    raise JsonSchemaDefinitionError(f"Unknown atomic type in {elt}.")
def _treat_element(self, elt: dict, name: str): def _treat_element(self, elt: dict, name: str):
force_list = False
if name in self.model: if name in self.model:
return self.model[name] return self.model[name]
if "enum" in elt: if "enum" in elt:
...@@ -615,6 +638,8 @@ class JsonSchemaParser(Parser): ...@@ -615,6 +638,8 @@ class JsonSchemaParser(Parser):
ent = db.Property(name=name, datatype=db.BOOLEAN) ent = db.Property(name=name, datatype=db.BOOLEAN)
elif elt["type"] == "object": elif elt["type"] == "object":
ent = self._treat_record_type(elt, name) ent = self._treat_record_type(elt, name)
elif elt["type"] == "array":
ent, force_list = self._treat_list(elt, name)
else: else:
raise NotImplementedError( raise NotImplementedError(
f"Cannot parse items of type '{elt['type']}' (yet).") f"Cannot parse items of type '{elt['type']}' (yet).")
...@@ -622,7 +647,7 @@ class JsonSchemaParser(Parser): ...@@ -622,7 +647,7 @@ class JsonSchemaParser(Parser):
ent.description = elt["description"] ent.description = elt["description"]
self.model[name] = ent self.model[name] = ent
return ent return ent, force_list
def _treat_record_type(self, elt: dict, name: str): def _treat_record_type(self, elt: dict, name: str):
rt = db.RecordType(name=name) rt = db.RecordType(name=name)
...@@ -636,9 +661,14 @@ class JsonSchemaParser(Parser): ...@@ -636,9 +661,14 @@ class JsonSchemaParser(Parser):
name = self._stringify(prop["title"]) name = self._stringify(prop["title"])
else: else:
name = self._stringify(key) name = self._stringify(key)
prop_ent = self._treat_element(prop, name) prop_ent, force_list = self._treat_element(prop, name)
importance = db.OBLIGATORY if key in required else db.RECOMMENDED importance = db.OBLIGATORY if key in required else db.RECOMMENDED
rt.add_property(prop_ent, importance=importance) if not force_list:
rt.add_property(prop_ent, importance=importance)
else:
# Special case of rt used as a list property
rt.add_property(prop_ent, importance=importance,
datatype=db.LIST(prop_ent))
return rt return rt
...@@ -651,6 +681,29 @@ class JsonSchemaParser(Parser): ...@@ -651,6 +681,29 @@ class JsonSchemaParser(Parser):
return rt return rt
def _treat_list(self, elt: dict, name: str):
    """Convert a JSON schema element of type "array" into a model entity.

    Parameters
    ----------
    elt : dict
        The array element; the definition of its entries is read from
        ``elt["items"]``.
    name : str
        Name of the property that is defined by ``elt``.

    Returns
    -------
    tuple
        ``(entity, force_list)``. When ``force_list`` is False, ``entity``
        is a ``db.Property`` that already carries a ``db.LIST`` datatype.
        When it is True, ``entity`` is whatever ``_treat_enum`` or
        ``_treat_record_type`` returned for the items, and the caller has
        to attach it with a list datatype itself.

    Raises
    ------
    JsonSchemaDefinitionError
        If ``elt`` has no "items" entry.
    NotImplementedError
        If the item type is neither an enum, an atomic type nor "object".
    """
    if "items" not in elt:
        raise JsonSchemaDefinitionError(
            f"The definition of the list items is missing in {elt}.")
    items = elt["items"]
    if "enum" in items:
        return self._treat_enum(items, name), True
    if items["type"] in JSON_SCHEMA_ATOMIC_TYPES:
        datatype = db.LIST(self._get_atomic_datatype(items))
        return db.Property(name=name, datatype=datatype), False
    if items["type"] == "object":
        if "title" not in items or self._stringify(items["title"]) == name:
            # Property is RecordType
            return self._treat_record_type(items, name), True
        # List property will be an entity of its own with a name
        # different from the referenced RT
        ref_rt = self._treat_record_type(
            items, self._stringify(items["title"]))
        self.model[ref_rt.name] = ref_rt
        return db.Property(name=name, datatype=db.LIST(ref_rt)), False
    # Without this, unsupported item types would implicitly return None and
    # break callers that unpack the (entity, force_list) tuple.
    raise NotImplementedError(
        f"Cannot parse list items of type '{items['type']}' (yet).")
if __name__ == "__main__": if __name__ == "__main__":
model = parse_model_from_yaml('data_model.yml') model = parse_model_from_yaml('data_model.yml')
......
{
"title": "Dataset",
"description": "Dataset with list (array) properties",
"type": "object",
"properties": {
"keywords": {
"type": "array",
"items": { "type": "string" }
},
"booleans": {
"type": "array",
"items": { "type": "boolean" }
},
"integers": {
"type": "array",
"items": { "type": "integer" }
},
"floats": {
"type": "array",
"items": { "type": "number" }
},
"datetimes": {
"type": "array",
"items": { "type": "string", "format": "date-time" }
},
"reference": {
"type": "array",
"items": { "type": "object", "properties": {} }
},
"reference_with_name": {
"type": "array",
"items": { "type": "object", "title": "event", "properties": {} }
},
"license": {
"type": "array",
"items": {
"type": "string",
"enum": ["CC-BY", "CC-BY-SA", "CC0", "restricted access"]
}
}
}
}
...@@ -207,3 +207,87 @@ def test_references(): ...@@ -207,3 +207,87 @@ def test_references():
assert isinstance(model["location"], db.Property) assert isinstance(model["location"], db.Property)
assert model["location"].datatype == db.TEXT assert model["location"].datatype == db.TEXT
assert model["location"].description == "geographical location (e.g., North Sea; Espoo, Finland)" assert model["location"].description == "geographical location (e.g., North Sea; Espoo, Finland)"
def test_list():
    """Check list (array) properties for every supported item datatype."""
    # @author Florian Spreckelsen
    # @date 2022-03-17
    schema_file = os.path.join(
        FILEPATH, "datamodel_list_properties.schema.json")
    model = parse_model_from_json_schema(schema_file)
    licenses = ["CC-BY", "CC-BY-SA", "CC0", "restricted access"]
    names = ["Dataset", "keywords", "booleans", "integers", "floats",
             "datetimes", "reference", "reference_with_name", "event", "license"]
    for expected_name in names + licenses:
        assert expected_name in model
    dataset_rt = model["Dataset"]

    # Lists of atomic types: property name -> expected datatype of the items
    atomic_item_types = [
        ("keywords", db.TEXT),
        ("booleans", db.BOOLEAN),
        ("integers", db.INTEGER),
        ("floats", db.DOUBLE),
        ("datetimes", db.DATETIME),
    ]
    for prop_name, item_type in atomic_item_types:
        rt_prop = dataset_rt.get_property(prop_name)
        assert rt_prop is not None
        assert rt_prop.datatype == db.LIST(item_type)
        model_prop = model[prop_name]
        assert isinstance(model_prop, db.Property)
        assert model_prop.name == prop_name
        assert model_prop.datatype == db.LIST(item_type)

    # Simple reference list property
    reference_prop = dataset_rt.get_property("reference")
    assert reference_prop is not None
    assert reference_prop.is_reference()
    assert reference_prop.datatype == db.LIST("reference")
    reference_rt = model["reference"]
    assert isinstance(reference_rt, db.RecordType)
    assert reference_rt.name == "reference"
    assert reference_prop.datatype == db.LIST(reference_rt)

    # Reference list with name
    named_prop = dataset_rt.get_property("reference_with_name")
    assert named_prop is not None
    assert named_prop.is_reference()
    assert named_prop.datatype == db.LIST("event")
    event_rt = model["event"]
    assert isinstance(event_rt, db.RecordType)
    assert event_rt.name == "event"
    assert named_prop.datatype == db.LIST(event_rt)
    named_model_prop = model["reference_with_name"]
    assert isinstance(named_model_prop, db.Property)
    assert named_model_prop.name == "reference_with_name"
    assert named_model_prop.datatype == db.LIST(event_rt)

    # References to enum types
    license_prop = dataset_rt.get_property("license")
    assert license_prop is not None
    assert license_prop.is_reference()
    assert license_prop.datatype == db.LIST("license")
    license_rt = model["license"]
    assert isinstance(license_rt, db.RecordType)
    assert license_rt.name == "license"
    assert license_prop.datatype == db.LIST(license_rt)

    for license_name in licenses:
        license_rec = model[license_name]
        assert isinstance(license_rec, db.Record)
        assert license_rec.name == license_name
        assert len(license_rec.parents) == 1
        assert license_rec.has_parent(license_rt)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment