Skip to content
Snippets Groups Projects
Commit 380b5b7f authored by Henrik tom Wörden's avatar Henrik tom Wörden
Browse files

Merge branch 'f-fix-list' into 'dev'

FIX: treat lists of base datatypes properly during yaml model parsing

See merge request caosdb/caosdb-advanced-user-tools!81
parents 6f28bffe 211cea7c
No related branches found
No related tags found
1 merge request!22Release 0.3
......@@ -65,6 +65,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
cause an `sqlite3.IntegrityError` if more than one change was cached
for the same entity.
* #40 Insertion of identifiables with missing obligatory properties
- Before, a Property with the datatype "LIST(TEXT)" would lead to the creation
of a RecordType. This is fixed now.
### Security ###
......
......@@ -35,13 +35,13 @@ responsible:
revisionOf:
datatype: REFERENCE
results:
datatype: REFERENCE
datatype: LIST<REFERENCE>
sources:
datatype: REFERENCE
datatype: LIST<REFERENCE>
scripts:
datatype: REFERENCE
datatype: LIST<REFERENCE>
single_attribute:
datatype: LIST(INTEGER)
datatype: LIST<INTEGER>
Simulation:
obligatory_properties:
date:
......
......@@ -43,8 +43,27 @@ KEYWORDS_IGNORED = [
]
def _get_listdatatype(dtype):
    """Return the element type of a list datatype definition, or None.

    Two spellings are recognised: ``LIST<...>`` and the older ``LIST(...)``.
    The opening and closing bracket must belong to the same pair, so a
    damaged definition such as ``LIST(TEXT>`` or ``LIST(TEXT`` is not treated
    as a list.

    Parameters
    ----------
    dtype : str
        The datatype string as written in the yaml model.

    Returns
    -------
    out : str or None
        The text between the brackets if `dtype` is a list definition, else
        None.
    """
    # TODO: string representation should be the same as used by the server:
    # e.g. LIST<TEXT>
    # this should be changed in the module and the old behaviour should be
    # marked as deprecated
    # One alternative per bracket style so that "(" can only be closed by ")"
    # and "<" only by ">".
    match = re.match(r"^LIST(?:\((?P<paren>.*)\)|<(?P<angle>.*)>)$", dtype)
    if match is None:
        return None
    inner = match.group("paren")
    if inner is None:
        inner = match.group("angle")
    return inner
# Taken from https://stackoverflow.com/a/53647080, CC-BY-SA, 2018 by
# https://stackoverflow.com/users/2572431/augurar
class SafeLineLoader(yaml.SafeLoader):
"""Load a line and keep meta-information.
......@@ -56,6 +75,7 @@ class SafeLineLoader(yaml.SafeLoader):
mapping = super().construct_mapping(node, deep=deep)
# Add 1 so line numbering starts at 1
mapping['__line__'] = node.start_mark.line + 1
return mapping
# End of https://stackoverflow.com/a/53647080
......@@ -76,12 +96,14 @@ class YamlDefinitionError(RuntimeError):
def parse_model_from_yaml(filename):
    """Parse the yaml model in `filename` using a temporary Parser.

    Convenience wrapper for callers that do not need to keep the Parser
    object; the result of ``Parser.parse_model_from_yaml`` is returned as is.
    """
    return Parser().parse_model_from_yaml(filename)
def parse_model_from_string(string):
    """Parse the yaml model given as `string` using a temporary Parser.

    Convenience wrapper for callers that do not need to keep the Parser
    object; the result of ``Parser.parse_model_from_string`` is returned as is.
    """
    return Parser().parse_model_from_string(string)
......@@ -105,6 +127,7 @@ class Parser(object):
"""
with open(filename, 'r') as outfile:
ymlmodel = yaml.load(outfile, Loader=SafeLineLoader)
return self._create_model_from_dict(ymlmodel)
def parse_model_from_string(self, string):
......@@ -121,6 +144,7 @@ class Parser(object):
The created DataModel
"""
ymlmodel = yaml.load(string, Loader=SafeLineLoader)
return self._create_model_from_dict(ymlmodel)
def _create_model_from_dict(self, ymlmodel):
......@@ -148,6 +172,7 @@ class Parser(object):
# a record type with the name of the element.
# The retrieved entity will be added to the model.
# If no entity with that name is found an exception is raised.
if "extern" not in ymlmodel:
ymlmodel["extern"] = []
......@@ -170,7 +195,7 @@ class Parser(object):
self._add_entity_to_model(name, entity)
# initialize recordtypes
self._set_recordtypes()
self._check_datatypes()
self._check_and_convert_datatypes()
for name, entity in ymlmodel.items():
self._treat_entity(name, entity, line=ymlmodel["__line__"])
......@@ -196,11 +221,14 @@ class Parser(object):
out : str
If `name` was a string, return it. Else return str(`name`).
"""
if name is None:
print("WARNING: Name of this context is None: {}".format(context),
file=sys.stderr)
if not isinstance(name, str):
name = str(name)
return name
def _add_entity_to_model(self, name, definition):
......@@ -208,9 +236,11 @@ class Parser(object):
Properties are also initialized.
"""
if name == "__line__":
return
name = self._stringify(name)
if name not in self.model:
self.model[name] = None
......@@ -221,8 +251,10 @@ class Parser(object):
and isinstance(definition, dict)
# is it a property
and "datatype" in definition
# but not a list
and not definition["datatype"].startswith("LIST")):
# but not simply an RT of the model
and not (definition["datatype"] in self.model
or _get_listdatatype(
definition["datatype"]) in self.model)):
# and create the new property
self.model[name] = db.Property(name=name,
......@@ -235,6 +267,7 @@ class Parser(object):
if prop_type in definition:
# Empty property mapping should be allowed.
if definition[prop_type] is None:
definition[prop_type] = {}
try:
......@@ -245,6 +278,7 @@ class Parser(object):
except AttributeError as ate:
if ate.args[0].endswith("'items'"):
line = definition["__line__"]
if isinstance(definition[prop_type], list):
line = definition[prop_type][0]["__line__"]
raise YamlDefinitionError(line) from None
......@@ -252,26 +286,24 @@ class Parser(object):
def _add_to_recordtype(self, ent_name, props, importance):
"""Add properties to a RecordType."""
for n, e in props.items():
if n in KEYWORDS:
if n in KEYWORDS_IGNORED:
continue
raise YamlDefinitionError("Unexpected keyword in line {}: {}".format(
props["__line__"], n))
if n == "__line__":
continue
n = self._stringify(n)
if isinstance(e, dict) and "datatype" in e and e["datatype"].startswith("LIST"):
match = re.match(r"LIST[(](.*)[)]", e["datatype"])
if match is None:
raise ValueError("List datatype definition is wrong")
dt = db.LIST(match.group(1))
self.model[ent_name].add_property(name=n,
importance=importance,
datatype=dt
)
if (isinstance(e, dict) and "datatype" in e
and (_get_listdatatype(e["datatype"]) is not None)):
self.model[ent_name].add_property(
name=n,
importance=importance,
datatype=db.LIST(_get_listdatatype(e["datatype"])))
else:
self.model[ent_name].add_property(name=n,
importance=importance)
......@@ -288,6 +320,7 @@ class Parser(object):
def _treat_entity(self, name, definition, line=None):
"""Parse the definition and the information to the entity."""
if name == "__line__":
return
name = self._stringify(name)
......@@ -357,26 +390,50 @@ class Parser(object):
raise e
self.treated.append(name)
def _check_datatypes(self):
"""Checks if the datatype is valid.
def _check_and_convert_datatypes(self):
""" checks if datatype is valid.
datatype of properties is simply initialized with string. Here, we
iterate over properties and check whether it is a base datatype of a
name that was defined in the model (or extern part)
Iterate over all properties in this datamodel and choose a
valid datatype from the original datatype string in the yaml
file. Raise a ValueError if no datatype can be found.
the string representations are replaced with caosdb objects
"""
for key, value in self.model.items():
if isinstance(value, db.Property):
if value.datatype in self.model:
value.datatype = self.model[value.datatype]
else:
# get the datatype
try:
value.datatype = db.__getattribute__(value.datatype)
except AttributeError:
raise ValueError(
"Property {} has an unknown datatype: {}".format(value.name, value.datatype))
dtype = value.datatype
is_list = False
if _get_listdatatype(value.datatype) is not None:
dtype = _get_listdatatype(value.datatype)
is_list = True
if dtype in self.model:
if is_list:
value.datatype = db.LIST(self.model[dtype])
else:
value.datatype = self.model[dtype]
continue
if dtype in [db.DOUBLE,
db.REFERENCE,
db.TEXT,
db.DATETIME,
db.INTEGER,
db.FILE,
db.BOOLEAN]:
if is_list:
value.datatype = db.LIST(db.__getattribute__(dtype))
else:
value.datatype = db.__getattribute__(dtype)
continue
raise ValueError("Property {} has an unknown datatype: {}".format(value.name, value.datatype))
def _set_recordtypes(self):
""" properties are defined in first iteration; set remaining as RTs """
......
......@@ -168,7 +168,6 @@ RT1:
RT5:
"""
model = parse_model_from_yaml(to_file(string))
print(model["RT1"])
assert has_property(model["RT1"], "RT2")
assert model["RT1"].get_importance("RT2") == db.RECOMMENDED
assert has_property(model["RT1"], "RT3")
......@@ -190,7 +189,7 @@ p1:
p2:
datatype: TXT
"""
self.assertRaises(ValueError, lambda: parse_model_from_yaml(to_file(string)))
self.assertRaises(ValueError, parse_model_from_yaml, to_file(string))
class ListTest(unittest.TestCase):
......@@ -200,10 +199,19 @@ RT1:
recommended_properties:
a:
datatype: LIST(RT2)
b:
datatype: LIST(TEXT)
c:
datatype: LIST<TEXT>
RT2:
"""
model = parse_model_from_yaml(to_file(string))
self.assertTrue(isinstance(model['b'], db.Property))
self.assertEqual(model['b'].datatype, db.LIST(db.TEXT))
self.assertTrue(isinstance(model['c'], db.Property))
self.assertEqual(model['c'].datatype, db.LIST(db.TEXT))
# This failed for an older version of caosdb-models
string_list = """
A:
......@@ -216,16 +224,8 @@ B:
datatype: INTEGER
"""
model = parse_model_from_yaml(to_file(string_list))
def test_dmgd_list(self):
string = """
RT1:
recommended_properties:
a:
datatype: LIST(T2
RT2:
"""
self.assertRaises(ValueError, lambda: parse_model_from_yaml(to_file(string)))
self.assertTrue(isinstance(model['A'], db.RecordType))
self.assertEqual(model['A'].properties[0].datatype, db.LIST("B"))
class ParserTest(unittest.TestCase):
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment