From cbcfd3c26b942459d065e063454dcb66732416b3 Mon Sep 17 00:00:00 2001 From: Timm Fitschen <t.fitschen@indiscale.com> Date: Thu, 4 Jun 2020 14:02:44 +0200 Subject: [PATCH] WIP: parse string value to python types --- src/caosdb/common/models.py | 110 ++++++++++++++++++++------------- unittests/test_add_property.py | 42 +++++++++---- unittests/test_datatype.py | 48 +++++++++++++- 3 files changed, 144 insertions(+), 56 deletions(-) diff --git a/src/caosdb/common/models.py b/src/caosdb/common/models.py index 6c4ed8b4..ead0eb61 100644 --- a/src/caosdb/common/models.py +++ b/src/caosdb/common/models.py @@ -82,27 +82,30 @@ class Entity(object): def __init__(self, name=None, id=None, description=None, # @ReservedAssignment datatype=None, value=None, **kwargs): self.__role = kwargs["role"] if "role" in kwargs else None - self.name = name - self.description = description - self.id = id - self.value = value + self._checksum = None + self._size = None + self._upload = None + self._wrapped_entity = None + self._cuid = None + self._flags = dict() + self.__value = None + self.__datatype = None self.datatype = datatype + self.value = value self.messages = _Messages() self.properties = _Properties() self.parents = _Parents() self.path = None self.file = None - self._checksum = None - self._size = None - self._upload = None self.unit = None - self._cuid = None self.acl = None self.permissions = None - self._wrapped_entity = None - self._flags = dict() self.is_valid = lambda: False self.is_deleted = lambda: False + self.name = name + self.description = description + self.id = id + @property def role(self): @@ -159,6 +162,8 @@ class Entity(object): @datatype.setter def datatype(self, new_type): + # re-parse value + self.__value = _parse_value(new_type, self.__value) self.__datatype = new_type @property @@ -196,7 +201,7 @@ class Entity(object): @value.setter def value(self, new_value): - self.__value = new_value + self.__value = _parse_value(self.datatype, new_value) @property def 
path(self): @@ -816,22 +821,11 @@ class Entity(object): Was ' + str(type(child))) # parse VALUE - if len(vals): - entity.value = _parse_col_values(entity.datatype, vals) - elif elem.text is not None: - if elem.text.strip() != "": - text_val = elem.text.strip() - - if entity.datatype == DOUBLE: - entity.value = float(text_val) - elif entity.datatype == DATETIME or entity.datatype == TEXT: - entity.value = text_val - else: - try: # for references and integer - entity.value = int(text_val) - except BaseException: - entity.value = text_val + # The value[s] have been inside a <Value> tag. + entity.value = vals + elif elem.text is not None and elem.text.strip() != "": + entity.value = elem.text.strip() return entity @@ -976,36 +970,66 @@ class Entity(object): return self +def _parse_value(datatype, value): + if value is None: + return value + if datatype is None: + return value + if datatype == DOUBLE: + return float(value) + if datatype == INTEGER: + return int(str(value)) + if datatype == BOOLEAN: + if str(value).lower() == "true": + return True + elif str(value).lower() == "false": + return False + else: + raise ValueError("Boolean value was {}.".format(value)) + if datatype in [DATETIME, TEXT]: + if isinstance(value, str): + return value -def _parse_col_values(cdt, vals): + # deal with collections matcher = re.compile(r"^(?P<col>[^<]+)<(?P<dt>[^>]+)>$") - m = matcher.match(cdt) + m = matcher.match(datatype) if m: col = m.group("col") dt = m.group("dt") if col == "LIST": ret = list() - add = ret.append else: - return vals + return value - for v in vals: - if dt == DOUBLE: - add(float(v)) - elif dt == TEXT or dt == DATETIME: - add(v) - else: - try: - add(int(v)) - except (ValueError, TypeError): - add(v) + if hasattr(value, "__iter__") and not isinstance(value, str): + for v in value: + ret.append(_parse_value(dt, v)) + else: + # put a single value into a list since the datatype says so. 
+ ret.append(_parse_value(dt, value)) return ret - if len(vals) == 1: - return vals[0] - return vals + # This is for a special case, where the xml parser could not differentiate + # between single values and one-element lists: unwrap the single element. + if hasattr(value, "__len__") and len(value) == 1: + return _parse_value(datatype, value[0]) + + # deal with references + if isinstance(value, Entity): + return value + if isinstance(value, str) and "@" in value: + # probably this is a versioned reference + return str(value) + else: + # for unversioned references + try: + return int(value) + except ValueError: + # reference via name + return str(value) + def _log_request(request, xml_body=None): diff --git a/unittests/test_add_property.py b/unittests/test_add_property.py index bd68f31b..3d7f53db 100644 --- a/unittests/test_add_property.py +++ b/unittests/test_add_property.py @@ -22,6 +22,7 @@ # ** end header # import caosdb as db +from pytest import raises from nose.tools import assert_is, assert_is_none, assert_equals, assert_is_not_none, assert_raises @@ -176,19 +177,36 @@ def test_property_parameter_with_entity_and_datatype(): unit="m", description="This is the length of something.") - assert_equals(0, len(rec.get_properties())) - rec.add_property(abstract_property, 3.14, datatype=db.INTEGER) - assert_equals(1, len(rec.get_properties())) + assert 0 == len(rec.get_properties()) + rec.add_property(abstract_property, 300, datatype=db.INTEGER) + assert 1 == len(rec.get_properties()) concrete_property = rec.get_property("length") - assert_is_not_none(concrete_property) - assert_equals(concrete_property.name, "length") - assert_equals(concrete_property.id, 512) - assert_equals(concrete_property.description, - "This is the length of something.") - assert_equals(concrete_property.unit, "m") - assert_equals(concrete_property.value, 3.14) - assert_equals(concrete_property.datatype, db.INTEGER) - assert_is(concrete_property._wrapped_entity, abstract_property) + assert concrete_property is
not None + assert concrete_property.name == "length" + assert concrete_property.id == 512 + assert concrete_property.description == "This is the length of something." + assert concrete_property.unit == "m" + assert concrete_property.value == 300 + assert concrete_property.datatype == db.INTEGER + assert id(concrete_property._wrapped_entity) == id(abstract_property) + + concrete_property.value = None + + with raises(ValueError): + # cannot parse 3.14 to integer + concrete_property.value = 3.14 + + assert concrete_property.value is None + assert concrete_property.datatype == db.INTEGER + + concrete_property.datatype = None + concrete_property.value = 3.14 + + with raises(ValueError): + # cannot parse 3.14 to integer + concrete_property.datatype = db.INTEGER + concrete_property.datatype = None + concrete_property.value = 3.14 def test_kw_name_and_value(): diff --git a/unittests/test_datatype.py b/unittests/test_datatype.py index 7cc5fd41..ccda7734 100644 --- a/unittests/test_datatype.py +++ b/unittests/test_datatype.py @@ -18,9 +18,10 @@ # along with this program. If not, see <https://www.gnu.org/licenses/>. 
# # ** end header - +from pytest import raises import caosdb as db from caosdb.common import datatype +from caosdb.common.models import _parse_value def test_list(): @@ -32,3 +33,48 @@ def test_list_utilites(): """Test for example if get_list_datatype works.""" dtype = db.LIST(db.INTEGER) assert datatype.get_list_datatype(dtype) == db.INTEGER + + +def test_parsing_of_intger_list_values(): + dtype = db.LIST(db.INTEGER) + assert _parse_value(dtype, [1,2,3]) == [1,2,3] + assert _parse_value(dtype, [1]) == [1] + assert _parse_value(dtype, [None,1,2,3]) == [None,1,2,3] + assert _parse_value(dtype, [1, None,1,2,3]) == [1, None,1,2,3] + assert _parse_value(dtype, ["4", 4]) == [4,4] + assert _parse_value(dtype, []) == [] + assert _parse_value(dtype, None) == None + assert _parse_value(None, [1,2,3.14,"asdf"]) == [1,2,3.14,"asdf"] + assert _parse_value(dtype, 1) == [1] + + with raises(ValueError): + # float value in list + _parse_value(dtype, ["4.3", 4]) + + +def test_parsing_of_boolean_list_values(): + dtype = db.LIST(db.BOOLEAN) + assert _parse_value(dtype, [True, False, True]) == [True, False, True] + assert _parse_value(dtype, ["true", False, None]) == [True, False, None] + + with raises(ValueError): + _parse_value(dtype, ["not a boolean"]) + + +def test_parsing_of_unknown_col_datatype(): + dtype = "Arraay<TEXT>" + obj = {"a": "b"} + assert id(_parse_value(dtype, obj)) == id(obj) + + +def test_parsing_of_references(): + dtype = "Person" + assert _parse_value(dtype, "Anna Lytik") == "Anna Lytik" + assert _parse_value(None, "Anna Lytik") == "Anna Lytik" + assert _parse_value(dtype, "2345@sdfg") == "2345@sdfg" + assert _parse_value(dtype, "2345") == 2345 + assert _parse_value(dtype, 2345) == 2345 + + + entity = db.Record(name="bla") + assert id(_parse_value(dtype, entity)) == id(entity) -- GitLab