From cbcfd3c26b942459d065e063454dcb66732416b3 Mon Sep 17 00:00:00 2001
From: Timm Fitschen <t.fitschen@indiscale.com>
Date: Thu, 4 Jun 2020 14:02:44 +0200
Subject: [PATCH] WIP: parse string value to python types

---
 src/caosdb/common/models.py    | 110 ++++++++++++++++++++-------------
 unittests/test_add_property.py |  42 +++++++++----
 unittests/test_datatype.py     |  48 +++++++++++++-
 3 files changed, 144 insertions(+), 56 deletions(-)

diff --git a/src/caosdb/common/models.py b/src/caosdb/common/models.py
index 6c4ed8b4..ead0eb61 100644
--- a/src/caosdb/common/models.py
+++ b/src/caosdb/common/models.py
@@ -82,27 +82,30 @@ class Entity(object):
     def __init__(self, name=None, id=None, description=None,  # @ReservedAssignment
                  datatype=None, value=None, **kwargs):
         self.__role = kwargs["role"] if "role" in kwargs else None
-        self.name = name
-        self.description = description
-        self.id = id
-        self.value = value
+        self._checksum = None
+        self._size = None
+        self._upload = None
+        self._wrapped_entity = None
+        self._cuid = None
+        self._flags = dict()
+        self.__value = None
+        self.__datatype = None
         self.datatype = datatype
+        self.value = value
         self.messages = _Messages()
         self.properties = _Properties()
         self.parents = _Parents()
         self.path = None
         self.file = None
-        self._checksum = None
-        self._size = None
-        self._upload = None
         self.unit = None
-        self._cuid = None
         self.acl = None
         self.permissions = None
-        self._wrapped_entity = None
-        self._flags = dict()
         self.is_valid = lambda: False
         self.is_deleted = lambda: False
+        self.name = name
+        self.description = description
+        self.id = id
+
 
     @property
     def role(self):
@@ -159,6 +162,8 @@ class Entity(object):
 
     @datatype.setter
     def datatype(self, new_type):
+        # re-parse value
+        self.__value = _parse_value(new_type, self.__value)
         self.__datatype = new_type
 
     @property
@@ -196,7 +201,7 @@ class Entity(object):
 
     @value.setter
     def value(self, new_value):
-        self.__value = new_value
+        self.__value = _parse_value(self.datatype, new_value)
 
     @property
     def path(self):
@@ -816,22 +821,11 @@ class Entity(object):
                     Was ' + str(type(child)))
 
         # parse VALUE
-
         if len(vals):
-            entity.value = _parse_col_values(entity.datatype, vals)
-        elif elem.text is not None:
-            if elem.text.strip() != "":
-                text_val = elem.text.strip()
-
-                if entity.datatype == DOUBLE:
-                    entity.value = float(text_val)
-                elif entity.datatype == DATETIME or entity.datatype == TEXT:
-                    entity.value = text_val
-                else:
-                    try:  # for references  and integer
-                        entity.value = int(text_val)
-                    except BaseException:
-                        entity.value = text_val
+            # The value(s) were wrapped in <Value> tags.
+            entity.value = vals
+        elif elem.text is not None and elem.text.strip() != "":
+            entity.value = elem.text.strip()
 
         return entity
 
@@ -976,36 +970,66 @@ class Entity(object):
 
         return self
 
+def _parse_value(datatype, value):
+    if value is None:
+        return value
+    if datatype is None:
+        return value
+    if datatype == DOUBLE:
+        return float(value)
+    if datatype == INTEGER:
+        return int(str(value))
+    if datatype == BOOLEAN:
+        if str(value).lower() == "true":
+            return True
+        elif str(value).lower() == "false":
+            return False
+        else:
+            raise ValueError("Boolean value was {}.".format(value))
+    if datatype in [DATETIME, TEXT]:
+        if isinstance(value, str):
+            return value
 
-def _parse_col_values(cdt, vals):
+    # deal with collections
     matcher = re.compile(r"^(?P<col>[^<]+)<(?P<dt>[^>]+)>$")
-    m = matcher.match(cdt)
+    m = matcher.match(datatype)
     if m:
         col = m.group("col")
         dt = m.group("dt")
 
         if col == "LIST":
             ret = list()
-            add = ret.append
         else:
-            return vals
+            return value
 
-        for v in vals:
-            if dt == DOUBLE:
-                add(float(v))
-            elif dt == TEXT or dt == DATETIME:
-                add(v)
-            else:
-                try:
-                    add(int(v))
-                except (ValueError, TypeError):
-                    add(v)
+        if hasattr(value, "__iter__") and not isinstance(value, str):
+            for v in value:
+                ret.append(_parse_value(dt, v))
+        else:
+            # put a single value into a list since the datatype says so.
+            ret.append(_parse_value(dt, value))
 
         return ret
 
-    if len(vals) == 1:
-        return vals[0]
-    return vals
+    # Special case: the XML parser cannot distinguish a single value from a
+    # one-element list, so unwrap it. NOTE(review): 1-char str recurses here.
+    if hasattr(value, "__len__") and len(value) == 1:
+        return _parse_value(datatype, value[0])
+
+    # deal with references
+    if isinstance(value, Entity):
+        return value
+    if isinstance(value, str) and "@" in value:
+        # probably this is a versioned reference
+        return str(value)
+    else:
+        # for unversioned references
+        try:
+            return int(value)
+        except ValueError:
+            # reference via name
+            return str(value)
+
 
 
 def _log_request(request, xml_body=None):
diff --git a/unittests/test_add_property.py b/unittests/test_add_property.py
index bd68f31b..3d7f53db 100644
--- a/unittests/test_add_property.py
+++ b/unittests/test_add_property.py
@@ -22,6 +22,7 @@
 # ** end header
 #
 import caosdb as db
+from pytest import raises
 from nose.tools import assert_is, assert_is_none, assert_equals, assert_is_not_none, assert_raises
 
 
@@ -176,19 +177,36 @@ def test_property_parameter_with_entity_and_datatype():
         unit="m",
         description="This is the length of something.")
 
-    assert_equals(0, len(rec.get_properties()))
-    rec.add_property(abstract_property, 3.14, datatype=db.INTEGER)
-    assert_equals(1, len(rec.get_properties()))
+    assert 0 == len(rec.get_properties())
+    rec.add_property(abstract_property, 300, datatype=db.INTEGER)
+    assert 1 == len(rec.get_properties())
     concrete_property = rec.get_property("length")
-    assert_is_not_none(concrete_property)
-    assert_equals(concrete_property.name, "length")
-    assert_equals(concrete_property.id, 512)
-    assert_equals(concrete_property.description,
-                  "This is the length of something.")
-    assert_equals(concrete_property.unit, "m")
-    assert_equals(concrete_property.value, 3.14)
-    assert_equals(concrete_property.datatype, db.INTEGER)
-    assert_is(concrete_property._wrapped_entity, abstract_property)
+    assert concrete_property is not None
+    assert concrete_property.name == "length"
+    assert concrete_property.id == 512
+    assert concrete_property.description == "This is the length of something."
+    assert concrete_property.unit == "m"
+    assert concrete_property.value == 300
+    assert concrete_property.datatype == db.INTEGER
+    assert id(concrete_property._wrapped_entity) == id(abstract_property)
+
+    concrete_property.value = None
+
+    with raises(ValueError):
+        # cannot parse 3.14 to integer
+        concrete_property.value = 3.14
+
+    assert concrete_property.value is None
+    assert concrete_property.datatype == db.INTEGER
+
+    concrete_property.datatype = None
+    concrete_property.value = 3.14
+
+    with raises(ValueError):
+        # cannot parse 3.14 to integer
+        concrete_property.datatype = db.INTEGER
+    concrete_property.datatype = None
+    concrete_property.value = 3.14
 
 
 def test_kw_name_and_value():
diff --git a/unittests/test_datatype.py b/unittests/test_datatype.py
index 7cc5fd41..ccda7734 100644
--- a/unittests/test_datatype.py
+++ b/unittests/test_datatype.py
@@ -18,9 +18,10 @@
 # along with this program. If not, see <https://www.gnu.org/licenses/>.
 #
 # ** end header
-
+from pytest import raises
 import caosdb as db
 from caosdb.common import datatype
+from caosdb.common.models import _parse_value
 
 
 def test_list():
@@ -32,3 +33,48 @@ def test_list_utilites():
     """Test for example if get_list_datatype works."""
     dtype = db.LIST(db.INTEGER)
     assert datatype.get_list_datatype(dtype) == db.INTEGER
+
+
+def test_parsing_of_intger_list_values():
+    dtype = db.LIST(db.INTEGER)
+    assert _parse_value(dtype, [1,2,3]) == [1,2,3]
+    assert _parse_value(dtype, [1]) == [1]
+    assert _parse_value(dtype, [None,1,2,3]) == [None,1,2,3]
+    assert _parse_value(dtype, [1, None,1,2,3]) == [1, None,1,2,3]
+    assert _parse_value(dtype, ["4", 4]) == [4,4]
+    assert _parse_value(dtype, []) == []
+    assert _parse_value(dtype, None) == None
+    assert _parse_value(None, [1,2,3.14,"asdf"]) == [1,2,3.14,"asdf"]
+    assert _parse_value(dtype, 1) == [1]
+
+    with raises(ValueError):
+        # float value in list
+        _parse_value(dtype, ["4.3", 4])
+
+
+def test_parsing_of_boolean_list_values():
+    dtype = db.LIST(db.BOOLEAN)
+    assert _parse_value(dtype, [True, False, True]) == [True, False, True]
+    assert _parse_value(dtype, ["true", False, None]) == [True, False, None]
+
+    with raises(ValueError):
+        _parse_value(dtype, ["not a boolean"])
+
+
+def test_parsing_of_unknown_col_datatype():
+    dtype = "Arraay<TEXT>"
+    obj = {"a": "b"}
+    assert id(_parse_value(dtype, obj)) == id(obj)
+
+
+def test_parsing_of_references():
+    dtype = "Person"
+    assert _parse_value(dtype, "Anna Lytik") == "Anna Lytik"
+    assert _parse_value(None, "Anna Lytik") == "Anna Lytik"
+    assert _parse_value(dtype, "2345@sdfg") == "2345@sdfg"
+    assert _parse_value(dtype, "2345") == 2345
+    assert _parse_value(dtype, 2345) == 2345
+
+
+    entity = db.Record(name="bla")
+    assert id(_parse_value(dtype, entity)) == id(entity)
-- 
GitLab