From d365acf5153f635af01429507b006326be66013e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Henrik=20tom=20W=C3=B6rden?= <h.tomwoerden@indiscale.com>
Date: Tue, 4 Aug 2020 09:12:07 +0000
Subject: [PATCH] ENH: properly treat lists of references

---
 src/caosadvancedtools/crawler.py | 41 +++++++++++++++++++---------
 unittests/test_crawler.py        | 47 ++++++++++++++++++++++++++++++++
 2 files changed, 75 insertions(+), 13 deletions(-)
 create mode 100644 unittests/test_crawler.py

diff --git a/src/caosadvancedtools/crawler.py b/src/caosadvancedtools/crawler.py
index 7e69ed33..fa6fa5ad 100644
--- a/src/caosadvancedtools/crawler.py
+++ b/src/caosadvancedtools/crawler.py
@@ -541,6 +541,33 @@ carefully and if the changes are ok, click on the following link:
         logger.debug("Retrieving entities from CaosDB...")
         identifiables.retrieve(unique=True, raise_exception_on_error=False)
 
+    @staticmethod
+    def create_query_for_identifiable(ident):
+        """Build the FIND query that checks whether `ident` already exists.
+
+        Uses the first parent, the name and all properties of `ident`;
+        raises ValueError if `ident` has no parent."""
+        # TODO: only the first parent is used; is ignoring the rest sufficient?
+        if len(ident.get_parents()) == 0:
+            raise ValueError("The identifiable must have at least one parent.")
+        query_string = "FIND Record " + ident.get_parents()[0].name
+        query_string += " WITH "
+        if ident.name is not None:
+            query_string += "name='{}' AND".format(ident.name)
+
+        for p in ident.get_properties():
+            if p.datatype is not None and p.datatype.startswith("LIST<"):
+                for v in p.value:  # one "references" condition per element
+                    query_string += ("references "
+                                     + str(v.id if isinstance(v, db.Entity)
+                                           else v)
+                                     + " AND ")
+            else:  # any other datatype: compare the property by its value
+                query_string += ("'" + p.name + "'='" + str(get_value(p))
+                                 + "' AND ")
+        # cut the last 4 chars (final AND); NOTE(review): assumes >= 1 condition
+        return query_string[:-4]
+
     @staticmethod
     def find_existing(entity):
         """searches for an entity that matches the identifiable in CaosDB
@@ -548,19 +575,7 @@ carefully and if the changes are ok, click on the following link:
         Characteristics of the identifiable like, properties, name or id are
         used for the match.
         """
-
-        # TODO multiple parents are ignored! Sufficient?
-        query_string = "FIND Record " + entity.get_parents()[0].name
-        query_string += " WITH "
-        if entity.name is not None:
-            query_string += "name='{}' AND".format(entity.name)
-
-        for p in entity.get_properties():
-            query_string += ("'" + p.name + "'='" + str(get_value(p))
-                             + "' AND ")
-        # remove the last AND
-        query_string = query_string[:-4]
-
+        query_string = Crawler.create_query_for_identifiable(entity)
         logger.debug(query_string)
         q = db.Query(query_string)
         # the identifiable should identify an object uniquely. Thus the query
diff --git a/unittests/test_crawler.py b/unittests/test_crawler.py
new file mode 100644
index 00000000..f603031e
--- /dev/null
+++ b/unittests/test_crawler.py
@@ -0,0 +1,47 @@
+#!/usr/bin/env python
+# encoding: utf-8
+#
+# This file is a part of the CaosDB Project.
+#
+# Copyright (C) 2020 Indiscale GmbH <info@indiscale.com>
+# Copyright (C) 2020 Henrik tom Wörden <h.tomwoerden@indiscale.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+
+import re
+import unittest
+
+import caosdb as db
+from caosadvancedtools.crawler import Crawler
+
+
+class CrawlerTest(unittest.TestCase):  # tests for Crawler query building
+    def test_ident_query(self):  # pins create_query_for_identifiable output
+        ident = db.Record(name="coolrec")  # no parent yet: must raise
+        self.assertRaises(ValueError, Crawler.create_query_for_identifiable,
+                          ident)
+        ident.add_parent(name="RT")  # parent and name only
+        qs = Crawler.create_query_for_identifiable(ident)
+        assert qs == "FIND Record RT WITH name='coolrec'"
+        ident.add_property(name="p", value=5)  # name plus a scalar property
+        qs = Crawler.create_query_for_identifiable(ident)
+        assert qs == "FIND Record RT WITH name='coolrec' AND'p'='5' "
+
+        ident = db.Record()  # unnamed record with a LIST property
+        ident.add_parent(name="RT")
+        ident.add_property(name="p", value=[2345, db.Record(id=234567)],
+                           datatype=db.LIST("RT2"))  # plain id and an Entity
+        qs = Crawler.create_query_for_identifiable(ident)
+        assert qs == "FIND Record RT WITH references 2345 AND references 234567 "
-- 
GitLab