From 4edb80e607fa763fe004308004a2ec900ea3ef8f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Henrik=20tom=20W=C3=B6rden?= <h.tomwoerden@indiscale.com>
Date: Mon, 21 Nov 2022 11:21:07 +0100
Subject: [PATCH] MAINT: do not allow empty identifiables

---
 integrationtests/basic_example/test_basic.py |  4 +++-
 src/caoscrawler/identifiable.py              |  4 ++++
 unittests/test_file_identifiables.py         | 10 +++++++++-
 3 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/integrationtests/basic_example/test_basic.py b/integrationtests/basic_example/test_basic.py
index dbc506ef..0c847b08 100755
--- a/integrationtests/basic_example/test_basic.py
+++ b/integrationtests/basic_example/test_basic.py
@@ -109,9 +109,11 @@ def crawler_extended(ident):
 
 def test_ambigious_lookup(clear_database, usemodel, crawler, ident):
     ins, ups = crawler.synchronize()
+    # Identifiable rejects a bare record type, so identify via the project reference instead.
+    proj = db.execute_query("FIND Project WITH identifier='SpeedOfLight'", unique=True)
     with pytest.raises(RuntimeError, match=".*unambigiously.*"):
         print(crawler.identifiableAdapter.retrieve_identified_record_for_identifiable(
-            Identifiable(record_type="Measurement")))
+            Identifiable(properties={'project': proj.id})))
 
 
 def test_single_insertion(clear_database, usemodel, crawler, ident):
diff --git a/src/caoscrawler/identifiable.py b/src/caoscrawler/identifiable.py
index 32d07b5e..56c023e2 100644
--- a/src/caoscrawler/identifiable.py
+++ b/src/caoscrawler/identifiable.py
@@ -49,6 +49,10 @@ class Identifiable():
     def __init__(self, record_id: int = None, path: str = None, record_type: str = None,
                  name: str = None, properties: dict = None,
                  backrefs: list[Union[int, str]] = None):
+        if (record_id is None and path is None and name is None and backrefs is None and (properties
+                                                                                          is None or len(properties) == 0)):
+            raise ValueError("There is no identifying information. You need to add a path or "
+                             "properties or other identifying attributes.")
         self.record_id = record_id
         self.path = path
         self.record_type = record_type
diff --git a/unittests/test_file_identifiables.py b/unittests/test_file_identifiables.py
index f71ea75a..c7821f39 100644
--- a/unittests/test_file_identifiables.py
+++ b/unittests/test_file_identifiables.py
@@ -8,19 +8,27 @@ import pytest
 from pytest import raises
 
 from caoscrawler.identifiable_adapters import LocalStorageIdentifiableAdapter
+from caoscrawler.identifiable import Identifiable
 
 
 def test_file_identifiable():
     ident = LocalStorageIdentifiableAdapter()
 
+    # Without a path there is no identifying information
+    with raises(ValueError):
+        ident.get_identifiable(db.File())
+
     fp = "/test/bla/bla.txt"
     file_obj = db.File(path=fp)
     identifiable = ident.get_identifiable(file_obj)
 
     # the path is copied to the identifiable
+    assert fp == identifiable.path
+    assert isinstance(identifiable, Identifiable)
+
+    # Comparison is only defined between Identifiable objects; a db.File raises ValueError
     with raises(ValueError):
         file_obj != identifiable
-    assert file_obj.path == identifiable.path
 
     # since the path does not exist in the data in ident, the follwoing functions return None
     assert ident.retrieve_identified_record_for_record(file_obj) is None
-- 
GitLab