From c84564592f29cf4a6646c021e31412199f6f260e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Henrik=20tom=20W=C3=B6rden?= <h.tomwoerden@indiscale.com>
Date: Thu, 8 Sep 2022 20:10:58 +0200
Subject: [PATCH] ENH: indicate if updates are incomplete

---
 src/caosadvancedtools/crawler.py | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/src/caosadvancedtools/crawler.py b/src/caosadvancedtools/crawler.py
index eea85633..085cd8d2 100644
--- a/src/caosadvancedtools/crawler.py
+++ b/src/caosadvancedtools/crawler.py
@@ -225,6 +225,21 @@ class Crawler(object):
+        # Initialise once, before the loop, so that a later clean change set
+        # cannot reset the flag and it is defined even if there are no changes.
+        update_incomplete = False
         for _, _, old, new, _ in changes:
             new_cont = db.Container()
             new_cont = new_cont.from_xml(new)
+            ids = set()
+            # Collect into a db.Container so new_cont keeps its type.
+            tmp = db.Container()
+            # Remove duplicate entities (same id); a dropped duplicate means
+            # the update is incomplete.
+            for el in new_cont:
+                if el.id not in ids:
+                    ids.add(el.id)
+                    tmp.append(el)
+                else:
+                    update_incomplete = True
+            new_cont = tmp
             if new[0].version:
                 valids = db.Container()
                 nonvalids = db.Container()
@@ -234,6 +249,7 @@ class Crawler(object):
                     if ent.version == remote_ent.version:
                         valids.append(remote_ent)
                     else:
+                        update_incomplete = True
                         nonvalids.append(remote_ent)
                 valids.update(unique=False)
                 logger.info("Successfully updated {} records!".format(
@@ -258,6 +274,9 @@ class Crawler(object):
                 logger.info("Successfully updated {} records!".format(
                     len(new_cont)))
                 all_updates += len(new_cont)
+        # Only ask for a rerun when something was actually skipped.
+        if update_incomplete:
+            logger.info("Some updates could not be applied. Crawler has to rerun.")
         logger.info("Finished with authorized updates.")
         return all_inserts, all_updates
 
-- 
GitLab