diff --git a/src/caosadvancedtools/crawler.py b/src/caosadvancedtools/crawler.py
index eea85633b73048bd2a8650aba1c676444a21096a..085cd8d27f261644b38061d26fb10e37ac5465fd 100644
--- a/src/caosadvancedtools/crawler.py
+++ b/src/caosadvancedtools/crawler.py
@@ -225,6 +225,17 @@ class Crawler(object):
         for _, _, old, new, _ in changes:
             new_cont = db.Container()
             new_cont = new_cont.from_xml(new)
+            ids = []
+            tmp = []
+            update_incomplete = False
+            # remove duplicate entities
+            for el in new_cont:
+                if el.id not in ids:
+                    ids.append(el.id)
+                    tmp.append(el)
+                else:
+                    update_incomplete = True
+            new_cont = tmp
             if new[0].version:
                 valids = db.Container()
                 nonvalids = db.Container()
@@ -234,6 +245,7 @@ class Crawler(object):
                     if ent.version == remote_ent.version:
                         valids.append(remote_ent)
                     else:
+                        update_incomplete = True
                         nonvalids.append(remote_ent)
                 valids.update(unique=False)
                 logger.info("Successfully updated {} records!".format(
@@ -258,6 +270,8 @@ class Crawler(object):
                 logger.info("Successfully updated {} records!".format(
                     len(new_cont)))
                 all_updates += len(new_cont)
+            if update_incomplete:
+                logger.info("Some updates could not be applied. Crawler has to rerun.")
         logger.info("Finished with authorized updates.")
 
         return all_inserts, all_updates
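
For context, the duplicate-removal pattern introduced in the first hunk can be read in isolation as follows. This is a minimal sketch, not the crawler's actual code: Entity and deduplicate are hypothetical stand-ins for the caosdb entities and the inline loop above, kept only to show how the update_incomplete flag pairs with the first-occurrence filter.

    from dataclasses import dataclass


    @dataclass
    class Entity:
        """Hypothetical stand-in for a caosdb entity; only `id` matters here."""
        id: int


    def deduplicate(entities):
        """Keep the first entity per id; report whether duplicates were dropped.

        Mirrors the loop added in the first hunk: a repeated id both removes
        the duplicate and flags the update as incomplete, which is what later
        triggers the "Crawler has to rerun" log message.
        """
        seen_ids = []
        unique = []
        update_incomplete = False
        for el in entities:
            if el.id not in seen_ids:
                seen_ids.append(el.id)
                unique.append(el)
            else:
                update_incomplete = True
        return unique, update_incomplete


    # usage: the second Entity(1) is dropped and the incomplete flag is set
    unique, incomplete = deduplicate([Entity(1), Entity(2), Entity(1)])
    assert [e.id for e in unique] == [1, 2]
    assert incomplete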