diff --git a/src/caosadvancedtools/cache.py b/src/caosadvancedtools/cache.py
index 0aef80defaa43ce53328642fe5cfab1f2230c95c..fcba542f0378574c483778fe8980e87b39999f76 100644
--- a/src/caosadvancedtools/cache.py
+++ b/src/caosadvancedtools/cache.py
@@ -330,7 +330,7 @@ class UpdateCache(AbstractCache):
     """
 
     def get_cache_schema_version(self):
-        return 1
+        return 2
 
     def get_default_file_name(self):
         return "/tmp/crawler_update_cache.db"
@@ -381,9 +381,11 @@ class UpdateCache(AbstractCache):
 
     def create_cache(self):
         """ initialize the cache """
-        self.run_sql_commands([('''CREATE TABLE updates (olddigest text, newdigest text,
-                                  oldrep text, newrep text, run_id text,
-                                  primary key (olddigest, newdigest, run_id))''', )])
+        self.run_sql_commands([
+            ('''CREATE TABLE updates (olddigest TEXT PRIMARY KEY, newdigest TEXT, oldrep TEXT,
+             newrep TEXT, run_id TEXT)''',),
+            ('''CREATE TABLE version (schema INTEGER)''',),
+            ("INSERT INTO version VALUES (?)", (self.get_cache_schema_version(),))])
 
     def get(self, run_id, querystring):
         """ returns the pending updates for a given run id
diff --git a/src/caosadvancedtools/crawler.py b/src/caosadvancedtools/crawler.py
index 2fac6243ef35dfc56b208ecbc91902e77569e622..e959bd2088a4094a628d3b990b06576d4287f526 100644
--- a/src/caosadvancedtools/crawler.py
+++ b/src/caosadvancedtools/crawler.py
@@ -223,25 +223,46 @@ class Crawler(object):
         changes = cache.get_updates(run_id)
 
         for _, _, old, new, _ in changes:
-            current = db.Container()
             new_cont = db.Container()
             new_cont = new_cont.from_xml(new)
 
-            for ent in new_cont:
-                current.append(db.execute_query("FIND {}".format(ent.id),
-                                                unique=True))
-            current_xml = get_pretty_xml(current)
-
-            # check whether previous version equals current version
-            # if not, the update must not be done
-
-            if current_xml != old:
-                continue
-
-            new_cont.update(unique=False)
-            logger.info("Successfully updated {} records!".format(
-                len(new_cont)))
-            all_updates += len(new_cont)
+            # `new` is the serialized container handed to from_xml(), so the
+            # version check has to inspect the parsed entities, not `new[0]`.
+            if len(new_cont) > 0 and new_cont[0].version:
+                valids = db.Container()
+                nonvalids = db.Container()
+
+                for ent in new_cont:
+                    remote_ent = db.Entity(id=ent.id).retrieve()
+                    if ent.version == remote_ent.version:
+                        # queue the authorized entity itself; re-sending the
+                        # freshly retrieved remote copy would change nothing
+                        valids.append(ent)
+                    else:
+                        nonvalids.append(remote_ent)
+                valids.update(unique=False)
+                logger.info("Successfully updated {} records!".format(
+                    len(valids)))
+                logger.info("{} Records were not updated because the version in the server "
+                            "changed!".format(len(nonvalids)))
+                all_updates += len(valids)
+            else:
+                current = db.Container()
+
+                for ent in new_cont:
+                    current.append(db.Entity(id=ent.id).retrieve())
+                current_xml = get_pretty_xml(current)
+
+                # check whether previous version equals current version
+                # if not, the update must not be done
+
+                if current_xml != old:
+                    continue
+
+                new_cont.update(unique=False)
+                logger.info("Successfully updated {} records!".format(
+                    len(new_cont)))
+                all_updates += len(new_cont)
 
         logger.info("Finished with authorized updates.")
         return all_inserts, all_updates