Skip to content
Snippets Groups Projects
Commit 05d62568 authored by Henrik tom Wörden's avatar Henrik tom Wörden
Browse files

ENH: use version id for check whether entities can be updated

parent b006c4b7
No related branches found
No related tags found
2 merge requests!59FIX: if multiple updates for one entity exist, the retrieve would result in an...,!56F insert auth
Pipeline #28064 failed
...@@ -330,7 +330,7 @@ class UpdateCache(AbstractCache): ...@@ -330,7 +330,7 @@ class UpdateCache(AbstractCache):
""" """
def get_cache_schema_version(self): def get_cache_schema_version(self):
return 1 return 2
def get_default_file_name(self): def get_default_file_name(self):
return "/tmp/crawler_update_cache.db" return "/tmp/crawler_update_cache.db"
...@@ -381,9 +381,11 @@ class UpdateCache(AbstractCache): ...@@ -381,9 +381,11 @@ class UpdateCache(AbstractCache):
def create_cache(self): def create_cache(self):
""" initialize the cache """ """ initialize the cache """
self.run_sql_commands([('''CREATE TABLE updates (olddigest text, newdigest text, self.run_sql_commands([
oldrep text, newrep text, run_id text, ('''CREATE TABLE updates (olddigest TEXT PRIMARY KEY, newdigest TEXT, oldrep TEXT,
primary key (olddigest, newdigest, run_id))''', )]) newrep TEXT, run_id TEXT)''',),
('''CREATE TABLE version (schema INTEGER)''',),
("INSERT INTO version VALUES (?)", (self.get_cache_schema_version(),))])
def get(self, run_id, querystring): def get(self, run_id, querystring):
""" returns the pending updates for a given run id """ returns the pending updates for a given run id
......
...@@ -223,25 +223,43 @@ class Crawler(object): ...@@ -223,25 +223,43 @@ class Crawler(object):
changes = cache.get_updates(run_id) changes = cache.get_updates(run_id)
for _, _, old, new, _ in changes: for _, _, old, new, _ in changes:
current = db.Container() if new[0].version:
new_cont = db.Container() valids = db.Container()
new_cont = new_cont.from_xml(new) nonvalids = db.Container()
new_cont = db.Container()
new_cont = new_cont.from_xml(new)
for ent in new_cont:
remote_ent = db.Entity(id=ent.id).retrieve()
if ent.version == remote_ent.version:
valids.append(remote_ent)
else:
nonvalids.append(remote_ent)
valids.update(unique=False)
logger.info("Successfully updated {} records!".format(
len(valids)))
logger.info("{} Records were not updated because the version in the server "
"changed!".format(len(nonvalids)))
all_updates += len(valids)
else:
current = db.Container()
new_cont = db.Container()
new_cont = new_cont.from_xml(new)
for ent in new_cont: for ent in new_cont:
current.append(db.execute_query("FIND {}".format(ent.id), current.append(db.Entity(id=ent.id).retrieve())
unique=True)) current_xml = get_pretty_xml(current)
current_xml = get_pretty_xml(current)
# check whether previous version equals current version # check whether previous version equals current version
# if not, the update must not be done # if not, the update must not be done
if current_xml != old: if current_xml != old:
continue continue
new_cont.update(unique=False) new_cont.update(unique=False)
logger.info("Successfully updated {} records!".format( logger.info("Successfully updated {} records!".format(
len(new_cont))) len(new_cont)))
all_updates += len(new_cont) all_updates += len(new_cont)
logger.info("Finished with authorized updates.") logger.info("Finished with authorized updates.")
return all_inserts, all_updates return all_inserts, all_updates
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment