Skip to content
Snippets Groups Projects

F insert auth

Merged Henrik tom Wörden requested to merge f-insert-auth into dev
2 files
+ 53
18
Compare changes
  • Side-by-side
  • Inline
Files
2
@@ -72,9 +72,11 @@ class Cache(object):
without querying.
"""
def __init__(self, db_file=None):
def __init__(self, db_file=None, default_name="cache.db"):
if db_file is None:
self.db_file = "cache.db"
tmppath = tempfile.gettempdir()
tmpf = os.path.join(tmppath, default_name)
self.db_file = tmpf
else:
self.db_file = db_file
@@ -148,18 +150,14 @@ class Cache(object):
class UpdateCache(Cache):
"""
stores unauthorized updates
stores unauthorized inserts and updates
If the Guard is set to a mode that does not allow an update, the update can
be stored in this cache such that it can be authorized and done later.
If the Guard is set to a mode that does not allow an insert or update, the insert or update can
be stored in this cache such that it can be authorized and performed later.
"""
def __init__(self, db_file=None):
    """Initialise the cache for pending, unauthorized inserts and updates.

    Parameters
    ----------
    db_file: str, optional
        Path of the SQLite database file. When it is None, choosing the
        location is left to ``Cache.__init__``, which is given
        ``"crawler_insert_cache.db"`` as the default file name.
    """
    # The base class resolves a missing db_file itself, so the temp-path
    # handling is not repeated here and the base initialiser runs only once.
    super().__init__(db_file=db_file, default_name="crawler_insert_cache.db")
@staticmethod
def get_previous_version(cont):
@@ -174,23 +172,32 @@ class UpdateCache(Cache):
return old_ones
def insert(self, cont, run_id):
"""Insert a pending, unauthorized update
def insert(self, cont, run_id, insert=False):
"""Insert a pending, unauthorized inserts
Parameters
----------
cont: Container with the records to be updated containing the desired
cont: Container with the records to be inserted containing the desired
version, i.e. the state after the update.
run_id: int
The id of the crawler run
insert: bool
Whether the entities in the container shall be inserted or updated.
"""
cont = put_in_container(cont)
old_ones = UpdateCache.get_previous_version(cont)
if insert:
old_ones = ""
else:
old_ones = UpdateCache.get_previous_version(cont)
new_ones = cont
old_hash = Cache.hash_entity(old_ones)
if insert:
old_hash = ""
else:
old_hash = Cache.hash_entity(old_ones)
new_hash = Cache.hash_entity(new_ones)
conn = sqlite3.connect(self.db_file)
c = conn.cursor()
@@ -210,20 +217,40 @@ class UpdateCache(Cache):
conn.commit()
conn.close()
def get(self, run_id, querystring):
    """Return the cached rows that a query selects for one crawler run.

    Parameters
    ----------
    run_id:
        The id of the crawler run. It is converted with ``str`` and bound
        to the placeholder of ``querystring``.
    querystring: str
        The SQL statement to execute against the cache database. It must
        contain exactly one ``?`` placeholder, which receives the run id.

    Returns
    -------
    list
        All rows produced by the statement, as returned by ``fetchall``.
    """
    conn = sqlite3.connect(self.db_file)
    try:
        c = conn.cursor()
        c.execute(querystring, (str(run_id),))
        res = c.fetchall()
        conn.commit()
    finally:
        # Release the database handle even when the statement fails, so a
        # bad query does not leave the cache file open.
        conn.close()
    return res
def get_inserts(self, run_id):
    """Return the pending inserts for a given run id.

    Selects the rows of the ``updates`` table whose ``olddigest`` is the
    empty string, which is what this query uses to recognise an insert.

    Parameters
    ----------
    run_id:
        The id of the crawler run.

    Returns
    -------
    The rows returned by ``self.get`` for that query.
    """
    return self.get(run_id, '''Select * FROM updates WHERE olddigest='' AND run_id=?''')
def get_updates(self, run_id):
    """Return the pending updates for a given run id.

    Selects the rows of the ``updates`` table whose ``olddigest`` is not
    the empty string.

    Parameters
    ----------
    run_id:
        The id of the crawler run.

    Returns
    -------
    The rows returned by ``self.get`` for that query.
    """
    pending_updates_query = '''Select * FROM updates WHERE olddigest!='' AND run_id=?'''
    return self.get(run_id, pending_updates_query)
Loading