From c53e35d370758685f4853dc85cb4958069b446a9 Mon Sep 17 00:00:00 2001 From: Alexander Schlemmer <alexander.schlemmer@ds.mpg.de> Date: Tue, 24 Nov 2020 10:58:15 +0100 Subject: [PATCH] ENH: refactored cache classes to inherit from an abstract base class --- src/caosadvancedtools/cache.py | 64 ++++++++++++++++++++++++---------- 1 file changed, 46 insertions(+), 18 deletions(-) diff --git a/src/caosadvancedtools/cache.py b/src/caosadvancedtools/cache.py index 620777bf..105f95d4 100644 --- a/src/caosadvancedtools/cache.py +++ b/src/caosadvancedtools/cache.py @@ -30,6 +30,7 @@ import sqlite3 from hashlib import sha256 from lxml import etree +from abc import ABC, abstractmethod import caosdb as db @@ -50,16 +51,25 @@ def get_pretty_xml(cont): return etree.tounicode(cont.to_xml( local_serialization=True), pretty_print=True) -# Increase this, when changes to the cache tables are made: -CACHE_SCHEMA_VERSION = 2 -class Cache(object): - """ - stores identifiables (as a hash of xml) and their respective ID. +class AbstractCache(ABC): + @abstractmethod + def get_cache_schema_version(self): + """ + Return the version of the SQLite database schema. Has to be overridden by + subclasses. The version is saved in table version, column schema. - This allows to retrieve the Record corresponding to an indentifiable - without querying. - """ + Increase the returned value when changes to the cache tables are made. + """ + pass + + @abstractmethod + def create_cache(self): + """ + Has to be overridden by subclasses to create the cache in + the most recent version.
+ """ + pass def __init__(self, db_file=None, force_creation=False): """ @@ -96,9 +106,9 @@ class Cache(object): except sqlite3.OperationalError: current_schema = 1 - if current_schema > CACHE_SCHEMA_VERSION: + if current_schema > self.get_cache_schema_version(): raise RuntimeError("Cache is corrupt or was created with a future version of this program.") - elif current_schema < CACHE_SCHEMA_VERSION: + elif current_schema < self.get_cache_schema_version(): raise RuntimeError("Cache version too old.") def get_cache_version(self): @@ -116,6 +126,22 @@ class Cache(object): return version_row[0][0] finally: conn.close() + + +# TODO: A better name would be IdentifiablesCache +class Cache(AbstractCache): + """ + Stores identifiables (as a hash of xml) and their respective ID. + + This allows retrieving the Record corresponding to an identifiable + without querying. + """ + + def get_cache_schema_version(self): + return 2 + + def __init__(self, db_file=None, force_creation=False): + super().__init__(db_file, force_creation) def create_cache(self): """ @@ -131,7 +157,7 @@ class Cache(object): '''CREATE TABLE identifiables (digest TEXT PRIMARY KEY, caosdb_id INTEGER, caosdb_version TEXT)''') c.execute( '''CREATE TABLE version (schema INTEGER)''') - c.execute("INSERT INTO version VALUES (?)", (CACHE_SCHEMA_VERSION,)) + c.execute("INSERT INTO version VALUES (?)", (self.get_cache_schema_version(),)) conn.commit() conn.close() @@ -218,7 +244,7 @@ class Cache(object): self.insert(ehash, ent.id, ent.version.id) -class UpdateCache(Cache): +class UpdateCache(AbstractCache): """ stores unauthorized updates @@ -226,12 +252,14 @@ class UpdateCache(Cache): be stored in this cache such that it can be authorized and done later. """ - def __init__(self, db_file=None): - if db_file is None: - # TODO: check whether a hardcoded temp file is really wanted - # Why not crawler_update_cache.db in current working directory?
- db_file = "/tmp/crawler_update_cache.db" - super().__init__(db_file=db_file) + def get_cache_schema_version(self): + return 1 + + def get_default_file_name(self): + return "/tmp/crawler_update_cache.db" + + def __init__(self, db_file=None, force_creation=False): + super().__init__(self.get_default_file_name() if db_file is None else db_file, force_creation) @staticmethod def get_previous_version(cont): -- GitLab