Skip to content
Snippets Groups Projects
Commit 58bf1590 authored by Henrik tom Wörden's avatar Henrik tom Wörden
Browse files

cache

parent a484051c
No related branches found
No related tags found
No related merge requests found
......@@ -26,8 +26,10 @@
import argparse
import re
from hashlib import sha256
from argparse import RawTextHelpFormatter
from datetime import datetime
import sqlite3
import caosdb as db
from caosdb.exceptions import TransactionError
......@@ -38,16 +40,77 @@ def get_entity(name):
ent.retrieve()
return ent
def get_pretty_xml(cont):
    """Return a pretty-printed XML string for one or several entities.

    Parameters
    ----------
    cont : list, db.Entity or db.Container
        A list is copied into a new ``db.Container`` via ``extend``; a
        single ``db.Entity`` is wrapped in a new ``db.Container``. Any
        other object is used as is and must provide ``to_xml``.

    Returns
    -------
    The result of ``etree.tounicode(..., pretty_print=True)`` applied to
    the container's locally serialized XML (presumably ``lxml.etree``;
    the import is not visible in this part of the file).
    """
    if isinstance(cont, list):
        cont = db.Container().extend(cont)

    if isinstance(cont, db.Entity):
        # Use append() to add the entity to a local container.
        # Container.insert() is the server-side insert transaction in
        # caosdb-pylib and would not wrap the entity.
        cont = db.Container().append(cont)

    return etree.tounicode(
        cont.to_xml(local_serialization=True), pretty_print=True)
# TODO: This implements a cache on the client side. Should it live on
# the server side instead?
class Cache(object):
    """Client-side SQLite cache mapping entity digests to CaosDB ids.

    The cache lives in the file named by ``CACHE_DB`` and holds a single
    table ``identifiables(digest text primary key, caosdb_id integer)``.
    ``digest`` is the SHA-256 hex digest of an entity's pretty-printed XML
    (see ``hash_entity``).

    ``hash_entity``, ``insert`` and ``check_existing`` are static methods,
    so they can be called on the class (``Cache.insert(ent)``) as well as
    on an instance (``Cache().check_existing(ent)``).
    """

    # Path of the SQLite file; a relative path is resolved by sqlite3
    # against the current working directory.
    CACHE_DB = "cache.db"

    def __init__(self):
        """Create the cache database file if it does not exist yet."""
        if not os.path.exists(Cache.CACHE_DB):
            self.create_cache()

    def create_cache(self):
        """Create the ``identifiables`` table in ``CACHE_DB``.

        Raises
        ------
        sqlite3.OperationalError
            If the table already exists.
        """
        conn = sqlite3.connect(Cache.CACHE_DB)
        try:
            conn.execute(
                '''CREATE TABLE identifiables (digest text primary key, caosdb_id integer)''')
            conn.commit()
        finally:
            # Release the file handle even if the statement fails.
            conn.close()

    @staticmethod
    def hash_entity(ent):
        """Return the SHA-256 hex digest of the entity's pretty XML.

        Parameters
        ----------
        ent
            Anything accepted by ``get_pretty_xml``.

        Returns
        -------
        str
            Hexadecimal digest of the UTF-8 encoded XML string.
        """
        xml = get_pretty_xml(ent)

        return sha256(xml.encode("utf-8")).hexdigest()

    @staticmethod
    def insert(ent):
        """Store the digest of ``ent`` together with ``ent.id``.

        Parameters
        ----------
        ent
            Entity to remember; its ``id`` attribute is stored as
            ``caosdb_id``.

        Raises
        ------
        sqlite3.IntegrityError
            If the digest is already stored (``digest`` is the primary
            key of the table).
        """
        digest = Cache.hash_entity(ent)
        conn = sqlite3.connect(Cache.CACHE_DB)
        try:
            # Placeholders let sqlite3 do the quoting; the hex digest must
            # not be pasted into the statement text.
            conn.execute('''INSERT INTO identifiables VALUES (?, ?)''',
                         (digest, ent.id))
            conn.commit()
        finally:
            conn.close()

    @staticmethod
    def check_existing(ent):
        """Look up the id cached for ``ent``.

        Parameters
        ----------
        ent
            Entity whose digest is searched in the cache.

        Returns
        -------
        int or None
            The cached ``caosdb_id`` if the digest of ``ent`` is stored,
            otherwise ``None``. Only the id is returned (not the whole
            row), so the result can be used directly as an entity id.
        """
        digest = Cache.hash_entity(ent)
        conn = sqlite3.connect(Cache.CACHE_DB)
        try:
            # Parameters must be a sequence; a bare string would be read
            # as one binding per character.
            row = conn.execute(
                '''SELECT caosdb_id FROM identifiables WHERE digest=?''',
                (digest,)).fetchone()
        finally:
            conn.close()

        return None if row is None else row[0]
class AbstractCFood(object):
def __init__(self, pattern):
# TODO: Restructure this class so that no instance is needed to check
# for a match.
# Instances shall be used to keep track of a match; i.e., entities can
# be instance variables.
def __init__(self, pattern, use_cache=False):
self.pattern = re.compile(pattern)
self.use_cache = use_cache
def treat_match(self, crawled_file, match):
entities = self.create_identifiables(crawled_file, match)
for key, ent in entities.items():
existing = None
if self.use_cache:
c = Cache()
existing_in_cache = c.check_existing(ent)
if existing_in_cache is not None:
existing = db.Entity(id = existing_in_cache).retrieve()
existing = AbstractCFood.find_existing(ent)
if existing is None:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment