From 385e0d848e6c257de0ceeda7c6dc7c5067fc7e84 Mon Sep 17 00:00:00 2001 From: Florian Spreckelsen <florian.spreckelsen@gmx.net> Date: Mon, 14 Sep 2020 14:26:55 +0000 Subject: [PATCH] Resolve IntegrityError in UpdateCache --- CHANGELOG.md | 4 ++ integrationtests/test.sh | 2 + integrationtests/test_cache.py | 66 ++++++++++++++++++++++++++++++++ src/caosadvancedtools/cache.py | 29 +++++++++++++- src/caosadvancedtools/crawler.py | 13 ++++++- 5 files changed, 111 insertions(+), 3 deletions(-) create mode 100644 integrationtests/test_cache.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 9ef66957..cb8662e1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -37,6 +37,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed ### +* Fixed an issue where `caosadvancedtools.cache.UpdateCache` would + cause an `sqlite3.IntegrityError` if more than one change was cached + for the same entity. + ### Security ### ## [0.2.0] - 2020-05-28 ## diff --git a/integrationtests/test.sh b/integrationtests/test.sh index 6d826133..f5f0a2db 100755 --- a/integrationtests/test.sh +++ b/integrationtests/test.sh @@ -6,6 +6,8 @@ echo "Clearing database" python3 clear_database.py echo "Testing crawler without cfoods" python3 -m pytest test_crawler_basics.py +echo "Testing caching" +python3 -m pytest test_cache.py echo "Filling the database" ./filldb.sh diff --git a/integrationtests/test_cache.py b/integrationtests/test_cache.py new file mode 100644 index 00000000..cb9c174e --- /dev/null +++ b/integrationtests/test_cache.py @@ -0,0 +1,66 @@ +#!/usr/bin/env python +# encoding: utf-8 +# +# ** header v3.0 +# This file is a part of the CaosDB Project. 
+# +# Copyright (C) 2020 Indiscale GmbH <info@indiscale.com> +# Copyright (C) 2020 Florian Spreckelsen <f.spreckelsen@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +# +# ** end header + +import caosdb as db +from caosadvancedtools.cache import UpdateCache + + +def setup(): + try: + db.execute_query("FIND Test*").delete() + except Exception: + pass + + +def teardown(): + setup() + + +def test_same_old_different_new(): + """Formerly, inserting two containers with different changes to the + same entity into the update cache would result in an + sqlite3.IntegrityError. 
+ + """ + rt = db.RecordType(name="TestType").insert() + db.Property(name="TestProp1", datatype=db.TEXT).insert() + db.Property(name="TestProp2", datatype=db.TEXT).insert() + rec = db.Record(name="TestRecord").add_parent(rt).insert() + + # add TestProp1 to TestRecord + rec.add_property(name="TestProp1", value="blub") + cont = db.Container().append(rec) + + update = UpdateCache() + run_id = "a" + update.insert(cont, run_id) + assert len(update.get_updates(run_id)) == 1 + + # duplicate and add TestProp2 to TestRecord + rec = db.execute_query("FIND Record TestRecord", unique=True) + rec.add_property(name="TestProp2", value="bla") + cont = db.Container().append(rec) + # same old digest, different new digest + update.insert(cont, run_id) + assert len(update.get_updates(run_id)) == 2 diff --git a/src/caosadvancedtools/cache.py b/src/caosadvancedtools/cache.py index 980222d0..bde76abf 100644 --- a/src/caosadvancedtools/cache.py +++ b/src/caosadvancedtools/cache.py @@ -1,3 +1,27 @@ +#!/usr/bin/env python +# encoding: utf-8 +# +# ** header v3.0 +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2020 Indiscale GmbH <info@indiscale.com> +# Copyright (C) 2020 Henrik tom Wörden <h.tomwoerden@indiscale.com> +# Copyright (C) 2020 Florian Spreckelsen <f.spreckelsen@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. 
+# +# ** end header # TODO this is implementing a cache on client side. Should it be on # server side? @@ -47,7 +71,8 @@ class Cache(object): def create_cache(self): conn = sqlite3.connect(self.db_file) c = conn.cursor() - c.execute('''CREATE TABLE identifiables (digest text primary key, caosdb_id integer)''') + c.execute( + '''CREATE TABLE identifiables (digest text primary key, caosdb_id integer)''') conn.commit() conn.close() @@ -163,7 +188,7 @@ class UpdateCache(Cache): c = conn.cursor() c.execute('''CREATE TABLE updates (olddigest text, newdigest text, oldrep text, newrep text, run_id text, - primary key (olddigest, run_id))''') + primary key (olddigest, newdigest, run_id))''') conn.commit() conn.close() diff --git a/src/caosadvancedtools/crawler.py b/src/caosadvancedtools/crawler.py index 323d1955..e5bb738c 100644 --- a/src/caosadvancedtools/crawler.py +++ b/src/caosadvancedtools/crawler.py @@ -48,6 +48,7 @@ from datetime import datetime import caosdb as db from caosdb.exceptions import TransactionError +from sqlite3 import IntegrityError from .cache import Cache, UpdateCache, get_pretty_xml from .cfood import RowCFood, add_files, get_ids_for_entities_with_names @@ -506,7 +507,17 @@ carefully and if the changes are ok, click on the following link: try: guard.safe_update(cfood.to_be_updated, unique=False) except ProhibitedException: - self.update_cache.insert(cfood.to_be_updated, self.run_id) + try: + self.update_cache.insert(cfood.to_be_updated, self.run_id) + except IntegrityError as e: + logger.warning( + "There were problems with the update of {}.".format( + cfood.to_be_updated), + extra={"identifier": str(cfood.to_be_updated), + "category": "update-cache"} + ) + logger.debug(traceback.format_exc()) + logger.debug(e) except Exception as e: DataModelProblems.evaluate_exception(e) -- GitLab