diff --git a/CHANGELOG.md b/CHANGELOG.md
index d5f4cc62eaa099aaeebf4ab884822baa8557250e..8315011da5a89f53ba9eb5b2533115ef245e790e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,12 +7,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
-- Added new converters for tables: CSVTableConverter and XLSXTableConverter
-- FIX: Variables are now also replaced when the value is given as a list.
-
 ### Added
 
 * Everything
+* Added new converters for tables: CSVTableConverter and XLSXTableConverter
+* Possibility to authorize updates as in the old crawler
 
 ### Changed
 
@@ -23,6 +22,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Removed
 
 ### Fixed
-- Fixed #12
+
+* Fixed #12
+* FIX: Variables are now also replaced when the value is given as a list.
 
 ### Security
diff --git a/README.md b/README.md
index 59b88aaa36ed97d8c2cc9e4474820e3dad4a478b..8576e5c969556005fdeb346ef2cdfadf1b7fc266 100644
--- a/README.md
+++ b/README.md
@@ -25,6 +25,8 @@ After installation of the package run (within the project folder):
 pytest
 ```
 
+## Integration Tests
+see `integrationtests/README.md`
 
 # Contributers
 
diff --git a/integrationtests/README.md b/integrationtests/README.md
index 96789ed9f02036a0c7cc25ca1a60d9f0042a5557..88d55902e3fdc5836baefd97c3192cc9ff01e7bd 100644
--- a/integrationtests/README.md
+++ b/integrationtests/README.md
@@ -1,2 +1,3 @@
 1. Mount test_data/extroot as extroot folder in the CaosDB server
 2. use an empty server
+3. run pytest from `src`: `python -m pytest ../integrationtests`
diff --git a/integrationtests/pycaosdb.ini b/integrationtests/pycaosdb.ini
new file mode 100644
index 0000000000000000000000000000000000000000..a4f429736c9b46c8987d05a02724725295f32081
--- /dev/null
+++ b/integrationtests/pycaosdb.ini
@@ -0,0 +1,29 @@
+[Connection]
+url=https://localhost:10443/
+username=admin
+debug=0
+#cacert=/home//CaosDB/caosdb-deploy/profiles/default/custom/other/cert/caosdb.cert.pem
+password_method=plain
+password=caosdb
+
+ssl_insecure=True
+timeout=5000
+[Container]
+debug=0
+
+#[Crawler]
+#oldprefix=/ExperimentalData/
+#newprefix=/home/professional/CaosDB/caosdb-advanced-user-tools/integrationtests/extroot/ExperimentalData
+#[IntegrationTests]
+#test_server_side_scripting.bin_dir=/home/professional/CaosDB/caosdb-pyinttest/resources
+
+[Misc]
+sendmail=sendmail_to_file
+#sendmail=/usr/local/bin/sendmail_to_file
+entity_loan.curator_mail_from=admin@indiscale.com
+entity_loan.curator_mail_to=admin@indiscale.com
+[sss_helper]
+external_uri = https://localhost:10443
+[advancedtools]
+crawler.from_mail=admin@indiscale.com
+crawler.to_mail=admin@indiscale.com
diff --git a/integrationtests/test_data/extroot/realworld_example/pycaosdb.ini b/integrationtests/test_data/extroot/realworld_example/pycaosdb.ini
new file mode 120000
index 0000000000000000000000000000000000000000..bc443439d842f18ce05e002e5f6b95d37ca22747
--- /dev/null
+++ b/integrationtests/test_data/extroot/realworld_example/pycaosdb.ini
@@ -0,0 +1 @@
+../../../pycaosdb.ini
\ No newline at end of file
diff --git a/integrationtests/test_data/extroot/realworld_example/schema/zmt-organisation.yml b/integrationtests/test_data/extroot/realworld_example/schema/organisation.yml
similarity index 100%
rename from integrationtests/test_data/extroot/realworld_example/schema/zmt-organisation.yml
rename to integrationtests/test_data/extroot/realworld_example/schema/organisation.yml
diff --git a/integrationtests/test_realworld_example.py b/integrationtests/test_realworld_example.py
index cffb84946611e1c37f28d638acfa234ad12253b1..5ec2f3219625937e3d18f31eaaa2eb71566c75d7 100644
--- a/integrationtests/test_realworld_example.py
+++ b/integrationtests/test_realworld_example.py
@@ -81,18 +81,26 @@ def clear_database():
     ents.delete()
 
 
-def test_dataset(
-        clear_database,
-        usemodel):
-
+def create_identifiable_adapter():
     ident = CaosDBIdentifiableAdapter()
-    ident.register_identifiable(
-        "license", db.RecordType().add_parent("license").add_property("name"))
-    ident.register_identifiable("project_type", db.RecordType(
-    ).add_parent("project_type").add_property("name"))
-    ident.register_identifiable("Person", db.RecordType(
-    ).add_parent("Person").add_property("full_name"))
-
+    ident.register_identifiable("license", (
+        db.RecordType()
+        .add_parent("license")
+        .add_property("name")))
+    ident.register_identifiable("project_type", (
+        db.RecordType()
+        .add_parent("project_type")
+        .add_property("name")))
+    ident.register_identifiable("Person", (
+        db.RecordType()
+        .add_parent("Person")
+        .add_property("full_name")))
+
+    return ident
+
+
+def test_dataset(clear_database, usemodel):
+    ident = create_identifiable_adapter()
     crawler = Crawler(identifiableAdapter=ident)
     crawler_definition = crawler.load_definition(
         os.path.join(DATADIR, "dataset_cfoods.yml"))
diff --git a/setup.cfg b/setup.cfg
index e2d3524ba6508f6a70f65203f02dc590a1dd9a7c..9c652aa9ad32757075bd37f0bd5efeadcaa34582 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -24,7 +24,7 @@ install_requires =
     caosadvancedtools
    yaml-header-tools
    pyyaml
-    odfpy
+    odfpy #make optional
    pandas
 
 [options.packages.find]
diff --git a/src/caoscrawler/authorize.py b/src/caoscrawler/authorize.py
new file mode 100644
index 0000000000000000000000000000000000000000..6f1011b227881d4b73186996076abe20d94d52e5
--- /dev/null
+++ b/src/caoscrawler/authorize.py
@@ -0,0 +1,38 @@
+#!/usr/bin/env python3
+# encoding: utf-8
+#
+# This file is a part of the CaosDB Project.
+#
+# Copyright (C) 2022 Henrik tom Wörden
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+
+from caosadvancedtools.crawler import Crawler as OldCrawler
+
+import argparse
+
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("run_id",
+                        help="Run ID of the crawler run that created the changes that shall be "
+                        "authorized.")
+
+    return parser.parse_args()
+
+
+if __name__ == "__main__":
+    args = parse_args()
+    OldCrawler.update_authorized_changes(args.run_id)
diff --git a/src/caoscrawler/crawl.py b/src/caoscrawler/crawl.py
index 7f54237c5cc025166dc2aaec34703d500a159ff2..e5b0e6ba69898deb5320a382823313c1a4bf83c6 100644
--- a/src/caoscrawler/crawl.py
+++ b/src/caoscrawler/crawl.py
@@ -28,13 +28,19 @@ Crawl a file structure using a yaml cfood definition and synchronize
 the acuired data with CaosDB.
""" +import importlib +from caosadvancedtools.cache import UpdateCache, Cache +import uuid import sys import os import yaml +from enum import Enum +import logging from importlib_resources import files import argparse from argparse import RawTextHelpFormatter import caosdb as db +from caosadvancedtools.crawler import Crawler as OldCrawler from caosdb.common.datatype import is_reference from .stores import GeneralStore, RecordStore from .identified_cache import IdentifiedCache @@ -49,8 +55,8 @@ from caosdb.apiutils import compare_entities, merge_entities from copy import deepcopy from jsonschema import validate +logger = logging.getLogger(__name__) -import importlib SPECIAL_PROPERTIES_STRICT = ("description", "name", "id", "path") SPECIAL_PROPERTIES_NOT_STRICT = ("file", "checksum", "size") @@ -140,6 +146,12 @@ def _resolve_datatype(prop: db.Property, remote_entity: db.Entity): return prop +class SecurityMode(Enum): + RETRIEVE = 0 + INSERT = 1 + UPDATE = 2 + + class Crawler(object): """ Crawler class that encapsulates crawling functions. @@ -147,23 +159,35 @@ class Crawler(object): storage for values (general store). """ - def __init__(self, converters: list[Converter] = [], + def __init__(self, + converters: list[Converter] = [], generalStore: Optional[GeneralStore] = None, debug: bool = False, - identifiableAdapter: IdentifiableAdapter = None): + identifiableAdapter: IdentifiableAdapter = None, + securityMode: int = SecurityMode.UPDATE + ): """ Create a new crawler and initialize an empty RecordStore and GeneralStore. - converters: The set of converters used for this crawler. - recordStore: An initial GeneralStore which might store e.g. environment variables. - - debug: Create a debugging information tree when set to True. - The debugging information tree is a variable stored in - self.debug_tree. It is a dictionary mapping directory entries - to a tuple of general stores and record stores which are valid for the directory scope. - Furthermore, it is stored in a second tree named self.debug_copied whether the - objects in debug_tree had been copied from a higher level in the hierarchy - of the structureelements. + Parameters + ---------- + converters : list[Converter] + The set of converters used for this crawler. + recordStore : GeneralStore + An initial GeneralStore which might store e.g. environment variables. + debug : bool + Create a debugging information tree when set to True. + The debugging information tree is a variable stored in + self.debug_tree. It is a dictionary mapping directory entries + to a tuple of general stores and record stores which are valid for the directory scope. + Furthermore, it is stored in a second tree named self.debug_copied whether the + objects in debug_tree had been copied from a higher level in the hierarchy + of the structureelements. + identifiableAdapter : IdentifiableAdapter + TODO describe + securityMode : int + Whether only retrieves are allowed or also inserts or even updates. 
+            Please use SecurityMode Enum
         """
 
         # TODO: check if this feature is really needed
@@ -171,6 +195,7 @@ class Crawler(object):
 
         self.identified_cache = IdentifiedCache()
         self.recordStore = RecordStore()
+        self.securityMode = securityMode
 
         self.generalStore = generalStore
         if generalStore is None:
@@ -179,7 +204,8 @@ class Crawler(object):
         self.identifiableAdapter = identifiableAdapter
         if identifiableAdapter is None:
             self.identifiableAdapter = LocalStorageIdentifiableAdapter()
-
+        # If a directory is crawled this may hold the path to that directory
+        self.crawled_directory = None
         self.debug = debug
         if self.debug:
             # order in the tuple:
@@ -335,6 +361,7 @@ class Crawler(object):
             raise ValueError(
                 "You have to provide a non-empty path for crawling.")
         dir_structure_name = os.path.basename(dirname)
+        self.crawled_directory = dirname
         if not dir_structure_name and dirname.endswith('/'):
             if dirname == '/':
                 # Crawling the entire file system
@@ -397,6 +424,7 @@ class Crawler(object):
         if not isinstance(items, list):
             items = [items]
 
+        self.run_id = uuid.uuid1()
         local_converters = Crawler.create_local_converters(crawler_definition,
                                                            converter_registry)
         # This recursive crawling procedure generates the update list:
@@ -734,32 +762,51 @@ class Crawler(object):
         pass
 
     @staticmethod
-    def execute_inserts_in_list(to_be_inserted):
+    def _get_entity_by_name(name):
+        return db.Entity(name=name).retrieve()
+
+    @staticmethod
+    def execute_inserts_in_list(to_be_inserted, securityMode, run_id: int = None):
         for record in to_be_inserted:
             for prop in record.properties:
-                entity = db.Entity(name=prop.name).retrieve()
+                entity = Crawler._get_entity_by_name(prop.name)
                 _resolve_datatype(prop, entity)
-        print("INSERT")
-        print(to_be_inserted)
+        logger.debug("INSERT")
+        logger.debug(to_be_inserted)
         if len(to_be_inserted) > 0:
-            db.Container().extend(to_be_inserted).insert()
+            if securityMode.value > SecurityMode.RETRIEVE.value:
+                db.Container().extend(to_be_inserted).insert()
+            elif run_id is not None:
+
+                raise RuntimeError("You must not insert Entities since the Crawler was started "
+                                   "with RETRIEVE only mode.")
+                # Caching forbidden inserts is currently not implemented
+                # cache = Cache()
+                # cache.insert(to_be_inserted, run_id)
 
     @staticmethod
-    def execute_updates_in_list(to_be_updated):
-        # retrieve ids of properties when missing:
-        for record in to_be_updated:
+    def set_ids_and_datatype_of_parents_and_properties(rec_list):
+        for record in rec_list:
             for parent in record.parents:
                 if parent.id is None:
-                    parent.id = db.Entity(name=parent.name).retrieve().id
+                    parent.id = Crawler._get_entity_by_name(parent.name).id
             for prop in record.properties:
                 if prop.id is None:
-                    entity = db.Entity(name=prop.name).retrieve()
+                    entity = Crawler._get_entity_by_name(prop.name)
                     prop.id = entity.id
                     _resolve_datatype(prop, entity)
-        print("UPDATE")
-        print(to_be_updated)
+
+    @staticmethod
+    def execute_updates_in_list(to_be_updated, securityMode, run_id: int = None):
+        Crawler.set_ids_and_datatype_of_parents_and_properties(to_be_updated)
+        logger.debug("UPDATE")
+        logger.debug(to_be_updated)
         if len(to_be_updated) > 0:
-            db.Container().extend(to_be_updated).update()
+            if securityMode.value > SecurityMode.INSERT.value:
+                db.Container().extend(to_be_updated).update()
+            elif run_id is not None:
+                update_cache = UpdateCache()
+                update_cache.insert(to_be_updated, run_id)
 
     def _synchronize(self, target_data: list[db.Record], commit_changes: bool = True):
         """
@@ -794,11 +841,38 @@ class Crawler(object):
         self.remove_unnecessary_updates(to_be_updated,
                                         identified_records)
 
         if commit_changes:
-            self.execute_inserts_in_list(to_be_inserted)
-            self.execute_updates_in_list(to_be_updated)
+            self.execute_inserts_in_list(to_be_inserted, self.securityMode, self.run_id)
+            self.execute_updates_in_list(to_be_updated, self.securityMode, self.run_id)
+
+        update_cache = UpdateCache()
+        pending_changes = update_cache.get_updates(self.run_id)
+
+        if pending_changes:
+            Crawler.inform_about_pending_changes(
+                pending_changes, self.run_id, self.crawled_directory)
 
         return (to_be_inserted, to_be_updated)
 
+    @staticmethod
+    def inform_about_pending_changes(pending_changes, run_id, path):
+        # Sending an Email with a link to a form to authorize updates is
+        # only done in SSS mode
+
+        if "SHARED_DIR" in os.environ:
+            filename = OldCrawler.save_form([el[3] for el in pending_changes], path, run_id)
+            OldCrawler.send_mail([el[3] for el in pending_changes], filename)
+
+        for i, el in enumerate(pending_changes):
+
+            logger.debug(
+                """
+UNAUTHORIZED UPDATE ({} of {}):
+____________________\n""".format(i + 1, len(pending_changes)) + str(el[3]))
+        logger.info("There were unauthorized changes (see above). An "
+                    "email was sent to the curator.\n"
+                    "You can authorize the updates by invoking the crawler"
+                    " with the run id: {rid}\n".format(rid=run_id))
+
     @staticmethod
     def debug_build_usage_tree(converter: Converter):
         res: dict[str, dict[str, Any]] = {
@@ -932,7 +1006,8 @@ def main(crawled_directory_path: str,
          debug: bool = False,
          provenance_file: str = None,
          dry_run: bool = False,
-         prefix: str = ""):
+         prefix: str = "",
+         securityMode: int = SecurityMode.UPDATE):
     """
 
     Parameters
@@ -951,13 +1026,15 @@ def main(crawled_directory_path: str,
         do not commit any chnages to the server
     prefix : str
         remove the given prefix from file paths
+    securityMode : int
+        securityMode of Crawler
 
     Returns
    -------
     return_value : int
         0 if successful
     """
-    crawler = Crawler(debug=debug)
+    crawler = Crawler(debug=debug, securityMode=securityMode)
     crawler.crawl_directory(crawled_directory_path, cfood_file_name)
     if provenance_file is not None:
         crawler.save_debug_data(provenance_file)
@@ -1025,7 +1102,9 @@ def parse_args():
     parser.add_argument("crawled_directory_path",
                         help="The subtree of files below the given path will "
                        "be considered. Use '/' for everything.")
-
+    parser.add_argument("-s", "--security-mode", choices=["retrieve", "insert", "update"],
+                        help="Determines whether entities may only be read from the server, or "
+                        "whether inserts or even updates may be done.")
     parser.add_argument("-n", "--dry-run", action="store_true",
                         help="Create two files dry.yml to show"
                         "what would actually be committed without doing the synchronization.")
@@ -1044,6 +1123,17 @@ if __name__ == "__main__":
     args = parse_args()
+
+    conlogger = logging.getLogger("connection")
+    conlogger.setLevel(level=logging.ERROR)
+
+    # logging config for local execution
+    logger.addHandler(logging.StreamHandler(sys.stdout))
+    if args.debug:
+        logger.setLevel(logging.DEBUG)
+    else:
+        logger.setLevel(logging.INFO)
+
     sys.exit(main(
         args.crawled_directory_path,
         args.cfood_file_name,
@@ -1051,5 +1141,8 @@ if __name__ == "__main__":
         args.debug,
         args.provenance,
         args.dry_run,
-        args.prefix
+        args.prefix,
+        {"retrieve": SecurityMode.RETRIEVE,
+         "insert": SecurityMode.INSERT,
+         "update": SecurityMode.UPDATE}[args.security_mode]
    ))
diff --git a/src/caoscrawler/identifiable_adapters.py b/src/caoscrawler/identifiable_adapters.py
index ce3a29b7484de83a17f27c27f3373ab91df70c9a..0b00cbeaefe42bcf600db735d27c67571ca6a79b 100644
--- a/src/caoscrawler/identifiable_adapters.py
+++ b/src/caoscrawler/identifiable_adapters.py
@@ -27,8 +27,10 @@ import yaml
 from datetime import datetime
 
 import caosdb as db
+import logging
 from abc import abstractmethod, ABCMeta
 from .utils import has_parent
+logger = logging.getLogger(__name__)
 
 
 def convert_value(value):
@@ -202,7 +204,9 @@ class IdentifiableAdapter(metaclass=ABCMeta):
             if record_prop is None:
                 # TODO: how to handle missing values in identifiables
                 # raise an exception?
-                raise NotImplementedError()
+                raise NotImplementedError(
+                    f"RECORD\n{record}\nPROPERTY\n{prop.name}"
+                )
             newval = record_prop.value
             if isinstance(record_prop.value, db.Entity):
                 newval = self.resolve_reference(record_prop.value)
diff --git a/unittests/simulated_server_data.py b/unittests/simulated_server_data.py
new file mode 100644
index 0000000000000000000000000000000000000000..6a523dbb06397b380510f72502a76cc6bda5f06c
--- /dev/null
+++ b/unittests/simulated_server_data.py
@@ -0,0 +1,24 @@
+
+import caosdb as db
+data_model = {"person": (db.RecordType(id=10001, name="Person")
+                         .add_property(name="first_name")
+                         .add_property(name="last_name")),
+              "measurement": (db.RecordType(id=10002, name="Measurement")
+                              .add_property(name="identifier")
+                              .add_property(name="date")
+                              .add_property(name="project")),
+              "project": (db.RecordType(id=10003, name="Project")
+                          .add_property(name="date")
+                          .add_property(name="identifier")),
+              "first_name": db.Property(name="first_name", datatype=db.TEXT, id=10004),
+              "responsible": db.Property(name="responsible", datatype="Person", id=10005),
+              "last_name": db.Property(name="last_name", datatype=db.TEXT, id=10006),
+              "identifier": db.Property(name="identifier", datatype=db.TEXT, id=10007),
+              "date": db.Property(name="date", datatype=db.DATETIME, id=10008),
+              }
+existing_data = {
+}
+
+full_data = {}
+full_data.update(data_model)
+full_data.update(existing_data)
diff --git a/unittests/test_converters.py b/unittests/test_converters.py
index ec45707fd97e9ab6666843b77e5c86b9a7d98531..7a6987b8b3fae9d747f2440de202df5d10a34cc0 100644
--- a/unittests/test_converters.py
+++ b/unittests/test_converters.py
@@ -1,11 +1,10 @@
 #!/usr/bin/env python3
 # encoding: utf-8
 #
-# ** header v3.0
 # This file is a part of the CaosDB Project.
 #
-# Copyright (C) 2021 Indiscale GmbH <info@indiscale.com>
-# Copyright (C) 2021 Henrik tom Wörden <h.tomwoerden@indiscale.com>
+# Copyright (C) 2021,2022 Indiscale GmbH <info@indiscale.com>
+# Copyright (C) 2021,2022 Henrik tom Wörden <h.tomwoerden@indiscale.com>
 #
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU Affero General Public License as
@@ -20,8 +19,6 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with this program. If not, see <https://www.gnu.org/licenses/>.
 #
-# ** end header
-#
 
 """
 test the converters module
@@ -275,4 +272,3 @@ def test_variable_replacement():
 
     assert handle_value(["a", "b"], values) == (["a", "b"], "single")
     assert handle_value(["$a", "$b"], values) == (["4", "68"], "single")
-
diff --git a/unittests/test_tool.py b/unittests/test_tool.py
index d0ef74daf8af2255688da299b2705cacc216c920..b86bc7b82113e2b357c6cf6fe16594a7e162ce8b 100755
--- a/unittests/test_tool.py
+++ b/unittests/test_tool.py
@@ -3,11 +3,14 @@
 # Adapted from check-sfs
 # A. Schlemmer, 06/2021
 
-from caoscrawler import Crawler
+from caoscrawler.crawl import Crawler, SecurityMode
 from caoscrawler.structure_elements import File, DictTextElement, DictListElement
 from caoscrawler.identifiable_adapters import IdentifiableAdapter, LocalStorageIdentifiableAdapter
+from simulated_server_data import full_data
 from functools import partial
 from copy import deepcopy
+from unittest.mock import patch
+import caosdb.common.models as dbmodels
 from unittest.mock import MagicMock, Mock
 from os.path import join, dirname, basename
 import yaml
@@ -536,3 +539,137 @@ def test_replace_entities_with_ids(crawler):
     assert a.get_property("A").value == 12345
     assert a.get_property("B").value == 12345
     assert a.get_property("C").value == [12345, 233324]
+
+
+def mock_get_entity_by_name(name):
+    candidates = [el for el in full_data.values() if el.name.lower() == name.lower()]
+    if len(candidates) > 0:
+        return candidates[0]
+    else:
+        return None
+
+
+def prepare_crawler_with_sec_mode(mode, ident):
+    crawler = Crawler(debug=True, securityMode=mode)
+    crawler.crawl_directory(rfp("test_directories", "examples_article"),
+                            rfp("scifolder_cfood.yml"))
+    crawler.identifiableAdapter = ident
+
+    return crawler
+
+
+def reset_mocks(mocks):
+    for mock in mocks:
+        mock.reset_mock()
+
+
+def change_identifiable_prop(ident):
+    # the checks in here are only to make sure we change the record as we intend to
+    meas = ident._records[-2]
+    assert meas.parents[0].name == "Measurement"
+    resps = meas.properties[0]
+    assert resps.name == "date"
+    # change one element; This changes the date which is part of the identifiable
+    resps.value = "2022-01-04"
+
+
+def change_non_identifiable_prop(ident):
+    # the checks in here are only to make sure we change the record as we intend to
+    meas = ident._records[-1]
+    assert meas.parents[0].name == "Measurement"
+    resps = meas.properties[-1]
+    assert resps.name == "responsible"
+    assert len(resps.value) == 2
+    # change one element; This removes a responsible which is not part of the identifiable
+    del resps.value[-1]
+
+
+@patch("caoscrawler.crawl.Crawler._get_entity_by_name",
+       new=Mock(side_effect=mock_get_entity_by_name))
+@patch("caoscrawler.crawl.db.Container.insert")
+@patch("caoscrawler.crawl.db.Container.update")
+@patch("caoscrawler.crawl.UpdateCache.insert")
+def test_security_mode(updateCacheMock, upmock, insmock, ident):
+    records_backup = deepcopy(ident._records)
+
+    # trivial case: nothing to do
+    crawler = prepare_crawler_with_sec_mode(SecurityMode.RETRIEVE, ident)
+    crawler.synchronize(commit_changes=True)
+    assert crawler.run_id is not None
+    insmock.assert_not_called()
+    upmock.assert_not_called()
+    updateCacheMock.assert_not_called()
+
+    # RETRIEVE: insert only
+    crawler = prepare_crawler_with_sec_mode(SecurityMode.RETRIEVE, ident)
+    # remove one element
+    del ident._records[-1]
+    # insert forbidden
+    with pytest.raises(RuntimeError) as excinfo:
+        crawler.synchronize(commit_changes=True)
+    assert crawler.run_id is not None
+    insmock.assert_not_called()
+    upmock.assert_not_called()
+    # as long as caching of inserts is not implemented this is not called
+    updateCacheMock.assert_not_called()
+    # reset counts
+    reset_mocks([updateCacheMock, insmock, upmock])
+    # restore original ident
+    ident._records = deepcopy(records_backup)
+
+    # RETRIEVE: update only
+    crawler = prepare_crawler_with_sec_mode(SecurityMode.RETRIEVE, ident)
+    # change one element
+    change_non_identifiable_prop(ident)
+    crawler.synchronize(commit_changes=True)
+    assert crawler.run_id is not None
+    insmock.assert_not_called()
+    upmock.assert_not_called()
+    assert updateCacheMock.call_count == 1
+    # reset counts
+    reset_mocks([updateCacheMock, insmock, upmock])
+    # restore original ident
+    ident._records = deepcopy(records_backup)
+
+    # INSERT: insert only
+    crawler = prepare_crawler_with_sec_mode(SecurityMode.INSERT, ident)
+    # remove one element
+    del ident._records[-1]
+    crawler.synchronize(commit_changes=True)
+    assert crawler.run_id is not None
+    insmock.assert_called_once()
+    upmock.assert_not_called()
+    updateCacheMock.assert_not_called()
+    # reset counts
+    reset_mocks([updateCacheMock, insmock, upmock])
+    # restore original ident
+    ident._records = deepcopy(records_backup)
+
+    # INSERT: update only
+    crawler = prepare_crawler_with_sec_mode(SecurityMode.INSERT, ident)
+    # change one element
+    change_non_identifiable_prop(ident)
+    crawler.synchronize(commit_changes=True)
+    assert crawler.run_id is not None
+    insmock.assert_not_called()
+    upmock.assert_not_called()
+    updateCacheMock.assert_called_once()
+    # reset counts
+    reset_mocks([updateCacheMock, insmock, upmock])
+    # restore original ident
+    ident._records = deepcopy(records_backup)
+
+    # INSERT: insert and update
+    crawler = prepare_crawler_with_sec_mode(SecurityMode.INSERT, ident)
+    # change two elements
+    change_non_identifiable_prop(ident)
+    change_identifiable_prop(ident)
+    crawler.synchronize(commit_changes=True)
+    assert crawler.run_id is not None
+    insmock.assert_called_once()
+    upmock.assert_not_called()
+    updateCacheMock.assert_called_once()
+    # reset counts
+    reset_mocks([updateCacheMock, insmock, upmock])
+    # restore original ident
+    ident._records = deepcopy(records_backup)
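
For reference, a rough usage sketch of the security mode and authorization flow introduced above. `Crawler`, `SecurityMode`, `crawl_directory`, `synchronize`, `run_id` and `OldCrawler.update_authorized_changes` are taken from this patch; the data directory, the cfood file path and the way `authorize.py` is invoked are placeholders, not part of the change.

```python
# Sketch only, assuming hypothetical paths and a locally available cfood file.
from caoscrawler.crawl import Crawler, SecurityMode

# Crawl with inserts allowed but updates withheld; pending updates are then
# stored in the UpdateCache under this run's id instead of being executed.
crawler = Crawler(securityMode=SecurityMode.INSERT)
crawler.crawl_directory("/path/to/extroot/ExperimentalData", "scifolder_cfood.yml")
crawler.synchronize(commit_changes=True)

# A curator can later release the cached updates for this run, e.g. via the
# new helper script (assumed invocation):
#     python src/caoscrawler/authorize.py <run_id>
print("crawler run id:", crawler.run_id)
```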