diff --git a/integrationtests/test_realworld_example.py b/integrationtests/test_realworld_example.py
index 5ec2f3219625937e3d18f31eaaa2eb71566c75d7..19a9aca2eaabce59b9570ade9ad7bae6eb43f9b9 100644
--- a/integrationtests/test_realworld_example.py
+++ b/integrationtests/test_realworld_example.py
@@ -29,7 +29,7 @@ import os
 
 import caosdb as db
 
-from caoscrawler.crawl import Crawler, main as crawler_main
+from caoscrawler.crawl import Crawler, crawler_main
 from caoscrawler.converters import JSONFileConverter, DictConverter
 from caoscrawler.identifiable_adapters import CaosDBIdentifiableAdapter
 from caoscrawler.structure_elements import File, JSONFile, Directory
diff --git a/integrationtests/test_use_case_simple_presentation.py b/integrationtests/test_use_case_simple_presentation.py
index f1c838d1aadf4cb8b51043a8a24b93eddf275c75..bf16ef3be7179372ace7d05d67ffee33890fcc3c 100644
--- a/integrationtests/test_use_case_simple_presentation.py
+++ b/integrationtests/test_use_case_simple_presentation.py
@@ -32,7 +32,7 @@ from subprocess import run
 
 import caosdb as db
 from caosadvancedtools.loadFiles import loadpath
 from caosadvancedtools.models import parser as parser
-from caoscrawler.crawl import main as crawler_main
+from caoscrawler.crawl import crawler_main
 
 # TODO(fspreck) Re-eneable once this is part of dev in advancedusertools.
diff --git a/src/caoscrawler/crawl.py b/src/caoscrawler/crawl.py
index 8886c5f87f1556517acafc7bfa673e8a0d29c6e2..1e09d3d8cb0f9d04e7ecd4931eb8a7ea66f2ea96 100644
--- a/src/caoscrawler/crawl.py
+++ b/src/caoscrawler/crawl.py
@@ -823,7 +823,8 @@ class Crawler(object):
         if self.identifiableAdapter is None:
             raise RuntimeError("Should not happen.")
 
-        to_be_inserted, to_be_updated = self.split_into_inserts_and_updates(target_data)
+        to_be_inserted, to_be_updated = self.split_into_inserts_and_updates(
+            target_data)
 
         # TODO: refactoring of typo
         for el in to_be_updated:
@@ -831,14 +832,17 @@
             self.replace_entities_with_ids(el)
 
         identified_records = [
-            self.identifiableAdapter.retrieve_identified_record_for_record(record)
+            self.identifiableAdapter.retrieve_identified_record_for_record(
+                record)
             for record in to_be_updated]
         # remove unnecessary updates from list by comparing the target records to the existing ones
         self.remove_unnecessary_updates(to_be_updated, identified_records)
 
         if commit_changes:
-            self.execute_inserts_in_list(to_be_inserted, self.securityMode, self.run_id)
-            self.execute_updates_in_list(to_be_updated, self.securityMode, self.run_id)
+            self.execute_inserts_in_list(
+                to_be_inserted, self.securityMode, self.run_id)
+            self.execute_updates_in_list(
+                to_be_updated, self.securityMode, self.run_id)
 
         update_cache = UpdateCache()
         pending_inserts = update_cache.get_inserts(self.run_id)
@@ -859,7 +863,8 @@
 
         # only done in SSS mode
         if "SHARED_DIR" in os.environ:
-            filename = OldCrawler.save_form([el[3] for el in pending_changes], path, run_id)
+            filename = OldCrawler.save_form(
+                [el[3] for el in pending_changes], path, run_id)
             OldCrawler.send_mail([el[3] for el in pending_changes], filename)
 
         for i, el in enumerate(pending_changes):
@@ -870,7 +875,8 @@ UNAUTHORIZED UPDATE ({} of {}):
 ____________________\n""".format(i + 1, len(pending_changes)) + str(el[3]))
 
         logger.info("There were unauthorized changes (see above). An "
                     "email was sent to the curator.\n"
-                    "You can authorize the " + ("inserts" if inserts else "updates")
+                    "You can authorize the " +
+                    ("inserts" if inserts else "updates")
                     + " by invoking the crawler"
                     " with the run id: {rid}\n".format(rid=run_id))
@@ -1001,14 +1007,14 @@ ____________________\n""".format(i + 1, len(pending_changes)) + str(el[3]))
         return self.target_data
 
 
-def main(crawled_directory_path: str,
-         cfood_file_name: str,
-         identifiables_definition_file: str = None,
-         debug: bool = False,
-         provenance_file: str = None,
-         dry_run: bool = False,
-         prefix: str = "",
-         securityMode: int = SecurityMode.UPDATE):
+def crawler_main(crawled_directory_path: str,
+                 cfood_file_name: str,
+                 identifiables_definition_file: str = None,
+                 debug: bool = False,
+                 provenance_file: str = None,
+                 dry_run: bool = False,
+                 prefix: str = "",
+                 securityMode: int = SecurityMode.UPDATE):
     """
 
     Parameters
@@ -1122,7 +1128,7 @@ def parse_args():
     return parser.parse_args()
 
 
-if __name__ == "__main__":
+def main():
     args = parse_args()
 
     conlogger = logging.getLogger("connection")
@@ -1135,7 +1141,7 @@
     else:
         logger.setLevel(logging.INFO)
 
-    sys.exit(main(
+    sys.exit(crawler_main(
         args.crawled_directory_path,
         args.cfood_file_name,
         args.load_identifiables,
@@ -1147,3 +1153,7 @@
          "insert": SecurityMode.INSERT,
          "update": SecurityMode.UPDATE}[args.security_mode]
     ))
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tox.ini b/tox.ini
index 5ab67e67cfef0b3cf0cf82d2d28de0fe11aca6a1..101904b7de43fba6f04cf65641f555d79b0b080a 100644
--- a/tox.ini
+++ b/tox.ini
@@ -9,6 +9,7 @@ deps = .
     # TODO: Make this f-branch sensitive
     git+https://gitlab.indiscale.com/caosdb/src/caosdb-pylib.git@dev
     git+https://gitlab.indiscale.com/caosdb/src/caosdb-advanced-user-tools.git@dev
-commands=py.test --cov=caosdb -vv {posargs}
+commands= caosdb-crawler --help
+    py.test --cov=caosdb -vv {posargs}
 [flake8]
 max-line-length=100