diff --git a/integrationtests/test_issues.py b/integrationtests/test_issues.py
index 527b4c0cf67f483d5b61972a0104ff4fb673402d..d9dd4114eb089f53f83384c9b1a7670fd43984c6 100644
--- a/integrationtests/test_issues.py
+++ b/integrationtests/test_issues.py
@@ -24,6 +24,8 @@
 from caoscrawler.crawl import Crawler
 from caoscrawler.identifiable_adapters import CaosDBIdentifiableAdapter
 from caoscrawler.structure_elements import DictElement
+from caoscrawler.scanner import create_converter_registry, scan_structure_elements
+
 from caosdb.utils.register_tests import clear_database, set_test_key

 set_test_key("10b128cf8a1372f30aa3697466bb55e76974e0c16a599bb44ace88f19c8f61e2")
@@ -86,8 +88,8 @@ def test_issue_23(clear_database):
     ident.register_identifiable("TestType", db.RecordType().add_parent(
         name="TestType").add_property(name="identifying_prop"))

-    crawler = Crawler(debug=True, identifiableAdapter=ident)
-    converter_registry = crawler.load_converters(crawler_definition)
+    crawler = Crawler(identifiableAdapter=ident)
+    converter_registry = create_converter_registry(crawler_definition)

     # the dictionary to be crawled...
     test_dict = {
@@ -95,7 +97,8 @@ def test_issue_23(clear_database):
         "prop_b": "something_else"
     }

-    records = crawler.start_crawling(
+    crawler.generate_run_id()
+    records, debug_tree = scan_structure_elements(
         DictElement("TestDict", test_dict), crawler_definition, converter_registry)

     assert len(records) == 1
@@ -109,7 +112,7 @@ def test_issue_23(clear_database):
     assert rec_crawled.get_property("prop_a") is None

     # synchronize with database and update the record
-    ins, ups = crawler.synchronize()
+    ins, ups = crawler.synchronize(records)
     assert len(ins) == 0
     assert len(ups) == 1
diff --git a/src/caoscrawler/crawl.py b/src/caoscrawler/crawl.py
index 612215cdd1654250b3789a038af1d4ee7ed38389..6f16d52605f568fa03047d75632a5ce1c4f70c8f 100644
--- a/src/caoscrawler/crawl.py
+++ b/src/caoscrawler/crawl.py
@@ -203,6 +203,9 @@ class Crawler(object):
         if identifiableAdapter is not None:
             self.identifiableAdapter = identifiableAdapter

+    def generate_run_id(self):
+        self.run_id = uuid.uuid1()
+
     def crawl_directory(self,
                         crawled_directory: str,
                         crawler_definition_path: str,
@@ -212,7 +215,7 @@ class Crawler(object):
         """

         self.crawled_directory = crawled_directory
-        self.run_id = uuid.uuid1()
+        self.generate_run_id()

         # TODO: This is not ideal yet, the data is just returned and needs to be
         # separately supplied to the synchronize function.