diff --git a/integrationtests/basic_example/test.py b/integrationtests/basic_example/test.py
index 2c972fa96b1dc768f29bb37e509ca87ebbd91575..cecd6533669fd9fb75124faf758efeae8b8d9778 100755
--- a/integrationtests/basic_example/test.py
+++ b/integrationtests/basic_example/test.py
@@ -102,8 +102,8 @@ def crawler_extended(ident):
     cr = Crawler(debug=True, identifiableAdapter=ident)
     crawl_standard_test_directory(cr, cfood="scifolder_extended.yml")
     # correct paths for current working directory
-    fileList = [r for r in cr.targetData if r.role == "File"]
-    for f in fileList:
+    file_list = [r for r in cr.target_data if r.role == "File"]
+    for f in file_list:
         f.file = rfp("..", "unittests", "test_directories",
                      "examples_article", f.file)
     return cr
@@ -156,7 +156,7 @@ def test_insertion(clear_database, usemodel, ident, crawler):
     # Do a second run on the same data, there should a new insert:
     cr = Crawler(debug=True, identifiableAdapter=ident)
     crawl_standard_test_directory(cr, "example_insert")
-    assert len(cr.targetData) == 3
+    assert len(cr.target_data) == 3
     ins, ups = cr.synchronize()
     assert len(ins) == 1
     assert len(ups) == 0
@@ -164,7 +164,7 @@ def test_insertion(clear_database, usemodel, ident, crawler):
     # Do it again to check whether nothing is changed:
     cr = Crawler(debug=True, identifiableAdapter=ident)
     crawl_standard_test_directory(cr, "example_insert")
-    assert len(cr.targetData) == 3
+    assert len(cr.target_data) == 3
     ins, ups = cr.synchronize()
     assert len(ins) == 0
     assert len(ups) == 0
@@ -179,9 +179,9 @@ def test_insertion_and_update(clear_database, usemodel, ident, crawler):
     cr = Crawler(debug=True, identifiableAdapter=ident)
     crawl_standard_test_directory(cr, "example_overwrite_1")
 
-    # print(cr.targetData)
+    # print(cr.target_data)
     # cr.save_debug_data(rfp("provenance.yml"))
-    assert len(cr.targetData) == 3
+    assert len(cr.target_data) == 3
     ins, ups = cr.synchronize()
     assert len(ins) == 0
     assert len(ups) == 1
@@ -196,7 +196,7 @@ def test_identifiable_update(clear_database, usemodel, ident, crawler):
     crawl_standard_test_directory(cr)
 
     # Test the addition of a single property:
-    l = cr.targetData
+    l = cr.target_data
     for record in l:
         if (record.parents[0].name == "Measurement" and
                 record.get_property("date").value == "2020-01-03"):
@@ -212,7 +212,7 @@ def test_identifiable_update(clear_database, usemodel, ident, crawler):
     # Test the change within one property:
     cr = Crawler(debug=True, identifiableAdapter=ident)
     crawl_standard_test_directory(cr)
-    l = cr.targetData
+    l = cr.target_data
     for record in l:
         if (record.parents[0].name == "Measurement" and
                 record.get_property("date").value == "2020-01-03"):
@@ -226,7 +226,7 @@ def test_identifiable_update(clear_database, usemodel, ident, crawler):
     # Changing the date should result in a new insertion:
     cr = Crawler(debug=True, identifiableAdapter=ident)
     crawl_standard_test_directory(cr)
-    l = cr.targetData
+    l = cr.target_data
     for record in l:
         if (record.parents[0].name == "Measurement" and
                 record.get_property("date").value == "2020-01-03"):
@@ -243,23 +243,23 @@ def test_file_insertion_dry(clear_database, usemodel, ident):
     crawler_extended = Crawler(debug=True, identifiableAdapter=ident)
     crawl_standard_test_directory(
         crawler_extended, cfood="scifolder_extended.yml")
-    fileList = [r for r in crawler_extended.targetData if r.role == "File"]
-    assert len(fileList) == 11
+    file_list = [r for r in crawler_extended.target_data if r.role == "File"]
+    assert len(file_list) == 11
 
-    for f in fileList:
+    for f in file_list:
         assert f.path.endswith("README.md")
         assert f.path == f.file
 
     ins, ups = crawler_extended.synchronize(commit_changes=False)
     assert len(ups) == 0
-    fileList_ins = [r for r in ins if r.role == "File"]
-    assert len(fileList_ins) == 11
+    file_list_ins = [r for r in ins if r.role == "File"]
+    assert len(file_list_ins) == 11
 
 
 def test_file_insertion(clear_database, usemodel, ident, crawler_extended):
     ins, ups = crawler_extended.synchronize(commit_changes=True)
-    fileList_ins = [r for r in ins if r.role == "File"]
-    assert len(fileList_ins) == 11
+    file_list_ins = [r for r in ins if r.role == "File"]
+    assert len(file_list_ins) == 11
 
     assert db.execute_query("COUNT File") > 0
 
@@ -274,13 +274,13 @@ def test_file_update(clear_database, usemodel, ident, crawler_extended):
     ins1, ups1 = crawler_extended.synchronize(commit_changes=True)
-    fileList_ins = [r for r in ins1 if r.role == "File"]
+    file_list_ins = [r for r in ins1 if r.role == "File"]
 
     cr = Crawler(debug=True, identifiableAdapter=ident)
     crawl_standard_test_directory(cr, cfood="scifolder_extended.yml")
 
-    fileList = [r for r in cr.targetData if r.role == "File"]
-    for f in fileList:
+    file_list = [r for r in cr.target_data if r.role == "File"]
+    for f in file_list:
         f.file = rfp("..", "unittests", "test_directories",
                      "examples_article", f.file)
 
     ins2, ups2 = cr.synchronize(commit_changes=True)
 
@@ -295,8 +295,8 @@ def test_file_update(clear_database, usemodel, ident, crawler_extended):
     cr2 = Crawler(debug=True, identifiableAdapter=ident)
     crawl_standard_test_directory(cr2, cfood="scifolder_extended2.yml")
-    fileList = [r for r in cr2.targetData if r.role == "File"]
-    for f in fileList:
+    file_list = [r for r in cr2.target_data if r.role == "File"]
+    for f in file_list:
         f.file = rfp("..", "unittests", "test_directories",
                      "examples_article", f.file)
 
     ins3, ups3 = cr2.synchronize(commit_changes=True)
@@ -309,4 +309,4 @@ def test_file_update(clear_database, usemodel, ident, crawler_extended):
     # TODO: Implement file update checks (based on checksum)
     # Add test with actual file update:
     # assert len(ins2) == 0
-    # assert len(ups2) == len(fileList_ins)
+    # assert len(ups2) == len(file_list_ins)
diff --git a/src/caoscrawler/crawl.py b/src/caoscrawler/crawl.py
index 3458b32bd51dd005d81af19a7d6c2d24f490fd19..859fe3fc52619d09a638b13c350cd0d947cc2296 100644
--- a/src/caoscrawler/crawl.py
+++ b/src/caoscrawler/crawl.py
@@ -102,7 +102,7 @@ def check_identical(record1: db.Entity, record2: db.Entity, ignore_id=False):
             return False
         for attribute in ("datatype", "importance", "unit"):
             # only make an update for those attributes if there is a value difference and
-            # the value in the targetData is not None
+            # the value in the target_data is not None
             if attribute in comp[0]["properties"][key]:
                 attr_val = comp[0]["properties"][key][attribute]
                 other_attr_val = (comp[1]["properties"][key][attribute]
@@ -370,7 +370,7 @@ class Crawler(object):
         Parameters
         ----------
         items: list
-             A list of structure elements (or a single StructureElemen) that is used for
+             A list of structure elements (or a single StructureElement) that is used for
              generating the initial items for the crawler. This could e.g. be a Directory.
         crawler_definition : dict
             A dictionary representing the crawler definition, possibly from a yaml
@@ -378,7 +378,7 @@ class Crawler(object):
 
         Returns
         -------
-        targetData : list
+        target_data : list
            the final list with the target state of Records.
         """
 
@@ -393,7 +393,7 @@ class Crawler(object):
         local_converters = Crawler.create_local_converters(crawler_definition,
                                                            converter_registry)
 
         # This recursive crawling procedure generates the update list:
-        self.targetData: list[db.Record] = []
+        self.target_data: list[db.Record] = []
         self._crawl(items, self.global_converters, local_converters, self.generalStore,
                     self.recordStore, [], [])
@@ -401,7 +401,7 @@ class Crawler(object):
         if self.debug:
             self.debug_converters = self.global_converters + local_converters
 
-        return self.targetData
+        return self.target_data
 
     def synchronize(self, commit_changes: bool = True):
         """
@@ -411,7 +411,7 @@ class Crawler(object):
 
         # After the crawling, the actual synchronization with the database, based on the
         # update list is carried out:
-        return self._synchronize(self.targetData, commit_changes)
+        return self._synchronize(self.target_data, commit_changes)
 
     def can_be_checked_externally(self, record: db.Record):
         """
@@ -703,7 +703,7 @@ class Crawler(object):
                             el.value[index] = val.id
 
     @staticmethod
-    def remove_unnecessary_updates(targetData: list[db.Record],
+    def remove_unnecessary_updates(target_data: list[db.Record],
                                    identified_records: list[db.Record]):
         """
         checks whether all relevant attributes (especially Property values) are equal
@@ -713,15 +713,15 @@ class Crawler(object):
         update list without unecessary updates
         """
 
-        if len(targetData) != len(identified_records):
+        if len(target_data) != len(identified_records):
             raise RuntimeError("The lists of updates and of identified records need to be of the "
                                "same length!")
 
         # TODO this can now easily be changed to a function without side effect
-        for i in reversed(range(len(targetData))):
-            identical = check_identical(targetData[i], identified_records[i])
+        for i in reversed(range(len(target_data))):
+            identical = check_identical(target_data[i], identified_records[i])
             if identical:
-                del targetData[i]
+                del target_data[i]
                 continue
             else:
                 pass
@@ -754,11 +754,11 @@ class Crawler(object):
         if len(to_be_updated) > 0:
             db.Container().extend(to_be_updated).update()
 
-    def _synchronize(self, targetData: list[db.Record], commit_changes: bool = True):
+    def _synchronize(self, target_data: list[db.Record], commit_changes: bool = True):
         """
         This function applies several stages:
-        1) Retrieve identifiables for all records in targetData.
-        2) Compare targetData with existing records.
+        1) Retrieve identifiables for all records in target_data.
+        2) Compare target_data with existing records.
         3) Insert and update records based on the set of identified differences.
 
         This function makes use of an IdentifiableAdapter which is used to retrieve
@@ -773,7 +773,7 @@ class Crawler(object):
         if self.identifiableAdapter is None:
             raise RuntimeError("Should not happen.")
 
-        to_be_inserted, to_be_updated = self.split_into_inserts_and_updates(targetData)
+        to_be_inserted, to_be_updated = self.split_into_inserts_and_updates(target_data)
 
         # TODO: refactoring of typo
         for el in to_be_updated:
@@ -903,7 +903,7 @@ class Crawler(object):
         # to the general update container.
         scoped_records = recordStore.get_records_current_scope()
         for record in scoped_records:
-            self.targetData.append(record)
+            self.target_data.append(record)
 
         # TODO: the scoped variables should be cleaned up as soon if the variables
         # are no longer in the current scope. This can be implemented as follows,
@@ -916,7 +916,7 @@ class Crawler(object):
         # del recordStore[name]
         # del generalStore[name]
 
-        return self.targetData
+        return self.target_data
 
 
 def main(crawled_directory_path: str,
@@ -971,7 +971,7 @@ def main(crawled_directory_path: str,
                            "update": updates}))
     else:
         rtsfinder = dict()
-        for elem in crawler.targetData:
+        for elem in crawler.target_data:
             if isinstance(elem, db.File):
                 # correct the file path:
                 # elem.file = os.path.join(args.path, elem.file)
diff --git a/unittests/test_tool.py b/unittests/test_tool.py
index d6187d18c440ad8aca2fc55b391aef38a4e563d9..5293200f39956011705f0d43f1aeac7ffdd2a145 100755
--- a/unittests/test_tool.py
+++ b/unittests/test_tool.py
@@ -173,7 +173,7 @@ def test_record_structure_generation(crawler):
 
 def test_ambigious_records(crawler, ident):
     ident.get_records().clear()
-    ident.get_records().extend(crawler.targetData)
+    ident.get_records().extend(crawler.target_data)
     r = ident.get_records()
     id_r0 = ident.get_identifiable(r[0])
     with raises(RuntimeError, match=".*unambigiously.*"):
@@ -195,7 +195,7 @@ def test_crawler_update_list(crawler, ident):
     ) == 2
 
     # The crawler contains lots of duplicates, because identifiables have not been resolved yet:
-    assert len(ident.get_records()) != len(crawler.targetData)
+    assert len(ident.get_records()) != len(crawler.target_data)
 
     # Check consistency:
     # Check whether identifiables retrieved from current identifiable store return the same results.
@@ -327,7 +327,7 @@ def test_identifiable_adapter_no_identifiable(crawler, ident):
     insl, updl = crawler.synchronize()
     assert len(updl) == 0
 
-    pers = [r for r in crawler.targetData if r.parents[0].name == "Person"]
+    pers = [r for r in crawler.target_data if r.parents[0].name == "Person"]
 
     # All persons are inserted, because they are not identifiable:
     assert len(insl) == len(pers)
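
Note on usage (illustration only, not part of the patch): after this rename, downstream code that read crawler.targetData must switch to crawler.target_data. A minimal sketch against the API actually visible in the hunks above - Crawler(debug=True, ...), the target_data attribute, and synchronize(commit_changes=...) returning the insert and update lists; the crawling call itself is elided because this diff does not show the method that populates target_data:

    from caoscrawler.crawl import Crawler

    crawler = Crawler(debug=True)
    # ... run the crawling step that fills crawler.target_data ...

    # The crawled target state, formerly accessed as crawler.targetData:
    file_records = [r for r in crawler.target_data if r.role == "File"]

    # Dry run: compute the insert/update sets without committing them.
    ins, ups = crawler.synchronize(commit_changes=False)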