diff --git a/integrationtests/basic_example/test_basic.py b/integrationtests/basic_example/test_basic.py
index 20c61e28028708a0eb375329b6265c7a478335be..ac8c62382b6b38a0686518932f5b851d60d584cd 100755
--- a/integrationtests/basic_example/test_basic.py
+++ b/integrationtests/basic_example/test_basic.py
@@ -140,7 +140,7 @@ def test_single_insertion(clear_database, usemodel, crawler, ident):
     # Do a second run on the same data, there should be no changes:
     crawler = Crawler(identifiableAdapter=ident)
     crawled_data, debug_tree = crawler.crawl_directory(rfp("../../unittests/test_directories", "examples_article"),
-                                                        rfp("../../unittests/scifolder_cfood.yml"))
+                                                       rfp("../../unittests/scifolder_cfood.yml"))
     ins, ups = crawler.synchronize(crawled_data)
     assert len(ins) == 0
     assert len(ups) == 0
diff --git a/src/caoscrawler/debug_tree.py b/src/caoscrawler/debug_tree.py
index 58bda2cde2c93915935a87c54120178374b64881..79701773a5cece1747878c45bff2e394ec0f7f6b 100644
--- a/src/caoscrawler/debug_tree.py
+++ b/src/caoscrawler/debug_tree.py
@@ -5,7 +5,7 @@
 # This file is a part of the CaosDB Project.
 #
 # Copyright (C) 2023 Alexander Schlemmer <alexander.schlemmer@ds.mpg.de>
-# 
+#
 #
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU Affero General Public License as
diff --git a/src/caoscrawler/scanner.py b/src/caoscrawler/scanner.py
index 9e79d813cd5f407c61ea49f94970c7cc513d12f9..18333d6d240023cf7e2561a53b34d1920433f071 100644
--- a/src/caoscrawler/scanner.py
+++ b/src/caoscrawler/scanner.py
@@ -5,7 +5,7 @@
 # This file is a part of the CaosDB Project.
 #
 # Copyright (C) 2023 Alexander Schlemmer <alexander.schlemmer@ds.mpg.de>
-# 
+#
 #
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU Affero General Public License as
@@ -83,6 +83,7 @@ def load_definition(crawler_definition_path: str):
 
     return _resolve_validator_paths(crawler_definition, crawler_definition_path)
 
+
 def _load_definition_from_yaml_dict(crawler_definitions: list[dict]):
     """Load crawler definitions from a list of (yaml) dicts `crawler_definitions` which
     contains either one or two documents.
@@ -222,6 +223,7 @@ def initialize_converters(crawler_definition: dict, converter_registry: dict):
 # Main scanner function:
 # --------------------------------------------------------------------------------
 
+
 def scanner(items: list[StructureElement],
             converters: list[Converter],
             general_store: Optional[GeneralStore] = None,
@@ -351,15 +353,13 @@ def scanner(items: list[StructureElement],
 
     return crawled_data, debug_tree
 
-
-
 # --------------------------------------------------------------------------------
 # Main scanning interface functions:
 # --------------------------------------------------------------------------------
 
 
 def scan_directory(dirname: str, crawler_definition_path: str,
-                    restricted_path: Optional[list[str]] = None):
+                   restricted_path: Optional[list[str]] = None):
     """ Crawl a single directory.
 
     Formerly known as "crawl_directory".
@@ -396,7 +396,7 @@ def scan_directory(dirname: str, crawler_definition_path: str,
                                    crawler_definition,
                                    converter_registry,
                                    restricted_path=restricted_path)
-    
+
 
 def scan_structure_elements(items: Union[list[StructureElement], StructureElement],
                             crawler_definition: dict,
diff --git a/unittests/test_table_converter.py b/unittests/test_table_converter.py
index 4db6f26ee6d23606ddeab2fcb3805cf33965e098..6a776fa2a633ae5c7bb21163732abec467e09f8b 100644
--- a/unittests/test_table_converter.py
+++ b/unittests/test_table_converter.py
@@ -95,7 +95,7 @@ def dircheckstr(*pathcomponents):
 def crawler():
     crawler = Crawler()
     crawled_data, debug_tree = crawler.crawl_directory(rfp("test_directories", "examples_tables", "ExperimentalData"),
-                                                        rfp("test_directories", "examples_tables", "crawler_for_tables.yml"))
+                                                       rfp("test_directories", "examples_tables", "crawler_for_tables.yml"))
     return crawler, crawled_data, debug_tree
 
 
diff --git a/unittests/test_tool.py b/unittests/test_tool.py
index fc664d138b44af7a8aa51497ccf53cc27cdc95d9..d0bb108453f1e390dc72980baed99e4b469e3edb 100755
--- a/unittests/test_tool.py
+++ b/unittests/test_tool.py
@@ -117,7 +117,7 @@ def test_record_structure_generation(crawler):
     # There seems to be code duplication
 
     debug_tree = crawler[2]
-    
+
     subd = debug_tree.debug_tree[dircheckstr("DataAnalysis")]
     subc = debug_tree.debug_metadata["copied"][dircheckstr("DataAnalysis")]
     assert len(subd) == 2
@@ -164,11 +164,11 @@ def test_record_structure_generation(crawler):
     assert subc[0]["identifier"] is False
 
     subd = debug_tree.debug_tree[dircheckstr("DataAnalysis",
-                                              "2020_climate-model-predict",
-                                              "2020-02-08_prediction-errors")]
+                                             "2020_climate-model-predict",
+                                             "2020-02-08_prediction-errors")]
     subc = debug_tree.debug_metadata["copied"][dircheckstr("DataAnalysis",
-                                                            "2020_climate-model-predict",
-                                                            "2020-02-08_prediction-errors")]
+                                                           "2020_climate-model-predict",
+                                                           "2020-02-08_prediction-errors")]
     assert len(subd[0]) == 12
     assert subd[0]["date"] == "2020-02-08"
     assert subd[0]["identifier"] == "prediction-errors"
@@ -536,7 +536,7 @@ def test_has_missing_object_in_references(crawler):
         Identifiable(name="D", record_type="RTD", properties={'d': 123, 'e': a}), [])
 
     crawler[0].add_to_remote_missing_cache(a, Identifiable(name="C", record_type="RTC",
-                                                            properties={'d': 12311}))
+                                                           properties={'d': 12311}))
     # one ref with id one with Entity without id but in cache -> check
     assert crawler[0]._has_missing_object_in_references(
         Identifiable(name="D", record_type="RTD", properties={'d': 123, 'e': a}), [])
@@ -548,23 +548,23 @@ def test_has_missing_object_in_references(crawler):
 
 @pytest.mark.xfail()
 def test_references_entities_without_ids(crawler, ident):
     assert not crawler[0]._has_reference_value_without_id(db.Record().add_parent("Person")
-                                                           .add_property('last_name', 123)
-                                                           .add_property('first_name', 123))
+                                                          .add_property('last_name', 123)
+                                                          .add_property('first_name', 123))
     # id and rec with id
     assert not crawler[0]._has_reference_value_without_id(db.Record().add_parent("Person")
-                                                           .add_property('first_name', 123)
-                                                           .add_property('last_name',
-                                                                         db.Record(id=123)))
+                                                          .add_property('first_name', 123)
+                                                          .add_property('last_name',
+                                                                        db.Record(id=123)))
     # id and rec with id and one unneeded prop
     assert crawler[0]._has_reference_value_without_id(db.Record().add_parent("Person")
-                                                       .add_property('first_name', 123)
-                                                       .add_property('stuff', db.Record())
-                                                       .add_property('last_name', db.Record(id=123)))
+                                                      .add_property('first_name', 123)
+                                                      .add_property('stuff', db.Record())
+                                                      .add_property('last_name', db.Record(id=123)))
     # one identifying prop is missing
     assert crawler[0]._has_reference_value_without_id(db.Record().add_parent("Person")
-                                                       .add_property('first_name', 123)
-                                                       .add_property('last_name', db.Record()))
+                                                      .add_property('first_name', 123)
+                                                      .add_property('last_name', db.Record()))
 
 
 def test_replace_entities_with_ids(crawler):
diff --git a/unittests/test_tool_extended.py b/unittests/test_tool_extended.py
index a9266b85367e10cd6460ea05af5eaac2104a748c..4cb7286f91df577e7f17a354bd54547b3802c55d 100644
--- a/unittests/test_tool_extended.py
+++ b/unittests/test_tool_extended.py
@@ -71,8 +71,8 @@ def crawler():
 
 def test_file_structure_generation(crawler):
     sd = crawler[2].debug_tree[dircheckstr("SimulationData",
-                                            "2020_climate-model-predict", "2020-02-01",
-                                            "README.md", structure_element_type="File")]
+                                           "2020_climate-model-predict", "2020-02-01",
+                                           "README.md", structure_element_type="File")]
     assert sd[1]["ReadmeFile"].role == "File"
     assert len(sd[1]["ReadmeFile"].path) > 0
     assert len(sd[1]["ReadmeFile"].file) > 0
diff --git a/unittests/test_variable_substitutions.py b/unittests/test_variable_substitutions.py
index 90534b5efac7b2c06c39e6c8f327570f68bd11cd..8680d792973d1f6c9aac2fe2ff0229edfa495d57 100644
--- a/unittests/test_variable_substitutions.py
+++ b/unittests/test_variable_substitutions.py
@@ -36,7 +36,7 @@ def dircheckstr(element_type, *pathcomponents):
 def crawler():
     crawler = Crawler()
     crawled_data, debug_tree = crawler.crawl_directory(rfp("test_directories", "example_substitutions", "ExperimentalData"),
-                                                        rfp("test_directories", "example_substitutions", "substitutions.yml"))
+                                                       rfp("test_directories", "example_substitutions", "substitutions.yml"))
     return crawler, crawled_data, debug_tree
 
 
@@ -44,8 +44,8 @@ def crawler():
 def crawler_2():
     crawler = Crawler()
     crawled_data, debug_tree = crawler.crawl_directory(rfp("test_directories", "example_substitutions", "ExperimentalData"),
-                                                        rfp("test_directories", "example_substitutions",
-                                                            "substitutions_parents.yml"))
+                                                       rfp("test_directories", "example_substitutions",
+                                                           "substitutions_parents.yml"))
     return crawler, crawled_data, debug_tree