diff --git a/src/caoscrawler/crawl.py b/src/caoscrawler/crawl.py index fe66581777c24ede7d35e3babea4ac492f12711b..701afba07ffc460efc13d0ea3aeea0ec054f45d7 100644 --- a/src/caoscrawler/crawl.py +++ b/src/caoscrawler/crawl.py @@ -184,7 +184,8 @@ class Crawler(object): Create a debugging information tree when set to True. The debugging information tree is a variable stored in self.debug_tree. It is a dictionary mapping directory entries - to a tuple of general stores and record stores which are valid for the directory scope. + to a tuple of general stores and record stores which are valid for + the directory scope. Furthermore, it is stored in a second tree named self.debug_copied whether the objects in debug_tree had been copied from a higher level in the hierarchy of the structureelements. @@ -239,7 +240,6 @@ class Crawler(object): crawler_definition = crawler_definitions[1] else: raise RuntimeError("Crawler definition must not contain more than two documents.") - # TODO: at this point this function can already load the cfood schema extensions # from the crawler definition and add them to the yaml schema that will be @@ -522,7 +522,7 @@ class Crawler(object): if (isinstance(p.value, list)): for el in p.value: if (isinstance(el, db.Entity) and el.id is None - and self.get_identified_record_from_local_cache(el) is None): + and self.get_identified_record_from_local_cache(el) is None): return False if (isinstance(p.value, db.Entity) and p.value.id is None and self.get_identified_record_from_local_cache(p.value) is None): @@ -976,7 +976,8 @@ ____________________\n""".format(i + 1, len(pending_changes)) + str(el[3])) self.debug_tree[str(element)] = ( generalStore_copy.get_storage(), recordStore_copy.get_storage()) self.debug_metadata["copied"][str(element)] = ( - generalStore_copy.get_dict_copied(), recordStore_copy.get_dict_copied()) + generalStore_copy.get_dict_copied(), + recordStore_copy.get_dict_copied()) self.debug_metadata["usage"][str(element)].add( "/".join(converters_path + [converter.name])) mod_info = self.debug_metadata["provenance"] @@ -987,8 +988,9 @@ ____________________\n""".format(i + 1, len(pending_changes)) + str(el[3])) record_identifier = record_name + \ "_" + str(internal_id) converter.metadata["usage"].add(record_identifier) - mod_info[record_identifier][prop_name] = (structure_elements_path + [element.get_name()], - converters_path + [converter.name]) + mod_info[record_identifier][prop_name] = ( + structure_elements_path + [element.get_name()], + converters_path + [converter.name]) self._crawl(children, global_converters, converter.converters, generalStore_copy, recordStore_copy, diff --git a/src/caoscrawler/macros/macro_yaml_object.py b/src/caoscrawler/macros/macro_yaml_object.py index 46e8affeb3c2547e6ec8f1aa7f6ecc7952c5aaa1..072c49b342777796a2755b480a34f48566627437 100644 --- a/src/caoscrawler/macros/macro_yaml_object.py +++ b/src/caoscrawler/macros/macro_yaml_object.py @@ -30,6 +30,7 @@ from typing import Any from copy import deepcopy from string import Template + @dataclass class MacroDefinition: """ @@ -42,6 +43,7 @@ class MacroDefinition: params: dict[str, Any] definition: Any + # This dictionary stores the macro definitions macro_store: dict[str, MacroDefinition] = dict() @@ -149,5 +151,5 @@ def macro_constructor(loader, node): else: # If there is no macro with that name, just keep that node: res[name] = params_setter - + return res diff --git a/unittests/test_macros.py b/unittests/test_macros.py index 8b26356641ad7e6f5e83ed462b93d8523f843627..fe47db888b518035a0b16bc82cd3f439aa16b60d 100644 --- a/unittests/test_macros.py +++ b/unittests/test_macros.py @@ -31,15 +31,18 @@ from tempfile import NamedTemporaryFile import yaml import pytest + @pytest.fixture def register_macros(): yaml.SafeLoader.add_constructor("!defmacro", defmacro_constructor) yaml.SafeLoader.add_constructor("!macro", macro_constructor) + @pytest.fixture def macro_store_reset(): macro_store.clear() + def test_macros(register_macros, macro_store_reset): dat = yaml.load(""" defs: @@ -65,6 +68,7 @@ testnode: assert "expanded_bla" not in dat["testnode"]["obl"] assert "bla" not in dat["testnode"]["obl"]["expanded_yea"] + def test_macro_list_replacment(register_macros, macro_store_reset): dat = yaml.load(""" defs: @@ -91,6 +95,7 @@ testnode: assert dat["testnode"]["obl"]["expanded_yea"]["blubb"][0] == "ok4" assert dat["testnode"]["obl"]["expanded_yea"]["blubb"][1]["yea"] == "$variable" + def test_multi_macros(register_macros, macro_store_reset): dat = yaml.load(""" defs: @@ -115,6 +120,7 @@ testnode: assert dat["testnode"]["obl"]["replaced2"] == "ok" assert dat["testnode"]["obl"]["replaced3"] == "ok" + def test_multi_macros_toplevel(register_macros, macro_store_reset): """ See: https://gitlab.indiscale.com/caosdb/src/caosdb-crawler/-/issues/23 @@ -159,6 +165,7 @@ def temp_file_load(txt: str): definition = c.load_definition(f.name) return definition + def test_load_definition(register_macros, macro_store_reset): txt = """ extroot: diff --git a/unittests/test_tool.py b/unittests/test_tool.py index 59573756fe61ef697976e480dd1550cb0ead0998..a0b8e675f79028b3b45aa248202658be22cfbf6f 100755 --- a/unittests/test_tool.py +++ b/unittests/test_tool.py @@ -33,7 +33,9 @@ def dircheckstr(*pathcomponents): """ Return the debug tree identifier for a given path. """ - return "caoscrawler.structure_elements.Directory: " + basename(join(*pathcomponents)) + ", " + rfp("test_directories", "examples_article", *pathcomponents) + return ("caoscrawler.structure_elements.Directory: " + basename( + join(*pathcomponents)) + ", " + rfp( + "test_directories", "examples_article", *pathcomponents)) @pytest.fixture @@ -87,7 +89,7 @@ def test_record_structure_generation(crawler): # The data analysis node creates one variable for the node itself: assert subd[0]["DataAnalysis"] == "examples_article/DataAnalysis" - assert subc[0]["DataAnalysis"] == False + assert subc[0]["DataAnalysis"] is False subd = crawler.debug_tree[dircheckstr( "DataAnalysis", "2020_climate-model-predict")] @@ -107,9 +109,9 @@ def test_record_structure_generation(crawler): assert subd[0]["Project"].__class__ == db.Record assert subd[0]["DataAnalysis"] == "examples_article/DataAnalysis" - assert subc[0]["DataAnalysis"] == True + assert subc[0]["DataAnalysis"] is True assert subd[0]["project_dir"] == "examples_article/DataAnalysis/2020_climate-model-predict" - assert subc[0]["project_dir"] == False + assert subc[0]["project_dir"] is False # Check the copy flags for the first level in the hierarchy: assert len(subc[0]) == 6 @@ -201,7 +203,8 @@ def test_crawler_update_list(crawler, ident): assert len(ident.get_records()) != len(crawler.target_data) # Check consistency: - # Check whether identifiables retrieved from current identifiable store return the same results. + # Check whether identifiables retrieved from current identifiable store return + # the same results. # take the first person in the list of records: for r in ident.get_records():