diff --git a/integrationtests/basic_example/test.py b/integrationtests/basic_example/test_basic.py similarity index 91% rename from integrationtests/basic_example/test.py rename to integrationtests/basic_example/test_basic.py index 08b0d83fecded5f34dd639a8c57545724090e85a..b24a1c658cfc9e23ca0ba2de266161864cb6b66c 100755 --- a/integrationtests/basic_example/test.py +++ b/integrationtests/basic_example/test_basic.py @@ -42,8 +42,8 @@ from caosadvancedtools.models.parser import parse_model_from_yaml import yaml # TODO is not yet merged in caosadvancedtools -from caosadvancedtools.testutils import clear_database, set_test_key -set_test_key("10b128cf8a1372f30aa3697466bb55e76974e0c16a599bb44ace88f19c8f61e2") +#from caosadvancedtools.testutils import clear_database, set_test_key +# set_test_key("10b128cf8a1372f30aa3697466bb55e76974e0c16a599bb44ace88f19c8f61e2") def rfp(*pathcomponents): @@ -54,6 +54,11 @@ def rfp(*pathcomponents): return os.path.join(os.path.dirname(__file__), *pathcomponents) +@pytest.fixture +def clear_database(): + db.execute_query("FIND Entity").delete() + + @pytest.fixture def usemodel(): model = parse_model_from_yaml(rfp("model.yml")) @@ -87,8 +92,8 @@ def ident(): def crawl_standard_test_directory(cr: Crawler, subdir: str = "examples_article", cfood: str = "scifolder_cfood.yml"): - cr.crawl_directory(rfp("..", "unittests", "test_directories", subdir), - rfp("..", "unittests", cfood)) + cr.crawl_directory(rfp("..", "..", "unittests", "test_directories", subdir), + rfp("..", "..", "unittests", cfood)) @pytest.fixture @@ -105,12 +110,11 @@ def crawler_extended(ident): # correct paths for current working directory file_list = [r for r in cr.target_data if r.role == "File"] for f in file_list: - f.file = rfp("..", "unittests", "test_directories", - "examples_article", f.file) + f.file = rfp("..", "..", "unittests", "test_directories", f.file) return cr -def test_single_insertion(clear_database, usemodel, crawler): +def test_single_insertion(clear_database, usemodel, crawler, ident): ins, ups = crawler.synchronize() # This test also generates the file records.xml used in some of the unittesets: @@ -118,7 +122,7 @@ def test_single_insertion(clear_database, usemodel, crawler): for i in reversed(range(len(res))): if res[i].parents[0].name == "PyTestInfo": del res[i] - filename = rfp("..", "unittests", "records.xml") + filename = rfp("..", "..", "unittests", "records.xml") with open(filename, "w") as f: xml = res.to_xml() # Remove noscript and transaction benchmark: @@ -131,10 +135,9 @@ def test_single_insertion(clear_database, usemodel, crawler): assert len(ups) == 0 # Do a second run on the same data, there should be no changes: - crawler = Crawler(debug=True, identifiableAdapter=ident_adapt) - crawler.copy_attributes = Mock() - crawler.crawl_directory(rfp("../unittests/test_directories", "examples_article"), - rfp("../unittests/scifolder_cfood.yml")) + crawler = Crawler(debug=True, identifiableAdapter=ident) + crawler.crawl_directory(rfp("../../unittests/test_directories", "examples_article"), + rfp("../../unittests/scifolder_cfood.yml")) ins, ups = crawler.synchronize() assert len(ins) == 0 assert len(ups) == 0 @@ -271,7 +274,7 @@ def test_file_insertion_dry(clear_database, usemodel, ident): for f in file_list: assert f.path.endswith("README.md") - assert f.path == f.file + assert f.path[1:] == f.file ins, ups = crawler_extended.synchronize(commit_changes=False) assert len(ups) == 0 @@ -304,8 +307,7 @@ def test_file_update(clear_database, usemodel, ident, crawler_extended): file_list = [r for r in cr.target_data if r.role == "File"] for f in file_list: - f.file = rfp("..", "unittests", "test_directories", - "examples_article", f.file) + f.file = rfp("..", "..", "unittests", "test_directories", f.file) ins2, ups2 = cr.synchronize(commit_changes=True) assert len(ups1) == 0 assert len(ups2) == 0 @@ -320,8 +322,7 @@ def test_file_update(clear_database, usemodel, ident, crawler_extended): file_list = [r for r in cr2.target_data if r.role == "File"] for f in file_list: - f.file = rfp("..", "unittests", "test_directories", - "examples_article", f.file) + f.file = rfp("..", "..", "unittests", "test_directories", f.file) ins3, ups3 = cr2.synchronize(commit_changes=True) assert len(ups3) == 11 diff --git a/src/caoscrawler/__init__.py b/src/caoscrawler/__init__.py index 28ef97d421023ad41be65d9d0e6abac76fbef6fe..b65b9fd9d24b9519a52ca13d07e46c9d8f791a73 100644 --- a/src/caoscrawler/__init__.py +++ b/src/caoscrawler/__init__.py @@ -1 +1 @@ -from .crawl import Crawler +from .crawl import Crawler, SecurityMode diff --git a/unittests/scifolder_extended2.yml b/unittests/scifolder_extended2.yml index f1dfc2d4635b6956930343685c7b17ca4f2f1679..2e99896fdc357f3afd2b43a08829eec2e52c39c0 100644 --- a/unittests/scifolder_extended2.yml +++ b/unittests/scifolder_extended2.yml @@ -6,95 +6,99 @@ Definitions: type: Definitions #include "description.yml" -DataAnalysis: # name of the converter +Data: # name of the converter type: Directory - match: DataAnalysis - subtree: &template - project_dir: # name of the first subtree element which is a converter + match: (.*) + subtree: + DataAnalysis: # name of the converter type: Directory - match: (?P<date>.*?)_(?P<identifier>.*) - records: - Project: # this is an identifiable in this case - parents: - - Project # not needed as the name is equivalent - date: $date - identifier: $identifier - - subtree: - measurement: # new name for folders on the 3rd level + match: DataAnalysis + subtree: &template + project_dir: # name of the first subtree element which is a converter type: Directory - match: (?P<date>[0-9]{4,4}-[0-9]{2,2}-[0-9]{2,2})(_(?P<identifier>.*))? + match: (?P<date>.*?)_(?P<identifier>.*) records: - Measurement: + Project: # this is an identifiable in this case + parents: + - Project # not needed as the name is equivalent date: $date identifier: $identifier - project: $Project + subtree: - README: - type: MarkdownFile # this is a subclass of converter File - # function signature: GeneralStore, StructureElement - # preprocessors: custom.caosdb.convert_values - match: ^README\.md$ - # how to make match case insensitive? - records: # this block is very verbose and intended to make sure that this - # file is inserted correctly (and can be supplemented with properties - # and / or parents), TODO: maybe there should be a shorthand - ReadmeFile: - parents: - - ProjectMarkdownReadme - role: File - path: $README - file: $README # this is automatically the relative path - # starting from the top level structure element - # of this element + measurement: # new name for folders on the 3rd level + type: Directory + match: (?P<date>[0-9]{4,4}-[0-9]{2,2}-[0-9]{2,2})(_(?P<identifier>.*))? + records: Measurement: - ReadmeFile: $ReadmeFile - + date: $date + identifier: $identifier + project: $Project subtree: - description: - type: DictTextElement - match_value: (?P<description>.*) - match_name: description - records: + README: + type: MarkdownFile # this is a subclass of converter File + # function signature: GeneralStore, StructureElement + # preprocessors: custom.caosdb.convert_values + match: ^README\.md$ + # how to make match case insensitive? + records: # this block is very verbose and intended to make sure that this + # file is inserted correctly (and can be supplemented with properties + # and / or parents), TODO: maybe there should be a shorthand + ReadmeFile: + parents: + - ProjectMarkdownReadme + role: File + path: $README + file: $README # this is automatically the relative path + # starting from the top level structure element + # of this element Measurement: - description: $description - responsible_single: - type: DictTextElement - match_name: responsible - match_value: &person_regexp ((?P<first_name>.+) )?(?P<last_name>.+) - records: &responsible_records - Person: - first_name: $first_name - last_name: $last_name - Measurement: # this uses the reference to the above defined record - responsible: +$Person # each record also implicitely creates a variable - # with the same name. The "+" indicates, that - # this will become a list entry in list property - # "responsible" belonging to Measurement. - - responsible_list: - type: DictListElement - match_name: responsible + ReadmeFile: $ReadmeFile + subtree: - Person: - type: TextElement - match: *person_regexp - records: *responsible_records + description: + type: DictTextElement + match_value: (?P<description>.*) + match_name: description + records: + Measurement: + description: $description + responsible_single: + type: DictTextElement + match_name: responsible + match_value: &person_regexp ((?P<first_name>.+) )?(?P<last_name>.+) + records: &responsible_records + Person: + first_name: $first_name + last_name: $last_name + Measurement: # this uses the reference to the above defined record + responsible: +$Person # each record also implicitely creates a variable + # with the same name. The "+" indicates, that + # this will become a list entry in list property + # "responsible" belonging to Measurement. - # sources_list: - # type: DictListElement - # match_name: sources - # subtree: - # Source: - # type: TextElement - # match: &path ... ??? + responsible_list: + type: DictListElement + match_name: responsible + subtree: + Person: + type: TextElement + match: *person_regexp + records: *responsible_records -ExperimentalData: # name of the converter - type: Directory - match: ExperimentalData - subtree: *template + # sources_list: + # type: DictListElement + # match_name: sources + # subtree: + # Source: + # type: TextElement + # match: &path ... ??? -SimulationData: # name of the converter - type: Directory - match: SimulationData - subtree: *template + ExperimentalData: # name of the converter + type: Directory + match: ExperimentalData + subtree: *template + + SimulationData: # name of the converter + type: Directory + match: SimulationData + subtree: *template