diff --git a/integrationtests/model.yml b/integrationtests/model.yml index 8d6d7fa748b2e1ed716aab9215c6c521fb8b73d2..7e1a391186be6a01fb10d0b32e8516238012f374 100644 --- a/integrationtests/model.yml +++ b/integrationtests/model.yml @@ -85,3 +85,4 @@ Measurement: date: ReadmeFile: datatype: REFERENCE +ProjectMarkdownReadme: diff --git a/integrationtests/test.py b/integrationtests/test.py index f615f82c4b721d8a88320a75b3531f7f065485c3..57bc674a79af1d1ff11cecca6a6a7c6ba66ec1bc 100755 --- a/integrationtests/test.py +++ b/integrationtests/test.py @@ -102,6 +102,11 @@ def crawler(ident): def crawler_extended(ident): cr = Crawler(debug=True, identifiableAdapter=ident) crawl_standard_test_directory(cr, cfood="scifolder_extended.yml") + # correct paths for current working directory + updateList = cr.updateList + fileList = [r for r in updateList if r.role == "File"] + for f in fileList: + f.file = rfp("..", "unittests", "test_directories", "examples_article", f.file) return cr @@ -208,7 +213,9 @@ def test_identifiable_update(clear_database, usemodel, ident, crawler): assert len(ups) == 0 -def test_file_insertion_dry(clear_database, usemodel, ident, crawler_extended): +def test_file_insertion_dry(clear_database, usemodel, ident): + crawler_extended = Crawler(debug=True, identifiableAdapter=ident) + crawl_standard_test_directory(crawler_extended, cfood="scifolder_extended.yml") updateList = crawler_extended.updateList fileList = [r for r in updateList if r.role == "File"] assert len(fileList) == 11 @@ -224,14 +231,56 @@ def test_file_insertion_dry(clear_database, usemodel, ident, crawler_extended): def test_file_insertion(clear_database, usemodel, ident, crawler_extended): - # correct paths for current working directory - updateList = crawler_extended.updateList - fileList = [r for r in updateList if r.role == "File"] - for f in fileList: - f.file = rfp("..", "unittests", "test_directories", "examples_article", f.file) ins, ups = 
crawler_extended.synchronize(commit_changes=True) fileList_ins = [r for r in ins if r.role == "File"] assert len(fileList_ins) == 11 assert db.execute_query("COUNT File") > 0 + + # find record which references File does not seem to be possible + # retrieve ids of files: + files = db.execute_query("FIND File") + for f in files: + r = db.execute_query("FIND Record which references {}".format(f.id)) + assert len(r) == 1 + assert r[0].get_property("ReadmeFile").value == f.id + +def test_file_update(clear_database, usemodel, ident, crawler_extended): + ins1, ups1 = crawler_extended.synchronize(commit_changes=True) + fileList_ins = [r for r in ins1 if r.role == "File"] + + cr = Crawler(debug=True, identifiableAdapter=ident) + crawl_standard_test_directory(cr, cfood="scifolder_extended.yml") + + updateList = cr.updateList + fileList = [r for r in updateList if r.role == "File"] + for f in fileList: + f.file = rfp("..", "unittests", "test_directories", "examples_article", f.file) + ins2, ups2 = cr.synchronize(commit_changes=True) + assert len(ups1) == 0 + assert len(ups2) == 0 + + # Try adding a parent: + res = db.execute_query("Find File") + assert len(res) == 11 + assert len(res[0].parents) == 0 + + cr2 = Crawler(debug=True, identifiableAdapter=ident) + crawl_standard_test_directory(cr2, cfood="scifolder_extended2.yml") + + updateList = cr2.updateList + fileList = [r for r in updateList if r.role == "File"] + for f in fileList: + f.file = rfp("..", "unittests", "test_directories", "examples_article", f.file) + ins3, ups3 = cr2.synchronize(commit_changes=True) + assert len(ups3) == 11 + + res = db.execute_query("Find File") + assert len(res) == 11 + assert res[0].parents[0].name == "ProjectMarkdownReadme" + + # TODO: Implement file update checks (based on checksum) + # Add test with actual file update: + # assert len(ins2) == 0 + # assert len(ups2) == len(fileList_ins) diff --git a/src/newcrawler/converters.py b/src/newcrawler/converters.py index 
bf451e585f89214756aba46bd217d220f22d195e..e81a16f64e81c5b801c74733d8b7c53ae562c7b0 100644 --- a/src/newcrawler/converters.py +++ b/src/newcrawler/converters.py @@ -183,6 +183,8 @@ class Converter(object): if key in SPECIAL_PROPERTIES: # e.g. description, name, etc. # list mode does not work for them + if key == "path" and not propvalue.startswith(os.path.sep): + propvalue = os.path.sep + propvalue setattr(c_record, key, propvalue) else: diff --git a/unittests/scifolder_extended2.yml b/unittests/scifolder_extended2.yml new file mode 100644 index 0000000000000000000000000000000000000000..810923f2ef2f171e822df22d30bfb4f6e8f8d8ce --- /dev/null +++ b/unittests/scifolder_extended2.yml @@ -0,0 +1,101 @@ +Definitions: + type: Definitions + #include "description.yml" + +# Converter-Provenance +# DataAnalysis/project_dir/measurement/match/identifier +# Structure-Element-Provenance +# DataAnalysis/2020_SpeedOflight/2020-11-10_kram + +DataAnalysis: # name of the converter + type: Directory + match: DataAnalysis + subtree: &template + project_dir: # name of the first subtree element which is a converter + type: Directory + match: (?P<date>.*?)_(?P<identifier>.*) + records: + Project: # this is an identifiable in this case + parents: + - Project # not needed as the name is equivalent + date: $date + identifier: $identifier + + subtree: + measurement: # new name for folders on the 3rd level + type: Directory + match: (?P<date>[0-9]{4,4}-[0-9]{2,2}-[0-9]{2,2})(_(?P<identifier>.*))? + records: + Measurement: + date: $date + identifier: $identifier + project: $Project + subtree: + README: + type: MarkdownFile # this is a subclass of converter File + # function signature: GeneralStore, StructureElement + # preprocessors: custom.caosdb.convert_values + match: ^README\.md$ + # how to make match case insensitive? 
+ records: # this block is very verbose and intended to make sure that this + # file is inserted correctly (and can be supplemented with properties + # and / or parents), TODO: maybe there should be a shorthand + ReadmeFile: + parents: + - ProjectMarkdownReadme + role: File + path: $README + file: $README # this is automatically the relative path + # starting from the top level structure element + # of this element + Measurement: + ReadmeFile: $ReadmeFile + + subtree: + description: + type: DictTextElement + match_value: (?P<description>.*) + match_name: description + records: + Measurement: + description: $description + responsible_single: + type: DictTextElement + match_name: responsible + match_value: &person_regexp ((?P<first_name>.+) )?(?P<last_name>.+) + records: &responsible_records + Person: + first_name: $first_name + last_name: $last_name + Measurement: # this uses the reference to the above defined record + responsible: +$Person # each record also implicitly creates a variable + # with the same name. The "+" indicates that + # this will become a list entry in list property + # "responsible" belonging to Measurement. + + responsible_list: + type: DictListElement + match_name: responsible + subtree: + Person: + type: TextElement + match: *person_regexp + records: *responsible_records + + # sources_list: + # type: DictListElement + # match_name: sources + # subtree: + # Source: + # type: TextElement + # match: &path ... ??? + +ExperimentalData: # name of the converter + type: Directory + match: ExperimentalData + subtree: *template + +SimulationData: # name of the converter + type: Directory + match: SimulationData + subtree: *template