def load_from_yaml_definition(self, path: str):
    """Load identifiable definitions from a YAML file and register them.

    The file is expected to contain a mapping from a RecordType name to
    the list of property names that identify records of that type, e.g.::

        Person:
          - full_name
        Project:
          - project_id
          - title

    For every entry a ``db.RecordType`` is created that has the key as
    its parent and one property per listed name. It is then registered
    under the key via ``register_identifiable``.

    An empty file registers nothing.

    Parameters
    ----------
    path : str
        Path to the YAML definition file.

    Raises
    ------
    OSError
        If the file cannot be opened.
    """
    # Read as UTF-8 explicitly so the result does not depend on the
    # platform's default locale encoding.
    with open(path, "r", encoding="utf-8") as yaml_f:
        identifiable_data = yaml.safe_load(yaml_f)

    # yaml.safe_load yields None for an empty document; treat that as
    # "no identifiables defined" instead of failing on None.items().
    if identifiable_data is None:
        return

    for key, value in identifiable_data.items():
        rt = db.RecordType().add_parent(key)
        for prop_name in value:
            rt.add_property(name=prop_name)
        self.register_identifiable(key, rt)
166f7d516687b5e2d8e8e60321de5084b38e060a..ab67a5c94df8b3e3e541017fc0ac57cceb2fa2fd 100644 --- a/unittests/test_converters.py +++ b/unittests/test_converters.py @@ -118,9 +118,13 @@ def testDirectoryConverter(converter_registry): def test_markdown_converter(converter_registry): - test_readme = File("README.md", rfp( - "test_directories", "examples_article", "DataAnalysis", - "2020_climate-model-predict", "2020-02-08_prediction-errors", "README.md")) + test_readme = File( + "README.md", + rfp( + "test_directories", "examples_article", "DataAnalysis", + "2020_climate-model-predict", "2020-02-08_prediction-errors", "README.md" + ) + ) converter = MarkdownFileConverter({ "match": "(.*)" @@ -155,8 +159,11 @@ def test_markdown_converter(converter_registry): assert children[0].name == "responsible" assert children[0].value.__class__ == str - test_readme2 = File("README.md", rfp("test_directories", "examples_article", - "ExperimentalData", "2020_SpeedOfLight", "2020-01-01_TimeOfFlight", "README.md")) + test_readme2 = File( + "README.md", + rfp("test_directories", "examples_article", + "ExperimentalData", "2020_SpeedOfLight", "2020-01-01_TimeOfFlight", "README.md") + ) m = converter.match(test_readme2) assert m is not None @@ -226,8 +233,10 @@ def test_json_converter(converter_registry): assert children[7].name == "url" assert children[7].value.__class__ == str - broken_json = File("brokenjson.json", rfp( - "test_directories", "examples_json", "brokenjson.json")) + broken_json = File( + "brokenjson.json", + rfp("test_directories", "examples_json", "brokenjson.json") + ) m = jsonconverter.match(broken_json) # Doesn't validate because of missing required 'name' property diff --git a/unittests/test_directories/single_file_test_data/identifiables.yml b/unittests/test_directories/single_file_test_data/identifiables.yml new file mode 100644 index 0000000000000000000000000000000000000000..e32746d5a6984096cc46fa618250832b325965b0 --- /dev/null +++ 
def test_load_from_yaml_file():
    """Check that identifiables defined in the YAML fixture get registered.

    Loads ``identifiables.yml`` from the single-file test data directory
    and verifies that each record type defined there is registered with
    all of its identifying properties.
    """
    yaml_path = os.path.join(
        os.path.dirname(__file__),
        "test_directories",
        "single_file_test_data",
        "identifiables.yml",
    )
    adapter = CaosDBIdentifiableAdapter()
    adapter.load_from_yaml_definition(yaml_path)

    # Record type name -> property names its identifiable must carry.
    expected = {
        "Person": ["full_name"],
        "Keyword": ["name"],
        "Project": ["project_id", "title"],
    }

    for parent_name, property_names in expected.items():
        probe = db.Record().add_parent(parent_name)
        registered = adapter.get_registered_identifiable(probe)
        assert registered is not None
        for property_name in property_names:
            assert registered.get_property(property_name) is not None