Commit 55eb267f authored by Alexander Schlemmer

Merge branch 'f-trigger-crawler' into f-deploy-in-docker

Parents: bebfa199, abee71f5
Merge request !4: Deploy ruqad in docker
Pipeline #58609 failed
@@ -30,5 +30,5 @@ def test_crawl():
     crawl a directory as it would be created by export from kadi and running a data quality check
     """
     print(os.listdir(DATADIR))
-    trigger_crawler(os.fspath(DATADIR))
-    raise NotImplementedError("Test not implemented.")
+    retval = trigger_crawler(os.fspath(DATADIR))
+    assert retval
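The test now exercises the new boolean return value of trigger_crawler() instead of failing unconditionally. For context, a self-contained version of the test might look like the sketch below; the import path and the DATADIR definition are assumptions, they are not shown in this diff.

import os
from pathlib import Path

from ruqad.crawler import trigger_crawler  # import path assumed

# Directory containing a sample kadi export; the real definition lives
# elsewhere in the test suite and is only sketched here.
DATADIR = Path(__file__).parent / "data" / "kadi_export"


def test_crawl():
    """
    crawl a directory as it would be created by export from kadi and running a data quality check
    """
    print(os.listdir(DATADIR))
    # trigger_crawler() now reports validation failure via its return value
    # instead of calling sys.exit(), so a plain assert is enough.
    retval = trigger_crawler(os.fspath(DATADIR))
    assert retval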
@@ -20,6 +20,7 @@ username=admin
 password_method=plain
 password=caosdb
 ## OR: `input`: username is optional, password is entered by the user directly
 # password_method=input
@@ -32,5 +33,5 @@ password=caosdb
 ## pip install keyring
 # password_method=keyring
 timeout=10000
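These settings live in a plain INI file (pycaosdb.ini); `password_method` selects how the client obtains the password: `plain` (stored in the file), `input` (prompted from the user), or `keyring` (fetched via the keyring package). A stdlib-only way to inspect the effective value is sketched below; the `[Connection]` section name follows the standard pycaosdb.ini template and is an assumption here, and caosdb-pylib ships its own configuration handling.

import configparser

# Read the client configuration shown above; for illustration only.
cfg = configparser.ConfigParser()
cfg.read("pycaosdb.ini")

# "plain": password stored in the file, "input": prompt the user,
# "keyring": fetch the password via the `keyring` package.
method = cfg.get("Connection", "password_method", fallback="plain")
print("password method:", method)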
@@ -18,13 +18,15 @@ from caoscrawler.validator import (load_json_schema_from_datamodel_yaml,
 
 ruqad_crawler_settings = resources.files('ruqad').joinpath('resources/crawler-settings')
 
 
-def trigger_crawler(target_dir: str):
+def trigger_crawler(target_dir: str) -> bool:
     """
     Trigger a standard crawler run equivalent to the command line:
     ```
     caosdb-crawler -i crawler/identifiables.yaml -s update crawler/cfood.yaml <target_dir>
     ```
+
+    Return False in case of unsuccessful metadata validation and True otherwise.
     """
 
     # insert all .zip and .eln files, if they do not yet exist
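Changing the signature to -> bool moves the decision about process exit out of trigger_crawler(): the function now reports validation failure to its caller instead of terminating the interpreter, which is what makes the test above possible. A hedged sketch of a command-line wrapper built on this contract follows; the wrapper itself is not part of this commit.

import sys

from ruqad.crawler import trigger_crawler  # import path assumed


def main() -> None:
    # Translate the boolean result back into an exit code at the outermost
    # layer, where terminating the process is appropriate.
    target_dir = sys.argv[1]
    ok = trigger_crawler(target_dir)
    sys.exit(0 if ok else 1)


if __name__ == "__main__":
    main()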
@@ -52,12 +54,14 @@ def trigger_crawler(target_dir: str):
     # Remove files from entities:
     records = [r for r in entities if r.role == "Record"]
+    # breakpoint()
     validation = validate(records, schemas)
+    # breakpoint()
     if not all([i[0] for i in validation]):
-        print("Metadata validation failed.")
-        sys.exit(1)
+        print("Metadata validation failed. Validation errors:")
+        for v, recordtype in zip(validation, schemas.keys()):
+            if not v[0]:
+                print("{}: {}".format(recordtype, v[1]))
+        return False
 
     print("crawl", target_dir)
     crawler_main(crawled_directory_path=target_dir,
@@ -65,3 +69,5 @@ def trigger_crawler(target_dir: str):
                  identifiables_definition_file=ruqad_crawler_settings.joinpath(
                      'identifiables.yaml'),
                  remove_prefix="/"+os.path.basename(target_dir))
+
+    return True
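The new error reporting pairs each validation result with a record type by zipping over schemas.keys(); this relies on validate() returning one (passed, errors) tuple per schema, in the insertion order of the schemas dict (dicts preserve insertion order since Python 3.7, so results and keys stay aligned). The exact return shape of caoscrawler's validate() is inferred from its usage here; the sketch below replays the pattern with dummy data.

# Dummy stand-ins for the real objects; schema bodies elided.
schemas = {"Dataset": {}, "Author": {}}
# One (passed, errors) pair per record type, same order as schemas.keys().
validation = [(True, None), (False, "missing required property 'name'")]

if not all(passed for passed, _ in validation):
    print("Metadata validation failed. Validation errors:")
    for (passed, errors), recordtype in zip(validation, schemas.keys()):
        if not passed:
            print("{}: {}".format(recordtype, errors))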
@@ -187,7 +187,7 @@ DataDir:
         AuthorID:
           match_name: "@id"
           type: TextElement
-          match_value: ^(?P<url>(?P<repo>https://.*?)(/users/)(?P<unr>[0-9]+))$
+          match_value: ^(?P<url>(?P<repo>https?://.*?)(/users/)(?P<unr>[0-9]+))$
           transform:
             cast_nr_type:
               in: $unr
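The only change in the cfood is https -> https?, which makes the trailing "s" optional, so the AuthorID matcher now accepts Kadi instances served over plain HTTP as well. The effect is easy to verify with Python's re module; the URLs below are made up for illustration.

import re

# New pattern from the cfood: the '?' makes the 's' in 'https' optional.
pattern = r"^(?P<url>(?P<repo>https?://.*?)(/users/)(?P<unr>[0-9]+))$"

for url in ("https://kadi.example.org/users/42",   # matched before and after
            "http://kadi.example.org/users/42"):   # matched only after this change
    m = re.match(pattern, url)
    print(url, "->", m.groupdict() if m else "no match")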