From f4f3a7660ae1f084e5da1dee6f587bf3119d67cc Mon Sep 17 00:00:00 2001
From: Alexander Schlemmer <alexander@mail-schlemmer.de>
Date: Tue, 29 Jun 2021 14:56:17 +0200
Subject: [PATCH] added a preliminary cfood for testing

---
Review notes (not part of the commit message):
 * main() reads the command line from sys.argv; the sys module has no
   attribute "args".
 * crawl_cfood() binds current_node from the top-level entries of the loaded
   cfood and falls back to type "dir" when a node has no "type" key, as the
   comment "dir is the default" states.
 * The inner loop variable is called subdirname so that the dirname
   parameter is not rebound while os.walk(dirname) is being consumed.
 * Hunk headers and the diffstat were recomputed for the changed line
   counts; the postimage blob id on the src/crawl.py index line was not.
 * Leading whitespace and blank context lines were reconstructed from the
   hunk line counts; verify them against the tree before applying.
 * NOTE(review): setup.py points the console script at src.__main__:main,
   while main() in this patch lives in src/crawl.py - confirm that
   src/__main__.py exists.

 setup.py                 |  2 +-
 src/crawl.py             | 37 +++++++++++++++++++++++++++++++++++--
 test/scifolder_cfood.yml | 31 +++++++++++++++++++++++++++++++
 3 files changed, 67 insertions(+), 3 deletions(-)
 mode change 100644 => 100755 src/crawl.py
 create mode 100644 test/scifolder_cfood.yml

diff --git a/setup.py b/setup.py
index 25c17046..6856ec0d 100755
--- a/setup.py
+++ b/setup.py
@@ -71,7 +71,7 @@ def setup_package():
         package_data={},
         entry_points={
             "console_scripts": [
-                "crawl = crawl.__main__:main",
+                "crawl = src.__main__:main",
             ]
         }
     )
diff --git a/src/crawl.py b/src/crawl.py
old mode 100644
new mode 100755
index 1b67def7..6a348f8a
--- a/src/crawl.py
+++ b/src/crawl.py
@@ -5,8 +5,39 @@
 import argparse
 import os
 import sys
+import yaml
 import re
 
+def crawl_cfood(dirname: str,
+                cfood: str):
+    """
+    Crawl a single cfood.
+
+    dirname : the directory that is walked recursively
+    cfood : filename of the cfood (yaml) file to be applied
+    """
+
+    # Load the cfood from a yaml file:
+    with open(cfood, "r") as f:
+        cf = yaml.load(f, Loader=yaml.SafeLoader)
+
+    # Preliminary: every top-level entry of the cfood is used as a node.
+    for current_node in cf.values():
+        # dir is the default
+        node_type = current_node.get("type", "dir")
+        for currentpath, dirs, files in os.walk(dirname):
+            # for current nodes of type dir look in the list of dirs
+            # for matches
+            if node_type == "dir":
+                for subdirname in dirs:
+                    pass
+            elif node_type == "file":
+                for filename in files:
+                    pass
+            else:
+                # work in progress
+                pass
+
 def crawl(dirname: str,
           cfoods: list[str]):
 
@@ -17,10 +48,12 @@ def crawl(dirname: str,
 
     cfoods : a list of filenames of cfood files
     """
-    pass
+    # simplified for testing:
+    for cfood in cfoods:
+        crawl_cfood(dirname, cfood)
 
 def main():
-    pass
+    crawl(sys.argv[1], [sys.argv[2]])
 
 
 if __name__ == "__main__":
diff --git a/test/scifolder_cfood.yml b/test/scifolder_cfood.yml
new file mode 100644
index 00000000..35a97a3b
--- /dev/null
+++ b/test/scifolder_cfood.yml
@@ -0,0 +1,31 @@
+
+DataAnalysis:
+
+  children:
+    project_dir:
+      re: (?P<date>.*?)_(?P<identifier>.*)
+      handlers:
+        - type: identifiable
+          name: idf_project
+
+      children:
+        single:
+          re: (?P<date>.*?)_(?P<identifier>.*)
+
+          children:
+
+            README:
+              type: file
+              case: insensitive
+              re: README\.md
+
+              children:
+                - description:
+                    type: TEXT
+                - responsible:
+                    children:
+                      type: TEXT
+                      re: (?P<first_name>.+) (?P<last_name>.+)
+                      handlers:
+                        type: identifiable
+                        name: idf_person
-- 
GitLab