diff --git a/setup.py b/setup.py
index 25c17046dac4874036fae3772d6da0e0ff08b88a..6856ec0dd2a36dc8be955977a2298152151aa18f 100755
--- a/setup.py
+++ b/setup.py
@@ -71,7 +71,7 @@ def setup_package():
         package_data={},
         entry_points={
             "console_scripts": [
-                "crawl = crawl.__main__:main",
+                "crawl = src.__main__:main",
             ]
         }
     )
diff --git a/src/crawl.py b/src/crawl.py
old mode 100644
new mode 100755
index 1b67def7b28329d1894fca640b58311aca73d20b..6a348f8ab5176665aae048c6e244a26af7384a66
--- a/src/crawl.py
+++ b/src/crawl.py
@@ -5,8 +5,43 @@
 import argparse
 import os
 import sys
+import yaml
 import re
 
+def crawl_cfood(dirname: str,
+                cfood: str):
+    """
+    Crawl a single cfood.
+
+    dirname : the directory to be crawled
+
+    cfood : the filename of a cfood file (yaml)
+    """
+
+    # Load the cfood from a yaml file:
+    with open(cfood, "r") as f:
+        cf = yaml.load(f, Loader=yaml.SafeLoader)
+
+    # work in progress: each top-level entry of the cfood is taken as a
+    # node to match against; an empty cfood (None) yields no nodes
+    for current_node in (cf or {}).values():
+        # dir is the default
+        node_type = current_node.get("type", "dir")
+
+        for currentpath, dirs, files in os.walk(dirname):
+            # for current nodes of type dir look in the list of dirs for
+            # matches
+            if node_type == "dir":
+                # named subdir so that the dirname argument is not shadowed
+                for subdir in dirs:
+                    pass
+            elif node_type == "file":
+                for filename in files:
+                    pass
+            else:
+                # work in progress
+                pass
+
 
 def crawl(dirname: str,
           cfoods: list[str]):
@@ -17,10 +52,16 @@ def crawl(dirname: str,
 
     cfoods : a list of filenames of cfood files
     """
-    pass
+    # simplified for testing:
+    for cfood in cfoods:
+        crawl_cfood(dirname, cfood)
 
 def main():
-    pass
+    """
+    Crawl the directory given as first command line argument using the
+    cfood file given as second command line argument.
+    """
+    crawl(sys.argv[1], [sys.argv[2]])
 
 
 if __name__ == "__main__":
diff --git a/test/scifolder_cfood.yml b/test/scifolder_cfood.yml
new file mode 100644
index 0000000000000000000000000000000000000000..35a97a3bb314376ef9d688abf3c8a21c9100cbab
--- /dev/null
+++ b/test/scifolder_cfood.yml
@@ -0,0 +1,31 @@
+
+DataAnalysis:
+
+  children:
+    project_dir:
+      re: (?P<date>.*?)_(?P<identifier>.*)
+      handlers:
+        - type: identifiable
+          name: idf_project
+
+      children:
+        single:
+          re: (?P<date>.*?)_(?P<identifier>.*)
+
+          children:
+
+            README:
+              type: file
+              case: insensitive
+              re: README\.md
+
+              children:
+                - description:
+                    type: TEXT
+                - responsible:
+                    children:
+                      type: TEXT
+                      re: (?P<first_name>.+) (?P<last_name>.+)
+                      handlers:
+                        type: identifiable
+                        name: idf_person