From f4f3a7660ae1f084e5da1dee6f587bf3119d67cc Mon Sep 17 00:00:00 2001
From: Alexander Schlemmer <alexander@mail-schlemmer.de>
Date: Tue, 29 Jun 2021 14:56:17 +0200
Subject: [PATCH] added a preliminary cfood for testing

---
 setup.py                 |  2 +-
 src/crawl.py             | 30 ++++++++++++++++++++++++++++--
 test/scifolder_cfood.yml | 31 +++++++++++++++++++++++++++++++
 3 files changed, 60 insertions(+), 3 deletions(-)
 mode change 100644 => 100755 src/crawl.py
 create mode 100644 test/scifolder_cfood.yml

diff --git a/setup.py b/setup.py
index 25c17046..6856ec0d 100755
--- a/setup.py
+++ b/setup.py
@@ -71,7 +71,7 @@ def setup_package():
         package_data={},
         entry_points={
             "console_scripts": [
-                "crawl = crawl.__main__:main",
+                "crawl = src.__main__:main",
             ]
         }
     )
diff --git a/src/crawl.py b/src/crawl.py
old mode 100644
new mode 100755
index 1b67def7..6a348f8a
--- a/src/crawl.py
+++ b/src/crawl.py
@@ -5,8 +5,32 @@
 import argparse
 import os
 import sys
+import yaml
 import re
 
+def crawl_cfood(dirname: str,
+                cfood: str):
+    """
+    Crawl the tree below a directory using the top-level nodes of one cfood.
+
+    dirname : root directory that is walked with os.walk
+    cfood   : filename of the cfood yaml file
+    """
+    with open(cfood, "r") as f:
+        # SafeLoader never builds arbitrary python objects; empty file -> {}
+        cf = yaml.load(f, Loader=yaml.SafeLoader) or {}
+
+    for currentpath, dirs, files in os.walk(dirname):
+        for current_node in cf.values():
+            # a node without an explicit "type" is a dir (the default)
+            node_type = (current_node or {}).get("type", "dir")
+            if node_type == "dir":
+                for subdirname in dirs:
+                    pass  # work in progress: match subdirname against node
+            elif node_type == "file":
+                for filename in files:
+                    pass  # work in progress: match filename against node
+            # other node types: work in progress
 
 def crawl(dirname: str,
           cfoods: list[str]):
@@ -17,10 +41,12 @@ def crawl(dirname: str,
     cfoods  : a list of filenames of cfood files
     """
 
-    pass
+    # simplified for testing:
+    for cfood in cfoods:
+        crawl_cfood(dirname, cfood)
 
 def main():
-    pass
+    crawl(sys.argv[1], [sys.argv[2]])  # argv[1]: directory, argv[2]: cfood file
 
 
 if __name__ == "__main__":
diff --git a/test/scifolder_cfood.yml b/test/scifolder_cfood.yml
new file mode 100644
index 00000000..35a97a3b
--- /dev/null
+++ b/test/scifolder_cfood.yml
@@ -0,0 +1,31 @@
+
+DataAnalysis:  # top-level node of this preliminary test cfood
+
+  children:  # mapping: child node name -> node definition
+    project_dir:  # "re" below captures the named groups date and identifier
+      re: (?P<date>.*?)_(?P<identifier>.*)
+      handlers:  # list of handler mappings, each with "type" and "name"
+      - type: identifiable
+        name: idf_project
+
+      children:
+        single:  # same pattern as project_dir; presumably one level deeper - confirm
+          re: (?P<date>.*?)_(?P<identifier>.*)
+
+          children:
+
+            README:  # the only node with an explicit "type"; nodes above omit it
+              type: file
+              case: insensitive
+              re: README\.md
+
+              children:  # NOTE(review): a list here, a mapping at the levels above - confirm
+              - description:  # NOTE(review): parses as null; "type" below is a sibling key
+                type: TEXT
+              - responsible:  # NOTE(review): parses as null; "children" below is a sibling key
+                children:  # NOTE(review): holds node properties directly, no child name - confirm
+                  type: TEXT
+                  re: (?P<first_name>.+) (?P<last_name>.+)
+                  handlers:  # NOTE(review): a mapping here, a list under project_dir - confirm
+                    type: identifiable
+                    name: idf_person
-- 
GitLab