From 24dcf639d19181ff1a33c518939eb27d82a86f3d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Henrik=20tom=20W=C3=B6rden?= <h.tomwoerden@indiscale.com>
Date: Thu, 2 Feb 2023 10:56:43 +0100
Subject: [PATCH] ENH: add commandline argument

---
 src/caoscrawler/crawl.py | 29 ++++++++++++++++++++++++-----
 unittests/test_tool.py   |  8 +++++++-
 2 files changed, 31 insertions(+), 6 deletions(-)

diff --git a/src/caoscrawler/crawl.py b/src/caoscrawler/crawl.py
index aa7db331..caf98e75 100644
--- a/src/caoscrawler/crawl.py
+++ b/src/caoscrawler/crawl.py
@@ -1393,10 +1393,15 @@ def parse_args():
                                      formatter_class=RawTextHelpFormatter)
     parser.add_argument("cfood_file_name",
                         help="Path name of the cfood yaml file to be used.")
-    parser.add_argument("-r", "--restrict", nargs="*",
-                        help="Restrict the crawling to the subtree at the end of the given path."
-                        "I.e. for each level that is given the crawler only treats the element "
-                        "with the given name.")
+    mg = parser.add_mutually_exclusive_group()  # -r and --restrict-path exclude each other
+    mg.add_argument("-r", "--restrict", nargs="*",
+                    help="Restrict the crawling to the subtree at the end of the given path. "
+                    "I.e. for each level that is given the crawler only treats the element "
+                    "with the given name.")
+    mg.add_argument("--restrict-path", help="same as restrict; instead of a list, this takes a "
+                    "single string that is interpreted as file system path. Note that a trailing "
+                    "separator (e.g. '/') will be ignored. Use --restrict if you need to have "
+                    "empty strings.")
     parser.add_argument("--provenance", required=False,
                         help="Path name of the provenance yaml file. "
                         "This file will only be generated if this option is set.")
@@ -1428,6 +1433,15 @@ def parse_args():
     return parser.parse_args()
 
 
+def split_restricted_path(path):
+    """Split ``path`` at path separators and return its non-empty elements.
+
+    Relative, empty or '//'-prefixed paths are handled too (no os.path.split loop).
+    """
+    norm = path.replace(os.path.altsep, os.path.sep) if os.path.altsep else path
+    return [el for el in norm.split(os.path.sep) if el != ""]
+
+
 def main():
     args = parse_args()
 
@@ -1443,6 +1457,11 @@ def main():
 
     if args.add_cwd_to_path:
         sys.path.append(os.path.abspath("."))
+    # Fall back to --restrict (None if absent) so restricted_path is always bound.
+    restricted_path = args.restrict
+    if args.restrict_path:
+        restricted_path = split_restricted_path(args.restrict_path)
+
     sys.exit(crawler_main(
         crawled_directory_path=args.crawled_directory_path,
         cfood_file_name=args.cfood_file_name,
@@ -1455,7 +1474,7 @@ def main():
                       "insert": SecurityMode.INSERT,
                       "update": SecurityMode.UPDATE}[args.security_mode],
         unique_names=args.unique_names,
-        restricted_path=args.restrict
+        restricted_path=restricted_path
     ))
 
 
diff --git a/unittests/test_tool.py b/unittests/test_tool.py
index 8ea8b93b..187ec06e 100755
--- a/unittests/test_tool.py
+++ b/unittests/test_tool.py
@@ -28,7 +28,7 @@ Adapted from check-sfs
 
 from caoscrawler.stores import GeneralStore, RecordStore
 import os
-from caoscrawler.crawl import Crawler, SecurityMode
+from caoscrawler.crawl import Crawler, SecurityMode, split_restricted_path
 from caoscrawler.identifiable import Identifiable
 from caoscrawler.structure_elements import File, DictTextElement, DictListElement, DictElement
 from caoscrawler.identifiable_adapters import IdentifiableAdapter, LocalStorageIdentifiableAdapter
@@ -957,3 +957,9 @@ def test_restricted_path(create_mock):
             DictElement("TestDict", test_dict), crawler_definition, converter_registry,
             restricted_path
         )
+
+
+def test_split_restricted_path():
+    """Leading and trailing separators must not produce empty path elements."""
+    for path, expected in (("/el", ["el"]), ("/el/", ["el"]), ("/el/el", ["el", "el"])):
+        assert expected == split_restricted_path(path)
-- 
GitLab