import os


def split_restricted_path(path):
    """Split a file-system-like path string into its non-empty elements.

    Leading, trailing and repeated separators are ignored, so ``"/el"``,
    ``"/el/"`` and ``"//el"`` all yield ``["el"]``. The elements are returned
    in the order in which they appear in ``path``; no normalisation of
    ``"."`` or ``".."`` is performed.

    The previous implementation repeatedly called ``os.path.split`` until the
    remaining head was exactly ``"/"``. That condition is never reached for
    relative paths (the head ends up as ``""``), for the empty string, or for
    heads consisting of several separators (``"//"``), which made the function
    loop forever. Splitting on the separator gives the same result for all
    inputs on which the old loop terminated and always terminates.

    Parameters
    ----------
    path : str
        The path to split, e.g. the value given to ``--restrict-path``.

    Returns
    -------
    list of str
        The non-empty path elements; an empty list if ``path`` contains no
        element at all (e.g. ``"/"`` or ``""``).
    """
    # ``os.path.split`` honours the alternative separator on platforms that
    # define one, so fold it into the primary separator before splitting.
    if os.path.altsep:
        path = path.replace(os.path.altsep, os.path.sep)
    return [element for element in path.split(os.path.sep) if element]


def test_split_restricted_path():
    """Check the splitting of well-formed and of formerly hanging paths."""
    assert ["el"] == split_restricted_path("/el")
    assert ["el"] == split_restricted_path("/el/")
    assert ["el", "el"] == split_restricted_path("/el/el")
    # Regression: these inputs made the old while-loop spin forever.
    assert [] == split_restricted_path("")
    assert ["el", "el"] == split_restricted_path("el/el")
    assert ["el"] == split_restricted_path("//el//")