From 0a20600533ef845a25404bc249c56555c16b1d2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20tom=20W=C3=B6rden?= <h.tomwoerden@indiscale.com> Date: Thu, 2 Feb 2023 10:23:14 +0100 Subject: [PATCH] DOC: enhance docstrings --- src/caoscrawler/crawl.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/src/caoscrawler/crawl.py b/src/caoscrawler/crawl.py index 3d844ad5..0273acef 100644 --- a/src/caoscrawler/crawl.py +++ b/src/caoscrawler/crawl.py @@ -427,7 +427,9 @@ class Crawler(object): Convenience function that starts the crawler (calls start_crawling) with a single directory as the StructureElement. - restricted_path: see start_crawling + restricted_path: optional, list of string + Traverse the data tree only along the given path. When the end of the given path + is reached, traverse the full tree as normal. """ crawler_definition = self.load_definition(crawler_definition_path) @@ -1193,9 +1195,12 @@ ____________________\n""".format(i + 1, len(pending_changes)) + str(el[3])) generalStore and recordStore: This recursion of the crawl function should only operate on copies of the global stores of the Crawler object. restricted_path: optional, list of strings, traverse the data tree only along the given - path. When the end of the given path is reached, traverse the full tree as - normal; The given path contains only the untreated levels, i.e. the first - element is considered at this level. + path. For example, when a directory contains files a, b and c and b is + given in restricted_path, a and c will be ignored by the crawler. + When the end of the given path is reached, traverse the full tree as + normal. The first element of the list provided by restricted_path should + be the name of the StructureElement at this level, i.e. denoting the + respective element in the items argument. 
""" # This path_found variable stores wether the path given by restricted_path was found in the # data tree @@ -1312,8 +1317,9 @@ def crawler_main(crawled_directory_path: str, securityMode of Crawler unique_names : bool whether or not to update or insert entities inspite of name conflicts - restricted_path : optional, list of str - see start_crawling + restricted_path: optional, list of string + Traverse the data tree only along the given path. When the end of the given path + is reached, traverse the full tree as normal. Returns ------- -- GitLab