Skip to content
Snippets Groups Projects
Commit aa12c215 authored by Florian Spreckelsen's avatar Florian Spreckelsen
Browse files

Merge branch 'f-more-parameter-documentation' into 'dev'

More documentation (docstrings) of parameters add/remove_prefix and restricted_path

See merge request !127
parents 7218f73c 20fce54b
No related branches found
No related tags found
2 merge requests!160STY: styling,!127More documentation (docstrings) of parameters add/remove_prefix and restricted_path
Pipeline #39837 passed
...@@ -1165,11 +1165,29 @@ def _treat_deprecated_prefix(prefix, remove_prefix): ...@@ -1165,11 +1165,29 @@ def _treat_deprecated_prefix(prefix, remove_prefix):
return remove_prefix return remove_prefix
def _fix_file_paths(crawled_data, add_prefix, remove_prefix): def _fix_file_paths(crawled_data: list[db.Entity],
"""adjust the path according to add_/remove_prefix add_prefix: Optional[str],
remove_prefix: Optional[str]):
"""
Adjust the path according to add_/remove_prefix
Also remove the `file` attribute from File entities (because inserts need currently be done Also remove the `file` attribute from File entities (because inserts need currently be done
by loadfiles). by loadfiles).
Arguments:
------------
crawled_data: list[db.Entity]
A list of entities. This list will be searched for instances of db.File.
add_prefix: Optional[str]
If add_prefix is not None, the given prefix will be added in front of elem.path.
remove_prefix: Optional[str]
If remove_prefix is not None the given prefix will be removed from the front of
elem.path. In this case a RuntimeError will be raised if any path of a file does
not begin with "remove_prefix".
""" """
for elem in crawled_data: for elem in crawled_data:
if isinstance(elem, db.File): if isinstance(elem, db.File):
...@@ -1265,11 +1283,14 @@ def crawler_main(crawled_directory_path: str, ...@@ -1265,11 +1283,14 @@ def crawler_main(crawled_directory_path: str,
whether or not to update or insert entities inspite of name conflicts whether or not to update or insert entities inspite of name conflicts
restricted_path: optional, list of strings restricted_path: optional, list of strings
Traverse the data tree only along the given path. When the end of the given path Traverse the data tree only along the given path. When the end of the given path
is reached, traverse the full tree as normal. is reached, traverse the full tree as normal. See docstring of 'scanner' in
module 'scanner' for more details.
remove_prefix : Optional[str] remove_prefix : Optional[str]
remove the given prefix from file paths Remove the given prefix from file paths.
See docstring of '_fix_file_paths' for more details.
add_prefix : Optional[str] add_prefix : Optional[str]
add the given prefix to file paths Add the given prefix to file paths.
See docstring of '_fix_file_paths' for more details.
Returns Returns
------- -------
...@@ -1382,12 +1403,18 @@ def parse_args(): ...@@ -1382,12 +1403,18 @@ def parse_args():
def split_restricted_path(path):
    """
    Split a path string into components separated by slashes or other os.path.sep.
    Empty elements will be removed.
    """
    # NOTE: a naive os.path.split loop was deliberately avoided here because it
    # never terminates for "ill-posed" paths (see test_utilities.py):
    #   elements = []
    #   while path != "/":
    #       path, el = os.path.split(path)
    #       if el != "":
    #           elements.insert(0, el)
    #   return elements
    parts = path.split(os.path.sep)
    return [part for part in parts if part]
def main(): def main():
......
...@@ -235,7 +235,7 @@ def scanner(items: list[StructureElement], ...@@ -235,7 +235,7 @@ def scanner(items: list[StructureElement],
restricted_path: optional, list of strings, traverse the data tree only along the given restricted_path: optional, list of strings, traverse the data tree only along the given
path. For example, when a directory contains files a, b and c and b is path. For example, when a directory contains files a, b and c and b is
given in restricted_path, a and c will be ignored by the crawler. given as restricted_path, a and c will be ignored by the crawler.
When the end of the given path is reached, traverse the full tree as When the end of the given path is reached, traverse the full tree as
normal. The first element of the list provided by restricted_path should normal. The first element of the list provided by restricted_path should
be the name of the StructureElement at this level, i.e. denoting the be the name of the StructureElement at this level, i.e. denoting the
...@@ -357,7 +357,8 @@ def scan_directory(dirname: str, crawler_definition_path: str, ...@@ -357,7 +357,8 @@ def scan_directory(dirname: str, crawler_definition_path: str,
restricted_path: optional, list of strings restricted_path: optional, list of strings
Traverse the data tree only along the given path. When the end of the given path Traverse the data tree only along the given path. When the end of the given path
is reached, traverse the full tree as normal. is reached, traverse the full tree as normal. See docstring of 'scanner' for
more details.
""" """
crawler_definition = load_definition(crawler_definition_path) crawler_definition = load_definition(crawler_definition_path)
...@@ -408,7 +409,8 @@ def scan_structure_elements(items: Union[list[StructureElement], StructureElemen ...@@ -408,7 +409,8 @@ def scan_structure_elements(items: Union[list[StructureElement], StructureElemen
file. file.
restricted_path: optional, list of strings restricted_path: optional, list of strings
Traverse the data tree only along the given path. When the end of the given path Traverse the data tree only along the given path. When the end of the given path
is reached, traverse the full tree as normal. is reached, traverse the full tree as normal. See docstring of 'scanner' for
more details.
Returns Returns
------- -------
......
#!/usr/bin/env python3
# encoding: utf-8
#
# This file is a part of the CaosDB Project.
#
# Copyright (C) 2023 Alexander Schlemmer <alexander.schlemmer@ds.mpg.de>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
from caoscrawler.crawl import split_restricted_path
def test_split_restricted_path():
    # Table-driven check: each pair is (input path, expected components).
    # Covers empty input, bare root, leading/trailing separators, and
    # runs of repeated separators that must collapse to nothing.
    cases = [
        ("", []),
        ("/", []),
        ("test/", ["test"]),
        ("/test/", ["test"]),
        ("test/bla", ["test", "bla"]),
        ("/test/bla", ["test", "bla"]),
        ("/test1/test2/bla", ["test1", "test2", "bla"]),
        ("/test//bla", ["test", "bla"]),
        ("//test/bla", ["test", "bla"]),
        ("///test//bla////", ["test", "bla"]),
    ]
    for path, expected in cases:
        assert split_restricted_path(path) == expected
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment