diff --git a/CHANGELOG.md b/CHANGELOG.md index 831ea7fd0b04b18c4cce0a2e3a10511f193cf546..787b9df809cb9c0d23be2a867b68bd9cf5724d35 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Documentation ### +* Added documentation of `caosadvancedtools.loadFiles` module. + ## [0.12.0] - 2024-07-31 ## ### Added ### diff --git a/src/caosadvancedtools/loadFiles.py b/src/caosadvancedtools/loadFiles.py index 77872d1dfe896688e54285551ba2e4eb9a02af99..cedef3673f510d1fd9666cfd0f4b4909a4f6087f 100755 --- a/src/caosadvancedtools/loadFiles.py +++ b/src/caosadvancedtools/loadFiles.py @@ -23,6 +23,18 @@ # ** end header # +"""Utilities to make the LinkAhead server aware of files. + +Installation of `caosadvancedtools` also creates an executable script ``linkahead-loadfiles`` which +calls the `loadpath` function. Get the full help with ``linkahead-loadfiles --help``. In short, +that script tells the LinkAhead server to create `FILE` entities for existing files in one branch of +the directory tree. It is necessary that this directory is already visible for the server (for +example because it is defined as ``extroot`` in the LinkAhead profile). + +""" + +from __future__ import annotations + import argparse import logging import os @@ -31,28 +43,48 @@ import sys import re from argparse import ArgumentParser from tempfile import NamedTemporaryFile +from typing import Union -import shutil import caosdb as db logger = logging.getLogger(__name__) timeout_fallback = 20 -def convert_size(size): +def convert_size(size: int): + """Convert `size` from bytes to a human-readable file size in KB, + MB, ... 
+ + """ if (size == 0): return '0B' size_name = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB") - i = int(math.floor(math.log(size, 1000))) - p = math.pow(1000, i) + index = int(math.floor(math.log(size, 1000))) + p = math.pow(1000, index) s = round(size / p, 2) - return '%s %s' % (s, size_name[i]) + return f"{s} {size_name[index]}" -def combine_ignore_files(caosdbignore, localignore, dirname=None): - """appends the contents of localignore to caosdbignore and saves the result - and returns the name +def combine_ignore_files(caosdbignore: str, localignore: str, dirname=None) -> str: + """Append the contents of localignore to caosdbignore, save the result, + and return the name. + + Parameters + ---------- + caosdbignore : str + Path to parent level caosdbignore file + localignore : str + Path to current working directory's local caosdbignore. + dirname : str, optional + The path of the directory to which the temporary combined file + is written. If None is given, `NamedTemporaryFile`'s default + is used. Default is None. + + Returns + ------- + name : str + Name of the temporary combined caosdbignore file. """ @@ -66,9 +98,22 @@ def combine_ignore_files(caosdbignore, localignore, dirname=None): return tmp.name -def compile_file_list(caosdbignore, localpath): - """creates a list of files that contain all files under localpath except - those excluded by caosdbignore +def compile_file_list(caosdbignore: str, localpath: str) -> list[str]: + """Create a list of files that contain all files under localpath except + those excluded by caosdbignore. + + Parameters + ---------- + caosdbignore : str + Path of caosdbignore file + localpath : str + Path of the directory from which the file list will be compiled. + + Returns + ------- + file_list : list[str] + List of files in `localpath` after applying the ignore rules + from `caosdbignore`.
""" @@ -76,11 +121,11 @@ def compile_file_list(caosdbignore, localpath): matches = parse_gitignore(caosdbignore) current_ignore = caosdbignore non_ignored_files = [] - ignore_files = [] + ignore_files: list[tuple[str, str]] = [] for root, dirs, files in os.walk(localpath): # remove local ignore files that do no longer apply to the current subtree (branch switch) while len(ignore_files) > 0 and not root.startswith(ignore_files[-1][0]): - shutil.os.remove(ignore_files[-1][1]) + os.remove(ignore_files[-1][1]) ignore_files.pop() # use the global one if there are no more local ones @@ -110,10 +155,28 @@ def compile_file_list(caosdbignore, localpath): return non_ignored_files -def create_re_for_file_list(files, localroot, remoteroot): - """creates a regular expression that matches file paths contained in the - files argument and all parent directories. The prefix localroot is replaced - by the prefix remoteroot. +def create_re_for_file_list(files: list[str], localroot: str, remoteroot: str) -> str: + """Create a regular expression that matches file paths contained + in the `files` argument and all parent directories. The prefix + `localroot` is replaced by the prefix `remoteroot`. + + Parameters + ---------- + files : list[str] + List of file paths to be converted to a regular expression. + localroot : str + Prefix (of the local directory root) to be removed from the + paths in `files`. + remoteroot : str + Prefix (of the LinkAhead filesystem's directory root) to be + prepended to the file paths after the removal of the + `localroot` prefix. + + Returns + ------- + regexp : str + Regular expression that matches all file paths from `files` + adapted for the remote directory root.
""" regexp = "" @@ -128,8 +191,46 @@ def create_re_for_file_list(files, localroot, remoteroot): return "^("+regexp[1:]+")$" -def loadpath(path, include, exclude, prefix, dryrun, forceAllowSymlinks, caosdbignore=None, - localpath=None): +def loadpath(path: str, include: Union[str, None], exclude: Union[str, None], prefix: str, + dryrun: bool, forceAllowSymlinks: bool, caosdbignore: Union[str, None] = None, + localpath: Union[str, None] = None): + """Make all files in `path` available to the LinkAhead server as FILE entities. + + Notes + ----- + Run ``linkahead-loadfiles --help`` for more information and examples. + + Parameters + ---------- + path : str + Path to the directory the files of which are to be made + available as seen by the LinkAhead server (i.e., the path from + within the Docker container in a typical LinkAhead Control + setup.) + include : str or None + Regular expression matching the files that will be + included. If None, all files are matched. This is ignored if a + `caosdbignore` is provided. + exclude : str or None + Regular expression matching files that are to be excluded. + prefix : str + The prefix under which the files are to be inserted into + LinkAhead's file system. + dryrun : bool + Whether a dryrun should be performed. + forceAllowSymlinks : bool + Whether symlinks in the `path` to be inserted should be + processed. + caosdbignore : str, optional + Path to a caosdbignore file that defines which files shall be + included and which shall not. The syntax is the same as in a + gitignore file. You must also provide the `localpath` option + since the check is done locally. If this is given, any + `include` is ignored. + localpath : str, optional + Path of `path` on the local machine. Only needed in combination with a + ``caosdbignore`` file since that is processed locally.
+ """ if caosdbignore: # create list of files and create regular expression for small chunks @@ -182,7 +283,11 @@ def loadpath(path, include, exclude, prefix, dryrun, forceAllowSymlinks, caosdbi def main(argv=None): - '''Command line options.''' + """Run `loadpath` with the arguments specified on the command + line, extended by the optional `argv` parameter. See ``--help`` + for more information. + + """ if argv is None: argv = sys.argv @@ -191,7 +296,7 @@ def main(argv=None): # Setup argument parser parser = ArgumentParser(description=""" -Make files that the LinkAhead server can see available als FILE entities. +Make files that the LinkAhead server can see available as FILE entities. In a typical scenario where LinkAhead runs in a Docker container and a host directory `mydir` is mounted as an extroot with name `myext`, loadfiles could be called like this: diff --git a/src/doc/table-json-conversion/specs.rst b/src/doc/table-json-conversion/specs.rst index c98eddc1180f552f1d2389b1bb57979e93550ab8..62c75a7055a4f2f07441a036e14b5fd5fe9b9256 100644 --- a/src/doc/table-json-conversion/specs.rst +++ b/src/doc/table-json-conversion/specs.rst @@ -181,7 +181,7 @@ a. Properties with primitive data types "date": "2023-06-15", "url": "www.indiscale.com/next", "duration": 2.5, - "participants": None, + "participants": null, "remote": true } ] diff --git a/src/doc/utilities.rst b/src/doc/utilities.rst index 4d520ae2d4b7a9bbd81171ba002c4f736223713a..f80460f32f9c5493206183427627a7fc7d5d13ba 100644 --- a/src/doc/utilities.rst +++ b/src/doc/utilities.rst @@ -35,3 +35,30 @@ behavior can be changed by initializing the ``TableImporter`` with :py:class:`~caosadvancedtools.datainconsistency.DataInconsistencyError` is raised when an empty field is encountered in a column with an non-nullable integer datatype. 
+ +The loadfiles module and executable +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +For making files available to the LinkAhead server as File entities +(see also the server's `file server +<https://docs.indiscale.com/caosdb-server/specification/Fileserver.html>`_ +documentation), the LinkAhead Advanced User tools provide the +:py:mod:`~caosadvancedtools.loadFiles` module and a +``linkahead-loadfiles`` executable. Both operate on a path as seen by +the LinkAhead server (i.e., a path within the Docker container in the +typical LinkAhead Control setup) and can be further specified to +include or exclude specific files. In the typical setup, where a +directory is mounted as an *extroot* into the Docker container by +LinkAhead Control, running + +.. code-block:: sh + + linkahead-loadfiles /opt/caosdb/mnt/extroot + +makes all files available. Execute + +.. code-block:: sh + + linkahead-loadfiles --help + +for more information and examples.