Skip to content
Snippets Groups Projects

F doc loadfiles

Merged Florian Spreckelsen requested to merge f-doc-loadfiles into dev
Files
4
@@ -23,6 +23,18 @@
# ** end header
#
"""Utilities to make the LinkAhead server aware of files.
Installation of `caosadvancedtools` also creates an executable script ``linkahead-loadfiles`` which
calls the `loadpath` function. Get the full help with ``linkahead-loadfiles --help``. In short,
that script tells the LinkAhead server to create `FILE` entities for existing files in one branch of
the directory tree. It is necessary that this directory is already visible for the server (for
example because it is defined as ``extroot`` in the LinkAhead profile).
"""
from __future__ import annotations
import argparse
import logging
import os
@@ -31,28 +43,48 @@ import sys
import re
from argparse import ArgumentParser
from tempfile import NamedTemporaryFile
from typing import Union
import shutil
import caosdb as db
logger = logging.getLogger(__name__)
timeout_fallback = 20
def convert_size(size: int):
    """Convert `size` from bytes to a human-readable file size in KB,
    MB, ...

    Decimal (power-of-1000) units are used. The unit is selected with exact
    integer comparisons rather than a floating-point logarithm, because
    ``math.log(1000, 1000)`` evaluates to slightly less than 1 and would
    therefore report 1000 bytes as "1000.0 B" instead of "1.0 KB".

    Parameters
    ----------
    size : int
        Size in bytes. Must not be negative.

    Returns
    -------
    str
        The size rounded to two decimals followed by its unit, e.g.
        "1.5 KB". A size of zero is returned as "0B".

    Raises
    ------
    ValueError
        If `size` is negative.
    """
    if size == 0:
        return '0B'
    if size < 0:
        raise ValueError(f"size must not be negative, got {size}")

    size_name = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB")

    # Pick the largest unit that does not exceed `size`; stop at the last
    # unit so that very large values do not run past the end of the tuple.
    index = 0
    while index < len(size_name) - 1 and size >= 1000 ** (index + 1):
        index += 1

    s = round(size / 1000 ** index, 2)

    return f"{s} {size_name[index]}"
def combine_ignore_files(caosdbignore, localignore, dirname=None):
"""appends the contents of localignore to caosdbignore and saves the result
and returns the name
def combine_ignore_files(caosdbignore: str, localignore: str, dirname=None) -> str:
"""Append the contents of localignore to caosdbignore, save the result,
and return the name.
Parameters
----------
caosdbignore : str
Path to parent level caosdbignore file
localignore : str
Path to current working directory's local caosdbignore.
dirname : str, optional
The path of the directory to which the temporary combined file
is written. If None is given, `NamedTemporaryFile`'s default
is used. Default is None.
Returns
-------
name : str
Name of the temporary combined caosdbignore file.
"""
@@ -66,9 +98,22 @@ def combine_ignore_files(caosdbignore, localignore, dirname=None):
return tmp.name
def compile_file_list(caosdbignore, localpath):
"""creates a list of files that contain all files under localpath except
those excluded by caosdbignore
def compile_file_list(caosdbignore: str, localpath: str) -> list[str]:
"""Create a list of files that contain all files under localpath except
those excluded by caosdbignore.
Parameters
----------
caosdbignore : str
Path of caosdbignore file
localpath : str
Path of the directory from which the file list will be compiled.
Returns
-------
file_list : list[str]
List of files in `localpath` after applying the ignore rules
from `caosdbignore`.
"""
@@ -76,11 +121,11 @@ def compile_file_list(caosdbignore, localpath):
matches = parse_gitignore(caosdbignore)
current_ignore = caosdbignore
non_ignored_files = []
ignore_files = []
ignore_files: list[tuple[str, str]] = []
for root, dirs, files in os.walk(localpath):
# remove local ignore files that no longer apply to the current subtree (branch switch)
while len(ignore_files) > 0 and not root.startswith(ignore_files[-1][0]):
shutil.os.remove(ignore_files[-1][1])
os.remove(ignore_files[-1][1])
ignore_files.pop()
# use the global one if there are no more local ones
@@ -110,10 +155,28 @@ def compile_file_list(caosdbignore, localpath):
return non_ignored_files
def create_re_for_file_list(files, localroot, remoteroot):
"""creates a regular expression that matches file paths contained in the
files argument and all parent directories. The prefix localroot is replaced
by the prefix remoteroot.
def create_re_for_file_list(files: list[str], localroot: str, remoteroot: str) -> str:
"""Create a regular expression that matches file paths contained
in the `files` argument and all parent directories. The prefix
`localroot` is replaced by the prefix `remoteroot`.
Parameters
----------
files : list[str]
List of file paths to be converted to a regular expression.
localroot : str
Prefix (of the local directory root) to be removed from the
paths in `files`.
remoteroot : str
Prefix (of the LinkAhead filesystem's directory root) to be
prepended to the file paths after the removal of the
`localroot` prefix.
Returns
-------
regexp : str
Regular expression that matches all file paths from `files`
adapted for the remote directory root.
"""
regexp = ""
@@ -128,8 +191,46 @@ def create_re_for_file_list(files, localroot, remoteroot):
return "^("+regexp[1:]+")$"
def loadpath(path, include, exclude, prefix, dryrun, forceAllowSymlinks, caosdbignore=None,
localpath=None):
def loadpath(path: str, include: Union[str, None], exclude: Union[str, None], prefix: str,
dryrun: bool, forceAllowSymlinks: bool, caosdbignore: Union[str, None] = None,
localpath: Union[str, None] = None):
"""Make all files in `path` available to the LinkAhead server as FILE entities.
Notes
-----
Run ``linkahead-loadfiles --help`` for more information and examples.
Parameters
----------
path : str
Path to the directory the files of which are to be made
available as seen by the linkahead server (i.e., the path from
within the Docker container in a typical LinkAhead Control
setup.)
include : str or None
Regular expression matching the files that will be
included. If None, all files are matched. This is ignored if a
`caosdbignore` is provided.
exclude : str or None
Regular expression matching files that are to be excluded.
prefix : str
The prefix under which the files are to be inserted into
LinkAhead's file system.
dryrun : bool
Whether a dryrun should be performed.
forceAllowSymlinks : bool
Whether symlinks in the `path` to be inserted should be
processed.
caosdbignore : str, optional
Path to a caosdbignore file that defines which files shall be
included and which shall not. The syntax is the same as in a
gitignore file. You must also provide the `localpath` option
since the check is done locally. If this is given, any
`include` is ignored.
localpath : str, optional
Path of `path` on the local machine. Only needed in combination with a
``caosdbignore`` file since that is processed locally.
"""
if caosdbignore:
# create list of files and create regular expression for small chunks
@@ -182,7 +283,11 @@ def loadpath(path, include, exclude, prefix, dryrun, forceAllowSymlinks, caosdbi
def main(argv=None):
'''Command line options.'''
"""Run `loadpath` with the arguments specified on the command
line, extended by the optional `argv` parameter. See ``--help``
for more information.
"""
if argv is None:
argv = sys.argv
@@ -191,7 +296,7 @@ def main(argv=None):
# Setup argument parser
parser = ArgumentParser(description="""
Make files that the LinkAhead server can see available als FILE entities.
Make files that the LinkAhead server can see available as FILE entities.
In a typical scenario where LinkAhead runs in a Docker container and a host directory `mydir` is
mounted as an extroot with name `myext`, loadfiles could be called like this:
Loading