Skip to content
Snippets Groups Projects
Commit 5841e2c0 authored by Daniel Hornung's avatar Daniel Hornung
Browse files

Merge branch 'f-doc-loadfiles' into 'dev'

F doc loadfiles

See merge request !113
parents 3615193e 47246826
No related branches found
No related tags found
2 merge requests!128MNT: Added a warning when column metadata is not configured, and a better...,!113F doc loadfiles
Pipeline #53967 passed
......@@ -20,6 +20,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Documentation ###
* Added documentation of `caosadvancedtools.loadFiles` module.
## [0.12.0] - 2024-07-31 ##
### Added ###
......
......@@ -23,6 +23,18 @@
# ** end header
#
"""Utilities to make the LinkAhead server aware of files.
Installation of `caosadvancedtools` also creates an executable script ``linkahead-loadfiles`` which
calls the `loadpath` function. Get the full help with ``linkahead-loadfiles --help``. In short,
that script tells the LinkAhead server to create `FILE` entities for existing files in one branch of
the directory tree. It is necessary that this directory is already visible for the server (for
example because it is defined as ``extroot`` in the LinkAhead profile).
"""
from __future__ import annotations
import argparse
import logging
import os
......@@ -31,28 +43,48 @@ import sys
import re
from argparse import ArgumentParser
from tempfile import NamedTemporaryFile
from typing import Union
import shutil
import caosdb as db
logger = logging.getLogger(__name__)
timeout_fallback = 20
def convert_size(size):
def convert_size(size: int):
"""Convert `size` from bytes to a human-readable file size in KB,
MB, ...
"""
if (size == 0):
return '0B'
size_name = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB")
i = int(math.floor(math.log(size, 1000)))
p = math.pow(1000, i)
index = int(math.floor(math.log(size, 1000)))
p = math.pow(1000, index)
s = round(size / p, 2)
return '%s %s' % (s, size_name[i])
return f"{s} {size_name[index]}"
def combine_ignore_files(caosdbignore, localignore, dirname=None):
"""appends the contents of localignore to caosdbignore and saves the result
and returns the name
def combine_ignore_files(caosdbignore: str, localignore: str, dirname=None) -> str:
"""Append the contents of localignore to caosdbignore, save the result,
and return the name.
Parameters
----------
caosdbignore : str
Path to parent level caosdbignore file
localignore : str
Path to current working directory's local caosdbignore.
dirname : str, optional
The path of the directory to which the temporary combined file
is written. If None is given, `NamedTemporaryFile`'s default
is used. Default is None.
Returns
-------
name : str
Name of the temporary combined caosdbignore file.
"""
......@@ -66,9 +98,22 @@ def combine_ignore_files(caosdbignore, localignore, dirname=None):
return tmp.name
def compile_file_list(caosdbignore, localpath):
"""creates a list of files that contain all files under localpath except
those excluded by caosdbignore
def compile_file_list(caosdbignore: str, localpath: str) -> list[str]:
"""Create a list of files that contains all files under localpath except
those excluded by caosdbignore.
Parameters
----------
caosdbignore : str
Path of caosdbignore file
localpath : str
Path of the directory from which the file list will be compiled.
Returns
-------
file_list : list[str]
List of files in `localpath` after applying the ignore rules
from `caosdbignore`.
"""
......@@ -76,11 +121,11 @@ def compile_file_list(caosdbignore, localpath):
matches = parse_gitignore(caosdbignore)
current_ignore = caosdbignore
non_ignored_files = []
ignore_files = []
ignore_files: list[tuple[str, str]] = []
for root, dirs, files in os.walk(localpath):
# remove local ignore files that no longer apply to the current subtree (branch switch)
while len(ignore_files) > 0 and not root.startswith(ignore_files[-1][0]):
shutil.os.remove(ignore_files[-1][1])
os.remove(ignore_files[-1][1])
ignore_files.pop()
# use the global one if there are no more local ones
......@@ -110,10 +155,28 @@ def compile_file_list(caosdbignore, localpath):
return non_ignored_files
def create_re_for_file_list(files, localroot, remoteroot):
"""creates a regular expression that matches file paths contained in the
files argument and all parent directories. The prefix localroot is replaced
by the prefix remoteroot.
def create_re_for_file_list(files: list[str], localroot: str, remoteroot: str) -> str:
"""Create a regular expression that matches file paths contained
in the `files` argument and all parent directories. The prefix
`localroot` is replaced by the prefix `remoteroot`.
Parameters
----------
files : list[str]
List of file paths to be converted to a regular expression.
localroot : str
Prefix (of the local directory root) to be removed from the
paths in `files`.
remoteroot : str
Prefix (of the LinkAhead filesystem's directory root) to be
prepended to the file paths after the removal of the
`localroot` prefix.
Returns
-------
regexp : str
Regular expression that matches all file paths from `files`
adapted for the remote directory root.
"""
regexp = ""
......@@ -128,8 +191,46 @@ def create_re_for_file_list(files, localroot, remoteroot):
return "^("+regexp[1:]+")$"
def loadpath(path, include, exclude, prefix, dryrun, forceAllowSymlinks, caosdbignore=None,
localpath=None):
def loadpath(path: str, include: Union[str, None], exclude: Union[str, None], prefix: str,
dryrun: bool, forceAllowSymlinks: bool, caosdbignore: Union[str, None] = None,
localpath: Union[str, None] = None):
"""Make all files in `path` available to the LinkAhead server as FILE entities.
Notes
-----
Run ``linkahead-loadfiles --help`` for more information and examples.
Parameters
----------
path : str
Path to the directory whose files are to be made available, as
seen by the LinkAhead server (i.e., the path from within the
Docker container in a typical LinkAhead Control setup).
include : str or None
Regular expression matching the files that will be
included. If None, all files are matched. This is ignored if a
`caosdbignore` is provided.
exclude : str or None
Regular expression matching files that are to be excluded.
prefix : str
The prefix under which the files are to be inserted into
LinkAhead's file system.
dryrun : bool
Whether a dryrun should be performed.
forceAllowSymlinks : bool
Whether symlinks in the `path` to be inserted should be
processed.
caosdbignore : str, optional
Path to a caosdbignore file that defines which files shall be
included and which shall not. The syntax is the same as in a
gitignore file. You must also provide the `localpath` option
since the check is done locally. If this is given, any
`include` is ignored.
localpath : str, optional
Path of `path` on the local machine. Only needed in combination with a
``caosdbignore`` file since that is processed locally.
"""
if caosdbignore:
# create list of files and create regular expression for small chunks
......@@ -182,7 +283,11 @@ def loadpath(path, include, exclude, prefix, dryrun, forceAllowSymlinks, caosdbi
def main(argv=None):
'''Command line options.'''
"""Run `loadpath` with the arguments specified on the command
line, extended by the optional `argv` parameter. See ``--help``
for more information.
"""
if argv is None:
argv = sys.argv
......@@ -191,7 +296,7 @@ def main(argv=None):
# Setup argument parser
parser = ArgumentParser(description="""
Make files that the LinkAhead server can see available als FILE entities.
Make files that the LinkAhead server can see available as FILE entities.
In a typical scenario where LinkAhead runs in a Docker container and a host directory `mydir` is
mounted as an extroot with name `myext`, loadfiles could be called like this:
......
......@@ -181,7 +181,7 @@ a. Properties with primitive data types
"date": "2023-06-15",
"url": "www.indiscale.com/next",
"duration": 2.5,
"participants": None,
"participants": null,
"remote": true
}
]
......
......@@ -35,3 +35,30 @@ behavior can be changed by initializing the ``TableImporter`` with
:py:class:`~caosadvancedtools.datainconsistency.DataInconsistencyError` is
raised when an empty field is encountered in a column with a non-nullable
integer datatype.
The loadfiles module and executable
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
For making files available to the LinkAhead server as File entities
(see also the server's `file server
<https://docs.indiscale.com/caosdb-server/specification/Fileserver.html>`_
documentation), the LinkAhead Advanced User tools provide the
:py:mod:`~caosadvancedtools.loadFiles` module and a
``linkahead-loadfiles`` executable. Both operate on a path as seen by
the LinkAhead server (i.e., a path within the Docker container in the
typical LinkAhead Control setup) and can be further specified to
include or exclude specific files. In the typical setup, where a
directory is mounted as an *extroot* into the Docker container by
LinkAhead Control, running
.. code-block:: sh
linkahead-loadfiles /opt/caosdb/mnt/extroot
makes all files available. Execute
.. code-block:: sh
linkahead-loadfiles --help
for more information and examples.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment