Skip to content
Snippets Groups Projects
Commit 5841e2c0 authored by Daniel Hornung's avatar Daniel Hornung
Browse files

Merge branch 'f-doc-loadfiles' into 'dev'

F doc loadfiles

See merge request !113
parents 3615193e 47246826
No related branches found
No related tags found
2 merge requests!128MNT: Added a warning when column metadata is not configured, and a better...,!113F doc loadfiles
Pipeline #53967 passed
...@@ -20,6 +20,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ...@@ -20,6 +20,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Documentation ### ### Documentation ###
* Added documentation of `caosadvancedtools.loadFiles` module.
## [0.12.0] - 2024-07-31 ## ## [0.12.0] - 2024-07-31 ##
### Added ### ### Added ###
......
...@@ -23,6 +23,18 @@ ...@@ -23,6 +23,18 @@
# ** end header # ** end header
# #
"""Utilities to make the LinkAhead server aware of files.
Installation of `caosadvancedtools` also creates an executable script ``linkahead-loadfiles`` which
calls the `loadpath` function. Get the full help with ``linkahead-loadfiles --help``. In short,
that script tells the LinkAhead server to create `FILE` entities for existing files in one branch of
the directory tree. It is necessary that this directory is already visible for the server (for
example because it is defined as ``extroot`` in the LinkAhead profile).
"""
from __future__ import annotations
import argparse import argparse
import logging import logging
import os import os
...@@ -31,28 +43,48 @@ import sys ...@@ -31,28 +43,48 @@ import sys
import re import re
from argparse import ArgumentParser from argparse import ArgumentParser
from tempfile import NamedTemporaryFile from tempfile import NamedTemporaryFile
from typing import Union
import shutil
import caosdb as db import caosdb as db
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
timeout_fallback = 20 timeout_fallback = 20
def convert_size(size): def convert_size(size: int):
"""Convert `size` from bytes to a human-readable file size in KB,
MB, ...
"""
if (size == 0): if (size == 0):
return '0B' return '0B'
size_name = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB") size_name = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB")
i = int(math.floor(math.log(size, 1000))) index = int(math.floor(math.log(size, 1000)))
p = math.pow(1000, i) p = math.pow(1000, index)
s = round(size / p, 2) s = round(size / p, 2)
return '%s %s' % (s, size_name[i]) return f"{s} {size_name[index]}"
def combine_ignore_files(caosdbignore, localignore, dirname=None): def combine_ignore_files(caosdbignore: str, localignore: str, dirname=None) -> str:
"""appends the contents of localignore to caosdbignore and saves the result """Append the contents of localignore to caosdbignore, save the result,
and returns the name and return the name.
Parameters
----------
caosdbignore : str
Path to parent level caosdbignore file
localignore : str
Path to current working directory's local caosdbignore.
dirname : str, optional
The path of the directory to which the temporary combined file
is written. If None is given, `NamedTemporaryFile`'s default
is used. Default is None.
Returns
-------
name : str
Name of the temporary combined caosdbignore file.
""" """
...@@ -66,9 +98,22 @@ def combine_ignore_files(caosdbignore, localignore, dirname=None): ...@@ -66,9 +98,22 @@ def combine_ignore_files(caosdbignore, localignore, dirname=None):
return tmp.name return tmp.name
def compile_file_list(caosdbignore, localpath): def compile_file_list(caosdbignore: str, localpath: str) -> list[str]:
"""creates a list of files that contain all files under localpath except """Create a list of files that contain all files under localpath except
those excluded by caosdbignore those excluded by caosdbignore.
Parameters
----------
caosdbignore : str
Path of caosdbignore file
localpath : str
Path of the directory from which the file list will be compiled.
Returns
-------
file_list : list[str]
List of files in `localpath` after applying the ignore rules
from `caosdbignore`.
""" """
...@@ -76,11 +121,11 @@ def compile_file_list(caosdbignore, localpath): ...@@ -76,11 +121,11 @@ def compile_file_list(caosdbignore, localpath):
matches = parse_gitignore(caosdbignore) matches = parse_gitignore(caosdbignore)
current_ignore = caosdbignore current_ignore = caosdbignore
non_ignored_files = [] non_ignored_files = []
ignore_files = [] ignore_files: list[tuple[str, str]] = []
for root, dirs, files in os.walk(localpath): for root, dirs, files in os.walk(localpath):
# remove local ignore files that do no longer apply to the current subtree (branch switch) # remove local ignore files that do no longer apply to the current subtree (branch switch)
while len(ignore_files) > 0 and not root.startswith(ignore_files[-1][0]): while len(ignore_files) > 0 and not root.startswith(ignore_files[-1][0]):
shutil.os.remove(ignore_files[-1][1]) os.remove(ignore_files[-1][1])
ignore_files.pop() ignore_files.pop()
# use the global one if there are no more local ones # use the global one if there are no more local ones
...@@ -110,10 +155,28 @@ def compile_file_list(caosdbignore, localpath): ...@@ -110,10 +155,28 @@ def compile_file_list(caosdbignore, localpath):
return non_ignored_files return non_ignored_files
def create_re_for_file_list(files, localroot, remoteroot): def create_re_for_file_list(files: list[str], localroot: str, remoteroot: str) -> str:
"""creates a regular expression that matches file paths contained in the """Create a regular expression that matches file paths contained
files argument and all parent directories. The prefix localroot is replaced in the `files` argument and all parent directories. The prefix
by the prefix remoteroot. `localroot` is replaced by the prefix `remoteroot`.
Parameters
----------
files : list[str]
List of file paths to be converted to a regular expression.
localroot : str
Prefix (of the local directory root) to be removed from the
paths in `files`.
remoteroot : str
Prefix (of the LinkAhead filesystem's directory root) to be
prepended to the file paths after the removal of the
`localroot` prefix.
Returns
-------
regexp : str
Regular expression that matches all file paths from `files`
adapted for the remote directory root.
""" """
regexp = "" regexp = ""
...@@ -128,8 +191,46 @@ def create_re_for_file_list(files, localroot, remoteroot): ...@@ -128,8 +191,46 @@ def create_re_for_file_list(files, localroot, remoteroot):
return "^("+regexp[1:]+")$" return "^("+regexp[1:]+")$"
def loadpath(path, include, exclude, prefix, dryrun, forceAllowSymlinks, caosdbignore=None, def loadpath(path: str, include: Union[str, None], exclude: Union[str, None], prefix: str,
localpath=None): dryrun: bool, forceAllowSymlinks: bool, caosdbignore: Union[str, None] = None,
localpath: Union[str, None] = None):
"""Make all files in `path` available to the LinkAhead server as FILE entities.
Notes
-----
Run ``linkahead-loadfiles --help`` for more information and examples.
Parameters
----------
path : str
Path to the directory the files of which are to be made
available as seen by the linkahead server (i.e., the path from
within the Docker container in a typical LinkAhead Control
setup.)
include : str or None
Regular expression matching the files that will be
included. If None, all files are matched. This is ignored if a
`caosdbignore` is provided.
exclude : str or None
Regular expression matching files that are to be excluded.
prefix : str
The prefix under which the files are to be inserted into
LinkAhead's file system.
dryrun : bool
Whether a dryrun should be performed.
forceAllowSymlinks : bool
Whether symlinks in the `path` to be inserted should be
processed.
caosdbignore : str, optional
Path to a caosdbignore file that defines which files shall be
included and which shall not. The syntax is the same as in a
gitignore file. You must also provide the `localpath` option
since the check is done locally. If this is given, any
`include` is ignored.
localpath : str, optional
Path of `path` on the local machine. Only needed in combination with a
``caosdbignore`` file since that is processed locally.
"""
if caosdbignore: if caosdbignore:
# create list of files and create regular expression for small chunks # create list of files and create regular expression for small chunks
...@@ -182,7 +283,11 @@ def loadpath(path, include, exclude, prefix, dryrun, forceAllowSymlinks, caosdbi ...@@ -182,7 +283,11 @@ def loadpath(path, include, exclude, prefix, dryrun, forceAllowSymlinks, caosdbi
def main(argv=None): def main(argv=None):
'''Command line options.''' """Run `loadpath` with the arguments specified on the command
line, extended by the optional `argv` parameter. See ``--help``
for more information.
"""
if argv is None: if argv is None:
argv = sys.argv argv = sys.argv
...@@ -191,7 +296,7 @@ def main(argv=None): ...@@ -191,7 +296,7 @@ def main(argv=None):
# Setup argument parser # Setup argument parser
parser = ArgumentParser(description=""" parser = ArgumentParser(description="""
Make files that the LinkAhead server can see available als FILE entities. Make files that the LinkAhead server can see available as FILE entities.
In a typical scenario where LinkAhead runs in a Docker container and a host directory `mydir` is In a typical scenario where LinkAhead runs in a Docker container and a host directory `mydir` is
mounted as an extroot with name `myext`, loadfiles could be called like this: mounted as an extroot with name `myext`, loadfiles could be called like this:
......
...@@ -181,7 +181,7 @@ a. Properties with primitive data types ...@@ -181,7 +181,7 @@ a. Properties with primitive data types
"date": "2023-06-15", "date": "2023-06-15",
"url": "www.indiscale.com/next", "url": "www.indiscale.com/next",
"duration": 2.5, "duration": 2.5,
"participants": None, "participants": null,
"remote": true "remote": true
} }
] ]
......
...@@ -35,3 +35,30 @@ behavior can be changed by initializing the ``TableImporter`` with ...@@ -35,3 +35,30 @@ behavior can be changed by initializing the ``TableImporter`` with
:py:class:`~caosadvancedtools.datainconsistency.DataInconsistencyError` is :py:class:`~caosadvancedtools.datainconsistency.DataInconsistencyError` is
raised when an empty field is encountered in a column with a non-nullable raised when an empty field is encountered in a column with a non-nullable
integer datatype. integer datatype.
The loadfiles module and executable
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
For making files available to the LinkAhead server as File entities
(see also the server's `file server
<https://docs.indiscale.com/caosdb-server/specification/Fileserver.html>`_
documentation), the LinkAhead Advanced User tools provide the
:py:mod:`~caosadvancedtools.loadFiles` module and a
``linkahead-loadfiles`` executable. Both operate on a path as seen by
the LinkAhead server (i.e., a path within the Docker container in the
typical LinkAhead Control setup) and can be further specified to
include or exclude specific files. In the typical setup, where a
directory is mounted as an *extroot* into the Docker container by
LinkAhead control, running
.. code-block:: sh
linkahead-loadfiles /opt/caosdb/mnt/extroot
makes all files available. Execute
.. code-block:: sh
linkahead-loadfiles --help
for more information and examples.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment