From 3fb770d5077a35f2d0612ab2ffa19bcc6e6107f5 Mon Sep 17 00:00:00 2001 From: Daniel <d.hornung@indiscale.com> Date: Thu, 27 Jun 2024 11:35:04 +0200 Subject: [PATCH] ENH: `datetime_parse` is new transformer function. --- src/caoscrawler/default_transformers.yml | 5 ++++- src/caoscrawler/transformer_functions.py | 26 ++++++++++++++++++++++-- 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/src/caoscrawler/default_transformers.yml b/src/caoscrawler/default_transformers.yml index d0ad2391..61639fe5 100644 --- a/src/caoscrawler/default_transformers.yml +++ b/src/caoscrawler/default_transformers.yml @@ -1,4 +1,4 @@ - +# Lookup table for matching functions and cfood yaml node names. submatch: package: caoscrawler.transformer_functions @@ -9,3 +9,6 @@ split: replace: package: caoscrawler.transformer_functions function: replace +datetime_parse: + package: caoscrawler.transformer_functions + function: datetime_parse diff --git a/src/caoscrawler/transformer_functions.py b/src/caoscrawler/transformer_functions.py index eda9f3c2..2f3b5234 100644 --- a/src/caoscrawler/transformer_functions.py +++ b/src/caoscrawler/transformer_functions.py @@ -20,9 +20,14 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see <https://www.gnu.org/licenses/>. +"""Definition of default transformer functions. + +See https://docs.indiscale.com/caosdb-crawler/converters.html#transform-functions for more +information. + """ -Defnition of default transformer functions. -""" + +import datetime import re from typing import Any @@ -61,3 +66,20 @@ def replace(in_value: Any, in_parameters: dict): if not isinstance(in_value, str): raise RuntimeError("must be string") return in_value.replace(in_parameters['remove'], in_parameters['insert']) + + +def datetime_parse(in_value: str, params: dict): + """Transform text so that it is formatted in a way that LinkAhead can understand it. 
+ + +Parameters +========== + +- datetime_format: str, optional + A format string using the ``datetime`` specification: + https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes + """ + fmt_default = "%Y-%m-%dT%H:%M:%S" + fmt = params.get("datetime_format", fmt_default) + dt_str = datetime.datetime.strptime(in_value, fmt).strftime(fmt_default) + return dt_str -- GitLab