import datetime


def datetime_parse(in_value: str, params: dict) -> str:
    """Transform text so that it is formatted in a way that LinkAhead can understand it.

    The text is parsed with ``datetime.datetime.strptime`` and written back out in the
    fixed layout ``%Y-%m-%dT%H:%M:%S``.

    Parameters
    ----------
    in_value : str
        The date/time text to be parsed.
    params : dict
        Options of the transformer.  ``None`` is treated like an empty dict.  The
        following key is evaluated:

        - datetime_format: str, optional
            A format string using the ``datetime`` specification:
            https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes
            Defaults to ``%Y-%m-%dT%H:%M:%S``.

    Returns
    -------
    str
        The parsed date/time, formatted as ``%Y-%m-%dT%H:%M:%S``.

    Raises
    ------
    RuntimeError
        If ``in_value`` is not a string.
    ValueError
        If ``in_value`` does not match the format (raised by ``strptime``).
    """
    # Same guard and message as the other string transformers in this module.
    if not isinstance(in_value, str):
        raise RuntimeError("must be string")

    fmt_default = "%Y-%m-%dT%H:%M:%S"
    # NOTE(review): a transformer listed without any options presumably arrives here as
    # ``None`` instead of an empty dict -- confirm against the caller.
    options = params if params is not None else {}
    fmt = options.get("datetime_format", fmt_default)
    if fmt is None:
        # An explicitly empty ``datetime_format`` entry means "use the default".
        fmt = fmt_default

    return datetime.datetime.strptime(in_value, fmt).strftime(fmt_default)