Skip to content
Snippets Groups Projects
Verified Commit a78d24e5 authored by Daniel Hornung's avatar Daniel Hornung
Browse files

ENH: `date_parse` transformer function

parent 3a8531e7
Branches
Tags
2 merge requests: !178 "FIX: #96 Better error output for crawl.py script." and !176 "misc. small changes"
This commit is part of merge request !176. Comments created here will be created in the context of that merge request.
...@@ -1246,6 +1246,7 @@ class DateElementConverter(TextElementConverter): ...@@ -1246,6 +1246,7 @@ class DateElementConverter(TextElementConverter):
""" """
# TODO make `date` parameter name configurable
def match(self, element: StructureElement): def match(self, element: StructureElement):
matches = super().match(element) matches = super().match(element)
if matches is not None and "date" in matches: if matches is not None and "date" in matches:
...@@ -1266,7 +1267,7 @@ https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-cod ...@@ -1266,7 +1267,7 @@ https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-cod
""" """
# TODO make `val` configurable # TODO make `val` parameter name configurable
def match(self, element: StructureElement): def match(self, element: StructureElement):
matches = super().match(element) matches = super().match(element)
if matches is not None and "val" in matches: if matches is not None and "val" in matches:
......
...@@ -9,6 +9,9 @@ split: ...@@ -9,6 +9,9 @@ split:
replace: replace:
package: caoscrawler.transformer_functions package: caoscrawler.transformer_functions
function: replace function: replace
date_parse:
package: caoscrawler.transformer_functions
function: date_parse
datetime_parse: datetime_parse:
package: caoscrawler.transformer_functions package: caoscrawler.transformer_functions
function: datetime_parse function: datetime_parse
...@@ -68,6 +68,22 @@ def replace(in_value: Any, in_parameters: dict): ...@@ -68,6 +68,22 @@ def replace(in_value: Any, in_parameters: dict):
return in_value.replace(in_parameters['remove'], in_parameters['insert']) return in_value.replace(in_parameters['remove'], in_parameters['insert'])
def date_parse(in_value: str, params: dict) -> str:
    """Transform text so that it is formatted in a way that LinkAhead can understand it.

    The input is parsed with the configured format and re-emitted as an ISO
    calendar date (``YYYY-MM-DD``).

    Parameters
    ==========

    in_value: str
      The text that contains the date.

    params: dict
      Transformer parameters.  Recognized key:

      - date_format: str, optional
        A format string using the ``datetime`` specification:
        https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes
        Defaults to ``%Y-%m-%d``.

    Returns
    =======
    out: str
      The date formatted as ``YYYY-MM-DD``.

    Raises
    ======
    ValueError
      If ``in_value`` does not match the format string.
    """
    fmt_default = "%Y-%m-%d"
    # Tolerate a missing parameter mapping (e.g. a transformer configured
    # without any parameters) and fall back to the default format.
    fmt = (params or {}).get("date_format", fmt_default)
    parsed = datetime.datetime.strptime(in_value, fmt)
    # `date.isoformat()` always yields a zero-padded four-digit year, whereas
    # `strftime("%Y")` is platform dependent for years below 1000.
    return parsed.date().isoformat()
def datetime_parse(in_value: str, params: dict): def datetime_parse(in_value: str, params: dict):
"""Transform text so that it is formatted in a way that LinkAhead can understand it. """Transform text so that it is formatted in a way that LinkAhead can understand it.
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment