Skip to content
Snippets Groups Projects
Commit 392bd0bf authored by Henrik tom Wörden's avatar Henrik tom Wörden
Browse files

Merge branch 'f-date-converter' into 'dev'

F date converter

See merge request !92
parents 33e26d52 3e18700d
No related branches found
No related tags found
2 merge requests!105REL: v0.4.0,!92F date converter
Checking pipeline status
......@@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased] ##
### Added ###
- DateElementConverter: allows interpreting text as a date object
### Changed ###
......
......@@ -1101,3 +1101,22 @@ class CSVTableConverter(TableConverter):
child_elements.append(
DictElement(str(index), row.to_dict()))
return child_elements
class DateElementConverter(TextElementConverter):
    """Convert text in various date formats into Python ``datetime.date`` objects.

    The text to be parsed must be contained in the "date" group of the match.
    The format string can be supplied under "date_format" in the converter
    definition; when that key is absent, "%Y-%m-%d" is used. Parsing is done
    with ``datetime.datetime.strptime``, so see the ``datetime`` documentation
    for information on how to create the format string.
    """

    def match(self, element: StructureElement):
        """Match ``element`` via the parent converter and parse the "date" group.

        Returns the result of the parent ``match``. If that result is not
        ``None`` and contains a "date" entry, the entry is replaced by the
        ``datetime.date`` parsed from it.

        Raises:
            ValueError: propagated from ``strptime`` when the "date" text does
                not conform to the configured format.
        """
        matches = super().match(element)
        if matches is not None and "date" in matches:
            # Fall back to the ISO-like default when no format is configured.
            date_format = self.definition.get("date_format", "%Y-%m-%d")
            matches["date"] = datetime.datetime.strptime(
                matches["date"], date_format
            ).date()
        return matches
......@@ -374,6 +374,9 @@ class Crawler(object):
"TextElement": {
"converter": "TextElementConverter",
"package": "caoscrawler.converters"},
"Date": {
"converter": "DateElementConverter",
"package": "caoscrawler.converters"},
"DictIntegerElement": {
"converter": "IntegerElementConverter",
"package": "caoscrawler.converters"},
......
......@@ -28,12 +28,13 @@ import yaml
import importlib
import os
from itertools import product
import datetime
import pytest
import yaml
from caoscrawler.converters import (Converter, ConverterValidationError, DictElementConverter,
DirectoryConverter, DictIntegerElementConverter,
handle_value, MarkdownFileConverter,
handle_value, MarkdownFileConverter, DateElementConverter,
FloatElementConverter, IntegerElementConverter,
JSONFileConverter, YAMLFileConverter)
from caoscrawler.converters import _AbstractScalarValueElementConverter
......@@ -55,6 +56,9 @@ def converter_registry():
"MarkdownFile": {
"converter": "MarkdownFileConverter",
"package": "caoscrawler.converters"},
"Date": {
"converter": "DateElementConverter",
"package": "caoscrawler.converters"},
"DictElement": {
"converter": "DictElementConverter",
"package": "caoscrawler.converters"},
......@@ -64,9 +68,6 @@ def converter_registry():
"ListElement": {
"converter": "ListElementConverter",
"package": "caoscrawler.converters"},
"TextElement": {
"converter": "TextElementConverter",
"package": "caoscrawler.converters"},
"JSONFile": {
"converter": "JSONFileConverter",
"package": "caoscrawler.converters"},
......@@ -570,3 +571,27 @@ def test_match_debug(converter_registry, capsys):
assert ".*" in captured.out
# the empty result set
assert "{}" in captured.out
def test_date_converter(converter_registry):
    """Check that DateElementConverter turns the "date" group into a date.

    Covers the default format ("%Y-%m-%d"), a custom "date_format" given in the
    definition, and the case where the text does not match at all.

    ``converter_registry`` is requested as a pytest fixture so that the
    converters receive the registry itself rather than the fixture function.
    """
    # No "date_format" in the definition: the default format is used.
    default_converter = DateElementConverter(
        definition={"match_value": "(?P<date>.*)"},
        name="conv",
        converter_registry=converter_registry)
    matches = default_converter.match(TextElement("text", "2022-11-11"))
    assert "date" in matches
    assert isinstance(matches["date"], datetime.date)
    assert matches["date"].year == 2022

    # Custom format with a two-digit year.
    custom_converter = DateElementConverter(
        definition={"match_value": r"(?P<date>(\d|-)+)",
                    "date_format": "%y-%m-%d"},
        name="conv",
        converter_registry=converter_registry)
    matches = custom_converter.match(TextElement("text", "22-11-11"))
    assert "date" in matches
    assert isinstance(matches["date"], datetime.date)
    assert matches["date"].year == 2022

    # Text that does not satisfy "match_value" yields no match.
    matches = custom_converter.match(TextElement("text", "alve"))
    assert matches is None
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment