Skip to content
Snippets Groups Projects
Commit a5d2fc72 authored by Florian Spreckelsen's avatar Florian Spreckelsen
Browse files

Merge branch 'f-debug-match' into 'dev'

Add debugging for matching of converters

See merge request !84
parents c3cd3687 fc5bdc83
No related branches found
No related tags found
2 merge requests!91Release 0.3,!84Add debugging for matching of converters
Pipeline #32795 passed
......@@ -20,6 +20,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- JSON schema validation can also be used in the DictElementConverter
- YAMLFileConverter class; to parse YAML files
- Variables can now be substituted within the definition of yaml macros
- Debugging option for the match step of Converters (`debug_match` key in the converter definition)
### Changed ###
......
......@@ -23,7 +23,9 @@
# ** end header
#
from __future__ import annotations
from jsonschema import validate, ValidationError
import os
import re
import datetime
......@@ -390,6 +392,56 @@ class Converter(object, metaclass=ABCMeta):
"""
pass
@staticmethod
def _debug_matching_template(name: str, regexp: list[str], matched: list[str],
                             result: Optional[dict]):
    """Print a human-readable report of one match attempt to stdout.

    Parameters
    ----------
    name : str
        Label shown in the header line of the report.
    regexp : list[str]
        Patterns that were matched against; paired positionally with
        ``matched``.
    matched : list[str]
        Strings the patterns were applied to.
    result : dict, optional
        Outcome of the match; ``None`` is reported as "No match", anything
        else is printed below "Matched groups:".
    """
    print("\n--------", name, "-----------")
    # One section per (pattern, candidate) pair; zip stops at the shorter list.
    for pattern, candidate in zip(regexp, matched):
        print("matching against:\n" + pattern)
        print("matching:\n" + candidate)
        print("---------")
    if result is not None:
        print("Matched groups:")
        print(result)
    else:
        print("No match")
    print("----------------------------------------")
@staticmethod
def debug_matching(kind=None):
    """Build a decorator that adds optional debug output to a Converter's ``match``.

    The decorated method is always called first and its return value is
    passed through unchanged. If the converter's ``definition`` contains a
    truthy ``debug_match`` entry, the pattern(s), the string(s) they were
    applied to and the match result are handed to
    ``self._debug_matching_template`` for printing.

    Parameters
    ----------
    kind : str, optional
        Selects what is reported:

        - ``"name"``: ``definition["match"]`` against ``element.name``.
        - ``"name_and_value"``: ``definition["match"]``,
          ``definition["match_name"]`` and ``definition["match_value"]``
          against ``element.name``, ``element.name`` and
          ``str(element.value)``.
        - anything else: ``definition["match"]`` against ``str(element)``.

        Keys missing from the definition are reported as an empty pattern,
        so that enabling debug output never raises on its own.

    Returns
    -------
    callable
        A decorator for ``match(self, element)`` methods.
    """
    # Function-scope import: keeps this block self-contained.
    from functools import wraps

    def debug_matching_decorator(func):
        """Wrap ``func`` so that its match result is optionally reported."""
        @wraps(func)  # keep name/docstring of the wrapped match method
        def inner(self, element: "StructureElement"):
            mr = func(self, element)
            if not self.definition.get("debug_match"):
                return mr

            definition = self.definition
            if kind == "name":
                regexp = [definition.get("match", "")]
                matched = [element.name]
            elif kind == "name_and_value":
                regexp = [definition.get("match", ""),
                          definition.get("match_name", ""),
                          definition.get("match_value", "")]
                matched = [element.name, element.name, str(element.value)]
            else:
                # The template zips both arguments, so they must be lists;
                # bare strings would be paired character by character.
                regexp = [definition.get("match", "")]
                matched = [str(element)]

            self._debug_matching_template(name=self.__class__.__name__,
                                          regexp=regexp,
                                          matched=matched,
                                          result=mr)
            return mr
        return inner
    return debug_matching_decorator
@abstractmethod
def match(self, element: StructureElement) -> Optional[dict]:
"""
......@@ -425,6 +477,7 @@ class DirectoryConverter(Converter):
# TODO basically all converters implement such a match function. Shouldn't this be the one
# of the parent class and subclasses can overwrite if needed?
@Converter.debug_matching("name")
def match(self, element: StructureElement):
# TODO: See comment on types and inheritance
if not isinstance(element, Directory):
......@@ -463,10 +516,10 @@ class SimpleFileConverter(Converter):
def typecheck(self, element: StructureElement):
    """Tell whether ``element`` is an instance of ``File``."""
    return isinstance(element, File)
def create_children(self, generalStore: GeneralStore, element: StructureElement):
    """Return the children created for ``element``: always an empty list.

    Neither ``generalStore`` nor ``element`` is used by this implementation.
    """
    return []
@Converter.debug_matching("name")
def match(self, element: StructureElement):
# TODO: See comment on types and inheritance
if not isinstance(element, File):
......@@ -512,6 +565,7 @@ class MarkdownFileConverter(Converter):
def typecheck(self, element: StructureElement):
    """Tell whether ``element`` is an instance of ``File``."""
    return isinstance(element, File)
@Converter.debug_matching("name")
def match(self, element: StructureElement):
# TODO: See comment on types and inheritance
if not isinstance(element, File):
......@@ -601,6 +655,7 @@ class DictElementConverter(Converter):
def typecheck(self, element: StructureElement):
    """Tell whether ``element`` is an instance of ``DictElement``."""
    return isinstance(element, DictElement)
@Converter.debug_matching("name_and_value")
def match(self, element: StructureElement):
"""
Allways matches if the element has the right type.
......@@ -629,6 +684,7 @@ class JSONFileConverter(Converter):
def typecheck(self, element: StructureElement):
    """Tell whether ``element`` is an instance of ``File``."""
    return isinstance(element, File)
@Converter.debug_matching("name")
def match(self, element: StructureElement):
# TODO: See comment on types and inheritance
if not self.typecheck(element):
......@@ -656,6 +712,7 @@ class YAMLFileConverter(Converter):
def typecheck(self, element: StructureElement):
    """Tell whether ``element`` is an instance of ``File``."""
    return isinstance(element, File)
@Converter.debug_matching("name")
def match(self, element: StructureElement):
# TODO: See comment on types and inheritance
if not self.typecheck(element):
......@@ -749,6 +806,7 @@ class _AbstractScalarValueElementConverter(Converter):
self.definition)
return self._typecheck(element, allowed_matches)
@Converter.debug_matching("name_and_value")
def match(self, element: StructureElement):
"""
Try to match the given structure element.
......@@ -900,6 +958,7 @@ class ListElementConverter(Converter):
def typecheck(self, element: StructureElement):
    """Tell whether ``element`` is an instance of ``ListElement``."""
    return isinstance(element, ListElement)
@Converter.debug_matching("name")
def match(self, element: StructureElement):
# TODO: See comment on types and inheritance
if not isinstance(element, ListElement):
......@@ -956,6 +1015,7 @@ class TableConverter(Converter):
def typecheck(self, element: StructureElement):
    """Tell whether ``element`` is an instance of ``File``."""
    return isinstance(element, File)
@Converter.debug_matching("name")
def match(self, element: StructureElement):
# TODO: See comment on types and inheritance
if not isinstance(element, File):
......
......@@ -483,3 +483,22 @@ Let's formulate that using `create_records` (again, `dir_name` is constant here)
keys_modified = create_records(values, records,
record_def)
Debugging
=========
You can add the key `debug_match` to the definition of a Converter in order to create debugging
output for the match step. The following snippet illustrates this:
.. code-block:: yaml
DirConverter:
type: Directory
match: (?P<dir_name>.*)
debug_match: True
records:
Project:
identifier: project_name
Whenever this Converter tries to match a StructureElement, it prints which regular expression(s)
were matched against which string(s) and what the result was.
......@@ -358,7 +358,7 @@ def test_variable_replacement():
assert handle_value(["$a", "$b"], values) == (["4", "68"], "single")
def test_filter_children_of_directory(converter_registry):
def test_filter_children_of_directory(converter_registry, capsys):
"""Verify that children (i.e., files) in a directory are filtered or sorted
correctly.
......@@ -369,6 +369,7 @@ def test_filter_children_of_directory(converter_registry):
dc = DirectoryConverter(
definition={
"match": "(.*)",
"debug_match": True,
"filter": {
"expr": "test_(?P<date>[0-9]{4,4}-[0-9]{2,2}-[0-9]{2,2}).json",
"group": "date",
......@@ -381,6 +382,14 @@ def test_filter_children_of_directory(converter_registry):
m = dc.match(test_dir)
assert m is not None
# checking debug output
captured = capsys.readouterr()
# the name
assert "examples_filter_children" in captured.out
# the regexp
assert "(.*)" in captured.out
# the empty result set
assert "{}" in captured.out
# This should only contain the youngest json and the csv that doesn't match
# the above filter expression.
......@@ -491,10 +500,11 @@ end""")
assert val["text"] == "\nbla\n"
def test_converter_value_match(converter_registry):
def test_converter_value_match(converter_registry, capsys):
# test with defaults
dc = FloatElementConverter(
definition={
"debug_match": True,
"match_name": "(.*)",
"match_value": "(.*)",
},
......@@ -503,6 +513,14 @@ def test_converter_value_match(converter_registry):
)
m = dc.match(IntegerElement(name="a", value=4))
assert m is not None
# checking debug output
captured = capsys.readouterr()
# the name
assert "a" in captured.out
# the regexp
assert "(.*)" in captured.out
# the empty result set
assert "{}" in captured.out
# overwrite default with no match for int
dc = FloatElementConverter(
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment