From b3f144696cdd5f21e6f53aa2571a04a55029b614 Mon Sep 17 00:00:00 2001 From: Daniel <d.hornung@indiscale.com> Date: Wed, 29 May 2024 12:02:00 +0200 Subject: [PATCH] FIX: Better ID pattern, refactored a bit, added tests. --- src/caoscrawler/converters.py | 8 ++++---- unittests/test_issues.py | 33 ++++++++++++++++++++------------- 2 files changed, 24 insertions(+), 17 deletions(-) diff --git a/src/caoscrawler/converters.py b/src/caoscrawler/converters.py index cce5146e..1a7f4d56 100644 --- a/src/caoscrawler/converters.py +++ b/src/caoscrawler/converters.py @@ -53,16 +53,16 @@ from .utils import has_parent # by the converters: SPECIAL_PROPERTIES = ("description", "name", "id", "path", "file", "checksum", "size") -# This RE is used to check whether a value is just a single variable: -SINGLE_VAR_RE = re.compile(r"^\$(\{)?(?P<varname>\w+)(\})?$") +ID_PATTERN = r"\D[.\w]*" +SINGLE_VAR_RE = re.compile(r"^\$(\{)?(?P<varname>" + ID_PATTERN + r")(\})?$") logger = logging.getLogger(__name__) class CrawlerTemplate(Template): - # This adds a dot to the default pattern. + # This also adds a dot to the default pattern. # See: https://docs.python.org/3/library/string.html#template-strings # Default flags is re.IGNORECASE - braceidpattern = r"(?a:[_a-z][_\.a-z0-9]*)" + braceidpattern = ID_PATTERN def _only_max(children_with_keys): diff --git a/unittests/test_issues.py b/unittests/test_issues.py index efc170ed..e6fe06ef 100644 --- a/unittests/test_issues.py +++ b/unittests/test_issues.py @@ -116,12 +116,15 @@ def test_issue_93(): cfood.yaml does not allow umlaut in $expression""" values = GeneralStore() expressions = [ - "1", + "foo", + "foo.bär", + "_1", "Ä", "ųøîµ", ] for exp in expressions: values[exp] = f"This is {exp}" + # ## Test preliminary check # With braces for exp in expressions: assert replace_variables(f"${{{exp}}}", values) == f"This is {exp}" @@ -129,15 +132,19 @@ def test_issue_93(): for exp in expressions: assert replace_variables(f"${exp}", values) == f"This is {exp}" - -def test_crawler_template(): - temp = CrawlerTemplate("$bla") - assert temp.safe_substitute(**{"bla": "test"}) == "test" - - # Umlauts are not replaced, because they are not contained - # in the CrawlerTemplate's braceidpattern: - temp = CrawlerTemplate("$blä") - assert temp.safe_substitute(**{"blä": "test"}) == "$blä" - - temp = CrawlerTemplate("${blä}") - assert temp.safe_substitute(**{"blä": "test"}) == "${blä}" + # ## Test actual replacement + for exp in expressions: + # as-is + propvalue = f"${{{exp}}}" + propvalue_template = CrawlerTemplate(propvalue) + # from IPython import embed + # embed() + + assert propvalue_template.safe_substitute(**values.get_storage()) == f"This is {exp}" + + # String embedded into context + propvalue = f"some text before >> ${{{exp}}} << some text after" + print(propvalue) + propvalue_template = CrawlerTemplate(propvalue) + assert (propvalue_template.safe_substitute(**values.get_storage()) + == f"some text before >> This is {exp} << some text after") -- GitLab