diff --git a/src/caoscrawler/converters.py b/src/caoscrawler/converters.py index 285d4cccb8780b400caf806122c4f4c787db7aeb..63879ba0928f7b58ded36a95b612fdf0419e1960 100644 --- a/src/caoscrawler/converters.py +++ b/src/caoscrawler/converters.py @@ -52,12 +52,16 @@ from .utils import has_parent # by the converters: SPECIAL_PROPERTIES = ("description", "name", "id", "path", "file", "checksum", "size") -SINGLE_VAR_RE = re.compile(r"^\$(\{)?(?P<varname>\w+)(\})?$") +ID_PATTERN = r"\D[.\w]*" +SINGLE_VAR_RE = re.compile(r"^\$(\{)?(?P<varname>" + ID_PATTERN + r")(\})?$") logger = logging.getLogger(__name__) class CrawlerTemplate(Template): - braceidpattern = r"(?a:[_a-z][_\.a-z0-9]*)" + # This also adds a dot to the default pattern. + # See: https://docs.python.org/3/library/string.html#template-strings + # Default flags is re.IGNORECASE + braceidpattern = ID_PATTERN def _only_max(children_with_keys): diff --git a/unittests/test_issues.py b/unittests/test_issues.py index 5e7685f5c2d1e08829d28afcc612a5c07eb585aa..1678280555e739bae55819fa7fe42a53c938c4e5 100644 --- a/unittests/test_issues.py +++ b/unittests/test_issues.py @@ -22,7 +22,7 @@ from pytest import mark -from caoscrawler.converters import replace_variables +from caoscrawler.converters import replace_variables, CrawlerTemplate from caoscrawler.crawl import Crawler from caoscrawler.structure_elements import DictElement from caoscrawler.stores import GeneralStore @@ -115,11 +115,35 @@ def test_issue_93(): cfood.yaml does not allow umlaut in $expression""" values = GeneralStore() expressions = [ - "1", + "foo", + "foo.bär", + "_1", "Ä", "ųøîµ", ] for exp in expressions: values[exp] = f"This is {exp}" + # ## Test preliminary check + # With braces + for exp in expressions: + assert replace_variables(f"${{{exp}}}", values) == f"This is {exp}" + # Without braces for exp in expressions: assert replace_variables(f"${exp}", values) == f"This is {exp}" + + # ## Test actual replacement + for exp in expressions: + # as-is + propvalue = f"${{{exp}}}" + propvalue_template = CrawlerTemplate(propvalue) + # from IPython import embed + # embed() + + assert propvalue_template.safe_substitute(**values.get_storage()) == f"This is {exp}" + + # String embedded into context + propvalue = f"some text before >> ${{{exp}}} << some text after" + print(propvalue) + propvalue_template = CrawlerTemplate(propvalue) + assert (propvalue_template.safe_substitute(**values.get_storage()) + == f"some text before >> This is {exp} << some text after")