diff --git a/CHANGELOG.md b/CHANGELOG.md
index 49b2e2c7519894f8922ab7947ad5002979638ce7..d900ae9552fd364a2bbb0604bde450cbf5385ffa 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -21,6 +21,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Fixed ###
 
+* [93](https://gitlab.com/linkahead/linkahead-crawler/-/issues/93) cfood.yaml does not allow umlaut in $expression
+
 ### Security ###
 
 ### Documentation ###
diff --git a/src/caoscrawler/converters.py b/src/caoscrawler/converters.py
index e0ca0f9bff77ba1ecc63f4d102d6d9869fb11cb0..1a7f4d56da65c2aa3c436435f3734754ea037cc0 100644
--- a/src/caoscrawler/converters.py
+++ b/src/caoscrawler/converters.py
@@ -53,12 +53,21 @@ from .utils import has_parent
 # by the converters:
 SPECIAL_PROPERTIES = ("description", "name", "id", "path",
                       "file", "checksum", "size")
-SINGLE_VAR_RE = re.compile(r"^\$(\{)?(?P<varname>[0-9a-zA-Z_]+)(\})?$")
+# Variable names start with a letter or underscore (``[^\W\d]`` is "word
+# character, but not a digit"), followed by word characters or dots.  A plain
+# ``\D`` would also accept whitespace, braces or punctuation there.
+ID_PATTERN = r"[^\W\d][.\w]*"
+# Matches a string that consists of exactly one variable: $name or ${name}.
+SINGLE_VAR_RE = re.compile(r"^\$(\{)?(?P<varname>" + ID_PATTERN + r")(\})?$")
 logger = logging.getLogger(__name__)
 
 
 class CrawlerTemplate(Template):
-    braceidpattern = r"(?a:[_a-z][_\.a-z0-9]*)"
+    # Compared to the default pattern of string.Template, this also allows dots
+    # and non-ASCII word characters inside ``${...}``.
+    # See: https://docs.python.org/3/library/string.html#template-strings
+    # The default flags are re.IGNORECASE.
+    braceidpattern = ID_PATTERN
 
 
 def _only_max(children_with_keys):
diff --git a/unittests/test_issues.py b/unittests/test_issues.py
index cbbe9cabcfd17daaf07165757351f00dc051eeab..e6fe06efcd055945b9a13576b83f6bf470eaccdd 100644
--- a/unittests/test_issues.py
+++ b/unittests/test_issues.py
@@ -22,13 +22,11 @@
 
 from pytest import mark
 
-import caosdb as db
-
+from caoscrawler.converters import replace_variables, CrawlerTemplate
 from caoscrawler.crawl import Crawler
-from caoscrawler.identifiable import Identifiable
-from caoscrawler.identifiable_adapters import CaosDBIdentifiableAdapter
-from caoscrawler.structure_elements import DictElement
+from caoscrawler.structure_elements import DictElement
 
+from caoscrawler.stores import GeneralStore
 from caoscrawler.scanner import create_converter_registry, scan_structure_elements
 
 
@@ -110,3 +108,46 @@ def test_list_datatypes():
     assert isinstance(records[0].get_property("Subject").value, list)
     assert records[0].get_property("Subject").datatype is not None
     assert records[0].get_property("Subject").datatype.startswith("LIST")
+
+
+def test_issue_93():
+    """https://gitlab.com/linkahead/linkahead-crawler/-/issues/93
+
+    cfood.yaml does not allow umlaut in $expression"""
+    values = GeneralStore()
+    expressions = [
+        "foo",
+        "foo.bär",
+        "_1",
+        "Ä",
+        "ųøîµ",
+    ]
+    for exp in expressions:
+        values[exp] = f"This is {exp}"
+    # ## Test preliminary check
+    # With braces
+    for exp in expressions:
+        assert replace_variables(f"${{{exp}}}", values) == f"This is {exp}"
+    # Without braces
+    for exp in expressions:
+        assert replace_variables(f"${exp}", values) == f"This is {exp}"
+
+    # ## Test actual replacement
+    for exp in expressions:
+        # as-is
+        propvalue = f"${{{exp}}}"
+        propvalue_template = CrawlerTemplate(propvalue)
+        assert propvalue_template.safe_substitute(**values.get_storage()) == f"This is {exp}"
+
+        # String embedded into context
+        propvalue = f"some text before >> ${{{exp}}} << some text after"
+        propvalue_template = CrawlerTemplate(propvalue)
+        assert (propvalue_template.safe_substitute(**values.get_storage())
+                == f"some text before >> This is {exp} << some text after")
+
+    # ## Identifiers must start with a letter or underscore
+    for invalid in [" foo", "-foo", "1foo"]:
+        propvalue = f"${{{invalid}}}"
+        propvalue_template = CrawlerTemplate(propvalue)
+        assert (propvalue_template.safe_substitute(**{invalid: "replaced"})
+                == propvalue)