diff --git a/CHANGELOG.md b/CHANGELOG.md index 40c9d73f487a6b57c093a83939d24da5fda48a9a..9358c172f922f292d01d8c485514123c85e2a019 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed ### +- `_AbstractDictElementConverter` uses `re.DOTALL` for `match_value` + ### Deprecated ### ### Removed ### diff --git a/src/caoscrawler/converters.py b/src/caoscrawler/converters.py index 027944149914fa25b815b96725ca8b62d6bc5e20..f5c923f21d5eb762bbad5a5524bf4cc9c88cfb5c 100644 --- a/src/caoscrawler/converters.py +++ b/src/caoscrawler/converters.py @@ -638,7 +638,7 @@ class _AbstractDictElementConverter(Converter): m1 = re.match(self.definition["match_name"], element.name) if m1 is None: return None - m2 = re.match(self.definition["match_value"], str(element.value)) + m2 = re.match(self.definition["match_value"], str(element.value), re.DOTALL) if m2 is None: return None values = dict() diff --git a/src/doc/macros.rst b/src/doc/macros.rst index 569b8474c98ff8f5f5a4f2eeface10ffc1b7a849..d3a3e9b9634a4e1d72228dd46692a824e1d5acfd 100644 --- a/src/doc/macros.rst +++ b/src/doc/macros.rst @@ -56,15 +56,43 @@ The same version using cfood macros could be defined as follows: - name: README filename: ^README.md$ - The "MarkdownFile" key and its value will be replaced by everything that is given below "definition" in the Macro. + +The expanded version of `ExperimentalData` will look like: + +.. _example_files_2_expanded: +.. code-block:: yaml + ExperimentalData: + match: ExperimentalData + subtree: + README_filename: + match: ^README.md$ + records: + README: + file: README_filename + parents: + - MarkdownFile + path: README_filename + role: File + type: SimpleFile + type: Directory + +This :ref:`example<example_files_2>` can also be found in the macro unit tests (see :func:`unittests.test_macros.test_documentation_example_2`). 
Complex Example =============== +The following, more complex example demonstrates the use +of macro variable substitutions that generate crawler variable substitutions: + +- `$$$nodename` will lead to a macro variable substitution of variable `$nodename` during macro expansion. +- `$$` will be turned into `$` +- So in the crawler cfood, the string will appear as `$value` if variable `nodename` is set to `value` when using the macro. + + .. _example_1: .. code-block:: yaml @@ -88,3 +116,118 @@ Complex Example file: $$$nodename Simulation: $recordtype: +$File + +The expanded version of :ref:`example<example_1>` can be seen in :ref:`example<example_1_expanded>`. + + +.. _example_1_expanded: +.. code-block:: yaml + + SimulationData: + match: SimulationData + subtree: + Dataset: + match: .* + records: + File: + file: $Dataset + parents: + - DatasetFile + path: $Dataset + role: File + Simulation: + DatasetFile: +$File + type: SimpleFile + type: Directory + +This :ref:`example<example_1>` can also be found in the macro unit tests (see :func:`unittests.test_macros.test_documentation_example_1`). + + + +Using Macros Multiple Times +=========================== + +To use the same macro multiple times in the same yaml node, lists can be used: + +.. _example_multiple: +.. code-block:: yaml + + --- + metadata: + macros: + - !defmacro + name: test_twice + params: + macro_name: default_name + a: 4 + definition: + $macro_name: + something: + a: $a + --- + extroot: !macro + test_twice: + - macro_name: once # <- This is the first replacement of the macro + - macro_name: twice # <- This is the second one, with different arguments + a: 5 + - {} # <- This is the third one, just using default arguments + + +This :ref:`example<example_multiple>` is taken from the macro unit tests (see :func:`unittests.test_macros.test_use_macro_twice`). + +The example will be expanded to: + +.. _example_multiple_expanded: +.. 
code-block:: yaml + + extroot: + default_name: + something: + a: '4' + once: + something: + a: '4' + twice: + something: + a: '5' + + + + +Limitation +---------- + +Currently, it is not possible to use the same macro twice in the same yaml node at different +positions. Consider: + +.. _example_multiple_limitation: +.. code-block:: yaml + + --- + metadata: + macros: + - !defmacro + name: test_twice + params: + macro_name: default_name + a: 4 + definition: + $macro_name: + something: + a: $a + --- + extroot: !macro + test_twice: + - macro_name: once # <- This is the first replacement of the macro + + Other_node: + type: test + + test_twice: # This is NOT possible as each + # dictionary key can only appear once in a yaml node. + - macro_name: twice # <- This is the second one, with different arguments + a: 5 + - {} # <- This is the third one, just using default arguments + +However, this should not be a real limitation, as the crawler is designed in such a way +that the order of the nodes on the same level should not matter. 
diff --git a/unittests/test_converters.py b/unittests/test_converters.py index 30c5972c4f006aaf9923dfc058c3b861d8b5123b..802483c1d1eaf7f63096591ac7df7362f25f599a 100644 --- a/unittests/test_converters.py +++ b/unittests/test_converters.py @@ -32,6 +32,7 @@ from caoscrawler.converters import (Converter, ConverterValidationError, DictConverter, DirectoryConverter, handle_value, MarkdownFileConverter, JSONFileConverter) +from caoscrawler.converters import _AbstractDictElementConverter from caoscrawler.crawl import Crawler from caoscrawler.stores import GeneralStore from caoscrawler.structure_elements import (File, DictTextElement, @@ -385,3 +386,21 @@ MyElement: list(yaml.safe_load_all(two_doc_yaml))) assert "MyElement" in two_doc_definitions assert two_doc_definitions["MyElement"]["type"] == one_doc_definitions["MyElement"]["type"] + + +def test_abstract_dict_element_converter(): + definition = yaml.safe_load(""" +match_name: text +match_value: .*begin(?P<text>.*)end + """) + converter = _AbstractDictElementConverter( + definition, "test_converter", + None # This is possible when "subtree" is not used + ) + element = DictTextElement("text", """ +begin +bla +end""") + val = converter.match(element) + assert val is not None + assert val["text"] == "\nbla\n" diff --git a/unittests/test_directories/examples_article/SimulationData/2020_climate-model-predict/2020-02-01/README.md b/unittests/test_directories/examples_article/SimulationData/2020_climate-model-predict/2020-02-01/README.md index 0e7726d941d86ca357149f28ef98311b33808f11..0c91d6b5f7601334b84a77328b888d227e779a93 100644 --- a/unittests/test_directories/examples_article/SimulationData/2020_climate-model-predict/2020-02-01/README.md +++ b/unittests/test_directories/examples_article/SimulationData/2020_climate-model-predict/2020-02-01/README.md @@ -1,6 +1,6 @@ --- responsible: AuthorE -description: > +description: >- Code for fitting the predictive model to the training data and for predicting the average annual 
temperature for all measurement stations diff --git a/unittests/test_macros.py b/unittests/test_macros.py index 7ac34cc7c48df3cb2855d7022119e4775d90c9a6..2934d7902a2f7be6925491f73412b0350265145d 100644 --- a/unittests/test_macros.py +++ b/unittests/test_macros.py @@ -295,7 +295,6 @@ extroot3: !macro assert cfood["extroot3"]["test_four"] is None -# @pytest.mark.xfail(reason="Fix multiple usage of the same macro.") def test_use_macro_twice(): """Test that the same macro can be used twice with different parameters in the same CFood element if the name depends on the parameters. @@ -328,3 +327,85 @@ extroot: !macro assert cfood["extroot"]["once"]["something"]["a"] == "4" assert cfood["extroot"]["twice"]["something"]["a"] == "5" assert cfood["extroot"]["default_name"]["something"]["a"] == "4" + + # Code sample to generate the expanded macro: + # with open("expanded_test_macro.yaml", "w") as f: + # f.write(yaml.dump(cfood)) + + +def test_documentation_example_2(): + + cfood = _temp_file_load(""" +--- +metadata: + macros: + - !defmacro + name: MarkdownFile + params: + name: null + filename: null + definition: + ${name}_filename: + type: SimpleFile + match: $filename + records: + $name: + parents: + - MarkdownFile + role: File + path: ${name}_filename + file: ${name}_filename +--- +ExperimentalData: + type: Directory + match: ExperimentalData + subtree: !macro + MarkdownFile: + - name: README + filename: ^README.md$ + """) + + # Code sample to generate the expanded macro: + # with open("expanded_test_macro.yaml", "w") as f: + # f.write(yaml.dump(cfood)) + + +def test_documentation_example_1(): + + cfood = _temp_file_load(""" +--- +metadata: + macros: + - !defmacro + name: SimulationDatasetFile + params: + match: null + recordtype: null + nodename: null + definition: + $nodename: + match: $match + type: SimpleFile + records: + File: + parents: + - $recordtype + role: File + path: $$$nodename + file: $$$nodename + Simulation: + $recordtype: +$File +--- +SimulationData: + 
type: Directory + match: SimulationData + subtree: !macro + SimulationDatasetFile: + - match: .* + recordtype: DatasetFile + nodename: Dataset + """) + + # Code sample to generate the expanded macro: + # with open("expanded_test_macro.yaml", "w") as f: + # f.write(yaml.dump(cfood))