diff --git a/CHANGELOG.md b/CHANGELOG.md index 8cfed2795f8f91bde5053839b68871cdfbf47eb9..7aade2344e6ac0ed9e66a12ec6c4e3e001ab0905 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed ### +- `_AbstractDictElementConverter` uses `re.DOTALL` for `match_value` + ### Deprecated ### ### Removed ### diff --git a/src/caoscrawler/converters.py b/src/caoscrawler/converters.py index 027944149914fa25b815b96725ca8b62d6bc5e20..f5c923f21d5eb762bbad5a5524bf4cc9c88cfb5c 100644 --- a/src/caoscrawler/converters.py +++ b/src/caoscrawler/converters.py @@ -638,7 +638,7 @@ class _AbstractDictElementConverter(Converter): m1 = re.match(self.definition["match_name"], element.name) if m1 is None: return None - m2 = re.match(self.definition["match_value"], str(element.value)) + m2 = re.match(self.definition["match_value"], str(element.value), re.DOTALL) if m2 is None: return None values = dict() diff --git a/unittests/test_converters.py b/unittests/test_converters.py index 30c5972c4f006aaf9923dfc058c3b861d8b5123b..802483c1d1eaf7f63096591ac7df7362f25f599a 100644 --- a/unittests/test_converters.py +++ b/unittests/test_converters.py @@ -32,6 +32,7 @@ from caoscrawler.converters import (Converter, ConverterValidationError, DictConverter, DirectoryConverter, handle_value, MarkdownFileConverter, JSONFileConverter) +from caoscrawler.converters import _AbstractDictElementConverter from caoscrawler.crawl import Crawler from caoscrawler.stores import GeneralStore from caoscrawler.structure_elements import (File, DictTextElement, @@ -385,3 +386,21 @@ MyElement: list(yaml.safe_load_all(two_doc_yaml))) assert "MyElement" in two_doc_definitions assert two_doc_definitions["MyElement"]["type"] == one_doc_definitions["MyElement"]["type"] + + +def test_abstract_dict_element_converter(): + definition = yaml.safe_load(""" +match_name: text +match_value: .*begin(?P<text>.*)end + """) + converter = _AbstractDictElementConverter( + definition, "test_converter", + None # This is possible when "subtree" is not used + ) + element = DictTextElement("text", """ +begin +bla +end""") + val = converter.match(element) + assert val is not None + assert val["text"] == "\nbla\n" diff --git a/unittests/test_directories/examples_article/SimulationData/2020_climate-model-predict/2020-02-01/README.md b/unittests/test_directories/examples_article/SimulationData/2020_climate-model-predict/2020-02-01/README.md index 0e7726d941d86ca357149f28ef98311b33808f11..0c91d6b5f7601334b84a77328b888d227e779a93 100644 --- a/unittests/test_directories/examples_article/SimulationData/2020_climate-model-predict/2020-02-01/README.md +++ b/unittests/test_directories/examples_article/SimulationData/2020_climate-model-predict/2020-02-01/README.md @@ -1,6 +1,6 @@ --- responsible: AuthorE -description: > +description: >- Code for fitting the predictive model to the training data and for predicting the average annual temperature for all measurement stations