From d2d35f90d9052ec1b3d6c5884d1734c6c176bf6a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20tom=20W=C3=B6rden?= <h.tomwoerden@indiscale.com> Date: Sat, 12 Nov 2022 19:20:12 +0100 Subject: [PATCH] FIX: several fixes --- integrationtests/basic_example/test_basic.py | 15 ++++++++------- .../extroot/realworld_example/dataset_cfoods.yml | 2 +- src/caoscrawler/converters.py | 9 +++++++++ unittests/scifolder_cfood.yml | 6 +++--- unittests/scifolder_extended.yml | 8 ++++---- unittests/scifolder_extended2.yml | 8 ++++---- 6 files changed, 29 insertions(+), 19 deletions(-) diff --git a/integrationtests/basic_example/test_basic.py b/integrationtests/basic_example/test_basic.py index a3195d4c..4b8ec8f7 100755 --- a/integrationtests/basic_example/test_basic.py +++ b/integrationtests/basic_example/test_basic.py @@ -115,13 +115,14 @@ def test_single_insertion(clear_database, usemodel, crawler, ident): if res[i].parents[0].name == "PyTestInfo": del res[i] filename = rfp("..", "..", "unittests", "records.xml") - with open(filename, "w") as f: - xml = res.to_xml() - # Remove noscript and transaction benchmark: - for tag in ("noscript", "TransactionBenchmark"): - if xml.find(tag) is not None: - xml.remove(xml.find(tag)) - f.write(db.common.utils.xml2str(xml)) + # TODO can we remove this? why should we overwrite the current xml + # with open(filename, "w") as f: + # xml = res.to_xml() + # # Remove noscript and transaction benchmark: + # for tag in ("noscript", "TransactionBenchmark"): + # if xml.find(tag) is not None: + # xml.remove(xml.find(tag)) + # f.write(db.common.utils.xml2str(xml)) assert len(ins) == 18 assert len(ups) == 0 diff --git a/integrationtests/test_data/extroot/realworld_example/dataset_cfoods.yml b/integrationtests/test_data/extroot/realworld_example/dataset_cfoods.yml index 7deebf6e..69cb53d4 100644 --- a/integrationtests/test_data/extroot/realworld_example/dataset_cfoods.yml +++ b/integrationtests/test_data/extroot/realworld_example/dataset_cfoods.yml @@ -211,7 +211,7 @@ Data: subtree: related_to_doi_element: type: TextElement - match: "(?P<related_to_doi>).*" + match_value: "(?P<related_to_doi>).*" records: Dataset: related_to_dois: +$related_to_doi diff --git a/src/caoscrawler/converters.py b/src/caoscrawler/converters.py index 88624c74..d80bf445 100644 --- a/src/caoscrawler/converters.py +++ b/src/caoscrawler/converters.py @@ -766,6 +766,15 @@ class TextElementConverter(_AbstractScalarValueElementConverter): "accept_float": True, } + def __init__(self, definition, *args, **kwargs): + if "match" in definition: + raise ValueError(""" +The 'match' key will in future be used to match a potential name of a TextElement. Please use +the 'match_value' key to match the value of the TextElement and 'match_name' for matching the name. +""") + + super().__init__(definition, *args, **kwargs) + class DictTextElementConverter(TextElementConverter): def __init__(self, *args, **kwargs): diff --git a/unittests/scifolder_cfood.yml b/unittests/scifolder_cfood.yml index 90f19344..74fd0275 100644 --- a/unittests/scifolder_cfood.yml +++ b/unittests/scifolder_cfood.yml @@ -42,14 +42,14 @@ Data: # name of the converter # how to make match case insensitive? subtree: description: - type: DictTextElement + type: TextElement match_value: (?P<description>.*) match_name: description records: Measurement: description: $description responsible_single: - type: DictTextElement + type: TextElement match_name: responsible match_value: &person_regexp ((?P<first_name>.+) )?(?P<last_name>.+) records: &responsible_records @@ -68,7 +68,7 @@ Data: # name of the converter subtree: Person: type: TextElement - match: *person_regexp + match_value: *person_regexp records: *responsible_records ExperimentalData: # name of the converter diff --git a/unittests/scifolder_extended.yml b/unittests/scifolder_extended.yml index 9bab612b..26f51067 100644 --- a/unittests/scifolder_extended.yml +++ b/unittests/scifolder_extended.yml @@ -55,14 +55,14 @@ Data: # name of the converter subtree: description: - type: DictTextElement + type: TextElement match_value: (?P<description>.*) match_name: description records: Measurement: description: $description responsible_single: - type: DictTextElement + type: TextElement match_name: responsible match_value: &person_regexp ((?P<first_name>.+) )?(?P<last_name>.+) records: &responsible_records @@ -76,12 +76,12 @@ Data: # name of the converter # "responsible" belonging to Measurement. responsible_list: - type: DictListElement + type: ListElement match_name: responsible subtree: Person: type: TextElement - match: *person_regexp + match_value: *person_regexp records: *responsible_records # sources_list: diff --git a/unittests/scifolder_extended2.yml b/unittests/scifolder_extended2.yml index 969325e9..a189e79c 100644 --- a/unittests/scifolder_extended2.yml +++ b/unittests/scifolder_extended2.yml @@ -56,14 +56,14 @@ Data: # name of the converter subtree: description: - type: DictTextElement + type: TextElement match_value: (?P<description>.*) match_name: description records: Measurement: description: $description responsible_single: - type: DictTextElement + type: TextElement match_name: responsible match_value: &person_regexp ((?P<first_name>.+) )?(?P<last_name>.+) records: &responsible_records @@ -77,12 +77,12 @@ Data: # name of the converter # "responsible" belonging to Measurement. responsible_list: - type: DictListElement + type: ListElement match_name: responsible subtree: Person: type: TextElement - match: *person_regexp + match_value: *person_regexp records: *responsible_records # sources_list: -- GitLab