diff --git a/integrationtests/basic_example/test_basic.py b/integrationtests/basic_example/test_basic.py index a3195d4cda683b37da2ef58d8bb3e7b7c18de390..4b8ec8f7204c4e7ab71edf1590b1d6af52816627 100755 --- a/integrationtests/basic_example/test_basic.py +++ b/integrationtests/basic_example/test_basic.py @@ -115,13 +115,14 @@ def test_single_insertion(clear_database, usemodel, crawler, ident): if res[i].parents[0].name == "PyTestInfo": del res[i] filename = rfp("..", "..", "unittests", "records.xml") - with open(filename, "w") as f: - xml = res.to_xml() - # Remove noscript and transaction benchmark: - for tag in ("noscript", "TransactionBenchmark"): - if xml.find(tag) is not None: - xml.remove(xml.find(tag)) - f.write(db.common.utils.xml2str(xml)) + # TODO can we remove this? why should we overwrite the current xml + # with open(filename, "w") as f: + # xml = res.to_xml() + # # Remove noscript and transaction benchmark: + # for tag in ("noscript", "TransactionBenchmark"): + # if xml.find(tag) is not None: + # xml.remove(xml.find(tag)) + # f.write(db.common.utils.xml2str(xml)) assert len(ins) == 18 assert len(ups) == 0 diff --git a/integrationtests/test_data/extroot/realworld_example/dataset_cfoods.yml b/integrationtests/test_data/extroot/realworld_example/dataset_cfoods.yml index 7deebf6e9dc861f1debc2b266299d78965e3a733..69cb53d4ffb86a3353fbccc2cae3dc3fbea25009 100644 --- a/integrationtests/test_data/extroot/realworld_example/dataset_cfoods.yml +++ b/integrationtests/test_data/extroot/realworld_example/dataset_cfoods.yml @@ -211,7 +211,7 @@ Data: subtree: related_to_doi_element: type: TextElement - match: "(?P<related_to_doi>).*" + match_value: "(?P<related_to_doi>).*" records: Dataset: related_to_dois: +$related_to_doi diff --git a/src/caoscrawler/converters.py b/src/caoscrawler/converters.py index 88624c740939f01a9fdd86277b5447d693038bf4..d80bf44533a58aa52bb5b8ec3a3fbd1b9ffc34ca 100644 --- a/src/caoscrawler/converters.py +++ b/src/caoscrawler/converters.py @@ -766,6 
def __init__(self, definition, *args, **kwargs):
    """Initialise the converter, refusing definitions that use the 'match' key.

    Parameters
    ----------
    definition
        The converter definition; it is only probed here with an ``in``
        membership test (presumably a dict parsed from a cfood YAML file --
        TODO confirm against the caller).
    *args, **kwargs
        Forwarded unchanged to the parent class initialiser.

    Raises
    ------
    ValueError
        If ``definition`` contains the key ``"match"``. The check runs before
        the parent initialiser is called.
    """
    # 'match' is reserved for a future meaning, so the happy path is every
    # definition that does NOT carry it.
    if "match" not in definition:
        super().__init__(definition, *args, **kwargs)
        return

    # Adjacent literals are joined at compile time; the resulting text
    # (including the leading and trailing newline) is exactly the message
    # users already see.
    reserved_key_message = (
        "\n"
        "The 'match' key will in future be used to match a potential name of a TextElement. Please use\n"
        "the 'match_value' key to match the value of the TextElement and 'match_name' for matching the name.\n"
    )
    raise ValueError(reserved_key_message)
responsible match_value: &person_regexp ((?P<first_name>.+) )?(?P<last_name>.+) records: &responsible_records @@ -76,12 +76,12 @@ Data: # name of the converter # "responsible" belonging to Measurement. responsible_list: - type: DictListElement + type: ListElement match_name: responsible subtree: Person: type: TextElement - match: *person_regexp + match_value: *person_regexp records: *responsible_records # sources_list: diff --git a/unittests/scifolder_extended2.yml b/unittests/scifolder_extended2.yml index 969325e91da488011819c338708a33dcfc32c93e..a189e79c12c2e1393188c8b9f532162518244508 100644 --- a/unittests/scifolder_extended2.yml +++ b/unittests/scifolder_extended2.yml @@ -56,14 +56,14 @@ Data: # name of the converter subtree: description: - type: DictTextElement + type: TextElement match_value: (?P<description>.*) match_name: description records: Measurement: description: $description responsible_single: - type: DictTextElement + type: TextElement match_name: responsible match_value: &person_regexp ((?P<first_name>.+) )?(?P<last_name>.+) records: &responsible_records @@ -77,12 +77,12 @@ Data: # name of the converter # "responsible" belonging to Measurement. responsible_list: - type: DictListElement + type: ListElement match_name: responsible subtree: Person: type: TextElement - match: *person_regexp + match_value: *person_regexp records: *responsible_records # sources_list: