From d2d35f90d9052ec1b3d6c5884d1734c6c176bf6a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Henrik=20tom=20W=C3=B6rden?= <h.tomwoerden@indiscale.com>
Date: Sat, 12 Nov 2022 19:20:12 +0100
Subject: [PATCH] FIX: reject 'match' key in TextElement definitions and use 'match_value' in test cfoods

---
 integrationtests/basic_example/test_basic.py      | 15 ++++++++-------
 .../extroot/realworld_example/dataset_cfoods.yml  |  2 +-
 src/caoscrawler/converters.py                     |  9 +++++++++
 unittests/scifolder_cfood.yml                     |  6 +++---
 unittests/scifolder_extended.yml                  |  8 ++++----
 unittests/scifolder_extended2.yml                 |  8 ++++----
 6 files changed, 29 insertions(+), 19 deletions(-)

diff --git a/integrationtests/basic_example/test_basic.py b/integrationtests/basic_example/test_basic.py
index a3195d4c..4b8ec8f7 100755
--- a/integrationtests/basic_example/test_basic.py
+++ b/integrationtests/basic_example/test_basic.py
@@ -115,13 +115,14 @@ def test_single_insertion(clear_database, usemodel, crawler, ident):
         if res[i].parents[0].name == "PyTestInfo":
             del res[i]
     filename = rfp("..", "..", "unittests", "records.xml")
-    with open(filename, "w") as f:
-        xml = res.to_xml()
-        # Remove noscript and transaction benchmark:
-        for tag in ("noscript", "TransactionBenchmark"):
-            if xml.find(tag) is not None:
-                xml.remove(xml.find(tag))
-        f.write(db.common.utils.xml2str(xml))
+    # TODO: Can this block be removed? Why should the test overwrite the existing records.xml?
+    # with open(filename, "w") as f:
+    #    xml = res.to_xml()
+    #    # Remove noscript and transaction benchmark:
+    #    for tag in ("noscript", "TransactionBenchmark"):
+    #        if xml.find(tag) is not None:
+    #            xml.remove(xml.find(tag))
+    #    f.write(db.common.utils.xml2str(xml))
 
     assert len(ins) == 18
     assert len(ups) == 0
diff --git a/integrationtests/test_data/extroot/realworld_example/dataset_cfoods.yml b/integrationtests/test_data/extroot/realworld_example/dataset_cfoods.yml
index 7deebf6e..69cb53d4 100644
--- a/integrationtests/test_data/extroot/realworld_example/dataset_cfoods.yml
+++ b/integrationtests/test_data/extroot/realworld_example/dataset_cfoods.yml
@@ -211,7 +211,7 @@ Data:
                       subtree:
                         related_to_doi_element:
                           type: TextElement
-                          match: "(?P<related_to_doi>).*"
+                          match_value: "(?P<related_to_doi>).*"
                           records:
                             Dataset:
                               related_to_dois: +$related_to_doi
diff --git a/src/caoscrawler/converters.py b/src/caoscrawler/converters.py
index 88624c74..d80bf445 100644
--- a/src/caoscrawler/converters.py
+++ b/src/caoscrawler/converters.py
@@ -766,6 +766,15 @@ class TextElementConverter(_AbstractScalarValueElementConverter):
         "accept_float": True,
     }
 
+    def __init__(self, definition, *args, **kwargs):
+        if "match" in definition:
+            raise ValueError("""
+The 'match' key will in future be used to match a potential name of a TextElement. Please use
+the 'match_value' key to match the value of the TextElement and 'match_name' for matching the name.
+""")
+
+        super().__init__(definition, *args, **kwargs)
+
 
 class DictTextElementConverter(TextElementConverter):
     def __init__(self, *args, **kwargs):
diff --git a/unittests/scifolder_cfood.yml b/unittests/scifolder_cfood.yml
index 90f19344..74fd0275 100644
--- a/unittests/scifolder_cfood.yml
+++ b/unittests/scifolder_cfood.yml
@@ -42,14 +42,14 @@ Data:  # name of the converter
                   # how to make match case insensitive?
                   subtree:
                     description:
-                      type: DictTextElement
+                      type: TextElement
                       match_value: (?P<description>.*)
                       match_name: description
                       records:
                         Measurement:
                           description: $description
                     responsible_single:
-                        type: DictTextElement
+                        type: TextElement
                         match_name: responsible
                         match_value: &person_regexp ((?P<first_name>.+) )?(?P<last_name>.+)
                         records: &responsible_records
@@ -68,7 +68,7 @@ Data:  # name of the converter
                       subtree:
                         Person:
                           type: TextElement
-                          match: *person_regexp
+                          match_value: *person_regexp
                           records: *responsible_records
 
     ExperimentalData:  # name of the converter
diff --git a/unittests/scifolder_extended.yml b/unittests/scifolder_extended.yml
index 9bab612b..26f51067 100644
--- a/unittests/scifolder_extended.yml
+++ b/unittests/scifolder_extended.yml
@@ -55,14 +55,14 @@ Data:  # name of the converter
                           
                   subtree:
                     description:
-                      type: DictTextElement
+                      type: TextElement
                       match_value: (?P<description>.*)
                       match_name: description
                       records:
                         Measurement:
                           description: $description
                     responsible_single:
-                        type: DictTextElement
+                        type: TextElement
                         match_name: responsible
                         match_value: &person_regexp ((?P<first_name>.+) )?(?P<last_name>.+)
                         records: &responsible_records
@@ -76,12 +76,12 @@ Data:  # name of the converter
                                                     # "responsible" belonging to Measurement.
 
                     responsible_list:
-                      type: DictListElement
+                      type: ListElement
                       match_name: responsible
                       subtree:
                         Person:
                           type: TextElement
-                          match: *person_regexp
+                          match_value: *person_regexp
                           records: *responsible_records
 
                     # sources_list:
diff --git a/unittests/scifolder_extended2.yml b/unittests/scifolder_extended2.yml
index 969325e9..a189e79c 100644
--- a/unittests/scifolder_extended2.yml
+++ b/unittests/scifolder_extended2.yml
@@ -56,14 +56,14 @@ Data:  # name of the converter
                           
                   subtree:
                     description:
-                      type: DictTextElement
+                      type: TextElement
                       match_value: (?P<description>.*)
                       match_name: description
                       records:
                         Measurement:
                           description: $description
                     responsible_single:
-                        type: DictTextElement
+                        type: TextElement
                         match_name: responsible
                         match_value: &person_regexp ((?P<first_name>.+) )?(?P<last_name>.+)
                         records: &responsible_records
@@ -77,12 +77,12 @@ Data:  # name of the converter
                                                     # "responsible" belonging to Measurement.
 
                     responsible_list:
-                      type: DictListElement
+                      type: ListElement
                       match_name: responsible
                       subtree:
                         Person:
                           type: TextElement
-                          match: *person_regexp
+                          match_value: *person_regexp
                           records: *responsible_records
 
                     # sources_list:
-- 
GitLab