From e7b6581761f7a3d113a34b43e8f67604c8ed0dd5 Mon Sep 17 00:00:00 2001
From: Florian Spreckelsen <f.spreckelsen@indiscale.com>
Date: Fri, 23 Aug 2024 10:50:22 +0200
Subject: [PATCH] TST: Add schema validation tests

---
 src/caoscrawler/cfood-schema.yml              | 53 +++++++++++++++++--
 .../broken_cfoods/broken_record_from_dict.yml |  7 +++
 .../broken_record_from_dict_2.yml             | 11 ++++
 unittests/record_from_dict_cfood.yml          | 12 +++++
 unittests/test_schema.py                      |  7 +++
 5 files changed, 86 insertions(+), 4 deletions(-)
 create mode 100644 unittests/broken_cfoods/broken_record_from_dict.yml
 create mode 100644 unittests/broken_cfoods/broken_record_from_dict_2.yml
 create mode 100644 unittests/record_from_dict_cfood.yml

diff --git a/src/caoscrawler/cfood-schema.yml b/src/caoscrawler/cfood-schema.yml
index 340e5b9d..aa3f87c0 100644
--- a/src/caoscrawler/cfood-schema.yml
+++ b/src/caoscrawler/cfood-schema.yml
@@ -4,6 +4,11 @@ cfood:
     $ref:
       "#/$defs/converter"
   $defs:
+    parents:
+      description: Parents for this record are given here as a list of names.
+      type: array
+      items:
+        type: string
     converter:
       properties:
         type:
@@ -38,6 +43,7 @@ cfood:
           - H5Dataset
           - H5Group
           - H5Ndarray
+          - PropertiesFromDictElement
           description: Type of this converter node.
         match:
           description: typically a regexp which is matched to a structure element name
@@ -48,15 +54,42 @@ cfood:
         match_value:
           description: a regexp that is matched to the value of a key-value pair
           type: string
-        records:
-          description: This field is used to define new records or to modify records which have been defined on a higher level.
+        record_from_dict:
+          description: Only relevant for PropertiesFromDictElement.  Specify the root record which is generated from the contained dictionary.
           type: object
+          required:
+            - variable_name
           properties:
-            parents:
-              description: Parents for this record are given here as a list of names.
+            variable_name:
+              description: Name of the record by which it can be accessed in the cfood definition. Can also be the name of an existing record, in which case that record will be treated by the PropertiesFromDictConverter.
+              type: string
+            properties_blacklist:
+              description: List of keys to be ignored in the automatic treatment.  They will be ignored on all levels of the dictionary.
               type: array
               items:
                 type: string
+            references:
+              description: Mapping whose keys will be transformed into named reference properties; each value may specify the parents of the referenced record.
+              type: object
+              additionalProperties:
+                type: object
+                properties:
+                  parents:
+                    $ref:
+                      "#/$defs/parents"
+            name:
+              description: Name of this record.  If none is given, variable_name is used.
+              type: string
+            parents:
+              $ref:
+                "#/$defs/parents"
+        records:
+          description: This field is used to define new records or to modify records which have been defined on a higher level.
+          type: object
+          properties:
+            parents:
+              $ref:
+                "#/$defs/parents"
             additionalProperties:
               oneOf:
               - type: object
@@ -78,3 +111,15 @@ cfood:
           additionalProperties:
             $ref:
               "#/$defs/converter"
+      if:
+        properties:
+          type:
+            const:
+              PropertiesFromDictElement
+      then:
+        required:
+          - type
+          - record_from_dict
+      else:
+        required:
+          - type
diff --git a/unittests/broken_cfoods/broken_record_from_dict.yml b/unittests/broken_cfoods/broken_record_from_dict.yml
new file mode 100644
index 00000000..fd8ffdbd
--- /dev/null
+++ b/unittests/broken_cfoods/broken_record_from_dict.yml
@@ -0,0 +1,7 @@
+RecordFromDictElement:
+  type: PropertiesFromDictElement
+  match: "(.*)"
+  subtree:
+    AnotherElement:
+      type: Text
+      match_name: "(.*)"
diff --git a/unittests/broken_cfoods/broken_record_from_dict_2.yml b/unittests/broken_cfoods/broken_record_from_dict_2.yml
new file mode 100644
index 00000000..ca321373
--- /dev/null
+++ b/unittests/broken_cfoods/broken_record_from_dict_2.yml
@@ -0,0 +1,11 @@
+RecordFromDictElement:
+  type: PropertiesFromDictElement
+  record_from_dict:
+    parents:
+      - MyType1
+      - MyType2
+  match: "(.*)"
+  subtree:
+    AnotherElement:
+      type: Text
+      match_name: "(.*)"
diff --git a/unittests/record_from_dict_cfood.yml b/unittests/record_from_dict_cfood.yml
new file mode 100644
index 00000000..1ea2159d
--- /dev/null
+++ b/unittests/record_from_dict_cfood.yml
@@ -0,0 +1,12 @@
+PropertiesFromDictElement:
+  type: PropertiesFromDictElement
+  match: ".*"
+  record_from_dict:
+    variable_name: MyRec
+    parents:
+      - MyType1
+      - MyType2
+    references:
+      author:
+        parents:
+          - Person
diff --git a/unittests/test_schema.py b/unittests/test_schema.py
index 3b576c9b..ea8549b0 100644
--- a/unittests/test_schema.py
+++ b/unittests/test_schema.py
@@ -27,6 +27,13 @@ def rfp(*pathcomponents):
 def test_schema_validation():
     load_definition(rfp("scifolder_cfood.yml"))
     load_definition(rfp("scifolder_extended.yml"))
+    load_definition(rfp("record_from_dict_cfood.yml"))
 
     with raises(ValidationError, match=".*enum.*"):
         load_definition(rfp("broken_cfoods", "broken1.yml"))
+
+    with raises(ValidationError, match=".*required.*"):
+        load_definition(rfp("broken_cfoods", "broken_record_from_dict.yml"))
+
+    with raises(ValidationError, match=".*required.*"):
+        load_definition(rfp("broken_cfoods", "broken_record_from_dict_2.yml"))
-- 
GitLab