From 51569689ad12bcc40261029ecae1be594ce074c0 Mon Sep 17 00:00:00 2001
From: Alexander Schlemmer <a.schlemmer@indiscale.com>
Date: Wed, 4 Dec 2024 13:47:17 +0100
Subject: [PATCH] ENH: new data model for quality check

---
 linkahead-setup/datamodel.yaml                | 45 +------------------
 .../resources/crawler-settings/cfood.yaml     | 16 ++++++-
 .../resources/crawler-settings/datamodel.yaml | 27 ++++++++---
 3 files changed, 36 insertions(+), 52 deletions(-)
 mode change 100644 => 120000 linkahead-setup/datamodel.yaml

diff --git a/linkahead-setup/datamodel.yaml b/linkahead-setup/datamodel.yaml
deleted file mode 100644
index f63e1dc..0000000
--- a/linkahead-setup/datamodel.yaml
+++ /dev/null
@@ -1,44 +0,0 @@
-Dataset:
-  obligatory_properties:
-    Author:
-    Repository:
-    dateModified:
-      datatype: DATETIME
-    dateCreated:
-      datatype: DATETIME
-  recommended_properties:
-    MetaData:
-      datatype: LIST<MetaData>
-    notes:
-      datatype: TEXT
-    rating:
-      datatype: INTEGER
-    voltage:
-      datatype: DOUBLE
-      unit: V
-
-
-MetaData:
-  obligatory_properties:
-    v:
-      datatype: TEXT
-
-Author:
-  obligatory_properties:
-    url:
-      datatype: TEXT
-  recommended_properties:
-    nr:
-      datatype: INTEGER
-
-Repository:
-  obligatory_properties:
-    url:
-
-ELNFile:
-  recommended_properties:
-    QualityReportFile:
-
-QualityReportFile:
-  recommended_properties:
-    ELNFile:
diff --git a/linkahead-setup/datamodel.yaml b/linkahead-setup/datamodel.yaml
new file mode 120000
index 0000000..72b936d
--- /dev/null
+++ b/linkahead-setup/datamodel.yaml
@@ -0,0 +1 @@
+../src/ruqad/resources/crawler-settings/datamodel.yaml
\ No newline at end of file
diff --git a/src/ruqad/resources/crawler-settings/cfood.yaml b/src/ruqad/resources/crawler-settings/cfood.yaml
index cf3bc7d..4e2ac18 100644
--- a/src/ruqad/resources/crawler-settings/cfood.yaml
+++ b/src/ruqad/resources/crawler-settings/cfood.yaml
@@ -30,7 +30,7 @@ DataDir:
           match: ^[0-9]+$
           subtree:
             QualityReportFile:
-              type: SimpleFile
+              type: ZipFileConverter
               match: ^report\.zip$
               transform:
                 elnfilename:
@@ -40,6 +40,20 @@ DataDir:
                   - replace:
                       remove: report.zip
                       insert: export.eln
+              subtree:  # children of the QualityReportFile (report.zip) node
+                SummaryFile:
+                  type: JSONFileConverter
+                  match: ^qc_summary\.json$  # literal dot, as in ^report\.zip$
+                  subtree:
+                    CheckCounts:
+                      type: Dict
+                      match_properties:  # digits-only values, captured by name
+                        num_total_checks: ^(?P<num_total>[0-9]+)$
+                        num_passing_checks: ^(?P<num_passing>[0-9]+)$
+                      records:
+                        QualityReportFileElement:
+                          numTotalChecks: $num_total  # from num_total_checks
+                          numPassingChecks: $num_passing  # from num_passing_checks
               records:
                 ELNFileElement:
                   parents:
diff --git a/src/ruqad/resources/crawler-settings/datamodel.yaml b/src/ruqad/resources/crawler-settings/datamodel.yaml
index f101a76..72b7b03 100644
--- a/src/ruqad/resources/crawler-settings/datamodel.yaml
+++ b/src/ruqad/resources/crawler-settings/datamodel.yaml
@@ -16,6 +16,7 @@ Dataset:
     voltage:
       datatype: DOUBLE
       unit: V
+    QualityCheck:
 
 
 MetaData:
@@ -35,10 +36,22 @@ Repository:
   obligatory_properties:
     url:
 
-# ELNFile:
-#   recommended_properties:
-#     QualityReportFile:
-#
-# QualityReportFile:
-#   recommended_properties:
-#     ELNFile:
+ELNFile:  # declared without properties of its own
+
+QualityReportFile:  # declared without properties of its own
+
+QualityCheck:  # also listed as a property of Dataset in this file
+  recommended_properties:
+    QualityReportFile:  # no datatype given; presumably a reference - confirm
+    numTotalChecks:  # set by the cfood from num_total_checks
+      datatype: INTEGER
+    numPassingChecks:  # set by the cfood from num_passing_checks
+      datatype: INTEGER
+    FAIRLicenseCheck:
+      datatype: BOOLEAN
+    FAIRMetadataCheck:
+      datatype: BOOLEAN
+    FAIRPIDCheck:
+      datatype: BOOLEAN
+    FAIRProvenanceCheck:
+      datatype: BOOLEAN
-- 
GitLab