From 51569689ad12bcc40261029ecae1be594ce074c0 Mon Sep 17 00:00:00 2001 From: Alexander Schlemmer <a.schlemmer@indiscale.com> Date: Wed, 4 Dec 2024 13:47:17 +0100 Subject: [PATCH] ENH: new data model for quality check --- linkahead-setup/datamodel.yaml | 45 +------------------ .../resources/crawler-settings/cfood.yaml | 16 ++++++- .../resources/crawler-settings/datamodel.yaml | 27 ++++++++--- 3 files changed, 36 insertions(+), 52 deletions(-) mode change 100644 => 120000 linkahead-setup/datamodel.yaml diff --git a/linkahead-setup/datamodel.yaml b/linkahead-setup/datamodel.yaml deleted file mode 100644 index f63e1dc..0000000 --- a/linkahead-setup/datamodel.yaml +++ /dev/null @@ -1,44 +0,0 @@ -Dataset: - obligatory_properties: - Author: - Repository: - dateModified: - datatype: DATETIME - dateCreated: - datatype: DATETIME - recommended_properties: - MetaData: - datatype: LIST<MetaData> - notes: - datatype: TEXT - rating: - datatype: INTEGER - voltage: - datatype: DOUBLE - unit: V - - -MetaData: - obligatory_properties: - v: - datatype: TEXT - -Author: - obligatory_properties: - url: - datatype: TEXT - recommended_properties: - nr: - datatype: INTEGER - -Repository: - obligatory_properties: - url: - -ELNFile: - recommended_properties: - QualityReportFile: - -QualityReportFile: - recommended_properties: - ELNFile: diff --git a/linkahead-setup/datamodel.yaml b/linkahead-setup/datamodel.yaml new file mode 120000 index 0000000..72b936d --- /dev/null +++ b/linkahead-setup/datamodel.yaml @@ -0,0 +1 @@ +../src/ruqad/resources/crawler-settings/datamodel.yaml \ No newline at end of file diff --git a/src/ruqad/resources/crawler-settings/cfood.yaml b/src/ruqad/resources/crawler-settings/cfood.yaml index cf3bc7d..4e2ac18 100644 --- a/src/ruqad/resources/crawler-settings/cfood.yaml +++ b/src/ruqad/resources/crawler-settings/cfood.yaml @@ -30,7 +30,7 @@ DataDir: match: ^[0-9]+$ subtree: QualityReportFile: - type: SimpleFile + type: ZipFileConverter match: ^report\.zip$ transform: 
elnfilename: @@ -40,6 +40,20 @@ DataDir: - replace: remove: report.zip insert: export.eln + subtree: + SummaryFile: + type: JSONFileConverter + match: ^qc_summary\.json$ + subtree: + CheckCounts: + type: Dict + match_properties: + num_total_checks: ^(?P<num_total>[0-9]+)$ + num_passing_checks: ^(?P<num_passing>[0-9]+)$ + records: + QualityReportFileElement: + numTotalChecks: $num_total + numPassingChecks: $num_passing records: ELNFileElement: parents: diff --git a/src/ruqad/resources/crawler-settings/datamodel.yaml b/src/ruqad/resources/crawler-settings/datamodel.yaml index f101a76..72b7b03 100644 --- a/src/ruqad/resources/crawler-settings/datamodel.yaml +++ b/src/ruqad/resources/crawler-settings/datamodel.yaml @@ -16,6 +16,7 @@ Dataset: voltage: datatype: DOUBLE unit: V + QualityCheck: MetaData: @@ -35,10 +36,22 @@ Repository: obligatory_properties: url: -# ELNFile: -# recommended_properties: -# QualityReportFile: -# -# QualityReportFile: -# recommended_properties: -# ELNFile: +ELNFile: + +QualityReportFile: + +QualityCheck: + recommended_properties: + QualityReportFile: + numTotalChecks: + datatype: INTEGER + numPassingChecks: + datatype: INTEGER + FAIRLicenseCheck: + datatype: BOOLEAN + FAIRMetadataCheck: + datatype: BOOLEAN + FAIRPIDCheck: + datatype: BOOLEAN + FAIRProvenanceCheck: + datatype: BOOLEAN -- GitLab