diff --git a/src/caoscrawler/validator.py b/src/caoscrawler/validator.py index 3cd57cd5842e3de31d59b5e4f489009a009ee1cc..8e0efd94ef348e67b991708a3d5d7917e9dd32a9 100644 --- a/src/caoscrawler/validator.py +++ b/src/caoscrawler/validator.py @@ -123,12 +123,14 @@ def convert_record(record: db.Record): return _apply_schema_patches(pobj) -def validate(records: list[db.Record], schemas: dict[str, dict]) -> list[tuple[bool, list]]: +def validate(records: list[db.Record], schemas: dict[str, dict]) -> list[tuple]: """ - Validate a list of records against a list of possible JSON schemas. - - It is tried to validate each schema from the list of schemas. If none of them validates - without error, it is assumed that it does not match at all. + Validate a list of records against a dictionary of schemas. + The keys of the dictionary are record types and the corresponding values are json schemata + associated with that record type. The current implementation assumes that each record that is + checked has exactly one parent and raises an error if that is not the case. + The schema belonging to a record is identified using the name of the first (and only) parent + of the record. Arguments: ---------- @@ -136,15 +138,16 @@ def validate(records: list[db.Record], schemas: dict[str, dict]) -> list[tuple[b records: list[db.Record] List of records that will be validated. - schemas: list[dict] - A list of JSON schemas generated using `load_json_schema_from_datamodel_yaml`. + schemas: dict[str, dict] + A dictionary of JSON schemas generated using `load_json_schema_from_datamodel_yaml`. Returns: -------- A list of tuples, one element for each record: - - Index 0: A boolean that determines whether at least one schema matched for this record. - - Index 1: A list of schemas matching the record at this position of the list `records`. + - Index 0: A boolean that determines whether the schema belonging to the record type of the + record matched. 
+ - Index 1: A validation error if the schema did not match, or None otherwise. """ retval = []