diff --git a/src/caoscrawler/crawl.py b/src/caoscrawler/crawl.py index 698a154f49d84e15dbe41f91da67469184151316..e2b516acbd46a6b667c3130f242fc66f1790b36c 100644 --- a/src/caoscrawler/crawl.py +++ b/src/caoscrawler/crawl.py @@ -3,10 +3,11 @@ # # This file is a part of the CaosDB Project. # -# Copyright (C) 2021 Henrik tom Wörden <h.tomwoerden@indiscale.com> -# 2021-2023 Research Group Biomedical Physics, -# Max-Planck-Institute for Dynamics and Self-Organization Göttingen -# Alexander Schlemmer <alexander.schlemmer@ds.mpg.de> +# Copyright (C) 2021 Henrik tom Wörden <h.tomwoerden@indiscale.com> +# Copyright (C) 2021-2023 Research Group Biomedical Physics, MPI-DS Göttingen +# Copyright (C) 2021-2023 Alexander Schlemmer <alexander.schlemmer@ds.mpg.de> +# Copyright (C) 2024 Indiscale GmbH <info@indiscale.com> +# Copyright (C) 2024 Daniel Hornung <d.hornung@indiscale.com> # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as @@ -29,8 +30,6 @@ Crawl a file structure using a yaml cfood definition and synchronize the acuired data with LinkAhead. """ -from __future__ import annotations - import argparse import logging import os @@ -42,7 +41,7 @@ from argparse import RawTextHelpFormatter from copy import deepcopy from datetime import datetime from enum import Enum -from typing import Any, Optional, Union +from typing import Any, List, Optional, Union import linkahead as db import yaml @@ -646,7 +645,7 @@ class Crawler(object): return False refs = self.identifiableAdapter.get_identifying_referencing_entities(referencing_entities, registered_identifiable) - if any([el is None for el in refs]): + if any(el is None for el in refs): return True refs = self.identifiableAdapter.get_identifying_referenced_entities( @@ -808,11 +807,14 @@ class Crawler(object): circle = self.detect_circular_dependency(flat) if circle is None: logger.error("Failed, but found NO circular dependency. 
@staticmethod
def compact_entity_list_representation(entities, referencing_entities: dict) -> str:
    """Return a compact, human-readable text representation of ``entities``.

    This is meant to be more readable than the standard xml representation.
    Entities are grouped by the names of their parents; an entity with several
    parents is listed once under each of them.  Entities without any parent are
    collected in a group with the empty name ``""``, which is printed first.

    TODO this can be removed once the yaml format representation is in pylib

    Parameters
    ----------
    entities
        Iterable of objects that provide ``name``, ``parents`` (each parent
        having a ``name``) and ``properties`` (each property having ``name``
        and ``value``).
    referencing_entities : dict
        Mapping that is indexed with ``id(entity)``; the stored value is printed
        verbatim below ``is_referenced_by:``.  It must contain an entry for
        every entity in ``entities``, otherwise the lookup raises ``KeyError``.

    Returns
    -------
    str
        The textual representation, framed by ``--------`` separator lines.
    """
    # The "" group (entities without parents) is created first so that it is
    # printed before all named parent groups; it is dropped again if unused.
    grouped = {"": []}
    for ent in entities:
        if not ent.parents:
            grouped[""].append(ent)
        for parent in ent.parents:
            grouped.setdefault(parent.name, []).append(ent)
    if not grouped[""]:
        del grouped[""]

    parts = ["\n--------\n"]
    for parent_name, group in grouped.items():
        parts.append(f"\n> Parent: {parent_name}\n")
        for ent in group:
            if ent.name is not None:
                parts.append(f"\n>> Name: {ent.name}\n")
            else:
                # Terminate the line here as well, so that the parent list
                # below starts on its own line just like for named entities.
                parts.append("\n>> name: # No name\n")
            parts.append(f"{[par.name for par in ent.parents]}\n")
            props = {p.name: p.value for p in ent.properties}
            parts.append(f"{props}\n")
            parts.append(f"is_referenced_by:\n{referencing_entities[id(ent)]}\n")
    parts.append("--------\n")

    return "".join(parts)