diff --git a/src/caoscrawler/crawl.py b/src/caoscrawler/crawl.py index 131747e340731153e432b61650994c6ad1115515..6322310b62798c3ca92ae406cb83d627a4bee101 100644 --- a/src/caoscrawler/crawl.py +++ b/src/caoscrawler/crawl.py @@ -72,7 +72,6 @@ from .scanner import (create_converter_registry, initialize_converters, from .semantic_target import SemanticTarget from .stores import GeneralStore from .structure_elements import StructureElement -from .treated_record_lookup import TreatedRecordLookUp logger = logging.getLogger(__name__) @@ -174,53 +173,6 @@ def _resolve_datatype(prop: db.Property, remote_entity: db.Entity): return prop -def _treat_merge_error_of(newrecord, record): - """ - The parameters are two entities that cannot be merged with the merge_entities function. - - # This function checks for two obvious cases where no merge will ever be possible: - # 1. Two Entities with differing IDs - # 2. Two non-Entity values which differ - - It creates a more informative logger message and raises an Exception in those cases. - """ - for this_p in newrecord.properties: - that_p = record.get_property(this_p.name) - - if that_p is None: - logger.debug(f"Property {this_p.name} does not exist in the second entity. Note that " - "this should not be the reason for the merge conflict.") - continue - - if (isinstance(this_p.value, db.Entity) - and isinstance(that_p.value, db.Entity)): - if this_p.value.id is not None and that_p.value.id is not None: - if this_p.value.id != that_p.value.id: - logger.error("The Crawler is trying to merge two entities " - "because they should be the same object (same" - " identifiables), but they reference " - "different Entities with the same Property." 
- f"Problematic Property: {this_p.name}\n" - f"Referenced Entities: {this_p.value.id} and " - f"{that_p.value.id}\n" - f"{record}\n{newrecord}") - raise RuntimeError("Cannot merge Entities") - elif (not isinstance(this_p.value, db.Entity) - and not isinstance(that_p.value, db.Entity)): - if ((this_p.value != that_p.value) - # TODO can we also compare lists? - and not isinstance(this_p.value, list) - and not isinstance(that_p.value, list)): - logger.error( - "The Crawler is trying to merge two entities because they should be the same " - "object (same identifiables), but they have different values for the same " - "Property.\n" - f"Problematic Property: {this_p.name}\n" - f"Values: {this_p.value} and {that_p.value}\n" - f"{record}\n{newrecord}") - raise RuntimeError("Cannot merge Entities") - - class SecurityMode(Enum): RETRIEVE = 0 INSERT = 1 @@ -263,7 +215,6 @@ class Crawler(object): # The following caches store records, where we checked whether they exist on the remote # server. Since, it is important to know whether they exist or not, we store them into two # different caches. - self.treated_records_lookup = TreatedRecordLookUp() # TODO does it make sense to have this as member variable? self.securityMode = securityMode @@ -347,67 +298,6 @@ class Crawler(object): self.crawled_data = data return data - def _has_reference_value_without_id(self, ident: Identifiable) -> bool: - """ - Returns True if there is at least one value in the properties and backrefs attributes of - ``ident`` which: - - a) is a reference property AND - b) where the value is set to a - :external+caosdb-pylib:py:class:`db.Entity <caosdb.common.models.Entity>` - (instead of an ID) AND - c) where the ID of the value (the - :external+caosdb-pylib:py:class:`db.Entity <caosdb.common.models.Entity>` object in b)) - is not set (to an integer) - - Returns - ------- - bool - True if there is a value without id (see above) - - Raises - ------ - ValueError - If no Identifiable is given. 
- """ - if ident is None: - raise ValueError("Identifiable has to be given as argument") - for pvalue in list(ident.properties.values()) + ident.backrefs: - if isinstance(pvalue, list): - for el in pvalue: - if isinstance(el, db.Entity) and el.id is None: - return True - elif isinstance(pvalue, db.Entity) and pvalue.id is None: - return True - return False - - def _has_missing_object_in_references(self, ident: Identifiable, referencing_entities: dict): - """ - returns False if any value in the properties attribute is a db.Entity object that - is contained in the `remote_missing_cache`. If ident has such an object in - properties, it means that it references another Entity, where we checked - whether it exists remotely and it was not found. - """ - if ident is None: - raise ValueError("Identifiable has to be given as argument") - for pvalue in list(ident.properties.values()) + ident.backrefs: - # Entity instead of ID and not cached locally - if (isinstance(pvalue, list)): - for el in pvalue: - elident = self.identifiableAdapter.get_identifiable( - el, referencing_entities[id(el)]) - if (isinstance(el, db.Entity) - and self.treated_records_lookup.get_missing(el, elident) is not None): - return True - if (isinstance(pvalue, db.Entity) and self.treated_records_lookup.get_missing( - pvalue, - self.identifiableAdapter.get_identifiable(pvalue, - referencing_entities[id(pvalue)]) - ) is not None): - # might be checked when reference is resolved - return True - return False - def replace_references_with_cached(self, record: db.Record, referencing_entities: dict): """ Replace all references with the versions stored in the cache. 
@@ -462,238 +352,79 @@ class Crawler(object): ) p.value = cached - @staticmethod - def bend_references_to_new_object(old, new, entities): - """ Bend references to the other object - Iterate over all entities in `entities` and check the values of all properties of - occurances of old Entity and replace them with new Entity - """ - for el in entities: - for p in el.properties: - if isinstance(p.value, list): - for index, val in enumerate(p.value): - if val is old: - p.value[index] = new - else: - if p.value is old: - p.value = new - - def _merge_identified(self, newrecord, record, try_to_merge_later, all_records): - """ tries to merge record into newrecord - - If it fails, record is added to the try_to_merge_later list. - In any case, references are bent to the newrecord object. - - """ - try: - merge_entities( - newrecord, record, merge_references_with_empty_diffs=False, - merge_id_with_resolved_entity=True) - except EntityMergeConflictError: - _treat_merge_error_of(newrecord, record) - # We cannot merge but it is none of the clear case where merge is - # impossible. 
Thus we try later - try_to_merge_later.append(record) - if newrecord.id is not None: - record.id = newrecord.id - except NotImplementedError: - print(newrecord) - print(record) - raise - Crawler.bend_references_to_new_object( - old=record, new=newrecord, - entities=all_records - ) - - def _identity_relies_on_unchecked_entities(self, record: db.Record, referencing_entities): - """ - If a record for which it could not yet be verified whether it exists in LA or not is part - of the identifying properties, this returns True, otherwise False - """ - - registered_identifiable = self.identifiableAdapter.get_registered_identifiable(record) - if registered_identifiable is None: - return False - refs = self.identifiableAdapter.get_identifying_referencing_entities(referencing_entities, - registered_identifiable) - if any(el is None for el in refs): - return True - - refs = self.identifiableAdapter.get_identifying_referenced_entities( - record, registered_identifiable) - if any([self.treated_records_lookup.get_any(el) is None for el in refs]): - return True - - return False - - @staticmethod - def create_reference_mapping(flat: list[db.Entity]): - """ - Create a dictionary of dictionaries of the form: - dict[int, dict[str, list[Union[int,None]]]] - - - The integer index is the Python id of the value object. - - The string is the name of the first parent of the referencing object. - - Each value objects is taken from the values of all properties from the list flat. - - So the returned mapping maps ids of entities to the ids of objects which are referring - to them. - """ - # TODO we need to treat children of RecordTypes somehow. 
- references: dict[int, dict[str, list[Union[int, None]]]] = {} - for ent in flat: - if id(ent) not in references: - references[id(ent)] = {} - for p in ent.properties: - val = p.value - if not isinstance(val, list): - val = [val] - for v in val: - if isinstance(v, db.Entity): - if id(v) not in references: - references[id(v)] = {} - if ent.parents[0].name not in references[id(v)]: - references[id(v)][ent.parents[0].name] = [] - references[id(v)][ent.parents[0].name].append(ent.id) - - return references - - def split_into_inserts_and_updates(self, ent_list: list[db.Entity]): - st = SemanticTarget(ent_list) - all_records = list(st.entities) - - try_to_merge_later = [] - - # Check whether Records can be identified without identifiable - for i in reversed(range(len(st.entities))): - record = st.entities[i] - # 1. Can it be identified via an ID? - if record.id is not None: - treated_record = self.treated_records_lookup.get_existing(record) - if treated_record is not None: - self._merge_identified(treated_record, record, try_to_merge_later, all_records) - all_records.remove(record) - referencing_entities = self.create_reference_mapping(all_records) - else: - self.treated_records_lookup.add(record, None) - assert record.id - del st.entities[i] - # 2. Can it be identified via a path? 
- elif record.path is not None: - try: - existing = cached_get_entity_by(path=record.path) - except EmptyUniqueQueryError: - existing = None - if existing is not None: - record.id = existing.id - # TODO check the following copying of _size and _checksum - # Copy over checksum and size too if it is a file - record._size = existing._size - record._checksum = existing._checksum - treated_record = self.treated_records_lookup.get_any(record) - if treated_record is not None: - self._merge_identified(treated_record, record, try_to_merge_later, all_records) - all_records.remove(record) - referencing_entities = self.create_reference_mapping(all_records) - else: - # TODO add identifiable if possible - self.treated_records_lookup.add(record, None) - assert record.id - del st.entities[i] + def split_into_inserts_and_updates(self, st: SemanticTarget): entity_was_treated = True # st.entities contains Entities which could not yet be checked against the remote server - while entity_was_treated and len(st.entities) > 0: + while entity_was_treated and len(st.unchecked) > 0: entity_was_treated = False - referencing_entities = self.create_reference_mapping(all_records) # For each element we try to find out whether we can find it in the server or whether # it does not yet exist. Since a Record may reference other unkown Records it might not # be possible to answer this right away. # The following checks are done on each Record: # 1. Is it in the cache of already checked Records? - # 2. Can it be checked on the remote server? - # 3. Does it have to be new since a needed reference is missing? - for i in reversed(range(len(st.entities))): - record = st.entities[i] - - if self._identity_relies_on_unchecked_entities(record, - referencing_entities[id(record)]): + # 2. Does it have to be new since a needed reference is missing? + # 3. Can it be checked on the remote server? 
+ for se in st.unchecked: + if st.identity_relies_on_unchecked_entity(se): continue - identifiable = self.identifiableAdapter.get_identifiable( - record, - referencing_entities=referencing_entities[id(record)]) + if se.identifiable is None: + se.identifiable = self.identifiableAdapter.new_get_identifiable( + se, st.backward_id_referenced_by[se.uuid]) + + equivalent_se = st.get_equivalent_se(se) # 1. Is it in the cache of already checked Records? - if self.treated_records_lookup.get_any(record, identifiable) is not None: - treated_record = self.treated_records_lookup.get_any(record, identifiable) - # Since the identifiables are the same, treated_record and record actually - # describe the same object. + if equivalent_se is not None: # We merge record into treated_record in order to prevent loss of information - self._merge_identified(treated_record, record, try_to_merge_later, all_records) - all_records.remove(record) - referencing_entities = self.create_reference_mapping(all_records) + st.merge_into(se, equivalent_se) - del st.entities[i] - entity_was_treated = True + # 2. Does it have to be new since a needed reference is missing? + # (Is it impossible to check this record because an identifiable references a + # missing record?) + elif st.identity_relies_on_missing_entity(se): + st.set_new(se) - # 2. Can it be checked on the remote server? - elif not self._has_reference_value_without_id(identifiable): + # 3. 
check on the remote server + else: identified_record = ( self.identifiableAdapter.retrieve_identified_record_for_identifiable( - identifiable)) + se.identifiable)) if identified_record is None: - # identifiable does not exist remotely -> record needs to be inserted - self.treated_records_lookup.add(record, identifiable) + st.set_new(se) else: - # side effect - record.id = identified_record.id - record.path = identified_record.path - self.treated_records_lookup.add(record, identifiable) - assert record.id - del st.entities[i] - entity_was_treated = True - - # 3. Does it have to be new since a needed reference is missing? - # (Is it impossible to check this record because an identifiable references a - # missing record?) - elif self._has_missing_object_in_references(identifiable, referencing_entities): - self.treated_records_lookup.add(record, identifiable) - assert record.id - del st.entities[i] - entity_was_treated = True + st.set_existing(se, identified_record) + entity_was_treated = True - for record in st.entities: - self.replace_references_with_cached(record, referencing_entities) + # TODO + # for record in st.entities: + # self.replace_references_with_cached(record, referencing_entities) # We postponed the merge for records where it failed previously and try it again now. # This only might add properties of the postponed records to the already used ones. - for record in try_to_merge_later: - identifiable = self.identifiableAdapter.get_identifiable( - record, - referencing_entities=referencing_entities[id(record)]) - newrecord = self.treated_records_lookup.get_any(record, identifiable) - merge_entities(newrecord, record, merge_id_with_resolved_entity=True) + st.combine_fragments() if len(st.entities) > 0: circle = self.detect_circular_dependency(st.entities) if circle is None: logger.error("Failed, but found NO circular dependency. 
The data is as follows:" - + str(self.compact_entity_list_representation(st.entities, - referencing_entities))) + # + str(self.compact_entity_list_representation(st.entities, + # referencing_entities)) + ) else: logger.error("Found circular dependency (Note that this might include references " "that are not identifying properties): " - + self.compact_entity_list_representation(circle, - referencing_entities)) + # + self.compact_entity_list_representation(circle, + # referencing_entities) + ) raise RuntimeError( f"Could not finish split_into_inserts_and_updates. Circular dependency: " f"{circle is not None}") # remove negative IDs - missing = self.treated_records_lookup.get_missing_list() + missing = st.treated_records_lookup.get_missing_list() for el in missing: if el.id is None: raise RuntimeError("This should not happen") # TODO remove @@ -701,7 +432,7 @@ class Crawler(object): raise RuntimeError("This should not happen") # TODO remove el.id = None - return (missing, self.treated_records_lookup.get_existing_list()) + return (missing, st.treated_records_lookup.get_existing_list()) def replace_entities_with_ids(self, rec: db.Record): for el in rec.properties: @@ -1043,7 +774,8 @@ class Crawler(object): "use for example the Scanner to create this data.")) crawled_data = self.crawled_data - to_be_inserted, to_be_updated = self.split_into_inserts_and_updates(crawled_data) + to_be_inserted, to_be_updated = self.split_into_inserts_and_updates( + SemanticTarget(crawled_data, self.identifiableAdapter)) for el in to_be_updated: # all entity objects are replaced by their IDs except for the not yet inserted ones diff --git a/src/caoscrawler/identifiable_adapters.py b/src/caoscrawler/identifiable_adapters.py index d95112ee1aec6ca1526c96421a8052282b6ef9a7..473597bfaa566cf211818d9061a4495224e33dfd 100644 --- a/src/caoscrawler/identifiable_adapters.py +++ b/src/caoscrawler/identifiable_adapters.py @@ -216,34 +216,6 @@ startswith: bool, optional """ pass - @staticmethod - def 
get_identifying_referencing_entities(referencing_entities, registered_identifiable): - refs = [] - for prop in registered_identifiable.properties: - if prop.name.lower() != "is_referenced_by": - continue - for looking_for_rt in prop.value: - found = False - if looking_for_rt == "*": - for val in referencing_entities.values(): - if len(val) > 0: - found = True - refs.extend(val) - else: - rt_and_children = get_children_of_rt(looking_for_rt) - for rtname in rt_and_children: - if (rtname in referencing_entities): - refs.extend(referencing_entities[rtname]) - found = True - if not found: - raise RuntimeError( - f"Could not find referencing entities of type(s): {prop.value}\n" - f"for registered identifiable:\n{registered_identifiable}\n" - f"There were {len(referencing_entities)} referencing entities to choose from.\n" - f"This error can also occur in case of merge conflicts in the referencing entities." - ) - return refs - @staticmethod def get_identifying_referenced_entities(record, registered_identifiable): refs = [] @@ -261,7 +233,7 @@ startswith: bool, optional refs.append(val) return refs - def get_identifiable(self, record: db.Record, referencing_entities=None): + def new_get_identifiable(self, se: SemanticEntity, identifiable_backrefs): """ Retrieve the registered identifiable and fill the property values to create an identifiable. @@ -275,10 +247,89 @@ startswith: bool, optional Identifiable, the identifiable for record. 
""" - registered_identifiable = self.get_registered_identifiable(record) + registered_identifiable = se.registered_identifiable - if referencing_entities is None: - referencing_entities = {} + property_name_list_A = [] + property_name_list_B = [] + identifiable_props = {} + identifiable_backrefs = [] + name = None + + # TODO + # if registered_identifiable is None: + # raise ValueError("no register_identifiable") + + if registered_identifiable is not None: + # fill the values: + for prop in registered_identifiable.properties: + if prop.name == "name": + name_options = [f.name for f in se.fragments if f.name is not None] + if len(name_options) == 0: + raise RuntimeError("name") + assert all([f == name_options[0] for f in name_options]) + name = name_options[0] + continue + # problem: what happens with multi properties? + # case A: in the registered identifiable + # case B: in the identifiable + + # treated elsewhere + if prop.name.lower() == "is_referenced_by": + continue + + options = [f.get_property(prop.name) for f in se.fragments + if f.get_property(prop.name) is None] + if len(options) == 0: + raise NotImplementedError( + f"The following record is missing an identifying property:\n" + f"RECORD\n{record}\nIdentifying PROPERTY\n{prop.name}" + ) + assert all([f.value == options[0].value for f in options]) + record_prop = options[0] + + identifiable_props[record_prop.name] = record_prop.value + property_name_list_A.append(prop.name) + + # check for multi properties in the record: + for prop in property_name_list_A: + property_name_list_B.append(prop) + if (len(set(property_name_list_B)) != len(property_name_list_B) or len( + set(property_name_list_A)) != len(property_name_list_A)): + raise RuntimeError( + "Multi properties used in identifiables could cause unpredictable results and " + "are not allowed. 
You might want to consider a Property with a list as value.") + + # use the RecordType of the registered Identifiable if it exists + # We do not use parents of Record because it might have multiple + try: + return Identifiable( + record_id=se.remote_id, + path=se.remote_path, + record_type=(registered_identifiable.parents[0].name + if registered_identifiable else None), + name=name, + properties=identifiable_props, + backrefs=identifiable_backrefs + ) + except Exception: + logger.error(f"Error while creating identifiable for this record:\n{se}") + raise + + def get_identifiable(self, record: db.Record, identifiable_backrefs): + """ + Retrieve the registered identifiable and fill the property values to create an + identifiable. + + Args: + record: the record for which the Identifiable shall be created. + referencing_entities: a dictionary (Type: dict[str, list[db.Entity]]), that + allows to look up entities with a certain RecordType, that reference ``record`` + + Returns: + Identifiable, the identifiable for record. + """ + + registered_identifiable = self.get_registered_identifiable(record) property_name_list_A = [] property_name_list_B = [] @@ -287,8 +338,6 @@ startswith: bool, optional name_is_identifying_property = False if registered_identifiable is not None: - identifiable_backrefs = self.get_identifying_referencing_entities( - referencing_entities, registered_identifiable) # fill the values: for prop in registered_identifiable.properties: if prop.name == "name": @@ -340,7 +389,7 @@ startswith: bool, optional logger.error(f"Error while creating identifiable for this record:\n{record}") raise - @abstractmethod + @ abstractmethod def retrieve_identified_record_for_identifiable(self, identifiable: Identifiable): """ Retrieve identifiable record for a given identifiable. 
@@ -370,6 +419,19 @@ startswith: bool, optional return self.retrieve_identified_record_for_identifiable(identifiable) + @ staticmethod + def referencing_entity_has_appropriate_type(parents, register_identifiable): + if register_identifiable.get_property("is_referenced_by") is None: + return False + appropriate_types = [el.lower() for el in + register_identifiable.get_property("is_referenced_by").value] + if "*" in appropriate_types: + return True + for parent in parents: + if parent.name.lower() in appropriate_types: + return True + return False + class LocalStorageIdentifiableAdapter(IdentifiableAdapter): """ diff --git a/src/caoscrawler/semantic_target.py b/src/caoscrawler/semantic_target.py index 7ce4a4d423ad5374ce4972a24bf776a60401a505..443024eee4ff855e1c8d3d2160f6a9617f72e638 --- a/src/caoscrawler/semantic_target.py +++ b/src/caoscrawler/semantic_target.py @@ -24,22 +24,330 @@ A data model class for the semantic data that shall be created by synchronizatio """ from typing import Any, List, Optional, Union +from uuid import uuid4 as uuid import linkahead as db +from linkahead.apiutils import (EntityMergeConflictError, compare_entities, + merge_entities) + +from .identifiable_adapters import IdentifiableAdapter +from .treated_record_lookup import TreatedRecordLookUp + + +class SemanticEntity(): + def __init__(self, entity: db.Entity, registered_identifiable): + self.fragments = [entity] + self.remote_id = entity.id + self.remote_path = entity.path + self.identifiable = None + self.registered_identifiable = registered_identifiable + self.uuid = uuid() + + def is_unchecked(self): + return len([el.id for el in self.fragments if el.id is None]) > 0 + + def is_missing(self): + return len([el.id for el in self.fragments if el.id is not None and el.id < 0]) > 0 class SemanticTarget(): - def __init__(self, entities: list[db.Entity]): + def __init__(self, entities: list[db.Entity], identifiableAdapter): + self.identifiableAdapter = identifiableAdapter + self.entities = 
self.create_flat_list(entities) + self.treated_records_lookup = TreatedRecordLookUp() + self.se = [] + self.se_lookup = {} + for el in self.entities: + self.se.append(SemanticEntity( + el, + self.identifiableAdapter.get_registered_identifiable(el))) + self.se_lookup[id(el)] = self.se[-1] + self.unchecked = list(self.se) + self.sanity_check(self.entities) + ( + self.forward_references, + self.backward_references, + self.forward_id_references, + self.backward_id_references, + self.forward_id_referenced_by, + self.backward_id_referenced_by, + ) = self.new_create_reference_mapping(self.se, self.se_lookup) + self.treat_trivial_id() + + def treat_trivial_id(self): + for semantic_entity in self.se: + assert len(semantic_entity.fragments) == 1 + entity = semantic_entity.fragments[0] + # 1. can it be checked via ID + if entity.id is not None: + treated_record = self.treated_records_lookup.get_existing(entity) + if treated_record is None: + self.treated_records_lookup.add(entity, None) + if semantic_entity.remote_id is None: + semantic_entity.remote_id = entity.id + else: + assert semantic_entity.remote_id == entity.id + else: + self.merge_into(semantic_entity, self.se_lookup[id(treated_record)]) + self.unchecked.remove(semantic_entity) + # 2. Can it be identified via a path? 
+ elif entity.path is not None: + try: + existing = cached_get_entity_by(path=entity.path) + except EmptyUniqueQueryError: + existing = None + if existing is not None: + entity.id = existing.id + # TODO check the following copying of _size and _checksum + # Copy over checksum and size too if it is a file + entity._size = existing._size + entity._checksum = existing._checksum + treated_record = self.treated_records_lookup.get_any(entity) + if treated_record is None: + self.treated_records_lookup.add(entity, None) + if semantic_entity.remote_path is None: + semantic_entity.remote_path = entity.path + else: + assert semantic_entity.remote_path == entity.path + else: + self.merge_into(semantic_entity, self.se_lookup[id(treated_record)]) + self.unchecked.remove(semantic_entity) + + def get_equivalent_se(self, se: SemanticEntity) -> Optional[SemanticEntity]: + return self.se_lookup.get(id(self.treated_records_lookup.get_any(None, se.identifiable))) + + def set_new(self, se: SemanticEntity) -> Optional[SemanticEntity]: + self.treated_records_lookup.add(se.fragments[0], se.identifiable) + self.unchecked.remove(se) + + def set_existing(self, se, identified_record): + for f in se.fragments: + # side effect + f.id = identified_record.id + f.path = identified_record.path + self.treated_records_lookup.add(se.fragments[0], se.identifiable) + + def identity_relies_on_unchecked_entity(self, se: SemanticEntity): + """ + If a record for which it could not yet be verified whether it exists in LA or not is part + of the identifying properties, this returns True, otherwise False + """ + + return any([ent.is_unchecked() for ent in self.forward_id_references[se.uuid]] + + [ent.is_unchecked() for ent in self.backward_id_referenced_by[se.uuid]])
If ident has such an object in + properties, it means that it references another Entity, where we checked + whether it exists remotely and it was not found. + """ + return any([ent.is_missing() for ent in self.forward_id_references[se.uuid]] + + [ent.is_missing() for ent in self.backward_id_referenced_by[se.uuid]]) + + def merge_into(self, source: SemanticEntity, target: SemanticEntity): + """ tries to merge record into newrecord + + If it fails, record is added to the try_to_merge_later list. + In any case, references are bent to the newrecord object. + + """ + for frag in source.fragments: + try: + merge_entities( + target.fragments[0], frag, merge_references_with_empty_diffs=False, + merge_id_with_resolved_entity=True) + except EntityMergeConflictError: + self._treat_merge_error_of(target.fragments[0], frag) + # We cannot merge but it is none of the clear case where merge is + # impossible. Thus we try later + target.fragments.append(frag) + if target.fragments[0].id is not None: + frag.id = target.fragments[0].id + except NotImplementedError: + print(target) + print(source) + raise + if source.remote_id is not None: + if target.remote_id is None: + target.remote_id = source.remote_id + else: + assert target.remote_id == source.remote_id + + if source.remote_path is not None: + if target.remote_path is None: + target.remote_path = source.remote_path + else: + assert target.remote_path == source.remote_path + + # update reference mappings + for se in self.forward_references.pop(source.uuid): + self.forward_references[target.uuid].add(se) + self.backward_references[se.uuid].remove(source) + self.backward_references[se.uuid].add(target) + for se in self.backward_references.pop(source.uuid): + self.backward_references[target.uuid].add(se) + self.forward_references[se.uuid].remove(source) + self.forward_references[se.uuid].add(target) + + for se in self.forward_id_references.pop(source.uuid): + self.forward_id_references[target.uuid].add(se) + 
self.backward_id_references[se.uuid].remove(source) + self.backward_id_references[se.uuid].add(target) + for se in self.backward_id_references.pop(source.uuid): + self.backward_id_references[target.uuid].add(se) + self.forward_id_references[se.uuid].remove(source) + self.forward_id_references[se.uuid].add(target) + + for se in self.forward_id_referenced_by.pop(source.uuid): + self.forward_id_referenced_by[target.uuid].add(se) + self.backward_id_referenced_by[se.uuid].remove(source) + self.backward_id_referenced_by[se.uuid].add(target) + for se in self.backward_id_referenced_by.pop(source.uuid): + self.backward_id_referenced_by[target.uuid].add(se) + self.forward_id_referenced_by[se.uuid].remove(source) + self.forward_id_referenced_by[se.uuid].add(target) + + # remove empyt SemanticEntity + self.se.remove(source) + if source in self.unchecked: + self.unchecked.remove(source) - @staticmethod + @ staticmethod + def new_create_reference_mapping(flat: list[SemanticEntity], se_lookup): + """ + TODO update docstring + Create a dictionary of dictionaries of the form: + dict[int, dict[str, list[Union[int,None]]]] + + - The integer index is the Python id of the value object. + - The string is the name of the first parent of the referencing object. + + Each value objects is taken from the values of all properties from the list flat. + + So the returned mapping maps ids of entities to the ids of objects which are referring + to them. + """ + # TODO we need to treat children of RecordTypes somehow. 
+ forward_references: dict[str, set[SemanticEntity]] = {} + backward_references: dict[str, set[SemanticEntity]] = {} + forward_id_references: dict[str, set[SemanticEntity]] = {} + backward_id_references: dict[str, set[SemanticEntity]] = {} + forward_id_referenced_by: dict[str, set[SemanticEntity]] = {} + backward_id_referenced_by: dict[str, set[SemanticEntity]] = {} + + # initialize with empty lists/dict + for se in flat: + for ent in se.fragments: + forward_references[se.uuid] = set() + backward_references[se.uuid] = set() + forward_id_references[se.uuid] = set() + backward_id_references[se.uuid] = set() + forward_id_referenced_by[se.uuid] = set() + backward_id_referenced_by[se.uuid] = set() + for se in flat: + for ent in se.fragments: + for p in ent.properties: + val = p.value + if not isinstance(val, list): + val = [val] + for v in val: + if isinstance(v, db.Entity): + vse = se_lookup[id(v)] + forward_references[se.uuid].add(vse) + backward_references[vse.uuid].add(se) + if len([el.name for el in se.registered_identifiable.properties if + el.name == p.name]) > 0: + forward_id_references[se.uuid].add(vse) + backward_id_references[vse.uuid].add(se) + if IdentifiableAdapter.referencing_entity_has_appropriate_type( + ent.parents, vse.registered_identifiable): + forward_id_referenced_by[se.uuid].add(vse) + backward_id_referenced_by[vse.uuid].add(se) + + return (forward_references, backward_references, forward_id_references, + backward_id_references, forward_id_referenced_by, backward_id_referenced_by, + ) + + @ staticmethod + def create_reference_mapping(flat: list[SemanticEntity]): + """ + TODO update docstring + Create a dictionary of dictionaries of the form: + dict[int, dict[str, list[Union[int,None]]]] + + - The integer index is the Python id of the value object. + - The string is the name of the first parent of the referencing object. + + Each value objects is taken from the values of all properties from the list flat. 
+ + So the returned mapping maps ids of entities to the ids of objects which are referring + to them. + """ + # TODO we need to treat children of RecordTypes somehow. + forward_references: dict[int, list[SemanticEntity]] = {} + backward_references: dict[int, list[SemanticEntity]] = {} + forward_id_references: dict[int, list[SemanticEntity]] = {} + backward_id_references: dict[int, list[SemanticEntity]] = {} + # initialize with empty lists/dict + for se in flat: + for ent in se.fragments: + forward_references[id(ent)] = [] + backward_references[id(ent)] = [] + forward_id_references[id(ent)] = [] + backward_id_references[id(ent)] = [] + forward_id_referenced_by[id(ent)] = [] + backward_id_referenced_by[id(ent)] = [] + for se in flat: + for ent in se.fragments: + for p in ent.properties: + val = p.value + if not isinstance(val, list): + val = [val] + for v in val: + if isinstance(v, db.Entity): + forward_references[id(ent)].append(v) + backward_references[id(v)].append(ent) + if (hasattr(ent, "registered") + and p.name in ent.registered.properties): + forward_id_references[id(ent)].append(v) + backward_id_references[id(v)].append(ent) + if (hasattr(v, "registered") and + v.registered.get_property("is_referenced_by") is not None and + (ent.parents[0].name in v.registered.get_property( + "is_referenced_by").value or + "*" in v.registered.get_property("is_referenced_by").value)): + forward_id_referenced_by[id(ent)].append(v) + backward_id_referenced_by[id(v)].append(ent) + + return (forward_references, backward_references, forward_id_references, + backward_id_references, forward_id_referenced_by, backward_id_referenced_by) + + @ staticmethod + def bend_references_to_new_object(old, new, entities): + """ Bend references to the other object + Iterate over all entities in `entities` and check the values of all properties of + occurances of old Entity and replace them with new Entity + """ + for el in entities: + for p in el.properties: + if isinstance(p.value, list): + for 
index, val in enumerate(p.value): + if val is old: + p.value[index] = new + else: + if p.value is old: + p.value = new + + @ staticmethod def sanity_check(entities: list[db.Entity]): for ent in entities: if ent.role == "Record" and len(ent.parents) == 0: raise RuntimeError(f"Records must have a parent.\n{ent}") - @staticmethod + @ staticmethod def create_flat_list(ent_list: list[db.Entity], flat: Optional[list[db.Entity]] = None): """ Recursively adds entities and all their properties contained in ent_list to @@ -68,3 +376,57 @@ class SemanticTarget(): flat.append(p.value) SemanticTarget.create_flat_list([p.value], flat) return flat + + @ staticmethod + def _treat_merge_error_of(newrecord, record): + """ + The parameters are two entities that cannot be merged with the merge_entities function. + + # This function checks for two obvious cases where no merge will ever be possible: + # 1. Two Entities with differing IDs + # 2. Two non-Entity values which differ + + It creates a more informative logger message and raises an Exception in those cases. + """ + for this_p in newrecord.properties: + that_p = record.get_property(this_p.name) + + if that_p is None: + logger.debug(f"Property {this_p.name} does not exist in the second entity. Note that " + "this should not be the reason for the merge conflict.") + continue + + if (isinstance(this_p.value, db.Entity) + and isinstance(that_p.value, db.Entity)): + if this_p.value.id is not None and that_p.value.id is not None: + if this_p.value.id != that_p.value.id: + logger.error("The Crawler is trying to merge two entities " + "because they should be the same object (same" + " identifiables), but they reference " + "different Entities with the same Property." 
+ f"Problematic Property: {this_p.name}\n" + f"Referenced Entities: {this_p.value.id} and " + f"{that_p.value.id}\n" + f"{record}\n{newrecord}") + raise RuntimeError("Cannot merge Entities") + elif (not isinstance(this_p.value, db.Entity) + and not isinstance(that_p.value, db.Entity)): + if ((this_p.value != that_p.value) + # TODO can we also compare lists? + and not isinstance(this_p.value, list) + and not isinstance(that_p.value, list)): + logger.error( + "The Crawler is trying to merge two entities because they should be the same " + "object (same identifiables), but they have different values for the same " + "Property.\n" + f"Problematic Property: {this_p.name}\n" + f"Values: {this_p.value} and {that_p.value}\n" + f"{record}\n{newrecord}") + raise RuntimeError("Cannot merge Entities") + + def combine_fragments(self): + for se in self.se: + for ent in se.fragments: + # TODO + newrecord = self.treated_records_lookup.get_any(record, identifiable) + merge_entities(newrecord, record, merge_id_with_resolved_entity=True) diff --git a/src/caoscrawler/treated_record_lookup.py b/src/caoscrawler/treated_record_lookup.py index bf9e3456719cd1d1f44a513399ac11ad6980b141..59bbd2df97919dffd0087f9efb7ce8ac9654a3b9 100644 --- a/src/caoscrawler/treated_record_lookup.py +++ b/src/caoscrawler/treated_record_lookup.py @@ -77,14 +77,14 @@ class TreatedRecordLookUp(): else: self._add_any(record, self._existing, identifiable) - def get_any(self, record: db.Entity, identifiable: Optional[Identifiable] = None): + def get_any(self, record: db.Entity = None, identifiable: Optional[Identifiable] = None): """ Check whether this Record was already added. 
Identity is based on ID, path or Identifiable represenation """ - if record.id is not None and record.id in self._id_look_up: + if record is not None and record.id is not None and record.id in self._id_look_up: return self._id_look_up[record.id] - if record.path is not None and record.path in self._path_look_up: + if record is not None and record.path is not None and record.path in self._path_look_up: return self._path_look_up[record.path] if (identifiable is not None and identifiable.get_representation() in self._identifiable_look_up): diff --git a/unittests/test_crawler.py b/unittests/test_crawler.py index 7a0904e08896eb188c302ead07cbf0373369191c..caaa4cce2e5704d796166773a3e04310187c6e7b 100644 --- a/unittests/test_crawler.py +++ b/unittests/test_crawler.py @@ -39,9 +39,8 @@ import linkahead.common.models as dbmodels import pytest import yaml from caosadvancedtools.models.parser import parse_model_from_string -from caoscrawler.crawl import (Crawler, SecurityMode, TreatedRecordLookUp, - _treat_deprecated_prefix, crawler_main, - split_restricted_path) +from caoscrawler.crawl import (Crawler, SecurityMode, _treat_deprecated_prefix, + crawler_main, split_restricted_path) from caoscrawler.debug_tree import DebugTree from caoscrawler.identifiable import Identifiable from caoscrawler.identifiable_adapters import (CaosDBIdentifiableAdapter, @@ -49,9 +48,11 @@ from caoscrawler.identifiable_adapters import (CaosDBIdentifiableAdapter, LocalStorageIdentifiableAdapter) from caoscrawler.scanner import (create_converter_registry, scan_directory, scan_structure_elements) +from caoscrawler.semantic_target import SemanticTarget from caoscrawler.stores import GeneralStore, RecordStore from caoscrawler.structure_elements import (DictElement, DictListElement, DictTextElement, File) +from caoscrawler.treated_record_lookup import TreatedRecordLookUp from linkahead.apiutils import compare_entities from linkahead.cached import cache_clear from linkahead.exceptions import 
EmptyUniqueQueryError @@ -294,19 +295,20 @@ def crawler_mocked_identifiable_retrieve(): def test_split_into_inserts_and_updates_single(crawler_mocked_identifiable_retrieve): crawler = crawler_mocked_identifiable_retrieve identlist = [Identifiable(name="A", record_type="C"), Identifiable(name="B", record_type="C")] - entlist = [db.Record(name="A").add_parent( - "C"), db.Record(name="B").add_parent("C")] - - assert crawler.treated_records_lookup.get_any(entlist[0], identlist[0]) is None - assert crawler.treated_records_lookup.get_any(entlist[0], identlist[0]) is None - assert not crawler._has_reference_value_without_id(identlist[0]) - assert not crawler._has_reference_value_without_id(identlist[1]) + entlist = [db.Record(name="A").add_parent("C"), + db.Record(name="B").add_parent("C")] + + st = SemanticTarget(entlist, crawler.identifiableAdapter) + assert st.treated_records_lookup.get_any(entlist[0], identlist[0]) is None + assert st.treated_records_lookup.get_any(entlist[0], identlist[0]) is None + assert not st.identity_relies_on_unchecked_entity(st.se[0]) + assert not st.identity_relies_on_unchecked_entity(st.se[1]) assert crawler.identifiableAdapter.retrieve_identified_record_for_record( identlist[0]).id == 1111 assert crawler.identifiableAdapter.retrieve_identified_record_for_record( identlist[1]) is None - insert, update = crawler.split_into_inserts_and_updates(deepcopy(entlist)) + insert, update = crawler.split_into_inserts_and_updates(st) assert len(insert) == 1 assert insert[0].name == "B" assert len(update) == 1 @@ -698,17 +700,6 @@ def test_security_mode(updateCacheMock, upmock, insmock): crawled_data[-1] = EXAMPLE_SERVER_STATE[-1].copy() -def test_create_reference_mapping(): - a = db.Record().add_parent("A") - b = db.Record(id=132).add_parent("B").add_property('a', a) - ref = Crawler.create_reference_mapping([a, b]) - assert id(a) in ref - assert id(b) in ref - assert "B" in ref[id(a)] - assert {} == ref[id(b)] - assert ref[id(a)]["B"] == [132] - - @ 
pytest.fixture def crawler_mocked_for_backref_test(): crawler = Crawler() diff --git a/unittests/test_identifiable_adapters.py b/unittests/test_identifiable_adapters.py index ee0e0d6cd7c791f78e7cd2307dc6f34698326b4a..01013b0c5463e3b1cd931b863959e497c539bee6 100644 --- a/unittests/test_identifiable_adapters.py +++ b/unittests/test_identifiable_adapters.py @@ -211,3 +211,17 @@ def test_retrieve_identified_record_for_identifiable(): assert r_cur.get_property( "responsible").value == idr_r1.get_property("responsible").value assert r_cur.description == idr_r1.description + + +def test_referencing_entity_has_appropriate_type(): + dummy = db.Record().add_parent("A") + registered_identifiable = db.RecordType() + rft = IdentifiableAdapter.referencing_entity_has_appropriate_type + assert not rft([], registered_identifiable) + assert not rft(dummy.parents, registered_identifiable) + registered_identifiable.add_property("is_referenced_by", "B") + assert not rft(dummy.parents, registered_identifiable) + registered_identifiable.properties[0].value = ["B", "A"] + assert rft(dummy.parents, registered_identifiable) + registered_identifiable.properties[0].value = ["B", "*"] + assert rft(dummy.parents, registered_identifiable) diff --git a/unittests/test_semantic_target.py b/unittests/test_semantic_target.py index e13a62e17fcddae0aa3b1d5191c64d11a2972f8a..58e52a3073efcababd86333a3275e21964fe5066 100644 --- a/unittests/test_semantic_target.py +++ b/unittests/test_semantic_target.py @@ -19,7 +19,8 @@ # along with this program. If not, see <https://www.gnu.org/licenses/>. 
# import linkahead as db -from caoscrawler.semantic_target import SemanticTarget +from caoscrawler.identifiable_adapters import CaosDBIdentifiableAdapter +from caoscrawler.semantic_target import SemanticEntity, SemanticTarget def test_create_flat_list(): @@ -40,3 +41,216 @@ def test_create_flat_list(): assert a in flat assert b in flat assert c in flat + + +def test_create_reference_mapping(): + a = db.Record().add_parent("A") + b = db.Record(id=132).add_parent("B").add_property('a', a) + ses = [SemanticEntity(a, db.RecordType().add_property("is_referenced_by", ["B"])), + SemanticEntity(b, db.RecordType().add_property("a"))] + + (forward_references, backward_references, forward_id_references, + backward_id_references, forward_id_referenced_by, + backward_id_referenced_by) = SemanticTarget.new_create_reference_mapping( + ses, + {id(a): ses[0], id(b): ses[1]} + ) + # test initialization + assert ses[0].uuid in forward_references + assert ses[1].uuid in forward_references + assert ses[0].uuid in backward_references + assert ses[1].uuid in backward_references + assert ses[0].uuid in forward_id_references + assert ses[1].uuid in forward_id_references + assert ses[0].uuid in backward_id_references + assert ses[1].uuid in backward_id_references + assert ses[0].uuid in forward_id_referenced_by + assert ses[1].uuid in forward_id_referenced_by + assert ses[0].uuid in backward_id_referenced_by + assert ses[1].uuid in backward_id_referenced_by + + # a has no ref + assert len(forward_references[ses[0].uuid]) == 0 + assert backward_references[ses[0].uuid] == set([ses[1]]) + # b does + assert forward_references[ses[1].uuid] == set([ses[0]]) + assert backward_references[ses[1].uuid] == set() + # a has no identifying reference + assert forward_id_references[ses[0].uuid] == set() + assert backward_references[ses[0].uuid] == set([ses[1]]) + # b has an identifying reference + assert forward_id_references[ses[1].uuid] == set([ses[0]]) + assert backward_references[ses[1].uuid] == set() 
+ # a has an identifying back reference + assert forward_id_referenced_by[ses[0].uuid] == set() + assert backward_id_referenced_by[ses[0].uuid] == set([ses[1]]) + # b does not + assert forward_id_referenced_by[ses[1].uuid] == set([ses[0]]) + assert backward_id_referenced_by[ses[1].uuid] == set() + + +def test_SemanticTarget(): + a = db.Record().add_parent("A") + ident_a = db.RecordType().add_parent("A").add_property("prop_ident") + ident_adapter = CaosDBIdentifiableAdapter() + ident_adapter.register_identifiable("A", ident_a) + st = SemanticTarget([a], ident_adapter) + + +def test_merge_into(): + # simplest case: a -> c + # b + # (a reference c; b does not reference anything; a & b have the same target record) + c = db.Record(name='c').add_parent("RT2") + a = db.Record().add_parent("RT1").add_property('RT2', c) + b = db.Record().add_parent("RT1") + + # We use the reference as identifying reference in both directions. Thus the map is the same + # for all three categories: references, id_references and id_referenced_by + ident_a = db.RecordType().add_parent("RT1").add_property("RT2") + ident_b = db.RecordType().add_parent("RT2").add_property("is_referenced_by", ["RT1"]) + ident_adapter = CaosDBIdentifiableAdapter() + ident_adapter.register_identifiable("RT1", ident_a) + ident_adapter.register_identifiable("RT2", ident_b) + + st = SemanticTarget([a, b], ident_adapter) + se_a = st.se_lookup[id(a)] + se_b = st.se_lookup[id(b)] + se_c = st.se_lookup[id(c)] + + # CHECK REFERENCE MAP: + # c is referenced by a + assert len(st.forward_references[se_a.uuid]) == 1 + se_c in st.forward_references[se_a.uuid] + assert len(st.forward_references[se_b.uuid]) == 0 + assert len(st.forward_references[se_c.uuid]) == 0 + assert len(st.backward_references[se_a.uuid]) == 0 + assert len(st.backward_references[se_b.uuid]) == 0 + assert len(st.backward_references[se_c.uuid]) == 1 + se_a in st.backward_references[se_c.uuid] + + assert len(st.forward_id_references[se_a.uuid]) == 1 + se_c in 
st.forward_id_references[se_a.uuid] + assert len(st.forward_id_references[se_b.uuid]) == 0 + assert len(st.forward_id_references[se_c.uuid]) == 0 + assert len(st.backward_id_references[se_a.uuid]) == 0 + assert len(st.backward_id_references[se_b.uuid]) == 0 + assert len(st.backward_id_references[se_c.uuid]) == 1 + se_a in st.backward_id_references[se_c.uuid] + + assert len(st.forward_id_referenced_by[se_a.uuid]) == 1 + se_c in st.forward_id_referenced_by[se_a.uuid] + assert len(st.forward_id_referenced_by[se_b.uuid]) == 0 + assert len(st.forward_id_referenced_by[se_c.uuid]) == 0 + assert len(st.backward_id_referenced_by[se_a.uuid]) == 0 + assert len(st.backward_id_referenced_by[se_b.uuid]) == 0 + assert len(st.backward_id_referenced_by[se_c.uuid]) == 1 + se_a in st.backward_id_referenced_by[se_c.uuid] + + st.merge_into(se_a, se_b) + + # CHECK REFERENCE MAP (after merge): + # c is now referenced by b + assert se_a.uuid not in st.forward_references + assert len(st.forward_references[se_b.uuid]) == 1 + se_c in st.forward_references[se_b.uuid] + assert len(st.forward_references[se_c.uuid]) == 0 + assert se_a.uuid not in st.backward_references + assert len(st.backward_references[se_b.uuid]) == 0 + assert len(st.backward_references[se_c.uuid]) == 1 + se_b in st.backward_references[se_c.uuid] + + assert se_a.uuid not in st.forward_id_references + assert len(st.forward_id_references[se_b.uuid]) == 1 + se_c in st.forward_id_references[se_b.uuid] + assert len(st.forward_id_references[se_c.uuid]) == 0 + assert se_a.uuid not in st.backward_id_references + assert len(st.backward_id_references[se_b.uuid]) == 0 + assert len(st.backward_id_references[se_c.uuid]) == 1 + se_b in st.backward_id_references[se_c.uuid] + + assert se_a.uuid not in st.forward_id_referenced_by + assert len(st.forward_id_referenced_by[se_b.uuid]) == 1 + se_c in st.forward_id_referenced_by[se_b.uuid] + assert len(st.forward_id_referenced_by[se_c.uuid]) == 0 + assert se_a.uuid not in 
st.backward_id_referenced_by + assert len(st.backward_id_referenced_by[se_b.uuid]) == 0 + assert len(st.backward_id_referenced_by[se_c.uuid]) == 1 + se_b in st.backward_id_referenced_by[se_c.uuid] + + # simple case: a -> c <- b (a & b reference c; a & b have the same target record) + c = db.Record(name='c').add_parent("RT2") + a = db.Record().add_parent("RT1").add_property('RT2', c) + b = db.Record().add_parent("RT1").add_property('RT2', c) + + st = SemanticTarget([a, b], ident_adapter) + se_a = st.se_lookup[id(a)] + se_b = st.se_lookup[id(b)] + se_c = st.se_lookup[id(c)] + + # CHECK REFERENCE MAP: + # c is referenced by a & b + assert len(st.forward_references[se_a.uuid]) == 1 + se_c in st.forward_references[se_a.uuid] + assert len(st.forward_references[se_b.uuid]) == 1 + se_c in st.forward_references[se_b.uuid] + assert len(st.forward_references[se_c.uuid]) == 0 + assert len(st.backward_references[se_a.uuid]) == 0 + assert len(st.backward_references[se_b.uuid]) == 0 + assert len(st.backward_references[se_c.uuid]) == 2 + se_a in st.backward_references[se_c.uuid] + se_b in st.backward_references[se_c.uuid] + + assert len(st.forward_id_references[se_a.uuid]) == 1 + se_c in st.forward_id_references[se_a.uuid] + assert len(st.forward_id_references[se_b.uuid]) == 1 + se_c in st.forward_id_references[se_b.uuid] + assert len(st.forward_id_references[se_c.uuid]) == 0 + assert len(st.backward_id_references[se_a.uuid]) == 0 + assert len(st.backward_id_references[se_b.uuid]) == 0 + assert len(st.backward_id_references[se_c.uuid]) == 2 + se_a in st.backward_id_references[se_c.uuid] + se_b in st.backward_id_references[se_c.uuid] + + assert len(st.forward_id_referenced_by[se_a.uuid]) == 1 + se_c in st.forward_id_referenced_by[se_a.uuid] + assert len(st.forward_id_referenced_by[se_b.uuid]) == 1 + se_c in st.forward_id_referenced_by[se_b.uuid] + assert len(st.forward_id_referenced_by[se_c.uuid]) == 0 + assert len(st.backward_id_referenced_by[se_a.uuid]) == 0 + assert 
len(st.backward_id_referenced_by[se_b.uuid]) == 0 + assert len(st.backward_id_referenced_by[se_c.uuid]) == 2 + se_a in st.backward_id_referenced_by[se_c.uuid] + se_b in st.backward_id_referenced_by[se_c.uuid] + + st.merge_into(se_a, se_b) + + # CHECK REFERENCE MAP (after merge): + # c is now referenced by b + # (same situation as above) + assert se_a.uuid not in st.forward_references + assert len(st.forward_references[se_b.uuid]) == 1 + se_c in st.forward_references[se_b.uuid] + assert len(st.forward_references[se_c.uuid]) == 0 + assert se_a.uuid not in st.backward_references + assert len(st.backward_references[se_b.uuid]) == 0 + assert len(st.backward_references[se_c.uuid]) == 1 + se_b in st.backward_references[se_c.uuid] + + assert se_a.uuid not in st.forward_id_references + assert len(st.forward_id_references[se_b.uuid]) == 1 + se_c in st.forward_id_references[se_b.uuid] + assert len(st.forward_id_references[se_c.uuid]) == 0 + assert se_a.uuid not in st.backward_id_references + assert len(st.backward_id_references[se_b.uuid]) == 0 + assert len(st.backward_id_references[se_c.uuid]) == 1 + se_b in st.backward_id_references[se_c.uuid] + + assert se_a.uuid not in st.forward_id_referenced_by + assert len(st.forward_id_referenced_by[se_b.uuid]) == 1 + se_c in st.forward_id_referenced_by[se_b.uuid] + assert len(st.forward_id_referenced_by[se_c.uuid]) == 0 + assert se_a.uuid not in st.backward_id_referenced_by + assert len(st.backward_id_referenced_by[se_b.uuid]) == 0 + assert len(st.backward_id_referenced_by[se_c.uuid]) == 1 + se_b in st.backward_id_referenced_by[se_c.uuid]