Skip to content
Snippets Groups Projects
Commit 3cfaefe9 authored by Florian Spreckelsen's avatar Florian Spreckelsen
Browse files

Merge branch 'f-fix-merge' into f-fix-resolve-references-with-cached

parents cc6538a4 977b4604
Branches
Tags
3 merge requests!160STY: styling,!141FIX: Resolve referneces to existing entities correctly,!140New f fix merge
Pipeline #46361 failed
......@@ -537,6 +537,48 @@ class Crawler(object):
return references
@staticmethod
def _treat_merge_error_of(newrecord, record):
    """Raise an informative error if two entities can obviously never be merged.

    The parameters are two entities that cannot be merged with the
    merge_entities function. This function checks for two obvious cases
    where no merge will ever be possible:

    1. Two Entities with differing IDs
    2. Two non-Entity values which differ

    It creates a more informative logger message and raises an Exception in
    those cases. If neither case is found, it returns silently so the caller
    can decide how to proceed.

    Parameters
    ----------
    newrecord
        Entity whose properties are iterated.
    record
        Entity in which the property of the same name is looked up.

    Raises
    ------
    RuntimeError
        If a property present in both entities references Entities with
        different (non-None) IDs, or holds different non-Entity, non-list
        values.
    """
    for this_p in newrecord.properties:
        that_p = record.get_property(this_p.name)

        # ``record`` may not have a property of this name at all. A missing
        # property is neither of the two conflict cases checked here, and
        # accessing ``.value`` on None would hide the real merge problem
        # behind an AttributeError.
        if that_p is None:
            continue

        if (isinstance(this_p.value, db.Entity)
                and isinstance(that_p.value, db.Entity)):
            # Case 1: both values are references; only comparable when both
            # referenced Entities already have an ID.
            if this_p.value.id is not None and that_p.value.id is not None:
                if this_p.value.id != that_p.value.id:
                    logger.error("The Crawler is trying to merge two entities "
                                 "because they should be the same object (same"
                                 " identifiables), but they reference "
                                 "different Entities with the same Property.\n"
                                 f"Problematic Property: {this_p.name}\n"
                                 f"Referenced Entities: {this_p.value.id} and "
                                 f"{that_p.value.id}\n"
                                 f"{record}\n{newrecord}")
                    raise RuntimeError("Cannot merge Entities")
        elif (not isinstance(this_p.value, db.Entity)
              and not isinstance(that_p.value, db.Entity)):
            # Case 2: both values are plain (non-Entity) values.
            if ((this_p.value != that_p.value)
                # TODO can we also compare lists?
                and not isinstance(this_p.value, list)
                    and not isinstance(that_p.value, list)):
                logger.error("The Crawler is trying to merge two entities "
                             "because they should be the same object (same"
                             " identifiables), but they have "
                             "different values for the same Property.\n"
                             f"Problematic Property: {this_p.name}\n"
                             f"Values: {this_p.value} and "
                             f"{that_p.value}\n"
                             f"{record}\n{newrecord}")
                raise RuntimeError("Cannot merge Entities")
def split_into_inserts_and_updates(self, ent_list: list[db.Entity]):
to_be_inserted: list[db.Entity] = []
to_be_updated: list[db.Entity] = []
......@@ -549,10 +591,11 @@ class Crawler(object):
resolved_references = True
# flat contains Entities which could not yet be checked against the remote server
try_to_merge_later = []
while resolved_references and len(flat) > 0:
resolved_references = False
referencing_entities = self.create_reference_mapping(
flat + to_be_updated + to_be_inserted)
flat + to_be_updated + try_to_merge_later+to_be_inserted)
# For each element we try to find out whether we can find it in the server or whether
# it does not yet exist. Since a Record may reference other unknown Records it might not
......@@ -599,14 +642,24 @@ class Crawler(object):
del flat[i]
# 3. Is it in the cache of already checked Records?
elif self.get_from_any_cache(identifiable) is not None:
# We merge the two in order to prevent loss of information
newrecord = self.get_from_any_cache(identifiable)
# Since the identifiables are the same, newrecord and record actually describe
# the same object.
# We merge the two in order to prevent loss of information
try:
merge_entities(newrecord, record)
merge_entities(newrecord, record, merge_references_with_empty_diffs=False)
except EntityMergeConflictError:
continue
_treat_merge_error_of(newrecord, record)
# We cannot merge, but it is none of the clear cases where a merge is
# impossible. Thus we try again later.
try_to_merge_later.append(record)
if newrecord.id is not None:
record.id = newrecord.id
Crawler.bend_references_to_new_object(
old=record, new=newrecord, entities=flat + to_be_updated + to_be_inserted)
old=record, new=newrecord, entities=flat + to_be_updated +
to_be_inserted+try_to_merge_later)
referencing_entities = self.create_reference_mapping(
flat + to_be_updated + try_to_merge_later+to_be_inserted)
del flat[i]
resolved_references = True
......@@ -641,6 +694,14 @@ class Crawler(object):
for record in flat:
self.replace_references_with_cached(record, referencing_entities)
# We postponed the merge for records where it failed previously and try it again now.
# This only might add properties of the postponed records to the already used ones.
for record in try_to_merge_later:
identifiable = self.identifiableAdapter.get_identifiable(
record,
referencing_entities=referencing_entities)
newrecord = self.get_from_any_cache(identifiable)
merge_entities(newrecord, record)
if len(flat) > 0:
circle = self.detect_circular_dependency(flat)
if circle is None:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment