Skip to content
Snippets Groups Projects
Commit 439a72c5 authored by Henrik tom Wörden
Browse files

FIX: remove test and set from create_flat_list

parent 66e85a3f
Branches
Tags
2 merge requests: !91 "Release 0.3", !85 "MAINT: refactor create_flat_list"
Pipeline #33008 passed
...@@ -555,17 +555,20 @@ class Crawler(object): ...@@ -555,17 +555,20 @@ class Crawler(object):
return False return False
@staticmethod @staticmethod
def create_flat_list(ent_list: list[db.Entity], flat: Optional[set[db.Entity]] = None): def create_flat_list(ent_list: list[db.Entity], flat: Optional[list[db.Entity]] = None):
""" """
Recursively adds entities and all their properties contained in ent_list to Recursively adds entities and all their properties contained in ent_list to
the output set flat. the output list flat.
TODO: This function will be moved to pylib as it is also needed by the TODO: This function will be moved to pylib as it is also needed by the
high level API. high level API.
""" """
# Note: A set would be useful here, but we do not want a random order.
if flat is None: if flat is None:
flat = set() flat = list()
flat.update(ent_list) for el in ent_list:
if el not in flat:
flat.append(el)
for ent in ent_list: for ent in ent_list:
for p in ent.properties: for p in ent.properties:
# For lists append each element that is of type Entity to flat: # For lists append each element that is of type Entity to flat:
...@@ -573,13 +576,13 @@ class Crawler(object): ...@@ -573,13 +576,13 @@ class Crawler(object):
for el in p.value: for el in p.value:
if isinstance(el, db.Entity): if isinstance(el, db.Entity):
if el not in flat: if el not in flat:
flat.add(el) flat.append(el)
Crawler.create_flat_list([el], flat) Crawler.create_flat_list([el], flat)
elif isinstance(p.value, db.Entity): elif isinstance(p.value, db.Entity):
if p.value not in flat: if p.value not in flat:
flat.add(p.value) flat.append(p.value)
Crawler.create_flat_list([p.value], flat) Crawler.create_flat_list([p.value], flat)
return list(flat) return flat
def _has_missing_object_in_references(self, ident: Identifiable, referencing_entities: list): def _has_missing_object_in_references(self, ident: Identifiable, referencing_entities: list):
""" """
...@@ -749,8 +752,7 @@ class Crawler(object): ...@@ -749,8 +752,7 @@ class Crawler(object):
def split_into_inserts_and_updates(self, ent_list: list[db.Entity]): def split_into_inserts_and_updates(self, ent_list: list[db.Entity]):
to_be_inserted: list[db.Entity] = [] to_be_inserted: list[db.Entity] = []
to_be_updated: list[db.Entity] = [] to_be_updated: list[db.Entity] = []
flat = set() flat = Crawler.create_flat_list(ent_list)
flat = Crawler.create_flat_list(ent_list, flat)
# TODO: can the following be removed at some point # TODO: can the following be removed at some point
for ent in flat: for ent in flat:
......
...@@ -70,51 +70,6 @@ def test_issue_10(): ...@@ -70,51 +70,6 @@ def test_issue_10():
assert float(records[0].get_property("float_prop").value) == 4.0 assert float(records[0].get_property("float_prop").value) == 4.0
def test_issue_39():
"""Test for merge conflicts in
`crawl.Crawler.split_into_inserts_and_updates` (see
https://gitlab.com/caosdb/caosdb-crawler/-/issues/39).
"""
crawler = Crawler(debug=True)
# For trying and failing to retrieve remotely identified records
def _fake_retrieve(*args, **kwargs):
return None
ident = CaosDBIdentifiableAdapter()
# identifiable property is just name for both Record Types
ident.register_identifiable("RT_A", db.RecordType().add_parent(
name="RT_A").add_property(name="name"))
ident.register_identifiable("RT_B", db.RecordType().add_parent(
name="RT_B").add_property(name="name"))
# overwrite retrieve
ident.retrieve_identified_record_for_identifiable = _fake_retrieve
crawler.identifiableadapter = ident
# a1 (has id) references b1 (has no id)
a1 = db.Record(name="A", id=101).add_parent(name="RT_A")
b1 = db.Record(name="B").add_parent(name="RT_B")
a1.add_property(name="RT_B", value=b1)
# a2 (no id) references b2 (has id)
a2 = db.Record(name="A").add_parent(name="RT_A")
b2 = db.Record(name="B", id=102).add_parent(name="RT_B")
a2.add_property(name="RT_B", value=b2)
flat_list = [b1, a1, a2, b2]
# the two records with ids exist remotely
crawler.add_to_remote_existing_cache(a1,
Identifiable(name="A", record_id=101, record_type="RT_A"))
crawler.add_to_remote_existing_cache(b2,
Identifiable(name="B", record_id=102, record_type="RT_B"))
# this would result in a merge conflict before
ins, ups = crawler.split_into_inserts_and_updates(flat_list)
@mark.xfail(reason="FIX: https://gitlab.com/caosdb/caosdb-crawler/-/issues/47") @mark.xfail(reason="FIX: https://gitlab.com/caosdb/caosdb-crawler/-/issues/47")
def test_list_datatypes(): def test_list_datatypes():
crawler_definition = { crawler_definition = {
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment