Skip to content
Snippets Groups Projects
Commit 1c88800d authored by Henrik tom Wörden's avatar Henrik tom Wörden
Browse files

wip

parent 26ca456e
Branches
Tags
2 merge requests!123REL: Release v0.6.0,!116ENH: Allow to prevent inserts and updates for Records that have certain parents.
...@@ -90,6 +90,10 @@ yaml.SafeLoader.add_constructor("!defmacro", defmacro_constructor) ...@@ -90,6 +90,10 @@ yaml.SafeLoader.add_constructor("!defmacro", defmacro_constructor)
yaml.SafeLoader.add_constructor("!macro", macro_constructor) yaml.SafeLoader.add_constructor("!macro", macro_constructor)
class ForbiddenTransaction(Exception):
    """Raised when a synchronization would insert or update Records that
    have a parent which is explicitly excluded from that transaction
    (via the ``no_insert_RTs`` / ``no_update_RTs`` arguments)."""
    pass
def check_identical(record1: db.Entity, record2: db.Entity, ignore_id=False): def check_identical(record1: db.Entity, record2: db.Entity, ignore_id=False):
"""Check whether two entities are identical. """Check whether two entities are identical.
...@@ -815,10 +819,22 @@ class Crawler(object): ...@@ -815,10 +819,22 @@ class Crawler(object):
update_cache = UpdateCache() update_cache = UpdateCache()
update_cache.insert(to_be_updated, run_id) update_cache.insert(to_be_updated, run_id)
@staticmethod
def check_whether_parent_exists(records: "list[db.Entity]",
                                parents: "list[str] | None" = None):
    """Return all records from ``records`` that have a parent listed in
    ``parents``.

    Parameters
    ----------
    records : list[db.Entity]
        Entities whose parents are checked.
    parents : list[str], optional
        Names of forbidden parents. ``None`` is treated like an empty
        list, i.e. no parent is forbidden. (Callers such as
        ``synchronize`` pass ``None`` by default; without this guard
        ``parent.name in None`` would raise ``TypeError``.)

    Returns
    -------
    list[db.Entity]
        The offending records; each record appears at most once, even
        if several of its parents are forbidden.
    """
    if not parents:
        return []
    forbidden = set(parents)  # O(1) membership tests
    problems = []
    for rec in records:
        for parent in rec.parents:
            if parent.name in forbidden:
                problems.append(rec)
                # report each record only once
                break
    return problems
def synchronize(self, def synchronize(self,
commit_changes: bool = True, commit_changes: bool = True,
unique_names: bool = True, unique_names: bool = True,
crawled_data: Optional[list[db.Record]] = None, crawled_data: Optional[list[db.Record]] = None,
no_insert_RTs=None,
no_update_RTs=None,
): ):
""" """
This function applies several stages: This function applies several stages:
...@@ -832,6 +848,9 @@ class Crawler(object): ...@@ -832,6 +848,9 @@ class Crawler(object):
if commit_changes is True, the changes are synchronized to the CaosDB server. if commit_changes is True, the changes are synchronized to the CaosDB server.
For debugging in can be useful to set this to False. For debugging in can be useful to set this to False.
no_insert_RTs
no_update_RTs
Return the final to_be_inserted and to_be_updated as tuple. Return the final to_be_inserted and to_be_updated as tuple.
""" """
if crawled_data is None: if crawled_data is None:
...@@ -858,6 +877,15 @@ class Crawler(object): ...@@ -858,6 +877,15 @@ class Crawler(object):
# to the existing ones # to the existing ones
to_be_updated = self.remove_unnecessary_updates(to_be_updated, identified_records) to_be_updated = self.remove_unnecessary_updates(to_be_updated, identified_records)
ins_problems = self.check_whether_parent_exists(to_be_inserted, no_insert_RTs)
upd_problems = self.check_whether_parent_exists(to_be_updated, no_update_RTs)
if len(ins_problems) > 0 or len(upd_problems) > 0:
raise ForbiddenTransaction(
"One or more Records that have a parent which is excluded from insters or updates."
f"The following Records cannot be inserted due to a parent:\n{ins_problems}"
f"The following Records cannot be updated due to a parent:\n{upd_problems}"
)
logger.info(f"Going to insert {len(to_be_inserted)} Entities and update " logger.info(f"Going to insert {len(to_be_inserted)} Entities and update "
f"{len(to_be_updated)} Entities.") f"{len(to_be_updated)} Entities.")
if commit_changes: if commit_changes:
...@@ -1206,6 +1234,10 @@ def crawler_main(crawled_directory_path: str, ...@@ -1206,6 +1234,10 @@ def crawler_main(crawled_directory_path: str,
crawler.run_id) crawler.run_id)
_update_status_record(crawler.run_id, len(inserts), len(updates), status="OK") _update_status_record(crawler.run_id, len(inserts), len(updates), status="OK")
return 0 return 0
except ForbiddenTransaction as err:
logger.error(err)
_update_status_record(crawler.run_id, 0, 0, status="FAILED")
return 1
except ConverterValidationError as err: except ConverterValidationError as err:
logger.error(err) logger.error(err)
_update_status_record(crawler.run_id, 0, 0, status="FAILED") _update_status_record(crawler.run_id, 0, 0, status="FAILED")
......
...@@ -74,3 +74,25 @@ def test_deprecated_functions(): ...@@ -74,3 +74,25 @@ def test_deprecated_functions():
cr.crawled_data cr.crawled_data
assert issubclass(w[-1].category, DeprecationWarning) assert issubclass(w[-1].category, DeprecationWarning)
assert "The use of self.crawled_data is depricated" in str(w[-1].message) assert "The use of self.crawled_data is depricated" in str(w[-1].message)
def test_check_whether_parent_exists():
    """Unit test for Crawler.check_whether_parent_exists."""
    # No records and no forbidden parents -> empty list.
    trivial_result = Crawler.check_whether_parent_exists([], [])
    assert len(trivial_result) == 0
    assert isinstance(trivial_result, list)
    # Records, but no forbidden parent names -> still empty.
    # (Bug fix: the asserts previously re-checked `trivial_result`
    # instead of `trivial_result2`, so this case was never verified.)
    trivial_result2 = Crawler.check_whether_parent_exists(
        [db.Record(), db.Record()], [])
    assert len(trivial_result2) == 0
    assert isinstance(trivial_result2, list)
    # Make sure records with a forbidden parent are collected.
    a_recs = Crawler.check_whether_parent_exists(
        [
            db.Record(id=1).add_parent("A"),
            db.Record(id=2).add_parent("B"),
            db.Record(id=3).add_parent("B"),
            db.Record(id=4).add_parent("A"),
        ], ["A"])
    a_recs_ids = [el.id for el in a_recs]
    assert 1 in a_recs_ids
    assert 4 in a_recs_ids
    # Records with only non-forbidden parents must not be reported.
    assert 2 not in a_recs_ids
    assert 3 not in a_recs_ids
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment