diff --git a/CHANGELOG.md b/CHANGELOG.md index c9b821276f554f6fe5162c65b8e04bb5ffea4aa4..31f588b7a96f1f84e7902c2374cb9f843200b18e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed ### +* ImpossibleMergeErrors now correctly include the problematic property + and its values in their string representation. + ### Security ### ### Documentation ### diff --git a/src/caoscrawler/crawl.py b/src/caoscrawler/crawl.py index 1064fbcac355eda620353885852862cfd90df5ba..a449a779ed7979719bbe0ac780adecf0e4fec8f6 100644 --- a/src/caoscrawler/crawl.py +++ b/src/caoscrawler/crawl.py @@ -64,6 +64,7 @@ from linkahead.utils.escape import escape_squoted_text from .config import get_config_setting from .converters import Converter, ConverterValidationError from .debug_tree import DebugTree +from .exceptions import ImpossibleMergeError from .identifiable_adapters import (CaosDBIdentifiableAdapter, IdentifiableAdapter) from .logging import configure_server_side_logging @@ -1114,6 +1115,14 @@ def crawler_main(crawled_directory_path: str, logger.error(err) _update_status_record(crawler.run_id, 0, 0, status="FAILED") return 1 + except ImpossibleMergeError as err: + logger.debug(traceback.format_exc()) + logger.error( + "Encountered conflicting information when creating Records from the crawled " + f"data:\n\n{err}" + ) + _update_status_record(crawler.run_id, 0, 0, status="FAILED") + return 1 except TransactionError as err: logger.debug(traceback.format_exc()) logger.error(err) diff --git a/src/caoscrawler/exceptions.py b/src/caoscrawler/exceptions.py index e7c61c34e2abbebef4790bde42f50d4b5b29f957..b9b94e1d4f9064701e8e05e22f5a0d3c6d3291a9 100644 --- a/src/caoscrawler/exceptions.py +++ b/src/caoscrawler/exceptions.py @@ -20,6 +20,9 @@ # along with this program. If not, see <https://www.gnu.org/licenses/>. # +from typing import Any + + class ForbiddenTransaction(Exception): """Thrown if an transactions is needed that is not allowed. For example an update of an entity if the security level is INSERT @@ -30,12 +33,40 @@ class ForbiddenTransaction(Exception): class ImpossibleMergeError(Exception): """Thrown if due to identifying information, two SyncNodes or two Properties of SyncNodes should be merged, but there is conflicting information that prevents this. + + Parameters + ---------- + msg : str + A case-specific error message describing where the merger error occurred. + pname : str + The name of the property the values of which caused the merge error. + value_a, value_b : Any + The two values that couldn't be merged. + + Attributes + ---------- + message : str + A case-specific error message describing where the merger error occurred. + values : tuple[Any] + The two values that couldn't be merged. + pname : str + The name of the property the values of which caused the merge error. """ - def __init__(self, *args, pname, values, **kwargs): + def __init__(self, msg: str, pname: str, value_a: Any, value_b: Any): self.pname = pname - self.values = values - super().__init__(self, *args, **kwargs) + self.values = (value_a, value_b) + self.message = msg + super().__init__(self, msg) + + def __str__(self): + return ( + f"{self.message}\n\nThe problematic property is '{self.pname}' with " + f"values '{self.values[0]}' and '{self.values[1]}'." + ) + + def __repr__(self): + return self.__str__() class InvalidIdentifiableYAML(Exception): diff --git a/src/caoscrawler/sync_node.py b/src/caoscrawler/sync_node.py index 141e743bffa09f0caf661bcd1939a4233cb7249c..d35b49c17aea4cba05ab46291ba65023007283ee 100644 --- a/src/caoscrawler/sync_node.py +++ b/src/caoscrawler/sync_node.py @@ -22,7 +22,6 @@ from __future__ import annotations -import logging from typing import TYPE_CHECKING, Any, Optional, Union import linkahead as db @@ -35,8 +34,6 @@ from .exceptions import ImpossibleMergeError if TYPE_CHECKING: from .identifiable import Identifiable -logger = logging.getLogger(__name__) - class TempID(int): """A special kind of int for negative temporary IDs. @@ -87,13 +84,25 @@ class SyncNode(db.Entity): self.registered_identifiable = registered_identifiable def update(self, other: SyncNode) -> None: - """update this node with information of given ``other`` SyncNode. + """Update this node with information of given ``other`` SyncNode. + + parents are added if they are not yet in the list properties + are added in any case. This may lead to duplication of + properties. We allow this duplication here and remove it when + we create a db.Entity (export_entity function) because if + property values are SyncNode objects, they might not be + comparable (no ID, no identifiable) yet. + + Raises + ------ + ValueError: + The `other` SyncNode doesn't share identifiables with + `this` SyncNode, so they can't be merged. + ImpossibleMergeError: + The two SyncNodes are incompatible in their attributes + like "id", "role", "path", "file", "name", or + "description". - parents are added if they are not yet in the list - properties are added in any case. This may lead to duplication of properties. - We allow this duplication here and remove it when we create a db.Entity (export_entity - function) because if property values are SyncNode objects, they might not be comparable (no - ID, no identifiable) yet. """ if other.identifiable is not None and self.identifiable is not None: @@ -121,8 +130,9 @@ class SyncNode(db.Entity): f"Trying to update {attr} but this would lead to an " f"override of the value '{self.__getattribute__(attr)}' " f"by the value '{other.__getattribute__(attr)}'", - pname=attr, values=(self.__getattribute__(attr), - other.__getattribute__(attr)) + pname=attr, + value_a=self.__getattribute__(attr), + value_b=other.__getattribute__(attr) ) for p in other.parents: if not parent_in_list(p, self.parents): @@ -136,6 +146,13 @@ class SyncNode(db.Entity): Properties are only added once (based on id or name). If values do not match, an Error is raised. If values are SyncNode objects with IDs, they are considered equal if their IDs are equal. + + Raises + ------ + RuntimeError: + In case of a unsupported role, so no Entity can't be created. + ImpossibleMergeError: + In case of conflicting property values in this SyncNode. """ ent = None if self.role == "Record": @@ -175,16 +192,10 @@ class SyncNode(db.Entity): unequal = True if unequal: - logger.error( - "The Crawler is trying to create an entity," - " but there are conflicting property values." - f"Problematic Property: {p.name}\n" - f"First value:\n{entval}\n" - f"Second value:\n{pval}\n" - f"{self}" - ) ime = ImpossibleMergeError( - "Cannot merge Entities", pname=p.name, values=(entval, pval) + f"The crawler is trying to create an entity \n\n{self}\n\nbut there are " + "conflicting property values.", + pname=p.name, value_a=entval, value_b=pval ) raise ime return ent diff --git a/unittests/test_crawler.py b/unittests/test_crawler.py index 0a6aee44a1892f1c950a80b936adf184616fd612..aaddec9e8c6b17ad726808bc36b0784adbc3c36d 100644 --- a/unittests/test_crawler.py +++ b/unittests/test_crawler.py @@ -487,9 +487,17 @@ a: ([b1, b2]) # The Bs cannot be merged due to different references to Cs with raises(ImpossibleMergeError) as rte: crawler._split_into_inserts_and_updates(st) + + # The order of the Cs is random so we only know that they are the + # last two elements but not in which order they have been tried to + # be merged. + assert "The problematic property is 'C' with values " in str(rte.value) + assert f"'[{st.nodes[-2]}]'" in str(rte.value) + assert f"'[{st.nodes[-1]}]'" in str(rte.value) + # TODO # assert not isinstance(rte.value, NotImplementedError), \ - # "Exception must not be NotImplementedError, but plain RuntimeError." + # "Exception must not be NotImplementedError, but plain RuntimeError." # assert "Could not find referencing entities" in rte.value.args[0] # assert "merge conflicts in the referencing" in rte.value.args[0] diff --git a/unittests/test_sync_graph.py b/unittests/test_sync_graph.py index 9015e74be69c60c43ece80a2f742d6e9b7badda6..84451790ddd02f90c2a12a3ce7280b17d8f7c73b 100644 --- a/unittests/test_sync_graph.py +++ b/unittests/test_sync_graph.py @@ -30,8 +30,7 @@ from test_crawler import (basic_retrieve_by_name_mock_up, mock_get_entity_by, ) -from caoscrawler.exceptions import (ImpossibleMergeError, - MissingIdentifyingProperty, +from caoscrawler.exceptions import (MissingIdentifyingProperty, MissingRecordType, ) from caoscrawler.identifiable import Identifiable diff --git a/unittests/test_sync_node.py b/unittests/test_sync_node.py index 668a53470d028dfcfce7bb5785d68b685b034595..bd9e1a6ccbc2ac9ec9ccace96e0ec0422ba1d95b 100644 --- a/unittests/test_sync_node.py +++ b/unittests/test_sync_node.py @@ -238,8 +238,9 @@ def test_export_node(): messages = {str(w.message) for w in caught} assert ("Multiproperties are not supported by the crawler.") in messages - with pytest.raises(ImpossibleMergeError): + with pytest.raises(ImpossibleMergeError) as ime: exp = SyncNode(rec_a).export_entity() + assert "The problematic property is 'a' with values '['b']' and '['a']'" in str(ime.value) # SyncNodes with same ID are considered equal rec_a = (db.Record(id=101) @@ -269,18 +270,26 @@ def test_export_node(): .add_property(name="a", value=SyncNode(db.Record())) .add_property(name="a", value=SyncNode(db.Record()))) - with pytest.raises(ImpossibleMergeError): + with pytest.raises(ImpossibleMergeError) as ime: exp = SyncNode(rec_a).export_entity() + msg = (f"The problematic property is 'a' with values '[{SyncNode(db.Record())}]' " + f"and '[{SyncNode(db.Record())}]'") + assert msg in str(ime.value) + # different SyncNode Objects with differing ID are not equal rec_a = (db.Record(id=101) .add_parent("B") .add_property(name="a", value=SyncNode(db.Record(id=1))) .add_property(name="a", value=SyncNode(db.Record(id=2)))) - with pytest.raises(ImpossibleMergeError): + with pytest.raises(ImpossibleMergeError) as ime: exp = SyncNode(rec_a).export_entity() + msg = (f"The problematic property is 'a' with values '[{SyncNode(db.Record(id=1))}]' " + f"and '[{SyncNode(db.Record(id=2))}]'") + assert msg in str(ime.value) + # SyncNodes with same ID are considered equal (list) rec_a = (db.Record(id=101) .add_parent("B") @@ -297,9 +306,14 @@ def test_export_node(): .add_property(name="a", value=[SyncNode(db.Record(id=1)), SyncNode(db.Record(id=2))]) .add_property(name="a", value=[SyncNode(db.Record(id=2)), SyncNode(db.Record(id=1))])) - with pytest.raises(ImpossibleMergeError): + with pytest.raises(ImpossibleMergeError) as ime: exp = SyncNode(rec_a).export_entity() + msg = ("The problematic property is 'a' with values " + f"'{[SyncNode(db.Record(id=1)), SyncNode(db.Record(id=2))]}' " + f"and '{[SyncNode(db.Record(id=2)), SyncNode(db.Record(id=1))]}'") + assert msg in str(ime.value) + # same SyncNode object is obviously equal (list) sn = SyncNode(db.Record(id=1)) rec_a = (db.Record(id=101) @@ -316,26 +330,37 @@ def test_export_node(): .add_property(name="a", value=[SyncNode(db.Record())]) .add_property(name="a", value=[SyncNode(db.Record())])) - with pytest.raises(ImpossibleMergeError): + with pytest.raises(ImpossibleMergeError) as ime: exp = SyncNode(rec_a).export_entity() + msg = ("The problematic property is 'a' with values " + f"'{[SyncNode(db.Record())]}' and '{[SyncNode(db.Record())]}'") + assert msg in str(ime.value) + # different SyncNode Objects with differing are not equal (list) rec_a = (db.Record(id=101) .add_parent("B") .add_property(name="a", value=[SyncNode(db.Record(id=1))]) .add_property(name="a", value=[SyncNode(db.Record(id=2))])) - with pytest.raises(ImpossibleMergeError): + with pytest.raises(ImpossibleMergeError) as ime: exp = SyncNode(rec_a).export_entity() + msg = ("The problematic property is 'a' with values " + f"'{[SyncNode(db.Record(id=1))]}' and '{[SyncNode(db.Record(id=2))]}'") + assert msg in str(ime.value) + # list vs no list rec_a = (db.Record(id=101) .add_parent("B") .add_property(name="a", value=SyncNode(db.Record(id=1))) .add_property(name="a", value=[SyncNode(db.Record(id=1))])) - with pytest.raises(ImpossibleMergeError): + with pytest.raises(ImpossibleMergeError) as ime: exp = SyncNode(rec_a).export_entity() + msg = ("The problematic property is 'a' with values " + f"'[{SyncNode(db.Record(id=1))}]' and '{[SyncNode(db.Record(id=1))]}'") + assert msg in str(ime.value) # different list sizes rec_a = (db.Record(id=101) @@ -343,5 +368,10 @@ def test_export_node(): .add_property(name="a", value=[SyncNode(db.Record(id=1))]) .add_property(name="a", value=[SyncNode(db.Record(id=1)), SyncNode(db.Record(id=1))])) - with pytest.raises(ImpossibleMergeError): + with pytest.raises(ImpossibleMergeError) as ime: exp = SyncNode(rec_a).export_entity() + + msg = ("The problematic property is 'a' with values " + f"'{[SyncNode(db.Record(id=1))]}' and " + f"'{[SyncNode(db.Record(id=1)), SyncNode(db.Record(id=1))]}'") + assert msg in str(ime.value)